docs for pattern_lens v0.6.0
View Source on GitHub

pattern_lens.indexes

writes indexes to the model directory for the frontend to use or for record keeping


  1"""writes indexes to the model directory for the frontend to use or for record keeping"""
  2
  3import importlib.resources
  4import inspect
  5import itertools
  6import json
  7from collections.abc import Callable
  8from pathlib import Path
  9
 10import pattern_lens
 11from pattern_lens.attn_figure_funcs import (
 12	_FIGURE_NAMES_KEY,
 13	ATTENTION_MATRIX_FIGURE_FUNCS,
 14)
 15
 16
 17def generate_prompts_jsonl(model_dir: Path) -> None:
 18	"""creates a `prompts.jsonl` file with all the prompts in the model directory
 19
 20	looks in all directories in `{model_dir}/prompts` for a `prompt.json` file
 21	"""
 22	prompts: list[dict] = list()
 23	for prompt_dir in (model_dir / "prompts").iterdir():
 24		prompt_file: Path = prompt_dir / "prompt.json"
 25		if prompt_file.exists():
 26			with open(prompt_file, "r") as f:
 27				prompt_data: dict = json.load(f)
 28				prompts.append(prompt_data)
 29
 30	with open(model_dir / "prompts.jsonl", "w") as f:
 31		for prompt in prompts:
 32			f.write(json.dumps(prompt))
 33			f.write("\n")
 34
 35
 36def generate_models_jsonl(path: Path) -> None:
 37	"""creates a `models.jsonl` file with all the models"""
 38	models: list[dict] = list()
 39	for model_dir in (path).iterdir():
 40		model_cfg_path: Path = model_dir / "model_cfg.json"
 41		if model_cfg_path.exists():
 42			with open(model_cfg_path, "r") as f:
 43				model_cfg: dict = json.load(f)
 44				models.append(model_cfg)
 45
 46	with open(path / "models.jsonl", "w") as f:
 47		for model in models:
 48			f.write(json.dumps(model))
 49			f.write("\n")
 50
 51
 52def get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
 53	"""get metadata for a function
 54
 55	# Parameters:
 56	- `func : Callable` which has a `_FIGURE_NAMES_KEY` (by default `_figure_names`) attribute
 57
 58	# Returns:
 59
 60	`list[dict[str, str | None]]`
 61	each dictionary is for a function, containing:
 62
 63	- `name : str` : the name of the figure
 64	- `func_name : str`
 65		the name of the function. if not a multi-figure function, this is identical to `name`
 66		if it is a multi-figure function, then `name` is `{func_name}.{figure_name}`
 67	- `doc : str` : the docstring of the function
 68	- `figure_save_fmt : str | None` : the format of the figure that the function saves, using the `figure_save_fmt` attribute of the function. `None` if the attribute does not exist
 69	- `source : str | None` : the source file of the function
 70	- `code : str | None` : the source code of the function, split by line. `None` if the source file cannot be read
 71
 72	"""
 73	source_file: str | None = inspect.getsourcefile(func)
 74	func_name: str = getattr(func, "__name__", "<unknown>")
 75	output: dict[str, str | None] = dict(
 76		func_name=func_name,
 77		doc=getattr(func, "__doc__", None),
 78		figure_save_fmt=getattr(func, "figure_save_fmt", None),
 79		source=Path(source_file).as_posix() if source_file else None,
 80	)
 81
 82	try:
 83		output["code"] = inspect.getsource(func)
 84	except OSError:
 85		output["code"] = None
 86
 87	fig_names: list[str] | None = getattr(func, _FIGURE_NAMES_KEY, None)
 88	if fig_names:
 89		return [
 90			{
 91				"name": fig_name,
 92				**output,
 93			}
 94			for fig_name in fig_names
 95		]
 96	else:
 97		return [
 98			{
 99				"name": func_name,
100				**output,
101			},
102		]
103
104
def generate_functions_jsonl(path: Path) -> None:
	"""unions all functions from `figures.jsonl` and `ATTENTION_MATRIX_FIGURE_FUNCS` into the file

	reads any entries already in `{path}/figures.jsonl`, adds the metadata of every
	function in `ATTENTION_MATRIX_FIGURE_FUNCS` (as returned by `get_func_metadata`),
	and rewrites the file sorted by figure name. when a name is present in both,
	the freshly generated metadata replaces the existing entry.

	# Parameters:
	- `path : Path` directory in which `figures.jsonl` is read and written
	"""
	figures_file: Path = path / "figures.jsonl"
	existing_figures: dict[str, dict] = {}

	if figures_file.exists():
		with open(figures_file, "r", encoding="utf-8") as f:
			for line in f:
				# tolerate blank lines rather than failing in `json.loads`
				if not line.strip():
					continue
				func_data: dict = json.loads(line)
				existing_figures[func_data["name"]] = func_data

	# Add any new functions from ATTENTION_MATRIX_FIGURE_FUNCS
	new_functions: dict[str, dict] = {
		func_meta["name"]: func_meta
		for func_meta in itertools.chain.from_iterable(
			get_func_metadata(func) for func in ATTENTION_MATRIX_FIGURE_FUNCS
		)
	}

	# later keys win, so new metadata overrides what was on disk
	all_functions: dict[str, dict] = {**existing_figures, **new_functions}

	with open(figures_file, "w", encoding="utf-8") as f:
		# both dicts are keyed by the "name" field, so sorting keys sorts by name
		for name in sorted(all_functions):
			json.dump(all_functions[name], f)
			f.write("\n")
135
136
def write_html_index(
	path: Path,
	cfg_single: dict | None = None,
	cfg_patternlens: dict | None = None,
) -> None:
	"""writes index.html and single.html files to the path

	the html is copied from the `frontend` resources of the `pattern_lens` package:
	`patternlens.html` is written as `index.html` and `single.html` keeps its name.

	# Parameters:
	- `path : Path` directory to write the files to
	- `cfg_single : dict | None` if not `None`, dumped as json to `{path}/sg_cfg.json`
	- `cfg_patternlens : dict | None` if not `None`, dumped as json to `{path}/pl_cfg.json`
	"""
	# read through the `Traversable` API rather than casting to `Path`:
	# this needs no type ignore and also works for resources not on the filesystem
	frontend_resources = importlib.resources.files(pattern_lens).joinpath("frontend")

	# read with the same encoding that is used for writing below,
	# so the copy does not depend on the platform's locale default
	pl_index_html: str = frontend_resources.joinpath("patternlens.html").read_text(
		encoding="utf-8",
	)
	sg_html: str = frontend_resources.joinpath("single.html").read_text(
		encoding="utf-8",
	)

	# Write both html files
	with open(path / "index.html", "w", encoding="utf-8") as f:
		f.write(pl_index_html)

	with open(path / "single.html", "w", encoding="utf-8") as f:
		f.write(sg_html)

	# write the config files if they are provided
	if cfg_single is not None:
		with open(path / "sg_cfg.json", "w", encoding="utf-8") as f:
			json.dump(cfg_single, f, indent="\t")

	if cfg_patternlens is not None:
		with open(path / "pl_cfg.json", "w", encoding="utf-8") as f:
			json.dump(cfg_patternlens, f, indent="\t")

def generate_prompts_jsonl(model_dir: pathlib._local.Path) -> None:
def generate_prompts_jsonl(model_dir: Path) -> None:
	"""creates a `prompts.jsonl` file with all the prompts in the model directory

	looks in all entries of `{model_dir}/prompts` for a `prompt.json` file and
	writes the contents of each one as a single line of `{model_dir}/prompts.jsonl`.
	entries which do not contain a `prompt.json` file are skipped.

	# Parameters:
	- `model_dir : Path` directory which contains a `prompts` subdirectory

	# Raises:
	- `FileNotFoundError` : if `{model_dir}/prompts` does not exist
	"""
	prompts: list[dict] = []
	# sorted so that the output order does not depend on filesystem iteration order
	for prompt_dir in sorted((model_dir / "prompts").iterdir()):
		prompt_file: Path = prompt_dir / "prompt.json"
		if prompt_file.exists():
			# explicit encoding: do not rely on the platform's locale default for JSON
			with open(prompt_file, "r", encoding="utf-8") as f:
				prompts.append(json.load(f))

	with open(model_dir / "prompts.jsonl", "w", encoding="utf-8") as f:
		for prompt in prompts:
			f.write(json.dumps(prompt))
			f.write("\n")

creates a prompts.jsonl file with all the prompts in the model directory

looks in all directories in {model_dir}/prompts for a prompt.json file

def generate_models_jsonl(path: pathlib._local.Path) -> None:
def generate_models_jsonl(path: Path) -> None:
	"""creates a `models.jsonl` file with all the models

	looks in every entry of `path` for a `model_cfg.json` file and writes the
	contents of each one as a single line of `{path}/models.jsonl`.
	entries which do not contain a `model_cfg.json` file are skipped.

	# Parameters:
	- `path : Path` directory whose entries are model directories

	# Raises:
	- `FileNotFoundError` : if `path` does not exist
	"""
	models: list[dict] = []
	# sorted so that the output order does not depend on filesystem iteration order
	for model_dir in sorted(path.iterdir()):
		model_cfg_path: Path = model_dir / "model_cfg.json"
		if model_cfg_path.exists():
			# explicit encoding: do not rely on the platform's locale default for JSON
			with open(model_cfg_path, "r", encoding="utf-8") as f:
				models.append(json.load(f))

	with open(path / "models.jsonl", "w", encoding="utf-8") as f:
		for model in models:
			f.write(json.dumps(model))
			f.write("\n")

creates a models.jsonl file with all the models

def get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
 53def get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
 54	"""get metadata for a function
 55
 56	# Parameters:
 57	- `func : Callable` which has a `_FIGURE_NAMES_KEY` (by default `_figure_names`) attribute
 58
 59	# Returns:
 60
 61	`list[dict[str, str | None]]`
 62	each dictionary is for a function, containing:
 63
 64	- `name : str` : the name of the figure
 65	- `func_name : str`
 66		the name of the function. if not a multi-figure function, this is identical to `name`
 67		if it is a multi-figure function, then `name` is `{func_name}.{figure_name}`
 68	- `doc : str` : the docstring of the function
 69	- `figure_save_fmt : str | None` : the format of the figure that the function saves, using the `figure_save_fmt` attribute of the function. `None` if the attribute does not exist
 70	- `source : str | None` : the source file of the function
 71	- `code : str | None` : the source code of the function, split by line. `None` if the source file cannot be read
 72
 73	"""
 74	source_file: str | None = inspect.getsourcefile(func)
 75	func_name: str = getattr(func, "__name__", "<unknown>")
 76	output: dict[str, str | None] = dict(
 77		func_name=func_name,
 78		doc=getattr(func, "__doc__", None),
 79		figure_save_fmt=getattr(func, "figure_save_fmt", None),
 80		source=Path(source_file).as_posix() if source_file else None,
 81	)
 82
 83	try:
 84		output["code"] = inspect.getsource(func)
 85	except OSError:
 86		output["code"] = None
 87
 88	fig_names: list[str] | None = getattr(func, _FIGURE_NAMES_KEY, None)
 89	if fig_names:
 90		return [
 91			{
 92				"name": fig_name,
 93				**output,
 94			}
 95			for fig_name in fig_names
 96		]
 97	else:
 98		return [
 99			{
100				"name": func_name,
101				**output,
102			},
103		]

get metadata for a function

Parameters:

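  • func : Callable which may have a _FIGURE_NAMES_KEY (by default _figure_names) attribute listing its figure names; if the attribute is missing or empty, the function's own name is used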

Returns:

list[dict[str, str | None]]: one dictionary per figure name, each containing:

  • name : str : the name of the figure
  • func_name : str : the name of the function. If it is not a multi-figure function, this is identical to name; if it is a multi-figure function, then name is {func_name}.{figure_name}
  • doc : str : the docstring of the function
  • figure_save_fmt : str | None : the format of the figure that the function saves, using the figure_save_fmt attribute of the function. None if the attribute does not exist
  • source : str | None : the source file of the function
  • code : str | None : the source code of the function, as a single string. None if the source file cannot be read
def generate_functions_jsonl(path: pathlib._local.Path) -> None:
def generate_functions_jsonl(path: Path) -> None:
	"""unions all functions from `figures.jsonl` and `ATTENTION_MATRIX_FIGURE_FUNCS` into the file

	reads any entries already in `{path}/figures.jsonl`, adds the metadata of every
	function in `ATTENTION_MATRIX_FIGURE_FUNCS` (as returned by `get_func_metadata`),
	and rewrites the file sorted by figure name. when a name is present in both,
	the freshly generated metadata replaces the existing entry.

	# Parameters:
	- `path : Path` directory in which `figures.jsonl` is read and written
	"""
	figures_file: Path = path / "figures.jsonl"
	existing_figures: dict[str, dict] = {}

	if figures_file.exists():
		with open(figures_file, "r", encoding="utf-8") as f:
			for line in f:
				# tolerate blank lines rather than failing in `json.loads`
				if not line.strip():
					continue
				func_data: dict = json.loads(line)
				existing_figures[func_data["name"]] = func_data

	# Add any new functions from ATTENTION_MATRIX_FIGURE_FUNCS
	new_functions: dict[str, dict] = {
		func_meta["name"]: func_meta
		for func_meta in itertools.chain.from_iterable(
			get_func_metadata(func) for func in ATTENTION_MATRIX_FIGURE_FUNCS
		)
	}

	# later keys win, so new metadata overrides what was on disk
	all_functions: dict[str, dict] = {**existing_figures, **new_functions}

	with open(figures_file, "w", encoding="utf-8") as f:
		# both dicts are keyed by the "name" field, so sorting keys sorts by name
		for name in sorted(all_functions):
			json.dump(all_functions[name], f)
			f.write("\n")

unions all functions from figures.jsonl and ATTENTION_MATRIX_FIGURE_FUNCS into the file

def write_html_index( path: pathlib._local.Path, cfg_single: dict | None = None, cfg_patternlens: dict | None = None) -> None:
def write_html_index(
	path: Path,
	cfg_single: dict | None = None,
	cfg_patternlens: dict | None = None,
) -> None:
	"""writes index.html and single.html files to the path

	the html is copied from the `frontend` resources of the `pattern_lens` package:
	`patternlens.html` is written as `index.html` and `single.html` keeps its name.

	# Parameters:
	- `path : Path` directory to write the files to
	- `cfg_single : dict | None` if not `None`, dumped as json to `{path}/sg_cfg.json`
	- `cfg_patternlens : dict | None` if not `None`, dumped as json to `{path}/pl_cfg.json`
	"""
	# read through the `Traversable` API rather than casting to `Path`:
	# this needs no type ignore and also works for resources not on the filesystem
	frontend_resources = importlib.resources.files(pattern_lens).joinpath("frontend")

	# read with the same encoding that is used for writing below,
	# so the copy does not depend on the platform's locale default
	pl_index_html: str = frontend_resources.joinpath("patternlens.html").read_text(
		encoding="utf-8",
	)
	sg_html: str = frontend_resources.joinpath("single.html").read_text(
		encoding="utf-8",
	)

	# Write both html files
	with open(path / "index.html", "w", encoding="utf-8") as f:
		f.write(pl_index_html)

	with open(path / "single.html", "w", encoding="utf-8") as f:
		f.write(sg_html)

	# write the config files if they are provided
	if cfg_single is not None:
		with open(path / "sg_cfg.json", "w", encoding="utf-8") as f:
			json.dump(cfg_single, f, indent="\t")

	if cfg_patternlens is not None:
		with open(path / "pl_cfg.json", "w", encoding="utf-8") as f:
			json.dump(cfg_patternlens, f, indent="\t")

writes index.html and single.html files to the path