pattern_lens.indexes
writes indexes to the model directory for the frontend to use or for record keeping
"""writes indexes to the model directory for the frontend to use or for record keeping"""

import importlib.metadata
import importlib.resources
import inspect
import itertools
import json
from collections.abc import Callable
from pathlib import Path
from typing import Literal

import pattern_lens
from pattern_lens.attn_figure_funcs import (
    _FIGURE_NAMES_KEY,
    ATTENTION_MATRIX_FIGURE_FUNCS,
)


def generate_prompts_jsonl(model_dir: Path) -> None:
    """creates a `prompts.jsonl` file with all the prompts in the model directory

    looks in all directories in `{model_dir}/prompts` for a `prompt.json` file;
    entries without one are skipped. entries are visited in sorted order so the
    output file is reproducible.

    # Parameters:
    - `model_dir : Path`
        model directory containing a `prompts` subdirectory

    # Raises:
    - `OSError` : (e.g. `FileNotFoundError`) if `{model_dir}/prompts` cannot be listed
    """
    prompts: list[dict] = []
    # `iterdir()` yields entries in arbitrary order -- sort for deterministic output
    for prompt_dir in sorted((model_dir / "prompts").iterdir()):
        prompt_file: Path = prompt_dir / "prompt.json"
        if prompt_file.exists():
            # explicit encoding: do not depend on the platform default
            with open(prompt_file, encoding="utf-8") as f:
                prompts.append(json.load(f))

    with open(model_dir / "prompts.jsonl", "w", encoding="utf-8") as f:
        f.writelines(json.dumps(prompt) + "\n" for prompt in prompts)


def generate_models_jsonl(path: Path) -> None:
    """creates a `models.jsonl` file with all the models

    every entry of `path` that contains a `model_cfg.json` contributes one line;
    entries are visited in sorted order so the output file is reproducible.

    # Parameters:
    - `path : Path`
        directory whose entries are the per-model directories

    # Raises:
    - `OSError` : (e.g. `FileNotFoundError`) if `path` cannot be listed
    """
    models: list[dict] = []
    # `iterdir()` yields entries in arbitrary order -- sort for deterministic output
    for model_dir in sorted(path.iterdir()):
        model_cfg_path: Path = model_dir / "model_cfg.json"
        if model_cfg_path.exists():
            # explicit encoding: do not depend on the platform default
            with open(model_cfg_path, encoding="utf-8") as f:
                models.append(json.load(f))

    with open(path / "models.jsonl", "w", encoding="utf-8") as f:
        f.writelines(json.dumps(model) + "\n" for model in models)


def get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
    """get metadata for a function

    # Parameters:
    - `func : Callable` which may have a `_FIGURE_NAMES_KEY` (by default `_figure_names`) attribute

    # Returns:

    `list[dict[str, str | None]]`
    one dictionary per figure name found in the `_FIGURE_NAMES_KEY` attribute,
    or a single dictionary if that attribute is missing or empty. each contains:

    - `name : str` : the name of the figure
    - `func_name : str`
        the name of the function. if not a multi-figure function, this is identical to `name`
        if it is a multi-figure function, then `name` is `{func_name}.{figure_name}`
    - `doc : str | None` : the docstring of the function
    - `figure_save_fmt : str | None` : the format of the figure that the function saves, using the `figure_save_fmt` attribute of the function. `None` if the attribute does not exist
    - `source : str | None` : the source file of the function, `None` if it cannot be determined
    - `code : str | None` : the source code of the function as a single string. `None` if the source cannot be retrieved

    """
    # `inspect` raises `TypeError` for objects without Python source (e.g. builtins)
    source_file: str | None
    try:
        source_file = inspect.getsourcefile(func)
    except TypeError:
        source_file = None

    # ... and `OSError` when the source file exists in principle but cannot be read
    code: str | None
    try:
        code = inspect.getsource(func)
    except (OSError, TypeError):
        code = None

    output: dict[str, str | None] = {
        "func_name": func.__name__,
        "doc": func.__doc__,
        "figure_save_fmt": getattr(func, "figure_save_fmt", None),
        "source": Path(source_file).as_posix() if source_file else None,
        "code": code,
    }

    fig_names: list[str] | None = getattr(func, _FIGURE_NAMES_KEY, None)
    names: list[str] = fig_names if fig_names else [func.__name__]
    return [{"name": name, **output} for name in names]


def generate_functions_jsonl(path: Path) -> None:
    """unions all functions from `figures.jsonl` and `ATTENTION_MATRIX_FIGURE_FUNCS` into the file

    existing entries of `{path}/figures.jsonl` are kept unless a currently
    registered function has the same `name`, in which case the fresh metadata
    wins. the file is rewritten sorted by `name`.

    # Parameters:
    - `path : Path`
        directory containing (or to contain) `figures.jsonl`
    """
    figures_file: Path = path / "figures.jsonl"
    existing_figures: dict[str, dict] = {}

    if figures_file.exists():
        with open(figures_file, encoding="utf-8") as f:
            for line in f:
                # tolerate blank lines (e.g. a trailing newline added by hand)
                if not line.strip():
                    continue
                func_data: dict = json.loads(line)
                existing_figures[func_data["name"]] = func_data

    # metadata for every function in ATTENTION_MATRIX_FIGURE_FUNCS, keyed by figure name
    new_functions: dict[str, dict] = {
        meta["name"]: meta
        for meta in itertools.chain.from_iterable(
            get_func_metadata(func) for func in ATTENTION_MATRIX_FIGURE_FUNCS
        )
    }

    # later keys win: fresh metadata overrides what was stored on disk
    all_functions: dict[str, dict] = {**existing_figures, **new_functions}

    with open(figures_file, "w", encoding="utf-8") as f:
        for name in sorted(all_functions):
            json.dump(all_functions[name], f)
            f.write("\n")


def inline_assets(
    html: str,
    assets: list[tuple[Literal["script", "style"], str]],
    base_path: Path,
) -> str:
    """Inline specified local CSS/JS files into an HTML document.

    Each entry in `assets` should be a tuple like `("script", "app.js")` or `("style", "style.css")`.
    For each entry, the tag `<{tag_type} src="{filename}"></{tag_type}>` must occur
    exactly once in `html`; it is replaced by the same tag wrapping the file's content.

    # Parameters:
    - `html : str`
        input HTML content.
    - `assets : list[tuple[Literal["script", "style"], str]]`
        List of (tag_type, filename) tuples to inline.
    - `base_path : Path`
        directory that the asset filenames are resolved against.

    # Returns:
    `str` : Modified HTML content with inlined assets.

    # Raises:
    - `ValueError` : if a tag type is neither `"script"` nor `"style"`
    - `AssertionError` : if the placeholder tag for an asset is not present exactly once
    """
    for tag_type, filename in assets:
        if tag_type not in ("style", "script"):
            err_msg: str = f"Unsupported tag type: {tag_type}"
            raise ValueError(err_msg)

        # the placeholder tag that stands in for this asset
        pattern: str = f'<{tag_type} src="{filename}"></{tag_type}>'
        n_found: int = html.count(pattern)
        # raised explicitly (not via `assert`) so the check survives `python -O`;
        # the exception type is kept for backward compatibility
        if n_found != 1:
            raise AssertionError(
                f"Pattern {pattern} should be in the html exactly once, found html.count(pattern) = {n_found}",
            )
        # assets are read as UTF-8, matching how the template itself is read
        content: str = (base_path / filename).read_text(encoding="utf-8")
        replacement: str = f"<{tag_type}>\n{content}\n</{tag_type}>"
        html = html.replace(pattern, replacement)

    return html


def write_html_index(path: Path) -> None:
    """writes an index.html file to the path

    reads `index.template.html` from the `frontend` directory of the `pattern_lens`
    package, inlines `style.css`, `util.js` and `app.js` via `inline_assets`,
    substitutes the `$$PATTERN_LENS_VERSION$$` placeholder with the installed
    `pattern-lens` version, and writes the result to `{path}/index.html`.

    # Parameters:
    - `path : Path`
        directory in which `index.html` is written
    """
    # TYPING: error: Argument 1 to "Path" has incompatible type "Traversable"; expected "str | PathLike[str]" [arg-type]
    frontend_resources_path: Path = Path(
        importlib.resources.files(pattern_lens).joinpath("frontend"),  # type: ignore[arg-type]
    )
    html_index: str = (frontend_resources_path / "index.template.html").read_text(
        encoding="utf-8",
    )
    # inline assets
    html_index = inline_assets(
        html_index,
        [
            ("style", "style.css"),
            ("script", "util.js"),
            ("script", "app.js"),
        ],
        base_path=frontend_resources_path,
    )

    # add version
    pattern_lens_version: str = importlib.metadata.version("pattern-lens")
    html_index = html_index.replace("$$PATTERN_LENS_VERSION$$", pattern_lens_version)
    # write the index.html file
    with open(path / "index.html", "w", encoding="utf-8") as f:
        f.write(html_index)
def
generate_prompts_jsonl(model_dir: pathlib.Path) -> None:
def generate_prompts_jsonl(model_dir: Path) -> None:
    """creates a `prompts.jsonl` file with all the prompts in the model directory

    looks in all directories in `{model_dir}/prompts` for a `prompt.json` file;
    entries without one are skipped. entries are visited in sorted order so the
    output file is reproducible.

    # Parameters:
    - `model_dir : Path`
        model directory containing a `prompts` subdirectory

    # Raises:
    - `OSError` : (e.g. `FileNotFoundError`) if `{model_dir}/prompts` cannot be listed
    """
    prompts: list[dict] = []
    # `iterdir()` yields entries in arbitrary order -- sort for deterministic output
    for prompt_dir in sorted((model_dir / "prompts").iterdir()):
        prompt_file: Path = prompt_dir / "prompt.json"
        if prompt_file.exists():
            # explicit encoding: do not depend on the platform default
            with open(prompt_file, encoding="utf-8") as f:
                prompts.append(json.load(f))

    with open(model_dir / "prompts.jsonl", "w", encoding="utf-8") as f:
        f.writelines(json.dumps(prompt) + "\n" for prompt in prompts)
creates a prompts.jsonl file with all the prompts in the model directory
looks in all directories in {model_dir}/prompts for a prompt.json file
def
generate_models_jsonl(path: pathlib.Path) -> None:
def generate_models_jsonl(path: Path) -> None:
    """creates a `models.jsonl` file with all the models

    every entry of `path` that contains a `model_cfg.json` contributes one line;
    entries are visited in sorted order so the output file is reproducible.

    # Parameters:
    - `path : Path`
        directory whose entries are the per-model directories

    # Raises:
    - `OSError` : (e.g. `FileNotFoundError`) if `path` cannot be listed
    """
    models: list[dict] = []
    # `iterdir()` yields entries in arbitrary order -- sort for deterministic output
    for model_dir in sorted(path.iterdir()):
        model_cfg_path: Path = model_dir / "model_cfg.json"
        if model_cfg_path.exists():
            # explicit encoding: do not depend on the platform default
            with open(model_cfg_path, encoding="utf-8") as f:
                models.append(json.load(f))

    with open(path / "models.jsonl", "w", encoding="utf-8") as f:
        f.writelines(json.dumps(model) + "\n" for model in models)
creates a models.jsonl file with all the models
def
get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
def get_func_metadata(func: Callable) -> list[dict[str, str | None]]:
    """get metadata for a function

    # Parameters:
    - `func : Callable` which may have a `_FIGURE_NAMES_KEY` (by default `_figure_names`) attribute

    # Returns:

    `list[dict[str, str | None]]`
    one dictionary per figure name found in the `_FIGURE_NAMES_KEY` attribute,
    or a single dictionary if that attribute is missing or empty. each contains:

    - `name : str` : the name of the figure
    - `func_name : str`
        the name of the function. if not a multi-figure function, this is identical to `name`
        if it is a multi-figure function, then `name` is `{func_name}.{figure_name}`
    - `doc : str | None` : the docstring of the function
    - `figure_save_fmt : str | None` : the format of the figure that the function saves, using the `figure_save_fmt` attribute of the function. `None` if the attribute does not exist
    - `source : str | None` : the source file of the function, `None` if it cannot be determined
    - `code : str | None` : the source code of the function as a single string. `None` if the source cannot be retrieved

    """
    # `inspect` raises `TypeError` for objects without Python source (e.g. builtins)
    source_file: str | None
    try:
        source_file = inspect.getsourcefile(func)
    except TypeError:
        source_file = None

    # ... and `OSError` when the source file exists in principle but cannot be read
    code: str | None
    try:
        code = inspect.getsource(func)
    except (OSError, TypeError):
        code = None

    output: dict[str, str | None] = {
        "func_name": func.__name__,
        "doc": func.__doc__,
        "figure_save_fmt": getattr(func, "figure_save_fmt", None),
        "source": Path(source_file).as_posix() if source_file else None,
        "code": code,
    }

    fig_names: list[str] | None = getattr(func, _FIGURE_NAMES_KEY, None)
    names: list[str] = fig_names if fig_names else [func.__name__]
    return [{"name": name, **output} for name in names]
get metadata for a function
Parameters:
`func : Callable` which has a `_FIGURE_NAMES_KEY` (by default `_figure_names`) attribute
Returns:
list[dict[str, str | None]]
each dictionary is for a function, containing:
- `name : str` : the name of the figure
- `func_name : str` : the name of the function. If not a multi-figure function, this is identical to `name`; if it is a multi-figure function, then `name` is `{func_name}.{figure_name}`
- `doc : str` : the docstring of the function
- `figure_save_fmt : str | None` : the format of the figure that the function saves, using the `figure_save_fmt` attribute of the function. `None` if the attribute does not exist
- `source : str | None` : the source file of the function
- `code : str | None` : the source code of the function, as a single string. `None` if the source file cannot be read
def
generate_functions_jsonl(path: pathlib.Path) -> None:
def generate_functions_jsonl(path: Path) -> None:
    """unions all functions from `figures.jsonl` and `ATTENTION_MATRIX_FIGURE_FUNCS` into the file

    existing entries of `{path}/figures.jsonl` are kept unless a currently
    registered function has the same `name`, in which case the fresh metadata
    wins. the file is rewritten sorted by `name`.

    # Parameters:
    - `path : Path`
        directory containing (or to contain) `figures.jsonl`
    """
    figures_file: Path = path / "figures.jsonl"
    existing_figures: dict[str, dict] = {}

    if figures_file.exists():
        with open(figures_file, encoding="utf-8") as f:
            for line in f:
                # tolerate blank lines (e.g. a trailing newline added by hand)
                if not line.strip():
                    continue
                func_data: dict = json.loads(line)
                existing_figures[func_data["name"]] = func_data

    # metadata for every function in ATTENTION_MATRIX_FIGURE_FUNCS, keyed by figure name
    new_functions: dict[str, dict] = {
        meta["name"]: meta
        for meta in itertools.chain.from_iterable(
            get_func_metadata(func) for func in ATTENTION_MATRIX_FIGURE_FUNCS
        )
    }

    # later keys win: fresh metadata overrides what was stored on disk
    all_functions: dict[str, dict] = {**existing_figures, **new_functions}

    with open(figures_file, "w", encoding="utf-8") as f:
        for name in sorted(all_functions):
            json.dump(all_functions[name], f)
            f.write("\n")
unions all functions from figures.jsonl and ATTENTION_MATRIX_FIGURE_FUNCS into the file
def
inline_assets( html: str, assets: list[tuple[typing.Literal['script', 'style'], str]], base_path: pathlib.Path) -> str:
def inline_assets(
    html: str,
    assets: list[tuple[Literal["script", "style"], str]],
    base_path: Path,
) -> str:
    """Inline specified local CSS/JS files into an HTML document.

    Each entry in `assets` should be a tuple like `("script", "app.js")` or `("style", "style.css")`.
    For each entry, the tag `<{tag_type} src="{filename}"></{tag_type}>` must occur
    exactly once in `html`; it is replaced by the same tag wrapping the file's content.

    # Parameters:
    - `html : str`
        input HTML content.
    - `assets : list[tuple[Literal["script", "style"], str]]`
        List of (tag_type, filename) tuples to inline.
    - `base_path : Path`
        directory that the asset filenames are resolved against.

    # Returns:
    `str` : Modified HTML content with inlined assets.

    # Raises:
    - `ValueError` : if a tag type is neither `"script"` nor `"style"`
    - `AssertionError` : if the placeholder tag for an asset is not present exactly once
    """
    for tag_type, filename in assets:
        if tag_type not in ("style", "script"):
            err_msg: str = f"Unsupported tag type: {tag_type}"
            raise ValueError(err_msg)

        # the placeholder tag that stands in for this asset
        pattern: str = f'<{tag_type} src="{filename}"></{tag_type}>'
        n_found: int = html.count(pattern)
        # raised explicitly (not via `assert`) so the check survives `python -O`;
        # the exception type is kept for backward compatibility
        if n_found != 1:
            raise AssertionError(
                f"Pattern {pattern} should be in the html exactly once, found html.count(pattern) = {n_found}",
            )
        # assets are read as UTF-8, matching how the template itself is read
        content: str = (base_path / filename).read_text(encoding="utf-8")
        replacement: str = f"<{tag_type}>\n{content}\n</{tag_type}>"
        html = html.replace(pattern, replacement)

    return html
Inline specified local CSS/JS files into an HTML document.
Each entry in assets should be a tuple like ("script", "app.js") or ("style", "style.css").
Parameters:
- `html : str` : input HTML content.
- `assets : list[tuple[Literal["script", "style"], str]]` : List of (tag_type, filename) tuples to inline.
- `base_path : Path` : directory that the asset filenames are resolved against.
Returns:
str : Modified HTML content with inlined assets.
def
write_html_index(path: pathlib.Path) -> None:
def write_html_index(path: Path) -> None:
    """render the bundled frontend into a single `index.html` inside `path`

    reads `index.template.html` from the `frontend` directory of the `pattern_lens`
    package, inlines `style.css`, `util.js` and `app.js` via `inline_assets`,
    substitutes the `$$PATTERN_LENS_VERSION$$` placeholder with the installed
    `pattern-lens` version, and writes the result to `{path}/index.html`.

    # Parameters:
    - `path : Path`
        directory in which `index.html` is written
    """
    # TYPING: `files(...)` returns a `Traversable`, which the `Path` stubs do not accept
    frontend_dir: Path = Path(
        importlib.resources.files(pattern_lens).joinpath("frontend"),  # type: ignore[arg-type]
    )
    template_file: Path = frontend_dir / "index.template.html"
    assets_to_inline: list = [
        ("style", "style.css"),
        ("script", "util.js"),
        ("script", "app.js"),
    ]

    # template -> inlined assets -> version stamp
    rendered: str = inline_assets(
        template_file.read_text(encoding="utf-8"),
        assets_to_inline,
        base_path=frontend_dir,
    )
    installed_version: str = importlib.metadata.version("pattern-lens")
    rendered = rendered.replace("$$PATTERN_LENS_VERSION$$", installed_version)

    (path / "index.html").write_text(rendered, encoding="utf-8")
writes an index.html file to the path