trnbl.loggers.local
1from trnbl.loggers.local.locallogger import FilePaths, LocalLogger 2 3__all__ = [ 4 "FilePaths", 5 "LocalLogger", 6 # submodules 7 "build_dist", 8 "html_frontend", 9 "locallogger", 10 "start_server", 11]
class
FilePaths:
class FilePaths:
    """Relative file and directory names used by the local logger.

    Entries are joined onto either the run directory or the project
    directory; the comments on each group say which one applies.
    """

    # --- run directory: configs and metadata (JSON) ---
    TRAIN_CONFIG: Path = Path("config.json")
    LOGGER_META: Path = Path("meta.json")

    # --- run directory: the same configs and metadata as YAML,
    #     kept for easier human readability ---
    TRAIN_CONFIG_YML: Path = Path("config.yml")
    LOGGER_META_YML: Path = Path("meta.yml")

    # --- run directory: logs, metrics, and artifacts (JSON lines) ---
    ARTIFACTS: Path = Path("artifacts.jsonl")
    METRICS: Path = Path("metrics.jsonl")
    LOG: Path = Path("log.jsonl")

    # --- run directory: holds the error message if an error occurs ---
    ERROR_FILE: Path = Path("ERROR.txt")

    # --- project directory: manifest shared between all runs of a project
    #     (relative to the project path, NOT the run path) ---
    RUNS_MANIFEST: Path = Path("runs.jsonl")

    # --- project directory: sub-directory that contains the runs
    #     (relative to the project path, NOT the run path) ---
    RUNS_DIR: Path = Path("runs")

    # --- frontend files ---
    HTML_INDEX: Path = Path("index.html")
    START_SERVER: Path = Path("start_server.py")
class LocalLogger(TrainingLoggerBase):
    """Training logger that records a run to files on the local filesystem.

    Each run gets its own directory under
    ``base_path / project / FilePaths.RUNS_DIR / run_id``. Log messages,
    metrics and artifacts are appended to JSON-lines files inside it, and
    every record is also kept in memory in ``log_list``, ``metrics_list``
    and ``artifacts_list``.
    """

    def __init__(
        self,
        project: str,
        metric_names: list[str],
        train_config: dict,
        group: str = "",
        base_path: str | Path = Path("trnbl-logs"),
        memusage_as_metrics: bool = True,
        console_msg_prefix: str = "# ",
    ):
        """Create the run directory, open the log files and write metadata.

        Parameters:
            project: project name; the project directory is
                ``base_path / project``.
            metric_names: names of the metrics that will be logged. The list
                is copied, the caller's list is never modified.
            train_config: training configuration; written to the run
                directory as JSON and YAML and hashed into the run id.
            group: optional group name, used as a prefix of the run id.
            base_path: root directory for all projects.
            memusage_as_metrics: if true, the keys of ``self.get_mem_usage()``
                are appended to ``self.metric_names``.
            console_msg_prefix: prefix for messages printed by ``message()``.

        Raises:
            AssertionError: if the run directory already exists.
        """
        # set up lists
        self.log_list: list[dict] = list()
        self.metrics_list: list[dict] = list()
        self.artifacts_list: list[dict] = list()

        # copy kwargs
        self.train_config: dict = train_config
        self.project: str = project
        self.group: str = group
        # make sure a non-empty group ends with exactly one separator
        self.group_str: str = self.group + ("-" if group and group[-1] != "-" else "")
        self.base_path: Path = Path(base_path)
        self.console_msg_prefix: str = console_msg_prefix

        # set up id
        self._syllabic_id: str = rand_syllabic_string()
        self.run_init_timestamp: datetime.datetime = datetime.datetime.now()
        self.run_id: str = self._get_run_id()

        # set up paths
        self.project_path: Path = self.base_path / project
        self._run_path: Path = self.project_path / FilePaths.RUNS_DIR / self.run_id
        # make sure the run path doesn't already exist
        assert not self._run_path.exists()
        self._run_path.mkdir(parents=True, exist_ok=True)

        # set up files and objects for logs, artifacts, and metrics
        # ----------------------------------------

        self.log_file: io.TextIOWrapper = open(self.run_path / FilePaths.LOG, "a")

        self.metrics_file: io.TextIOWrapper = open(
            self.run_path / FilePaths.METRICS, "a"
        )

        self.artifacts_file: io.TextIOWrapper = open(
            self.run_path / FilePaths.ARTIFACTS, "a"
        )

        # metric names (getting mem usage might cause problems if we have an error)
        # copy first: extending the caller's list in place would leak the
        # memory-usage keys into the caller's object
        self.metric_names: list[str] = list(metric_names)
        if memusage_as_metrics:
            self.metric_names += list(self.get_mem_usage().keys())

        # put everything in a config
        self.logger_meta: dict = dict(
            run_id=self.run_id,
            run_path=self.run_path.as_posix(),
            syllabic_id=self.syllabic_id,
            group=self.group,
            project=self.project,
            run_init_timestamp=str(self.run_init_timestamp.isoformat()),
            # full list, including any memory-usage metric names
            metric_names=self.metric_names,
            train_config=train_config,  # TODO: this duplicates the contents of FilePaths.TRAIN_CONFIG, is that ok?
        )

        # write to the project jsonl
        with open(self.project_path / FilePaths.RUNS_MANIFEST, "a") as f:
            json.dump(self.logger_meta, f)
            f.write("\n")

        # write the index.html and start_server.py files
        # ----------------------------------------
        from trnbl.loggers.local.html_frontend import get_html_frontend

        with open(self.project_path / FilePaths.HTML_INDEX, "w") as f:
            f.write(get_html_frontend())

        import trnbl.loggers.local.start_server as start_server_module

        with open(self.project_path / FilePaths.START_SERVER, "w") as f:
            f.write(inspect.getsource(start_server_module))

        # write init files
        # ----------------------------------------

        # logger metadata
        with open(self.run_path / FilePaths.LOGGER_META, "w") as f:
            json.dump(self.logger_meta, f, indent="\t")

        with open(self.run_path / FilePaths.LOGGER_META_YML, "w") as f:
            yaml.dump(self.logger_meta, f)

        # training/model/dataset config
        with open(self.run_path / FilePaths.TRAIN_CONFIG, "w") as f:
            json.dump(train_config, f, indent="\t")

        with open(self.run_path / FilePaths.TRAIN_CONFIG_YML, "w") as f:
            yaml.dump(train_config, f)

        self.message(f"starting logger with id {self.run_id}")

    @property
    def _run_hash(self) -> str:
        """MD5 hex digest of ``str(self.train_config)``."""
        return hashlib.md5(str(self.train_config).encode()).hexdigest()

    @property
    def syllabic_id(self) -> str:
        """Random syllabic string generated for this run in ``__init__``."""
        return self._syllabic_id

    def _get_run_id(self) -> str:
        """Build the run id: group prefix, config hash, init time, syllabic id."""
        return f"{self.group_str}h{self._run_hash[:5]}-{self.run_init_timestamp.strftime('%y%m%d_%H%M')}-{self.syllabic_id}"

    def get_timestamp(self) -> str:
        """Return the current local time as an ISO-format string."""
        return datetime.datetime.now().isoformat()

    def _log(self, message: str, **kwargs) -> None:
        """(internal) log a progress message

        The record (message, timestamp and any extra ``kwargs``) is appended
        to ``log_list`` and written to the log file, which is flushed
        immediately.
        """
        # TODO: also log messages via regular logger to stdout
        msg_dict: dict = dict(
            message=message,
            timestamp=self.get_timestamp(),
        )
        if kwargs:
            msg_dict.update(kwargs)

        self.log_list.append(msg_dict)
        self.log_file.write(json.dumps(msg_dict) + "\n")
        self.log_file.flush()

    def debug(self, message: str, **kwargs) -> None:
        """log a debug message (written to the log only, not printed)"""
        self._log(message, __dbg__=True, **kwargs)

    def message(self, message: str, **kwargs) -> None:
        """log a progress message and print it with the console prefix"""
        # TODO: also log messages via regular logger to stdout
        self._log(message, **kwargs)
        print(self.console_msg_prefix + message)

    def warning(self, message: str, **kwargs) -> None:
        """log a warning message"""
        self.message(
            f"WARNING: {message}",
            __warning__=True,
            **kwargs,
        )

    def error(self, message: str, **kwargs) -> None:
        """log an error message and append it to the run's error file"""
        self.message(
            f"ERROR: {message}",
            __error__=True,
            **kwargs,
        )
        with open(self.run_path / FilePaths.ERROR_FILE, "a") as f:
            f.write("=" * 80 + "\n")
            f.write("exception at " + self.get_timestamp() + "\n")
            f.write(message)
            f.write("\n")
            f.flush()

    def metrics(self, data: dict[str, Any]) -> None:
        """log a dictionary of metrics

        A timestamped copy of ``data`` is stored and written; ``data``
        itself is left untouched so the caller can safely reuse it.
        """
        # copy instead of mutating: a caller that reuses one dict across
        # steps would otherwise alias every entry of ``metrics_list``
        record: dict[str, Any] = {**data, "timestamp": self.get_timestamp()}

        self.metrics_list.append(record)
        self.metrics_file.write(json.dumps(record) + "\n")

    def artifact(
        self,
        path: Path,
        type: str,
        aliases: list[str] | None = None,
        metadata: dict | None = None,
    ) -> None:
        """log an artifact from a file

        Only a record describing the artifact (timestamp, posix path, type,
        aliases, metadata) is stored; ``metadata`` defaults to an empty dict.
        """
        artifact_dict: dict = dict(
            timestamp=self.get_timestamp(),
            path=path.as_posix(),
            type=type,
            aliases=aliases,
            metadata=metadata if metadata else {},
        )

        self.artifacts_list.append(artifact_dict)
        self.artifacts_file.write(json.dumps(artifact_dict) + "\n")

    @property
    def url(self) -> str:
        """Get the URL for the current logging run"""
        return self.run_path.as_posix()

    @property
    def run_path(self) -> Path:
        """Get the path to the current logging run"""
        return self._run_path

    def flush(self) -> None:
        """Flush the log, metrics and artifacts files."""
        self.log_file.flush()
        self.metrics_file.flush()
        self.artifacts_file.flush()

    def finish(self) -> None:
        """Log a closing message, then flush and close all three files.

        Every file gets a close attempt even if flushing or closing an
        earlier one raises; the exception still propagates afterwards.
        """
        self.message("closing logger")

        try:
            try:
                self.log_file.flush()
            finally:
                self.log_file.close()
        finally:
            try:
                try:
                    self.metrics_file.flush()
                finally:
                    self.metrics_file.close()
            finally:
                try:
                    self.artifacts_file.flush()
                finally:
                    self.artifacts_file.close()
Training logger that writes logs, metrics, and artifacts to local files; extends the base class for training loggers.
LocalLogger( project: str, metric_names: list[str], train_config: dict, group: str = '', base_path: str | pathlib.Path = pathlib.Path('trnbl-logs'), memusage_as_metrics: bool = True, console_msg_prefix: str = '# ')
def __init__(
    self,
    project: str,
    metric_names: list[str],
    train_config: dict,
    group: str = "",
    base_path: str | Path = Path("trnbl-logs"),
    memusage_as_metrics: bool = True,
    console_msg_prefix: str = "# ",
):
    """Create the run directory, open the log files and write metadata.

    Parameters:
        project: project name; the project directory is
            ``base_path / project``.
        metric_names: names of the metrics that will be logged. The list
            is copied, the caller's list is never modified.
        train_config: training configuration; written to the run
            directory as JSON and YAML and hashed into the run id.
        group: optional group name, used as a prefix of the run id.
        base_path: root directory for all projects.
        memusage_as_metrics: if true, the keys of ``self.get_mem_usage()``
            are appended to ``self.metric_names``.
        console_msg_prefix: stored as ``self.console_msg_prefix``.

    Raises:
        AssertionError: if the run directory already exists.
    """
    # set up lists
    self.log_list: list[dict] = list()
    self.metrics_list: list[dict] = list()
    self.artifacts_list: list[dict] = list()

    # copy kwargs
    self.train_config: dict = train_config
    self.project: str = project
    self.group: str = group
    # make sure a non-empty group ends with exactly one separator
    self.group_str: str = self.group + ("-" if group and group[-1] != "-" else "")
    self.base_path: Path = Path(base_path)
    self.console_msg_prefix: str = console_msg_prefix

    # set up id
    self._syllabic_id: str = rand_syllabic_string()
    self.run_init_timestamp: datetime.datetime = datetime.datetime.now()
    self.run_id: str = self._get_run_id()

    # set up paths
    self.project_path: Path = self.base_path / project
    self._run_path: Path = self.project_path / FilePaths.RUNS_DIR / self.run_id
    # make sure the run path doesn't already exist
    assert not self._run_path.exists()
    self._run_path.mkdir(parents=True, exist_ok=True)

    # set up files and objects for logs, artifacts, and metrics
    # ----------------------------------------

    self.log_file: io.TextIOWrapper = open(self.run_path / FilePaths.LOG, "a")

    self.metrics_file: io.TextIOWrapper = open(
        self.run_path / FilePaths.METRICS, "a"
    )

    self.artifacts_file: io.TextIOWrapper = open(
        self.run_path / FilePaths.ARTIFACTS, "a"
    )

    # metric names (getting mem usage might cause problems if we have an error)
    # copy first: extending the caller's list in place would leak the
    # memory-usage keys into the caller's object
    self.metric_names: list[str] = list(metric_names)
    if memusage_as_metrics:
        self.metric_names += list(self.get_mem_usage().keys())

    # put everything in a config
    self.logger_meta: dict = dict(
        run_id=self.run_id,
        run_path=self.run_path.as_posix(),
        syllabic_id=self.syllabic_id,
        group=self.group,
        project=self.project,
        run_init_timestamp=str(self.run_init_timestamp.isoformat()),
        # full list, including any memory-usage metric names
        metric_names=self.metric_names,
        train_config=train_config,  # TODO: this duplicates the contents of FilePaths.TRAIN_CONFIG, is that ok?
    )

    # write to the project jsonl
    with open(self.project_path / FilePaths.RUNS_MANIFEST, "a") as f:
        json.dump(self.logger_meta, f)
        f.write("\n")

    # write the index.html and start_server.py files
    # ----------------------------------------
    from trnbl.loggers.local.html_frontend import get_html_frontend

    with open(self.project_path / FilePaths.HTML_INDEX, "w") as f:
        f.write(get_html_frontend())

    import trnbl.loggers.local.start_server as start_server_module

    with open(self.project_path / FilePaths.START_SERVER, "w") as f:
        f.write(inspect.getsource(start_server_module))

    # write init files
    # ----------------------------------------

    # logger metadata
    with open(self.run_path / FilePaths.LOGGER_META, "w") as f:
        json.dump(self.logger_meta, f, indent="\t")

    with open(self.run_path / FilePaths.LOGGER_META_YML, "w") as f:
        yaml.dump(self.logger_meta, f)

    # training/model/dataset config
    with open(self.run_path / FilePaths.TRAIN_CONFIG, "w") as f:
        json.dump(train_config, f, indent="\t")

    with open(self.run_path / FilePaths.TRAIN_CONFIG_YML, "w") as f:
        yaml.dump(train_config, f)

    self.message(f"starting logger with id {self.run_id}")
def
debug(self, message: str, **kwargs) -> None:
def debug(self, message: str, **kwargs) -> None:
    """Record a debug-level message.

    Forwards to ``self._log`` with the ``__dbg__`` flag set to ``True``;
    any extra keyword arguments are passed along unchanged.
    """
    self._log(
        message,
        __dbg__=True,
        **kwargs,
    )
log a debug message
def
message(self, message: str, **kwargs) -> None:
def message(self, message: str, **kwargs) -> None:
    """Record a progress message and echo it to the console.

    The message (plus any extra keyword arguments) goes to ``self._log``
    first; afterwards it is printed, preceded by ``self.console_msg_prefix``.
    """
    # TODO: also log messages via regular logger to stdout
    self._log(message, **kwargs)
    console_line = self.console_msg_prefix + message
    print(console_line)
log a progress message
def
warning(self, message: str, **kwargs) -> None:
def warning(self, message: str, **kwargs) -> None:
    """Record a warning.

    The text is prefixed with ``"WARNING: "`` and handed to
    ``self.message`` together with the ``__warning__`` flag.
    """
    tagged = f"WARNING: {message}"
    self.message(tagged, __warning__=True, **kwargs)
log a warning message
def
error(self, message: str, **kwargs) -> None:
def error(self, message: str, **kwargs) -> None:
    """Record an error.

    The text is prefixed with ``"ERROR: "`` and handed to ``self.message``
    together with the ``__error__`` flag. The raw message is then appended
    to the error file in the run directory, below a separator line and a
    timestamp line.
    """
    self.message(f"ERROR: {message}", __error__=True, **kwargs)

    separator = "=" * 80 + "\n"
    error_path = self.run_path / FilePaths.ERROR_FILE
    with open(error_path, "a") as err_file:
        err_file.write(separator)
        err_file.write("exception at " + self.get_timestamp() + "\n")
        err_file.write(message)
        err_file.write("\n")
        err_file.flush()
log an error message
def
metrics(self, data: dict[str, typing.Any]) -> None:
def metrics(self, data: dict[str, Any]) -> None:
    """log a dictionary of metrics

    A copy of ``data`` with a ``"timestamp"`` entry is appended to
    ``self.metrics_list`` and written as one JSON line to
    ``self.metrics_file``. ``data`` itself is left untouched so the caller
    can safely reuse it.
    """
    # copy instead of mutating: a caller that reuses one dict across steps
    # would otherwise alias every entry of ``metrics_list``
    record: dict[str, Any] = {**data, "timestamp": self.get_timestamp()}

    self.metrics_list.append(record)
    self.metrics_file.write(json.dumps(record) + "\n")
log a dictionary of metrics
def
artifact( self, path: pathlib.Path, type: str, aliases: list[str] | None = None, metadata: dict | None = None) -> None:
210 def artifact( 211 self, 212 path: Path, 213 type: str, 214 aliases: list[str] | None = None, 215 metadata: dict | None = None, 216 ) -> None: 217 """log an artifact from a file""" 218 artifact_dict: dict = dict( 219 timestamp=self.get_timestamp(), 220 path=path.as_posix(), 221 type=type, 222 aliases=aliases, 223 metadata=metadata if metadata else {}, 224 ) 225 226 self.artifacts_list.append(artifact_dict) 227 self.artifacts_file.write(json.dumps(artifact_dict) + "\n")
log an artifact from a file
url: str
@property
def url(self) -> str:
    """Location of the current logging run: the run path in posix form."""
    location = self.run_path
    return location.as_posix()
Get the URL for the current logging run
run_path: pathlib.Path
@property
def run_path(self) -> Path:
    """Directory of the current logging run (read-only view of ``_run_path``)."""
    return self._run_path
Get the path to the current logging run
def
flush(self) -> None:
def flush(self) -> None:
    """Flush the log, metrics and artifacts files, in that order."""
    for handle in (self.log_file, self.metrics_file, self.artifacts_file):
        handle.flush()
Flush the logger
def
finish(self) -> None:
244 def finish(self) -> None: 245 self.message("closing logger") 246 247 self.log_file.flush() 248 self.log_file.close() 249 250 self.metrics_file.flush() 251 self.metrics_file.close() 252 253 self.artifacts_file.flush() 254 self.artifacts_file.close()
Finish logging