docs for muutils v0.9.1
View Source on GitHub

muutils.logger

(deprecated) experimenting with logging utilities


 1"""(deprecated) experimenting with logging utilities"""
 2
 3import warnings
 4
 5from muutils.logger.logger import Logger
 6from muutils.logger.loggingstream import LoggingStream
 7from muutils.logger.simplelogger import SimpleLogger
 8from muutils.logger.timing import TimerContext
 9
10warnings.warn(
11    DeprecationWarning(
12        "muutils.logger is no longer maintained. Consider using [trnbl](https://github.com/mivanit/trnbl) instead."
13    )
14)
15
16__all__ = [
17    # submodules
18    "exception_context",
19    "headerfuncs",
20    "log_util",
21    "logger",
22    "loggingstream",
23    "simplelogger",
24    "timing",
25    # imports
26    "Logger",
27    "LoggingStream",
28    "SimpleLogger",
29    "TimerContext",
30]

 41class Logger(SimpleLogger):
 42    """logger with more features, including log levels and streams
 43
 44    # Parameters:
 45            - `log_path : str | None`
 46            default log file path
 47            (defaults to `None`)
 48            - `log_file : AnyIO | None`
 49            default log io, should have a `.write()` method (pass only this or `log_path`, not both)
 50            (defaults to `None`)
 51            - `timestamp : bool`
 52            whether to add timestamps to every log message (under the `_timestamp` key)
 53            (defaults to `True`)
 54            - `default_level : int`
 55            default log level for streams/messages that don't specify a level
 56            (defaults to `0`)
 57            - `console_print_threshold : int`
 58            log level at which to print to the console, anything greater will not be printed unless overridden by `console_print`
 59            (defaults to `50`)
 60            - `level_header : HeaderFunction`
 61            function for formatting log messages when printing to console
 62            (defaults to `HEADER_FUNCTIONS["md"]`)
 63    - `keep_last_msg_time : bool`
 64            whether to keep the last message time
 65            (defaults to `True`)
 66
 67
 68    # Raises:
 69            - `ValueError` : _description_
 70    """
 71
 72    def __init__(
 73        self,
 74        log_path: str | None = None,
 75        log_file: AnyIO | None = None,
 76        default_level: int = 0,
 77        console_print_threshold: int = 50,
 78        level_header: HeaderFunction = HEADER_FUNCTIONS["md"],
 79        streams: dict[str | None, LoggingStream] | Sequence[LoggingStream] = (),
 80        keep_last_msg_time: bool = True,
 81        # junk args
 82        timestamp: bool = True,
 83        **kwargs: Any,
 84    ) -> None:
 85        # junk arg checking
 86        # ==================================================
 87        if len(kwargs) > 0:
 88            raise ValueError(f"unrecognized kwargs: {kwargs}")
 89
 90        if not timestamp:
 91            raise ValueError(
 92                "timestamp must be True -- why would you not want timestamps?"
 93            )
 94
 95        # timing
 96        # ==================================================
 97        # timing compares
 98        self._keep_last_msg_time: bool = keep_last_msg_time
 99        # TODO: handle per stream?
100        self._last_msg_time: float | None = time.time()
101
102        # basic setup
103        # ==================================================
104        # init BaseLogger
105        super().__init__(log_file=log_file, log_path=log_path, timestamp=timestamp)
106
107        # level-related
108        self._console_print_threshold: int = console_print_threshold
109        self._default_level: int = default_level
110
111        # set up streams
112        self._streams: dict[str | None, LoggingStream] = (
113            streams if isinstance(streams, dict) else {s.name: s for s in streams}  # ty: ignore[invalid-assignment]
114        )
115        # default error stream
116        if "error" not in self._streams:
117            self._streams["error"] = LoggingStream(
118                "error",
119                aliases={
120                    "err",
121                    "except",
122                    "Exception",
123                    "exception",
124                    "exceptions",
125                    "errors",
126                },
127            )
128
129        # check alias duplicates
130        alias_set: set[str | None] = set()
131        for stream in self._streams.values():
132            for alias in stream.aliases:
133                if alias in alias_set:
134                    raise ValueError(f"alias {alias} is already in use")
135                alias_set.add(alias)
136
137        # add aliases
138        for stream in tuple(self._streams.values()):
139            for alias in stream.aliases:
140                if alias not in self._streams:
141                    self._streams[alias] = stream
142
143        # print formatting
144        self._level_header: HeaderFunction = level_header
145
146        print({k: str(v) for k, v in self._streams.items()})
147
148    def _exception_context(
149        self,
150        stream: str = "error",
151        # level: int = -256,
152        # **kwargs,
153    ) -> ExceptionContext:
154        import sys
155
156        s: LoggingStream = self._streams[stream]
157        handler = s.handler if s.handler is not None else sys.stderr
158        return ExceptionContext(stream=handler)
159
160    def log(
161        self,
162        msg: JSONitem = None,
163        *,
164        lvl: int | None = None,
165        stream: str | None = None,
166        console_print: bool = False,
167        extra_indent: str = "",
168        **kwargs: Any,
169    ) -> None:
170        """logging function
171
172        ### Parameters:
173         - `msg : JSONitem`
174           message (usually string or dict) to be logged
175         - `lvl : int | None`
176           level of message (lower levels are more important)
177           (defaults to `None`)
178         - `console_print : bool`
179           override `console_print_threshold` setting
180           (defaults to `False`)
181         - `stream : str | None`
182           whether to log to a stream (defaults to `None`), which logs to the default `None` stream
183           (defaults to `None`)
184        """
185
186        # add to known stream names if not present
187        if stream not in self._streams:
188            self._streams[stream] = LoggingStream(stream)
189
190        # set default level to either global or stream-specific default level
191        # ========================================
192        if lvl is None:
193            if stream is None:
194                lvl = self._default_level
195            else:
196                if self._streams[stream].default_level is not None:
197                    lvl = self._streams[stream].default_level
198                else:
199                    lvl = self._default_level
200
201        assert lvl is not None, "lvl should not be None at this point"
202
203        # print to console with formatting
204        # ========================================
205        _printed: bool = False
206        if console_print or (lvl <= self._console_print_threshold):
207            # add some formatting
208            print(
209                self._level_header(
210                    msg=msg,
211                    lvl=lvl,
212                    stream=stream,
213                    extra_indent=extra_indent,
214                )
215            )
216
217            # store the last message time
218            if self._last_msg_time is not None:
219                self._last_msg_time = time.time()
220
221            _printed = True
222
223        # convert and add data
224        # ========================================
225        # converting to dict
226        msg_dict: dict[str, Any]
227        if not isinstance(msg, typing.Mapping):
228            msg_dict = {"_msg": msg}
229        else:
230            msg_dict = dict(typing.cast(typing.Mapping[str, Any], msg))
231
232        # level+stream metadata
233        if lvl is not None:
234            msg_dict["_lvl"] = lvl
235
236        # msg_dict["_stream"] = stream # moved to LoggingStream
237
238        # extra data in kwargs
239        if len(kwargs) > 0:
240            msg_dict["_kwargs"] = kwargs
241
242        # add default contents (timing, etc)
243        msg_dict = {
244            **{k: v() for k, v in self._streams[stream].default_contents.items()},
245            **msg_dict,
246        }
247
248        # write
249        # ========================================
250        logfile_msg: str = json.dumps(json_serialize(msg_dict)) + "\n"
251        if (
252            (stream is None)
253            or (stream not in self._streams)
254            or (self._streams[stream].handler is None)
255        ):
256            # write to the main log file if no stream is specified
257            self._log_file_handle.write(logfile_msg)
258        else:
259            # otherwise, write to the stream-specific file
260            s_handler: AnyIO | None = self._streams[stream].handler
261            if s_handler is not None:
262                s_handler.write(logfile_msg)
263            else:
264                raise ValueError(
265                    f"stream handler is None! something in the logging stream setup is wrong:\n{self}"
266                )
267
268        # if it was important enough to print, flush all streams
269        if _printed:
270            self.flush_all()
271
272    def log_elapsed_last(
273        self,
274        lvl: int | None = None,
275        stream: str | None = None,
276        console_print: bool = True,
277        **kwargs: Any,
278    ) -> None:
279        """logs the time elapsed since the last message was printed to the console (in any stream)"""
280        if self._last_msg_time is None:
281            raise ValueError("no last message time!")
282        else:
283            self.log(
284                {"elapsed_time": round(time.time() - self._last_msg_time, 6)},
285                lvl=(lvl if lvl is not None else self._console_print_threshold),
286                stream=stream,
287                console_print=console_print,
288                **kwargs,
289            )
290
291    def flush_all(self):
292        """flush all streams"""
293
294        self._log_file_handle.flush()
295
296        for stream in self._streams.values():
297            if stream.handler is not None:
298                stream.handler.flush()
299
300    def __getattr__(self, stream: str) -> Callable[..., Any]:
301        if stream.startswith("_"):
302            raise AttributeError(f"invalid stream name {stream} (no underscores)")
303        return partial(self.log, stream=stream)
304
305    def __getitem__(self, stream: str) -> Callable[..., Any]:
306        return partial(self.log, stream=stream)
307
308    def __call__(self, *args: Any, **kwargs: Any) -> None:
309        self.log(*args, **kwargs)

logger with more features, including log levels and streams

Parameters:

    - `log_path : str | None`
    default log file path
    (defaults to `None`)
    - `log_file : AnyIO | None`
    default log io, should have a `.write()` method (pass only this or `log_path`, not both)
    (defaults to `None`)
    - `timestamp : bool`
    whether to add timestamps to every log message (under the `_timestamp` key)
    (defaults to `True`)
    - `default_level : int`
    default log level for streams/messages that don't specify a level
    (defaults to `0`)
    - `console_print_threshold : int`
    log level at which to print to the console, anything greater will not be printed unless overridden by `console_print`
    (defaults to `50`)
    - `level_header : HeaderFunction`
    function for formatting log messages when printing to console
    (defaults to `HEADER_FUNCTIONS["md"]`)

  • keep_last_msg_time : bool whether to keep the last message time (defaults to True)

Raises:

    - `ValueError` : if unrecognized keyword arguments are passed, or if `timestamp` is `False`
Logger( log_path: str | None = None, log_file: Union[TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None, default_level: int = 0, console_print_threshold: int = 50, level_header: muutils.logger.headerfuncs.HeaderFunction = <function md_header_function>, streams: Union[dict[str | None, LoggingStream], Sequence[LoggingStream]] = (), keep_last_msg_time: bool = True, timestamp: bool = True, **kwargs: Any)
 72    def __init__(
 73        self,
 74        log_path: str | None = None,
 75        log_file: AnyIO | None = None,
 76        default_level: int = 0,
 77        console_print_threshold: int = 50,
 78        level_header: HeaderFunction = HEADER_FUNCTIONS["md"],
 79        streams: dict[str | None, LoggingStream] | Sequence[LoggingStream] = (),
 80        keep_last_msg_time: bool = True,
 81        # junk args
 82        timestamp: bool = True,
 83        **kwargs: Any,
 84    ) -> None:
 85        # junk arg checking
 86        # ==================================================
 87        if len(kwargs) > 0:
 88            raise ValueError(f"unrecognized kwargs: {kwargs}")
 89
 90        if not timestamp:
 91            raise ValueError(
 92                "timestamp must be True -- why would you not want timestamps?"
 93            )
 94
 95        # timing
 96        # ==================================================
 97        # timing compares
 98        self._keep_last_msg_time: bool = keep_last_msg_time
 99        # TODO: handle per stream?
100        self._last_msg_time: float | None = time.time()
101
102        # basic setup
103        # ==================================================
104        # init BaseLogger
105        super().__init__(log_file=log_file, log_path=log_path, timestamp=timestamp)
106
107        # level-related
108        self._console_print_threshold: int = console_print_threshold
109        self._default_level: int = default_level
110
111        # set up streams
112        self._streams: dict[str | None, LoggingStream] = (
113            streams if isinstance(streams, dict) else {s.name: s for s in streams}  # ty: ignore[invalid-assignment]
114        )
115        # default error stream
116        if "error" not in self._streams:
117            self._streams["error"] = LoggingStream(
118                "error",
119                aliases={
120                    "err",
121                    "except",
122                    "Exception",
123                    "exception",
124                    "exceptions",
125                    "errors",
126                },
127            )
128
129        # check alias duplicates
130        alias_set: set[str | None] = set()
131        for stream in self._streams.values():
132            for alias in stream.aliases:
133                if alias in alias_set:
134                    raise ValueError(f"alias {alias} is already in use")
135                alias_set.add(alias)
136
137        # add aliases
138        for stream in tuple(self._streams.values()):
139            for alias in stream.aliases:
140                if alias not in self._streams:
141                    self._streams[alias] = stream
142
143        # print formatting
144        self._level_header: HeaderFunction = level_header
145
146        print({k: str(v) for k, v in self._streams.items()})
def log( self, msg: Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]] = None, *, lvl: int | None = None, stream: str | None = None, console_print: bool = False, extra_indent: str = '', **kwargs: Any) -> None:
160    def log(
161        self,
162        msg: JSONitem = None,
163        *,
164        lvl: int | None = None,
165        stream: str | None = None,
166        console_print: bool = False,
167        extra_indent: str = "",
168        **kwargs: Any,
169    ) -> None:
170        """logging function
171
172        ### Parameters:
173         - `msg : JSONitem`
174           message (usually string or dict) to be logged
175         - `lvl : int | None`
176           level of message (lower levels are more important)
177           (defaults to `None`)
178         - `console_print : bool`
179           override `console_print_threshold` setting
180           (defaults to `False`)
181         - `stream : str | None`
182           whether to log to a stream (defaults to `None`), which logs to the default `None` stream
183           (defaults to `None`)
184        """
185
186        # add to known stream names if not present
187        if stream not in self._streams:
188            self._streams[stream] = LoggingStream(stream)
189
190        # set default level to either global or stream-specific default level
191        # ========================================
192        if lvl is None:
193            if stream is None:
194                lvl = self._default_level
195            else:
196                if self._streams[stream].default_level is not None:
197                    lvl = self._streams[stream].default_level
198                else:
199                    lvl = self._default_level
200
201        assert lvl is not None, "lvl should not be None at this point"
202
203        # print to console with formatting
204        # ========================================
205        _printed: bool = False
206        if console_print or (lvl <= self._console_print_threshold):
207            # add some formatting
208            print(
209                self._level_header(
210                    msg=msg,
211                    lvl=lvl,
212                    stream=stream,
213                    extra_indent=extra_indent,
214                )
215            )
216
217            # store the last message time
218            if self._last_msg_time is not None:
219                self._last_msg_time = time.time()
220
221            _printed = True
222
223        # convert and add data
224        # ========================================
225        # converting to dict
226        msg_dict: dict[str, Any]
227        if not isinstance(msg, typing.Mapping):
228            msg_dict = {"_msg": msg}
229        else:
230            msg_dict = dict(typing.cast(typing.Mapping[str, Any], msg))
231
232        # level+stream metadata
233        if lvl is not None:
234            msg_dict["_lvl"] = lvl
235
236        # msg_dict["_stream"] = stream # moved to LoggingStream
237
238        # extra data in kwargs
239        if len(kwargs) > 0:
240            msg_dict["_kwargs"] = kwargs
241
242        # add default contents (timing, etc)
243        msg_dict = {
244            **{k: v() for k, v in self._streams[stream].default_contents.items()},
245            **msg_dict,
246        }
247
248        # write
249        # ========================================
250        logfile_msg: str = json.dumps(json_serialize(msg_dict)) + "\n"
251        if (
252            (stream is None)
253            or (stream not in self._streams)
254            or (self._streams[stream].handler is None)
255        ):
256            # write to the main log file if no stream is specified
257            self._log_file_handle.write(logfile_msg)
258        else:
259            # otherwise, write to the stream-specific file
260            s_handler: AnyIO | None = self._streams[stream].handler
261            if s_handler is not None:
262                s_handler.write(logfile_msg)
263            else:
264                raise ValueError(
265                    f"stream handler is None! something in the logging stream setup is wrong:\n{self}"
266                )
267
268        # if it was important enough to print, flush all streams
269        if _printed:
270            self.flush_all()

logging function

Parameters:

  • msg : JSONitem message (usually string or dict) to be logged
  • lvl : int | None level of message (lower levels are more important) (defaults to None)
  • console_print : bool override console_print_threshold setting (defaults to False)
  • stream : str | None name of the stream to log to; `None` (the default) logs to the default `None` stream
def log_elapsed_last( self, lvl: int | None = None, stream: str | None = None, console_print: bool = True, **kwargs: Any) -> None:
272    def log_elapsed_last(
273        self,
274        lvl: int | None = None,
275        stream: str | None = None,
276        console_print: bool = True,
277        **kwargs: Any,
278    ) -> None:
279        """logs the time elapsed since the last message was printed to the console (in any stream)"""
280        if self._last_msg_time is None:
281            raise ValueError("no last message time!")
282        else:
283            self.log(
284                {"elapsed_time": round(time.time() - self._last_msg_time, 6)},
285                lvl=(lvl if lvl is not None else self._console_print_threshold),
286                stream=stream,
287                console_print=console_print,
288                **kwargs,
289            )

logs the time elapsed since the last message was printed to the console (in any stream)

def flush_all(self):
291    def flush_all(self):
292        """flush all streams"""
293
294        self._log_file_handle.flush()
295
296        for stream in self._streams.values():
297            if stream.handler is not None:
298                stream.handler.flush()

flush all streams

@dataclass
class LoggingStream:
 18@dataclass
 19class LoggingStream:
 20    """properties of a logging stream
 21
 22    - `name: str` name of the stream
 23    - `aliases: set[str]` aliases for the stream
 24            (calls to these names will be redirected to this stream. duplicate alises will result in errors)
 25            TODO: perhaps duplicate alises should result in duplicate writes?
 26    - `file: str|bool|AnyIO|None` file to write to
 27            - if `None`, will write to standard log
 28            - if `True`, will write to `name + ".log"`
 29            - if `False` will "write" to `NullIO` (throw it away)
 30            - if a string, will write to that file
 31            - if a fileIO type object, will write to that object
 32    - `default_level: int|None` default level for this stream
 33    - `default_contents: dict[str, Callable[[], Any]]` default contents for this stream
 34    - `last_msg: tuple[float, Any]|None` last message written to this stream (timestamp, message)
 35    """
 36
 37    name: str | None
 38    aliases: set[str | None] = field(default_factory=set)
 39    file: str | bool | AnyIO | None = None
 40    default_level: int | None = None
 41    default_contents: dict[str, Callable[[], Any]] = field(default_factory=dict)
 42    handler: AnyIO | None = None
 43
 44    # TODO: implement last-message caching
 45    # last_msg: tuple[float, Any]|None = None
 46
 47    def make_handler(self) -> AnyIO | None:
 48        if self.file is None:
 49            return None
 50        elif isinstance(self.file, str):
 51            # if its a string, open a file
 52            return open(
 53                self.file,
 54                "w",
 55                encoding="utf-8",
 56            )
 57        elif isinstance(self.file, bool):
 58            # if its a bool and true, open a file with the same name as the stream (in the current dir)
 59            # TODO: make this happen in the same dir as the main logfile?
 60            if self.file:
 61                return open(  # type: ignore[return-value]
 62                    f"{sanitize_fname(self.name)}.log.jsonl",
 63                    "w",
 64                    encoding="utf-8",
 65                )
 66            else:
 67                return NullIO()
 68        else:
 69            # if its neither, check it has `.write()` and `.flush()` methods
 70            if (
 71                (
 72                    not hasattr(self.file, "write")
 73                    or (not callable(self.file.write))
 74                    or (not hasattr(self.file, "flush"))
 75                    or (not callable(self.file.flush))
 76                )
 77                or (not hasattr(self.file, "close"))
 78                or (not callable(self.file.close))
 79            ):
 80                raise ValueError(f"stream {self.name} has invalid handler {self.file}")
 81            # ignore type check because we know it has a .write() method,
 82            # assume the user knows what they're doing
 83            return self.file  # type: ignore
 84
 85    def __post_init__(self):
 86        self.aliases = set(self.aliases)
 87        if any(x.startswith("_") for x in self.aliases if x is not None):
 88            raise ValueError(
 89                "stream names or aliases cannot start with an underscore, sorry"
 90            )
 91        self.aliases.add(self.name)
 92        self.default_contents["_timestamp"] = time.time
 93        self.default_contents["_stream"] = lambda: self.name
 94        self.handler = self.make_handler()
 95
 96    def __del__(self):
 97        if self.handler is not None:
 98            self.handler.flush()
 99            self.handler.close()
100
101    @override
102    def __str__(self):
103        return f"LoggingStream(name={self.name}, aliases={self.aliases}, file={self.file}, default_level={self.default_level}, default_contents={self.default_contents})"

properties of a logging stream

  • name: str name of the stream
  • aliases: set[str] aliases for the stream (calls to these names will be redirected to this stream. duplicate aliases will result in errors) TODO: perhaps duplicate aliases should result in duplicate writes?
  • file: str|bool|AnyIO|None file to write to
    • if None, will write to standard log
    • if True, will write to name + ".log.jsonl" (the stream name is sanitized for use as a filename)
    • if False will "write" to NullIO (throw it away)
    • if a string, will write to that file
    • if a fileIO type object, will write to that object
  • default_level: int|None default level for this stream
  • default_contents: dict[str, Callable[[], Any]] default contents for this stream
  • last_msg: tuple[float, Any]|None last message written to this stream (timestamp, message)
LoggingStream( name: str | None, aliases: set[str | None] = <factory>, file: Union[str, bool, TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None, default_level: int | None = None, default_contents: dict[str, typing.Callable[[], typing.Any]] = <factory>, handler: Union[TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None)
name: str | None
aliases: set[str | None]
file: Union[str, bool, TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None
default_level: int | None = None
default_contents: dict[str, typing.Callable[[], typing.Any]]
handler: Union[TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None
def make_handler(self) -> Union[TextIO, muutils.logger.simplelogger.NullIO, NoneType]:
47    def make_handler(self) -> AnyIO | None:
48        if self.file is None:
49            return None
50        elif isinstance(self.file, str):
51            # if its a string, open a file
52            return open(
53                self.file,
54                "w",
55                encoding="utf-8",
56            )
57        elif isinstance(self.file, bool):
58            # if its a bool and true, open a file with the same name as the stream (in the current dir)
59            # TODO: make this happen in the same dir as the main logfile?
60            if self.file:
61                return open(  # type: ignore[return-value]
62                    f"{sanitize_fname(self.name)}.log.jsonl",
63                    "w",
64                    encoding="utf-8",
65                )
66            else:
67                return NullIO()
68        else:
69            # if its neither, check it has `.write()` and `.flush()` methods
70            if (
71                (
72                    not hasattr(self.file, "write")
73                    or (not callable(self.file.write))
74                    or (not hasattr(self.file, "flush"))
75                    or (not callable(self.file.flush))
76                )
77                or (not hasattr(self.file, "close"))
78                or (not callable(self.file.close))
79            ):
80                raise ValueError(f"stream {self.name} has invalid handler {self.file}")
81            # ignore type check because we know it has a .write() method,
82            # assume the user knows what they're doing
83            return self.file  # type: ignore
class SimpleLogger:
35class SimpleLogger:
36    """logs training data to a jsonl file"""
37
38    def __init__(
39        self,
40        log_path: str | None = None,
41        log_file: AnyIO | None = None,
42        timestamp: bool = True,
43    ):
44        self._timestamp: bool = timestamp
45        self._log_path: str | None = log_path
46
47        self._log_file_handle: AnyIO
48
49        if (log_path is None) and (log_file is None):
50            print(
51                "[logger_internal] # no log file specified, will only write to console",
52                sys.stderr,
53            )
54            self._log_file_handle = sys.stdout
55
56        elif (log_path is not None) and (log_file is not None):
57            raise ValueError(
58                "cannot specify both log_path and log_file, use streams in `SimpleLogger`"
59            )
60        else:
61            # now exactly one of the two is None
62            if log_file is not None:
63                self._log_file_handle = log_file
64            else:
65                assert log_path is not None
66                self._log_file_handle = open(log_path, "w", encoding="utf-8")
67
68    def log(self, msg: JSONitem, *, console_print: bool = False, **kwargs: Any) -> None:
69        """log a message to the log file, and optionally to the console"""
70        if console_print:
71            print(msg)
72
73        msg_dict: dict[str, Any]
74        if not isinstance(msg, typing.Mapping):
75            msg_dict = {"_msg": msg}
76        else:
77            msg_dict = dict(typing.cast(typing.Mapping[str, Any], msg))
78
79        if self._timestamp:
80            msg_dict["_timestamp"] = time.time()
81
82        if len(kwargs) > 0:
83            msg_dict["_kwargs"] = kwargs
84
85        self._log_file_handle.write(json.dumps(json_serialize(msg_dict)) + "\n")

logs training data to a jsonl file

SimpleLogger( log_path: str | None = None, log_file: Union[TextIO, muutils.logger.simplelogger.NullIO, NoneType] = None, timestamp: bool = True)
38    def __init__(
39        self,
40        log_path: str | None = None,
41        log_file: AnyIO | None = None,
42        timestamp: bool = True,
43    ):
44        self._timestamp: bool = timestamp
45        self._log_path: str | None = log_path
46
47        self._log_file_handle: AnyIO
48
49        if (log_path is None) and (log_file is None):
50            print(
51                "[logger_internal] # no log file specified, will only write to console",
52                sys.stderr,
53            )
54            self._log_file_handle = sys.stdout
55
56        elif (log_path is not None) and (log_file is not None):
57            raise ValueError(
58                "cannot specify both log_path and log_file, use streams in `SimpleLogger`"
59            )
60        else:
61            # now exactly one of the two is None
62            if log_file is not None:
63                self._log_file_handle = log_file
64            else:
65                assert log_path is not None
66                self._log_file_handle = open(log_path, "w", encoding="utf-8")
def log( self, msg: Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]], *, console_print: bool = False, **kwargs: Any) -> None:
68    def log(self, msg: JSONitem, *, console_print: bool = False, **kwargs: Any) -> None:
69        """log a message to the log file, and optionally to the console"""
70        if console_print:
71            print(msg)
72
73        msg_dict: dict[str, Any]
74        if not isinstance(msg, typing.Mapping):
75            msg_dict = {"_msg": msg}
76        else:
77            msg_dict = dict(typing.cast(typing.Mapping[str, Any], msg))
78
79        if self._timestamp:
80            msg_dict["_timestamp"] = time.time()
81
82        if len(kwargs) > 0:
83            msg_dict["_kwargs"] = kwargs
84
85        self._log_file_handle.write(json.dumps(json_serialize(msg_dict)) + "\n")

log a message to the log file, and optionally to the console

class TimerContext:
 9class TimerContext:
10    """context manager for timing code"""
11
12    def __init__(self) -> None:
13        self.start_time: float
14        self.end_time: float
15        self.elapsed_time: float
16
17    def __enter__(self) -> "TimerContext":
18        self.start_time = time.time()
19        return self
20
21    def __exit__(
22        self,
23        exc_type: type[BaseException] | None,
24        exc_val: BaseException | None,
25        exc_tb: TracebackType | None,
26    ) -> Literal[False]:
27        self.end_time = time.time()
28        self.elapsed_time = self.end_time - self.start_time
29        return False

context manager for timing code

start_time: float
end_time: float
elapsed_time: float