docs for muutils v0.9.1
View Source on GitHub

muutils.json_serialize.json_serialize

provides the basic framework for json serialization of objects

notably:

  • SerializerHandler defines how to serialize a specific type of object
  • JsonSerializer handles configuration for which handlers to use
  • json_serialize provides the default configuration if you don't care -- call it on any object!

  1"""provides the basic framework for json serialization of objects
  2
  3notably:
  4
  5- `SerializerHandler` defines how to serialize a specific type of object
  6- `JsonSerializer` handles configuration for which handlers to use
  7- `json_serialize` provides the default configuration if you don't care -- call it on any object!
  8
  9"""
 10
 11from __future__ import annotations
 12
 13import inspect
 14import warnings
 15from dataclasses import dataclass, is_dataclass
 16from pathlib import Path
 17from typing import (
 18    TYPE_CHECKING,
 19    Any,
 20    Callable,
 21    Iterable,
 22    Mapping,
 23    Set,
 24    Union,
 25    cast,
 26    overload,
 27)
 28
 29from muutils.errormode import ErrorMode
 30
 31if TYPE_CHECKING:
 32    # always need array.py for type checking
 33    from muutils.json_serialize.array import ArrayMode, serialize_array
 34else:
 35    try:
 36        from muutils.json_serialize.array import ArrayMode, serialize_array
 37    except ImportError as e:
 38        # TYPING: obviously, these types are all wrong if we can't import array.py
 39        ArrayMode = str  # type: ignore[misc]
 40        serialize_array = lambda *args, **kwargs: None  # type: ignore[assignment, invalid-assignment] # noqa: E731
 41        warnings.warn(
 42            f"muutils.json_serialize.array could not be imported probably because missing numpy, array serialization will not work: \n{e}",
 43            ImportWarning,
 44        )
 45
 46from muutils.json_serialize.types import (
 47    _FORMAT_KEY,
 48    Hashableitem,
 49)  # pyright: ignore[reportPrivateUsage]
 50
 51from muutils.json_serialize.util import (
 52    JSONdict,
 53    JSONitem,
 54    MonoTuple,
 55    SerializationException,
 56    _recursive_hashify,  # pyright: ignore[reportPrivateUsage, reportUnknownVariableType]
 57    isinstance_namedtuple,
 58    safe_getsource,
 59    string_as_lines,
 60    try_catch,
 61)
 62
 63# pylint: disable=protected-access
 64
# attribute names that the fallback handler reads with `getattr(obj, key, None)`
# and stores as strings in its output dict
SERIALIZER_SPECIAL_KEYS: MonoTuple[str] = (
    "__name__",
    "__doc__",
    "__module__",
    "__class__",
    "__dict__",
    "__annotations__",
)

# output key -> callable applied to the object by the fallback handler
# NOTE(review): `try_catch` comes from `muutils.json_serialize.util`; presumably it
# wraps the callable so that a failure does not propagate -- confirm there
SERIALIZER_SPECIAL_FUNCS: dict[str, Callable[..., str | list[str]]] = {
    "str": str,
    "dir": dir,
    "type": try_catch(lambda x: str(type(x).__name__)),  # pyright: ignore[reportUnknownArgumentType, reportUnknownLambdaType]
    "repr": try_catch(lambda x: repr(x)),  # pyright: ignore[reportUnknownArgumentType, reportUnknownLambdaType]
    "code": try_catch(lambda x: inspect.getsource(x)),  # pyright: ignore[reportUnknownArgumentType, reportUnknownLambdaType]
    "sourcefile": try_catch(lambda x: str(inspect.getsourcefile(x))),  # pyright: ignore[reportUnknownArgumentType, reportUnknownLambdaType]
}

# values of `str(type(obj))` for which the object is serialized simply as `str(obj)`;
# matching on the string means the owning package never has to be imported here
SERIALIZE_DIRECT_AS_STR: Set[str] = {
    "<class 'torch.device'>",
    "<class 'torch.dtype'>",
}

# location of an object inside the structure being serialized: a tuple of
# dict keys / field names (str) and sequence indices (int), extended by the
# handlers as they recurse
ObjectPath = MonoTuple[Union[str, int]]
 89
 90
 91@dataclass
 92class SerializerHandler:
 93    """a handler for a specific type of object
 94
 95    # Parameters:
 96        - `check : Callable[[JsonSerializer, Any], bool]` takes a JsonSerializer and an object, returns whether to use this handler
 97        - `serialize : Callable[[JsonSerializer, Any, ObjectPath], JSONitem]` takes a JsonSerializer, an object, and the current path, returns the serialized object
 98        - `desc : str` description of the handler (optional)
 99    """
100
101    # (self_config, object) -> whether to use this handler
102    check: Callable[["JsonSerializer", Any, ObjectPath], bool]
103    # (self_config, object, path) -> serialized object
104    serialize_func: Callable[["JsonSerializer", Any, ObjectPath], JSONitem]
105    # unique identifier for the handler
106    uid: str
107    # description of this serializer
108    desc: str
109
110    def serialize(self) -> JSONdict:
111        """serialize the handler info"""
112        return {
113            # get the code and doc of the check function
114            "check": {
115                "code": safe_getsource(self.check),
116                "doc": string_as_lines(self.check.__doc__),
117            },
118            # get the code and doc of the load function
119            "serialize_func": {
120                "code": safe_getsource(self.serialize_func),
121                "doc": string_as_lines(self.serialize_func.__doc__),
122            },
123            # get the uid, source_pckg, priority, and desc
124            "uid": str(self.uid),
125            "source_pckg": getattr(self.serialize_func, "source_pckg", None),
126            "__module__": getattr(self.serialize_func, "__module__", None),
127            "desc": str(self.desc),
128        }
129
130
# handlers for plain python containers and scalars.
# order matters: `JsonSerializer.json_serialize` uses the first handler whose
# `check` returns True, so namedtuples must be tested before generic tuples.
BASE_HANDLERS: MonoTuple[SerializerHandler] = (
    # scalars that are already valid json values are returned unchanged
    SerializerHandler(
        check=lambda self, obj, path: isinstance(
            obj, (bool, int, float, str, type(None))
        ),
        serialize_func=lambda self, obj, path: obj,
        uid="base types",
        desc="base types (bool, int, float, str, None)",
    ),
    # mappings: keys are stringified, values are serialized recursively with
    # the (un-stringified) key appended to the path
    SerializerHandler(
        check=lambda self, obj, path: isinstance(obj, Mapping),
        serialize_func=lambda self, obj, path: {
            str(k): self.json_serialize(v, tuple(path) + (k,)) for k, v in obj.items()
        },
        uid="dictionaries",
        desc="dictionaries",
    ),
    # namedtuples become dicts keyed by field name (via `_asdict()`)
    SerializerHandler(
        check=lambda self, obj, path: isinstance_namedtuple(obj),
        serialize_func=lambda self, obj, path: {
            str(k): self.json_serialize(v, tuple(path) + (k,))
            for k, v in obj._asdict().items()
        },
        uid="namedtuple -> dict",
        desc="namedtuples as dicts",
    ),
    # lists and remaining tuples become lists; the element index extends the path
    SerializerHandler(
        check=lambda self, obj, path: isinstance(obj, (list, tuple)),
        serialize_func=lambda self, obj, path: [
            self.json_serialize(x, tuple(path) + (i,)) for i, x in enumerate(obj)
        ],
        uid="(list, tuple) -> list",
        desc="lists and tuples as lists",
    ),
)
166
167
168def _serialize_override_serialize_func(
169    self: "JsonSerializer", obj: Any, path: ObjectPath
170) -> JSONitem:
171    # obj_cls: type = type(obj)
172    # if hasattr(obj_cls, "_register_self") and callable(obj_cls._register_self):
173    #     obj_cls._register_self()
174
175    # get the serialized object
176    return obj.serialize()
177
178
# full default handler chain: the base handlers followed by handlers for
# objects with a `.serialize` method, dataclasses, paths, arrays/tensors,
# dataframes, sets, generic iterables, and finally a catch-all fallback.
# order matters: the first handler whose `check` returns True is used.
DEFAULT_HANDLERS: MonoTuple[SerializerHandler] = tuple(BASE_HANDLERS) + (
    # objects exposing a callable `.serialize` attribute produce their own output
    SerializerHandler(
        # TODO: allow for custom serialization handler name
        check=lambda self, obj, path: (
            hasattr(obj, "serialize") and callable(obj.serialize)
        ),
        serialize_func=_serialize_override_serialize_func,
        uid=".serialize override",
        desc="objects with .serialize method",
    ),
    # dataclass instances become dicts of their (recursively serialized) fields.
    # `is_dataclass` is also True for dataclass *types*, where reading a field
    # without a class-level default raises AttributeError, so types are excluded
    # here and fall through to the later handlers
    SerializerHandler(
        check=lambda self, obj, path: (
            is_dataclass(obj) and not isinstance(obj, type)
        ),
        serialize_func=lambda self, obj, path: {
            k: self.json_serialize(getattr(obj, k), tuple(path) + (k,))
            for k in obj.__dataclass_fields__
        },
        uid="dataclass -> dict",
        desc="dataclasses as dicts",
    ),
    SerializerHandler(
        check=lambda self, obj, path: isinstance(obj, Path),
        serialize_func=lambda self, obj, path: obj.as_posix(),
        uid="path -> str",
        desc="Path objects as posix strings",
    ),
    # types are matched by the string form of their type so the owning
    # package does not have to be imported
    SerializerHandler(
        check=lambda self, obj, path: str(type(obj)) in SERIALIZE_DIRECT_AS_STR,
        serialize_func=lambda self, obj, path: str(obj),
        uid="obj -> str(obj)",
        desc="directly serialize objects in `SERIALIZE_DIRECT_AS_STR` to strings",
    ),
    SerializerHandler(
        check=lambda self, obj, path: str(type(obj)) == "<class 'numpy.ndarray'>",
        serialize_func=lambda self, obj, path: cast(
            JSONitem, serialize_array(self, obj, path=path)
        ),
        uid="numpy.ndarray",
        desc="numpy arrays",
    ),
    # tensors are detached and moved to cpu before going through the array serializer
    SerializerHandler(
        check=lambda self, obj, path: str(type(obj)) == "<class 'torch.Tensor'>",
        serialize_func=lambda self, obj, path: cast(
            JSONitem,
            serialize_array(
                self,
                obj.detach().cpu(),
                path=path,  # pyright: ignore[reportAny]
            ),
        ),
        uid="torch.Tensor",
        desc="pytorch tensors",
    ),
    SerializerHandler(
        check=lambda self, obj, path: (
            str(type(obj)) == "<class 'pandas.core.frame.DataFrame'>"
        ),
        # TYPING: type checkers have no idea that obj is a DataFrame here
        serialize_func=lambda self, obj, path: {  # pyright: ignore[reportArgumentType, reportAny]
            _FORMAT_KEY: "pandas.DataFrame",  # type: ignore[misc]
            "columns": obj.columns.tolist(),  # pyright: ignore[reportAny]
            "data": obj.to_dict(orient="records"),  # pyright: ignore[reportAny]
            "path": path,
        },
        uid="pandas.DataFrame",
        desc="pandas DataFrames",
    ),
    # sets keep their kind under the format key; the path index of each
    # element is its position in the set's iteration order
    SerializerHandler(
        check=lambda self, obj, path: isinstance(obj, (set, frozenset)),
        serialize_func=lambda self, obj, path: {
            _FORMAT_KEY: "set" if isinstance(obj, set) else "frozenset",  # type: ignore[misc]
            "data": [
                self.json_serialize(x, tuple(path) + (i,)) for i, x in enumerate(obj)
            ],
        },
        uid="set -> dict[_FORMAT_KEY: 'set', data: list(...)]",
        desc="sets as dicts with format key",
    ),
    # any other iterable is consumed into a list
    SerializerHandler(
        check=lambda self, obj, path: (
            isinstance(obj, Iterable) and not isinstance(obj, (list, tuple, str))
        ),
        serialize_func=lambda self, obj, path: [
            self.json_serialize(x, tuple(path) + (i,)) for i, x in enumerate(obj)
        ],
        uid="Iterable -> list",
        desc="Iterables (not lists/tuples/strings) as lists",
    ),
    # catch-all: always matches, so it must stay last
    SerializerHandler(
        check=lambda self, obj, path: True,
        serialize_func=lambda self, obj, path: {
            **{k: str(getattr(obj, k, None)) for k in SERIALIZER_SPECIAL_KEYS},  # type: ignore[typeddict-item]
            **{k: f(obj) for k, f in SERIALIZER_SPECIAL_FUNCS.items()},
        },
        uid="fallback",
        desc="fallback handler -- serialize object attributes and special functions as strings",
    ),
)
276
277
class JsonSerializer:
    """Json serialization class (holds configs)

    # Parameters:
    - `array_mode : ArrayMode`
    how to write arrays
    (defaults to `"array_list_meta"`)
    - `error_mode : ErrorMode`
    what to do when we can't serialize an object (will use repr as fallback if "ignore" or "warn")
    (defaults to `"except"`)
    - `handlers_pre : MonoTuple[SerializerHandler]`
    handlers to use before the default handlers
    (defaults to `tuple()`)
    - `handlers_default : MonoTuple[SerializerHandler]`
    default handlers to use
    (defaults to `DEFAULT_HANDLERS`)
    - `write_only_format : bool`
    changes _FORMAT_KEY keys in output to "__write_format__" (when you want to serialize something in a way that zanj won't try to recover the object when loading)
    (defaults to `False`)

    # Raises:
    - `ValueError`: on init, if `args` is not empty
    - `SerializationException`: on `json_serialize()`, if any error occurs when trying to serialize an object and `error_mode` is set to `ErrorMode.EXCEPT`

    """

    def __init__(
        self,
        *args: None,
        array_mode: "ArrayMode" = "array_list_meta",
        error_mode: ErrorMode = ErrorMode.EXCEPT,
        handlers_pre: MonoTuple[SerializerHandler] = (),
        handlers_default: MonoTuple[SerializerHandler] = DEFAULT_HANDLERS,
        write_only_format: bool = False,
    ):
        # `*args` exists only to reject positional use: every option is keyword-only
        if args:
            raise ValueError(
                f"JsonSerializer takes no positional arguments!\n{args = }"
            )

        self.array_mode: "ArrayMode" = array_mode
        self.error_mode: ErrorMode = ErrorMode.from_any(error_mode)
        self.write_only_format: bool = write_only_format

        # handlers are tried in order, so the user-supplied ones take precedence
        pre_handlers = tuple(handlers_pre)
        default_handlers = tuple(handlers_default)
        self.handlers: MonoTuple[SerializerHandler] = pre_handlers + default_handlers

    @overload
    def json_serialize(
        self, obj: Mapping[str, Any], path: ObjectPath = ()
    ) -> JSONdict: ...
    @overload
    def json_serialize(self, obj: list, path: ObjectPath = ()) -> list: ...
    # @overload  # pyright: ignore[reportOverlappingOverload]
    # def json_serialize(self, obj: set, path: ObjectPath = ()) -> _SerializedSet: ...
    # @overload
    # def json_serialize(
    #     self, obj: frozenset, path: ObjectPath = ()
    # ) -> _SerializedFrozenset: ...
    @overload
    def json_serialize(self, obj: Any, path: ObjectPath = ()) -> JSONitem: ...
    def json_serialize(
        self,
        obj: Any,  # pyright: ignore[reportAny]
        path: ObjectPath = (),
    ) -> JSONitem:
        """serialize `obj` using the first handler whose `check` accepts it

        # Parameters:
        - `obj : Any`
        object to serialize
        - `path : ObjectPath`
        location of `obj` inside the enclosing structure, passed to the handlers and used in error messages
        (defaults to `()`)

        # Returns:
        - `JSONitem`
        output of the matching handler, or `repr(obj)` if serialization failed and `error_mode` is not `ErrorMode.EXCEPT`

        # Raises:
        - `SerializationException`: if anything fails and `error_mode` is `ErrorMode.EXCEPT`
        """
        # stays `None` only when `self.handlers` is empty
        handler = None
        try:
            for handler in self.handlers:
                if not handler.check(self, obj, path):
                    continue

                output: JSONitem = handler.serialize_func(self, obj, path)
                if (
                    self.write_only_format
                    and isinstance(output, dict)
                    and _FORMAT_KEY in output
                ):
                    # TYPING: JSONitem has no idea that _FORMAT_KEY is str
                    # move the format tag to the write-only key
                    output["__write_format__"] = output.pop(_FORMAT_KEY)  # type: ignore
                return output

            raise ValueError(f"no handler found for object with {type(obj) = }")  # pyright: ignore[reportAny]

        except Exception as e:
            mode = self.error_mode

            if mode == ErrorMode.EXCEPT:
                # keep the message readable for very large objects
                obj_str: str = repr(obj)  # pyright: ignore[reportAny]
                if len(obj_str) > 1000:
                    obj_str = obj_str[:1000] + "..."
                handler_uid = "no handler matched" if not handler else handler.uid
                raise SerializationException(
                    f"error serializing at {path = } with last handler: '{handler_uid}'\nfrom: {e}\nobj: {obj_str}"
                ) from e

            if mode == ErrorMode.WARN:
                warnings.warn(
                    f"error serializing at {path = }, will return as string\n{obj = }\nexception = {e}"
                )

            # any non-raising mode falls back to the repr of the object
            return repr(obj)  # pyright: ignore[reportAny]

    def hashify(
        self,
        obj: Any,  # pyright: ignore[reportAny]
        path: ObjectPath = (),
        force: bool = True,
    ) -> Hashableitem:
        """try to turn any object into something hashable

        serializes `obj` with `json_serialize` and then passes the result, along
        with `force`, to `_recursive_hashify`
        """
        # recursive hashify, turning dicts and lists into tuples
        return _recursive_hashify(self.json_serialize(obj, path=path), force=force)
386
387
# shared serializer built with all-default settings; the module-level
# `json_serialize()` function delegates to it
GLOBAL_JSON_SERIALIZER: JsonSerializer = JsonSerializer()
389
390
@overload
def json_serialize(obj: Mapping[str, Any], path: ObjectPath = ()) -> JSONdict: ...
@overload
def json_serialize(obj: list, path: ObjectPath = ()) -> list: ...
# @overload  # pyright: ignore[reportOverlappingOverload]
# def json_serialize(obj: set, path: ObjectPath = ()) -> _SerializedSet: ...
# @overload
# def json_serialize(obj: frozenset, path: ObjectPath = ()) -> _SerializedFrozenset: ...
@overload
def json_serialize(obj: Any, path: ObjectPath = ()) -> JSONitem: ...
def json_serialize(obj: Any, path: ObjectPath = ()) -> JSONitem:  # pyright: ignore[reportAny]
    """serialize object to json-serializable object with default config

    thin wrapper around `GLOBAL_JSON_SERIALIZER.json_serialize`

    # Parameters:
    - `obj : Any`
    object to serialize
    - `path : ObjectPath`
    location of `obj` inside an enclosing structure
    (defaults to `()`)

    # Returns:
    - `JSONitem`
    whatever `GLOBAL_JSON_SERIALIZER.json_serialize(obj, path=path)` returns
    """
    return GLOBAL_JSON_SERIALIZER.json_serialize(obj, path=path)

SERIALIZER_SPECIAL_KEYS: MonoTuple[str] = ('__name__', '__doc__', '__module__', '__class__', '__dict__', '__annotations__')
SERIALIZER_SPECIAL_FUNCS: dict[str, typing.Callable[..., str | list[str]]] = {'str': <class 'str'>, 'dir': <built-in function dir>, 'type': <function <lambda>>, 'repr': <function <lambda>>, 'code': <function <lambda>>, 'sourcefile': <function <lambda>>}
SERIALIZE_DIRECT_AS_STR: Set[str] = {"<class 'torch.device'>", "<class 'torch.dtype'>"}
ObjectPath = tuple[typing.Union[str, int], ...]
@dataclass
class SerializerHandler:
 92@dataclass
 93class SerializerHandler:
 94    """a handler for a specific type of object
 95
 96    # Parameters:
 97        - `check : Callable[[JsonSerializer, Any], bool]` takes a JsonSerializer and an object, returns whether to use this handler
 98        - `serialize : Callable[[JsonSerializer, Any, ObjectPath], JSONitem]` takes a JsonSerializer, an object, and the current path, returns the serialized object
 99        - `desc : str` description of the handler (optional)
100    """
101
102    # (self_config, object) -> whether to use this handler
103    check: Callable[["JsonSerializer", Any, ObjectPath], bool]
104    # (self_config, object, path) -> serialized object
105    serialize_func: Callable[["JsonSerializer", Any, ObjectPath], JSONitem]
106    # unique identifier for the handler
107    uid: str
108    # description of this serializer
109    desc: str
110
111    def serialize(self) -> JSONdict:
112        """serialize the handler info"""
113        return {
114            # get the code and doc of the check function
115            "check": {
116                "code": safe_getsource(self.check),
117                "doc": string_as_lines(self.check.__doc__),
118            },
119            # get the code and doc of the load function
120            "serialize_func": {
121                "code": safe_getsource(self.serialize_func),
122                "doc": string_as_lines(self.serialize_func.__doc__),
123            },
124            # get the uid, source_pckg, priority, and desc
125            "uid": str(self.uid),
126            "source_pckg": getattr(self.serialize_func, "source_pckg", None),
127            "__module__": getattr(self.serialize_func, "__module__", None),
128            "desc": str(self.desc),
129        }

a handler for a specific type of object

Parameters:

- `check : Callable[[JsonSerializer, Any, ObjectPath], bool]` takes a JsonSerializer, an object, and the current path, returns whether to use this handler
- `serialize_func : Callable[[JsonSerializer, Any, ObjectPath], JSONitem]` takes a JsonSerializer, an object, and the current path, returns the serialized object
- `uid : str` unique identifier for the handler
- `desc : str` description of the handler
SerializerHandler( check: Callable[[JsonSerializer, Any, tuple[Union[str, int], ...]], bool], serialize_func: Callable[[JsonSerializer, Any, tuple[Union[str, int], ...]], Union[bool, int, float, str, NoneType, Sequence[Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]], Dict[str, Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]]]], uid: str, desc: str)
check: Callable[[JsonSerializer, Any, tuple[Union[str, int], ...]], bool]
serialize_func: Callable[[JsonSerializer, Any, tuple[Union[str, int], ...]], Union[bool, int, float, str, NoneType, Sequence[Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]], Dict[str, Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]]]]
uid: str
desc: str
def serialize( self) -> Dict[str, Union[bool, int, float, str, NoneType, Sequence[Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]], Dict[str, Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]]]]:
111    def serialize(self) -> JSONdict:
112        """serialize the handler info"""
113        return {
114            # get the code and doc of the check function
115            "check": {
116                "code": safe_getsource(self.check),
117                "doc": string_as_lines(self.check.__doc__),
118            },
119            # get the code and doc of the load function
120            "serialize_func": {
121                "code": safe_getsource(self.serialize_func),
122                "doc": string_as_lines(self.serialize_func.__doc__),
123            },
124            # get the uid, source_pckg, priority, and desc
125            "uid": str(self.uid),
126            "source_pckg": getattr(self.serialize_func, "source_pckg", None),
127            "__module__": getattr(self.serialize_func, "__module__", None),
128            "desc": str(self.desc),
129        }

serialize the handler info

BASE_HANDLERS: None = (SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='base types', desc='base types (bool, int, float, str, None)'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='dictionaries', desc='dictionaries'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='namedtuple -> dict', desc='namedtuples as dicts'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='(list, tuple) -> list', desc='lists and tuples as lists'))
DEFAULT_HANDLERS: None = (SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='base types', desc='base types (bool, int, float, str, None)'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='dictionaries', desc='dictionaries'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='namedtuple -> dict', desc='namedtuples as dicts'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='(list, tuple) -> list', desc='lists and tuples as lists'), SerializerHandler(check=<function <lambda>>, serialize_func=<function _serialize_override_serialize_func>, uid='.serialize override', desc='objects with .serialize method'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='dataclass -> dict', desc='dataclasses as dicts'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='path -> str', desc='Path objects as posix strings'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='obj -> str(obj)', desc='directly serialize objects in `SERIALIZE_DIRECT_AS_STR` to strings'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='numpy.ndarray', desc='numpy arrays'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='torch.Tensor', desc='pytorch tensors'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='pandas.DataFrame', desc='pandas DataFrames'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid="set -> dict[_FORMAT_KEY: 'set', data: list(...)]", desc='sets as dicts with format key'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='Iterable -> list', desc='Iterables (not lists/tuples/strings) as lists'), SerializerHandler(check=<function <lambda>>, serialize_func=<function 
<lambda>>, uid='fallback', desc='fallback handler -- serialize object attributes and special functions as strings'))
class JsonSerializer:
279class JsonSerializer:
280    """Json serialization class (holds configs)
281
282    # Parameters:
283    - `array_mode : ArrayMode`
284    how to write arrays
285    (defaults to `"array_list_meta"`)
286    - `error_mode : ErrorMode`
287    what to do when we can't serialize an object (will use repr as fallback if "ignore" or "warn")
288    (defaults to `"except"`)
289    - `handlers_pre : MonoTuple[SerializerHandler]`
290    handlers to use before the default handlers
291    (defaults to `tuple()`)
292    - `handlers_default : MonoTuple[SerializerHandler]`
293    default handlers to use
294    (defaults to `DEFAULT_HANDLERS`)
295    - `write_only_format : bool`
296    changes _FORMAT_KEY keys in output to "__write_format__" (when you want to serialize something in a way that zanj won't try to recover the object when loading)
297    (defaults to `False`)
298
299    # Raises:
300    - `ValueError`: on init, if `args` is not empty
301    - `SerializationException`: on `json_serialize()`, if any error occurs when trying to serialize an object and `error_mode` is set to `ErrorMode.EXCEPT"`
302
303    """
304
305    def __init__(
306        self,
307        *args: None,
308        array_mode: "ArrayMode" = "array_list_meta",
309        error_mode: ErrorMode = ErrorMode.EXCEPT,
310        handlers_pre: MonoTuple[SerializerHandler] = (),
311        handlers_default: MonoTuple[SerializerHandler] = DEFAULT_HANDLERS,
312        write_only_format: bool = False,
313    ):
314        if len(args) > 0:
315            raise ValueError(
316                f"JsonSerializer takes no positional arguments!\n{args = }"
317            )
318
319        self.array_mode: "ArrayMode" = array_mode
320        self.error_mode: ErrorMode = ErrorMode.from_any(error_mode)
321        self.write_only_format: bool = write_only_format
322        # join up the handlers
323        self.handlers: MonoTuple[SerializerHandler] = tuple(handlers_pre) + tuple(
324            handlers_default
325        )
326
327    @overload
328    def json_serialize(
329        self, obj: Mapping[str, Any], path: ObjectPath = ()
330    ) -> JSONdict: ...
331    @overload
332    def json_serialize(self, obj: list, path: ObjectPath = ()) -> list: ...
333    # @overload  # pyright: ignore[reportOverlappingOverload]
334    # def json_serialize(self, obj: set, path: ObjectPath = ()) -> _SerializedSet: ...
335    # @overload
336    # def json_serialize(
337    #     self, obj: frozenset, path: ObjectPath = ()
338    # ) -> _SerializedFrozenset: ...
339    @overload
340    def json_serialize(self, obj: Any, path: ObjectPath = ()) -> JSONitem: ...
341    def json_serialize(
342        self,
343        obj: Any,  # pyright: ignore[reportAny]
344        path: ObjectPath = (),
345    ) -> JSONitem:
346        handler = None
347        try:
348            for handler in self.handlers:
349                if handler.check(self, obj, path):
350                    output: JSONitem = handler.serialize_func(self, obj, path)
351                    if self.write_only_format:
352                        if isinstance(output, dict) and _FORMAT_KEY in output:
353                            # TYPING: JSONitem has no idea that _FORMAT_KEY is str
354                            new_fmt: str = output.pop(_FORMAT_KEY)  # type: ignore  # pyright: ignore[reportAssignmentType]
355                            output["__write_format__"] = new_fmt  # type: ignore
356                    return output
357
358            raise ValueError(f"no handler found for object with {type(obj) = }")  # pyright: ignore[reportAny]
359
360        except Exception as e:
361            if self.error_mode == ErrorMode.EXCEPT:
362                obj_str: str = repr(obj)  # pyright: ignore[reportAny]
363                if len(obj_str) > 1000:
364                    obj_str = obj_str[:1000] + "..."
365                handler_uid = handler.uid if handler else "no handler matched"
366                raise SerializationException(
367                    f"error serializing at {path = } with last handler: '{handler_uid}'\nfrom: {e}\nobj: {obj_str}"
368                ) from e
369            elif self.error_mode == ErrorMode.WARN:
370                warnings.warn(
371                    f"error serializing at {path = }, will return as string\n{obj = }\nexception = {e}"
372                )
373
374            return repr(obj)  # pyright: ignore[reportAny]
375
376    def hashify(
377        self,
378        obj: Any,  # pyright: ignore[reportAny]
379        path: ObjectPath = (),
380        force: bool = True,
381    ) -> Hashableitem:
382        """try to turn any object into something hashable"""
383        data = self.json_serialize(obj, path=path)
384
385        # recursive hashify, turning dicts and lists into tuples
386        return _recursive_hashify(data, force=force)

Json serialization class (holds configs)

Parameters:

  • array_mode : ArrayMode how to write arrays (defaults to "array_list_meta")
  • error_mode : ErrorMode what to do when we can't serialize an object (will use repr as fallback if "ignore" or "warn") (defaults to "except")
  • handlers_pre : MonoTuple[SerializerHandler] handlers to use before the default handlers (defaults to tuple())
  • handlers_default : MonoTuple[SerializerHandler] default handlers to use (defaults to DEFAULT_HANDLERS)
  • write_only_format : bool changes _FORMAT_KEY keys in output to "__write_format__" (when you want to serialize something in a way that zanj won't try to recover the object when loading) (defaults to False)

Raises:

  • ValueError: on init, if args is not empty
  • SerializationException: on json_serialize(), if any error occurs when trying to serialize an object and error_mode is set to ErrorMode.EXCEPT
JsonSerializer( *args: None, array_mode: Literal['list', 'array_list_meta', 'array_hex_meta', 'array_b64_meta', 'external', 'zero_dim'] = 'array_list_meta', error_mode: muutils.errormode.ErrorMode = ErrorMode.Except, handlers_pre: None = (), handlers_default: None = (SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='base types', desc='base types (bool, int, float, str, None)'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='dictionaries', desc='dictionaries'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='namedtuple -> dict', desc='namedtuples as dicts'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='(list, tuple) -> list', desc='lists and tuples as lists'), SerializerHandler(check=<function <lambda>>, serialize_func=<function _serialize_override_serialize_func>, uid='.serialize override', desc='objects with .serialize method'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='dataclass -> dict', desc='dataclasses as dicts'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='path -> str', desc='Path objects as posix strings'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='obj -> str(obj)', desc='directly serialize objects in `SERIALIZE_DIRECT_AS_STR` to strings'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='numpy.ndarray', desc='numpy arrays'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='torch.Tensor', desc='pytorch tensors'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='pandas.DataFrame', desc='pandas DataFrames'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid="set -> dict[_FORMAT_KEY: 'set', data: list(...)]", desc='sets as dicts with 
format key'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='Iterable -> list', desc='Iterables (not lists/tuples/strings) as lists'), SerializerHandler(check=<function <lambda>>, serialize_func=<function <lambda>>, uid='fallback', desc='fallback handler -- serialize object attributes and special functions as strings')), write_only_format: bool = False)
305    def __init__(
306        self,
307        *args: None,
308        array_mode: "ArrayMode" = "array_list_meta",
309        error_mode: ErrorMode = ErrorMode.EXCEPT,
310        handlers_pre: MonoTuple[SerializerHandler] = (),
311        handlers_default: MonoTuple[SerializerHandler] = DEFAULT_HANDLERS,
312        write_only_format: bool = False,
313    ):
314        if len(args) > 0:
315            raise ValueError(
316                f"JsonSerializer takes no positional arguments!\n{args = }"
317            )
318
319        self.array_mode: "ArrayMode" = array_mode
320        self.error_mode: ErrorMode = ErrorMode.from_any(error_mode)
321        self.write_only_format: bool = write_only_format
322        # join up the handlers
323        self.handlers: MonoTuple[SerializerHandler] = tuple(handlers_pre) + tuple(
324            handlers_default
325        )
array_mode: Literal['list', 'array_list_meta', 'array_hex_meta', 'array_b64_meta', 'external', 'zero_dim']
write_only_format: bool
handlers: MonoTuple[SerializerHandler]
def json_serialize( self, obj: Any, path: tuple[typing.Union[str, int], ...] = ()) -> Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]:
341    def json_serialize(
342        self,
343        obj: Any,  # pyright: ignore[reportAny]
344        path: ObjectPath = (),
345    ) -> JSONitem:
346        handler = None
347        try:
348            for handler in self.handlers:
349                if handler.check(self, obj, path):
350                    output: JSONitem = handler.serialize_func(self, obj, path)
351                    if self.write_only_format:
352                        if isinstance(output, dict) and _FORMAT_KEY in output:
353                            # TYPING: JSONitem has no idea that _FORMAT_KEY is str
354                            new_fmt: str = output.pop(_FORMAT_KEY)  # type: ignore  # pyright: ignore[reportAssignmentType]
355                            output["__write_format__"] = new_fmt  # type: ignore
356                    return output
357
358            raise ValueError(f"no handler found for object with {type(obj) = }")  # pyright: ignore[reportAny]
359
360        except Exception as e:
361            if self.error_mode == ErrorMode.EXCEPT:
362                obj_str: str = repr(obj)  # pyright: ignore[reportAny]
363                if len(obj_str) > 1000:
364                    obj_str = obj_str[:1000] + "..."
365                handler_uid = handler.uid if handler else "no handler matched"
366                raise SerializationException(
367                    f"error serializing at {path = } with last handler: '{handler_uid}'\nfrom: {e}\nobj: {obj_str}"
368                ) from e
369            elif self.error_mode == ErrorMode.WARN:
370                warnings.warn(
371                    f"error serializing at {path = }, will return as string\n{obj = }\nexception = {e}"
372                )
373
374            return repr(obj)  # pyright: ignore[reportAny]
def hashify( self, obj: Any, path: tuple[typing.Union[str, int], ...] = (), force: bool = True) -> Union[bool, int, float, str, NoneType, Tuple[ForwardRef('Hashableitem'), ...]]:
376    def hashify(
377        self,
378        obj: Any,  # pyright: ignore[reportAny]
379        path: ObjectPath = (),
380        force: bool = True,
381    ) -> Hashableitem:
382        """try to turn any object into something hashable"""
383        data = self.json_serialize(obj, path=path)
384
385        # recursive hashify, turning dicts and lists into tuples
386        return _recursive_hashify(data, force=force)

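Try to turn any object into something hashable: the object is serialized with `json_serialize` first, then dicts and lists in the result are recursively converted to tuples.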

GLOBAL_JSON_SERIALIZER: JsonSerializer = <JsonSerializer object>
def json_serialize( obj: Any, path: tuple[typing.Union[str, int], ...] = ()) -> Union[bool, int, float, str, NoneType, Sequence[ForwardRef('JSONitem')], Dict[str, ForwardRef('JSONitem')]]:
def json_serialize(obj: Any, path: ObjectPath = ()) -> JSONitem:  # pyright: ignore[reportAny]
    """Serialize `obj` to a json-serializable object using the default configuration.

    Delegates to the `json_serialize` method of the module-level
    `GLOBAL_JSON_SERIALIZER`, forwarding `path` by keyword.
    """
    default_serializer = GLOBAL_JSON_SERIALIZER
    return default_serializer.json_serialize(obj, path=path)

Serialize an object to a JSON-serializable object using the default configuration (`GLOBAL_JSON_SERIALIZER`).