Coverage for Users / vladimirpavlov / PycharmProjects / parameterizable / src / mixinforge / json_processor.py: 97%
199 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-01 16:37 -0600
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-01 16:37 -0600
1"""JSON-compatible serialization helpers for complex Python objects.
3This module provides functions to convert rich Python data structures into a
4JSON-serializable representation and back. It supports primitive types as well
5as containers (list, tuple, set, dict), Enums, and certain custom objects.
7The serialized form is a pure-JSON structure containing only dicts, lists,
8strings, numbers, booleans, and null. Special container and object types are
9encoded using internal marker keys.
10"""
12import importlib
13import json
14import types
15from enum import Enum
16from typing import Any, Mapping, NewType
18from .dict_sorter import sort_dict_by_keys
# Distinct static type for JSON strings handled by this module. NewType only
# affects type checkers; at runtime the value is an ordinary str.
JsonSerializedObject = NewType("JsonSerializedObject", str)

# Instances of these types are rejected by _to_serializable_dict with a
# TypeError instead of being serialized.
_UNSUPPORTED_TYPES = (
    types.ModuleType,
    types.FunctionType,
    types.LambdaType,  # same object as types.FunctionType; listed for readability
    types.BuiltinFunctionType,
    types.MethodType,
    types.CodeType,
    type,  # classes themselves (as opposed to their instances)
)
class _Markers:
    """Reserved dictionary keys that tag constructs JSON cannot express natively.

    The serializer places these keys inside plain dictionaries so that the
    output stays pure JSON while still carrying enough information to rebuild
    the original Python value.

    Attributes:
        DICT: Key wrapping every serialized dictionary; its value is the
            mapping of the original keys to serialized values.
        TUPLE: Key wrapping a tuple; its value is the list of items.
        SET: Key wrapping a set; its value is the list of items.
        ENUM: Key holding the name of an Enum member.
        CLASS: Key holding the qualified name of the object's class, used
            during reconstruction.
        MODULE: Key holding the name of the module that defines the class.
        PARAMS: Key holding the serialized constructor parameters for
            get_params-based reconstruction.
        STATE: Key holding the serialized instance state for
            __getstate__/__setstate__-based reconstruction.
    """

    # Built-in containers.
    DICT = "..dict.."
    TUPLE = "..tuple.."
    SET = "..set.."

    # Enum members.
    ENUM = "..enum.."

    # Identity and payload of reconstructable objects.
    CLASS = "..class.."
    MODULE = "..module.."
    PARAMS = "..params.."
    STATE = "..state.."
def _to_serializable_dict(x: Any, seen: set[int] | None = None) -> Any:
    """Convert a Python object into a JSON-serializable structure.

    The transformation is recursive and supports primitives, lists, tuples,
    sets, dicts and Enum members. Custom objects are supported through, in
    order of precedence, a get_params method, __getstate__, __slots__, or a
    plain instance __dict__.

    Named Enum members are always encoded with the ENUM marker, including
    members of Enums that mix in a primitive type (IntEnum, StrEnum,
    ``class E(str, Enum)``). Enum values of a primitive type that have no
    member name to look up (e.g. composite IntFlag values) are emitted as the
    plain primitive.

    Dictionary keys are passed through unchanged; only the values are
    converted.

    Args:
        x: The object to convert.
        seen: Ids of the objects currently being serialized on the path from
            the root to x, used for cycle detection. Callers normally omit it.

    Returns:
        A structure composed only of JSON-compatible types (dict, list, str,
        int, float, bool, None), potentially enhanced with internal marker
        keys to represent tuples, sets, and reconstructable objects.

    Raises:
        TypeError: If x (or any nested value) contains an unsupported type.
        RecursionError: If an object is reachable from itself.
        AttributeError: If a slotted object has an uninitialized slot.

    Examples:
        - Tuples and sets are encoded with markers:

        >>> _to_serializable_dict((1, 2))
        {'..tuple..': [1, 2]}
        >>> _to_serializable_dict({1, 2})
        {'..set..': [1, 2]}
    """
    if isinstance(x, (int, float, bool, str, type(None))):
        # Members of primitive-mixin Enums also pass the check above.
        # Returning them as-is would silently drop their Enum identity, so
        # named members fall through to the Enum branch below.
        if not (isinstance(x, Enum) and x.name in type(x).__members__):
            return x
    elif isinstance(x, _UNSUPPORTED_TYPES):
        raise TypeError(f"Unsupported type: {type(x).__name__}")

    if seen is None:
        seen = set()

    obj_id = id(x)
    if obj_id in seen:
        raise RecursionError(
            f"Cyclic reference detected while serializing object of type {type(x).__name__}")
    seen.add(obj_id)

    try:
        if hasattr(x, "get_params"):
            result = _process_state(x.get_params(), x, _Markers.PARAMS, seen)
        elif isinstance(x, list):
            result = [_to_serializable_dict(i, seen) for i in x]
        elif isinstance(x, tuple):
            result = {_Markers.TUPLE: [_to_serializable_dict(i, seen) for i in x]}
        elif isinstance(x, set):
            result = {_Markers.SET: [_to_serializable_dict(i, seen) for i in x]}
        elif isinstance(x, dict):
            result = {_Markers.DICT: {k: _to_serializable_dict(v, seen)
                                      for k, v in x.items()}}
        elif isinstance(x, Enum):
            result = {_Markers.ENUM: x.name,
                      _Markers.CLASS: x.__class__.__qualname__,
                      _Markers.MODULE: x.__class__.__module__}
        elif hasattr(x, "__getstate__"):
            result = _process_state(x.__getstate__(), x, _Markers.STATE, seen)
        elif hasattr(x.__class__, "__slots__"):
            # For slotted objects, create a pickle-style state tuple
            slots = _get_all_slots(type(x))
            # Raises AttributeError if a slot is uninitialized
            slot_state = tuple(getattr(x, name) for name in slots)

            if hasattr(x, "__dict__"):
                # Hybrid object with slots and dict
                final_state = (slot_state, x.__dict__)
            else:
                # Slots-only object: use a (slots, None) tuple for consistency
                # in the reconstruction logic.
                final_state = (slot_state, None)
            result = _process_state(final_state, x, _Markers.STATE, seen)
        elif hasattr(x, "__dict__"):
            result = _process_state(x.__dict__, x, _Markers.STATE, seen)
        else:
            raise TypeError(f"Unsupported type: {type(x).__name__}")
    finally:
        # Only the current path is tracked, so an object may legitimately
        # appear several times as long as it does not contain itself.
        seen.remove(obj_id)
    return result
def _process_state(state: Any, obj: Any, marker: str, seen: set[int]) -> dict:
    """Build the serialized envelope for a reconstructable object.

    The envelope records where the object's class lives (qualified class name
    and module name) and stores the recursively serialized state under the
    requested marker key.

    Args:
        state: The raw state to store, e.g. the result of get_params or
            __getstate__.
        obj: The object being serialized; only its class is inspected.
        marker: The key under which the state is stored (PARAMS or STATE).
        seen: Ids of the objects currently being serialized, forwarded to
            _to_serializable_dict for cycle detection.

    Returns:
        A dictionary with the CLASS, MODULE and marker keys that
        _recreate_object can use to rebuild the instance.
    """
    obj_class = obj.__class__
    envelope = {
        _Markers.CLASS: obj_class.__qualname__,
        _Markers.MODULE: obj_class.__module__,
    }
    envelope[marker] = _to_serializable_dict(state, seen)
    return envelope
def _get_all_slots(cls: type) -> list[str]:
    """Collect the attribute names of all slots declared in a class hierarchy.

    Private slot names (two leading underscores, no two trailing underscores)
    are returned in their name-mangled form (``_ClassName__slot``), because
    that is the attribute name under which the slot is reachable through
    getattr/setattr.

    Args:
        cls: The class to inspect for __slots__.

    Returns:
        List of slot attribute names in parent-to-child order, excluding
        __dict__ and __weakref__.
    """
    slots_to_fill = []
    # Traverse in reverse MRO to maintain parent-to-child slot order
    for base_cls in reversed(cls.__mro__):
        # NOTE: getattr also returns a __slots__ inherited from a parent, so a
        # class without its own declaration repeats its parent's names. The
        # encoder and decoder both rely on this exact list, so it is kept.
        base_slots = getattr(base_cls, "__slots__", [])
        if isinstance(base_slots, str):
            base_slots = [base_slots]

        # Private names are mangled with the name of the class that actually
        # declares the slots, which may be an ancestor of base_cls (see above).
        declaring_cls = next(
            (klass for klass in base_cls.__mro__ if "__slots__" in vars(klass)),
            base_cls)
        mangle_prefix = declaring_cls.__name__.lstrip("_")

        for slot_name in base_slots:
            if slot_name in ("__dict__", "__weakref__"):
                continue
            if (mangle_prefix
                    and slot_name.startswith("__")
                    and not slot_name.endswith("__")):
                slot_name = f"_{mangle_prefix}{slot_name}"
            slots_to_fill.append(slot_name)
    return slots_to_fill
190def _recreate_object(x: Mapping[str,Any]) -> Any:
191 """Recreate an object instance from its serialized metadata.
193 The input mapping must include MODULE and CLASS markers and either
194 PARAMS (constructor parameters), STATE (instance state), or ENUM
195 (Enum member name).
197 Args:
198 x: Marker-bearing mapping produced by _to_serializable_dict for
199 custom objects.
201 Returns:
202 A new instance of the referenced class reconstructed from parameters,
203 state, or Enum membership.
205 Raises:
206 TypeError: If the mapping does not contain sufficient information to
207 reconstruct the object.
208 ImportError: If the target module cannot be imported. (Surfaced via the
209 underlying import mechanism.)
210 AttributeError: If the class does not exist in the target module.
211 """
212 if not isinstance(x, Mapping):
213 raise TypeError(f"Object metadata must be a mapping, "
214 f"got: {type(x).__name__}")
215 if _Markers.MODULE not in x or _Markers.CLASS not in x:
216 raise TypeError("Object metadata missing required markers "
217 "MODULE and CLASS")
219 module_name = x[_Markers.MODULE]
220 class_name = x[_Markers.CLASS]
221 try:
222 module = importlib.import_module(module_name)
223 cls = getattr(module, class_name)
224 except (ImportError, AttributeError) as e:
225 raise ImportError(f"Could not import {class_name} from {module_name}"
226 ) from e
228 match x:
229 case {_Markers.PARAMS: params_json}:
230 return cls(**_from_serializable_dict(params_json))
231 case {_Markers.ENUM: member_name}:
232 if not issubclass(cls, Enum):
233 raise TypeError(f"Class {class_name} is not an Enum")
234 return cls[member_name]
235 case {_Markers.STATE: state_json}:
236 state = _from_serializable_dict(state_json)
237 obj = cls.__new__(cls)
238 if hasattr(obj, "__setstate__"):
239 obj.__setstate__(state)
240 elif isinstance(state, tuple):
241 # Handle tuple state from __getstate__ for slotted classes
242 slots_to_fill = _get_all_slots(cls)
244 # Support multiple tuple state formats:
245 # 1) (slot_values_seq, dict_values) where slot_values_seq is a sequence of values
246 # 2) (dict_values, slot_mapping) as produced by CPython's built-in __getstate__ for slotted classes
247 # 3) A plain tuple of slot values
248 slot_values_seq = None
249 slot_mapping = None
250 dict_values = None
252 if len(state) == 2:
253 a, b = state
254 if isinstance(a, dict) and isinstance(b, dict):
255 # CPython default: (dict_values, slot_mapping)
256 dict_values = a
257 slot_mapping = b
258 elif isinstance(a, (list, tuple)) and (b is None or isinstance(b, dict)):
259 # Our encoder format: (slot_values_seq, dict_values)
260 slot_values_seq = list(a)
261 dict_values = b
262 elif isinstance(a, dict) and isinstance(b, (list, tuple)):
263 # Be tolerant if components are swapped
264 dict_values = a
265 slot_values_seq = list(b)
266 elif a is None and isinstance(b, dict):
267 # No slots, only dict
268 dict_values = b
269 else:
270 # Fallback: treat entire state as slot values
271 slot_values_seq = list(state)
272 else:
273 # Otherwise, state is just a tuple of slot values
274 slot_values_seq = list(state)
276 # Apply slots
277 if slot_mapping is not None:
278 for name, value in slot_mapping.items():
279 setattr(obj, name, value)
280 elif slot_values_seq is not None and len(slot_values_seq) > 0:
281 if len(slot_values_seq) != len(slots_to_fill):
282 raise TypeError(
283 f"Tuple state length {len(slot_values_seq)} does not match "
284 f"slots length {len(slots_to_fill)} for class {cls.__name__}")
285 for value, name in zip(slot_values_seq, slots_to_fill):
286 setattr(obj, name, value)
288 # Apply dict attributes, if any
289 if dict_values:
290 for k, v in dict_values.items():
291 setattr(obj, k, v)
293 else: # Fallback reconstruction
294 for k, v in state.items():
295 setattr(obj, k, v)
296 return obj
297 case _:
298 raise TypeError("Unable to recreate object from provided data")
def _from_serializable_dict(x: Any) -> Any:
    """Rebuild native Python values from the marker-based JSON structure.

    This is the inverse of _to_serializable_dict: primitives are returned
    unchanged, lists are converted item by item, marker dictionaries are
    turned back into tuples, sets and dicts, and dictionaries carrying a
    MODULE or CLASS marker are handed to _recreate_object.

    Args:
        x: The JSON-loaded Python structure to convert.

    Returns:
        The reconstructed Python object graph.

    Raises:
        TypeError: If an unsupported structure is encountered, or a container
            marker is accompanied by other keys or maps to the wrong type.
    """
    if x is None or isinstance(x, (bool, int, float, str)):
        return x

    if isinstance(x, list):
        return [_from_serializable_dict(item) for item in x]

    if isinstance(x, Mapping):
        if _Markers.TUPLE in x:
            items = x[_Markers.TUPLE]
            if len(x) != 1:
                raise TypeError("TUPLE marker must be the only key")
            if not isinstance(items, list):
                raise TypeError("TUPLE marker must map to a list")
            return tuple([_from_serializable_dict(item) for item in items])

        if _Markers.SET in x:
            items = x[_Markers.SET]
            if len(x) != 1:
                raise TypeError("SET marker must be the only key")
            if not isinstance(items, list):
                raise TypeError("SET marker must map to a list")
            return {_from_serializable_dict(item) for item in items}

        if _Markers.DICT in x:
            entries = x[_Markers.DICT]
            if len(x) != 1:
                raise TypeError("DICT marker must be the only key")
            if not isinstance(entries, dict):
                raise TypeError("DICT marker must map to a dict")
            return {key: _from_serializable_dict(value)
                    for key, value in entries.items()}

        if _Markers.MODULE in x or _Markers.CLASS in x:
            return _recreate_object(x)

    raise TypeError(f"Unsupported type: {type(x).__name__}")
def dumpjs(obj: Any, **kwargs) -> JsonSerializedObject:
    """Serialize an object to a JSON string using the module's marker encoding.

    Args:
        obj: The object to serialize.
        **kwargs: Additional keyword arguments forwarded to
            json.dumps (e.g., indent=2, sort_keys=True).

    Returns:
        The JSON string representing the object.
    """
    json_ready = _to_serializable_dict(obj)
    return json.dumps(json_ready, **kwargs)
def loadjs(s: JsonSerializedObject, **kwargs) -> Any:
    """Rebuild an object from a JSON string produced by dumpjs.

    Args:
        s: The JSON string to parse.
        **kwargs: Additional keyword arguments forwarded to
            json.loads (object_hook is not allowed here).

    Returns:
        The Python object reconstructed from the JSON string.

    Raises:
        ValueError: If object_hook is provided in kwargs.
    """
    if "object_hook" not in kwargs:
        parsed = json.loads(s, **kwargs)
        return _from_serializable_dict(parsed)
    raise ValueError("object_hook cannot be used with mixinforge.loadjs()")
def _extract_params_dict(container: dict) -> dict:
    """Locate the parameter dictionary inside a serialized container.

    When the container has a PARAMS entry, the dictionary is looked up under
    PARAMS -> DICT; otherwise it is looked up under the container's own
    top-level DICT entry.

    Args:
        container: A dictionary containing serialized parameters.

    Returns:
        The parameter dictionary itself (not a copy), so callers may update
        it in place.

    Raises:
        KeyError: If the expected DICT structure is not found.
    """
    via_params = _Markers.PARAMS in container
    block = container[_Markers.PARAMS] if via_params else container

    inner = block.get(_Markers.DICT) if isinstance(block, dict) else None
    if isinstance(inner, dict):
        return inner

    if via_params:
        raise KeyError(f"Invalid structure: {_Markers.PARAMS} missing {_Markers.DICT} mapping")
    raise KeyError(f"Invalid structure: missing {_Markers.DICT} mapping in JSON object")
def update_jsparams(jsparams: JsonSerializedObject, **kwargs) -> JsonSerializedObject:
    """Return a copy of a serialized JSON blob with some parameters replaced.

    The input is a JSON string produced by dumpjs for an object serialized
    through its get_params method. The given values are serialized and merged
    into the internal PARAMS -> DICT mapping, and the whole structure is
    passed through sort_dict_by_keys before being dumped again.

    Args:
        jsparams: The JSON string returned by dumpjs.
        **kwargs: Key-value pairs to merge into the serialized parameters.
            Existing keys are overwritten; new keys are added.

    Returns:
        A new JSON string with updated parameters.

    Raises:
        KeyError: If jsparams does not contain the expected
            PARAMS -> DICT structure (i.e., the input is not a serialized
            mixinforge object).
    """
    root = json.loads(jsparams)
    if not isinstance(root, dict):
        raise KeyError("Invalid structure: JSON root must be a dictionary")

    # The returned dictionary is part of root, so updating it updates root.
    stored_params = _extract_params_dict(root)
    stored_params.update(
        (name, _to_serializable_dict(value)) for name, value in kwargs.items())

    return JsonSerializedObject(json.dumps(sort_dict_by_keys(root)))
def access_jsparams(jsparams: JsonSerializedObject, *args: str) -> dict[str, Any]:
    """Read selected constructor parameters out of a serialized JSON blob.

    Args:
        jsparams: The JSON string produced by dumpjs.
        *args: Parameter names to extract from the internal PARAMS -> DICT
            mapping.

    Returns:
        A mapping of requested parameter names to their deserialized
        values. The values are reconstructed Python objects (e.g., tuples, sets,
        dicts) rather than the raw internal JSON representation.

    Raises:
        KeyError: If a requested key is not present, or if the JSON string does
            not contain the expected PARAMS -> DICT structure.
    """
    root = json.loads(jsparams)
    if not isinstance(root, dict):
        raise KeyError("Invalid structure: JSON root must be a dictionary")

    stored_params = _extract_params_dict(root)

    def restore(name: str) -> Any:
        # Names are handled strictly in the order given, so the first missing
        # one is reported only after the earlier ones were deserialized.
        if name not in stored_params:
            raise KeyError(f"Parameter '{name}' not found in serialized object")
        return _from_serializable_dict(stored_params[name])

    return {name: restore(name) for name in args}