Coverage for Users / vladimirpavlov / PycharmProjects / parameterizable / src / mixinforge / json_processor.py: 97%

199 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-01 16:37 -0600

1"""JSON-compatible serialization helpers for complex Python objects. 

2 

3This module provides functions to convert rich Python data structures into a 

4JSON-serializable representation and back. It supports primitive types as well 

5as containers (list, tuple, set, dict), Enums, and certain custom objects. 

6 

7The serialized form is a pure-JSON structure containing only dicts, lists, 

8strings, numbers, booleans, and null. Special container and object types are 

9encoded using internal marker keys. 

10""" 

11 

12import importlib 

13import json 

14import types 

15from enum import Enum 

16from typing import Any, Mapping, NewType 

17 

18from .dict_sorter import sort_dict_by_keys 

19 

# Distinct static type for JSON strings handled by this module. NewType only
# affects type checkers; at runtime the values are ordinary ``str`` objects.
JsonSerializedObject = NewType("JsonSerializedObject", str)

# Instances of these types are rejected by the serializer with a TypeError
# (the tuple is used directly as the second argument of ``isinstance``).
_UNSUPPORTED_TYPES = (
    types.ModuleType,
    types.FunctionType,
    types.LambdaType,
    types.BuiltinFunctionType,
    types.MethodType,
    types.CodeType,
    type,
)

31 

class _Markers:
    """Reserved dictionary keys that tag constructs JSON cannot express.

    Each constant is a string key placed inside a plain dict so that the
    serialized output stays pure JSON while still recording what kind of
    Python value it stands for.

    Attributes:
        DICT: Wraps a serialized dictionary; the value is a JSON object.
        TUPLE: Wraps a tuple; the value is a list of items.
        SET: Wraps a set; the value is a list of items.
        ENUM: Holds the name of an Enum member.
        CLASS: Qualified name of the class to use during reconstruction.
        MODULE: Name of the module in which that class is defined.
        PARAMS: Serialized constructor parameters (get_params-based
            reconstruction).
        STATE: Serialized instance state (__getstate__/__setstate__-based
            reconstruction).
    """

    # Container markers
    DICT = "..dict.."
    TUPLE = "..tuple.."
    SET = "..set.."

    # Enum member marker
    ENUM = "..enum.."

    # Object identity markers
    CLASS = "..class.."
    MODULE = "..module.."

    # Object payload markers
    PARAMS = "..params.."
    STATE = "..state.."

60 

61 

def _enum_member_to_dict(member: Enum) -> dict:
    """Encode an Enum member as its name plus the class/module that owns it."""
    return {_Markers.ENUM: member.name,
            _Markers.CLASS: member.__class__.__qualname__,
            _Markers.MODULE: member.__class__.__module__}


def _to_serializable_dict(x: Any, seen: set[int] | None = None) -> Any:
    """Convert a Python object into a JSON-serializable structure.

    The transformation is recursive and supports primitives, lists, tuples,
    sets, dicts and Enum members. Certain custom objects are supported either
    through a get_params method or the pickle protocol __getstate__; objects
    exposing __slots__ or __dict__ are handled as a fallback.

    Args:
        x: The object to convert.
        seen: Ids of the objects currently being serialized on the path from
            the root to x; used for cycle detection. Callers normally leave
            this as None.

    Returns:
        A structure composed only of JSON-compatible types (dict, list, str,
        int, float, bool, None), potentially enhanced with internal marker
        keys to represent tuples, sets, Enum members and reconstructable
        objects.

    Raises:
        TypeError: If x (or any nested value) has an unsupported type.
        RecursionError: If x contains a reference cycle.

    Examples:
        - Tuples and sets are encoded with markers:

        >>> _to_serializable_dict((1, 2))
        {'..tuple..': [1, 2]}
        >>> _to_serializable_dict({1, 2})
        {'..set..': [1, 2]}
    """

    if isinstance(x, (int, float, bool, str, type(None))):
        # Members of int/str-based Enums (IntEnum, StrEnum, ...) are also
        # instances of the primitive type. Returning them unchanged would
        # serialize only the raw value and the Enum identity would be lost on
        # load, so named members get the ENUM encoding instead. Values without
        # a member name of their own (e.g. combined IntFlag values) cannot be
        # looked up by name and therefore stay plain primitives.
        if isinstance(x, Enum) and x.name in type(x).__members__:
            return _enum_member_to_dict(x)
        return x
    elif isinstance(x, _UNSUPPORTED_TYPES):
        raise TypeError(f"Unsupported type: {type(x).__name__}")

    if seen is None:
        seen = set()

    # ``seen`` tracks only the objects on the current recursion path: the id
    # is added before descending and removed in ``finally``, so the same
    # object may legitimately appear several times in different branches.
    obj_id = id(x)
    if obj_id in seen:
        raise RecursionError(
            f"Cyclic reference detected while serializing object of type {type(x).__name__}")
    seen.add(obj_id)

    try:
        if hasattr(x, "get_params"):
            # get_params takes precedence over every other strategy.
            result = _process_state(x.get_params(), x, _Markers.PARAMS, seen)
        elif isinstance(x, list):
            result = [_to_serializable_dict(i, seen) for i in x]
        elif isinstance(x, tuple):
            result = {_Markers.TUPLE: [_to_serializable_dict(i, seen) for i in x]}
        elif isinstance(x, set):
            result = {_Markers.SET: [_to_serializable_dict(i, seen) for i in x]}
        elif isinstance(x, dict):
            # Keys are passed through unchanged; only values are converted.
            result = {_Markers.DICT: {k: _to_serializable_dict(v, seen)
                                      for k, v in x.items()}}
        elif isinstance(x, Enum):
            result = _enum_member_to_dict(x)
        elif hasattr(x, "__getstate__"):
            # NOTE(review): Python 3.11+ gives every object a default
            # __getstate__, so the two fallbacks below are presumably only
            # reached on older interpreters - confirm the supported versions.
            result = _process_state(x.__getstate__(), x, _Markers.STATE, seen)
        elif hasattr(x.__class__, "__slots__"):
            # For slotted objects, create a pickle-style state tuple.
            slots = _get_all_slots(type(x))
            # Raises AttributeError if a slot is uninitialized.
            slot_state = tuple(getattr(x, name) for name in slots)

            if hasattr(x, "__dict__"):
                # Hybrid object with slots and dict.
                final_state = (slot_state, x.__dict__)
            else:
                # Slots-only object: use a (slots, None) tuple for consistency
                # in the reconstruction logic.
                final_state = (slot_state, None)
            result = _process_state(final_state, x, _Markers.STATE, seen)
        elif hasattr(x, "__dict__"):
            result = _process_state(x.__dict__, x, _Markers.STATE, seen)
        else:
            raise TypeError(f"Unsupported type: {type(x).__name__}")
    finally:
        seen.remove(obj_id)
    return result

143 

144 

def _process_state(state: Any, obj: Any, marker: str, seen: set[int]) -> dict:
    """Build the marker-bearing envelope describing a custom object.

    The envelope records the qualified class name and the module of obj, plus
    the recursively serialized state stored under marker.

    Args:
        state: The raw state to serialize (e.g. the result of get_params or
            __getstate__).
        obj: The object being serialized; only its class is inspected.
        marker: Key under which the serialized state is stored (e.g. the
            PARAMS or STATE marker).
        seen: Ids of objects on the current serialization path, forwarded for
            cycle detection.

    Returns:
        A dictionary with CLASS, MODULE and marker entries that
        _recreate_object can use to rebuild the instance.
    """
    owner = obj.__class__
    envelope = {
        _Markers.CLASS: owner.__qualname__,
        _Markers.MODULE: owner.__module__,
    }
    envelope[marker] = _to_serializable_dict(state, seen)
    return envelope

166 

167 

def _get_all_slots(cls: type) -> list[str]:
    """Collect the attribute names of all slots declared in a class hierarchy.

    Only slots declared directly in each class body are considered, so a
    subclass that declares no __slots__ of its own does not repeat the slots
    of its parents. Private slot names (two leading underscores, no trailing
    double underscore) are returned in their name-mangled form
    (``_ClassName__slot``), which is the attribute name that getattr/setattr
    actually need.

    Args:
        cls: The class to inspect for __slots__.

    Returns:
        List of slot attribute names ordered from the most basic class to
        cls itself, excluding __dict__ and __weakref__.
    """
    collected: list[str] = []
    # Traverse in reverse MRO to maintain parent-to-child slot order.
    for base_cls in reversed(cls.__mro__):
        # vars() looks at the class's own namespace only; getattr() would
        # return a parent's __slots__ for classes that declare none.
        declared = vars(base_cls).get("__slots__", ())
        if isinstance(declared, str):
            # A bare string declares a single slot.
            declared = (declared,)
        # Name mangling uses the class name without its leading underscores;
        # a class name made only of underscores disables mangling.
        owner = base_cls.__name__.lstrip("_")
        for slot_name in declared:
            if slot_name in ("__dict__", "__weakref__"):
                continue
            if (owner and slot_name.startswith("__")
                    and not slot_name.endswith("__")):
                slot_name = f"_{owner}{slot_name}"
            collected.append(slot_name)
    return collected

188 

189 

190def _recreate_object(x: Mapping[str,Any]) -> Any: 

191 """Recreate an object instance from its serialized metadata. 

192 

193 The input mapping must include MODULE and CLASS markers and either 

194 PARAMS (constructor parameters), STATE (instance state), or ENUM 

195 (Enum member name). 

196 

197 Args: 

198 x: Marker-bearing mapping produced by _to_serializable_dict for 

199 custom objects. 

200 

201 Returns: 

202 A new instance of the referenced class reconstructed from parameters, 

203 state, or Enum membership. 

204 

205 Raises: 

206 TypeError: If the mapping does not contain sufficient information to 

207 reconstruct the object. 

208 ImportError: If the target module cannot be imported. (Surfaced via the 

209 underlying import mechanism.) 

210 AttributeError: If the class does not exist in the target module. 

211 """ 

212 if not isinstance(x, Mapping): 

213 raise TypeError(f"Object metadata must be a mapping, " 

214 f"got: {type(x).__name__}") 

215 if _Markers.MODULE not in x or _Markers.CLASS not in x: 

216 raise TypeError("Object metadata missing required markers " 

217 "MODULE and CLASS") 

218 

219 module_name = x[_Markers.MODULE] 

220 class_name = x[_Markers.CLASS] 

221 try: 

222 module = importlib.import_module(module_name) 

223 cls = getattr(module, class_name) 

224 except (ImportError, AttributeError) as e: 

225 raise ImportError(f"Could not import {class_name} from {module_name}" 

226 ) from e 

227 

228 match x: 

229 case {_Markers.PARAMS: params_json}: 

230 return cls(**_from_serializable_dict(params_json)) 

231 case {_Markers.ENUM: member_name}: 

232 if not issubclass(cls, Enum): 

233 raise TypeError(f"Class {class_name} is not an Enum") 

234 return cls[member_name] 

235 case {_Markers.STATE: state_json}: 

236 state = _from_serializable_dict(state_json) 

237 obj = cls.__new__(cls) 

238 if hasattr(obj, "__setstate__"): 

239 obj.__setstate__(state) 

240 elif isinstance(state, tuple): 

241 # Handle tuple state from __getstate__ for slotted classes 

242 slots_to_fill = _get_all_slots(cls) 

243 

244 # Support multiple tuple state formats: 

245 # 1) (slot_values_seq, dict_values) where slot_values_seq is a sequence of values 

246 # 2) (dict_values, slot_mapping) as produced by CPython's built-in __getstate__ for slotted classes 

247 # 3) A plain tuple of slot values 

248 slot_values_seq = None 

249 slot_mapping = None 

250 dict_values = None 

251 

252 if len(state) == 2: 

253 a, b = state 

254 if isinstance(a, dict) and isinstance(b, dict): 

255 # CPython default: (dict_values, slot_mapping) 

256 dict_values = a 

257 slot_mapping = b 

258 elif isinstance(a, (list, tuple)) and (b is None or isinstance(b, dict)): 

259 # Our encoder format: (slot_values_seq, dict_values) 

260 slot_values_seq = list(a) 

261 dict_values = b 

262 elif isinstance(a, dict) and isinstance(b, (list, tuple)): 

263 # Be tolerant if components are swapped 

264 dict_values = a 

265 slot_values_seq = list(b) 

266 elif a is None and isinstance(b, dict): 

267 # No slots, only dict 

268 dict_values = b 

269 else: 

270 # Fallback: treat entire state as slot values 

271 slot_values_seq = list(state) 

272 else: 

273 # Otherwise, state is just a tuple of slot values 

274 slot_values_seq = list(state) 

275 

276 # Apply slots 

277 if slot_mapping is not None: 

278 for name, value in slot_mapping.items(): 

279 setattr(obj, name, value) 

280 elif slot_values_seq is not None and len(slot_values_seq) > 0: 

281 if len(slot_values_seq) != len(slots_to_fill): 

282 raise TypeError( 

283 f"Tuple state length {len(slot_values_seq)} does not match " 

284 f"slots length {len(slots_to_fill)} for class {cls.__name__}") 

285 for value, name in zip(slot_values_seq, slots_to_fill): 

286 setattr(obj, name, value) 

287 

288 # Apply dict attributes, if any 

289 if dict_values: 

290 for k, v in dict_values.items(): 

291 setattr(obj, k, v) 

292 

293 else: # Fallback reconstruction 

294 for k, v in state.items(): 

295 setattr(obj, k, v) 

296 return obj 

297 case _: 

298 raise TypeError("Unable to recreate object from provided data") 

299 

300 

def _from_serializable_dict(x: Any) -> Any:
    """Rebuild native Python values from the marker-based JSON structure.

    This reverses _to_serializable_dict: primitives pass through, lists are
    converted element by element, marker dictionaries become tuples, sets or
    dicts, and dictionaries carrying MODULE or CLASS markers are handed to
    _recreate_object.

    Args:
        x: The JSON-loaded Python structure to convert.

    Returns:
        The reconstructed Python object graph.

    Raises:
        TypeError: If x is of an unsupported type, is a mapping without any
            known marker, or contains a malformed container marker.
    """
    if x is None or isinstance(x, (bool, int, float, str)):
        return x
    if isinstance(x, list):
        return [_from_serializable_dict(item) for item in x]
    if not isinstance(x, Mapping):
        raise TypeError(f"Unsupported type: {type(x).__name__}")

    # Container markers are checked in this fixed order; the first one
    # present decides how the mapping is interpreted.
    container_markers = (
        (_Markers.TUPLE, "TUPLE", list),
        (_Markers.SET, "SET", list),
        (_Markers.DICT, "DICT", dict),
    )
    for marker, label, payload_type in container_markers:
        if marker not in x:
            continue
        payload = x[marker]
        if len(x) != 1:
            raise TypeError(f"{label} marker must be the only key")
        if not isinstance(payload, payload_type):
            raise TypeError(
                f"{label} marker must map to a {payload_type.__name__}")
        if marker == _Markers.DICT:
            return {key: _from_serializable_dict(value)
                    for key, value in payload.items()}
        restored = (_from_serializable_dict(item) for item in payload)
        return tuple(restored) if marker == _Markers.TUPLE else set(restored)

    if _Markers.MODULE in x or _Markers.CLASS in x:
        return _recreate_object(x)
    raise TypeError(f"Unsupported type: {type(x).__name__}")

344 

345 

def dumpjs(obj: Any, **kwargs) -> JsonSerializedObject:
    """Serialize an object to a JSON string with the module's marker encoding.

    Args:
        obj: The object to serialize.
        **kwargs: Extra keyword arguments passed straight to json.dumps
            (e.g., indent=2, sort_keys=True).

    Returns:
        The JSON string representing the object.
    """
    payload = _to_serializable_dict(obj)
    encoded = json.dumps(payload, **kwargs)
    # NewType call is an identity function at runtime; it only tags the type.
    return JsonSerializedObject(encoded)

358 

359 

def loadjs(s: JsonSerializedObject, **kwargs) -> Any:
    """Rebuild a Python object from a JSON string created by dumpjs.

    Args:
        s: The JSON string to parse.
        **kwargs: Extra keyword arguments passed straight to json.loads.
            object_hook is rejected.

    Returns:
        The Python object reconstructed from the JSON string.

    Raises:
        ValueError: If object_hook is provided in kwargs.
    """
    hook_supplied = "object_hook" in kwargs
    if hook_supplied:
        raise ValueError("object_hook cannot be used with mixinforge.loadjs()")
    parsed = json.loads(s, **kwargs)
    return _from_serializable_dict(parsed)

377 

378 

def _extract_params_dict(container: dict) -> dict:
    """Locate the raw parameter mapping inside a serialized container.

    If container has a PARAMS entry, the DICT mapping nested in that entry is
    returned; otherwise the DICT mapping found directly in container is
    returned. The returned dict is the very object stored in container, not a
    copy.

    Args:
        container: A dictionary containing serialized parameters.

    Returns:
        The parameter dictionary found under PARAMS->DICT or top-level DICT.

    Raises:
        KeyError: If the expected DICT structure is not found.
    """
    if _Markers.PARAMS in container:
        holder = container[_Markers.PARAMS]
        failure = (f"Invalid structure: {_Markers.PARAMS} missing "
                   f"{_Markers.DICT} mapping")
    else:
        holder = container
        failure = (f"Invalid structure: missing {_Markers.DICT} "
                   f"mapping in JSON object")

    found = holder.get(_Markers.DICT) if isinstance(holder, dict) else None
    if not isinstance(found, dict):
        raise KeyError(failure)
    return found

408 

409 

def update_jsparams(jsparams: JsonSerializedObject, **kwargs) -> JsonSerializedObject:
    """Return a copy of a serialized JSON blob with some parameters replaced.

    The JSON string is parsed, the given keyword arguments are serialized and
    merged into its parameter mapping (PARAMS -> DICT, or a top-level DICT),
    the whole structure is passed through sort_dict_by_keys, and the result is
    dumped back to a JSON string.

    Args:
        jsparams: The JSON string returned by dumpjs.
        **kwargs: Key-value pairs to merge into the serialized parameters.
            Existing keys are overwritten; new keys are added.

    Returns:
        A new JSON string with updated parameters.

    Raises:
        KeyError: If jsparams is not a JSON object or does not contain the
            expected DICT structure.
    """
    root = json.loads(jsparams)
    if not isinstance(root, dict):
        raise KeyError("Invalid structure: JSON root must be a dictionary")

    # The extracted mapping is part of ``root``, so updating it in place
    # updates the structure that is dumped below.
    _extract_params_dict(root).update(
        (name, _to_serializable_dict(value)) for name, value in kwargs.items()
    )

    return JsonSerializedObject(json.dumps(sort_dict_by_keys(root)))

444 

445 

def access_jsparams(jsparams: JsonSerializedObject, *args: str) -> dict[str, Any]:
    """Read selected parameters out of a serialized JSON blob.

    Args:
        jsparams: The JSON string produced by dumpjs.
        *args: Names of the parameters to look up in the serialized
            parameter mapping (PARAMS -> DICT, or a top-level DICT).

    Returns:
        A dict mapping each requested name to its deserialized value, i.e.
        the reconstructed Python object (tuple, set, dict, ...) rather than
        the raw marker-based JSON representation.

    Raises:
        KeyError: If a requested key is not present, or if the JSON string is
            not a JSON object containing the expected DICT structure.
    """
    root = json.loads(jsparams)
    if not isinstance(root, dict):
        raise KeyError("Invalid structure: JSON root must be a dictionary")

    available = _extract_params_dict(root)

    def restore(name: str) -> Any:
        # Names are validated one at a time, in the order they were requested.
        if name not in available:
            raise KeyError(f"Parameter '{name}' not found in serialized object")
        return _from_serializable_dict(available[name])

    return {name: restore(name) for name in args}