Coverage for src\truenex_memory\export\fingerprint.py: 90%

21 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 10:21 +0200

1"""Deterministic data fingerprint for export/import equivalence checks. 

2 

3Produces stable hashes and comparisons that ignore volatile row ordering 

4so two exports of the same logical data always compare equal. 

5""" 

6 

7from __future__ import annotations 

8 

9import hashlib 

10import json 

11 

# Data tables that are canonicalized; any other payload key except the two
# metadata fields copied in ``canonicalize_export`` is dropped.
_EXPORT_TABLES = (
    "documents",
    "chunks",
    "memory_nodes",
    "edges",
    "retrieval_logs",
    "schema_migrations",
)


def canonicalize_export(payload: dict[str, object]) -> dict[str, object]:
    """Return a canonicalized copy of *payload* with stable ordering.

    Rows within each data table are sorted by the string form of their
    ``id`` value. Rows that share an ``id`` -- or have none at all -- are
    further ordered by their key-sorted JSON serialization, so the result
    never depends on the order in which the rows were supplied.
    Keys within each row dict are sorted alphabetically.
    ``memory_export_version`` and ``project_id`` are preserved as-is
    (``None`` when absent).

    A table that is missing from *payload* becomes an empty list. A table
    whose value is not a list is copied through unchanged. Entries of a
    table list that are not dicts are omitted from the result.
    """
    result: dict[str, object] = {
        "memory_export_version": payload.get("memory_export_version"),
        "project_id": payload.get("project_id"),
    }
    for table in _EXPORT_TABLES:
        rows = payload.get(table, [])
        if not isinstance(rows, list):
            result[table] = rows
            continue
        result[table] = sorted(
            (_canonical_row(r) for r in rows if isinstance(r, dict)),
            key=_row_sort_key,
        )
    return result


def _row_sort_key(row: dict[str, object]) -> tuple[str, str]:
    """Return a total-order sort key for a canonical row.

    The primary component is ``str(id)``. The secondary component breaks
    ties between rows whose ids collide or are missing; without it the
    stable sort would leave such rows in input order.
    """
    try:
        tie_breaker = json.dumps(
            row, sort_keys=True, ensure_ascii=False, default=str
        )
    except (TypeError, ValueError):
        # Unserializable content (e.g. mixed-type nested keys or a circular
        # reference): fall back to repr so canonicalization itself does not
        # start raising where it previously succeeded.
        tie_breaker = repr(row)
    return (str(row.get("id", "")), tie_breaker)


def _canonical_row(row: dict[str, object]) -> dict[str, object]:
    """Return a shallow copy of *row* with keys sorted alphabetically."""
    return {k: row[k] for k in sorted(row)}

42 

43 

def export_fingerprint(payload: dict[str, object]) -> str:
    """Return the SHA-256 hex digest of *payload* in canonical form.

    The payload is first passed through ``canonicalize_export``, then
    serialized as key-sorted JSON (non-ASCII characters kept verbatim),
    encoded as UTF-8 and hashed.
    """
    serialized = json.dumps(
        canonicalize_export(payload),
        sort_keys=True,
        ensure_ascii=False,
    )
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()

50 

51 

def exports_equivalent(a: dict[str, object], b: dict[str, object]) -> bool:
    """Tell whether two export payloads hold the same data.

    The payloads are compared through their ``export_fingerprint``
    digests, so ordering differences that the fingerprint normalises away
    do not affect the outcome.
    """
    fingerprint_a = export_fingerprint(a)
    fingerprint_b = export_fingerprint(b)
    return fingerprint_a == fingerprint_b