Coverage for src \ truenex_memory \ export \ fingerprint.py: 90%
21 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
1"""Deterministic data fingerprint for export/import equivalence checks.
3Produces stable hashes and comparisons that ignore volatile row ordering
4so two exports of the same logical data always compare equal.
5"""
7from __future__ import annotations
9import hashlib
10import json
12_EXPORT_TABLES = ("documents", "chunks", "memory_nodes", "edges", "retrieval_logs", "schema_migrations")
15def canonicalize_export(payload: dict[str, object]) -> dict[str, object]:
16 """Return a canonicalized copy of *payload* with stable ordering.
18 Rows within each data table are sorted by the ``id`` column.
19 Keys within each row dict are sorted alphabetically.
20 ``memory_export_version`` and ``project_id`` are preserved as-is.
21 """
22 result: dict[str, object] = {
23 "memory_export_version": payload.get("memory_export_version"),
24 "project_id": payload.get("project_id"),
25 }
26 for table in _EXPORT_TABLES:
27 rows = payload.get(table, [])
28 if not isinstance(rows, list):
29 result[table] = rows
30 continue
31 sorted_rows = sorted(
32 (_canonical_row(r) for r in rows if isinstance(r, dict)),
33 key=lambda r: str(r.get("id", "")),
34 )
35 result[table] = sorted_rows
36 return result
39def _canonical_row(row: dict[str, object]) -> dict[str, object]:
40 """Return a copy of *row* with keys sorted alphabetically."""
41 return {k: row[k] for k in sorted(row)}
def export_fingerprint(payload: dict[str, object]) -> str:
    """Return the SHA-256 hex digest of *payload* in canonical form.

    The payload is first passed through ``canonicalize_export``, then
    serialized to JSON with sorted keys and non-ASCII characters left
    unescaped, encoded as UTF-8, and hashed.
    """
    serialized = json.dumps(
        canonicalize_export(payload),
        sort_keys=True,
        ensure_ascii=False,
    )
    digest = hashlib.sha256(serialized.encode("utf-8"))
    return digest.hexdigest()
def exports_equivalent(a: dict[str, object], b: dict[str, object]) -> bool:
    """Report whether two export payloads have the same fingerprint.

    Each payload is reduced to its ``export_fingerprint``; the payloads are
    considered equivalent exactly when those two hex digests match, so any
    ordering difference removed by canonicalization does not affect the
    result.
    """
    fingerprint_a = export_fingerprint(a)
    fingerprint_b = export_fingerprint(b)
    return fingerprint_a == fingerprint_b