Coverage for src \ truenex_memory \ ingestion \ manifest.py: 96%
80 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
1"""Source manifest domain model for local ingestion."""
3from __future__ import annotations
5from dataclasses import dataclass, field
6from pathlib import Path
7import json
# Only manifest schema version accepted by SourceManifest.from_path.
MANIFEST_VERSION = "1"

# source_type values that can be indexed now (text-based parsers exist)
INDEXABLE_SOURCE_TYPES = frozenset({"project_docs", "agent_session"})

# source_type values reserved for future parse_later support
PARSE_LATER_SOURCE_TYPES = frozenset(
    {"agent_memory", "operations_note", "binary_document"}
)

# Every source_type a manifest entry may declare (indexable + parse-later).
VALID_SOURCE_TYPES = INDEXABLE_SOURCE_TYPES | PARSE_LATER_SOURCE_TYPES

# Every privacy_scope a manifest entry may declare.
VALID_PRIVACY_SCOPES = frozenset({"local_private", "project_shared"})
@dataclass(frozen=True)
class SourceEntry:
    """A single source declared in a manifest.

    ``source_type`` and ``privacy_scope`` are validated on construction
    against ``VALID_SOURCE_TYPES`` and ``VALID_PRIVACY_SCOPES``.
    """

    source_type: str
    source_path: str
    source_tool: str = ""
    privacy_scope: str = "local_private"
    description: str = ""

    def __post_init__(self) -> None:
        """Reject unknown ``source_type`` / ``privacy_scope`` values.

        Raises:
            ValueError: if either field is outside its allowed set.
        """
        if self.source_type not in VALID_SOURCE_TYPES:
            raise ValueError(
                f"invalid source_type {self.source_type!r}; "
                f"expected one of {sorted(VALID_SOURCE_TYPES)}"
            )
        if self.privacy_scope not in VALID_PRIVACY_SCOPES:
            raise ValueError(
                f"invalid privacy_scope {self.privacy_scope!r}; "
                f"expected one of {sorted(VALID_PRIVACY_SCOPES)}"
            )

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> SourceEntry:
        """Build an entry from one decoded JSON object of a manifest.

        ``source_type`` and ``source_path`` are required non-empty strings.
        ``source_tool``, ``privacy_scope`` and ``description`` are optional;
        a missing key or an explicit JSON ``null`` yields the field default.

        Raises:
            ValueError: if a required field is missing/empty, or if
                ``source_type`` / ``privacy_scope`` is not an allowed value.
        """

        def optional(key: str, default: str) -> str:
            # A JSON null arrives as None; str(None) would give the literal
            # text "None", so treat it the same as an absent key.
            value = data.get(key)
            return default if value is None else str(value)

        source_type = _require_str(data, "source_type")
        source_path = _require_str(data, "source_path")
        return cls(
            source_type=source_type,
            source_path=source_path,
            source_tool=optional("source_tool", ""),
            privacy_scope=optional("privacy_scope", "local_private"),
            description=optional("description", ""),
        )
@dataclass(frozen=True)
class SourceManifest:
    """A local ingestion manifest listing sources to index."""

    manifest_version: str
    project: str
    # frozen=True only prevents rebinding the attribute; the list itself
    # is still a regular mutable list.
    sources: list[SourceEntry]

    @classmethod
    def from_path(cls, path: Path) -> SourceManifest:
        """Load and validate a manifest JSON file.

        Args:
            path: location of the manifest, read as UTF-8 text.

        Returns:
            The parsed manifest with one ``SourceEntry`` per ``sources`` item.

        Raises:
            FileNotFoundError: if ``path`` does not exist.
            ValueError: if the file cannot be read or decoded, is not valid
                JSON, is not a JSON object, declares an unsupported
                ``manifest_version``, lacks a non-empty ``project``, lacks a
                non-empty ``sources`` list, or contains an invalid entry.
        """
        try:
            raw = path.read_text(encoding="utf-8")
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"manifest not found: {path}") from exc
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is not an OSError; wrap it too so a
            # non-UTF-8 file is reported with the manifest path.
            raise ValueError(f"cannot read manifest {path}: {exc}") from exc

        try:
            data = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise ValueError(f"invalid JSON in manifest {path}: {exc}") from exc

        if not isinstance(data, dict):
            raise ValueError(f"manifest must be a JSON object, got {type(data).__name__}")

        version = str(data.get("manifest_version", ""))
        if version != MANIFEST_VERSION:
            raise ValueError(
                f"unsupported manifest_version {version!r}, expected {MANIFEST_VERSION!r}"
            )

        # A JSON null would stringify to the non-empty text "None" and slip
        # past the emptiness check, so treat it as missing. Surrounding
        # whitespace is stripped so a blank name is rejected as well.
        raw_project = data.get("project")
        project = "" if raw_project is None else str(raw_project).strip()
        if not project:
            raise ValueError("manifest requires a non-empty 'project' field")

        raw_sources = data.get("sources")
        if not isinstance(raw_sources, list) or not raw_sources:
            raise ValueError("manifest requires a non-empty 'sources' list")

        sources: list[SourceEntry] = []
        for idx, item in enumerate(raw_sources):
            if not isinstance(item, dict):
                raise ValueError(f"source[{idx}] must be a JSON object, got {type(item).__name__}")
            sources.append(SourceEntry.from_dict(item))

        return cls(manifest_version=version, project=project, sources=sources)
@dataclass(frozen=True)
class IngestionRecord:
    """Normalized record produced by a parser, ready for indexing.

    Only ``project``, ``source_type``, ``source_path``, ``source_tool`` and
    ``text`` are required. No validation is performed on construction.
    """

    project: str
    source_type: str
    source_path: str
    source_tool: str
    text: str
    session_id: str | None = None
    created_at: str | None = None
    last_modified: str | None = None
    privacy_scope: str = "local_private"
    # default_factory gives every record its own dict rather than a shared one.
    metadata: dict[str, object] = field(default_factory=dict)

    @property
    def filename(self) -> str:
        """Return the final path component of ``source_path``."""
        return Path(self.source_path).name
def _require_str(data: dict[str, object], key: str) -> str:
    """Return ``data[key]`` with surrounding whitespace stripped.

    Raises:
        ValueError: if the key is absent, the value is not a ``str``, or the
            value is empty / whitespace-only.
    """
    value = data.get(key)
    if not isinstance(value, str) or not value.strip():
        raise ValueError(f"manifest entry requires a non-empty string field {key!r}")
    return value.strip()