Coverage for src \ truenex_memory \ ingestion \ manifest.py: 96%

80 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 10:21 +0200

1"""Source manifest domain model for local ingestion.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass, field 

6from pathlib import Path 

7import json 

8 

# The only manifest_version value this module accepts when loading a manifest.
MANIFEST_VERSION = "1"

# source_type values that can be indexed now (text-based parsers exist)
INDEXABLE_SOURCE_TYPES = frozenset(("project_docs", "agent_session"))

# source_type values reserved for future parse_later support
PARSE_LATER_SOURCE_TYPES = frozenset(
    ("agent_memory", "operations_note", "binary_document")
)

# Every source_type a manifest entry may declare (indexable + parse_later).
VALID_SOURCE_TYPES = INDEXABLE_SOURCE_TYPES.union(PARSE_LATER_SOURCE_TYPES)
# Every privacy_scope a manifest entry may declare.
VALID_PRIVACY_SCOPES = frozenset(("local_private", "project_shared"))

21 

22 

@dataclass(frozen=True)
class SourceEntry:
    """A single source declared in a manifest.

    Instances are immutable. ``source_type`` and ``privacy_scope`` are checked
    against ``VALID_SOURCE_TYPES`` and ``VALID_PRIVACY_SCOPES`` on construction.
    """

    source_type: str
    source_path: str
    source_tool: str = ""
    privacy_scope: str = "local_private"
    description: str = ""

    def __post_init__(self) -> None:
        """Validate the enumerated fields.

        Raises:
            ValueError: if ``source_type`` or ``privacy_scope`` is not one of
                the allowed values.
        """
        if self.source_type not in VALID_SOURCE_TYPES:
            raise ValueError(
                f"invalid source_type {self.source_type!r}; "
                f"expected one of {sorted(VALID_SOURCE_TYPES)}"
            )
        if self.privacy_scope not in VALID_PRIVACY_SCOPES:
            raise ValueError(
                f"invalid privacy_scope {self.privacy_scope!r}; "
                f"expected one of {sorted(VALID_PRIVACY_SCOPES)}"
            )

    @classmethod
    def from_dict(cls, data: dict[str, object]) -> SourceEntry:
        """Build an entry from one decoded manifest ``sources`` item.

        Args:
            data: Mapping with required string keys ``source_type`` and
                ``source_path`` and optional keys ``source_tool``,
                ``privacy_scope`` and ``description``.

        Returns:
            The validated entry. Optional keys that are missing or ``None``
            take the same defaults as the dataclass fields.

        Raises:
            ValueError: if a required key is missing/blank/not a string, or
                if ``source_type`` / ``privacy_scope`` is not an allowed value.
        """

        def optional_str(key: str, default: str) -> str:
            # JSON ``null`` decodes to None; str(None) would store the literal
            # text "None", so treat None exactly like an absent key.
            value = data.get(key)
            return default if value is None else str(value)

        return cls(
            source_type=_require_str(data, "source_type"),
            source_path=_require_str(data, "source_path"),
            source_tool=optional_str("source_tool", ""),
            privacy_scope=optional_str("privacy_scope", "local_private"),
            description=optional_str("description", ""),
        )

56 

57 

@dataclass(frozen=True)
class SourceManifest:
    """A local ingestion manifest listing sources to index."""

    manifest_version: str
    project: str
    sources: list[SourceEntry]

    @classmethod
    def from_path(cls, path: Path) -> SourceManifest:
        """Load and validate a manifest JSON file.

        Args:
            path: Location of the UTF-8 encoded JSON manifest.

        Returns:
            The parsed manifest with one ``SourceEntry`` per ``sources`` item.

        Raises:
            FileNotFoundError: if ``path`` does not exist.
            ValueError: if the file cannot be read or decoded, is not valid
                JSON, is not a JSON object, declares an unsupported
                ``manifest_version``, lacks a non-blank string ``project``,
                lacks a non-empty ``sources`` list, or contains an invalid
                source entry.
        """
        try:
            raw = path.read_text(encoding="utf-8")
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"manifest not found: {path}") from exc
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is a ValueError, not an OSError, so without
            # naming it here a non-UTF-8 file would escape with no path context.
            raise ValueError(f"cannot read manifest {path}: {exc}") from exc

        try:
            data = json.loads(raw)
        except json.JSONDecodeError as exc:
            raise ValueError(f"invalid JSON in manifest {path}: {exc}") from exc

        if not isinstance(data, dict):
            raise ValueError(f"manifest must be a JSON object, got {type(data).__name__}")

        # str() keeps a numeric ``1`` acceptable alongside the string "1".
        version = str(data.get("manifest_version", ""))
        if version != MANIFEST_VERSION:
            raise ValueError(
                f"unsupported manifest_version {version!r}, expected {MANIFEST_VERSION!r}"
            )

        # Require a real, non-blank string: stringifying the raw value would
        # turn JSON null into the project name "None" and accept "   ".
        raw_project = data.get("project")
        if not isinstance(raw_project, str) or not raw_project.strip():
            raise ValueError("manifest requires a non-empty 'project' field")
        project = raw_project.strip()

        raw_sources = data.get("sources")
        if not isinstance(raw_sources, list) or not raw_sources:
            raise ValueError("manifest requires a non-empty 'sources' list")

        sources: list[SourceEntry] = []
        for idx, item in enumerate(raw_sources):
            if not isinstance(item, dict):
                raise ValueError(f"source[{idx}] must be a JSON object, got {type(item).__name__}")
            sources.append(SourceEntry.from_dict(item))

        return cls(manifest_version=version, project=project, sources=sources)

105 

106 

@dataclass(frozen=True)
class IngestionRecord:
    """Immutable, normalized output of a parser, in the shape handed to indexing."""

    # Fields every record must supply.
    project: str
    source_type: str
    source_path: str
    source_tool: str
    text: str

    # Fields a parser may omit; each falls back to the default shown.
    session_id: str | None = None
    created_at: str | None = None
    last_modified: str | None = None
    privacy_scope: str = "local_private"
    # A fresh dict per instance, so records never share one metadata mapping.
    metadata: dict[str, object] = field(default_factory=dict)

    @property
    def filename(self) -> str:
        """Return the final path component of ``source_path``."""
        location = Path(self.source_path)
        return location.name

125 

126 

127def _require_str(data: dict[str, object], key: str) -> str: 

128 value = data.get(key) 

129 if not isinstance(value, str) or not value.strip(): 

130 raise ValueError(f"manifest entry requires a non-empty string field {key!r}") 

131 return value.strip()