Coverage for src \ truenex_memory \ store \ source_ledger.py: 100%

66 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 10:21 +0200

1"""Source ledger domain model for incremental refresh tracking.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass 

6from datetime import datetime, timezone 

7import sqlite3 

8 

9 

# Every status value a ledger row is allowed to carry; the write helpers in
# this module reject anything else.
SOURCE_LEDGER_STATUSES = frozenset(
    ("active", "pending", "error", "missing", "skipped")
)

# Phase 3 transition table: previous status -> statuses it may move to.
# The ``None`` key is presumably "no previous ledger entry" -- confirm with
# the refresh code that calls is_phase3_ledger_transition_allowed.
# "pending" is a valid starting point but never a target in this table.
SOURCE_LEDGER_PHASE3_TRANSITIONS: dict[str | None, frozenset[str]] = {
    None: frozenset(("active", "skipped", "missing", "error")),
    "active": frozenset(("active", "skipped", "missing", "error")),
    "skipped": frozenset(("skipped", "active", "missing", "error")),
    # A missing source can only stay missing or come back as active.
    "missing": frozenset(("missing", "active")),
    # An errored source cannot move straight to skipped.
    "error": frozenset(("error", "active", "missing")),
    "pending": frozenset(("active", "skipped", "missing", "error")),
}

19 

20 

def is_phase3_ledger_transition_allowed(
    previous_status: str | None,
    next_status: str,
) -> bool:
    """Report whether the Phase 3 transition table permits a status change.

    Args:
        previous_status: Status the ledger entry currently has; ``None`` is
            a key of the table in its own right.
        next_status: Status the entry would move to.

    Returns:
        ``True`` when ``next_status`` is listed for ``previous_status`` in
        ``SOURCE_LEDGER_PHASE3_TRANSITIONS``; ``False`` when it is not, or
        when ``previous_status`` is not a key of the table at all.
    """
    allowed_targets = SOURCE_LEDGER_PHASE3_TRANSITIONS.get(previous_status)
    if allowed_targets is None:
        # Unknown starting status: nothing is permitted.
        return False
    return next_status in allowed_targets

29 

30 

@dataclass(frozen=True)
class SourceLedgerRecord:
    """A row in the source_ledger table.

    Instances are immutable (``frozen=True``). Field names match the column
    names read by ``_ledger_record_from_row``. Apart from ``status``, this
    module stores the values it is given without validating their format.
    """

    # Lookup key; the upsert statement resolves conflicts on this column.
    source_id: str
    source_path_or_alias: str
    project_name: str | None
    source_type: str
    # Defaults to "1" when written through upsert_ledger_entry.
    parser_version: str
    content_hash: str | None
    # NOTE(review): presumably timestamp strings supplied by the caller; their
    # format is not enforced anywhere in this module -- confirm with callers.
    last_modified_at: str | None
    last_indexed_at: str | None
    # Checked against SOURCE_LEDGER_STATUSES by the write helpers.
    status: str
    error_message: str | None
    chunk_count: int
    # Both come from _now_sql() (UTC, ISO-8601). created_at is left untouched
    # when an existing row is upserted; updated_at is refreshed on every write.
    created_at: str
    updated_at: str

48 

49 

def _now_sql() -> str:
    """Return the current UTC time as a fixed-width ISO-8601 string.

    ``timespec="microseconds"`` keeps the fractional part even when it is
    exactly zero. Plain ``isoformat()`` drops it in that case, which would
    give the ``created_at`` / ``updated_at`` columns two different layouts;
    a single layout keeps text-based ``ORDER BY`` on them unambiguous.

    Returns:
        A timezone-aware timestamp such as
        ``"2026-05-19T08:21:00.000000+00:00"``.
    """
    return datetime.now(timezone.utc).isoformat(timespec="microseconds")

52 

53 

def upsert_ledger_entry(
    conn: sqlite3.Connection,
    source_id: str,
    source_path_or_alias: str,
    source_type: str,
    *,
    project_name: str | None = None,
    parser_version: str = "1",
    content_hash: str | None = None,
    last_modified_at: str | None = None,
    last_indexed_at: str | None = None,
    status: str = "pending",
    error_message: str | None = None,
    chunk_count: int = 0,
) -> SourceLedgerRecord:
    """Insert a ledger row for ``source_id`` or overwrite the existing one.

    On a ``source_id`` conflict every column except ``created_at`` is
    replaced with the supplied value (including the defaults of arguments
    that were not passed). The statement is committed, then the stored row
    is read back and returned.

    Assumes ``conn`` yields rows addressable by column name (for example
    ``conn.row_factory = sqlite3.Row``), because the row is converted with
    ``_ledger_record_from_row``.

    Raises:
        ValueError: if ``status`` is not in ``SOURCE_LEDGER_STATUSES``.
    """
    if status not in SOURCE_LEDGER_STATUSES:
        raise ValueError(
            f"invalid status {status!r}, expected one of {sorted(SOURCE_LEDGER_STATUSES)}"
        )

    # One timestamp serves as both created_at and updated_at for new rows.
    timestamp = _now_sql()
    values = (
        source_id,
        source_path_or_alias,
        project_name,
        source_type,
        parser_version,
        content_hash,
        last_modified_at,
        last_indexed_at,
        status,
        error_message,
        chunk_count,
        timestamp,
        timestamp,
    )
    conn.execute(
        """
        INSERT INTO source_ledger (
            source_id, source_path_or_alias, project_name, source_type,
            parser_version, content_hash, last_modified_at, last_indexed_at,
            status, error_message, chunk_count, created_at, updated_at
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(source_id) DO UPDATE SET
            source_path_or_alias=excluded.source_path_or_alias,
            project_name=excluded.project_name,
            source_type=excluded.source_type,
            parser_version=excluded.parser_version,
            content_hash=excluded.content_hash,
            last_modified_at=excluded.last_modified_at,
            last_indexed_at=excluded.last_indexed_at,
            status=excluded.status,
            error_message=excluded.error_message,
            chunk_count=excluded.chunk_count,
            updated_at=excluded.updated_at
        """,
        values,
    )
    conn.commit()

    # Re-read so the result reflects what is actually stored, in particular
    # the original created_at of a pre-existing row.
    lookup = conn.execute(
        "SELECT * FROM source_ledger WHERE source_id = ?", (source_id,)
    )
    stored = lookup.fetchone()
    return _ledger_record_from_row(stored)

115 

116 

def list_ledger_entries(
    conn: sqlite3.Connection,
    *,
    status: str | None = None,
    source_type: str | None = None,
) -> list[SourceLedgerRecord]:
    """Return ledger rows, most recently updated first.

    Args:
        conn: Open connection to the database holding ``source_ledger``.
        status: When given, keep only rows with this status.
        source_type: When given, keep only rows with this source type.

    Returns:
        Matching rows ordered by ``updated_at`` descending; both filters
        are combined with ``AND`` when supplied together.

    Raises:
        ValueError: if ``status`` is given but is not in
            ``SOURCE_LEDGER_STATUSES``.
    """
    if status is not None and status not in SOURCE_LEDGER_STATUSES:
        raise ValueError(
            f"invalid status {status!r}, expected one of {sorted(SOURCE_LEDGER_STATUSES)}"
        )

    # Keep each clause next to its bound value so the two cannot drift apart.
    active_filters = [
        (clause, value)
        for clause, value in (
            ("status = ?", status),
            ("source_type = ?", source_type),
        )
        if value is not None
    ]

    query = "SELECT * FROM source_ledger"
    if active_filters:
        query += " WHERE " + " AND ".join(clause for clause, _ in active_filters)
    query += " ORDER BY updated_at DESC"

    bound_values = [value for _, value in active_filters]
    fetched = conn.execute(query, bound_values).fetchall()
    return [_ledger_record_from_row(entry) for entry in fetched]

141 

142 

def get_ledger_entry(
    conn: sqlite3.Connection,
    source_id: str,
) -> SourceLedgerRecord | None:
    """Look up a single ledger row by its ``source_id``.

    Returns:
        The matching row converted with ``_ledger_record_from_row``, or
        ``None`` when no row has that ``source_id``.
    """
    lookup = conn.execute(
        "SELECT * FROM source_ledger WHERE source_id = ?", (source_id,)
    )
    match = lookup.fetchone()
    return None if match is None else _ledger_record_from_row(match)

153 

154 

def update_ledger_status(
    conn: sqlite3.Connection,
    source_id: str,
    status: str,
    *,
    error_message: str | None = None,
) -> SourceLedgerRecord:
    """Set the status and error message of an existing ledger row.

    ``error_message`` always overwrites the stored value, so leaving it at
    its default clears any previous message. ``updated_at`` is refreshed,
    the change is committed, and the stored row is read back and returned.

    Raises:
        ValueError: if ``status`` is not in ``SOURCE_LEDGER_STATUSES``.
        LookupError: if no row exists for ``source_id``. Nothing is
            committed in that case, and a transaction the caller had
            already opened is left exactly as it was.
    """
    if status not in SOURCE_LEDGER_STATUSES:
        raise ValueError(
            f"invalid status {status!r}, expected one of {sorted(SOURCE_LEDGER_STATUSES)}"
        )
    now = _now_sql()
    # Under sqlite3's implicit transaction handling the UPDATE below opens a
    # write transaction even when it matches no row. Remember whether one was
    # already open so the not-found path only cleans up what this call began.
    caller_owns_transaction = conn.in_transaction
    cursor = conn.execute(
        """
        UPDATE source_ledger
        SET status = ?, error_message = ?, updated_at = ?
        WHERE source_id = ?
        """,
        (status, error_message, now, source_id),
    )
    if cursor.rowcount == 0:
        if not caller_owns_transaction:
            # Nothing was modified; close the transaction we implicitly
            # opened so its write lock is not held after the error.
            conn.rollback()
        raise LookupError(f"source ledger entry not found: {source_id!r}")
    conn.commit()
    row = conn.execute(
        "SELECT * FROM source_ledger WHERE source_id = ?", (source_id,)
    ).fetchone()
    return _ledger_record_from_row(row)

182 

183 

def _ledger_record_from_row(row: sqlite3.Row) -> SourceLedgerRecord:
    """Build a ``SourceLedgerRecord`` from a row addressed by column name.

    Each listed column is read from ``row`` by name and passed to the
    record under the same field name.
    """
    columns = (
        "source_id",
        "source_path_or_alias",
        "project_name",
        "source_type",
        "parser_version",
        "content_hash",
        "last_modified_at",
        "last_indexed_at",
        "status",
        "error_message",
        "chunk_count",
        "created_at",
        "updated_at",
    )
    return SourceLedgerRecord(**{column: row[column] for column in columns})