Coverage for src / documint_mcp / symbol_graph.py: 0%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 22:30 -0400

"""Symbol knowledge graph for cross-artifact drift detection.

Tracks symbol → artifact relationships so that when a symbol changes,
ALL artifacts that reference it can be identified (not just the one being scanned).

Usage:
    graph = get_symbol_graph()
    graph.index_artifact("api-reference", "project-1", symbols)
    affected = graph.find_affected_artifacts(["execute", "retry"], "project-1")
    # Returns: {"api-reference": ["execute"], "sdk-guides": ["execute", "retry"]}
"""

12from __future__ import annotations 

13 

14import logging 

15import os 

16import sqlite3 

17import threading 

18from datetime import UTC, datetime 

19from pathlib import Path 

20 

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# On-disk location of the index: a ".documint" directory (relative path)
# holding a single SQLite file.
_DB_DIR = Path(".documint")
_DB_FILE = _DB_DIR.joinpath("symbol_graph.db")

# Schema: one row per (symbol, artifact, project) triple, stamped with the
# time the pairing was last recorded. The composite primary key is what lets
# "INSERT OR REPLACE" act as an upsert.
_CREATE_TABLE = """
CREATE TABLE IF NOT EXISTS symbol_artifact_map (
    symbol_name TEXT NOT NULL,
    artifact_id TEXT NOT NULL,
    project_id TEXT NOT NULL,
    last_seen TEXT NOT NULL,
    PRIMARY KEY (symbol_name, artifact_id, project_id)
);
"""

# Secondary index on the symbol name column.
_CREATE_INDEX = (
    "CREATE INDEX IF NOT EXISTS idx_symbol "
    "ON symbol_artifact_map(symbol_name);"
)

36 

37 

38class SymbolGraph: 

39 """SQLite-backed symbol → artifact index.""" 

40 

41 def __init__(self, db_path: str | Path | None = None) -> None: 

42 path = Path(db_path) if db_path else _DB_FILE 

43 path.parent.mkdir(parents=True, exist_ok=True) 

44 self._lock = threading.Lock() 

45 self._conn = sqlite3.connect(str(path), check_same_thread=False) 

46 with self._lock: 

47 self._conn.execute(_CREATE_TABLE) 

48 self._conn.execute(_CREATE_INDEX) 

49 self._conn.commit() 

50 

51 def index_artifact(self, artifact_id: str, project_id: str, 

52 symbols: list[dict]) -> None: 

53 """Register all symbols for an artifact (upsert).""" 

54 now = datetime.now(UTC).isoformat() 

55 rows = [ 

56 (sym.get("n", ""), artifact_id, project_id, now) 

57 for sym in symbols 

58 if sym.get("n") 

59 ] 

60 if rows: 

61 with self._lock: 

62 self._conn.executemany( 

63 "INSERT OR REPLACE INTO symbol_artifact_map VALUES (?, ?, ?, ?)", 

64 rows, 

65 ) 

66 self._conn.commit() 

67 

68 def find_affected_artifacts(self, changed_symbol_names: list[str], 

69 project_id: str) -> dict[str, list[str]]: 

70 """Return {artifact_id: [symbol_names]} for all affected artifacts. 

71 

72 Checks ALL artifacts that reference any of the changed symbols, 

73 enabling cross-artifact drift detection. 

74 """ 

75 result: dict[str, list[str]] = {} 

76 with self._lock: 

77 for sym_name in changed_symbol_names: 

78 rows = self._conn.execute( 

79 "SELECT artifact_id FROM symbol_artifact_map " 

80 "WHERE symbol_name = ? AND project_id = ?", 

81 (sym_name, project_id), 

82 ).fetchall() 

83 for (artifact_id,) in rows: 

84 result.setdefault(artifact_id, []).append(sym_name) 

85 return result 

86 

87 def get_symbol_artifacts(self, symbol_name: str, project_id: str) -> list[str]: 

88 """Return all artifact IDs that reference this symbol.""" 

89 with self._lock: 

90 rows = self._conn.execute( 

91 "SELECT artifact_id FROM symbol_artifact_map " 

92 "WHERE symbol_name = ? AND project_id = ?", 

93 (symbol_name, project_id), 

94 ).fetchall() 

95 return [r[0] for r in rows] 

96 

97 def close(self) -> None: 

98 """Close the SQLite connection.""" 

99 with self._lock: 

100 self._conn.close() 

101 

102 

# Lazily created process-wide SymbolGraph; populated by get_symbol_graph().
_graph_instance: SymbolGraph | None = None
# Serializes first-time construction so that concurrent first callers cannot
# each build (and leak) their own SymbolGraph and connection.
_graph_lock = threading.Lock()


def get_symbol_graph() -> SymbolGraph:
    """Return the module-level SymbolGraph singleton.

    The instance is created on first call with the default database path
    and reused afterwards. Creation is guarded by a lock, so every caller
    receives the same instance even when the first calls race.
    """
    global _graph_instance  # noqa: PLW0603
    if _graph_instance is None:
        with _graph_lock:
            # Re-check under the lock: another thread may have created the
            # instance while this one was waiting.
            if _graph_instance is None:
                _graph_instance = SymbolGraph()
    return _graph_instance