Coverage for src / documint_mcp / cascade_detector.py: 0%
48 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 22:30 -0400
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 22:30 -0400
1"""
2Cross-artifact cascade detection.
4When artifact A's symbols change, finds all other artifacts whose narrative or
5api_schema text references those changed symbol names. These artifacts may need
6secondary review even if their own source files didn't change.
8Example: add_memory() changes in cilow-api.mint
9→ cascade detector finds sdk-quickstart.mint mentions "add_memory"
10→ secondary drift finding created for sdk-quickstart
12No embeddings or ML needed — pure text search on exported symbol names.
13O(n × m) where n=changed symbols, m=artifacts. Fast for <100 artifacts.
14"""
15from __future__ import annotations
17import re
18from dataclasses import dataclass, field
20import structlog
22from .mint import MintDocument
24logger = structlog.get_logger(__name__)
@dataclass
class CascadeFinding:
    """Secondary-review candidate: an artifact that mentions symbols changed elsewhere.

    Attributes:
        source_artifact_key: Key of the artifact whose symbols changed.
        affected_artifact_key: Key of the artifact that references those symbols.
        affected_symbol_names: The changed symbols that appear in the affected
            artifact.
        confidence: "HIGH" (more than 2 symbols) or "MEDIUM" (1-2 symbols).
        affected_sections: Narrative sections in which the symbols appear.
    """

    source_artifact_key: str
    affected_artifact_key: str
    affected_symbol_names: list[str]
    confidence: str = "MEDIUM"
    affected_sections: list[str] = field(default_factory=list)
def find_cascades(
    changed_symbol_names: list[str],
    source_artifact_key: str,
    all_artifacts: list[MintDocument],
) -> list[CascadeFinding]:
    """
    Find all artifacts that reference changed symbols in their narrative or api_schema.

    Args:
        changed_symbol_names: Symbol names from the diff (e.g. ["add_memory", "MemoryEngine"]).
            Repeated names are counted once.
        source_artifact_key: The artifact whose symbols changed (excluded from results)
        all_artifacts: All artifacts to search

    Returns:
        List of CascadeFinding objects, sorted by number of affected symbols desc.
        Findings with the same number of symbols keep the order of ``all_artifacts``.
    """
    if not changed_symbol_names:
        return []

    # De-duplicate while keeping first-seen order: confidence and ranking are
    # based on how many *distinct* symbols an artifact mentions, so a name that
    # shows up several times in the diff must not be counted several times.
    unique_symbols = list(dict.fromkeys(changed_symbol_names))

    cascades: list[CascadeFinding] = []

    for artifact in all_artifacts:
        # Resolve artifact key via helper so both LSIF and legacy formats work
        artifact_key = artifact._artifact_key()
        if artifact_key == source_artifact_key:
            continue

        # Search narrative and api_schema for changed symbol names
        search_text = (artifact.narrative or "") + "\n" + (artifact.api_schema or "")

        affected_symbols = [
            sym for sym in unique_symbols
            if _symbol_mentioned(sym, search_text)
        ]

        if not affected_symbols:
            continue

        # Identify which narrative sections (by heading) contain the references.
        # Only the narrative is inspected here, so a symbol found solely in
        # api_schema yields a finding with no sections.
        affected_sections = _find_affected_sections(affected_symbols, artifact.narrative or "")

        confidence = "HIGH" if len(affected_symbols) > 2 else "MEDIUM"

        cascades.append(CascadeFinding(
            source_artifact_key=source_artifact_key,
            affected_artifact_key=artifact_key,
            affected_symbol_names=affected_symbols,
            confidence=confidence,
            affected_sections=affected_sections,
        ))

        logger.info(
            "cascade_found",
            source=source_artifact_key,
            affected=artifact_key,
            symbols=affected_symbols,
            confidence=confidence,
        )

    # Sort by number of affected symbols (most affected first)
    return sorted(cascades, key=lambda c: len(c.affected_symbol_names), reverse=True)
100def _symbol_mentioned(symbol_name: str, text: str) -> bool:
101 """Check if a symbol name appears as a word boundary in text."""
102 if not symbol_name or not text:
103 return False
104 # Word-boundary heuristic: symbol must be preceded/followed by non-word chars
105 pattern = r'\b' + re.escape(symbol_name) + r'\b'
106 return bool(re.search(pattern, text))
109def _find_affected_sections(symbol_names: list[str], narrative: str) -> list[str]:
110 """Find which markdown sections (## headings) contain the affected symbols."""
111 if not narrative or not symbol_names:
112 return []
114 sections: list[str] = []
115 current_section = "Introduction"
117 for line in narrative.splitlines():
118 if line.startswith("#"):
119 current_section = line.lstrip("#").strip()
120 else:
121 for sym in symbol_names:
122 if sym in line and current_section not in sections:
123 sections.append(current_section)
124 break
126 return sections