Coverage for src / documint_mcp / cascade_detector.py: 0%

48 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 22:30 -0400

1""" 

2Cross-artifact cascade detection. 

3 

4When artifact A's symbols change, finds all other artifacts whose narrative or 

5api_schema text references those changed symbol names. These artifacts may need 

6secondary review even if their own source files didn't change. 

7 

8Example: add_memory() changes in cilow-api.mint 

9→ cascade detector finds sdk-quickstart.mint mentions "add_memory" 

10→ secondary drift finding created for sdk-quickstart 

11 

12No embeddings or ML needed — pure text search on exported symbol names. 

13O(n × m) where n=changed symbols, m=artifacts. Fast for <100 artifacts. 

14""" 

15from __future__ import annotations 

16 

17import re 

18from dataclasses import dataclass, field 

19 

20import structlog 

21 

22from .mint import MintDocument 

23 

24logger = structlog.get_logger(__name__) 

25 

26 

@dataclass
class CascadeFinding:
    """An artifact that may need secondary review due to a symbol change in another artifact.

    Attributes:
        source_artifact_key: Key of the artifact whose symbols changed.
        affected_artifact_key: Key of the artifact that references those symbols.
        affected_symbol_names: The changed symbols that appear in the affected
            artifact.
        confidence: "HIGH" (more than 2 symbols) or "MEDIUM" (1-2 symbols).
        affected_sections: Narrative sections that are affected.
    """

    source_artifact_key: str
    affected_artifact_key: str
    affected_symbol_names: list[str]
    confidence: str = "MEDIUM"
    # default_factory gives every instance its own list instead of a shared one.
    affected_sections: list[str] = field(default_factory=list)

35 

36 

def find_cascades(
    changed_symbol_names: list[str],
    source_artifact_key: str,
    all_artifacts: list[MintDocument],
) -> list[CascadeFinding]:
    """
    Find all artifacts that reference changed symbols in their narrative or api_schema.

    Duplicate entries in ``changed_symbol_names`` are collapsed (first-seen
    order is kept) so that a symbol reported twice by the diff is counted once
    when deciding confidence and ordering the results.

    Args:
        changed_symbol_names: Symbol names from the diff (e.g. ["add_memory", "MemoryEngine"])
        source_artifact_key: The artifact whose symbols changed (excluded from results)
        all_artifacts: All artifacts to search

    Returns:
        List of CascadeFinding objects, sorted by number of affected symbols desc.
        Confidence is "HIGH" when more than 2 distinct symbols are referenced,
        otherwise "MEDIUM".
    """
    if not changed_symbol_names:
        return []

    # De-duplicate while preserving order: repeated names would otherwise be
    # listed twice per finding and could push confidence to HIGH spuriously.
    unique_symbols = list(dict.fromkeys(changed_symbol_names))

    cascades: list[CascadeFinding] = []

    for artifact in all_artifacts:
        # Resolve artifact key via helper so both LSIF and legacy formats work
        artifact_key = artifact._artifact_key()
        if artifact_key == source_artifact_key:
            # The source artifact is handled by primary drift detection.
            continue

        # Search narrative and api_schema for changed symbol names
        search_text = (artifact.narrative or "") + "\n" + (artifact.api_schema or "")

        affected_symbols = [
            sym for sym in unique_symbols
            if _symbol_mentioned(sym, search_text)
        ]
        if not affected_symbols:
            continue

        # Identify which narrative sections (by heading) contain the references
        affected_sections = _find_affected_sections(affected_symbols, artifact.narrative or "")

        confidence = "HIGH" if len(affected_symbols) > 2 else "MEDIUM"

        cascades.append(CascadeFinding(
            source_artifact_key=source_artifact_key,
            affected_artifact_key=artifact_key,
            affected_symbol_names=affected_symbols,
            confidence=confidence,
            affected_sections=affected_sections,
        ))

        logger.info(
            "cascade_found",
            source=source_artifact_key,
            affected=artifact_key,
            symbols=affected_symbols,
            confidence=confidence,
        )

    # Sort by number of affected symbols (most affected first); sorted() is
    # stable, so ties keep the order of all_artifacts.
    return sorted(cascades, key=lambda c: len(c.affected_symbol_names), reverse=True)

98 

99 

100def _symbol_mentioned(symbol_name: str, text: str) -> bool: 

101 """Check if a symbol name appears as a word boundary in text.""" 

102 if not symbol_name or not text: 

103 return False 

104 # Word-boundary heuristic: symbol must be preceded/followed by non-word chars 

105 pattern = r'\b' + re.escape(symbol_name) + r'\b' 

106 return bool(re.search(pattern, text)) 

107 

108 

109def _find_affected_sections(symbol_names: list[str], narrative: str) -> list[str]: 

110 """Find which markdown sections (## headings) contain the affected symbols.""" 

111 if not narrative or not symbol_names: 

112 return [] 

113 

114 sections: list[str] = [] 

115 current_section = "Introduction" 

116 

117 for line in narrative.splitlines(): 

118 if line.startswith("#"): 

119 current_section = line.lstrip("#").strip() 

120 else: 

121 for sym in symbol_names: 

122 if sym in line and current_section not in sections: 

123 sections.append(current_section) 

124 break 

125 

126 return sections