Coverage for src / documint_mcp / agent_files.py: 0%

175 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-03-30 22:30 -0400

1""" 

2Agent file synthesis — generates and maintains CLAUDE.md, AGENTS.md, llms.txt 

3from a collection of .mint artifact files. 

4 

5The synthesis problem: when a project has 20+ .mint files, we need to produce 

6a CLAUDE.md that is under 300 lines and covers what matters most. 

7Solution: relevance scoring based on recency, type, cross-reference count, drift status. 

8""" 

9from __future__ import annotations 

10 

11import json 

12from dataclasses import dataclass 

13from datetime import UTC, datetime 

14from pathlib import Path 

15from typing import Any 

16 

17import structlog 

18 

19from .mint import MintDocument 

20 

# Module-level structured logger for this module.
logger = structlog.get_logger(__name__)

# Target lengths to stay within context window budgets
CLAUDE_MD_TARGET_LINES = 280  # generate_claude_md trims detail as it nears this
AGENTS_MD_TARGET_LINES = 200  # generate_agents_md truncates past this
LLMS_TXT_TARGET_LINES = 80  # budget for llms.txt (not referenced in visible code — TODO confirm use elsewhere)

27 

28 

@dataclass
class ArtifactScore:
    """Relevance score for a single .mint artifact, with a human-readable breakdown."""

    # The scored artifact.
    artifact: MintDocument
    # Aggregate relevance score (higher = include more detail during synthesis).
    score: float
    # Breakdown of score contributions, e.g. "drift=STALE(+0.5)".
    reasons: list[str]

34 

35 

# Base relevance weight per artifact type. Types not listed here fall back
# to 0.2 (score_artifact uses .get(..., 0.2), matching the "unknown" entry).
TYPE_WEIGHTS = {
    "api_reference": 1.0,
    "mcp_reference": 0.9,
    "sdk_guides": 0.8,
    "migration_notes": 0.6,
    "changelog": 0.4,
    "unknown": 0.2,
}

44 

45 

def score_artifact(artifact: MintDocument, all_artifacts: list[MintDocument]) -> ArtifactScore:
    """Score an artifact's relevance for synthesis. Higher = include more detail.

    The score is the sum of:
      * the artifact type's base weight (TYPE_WEIGHTS, default 0.2),
      * +0.5 if the artifact has drifted (STALE),
      * up to +0.3 for recency (linear decay over 30 days),
      * up to +0.3 for cross-references from other artifacts' narratives,
      * up to +0.2 for export-symbol count.

    Args:
        artifact: The artifact to score.
        all_artifacts: Every artifact in the project, used for cross-reference
            counting; ``artifact`` itself is skipped.

    Returns:
        An ArtifactScore with the rounded total and per-component reasons.
    """
    score = 0.0
    reasons: list[str] = []

    # Type weight
    artifact_type = artifact._artifact_type()
    type_weight = TYPE_WEIGHTS.get(artifact_type, 0.2)
    score += type_weight
    reasons.append(f"type={artifact_type}({type_weight:.1f})")

    # Drift status boost — stale artifacts need attention
    if artifact.drift_status == "STALE":
        score += 0.5
        reasons.append("drift=STALE(+0.5)")

    # Recency — linear decay over 30 days. Clamped to [0, 1]: without the
    # upper clamp a future-dated timestamp (negative age) would push the
    # recency component above its intended +0.3 cap.
    if artifact.generated_at:
        try:
            generated = datetime.fromisoformat(artifact.generated_at.replace("Z", "+00:00"))
            age_days = (datetime.now(UTC) - generated).days
            recency = min(1.0, max(0.0, 1.0 - age_days / 30))
            score += recency * 0.3
            reasons.append(f"recency={recency:.2f}(+{recency * 0.3:.2f})")
        except (ValueError, TypeError):
            # Unparseable or naive/aware-mismatched timestamp: skip this component.
            pass

    # Cross-reference count — how many other artifacts mention this one's symbols
    own_symbols = _get_symbol_names(artifact)
    if own_symbols:
        ref_count = sum(
            1
            for other in all_artifacts
            if other is not artifact
            and any(sym in other.narrative for sym in own_symbols[:5])
        )
        cross_ref_boost = min(0.3, ref_count * 0.1)
        if cross_ref_boost > 0:
            score += cross_ref_boost
            reasons.append(f"cross_refs={ref_count}(+{cross_ref_boost:.2f})")

    # Symbol count — more exports = more important (capped at +0.2).
    symbol_count = len(own_symbols)
    symbol_boost = min(0.2, symbol_count * 0.01)
    score += symbol_boost
    if symbol_boost > 0:
        # Recorded for parity with the other components, which all log a reason.
        reasons.append(f"symbols={symbol_count}(+{symbol_boost:.2f})")

    return ArtifactScore(artifact=artifact, score=round(score, 3), reasons=reasons)

92 

93 

94def _get_symbol_names(artifact: MintDocument) -> list[str]: 

95 """Extract symbol names from LSIF-compact symbol list.""" 

96 return [s.get("n", "") for s in artifact._export_symbols() if s.get("n")] 

97 

98 

class AgentFileGenerator:
    """
    Synthesizes agent-readable files from a collection of .mint artifacts.

    Usage:
        gen = AgentFileGenerator()
        claude_md = gen.generate_claude_md(project_name="Cilow", artifacts=[...])
        agents_md = gen.generate_agents_md(project_name="Cilow", artifacts=[...])
        llms_txt = gen.generate_llms_txt(project_name="Cilow", base_url="https://cilow.ai", artifacts=[...])
    """

    def generate_claude_md(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
        repo_url: str = "",
    ) -> str:
        """Generate a synthesized CLAUDE.md under 300 lines.

        Artifacts are ranked with score_artifact(); the top three get full
        detail (agent-context XML, exports, schema, narrative, sources), the
        rest a one-line summary, bounded by CLAUDE_MD_TARGET_LINES.

        Args:
            project_name: Heading for the file.
            artifacts: All .mint artifacts to synthesize from.
            project_description: Optional blurb placed under the heading.
            repo_url: Optional source-repository link appended at the end.

        Returns:
            The CLAUDE.md content as a single newline-joined string.
        """
        scored = sorted(
            [score_artifact(a, artifacts) for a in artifacts],
            key=lambda x: x.score,
            reverse=True,
        )

        stale = [s for s in scored if s.artifact.drift_status == "STALE"]

        lines = [
            f"# {project_name}",
            "",
        ]
        if project_description:
            lines += [project_description, ""]

        # Freshness header — the drift-status signal for coding agents
        lines += [
            "<!-- DOCUMINT FRESHNESS — auto-maintained by documint.xyz -->",
            f"<!-- overall: {_freshness_pct(stale, len(artifacts))}% -->",
        ]
        if stale:
            for s in stale:
                title = s.artifact._title()
                # Fixed: title and drift status were concatenated with no
                # separator, rendering e.g. "<!-- STALE: FooSTALE -->".
                lines.append(f"<!-- STALE: {title} ({s.artifact.drift_status}) -->")
        lines.append("")

        # Top artifacts — full detail (symbols + api_schema + narrative summary)
        lines += ["## Key APIs & Context", ""]

        for scored_artifact in scored[:3]:
            art = scored_artifact.artifact
            title = art._title()

            lines += [f"### {title}", ""]

            # Agent context XML block
            lines += [art._agent_context_xml(), ""]

            # Symbol signatures
            symbol_names = _get_symbol_names(art)
            if symbol_names:
                lines += ["**Exports:** " + ", ".join(f"`{n}`" for n in symbol_names[:12]), ""]

            # API schema (truncated)
            if art.api_schema:
                lines += ["```", art.api_schema[:600].rstrip(), "```", ""]

            # Narrative (first 400 chars)
            if art.narrative:
                lines += [art.narrative[:400].rstrip(), ""]

            # Source files
            if art.source_files:
                lines += ["**Source:** " + " · ".join(f"`{f}`" for f in art.source_files[:4]), ""]

            # Drift status inline
            lines += [f"*Freshness: {art.drift_status} · hash: {art.codebase_hash[:12]}*", ""]

            # Stop before the detailed section eats the whole budget.
            if len(lines) > CLAUDE_MD_TARGET_LINES - 40:
                break

        # Remaining artifacts — summary only
        if len(scored) > 3:
            lines += ["## Additional Artifacts", ""]
            # enumerate() replaces scored.index(...): index() is O(n) per call
            # and returns the first *equal* element, which can be wrong.
            for idx, scored_artifact in enumerate(scored[3:], start=3):
                art = scored_artifact.artifact
                title = art._title()
                symbol_names = _get_symbol_names(art)
                sym_str = ", ".join(f"`{n}`" for n in symbol_names[:6])
                lines.append(f"- **{title}** ({art.drift_status}): {sym_str}")
                if len(lines) > CLAUDE_MD_TARGET_LINES - 10:
                    remaining = len(scored) - idx - 1
                    lines.append(
                        f" _(+ {remaining} more — see .mint/ directory)_"
                    )
                    break
            lines.append("")

        # Agent instructions from top artifact
        if scored and scored[0].artifact.ai_context.get("instructions"):
            instructions = scored[0].artifact.ai_context.get("instructions", "")
            if isinstance(instructions, str) and instructions:
                lines += ["## Agent Instructions", "", instructions, ""]

        if repo_url:
            lines += ["## Repository", "", f"Source: {repo_url}", ""]

        return "\n".join(lines)

    def generate_agents_md(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
    ) -> str:
        """Generate AGENTS.md — simpler, less prescriptive than CLAUDE.md.

        Lists every artifact (type, exports, sources, narrative snippet,
        drift status) in score order, truncating past AGENTS_MD_TARGET_LINES.
        """
        scored = sorted(
            [score_artifact(a, artifacts) for a in artifacts],
            key=lambda x: x.score,
            reverse=True,
        )

        lines = [
            f"# {project_name} — Agent Context",
            "",
        ]
        if project_description:
            lines += [project_description, ""]

        lines += ["## Available APIs", ""]
        # enumerate() replaces scored.index(s) — see generate_claude_md.
        for idx, s in enumerate(scored):
            art = s.artifact
            title = art._title()
            artifact_type = art._artifact_type()
            symbol_names = _get_symbol_names(art)

            lines += [f"### {title}", ""]
            if artifact_type:
                lines.append(f"Type: `{artifact_type}`")
            if symbol_names:
                lines.append("Exports: " + ", ".join(f"`{n}`" for n in symbol_names[:10]))
            if art.source_files:
                lines.append("Source: " + ", ".join(f"`{f}`" for f in art.source_files[:3]))
            if art.narrative:
                lines.append(art.narrative[:300].rstrip())
            lines.append(f"Status: {art.drift_status}")
            lines.append("")

            if len(lines) > AGENTS_MD_TARGET_LINES:
                remaining = len(scored) - idx - 1
                if remaining > 0:
                    lines.append(f"_({remaining} more artifacts in .mint/ directory)_")
                break

        return "\n".join(lines)

    def generate_llms_txt(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        base_url: str = "",
        project_description: str = "",
    ) -> str:
        """Generate /llms.txt per llmstxt.org spec.

        Each artifact becomes a link (when it has a "readme" link, resolved
        against ``base_url`` for relative paths) or a plain bullet otherwise.
        """
        lines = [
            f"# {project_name}",
            "",
        ]
        if project_description:
            lines += [f"> {project_description}", ""]

        lines.append("## Documentation")
        lines.append("")

        for art in artifacts:
            title = art._title()
            if art.links.get("readme"):
                url = art.links["readme"]
                # Resolve relative readme paths against the site base URL.
                if base_url and not url.startswith("http"):
                    url = base_url.rstrip("/") + "/" + url.lstrip("/")
                lines.append(f"- [{title}]({url})")
            else:
                lines.append(f"- {title}")

        lines.append("")
        if base_url:
            lines += [
                "## Optional",
                "",
                f"- [Full context]({base_url.rstrip('/')}/llms-full.txt)",
                f"- [API schema]({base_url.rstrip('/')}/openapi.json)",
            ]

        return "\n".join(lines)

    def generate_llms_full_txt(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
    ) -> str:
        """Generate /llms-full.txt — complete content for all artifacts."""
        lines = [f"# {project_name}", ""]
        if project_description:
            lines += [project_description, ""]

        for art in artifacts:
            lines.append(art.to_llms_full_txt())
            lines.append("")

        return "\n".join(lines)

    def write_to_directory(
        self,
        output_dir: Path,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
        repo_url: str = "",
        base_url: str = "",
    ) -> dict[str, Path]:
        """Write all agent files to a directory. Returns dict of filename -> path.

        Creates ``output_dir`` (and parents) if needed, then writes CLAUDE.md,
        AGENTS.md, llms.txt, and llms-full.txt as UTF-8.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        written: dict[str, Path] = {}

        for filename, content in [
            ("CLAUDE.md", self.generate_claude_md(project_name, artifacts, project_description, repo_url)),
            ("AGENTS.md", self.generate_agents_md(project_name, artifacts, project_description)),
            ("llms.txt", self.generate_llms_txt(project_name, artifacts, base_url, project_description)),
            ("llms-full.txt", self.generate_llms_full_txt(project_name, artifacts, project_description)),
        ]:
            path = output_dir / filename
            path.write_text(content, encoding="utf-8")
            written[filename] = path

        logger.info("agent_files_written", dir=str(output_dir), files=list(written.keys()))
        return written

339 

340 

341def _freshness_pct(stale: list[ArtifactScore], total: int) -> int: 

342 if total == 0: 

343 return 100 

344 return round((total - len(stale)) / total * 100) 

345 

346 

# Module-level singleton — the generator is stateless, so one shared instance suffices.
_generator = AgentFileGenerator()


def get_agent_file_generator() -> AgentFileGenerator:
    """Return the shared module-level AgentFileGenerator instance."""
    return _generator