Coverage for src / documint_mcp / agent_files.py: 0%
175 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-03-30 22:30 -0400
1"""
2Agent file synthesis — generates and maintains CLAUDE.md, AGENTS.md, llms.txt
3from a collection of .mint artifact files.
5The synthesis problem: when a project has 20+ .mint files, we need to produce
6a CLAUDE.md that is under 300 lines and covers what matters most.
7Solution: relevance scoring based on recency, type, cross-reference count, drift status.
8"""
9from __future__ import annotations
11import json
12from dataclasses import dataclass
13from datetime import UTC, datetime
14from pathlib import Path
15from typing import Any
17import structlog
19from .mint import MintDocument
21logger = structlog.get_logger(__name__)
# Target lengths to stay within context window budgets
# (soft caps — generators truncate with a "N more" marker when exceeded).
CLAUDE_MD_TARGET_LINES = 280  # CLAUDE.md: stay under the ~300-line budget
AGENTS_MD_TARGET_LINES = 200  # AGENTS.md: terser than CLAUDE.md
LLMS_TXT_TARGET_LINES = 80    # llms.txt: index only, per llmstxt.org
@dataclass
class ArtifactScore:
    """Relevance score for one .mint artifact, with human-readable reasons."""

    # The scored artifact itself.
    artifact: MintDocument
    # Aggregate relevance; higher means the artifact gets more detail in synthesis.
    score: float
    # One entry per scoring contribution, e.g. "type=api_reference(1.0)".
    reasons: list[str]
# Relative importance of each artifact type when ranking for synthesis.
# Types not listed here fall back to 0.2 via TYPE_WEIGHTS.get(..., 0.2).
TYPE_WEIGHTS = {
    "api_reference": 1.0,
    "mcp_reference": 0.9,
    "sdk_guides": 0.8,
    "migration_notes": 0.6,
    "changelog": 0.4,
    "unknown": 0.2,
}
def score_artifact(artifact: MintDocument, all_artifacts: list[MintDocument]) -> ArtifactScore:
    """Score an artifact's relevance for synthesis. Higher = include more detail.

    Contributions (each recorded in ``reasons``):
      - type weight from TYPE_WEIGHTS (unknown types get 0.2)
      - +0.5 when drift_status is STALE (stale docs need agent attention)
      - recency: linear decay to 0 over 30 days, weighted 0.3
      - cross-references: +0.1 per other artifact mentioning this one's
        symbols, capped at 0.3
      - symbol count: +0.01 per exported symbol, capped at 0.2
    """
    score = 0.0
    reasons: list[str] = []

    # Type weight
    artifact_type = artifact._artifact_type()
    type_weight = TYPE_WEIGHTS.get(artifact_type, 0.2)
    score += type_weight
    reasons.append(f"type={artifact_type}({type_weight:.1f})")

    # Drift status boost — stale artifacts need attention
    if artifact.drift_status == "STALE":
        score += 0.5
        reasons.append("drift=STALE(+0.5)")

    # Recency — decay over 30 days; tolerate missing/invalid timestamps.
    if artifact.generated_at:
        try:
            # Accept trailing-"Z" ISO timestamps by normalizing to +00:00.
            generated = datetime.fromisoformat(artifact.generated_at.replace("Z", "+00:00"))
            age_days = (datetime.now(UTC) - generated).days
            recency = max(0.0, 1.0 - age_days / 30)
            score += recency * 0.3
            reasons.append(f"recency={recency:.2f}(+{recency*0.3:.2f})")
        except (ValueError, TypeError):
            pass

    # Cross-reference count — how many other artifacts mention this one's symbols
    own_symbols = _get_symbol_names(artifact)
    if own_symbols:
        ref_count = sum(
            1
            for other in all_artifacts
            if other is not artifact
            and any(sym in other.narrative for sym in own_symbols[:5])
        )
        cross_ref_boost = min(0.3, ref_count * 0.1)
        if cross_ref_boost > 0:
            score += cross_ref_boost
            reasons.append(f"cross_refs={ref_count}(+{cross_ref_boost:.2f})")

    # Symbol count — more exports = more important.
    # Fix: record this contribution in `reasons` like every other boost,
    # so the reasons list fully explains the final score.
    symbol_count = len(own_symbols)
    symbol_boost = min(0.2, symbol_count * 0.01)
    score += symbol_boost
    if symbol_boost > 0:
        reasons.append(f"symbols={symbol_count}(+{symbol_boost:.2f})")

    return ArtifactScore(artifact=artifact, score=round(score, 3), reasons=reasons)
94def _get_symbol_names(artifact: MintDocument) -> list[str]:
95 """Extract symbol names from LSIF-compact symbol list."""
96 return [s.get("n", "") for s in artifact._export_symbols() if s.get("n")]
class AgentFileGenerator:
    """
    Synthesizes agent-readable files from a collection of .mint artifacts.

    Usage:
        gen = AgentFileGenerator()
        claude_md = gen.generate_claude_md(project_name="Cilow", artifacts=[...])
        agents_md = gen.generate_agents_md(project_name="Cilow", artifacts=[...])
        llms_txt = gen.generate_llms_txt(project_name="Cilow", base_url="https://cilow.ai", artifacts=[...])
    """

    def generate_claude_md(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
        repo_url: str = "",
    ) -> str:
        """Generate a synthesized CLAUDE.md under 300 lines.

        Artifacts are relevance-ranked via score_artifact; the top three get
        full detail (agent-context XML, exports, schema excerpt, narrative),
        the rest a one-line summary, truncated near CLAUDE_MD_TARGET_LINES.
        """
        scored = sorted(
            [score_artifact(a, artifacts) for a in artifacts],
            key=lambda x: x.score,
            reverse=True,
        )

        stale = [s for s in scored if s.artifact.drift_status == "STALE"]

        lines = [
            f"# {project_name}",
            "",
        ]
        if project_description:
            lines += [project_description, ""]

        # Freshness header — the drift-status signal for coding agents
        lines += [
            "<!-- DOCUMINT FRESHNESS — auto-maintained by documint.xyz -->",
            f"<!-- overall: {_freshness_pct(stale, len(artifacts))}% -->",
        ]
        if stale:
            for s in stale:
                title = s.artifact._title()
                lines.append(f"<!-- STALE: {title} — {s.artifact.drift_status} -->")
        lines.append("")

        # Top artifacts — full detail (symbols + api_schema + narrative summary)
        lines += ["## Key APIs & Context", ""]

        for scored_artifact in scored[:3]:
            art = scored_artifact.artifact
            title = art._title()

            lines += [f"### {title}", ""]

            # Agent context XML block
            lines += [art._agent_context_xml(), ""]

            # Symbol signatures
            symbol_names = _get_symbol_names(art)
            if symbol_names:
                lines += ["**Exports:** " + ", ".join(f"`{n}`" for n in symbol_names[:12]), ""]

            # API schema (truncated to 600 chars)
            if art.api_schema:
                lines += ["```", art.api_schema[:600].rstrip(), "```", ""]

            # Narrative (first 400 chars)
            if art.narrative:
                lines += [art.narrative[:400].rstrip(), ""]

            # Source files
            if art.source_files:
                lines += ["**Source:** " + " · ".join(f"`{f}`" for f in art.source_files[:4]), ""]

            # Drift status inline
            lines += [f"*Freshness: {art.drift_status} · hash: {art.codebase_hash[:12]}*", ""]

            # Leave ~40 lines of headroom for the summary + footer sections.
            if len(lines) > CLAUDE_MD_TARGET_LINES - 40:
                break

        # Remaining artifacts — summary only
        if len(scored) > 3:
            lines += ["## Additional Artifacts", ""]
            for idx, scored_artifact in enumerate(scored[3:], start=3):
                art = scored_artifact.artifact
                title = art._title()
                symbol_names = _get_symbol_names(art)
                sym_str = ", ".join(f"`{n}`" for n in symbol_names[:6])
                lines.append(f"- **{title}** ({art.drift_status}): {sym_str}")
                if len(lines) > CLAUDE_MD_TARGET_LINES - 10:
                    # enumerate index instead of scored.index(...): O(1) per
                    # iteration and correct even when two ArtifactScore
                    # entries compare equal (dataclass __eq__).
                    remaining = len(scored) - idx - 1
                    lines.append(
                        f" _(+ {remaining} more — see .mint/ directory)_"
                    )
                    break
            lines.append("")

        # Agent instructions from top artifact
        if scored and scored[0].artifact.ai_context.get("instructions"):
            instructions = scored[0].artifact.ai_context.get("instructions", "")
            if isinstance(instructions, str) and instructions:
                lines += ["## Agent Instructions", "", instructions, ""]

        if repo_url:
            lines += ["## Repository", "", f"Source: {repo_url}", ""]

        return "\n".join(lines)

    def generate_agents_md(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
    ) -> str:
        """Generate AGENTS.md — simpler, less prescriptive than CLAUDE.md.

        Every artifact gets a short section (type, exports, sources,
        narrative excerpt, status), truncated near AGENTS_MD_TARGET_LINES.
        """
        scored = sorted(
            [score_artifact(a, artifacts) for a in artifacts],
            key=lambda x: x.score,
            reverse=True,
        )

        lines = [
            f"# {project_name} — Agent Context",
            "",
        ]
        if project_description:
            lines += [project_description, ""]

        lines += ["## Available APIs", ""]
        for idx, s in enumerate(scored):
            art = s.artifact
            title = art._title()
            artifact_type = art._artifact_type()
            symbol_names = _get_symbol_names(art)

            lines += [f"### {title}", ""]
            if artifact_type:
                lines.append(f"Type: `{artifact_type}`")
            if symbol_names:
                lines.append("Exports: " + ", ".join(f"`{n}`" for n in symbol_names[:10]))
            if art.source_files:
                lines.append("Source: " + ", ".join(f"`{f}`" for f in art.source_files[:3]))
            if art.narrative:
                lines.append(art.narrative[:300].rstrip())
            lines.append(f"Status: {art.drift_status}")
            lines.append("")

            if len(lines) > AGENTS_MD_TARGET_LINES:
                # enumerate index instead of scored.index(s) — see claude_md.
                remaining = len(scored) - idx - 1
                if remaining > 0:
                    lines.append(f"_({remaining} more artifacts in .mint/ directory)_")
                break

        return "\n".join(lines)

    def generate_llms_txt(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        base_url: str = "",
        project_description: str = "",
    ) -> str:
        """Generate /llms.txt per the llmstxt.org spec.

        Each artifact becomes a link (when its "readme" link is known,
        resolved against base_url) or a bare bullet otherwise.
        """
        lines = [
            f"# {project_name}",
            "",
        ]
        if project_description:
            lines += [f"> {project_description}", ""]

        lines.append("## Documentation")
        lines.append("")

        for art in artifacts:
            title = art._title()
            if art.links.get("readme"):
                url = art.links["readme"]
                # Relative links get resolved against base_url when provided.
                if base_url and not url.startswith("http"):
                    url = base_url.rstrip("/") + "/" + url.lstrip("/")
                lines.append(f"- [{title}]({url})")
            else:
                lines.append(f"- {title}")

        lines.append("")
        if base_url:
            lines += [
                "## Optional",
                "",
                f"- [Full context]({base_url.rstrip('/')}/llms-full.txt)",
                f"- [API schema]({base_url.rstrip('/')}/openapi.json)",
            ]

        return "\n".join(lines)

    def generate_llms_full_txt(
        self,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
    ) -> str:
        """Generate /llms-full.txt — complete content for all artifacts."""
        lines = [f"# {project_name}", ""]
        if project_description:
            lines += [project_description, ""]

        for art in artifacts:
            lines.append(art.to_llms_full_txt())
            lines.append("")

        return "\n".join(lines)

    def write_to_directory(
        self,
        output_dir: Path,
        project_name: str,
        artifacts: list[MintDocument],
        project_description: str = "",
        repo_url: str = "",
        base_url: str = "",
    ) -> dict[str, Path]:
        """Write all agent files to a directory. Returns dict of filename -> path.

        Creates output_dir (and parents) if missing; overwrites existing files.
        """
        output_dir.mkdir(parents=True, exist_ok=True)
        written: dict[str, Path] = {}

        for filename, content in [
            ("CLAUDE.md", self.generate_claude_md(project_name, artifacts, project_description, repo_url)),
            ("AGENTS.md", self.generate_agents_md(project_name, artifacts, project_description)),
            ("llms.txt", self.generate_llms_txt(project_name, artifacts, base_url, project_description)),
            ("llms-full.txt", self.generate_llms_full_txt(project_name, artifacts, project_description)),
        ]:
            path = output_dir / filename
            path.write_text(content, encoding="utf-8")
            written[filename] = path

        logger.info("agent_files_written", dir=str(output_dir), files=list(written.keys()))
        return written
341def _freshness_pct(stale: list[ArtifactScore], total: int) -> int:
342 if total == 0:
343 return 100
344 return round((total - len(stale)) / total * 100)
# Module-level singleton — the generator is stateless, so one shared instance suffices.
_generator = AgentFileGenerator()


def get_agent_file_generator() -> AgentFileGenerator:
    """Return the shared module-level AgentFileGenerator instance."""
    return _generator