Coverage for little_loops / doc_counts.py: 95%
159 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-22 16:19 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-22 16:19 -0500
1"""Documentation count verification utilities.
3Provides automated verification that documented counts (commands, agents, skills)
4match actual file counts in the codebase.
5"""
7import re
8from dataclasses import dataclass, field
9from pathlib import Path
# Default token limits applied by check_skill_budget: the overall budget for
# all skill descriptions, and the per-skill size at which a skill is flagged.
_DEFAULT_BUDGET_TOKENS = 2000
_DEFAULT_PER_SKILL_WARN_TOKENS = 200

# Documentation files scanned for documented counts (relative to the base directory).
DOC_FILES = ["README.md", "CONTRIBUTING.md", "docs/ARCHITECTURE.md"]

# Category name -> (directory relative to the base directory, glob pattern to count).
COUNT_TARGETS = {
    "commands": ("commands", "*.md"),
    "agents": ("agents", "*.md"),
    "skills": ("skills", "*/SKILL.md"),
    "loops": ("scripts/little_loops/loops", "*.yaml"),
}

# A SKILL.md containing this text is a bridge skill auto-generated from
# commands/; such skills are left out of the skill count.
BRIDGE_MARKER = "Bridged from `commands/"
33@dataclass
34class CountResult:
35 """Result of counting files in a directory."""
37 category: str
38 actual: int
39 documented: int | None = None
40 file: str | None = None
41 line: int | None = None
42 matches: bool = True
@dataclass
class VerificationResult:
    """Aggregate outcome of a documentation verification run."""

    # Number of documented counts examined; maintained by the caller,
    # add_result() does not touch it.
    total_checked: int = 0
    # Every result whose documented count disagreed with the actual count.
    mismatches: list[CountResult] = field(default_factory=list)
    # Becomes False as soon as a single mismatch is recorded.
    all_match: bool = True

    def add_result(self, result: CountResult) -> None:
        """Record ``result``; a mismatch is stored and clears ``all_match``."""
        if result.matches:
            return
        self.all_match = False
        self.mismatches.append(result)
@dataclass
class FixResult:
    """Summary of an automatic count-fixing pass."""

    # Number of documentation lines that were rewritten.
    fixed_count: int
    # Paths of the documentation files that were changed.
    files_modified: list[str]
68def count_files(directory: str, pattern: str, base_dir: Path | None = None) -> int:
69 """Count files matching pattern in directory.
71 Args:
72 directory: Directory name relative to base_dir
73 pattern: Glob pattern (e.g., "*.md" or "SKILL.md")
74 base_dir: Base directory path (defaults to current working directory)
76 Returns:
77 Number of matching files
78 """
79 if base_dir is None:
80 base_dir = Path.cwd()
81 dir_path = base_dir / directory
82 if not dir_path.exists():
83 return 0
85 return len(list(dir_path.glob(pattern)))
88def extract_count_from_line(line: str, category: str) -> int | None:
89 """Extract count from a documentation line.
91 Handles multiple formats:
92 - "34 commands" or "34 slash commands"
93 - "8 agents" or "8 specialized agents"
94 - "6 skills" or "6 skill definitions"
96 Args:
97 line: Line text to search
98 category: Category name (commands, agents, skills)
100 Returns:
101 Extracted count or None if not found
102 """
103 # For skills, also match singular "skill" (e.g., "skill definitions")
104 # Pattern matches: number followed by optional words and category name
105 # Examples: "34 commands", "8 specialized agents", "6 skill definitions"
106 if category == "skills":
107 # Match both "skills" and "skill" (singular)
108 pattern = r"(\d+)\s+\w*\s*skills?(?!\s+description)"
109 else:
110 pattern = rf"(\d+)\s+\w*\s*{category}"
112 match = re.search(pattern, line, re.IGNORECASE)
113 return int(match.group(1)) if match else None
def verify_documentation(
    base_dir: Path | None = None,
) -> VerificationResult:
    """Compare every documented count with the real number of files.

    Args:
        base_dir: Base directory path (defaults to current working directory)

    Returns:
        VerificationResult holding the number of counts checked and any mismatches
    """
    root = Path.cwd() if base_dir is None else base_dir
    report = VerificationResult(total_checked=0)

    # Real file count for each category.
    actual_counts: dict[str, int] = {
        name: count_files(folder, glob_pattern, root)
        for name, (folder, glob_pattern) in COUNT_TARGETS.items()
    }

    # Bridge skills are auto-generated from commands/, so leave them out of the skill total.
    skills_root = root / "skills"
    if "skills" in actual_counts and skills_root.exists():
        bridged = [
            skill_file
            for skill_file in skills_root.glob("*/SKILL.md")
            if BRIDGE_MARKER in skill_file.read_text()
        ]
        actual_counts["skills"] -= len(bridged)

    # Scan each documentation file that exists, line by line.
    for doc_name in DOC_FILES:
        doc_path = root / doc_name
        if not doc_path.exists():
            continue

        for number, text in enumerate(doc_path.read_text().splitlines(), start=1):
            for name in COUNT_TARGETS:
                found = extract_count_from_line(text, name)
                if found is None:
                    continue

                expected = actual_counts[name]
                report.add_result(
                    CountResult(
                        category=name,
                        actual=expected,
                        documented=found,
                        file=str(doc_name),
                        line=number,
                        matches=found == expected,
                    )
                )
                report.total_checked += 1

    return report
def format_result_text(result: VerificationResult) -> str:
    """Render a verification result as plain text.

    Args:
        result: Verification result

    Returns:
        A title and rule, followed by either a success line or one
        two-line entry per mismatch
    """
    out = ["Documentation Count Verification", "=" * 40]

    if not result.all_match:
        out.append(f"✗ Found {len(result.mismatches)} mismatch(es):")
        out.append("")
        for item in result.mismatches:
            out.extend(
                [
                    f" {item.category}: documented={item.documented}, actual={item.actual}",
                    f" at {item.file}:{item.line}",
                ]
            )
    else:
        out.append(f"✓ All {result.total_checked} count(s) match!")

    return "\n".join(out)
def format_result_json(result: VerificationResult) -> str:
    """Render a verification result as a JSON document.

    Args:
        result: Verification result

    Returns:
        JSON string (2-space indent) with ``all_match``, ``total_checked``
        and a ``mismatches`` list
    """
    import json

    mismatch_entries = []
    for item in result.mismatches:
        mismatch_entries.append(
            {
                "category": item.category,
                "documented": item.documented,
                "actual": item.actual,
                "file": item.file,
                "line": item.line,
            }
        )

    payload = {
        "all_match": result.all_match,
        "total_checked": result.total_checked,
        "mismatches": mismatch_entries,
    }
    return json.dumps(payload, indent=2)
def format_result_markdown(result: VerificationResult) -> str:
    """Render a verification result as a Markdown report.

    Args:
        result: Verification result

    Returns:
        Markdown string: a heading followed by either a success note or a
        table with one row per mismatch
    """
    out = ["# Documentation Count Verification", ""]

    if not result.all_match:
        out += [
            "## ❌ Mismatches Found",
            "",
            "| Category | Documented | Actual | Location |",
            "|----------|-----------|--------|----------|",
        ]
        out += [
            f"| {item.category} | {item.documented} | "
            f"{item.actual} | `{item.file}:{item.line}` |"
            for item in result.mismatches
        ]
    else:
        out += [
            "## ✅ All Counts Match",
            f"\nAll {result.total_checked} documented count(s) are accurate.",
        ]

    return "\n".join(out)
@dataclass
class SkillBudgetResult:
    """Outcome of a skill-description token budget check."""

    # Sum of the estimated tokens across all entries in ``skill_breakdown``.
    total_tokens: int
    # Budget that ``total_tokens`` was compared against.
    threshold_tokens: int
    # Whether the total stayed within the budget.
    under_budget: bool
    # One (SKILL.md path, description, estimated tokens) tuple per skill considered.
    skill_breakdown: list[tuple[Path, str, int]]
    # The subset of ``skill_breakdown`` flagged as individually too large.
    violations: list[tuple[Path, str, int]]
def _parse_skill_frontmatter(text: str) -> dict[str, str]:
    """Extract flat key/value pairs from SKILL.md frontmatter.

    The frontmatter is the text between a leading ``---`` and the next line
    that begins with ``---``. A ``---`` in the middle of a line (for example
    inside a description) is ordinary content and does not end the block.

    Parsing is deliberately shallow: each line containing a colon is split at
    its first colon into a stripped key and a stripped value. Quoting, nesting
    and multi-line values are not interpreted.

    Args:
        text: Full contents of a SKILL.md file

    Returns:
        Mapping of keys to values, or an empty dict when the text does not
        start with ``---`` or no closing delimiter line is found
    """
    if not text.startswith("---"):
        return {}

    # Index 0 is whatever follows the opening delimiter on its own line
    # (normally nothing); the closing delimiter must start a later line.
    candidate_lines = text[3:].splitlines()
    closing_index = next(
        (
            index
            for index, candidate in enumerate(candidate_lines)
            if index > 0 and candidate.startswith("---")
        ),
        None,
    )
    if closing_index is None:
        return {}

    fm: dict[str, str] = {}
    for line in candidate_lines[:closing_index]:
        key, separator, val = line.partition(":")
        if separator:
            fm[key.strip()] = val.strip()
    return fm
def check_skill_budget(
    base_dir: Path | None = None,
    threshold_tokens: int = _DEFAULT_BUDGET_TOKENS,
    per_skill_warn_tokens: int = _DEFAULT_PER_SKILL_WARN_TOKENS,
) -> SkillBudgetResult:
    """Estimate the token cost of skill descriptions and compare it to a budget.

    Reads the ``description`` frontmatter field of every ``skills/*/SKILL.md``.
    Skills whose ``disable-model-invocation`` field is true ("true", "yes" or
    "1", case-insensitive) are skipped, as are files that cannot be read.
    Tokens are approximated as ``len(description) // 4``.

    Args:
        base_dir: Base directory (defaults to cwd)
        threshold_tokens: Total token budget
        per_skill_warn_tokens: A skill whose estimate is at or above this value
            is listed as a violation

    Returns:
        SkillBudgetResult with the total, a breakdown sorted by descending
        token estimate, and the per-skill violations
    """
    root = Path.cwd() if base_dir is None else base_dir
    skills_root = root / "skills"

    skill_files = sorted(skills_root.glob("*/SKILL.md")) if skills_root.exists() else []

    entries: list[tuple[Path, str, int]] = []
    for skill_file in skill_files:
        try:
            raw = skill_file.read_text()
        except OSError:
            # Unreadable skill files are ignored.
            continue

        meta = _parse_skill_frontmatter(raw)
        disabled_flag = meta.get("disable-model-invocation", "").lower()
        if disabled_flag in ("true", "yes", "1"):
            continue

        summary = meta.get("description", "")
        entries.append((skill_file, summary, len(summary) // 4))

    # Largest estimates first.
    entries.sort(key=lambda entry: entry[2], reverse=True)

    total = 0
    flagged: list[tuple[Path, str, int]] = []
    for entry in entries:
        cost = entry[2]
        total += cost
        if cost >= per_skill_warn_tokens:
            flagged.append(entry)

    return SkillBudgetResult(
        total_tokens=total,
        threshold_tokens=threshold_tokens,
        under_budget=total <= threshold_tokens,
        skill_breakdown=entries,
        violations=flagged,
    )
def fix_counts(base_dir: Path, result: VerificationResult) -> FixResult:
    """Fix count mismatches in documentation files.

    For every mismatch that names a file and a valid line, the first documented
    number for that category on the line is replaced with the actual count.
    Everything else in the file, including each line's terminator and the
    presence or absence of a final newline, is written back unchanged.

    Args:
        base_dir: Base directory path
        result: Verification result with mismatches

    Returns:
        FixResult with the number of lines rewritten and the sorted list of
        files that were modified
    """
    files_modified: set[str] = set()
    fixed_count = 0

    # Group mismatches by file so each file is read and written only once.
    mismatches_by_file: dict[str, list[CountResult]] = {}
    for mismatch in result.mismatches:
        if mismatch.file:
            mismatches_by_file.setdefault(mismatch.file, []).append(mismatch)

    for file_path, mismatches in mismatches_by_file.items():
        doc_path = base_dir / file_path
        content = doc_path.read_text()
        # Keep the line terminators: re-joining bare lines with "\n" would drop
        # the file's trailing newline. Line numbering is the same as for a
        # plain splitlines().
        lines = content.splitlines(keepends=True)

        for mismatch in mismatches:
            if mismatch.line is None or not 1 <= mismatch.line <= len(lines):
                continue
            line = lines[mismatch.line - 1]

            # Group 1 is the number to replace; group 2 is the text that
            # identifies the category and is preserved verbatim.
            if mismatch.category == "skills":
                # Also matches singular "skill". The \b stops the optional "s"
                # from backtracking past the negative lookahead, which would
                # otherwise let "skills description" match.
                pattern = r"(\d+)(\s+\w*\s*skills?\b(?!\s+description))"
            else:
                pattern = rf"(\d+)(\s+\w*\s*{re.escape(mismatch.category)})"

            new_line = re.sub(
                pattern,
                str(mismatch.actual) + r"\2",
                line,
                count=1,  # Only replace first occurrence
                flags=re.IGNORECASE,
            )

            if new_line != line:
                lines[mismatch.line - 1] = new_line
                fixed_count += 1
                files_modified.add(file_path)

        # Write back only if something in this file actually changed.
        if file_path in files_modified:
            doc_path.write_text("".join(lines))

    return FixResult(
        fixed_count=fixed_count,
        # Sorted so the result is deterministic rather than in set order.
        files_modified=sorted(files_modified),
    )