Coverage for little_loops / doc_counts.py: 95%

159 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-05-22 16:19 -0500

1"""Documentation count verification utilities. 

2 

3Provides automated verification that documented counts (commands, agents, skills) 

4match actual file counts in the codebase. 

5""" 

6 

7import re 

8from dataclasses import dataclass, field 

9from pathlib import Path 

10 

# Default total token budget for all skill descriptions combined
# (used as the default ``threshold_tokens`` of ``check_skill_budget``).
_DEFAULT_BUDGET_TOKENS: int = 2000
# Default per-skill token estimate at or above which a skill is listed
# as a violation by ``check_skill_budget``.
_DEFAULT_PER_SKILL_WARN_TOKENS: int = 200

# Documentation files scanned for documented counts, relative to the base directory.
DOC_FILES: list[str] = ["README.md", "CONTRIBUTING.md", "docs/ARCHITECTURE.md"]

# Category name -> (directory relative to the base directory, glob pattern counted in it).
COUNT_TARGETS: dict[str, tuple[str, str]] = {
    "commands": ("commands", "*.md"),
    "agents": ("agents", "*.md"),
    "skills": ("skills", "*/SKILL.md"),
    "loops": ("scripts/little_loops/loops", "*.yaml"),
}

# Text identifying a bridge skill. Bridge skills are auto-generated from
# commands/ and are therefore left out of the skill count.
BRIDGE_MARKER: str = "Bridged from `commands/"

31 

32 

33@dataclass 

34class CountResult: 

35 """Result of counting files in a directory.""" 

36 

37 category: str 

38 actual: int 

39 documented: int | None = None 

40 file: str | None = None 

41 line: int | None = None 

42 matches: bool = True 

43 

44 

@dataclass
class VerificationResult:
    """Aggregate outcome of a documentation count verification run."""

    # Number of documented counts examined. ``add_result`` does not touch
    # this field; the code driving the verification increments it.
    total_checked: int = 0
    # Every result whose documented figure differed from the real one.
    mismatches: list[CountResult] = field(default_factory=list)
    # True until the first non-matching result is added.
    all_match: bool = True

    def add_result(self, result: CountResult) -> None:
        """Record ``result``; a non-matching one is stored and clears ``all_match``."""
        if result.matches:
            return
        self.all_match = False
        self.mismatches.append(result)

58 

59 

@dataclass
class FixResult:
    """Summary of an attempt to rewrite mismatched counts in documentation."""

    # Number of documented counts that were rewritten.
    fixed_count: int
    # Paths of the documentation files that were changed.
    files_modified: list[str]

66 

67 

68def count_files(directory: str, pattern: str, base_dir: Path | None = None) -> int: 

69 """Count files matching pattern in directory. 

70 

71 Args: 

72 directory: Directory name relative to base_dir 

73 pattern: Glob pattern (e.g., "*.md" or "SKILL.md") 

74 base_dir: Base directory path (defaults to current working directory) 

75 

76 Returns: 

77 Number of matching files 

78 """ 

79 if base_dir is None: 

80 base_dir = Path.cwd() 

81 dir_path = base_dir / directory 

82 if not dir_path.exists(): 

83 return 0 

84 

85 return len(list(dir_path.glob(pattern))) 

86 

87 

88def extract_count_from_line(line: str, category: str) -> int | None: 

89 """Extract count from a documentation line. 

90 

91 Handles multiple formats: 

92 - "34 commands" or "34 slash commands" 

93 - "8 agents" or "8 specialized agents" 

94 - "6 skills" or "6 skill definitions" 

95 

96 Args: 

97 line: Line text to search 

98 category: Category name (commands, agents, skills) 

99 

100 Returns: 

101 Extracted count or None if not found 

102 """ 

103 # For skills, also match singular "skill" (e.g., "skill definitions") 

104 # Pattern matches: number followed by optional words and category name 

105 # Examples: "34 commands", "8 specialized agents", "6 skill definitions" 

106 if category == "skills": 

107 # Match both "skills" and "skill" (singular) 

108 pattern = r"(\d+)\s+\w*\s*skills?(?!\s+description)" 

109 else: 

110 pattern = rf"(\d+)\s+\w*\s*{category}" 

111 

112 match = re.search(pattern, line, re.IGNORECASE) 

113 return int(match.group(1)) if match else None 

114 

115 

def verify_documentation(
    base_dir: Path | None = None,
) -> VerificationResult:
    """Verify all documented counts against actual file counts.

    Every line of every file in DOC_FILES is checked against every category
    in COUNT_TARGETS. Each count found is compared with the number of files
    on disk; skills marked with BRIDGE_MARKER are excluded from the skill
    total because they are auto-generated from commands/.

    Args:
        base_dir: Base directory path (defaults to current working directory)

    Returns:
        VerificationResult with the number of counts checked and all mismatches
    """
    if base_dir is None:
        base_dir = Path.cwd()
    result = VerificationResult()

    # Get actual counts
    actual_counts: dict[str, int] = {
        category: count_files(directory, pattern, base_dir)
        for category, (directory, pattern) in COUNT_TARGETS.items()
    }

    # Adjust skill count to exclude bridge skills (auto-generated from commands/)
    skills_dir = base_dir / "skills"
    if "skills" in actual_counts and skills_dir.is_dir():
        bridged = 0
        for skill_md in skills_dir.glob("*/SKILL.md"):
            try:
                text = skill_md.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # An unreadable file cannot be identified as a bridge, so it
                # stays in the count instead of aborting the whole check.
                continue
            if BRIDGE_MARKER in text:
                bridged += 1
        actual_counts["skills"] -= bridged

    # Check each documentation file
    for doc_file in DOC_FILES:
        doc_path = base_dir / doc_file
        if not doc_path.is_file():
            continue

        # Explicit UTF-8 keeps results independent of the platform locale.
        doc_lines = doc_path.read_text(encoding="utf-8").splitlines()

        for line_num, line in enumerate(doc_lines, start=1):
            for category, actual in actual_counts.items():
                documented = extract_count_from_line(line, category)
                if documented is None:
                    continue

                result.add_result(
                    CountResult(
                        category=category,
                        actual=actual,
                        documented=documented,
                        file=str(doc_file),
                        line=line_num,
                        matches=documented == actual,
                    )
                )
                result.total_checked += 1

    return result

171 

172 

def format_result_text(result: VerificationResult) -> str:
    """Render a verification result as plain text.

    Args:
        result: Verification result

    Returns:
        A title and rule, followed by either a success line or the list of
        mismatches with their locations
    """
    header = ["Documentation Count Verification", "=" * 40]

    if result.all_match:
        return "\n".join([*header, f"✓ All {result.total_checked} count(s) match!"])

    body = [f"✗ Found {len(result.mismatches)} mismatch(es):", ""]
    for item in result.mismatches:
        # Two lines per mismatch: the figures, then where they were found.
        body += [
            f" {item.category}: documented={item.documented}, actual={item.actual}",
            f" at {item.file}:{item.line}",
        ]

    return "\n".join(header + body)

197 

198 

def format_result_json(result: VerificationResult) -> str:
    """Render a verification result as an indented JSON document.

    Args:
        result: Verification result

    Returns:
        JSON string with ``all_match``, ``total_checked`` and one object
        per mismatch
    """
    import json

    # Attribute names copied, in this order, into each mismatch object.
    mismatch_fields = ("category", "documented", "actual", "file", "line")

    payload = {
        "all_match": result.all_match,
        "total_checked": result.total_checked,
        "mismatches": [
            {name: getattr(entry, name) for name in mismatch_fields}
            for entry in result.mismatches
        ],
    }

    return json.dumps(payload, indent=2)

226 

227 

def format_result_markdown(result: VerificationResult) -> str:
    """Render a verification result as Markdown.

    Args:
        result: Verification result

    Returns:
        Markdown with a title and either a success section or a table
        holding one row per mismatch
    """
    title = ["# Documentation Count Verification", ""]

    if result.all_match:
        summary = [
            "## ✅ All Counts Match",
            f"\nAll {result.total_checked} documented count(s) are accurate.",
        ]
        return "\n".join(title + summary)

    table = [
        "## ❌ Mismatches Found",
        "",
        "| Category | Documented | Actual | Location |",
        "|----------|-----------|--------|----------|",
    ]
    table.extend(
        f"| {row.category} | {row.documented} | {row.actual} | `{row.file}:{row.line}` |"
        for row in result.mismatches
    )

    return "\n".join(title + table)

255 

256 

@dataclass
class SkillBudgetResult:
    """Outcome of checking skill descriptions against a token budget."""

    # Sum of the estimated tokens of all skills in ``skill_breakdown``.
    total_tokens: int
    # Budget the total was compared against.
    threshold_tokens: int
    # Whether the total is within the budget.
    under_budget: bool
    # One (SKILL.md path, description, estimated tokens) tuple per skill.
    skill_breakdown: list[tuple[Path, str, int]]
    # Entries, in the same tuple shape, flagged as individually too large.
    violations: list[tuple[Path, str, int]]

266 

267 

def _parse_skill_frontmatter(text: str) -> dict[str, str]:
    """Extract top-level key/value pairs from SKILL.md frontmatter.

    This is a deliberately small parser, not a YAML implementation. The
    frontmatter is the text between a first line of ``---`` and the next
    line consisting only of ``---``. Unindented ``key: value`` lines define
    entries; indented lines are treated as a continuation of the previous
    entry, which covers block scalars (``description: |`` / ``>``) and
    multi-line plain values. Matching surrounding quotes are removed from
    single-line values.

    Args:
        text: Full text of a SKILL.md file

    Returns:
        Mapping of key to value as strings; empty when the text has no
        opening or no closing delimiter
    """
    lines = text.splitlines()
    if not lines or lines[0].rstrip() != "---":
        return {}

    # The closing delimiter must be a line of its own, so a "---" that
    # appears inside a value does not cut the frontmatter short.
    closing = next(
        (idx for idx, ln in enumerate(lines[1:], start=1) if ln.rstrip() == "---"),
        None,
    )
    if closing is None:
        return {}

    # (key, value on the key's own line, indented continuation lines)
    entries: list[tuple[str, str, list[str]]] = []
    for raw in lines[1:closing]:
        if not raw.strip():
            continue
        if raw[0] in " \t":
            # Indented content belongs to the previous key even when it
            # contains ":" (e.g. "Use when: ..." inside a description).
            if entries:
                entries[-1][2].append(raw.strip())
            continue
        key, sep, val = raw.partition(":")
        if sep:
            entries.append((key.strip(), val.strip(), []))

    fm: dict[str, str] = {}
    for key, val, extra in entries:
        if re.fullmatch(r"[|>][-+\d]{0,2}", val):
            # Block scalar header: the value is the indented text that
            # follows; "|" keeps line breaks, ">" folds them into spaces.
            val = ("\n" if val[0] == "|" else " ").join(extra)
        else:
            if extra:
                val = " ".join([val, *extra]).strip()
            if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
                val = val[1:-1]
        fm[key] = val
    return fm

281 

282 

def check_skill_budget(
    base_dir: Path | None = None,
    threshold_tokens: int = _DEFAULT_BUDGET_TOKENS,
    per_skill_warn_tokens: int = _DEFAULT_PER_SKILL_WARN_TOKENS,
) -> SkillBudgetResult:
    """Scan skills/*/SKILL.md description fields, estimate tokens, check budget.

    Skips skills whose ``disable-model-invocation`` frontmatter value is
    ``true``, ``yes`` or ``1`` (case-insensitive), and skills whose file
    cannot be read or decoded. Token estimate uses the character-count
    approximation ``len(description) // 4``.

    Args:
        base_dir: Base directory (defaults to cwd)
        threshold_tokens: Total token budget (default: 2000 = ~1% of 200k context)
        per_skill_warn_tokens: Per-skill threshold; a skill whose estimate is
            at or above it is listed as a violation

    Returns:
        SkillBudgetResult with the total, the breakdown sorted by descending
        token estimate, and the per-skill violations
    """
    if base_dir is None:
        base_dir = Path.cwd()

    skills_dir = base_dir / "skills"
    skill_breakdown: list[tuple[Path, str, int]] = []

    if skills_dir.is_dir():
        # Sorted paths keep the breakdown order deterministic for ties.
        for skill_md in sorted(skills_dir.glob("*/SKILL.md")):
            try:
                # Explicit UTF-8 keeps estimates independent of the locale.
                text = skill_md.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError):
                # Best effort: an unreadable or undecodable file is skipped
                # rather than failing the whole budget check.
                continue
            fm = _parse_skill_frontmatter(text)
            if fm.get("disable-model-invocation", "").lower() in ("true", "yes", "1"):
                continue
            description = fm.get("description", "")
            skill_breakdown.append((skill_md, description, len(description) // 4))

    skill_breakdown.sort(key=lambda entry: entry[2], reverse=True)
    total_tokens = sum(tokens for _, _, tokens in skill_breakdown)
    violations = [entry for entry in skill_breakdown if entry[2] >= per_skill_warn_tokens]

    return SkillBudgetResult(
        total_tokens=total_tokens,
        threshold_tokens=threshold_tokens,
        under_budget=total_tokens <= threshold_tokens,
        skill_breakdown=skill_breakdown,
        violations=violations,
    )

331 

332 

def fix_counts(base_dir: Path, result: VerificationResult) -> FixResult:
    """Fix count mismatches in documentation files.

    For every mismatch that names a file and a valid line, the documented
    number on that line is replaced with the actual count. Files are only
    written when at least one of their lines changed, and everything else
    in the file, including the trailing newline, is left as it was.

    Args:
        base_dir: Base directory path
        result: Verification result with mismatches

    Returns:
        FixResult with the number of counts rewritten and the modified
        files, listed in the order they were first changed
    """
    files_modified: list[str] = []
    fixed_count = 0

    # Group mismatches by file so each file is read and written once
    mismatches_by_file: dict[str, list[CountResult]] = {}
    for mismatch in result.mismatches:
        if mismatch.file:
            mismatches_by_file.setdefault(mismatch.file, []).append(mismatch)

    for file_path, mismatches in mismatches_by_file.items():
        doc_path = base_dir / file_path
        # keepends=True keeps each line's terminator, so joining the lines
        # again reproduces the file exactly apart from the edited numbers.
        lines = doc_path.read_text(encoding="utf-8").splitlines(keepends=True)
        changed = False

        for mismatch in mismatches:
            if mismatch.line is None or not 1 <= mismatch.line <= len(lines):
                continue
            index = mismatch.line - 1
            line = lines[index]

            # Build regex pattern based on category
            # For skills, also match singular "skill"
            if mismatch.category == "skills":
                pattern = r"(\d+)(\s+\w*\s*skills?(?!\s+description))"
            else:
                pattern = rf"(\d+)(\s+\w*\s*{re.escape(mismatch.category)})"

            # Pick the occurrence carrying the number that was reported, so
            # an unrelated figure earlier on the line is never overwritten.
            # Without a documented value, fall back to the first occurrence.
            target = next(
                (
                    found
                    for found in re.finditer(pattern, line, flags=re.IGNORECASE)
                    if mismatch.documented is None
                    or int(found.group(1)) == mismatch.documented
                ),
                None,
            )
            if target is None:
                continue

            # Replace only the digits; the rest of the line is preserved
            new_line = line[: target.start(1)] + str(mismatch.actual) + line[target.end(1) :]
            if new_line != line:
                lines[index] = new_line
                fixed_count += 1
                changed = True

        # Write back if changes were made
        if changed:
            doc_path.write_text("".join(lines), encoding="utf-8")
            files_modified.append(file_path)

    return FixResult(
        fixed_count=fixed_count,
        files_modified=files_modified,
    )