Coverage for src / tracekit / reporting / index.py: 76%

210 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Index file generation for comprehensive analysis reports. 

2 

3This module provides HTML and Markdown index generation from analysis results 

4using a simple template engine (no external dependencies like Jinja2). 

5""" 

6 

7from __future__ import annotations 

8 

9import re 

10from pathlib import Path 

11from typing import TYPE_CHECKING, Any 

12 

13if TYPE_CHECKING: 

14 from tracekit.reporting.config import AnalysisResult 

15 from tracekit.reporting.output import OutputManager 

16 

17 

class TemplateEngine:
    """Simple template engine for variable substitution and control flow.

    Supports:
    - {{variable}} - Variable substitution; dotted names such as
      {{domain.value}} walk nested dicts / object attributes
    - {{#if condition}}...{{/if}} - Conditional blocks (may be nested)
    - {{#each items}}...{{/each}} - Iteration blocks (may be nested)
    - {{this}} - Current item in iteration

    Rendering is done in three passes: each-blocks, then if-blocks, then
    variables.

    NOTE: the text produced by an each-block is scanned again by the later
    if/variable passes in ``render``; item values that themselves contain
    ``{{...}}`` markers are therefore interpreted as template syntax.
    """

    # Opening tags for block constructs. Compiled once at class level so the
    # scanners can call ``search(template, pos)`` without slicing the template.
    _EACH_OPEN = re.compile(r"\{\{#each\s+([^}]+)\}\}")
    _IF_OPEN = re.compile(r"\{\{#if\s+([^}]+)\}\}")

    def __init__(self) -> None:
        """Initialize template engine."""
        # Matches {{name}} but not block tags ({{#...}} / {{/...}}).
        self._var_pattern = re.compile(r"\{\{([^#/}][^}]*)\}\}")
        # The two whole-block patterns below are not used by this class any
        # more (blocks are matched with nesting-aware scanning instead); they
        # are kept so any code that reads these attributes keeps working.
        self._if_pattern = re.compile(r"\{\{#if\s+([^}]+)\}\}(.*?)\{\{/if\}\}", re.DOTALL)
        self._each_pattern = re.compile(r"\{\{#each\s+([^}]+)\}\}(.*?)\{\{/each\}\}", re.DOTALL)

    def render(self, template: str, context: dict[str, Any]) -> str:
        """Render template with context.

        Args:
            template: Template string with placeholders.
            context: Context dictionary for variable substitution.

        Returns:
            Rendered template string.

        Examples:
            >>> engine = TemplateEngine()
            >>> engine.render("Hello {{name}}", {"name": "World"})
            'Hello World'
            >>> engine.render("{{#if show}}visible{{/if}}", {"show": True})
            'visible'
            >>> engine.render("{{#each items}}{{this}} {{/each}}", {"items": [1, 2]})
            '1 2 '
            >>> engine.render("{{#if a}}x{{#if b}}y{{/if}}z{{/if}}", {"a": 1, "b": 0})
            'xz'
        """
        # Each-blocks go first: they render their bodies with a per-item
        # context, which must happen before the outer if/variable passes.
        result = self._process_each_blocks(template, context)
        result = self._process_if_blocks(result, context)
        return self._process_variables(result, context)

    def _process_variables(self, template: str, context: dict[str, Any]) -> str:
        """Replace {{variable}} with values from context.

        Args:
            template: Template string.
            context: Context dictionary.

        Returns:
            Template with variables replaced. Unknown names and ``None``
            values render as an empty string.
        """

        def replace_var(match: re.Match[str]) -> str:
            var_name = match.group(1).strip()

            # "this" refers to the current iteration item, which
            # _process_each_blocks stores under "_current_item".
            if var_name == "this":
                return str(context.get("_current_item", ""))

            # Walk dotted names like "domain.value" one segment at a time.
            value: Any = context
            for key in var_name.split("."):
                if isinstance(value, dict):
                    value = value.get(key, "")
                elif hasattr(value, key):
                    value = getattr(value, key)
                else:
                    value = ""
                    break

            # Unwrap objects exposing ``.value`` (e.g. enum members).
            if hasattr(value, "value"):
                value = value.value

            return str(value) if value is not None else ""

        return self._var_pattern.sub(replace_var, template)

    def _process_if_blocks(self, template: str, context: dict[str, Any]) -> str:
        """Process {{#if condition}}...{{/if}} blocks, including nested ones.

        The condition is looked up as a plain key in ``context``; the block
        body is kept when the value is truthy and dropped otherwise. An
        opening tag without a matching ``{{/if}}`` is left in the output
        unchanged.

        Args:
            template: Template string.
            context: Context dictionary.

        Returns:
            Template with conditionals processed.
        """
        pieces: list[str] = []
        pos = 0

        while pos < len(template):
            opener = self._IF_OPEN.search(template, pos)
            if opener is None:
                # No more if-blocks: keep the tail verbatim.
                pieces.append(template[pos:])
                break

            # Text before the block is passed through untouched.
            pieces.append(template[pos : opener.start()])

            body_start = opener.end()
            body_end = self._find_matching_end(template, body_start, "if")
            if body_end == -1:
                # Unbalanced tag: emit it literally and keep scanning after it.
                pieces.append(opener.group(0))
                pos = body_start
                continue

            if context.get(opener.group(1).strip(), False):
                # Recurse so that if-blocks nested in the body are resolved
                # against their own closing tags.
                pieces.append(self._process_if_blocks(template[body_start:body_end], context))

            pos = body_end + len("{{/if}}")

        return "".join(pieces)

    def _process_each_blocks(self, template: str, context: dict[str, Any]) -> str:
        """Process {{#each items}}...{{/each}} blocks, including nested ones.

        For every item the block body is rendered completely (each, if and
        variable passes) with a context made of the outer context plus the
        item: dict items are merged in key by key, other items are exposed
        as ``this``. A missing or empty collection renders nothing. An
        opening tag without a matching ``{{/each}}`` is left in the output
        unchanged.

        Args:
            template: Template string.
            context: Context dictionary.

        Returns:
            Template with iterations processed.
        """
        pieces: list[str] = []
        pos = 0

        while pos < len(template):
            opener = self._EACH_OPEN.search(template, pos)
            if opener is None:
                # No more each-blocks: keep the tail verbatim.
                pieces.append(template[pos:])
                break

            # Text before the block is passed through untouched.
            pieces.append(template[pos : opener.start()])

            body_start = opener.end()
            body_end = self._find_matching_end(template, body_start, "each")
            if body_end == -1:
                # Unbalanced tag: emit it literally and keep scanning after it.
                pieces.append(opener.group(0))
                pos = body_start
                continue

            item_template = template[body_start:body_end]
            items = context.get(opener.group(1).strip(), [])

            for item in items or ():
                if isinstance(item, dict):
                    item_context = {**context, **item, "_current_item": item}
                else:
                    item_context = {**context, "this": item, "_current_item": item}

                # Render the body fully with the per-item context.
                rendered = self._process_each_blocks(item_template, item_context)
                rendered = self._process_if_blocks(rendered, item_context)
                rendered = self._process_variables(rendered, item_context)
                pieces.append(rendered)

            pos = body_end + len("{{/each}}")

        return "".join(pieces)

    def _find_matching_end(self, template: str, start_pos: int, block_type: str) -> int:
        """Find matching end tag for a block, accounting for nesting.

        Args:
            template: Template string.
            start_pos: Position after the opening tag.
            block_type: Block type (e.g., "each", "if").

        Returns:
            Position of the start of the matching {{/block_type}} tag, or -1 if not found.
        """
        open_tag = f"{{{{#{block_type}"
        close_tag = f"{{{{/{block_type}}}}}"
        depth = 1  # the caller has already consumed one opening tag
        pos = start_pos

        while pos < len(template) and depth > 0:
            next_open = template.find(open_tag, pos)
            next_close = template.find(close_tag, pos)

            if next_close == -1:
                # Ran out of closing tags before the block was balanced.
                return -1

            if next_open != -1 and next_open < next_close:
                # A nested block opens before the next close.
                depth += 1
                pos = next_open + len(open_tag)
            else:
                depth -= 1
                if depth == 0:
                    return next_close
                pos = next_close + len(close_tag)

        return -1

229 

230 

class IndexGenerator:
    """Generate HTML and Markdown index files from analysis results.

    Creates navigable index pages that link to all analysis outputs including
    plots, data files, and domain-specific results.

    Attributes:
        output_manager: Output manager for file operations.
    """

    def __init__(self, output_manager: OutputManager) -> None:
        """Initialize index generator.

        Args:
            output_manager: Output manager for file operations.

        Examples:
            >>> from pathlib import Path
            >>> om = OutputManager(Path("/tmp/output"), "test")
            >>> generator = IndexGenerator(om)
        """
        self._output_manager = output_manager
        self._engine = TemplateEngine()

        # Templates live in a "templates" directory next to this module.
        self._template_dir = Path(__file__).parent / "templates"

    def generate(
        self,
        result: AnalysisResult,
        include_formats: list[str] | None = None,
    ) -> dict[str, Path]:
        """Generate index files in requested formats.

        Args:
            result: Analysis result containing all output metadata.
            include_formats: Formats to generate (e.g., ["html", "md"]).
                Defaults to ["html", "md"] if None. Other names are ignored.

        Returns:
            Dictionary mapping format name to generated file path.

        Raises:
            FileNotFoundError: If the template for a requested format is missing.

        Examples:
            >>> # result = AnalysisResult(...)
            >>> # generator = IndexGenerator(output_manager)
            >>> # paths = generator.generate(result, ["html", "md"])
            >>> # paths["html"]  # Path to index.html
        """
        if include_formats is None:
            include_formats = ["html", "md"]

        # The context is built once and shared by every output format.
        context = self._build_context(result)

        outputs: dict[str, Path] = {}

        if "html" in include_formats:
            html_content = self._render_html(context)
            outputs["html"] = self._output_manager.save_text("index.html", html_content)

        if "md" in include_formats:
            md_content = self._render_markdown(context)
            outputs["md"] = self._output_manager.save_text("index.md", md_content)

        return outputs

    @staticmethod
    def _parse_timestamp(dir_name: str) -> str:
        """Turn a ``YYYYMMDD_HHMMSS_...`` directory name into a display timestamp.

        Args:
            dir_name: Name of the output directory.

        Returns:
            ``"YYYY-MM-DD HH:MM:SS"`` when the first two underscore-separated
            parts are 8 and 6 ASCII digits respectively, otherwise ``"N/A"``.
        """
        parts = dir_name.split("_")
        if len(parts) < 2:
            return "N/A"

        date_part, time_part = parts[0], parts[1]
        digits = date_part + time_part
        if len(date_part) != 8 or len(time_part) != 6:
            return "N/A"
        if not (digits.isascii() and digits.isdigit()):
            # Right lengths but not numeric: not a timestamp prefix.
            return "N/A"

        return (
            f"{date_part[:4]}-{date_part[4:6]}-{date_part[6:8]} "
            f"{time_part[:2]}:{time_part[2:4]}:{time_part[4:6]}"
        )

    @staticmethod
    def _relative_link(path: Path, base: Path) -> str:
        """Return ``path`` relative to ``base``, or ``path`` unchanged if it is not under ``base``."""
        try:
            return str(path.relative_to(base))
        except ValueError:
            # relative_to() raises when path is not located under base, e.g.
            # for a path that is already relative. Presumably such paths are
            # already relative to the output directory - TODO confirm.
            return str(path)

    def _build_context(self, result: AnalysisResult) -> dict[str, Any]:
        """Build template context from AnalysisResult.

        Args:
            result: Analysis result.

        Returns:
            Context dictionary for template rendering. It always holds the
            summary fields and a ``domains`` list; an ``errors`` list is added
            only when ``result.errors`` is non-empty.
        """
        output_dir = result.output_dir

        # Basic metadata
        context: dict[str, Any] = {
            "title": "Analysis Report",
            "input_name": result.input_file or "In-Memory Data",
            "input_size": self._format_size(result.input_file),
            "input_type": result.input_type.value,
            "timestamp": self._parse_timestamp(output_dir.name),
            "duration": self._format_duration(result.duration_seconds),
            "total_analyses": result.total_analyses,
            "successful": result.successful_analyses,
            "failed": result.failed_analyses,
            "domains_count": len(result.domain_summaries),
            "has_errors": len(result.errors) > 0,
        }

        # domain_summaries maps each domain to {function_name: result_value}.
        domains: list[dict[str, Any]] = []
        for domain, domain_results in result.domain_summaries.items():
            domain_id = domain.value
            analyses_count = len(domain_results) if isinstance(domain_results, dict) else 0

            # A plot belongs to the domain when its path contains the domain id
            # as a directory component or starts with the domain id.
            domain_plots: list[dict[str, Any]] = []
            for plot_path in result.plot_paths or ():
                plot_str = str(plot_path)
                if f"/{domain_id}/" in plot_str or plot_str.startswith(domain_id):
                    domain_plots.append(
                        {
                            "title": plot_path.stem.replace("_", " ").title(),
                            "path": self._relative_link(plot_path, output_dir),
                            "filename": plot_path.name,
                        }
                    )

            # JSON files directly inside the domain directory are listed as
            # data files; sorted so the generated index is deterministic.
            domain_data_files: list[dict[str, Any]] = []
            domain_dir = result.domain_dirs.get(domain)
            if domain_dir and domain_dir.exists():
                for data_file in sorted(domain_dir.glob("*.json")):
                    domain_data_files.append(
                        {
                            "filename": data_file.name,
                            "path": self._relative_link(data_file, output_dir),
                            "format": "JSON",
                        }
                    )

            domains.append(
                {
                    "domain_id": domain_id,
                    "domain_name": domain_id.replace("_", " ").title(),
                    "analyses_count": analyses_count,
                    "plots_count": len(domain_plots),
                    "data_files_count": len(domain_data_files),
                    "key_findings": self._extract_key_findings(domain_results),
                    "plots": domain_plots,
                    "data_files": domain_data_files,
                }
            )

        context["domains"] = domains

        if result.errors:
            context["errors"] = [
                {
                    "domain": error.domain.value,
                    "analysis_name": error.function,
                    "error_message": error.error_message,
                }
                for error in result.errors
            ]

        return context

    def _extract_key_findings(self, domain_results: dict[str, Any]) -> list[str]:
        """Extract key findings from domain results for display.

        Numbers are shown directly (floats with 4 significant digits); dicts
        with at most three entries are shown inline. ``None``, NaN and any
        other result types are skipped.

        Args:
            domain_results: Dictionary of analysis function results. Anything
                that is not a dict yields no findings.

        Returns:
            List of key finding strings, at most five.
        """
        if not isinstance(domain_results, dict):
            return []

        findings: list[str] = []
        for func_name, value in domain_results.items():
            # "pkg.module.some_metric" -> "Some Metric"
            short_name = str(func_name).split(".")[-1].replace("_", " ").title()

            if value is None:
                continue

            if isinstance(value, (int, float)):
                # NaN is the only float that differs from itself.
                if isinstance(value, float) and value != value:
                    continue
                if isinstance(value, float):
                    findings.append(f"{short_name}: {value:.4g}")
                else:
                    findings.append(f"{short_name}: {value}")
            elif isinstance(value, dict) and len(value) <= 3:
                items = [
                    f"{k}: {v:.4g}" if isinstance(v, float) else f"{k}: {v}"
                    for k, v in value.items()
                    if v is not None and not (isinstance(v, float) and v != v)
                ]
                if items:
                    findings.append(f"{short_name}: {', '.join(items)}")

        # Only the first five findings are shown.
        return findings[:5]

    def _format_plots(self, plots: list[dict[str, Any]]) -> list[dict[str, str]]:
        """Format plot information for templates.

        Args:
            plots: List of plot dictionaries.

        Returns:
            One dict per plot with "title" (default "Untitled"), "path" and
            "filename" (the last component of the path).
        """
        return [
            {
                "title": plot.get("title", "Untitled"),
                "path": str(plot.get("path", "")),
                "filename": Path(plot.get("path", "")).name,
            }
            for plot in plots
        ]

    def _format_data_files(self, data_files: list[dict[str, Any]]) -> list[dict[str, str]]:
        """Format data file information for templates.

        Args:
            data_files: List of data file dictionaries.

        Returns:
            One dict per file with "filename", "path" and "format" (the
            upper-cased file extension, or "DATA" when there is none).
        """
        formatted: list[dict[str, str]] = []
        for data_file in data_files:
            path = Path(data_file.get("path", ""))
            formatted.append(
                {
                    "filename": path.name,
                    "path": str(path),
                    "format": path.suffix.lstrip(".").upper() or "DATA",
                }
            )
        return formatted

    def _format_size(self, filepath: str | None) -> str:
        """Format file size in human-readable format.

        Args:
            filepath: Path to file.

        Returns:
            Formatted size string (e.g., "1.5 MB"), or "N/A" when no path is
            given or the file cannot be inspected.
        """
        if not filepath:
            return "N/A"

        try:
            size = float(Path(filepath).stat().st_size)
        except (OSError, TypeError, ValueError):
            # Missing or unreadable file, or a value that is not a usable path.
            return "N/A"

        for unit in ("B", "KB", "MB", "GB"):
            if size < 1024.0:
                return f"{size:.1f} {unit}"
            size /= 1024.0
        return f"{size:.1f} TB"

    def _format_duration(self, seconds: float) -> str:
        """Format duration in human-readable format.

        Args:
            seconds: Duration in seconds.

        Returns:
            Formatted duration string: "12.3s" below one minute, "1m 30s"
            below one hour, "2h 5m" otherwise.
        """
        if seconds < 60:
            return f"{seconds:.1f}s"
        if seconds < 3600:
            minutes = int(seconds // 60)
            secs = int(seconds % 60)
            return f"{minutes}m {secs}s"
        hours = int(seconds // 3600)
        minutes = int((seconds % 3600) // 60)
        return f"{hours}h {minutes}m"

    def _render_template(self, filename: str, label: str, context: dict[str, Any]) -> str:
        """Load ``filename`` from the template directory and render it with ``context``."""
        template_path = self._template_dir / filename
        try:
            # Read as UTF-8 explicitly so rendering does not depend on the
            # platform's default encoding.
            template = template_path.read_text(encoding="utf-8")
        except FileNotFoundError as exc:
            raise FileNotFoundError(f"{label} template not found: {template_path}") from exc
        return self._engine.render(template, context)

    def _render_html(self, context: dict[str, Any]) -> str:
        """Render HTML index from template.

        Args:
            context: Template context.

        Returns:
            Rendered HTML string.

        Raises:
            FileNotFoundError: If HTML template file not found.
        """
        return self._render_template("index.html", "HTML", context)

    def _render_markdown(self, context: dict[str, Any]) -> str:
        """Render Markdown index from template.

        Args:
            context: Template context.

        Returns:
            Rendered Markdown string.

        Raises:
            FileNotFoundError: If Markdown template file not found.
        """
        return self._render_template("index.md", "Markdown", context)

585 

586 

587__all__ = [ 

588 "IndexGenerator", 

589 "TemplateEngine", 

590]