Coverage for src / tracekit / reporting / output.py: 70%

123 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Output management for comprehensive analysis reports. 

2 

3This module provides directory structure and file management for analysis 

4report outputs, including plots, JSON/YAML data exports, and logs. 

5""" 

6 

7from __future__ import annotations 

8 

9import json 

10from datetime import datetime 

11from pathlib import Path 

12from typing import Any 

13 

14import numpy as np 

15import yaml 

16 

17from tracekit.reporting.config import AnalysisDomain # noqa: TC001 

18 

19 

def _sanitize_for_serialization(obj: Any, max_depth: int = 10) -> Any:
    """Convert an arbitrary object into something JSON/YAML can serialize.

    The conversion is best-effort: it never raises. Any unexpected failure
    is reported in-band as an ``"<error: ...>"`` placeholder string.

    Conversion rules, in the order they are tried:

    * TraceKit ``WaveformTrace`` / ``DigitalTrace`` / ``TraceMetadata``
      instances are returned untouched.
    * ``dict``: values are sanitized recursively; ``bytes`` keys become
      ``"0x<hex>"`` and keys that are not ``str``/``int``/``float``/``bool``/
      ``None`` are passed through ``str()``.
    * ``list`` / ``tuple`` / generators become lists of sanitized items
      (a generator that raises while being consumed yields ``None``).
    * ``numpy.ndarray``: arrays with more than 10000 elements become a
      ``"<ndarray shape=... dtype=...>"`` placeholder, smaller ones are
      converted with ``tolist()``; non-finite entries of floating-point
      arrays are replaced with ``None``.
    * NumPy scalars are unwrapped with ``item()`` and the resulting Python
      value is sanitized by the rules below.
    * ``float`` nan/inf and ``complex`` values with a nan/inf component
      become ``None``; finite complex values become
      ``{"real": ..., "imag": ...}``.
    * ``bytes``: hex string, or a ``"<bytes len=N>"`` placeholder when
      longer than 1000 bytes.
    * Plain functions and bound methods become ``"<callable: name>"``.
    * Other objects exposing ``__dict__`` (except classes) become a dict of
      their sanitized attributes.
    * Remaining callables become ``"<callable: name>"``; everything else is
      returned unchanged.

    Args:
        obj: Object to sanitize.
        max_depth: Remaining recursion budget. When it reaches zero the
            string ``"<max depth exceeded>"`` is returned instead of
            descending further.

    Returns:
        Serialization-safe version of ``obj``.
    """
    import math
    import types

    from tracekit.core.types import DigitalTrace, TraceMetadata, WaveformTrace

    if max_depth <= 0:
        return "<max depth exceeded>"

    try:
        # Don't sanitize TraceKit types - let the JSONEncoder handle them
        if isinstance(obj, WaveformTrace | DigitalTrace | TraceMetadata):
            return obj

        if isinstance(obj, dict):
            sanitized: dict[Any, Any] = {}
            for key, value in obj.items():
                if isinstance(key, bytes):
                    key = f"0x{key.hex()}"
                elif not isinstance(key, str | int | float | bool | type(None)):
                    key = str(key)
                sanitized[key] = _sanitize_for_serialization(value, max_depth - 1)
            return sanitized

        if isinstance(obj, list | tuple):
            return [_sanitize_for_serialization(item, max_depth - 1) for item in obj]

        if isinstance(obj, types.GeneratorType):
            try:
                return [_sanitize_for_serialization(item, max_depth - 1) for item in obj]
            except Exception:
                # A generator that fails mid-iteration is reported as None
                # (cleaner in the output than an error string).
                return None

        if isinstance(obj, np.ndarray):
            # Limit large arrays
            if obj.size > 10000:
                return f"<ndarray shape={obj.shape} dtype={obj.dtype}>"
            if obj.dtype.kind == "f":
                finite = np.isfinite(obj)
                if not finite.all():
                    # Apply the same nan/inf -> None rule used for scalar
                    # floats; the object cast lets None sit beside floats.
                    return np.where(finite, obj.astype(object), None).tolist()
            return obj.tolist()

        if isinstance(obj, np.generic):
            # This must precede the float/complex checks: np.float64 and
            # np.complex128 subclass the Python types but still need item().
            value = obj.item()
            if isinstance(value, np.generic):
                # item() did not yield a plain Python object; return it as-is
                # rather than recursing on the same kind of value.
                return value
            # Re-run the rules on the unwrapped value so nan/inf, complex and
            # bytes scalars get the same treatment as their Python twins.
            # The depth is not decremented: unwrapping is not a nesting level.
            return _sanitize_for_serialization(value, max_depth)

        if isinstance(obj, float):
            # Handle Python float inf/nan (not caught by JSONEncoder.default)
            if math.isinf(obj) or math.isnan(obj):
                return None
            return obj

        if isinstance(obj, complex):
            if not (math.isfinite(obj.real) and math.isfinite(obj.imag)):
                return None
            return {"real": obj.real, "imag": obj.imag}

        if isinstance(obj, bytes):
            # Limit large byte sequences
            if len(obj) > 1000:
                return f"<bytes len={len(obj)}>"
            return obj.hex()

        if isinstance(obj, types.FunctionType | types.MethodType):
            # Functions and bound methods expose __dict__, so they must be
            # caught before the attribute-dict branch or they collapse to {}.
            return f"<callable: {getattr(obj, '__name__', str(obj))}>"

        if hasattr(obj, "__dict__") and not isinstance(obj, type):
            # Convert dataclasses and objects to dicts
            try:
                return {
                    name: _sanitize_for_serialization(value, max_depth - 1)
                    for name, value in obj.__dict__.items()
                }
            except Exception:
                return str(obj)

        if callable(obj):
            return f"<callable: {getattr(obj, '__name__', str(obj))}>"

        # Nothing special to do (str, int, bool, None, ...): pass through.
        return obj
    except Exception as e:
        return f"<error: {type(e).__name__}: {str(e)[:50]}>"

124 

125 

class OutputManager:
    """Manages output directory structure and file operations for analysis reports.

    Builds a timestamped root directory name of the form
    ``<YYYYMMDD_HHMMSS>_<input_name>_analysis`` under ``base_dir`` and offers
    helpers to write JSON, YAML, text, and plot files beneath it. Nothing is
    created on disk until :meth:`create` or one of the ``save_*`` /
    ``create_domain_dir`` methods is called. All text files are written as
    UTF-8.

    Attributes:
        root: Root directory path for this analysis output.
        timestamp: Timestamp for this output session.
        timestamp_str: Formatted timestamp string.
    """

    def __init__(
        self,
        base_dir: Path,
        input_name: str,
        timestamp: datetime | None = None,
    ) -> None:
        """Initialize output manager.

        Args:
            base_dir: Base directory for all outputs.
            input_name: Name of the input file/dataset being analyzed. Path
                separators (``/`` and ``\\``) are replaced with ``_`` so the
                name always maps to a single directory under ``base_dir``.
            timestamp: Timestamp for this session (defaults to now).

        Examples:
            >>> from datetime import datetime
            >>> when = datetime(2026, 1, 1, 12, 0, 0)
            >>> manager = OutputManager(Path("/output"), "signal_data", when)
            >>> manager.root.name
            '20260101_120000_signal_data_analysis'
        """
        self._timestamp = timestamp or datetime.now()
        self._timestamp_str = self._timestamp.strftime("%Y%m%d_%H%M%S")

        # A separator inside the name would silently nest (or escape) the
        # output directory, so flatten it into the directory name instead.
        safe_name = input_name.replace("/", "_").replace("\\", "_")
        self._root = base_dir / f"{self._timestamp_str}_{safe_name}_analysis"

    @property
    def root(self) -> Path:
        """Root directory path for this analysis output."""
        return self._root

    @property
    def timestamp(self) -> datetime:
        """Timestamp for this output session."""
        return self._timestamp

    @property
    def timestamp_str(self) -> str:
        """Formatted timestamp string (YYYYMMDD_HHMMSS)."""
        return self._timestamp_str

    def _resolve_dir(self, subdir: str | None) -> Path:
        """Return the root or ``root/subdir``, creating it if necessary."""
        target_dir = self._root / subdir if subdir else self._root
        target_dir.mkdir(parents=True, exist_ok=True)
        return target_dir

    def create(self) -> Path:
        """Create output directory structure.

        Creates the root directory and standard subdirectories:
        - plots/: Visualization outputs
        - errors/: Error logs and diagnostics
        - logs/: Analysis logs
        - input/: Input file copies/metadata

        Returns:
            Path to the created root directory.

        Note:
            This method is idempotent - calling it multiple times is safe.

        Examples:
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> root = manager.create()
            >>> (root / "plots").exists()
            True
        """
        self._root.mkdir(parents=True, exist_ok=True)

        # Create standard subdirectories
        for subdir in ("plots", "errors", "logs", "input"):
            (self._root / subdir).mkdir(exist_ok=True)

        return self._root

    def create_domain_dir(self, domain: AnalysisDomain) -> Path:
        """Create and return domain-specific subdirectory.

        The directory is named after ``domain.value`` and placed directly
        under the root (e.g., spectral/, digital/, jitter/).

        Args:
            domain: Analysis domain.

        Returns:
            Path to the created domain directory.

        Examples:
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> _ = manager.create()
            >>> domain_dir = manager.create_domain_dir(AnalysisDomain.SPECTRAL)
            >>> domain_dir.name
            'spectral'
        """
        domain_dir = self._root / domain.value
        domain_dir.mkdir(parents=True, exist_ok=True)
        return domain_dir

    def save_json(
        self,
        name: str,
        data: dict[str, Any],
        subdir: str | None = None,
    ) -> Path:
        """Save data as JSON file with pretty formatting.

        Values the JSON encoder cannot handle natively are written via
        ``str()``. The target directory is created if it does not exist.

        Args:
            name: Filename (without .json extension).
            data: Dictionary to serialize.
            subdir: Optional subdirectory within root.

        Returns:
            Path to the saved JSON file.

        Examples:
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> _ = manager.create()
            >>> path = manager.save_json("metrics", {"snr": 42.5})
            >>> path.name
            'metrics.json'
        """
        filepath = self._resolve_dir(subdir) / f"{name}.json"
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with filepath.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, default=str)

        return filepath

    def save_yaml(
        self,
        name: str,
        data: dict[str, Any],
        subdir: str | None = None,
    ) -> Path:
        """Save data as YAML file.

        The data is first passed through ``_sanitize_for_serialization`` and
        then dumped in block style, preserving key order. The target
        directory is created if it does not exist.

        Args:
            name: Filename (without .yaml extension).
            data: Dictionary to serialize.
            subdir: Optional subdirectory within root.

        Returns:
            Path to the saved YAML file.

        Examples:
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> _ = manager.create()
            >>> path = manager.save_yaml("config", {"enabled": True})
            >>> path.name
            'config.yaml'
        """
        filepath = self._resolve_dir(subdir) / f"{name}.yaml"
        # Sanitize data to handle generators, numpy arrays, etc.
        sanitized_data = _sanitize_for_serialization(data)
        with filepath.open("w", encoding="utf-8") as f:
            yaml.dump(sanitized_data, f, default_flow_style=False, sort_keys=False)

        return filepath

    def save_plot(
        self,
        domain: AnalysisDomain,
        name: str,
        fig: Any,
        format: str = "png",
        dpi: int = 150,
    ) -> Path:
        """Save matplotlib figure to plots directory.

        Saves plot with domain-prefixed filename
        (``<domain.value>_<name>.<format>``) in the plots/ subdirectory,
        creating that directory if needed.

        Args:
            domain: Analysis domain for this plot.
            name: Plot name (without extension).
            fig: Figure object; its ``savefig`` method is called with
                ``bbox_inches="tight"``.
            format: Image format (png, pdf, svg, etc.).
            dpi: Resolution in dots per inch.

        Returns:
            Path to the saved plot file.

        Examples:
            >>> import matplotlib.pyplot as plt
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> _ = manager.create()
            >>> fig, ax = plt.subplots()
            >>> path = manager.save_plot(AnalysisDomain.SPECTRAL, "fft", fig)
            >>> path.name
            'spectral_fft.png'
        """
        plots_dir = self._root / "plots"
        plots_dir.mkdir(parents=True, exist_ok=True)

        filepath = plots_dir / f"{domain.value}_{name}.{format}"
        fig.savefig(filepath, format=format, dpi=dpi, bbox_inches="tight")

        return filepath

    def save_text(
        self,
        name: str,
        content: str,
        subdir: str | None = None,
    ) -> Path:
        """Save text content to file.

        The file is written as UTF-8 and the target directory is created if
        it does not exist.

        Args:
            name: Filename (with extension).
            content: Text content to write.
            subdir: Optional subdirectory within root.

        Returns:
            Path to the saved text file.

        Examples:
            >>> manager = OutputManager(Path("/tmp/output"), "test")
            >>> _ = manager.create()
            >>> path = manager.save_text("summary.txt", "Analysis complete")
            >>> path.name
            'summary.txt'
        """
        filepath = self._resolve_dir(subdir) / name
        filepath.write_text(content, encoding="utf-8")

        return filepath

378 

379 return filepath