Coverage for src/tracekit/reporting/config.py: 85% (161 statements)
« prev ^ index » next — coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
"""Configuration for comprehensive analysis report system.

This module defines the simplified configuration schema for the comprehensive
analysis report generator, combining input/output/analysis specs into a single
unified configuration.
"""
8from __future__ import annotations
10from collections.abc import Callable
11from dataclasses import dataclass, field
12from enum import Enum
13from pathlib import Path
14from typing import Any
class InputType(str, Enum):
    """Enumeration of the input data formats the report system accepts.

    Subclasses ``str`` so members compare equal to their plain string values
    (useful for config files and CLI arguments).
    """

    WAVEFORM = "waveform"  # Analog waveform (WFM, CSV, NPZ, HDF5)
    DIGITAL = "digital"  # Digital logic signals
    BINARY = "binary"  # Binary packet data
    PCAP = "pcap"  # Network capture
    IQ = "iq"  # I/Q baseband data
    PACKETS = "packets"  # Pre-parsed packets
    SPARAMS = "sparams"  # S-parameter/Touchstone data (.s1p-.s8p)
class AnalysisDomain(str, Enum):
    """Enumeration of analysis domain categories.

    Each member names one family of analyzers; see ``ANALYSIS_CAPABILITIES``
    for the modules backing each domain.
    """

    WAVEFORM = "waveform"  # Basic waveform measurements
    DIGITAL = "digital"  # Digital signal analysis
    TIMING = "timing"  # Timing measurements
    SPECTRAL = "spectral"  # FFT, PSD, THD, SNR
    STATISTICS = "statistics"  # Statistical analysis
    PATTERNS = "patterns"  # Pattern detection
    JITTER = "jitter"  # Jitter decomposition
    EYE = "eye"  # Eye diagram analysis
    POWER = "power"  # Power analysis
    PROTOCOLS = "protocols"  # Protocol decoding
    SIGNAL_INTEGRITY = "signal_integrity"  # S-params, equalization
    INFERENCE = "inference"  # Auto-inference
    PACKET = "packet"  # Packet metrics
    ENTROPY = "entropy"  # Entropy analysis
@dataclass
class DomainConfig:
    """Settings that apply to a single analysis domain.

    Attributes:
        enabled: Whether this analysis domain should run at all.
        parameters: Domain-specific keyword parameters forwarded verbatim
            to the domain's analyzer functions.
        timeout: Per-domain timeout in seconds; ``None`` means fall back to
            the global ``timeout_per_analysis`` setting.
    """

    enabled: bool = True
    parameters: dict[str, Any] = field(default_factory=dict)
    timeout: float | None = None
@dataclass
class DataOutputConfig:
    """Limits and aggregation behaviour for serialized report data.

    Controls how data is written into reports: truncate it, aggregate
    repeated values, or emit everything in full.

    Attributes:
        full_data_mode: When True (the default), output ALL data and ignore
            every ``max_*`` limit below.
        max_array_elements: Cap on array elements; ``None`` = unlimited.
        max_list_items: Cap on list items; ``None`` = unlimited.
        max_bytes_sample: Cap on raw bytes included; ``None`` = unlimited.
        max_pdf_results_per_domain: Cap on per-domain results in the PDF
            output; ``None`` = unlimited.
        max_pdf_summary_length: Cap on summary string length in the PDF
            output; ``None`` = unlimited.
        smart_aggregation: Collapse runs of repeated/similar values into a
            compact summary instead of listing each one.
        aggregation_threshold: Minimum count of identical values before
            aggregation kicks in.
    """

    full_data_mode: bool = True
    max_array_elements: int | None = None
    max_list_items: int | None = None
    max_bytes_sample: int | None = None
    max_pdf_results_per_domain: int | None = None
    max_pdf_summary_length: int | None = None
    smart_aggregation: bool = True
    aggregation_threshold: int = 5
# Fallback sample rates for the different analysis contexts. These apply
# only when a sample rate cannot be derived from the input data itself
# (see AnalysisConfig.get_effective_sample_rate).
DEFAULT_SAMPLE_RATE_HZ: float = 1e9  # 1 GHz - high-speed digital / eye diagrams
DEFAULT_SAMPLE_RATE_GENERAL_HZ: float = 1e6  # 1 MHz - general waveform analysis
DEFAULT_SAMPLE_RATE_BINARY_HZ: float = 1.0  # 1 Hz (1 sample/s) - binary data
@dataclass
class AnalysisConfig:
    """Unified configuration for comprehensive analysis.

    A single object that bundles input specification, analysis selection,
    and output options for the report generator.

    Attributes:
        domains: Domains to analyze; ``None`` means every applicable domain.
        exclude_domains: Domains explicitly switched off.
        domain_config: Per-domain :class:`DomainConfig` overrides.
        output_formats: Report formats to emit (e.g. ``["json", "yaml"]``).
        index_formats: Index formats to emit (e.g. ``["html", "md"]``).
        generate_plots: Whether visualization plots are produced.
        plot_format: Plot file format (png, svg, pdf).
        plot_dpi: Plot resolution in DPI.
        copy_input_file: Copy the input file into the output directory.
        save_intermediate_data: Persist intermediate analysis data.
        full_data_mode: Emit all data without truncation.
        smart_aggregation: Collapse repeated values in the output.
        data_output: Advanced :class:`DataOutputConfig` settings.
        timeout_per_analysis: Per-function timeout in seconds (``None`` =
            no timeout).
        continue_on_error: Keep going when individual functions fail.
        log_level: Logging level (DEBUG, INFO, WARNING, ERROR).
        parallel_domains: Run domains in parallel.
        enable_quality_scoring: Attach quality scores to analysis results.
        max_memory_mb: Memory ceiling per analysis in MB.
        max_cache_entries: Cap on cached results (prevents cache bloat).
        max_parallel_workers: Cap on parallel analysis threads.
        chunk_size_mb: Chunk size ceiling for large-data processing in MB.
        default_sample_rate: Fallback sample rate (Hz) for analyses that
            need one when it is not derivable from the data; ``None`` forces
            the input data to supply it explicitly.
        title: Report title.
        author: Report author.
        project: Project name.
        notes: Free-form notes/description.
        custom_metadata: Arbitrary extra metadata fields.
    """

    # --- Analysis selection ---
    domains: list[AnalysisDomain] | None = None  # None = all applicable
    exclude_domains: list[AnalysisDomain] = field(default_factory=list)
    domain_config: dict[AnalysisDomain, DomainConfig] = field(default_factory=dict)

    # --- Output formats ---
    output_formats: list[str] = field(default_factory=lambda: ["json", "yaml"])
    index_formats: list[str] = field(default_factory=lambda: ["html", "md"])

    # --- Visualization ---
    generate_plots: bool = True
    plot_format: str = "png"
    plot_dpi: int = 150

    # --- Data handling ---
    copy_input_file: bool = False
    save_intermediate_data: bool = True
    full_data_mode: bool = True
    smart_aggregation: bool = True
    data_output: DataOutputConfig = field(default_factory=DataOutputConfig)

    # --- Execution control ---
    timeout_per_analysis: float | None = 30.0
    continue_on_error: bool = True
    log_level: str = "INFO"
    parallel_domains: bool = True
    enable_quality_scoring: bool = True

    # --- Resource limits (MEM-010, MEM-015) ---
    max_memory_mb: int = 2048
    max_cache_entries: int = 100
    max_parallel_workers: int = 4
    chunk_size_mb: int = 100

    # --- Sample rate ---
    # Fallback only; the effective rate is preferably taken from
    # (1) input data metadata, (2) an explicit domain_config parameter,
    # then (3) this default.
    default_sample_rate: float | None = DEFAULT_SAMPLE_RATE_GENERAL_HZ

    # --- Metadata ---
    title: str = ""
    author: str = ""
    project: str = ""
    notes: str = ""
    custom_metadata: dict[str, Any] = field(default_factory=dict)

    def get_domain_config(self, domain: AnalysisDomain) -> DomainConfig:
        """Return the configuration for *domain*.

        Args:
            domain: Analysis domain to look up.

        Returns:
            The configured :class:`DomainConfig`, or a fresh default one
            when no override was registered.
        """
        return self.domain_config.get(domain, DomainConfig())

    def is_domain_enabled(self, domain: AnalysisDomain) -> bool:
        """Return whether *domain* should be analyzed.

        Precedence: explicit exclusion, then a per-domain override, then
        membership in the ``domains`` allow-list, and finally enabled by
        default.

        Args:
            domain: Analysis domain to check.

        Returns:
            True if the domain should be analyzed.
        """
        if domain in self.exclude_domains:
            return False

        override = self.domain_config.get(domain)
        if override is not None:
            return override.enabled

        if self.domains is not None:
            return domain in self.domains

        return True

    def get_effective_sample_rate(
        self, data_sample_rate: float | None = None, context: str = "general"
    ) -> float:
        """Resolve the sample rate to use, preferring data metadata.

        Priority order:
            1. ``data_sample_rate`` (from the input data's metadata)
            2. ``self.default_sample_rate`` (from this config)
            3. A context-appropriate built-in default

        Args:
            data_sample_rate: Sample rate reported by the input data, if any.
            context: Which built-in default to fall back to:
                "general" (1 MHz), "highspeed" (1 GHz), or "binary" (1 Hz).

        Returns:
            Effective sample rate in Hz.
        """
        if data_sample_rate is not None and data_sample_rate > 0:
            return data_sample_rate

        if self.default_sample_rate is not None and self.default_sample_rate > 0:
            return self.default_sample_rate

        # Unknown contexts fall back to the general-purpose default.
        context_defaults = {
            "highspeed": DEFAULT_SAMPLE_RATE_HZ,
            "binary": DEFAULT_SAMPLE_RATE_BINARY_HZ,
        }
        return context_defaults.get(context, DEFAULT_SAMPLE_RATE_GENERAL_HZ)
@dataclass
class ProgressInfo:
    """Snapshot of analysis progress delivered to progress callbacks.

    Attributes:
        phase: Current phase, e.g. "loading", "analyzing", "plotting",
            "saving".
        domain: Domain being analyzed; ``None`` outside domain phases.
        function: Function being executed; ``None`` outside function phases.
        percent: Completion percentage in [0.0, 100.0].
        message: Human-readable description of the current step.
        elapsed_seconds: Seconds elapsed since the analysis began.
        estimated_remaining_seconds: Estimated seconds left, or ``None``
            when no estimate is available.
    """

    phase: str
    domain: AnalysisDomain | None
    function: str | None
    percent: float
    message: str
    elapsed_seconds: float
    estimated_remaining_seconds: float | None
@dataclass
class AnalysisError:
    """Record describing one failed analysis function.

    Attributes:
        domain: Domain in which the failure occurred.
        function: Name of the function that failed.
        error_type: Exception class name.
        error_message: Exception message text.
        traceback: Full traceback text, or ``None`` if it was not captured.
        duration_ms: Milliseconds spent before the failure.
    """

    domain: AnalysisDomain
    function: str
    error_type: str
    error_message: str
    traceback: str | None
    duration_ms: float
@dataclass
class AnalysisResult:
    """Outcome of a comprehensive analysis run.

    Bundles the paths to every generated artifact together with summary
    statistics about what ran, what succeeded, and what failed.

    Attributes:
        output_dir: Root output directory.
        index_html: HTML index path, or ``None`` if not generated.
        index_md: Markdown index path, or ``None`` if not generated.
        index_pdf: PDF index path, or ``None`` if not generated.
        summary_json: JSON summary path.
        summary_yaml: YAML summary path, or ``None`` if not generated.
        metadata_json: Metadata file path.
        config_yaml: Path to the saved configuration.
        domain_dirs: Output directory per analyzed domain.
        plot_paths: Every generated plot file.
        error_log: Error-log path, or ``None`` when no errors occurred.
        input_file: Input file path, or ``None`` if data came from memory.
        input_type: Type of the input data.
        total_analyses: Number of analysis functions attempted.
        successful_analyses: Number that succeeded.
        failed_analyses: Number that failed.
        skipped_analyses: Number that were skipped.
        duration_seconds: Wall-clock duration of the whole run.
        domain_summaries: Summary data per domain.
        errors: Errors collected during the run.
    """

    output_dir: Path
    index_html: Path | None
    index_md: Path | None
    index_pdf: Path | None
    summary_json: Path
    summary_yaml: Path | None
    metadata_json: Path
    config_yaml: Path
    domain_dirs: dict[AnalysisDomain, Path]
    plot_paths: list[Path]
    error_log: Path | None
    input_file: str | None
    input_type: InputType
    total_analyses: int
    successful_analyses: int
    failed_analyses: int
    skipped_analyses: int
    duration_seconds: float
    domain_summaries: dict[AnalysisDomain, dict[str, Any]]
    errors: list[AnalysisError]

    def open_index(self) -> None:
        """Open the HTML index in the default web browser.

        Raises:
            FileNotFoundError: If no HTML index was generated.
        """
        if self.index_html is None:
            raise FileNotFoundError("HTML index was not generated")

        # Imported lazily so merely constructing a result never touches
        # the webbrowser machinery.
        import webbrowser

        webbrowser.open(self.index_html.as_uri())

    def get_domain_results(self, domain: AnalysisDomain) -> dict[str, Any]:
        """Return the summary data recorded for *domain*.

        Args:
            domain: Domain whose results are wanted.

        Returns:
            The domain's summary dictionary.

        Raises:
            KeyError: If the domain was never analyzed.
        """
        if domain not in self.domain_summaries:
            raise KeyError(f"Domain {domain.value} was not analyzed")

        return self.domain_summaries[domain]

    @property
    def success_rate(self) -> float:
        """Percentage of attempted analyses that succeeded.

        Returns:
            Success rate in [0.0, 100.0]; 0.0 when nothing was attempted.
        """
        if self.total_analyses == 0:
            return 0.0
        return 100.0 * self.successful_analyses / self.total_analyses

    def __repr__(self) -> str:
        """Return a compact one-line summary of the result."""
        parts = (
            f"domains={len(self.domain_summaries)}",
            f"success={self.successful_analyses}/{self.total_analyses}",
            f"duration={self.duration_seconds:.1f}s",
            f"output_dir={self.output_dir}",
        )
        return f"AnalysisResult({', '.join(parts)})"
# Analysis capability registry: maps each domain to the analyzer modules
# that implement it and the input categories it requires.
# NOTE: A domain may be backed by MULTIPLE modules so every available
# function is captured (68 submodules, 318+ functions across 14 domains).
ANALYSIS_CAPABILITIES: dict[AnalysisDomain, dict[str, Any]] = {
    AnalysisDomain.WAVEFORM: {
        "description": "Basic waveform timing and amplitude measurements",
        "modules": ["tracekit.analyzers.waveform.measurements"],
        "requires": ["waveform"],
    },
    AnalysisDomain.SPECTRAL: {
        "description": "FFT, PSD, THD, SNR, SFDR, SINAD, ENOB, wavelet analysis",
        "modules": [
            "tracekit.analyzers.waveform.spectral",
            "tracekit.analyzers.spectral.chunked",
            "tracekit.analyzers.spectral.chunked_fft",
            "tracekit.analyzers.spectral.chunked_wavelet",
            "tracekit.analyzers.waveform.wavelets",
        ],
        "requires": ["waveform"],
    },
    AnalysisDomain.DIGITAL: {
        "description": "Digital signal extraction, edge detection, timing analysis",
        "modules": [
            "tracekit.analyzers.digital.extraction",
            "tracekit.analyzers.digital.edges",
            "tracekit.analyzers.digital.clock",
            "tracekit.analyzers.digital.quality",
            "tracekit.analyzers.digital.signal_quality",
            "tracekit.analyzers.digital.thresholds",
            "tracekit.analyzers.digital.bus",
            "tracekit.analyzers.digital.correlation",
        ],
        "requires": ["waveform", "digital"],
    },
    AnalysisDomain.TIMING: {
        "description": "Setup/hold time, propagation delay, skew, slew rate",
        "modules": ["tracekit.analyzers.digital.timing"],
        "requires": ["waveform", "digital"],
    },
    AnalysisDomain.STATISTICS: {
        "description": "Statistical measures, outlier detection, trend analysis",
        "modules": [
            "tracekit.analyzers.statistics.basic",
            "tracekit.analyzers.statistics.advanced",
            "tracekit.analyzers.statistics.correlation",
            "tracekit.analyzers.statistics.distribution",
            "tracekit.analyzers.statistics.outliers",
            "tracekit.analyzers.statistics.trend",
            "tracekit.analyzers.statistical.chunked_corr",
        ],
        "requires": ["waveform", "digital", "binary"],
    },
    AnalysisDomain.ENTROPY: {
        "description": "Entropy analysis, data classification, checksum detection",
        "modules": [
            "tracekit.analyzers.statistical.entropy",
            "tracekit.analyzers.statistical.classification",
            "tracekit.analyzers.statistical.checksum",
            "tracekit.analyzers.statistical.ngrams",
        ],
        "requires": ["binary"],
    },
    AnalysisDomain.PATTERNS: {
        "description": "Periodic patterns, motifs, signatures, clustering",
        "modules": [
            "tracekit.analyzers.patterns.discovery",
            "tracekit.analyzers.patterns.sequences",
            "tracekit.analyzers.patterns.periodic",
            "tracekit.analyzers.patterns.matching",
            "tracekit.analyzers.patterns.clustering",
            "tracekit.analyzers.patterns.learning",
        ],
        "requires": ["waveform", "binary", "digital"],
    },
    AnalysisDomain.JITTER: {
        "description": "RJ, DJ, PJ, DDJ, DCD, bathtub curve, TJ at BER",
        "modules": [
            "tracekit.analyzers.jitter.measurements",
            "tracekit.analyzers.jitter.decomposition",
            "tracekit.analyzers.jitter.spectrum",
            "tracekit.analyzers.jitter.ber",
        ],
        "requires": ["waveform", "digital"],
    },
    AnalysisDomain.EYE: {
        "description": "Eye diagram generation and metrics",
        "modules": [
            "tracekit.analyzers.eye.diagram",
            "tracekit.analyzers.eye.metrics",
        ],
        "requires": ["waveform", "digital"],
    },
    AnalysisDomain.POWER: {
        "description": "Power measurements, efficiency, switching loss, ripple",
        "modules": [
            "tracekit.analyzers.power.basic",
            "tracekit.analyzers.power.ac_power",
            "tracekit.analyzers.power.switching",
            "tracekit.analyzers.power.conduction",
            "tracekit.analyzers.power.efficiency",
            "tracekit.analyzers.power.ripple",
            "tracekit.analyzers.power.soa",
        ],
        "requires": ["waveform"],
    },
    AnalysisDomain.PROTOCOLS: {
        "description": "Serial protocol decoding (UART, SPI, I2C, CAN, etc.)",
        "modules": [
            "tracekit.analyzers.protocols.uart",
            "tracekit.analyzers.protocols.spi",
            "tracekit.analyzers.protocols.i2c",
            "tracekit.analyzers.protocols.can",
            "tracekit.analyzers.protocols.can_fd",
            "tracekit.analyzers.protocols.lin",
            "tracekit.analyzers.protocols.flexray",
            "tracekit.analyzers.protocols.manchester",
            "tracekit.analyzers.protocols.onewire",
            "tracekit.analyzers.protocols.usb",
            "tracekit.analyzers.protocols.i2s",
            "tracekit.analyzers.protocols.jtag",
            "tracekit.analyzers.protocols.swd",
            "tracekit.analyzers.protocols.hdlc",
        ],
        "requires": ["digital", "waveform"],
    },
    AnalysisDomain.SIGNAL_INTEGRITY: {
        "description": "S-parameters, de-embedding, equalization",
        "modules": [
            "tracekit.analyzers.signal_integrity.sparams",
            "tracekit.analyzers.signal_integrity.equalization",
            "tracekit.analyzers.signal_integrity.embedding",
        ],
        "requires": ["sparams", "waveform"],
    },
    AnalysisDomain.PACKET: {
        "description": "Packet metrics, throughput, latency, loss, payload analysis",
        "modules": [
            "tracekit.analyzers.packet.metrics",
            "tracekit.analyzers.packet.parser",
            "tracekit.analyzers.packet.payload",
            "tracekit.analyzers.packet.stream",
            "tracekit.analyzers.packet.daq",
        ],
        "requires": ["packets", "binary"],
    },
    AnalysisDomain.INFERENCE: {
        "description": "Auto-inference, protocol detection, signal classification",
        "modules": [
            "tracekit.inference.signal_intelligence",  # classify_signal, assess_signal_quality, suggest_measurements
            "tracekit.inference.logic",  # detect_logic_family
            "tracekit.inference.protocol",  # detect_protocol
            "tracekit.inference.spectral",  # auto_spectral_config
            "tracekit.inference.stream",  # reassemble_udp_stream, reassemble_tcp_stream, detect_message_framing
            "tracekit.inference.binary",  # detect_magic_bytes, detect_alignment, generate_parser
            "tracekit.inference.message_format",  # infer_format, detect_field_types, find_dependencies
            "tracekit.inference.sequences",  # detect_sequence_patterns, correlate_requests, find_message_dependencies
            "tracekit.inference.alignment",  # align_global, align_local, compute_similarity
            "tracekit.inference.state_machine",  # infer_rpni, minimize_dfa
            "tracekit.inference.protocol_dsl",  # decode_message, load_protocol
            "tracekit.inference.protocol_library",  # get_protocol, list_protocols, get_decoder
        ],
        "requires": ["waveform", "digital", "binary", "packets"],
    },
}
def get_available_analyses(input_type: InputType) -> list[AnalysisDomain]:
    """Return the analysis domains applicable to an input type.

    Each :class:`InputType` maps to one input category string; a domain is
    applicable when that category appears in its ``requires`` list in
    ``ANALYSIS_CAPABILITIES``.

    Args:
        input_type: Type of input data.

    Returns:
        List of applicable analysis domains, in registry order.
    """
    type_mapping = {
        InputType.WAVEFORM: "waveform",
        InputType.DIGITAL: "digital",
        InputType.BINARY: "binary",
        InputType.PCAP: "packets",
        InputType.IQ: "waveform",
        InputType.PACKETS: "packets",
        InputType.SPARAMS: "sparams",
    }

    # Unknown types fall back to the broadest category ("waveform").
    input_category = type_mapping.get(input_type, "waveform")

    # Comprehension replaces the manual append loop (same order, same result).
    return [
        domain
        for domain, config in ANALYSIS_CAPABILITIES.items()
        if input_category in config["requires"]
    ]
# Signature for progress-reporting callbacks: called with a ProgressInfo
# snapshot; any return value is ignored.
ProgressCallback = Callable[[ProgressInfo], None]
600__all__ = [
601 "ANALYSIS_CAPABILITIES",
602 "DEFAULT_SAMPLE_RATE_BINARY_HZ",
603 "DEFAULT_SAMPLE_RATE_GENERAL_HZ",
604 "DEFAULT_SAMPLE_RATE_HZ",
605 "AnalysisConfig",
606 "AnalysisDomain",
607 "AnalysisError",
608 "AnalysisResult",
609 "DataOutputConfig",
610 "DomainConfig",
611 "InputType",
612 "ProgressCallback",
613 "ProgressInfo",
614 "get_available_analyses",
615]