Coverage for src / tracekit / reporting / summary_generator.py: 96%
138 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""Natural language summary generation for signal analysis.
3This module generates human-readable descriptions of measurements and analysis
4results that avoid jargon and explain findings in accessible language.
7Example:
8 >>> from tracekit.reporting import generate_summary
9 >>> trace = load("capture.wfm")
10 >>> summary = generate_summary(trace)
11 >>> print(summary.text)
13References:
14 TraceKit Auto-Discovery Specification
15"""
17from __future__ import annotations
19from dataclasses import dataclass, field
20from typing import TYPE_CHECKING, Any
22import numpy as np
24if TYPE_CHECKING:
25 from tracekit.core.types import WaveformTrace
@dataclass
class Finding:
    """A single result item produced during signal analysis.

    Attributes:
        title: Brief label for the finding.
        description: Jargon-free explanation of what was observed.
        confidence: How certain the analysis is, from 0.0 to 1.0.
        severity: One of "INFO", "WARNING", or "CRITICAL".
    """

    title: str
    description: str
    confidence: float = 1.0
    severity: str = "INFO"
@dataclass
class Summary:
    """Plain-language summary of a signal analysis run.

    Attributes:
        text: Full summary paragraph (targeted at 100-200 words).
        overview: Single high-level overview sentence.
        findings: Key findings in order of importance.
        recommendations: Actionable next steps for the user.
        word_count: Number of words in ``text``.
        grade_level: Estimated Flesch-Kincaid reading grade of ``text``.
    """

    text: str
    overview: str
    findings: list[Finding] = field(default_factory=list)
    recommendations: list[str] = field(default_factory=list)
    word_count: int = 0
    grade_level: float = 0.0
66def _estimate_grade_level(text: str) -> float:
67 """Estimate Flesch-Kincaid grade level.
69 Simple approximation based on sentence and word length.
71 Args:
72 text: Text to analyze.
74 Returns:
75 Estimated grade level.
76 """
77 # Split into sentences (simple split on period)
78 sentences = [s.strip() for s in text.split(".") if s.strip()]
79 if not sentences:
80 return 0.0
82 # Split into words
83 words = text.split()
84 if not words: 84 ↛ 85line 84 didn't jump to line 85 because the condition on line 84 was never true
85 return 0.0
87 # Count syllables (approximation: count vowel groups)
88 total_syllables = 0
89 for word in words:
90 word_lower = word.lower()
91 syllable_count = 0
92 previous_was_vowel = False
94 for char in word_lower:
95 is_vowel = char in "aeiouy"
96 if is_vowel and not previous_was_vowel:
97 syllable_count += 1
98 previous_was_vowel = is_vowel
100 # Minimum 1 syllable per word
101 total_syllables += max(1, syllable_count)
103 # Flesch-Kincaid formula
104 avg_words_per_sentence = len(words) / len(sentences)
105 avg_syllables_per_word = total_syllables / len(words)
107 grade_level = 0.39 * avg_words_per_sentence + 11.8 * avg_syllables_per_word - 15.59
109 return max(0.0, grade_level)
112def _characterize_signal_type(trace: WaveformTrace) -> tuple[str, float]:
113 """Characterize basic signal type.
115 Simple heuristic-based signal type detection.
117 Args:
118 trace: Waveform to analyze.
120 Returns:
121 Tuple of (signal_type, confidence).
122 """
123 data = trace.data.astype(np.float64)
125 # Check if digital (only 2-3 distinct levels)
126 unique_values = len(np.unique(np.round(data, decimals=2)))
127 value_range = np.ptp(data)
129 if unique_values <= 3 and value_range > 0.1:
130 # Likely digital
131 return "digital", 0.85
132 elif value_range < 0.01:
133 # Constant signal
134 return "DC level", 0.90
135 else:
136 # Analog
137 # Check for periodicity
138 if len(data) > 100: 138 ↛ 146line 138 didn't jump to line 146 because the condition on line 138 was always true
139 autocorr = np.correlate(data - np.mean(data), data - np.mean(data), mode="full")
140 autocorr = autocorr[len(autocorr) // 2 :]
141 autocorr = autocorr / autocorr[0]
143 # Look for peaks in autocorrelation
144 if len(autocorr) > 10 and np.max(autocorr[10:]) > 0.5:
145 return "periodic analog", 0.75
146 return "analog", 0.70
149def _assess_quality(trace: WaveformTrace) -> tuple[str, list[str]]:
150 """Assess signal quality.
152 Args:
153 trace: Waveform to analyze.
155 Returns:
156 Tuple of (quality_level, issues).
157 """
158 data = trace.data.astype(np.float64)
159 issues = []
161 # Check for sufficient data
162 if len(data) < 100:
163 issues.append("Very short capture (less than 100 samples)")
165 # Check noise level (standard deviation relative to range)
166 data_range = np.ptp(data)
167 if data_range > 0:
168 noise_ratio = np.std(data) / data_range
169 if noise_ratio > 0.2:
170 issues.append("High noise level detected")
172 # Check for clipping
173 if len(data) > 0: 173 ↛ 187line 173 didn't jump to line 187 because the condition on line 173 was always true
174 data_min = np.min(data)
175 data_max = np.max(data)
177 # Check if many samples at min/max (possible clipping)
178 at_min = np.sum(data == data_min)
179 at_max = np.sum(data == data_max)
181 if at_min > len(data) * 0.05:
182 issues.append("Possible clipping at minimum level")
183 if at_max > len(data) * 0.05:
184 issues.append("Possible clipping at maximum level")
186 # Determine quality level
187 if not issues:
188 quality = "excellent"
189 elif len(issues) == 1:
190 quality = "good"
191 elif len(issues) == 2:
192 quality = "fair"
193 else:
194 quality = "poor"
196 return quality, issues
199def _format_frequency(freq_hz: float) -> str:
200 """Format frequency in human-readable form.
202 Args:
203 freq_hz: Frequency in Hz.
205 Returns:
206 Formatted string.
207 """
208 if freq_hz >= 1e9:
209 return f"{freq_hz / 1e9:.1f} GHz"
210 elif freq_hz >= 1e6:
211 return f"{freq_hz / 1e6:.1f} MHz"
212 elif freq_hz >= 1e3:
213 return f"{freq_hz / 1e3:.1f} kHz"
214 else:
215 return f"{freq_hz:.1f} Hz"
def generate_summary(
    trace: WaveformTrace,
    *,
    context: dict[str, Any] | None = None,
    detail_level: str = "summary",
    max_words: int = 200,
    include_sections: list[str] | None = None,
) -> Summary:
    """Generate a natural language summary of signal analysis.

    Creates a plain-English description of the signal and analysis results,
    avoiding technical jargon and explaining findings in accessible terms.

    Args:
        trace: Waveform to summarize.
        context: Optional analysis context (characterization, anomalies, etc.).
        detail_level: Summary detail level ("summary", "intermediate",
            "expert"). NOTE(review): accepted but not yet consulted by this
            implementation.
        max_words: Maximum word count for summary text.
        include_sections: Sections to include (default: all).

    Returns:
        Summary object with natural language description.

    Example:
        >>> trace = load("uart_signal.wfm")
        >>> summary = generate_summary(trace)
        >>> print(summary.text)
        This is a digital signal with two voltage levels...

    References:
        DISC-003: Natural Language Summaries
    """
    context = context or {}
    sections = include_sections or ["overview", "findings", "recommendations"]

    # Characterize the waveform and grade its quality.
    signal_type, type_confidence = _characterize_signal_type(trace)
    quality_level, quality_issues = _assess_quality(trace)

    # One-sentence overview: type, sample rate, and capture duration.
    sample_rate = trace.metadata.sample_rate
    duration_ms = len(trace.data) / sample_rate * 1000
    overview = (
        f"This is a {signal_type} signal captured at "
        f"{_format_frequency(sample_rate)} sample rate for "
        f"{duration_ms:.1f} milliseconds."
    )

    # Quality description notes how many issues were detected, if any.
    quality_desc = f"Signal quality is {quality_level}"
    if quality_issues:
        quality_desc += f" with {len(quality_issues)} issue(s) noted"

    v_min = float(np.min(trace.data))
    v_max = float(np.max(trace.data))

    # The three core findings: type, quality, and voltage range.
    findings = [
        Finding(
            title="Signal Type",
            description=f"Identified as {signal_type}",
            confidence=type_confidence,
            severity="INFO",
        ),
        Finding(
            title="Signal Quality",
            description=quality_desc,
            confidence=0.85,
            severity="WARNING" if quality_issues else "INFO",
        ),
        Finding(
            title="Voltage Range",
            description=(
                f"Signal ranges from {v_min:.3f}V to {v_max:.3f}V "
                f"(swing: {v_max - v_min:.3f}V)"
            ),
            confidence=1.0,
            severity="INFO",
        ),
    ]

    def issue_mentions(keyword: str) -> bool:
        # Case-insensitive search across the reported quality issues.
        return any(keyword in issue.lower() for issue in quality_issues)

    # Map detected issues to concrete remediation advice.
    recommendations: list[str] = []
    if issue_mentions("very short"):
        recommendations.append("Capture a longer duration to enable more detailed analysis")
    if issue_mentions("noise"):
        recommendations.append(
            "Check signal integrity and consider using better probes or shielding"
        )
    if issue_mentions("clipping"):
        recommendations.append("Adjust voltage range to prevent signal clipping and data loss")

    # When nothing needs fixing, suggest a sensible next analysis step.
    if signal_type == "digital" and not recommendations:
        recommendations.append("Signal appears clean and suitable for digital protocol analysis")
    elif signal_type in ["analog", "periodic analog"] and not recommendations:
        recommendations.append("Consider spectral analysis to identify frequency components")

    # Assemble the requested sections into one paragraph.
    parts: list[str] = []
    if "overview" in sections:
        parts.append(overview)
    if "findings" in sections and findings:
        top_findings = findings[:3]  # Only the three most important findings.
        parts.append(
            " ".join(f"{item.title}: {item.description}." for item in top_findings)
        )
    if "recommendations" in sections and recommendations:
        parts.append("Recommended next steps: " + "; ".join(recommendations[:2]) + ".")

    full_text = " ".join(parts)

    # Enforce the word budget, marking truncation with an ellipsis.
    tokens = full_text.split()
    if len(tokens) > max_words:
        full_text = " ".join(tokens[:max_words]) + "..."

    return Summary(
        text=full_text,
        overview=overview,
        findings=findings,
        recommendations=recommendations,
        word_count=len(full_text.split()),
        grade_level=_estimate_grade_level(full_text),
    )
# Public API of this module; helpers prefixed with "_" stay private.
__all__ = [
    "Finding",
    "Summary",
    "generate_summary",
]