# Extracted from coverage.py report for src/tracekit/quality/scoring.py (34%, 199 statements).
1"""Analysis quality scoring for TraceKit.
3This module provides quality scoring and reliability categorization for
4analysis results, enabling users to assess confidence in automated findings.
7Example:
8 >>> from tracekit.quality.scoring import AnalysisQualityScore, ReliabilityCategory
9 >>> score = AnalysisQualityScore(
10 ... confidence=0.85,
11 ... category=ReliabilityCategory.HIGH,
12 ... data_quality_factor=0.9,
13 ... sample_sufficiency=0.8,
14 ... method_reliability=0.85,
15 ... )
16 >>> print(score.explain())
17 >>> recommendations = score.get_recommendations()
19References:
20 - Quality scoring for automated analysis results
21"""
23from __future__ import annotations
25import logging
26from dataclasses import dataclass, field
27from enum import Enum
28from typing import TYPE_CHECKING, Any
30import numpy as np
32if TYPE_CHECKING:
33 from numpy.typing import NDArray
35logger = logging.getLogger(__name__)
class ReliabilityCategory(Enum):
    """Reliability categories for analysis results.

    Attributes:
        HIGH: Result is highly reliable (confidence >= 0.8)
        MEDIUM: Result has moderate reliability (0.6 <= confidence < 0.8)
        LOW: Result has low reliability (0.4 <= confidence < 0.6)
        UNRELIABLE: Result is unreliable (confidence < 0.4)
    """

    HIGH = "high"
    MEDIUM = "medium"
    LOW = "low"
    UNRELIABLE = "unreliable"

    @classmethod
    def from_confidence(cls, confidence: float) -> ReliabilityCategory:
        """Map a confidence score onto its reliability category.

        Args:
            confidence: Confidence value in range [0, 1]

        Returns:
            Appropriate ReliabilityCategory
        """
        # Thresholds checked from highest to lowest; the first one met wins.
        for threshold, category in ((0.8, cls.HIGH), (0.6, cls.MEDIUM), (0.4, cls.LOW)):
            if confidence >= threshold:
                return category
        return cls.UNRELIABLE
@dataclass
class AnalysisQualityScore:
    """Quality score for an analysis result.

    Attributes:
        confidence: Overall confidence in result (0-1)
        category: Reliability category
        data_quality_factor: Quality of input data (0-1)
        sample_sufficiency: Sufficiency of sample count (0-1)
        method_reliability: Inherent reliability of method (0-1)
        factors: Additional contributing factors
        warnings: Quality warnings
        metadata: Additional metadata

    Example:
        >>> score = AnalysisQualityScore(
        ...     confidence=0.85,
        ...     category=ReliabilityCategory.HIGH,
        ...     data_quality_factor=0.9,
        ...     sample_sufficiency=0.8,
        ...     method_reliability=0.85,
        ... )
        >>> if score.is_reliable:
        ...     print("Result is reliable")
    """

    confidence: float
    category: ReliabilityCategory
    data_quality_factor: float
    sample_sufficiency: float
    method_reliability: float
    factors: dict[str, float] = field(default_factory=dict)
    warnings: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate that all score components lie in [0, 1].

        Raises:
            ValueError: If confidence or any component factor is out of range.
        """
        if not 0 <= self.confidence <= 1:
            raise ValueError(f"Confidence must be in [0, 1], got {self.confidence}")

        for name, value in [
            ("data_quality_factor", self.data_quality_factor),
            ("sample_sufficiency", self.sample_sufficiency),
            ("method_reliability", self.method_reliability),
        ]:
            if not 0 <= value <= 1:
                raise ValueError(f"{name} must be in [0, 1], got {value}")

    @property
    def is_reliable(self) -> bool:
        """Check if result is reliable (medium confidence or higher).

        Returns:
            True if category is HIGH or MEDIUM
        """
        return self.category in (ReliabilityCategory.HIGH, ReliabilityCategory.MEDIUM)

    def explain(self, include_factors: bool = True) -> str:
        """Generate human-readable explanation of the quality score.

        Args:
            include_factors: Whether to include factor breakdown

        Returns:
            Human-readable explanation string

        Example:
            >>> print(score.explain())
            ✓ High confidence result (85.0%)

            Contributing factors:
              ✓ Data Quality Factor: 90.0%
              ✓ Sample Sufficiency: 80.0%
              ✓ Method Reliability: 85.0%
        """
        lines = []

        # Overall assessment — one line keyed to the reliability category.
        if self.category == ReliabilityCategory.HIGH:
            lines.append(f"✓ High confidence result ({self.confidence:.1%})")
        elif self.category == ReliabilityCategory.MEDIUM:
            lines.append(f"◐ Medium confidence result ({self.confidence:.1%})")
        elif self.category == ReliabilityCategory.LOW:
            lines.append(f"◯ Low confidence result ({self.confidence:.1%})")
        else:
            lines.append(f"✗ Unreliable result ({self.confidence:.1%})")

        # Factor breakdown, sorted by factor name for stable output.
        if include_factors and self.factors:
            lines.append("\nContributing factors:")
            for factor_name, factor_value in sorted(self.factors.items()):
                status = "✓" if factor_value >= 0.7 else "◐" if factor_value >= 0.4 else "✗"
                lines.append(
                    f"  {status} {factor_name.replace('_', ' ').title()}: {factor_value:.1%}"
                )

        # Warnings, if any were recorded.
        if self.warnings:
            lines.append("\nWarnings:")
            for warning in self.warnings:
                lines.append(f"  ⚠ {warning}")

        return "\n".join(lines)

    def get_recommendations(self) -> list[str]:
        """Get actionable recommendations to improve result quality.

        Returns:
            List of recommendation strings

        Example:
            >>> recommendations = score.get_recommendations()
            >>> for rec in recommendations:
            ...     print(rec)
            Consider improving input signal quality (filtering, averaging)
        """
        recommendations: list[str] = []

        if self.data_quality_factor < 0.5:
            recommendations.append("Consider improving input signal quality (filtering, averaging)")

        if self.sample_sufficiency < 0.5:
            recommendations.append("Capture more data points for reliable analysis")

        # Search the joined warning strings rather than str(self.warnings):
        # the list repr included quotes/brackets and could match substrings
        # spanning element boundaries.
        warning_text = " ".join(self.warnings).lower()

        if "snr" in warning_text:
            recommendations.append("Use a bandpass filter to improve SNR")

        if "clipping" in warning_text:
            recommendations.append("Adjust input gain to avoid signal clipping")

        if not recommendations:
            recommendations.append("Result quality is acceptable")

        return recommendations

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            Dictionary representation of quality score.
        """
        return {
            "confidence": self.confidence,
            "category": self.category.value,
            "is_reliable": self.is_reliable,
            "data_quality_factor": self.data_quality_factor,
            "method_reliability": self.method_reliability,
            "sample_sufficiency": self.sample_sufficiency,
            "factors": self.factors,
            "warnings": self.warnings,
            "metadata": self.metadata,
        }
def calculate_quality_score(
    data_quality_factor: float,
    sample_sufficiency: float,
    method_reliability: float,
    *,
    weights: tuple[float, float, float] | None = None,
    warnings: list[str] | None = None,
    factors: dict[str, float] | None = None,
    metadata: dict[str, Any] | None = None,
) -> AnalysisQualityScore:
    """Calculate overall quality score from component factors.

    Args:
        data_quality_factor: Quality of input data (0-1)
        sample_sufficiency: Sufficiency of sample count (0-1)
        method_reliability: Inherent reliability of method (0-1)
        weights: Optional custom weights (data, sample, method), defaults to (0.4, 0.3, 0.3)
        warnings: Optional quality warnings
        factors: Optional additional factors
        metadata: Optional metadata

    Returns:
        AnalysisQualityScore with computed confidence

    Example:
        >>> score = calculate_quality_score(
        ...     data_quality_factor=0.9,
        ...     sample_sufficiency=0.8,
        ...     method_reliability=0.85,
        ... )
        >>> print(f"Confidence: {score.confidence:.1%}")
    """
    if weights is None:
        weights = (0.4, 0.3, 0.3)

    # Confidence is the weighted sum of the three component factors,
    # paired in (data, sample, method) order.
    components = (data_quality_factor, sample_sufficiency, method_reliability)
    confidence = sum(w * c for w, c in zip(weights, components))

    # The named components always appear in the breakdown; any caller-supplied
    # extras are merged on top (and may override the named entries).
    all_factors = dict(
        zip(
            ("data_quality_factor", "sample_sufficiency", "method_reliability"),
            components,
        )
    )
    if factors:
        all_factors.update(factors)

    return AnalysisQualityScore(
        confidence=confidence,
        category=ReliabilityCategory.from_confidence(confidence),
        data_quality_factor=data_quality_factor,
        sample_sufficiency=sample_sufficiency,
        method_reliability=method_reliability,
        factors=all_factors,
        warnings=warnings or [],
        metadata=metadata or {},
    )
@dataclass
class DataQualityMetrics:
    """Metrics describing input data quality.

    QUAL-002: Data quality assessment

    Attributes:
        snr_db: Signal-to-noise ratio in decibels
        sample_count: Number of samples in the data
        has_clipping: Whether the signal shows clipping
        has_saturation: Whether the signal shows saturation
        noise_floor: Estimated noise floor level
        completeness: Fraction of non-NaN values (0-1)
    """

    snr_db: float | None = None
    sample_count: int = 0
    has_clipping: bool = False
    has_saturation: bool = False
    noise_floor: float | None = None
    completeness: float = 1.0  # Fraction of non-NaN values

    def to_factor(self) -> float:
        """Convert metrics to single quality factor (0-1).

        Returns:
            Quality factor between 0 and 1.
        """
        contributions: list[float] = []

        # SNR contributes linearly, saturating at 40 dB.
        if self.snr_db is not None:
            contributions.append(min(1.0, max(0.0, self.snr_db / 40.0)))

        # Sample count contributes on a log scale, saturating at 10^4 samples.
        contributions.append(min(1.0, np.log10(max(1, self.sample_count)) / 4.0))

        # Clipping/saturation each drag the average down via a fixed penalty.
        if self.has_clipping:
            contributions.append(0.7)
        if self.has_saturation:
            contributions.append(0.6)

        # Completeness contributes directly.
        contributions.append(self.completeness)

        # Defensive fallback; in practice the list is never empty.
        return float(np.mean(contributions)) if contributions else 0.5
# Method reliability scores (based on algorithm characteristics).
# Baseline reliabilities in [0, 1]; score_analysis_result looks methods up
# here (exact or substring match) to weight a method's inherent trust.
#: Method reliability tracking
METHOD_RELIABILITY: dict[str, float] = {
    # High reliability methods
    "fft": 0.95,
    "welch": 0.90,
    "autocorrelation": 0.85,
    "histogram": 0.95,
    "statistics": 0.95,
    # Medium reliability methods
    "edge_detection": 0.80,
    "zero_crossing": 0.75,
    "peak_detection": 0.70,
    "pattern_matching": 0.75,
    # Lower reliability methods (heuristic-based)
    "protocol_inference": 0.60,
    "signal_classification": 0.65,
    "anomaly_detection": 0.60,
}
def assess_data_quality(
    data: NDArray[np.float64], sample_rate: float | None = None
) -> DataQualityMetrics:
    """Assess quality of input data.

    QUAL-002: Data quality assessment

    Args:
        data: Input data array
        sample_rate: Sample rate in Hz (optional; currently unused)

    Returns:
        DataQualityMetrics with quality assessment
    """
    metrics = DataQualityMetrics()

    # Best-effort: any failure leaves partially-filled metrics rather than
    # propagating an exception to the caller.
    try:
        metrics.sample_count = len(data)

        # Completeness = fraction of finite (non-NaN/Inf) samples.
        finite_mask = np.isfinite(data)
        metrics.completeness = float(np.mean(finite_mask))

        # Too little valid data to say anything further.
        if metrics.completeness < 0.01:
            return metrics

        clean = data[finite_mask]

        # Clipping heuristic: more than 1% of samples pinned at either extreme.
        if np.ptp(clean) > 0:
            low_hits = np.sum(clean == np.min(clean))
            high_hits = np.sum(clean == np.max(clean))
            limit = 0.01 * len(clean)
            metrics.has_clipping = low_hits > limit or high_hits > limit

        # Robust noise estimate via scaled median absolute deviation.
        center = np.median(clean)
        mad = np.median(np.abs(clean - center)) * 1.4826
        metrics.noise_floor = float(mad)

        # SNR = total variance over MAD-based noise power, in dB.
        noise_power = mad**2
        if noise_power > 0:
            snr_linear = float(np.var(clean)) / noise_power
            metrics.snr_db = float(10 * np.log10(max(1e-10, snr_linear)))

    except Exception as e:
        logger.debug(f"Error assessing data quality: {e}")

    return metrics
def score_analysis_result(
    result: Any,
    method_name: str,
    data: NDArray[np.float64] | None = None,
    data_quality: DataQualityMetrics | None = None,
    min_samples: int = 10,
) -> AnalysisQualityScore:
    """Score the quality of an analysis result.

    QUAL-001: Quality scoring foundation

    Args:
        result: The analysis result to score
        method_name: Name of the analysis method (may be dotted, e.g. "pkg.fft")
        data: Input data (for quality assessment)
        data_quality: Pre-computed data quality metrics
        min_samples: Minimum samples for reliable result

    Returns:
        AnalysisQualityScore with confidence and factors
    """
    factors: dict[str, float] = {}
    warnings: list[str] = []

    # Derive data-quality metrics from the raw data if not supplied.
    if data_quality is None and data is not None:
        data_quality = assess_data_quality(data)

    # Data quality factor (0.5 = neutral when no data is available).
    if data_quality is not None:
        data_factor = data_quality.to_factor()
        if data_quality.has_clipping:
            warnings.append("Input data shows clipping")
        if data_quality.snr_db is not None and data_quality.snr_db < 20:
            warnings.append(f"Low SNR ({data_quality.snr_db:.1f} dB)")
    else:
        data_factor = 0.5
    factors["data_quality"] = data_factor

    # Method reliability: exact lookup on the last dotted component first.
    # (Previously underscores were stripped before the lookup, so the
    # underscore-containing keys in METHOD_RELIABILITY could never match
    # exactly, and the substring scan then overrode even exact matches.)
    method_key = method_name.lower().split(".")[-1]
    if method_key in METHOD_RELIABILITY:
        method_reliability = METHOD_RELIABILITY[method_key]
    else:
        method_reliability = 0.7  # default for unknown methods
        # Fall back to a substring match against the full method name.
        for key, reliability in METHOD_RELIABILITY.items():
            if key in method_name.lower():
                method_reliability = reliability
                break
    factors["method_reliability"] = method_reliability

    # Sample sufficiency: full credit at 10x the minimum sample count.
    if data_quality is not None:
        sample_sufficiency = min(1.0, data_quality.sample_count / (min_samples * 10))
        if data_quality.sample_count < min_samples:
            warnings.append(f"Insufficient samples ({data_quality.sample_count} < {min_samples})")
    else:
        sample_sufficiency = 0.5
    factors["sample_sufficiency"] = sample_sufficiency

    # Result-specific validity (None/NaN/empty results score low).
    result_factor = _score_result_value(result)
    factors["result_validity"] = result_factor

    # Weighted combination of the four factors (weights sum to 1.0).
    confidence = (
        data_factor * 0.3
        + method_reliability * 0.25
        + sample_sufficiency * 0.25
        + result_factor * 0.2
    )

    category = ReliabilityCategory.from_confidence(confidence)

    return AnalysisQualityScore(
        confidence=confidence,
        category=category,
        data_quality_factor=data_factor,
        method_reliability=method_reliability,
        sample_sufficiency=sample_sufficiency,
        factors=factors,
        warnings=warnings,
    )
511def _score_result_value(result: Any) -> float:
512 """Score result validity based on value characteristics.
514 Args:
515 result: Analysis result to score.
517 Returns:
518 Validity score between 0 and 1.
519 """
520 if result is None:
521 return 0.0
523 # Handle numeric results
524 if isinstance(result, int | float):
525 if np.isnan(result) or np.isinf(result):
526 return 0.0
527 return 1.0
529 # Handle array results
530 if isinstance(result, np.ndarray):
531 valid_ratio = np.mean(np.isfinite(result))
532 return float(valid_ratio)
534 # Handle dict results
535 if isinstance(result, dict):
536 if not result:
537 return 0.3
538 return 1.0
540 # Handle list results
541 if isinstance(result, list):
542 if not result:
543 return 0.3
544 return 1.0
546 return 0.7 # Default for other types
def combine_quality_scores(
    scores: list[AnalysisQualityScore],
    weights: list[float] | None = None,
) -> AnalysisQualityScore:
    """Combine multiple quality scores into one weighted aggregate.

    Args:
        scores: List of quality scores to combine
        weights: Optional weights for each score (defaults to equal weights;
            length must match ``scores``)

    Returns:
        Combined quality score

    Raises:
        ValueError: If ``weights`` is given and its length differs from
            ``scores`` (via ``zip(..., strict=True)``).
    """
    if not scores:
        # Nothing to combine: report a fully unreliable score.
        return AnalysisQualityScore(
            confidence=0.0,
            category=ReliabilityCategory.UNRELIABLE,
            data_quality_factor=0.0,
            method_reliability=0.0,
            sample_sufficiency=0.0,
        )

    if weights is None:
        weights = [1.0] * len(scores)

    total_weight = sum(weights)

    def _weighted_mean(attr: str) -> float:
        # Weighted average of one numeric attribute across all scores.
        return (
            sum(getattr(s, attr) * w for s, w in zip(scores, weights, strict=True))
            / total_weight
        )

    combined_confidence = _weighted_mean("confidence")
    combined_data = _weighted_mean("data_quality_factor")
    combined_method = _weighted_mean("method_reliability")
    combined_samples = _weighted_mean("sample_sufficiency")

    # Aggregate warnings, de-duplicated while preserving first-seen order.
    # (list(set(...)) produced a nondeterministic ordering across runs.)
    all_warnings = [w for score in scores for w in score.warnings]
    unique_warnings = list(dict.fromkeys(all_warnings))

    category = ReliabilityCategory.from_confidence(combined_confidence)

    return AnalysisQualityScore(
        confidence=combined_confidence,
        category=category,
        data_quality_factor=combined_data,
        method_reliability=combined_method,
        sample_sufficiency=combined_samples,
        warnings=unique_warnings,
    )
# Public API of this module, as imported by `from ... import *`.
__all__ = [
    "METHOD_RELIABILITY",
    "AnalysisQualityScore",
    "DataQualityMetrics",
    "ReliabilityCategory",
    "assess_data_quality",
    "calculate_quality_score",
    "combine_quality_scores",
    "score_analysis_result",
]