Coverage for src / tracekit / core / confidence.py: 100%

72 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Universal confidence scoring for auto-detection. 

2 

3This module provides confidence scoring infrastructure used across 

4all auto-discovery and analysis functions. 

5 

6 

7Example: 

8 >>> from tracekit.core.confidence import ConfidenceScore 

9 >>> score = ConfidenceScore(value=0.85, factors={'signal_quality': 0.9, 'pattern_match': 0.8}) 

10 >>> print(f"Confidence: {score.value:.2f} ({score.interpretation})") 

11 Confidence: 0.85 (likely) 

12 

13References: 

14 TraceKit Auto-Discovery Specification 

15""" 

16 

17from __future__ import annotations 

18 

19from dataclasses import dataclass, field 

20from typing import Any 

21 

22 

23@dataclass 

24class ConfidenceScore: 

25 """Confidence score for auto-detection results. 

26 

27 Represents reliability of automated analysis results with 

28 standardized 0.0-1.0 scale and human-readable interpretation. 

29 

30 Confidence scale: 

31 - 0.9-1.0: High - "almost certain", trust the result 

32 - 0.7-0.9: Medium - "likely", verify if critical 

33 - 0.5-0.7: Low - "possible", check alternatives 

34 - 0.0-0.5: Unreliable - "uncertain", manual analysis recommended 

35 

36 Attributes: 

37 value: Confidence value (0.0-1.0, 2 decimal precision). 

38 factors: Dictionary of contributing factors and their scores. 

39 explanation: Optional explanation of confidence calculation. 

40 

41 Example: 

42 >>> score = ConfidenceScore(0.92, factors={'snr': 0.95, 'timing': 0.89}) 

43 >>> print(f"{score.value:.2f} - {score.interpretation}") 

44 0.92 - almost certain 

45 """ 

46 

47 value: float 

48 factors: dict[str, float] = field(default_factory=dict) 

49 explanation: str | None = None 

50 

51 def __post_init__(self) -> None: 

52 """Validate confidence score after initialization.""" 

53 if not 0.0 <= self.value <= 1.0: 

54 raise ValueError(f"Confidence value must be in [0.0, 1.0], got {self.value}") 

55 # Round to 2 decimal places 

56 self.value = round(self.value, 2) 

57 

58 # Validate factors 

59 for name, factor_value in self.factors.items(): 

60 if not 0.0 <= factor_value <= 1.0: 

61 raise ValueError(f"Factor '{name}' must be in [0.0, 1.0], got {factor_value}") 

62 

63 @property 

64 def level(self) -> str: 

65 """Confidence level classification. 

66 

67 Returns: 

68 str: One of "high", "medium", "low", "unreliable". 

69 """ 

70 if self.value >= 0.9: 

71 return "high" 

72 elif self.value >= 0.7: 

73 return "medium" 

74 elif self.value >= 0.5: 

75 return "low" 

76 else: 

77 return "unreliable" 

78 

79 @property 

80 def interpretation(self) -> str: 

81 """Human-readable interpretation. 

82 

83 Returns: 

84 Descriptive interpretation string. 

85 """ 

86 if self.value >= 0.95: 

87 return "almost certain" 

88 elif self.value >= 0.85: 

89 return "likely" 

90 elif self.value >= 0.75: 

91 return "possible" 

92 elif self.value >= 0.55: 

93 return "uncertain" 

94 else: 

95 return "unlikely" 

96 

97 @staticmethod 

98 def combine( 

99 scores: list[float], 

100 weights: list[float] | None = None, 

101 ) -> float: 

102 """Combine multiple confidence scores into one. 

103 

104 Uses weighted average to combine scores. Equal weights if not specified. 

105 

106 Args: 

107 scores: List of confidence values (0.0-1.0). 

108 weights: Optional weight for each score (must sum to 1.0). 

109 

110 Returns: 

111 Combined confidence score (0.0-1.0). 

112 

113 Raises: 

114 ValueError: If scores/weights are invalid or don't match. 

115 

116 Example: 

117 >>> scores = [0.9, 0.8, 0.7] 

118 >>> combined = ConfidenceScore.combine(scores, weights=[0.5, 0.3, 0.2]) 

119 >>> print(f"{combined:.2f}") 

120 0.83 

121 """ 

122 if not scores: 

123 raise ValueError("Cannot combine empty score list") 

124 

125 for score in scores: 

126 if not 0.0 <= score <= 1.0: 

127 raise ValueError(f"Score must be in [0.0, 1.0], got {score}") 

128 

129 if weights is None: 

130 # Equal weights 

131 weights = [1.0 / len(scores)] * len(scores) 

132 

133 if len(scores) != len(weights): 

134 raise ValueError(f"Scores ({len(scores)}) and weights ({len(weights)}) length mismatch") 

135 

136 # Normalize weights to sum to 1.0 

137 weight_sum = sum(weights) 

138 if weight_sum == 0: 

139 raise ValueError("Weights must sum to non-zero value") 

140 

141 normalized_weights = [w / weight_sum for w in weights] 

142 

143 # Weighted average 

144 combined = sum(s * w for s, w in zip(scores, normalized_weights, strict=False)) 

145 return round(combined, 2) 

146 

147 def to_dict(self) -> dict[str, Any]: 

148 """Convert to dictionary representation. 

149 

150 Returns: 

151 Dictionary with confidence details. 

152 """ 

153 return { 

154 "value": self.value, 

155 "level": self.level, 

156 "interpretation": self.interpretation, 

157 "factors": self.factors, 

158 "explanation": self.explanation, 

159 } 

160 

161 def __repr__(self) -> str: 

162 """String representation.""" 

163 return f"ConfidenceScore({self.value:.2f}, level='{self.level}')" 

164 

165 def __float__(self) -> float: 

166 """Convert to float (returns value).""" 

167 return self.value 

168 

169 

def calculate_confidence(
    factors: dict[str, float],
    weights: dict[str, float] | None = None,
    *,
    explanation: str | None = None,
) -> ConfidenceScore:
    """Build a ConfidenceScore as the weighted average of named factors.

    Args:
        factors: Mapping of factor name to its value (0.0-1.0).
        weights: Optional mapping of factor name to its relative weight.
            Every factor needs an entry; entries for names that are not in
            ``factors`` are ignored. The weights are handed to
            ``ConfidenceScore.combine``, which normalizes them by their
            sum. When omitted, all factors count equally.
        explanation: Optional explanation stored on the resulting score.

    Returns:
        ConfidenceScore holding the combined value, the ``factors`` mapping
        that was passed in (the same object, not a copy) and ``explanation``.

    Raises:
        ValueError: If ``factors`` is empty, a factor has no weight, or
            ``ConfidenceScore.combine`` / ``ConfidenceScore`` reject the
            values.

    Example:
        >>> factors = {'signal_quality': 0.9, 'pattern_match': 0.85, 'timing': 0.8}
        >>> weights = {'signal_quality': 0.4, 'pattern_match': 0.4, 'timing': 0.2}
        >>> score = calculate_confidence(factors, weights)
        >>> print(f"Confidence: {score.value:.2f}")
        Confidence: 0.86
    """
    if not factors:
        raise ValueError("Cannot calculate confidence from empty factors")

    values = list(factors.values())
    relative_weights = None

    if weights is not None:
        # Report the first factor (in iteration order) that lacks a weight.
        for factor_name in factors:
            if factor_name not in weights:
                raise ValueError(f"Missing weight for factor '{factor_name}'")
        # Same iteration order as ``values`` so scores and weights line up.
        relative_weights = [weights[factor_name] for factor_name in factors]

    return ConfidenceScore(
        value=ConfidenceScore.combine(values, relative_weights),
        factors=factors,
        explanation=explanation,
    )

220 

221 

# Public API of this module: the names exported by a star-import.
__all__ = ["ConfidenceScore", "calculate_confidence"]