Coverage for src / tracekit / core / provenance.py: 100%

87 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Measurement provenance tracking for reproducibility. 

2 

3This module provides provenance tracking to record the complete history 

4of how measurements were computed, including algorithms, parameters, 

5timestamps, and library versions. 

6""" 

7 

8from __future__ import annotations 

9 

10import hashlib 

11from dataclasses import dataclass, field 

12from datetime import UTC, datetime 

13from typing import TYPE_CHECKING, Any 

14 

15import numpy as np 

16 

17if TYPE_CHECKING: 

18 from numpy.typing import NDArray 

19 

# Version string stamped into provenance records by default.
# Hard-coded here; in production it would be imported from the package's
# __version__ instead.
TRACEKIT_VERSION = "0.1.0"

22 

23 

@dataclass
class Provenance:
    """Record of how a single computation was carried out.

    Stores the algorithm name, its parameters, and surrounding context so
    that a result can be reproduced or debugged later.

    Attributes:
        algorithm: Name of algorithm or method used.
        parameters: Parameters passed to the algorithm, keyed by name.
        timestamp: Timestamp of the computation. Defaults to the current
            UTC time formatted with ``datetime.isoformat()``.
        library_version: Version of TraceKit used.
        input_hash: Optional hash of input data for change detection.
        metadata: Additional context information.

    Example:
        >>> prov = Provenance(
        ...     algorithm='rise_time',
        ...     parameters={'ref_levels': (10, 90)},
        ...     timestamp='2025-12-21T10:30:00Z',
        ...     library_version='0.1.0'
        ... )

    References:
        API-011: Measurement Provenance Tracking
    """

    algorithm: str
    parameters: dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
    library_version: str = TRACEKIT_VERSION
    input_hash: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Export every field into a plain dictionary.

        The values are the same objects held by this instance (no copies
        are made), keyed by field name in declaration order.

        Returns:
            Dictionary representation of provenance.

        Example:
            >>> prov_dict = prov.to_dict()
            >>> import json
            >>> json.dumps(prov_dict)
        """
        exported = (
            "algorithm",
            "parameters",
            "timestamp",
            "library_version",
            "input_hash",
            "metadata",
        )
        return {name: getattr(self, name) for name in exported}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Provenance:
        """Build a Provenance from a dictionary of fields.

        Only ``"algorithm"`` is required. Missing optional keys fall back
        to: empty dict for ``parameters`` and ``metadata``, empty string
        for ``timestamp``, ``TRACEKIT_VERSION`` for ``library_version``
        and ``None`` for ``input_hash``.

        Args:
            data: Dictionary containing provenance fields.

        Returns:
            Provenance object.

        Raises:
            KeyError: If ``data`` has no ``"algorithm"`` entry.

        Example:
            >>> prov = Provenance.from_dict(prov_dict)
        """
        # Resolve the mandatory key first so its absence surfaces immediately.
        algorithm = data["algorithm"]
        # Fallbacks are rebuilt on every call so no dict is shared between
        # instances created from sparse input.
        fallbacks: dict[str, Any] = {
            "parameters": {},
            "timestamp": "",
            "library_version": TRACEKIT_VERSION,
            "input_hash": None,
            "metadata": {},
        }
        optional = {name: data.get(name, default) for name, default in fallbacks.items()}
        return cls(algorithm=algorithm, **optional)

    def __str__(self) -> str:
        """Return a multi-line, human-readable provenance summary."""
        summary = (
            f"Algorithm: {self.algorithm}\n"
            f"Timestamp: {self.timestamp}\n"
            f"Version: {self.library_version}"
        )
        if self.parameters:
            rendered = ", ".join(f"{key}={val}" for key, val in self.parameters.items())
            summary += f"\nParameters: {rendered}"
        if self.input_hash:
            # Only the first 16 characters of the hash are shown.
            summary += f"\nInput Hash: {self.input_hash[:16]}..."
        return summary

113 

114 

@dataclass
class MeasurementResultWithProvenance:
    """Measurement result with full provenance tracking.

    Pairs a measured value with its units, an optional confidence
    interval, and an optional provenance record describing how the value
    was computed.

    Attributes:
        value: Measured value.
        units: Units of measurement (e.g., 'V', 'Hz', 's').
        provenance: Provenance information.
        confidence: Optional confidence interval (low, high).

    Example:
        >>> result = MeasurementResultWithProvenance(
        ...     value=3.3,
        ...     units='V',
        ...     provenance=Provenance(
        ...         algorithm='peak_to_peak',
        ...         parameters={'window': (0, 1e-3)}
        ...     )
        ... )
        >>> print(result)
        3.3 V (peak_to_peak)

    References:
        API-011: Measurement Provenance Tracking
    """

    value: float
    units: str | None = None
    provenance: Provenance | None = None
    confidence: tuple[float, float] | None = None

    def is_equivalent(
        self,
        other: MeasurementResultWithProvenance,
        *,
        rtol: float = 1e-9,
        atol: float = 0.0,
        check_parameters: bool = True,
    ) -> bool:
        """Check if two results are equivalent.

        Values are compared with ``np.isclose`` and units must be equal.
        Algorithm and parameters are compared only when
        ``check_parameters`` is True and both results carry provenance;
        if either has none, the provenance comparison is skipped.

        Args:
            other: Other measurement result to compare.
            rtol: Relative tolerance for value comparison.
            atol: Absolute tolerance for value comparison.
            check_parameters: If True, also verify matching algorithm and parameters.

        Returns:
            True if results are equivalent.

        Example:
            >>> result1.is_equivalent(result2, rtol=1e-6)
            True
        """
        # Check value equivalence
        if not np.isclose(self.value, other.value, rtol=rtol, atol=atol):
            return False

        # Check units match
        if self.units != other.units:
            return False

        # Optionally check provenance (only when both sides have it)
        if check_parameters and self.provenance and other.provenance:
            if self.provenance.algorithm != other.provenance.algorithm:
                return False
            if self.provenance.parameters != other.provenance.parameters:
                return False

        return True

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        ``"value"`` and ``"units"`` are always present; ``"provenance"``
        and ``"confidence"`` are included only when set.

        Returns:
            Dictionary representation including provenance.

        Example:
            >>> result_dict = result.to_dict()
            >>> import json
            >>> json_str = json.dumps(result_dict)
        """
        result: dict[str, Any] = {
            "value": self.value,
            "units": self.units,
        }
        if self.provenance:
            result["provenance"] = self.provenance.to_dict()
        if self.confidence:
            result["confidence"] = self.confidence
        return result

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> MeasurementResultWithProvenance:
        """Create result from dictionary.

        A ``"provenance"`` or ``"confidence"`` entry that is missing or
        explicitly ``None`` (for example a JSON ``null``) yields ``None``
        for that field.

        Args:
            data: Dictionary containing result fields.

        Returns:
            MeasurementResultWithProvenance object.

        Raises:
            KeyError: If ``data`` has no ``"value"`` entry.
        """
        # Treat an explicit None the same as an absent key; passing None on
        # to Provenance.from_dict() or tuple() would raise TypeError.
        raw_provenance = data.get("provenance")
        provenance = None
        if raw_provenance is not None:
            provenance = Provenance.from_dict(raw_provenance)

        raw_confidence = data.get("confidence")
        confidence = None
        if raw_confidence is not None:
            # Serialized data may hold a list; the field stores a tuple.
            confidence = tuple(raw_confidence)

        return cls(
            value=data["value"],
            units=data.get("units"),
            provenance=provenance,
            confidence=confidence,
        )

    def __str__(self) -> str:
        """Human-readable string: value, then units and algorithm if set."""
        parts = [str(self.value)]
        if self.units:
            parts.append(self.units)
        if self.provenance:
            parts.append(f"({self.provenance.algorithm})")
        return " ".join(parts)

    def __repr__(self) -> str:
        """Detailed representation listing value, units and algorithm."""
        parts = [f"value={self.value}"]
        if self.units:
            parts.append(f"units='{self.units}'")
        if self.provenance:
            parts.append(f"algorithm='{self.provenance.algorithm}'")
        return f"MeasurementResultWithProvenance({', '.join(parts)})"

    def pretty_print(self) -> str:
        """Pretty-print result with full provenance.

        Returns:
            Multi-line formatted string with all details.

        Example:
            >>> print(result.pretty_print())
            Value: 3.3 V
            Algorithm: peak_to_peak
            Timestamp: 2025-12-21T10:30:00Z
            Version: 0.1.0
            Parameters: window=(0, 0.001)
        """
        lines = [f"Value: {self.value}"]
        if self.units:
            # Units share the first line with the value.
            lines[-1] += f" {self.units}"

        if self.confidence:
            lines.append(f"Confidence: ({self.confidence[0]}, {self.confidence[1]})")

        if self.provenance:
            lines.append(str(self.provenance))

        return "\n".join(lines)

282 

283 

def compute_input_hash(data: NDArray[np.float64]) -> str:
    """Return the SHA-256 hex digest of an array's bytes.

    The digest is taken over the bytes returned by ``data.tobytes()``, so
    the same input yields the same hash and it can be used to detect
    changes in input data.

    Args:
        data: Input numpy array.

    Returns:
        Hexadecimal hash string.

    Example:
        >>> data = np.array([1.0, 2.0, 3.0])
        >>> hash_str = compute_input_hash(data)

    References:
        API-011: Measurement Provenance Tracking
    """
    return hashlib.sha256(data.tobytes()).hexdigest()

306 

307 

def create_provenance(
    algorithm: str,
    parameters: dict[str, Any] | None = None,
    *,
    input_data: NDArray[np.float64] | None = None,
    metadata: dict[str, Any] | None = None,
) -> Provenance:
    """Build a provenance record for a computation.

    Convenience wrapper around ``Provenance``: the timestamp and library
    version are left to the dataclass defaults, and an input hash is
    computed only when ``input_data`` is given.

    Args:
        algorithm: Name of algorithm or method.
        parameters: Parameters used in computation. ``None`` or an empty
            dict results in a new empty dict on the record.
        input_data: Optional input data to hash for change detection.
        metadata: Additional context information. ``None`` or an empty
            dict results in a new empty dict on the record.

    Returns:
        Provenance object.

    Example:
        >>> import numpy as np
        >>> data = np.array([1.0, 2.0, 3.0])
        >>> prov = create_provenance(
        ...     algorithm='mean',
        ...     parameters={'axis': 0},
        ...     input_data=data
        ... )

    References:
        API-011: Measurement Provenance Tracking
    """
    # Hash only when data was supplied; otherwise the record has no hash.
    digest = compute_input_hash(input_data) if input_data is not None else None

    return Provenance(
        algorithm=algorithm,
        parameters=parameters if parameters else {},
        input_hash=digest,
        metadata=metadata if metadata else {},
    )

351 

352 

# Public API of this module: the names exported by a star-import.
__all__ = [
    "MeasurementResultWithProvenance",
    "Provenance",
    "compute_input_hash",
    "create_provenance",
]