Coverage for src / tracekit / core / provenance.py: 100%
87 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""Measurement provenance tracking for reproducibility.
3This module provides provenance tracking to record the complete history
4of how measurements were computed, including algorithms, parameters,
5timestamps, and library versions.
6"""
8from __future__ import annotations
10import hashlib
11from dataclasses import dataclass, field
12from datetime import UTC, datetime
13from typing import TYPE_CHECKING, Any
15import numpy as np
17if TYPE_CHECKING:
18 from numpy.typing import NDArray
# TraceKit version recorded in provenance entries when no explicit
# library_version is supplied (in production would import from __version__).
TRACEKIT_VERSION = "0.1.0"
@dataclass
class Provenance:
    """Record of how a single computation was carried out.

    Captures the algorithm, its parameters, and surrounding context so a
    measurement can be reproduced or debugged later.

    Attributes:
        algorithm: Name of algorithm or method used.
        parameters: Dictionary of parameters passed to the algorithm.
        timestamp: ISO 8601 timestamp of computation. Defaults to the
            current UTC time at construction.
        library_version: Version of TraceKit used. Defaults to
            ``TRACEKIT_VERSION``.
        input_hash: Optional hash of input data for change detection.
        metadata: Additional context information.

    Example:
        >>> prov = Provenance(
        ...     algorithm='rise_time',
        ...     parameters={'ref_levels': (10, 90)},
        ...     timestamp='2025-12-21T10:30:00Z',
        ...     library_version='0.1.0'
        ... )

    References:
        API-011: Measurement Provenance Tracking
    """

    algorithm: str
    parameters: dict[str, Any] = field(default_factory=dict)
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
    library_version: str = TRACEKIT_VERSION
    input_hash: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert provenance to a dictionary for serialization.

        The returned mapping references the same ``parameters`` and
        ``metadata`` objects held by this instance (no copies are made).

        Returns:
            Dictionary with one entry per provenance field.

        Example:
            >>> prov_dict = prov.to_dict()
            >>> import json
            >>> json.dumps(prov_dict)
        """
        # Explicit key order keeps the serialized layout stable.
        exported = (
            "algorithm",
            "parameters",
            "timestamp",
            "library_version",
            "input_hash",
            "metadata",
        )
        return {name: getattr(self, name) for name in exported}

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Provenance:
        """Create a Provenance from a dictionary.

        Only ``"algorithm"`` is required. Missing optional keys fall back
        to: empty ``parameters``/``metadata``, an empty ``timestamp``
        string, ``TRACEKIT_VERSION`` and no ``input_hash``.

        Args:
            data: Dictionary containing provenance fields.

        Returns:
            Provenance object.

        Raises:
            KeyError: If ``"algorithm"`` is missing from ``data``.

        Example:
            >>> prov = Provenance.from_dict(prov_dict)
        """
        algorithm = data["algorithm"]
        # Built per call so the mutable ``{}`` fallbacks are never shared.
        fallbacks: dict[str, Any] = {
            "parameters": {},
            "timestamp": "",
            "library_version": TRACEKIT_VERSION,
            "input_hash": None,
            "metadata": {},
        }
        optional = {key: data.get(key, default) for key, default in fallbacks.items()}
        return cls(algorithm=algorithm, **optional)

    def __str__(self) -> str:
        """Human-readable provenance summary."""
        summary = (
            f"Algorithm: {self.algorithm}\n"
            f"Timestamp: {self.timestamp}\n"
            f"Version: {self.library_version}"
        )
        if self.parameters:
            rendered = ", ".join(f"{name}={val}" for name, val in self.parameters.items())
            summary += f"\nParameters: {rendered}"
        if self.input_hash:
            # Only the first 16 characters of the hash are shown.
            summary += f"\nInput Hash: {self.input_hash[:16]}..."
        return summary
@dataclass
class MeasurementResultWithProvenance:
    """Measurement result with full provenance tracking.

    Extends the basic measurement result with comprehensive provenance
    information for reproducibility and debugging.

    Attributes:
        value: Measured value.
        units: Units of measurement (e.g., 'V', 'Hz', 's').
        provenance: Provenance information.
        confidence: Optional confidence interval (low, high).

    Example:
        >>> result = MeasurementResultWithProvenance(
        ...     value=3.3,
        ...     units='V',
        ...     provenance=Provenance(
        ...         algorithm='peak_to_peak',
        ...         parameters={'window': (0, 1e-3)}
        ...     )
        ... )
        >>> print(result)
        3.3 V (peak_to_peak)

    References:
        API-011: Measurement Provenance Tracking
    """

    value: float
    units: str | None = None
    provenance: Provenance | None = None
    confidence: tuple[float, float] | None = None

    def is_equivalent(
        self,
        other: MeasurementResultWithProvenance,
        *,
        rtol: float = 1e-9,
        atol: float = 0.0,
        check_parameters: bool = True,
    ) -> bool:
        """Check if two results are equivalent.

        Values are compared with ``numpy.isclose``; units must match
        exactly. Algorithm and parameters are compared only when
        ``check_parameters`` is true and BOTH results carry provenance;
        if either side has none, the provenance check is skipped.

        Args:
            other: Other measurement result to compare.
            rtol: Relative tolerance for value comparison.
            atol: Absolute tolerance for value comparison.
            check_parameters: If True, also verify matching algorithm and parameters.

        Returns:
            True if results are equivalent.

        Example:
            >>> result1.is_equivalent(result2, rtol=1e-6)
            True
        """
        # Check value equivalence
        if not np.isclose(self.value, other.value, rtol=rtol, atol=atol):
            return False

        # Check units match
        if self.units != other.units:
            return False

        # Optionally check provenance (only when both sides have one)
        if check_parameters and self.provenance and other.provenance:
            if self.provenance.algorithm != other.provenance.algorithm:
                return False
            if self.provenance.parameters != other.provenance.parameters:
                return False

        return True

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        ``"value"`` and ``"units"`` are always present; ``"provenance"``
        and ``"confidence"`` are included only when set.

        Returns:
            Dictionary representation including provenance.

        Example:
            >>> result_dict = result.to_dict()
            >>> import json
            >>> json_str = json.dumps(result_dict)
        """
        result: dict[str, Any] = {
            "value": self.value,
            "units": self.units,
        }
        if self.provenance:
            result["provenance"] = self.provenance.to_dict()
        if self.confidence:
            result["confidence"] = self.confidence
        return result

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> MeasurementResultWithProvenance:
        """Create result from dictionary.

        ``"provenance"`` and ``"confidence"`` entries that are missing or
        explicitly ``None`` (for example a JSON ``null``) both yield
        ``None`` on the resulting object.

        Args:
            data: Dictionary containing result fields.

        Returns:
            MeasurementResultWithProvenance object.

        Raises:
            KeyError: If ``"value"`` is missing from ``data``.
        """
        # Use .get() + None checks rather than key presence so that an
        # explicit null does not reach Provenance.from_dict()/tuple().
        raw_provenance = data.get("provenance")
        provenance = None
        if raw_provenance is not None:
            provenance = Provenance.from_dict(raw_provenance)

        raw_confidence = data.get("confidence")
        confidence = None
        if raw_confidence is not None:
            # JSON round-trips tuples as lists; restore the tuple form.
            confidence = tuple(raw_confidence)

        return cls(
            value=data["value"],
            units=data.get("units"),
            provenance=provenance,
            confidence=confidence,
        )

    def __str__(self) -> str:
        """Human-readable string representation."""
        parts = [str(self.value)]
        if self.units:
            parts.append(self.units)
        if self.provenance:
            parts.append(f"({self.provenance.algorithm})")
        return " ".join(parts)

    def __repr__(self) -> str:
        """Detailed representation."""
        parts = [f"value={self.value}"]
        if self.units:
            parts.append(f"units='{self.units}'")
        if self.provenance:
            parts.append(f"algorithm='{self.provenance.algorithm}'")
        return f"MeasurementResultWithProvenance({', '.join(parts)})"

    def pretty_print(self) -> str:
        """Pretty-print result with full provenance.

        Returns:
            Multi-line formatted string with all details.

        Example:
            >>> print(result.pretty_print())
            Value: 3.3 V
            Algorithm: peak_to_peak
            Timestamp: 2025-12-21T10:30:00Z
            Version: 0.1.0
            Parameters: window=(0, 0.001)
        """
        lines = [f"Value: {self.value}"]
        if self.units:
            # Units share the first line with the value.
            lines[-1] += f" {self.units}"

        if self.confidence:
            lines.append(f"Confidence: ({self.confidence[0]}, {self.confidence[1]})")

        if self.provenance:
            lines.append(str(self.provenance))

        return "\n".join(lines)
def compute_input_hash(data: NDArray[np.float64]) -> str:
    """Return the SHA-256 hex digest of an array's bytes.

    Only the bytes produced by ``data.tobytes()`` are hashed, so the
    digest can be stored alongside a result and compared later to
    detect that the input data changed.

    Args:
        data: Input numpy array.

    Returns:
        Hexadecimal hash string.

    Example:
        >>> data = np.array([1.0, 2.0, 3.0])
        >>> hash_str = compute_input_hash(data)

    References:
        API-011: Measurement Provenance Tracking
    """
    return hashlib.sha256(data.tobytes()).hexdigest()
def create_provenance(
    algorithm: str,
    parameters: dict[str, Any] | None = None,
    *,
    input_data: NDArray[np.float64] | None = None,
    metadata: dict[str, Any] | None = None,
) -> Provenance:
    """Create provenance record for a computation.

    Convenience function to create provenance with automatic timestamp
    and optional input hash.

    The record stores shallow copies of ``parameters`` and ``metadata``,
    so a caller that later mutates or reuses those dictionaries does not
    alter provenance that was already recorded. Nested mutable values
    are still shared with the caller.

    Args:
        algorithm: Name of algorithm or method.
        parameters: Parameters used in computation.
        input_data: Optional input data to hash for change detection.
        metadata: Additional context information.

    Returns:
        Provenance object.

    Example:
        >>> import numpy as np
        >>> data = np.array([1.0, 2.0, 3.0])
        >>> prov = create_provenance(
        ...     algorithm='mean',
        ...     parameters={'axis': 0},
        ...     input_data=data
        ... )

    References:
        API-011: Measurement Provenance Tracking
    """
    input_hash = None
    if input_data is not None:
        input_hash = compute_input_hash(input_data)

    return Provenance(
        algorithm=algorithm,
        # Snapshot the caller's dicts; None/empty become fresh empty dicts.
        parameters=dict(parameters) if parameters else {},
        input_hash=input_hash,
        metadata=dict(metadata) if metadata else {},
    )
# Names exported by ``from <module> import *``.
__all__ = [
    "MeasurementResultWithProvenance",
    "Provenance",
    "compute_input_hash",
    "create_provenance",
]