Coverage for src/tracekit/core/cache.py: 98% (189 statements)
coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
"""Persistent LRU cache with disk spillover for TraceKit intermediate results.

This module provides memory-bounded caching for expensive operations like FFT,
spectrograms, and filtered traces with automatic disk spillover when memory
limits are exceeded.

Example:
    >>> from tracekit.core.cache import TraceKitCache, get_cache
    >>> cache = get_cache(max_memory="2GB")
    >>> result = cache.get_or_compute("fft_key", compute_fft, signal, 1024)
    >>> cache.show_stats()
    Cache Statistics: 42 hits, 15 misses (73.7% hit rate)

References:
    Python functools.lru_cache
    Python pickle for serialization
"""
20from __future__ import annotations
22import contextlib
23import hashlib
24import pickle
25import tempfile
26import threading
27import time
28from collections import OrderedDict
29from dataclasses import dataclass
30from pathlib import Path
31from typing import TYPE_CHECKING, Any, TypeVar
33import numpy as np
35if TYPE_CHECKING:
36 from collections.abc import Callable
39T = TypeVar("T")
@dataclass
class CacheEntry:
    """Single cache entry with metadata.

    Entries start in memory; when the cache exceeds its memory budget the
    value is pickled to ``disk_path`` and ``value`` is set to None.

    Attributes:
        key: Cache key (hash of inputs).
        value: Cached value (None while the entry is spilled to disk).
        disk_path: Path to the pickle file if spilled, else None.
        size_bytes: Estimated size of the cached value in bytes.
        created_at: Creation timestamp (``time.time()``).
        last_accessed: Last access timestamp (``time.time()``).
        access_count: Number of times this entry has been read.
        in_memory: True if the value is currently held in memory.
    """

    key: str
    value: Any | None
    disk_path: Path | None
    size_bytes: int
    created_at: float
    last_accessed: float
    access_count: int
    in_memory: bool
@dataclass
class CacheStats:
    """Immutable snapshot of cache counters.

    Attributes:
        hits: Number of cache hits.
        misses: Number of cache misses.
        evictions: Number of entries evicted.
        disk_spills: Number of entries spilled to disk.
        current_memory: Current memory usage (bytes).
        current_entries: Number of entries in cache.
        disk_entries: Number of entries on disk.
    """

    hits: int
    misses: int
    evictions: int
    disk_spills: int
    current_memory: int
    current_entries: int
    disk_entries: int

    @property
    def hit_rate(self) -> float:
        """Fraction of lookups that were hits (0.0 when no lookups yet)."""
        lookups = self.hits + self.misses
        if lookups == 0:
            return 0.0
        return self.hits / lookups

    def __str__(self) -> str:
        """Render the counters as a multi-line human-readable report."""
        report = [
            "Cache Statistics:",
            f" Hits: {self.hits}",
            f" Misses: {self.misses}",
            f" Hit Rate: {self.hit_rate * 100:.1f}%",
            f" Evictions: {self.evictions}",
            f" Disk Spills: {self.disk_spills}",
            f" Memory Usage: {self.current_memory / 1e9:.2f} GB",
            f" Entries (Memory): {self.current_entries}",
            f" Entries (Disk): {self.disk_entries}",
        ]
        return "\n".join(report) + "\n"
class TraceKitCache:
    """LRU cache with disk spillover for intermediate results.

    Caches expensive computation results with automatic memory management.
    When the memory limit is exceeded, least-recently-used entries are
    spilled to disk; disk-resident entries that reach the LRU end again are
    evicted outright (their file is removed). Automatic cleanup on exit.

    Args:
        max_memory: Maximum memory for cache (bytes or string like "2GB").
        cache_dir: Directory for disk cache (default: <tmpdir>/tracekit_cache).
        auto_cleanup: Clean up disk cache on exit (default: True).

    Example:
        >>> cache = TraceKitCache(max_memory="1GB")
        >>> result = cache.get_or_compute("key", expensive_func, arg1, arg2)
        >>> stats = cache.get_stats()
        >>> cache.clear()

    Security Note:
        Cache files use pickle serialization. Cache directory should be in
        a secure location with appropriate permissions. Do not share cache
        directories across security boundaries. The cache is intended for
        single-user, local computation only.

    References:
        MEM-031: Persistent Cache (Disk-Based)
        MEM-029: LRU Cache for Intermediate Results
    """

    def __init__(
        self,
        max_memory: int | str = "2GB",
        *,
        cache_dir: str | Path | None = None,
        auto_cleanup: bool = True,
    ):
        """Initialize cache.

        Args:
            max_memory: Maximum memory (bytes, or a string like "512MB").
            cache_dir: Directory for disk cache.
            auto_cleanup: Clean up on context-manager exit.
        """
        # Parse max_memory ("2GB" -> 2e9 bytes)
        if isinstance(max_memory, str):
            self.max_memory = self._parse_memory_string(max_memory)
        else:
            self.max_memory = int(max_memory)

        # Set up cache directory
        if cache_dir is None:
            self.cache_dir = Path(tempfile.gettempdir()) / "tracekit_cache"
        else:
            self.cache_dir = Path(cache_dir)

        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.auto_cleanup = auto_cleanup

        # Cache storage (LRU via OrderedDict; front = least recently used)
        self._cache: OrderedDict[str, CacheEntry] = OrderedDict()

        # Thread lock for thread-safe operations (MEM-031)
        self._lock = threading.RLock()

        # Statistics
        self._hits = 0
        self._misses = 0
        self._evictions = 0
        self._disk_spills = 0
        # Bytes currently held IN MEMORY; disk-resident entries contribute 0.
        self._current_memory = 0

    def __enter__(self) -> TraceKitCache:
        """Enter context."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:  # type: ignore[no-untyped-def]
        """Exit context and clean up if enabled."""
        # Note: exc_val and exc_tb intentionally unused but required by the
        # context-manager protocol.
        if self.auto_cleanup:
            self.clear()

    def get(self, key: str) -> Any | None:
        """Get value from cache.

        Args:
            key: Cache key.

        Returns:
            Cached value or None if not found.

        Example:
            >>> value = cache.get("my_key")
            >>> if value is None:
            ...     value = compute_value()
            ...     cache.put("my_key", value)

        References:
            MEM-031: Persistent Cache (Disk-Based)
        """
        with self._lock:
            if key not in self._cache:
                self._misses += 1
                return None

            # Cache hit
            self._hits += 1
            entry = self._cache[key]

            # Update access metadata
            entry.last_accessed = time.time()
            entry.access_count += 1

            # Move to end (most recently used)
            self._cache.move_to_end(key)

            # Load from disk if needed
            if not entry.in_memory:
                entry.value = self._load_from_disk(entry.disk_path)  # type: ignore[arg-type]
                entry.in_memory = True
                self._current_memory += entry.size_bytes

            # BUGFIX: capture the value BEFORE enforcing the memory limit.
            # _ensure_memory_limit() may spill this very entry back to disk
            # (setting entry.value = None); the original code returned
            # entry.value afterwards and could return None on a hit.
            value = entry.value
            self._ensure_memory_limit()
            return value

    def put(self, key: str, value: Any) -> None:
        """Put value in cache.

        Args:
            key: Cache key.
            value: Value to cache.

        Example:
            >>> cache.put("result_key", computed_result)

        References:
            MEM-031: Persistent Cache (Disk-Based)
        """
        # Calculate size outside lock (potentially expensive)
        size_bytes = self._estimate_size(value)

        with self._lock:
            # Remove old entry if exists (and its disk file, best-effort)
            if key in self._cache:
                old_entry = self._cache[key]
                if old_entry.in_memory:
                    self._current_memory -= old_entry.size_bytes
                if old_entry.disk_path and old_entry.disk_path.exists():
                    with contextlib.suppress(OSError):
                        old_entry.disk_path.unlink()
                del self._cache[key]

            # Create new entry (in memory, never accessed yet)
            entry = CacheEntry(
                key=key,
                value=value,
                disk_path=None,
                size_bytes=size_bytes,
                created_at=time.time(),
                last_accessed=time.time(),
                access_count=0,
                in_memory=True,
            )

            # Add to cache as most-recently-used
            self._cache[key] = entry
            self._current_memory += size_bytes

            # Ensure memory limit
            self._ensure_memory_limit()

    def get_or_compute(
        self,
        key: str,
        compute_fn: Callable[..., T],
        *args: Any,
        **kwargs: Any,
    ) -> T:
        """Get cached value or compute and cache it.

        Note: a ``None`` result cannot be distinguished from a cache miss,
        so functions that return None are recomputed on every call.

        Args:
            key: Cache key.
            compute_fn: Function to compute value if not cached.
            args: Arguments to compute_fn.
            kwargs: Keyword arguments to compute_fn.

        Returns:
            Cached or computed value.

        Example:
            >>> result = cache.get_or_compute("fft_key", np.fft.fft, signal)

        References:
            MEM-029: LRU Cache for Intermediate Results
        """
        value = self.get(key)
        if value is not None:
            return value  # type: ignore[no-any-return]

        # Compute value
        value = compute_fn(*args, **kwargs)

        # Cache it
        self.put(key, value)

        return value

    def clear(self) -> None:
        """Clear all cache entries and disk files.

        Example:
            >>> cache.clear()

        References:
            MEM-031: Persistent Cache (Disk-Based)
        """
        with self._lock:
            # Remove disk files (best-effort)
            for entry in self._cache.values():
                if entry.disk_path and entry.disk_path.exists():
                    with contextlib.suppress(OSError):
                        entry.disk_path.unlink()

            # Clear cache
            self._cache.clear()
            self._current_memory = 0

        # Try to remove cache directory if empty (outside lock, not critical)
        try:
            if self.cache_dir.exists() and not any(self.cache_dir.iterdir()):
                self.cache_dir.rmdir()
        except OSError:
            pass

    def get_stats(self) -> CacheStats:
        """Get cache statistics.

        Returns:
            CacheStats snapshot of the current counters.

        Example:
            >>> stats = cache.get_stats()
            >>> print(f"Hit rate: {stats.hit_rate * 100:.1f}%")

        References:
            MEM-031: Persistent Cache (Disk-Based)
        """
        with self._lock:
            disk_entries = sum(1 for e in self._cache.values() if not e.in_memory)
            return CacheStats(
                hits=self._hits,
                misses=self._misses,
                evictions=self._evictions,
                disk_spills=self._disk_spills,
                current_memory=self._current_memory,
                current_entries=len(self._cache),
                disk_entries=disk_entries,
            )

    def show_stats(self) -> None:
        """Print cache statistics.

        Example:
            >>> cache.show_stats()
            Cache Statistics: 42 hits, 15 misses (73.7% hit rate)

        References:
            MEM-031: Persistent Cache (Disk-Based)
        """
        stats = self.get_stats()
        print(stats)

    def compute_key(self, *args: Any, **kwargs: Any) -> str:
        """Compute cache key from arguments.

        Creates a SHA-256 hash key from arbitrary arguments for cache lookups.
        Keys are stable within a process run; they are not guaranteed stable
        across releases (the hashing scheme is an internal detail).

        Args:
            args: Positional arguments.
            kwargs: Keyword arguments.

        Returns:
            Hex-digest key string.

        Example:
            >>> key = cache.compute_key("operation", param1=10, param2="value")

        References:
            MEM-029: LRU Cache for Intermediate Results
        """
        # Create hashable representation
        hash_obj = hashlib.sha256()

        # Hash positional args
        for arg in args:
            hash_obj.update(self._make_hashable(arg))

        # Hash keyword args (sorted for consistency)
        for k in sorted(kwargs.keys()):
            hash_obj.update(k.encode())
            hash_obj.update(self._make_hashable(kwargs[k]))

        return hash_obj.hexdigest()

    def _ensure_memory_limit(self) -> None:
        """Spill or evict LRU entries until in-memory usage is within limit.

        Fixes two accounting bugs in the original implementation:

        * An entry that was already disk-resident contributed 0 bytes to
          ``_current_memory``, but the old code still subtracted its
          ``size_bytes`` on eviction, driving the counter negative and
          letting real memory usage exceed the limit.
        * Evicting a disk-resident entry left its ``.pkl`` file behind
          (leaked until ``clear()``); it is now unlinked.
        """
        while self._current_memory > self.max_memory and self._cache:
            # Pop the least recently used entry (front of the OrderedDict).
            key, entry = self._cache.popitem(last=False)

            if entry.in_memory:
                # Demote to disk: write the value out on first spill, then
                # drop the in-memory copy but keep the entry (at the LRU end).
                if entry.disk_path is None:
                    entry.disk_path = self._spill_to_disk(key, entry.value)
                    self._disk_spills += 1
                entry.in_memory = False
                entry.value = None
                self._cache[key] = entry
                self._cache.move_to_end(key, last=False)
                # Only in-memory bytes are tracked, so only decrement here.
                self._current_memory -= entry.size_bytes
            else:
                # Already disk-only and still the coldest entry: evict it
                # outright and remove its backing file.
                if entry.disk_path is not None and entry.disk_path.exists():
                    with contextlib.suppress(OSError):
                        entry.disk_path.unlink()

            self._evictions += 1

    def _spill_to_disk(self, key: str, value: Any) -> Path:
        """Write value to disk and return the file path.

        Args:
            key: Cache key.
            value: Value to write.

        Returns:
            Path to the pickle file.
        """
        # Hash the key for the filename: keys are arbitrary strings and may
        # contain path separators or other filesystem-unsafe characters.
        safe_name = hashlib.sha256(key.encode()).hexdigest()
        disk_path = self.cache_dir / f"{safe_name}.pkl"
        with open(disk_path, "wb") as f:
            pickle.dump(value, f, protocol=pickle.HIGHEST_PROTOCOL)
        return disk_path

    def _load_from_disk(self, disk_path: Path) -> Any:
        """Load a previously spilled value from disk.

        Args:
            disk_path: Path to the pickle file.

        Returns:
            Loaded value.
        """
        # NOTE: trusted, self-written files only (see class Security Note);
        # pickle.load must never be pointed at untrusted data.
        with open(disk_path, "rb") as f:
            return pickle.load(f)

    def _estimate_size(self, value: Any) -> int:
        """Estimate size of value in bytes (container overhead ignored)."""
        if isinstance(value, np.ndarray):
            return value.nbytes  # type: ignore[no-any-return]
        elif isinstance(value, list | tuple):
            return sum(self._estimate_size(item) for item in value)
        elif isinstance(value, dict):
            return sum(self._estimate_size(k) + self._estimate_size(v) for k, v in value.items())
        else:
            # Fallback: use pickle size
            try:
                return len(pickle.dumps(value, protocol=pickle.HIGHEST_PROTOCOL))
            except (TypeError, pickle.PicklingError):
                return 1024  # Default estimate for unpicklable values

    def _make_hashable(self, obj: Any) -> bytes:
        """Convert object to bytes for hashing.

        Type tags and length prefixes prevent cross-type collisions that the
        original encoding allowed (e.g. 10 vs "10", arrays with identical
        bytes but different dtype/shape, ["ab"] vs ["a", "b"]).
        """
        if isinstance(obj, np.ndarray):
            # Include dtype and shape so reshaped/recast arrays don't collide.
            header = f"nd:{obj.dtype}:{obj.shape}:".encode()
            return header + obj.tobytes()
        elif isinstance(obj, str | bytes):
            data = obj.encode() if isinstance(obj, str) else obj
            return b"s:" + data
        elif isinstance(obj, int | float | bool):
            return b"n:" + str(obj).encode()
        elif isinstance(obj, list | tuple):
            parts = [self._make_hashable(item) for item in obj]
            # Length-prefix each element so sequence boundaries are unambiguous.
            return b"q:" + b"".join(len(p).to_bytes(8, "big") + p for p in parts)
        else:
            # Fallback: use string representation
            return b"r:" + str(obj).encode()

    def _parse_memory_string(self, memory_str: str) -> int:
        """Parse memory string like '2GB' to bytes (decimal units)."""
        memory_str = memory_str.strip().upper()

        if memory_str.endswith("GB"):
            return int(float(memory_str[:-2]) * 1e9)
        elif memory_str.endswith("MB"):
            return int(float(memory_str[:-2]) * 1e6)
        elif memory_str.endswith("KB"):
            return int(float(memory_str[:-2]) * 1e3)
        else:
            # Bare number: interpret as bytes
            return int(memory_str)
# Global cache instance
# Lazily created by get_cache(); reset to None by clear_cache().
_global_cache: TraceKitCache | None = None
def get_cache(
    max_memory: int | str = "2GB",
    *,
    cache_dir: str | Path | None = None,
) -> TraceKitCache:
    """Return the process-wide cache, creating it on first use.

    ``max_memory`` and ``cache_dir`` take effect only on the call that
    actually creates the instance; later calls return the existing cache
    unchanged.

    Args:
        max_memory: Maximum memory for cache.
        cache_dir: Cache directory.

    Returns:
        Global TraceKitCache instance.

    Example:
        >>> cache = get_cache(max_memory="1GB")
        >>> result = cache.get_or_compute("key", compute_fn, args)

    References:
        MEM-031: Persistent Cache (Disk-Based)
    """
    global _global_cache
    if _global_cache is None:
        _global_cache = TraceKitCache(max_memory, cache_dir=cache_dir)
    return _global_cache
def clear_cache() -> None:
    """Clear the global cache and forget the instance.

    The next get_cache() call will build a fresh cache.

    Example:
        >>> clear_cache()

    References:
        MEM-031: Persistent Cache (Disk-Based)
    """
    global _global_cache
    if _global_cache is None:
        return
    _global_cache.clear()
    _global_cache = None
def show_cache_stats() -> None:
    """Print statistics for the global cache, if one exists.

    Example:
        >>> show_cache_stats()
        Cache Statistics: 42 hits, 15 misses (73.7% hit rate)

    References:
        MEM-031: Persistent Cache (Disk-Based)
    """
    if _global_cache is None:
        print("Cache not initialized")
        return
    _global_cache.show_stats()
# Public API of this module (alphabetical).
__all__ = [
    "CacheEntry",
    "CacheStats",
    "TraceKitCache",
    "clear_cache",
    "get_cache",
    "show_cache_stats",
]