Coverage for src / tracekit / core / memoize.py: 95%
51 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
"""Memory-safe memoization decorators for TraceKit analyzer functions.

This module provides lightweight memoization decorators optimized for analyzer
functions that process numpy arrays. Unlike the full TraceKitCache, these
decorators provide simple in-memory caching with bounded size.

Example:
    >>> from tracekit.core.memoize import memoize_analysis
    >>> @memoize_analysis(maxsize=32)
    ... def expensive_fft(signal, nperseg):
    ...     return scipy.fft.fft(signal, n=nperseg)
    >>> result = expensive_fft(signal_array, 1024)  # Computed
    >>> result = expensive_fft(signal_array, 1024)  # Cached

References:
    functools.lru_cache for standard Python memoization
    hashlib for stable array hashing
"""
21from __future__ import annotations
23import hashlib
24from functools import wraps
25from typing import TYPE_CHECKING, Any, TypeVar
27import numpy as np
28from numpy.typing import NDArray
30if TYPE_CHECKING:
31 from collections.abc import Callable
33T = TypeVar("T")
def array_hash(arr: NDArray[Any], sample_size: int = 10000) -> str:
    """Create a stable 16-character hash key for a numpy array.

    Combines the array's shape, dtype, and the first ``sample_size`` bytes
    of its data into a SHA-256 digest. Sampling a bounded prefix keeps
    hashing fast for large arrays while maintaining good cache hit rates
    for typical analysis workflows.

    Args:
        arr: Numpy array to hash.
        sample_size: Number of data bytes to sample for hashing
            (default: 10KB).

    Returns:
        16-character hex hash string.

    Example:
        >>> arr = np.arange(1000000, dtype=np.float32)
        >>> hash1 = array_hash(arr)
        >>> hash2 = array_hash(arr)
        >>> assert hash1 == hash2
    """
    hash_obj = hashlib.sha256()

    # Shape and dtype distinguish arrays whose leading data bytes coincide.
    hash_obj.update(str(arr.shape).encode())
    hash_obj.update(str(arr.dtype).encode())

    # Sample the first `sample_size` bytes of the data. For C-contiguous
    # arrays, slice a flat *view* before serializing so that only the
    # sampled prefix is copied; the previous `arr.tobytes()[:sample_size]`
    # serialized the entire array first, which is O(total size) and defeats
    # the purpose of sampling. Hash values are unchanged, since tobytes()
    # emits C-contiguous data in the same order the flat view slices it.
    if arr.flags["C_CONTIGUOUS"] and arr.itemsize > 0:
        n_items = min(arr.size, -(-sample_size // arr.itemsize))  # ceil div
        data_bytes = arr.reshape(-1)[:n_items].tobytes()[:sample_size]
    else:
        # Non-contiguous (or zero-itemsize) arrays: fall back to full
        # serialization, matching the original behavior exactly.
        data_bytes = arr.tobytes()[:sample_size]
    hash_obj.update(data_bytes)

    return hash_obj.hexdigest()[:16]
def memoize_analysis(maxsize: int = 32) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator for memoizing analysis functions with numpy arrays.

    Automatically hashes numpy array arguments for cache keys.
    Memory-safe with bounded cache size using LRU eviction. With
    ``maxsize <= 0`` caching is disabled and the wrapped function is
    always invoked (the previous implementation raised ``IndexError``
    on the first call when ``maxsize == 0``).

    Args:
        maxsize: Maximum number of cached results (default: 32).
            Values <= 0 disable caching.

    Returns:
        Decorator function.

    Example:
        >>> @memoize_analysis(maxsize=16)
        ... def detect_edges(signal, threshold):
        ...     # Expensive edge detection...
        ...     return edges
        >>> # First call computes
        >>> edges1 = detect_edges(signal_array, 0.5)
        >>> # Second call uses cache
        >>> edges2 = detect_edges(signal_array, 0.5)
        >>> assert edges1 is edges2

    Note:
        Cache is stored per-function. Use TraceKitCache from core.cache
        for persistent cross-function caching.

    References:
        PERF-001: Performance optimization requirements
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Python dicts preserve insertion order, so the dict itself serves
        # as the LRU structure: least recently used entry first, most
        # recent last. This replaces the previous parallel `cache_order`
        # list, whose remove()/pop(0) calls were O(n) per hit/eviction.
        cache: dict[str, T] = {}
        hits = 0
        misses = 0

        def _make_key(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
            """Build a string cache key; arrays keyed by length + content hash."""
            key_parts: list[str] = []
            for arg in args:
                if isinstance(arg, np.ndarray):
                    # NOTE: len() raises TypeError on 0-d arrays, as in the
                    # original implementation -- callers pass 1-d+ signals.
                    key_parts.append(f"arr_{len(arg)}_{array_hash(arg)}")
                else:
                    key_parts.append(str(arg))
            # Sorted so f(a=1, b=2) and f(b=2, a=1) share one cache entry.
            for k, v in sorted(kwargs.items()):
                if isinstance(v, np.ndarray):
                    key_parts.append(f"{k}=arr_{len(v)}_{array_hash(v)}")
                else:
                    key_parts.append(f"{k}={v}")
            return ":".join(key_parts)

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            nonlocal hits, misses

            cache_key = _make_key(args, kwargs)

            if cache_key in cache:
                hits += 1
                # Re-insert to move the entry to the most-recently-used end.
                result = cache.pop(cache_key)
                cache[cache_key] = result
                return result

            misses += 1
            result = func(*args, **kwargs)

            # maxsize <= 0 disables caching entirely (and avoids the old
            # IndexError from evicting out of an empty cache).
            if maxsize > 0:
                if len(cache) >= maxsize:
                    # Evict the least recently used entry (front of dict).
                    del cache[next(iter(cache))]
                cache[cache_key] = result

            return result

        def cache_clear() -> None:
            """Clear all cached results and reset hit/miss counters."""
            nonlocal hits, misses
            cache.clear()
            hits = 0
            misses = 0

        def cache_info() -> dict[str, Any]:
            """Get cache statistics.

            Returns:
                Dictionary with cache size, maxsize, hits, and misses.
            """
            return {
                "size": len(cache),
                "maxsize": maxsize,
                "hits": hits,
                "misses": misses,
            }

        # Attach utility methods, mirroring functools.lru_cache's API.
        wrapper.cache_clear = cache_clear  # type: ignore[attr-defined]
        wrapper.cache_info = cache_info  # type: ignore[attr-defined]

        return wrapper

    return decorator
# Public API: the array hashing helper and the memoization decorator.
__all__ = [
    "array_hash",
    "memoize_analysis",
]