Coverage for src / tracekit / core / memoize.py: 95%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Memory-safe memoization decorators for TraceKit analyzer functions. 

2 

3This module provides lightweight memoization decorators optimized for analyzer 

4functions that process numpy arrays. Unlike the full TraceKitCache, these 

5decorators provide simple in-memory caching with bounded size. 

6 

7 

8Example: 

9 >>> from tracekit.core.memoize import memoize_analysis 

10 >>> @memoize_analysis(maxsize=32) 

11 ... def expensive_fft(signal, nperseg): 

12 ... return scipy.fft.fft(signal, n=nperseg) 

13 >>> result = expensive_fft(signal_array, 1024) # Computed 

14 >>> result = expensive_fft(signal_array, 1024) # Cached 

15 

16References: 

17 functools.lru_cache for standard Python memoization 

18 hashlib for stable array hashing 

19""" 

20 

21from __future__ import annotations 

22 

23import hashlib 

24from functools import wraps 

25from typing import TYPE_CHECKING, Any, TypeVar 

26 

27import numpy as np 

28from numpy.typing import NDArray 

29 

30if TYPE_CHECKING: 

31 from collections.abc import Callable 

32 

33T = TypeVar("T") 

34 

35 

def array_hash(arr: NDArray[Any], sample_size: int = 10000) -> str:
    """Create stable hash for numpy array.

    Uses shape, dtype, and the first `sample_size` bytes of array data to
    create a hash key. This is faster than hashing the entire array while
    maintaining good cache hit rates for typical analysis workflows.

    Args:
        arr: Numpy array to hash.
        sample_size: Number of bytes to sample for hashing (default: 10KB).

    Returns:
        16-character hex hash string.

    Example:
        >>> arr = np.arange(1000000, dtype=np.float32)
        >>> hash1 = array_hash(arr)
        >>> hash2 = array_hash(arr)
        >>> assert hash1 == hash2
    """
    hash_obj = hashlib.sha256()

    # Include shape and dtype so arrays whose leading bytes coincide but
    # whose layout or element type differ do not collide.
    hash_obj.update(str(arr.shape).encode())
    hash_obj.update(str(arr.dtype).encode())

    # Sample the first `sample_size` bytes of the data. The previous code
    # called arr.tobytes() on the FULL array and then sliced, which copies
    # every byte (O(arr.nbytes)) only to discard all but 10KB -- defeating
    # the sampling optimization this function exists for. For C-contiguous
    # arrays, slice a flat view first so only the sampled elements are
    # serialized; the resulting bytes (and hash) are identical.
    if arr.flags.c_contiguous and arr.itemsize:
        n_items = min(arr.size, -(-sample_size // arr.itemsize))  # ceil div
        data_bytes = arr.reshape(-1)[:n_items].tobytes()[:sample_size]
    else:
        # Non-contiguous (or zero-itemsize) arrays: fall back to the full
        # copy, matching tobytes()'s default C-order serialization exactly.
        data_bytes = arr.tobytes()[:sample_size]
    hash_obj.update(data_bytes)

    return hash_obj.hexdigest()[:16]

68 

69 

def memoize_analysis(maxsize: int = 32) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Decorator for memoizing analysis functions with numpy arrays.

    Automatically hashes numpy array arguments for cache keys.
    Memory-safe with bounded cache size using LRU eviction.

    Args:
        maxsize: Maximum number of cached results (default: 32).

    Returns:
        Decorator function.

    Example:
        >>> @memoize_analysis(maxsize=16)
        ... def detect_edges(signal, threshold):
        ...     # Expensive edge detection...
        ...     return edges
        >>> # First call computes
        >>> edges1 = detect_edges(signal_array, 0.5)
        >>> # Second call uses cache
        >>> edges2 = detect_edges(signal_array, 0.5)
        >>> assert edges1 is edges2

    Note:
        Cache is stored per-function. Use TraceKitCache from core.cache
        for persistent cross-function caching. Non-array arguments are
        keyed via ``str()``, so distinct values with identical string
        representations (e.g. ``1`` and ``"1"``) share a cache entry.

    References:
        PERF-001: Performance optimization requirements
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # An insertion-ordered dict (guaranteed since Python 3.7) doubles as
        # the LRU structure: the first key is always least recently used.
        # This replaces the previous side-list whose list.remove() made
        # every cache hit O(n) in the number of cached entries.
        cache: dict[str, T] = {}
        stats = {"hits": 0, "misses": 0}

        def _make_key(args: tuple[Any, ...], kwargs: dict[str, Any]) -> str:
            """Build a string cache key; ndarray args are hashed, not stringified."""
            key_parts: list[str] = []
            for arg in args:
                if isinstance(arg, np.ndarray):
                    key_parts.append(f"arr_{len(arg)}_{array_hash(arg)}")
                else:
                    key_parts.append(str(arg))
            # Sort kwargs so f(a=1, b=2) and f(b=2, a=1) share one entry.
            for k, v in sorted(kwargs.items()):
                if isinstance(v, np.ndarray):
                    key_parts.append(f"{k}=arr_{len(v)}_{array_hash(v)}")
                else:
                    key_parts.append(f"{k}={v}")
            return ":".join(key_parts)

        @wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            cache_key = _make_key(args, kwargs)

            if cache_key in cache:
                stats["hits"] += 1
                # Re-insert to mark as most recently used -- O(1).
                cache[cache_key] = cache.pop(cache_key)
                return cache[cache_key]

            stats["misses"] += 1
            result = func(*args, **kwargs)

            # Evict the least recently used entry if at capacity.
            if len(cache) >= maxsize:
                del cache[next(iter(cache))]

            cache[cache_key] = result
            return result

        def cache_clear() -> None:
            """Clear all cached results and reset hit/miss counters."""
            cache.clear()
            stats["hits"] = 0
            stats["misses"] = 0

        def cache_info() -> dict[str, Any]:
            """Get cache statistics.

            Returns:
                Dictionary with cache size, maxsize, and hit/miss counts.
            """
            return {
                "size": len(cache),
                "maxsize": maxsize,
                "hits": stats["hits"],
                "misses": stats["misses"],
            }

        # Attach utility methods
        wrapper.cache_clear = cache_clear  # type: ignore[attr-defined]
        wrapper.cache_info = cache_info  # type: ignore[attr-defined]

        return wrapper

    return decorator

166 

167 

# Public API: the sampling hash helper and the memoization decorator.
__all__ = [
    "array_hash",
    "memoize_analysis",
]