Coverage for src / tracekit / core / memory_guard.py: 32%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Memory-safe guards for TraceKit analysis. 

2 

3This module provides memory guards and resource limiting utilities to prevent 

4out-of-memory conditions during analysis operations. 

5 

6 

7Example: 

8 >>> from tracekit.core.memory_guard import MemoryGuard, check_memory_available 

9 >>> if check_memory_available(500): # Need at least 500MB 

10 ... with MemoryGuard(max_mb=1000, name="fft") as guard: 

11 ... result = compute_fft(data) 

12 ... if not guard.check(): 

13 ... raise MemoryError("Exceeded memory limit") 

14 

15References: 

16 See tracekit.core.memory_monitor for runtime monitoring. 

17""" 

18 

from __future__ import annotations

import logging
import math
import os
import sys
from typing import Any

25 

26logger = logging.getLogger(__name__) 

27 

28 

def get_memory_usage_mb() -> float:
    """Get current process memory usage in MB.

    Returns:
        Current resident set size (RSS) in megabytes, or 0.0 when psutil
        is not installed and RSS cannot be measured.

    Example:
        >>> mem_mb = get_memory_usage_mb()
        >>> print(f"Current process using {mem_mb:.1f} MB")
    """
    try:
        import psutil
    except ImportError:
        # No portable way to read RSS without psutil; report zero instead.
        logger.debug("psutil not available, memory usage tracking disabled")
        return 0.0

    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    return float(rss_bytes / (1024 * 1024))

48 

49 

def check_memory_available(required_mb: float = 100) -> bool:
    """Check if sufficient memory is available.

    Args:
        required_mb: Required available memory in megabytes.

    Returns:
        True if sufficient memory is available.

    Example:
        >>> if not check_memory_available(1000):
        ...     print("Warning: Less than 1GB available")
        ...     # Reduce batch size or chunk operations
    """
    try:
        import psutil
    except ImportError:
        # Cannot measure without psutil; optimistically assume enough memory.
        return True

    free_mb = float(psutil.virtual_memory().available / (1024 * 1024))
    return bool(free_mb > required_mb)

72 

73 

class MemoryGuard:
    """Context manager for memory-safe operations.

    Tracks process memory while the context is active and logs a warning
    when the measured growth exceeds the configured limit.

    Attributes:
        max_mb: Maximum memory limit in megabytes.
        name: Operation name for logging.
        start_mem: Starting memory usage (MB).

    Example:
        >>> with MemoryGuard(max_mb=2000, name="spectrogram") as guard:
        ...     # Perform memory-intensive operation
        ...     for chunk in data_chunks:
        ...         process_chunk(chunk)
        ...         if not guard.check():
        ...             break  # Stop before exceeding limit
        >>> stats = guard.get_stats()
        >>> print(f"Peak: {stats['peak_mb']:.1f} MB, Delta: {stats['delta_mb']:.1f} MB")
    """

    def __init__(self, max_mb: float = 1000, name: str = "operation"):
        """Initialize memory guard.

        Args:
            max_mb: Maximum memory increase allowed in megabytes.
            name: Operation name for logging and error messages.
        """
        self.max_mb = max_mb
        self.name = name
        self.start_mem = 0.0
        self._peak_mem = 0.0

    def __enter__(self) -> MemoryGuard:
        """Record the baseline memory usage and enter the context."""
        baseline = get_memory_usage_mb()
        self.start_mem = baseline
        self._peak_mem = baseline
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: Any,
    ) -> None:
        """Report memory growth on exit; warn if the limit was exceeded."""
        # The exc_* arguments are required by the context-manager protocol
        # but intentionally unused: exceptions are never suppressed here.
        final_mem = get_memory_usage_mb()
        delta = final_mem - self.start_mem

        if delta > self.max_mb:
            logger.warning(
                f"{self.name} used {delta:.1f} MB (limit: {self.max_mb:.1f} MB). "
                f"Consider reducing batch size or enabling chunked processing."
            )

        # Fold the final reading into the recorded peak.
        self._peak_mem = max(self._peak_mem, final_mem)

    def check(self) -> bool:
        """Check if within memory limit.

        Returns:
            True if within limit, False if limit exceeded.

        Example:
            >>> with MemoryGuard(max_mb=500) as guard:
            ...     for i in range(1000):
            ...         # Do work
            ...         if i % 100 == 0 and not guard.check():
            ...             raise MemoryError("Memory limit exceeded")
        """
        now = get_memory_usage_mb()
        if now > self._peak_mem:
            self._peak_mem = now

        delta = now - self.start_mem
        if delta <= self.max_mb:
            return True

        logger.warning(
            f"{self.name}: Memory usage {delta:.1f} MB exceeds limit {self.max_mb:.1f} MB"
        )
        return False

    def get_stats(self) -> dict[str, float]:
        """Get memory statistics for this guard.

        Returns:
            Dictionary with keys:
                - start_mb: Starting memory
                - current_mb: Current memory
                - peak_mb: Peak memory
                - delta_mb: Memory increase since start
                - limit_mb: Configured limit

        Example:
            >>> with MemoryGuard(max_mb=1000, name="test") as guard:
            ...     # ... work ...
            ...     pass
            >>> stats = guard.get_stats()
            >>> print(f"Used {stats['delta_mb']:.1f} / {stats['limit_mb']:.1f} MB")
        """
        # NOTE: unlike check(), this intentionally does not refresh _peak_mem,
        # matching the original behavior.
        now = get_memory_usage_mb()
        return {
            "start_mb": self.start_mem,
            "current_mb": now,
            "peak_mb": self._peak_mem,
            "delta_mb": now - self.start_mem,
            "limit_mb": self.max_mb,
        }

186 

187 

def safe_array_size(shape: tuple[int, ...], dtype_bytes: int = 8) -> int:
    """Calculate array size in bytes, checking for overflow.

    The element count is computed with :func:`math.prod`, which uses
    Python's arbitrary-precision integers. The previous ``np.prod``-based
    implementation multiplied in fixed-width int64 and could silently wrap
    on overflow, defeating the overflow check this function exists for.

    Args:
        shape: Array shape tuple. All dimensions must be non-negative.
        dtype_bytes: Bytes per element (default: 8 for float64). Must be
            a positive integer.

    Returns:
        Total array size in bytes.

    Raises:
        OverflowError: If the total size would exceed ``sys.maxsize``.
        ValueError: If any dimension is negative or dtype_bytes is not
            positive.

    Example:
        >>> size = safe_array_size((1000, 1000, 8), dtype_bytes=8)
        >>> print(f"Array would use {size / 1e6:.1f} MB")
        >>> # Check if safe to allocate
        >>> if can_allocate(size):
        ...     arr = np.zeros((1000, 1000, 8))
    """
    if dtype_bytes <= 0:
        raise ValueError(f"dtype_bytes must be positive, got {dtype_bytes}")
    if any(dim < 0 for dim in shape):
        raise ValueError(f"Array dimensions must be non-negative: {shape}")

    # Arbitrary-precision product: the comparison below cannot wrap.
    total_elements = math.prod(shape)

    if total_elements > sys.maxsize // dtype_bytes:
        raise OverflowError(f"Array dimensions {shape} would cause overflow")

    return total_elements * dtype_bytes

223 

224 

def can_allocate(size_bytes: int) -> bool:
    """Check if allocation is safe given available memory.

    Args:
        size_bytes: Requested allocation size in bytes.

    Returns:
        True if allocation is safe (with 2x safety margin).

    Example:
        >>> import numpy as np
        >>> shape = (10000, 10000)
        >>> size = safe_array_size(shape, dtype_bytes=8)
        >>> if can_allocate(size):
        ...     arr = np.zeros(shape)
        ... else:
        ...     print("Not enough memory, use chunked processing")
    """
    # Require twice the requested size to leave headroom for other allocations.
    required_mb = 2 * (size_bytes / (1024 * 1024))
    return check_memory_available(required_mb)

248 

249 

def get_safe_chunk_size(
    total_samples: int,
    dtype_bytes: int = 8,
    max_chunk_mb: float = 100,
) -> int:
    """Calculate safe chunk size for processing large datasets.

    Args:
        total_samples: Total number of samples to process.
        dtype_bytes: Bytes per sample (default: 8 for float64).
        max_chunk_mb: Maximum chunk size in megabytes.

    Returns:
        Chunk size in samples that fits within the memory limit: at least
        1000 samples per chunk when the data is that large, never more
        than ``total_samples``, and always at least 1 (so the result is a
        valid ``range`` step even for empty inputs).

    Example:
        >>> total = 1_000_000_000  # 1 billion samples
        >>> chunk_size = get_safe_chunk_size(total, max_chunk_mb=100)
        >>> print(f"Process in chunks of {chunk_size:,} samples")
        >>> for i in range(0, total, chunk_size):
        ...     chunk = data[i:i+chunk_size]
        ...     process(chunk)
    """
    max_bytes = max_chunk_mb * 1024 * 1024
    max_samples = int(max_bytes // dtype_bytes)

    # Floor of 1000 samples keeps per-chunk overhead reasonable, but the
    # chunk must never exceed the dataset itself. (The previous version
    # applied the floor last, so e.g. total_samples=500 yielded 1000.)
    chunk_size = min(max(min(max_samples, total_samples), 1000), total_samples)

    # Guarantee a positive result so callers can use it as a range() step.
    return max(1, chunk_size)

281 

282 

# Public API surface for ``from tracekit.core.memory_guard import *``
# (kept alphabetized, with the class listed first per the file's convention).
__all__ = [
    "MemoryGuard",
    "can_allocate",
    "check_memory_available",
    "get_memory_usage_mb",
    "get_safe_chunk_size",
    "safe_array_size",
]