Coverage for src / tracekit / core / backend_selector.py: 0%

109 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Automatic backend selection for optimal performance. 

2 

3This module provides intelligent backend selection based on data characteristics, 

4available hardware, and performance requirements. Automatically chooses between 

5NumPy, Numba, GPU (CuPy), and distributed (Dask) backends. 

6 

7Usage: 

8 from tracekit.core.backend_selector import BackendSelector, select_backend 

9 

10 selector = BackendSelector() 

11 backend = selector.select_for_fft(data_size=50_000_000) 

12 # Returns 'gpu' if available, else 'scipy' 

13 

14Performance decision tree: 

15 - Small data (<100K): NumPy/SciPy 

16 - Medium data (100K-10M): Numba JIT 

17 - Large data (>10M): GPU if available, else Numba 

18 - Huge data (>1GB): Dask distributed 

19 

20Example: 

21 >>> from tracekit.core.backend_selector import select_backend 

22 >>> import numpy as np 

23 >>> 

24 >>> data = np.random.randn(50_000_000) 

25 >>> backend = select_backend('fft', data_size=len(data)) 

26 >>> print(f"Selected backend: {backend}") # 'gpu' or 'scipy' 

27""" 

28 

29from __future__ import annotations 

30 

31from dataclasses import dataclass 

32from typing import Literal 

33 

34import numpy as np 

35import psutil 

36 

# Probe the optional backends once at import time. Each HAS_* flag records
# whether the corresponding backend could be imported in this environment.
try:
    from tracekit.core.gpu_backend import gpu

    # The attribute read stays inside the try block so that a gpu object
    # without ``gpu_available`` is handled like a missing module.
    HAS_GPU = gpu.gpu_available
except (ImportError, AttributeError):
    HAS_GPU = False

try:
    import numba  # type: ignore[import-untyped]
except ImportError:
    HAS_NUMBA = False
else:
    HAS_NUMBA = True
    del numba  # only the flag is kept; drop the module name again

try:
    import dask.array  # type: ignore[import-not-found]
except ImportError:
    HAS_DASK = False
else:
    HAS_DASK = True
    del dask  # only the flag is kept; drop the module name again

try:
    import scipy.fft
except ImportError:
    HAS_SCIPY = False
else:
    HAS_SCIPY = True
    del scipy  # only the flag is kept; drop the module name again


# Closed set of backend names that the selection functions may return.
BackendType = Literal["numpy", "scipy", "numba", "gpu", "dask"]

71 

72 

@dataclass
class BackendCapabilities:
    """Snapshot of the backends and hardware resources found on this machine.

    Attributes:
        has_gpu: True when the GPU (CuPy) backend is usable.
        has_numba: True when Numba JIT is importable.
        has_dask: True when Dask is importable.
        has_scipy: True when SciPy is importable.
        cpu_count: Number of CPU cores.
        total_memory_gb: Total system RAM, in GB.
        gpu_memory_gb: GPU memory, in GB (0 when there is no GPU).
    """

    # Optional backend availability flags.
    has_gpu: bool
    has_numba: bool
    has_dask: bool
    has_scipy: bool

    # Hardware resources.
    cpu_count: int
    total_memory_gb: float
    gpu_memory_gb: float

94 

95 

def get_system_capabilities() -> BackendCapabilities:
    """Collect backend availability and hardware resources for this host.

    Returns:
        A BackendCapabilities instance filled from the module-level HAS_*
        flags, psutil's CPU/memory figures and, when a GPU backend is
        present, the GPU memory it reports.

    Example:
        >>> caps = get_system_capabilities()
        >>> if caps.has_gpu:
        ...     print(f"GPU available with {caps.gpu_memory_gb:.1f} GB memory")
    """
    bytes_per_gb = 1024**3

    # Physical core count; psutil may return None, in which case assume one core.
    physical_cores = psutil.cpu_count(logical=False)
    core_count = physical_cores if physical_cores else 1
    ram_gb = psutil.virtual_memory().total / bytes_per_gb

    # GPU memory stays at zero unless a GPU backend exists and can be queried.
    gpu_gb = 0.0
    if HAS_GPU:
        try:
            from tracekit.core.gpu_backend import gpu

            # Element [1] of get_memory_info() is treated as a byte count
            # (presumably the total device memory - confirm against gpu_backend).
            gpu_gb = gpu.get_memory_info()[1] / bytes_per_gb  # type: ignore[attr-defined]
        except Exception:
            # Best-effort probe: any failure is reported as "no GPU memory".
            gpu_gb = 0.0

    return BackendCapabilities(
        has_gpu=HAS_GPU,
        has_numba=HAS_NUMBA,
        has_dask=HAS_DASK,
        has_scipy=HAS_SCIPY,
        cpu_count=core_count,
        total_memory_gb=ram_gb,
        gpu_memory_gb=gpu_gb,
    )

132 

133 

class BackendSelector:
    """Pick a compute backend for an operation from data size and capabilities.

    The system capabilities are captured once when the selector is built.
    Each ``select_for_*`` method then decides from its arguments and that
    snapshot only, and never returns a backend whose availability flag is
    False (NumPy is the unconditional fallback).

    Example:
        >>> selector = BackendSelector()
        >>> # For FFT on 50M samples
        >>> backend = selector.select_for_fft(50_000_000)
        >>> # For edge detection with hysteresis
        >>> backend = selector.select_for_edge_detection(1_000_000, has_hysteresis=True)
    """

    def __init__(self) -> None:
        """Initialize the selector with the current system capabilities."""
        self.capabilities = get_system_capabilities()

    def select_for_fft(
        self,
        data_size: int,
        dtype: type = np.float64,
    ) -> BackendType:
        """Select the backend for FFT operations.

        Args:
            data_size: Number of samples in the signal.
            dtype: Data type of the signal. Accepted for API compatibility;
                the current decision does not use it.

        Returns:
            BackendType: 'numpy', 'scipy', 'gpu', or 'dask'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_fft(10_000_000)
        """
        caps = self.capabilities
        if data_size > 100_000_000 and caps.has_dask:
            # Huge data: use distributed
            return "dask"
        if data_size > 10_000_000 and caps.has_gpu:
            # Large data + GPU: use GPU
            return "gpu"
        if caps.has_scipy:
            # Use scipy.fft with workers (faster than numpy.fft)
            return "scipy"
        # Fallback to numpy
        return "numpy"

    def select_for_edge_detection(
        self,
        data_size: int,
        has_hysteresis: bool = False,
    ) -> BackendType:
        """Select the backend for edge detection.

        Args:
            data_size: Number of samples in the signal.
            has_hysteresis: Whether hysteresis is used (affects vectorization).

        Returns:
            BackendType: 'numpy', 'numba', or 'gpu'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_edge_detection(5_000_000, has_hysteresis=True)
        """
        caps = self.capabilities
        if data_size > 10_000_000 and caps.has_gpu:
            return "gpu"
        if has_hysteresis and caps.has_numba and data_size > 100_000:
            return "numba"
        # Both remaining cases map to "numpy": with hysteresis the state
        # machine actually runs in Python; without it the work is vectorized.
        return "numpy"

    def select_for_correlation(
        self,
        signal1_size: int,
        signal2_size: int,
        mode: Literal["full", "valid", "same"] = "full",
    ) -> BackendType:
        """Select the backend for correlation.

        Args:
            signal1_size: Size of the first signal, in samples.
            signal2_size: Size of the second signal, in samples.
            mode: Correlation mode; determines the estimated output size.

        Returns:
            Backend name: 'dask', 'gpu', 'scipy', or 'numpy'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_correlation(1_000_000, 10_000)
        """
        caps = self.capabilities
        input_samples = signal1_size + signal2_size
        output_samples = self._estimate_correlation_output(signal1_size, signal2_size, mode)

        # Footprint of both inputs plus the output, assuming 8 bytes per sample.
        footprint_mb = (input_samples + output_samples) * 8 / (1024**2)

        if footprint_mb > caps.total_memory_gb * 1024 * 0.5:
            # Would use >50% RAM: use chunked/streaming
            return "dask" if caps.has_dask else "numpy"
        if signal1_size > 10_000_000 and caps.has_gpu:
            return "gpu"
        if caps.has_scipy:
            return "scipy"
        return "numpy"

    def select_for_protocol_decode(
        self,
        data_size: int,
        protocol: str,
    ) -> BackendType:
        """Select the backend for protocol decoding.

        Args:
            data_size: Number of samples in the signal.
            protocol: Protocol name (e.g., 'uart', 'spi', 'i2c'). Accepted for
                API compatibility; the current decision does not use it.

        Returns:
            Backend name: 'numba' or 'numpy'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_protocol_decode(5_000_000, 'uart')
        """
        # Protocol decoders use edge detection + state machines
        # Large signals benefit from Numba-compiled state machines
        if data_size > 1_000_000 and self.capabilities.has_numba:
            return "numba"
        return "numpy"

    def select_for_pattern_matching(
        self,
        data_size: int,
        pattern_count: int,
        approximate: bool = False,
    ) -> BackendType:
        """Select the backend for pattern matching.

        Args:
            data_size: Size of the data to search.
            pattern_count: Number of patterns.
            approximate: Whether approximate matching is acceptable.

        Returns:
            Backend name: 'numba' or 'numpy'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_pattern_matching(1_000_000, 100, approximate=True)
        """
        # For approximate matching with many patterns, LSH is best
        if approximate and pattern_count > 10:
            return "numpy"  # LSH implementation in NumPy
        # Only pick Numba when it is installed, matching the other selectors;
        # otherwise large inputs fall back to NumPy as well.
        if data_size > 10_000_000 and self.capabilities.has_numba:
            return "numba"
        return "numpy"

    def _estimate_correlation_output(
        self,
        size1: int,
        size2: int,
        mode: Literal["full", "valid", "same"],
    ) -> int:
        """Estimate the output size of a correlation.

        Args:
            size1: Size of the first signal.
            size2: Size of the second signal.
            mode: Correlation mode. Any value other than 'full' or 'valid'
                is treated as 'same'.

        Returns:
            Estimated output size in samples (never negative).
        """
        if mode == "full":
            # Clamp so two empty signals yield 0 rather than -1.
            return max(size1 + size2 - 1, 0)
        if mode == "valid":
            return max(size1, size2) - min(size1, size2) + 1
        # same
        return max(size1, size2)

323 

324 

# Module-wide selector, created lazily on the first get_global_selector() call.
_global_selector: BackendSelector | None = None


def get_global_selector() -> BackendSelector:
    """Return the shared BackendSelector, creating it on first use.

    Returns:
        The module-level BackendSelector instance; later calls return the
        same object.

    Example:
        >>> selector = get_global_selector()
        >>> backend = selector.select_for_fft(1_000_000)
    """
    global _global_selector
    if _global_selector is not None:
        return _global_selector

    # First call: build the selector and remember it for later callers.
    _global_selector = BackendSelector()
    return _global_selector

343 

344 

def select_backend(
    operation: Literal[
        "fft", "edge_detection", "correlation", "protocol_decode", "pattern_matching"
    ],
    **kwargs: int | str | bool,
) -> BackendType:
    """Select a backend for an operation through the global selector.

    Args:
        operation: Type of operation.
        **kwargs: Operation-specific parameters, forwarded to the matching
            ``select_for_*`` method. Missing sizes and counts default to 0,
            missing flags to False, ``mode`` to "full", ``protocol`` to ""
            and ``dtype`` to ``np.float64``.

    Returns:
        Selected backend name; "numpy" for an unrecognized operation.

    Example:
        >>> backend = select_backend('fft', data_size=10_000_000)
        >>> backend = select_backend('edge_detection', data_size=5_000_000, has_hysteresis=True)
        >>> backend = select_backend('correlation', signal1_size=1_000_000, signal2_size=10_000)
    """
    chooser = get_global_selector()

    def int_arg(key: str) -> int:
        # Integer keyword with a default of 0 when the caller omitted it.
        return int(kwargs.get(key, 0))

    if operation == "fft":
        return chooser.select_for_fft(
            data_size=int_arg("data_size"),
            dtype=kwargs.get("dtype", np.float64),  # type: ignore[arg-type]
        )

    if operation == "edge_detection":
        return chooser.select_for_edge_detection(
            data_size=int_arg("data_size"),
            has_hysteresis=bool(kwargs.get("has_hysteresis", False)),
        )

    if operation == "correlation":
        return chooser.select_for_correlation(
            signal1_size=int_arg("signal1_size"),
            signal2_size=int_arg("signal2_size"),
            mode=kwargs.get("mode", "full"),  # type: ignore[arg-type]
        )

    if operation == "protocol_decode":
        return chooser.select_for_protocol_decode(
            data_size=int_arg("data_size"),
            protocol=str(kwargs.get("protocol", "")),
        )

    if operation == "pattern_matching":
        return chooser.select_for_pattern_matching(
            data_size=int_arg("data_size"),
            pattern_count=int_arg("pattern_count"),
            approximate=bool(kwargs.get("approximate", False)),
        )

    # Unknown operation names fall back to the always-available backend.
    return "numpy"  # type: ignore[unreachable]

396 

397 

# Public API of this module: the names exported by ``from ... import *``.
__all__ = [
    "BackendCapabilities",
    "BackendSelector",
    "BackendType",
    "get_global_selector",
    "get_system_capabilities",
    "select_backend",
]

405]