Coverage for src/tracekit/core/backend_selector.py: 0% — 109 statements
(coverage.py v7.13.1, created at 2026-01-11 23:04 +0000)
"""Automatic backend selection for optimal performance.

This module provides intelligent backend selection based on data characteristics,
available hardware, and performance requirements. Automatically chooses between
NumPy, Numba, GPU (CuPy), and distributed (Dask) backends.

Usage:
    from tracekit.core.backend_selector import BackendSelector, select_backend

    selector = BackendSelector()
    backend = selector.select_for_fft(signal_size=10_000_000)
    # Returns 'gpu' if available, else 'scipy'

Performance decision tree:
    - Small data (<100K): NumPy/SciPy
    - Medium data (100K-10M): Numba JIT
    - Large data (>10M): GPU if available, else Numba
    - Huge data (>1GB): Dask distributed

Example:
    >>> from tracekit.core.backend_selector import select_backend
    >>> import numpy as np
    >>>
    >>> data = np.random.randn(50_000_000)
    >>> backend = select_backend('fft', data_size=len(data))
    >>> print(f"Selected backend: {backend}")  # 'gpu' or 'scipy'
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Literal

import numpy as np
import psutil

# Probe for optional backends at import time. Each probe imports the package
# (or project wrapper), records availability in a HAS_* flag, then deletes the
# module name so the probe leaves no stray binding in this namespace.

try:
    from tracekit.core.gpu_backend import gpu

    # AttributeError is caught too: the wrapper may import but lack the
    # `gpu_available` attribute on older project versions.
    HAS_GPU = gpu.gpu_available
except (ImportError, AttributeError):
    HAS_GPU = False

try:
    import numba  # type: ignore[import-untyped]

    HAS_NUMBA = True
    del numba
except ImportError:
    HAS_NUMBA = False

try:
    import dask.array  # type: ignore[import-not-found]

    HAS_DASK = True
    del dask
except ImportError:
    HAS_DASK = False

try:
    import scipy.fft

    HAS_SCIPY = True
    del scipy
except ImportError:
    HAS_SCIPY = False


# Names of the backends a selector method may return.
BackendType = Literal["numpy", "scipy", "numba", "gpu", "dask"]
@dataclass
class BackendCapabilities:
    """Snapshot of the compute backends and resources available on this host.

    Instances are produced by :func:`get_system_capabilities` and consumed by
    :class:`BackendSelector` when choosing an execution backend.
    """

    # Flags for optional acceleration libraries.
    has_gpu: bool  # CuPy-backed GPU path usable
    has_numba: bool  # Numba JIT compiler importable
    has_dask: bool  # Dask distributed arrays importable
    has_scipy: bool  # SciPy importable (preferred FFT provider)
    # Hardware resources.
    cpu_count: int  # physical CPU core count
    total_memory_gb: float  # total system RAM, in GiB
    gpu_memory_gb: float  # GPU memory in GiB; 0.0 when no GPU is present
def get_system_capabilities() -> BackendCapabilities:
    """Detect available backends and system resources.

    Returns:
        BackendCapabilities object with system information.

    Example:
        >>> caps = get_system_capabilities()
        >>> if caps.has_gpu:
        ...     print(f"GPU available with {caps.gpu_memory_gb:.1f} GB memory")
    """
    # Physical cores; psutil may return None, so fall back to 1.
    core_count = psutil.cpu_count(logical=False) or 1
    ram_gb = psutil.virtual_memory().total / (1024**3)

    # GPU memory (GB); stays 0.0 when no GPU or the query fails.
    gpu_gb = 0.0
    if HAS_GPU:
        try:
            from tracekit.core.gpu_backend import gpu

            # Index 1 of get_memory_info() is used as total bytes here.
            gpu_gb = gpu.get_memory_info()[1] / (1024**3)  # type: ignore[attr-defined]
        except Exception:
            # Best-effort query: any failure is treated as "no GPU memory".
            gpu_gb = 0.0

    return BackendCapabilities(
        has_gpu=HAS_GPU,
        has_numba=HAS_NUMBA,
        has_dask=HAS_DASK,
        has_scipy=HAS_SCIPY,
        cpu_count=core_count,
        total_memory_gb=ram_gb,
        gpu_memory_gb=gpu_gb,
    )
class BackendSelector:
    """Intelligent backend selector for optimal performance.

    This class analyzes data characteristics and system capabilities to
    automatically select the best backend for each operation.

    Example:
        >>> selector = BackendSelector()
        >>> # For FFT on 50M samples
        >>> backend = selector.select_for_fft(50_000_000)
        >>> # For edge detection with hysteresis
        >>> backend = selector.select_for_edge_detection(1_000_000, has_hysteresis=True)
    """

    def __init__(self) -> None:
        """Initialize backend selector with system capabilities."""
        self.capabilities = get_system_capabilities()

    def select_for_fft(
        self,
        data_size: int,
        dtype: type = np.float64,
    ) -> BackendType:
        """Select optimal backend for FFT operations.

        Args:
            data_size: Number of samples in signal.
            dtype: Data type. NOTE: currently unused; reserved for future
                memory-based heuristics. Selection depends only on data_size.

        Returns:
            BackendType: 'numpy', 'scipy', 'gpu', or 'dask'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_fft(10_000_000)
        """
        # Decision tree, largest data first.
        if data_size > 100_000_000 and self.capabilities.has_dask:
            # Huge data: use distributed
            return "dask"
        if data_size > 10_000_000 and self.capabilities.has_gpu:
            # Large data + GPU: use GPU
            return "gpu"
        if self.capabilities.has_scipy:
            # Use scipy.fft with workers (faster than numpy.fft)
            return "scipy"
        # Fallback to numpy
        return "numpy"

    def select_for_edge_detection(
        self,
        data_size: int,
        has_hysteresis: bool = False,
    ) -> BackendType:
        """Select optimal backend for edge detection.

        Args:
            data_size: Number of samples in signal.
            has_hysteresis: Whether hysteresis is used (affects vectorization).

        Returns:
            BackendType: 'numpy', 'numba', or 'gpu'.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_edge_detection(5_000_000, has_hysteresis=True)
        """
        if data_size > 10_000_000 and self.capabilities.has_gpu:
            return "gpu"
        if has_hysteresis and self.capabilities.has_numba and data_size > 100_000:
            # Hysteresis state machine benefits from Numba compilation.
            return "numba"
        # Without Numba, hysteresis falls back to a Python state machine;
        # without hysteresis the NumPy path is fully vectorized.
        return "numpy"

    def select_for_correlation(
        self,
        signal1_size: int,
        signal2_size: int,
        mode: Literal["full", "valid", "same"] = "full",
    ) -> BackendType:
        """Select optimal backend for correlation.

        Args:
            signal1_size: Size of first signal.
            signal2_size: Size of second signal.
            mode: Correlation mode (determines output size estimate).

        Returns:
            Backend name.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_correlation(1_000_000, 10_000)
        """
        total_size = signal1_size + signal2_size
        output_size = self._estimate_correlation_output(signal1_size, signal2_size, mode)

        # Estimate working-set memory assuming 8 bytes (float64) per sample.
        total_memory_mb = (total_size + output_size) * 8 / (1024**2)

        if total_memory_mb > self.capabilities.total_memory_gb * 1024 * 0.5:
            # Would use >50% RAM: use chunked/streaming
            return "dask" if self.capabilities.has_dask else "numpy"
        if signal1_size > 10_000_000 and self.capabilities.has_gpu:
            return "gpu"
        if self.capabilities.has_scipy:
            return "scipy"
        return "numpy"

    def select_for_protocol_decode(
        self,
        data_size: int,
        protocol: str,
    ) -> BackendType:
        """Select optimal backend for protocol decoding.

        Args:
            data_size: Number of samples in signal.
            protocol: Protocol name (e.g., 'uart', 'spi', 'i2c'). NOTE:
                currently unused; selection depends only on data_size.

        Returns:
            Backend name.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_protocol_decode(5_000_000, 'uart')
        """
        # Protocol decoders use edge detection + state machines.
        # Large signals benefit from Numba-compiled state machines.
        if data_size > 1_000_000 and self.capabilities.has_numba:
            return "numba"
        return "numpy"

    def select_for_pattern_matching(
        self,
        data_size: int,
        pattern_count: int,
        approximate: bool = False,
    ) -> BackendType:
        """Select optimal backend for pattern matching.

        Args:
            data_size: Size of data to search.
            pattern_count: Number of patterns.
            approximate: Whether approximate matching is acceptable.

        Returns:
            Backend name.

        Example:
            >>> selector = BackendSelector()
            >>> backend = selector.select_for_pattern_matching(1_000_000, 100, approximate=True)
        """
        # For approximate matching with many patterns, LSH is best.
        if approximate and pattern_count > 10:
            return "numpy"  # LSH implementation in NumPy
        # BUGFIX: previously returned "numba" for large data without checking
        # availability, unlike every other method. Guard on has_numba.
        if data_size > 10_000_000 and self.capabilities.has_numba:
            return "numba"
        return "numpy"

    def _estimate_correlation_output(
        self,
        size1: int,
        size2: int,
        mode: Literal["full", "valid", "same"],
    ) -> int:
        """Estimate output size of correlation.

        Args:
            size1: Size of first signal.
            size2: Size of second signal.
            mode: Correlation mode.

        Returns:
            Estimated output size in samples.
        """
        if mode == "full":
            return size1 + size2 - 1
        if mode == "valid":
            return max(size1, size2) - min(size1, size2) + 1
        # mode == "same"
        return max(size1, size2)
# Module-level singleton; created lazily on first use.
_global_selector: BackendSelector | None = None


def get_global_selector() -> BackendSelector:
    """Return the process-wide BackendSelector, creating it on first call.

    Returns:
        Global BackendSelector instance (singleton).

    Example:
        >>> selector = get_global_selector()
        >>> backend = selector.select_for_fft(1_000_000)
    """
    global _global_selector
    if _global_selector is None:
        # First access: detect capabilities once and cache the selector.
        _global_selector = BackendSelector()
    return _global_selector
def select_backend(
    operation: Literal[
        "fft", "edge_detection", "correlation", "protocol_decode", "pattern_matching"
    ],
    **kwargs: int | str | bool,
) -> BackendType:
    """Convenience function to select backend for an operation.

    Args:
        operation: Type of operation.
        **kwargs: Operation-specific parameters.

    Returns:
        Selected backend name.

    Example:
        >>> backend = select_backend('fft', data_size=10_000_000)
        >>> backend = select_backend('edge_detection', data_size=5_000_000, has_hysteresis=True)
        >>> backend = select_backend('correlation', signal1_size=1_000_000, signal2_size=10_000)
    """
    selector = get_global_selector()

    # Dispatch table: each entry pulls its own parameters out of kwargs,
    # applying the same defaults and conversions for every operation.
    dispatch = {
        "fft": lambda: selector.select_for_fft(
            data_size=int(kwargs.get("data_size", 0)),
            dtype=kwargs.get("dtype", np.float64),  # type: ignore[arg-type]
        ),
        "edge_detection": lambda: selector.select_for_edge_detection(
            data_size=int(kwargs.get("data_size", 0)),
            has_hysteresis=bool(kwargs.get("has_hysteresis", False)),
        ),
        "correlation": lambda: selector.select_for_correlation(
            signal1_size=int(kwargs.get("signal1_size", 0)),
            signal2_size=int(kwargs.get("signal2_size", 0)),
            mode=kwargs.get("mode", "full"),  # type: ignore[arg-type]
        ),
        "protocol_decode": lambda: selector.select_for_protocol_decode(
            data_size=int(kwargs.get("data_size", 0)),
            protocol=str(kwargs.get("protocol", "")),
        ),
        "pattern_matching": lambda: selector.select_for_pattern_matching(
            data_size=int(kwargs.get("data_size", 0)),
            pattern_count=int(kwargs.get("pattern_count", 0)),
            approximate=bool(kwargs.get("approximate", False)),
        ),
    }
    handler = dispatch.get(operation)
    # Unknown operation (unreachable under the Literal type): safe fallback.
    return handler() if handler is not None else "numpy"
# Public API of this module, alphabetically ordered.
__all__ = [
    "BackendCapabilities",
    "BackendSelector",
    "BackendType",
    "get_global_selector",
    "get_system_capabilities",
    "select_backend",
]