Coverage for src / tracekit / core / gpu_backend.py: 59%
108 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""GPU acceleration backend with automatic numpy fallback.
3This module provides optional GPU acceleration using CuPy with seamless
4fallback to NumPy when CuPy is unavailable or GPU processing is disabled.
6The GPU backend is lazy-initialized and memory-safe, automatically transferring
7data to/from GPU as needed. GPU usage can be controlled via the environment
8variable TRACEKIT_USE_GPU (0 to disable, 1 to enable).
11Example:
12 >>> from tracekit.core.gpu_backend import gpu
13 >>> # Automatically uses GPU if available, numpy otherwise
14 >>> freqs = gpu.fft(signal_data)
15 >>>
16 >>> # Force CPU-only operation
17 >>> from tracekit.core.gpu_backend import GPUBackend
18 >>> cpu_only = GPUBackend(force_cpu=True)
19 >>> freqs = cpu_only.fft(signal_data)
21Configuration:
22 Set TRACEKIT_USE_GPU environment variable to control GPU usage:
23 - TRACEKIT_USE_GPU=0: Force CPU-only operation
24 - TRACEKIT_USE_GPU=1: Enable GPU if available (default)
26References:
27 - CuPy documentation: https://docs.cupy.dev/
28 - NumPy FFT module: https://numpy.org/doc/stable/reference/routines.fft.html
29"""
31from __future__ import annotations
33import os
34import warnings
35from typing import TYPE_CHECKING, Any, Literal
37import numpy as np
39if TYPE_CHECKING:
40 from numpy.typing import NDArray
class GPUBackend:
    """Optional GPU acceleration with transparent NumPy fallback.

    Every public operation takes NumPy arrays and hands back NumPy data on the
    CPU. When CuPy can be imported and a trial allocation succeeds, the work is
    routed through CuPy; otherwise the equivalent NumPy routine is called.

    GPU availability is probed lazily on first use and the outcome is cached
    for the lifetime of the instance (the environment variable is therefore
    only consulted once per instance).

    The GPU path is disabled when ``force_cpu`` is true or when the
    ``TRACEKIT_USE_GPU`` environment variable is set to ``0``, ``false``,
    ``no`` or ``off`` (case-insensitive, surrounding whitespace ignored).
    Any other value, or an unset variable, leaves the GPU path enabled.

    Args:
        force_cpu: If True, always use CPU (NumPy) even if GPU is available.
            Useful for testing or when GPU memory is limited.

    Attributes:
        gpu_available: True if CuPy is importable, the GPU answered a trial
            allocation, and the GPU path has not been disabled. Reading it
            triggers the lazy probe.
        using_gpu: Alias of ``gpu_available`` kept for backwards
            compatibility; the two always report the same value.

    Example:
        >>> backend = GPUBackend()
        >>> if backend.gpu_available:
        ...     print("Using GPU acceleration")
        ... else:
        ...     print("Using CPU (NumPy) fallback")
        >>>
        >>> # All operations work identically regardless of backend
        >>> result = backend.fft(data)

    References:
        PERF-001 through PERF-004: GPU acceleration requirements
    """

    # Normalised TRACEKIT_USE_GPU values that switch the GPU path off.
    _DISABLE_VALUES = frozenset({"0", "false", "no", "off"})

    def __init__(self, force_cpu: bool = False) -> None:
        """Initialize GPU backend with optional CPU-only mode.

        No import of CuPy and no GPU access happens here; the probe is
        deferred until the first operation or property read.

        Args:
            force_cpu: If True, never use GPU even if available.
        """
        self._force_cpu = force_cpu
        self._gpu_available: bool | None = None  # None until probed
        self._cp: Any = None  # CuPy module if available
        self._initialized = False

    def __repr__(self) -> str:
        """Return a constructor-style description without probing the GPU."""
        return f"{type(self).__name__}(force_cpu={self._force_cpu!r})"

    @classmethod
    def _env_disables_gpu(cls) -> bool:
        """Return True when TRACEKIT_USE_GPU asks for CPU-only operation."""
        raw = os.environ.get("TRACEKIT_USE_GPU", "1")
        return raw.strip().lower() in cls._DISABLE_VALUES

    def _check_gpu(self) -> bool:
        """Check if GPU/CuPy is available and should be used.

        This is called lazily on first operation to avoid import overhead
        when GPU is not needed. The result is cached; later calls return the
        cached answer without re-reading the environment or re-importing.

        Returns:
            True if GPU should be used, False to fall back to NumPy.
        """
        if self._initialized:
            return bool(self._gpu_available)

        self._initialized = True
        # Assume the CPU path until the probe below proves otherwise.
        self._gpu_available = False

        if self._force_cpu or self._env_disables_gpu():
            return False

        try:
            import cupy as cp  # type: ignore[import-not-found]
        except ImportError:
            # CuPy not installed - silent fallback
            return False

        try:
            # A tiny allocation verifies that the device is actually usable.
            _ = cp.array([1.0])
        except Exception as e:
            # Deliberately broad: any failure of the trial allocation means
            # the GPU path cannot be used, whatever the exception type.
            warnings.warn(
                f"CuPy is installed but GPU is not accessible: {e}. Falling back to NumPy.",
                RuntimeWarning,
                stacklevel=2,
            )
            return False

        self._cp = cp
        self._gpu_available = True
        return True

    def _gpu_module(self) -> Any:
        """Return the CuPy module when the GPU path is active, else None."""
        return self._cp if self._check_gpu() else None

    @property
    def gpu_available(self) -> bool:
        """Check if GPU acceleration is available.

        Returns:
            True if CuPy is available and GPU can be used.
        """
        return self._check_gpu()

    @property
    def using_gpu(self) -> bool:
        """Alias for gpu_available for backwards compatibility.

        Returns:
            True if currently using GPU backend.
        """
        return self.gpu_available

    def _to_cpu(self, array: Any) -> NDArray[Any]:
        """Transfer array from GPU to CPU if needed.

        Args:
            array: Array that may be on GPU or CPU.

        Returns:
            NumPy array on CPU.
        """
        cp = self._gpu_module()
        if cp is not None and isinstance(array, cp.ndarray):
            return cp.asnumpy(array)  # type: ignore[no-any-return]
        return np.asarray(array)

    def _to_gpu(self, array: NDArray[Any]) -> Any:
        """Transfer array from CPU to GPU if GPU is enabled.

        Args:
            array: NumPy array on CPU.

        Returns:
            CuPy array on GPU if GPU is available, otherwise the input
            unchanged.
        """
        cp = self._gpu_module()
        return array if cp is None else cp.asarray(array)

    def fft(
        self,
        data: NDArray[np.complex128] | NDArray[np.float64],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated FFT with automatic fallback to NumPy.

        Computes the one-dimensional discrete Fourier Transform using GPU
        if available, otherwise falls back to NumPy.

        Args:
            data: Input array (can be real or complex).
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the FFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued FFT of the input array (always NumPy array on CPU).

        Example:
            >>> signal = np.random.randn(1000)
            >>> spectrum = GPUBackend().fft(signal)
            >>> # Result is always NumPy array, regardless of backend

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._gpu_module()
        if cp is None:
            return np.fft.fft(data, n=n, axis=axis, norm=norm)
        spectrum = cp.fft.fft(self._to_gpu(data), n=n, axis=axis, norm=norm)
        return self._to_cpu(spectrum)

    def ifft(
        self,
        data: NDArray[np.complex128],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated inverse FFT with automatic fallback.

        Computes the one-dimensional inverse discrete Fourier Transform.

        Args:
            data: Input complex array.
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the IFFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued IFFT of the input array (always NumPy array on CPU).

        Example:
            >>> spectrum = np.fft.fft(signal)
            >>> recovered = GPUBackend().ifft(spectrum)

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._gpu_module()
        if cp is None:
            return np.fft.ifft(data, n=n, axis=axis, norm=norm)
        restored = cp.fft.ifft(self._to_gpu(data), n=n, axis=axis, norm=norm)
        return self._to_cpu(restored)

    def rfft(
        self,
        data: NDArray[np.float64],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated real FFT with automatic fallback.

        Computes the one-dimensional FFT of real-valued input, returning
        only the non-negative frequency components.

        Args:
            data: Input real-valued array.
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the FFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued FFT (non-negative frequencies only) on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> spectrum = GPUBackend().rfft(signal)
            >>> # Result has length n//2 + 1

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._gpu_module()
        if cp is None:
            return np.fft.rfft(data, n=n, axis=axis, norm=norm)
        spectrum = cp.fft.rfft(self._to_gpu(data), n=n, axis=axis, norm=norm)
        return self._to_cpu(spectrum)

    def irfft(
        self,
        data: NDArray[np.complex128],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.float64]:
        """GPU-accelerated inverse real FFT with automatic fallback.

        Computes the inverse FFT of rfft, returning real-valued output.

        Args:
            data: Input complex array (from rfft).
            n: Length of output. If None, uses (data.shape[axis] - 1) * 2.
            axis: Axis over which to compute the IFFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Real-valued IFFT on CPU.

        Example:
            >>> backend = GPUBackend()
            >>> spectrum = backend.rfft(signal)
            >>> recovered = backend.irfft(spectrum)

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._gpu_module()
        if cp is None:
            return np.fft.irfft(data, n=n, axis=axis, norm=norm)
        restored = cp.fft.irfft(self._to_gpu(data), n=n, axis=axis, norm=norm)
        return self._to_cpu(restored)

    def convolve(
        self,
        data: NDArray[np.float64],
        kernel: NDArray[np.float64],
        mode: Literal["full", "valid", "same"] = "full",
    ) -> NDArray[np.float64]:
        """GPU-accelerated convolution with automatic fallback.

        Computes the discrete linear convolution of data with kernel by
        delegating to ``convolve`` of the active array library (CuPy or
        NumPy); the algorithm used is whatever that library chooses.

        Args:
            data: Input signal array.
            kernel: Convolution kernel (filter coefficients).
            mode: Convolution mode:
                - "full": Full convolution (length N + M - 1)
                - "valid": Only where data and kernel fully overlap
                - "same": Same length as data (centered)

        Returns:
            Convolved array on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> kernel = np.array([0.25, 0.5, 0.25])  # Simple smoothing
            >>> smoothed = GPUBackend().convolve(signal, kernel, mode="same")

        References:
            PERF-002: GPU-accelerated convolution
        """
        cp = self._gpu_module()
        if cp is None:
            return np.convolve(data, kernel, mode=mode)
        convolved = cp.convolve(self._to_gpu(data), self._to_gpu(kernel), mode=mode)
        return self._to_cpu(convolved)

    def correlate(
        self,
        a: NDArray[np.float64],
        v: NDArray[np.float64],
        mode: Literal["full", "valid", "same"] = "full",
    ) -> NDArray[np.float64]:
        """GPU-accelerated correlation with automatic fallback.

        Computes the cross-correlation of two 1-dimensional sequences.

        Args:
            a: First input sequence.
            v: Second input sequence.
            mode: Correlation mode ("full", "valid", or "same").

        Returns:
            Cross-correlation on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> template = signal[100:200]
            >>> corr = GPUBackend().correlate(signal, template, mode="valid")
            >>> # Find best match location
            >>> match_idx = np.argmax(corr)

        References:
            PERF-003: GPU-accelerated pattern matching
        """
        cp = self._gpu_module()
        if cp is None:
            return np.correlate(a, v, mode=mode)
        correlated = cp.correlate(self._to_gpu(a), self._to_gpu(v), mode=mode)
        return self._to_cpu(correlated)

    def histogram(
        self,
        data: NDArray[np.float64],
        bins: int | NDArray[np.float64] = 10,
        range: tuple[float, float] | None = None,
        density: bool = False,
    ) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
        """GPU-accelerated histogram with automatic fallback.

        Computes the histogram of a dataset, useful for statistical analysis
        and signal quality metrics.

        Note:
            The ``range`` parameter shadows the builtin of the same name
            inside this method; the name is part of the public signature
            (it mirrors ``numpy.histogram``) and is kept for compatibility.

        Args:
            data: Input data array.
            bins: Number of bins or array of bin edges.
            range: Lower and upper range of bins. If None, uses (data.min(), data.max()).
            density: If True, return probability density instead of counts.

        Returns:
            Tuple of (counts, bin_edges) both on CPU.

        Example:
            >>> signal = np.random.randn(10000)
            >>> counts, edges = GPUBackend().histogram(signal, bins=100)
            >>> # edges has one more entry than counts

        References:
            PERF-004: GPU-accelerated histogram computation
        """
        cp = self._gpu_module()
        if cp is None:
            return np.histogram(data, bins=bins, range=range, density=density)  # type: ignore[no-any-return]

        # Only an explicit edge array needs to be moved to the device; an
        # integer bin count is passed through as-is.
        device_bins = self._to_gpu(bins) if isinstance(bins, np.ndarray) else bins
        counts, edges = cp.histogram(
            self._to_gpu(data), bins=device_bins, range=range, density=density
        )
        return self._to_cpu(counts), self._to_cpu(edges)

    def dot(
        self,
        a: NDArray[np.float64],
        b: NDArray[np.float64],
    ) -> NDArray[np.float64] | np.float64:
        """GPU-accelerated dot product with automatic fallback.

        Computes the dot product of two arrays, useful for correlation
        and pattern matching operations.

        Args:
            a: First array.
            b: Second array.

        Returns:
            Dot product on CPU (scalar or array depending on input dimensions).
            A zero-dimensional GPU result is unwrapped to a NumPy scalar so
            both backends return the same kind of object.

        Example:
            >>> a = np.random.randn(1000)
            >>> b = np.random.randn(1000)
            >>> similarity = GPUBackend().dot(a, b)

        References:
            PERF-003: GPU-accelerated matrix operations
        """
        cp = self._gpu_module()
        if cp is None:
            return np.dot(a, b)  # type: ignore[return-value, no-any-return]

        product = self._to_cpu(cp.dot(self._to_gpu(a), self._to_gpu(b)))
        # Indexing a 0-d array with () yields a NumPy scalar, matching what
        # np.dot returns for 1-D inputs on the CPU path.
        return product[()] if product.ndim == 0 else product  # type: ignore[return-value]

    def matmul(
        self,
        a: NDArray[np.float64],
        b: NDArray[np.float64],
    ) -> NDArray[np.float64]:
        """GPU-accelerated matrix multiplication with automatic fallback.

        Computes the matrix product of two arrays.

        Args:
            a: First matrix.
            b: Second matrix.

        Returns:
            Matrix product on CPU.

        Example:
            >>> A = np.random.randn(100, 50)
            >>> B = np.random.randn(50, 100)
            >>> C = GPUBackend().matmul(A, B)

        References:
            PERF-003: GPU-accelerated matrix operations
        """
        cp = self._gpu_module()
        if cp is None:
            return np.matmul(a, b)  # type: ignore[return-value, no-any-return]
        product = cp.matmul(self._to_gpu(a), self._to_gpu(b))
        return self._to_cpu(product)  # type: ignore[return-value]
# Shared default backend: importing ``gpu`` gives every caller the same
# instance. Constructing it only stores flags; the CuPy probe is deferred
# until the instance is first used.
gpu = GPUBackend()

# Names exported by ``from tracekit.core.gpu_backend import *``.
__all__ = ["GPUBackend", "gpu"]