Coverage for src / tracekit / core / gpu_backend.py: 59%

108 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""GPU acceleration backend with automatic numpy fallback. 

2 

3This module provides optional GPU acceleration using CuPy with seamless 

4fallback to NumPy when CuPy is unavailable or GPU processing is disabled. 

5 

6The GPU backend is lazy-initialized and memory-safe, automatically transferring 

7data to/from GPU as needed. GPU usage can be controlled via the environment 

8variable TRACEKIT_USE_GPU (0 to disable, 1 to enable). 

9 

10 

11Example: 

12 >>> from tracekit.core.gpu_backend import gpu 

13 >>> # Automatically uses GPU if available, numpy otherwise 

14 >>> freqs = gpu.fft(signal_data) 

15 >>> 

16 >>> # Force CPU-only operation 

17 >>> from tracekit.core.gpu_backend import GPUBackend 

18 >>> cpu_only = GPUBackend(force_cpu=True) 

19 >>> freqs = cpu_only.fft(signal_data) 

20 

21Configuration: 

22 Set TRACEKIT_USE_GPU environment variable to control GPU usage: 

23 - TRACEKIT_USE_GPU=0: Force CPU-only operation 

24 - TRACEKIT_USE_GPU=1: Enable GPU if available (default) 

25 

26References: 

27 - CuPy documentation: https://docs.cupy.dev/ 

28 - NumPy FFT module: https://numpy.org/doc/stable/reference/routines.fft.html 

29""" 

30 

31from __future__ import annotations 

32 

33import os 

34import warnings 

35from typing import TYPE_CHECKING, Any, Literal 

36 

37import numpy as np 

38 

39if TYPE_CHECKING: 

40 from numpy.typing import NDArray 

41 

42 

class GPUBackend:
    """Optional GPU acceleration with transparent NumPy fallback.

    Every public operation runs through CuPy when the GPU path is active and
    through NumPy otherwise. On the GPU path inputs are copied to the device
    and results are copied back, so callers always pass and receive NumPy data.

    GPU availability is probed lazily, on the first operation or the first
    read of ``gpu_available`` / ``using_gpu``, and the outcome is cached for
    the lifetime of the instance.

    Args:
        force_cpu: If True, always use CPU (NumPy) even if GPU is available.
            Useful for testing or when GPU memory is limited.

    Attributes:
        gpu_available: True if CuPy imported, the probe allocation succeeded,
            and the GPU was not disabled via ``force_cpu`` or the
            ``TRACEKIT_USE_GPU`` environment variable. Reading it triggers
            the lazy probe.
        using_gpu: Alias of ``gpu_available``.

    Example:
        >>> backend = GPUBackend()
        >>> if backend.gpu_available:
        ...     print("Using GPU acceleration")
        ... else:
        ...     print("Using CPU (NumPy) fallback")
        >>>
        >>> # All operations work identically regardless of backend
        >>> result = backend.fft(data)

    References:
        PERF-001 through PERF-004: GPU acceleration requirements
    """

    # Values of TRACEKIT_USE_GPU (after stripping whitespace and lower-casing)
    # that disable the GPU. Any other value, including unset, leaves it enabled.
    _DISABLED_ENV_VALUES = frozenset({"0", "false", "no", "off"})

    def __init__(self, force_cpu: bool = False) -> None:
        """Initialize GPU backend with optional CPU-only mode.

        No CuPy import or GPU probe happens here; that work is deferred to
        ``_check_gpu``.

        Args:
            force_cpu: If True, never use GPU even if available.
        """
        self._force_cpu = force_cpu
        self._gpu_available: bool | None = None  # None until the lazy probe has run
        self._cp: Any = None  # CuPy module once the probe has succeeded
        self._initialized = False

    @classmethod
    def _env_disables_gpu(cls) -> bool:
        """Return True if ``TRACEKIT_USE_GPU`` asks for CPU-only operation.

        The value is matched case-insensitively and ignoring surrounding
        whitespace, so ``" 0"``, ``"False"`` and ``"off"`` all disable the GPU.
        An unset variable defaults to ``"1"`` (enabled).
        """
        raw = os.environ.get("TRACEKIT_USE_GPU", "1")
        return raw.strip().lower() in cls._DISABLED_ENV_VALUES

    def _check_gpu(self) -> bool:
        """Check if GPU/CuPy is available and should be used.

        Called lazily on first use to avoid the CuPy import overhead when the
        GPU is not needed. The result is cached; later calls return the cached
        value without re-probing.

        Returns:
            True if GPU should be used, False to fall back to NumPy.
        """
        if self._initialized:
            return bool(self._gpu_available)

        self._initialized = True
        # Pessimistic default; only flipped once the probe below succeeds.
        self._gpu_available = False

        if self._force_cpu or self._env_disables_gpu():
            return False

        try:
            import cupy as cp  # type: ignore[import-not-found]
        except ImportError:
            # CuPy not installed - silent fallback
            return False

        try:
            # Verify the GPU is actually accessible with a tiny allocation.
            cp.array([1.0])
        except Exception as e:
            warnings.warn(
                f"CuPy is installed but GPU is not accessible: {e}. Falling back to NumPy.",
                RuntimeWarning,
                stacklevel=2,
            )
            return False

        self._cp = cp
        self._gpu_available = True
        return True

    def _active_cupy(self) -> Any:
        """Return the CuPy module if the GPU path should be taken, else None."""
        return self._cp if self._check_gpu() else None

    @property
    def gpu_available(self) -> bool:
        """Check if GPU acceleration is available.

        Triggers the lazy probe on first access.

        Returns:
            True if CuPy is available and GPU can be used.
        """
        if not self._initialized:
            self._check_gpu()
        return bool(self._gpu_available)

    @property
    def using_gpu(self) -> bool:
        """Alias for gpu_available for backwards compatibility.

        Returns:
            True if currently using GPU backend.
        """
        return self.gpu_available

    def _to_cpu(self, array: Any) -> NDArray[Any]:
        """Transfer array from GPU to CPU if needed.

        Args:
            array: Array that may be on GPU or CPU.

        Returns:
            NumPy array on CPU. CuPy arrays are copied with ``asnumpy``;
            anything else is passed through ``np.asarray``.
        """
        # gpu_available is evaluated first because it may run the lazy probe
        # that populates self._cp.
        if self.gpu_available and self._cp is not None and isinstance(array, self._cp.ndarray):
            return self._cp.asnumpy(array)  # type: ignore[no-any-return]
        return np.asarray(array)

    def _to_gpu(self, array: NDArray[Any]) -> Any:
        """Transfer array from CPU to GPU if GPU is enabled.

        Args:
            array: NumPy array on CPU.

        Returns:
            CuPy array on GPU if GPU is available, otherwise the input unchanged.
        """
        if self.gpu_available and self._cp is not None:
            return self._cp.asarray(array)
        return array

    def fft(
        self,
        data: NDArray[np.complex128] | NDArray[np.float64],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated FFT with automatic fallback to NumPy.

        Computes the one-dimensional discrete Fourier Transform using GPU
        if available, otherwise falls back to NumPy.

        Args:
            data: Input array (can be real or complex).
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the FFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued FFT of the input array (always NumPy array on CPU).

        Example:
            >>> signal = np.random.randn(1000)
            >>> spectrum = gpu.fft(signal)
            >>> # Result is always NumPy array, regardless of backend

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.fft.fft(data, n=n, axis=axis, norm=norm)
        device_data = self._to_gpu(data)
        return self._to_cpu(cp.fft.fft(device_data, n=n, axis=axis, norm=norm))

    def ifft(
        self,
        data: NDArray[np.complex128],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated inverse FFT with automatic fallback.

        Computes the one-dimensional inverse discrete Fourier Transform.

        Args:
            data: Input complex array.
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the IFFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued IFFT of the input array (always NumPy array on CPU).

        Example:
            >>> spectrum = np.fft.fft(signal)
            >>> recovered = gpu.ifft(spectrum)

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.fft.ifft(data, n=n, axis=axis, norm=norm)
        device_data = self._to_gpu(data)
        return self._to_cpu(cp.fft.ifft(device_data, n=n, axis=axis, norm=norm))

    def rfft(
        self,
        data: NDArray[np.float64],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.complex128]:
        """GPU-accelerated real FFT with automatic fallback.

        Computes the one-dimensional FFT of real-valued input, returning
        only the non-negative frequency components.

        Args:
            data: Input real-valued array.
            n: Length of the transformed axis. If None, uses data.shape[axis].
            axis: Axis over which to compute the FFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Complex-valued FFT (non-negative frequencies only) on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> spectrum = gpu.rfft(signal)
            >>> # Result has length n//2 + 1

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.fft.rfft(data, n=n, axis=axis, norm=norm)
        device_data = self._to_gpu(data)
        return self._to_cpu(cp.fft.rfft(device_data, n=n, axis=axis, norm=norm))

    def irfft(
        self,
        data: NDArray[np.complex128],
        n: int | None = None,
        axis: int = -1,
        norm: Literal["backward", "ortho", "forward"] | None = None,
    ) -> NDArray[np.float64]:
        """GPU-accelerated inverse real FFT with automatic fallback.

        Computes the inverse FFT of rfft, returning real-valued output.

        Args:
            data: Input complex array (from rfft).
            n: Length of output. If None, uses (data.shape[axis] - 1) * 2.
            axis: Axis over which to compute the IFFT.
            norm: Normalization mode ("backward", "ortho", or "forward").

        Returns:
            Real-valued IFFT on CPU.

        Example:
            >>> spectrum = gpu.rfft(signal)
            >>> recovered = gpu.irfft(spectrum)

        References:
            SPE-001: Standard FFT Computation
            PERF-001: GPU-accelerated FFT
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.fft.irfft(data, n=n, axis=axis, norm=norm)
        device_data = self._to_gpu(data)
        return self._to_cpu(cp.fft.irfft(device_data, n=n, axis=axis, norm=norm))

    def convolve(
        self,
        data: NDArray[np.float64],
        kernel: NDArray[np.float64],
        mode: Literal["full", "valid", "same"] = "full",
    ) -> NDArray[np.float64]:
        """GPU-accelerated convolution with automatic fallback.

        Computes the discrete linear convolution of data with kernel by
        delegating to ``convolve`` of the active backend (CuPy or NumPy).

        Args:
            data: Input signal array (length N).
            kernel: Convolution kernel / filter coefficients (length M).
            mode: Convolution mode:
                - "full": Full convolution (length N + M - 1)
                - "valid": Only where data and kernel fully overlap
                - "same": Centered output of length max(N, M)

        Returns:
            Convolved array on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> kernel = np.array([0.25, 0.5, 0.25])  # Simple smoothing
            >>> smoothed = gpu.convolve(signal, kernel, mode="same")

        References:
            PERF-002: GPU-accelerated convolution
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.convolve(data, kernel, mode=mode)
        device_data = self._to_gpu(data)
        device_kernel = self._to_gpu(kernel)
        return self._to_cpu(cp.convolve(device_data, device_kernel, mode=mode))

    def correlate(
        self,
        a: NDArray[np.float64],
        v: NDArray[np.float64],
        mode: Literal["full", "valid", "same"] = "full",
    ) -> NDArray[np.float64]:
        """GPU-accelerated correlation with automatic fallback.

        Computes the cross-correlation of two 1-dimensional sequences.

        Args:
            a: First input sequence.
            v: Second input sequence.
            mode: Correlation mode ("full", "valid", or "same").

        Returns:
            Cross-correlation on CPU.

        Example:
            >>> signal = np.random.randn(1000)
            >>> template = signal[100:200]
            >>> corr = gpu.correlate(signal, template, mode="valid")
            >>> # Find best match location
            >>> match_idx = np.argmax(corr)

        References:
            PERF-003: GPU-accelerated pattern matching
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.correlate(a, v, mode=mode)
        device_a = self._to_gpu(a)
        device_v = self._to_gpu(v)
        return self._to_cpu(cp.correlate(device_a, device_v, mode=mode))

    def histogram(
        self,
        data: NDArray[np.float64],
        bins: int | NDArray[np.float64] = 10,
        range: tuple[float, float] | None = None,
        density: bool = False,
    ) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
        """GPU-accelerated histogram with automatic fallback.

        Computes the histogram of a dataset.

        Args:
            data: Input data array.
            bins: Number of bins or array of bin edges.
            range: Lower and upper range of bins. If None, uses
                (data.min(), data.max()). The name shadows the builtin but is
                kept to mirror ``np.histogram`` and existing callers.
            density: If True, return probability density instead of counts.

        Returns:
            Tuple of (counts, bin_edges) both on CPU.

        Example:
            >>> signal = np.random.randn(10000)
            >>> counts, edges = gpu.histogram(signal, bins=100)

        References:
            PERF-004: GPU-accelerated histogram computation
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.histogram(data, bins=bins, range=range, density=density)  # type: ignore[no-any-return]
        device_data = self._to_gpu(data)
        # Only explicit bin-edge arrays need a transfer; an int bin count is passed as is.
        device_bins = self._to_gpu(bins) if isinstance(bins, np.ndarray) else bins
        counts, edges = cp.histogram(device_data, bins=device_bins, range=range, density=density)
        return self._to_cpu(counts), self._to_cpu(edges)

    def dot(
        self,
        a: NDArray[np.float64],
        b: NDArray[np.float64],
    ) -> NDArray[np.float64] | np.float64:
        """GPU-accelerated dot product with automatic fallback.

        Computes the dot product of two arrays.

        Args:
            a: First array.
            b: Second array.

        Returns:
            Dot product on CPU. For inputs whose product is zero-dimensional
            (e.g. two 1-D arrays) a NumPy scalar is returned on both
            backends; otherwise an array.

        Example:
            >>> a = np.random.randn(1000)
            >>> b = np.random.randn(1000)
            >>> similarity = gpu.dot(a, b)

        References:
            PERF-003: GPU-accelerated matrix operations
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.dot(a, b)  # type: ignore[return-value, no-any-return]
        device_a = self._to_gpu(a)
        device_b = self._to_gpu(b)
        host = self._to_cpu(cp.dot(device_a, device_b))
        if host.ndim == 0:
            # The GPU path yields a 0-d array where NumPy yields a scalar;
            # unwrap it so both backends return the same type.
            return host[()]
        return host  # type: ignore[return-value]

    def matmul(
        self,
        a: NDArray[np.float64],
        b: NDArray[np.float64],
    ) -> NDArray[np.float64]:
        """GPU-accelerated matrix multiplication with automatic fallback.

        Computes the matrix product of two arrays.

        Args:
            a: First matrix.
            b: Second matrix.

        Returns:
            Matrix product on CPU.

        Example:
            >>> A = np.random.randn(100, 50)
            >>> B = np.random.randn(50, 100)
            >>> C = gpu.matmul(A, B)

        References:
            PERF-003: GPU-accelerated matrix operations
        """
        cp = self._active_cupy()
        if cp is None:
            # CPU fallback
            return np.matmul(a, b)  # type: ignore[return-value, no-any-return]
        device_a = self._to_gpu(a)
        device_b = self._to_gpu(b)
        return self._to_cpu(cp.matmul(device_a, device_b))  # type: ignore[return-value]

518 

519 

# Module-level singleton for convenient access. Constructing it is cheap:
# GPUBackend.__init__ only stores flags, and the CuPy import and GPU probe are
# deferred until the first operation or availability check on this instance.
gpu = GPUBackend()

# Public API of this module.
__all__ = ["GPUBackend", "gpu"]