Coverage for src/tracekit/loaders/__init__.py: 80%

143 statements  

coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""TraceKit data loaders for various file formats. 

2 

3This module provides a unified load() function that auto-detects file formats 

4and delegates to the appropriate loader. 

5 

6 

7Example: 

8 >>> import tracekit as tk 

9 >>> trace = tk.load("capture.wfm") 

10 >>> print(f"Loaded {len(trace.data)} samples") 

11 

12 >>> # Load all channels from multi-channel file 

13 >>> channels = tk.load_all_channels("multi_channel.wfm") 

14 >>> for name, trace in channels.items(): 

15 ... print(f"{name}: {len(trace.data)} samples") 

16""" 

17 

18from __future__ import annotations 

19 

20import logging 

21import warnings 

22from pathlib import Path 

23from typing import TYPE_CHECKING, Any 

24 

25from tracekit.core.exceptions import LoaderError, UnsupportedFormatError 

26from tracekit.core.types import DigitalTrace, WaveformTrace 

27 

28# Import alias modules for DSL compatibility 

29from tracekit.loaders import ( 

30 binary, 

31 csv, 

32 hdf5, 

33) 

34 

35# Import configurable binary loading functionality 

36from tracekit.loaders.configurable import ( 

37 BitfieldDef, 

38 BitfieldExtractor, 

39 ConfigurablePacketLoader, 

40 DeviceConfig, 

41 DeviceInfo, 

42 DeviceMapper, 

43 HeaderFieldDef, 

44 PacketFormatConfig, 

45 ParsedPacket, 

46 SampleFormatDef, 

47 detect_source_type, 

48 extract_channels, 

49 load_binary_packets, 

50 load_packets_streaming, 

51) 

52from tracekit.loaders.lazy import LazyWaveformTrace, load_trace_lazy 

53from tracekit.loaders.preprocessing import ( 

54 IdleRegion, 

55 IdleStatistics, 

56 IdleStats, 

57 detect_idle_regions, 

58 get_idle_statistics, 

59 trim_idle, 

60) 

61from tracekit.loaders.validation import ( 

62 PacketValidator, 

63 SequenceGap, 

64 SequenceValidation, 

65 ValidationResult, 

66 ValidationStats, 

67) 

68 

69if TYPE_CHECKING: 

70 from os import PathLike 

71 

72 from tracekit.core.types import Trace 

73 

# Module-level logger for diagnostic messages emitted by the loaders
logger = logging.getLogger(__name__)

# Loader name selected for each file extension that load() can auto-detect
SUPPORTED_FORMATS: dict[str, str] = {
    ".wfm": "auto_wfm",  # Tektronix vs Rigol is decided from the file header
    ".npz": "numpy",
    ".csv": "csv",
    ".h5": "hdf5",
    ".hdf5": "hdf5",
    ".sr": "sigrok",
    ".pcap": "pcap",
    ".pcapng": "pcap",
    ".wav": "wav",
    ".vcd": "vcd",
    ".tdms": "tdms",
    # Touchstone S-parameter files: .s1p through .s8p
    **{f".s{ports}p": "touchstone" for ports in range(1, 9)},
}

# Size in bytes (100 MB) above which load() suggests lazy loading
LARGE_FILE_WARNING_THRESHOLD = 100 * 1024**2

103 

104 

def load(
    path: str | PathLike[str],
    *,
    format: str | None = None,
    channel: str | int | None = None,
    lazy: bool = False,
    **kwargs: Any,
) -> Trace:
    """Load a trace from a file, picking the loader by name or by extension.

    The loader is chosen from ``format`` when it is given (compared
    case-insensitively); otherwise the lower-cased file extension is looked
    up in ``SUPPORTED_FORMATS``. ``.wfm`` files go through the WFM
    auto-detection helper, which chooses between the Rigol and Tektronix
    loaders.

    Args:
        path: Path to the file to load.
        format: Optional loader name override (e.g., "tektronix" or its alias
            "tek", "rigol", "numpy", "csv", "hdf5", "sigrok", "vcd", "pcap",
            "wav", "tdms", "touchstone").
        channel: Optional channel name or index. It is forwarded only to the
            WFM auto-detect, numpy, hdf5, sigrok, wav and tdms loaders; the
            other loaders do not receive it.
        lazy: If True, delegate to ``load_lazy`` with only ``path`` and
            ``**kwargs``; ``format`` and ``channel`` are not forwarded.
        **kwargs: Extra arguments passed to the selected loader. The
            touchstone loader receives only the path.

    Returns:
        The trace produced by the selected loader (or by ``load_lazy`` when
        ``lazy`` is True).

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        UnsupportedFormatError: If the extension is not in
            ``SUPPORTED_FORMATS``, or ``format`` names no known loader.

    Warns:
        UserWarning: If the file is larger than
            ``LARGE_FILE_WARNING_THRESHOLD`` and ``lazy`` is False.

    Example:
        >>> import tracekit as tk
        >>> trace = tk.load("oscilloscope_capture.wfm")
        >>> print(f"Loaded {len(trace.data)} samples at {trace.metadata.sample_rate} Hz")

        >>> # Force specific loader
        >>> trace = tk.load("data.bin", format="tektronix")

        >>> # Check if digital trace
        >>> if isinstance(trace, DigitalTrace):
        ...     print("Loaded digital waveform")
    """
    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Suggest lazy loading for big files unless the caller already asked for it
    size_bytes = file_path.stat().st_size
    if not lazy and size_bytes > LARGE_FILE_WARNING_THRESHOLD:
        warnings.warn(
            f"File is large ({size_bytes / 1024 / 1024:.1f} MB). "
            "Consider using lazy=True for better memory efficiency.",
            stacklevel=2,
        )

    if lazy:
        return load_lazy(file_path, **kwargs)  # type: ignore[return-value]

    # Resolve the loader name: explicit override wins over the extension
    if format is None:
        suffix = file_path.suffix.lower()
        if suffix not in SUPPORTED_FORMATS:
            raise UnsupportedFormatError(
                suffix,
                list(SUPPORTED_FORMATS),
                file_path=str(file_path),
            )
        loader_name = SUPPORTED_FORMATS[suffix]
    else:
        loader_name = format.lower()

    # Dispatch. Loader modules are imported on demand, so only the module
    # for the selected format is imported.
    if loader_name == "auto_wfm":
        return _load_wfm_auto(file_path, channel=channel, **kwargs)

    if loader_name in ("tektronix", "tek"):
        from tracekit.loaders.tektronix import load_tektronix_wfm

        return load_tektronix_wfm(file_path, **kwargs)

    if loader_name == "rigol":
        from tracekit.loaders.rigol import load_rigol_wfm

        return load_rigol_wfm(file_path, **kwargs)

    if loader_name == "numpy":
        from tracekit.loaders.numpy_loader import load_npz

        return load_npz(file_path, channel=channel, **kwargs)

    if loader_name == "csv":
        from tracekit.loaders.csv_loader import load_csv

        return load_csv(file_path, **kwargs)  # type: ignore[return-value]

    if loader_name == "hdf5":
        from tracekit.loaders.hdf5_loader import load_hdf5

        return load_hdf5(file_path, channel=channel, **kwargs)  # type: ignore[return-value]

    if loader_name == "sigrok":
        from tracekit.loaders.sigrok import load_sigrok

        return load_sigrok(file_path, channel=channel, **kwargs)

    if loader_name == "vcd":
        from tracekit.loaders.vcd import load_vcd

        return load_vcd(file_path, **kwargs)

    if loader_name == "pcap":
        from tracekit.loaders.pcap import load_pcap

        return load_pcap(file_path, **kwargs)  # type: ignore[return-value]

    if loader_name == "wav":
        from tracekit.loaders.wav import load_wav

        return load_wav(file_path, channel=channel, **kwargs)

    if loader_name == "tdms":
        from tracekit.loaders.tdms import load_tdms

        return load_tdms(file_path, channel=channel, **kwargs)

    if loader_name == "touchstone":
        from tracekit.analyzers.signal_integrity.sparams import load_touchstone

        return load_touchstone(file_path)  # type: ignore[return-value]

    # An explicit format name that matches no loader
    raise UnsupportedFormatError(
        loader_name,
        list(SUPPORTED_FORMATS),
        file_path=str(file_path),
    )

233 

234 

def _load_wfm_auto(
    path: Path,
    *,
    channel: str | int | None = None,
    **kwargs: Any,
) -> Trace:
    """Load a ``.wfm`` file, choosing the Rigol or Tektronix loader from its header.

    The first 32 bytes of the file are read and compared against a short
    list of Rigol prefixes. Anything that is not recognised as Rigol,
    including an empty file, is handed to the Tektronix loader.

    Args:
        path: Path to the .wfm file.
        channel: Accepted so the signature matches what ``load()`` passes in;
            it is not forwarded to either loader.
        **kwargs: Additional arguments forwarded unchanged to the chosen loader.

    Returns:
        The trace returned by the Rigol or Tektronix loader.

    Raises:
        LoaderError: If the file cannot be opened or read for detection.
    """
    # Leading byte sequences treated as a Rigol file. This is a simplified
    # heuristic, not a full format check.
    rigol_signatures = (b"\x00\x00\x01\x00", b"RIGOL")

    try:
        with open(path, "rb") as f:
            magic = f.read(32)
    except OSError as e:
        raise LoaderError(
            "Failed to read file for format detection",
            file_path=str(path),
            details=str(e),
        ) from e

    # startswith() checks each signature at its own length; a fixed 4-byte
    # slice comparison can never equal the 5-byte b"RIGOL" prefix.
    if magic.startswith(rigol_signatures):
        from tracekit.loaders.rigol import load_rigol_wfm

        return load_rigol_wfm(path, **kwargs)

    # Default to Tektronix
    from tracekit.loaders.tektronix import load_tektronix_wfm

    return load_tektronix_wfm(path, **kwargs)

282 

283 

def load_all_channels(
    path: str | PathLike[str],
    *,
    format: str | None = None,
) -> dict[str, WaveformTrace | DigitalTrace]:
    """Return every channel of a waveform file as a name-to-trace mapping.

    Multi-channel extraction is implemented for Tektronix WFM files only
    (``.wfm`` files or ``format`` "tektronix"/"tek"). Any other format is
    loaded through ``load()`` and returned as a one-entry dictionary.

    Args:
        path: Path to the waveform file.
        format: Optional format override (e.g., "tektronix", "rigol").
            When omitted, the format is taken from the file extension.

    Returns:
        Dictionary mapping channel names to traces. For the single-trace
        fallback the key is the trace's ``metadata.channel_name`` when that
        is set and non-empty, otherwise "ch1".

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        UnsupportedFormatError: If no format is given and the extension is
            not in ``SUPPORTED_FORMATS``.

    Example:
        >>> import tracekit as tk
        >>> channels = tk.load_all_channels("multi_channel.wfm")
        >>> for name, trace in channels.items():
        ...     print(f"{name}: {len(trace.data)} samples")
        ch1: 10000 samples
        ch2: 10000 samples
        d1: 10000 samples

        >>> # Access specific channel
        >>> analog_ch1 = channels["ch1"]
        >>> digital_d1 = channels["d1"]
    """
    file_path = Path(path)
    if not file_path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")

    # Resolve the loader name: explicit override wins over the extension
    if format is None:
        suffix = file_path.suffix.lower()
        if suffix not in SUPPORTED_FORMATS:
            raise UnsupportedFormatError(
                suffix,
                list(SUPPORTED_FORMATS),
                file_path=str(file_path),
            )
        loader_name = SUPPORTED_FORMATS[suffix]
    else:
        loader_name = format.lower()

    # Only the Tektronix WFM path knows how to extract several channels
    if loader_name in ("auto_wfm", "tektronix", "tek"):
        return _load_all_channels_tektronix(file_path)

    # Everything else: load one trace and wrap it in a dictionary
    single = load(file_path, format=format)
    key = getattr(single.metadata, "channel_name", None) or "ch1"
    return {key: single}  # type: ignore[dict-item]

347 

348 

def _load_all_channels_tektronix(
    path: Path,
) -> dict[str, WaveformTrace | DigitalTrace]:
    """Collect every channel of a Tektronix WFM file into a name-keyed dict.

    The file is read with the optional ``tm_data_types`` package. When that
    package cannot be imported, the single-channel Tektronix loader is used
    through ``load()`` instead.

    Args:
        path: Path to the Tektronix .wfm file.

    Returns:
        Dictionary mapping channel names to traces. Channels taken from the
        ``analog_waveforms`` / ``digital_waveforms`` attributes are keyed
        "ch<N>" and "d<N>", numbered from 1. A file holding a single
        waveform is keyed by its lower-cased channel name, defaulting to
        "d1" (digital) or "ch1" (analog). In the import-fallback path the
        key is the channel name as reported (not lower-cased) or "ch1".

    Raises:
        LoaderError: If ``tm_data_types`` fails to read the file, or no
            channel could be extracted from it.
    """
    try:
        import tm_data_types  # type: ignore[import-not-found, import-untyped]
    except ImportError:
        # Optional dependency missing: fall back to single channel loading
        single = load(path, format="tektronix")
        key = getattr(single.metadata, "channel_name", None) or "ch1"
        return {key: single}  # type: ignore[dict-item]

    try:
        wfm = tm_data_types.read_file(str(path))
    except Exception as exc:
        raise LoaderError(
            "Failed to read Tektronix WFM file",
            file_path=str(path),
            details=str(exc),
        ) from exc

    channels: dict[str, WaveformTrace | DigitalTrace] = {}

    # Analog channels. A failure on one channel is logged and skipped so
    # the remaining channels are still returned.
    analog_waveforms = getattr(wfm, "analog_waveforms", None)
    if analog_waveforms:
        import numpy as np

        from tracekit.loaders.tektronix import _build_waveform_trace

        for number, analog in enumerate(analog_waveforms, start=1):
            try:
                samples = np.array(analog.y_data, dtype=np.float64)
                step = getattr(analog, "x_increment", 1e-6)
                # Guard against a zero or negative increment
                if step > 0:
                    rate = 1.0 / step
                else:
                    rate = 1e6
                channels[f"ch{number}"] = _build_waveform_trace(
                    data=samples,
                    sample_rate=rate,
                    vertical_scale=getattr(analog, "y_scale", None),
                    vertical_offset=getattr(analog, "y_offset", None),
                    channel_name=getattr(analog, "name", f"CH{number}"),
                    path=path,
                    wfm=analog,
                )
            except Exception as exc:
                logger.warning("Failed to extract analog channel %d: %s", number, exc)

    # Digital channels, with the same log-and-skip policy
    digital_waveforms = getattr(wfm, "digital_waveforms", None)
    if digital_waveforms:
        from tracekit.loaders.tektronix import _load_digital_waveform

        for number, digital in enumerate(digital_waveforms, start=1):
            try:
                channels[f"d{number}"] = _load_digital_waveform(digital, path, number - 1)
            except Exception as exc:
                logger.warning("Failed to extract digital channel %d: %s", number, exc)

    # Nothing extracted above: the object itself may be a single waveform
    if not channels:
        looks_digital = type(wfm).__name__ == "DigitalWaveform" or hasattr(
            wfm, "y_axis_byte_values"
        )
        if looks_digital:
            from tracekit.loaders.tektronix import _load_digital_waveform

            digital_trace = _load_digital_waveform(wfm, path, 0)
            key = digital_trace.metadata.channel_name or "d1"
            channels[key.lower()] = digital_trace
        elif hasattr(wfm, "y_axis_values") or hasattr(wfm, "y_data"):
            # Direct analog waveform: reuse the regular Tektronix loader
            analog_trace = load(path, format="tektronix")
            key = analog_trace.metadata.channel_name or "ch1"
            channels[key.lower()] = analog_trace  # type: ignore[assignment]

    if not channels:
        raise LoaderError(
            "No channels found in file",
            file_path=str(path),
            fix_hint="File may be empty or use an unsupported format variant.",
        )

    return channels

446 

447 

def get_supported_formats() -> list[str]:
    """Return the file extensions that ``load()`` can auto-detect.

    Returns:
        A new list of the extension keys of ``SUPPORTED_FORMATS``
        (each including the leading dot), in the mapping's order.

    Example:
        >>> from tracekit.loaders import get_supported_formats
        >>> print(get_supported_formats())
        ['.wfm', '.npz', '.csv', '.h5', ...]
    """
    # Iterating a dict yields its keys; list() gives the caller a fresh copy
    return list(SUPPORTED_FORMATS)

460 

461 

def load_lazy(path: str | PathLike[str], **kwargs: Any) -> LazyWaveformTrace | WaveformTrace:
    """Load a trace lazily by delegating to ``load_trace_lazy``.

    This is a thin convenience wrapper: ``path`` and every keyword argument
    are passed through unchanged. See ``load_trace_lazy`` for the accepted
    options and behaviour.

    Args:
        path: Path to the file.
        **kwargs: Keyword arguments forwarded to ``load_trace_lazy``
            (for example ``sample_rate``, as in the example below).

    Returns:
        LazyWaveformTrace or WaveformTrace, as returned by ``load_trace_lazy``.

    Example:
        >>> trace = tk.loaders.load_lazy("huge_trace.npy", sample_rate=1e9)
        >>> print(f"Length: {trace.length}")  # Metadata available immediately

    References:
        API-017: Lazy Loading for Huge Files
    """
    # Resolved at call time, so the lookup happens on every invocation
    from tracekit.loaders.lazy import load_trace_lazy as lazy_loader

    result = lazy_loader(path, **kwargs)  # type: ignore[arg-type]
    return result

484 

485 

# Public API of the package. Entries are kept in sorted order:
# constants first, then classes, then functions and submodules.
__all__ = [
    "LARGE_FILE_WARNING_THRESHOLD",
    "SUPPORTED_FORMATS",
    "BitfieldDef",
    "BitfieldExtractor",
    "ConfigurablePacketLoader",
    "DeviceConfig",
    "DeviceInfo",
    "DeviceMapper",
    "DigitalTrace",
    "HeaderFieldDef",
    "IdleRegion",
    "IdleStatistics",
    "IdleStats",
    "LazyWaveformTrace",
    "PacketFormatConfig",
    "PacketValidator",
    "ParsedPacket",
    "SampleFormatDef",
    "SequenceGap",
    "SequenceValidation",
    "ValidationResult",
    "ValidationStats",
    "WaveformTrace",
    "binary",
    "csv",
    "detect_idle_regions",
    "detect_source_type",
    "extract_channels",
    "get_idle_statistics",
    "get_supported_formats",
    "hdf5",
    "load",
    "load_all_channels",
    "load_binary_packets",
    "load_lazy",
    "load_packets_streaming",
    "load_trace_lazy",
    "trim_idle",
]