Coverage for src / tracekit / loaders / __init__.py: 80%
143 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""TraceKit data loaders for various file formats.
3This module provides a unified load() function that auto-detects file formats
4and delegates to the appropriate loader.
7Example:
8 >>> import tracekit as tk
9 >>> trace = tk.load("capture.wfm")
10 >>> print(f"Loaded {len(trace.data)} samples")
12 >>> # Load all channels from multi-channel file
13 >>> channels = tk.load_all_channels("multi_channel.wfm")
14 >>> for name, trace in channels.items():
15 ... print(f"{name}: {len(trace.data)} samples")
16"""
18from __future__ import annotations
20import logging
21import warnings
22from pathlib import Path
23from typing import TYPE_CHECKING, Any
25from tracekit.core.exceptions import LoaderError, UnsupportedFormatError
26from tracekit.core.types import DigitalTrace, WaveformTrace
28# Import alias modules for DSL compatibility
29from tracekit.loaders import (
30 binary,
31 csv,
32 hdf5,
33)
35# Import configurable binary loading functionality
36from tracekit.loaders.configurable import (
37 BitfieldDef,
38 BitfieldExtractor,
39 ConfigurablePacketLoader,
40 DeviceConfig,
41 DeviceInfo,
42 DeviceMapper,
43 HeaderFieldDef,
44 PacketFormatConfig,
45 ParsedPacket,
46 SampleFormatDef,
47 detect_source_type,
48 extract_channels,
49 load_binary_packets,
50 load_packets_streaming,
51)
52from tracekit.loaders.lazy import LazyWaveformTrace, load_trace_lazy
53from tracekit.loaders.preprocessing import (
54 IdleRegion,
55 IdleStatistics,
56 IdleStats,
57 detect_idle_regions,
58 get_idle_statistics,
59 trim_idle,
60)
61from tracekit.loaders.validation import (
62 PacketValidator,
63 SequenceGap,
64 SequenceValidation,
65 ValidationResult,
66 ValidationStats,
67)
69if TYPE_CHECKING:
70 from os import PathLike
72 from tracekit.core.types import Trace
# Module-level logger; used for non-fatal diagnostics while extracting channels
logger = logging.getLogger(__name__)

# Lookup table: lower-case file extension -> loader name used by load()
SUPPORTED_FORMATS: dict[str, str] = {
    ".wfm": "auto_wfm",  # Tektronix vs Rigol is decided at load time
    ".npz": "numpy",
    ".csv": "csv",
    ".h5": "hdf5",
    ".hdf5": "hdf5",
    ".sr": "sigrok",
    ".pcap": "pcap",
    ".pcapng": "pcap",
    ".wav": "wav",
    ".vcd": "vcd",
    ".tdms": "tdms",
    # Touchstone S-parameter formats: .s1p through .s8p
    **{f".s{port_count}p": "touchstone" for port_count in range(1, 9)},
}

# Files larger than this (100 MB) make load() suggest lazy loading
LARGE_FILE_WARNING_THRESHOLD = 100 * 1024 * 1024
def load(
    path: str | PathLike[str],
    *,
    format: str | None = None,
    channel: str | int | None = None,
    lazy: bool = False,
    **kwargs: Any,
) -> Trace:
    """Load a trace from disk, picking the loader from the file extension.

    Main entry point for reading oscilloscope and logic-analyzer captures.
    The loader is chosen from ``SUPPORTED_FORMATS`` using the lower-cased
    file suffix unless ``format`` names a loader explicitly.

    Args:
        path: Path to the file to load.
        format: Optional loader name override (e.g., "tektronix", "rigol",
            "csv"). Matched case-insensitively. When omitted, the loader is
            selected from the file extension.
        channel: Optional channel name or index for multi-channel files.
            Forwarded to the WFM auto-detect, numpy, hdf5, sigrok, wav and
            tdms loaders; the other loaders do not receive it.
        lazy: If True, delegate to load_lazy(). Only ``path`` and
            ``**kwargs`` are forwarded in that case; ``format`` and
            ``channel`` are not.
        **kwargs: Additional arguments passed to the specific loader
            (not forwarded to the touchstone loader).

    Returns:
        WaveformTrace or DigitalTrace depending on the file content.

    Raises:
        UnsupportedFormatError: If the extension or the explicit format
            name does not map to a known loader.
        FileNotFoundError: If the file does not exist.
        LoaderError: If a .wfm file cannot be read for format detection.

    Example:
        >>> import tracekit as tk
        >>> trace = tk.load("oscilloscope_capture.wfm")
        >>> print(f"Loaded {len(trace.data)} samples at {trace.metadata.sample_rate} Hz")

        >>> # Force specific loader
        >>> trace = tk.load("data.bin", format="tektronix")

        >>> # Check if digital trace
        >>> if isinstance(trace, DigitalTrace):
        ...     print("Loaded digital waveform")
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    # Large eager loads get a hint about lazy=True. The warning is issued
    # here (not in a helper) so stacklevel=2 points at the caller of load().
    file_size = path.stat().st_size
    if not lazy and file_size > LARGE_FILE_WARNING_THRESHOLD:
        warnings.warn(
            f"File is large ({file_size / 1024 / 1024:.1f} MB). "
            "Consider using lazy=True for better memory efficiency.",
            stacklevel=2,
        )

    # Lazy requests bypass format detection entirely
    if lazy:
        return load_lazy(path, **kwargs)  # type: ignore[return-value]

    # Resolve the loader name: explicit override wins over the extension
    if format is None:
        suffix = path.suffix.lower()
        if suffix not in SUPPORTED_FORMATS:
            raise UnsupportedFormatError(
                suffix,
                list(SUPPORTED_FORMATS),
                file_path=str(path),
            )
        loader_key = SUPPORTED_FORMATS[suffix]
    else:
        loader_key = format.lower()

    # Dispatch. Each loader module is imported only when it is selected.
    if loader_key == "auto_wfm":
        return _load_wfm_auto(path, channel=channel, **kwargs)

    if loader_key in ("tektronix", "tek"):
        from tracekit.loaders.tektronix import load_tektronix_wfm

        return load_tektronix_wfm(path, **kwargs)

    if loader_key == "rigol":
        from tracekit.loaders.rigol import load_rigol_wfm

        return load_rigol_wfm(path, **kwargs)

    if loader_key == "numpy":
        from tracekit.loaders.numpy_loader import load_npz

        return load_npz(path, channel=channel, **kwargs)

    if loader_key == "csv":
        from tracekit.loaders.csv_loader import load_csv

        return load_csv(path, **kwargs)  # type: ignore[return-value]

    if loader_key == "hdf5":
        from tracekit.loaders.hdf5_loader import load_hdf5

        return load_hdf5(path, channel=channel, **kwargs)  # type: ignore[return-value]

    if loader_key == "sigrok":
        from tracekit.loaders.sigrok import load_sigrok

        return load_sigrok(path, channel=channel, **kwargs)

    if loader_key == "vcd":
        from tracekit.loaders.vcd import load_vcd

        return load_vcd(path, **kwargs)

    if loader_key == "pcap":
        from tracekit.loaders.pcap import load_pcap

        return load_pcap(path, **kwargs)  # type: ignore[return-value]

    if loader_key == "wav":
        from tracekit.loaders.wav import load_wav

        return load_wav(path, channel=channel, **kwargs)

    if loader_key == "tdms":
        from tracekit.loaders.tdms import load_tdms

        return load_tdms(path, channel=channel, **kwargs)

    if loader_key == "touchstone":
        from tracekit.analyzers.signal_integrity.sparams import load_touchstone

        return load_touchstone(path)  # type: ignore[return-value]

    # No branch matched: the explicit format name is unknown
    raise UnsupportedFormatError(
        loader_key,
        list(SUPPORTED_FORMATS),
        file_path=str(path),
    )
def _load_wfm_auto(
    path: Path,
    *,
    channel: str | int | None = None,
    **kwargs: Any,
) -> Trace:
    """Auto-detect WFM format (Tektronix vs Rigol) and load.

    Reads the first 32 bytes of the file and checks them against the known
    Rigol signatures. Anything that does not match is handed to the
    Tektronix loader.

    Args:
        path: Path to the .wfm file.
        channel: Optional channel for multi-channel files. Accepted so the
            signature mirrors load(); it is not currently forwarded to
            either WFM loader.
        **kwargs: Additional arguments for the loader.

    Returns:
        WaveformTrace or DigitalTrace from the detected loader.

    Raises:
        LoaderError: If the file cannot be opened or read for detection.
    """
    # Read first bytes to detect format
    try:
        with open(path, "rb") as f:
            magic = f.read(32)
    except OSError as e:
        raise LoaderError(
            "Failed to read file for format detection",
            file_path=str(path),
            details=str(e),
        ) from e

    # This is a simplified detection - real implementation would be more robust.
    # The signatures differ in length, so compare by prefix: slicing the
    # header to a fixed 4 bytes could never equal the 5-byte b"RIGOL" marker.
    rigol_signatures = (b"\x00\x00\x01\x00", b"RIGOL")
    if magic.startswith(rigol_signatures):
        from tracekit.loaders.rigol import load_rigol_wfm

        return load_rigol_wfm(path, **kwargs)

    # Default to Tektronix
    from tracekit.loaders.tektronix import load_tektronix_wfm

    return load_tektronix_wfm(path, **kwargs)
def load_all_channels(
    path: str | PathLike[str],
    *,
    format: str | None = None,
) -> dict[str, WaveformTrace | DigitalTrace]:
    """Load every channel of a multi-channel waveform file in one call.

    Multi-channel extraction is implemented for Tektronix WFM files. Any
    other format is loaded through load() and returned as a one-entry
    dictionary.

    Args:
        path: Path to the multi-channel waveform file.
        format: Optional format override (e.g., "tektronix", "rigol").

    Returns:
        Dictionary mapping channel names to traces.
        Analog channels are named "ch1", "ch2", etc.
        Digital channels are named "d1", "d2", etc.
        For single-channel fallbacks the key is the trace's
        ``metadata.channel_name`` when set, otherwise "ch1".

    Raises:
        UnsupportedFormatError: If the file format is not recognized.
        FileNotFoundError: If the file does not exist.

    Example:
        >>> import tracekit as tk
        >>> channels = tk.load_all_channels("multi_channel.wfm")
        >>> for name, trace in channels.items():
        ...     print(f"{name}: {len(trace.data)} samples")
        ch1: 10000 samples
        ch2: 10000 samples
        d1: 10000 samples

        >>> # Access specific channel
        >>> analog_ch1 = channels["ch1"]
        >>> digital_d1 = channels["d1"]
    """
    path = Path(path)
    if not path.exists():
        raise FileNotFoundError(f"File not found: {path}")

    # Resolve the loader name: explicit override wins over the extension
    if format is None:
        suffix = path.suffix.lower()
        if suffix not in SUPPORTED_FORMATS:
            raise UnsupportedFormatError(
                suffix,
                list(SUPPORTED_FORMATS),
                file_path=str(path),
            )
        loader_key = SUPPORTED_FORMATS[suffix]
    else:
        loader_key = format.lower()

    # Only the Tektronix reader knows how to enumerate channels.
    # NOTE(review): "auto_wfm" goes straight to the Tektronix path without
    # the Rigol magic-byte check that load() performs - confirm intended.
    if loader_key in ("auto_wfm", "tektronix", "tek"):
        return _load_all_channels_tektronix(path)

    # Every other format: wrap the single loaded trace in a dictionary
    single_trace = load(path, format=format)
    key = getattr(single_trace.metadata, "channel_name", None) or "ch1"
    return {key: single_trace}  # type: ignore[dict-item]
def _load_all_channels_tektronix(
    path: Path,
) -> dict[str, WaveformTrace | DigitalTrace]:
    """Load all channels from a Tektronix WFM file.

    Reads the file with the optional ``tm_data_types`` package and collects
    analog channels under "ch<N>" keys and digital channels under "d<N>"
    keys. If the parsed object exposes no channel collections, it is treated
    as a single waveform. If ``tm_data_types`` is not installed, the file is
    loaded through load() as a single channel.

    Args:
        path: Path to the Tektronix .wfm file.

    Returns:
        Dictionary mapping lower-case channel names to traces.

    Raises:
        LoaderError: If the file cannot be read or parsed, or if no
            channels could be extracted from it.
    """
    try:
        import tm_data_types  # type: ignore[import-not-found, import-untyped]
    except ImportError:
        # Fall back to single channel loading. The key is lower-cased so the
        # result follows the same naming as the direct-waveform path below,
        # regardless of whether the optional dependency is installed.
        trace = load(path, format="tektronix")
        channel_name = getattr(trace.metadata, "channel_name", None) or "ch1"
        return {str(channel_name).lower(): trace}  # type: ignore[dict-item]

    try:
        wfm = tm_data_types.read_file(str(path))
    except Exception as e:
        raise LoaderError(
            "Failed to read Tektronix WFM file",
            file_path=str(path),
            details=str(e),
        ) from e

    channels: dict[str, WaveformTrace | DigitalTrace] = {}

    # Extract analog waveforms
    if hasattr(wfm, "analog_waveforms") and wfm.analog_waveforms:
        import numpy as np

        from tracekit.loaders.tektronix import _build_waveform_trace

        for i, awfm in enumerate(wfm.analog_waveforms):
            # Best effort: a channel that fails to convert is logged and
            # skipped so the remaining channels are still returned.
            try:
                data = np.array(awfm.y_data, dtype=np.float64)
                x_increment = getattr(awfm, "x_increment", 1e-6)
                # Sample rate is the reciprocal of the sample spacing;
                # a non-positive spacing falls back to 1e6.
                sample_rate = 1.0 / x_increment if x_increment > 0 else 1e6
                vertical_scale = getattr(awfm, "y_scale", None)
                vertical_offset = getattr(awfm, "y_offset", None)
                channel_name = getattr(awfm, "name", f"CH{i + 1}")

                trace = _build_waveform_trace(
                    data=data,
                    sample_rate=sample_rate,
                    vertical_scale=vertical_scale,
                    vertical_offset=vertical_offset,
                    channel_name=channel_name,
                    path=path,
                    wfm=awfm,
                )
                channels[f"ch{i + 1}"] = trace
            except Exception as e:
                logger.warning("Failed to extract analog channel %d: %s", i + 1, e)

    # Extract digital waveforms
    if hasattr(wfm, "digital_waveforms") and wfm.digital_waveforms:
        from tracekit.loaders.tektronix import _load_digital_waveform

        for i, dwfm in enumerate(wfm.digital_waveforms):
            try:
                trace = _load_digital_waveform(dwfm, path, i)
                channels[f"d{i + 1}"] = trace
            except Exception as e:
                logger.warning("Failed to extract digital channel %d: %s", i + 1, e)

    # Handle direct waveform formats (single file = single channel)
    if not channels:
        wfm_type = type(wfm).__name__

        if wfm_type == "DigitalWaveform" or hasattr(wfm, "y_axis_byte_values"):
            from tracekit.loaders.tektronix import _load_digital_waveform

            trace = _load_digital_waveform(wfm, path, 0)
            channel_name = trace.metadata.channel_name or "d1"
            channels[channel_name.lower()] = trace

        elif hasattr(wfm, "y_axis_values") or hasattr(wfm, "y_data"):
            # Direct analog waveform
            trace = load(path, format="tektronix")
            channel_name = trace.metadata.channel_name or "ch1"
            channels[channel_name.lower()] = trace  # type: ignore[assignment]

    if not channels:
        raise LoaderError(
            "No channels found in file",
            file_path=str(path),
            fix_hint="File may be empty or use an unsupported format variant.",
        )

    return channels
def get_supported_formats() -> list[str]:
    """Return the file extensions that can be auto-detected.

    Returns:
        A new list of the extensions registered in SUPPORTED_FORMATS,
        each including its leading dot, in the order they are defined.

    Example:
        >>> from tracekit.loaders import get_supported_formats
        >>> print(get_supported_formats())
        ['.wfm', '.npz', '.csv', '.h5', ...]
    """
    # Iterating a dict yields its keys, so unpacking it copies the key list
    return [*SUPPORTED_FORMATS]
def load_lazy(path: str | PathLike[str], **kwargs: Any) -> LazyWaveformTrace | WaveformTrace:
    """Load a trace lazily; intended for files too large to read eagerly.

    Thin convenience wrapper that forwards everything to load_trace_lazy.
    See load_trace_lazy for details.

    Args:
        path: Path to the file.
        **kwargs: Additional arguments (sample_rate, lazy=True, etc.),
            passed through unchanged.

    Returns:
        LazyWaveformTrace or WaveformTrace.

    Example:
        >>> trace = tk.loaders.load_lazy("huge_trace.npy", sample_rate=1e9)
        >>> print(f"Length: {trace.length}")  # Metadata available immediately

    References:
        API-017: Lazy Loading for Huge Files
    """
    from tracekit.loaders.lazy import load_trace_lazy as _lazy_loader

    lazy_trace = _lazy_loader(path, **kwargs)  # type: ignore[arg-type]
    return lazy_trace
# Public API of the loaders package. Entries are kept sorted within three
# groups: constants, classes, then functions and loader modules.
__all__ = [
    # Constants
    "LARGE_FILE_WARNING_THRESHOLD",
    "SUPPORTED_FORMATS",
    # Classes
    "BitfieldDef",
    "BitfieldExtractor",
    "ConfigurablePacketLoader",
    "DeviceConfig",
    "DeviceInfo",
    "DeviceMapper",
    "DigitalTrace",
    "HeaderFieldDef",
    "IdleRegion",
    "IdleStatistics",
    "IdleStats",
    "LazyWaveformTrace",
    "PacketFormatConfig",
    "PacketValidator",
    "ParsedPacket",
    "SampleFormatDef",
    "SequenceGap",
    "SequenceValidation",
    "ValidationResult",
    "ValidationStats",
    "WaveformTrace",
    # Functions and loader modules
    "binary",
    "csv",
    "detect_idle_regions",
    "detect_source_type",
    "extract_channels",
    "get_idle_statistics",
    "get_supported_formats",
    "hdf5",
    "load",
    "load_all_channels",
    "load_binary_packets",
    "load_lazy",
    "load_packets_streaming",
    "load_trace_lazy",
    "trim_idle",
]