Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\io\metronix\metronix_collection.py: 100%
50 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
1# -*- coding: utf-8 -*-
2"""
3Metronix collection utilities for managing ATSS files.
5This module provides classes for collecting and managing Metronix ATSS
6(Audio Time Series System) files and creating pandas DataFrames with
7metadata for processing workflows.
9Classes
10-------
11MetronixCollection
12 Collection class for managing Metronix ATSS files
14Created on Fri Nov 22 13:22:44 2024
16@author: jpeacock
17"""
19# =============================================================================
20# Imports
21# =============================================================================
23from pathlib import Path
24from typing import Any, Union
26import pandas as pd
28from mth5.io.collection import Collection
29from mth5.io.metronix import ATSS
32# =============================================================================
class MetronixCollection(Collection):
    """
    Collection class for managing Metronix ATSS files.

    This class extends the base Collection class to handle Metronix ATSS
    (Audio Time Series System) files and their associated JSON metadata files.
    It provides functionality to create pandas DataFrames with comprehensive
    metadata for processing workflows.

    Parameters
    ----------
    file_path : Union[str, Path, None], optional
        Path to directory containing Metronix ATSS files, by default None
    **kwargs
        Additional keyword arguments passed to parent Collection class

    Attributes
    ----------
    file_ext : list[str]
        List of file extensions to search for (["atss"])

    Examples
    --------
    >>> from mth5.io.metronix import MetronixCollection
    >>> collection = MetronixCollection("/path/to/metronix/files")
    >>> df = collection.to_dataframe(sample_rates=[128, 256])
    """

    def __init__(self, file_path: Union[str, Path, None] = None, **kwargs: Any) -> None:
        super().__init__(file_path=file_path, **kwargs)
        # Only ATSS time series files are collected.
        self.file_ext: list[str] = ["atss"]

    def to_dataframe(
        self,
        # NOTE: the list default is only read (membership test below), never
        # mutated, so it is kept as-is to leave the public signature unchanged.
        sample_rates: list[int] = [128],
        run_name_zeros: int = 0,
        calibration_path: Union[str, Path, None] = None,
    ) -> pd.DataFrame:
        """
        Create DataFrame for Metronix timeseries ATSS + JSON file sets.

        Builds one row per ATSS file returned by ``self.get_files`` whose
        sample rate is listed in ``sample_rates``. Each row is filled from
        the corresponding ``ATSS`` object and its channel metadata.

        Parameters
        ----------
        sample_rates : list[int], optional
            List of sample rates to include in Hz, by default [128]. Files
            with any other sample rate are skipped.
        run_name_zeros : int, optional
            Number of zeros for zero-padding run names; passed through to
            the inherited ``_sort_df``. See ``assign_run_names`` for the
            format 'sr{sample_rate}_{run_number:0{zeros}d}', by default 0
        calibration_path : Union[str, Path, None], optional
            Path to calibration files (currently unused), by default None

        Returns
        -------
        pd.DataFrame
            DataFrame with columns:

            - survey: Survey ID
            - station: Station ID
            - run: Run ID
            - start: Start time (from the channel metadata time period)
            - end: End time (from the channel metadata time period)
            - channel_id: Channel number
            - component: Component name (ex, ey, hx, hy, hz)
            - fn: File path
            - sample_rate: Sample rate in Hz
            - file_size: File size in bytes
            - n_samples: Number of samples
            - sequence_number: Sequence number (always 0)
            - dipole: Dipole length (always 0)
            - coil_number: Coil serial number (magnetic channels only,
              otherwise None)
            - latitude, longitude, elevation: taken from the channel's
              ``location`` metadata for magnetic channels and from its
              ``positive`` metadata for all other channel types
            - instrument_id: Instrument/system number
            - calibration_fn: Calibration file path (always None)

        Examples
        --------
        >>> collection = MetronixCollection("/path/to/files")
        >>> df = collection.to_dataframe(sample_rates=[128, 256])
        >>> df = collection.to_dataframe(run_name_zeros=4)  # Zero-pad run names
        """
        entries = []
        # set() drops any path that get_files reports more than once.
        for atss_fn in set(self.get_files(self.file_ext)):
            atss_obj = ATSS(atss_fn)
            if atss_obj.sample_rate not in sample_rates:
                continue
            ch_metadata = atss_obj.channel_metadata

            entry = self.get_empty_entry_dict()
            entry["survey"] = atss_obj.survey_id
            entry["station"] = atss_obj.station_id
            entry["run"] = atss_obj.run_id
            entry["start"] = ch_metadata.time_period.start
            entry["end"] = ch_metadata.time_period.end
            entry["channel_id"] = atss_obj.channel_number
            entry["component"] = atss_obj.component
            entry["fn"] = atss_fn
            entry["sample_rate"] = ch_metadata.sample_rate
            entry["file_size"] = atss_obj.file_size
            entry["n_samples"] = atss_obj.n_samples
            entry["sequence_number"] = 0
            entry["dipole"] = 0
            if ch_metadata.type == "magnetic":
                # Magnetic channels carry a sensor id and a single location.
                entry["coil_number"] = ch_metadata.sensor.id
                entry["latitude"] = ch_metadata.location.latitude
                entry["longitude"] = ch_metadata.location.longitude
                entry["elevation"] = ch_metadata.location.elevation
            else:
                # Every other channel type is positioned by its `positive`
                # metadata and has no coil.
                entry["coil_number"] = None
                entry["latitude"] = ch_metadata.positive.latitude
                entry["longitude"] = ch_metadata.positive.longitude
                entry["elevation"] = ch_metadata.positive.elevation

            entry["instrument_id"] = atss_obj.system_number
            entry["calibration_fn"] = None
            entries.append(entry)

        # make pandas dataframe and set data types
        df = self._sort_df(self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros)

        return df

    def assign_run_names(self, df: pd.DataFrame, zeros: int = 0) -> pd.DataFrame:
        """
        Assign formatted run names based on sample rate and run number.

        If zeros is 0, run names are unchanged. Otherwise, run names are
        formatted as 'sr{sample_rate}_{run_number:0{zeros}d}' where the
        run number is the second '_'-separated token of the original run
        name, converted to an integer.

        The ``run`` column of ``df`` is replaced in place and ``df`` itself
        is returned.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing run information with 'run' and 'sample_rate' columns
        zeros : int, optional
            Number of zeros for zero-padding run numbers. If 0, run names
            are unchanged, by default 0

        Returns
        -------
        pd.DataFrame
            DataFrame with updated run names

        Raises
        ------
        KeyError
            If a non-empty ``df`` lacks the 'run' or 'sample_rate' column.
        IndexError
            If a run name contains no underscore.
        ValueError
            If the token after the first underscore is not an integer.

        Examples
        --------
        >>> df = pd.DataFrame({
        ...     'run': ['run_1', 'run_2'],
        ...     'sample_rate': [128, 256]
        ... })
        >>> collection = MetronixCollection()
        >>> result = collection.assign_run_names(df, zeros=3)
        >>> print(result['run'].tolist())
        ['sr128_001', 'sr256_002']

        Notes
        -----
        The method expects run names to be in format 'prefix_number' where
        'number' can be extracted and converted to an integer for formatting.
        """
        # Nothing to rename: formatting disabled, or there are no rows (an
        # empty frame may not even have the 'run' / 'sample_rate' columns).
        if zeros == 0 or df.empty:
            return df

        # Build every new name positionally before touching the frame.
        # Writing row by row through df.loc[label, "run"] would overwrite all
        # rows that share an index label, and would leave the frame partially
        # renamed if a later run name failed to parse.
        new_names = [
            f"sr{sample_rate:.0f}_{int(run.split('_')[1]):0{zeros}}"
            for sample_rate, run in zip(df["sample_rate"], df["run"])
        ]
        df["run"] = new_names
        return df