Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\io\metronix\metronix_collection.py: 100%

50 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:01 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Metronix collection utilities for managing ATSS files. 

4 

5This module provides classes for collecting and managing Metronix ATSS 

6(Audio Time Series System) files and creating pandas DataFrames with 

7metadata for processing workflows. 

8 

9Classes 

10------- 

11MetronixCollection 

12 Collection class for managing Metronix ATSS files 

13 

14Created on Fri Nov 22 13:22:44 2024 

15 

16@author: jpeacock 

17""" 

18 

19# ============================================================================= 

20# Imports 

21# ============================================================================= 

22 

23from pathlib import Path 

24from typing import Any, Union 

25 

26import pandas as pd 

27 

28from mth5.io.collection import Collection 

29from mth5.io.metronix import ATSS 

30 

31 

32# ============================================================================= 

33 

34 

class MetronixCollection(Collection):
    """
    Collection class for managing Metronix ATSS files.

    This class extends the base Collection class to inventory Metronix ATSS
    time series files found under a directory.  Each file is opened through
    :class:`ATSS`, which supplies the channel metadata used to build one
    DataFrame row per channel file.

    Parameters
    ----------
    file_path : Union[str, Path, None], optional
        Path to directory containing Metronix ATSS files, by default None
    **kwargs
        Additional keyword arguments passed to parent Collection class

    Attributes
    ----------
    file_ext : list[str]
        List of file extensions to search for (["atss"])

    Examples
    --------
    >>> from mth5.io.metronix import MetronixCollection
    >>> collection = MetronixCollection("/path/to/metronix/files")
    >>> df = collection.to_dataframe(sample_rates=[128, 256])
    """

    def __init__(self, file_path: Union[str, Path, None] = None, **kwargs: Any) -> None:
        super().__init__(file_path=file_path, **kwargs)
        # Set after the parent initializer so this value is what
        # to_dataframe() passes to get_files().
        self.file_ext: list[str] = ["atss"]

    def to_dataframe(
        self,
        sample_rates: Union[list[int], None] = None,
        run_name_zeros: int = 0,
        calibration_path: Union[str, Path, None] = None,
    ) -> pd.DataFrame:
        """
        Create DataFrame for Metronix timeseries ATSS + JSON file sets.

        Opens every ATSS file returned by ``get_files`` for ``file_ext``,
        skips files whose sample rate is not requested, and records one row
        of metadata per remaining file.

        Parameters
        ----------
        sample_rates : Union[list[int], None], optional
            Sample rates (Hz) to include.  ``None`` (the default) is
            equivalent to ``[128]``.
        run_name_zeros : int, optional
            Forwarded to the inherited ``_sort_df``.  Number of digits used
            when zero-padding run names; if 0, run names are unchanged.
            If > 0, run names are formatted as
            'sr{sample_rate}_{run_number:0{zeros}d}', by default 0
        calibration_path : Union[str, Path, None], optional
            Currently unused; accepted for signature compatibility,
            by default None

        Returns
        -------
        pd.DataFrame
            DataFrame with columns:
            - survey: Survey ID
            - station: Station ID
            - run: Run ID
            - start: Start time
            - end: End time
            - channel_id: Channel number
            - component: Component name
            - fn: File path
            - sample_rate: Sample rate in Hz
            - file_size: File size
            - n_samples: Number of samples
            - sequence_number: Sequence number (always 0)
            - dipole: Dipole length (always 0)
            - coil_number: Sensor id for magnetic channels, otherwise None
            - latitude, longitude, elevation: taken from the channel
              ``location`` for magnetic channels and from the ``positive``
              electrode for all other channels
            - instrument_id: Instrument/system number
            - calibration_fn: Calibration file path (always None)

        Examples
        --------
        >>> collection = MetronixCollection("/path/to/files")
        >>> df = collection.to_dataframe(sample_rates=[128, 256])
        >>> df = collection.to_dataframe(run_name_zeros=4)  # Zero-pad run names
        """
        # Resolve the default here rather than in the signature so that no
        # list object is shared between calls.
        if sample_rates is None:
            sample_rates = [128]

        entries = []
        # set() drops duplicate paths returned by get_files().
        for atss_fn in set(self.get_files(self.file_ext)):
            atss_obj = ATSS(atss_fn)
            if atss_obj.sample_rate not in sample_rates:
                continue
            ch_metadata = atss_obj.channel_metadata

            entry = self.get_empty_entry_dict()
            entry["survey"] = atss_obj.survey_id
            entry["station"] = atss_obj.station_id
            entry["run"] = atss_obj.run_id
            entry["start"] = ch_metadata.time_period.start
            entry["end"] = ch_metadata.time_period.end
            entry["channel_id"] = atss_obj.channel_number
            entry["component"] = atss_obj.component
            entry["fn"] = atss_fn
            entry["sample_rate"] = ch_metadata.sample_rate
            entry["file_size"] = atss_obj.file_size
            entry["n_samples"] = atss_obj.n_samples
            entry["sequence_number"] = 0
            entry["dipole"] = 0
            if ch_metadata.type == "magnetic":
                # Magnetic channels carry a sensor id and a single location.
                entry["coil_number"] = ch_metadata.sensor.id
                entry["latitude"] = ch_metadata.location.latitude
                entry["longitude"] = ch_metadata.location.longitude
                entry["elevation"] = ch_metadata.location.elevation
            else:
                # Other channels are located by their positive electrode.
                entry["coil_number"] = None
                entry["latitude"] = ch_metadata.positive.latitude
                entry["longitude"] = ch_metadata.positive.longitude
                entry["elevation"] = ch_metadata.positive.elevation

            entry["instrument_id"] = atss_obj.system_number
            entry["calibration_fn"] = None
            entries.append(entry)

        # make pandas dataframe and set data types
        df = self._sort_df(self._set_df_dtypes(pd.DataFrame(entries)), run_name_zeros)

        return df

    def assign_run_names(self, df: pd.DataFrame, zeros: int = 0) -> pd.DataFrame:
        """
        Assign formatted run names based on sample rate and run number.

        If zeros is 0, run names are unchanged. Otherwise, run names are
        formatted as 'sr{sample_rate}_{run_number:0{zeros}d}' where the
        run number is the second '_'-separated field of the original run
        name, converted to an integer.

        Parameters
        ----------
        df : pd.DataFrame
            DataFrame containing run information with 'run' and
            'sample_rate' columns.  It is modified in place.
        zeros : int, optional
            Minimum width of the zero-padded run number. If 0, run names
            are unchanged, by default 0

        Returns
        -------
        pd.DataFrame
            The same DataFrame object with updated run names

        Raises
        ------
        IndexError
            If a run name contains no underscore.
        ValueError
            If the field after the first underscore is not an integer.

        Examples
        --------
        >>> df = pd.DataFrame({
        ...     'run': ['run_1', 'run_2'],
        ...     'sample_rate': [128, 256]
        ... })
        >>> collection = MetronixCollection()
        >>> result = collection.assign_run_names(df, zeros=3)
        >>> print(result['run'].tolist())
        ['sr128_001', 'sr256_002']

        Notes
        -----
        All new names are computed before the column is written, so a
        malformed run name leaves ``df`` untouched, and the assignment is
        positional, so rows sharing an index label keep their own names.
        """
        if zeros == 0 or df.empty:
            return df

        new_names = [
            f"sr{sample_rate:.0f}_{int(run.split('_')[1]):0{zeros}}"
            for run, sample_rate in zip(df["run"], df["sample_rate"])
        ]
        # Assigning a list is positional, so a non-unique index cannot
        # cause one row's name to overwrite another's.
        df["run"] = new_names
        return df

206 return df