Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\io\conversion.py: 82%

121 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-10 00:01 -0800

1""" 

2Convert MTH5 to other formats 

3 

4- MTH5 -> miniSEED + StationXML 

5""" 

6 

7from __future__ import annotations 

8 

9import datetime 

10 

11# ================================================================================== 

12# Imports 

13# ================================================================================== 

14import re 

15from pathlib import Path 

16from typing import Any 

17 

18from loguru import logger 

19from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment 

20from obspy import read 

21from obspy.core import UTCDateTime 

22 

23from mth5.mth5 import MTH5 

24 

25 

26# ================================================================================== 

27 

28 

29class MTH5ToMiniSEEDStationXML: 

30 """ 

31 Convert MTH5 files to miniSEED and StationXML formats. 

32 

33 This class provides functionality to convert magnetotelluric data stored 

34 in MTH5 format to industry-standard miniSEED time series files and 

35 StationXML metadata files for data exchange and archival purposes. 

36 

37 Parameters 

38 ---------- 

39 mth5_path : str, Path, or None, default None 

40 Path to the input MTH5 file to be converted 

41 save_path : str, Path, or None, default None 

42 Directory path where output files will be saved. If None, uses 

43 the parent directory of mth5_path 

44 network_code : str, default "ZU" 

45 Two-character FDSN network code for the output files 

46 use_runs_with_data_only : bool, default True 

47 If True, only process runs that contain actual time series data 

48 **kwargs : dict 

49 Additional keyword arguments to set as instance attributes 

50 

51 Attributes 

52 ---------- 

53 mth5_path : Path or None 

54 Path to the MTH5 input file 

55 save_path : Path 

56 Directory where output files are saved 

57 network_code : str 

58 FDSN network code for output files 

59 use_runs_with_data_only : bool 

60 Flag to process only runs with data 

61 encoding : str or None 

62 Encoding format for miniSEED files 

63 

64 Examples 

65 -------- 

66 >>> converter = MTH5ToMiniSEEDStationXML( 

67 ... mth5_path="/path/to/data.h5", 

68 ... network_code="MT", 

69 ... save_path="/path/to/output" 

70 ... ) 

71 >>> xml_file, mseed_files = converter.convert_mth5_to_ms_stationxml() 

72 """ 

73 

74 def __init__( 

75 self, 

76 mth5_path: str | Path | None = None, 

77 save_path: str | Path | None = None, 

78 network_code: str = "ZU", 

79 use_runs_with_data_only: bool = True, 

80 **kwargs: Any, 

81 ) -> None: 

82 """ 

83 Initialize MTH5 to miniSEED/StationXML converter. 

84 

85 Parameters 

86 ---------- 

87 mth5_path : str, Path, or None, default None 

88 Path to the input MTH5 file to be converted 

89 save_path : str, Path, or None, default None 

90 Directory path where output files will be saved. If None, uses 

91 the parent directory of mth5_path 

92 network_code : str, default "ZU" 

93 Two-character FDSN network code for the output files 

94 use_runs_with_data_only : bool, default True 

95 If True, only process runs that contain actual time series data 

96 **kwargs : dict 

97 Additional keyword arguments to set as instance attributes 

98 """ 

99 self._network_code_pattern = r"^[a-zA-Z0-9]{2}$" 

100 self.mth5_path = mth5_path 

101 self.save_path = save_path 

102 self.network_code = network_code 

103 self.use_runs_with_data_only = use_runs_with_data_only 

104 self.encoding = None 

105 

106 for key, value in kwargs.items(): 

107 setattr(self, key, value) 

108 

109 @property 

110 def mth5_path(self) -> Path | None: 

111 """ 

112 Path to the MTH5 input file. 

113 

114 Returns 

115 ------- 

116 Path or None 

117 Path to the MTH5 file to be converted, or None if not set. 

118 """ 

119 return self._mth5_path 

120 

121 @mth5_path.setter 

122 def mth5_path(self, value: str | Path | None) -> None: 

123 """ 

124 Set the MTH5 file path with validation. 

125 

126 Parameters 

127 ---------- 

128 value : str, Path, or None 

129 Path to the MTH5 file. Must exist if not None. 

130 

131 Raises 

132 ------ 

133 TypeError 

134 If value cannot be converted to a Path object. 

135 FileNotFoundError 

136 If the specified file does not exist. 

137 """ 

138 if value is None: 

139 self._mth5_path = None 

140 return 

141 try: 

142 value = Path(value) 

143 except Exception as error: 

144 raise TypeError(f"Could not convert value to Path: {error}") 

145 

146 if not value.exists(): 

147 raise FileExistsError(f"Could not find {value}") 

148 

149 self._mth5_path = value 

150 

151 @property 

152 def save_path(self) -> Path: 

153 """ 

154 Directory path where output files will be saved. 

155 

156 Returns 

157 ------- 

158 Path 

159 Directory path for saving miniSEED and StationXML files. 

160 """ 

161 return self._save_path 

162 

163 @save_path.setter 

164 def save_path(self, value: str | Path | None) -> None: 

165 """ 

166 Set the save directory path with automatic creation. 

167 

168 Parameters 

169 ---------- 

170 value : str, Path, or None 

171 Directory path where files will be saved. If None, uses the 

172 parent directory of mth5_path or current working directory. 

173 

174 Notes 

175 ----- 

176 Creates the directory if it doesn't exist. 

177 """ 

178 """Set the save path, if None set to parent directory of mth5_path""" 

179 if value is None: 

180 if self._mth5_path is None: 

181 self._save_path = Path().cwd() 

182 else: 

183 self._save_path = self._mth5_path.parent 

184 else: 

185 self._save_path = Path(value) 

186 

187 if not self._save_path.exists(): 

188 self._save_path.mkdir(exists_ok=True) 

189 

190 @property 

191 def network_code(self) -> str: 

192 """ 

193 Two-character FDSN network code. 

194 

195 Returns 

196 ------- 

197 str 

198 Alphanumeric string of exactly 2 characters as required by FDSN DMC. 

199 """ 

200 return self._network_code 

201 

202 @network_code.setter 

203 def network_code(self, value: str) -> None: 

204 """ 

205 Set the FDSN network code with validation. 

206 

207 Parameters 

208 ---------- 

209 value : str 

210 Two-character alphanumeric network code. 

211 

212 Raises 

213 ------ 

214 ValueError 

215 If value is None or doesn't match the required 2-character pattern. 

216 

217 Notes 

218 ----- 

219 Request temporary codes from https://www.fdsn.org/networks/request/temp/ 

220 """ 

221 if value is None: 

222 raise ValueError( 

223 "Must input a network code. " 

224 "Request a temporary code from https://www.fdsn.org/networks/request/temp/" 

225 ) 

226 if not re.match(self._network_code_pattern, value): 

227 raise ValueError( 

228 f"{value} is not a valid network code. It must be 2 alphanumeric characters" 

229 ) 

230 self._network_code = value 

231 

232 @classmethod 

233 def convert_mth5_to_ms_stationxml( 

234 cls, 

235 mth5_path: str | Path, 

236 save_path: str | Path | None = None, 

237 network_code: str = "ZU", 

238 use_runs_with_data_only: bool = True, 

239 **kwargs: Any, 

240 ) -> tuple[Path, list[Path]]: 

241 """ 

242 Convert an MTH5 file to miniSEED and StationXML formats. 

243 

244 Class method that provides a convenient interface to convert MTH5 data 

245 to standard seismological formats for data exchange and archival. 

246 

247 Parameters 

248 ---------- 

249 mth5_path : str or Path 

250 Path to the input MTH5 file to be converted 

251 save_path : str, Path, or None, default None 

252 Directory where output files will be saved. If None, uses the 

253 parent directory of mth5_path 

254 network_code : str, default "ZU" 

255 Two-character FDSN network code for output files 

256 use_runs_with_data_only : bool, default True 

257 If True, only process runs containing actual time series data 

258 **kwargs : dict 

259 Additional keyword arguments passed to converter initialization 

260 

261 Returns 

262 ------- 

263 tuple[Path, list[Path]] 

264 Tuple containing: 

265 - Path to the generated StationXML file 

266 - List of paths to generated miniSEED files (one per day per channel) 

267 

268 Examples 

269 -------- 

270 >>> xml_file, mseed_files = MTH5ToMiniSEEDStationXML.convert_mth5_to_ms_stationxml( 

271 ... "/path/to/data.h5", 

272 ... network_code="MT", 

273 ... save_path="/output/directory" 

274 ... ) 

275 >>> print(f"Created {len(mseed_files)} miniSEED files and {xml_file}") 

276 """ 

277 

278 converter = cls( 

279 mth5_path=mth5_path, 

280 save_path=save_path, 

281 network_code=network_code, 

282 use_runs_with_data_only=use_runs_with_data_only, 

283 **kwargs, 

284 ) 

285 

286 with MTH5() as m: 

287 m.open_mth5(converter.mth5_path) 

288 experiment = m.to_experiment(has_data=converter.use_runs_with_data_only) 

289 stream_list = [] 

290 for row in m.run_summary.itertuples(): 

291 if row.has_data: 

292 run_ts = m.from_reference(row.run_hdf5_reference).to_runts() 

293 if converter.encoding is None: 

294 encoding = get_encoding(run_ts) 

295 else: 

296 encoding = converter.encoding 

297 stream = run_ts.to_obspy_stream( 

298 network_code=converter.network_code, encoding=encoding 

299 ) 

300 # write to miniseed files 

301 stream_list += converter.split_ms_to_days( 

302 stream, converter.save_path, encoding 

303 ) 

304 

305 # write StationXML 

306 experiment.surveys[0].fdsn.network = converter.network_code 

307 

308 translator = XMLInventoryMTExperiment() 

309 xml_fn = converter.save_path.joinpath(f"{converter.mth5_path.stem}.xml") 

310 stationxml = translator.mt_to_xml( 

311 experiment, 

312 stationxml_fn=xml_fn, 

313 ) 

314 logger.info(f"Wrote StationXML to {xml_fn}") 

315 

316 return xml_fn, stream_list 

317 

318 def split_ms_to_days(self, streams, save_path: Path, encoding: str) -> list[Path]: 

319 """ 

320 Split miniSEED traces into daily files. 

321 

322 Splits continuous time series traces into separate files for each day 

323 to conform with standard seismological data archiving practices. 

324 

325 Parameters 

326 ---------- 

327 streams : obspy.Stream 

328 Stream object containing traces to be split by day 

329 save_path : Path 

330 Directory where daily miniSEED files will be saved 

331 encoding : str 

332 Data encoding format for miniSEED files (e.g., 'INT32', 'FLOAT64') 

333 

334 Returns 

335 ------- 

336 list[Path] 

337 List of paths to the generated daily miniSEED files 

338 

339 Notes 

340 ----- 

341 Files are named using the pattern: 

342 {network}_{station}_{location}_{channel}_{YYYY_MM_DDTHH_MM_SS}.mseed 

343 """ 

344 fn_list = [] 

345 for tr in streams: 

346 start_time = tr.stats.starttime 

347 end_time = tr.stats.endtime 

348 

349 # Split the trace by day 

350 current_time = start_time 

351 while current_time < end_time: 

352 next_day = UTCDateTime(current_time.date + datetime.timedelta(days=1)) 

353 if next_day > end_time: 

354 next_day = end_time 

355 

356 # Slice the trace for the current day 

357 tr_day = tr.slice(current_time, next_day) 

358 

359 # Generate the output file name 

360 output_file = save_path.joinpath( 

361 f"{tr.stats.network}_{tr.stats.station}_{tr.stats.location}_{tr.stats.channel}_{current_time.isoformat().replace('-', '_').replace(':', '_')}.mseed" 

362 ) 

363 logger.info(f"Wrote miniseed file to: {output_file}") 

364 

365 fn_list.append(output_file) 

366 

367 # Write the sliced trace to a new MiniSEED file 

368 tr_day.write(output_file, format="MSEED", reclen=256, encoding=encoding) 

369 

370 # Move to the next day 

371 current_time = next_day 

372 

373 return fn_list 

374 

375 

def get_encoding(run_ts) -> str:
    """
    Pick a single miniSEED encoding for all channels of a run.

    The dtype name of every channel is collected, the names are sorted
    alphabetically, and the middle entry of that sorted list is used
    (upper-cased) as the encoding.

    Parameters
    ----------
    run_ts : RunTS
        Run time series object; ``run_ts.channels`` is iterated and each
        channel is looked up in ``run_ts.dataset``

    Returns
    -------
    str
        Upper-cased dtype name used as the miniSEED encoding
        (e.g., 'INT32', 'FLOAT64')

    Notes
    -----
    The "median" is taken over the alphabetically sorted dtype names, not
    over numeric precision. A result of INT64 is replaced by INT32 and a
    warning is logged.

    Examples
    --------
    >>> encoding = get_encoding(run_timeseries)
    >>> print(f"Selected encoding: {encoding}")
    """
    channel_dtypes = []
    for channel in run_ts.channels:
        channel_dtypes.append(run_ts.dataset[channel].data.dtype.name)
    channel_dtypes.sort()

    # Middle entry of the sorted names (upper middle for even counts).
    middle = len(channel_dtypes) // 2
    selected = channel_dtypes[middle].upper()

    if selected == "INT64":
        selected = "INT32"
        logger.warning("Casting INT64 to INT32")

    return selected

411 

412 

def split_miniseed_by_day(input_file: str | Path) -> list[Path]:
    """
    Split an existing miniSEED file into daily files.

    Reads the file, cuts every trace at calendar-day boundaries and writes
    each piece to its own miniSEED file next to the input file.

    Parameters
    ----------
    input_file : str or Path
        Path to the input miniSEED file to be split

    Returns
    -------
    list[Path]
        Paths of the daily miniSEED files, in the order they were written

    Notes
    -----
    Output files are named using the pattern:
    {network}.{station}.{location}.{channel}.{YYYY-MM-DD}.mseed

    Files are saved in the same directory as the input file.

    NOTE(review): the name holds only the date, so two traces with the
    same identifiers on the same day map to the same file name — confirm
    how inputs with gaps should be handled.

    Examples
    --------
    >>> daily_files = split_miniseed_by_day("/path/to/continuous.mseed")
    >>> print(f"Created {len(daily_files)} daily files")
    """
    output_dir = Path(input_file).parent
    stream = read(input_file)

    written_files = []
    for trace in stream:
        stats = trace.stats
        trace_end = stats.endtime

        # Walk through the trace one calendar day at a time.
        window_start = stats.starttime
        while window_start < trace_end:
            # Midnight after window_start, capped at the end of the trace.
            midnight = UTCDateTime(window_start.date + datetime.timedelta(days=1))
            window_end = trace_end if midnight > trace_end else midnight

            daily_trace = trace.slice(window_start, window_end)

            file_name = f"{stats.network}.{stats.station}.{stats.location}.{stats.channel}.{window_start.date}.mseed"
            daily_path = output_dir.joinpath(file_name)

            daily_trace.write(daily_path, format="MSEED")
            written_files.append(daily_path)

            window_start = window_end

    return written_files