Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ io \ conversion.py: 82%
121 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-10 00:01 -0800
1"""
2Convert MTH5 to other formats
4- MTH5 -> miniSEED + StationXML
5"""
7from __future__ import annotations
9import datetime
11# ==================================================================================
12# Imports
13# ==================================================================================
14import re
15from pathlib import Path
16from typing import Any
18from loguru import logger
19from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment
20from obspy import read
21from obspy.core import UTCDateTime
23from mth5.mth5 import MTH5
26# ==================================================================================
class _MTH5FileNotFoundError(FileNotFoundError, FileExistsError):
    """
    Raised when the requested MTH5 file does not exist.

    The semantically correct type is ``FileNotFoundError``.  Earlier versions
    raised ``FileExistsError`` for this condition, so this class derives from
    both to keep existing ``except FileExistsError`` handlers working.
    """


class MTH5ToMiniSEEDStationXML:
    """
    Convert MTH5 files to miniSEED and StationXML formats.

    This class provides functionality to convert magnetotelluric data stored
    in MTH5 format to miniSEED time series files and a StationXML metadata
    file for data exchange and archival purposes.

    Parameters
    ----------
    mth5_path : str, Path, or None, default None
        Path to the input MTH5 file to be converted
    save_path : str, Path, or None, default None
        Directory path where output files will be saved. If None, uses
        the parent directory of mth5_path, or the current working directory
        when mth5_path is also None
    network_code : str, default "ZU"
        Two-character FDSN network code for the output files
    use_runs_with_data_only : bool, default True
        If True, only process runs that contain actual time series data
    **kwargs : dict
        Additional keyword arguments to set as instance attributes

    Attributes
    ----------
    mth5_path : Path or None
        Path to the MTH5 input file
    save_path : Path
        Directory where output files are saved
    network_code : str
        FDSN network code for output files
    use_runs_with_data_only : bool
        Flag to process only runs with data
    encoding : str or None
        Encoding format for miniSEED files. When None the encoding is
        chosen per run with ``get_encoding``

    Examples
    --------
    >>> xml_file, mseed_files = MTH5ToMiniSEEDStationXML.convert_mth5_to_ms_stationxml(
    ...     "/path/to/data.h5",
    ...     network_code="MT",
    ...     save_path="/path/to/output",
    ... )
    """

    def __init__(
        self,
        mth5_path: str | Path | None = None,
        save_path: str | Path | None = None,
        network_code: str = "ZU",
        use_runs_with_data_only: bool = True,
        **kwargs: Any,
    ) -> None:
        """
        Initialize MTH5 to miniSEED/StationXML converter.

        Parameters
        ----------
        mth5_path : str, Path, or None, default None
            Path to the input MTH5 file to be converted
        save_path : str, Path, or None, default None
            Directory path where output files will be saved. If None, uses
            the parent directory of mth5_path
        network_code : str, default "ZU"
            Two-character FDSN network code for the output files
        use_runs_with_data_only : bool, default True
            If True, only process runs that contain actual time series data
        **kwargs : dict
            Additional keyword arguments to set as instance attributes
        """
        self._network_code_pattern = r"^[a-zA-Z0-9]{2}$"
        # mth5_path must be assigned before save_path: the save_path setter
        # falls back to the parent directory of the MTH5 file.
        self.mth5_path = mth5_path
        self.save_path = save_path
        self.network_code = network_code
        self.use_runs_with_data_only = use_runs_with_data_only
        self.encoding = None

        # kwargs are applied last so they can override any default above
        # (for example ``encoding``).
        for key, value in kwargs.items():
            setattr(self, key, value)

    @property
    def mth5_path(self) -> Path | None:
        """
        Path to the MTH5 input file.

        Returns
        -------
        Path or None
            Path to the MTH5 file to be converted, or None if not set.
        """
        return self._mth5_path

    @mth5_path.setter
    def mth5_path(self, value: str | Path | None) -> None:
        """
        Set the MTH5 file path with validation.

        Parameters
        ----------
        value : str, Path, or None
            Path to the MTH5 file. Must exist if not None.

        Raises
        ------
        TypeError
            If value cannot be converted to a Path object.
        FileNotFoundError
            If the specified file does not exist. The raised exception is
            also an instance of FileExistsError for backward compatibility.
        """
        if value is None:
            self._mth5_path = None
            return

        try:
            value = Path(value)
        except Exception as error:
            raise TypeError(f"Could not convert value to Path: {error}") from error

        if not value.exists():
            raise _MTH5FileNotFoundError(f"Could not find {value}")

        self._mth5_path = value

    @property
    def save_path(self) -> Path:
        """
        Directory path where output files will be saved.

        Returns
        -------
        Path
            Directory path for saving miniSEED and StationXML files.
        """
        return self._save_path

    @save_path.setter
    def save_path(self, value: str | Path | None) -> None:
        """
        Set the save directory path with automatic creation.

        Parameters
        ----------
        value : str, Path, or None
            Directory path where files will be saved. If None, uses the
            parent directory of mth5_path or current working directory.

        Notes
        -----
        Creates the directory, including missing parents, if it doesn't exist.
        """
        if value is None:
            # getattr guards against the setter being used before mth5_path
            # has ever been assigned.
            mth5_path = getattr(self, "_mth5_path", None)
            if mth5_path is None:
                self._save_path = Path().cwd()
            else:
                self._save_path = mth5_path.parent
        else:
            self._save_path = Path(value)

        if not self._save_path.exists():
            self._save_path.mkdir(parents=True, exist_ok=True)

    @property
    def network_code(self) -> str:
        """
        Two-character FDSN network code.

        Returns
        -------
        str
            Alphanumeric string of exactly 2 characters.
        """
        return self._network_code

    @network_code.setter
    def network_code(self, value: str) -> None:
        """
        Set the FDSN network code with validation.

        Parameters
        ----------
        value : str
            Two-character alphanumeric network code.

        Raises
        ------
        ValueError
            If value is None or doesn't match the required 2-character pattern.

        Notes
        -----
        Request temporary codes from https://www.fdsn.org/networks/request/temp/
        """
        if value is None:
            raise ValueError(
                "Must input a network code. "
                "Request a temporary code from https://www.fdsn.org/networks/request/temp/"
            )
        # fullmatch rather than match: with match, the trailing "$" in the
        # pattern also accepts a code followed by a newline (e.g. "ZU\n").
        if not re.fullmatch(self._network_code_pattern, value):
            raise ValueError(
                f"{value} is not a valid network code. It must be 2 alphanumeric characters"
            )
        self._network_code = value

    @classmethod
    def convert_mth5_to_ms_stationxml(
        cls,
        mth5_path: str | Path,
        save_path: str | Path | None = None,
        network_code: str = "ZU",
        use_runs_with_data_only: bool = True,
        **kwargs: Any,
    ) -> tuple[Path, list[Path]]:
        """
        Convert an MTH5 file to miniSEED and StationXML formats.

        Builds a converter from the arguments, writes one set of daily
        miniSEED files for every run flagged as having data, then writes a
        single StationXML file named after the MTH5 file stem.

        Parameters
        ----------
        mth5_path : str or Path
            Path to the input MTH5 file to be converted
        save_path : str, Path, or None, default None
            Directory where output files will be saved. If None, uses the
            parent directory of mth5_path
        network_code : str, default "ZU"
            Two-character FDSN network code for output files
        use_runs_with_data_only : bool, default True
            Passed as ``has_data`` when building the experiment metadata
        **kwargs : dict
            Additional keyword arguments passed to converter initialization
            (for example ``encoding`` to force a miniSEED encoding)

        Returns
        -------
        tuple[Path, list[Path]]
            Tuple containing:
            - Path to the generated StationXML file
            - List of paths to generated miniSEED files

        Examples
        --------
        >>> xml_file, mseed_files = MTH5ToMiniSEEDStationXML.convert_mth5_to_ms_stationxml(
        ...     "/path/to/data.h5",
        ...     network_code="MT",
        ...     save_path="/output/directory"
        ... )
        >>> print(f"Created {len(mseed_files)} miniSEED files and {xml_file}")
        """
        converter = cls(
            mth5_path=mth5_path,
            save_path=save_path,
            network_code=network_code,
            use_runs_with_data_only=use_runs_with_data_only,
            **kwargs,
        )

        mseed_files: list[Path] = []
        with MTH5() as m:
            m.open_mth5(converter.mth5_path)
            experiment = m.to_experiment(has_data=converter.use_runs_with_data_only)
            for row in m.run_summary.itertuples():
                if not row.has_data:
                    continue
                run_ts = m.from_reference(row.run_hdf5_reference).to_runts()
                # An explicitly configured encoding wins; otherwise pick one
                # from the dtypes of this run's channels.
                if converter.encoding is None:
                    encoding = get_encoding(run_ts)
                else:
                    encoding = converter.encoding
                stream = run_ts.to_obspy_stream(
                    network_code=converter.network_code, encoding=encoding
                )
                # write to miniseed files
                mseed_files += converter.split_ms_to_days(
                    stream, converter.save_path, encoding
                )

        # write StationXML
        experiment.surveys[0].fdsn.network = converter.network_code

        translator = XMLInventoryMTExperiment()
        xml_fn = converter.save_path.joinpath(f"{converter.mth5_path.stem}.xml")
        translator.mt_to_xml(
            experiment,
            stationxml_fn=xml_fn,
        )
        logger.info(f"Wrote StationXML to {xml_fn}")

        return xml_fn, mseed_files

    def split_ms_to_days(self, streams, save_path: Path, encoding: str) -> list[Path]:
        """
        Split miniSEED traces into daily files.

        Each trace is cut at every UTC midnight between its start and end
        time and every piece is written to its own miniSEED file.

        Parameters
        ----------
        streams : obspy.Stream
            Stream object containing traces to be split by day
        save_path : Path
            Directory where daily miniSEED files will be saved
        encoding : str
            Data encoding format for miniSEED files (e.g., 'INT32', 'FLOAT64')

        Returns
        -------
        list[Path]
            List of paths to the generated daily miniSEED files

        Notes
        -----
        Files are named using the pattern:
        {network}_{station}_{location}_{channel}_{YYYY_MM_DDTHH_MM_SS}.mseed
        where the time stamp is the start of the slice.
        """
        fn_list = []
        for tr in streams:
            end_time = tr.stats.endtime
            current_time = tr.stats.starttime

            while current_time < end_time:
                # Midnight following current_time, clipped to the trace end.
                next_day = UTCDateTime(current_time.date + datetime.timedelta(days=1))
                if next_day > end_time:
                    next_day = end_time

                # NOTE(review): Trace.slice appears to include both end
                # points, so a sample exactly on midnight may end up in two
                # consecutive files -- confirm against the ObsPy docs.
                tr_day = tr.slice(current_time, next_day)

                stamp = current_time.isoformat().replace("-", "_").replace(":", "_")
                output_file = save_path.joinpath(
                    f"{tr.stats.network}_{tr.stats.station}_{tr.stats.location}"
                    f"_{tr.stats.channel}_{stamp}.mseed"
                )

                # Write first, then log and record, so the log message and
                # the returned list only mention files that were written.
                tr_day.write(output_file, format="MSEED", reclen=256, encoding=encoding)
                logger.info(f"Wrote miniseed file to: {output_file}")
                fn_list.append(output_file)

                # Move to the next day
                current_time = next_day

        return fn_list
def get_encoding(run_ts) -> str:
    """
    Pick a single miniSEED encoding name for all channels of a run.

    The dtype name of every channel is collected, the names are sorted
    alphabetically and the middle entry of that sorted list is returned in
    upper case.

    Parameters
    ----------
    run_ts : RunTS
        Run time series object. Only ``run_ts.channels`` and
        ``run_ts.dataset[channel].data.dtype.name`` are read.

    Returns
    -------
    str
        Upper-cased dtype name to use as the miniSEED encoding
        (e.g., 'INT32', 'FLOAT64')

    Notes
    -----
    If the selected name is 'INT64', 'INT32' is returned instead and a
    warning is logged. Only the returned name changes; this function does
    not touch the data itself.

    Examples
    --------
    >>> encoding = get_encoding(run_timeseries)
    >>> print(f"Selected encoding: {encoding}")
    """
    dtype_names = []
    for channel in run_ts.channels:
        dtype_names.append(run_ts.dataset[channel].data.dtype.name)
    dtype_names.sort()

    # Middle entry of the alphabetically sorted names.
    middle = len(dtype_names) // 2
    selected = dtype_names[middle].upper()

    if selected != "INT64":
        return selected

    logger.warning("Casting INT64 to INT32")
    return "INT32"
def split_miniseed_by_day(input_file: str | Path) -> list[Path]:
    """
    Split an existing miniSEED file into daily files.

    Reads the file, cuts every trace at each UTC midnight between its start
    and end time, and writes each piece to its own miniSEED file next to the
    input file.

    Parameters
    ----------
    input_file : str or Path
        Path to the input miniSEED file to be split

    Returns
    -------
    list[Path]
        Paths of the daily miniSEED files, in the order they were written

    Notes
    -----
    Output files are named using the pattern:
    {network}.{station}.{location}.{channel}.{YYYY-MM-DD}.mseed

    Files are saved in the same directory as the input file.

    NOTE(review): the file name does not include a time of day, so two
    traces with the same identifiers on the same day (for example either
    side of a gap) would presumably write to the same path -- confirm
    whether the later write replaces the earlier one.

    Examples
    --------
    >>> daily_files = split_miniseed_by_day("/path/to/continuous.mseed")
    >>> print(f"Created {len(daily_files)} daily files")
    """
    out_dir = Path(input_file).parent
    one_day = datetime.timedelta(days=1)
    written = []

    for tr in read(input_file):
        stats = tr.stats
        trace_end = stats.endtime
        window_start = stats.starttime

        while window_start < trace_end:
            # Midnight after window_start, clipped to the end of the trace.
            midnight = UTCDateTime(window_start.date + one_day)
            window_end = trace_end if midnight > trace_end else midnight

            # NOTE(review): Trace.slice appears to include both end points,
            # so a sample exactly on midnight may land in two files -- confirm.
            piece = tr.slice(window_start, window_end)

            target = out_dir.joinpath(
                f"{stats.network}.{stats.station}.{stats.location}.{stats.channel}.{window_start.date}.mseed"
            )
            piece.write(target, format="MSEED")
            written.append(target)

            window_start = window_end

    return written