Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ groups \ station.py: 72%

177 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2from __future__ import annotations 

3 

4 

5"""Station-level HDF5 helpers for MTH5.""" 

6 

7# ============================================================================= 

8# Imports 

9# ============================================================================= 

10import inspect 

11from typing import Any 

12 

13import h5py 

14import numpy as np 

15import pandas as pd 

16from mt_metadata import timeseries as metadata 

17from mt_metadata.common.mttime import MTime 

18 

19from mth5.groups import ( 

20 BaseGroup, 

21 MasterFCGroup, 

22 MasterFeaturesGroup, 

23 RunGroup, 

24 TransferFunctionsGroup, 

25) 

26from mth5.helpers import read_attrs_to_dict 

27from mth5.utils.exceptions import MTH5Error 

28 

29 

# Lookup table of class name -> class object for every class exposed by the
# ``mt_metadata.timeseries`` module (imported above as ``metadata``).
meta_classes = {
    class_name: class_obj
    for class_name, class_obj in inspect.getmembers(metadata, inspect.isclass)
}

31# ============================================================================= 

32# Master Station Group

33# ============================================================================= 

34 

35 

class MasterStationGroup(BaseGroup):
    """Wrapper around the group that holds every station of a survey.

    The wrapped HDF5 group is located at ``/Survey/Stations``.  The class
    provides helpers to create, look up, and delete station groups, and a
    tabular overview of the stations it contains.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> _ = mth5_obj.open_mth5("/tmp/example.mth5", mode="a")
    >>> stations = mth5_obj.stations_group
    >>> _ = stations.add_station("MT001")
    >>> stations.station_summary.head()  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        super().__init__(group, **kwargs)

    @property
    def station_summary(self) -> pd.DataFrame:
        """Build a table with one row per station found below this group.

        Returns
        -------
        pandas.DataFrame
            One row per station with the columns ``station``, ``start``,
            ``end``, ``latitude``, and ``longitude``.  The frame is empty
            when no station is found.

        Notes
        -----
        ``start`` and ``end`` are converted with :func:`pandas.to_datetime`
        whenever at least one station is present.  A group whose attributes
        are incomplete (any lookup raising ``KeyError``) is skipped silently.

        Examples
        --------
        >>> summary = stations.station_summary
        >>> list(summary.columns)
        ['station', 'start', 'end', 'latitude', 'longitude']
        """

        def _station_row(station_group: h5py.Group) -> dict[str, Any]:
            # Pull the summary fields straight from the HDF5 attributes.
            attrs = station_group.attrs
            return {
                "station": attrs["id"],
                "start": attrs["time_period.start"],
                "end": attrs["time_period.end"],
                "latitude": attrs["location.latitude"],
                "longitude": attrs["location.longitude"],
            }

        def _walk(node: Any, rows: list[dict[str, Any]]) -> None:
            """Append a row to ``rows`` for every station reachable from ``node``."""
            if not isinstance(node, h5py.Group):
                return
            try:
                node_type = node.attrs["mth5_type"].lower()
                if node_type == "station":
                    rows.append(_station_row(node))
                elif node_type == "masterstation":
                    for child in node.values():
                        _walk(child, rows)
            except KeyError:
                # Missing attribute: the node contributes nothing.
                pass

        rows: list[dict[str, Any]] = []
        _walk(self.hdf5_group, rows)
        summary = pd.DataFrame(rows)
        if len(summary) == 0:
            return summary

        time_columns = ("start", "end")
        try:
            for column in time_columns:
                summary[column] = pd.to_datetime(summary[column], format="mixed")
        except ValueError:
            # Retry both columns without an explicit format.
            for column in time_columns:
                summary[column] = pd.to_datetime(summary[column])

        return summary

    def add_station(
        self, station_name: str, station_metadata: metadata.Station | None = None
    ) -> "StationGroup":
        """Create (or return) the station group ``/Survey/Stations/<name>``.

        Parameters
        ----------
        station_name : str
            Identifier of the station; usually the same as ``metadata.id``.
        station_metadata : mt_metadata.timeseries.Station, optional
            Metadata used to populate the station attributes.

        Returns
        -------
        StationGroup
            Wrapper for the new or already existing station.

        Raises
        ------
        ValueError
            If ``station_name`` is falsy (``None`` or an empty string).

        Examples
        --------
        >>> station = stations.add_station("MT001")
        >>> station.metadata.id
        'MT001'
        """
        if station_name:
            return self._add_group(
                station_name, StationGroup, station_metadata, match="id"
            )

        raise ValueError("station name is None, do not know what to name it")

    def get_station(self, station_name: str) -> "StationGroup":
        """Fetch the wrapper of a station that already exists.

        Parameters
        ----------
        station_name : str
            Name of the station to look up.

        Returns
        -------
        StationGroup
            Wrapper for the requested station.

        Raises
        ------
        MTH5Error
            If the station does not exist.

        Examples
        --------
        >>> existing = stations.get_station("MT001")
        >>> existing.name
        'MT001'
        """
        station = self._get_group(station_name, StationGroup)
        return station

    def remove_station(self, station_name: str) -> None:
        """Remove the reference to a station group from the file.

        Parameters
        ----------
        station_name : str
            Name of an existing station.

        Notes
        -----
        Deleting in HDF5 only drops the reference; the space used by the
        data is not given back to the file system.

        Examples
        --------
        >>> stations.remove_station("MT001")
        """
        self._remove_group(station_name)

197 

198 

199# ============================================================================= 

200# Station Group 

201# ============================================================================= 

class StationGroup(BaseGroup):
    """Utility wrapper for a single station at ``/Survey/Stations/<id>``.

    Station groups manage run collections, metadata propagation, and provide
    summary utilities for quick inspection.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> station = m5.stations_group.add_station("MT001")
    >>> _ = station.add_run("MT001a")
    >>> station.run_summary.shape[0] >= 1
    True
    """

    def __init__(
        self,
        group: h5py.Group,
        station_metadata: metadata.Station | None = None,
        **kwargs: Any,
    ) -> None:
        # Assigned before ``BaseGroup.__init__`` runs.
        # NOTE(review): presumably base initialization may call
        # ``initialize_group``, which reads this list -- confirm in BaseGroup.
        self._default_subgroup_names = [
            "Transfer_Functions",
            "Fourier_Coefficients",
            "Features",
        ]
        super().__init__(group, group_metadata=station_metadata, **kwargs)

    def initialize_group(self, **kwargs: Any) -> None:
        """Create default subgroups and write metadata.

        Parameters
        ----------
        **kwargs
            Additional attributes to set on the instance before initialization.

        Raises
        ------
        ValueError
            Re-raised unchanged when creating a subgroup fails for any reason
            other than h5py's "Unable to synchronously create group" error.

        Examples
        --------
        >>> station.initialize_group()
        """
        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()

        for group_name in self._default_subgroup_names:
            try:
                self.hdf5_group.create_group(group_name)
                # Each default subgroup has a matching ``<name>_group`` property.
                m5_grp = getattr(self, f"{group_name.lower()}_group")
                m5_grp.initialize_group()
            except ValueError as value_error:
                if "Unable to synchronously create group" not in str(value_error):
                    # Bare ``raise`` keeps the original traceback and type.
                    raise
                # NOTE(review): the message says "write mode"; confirm the
                # wording matches the situation in which h5py raises this.
                self.logger.warning("File is in write mode, cannot create group.")

    @property
    def master_station_group(self) -> MasterStationGroup:
        """Shortcut to the containing master station group."""
        return MasterStationGroup(self.hdf5_group.parent)

    @property
    def transfer_functions_group(self) -> TransferFunctionsGroup:
        """Convenience accessor for ``/Station/Transfer_Functions``."""
        return TransferFunctionsGroup(
            self.hdf5_group["Transfer_Functions"], **self.dataset_options
        )

    @property
    def fourier_coefficients_group(self) -> MasterFCGroup:
        """Convenience accessor for ``/Station/Fourier_Coefficients``."""
        return MasterFCGroup(
            self.hdf5_group["Fourier_Coefficients"], **self.dataset_options
        )

    @property
    def features_group(self) -> MasterFeaturesGroup:
        """Convenience accessor for ``/Station/Features``."""
        return MasterFeaturesGroup(self.hdf5_group["Features"], **self.dataset_options)

    @property
    def survey_metadata(self) -> metadata.Survey:
        """Return survey metadata with this station appended.

        The survey attributes are read from the group two levels above this
        station (``self.hdf5_group.parent.parent``).
        """
        meta_dict = read_attrs_to_dict(
            dict(self.hdf5_group.parent.parent.attrs), metadata.Survey()
        )
        survey_metadata = metadata.Survey()
        survey_metadata.from_dict({"survey": meta_dict})
        survey_metadata.add_station(self.metadata)
        return survey_metadata

    @BaseGroup.metadata.getter
    def metadata(self) -> metadata.Station:
        """Station metadata enriched with run information.

        Metadata is read from the file on first access.  On every access, each
        child group that is not a default subgroup is opened as a run and, if
        its ``mth5_type`` is ``run``, its metadata is added to the station.
        """
        if not self._has_read_metadata:
            self.read_metadata()
            self._has_read_metadata = True

        # Built once per call rather than once per child group.
        reserved_names = {name.lower() for name in self._default_subgroup_names}
        for key in self.groups_list:
            if key.lower() in reserved_names:
                continue
            try:
                run_metadata = self.get_run(key).metadata
                if run_metadata.mth5_type.lower() in ["run"]:
                    self._metadata.add_run(run_metadata)
            except MTH5Error:
                self.logger.warning(f"Could not find run {key}")
        return self._metadata

    @property
    def name(self) -> str:
        """Station identifier, taken from ``metadata.id``."""
        return self.metadata.id

    @name.setter
    def name(self, name: str) -> None:
        # Only the in-memory metadata is updated; nothing is written here.
        self.metadata.id = name

    @staticmethod
    def _as_channel_list(attr_value: Any) -> list:
        """Normalize a ``channels_recorded_*`` attribute value to a list.

        Strings are parsed as JSON and must decode to a list.  Objects with a
        ``tolist`` method (e.g. numpy arrays) are converted with it.  Anything
        else is passed to ``list()``.  An empty list is returned when the
        value cannot be interpreted.
        """
        # Local import: ``json`` is not imported at module level in this file.
        import json

        # Check ``str`` first: numpy string scalars are ``str`` subclasses that
        # also expose ``tolist`` and would otherwise come back as a bare string.
        if isinstance(attr_value, str):
            try:
                parsed = json.loads(attr_value)
            except ValueError:  # json.JSONDecodeError is a ValueError
                return []
            return parsed if isinstance(parsed, list) else []

        if hasattr(attr_value, "tolist"):
            converted = attr_value.tolist()
            # 0-d values convert to a scalar; wrap so callers can concatenate.
            return converted if isinstance(converted, list) else [converted]

        try:
            return list(attr_value)
        except (TypeError, ValueError):
            return []

    @property
    def run_summary(self) -> pd.DataFrame:
        """Return a summary of runs belonging to the station.

        Returns
        -------
        pandas.DataFrame
            Columns include ``id``, ``start``, ``end``, ``components``,
            ``measurement_type``, ``sample_rate``, and ``hdf5_reference``.
            The frame has no rows when the station has no runs.

        Notes
        -----
        Channel lists stored as byte arrays or JSON strings are normalized
        before summarization.  Children without an ``mth5_type`` attribute are
        skipped.  The fixed-width string columns truncate long values:
        ``id`` to 20, ``components`` to 100, and ``measurement_type`` to 12
        characters.

        Examples
        --------
        >>> station.run_summary.head()  # doctest: +SKIP
        """
        summary_dtype = np.dtype(
            [
                ("id", "U20"),
                ("start", "datetime64[ns]"),
                ("end", "datetime64[ns]"),
                ("components", "U100"),
                ("measurement_type", "U12"),
                ("sample_rate", float),
                ("hdf5_reference", h5py.ref_dtype),
            ]
        )

        run_list = []
        for group in self.hdf5_group.values():
            attrs = group.attrs
            mth5_type = attrs.get("mth5_type")
            if mth5_type is None or mth5_type.lower() not in ["run"]:
                continue

            # Component order: auxiliary, then electric, then magnetic.
            channels = (
                self._as_channel_list(attrs["channels_recorded_auxiliary"])
                + self._as_channel_list(attrs["channels_recorded_electric"])
                + self._as_channel_list(attrs["channels_recorded_magnetic"])
            )
            comps = ",".join(
                ii.decode() if isinstance(ii, bytes) else str(ii) for ii in channels
            )
            run_list.append(
                (
                    attrs["id"],
                    # Drop any "+HH:MM" UTC offset before datetime64 conversion.
                    attrs["time_period.start"].split("+")[0],
                    attrs["time_period.end"].split("+")[0],
                    comps,
                    attrs["data_type"],
                    attrs["sample_rate"],
                    group.ref,
                )
            )

        return pd.DataFrame(np.array(run_list, dtype=summary_dtype))

    def make_run_name(self, alphabet: bool = False) -> str | None:
        """Generate the next run suffix, alphabetic or numeric.

        Only the suffix is returned; the station id is not prepended.  The
        last character of every group name containing the station id is
        collected, and the character after the largest one is proposed.

        Parameters
        ----------
        alphabet : bool, default False
            Used only when no matching group exists yet: ``True`` starts the
            sequence at ``"a"``, ``False`` starts it at ``"001"``.

        Returns
        -------
        str or None
            Proposed suffix, or ``None`` when the sequence cannot be continued
            with a letter or digit (for example after ``"9"`` or ``"z"``).

        Examples
        --------
        With no runs in the station yet:

        >>> station.make_run_name()  # doctest: +SKIP
        '001'
        >>> station.make_run_name(alphabet=True)  # doctest: +SKIP
        'a'
        """
        # ``self.name`` goes through the metadata getter; evaluate it once.
        station_id = self.name
        suffixes = sorted(
            group[-1:] for group in self.groups_list if station_id in group
        )

        if not suffixes:
            return "a" if alphabet else "001"

        last = suffixes[-1]
        if len(last) == 1 and last.isalnum():
            candidate = chr(ord(last) + 1)
            # Reject candidates that leave the letter/digit range, such as
            # ':' after '9' or '{' after 'z'.
            if candidate.isalnum():
                return candidate

        self.logger.info("Could not create a new run name")
        return None

    def locate_run(self, sample_rate: float, start: str | MTime) -> pd.DataFrame | None:
        """Locate runs matching a sample rate and start time.

        Parameters
        ----------
        sample_rate : float
            Sample rate in samples per second.
        start : str or MTime
            Start time string or ``MTime`` instance.

        Returns
        -------
        pandas.DataFrame or None
            Matching rows from ``run_summary`` or ``None`` when no match exists.

        Examples
        --------
        >>> station.locate_run(256.0, "2020-01-01T00:00:00")  # doctest: +SKIP
        """
        if not isinstance(start, MTime):
            start = MTime(time_stamp=start)

        run_summary = self.run_summary.copy()
        if run_summary.size < 1:
            return None
        sr_find = run_summary[
            (run_summary.sample_rate == sample_rate) & (run_summary.start == start)
        ]
        if sr_find.size < 1:
            return None
        return sr_find

    def add_run(
        self, run_name: str, run_metadata: metadata.Run | None = None
    ) -> RunGroup:
        """Add a run under this station.

        Parameters
        ----------
        run_name : str
            Run identifier (for example ``id`` + suffix).
        run_metadata : mt_metadata.timeseries.Run, optional
            Metadata container to seed the run attributes.

        Returns
        -------
        RunGroup
            Wrapper for the created or existing run.

        Examples
        --------
        >>> run = station.add_run("MT001a")
        >>> run.metadata.id
        'MT001a'
        """
        return self._add_group(
            run_name, RunGroup, group_metadata=run_metadata, match="id"
        )

    def get_run(self, run_name: str) -> RunGroup:
        """Return a run by name.

        Parameters
        ----------
        run_name : str
            Existing run name.

        Returns
        -------
        RunGroup
            Wrapper for the requested run.

        Raises
        ------
        MTH5Error
            If the run does not exist.

        Examples
        --------
        >>> existing_run = station.get_run("MT001a")
        >>> existing_run.name
        'MT001a'
        """
        return self._get_group(run_name, RunGroup)

    def remove_run(self, run_name: str) -> None:
        """Remove a run from this station.

        Parameters
        ----------
        run_name : str
            Existing run name.

        Notes
        -----
        Deleting removes the reference only; storage is not reclaimed.

        Examples
        --------
        >>> station.remove_run("MT001a")
        """
        self._remove_group(run_name)

    def update_station_metadata(self) -> None:
        """Deprecated alias for :py:meth:`update_metadata`.

        Raises
        ------
        DeprecationWarning
            Always raised to direct callers to ``update_metadata``.

        Examples
        --------
        >>> station.update_station_metadata()  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        DeprecationWarning: 'update_station_metadata' has been deprecated use 'update_metadata()'
        """
        raise DeprecationWarning(
            "'update_station_metadata' has been deprecated use 'update_metadata()'"
        )

    def update_metadata(self) -> None:
        """Synchronize station metadata from contained runs.

        Notes
        -----
        The station ``time_period`` is set to the min/max of all runs, and
        ``channels_recorded`` becomes the sorted set of all non-empty
        component names found in ``run_summary``.  The metadata is then
        written to the file.

        Examples
        --------
        >>> _ = station.update_metadata()
        >>> station.metadata.time_period.start  # doctest: +SKIP
        '2020-01-01T00:00:00'
        """
        run_summary = self.run_summary

        # NOTE(review): with no runs, min()/max() are NaT and isoformat()
        # yields "NaT"; confirm how the time_period setters treat that value.
        self._metadata.time_period.start = run_summary.start.min().isoformat()
        self._metadata.time_period.end = run_summary.end.max().isoformat()

        # A run without channels contributes an empty string; drop those so
        # no blank channel name is recorded.  Sorting makes the order stable.
        components = {
            component
            for run_components in run_summary.components.to_list()
            for component in run_components.split(",")
            if component
        }
        self._metadata.channels_recorded = sorted(components)

        self.write_metadata()

600 set(",".join(run_summary.components.to_list()).split(",")) 

601 ) 

602 

603 self.write_metadata()