Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\utils\helpers.py: 68%

123 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

"""
MTH5 Utility Helper Functions.

Provides decorators and utility functions for working with MTH5 objects,
including path/object conversion, file operations, and data validation.

Notes
-----
Many functions use the `path_or_mth5_object` decorator to transparently
handle both file paths and MTH5 objects as input.

Examples
--------
Initialize and open an MTH5 file::

    >>> m = initialize_mth5('/path/to/file.mth5', mode='a')
    >>> m.close_mth5()
"""
# NOTE: the docstring above must stay the first statement of the module;
# a string literal placed after the imports is not bound to ``__doc__``.

# =============================================================================
# Imports
# =============================================================================
from __future__ import annotations

import functools
import pathlib
from typing import Any, Callable, TypeVar

from loguru import logger

from mth5.helpers import close_open_files
from mth5.mth5 import MTH5


# Generic return type of a function wrapped by ``path_or_mth5_object``.
T = TypeVar("T")

39 

40 

41# ============================================================================= 

42 

43 

def path_or_mth5_object(func: Callable[..., T]) -> Callable[..., T]:
    """
    Decorator allowing functions to accept MTH5 file paths or MTH5 objects.

    When the first positional argument is a ``str`` or ``pathlib.Path``, an
    ``MTH5`` object is created inside a ``with`` block, the file is opened,
    and that object is passed to ``func`` in place of the path.  When the
    first positional argument is already an ``MTH5`` object it is passed
    through unchanged.

    Parameters
    ----------
    func : Callable
        A function that takes an MTH5 object as its first argument.
        Signature: func(mth5_obj: MTH5, *args, **kwargs) -> T
        A ``staticmethod`` object is also accepted; it is unwrapped to the
        underlying function before being called.

    Returns
    -------
    Callable
        Wrapped function accepting str/Path or MTH5 as first argument.

    Raises
    ------
    TypeError
        If the wrapped function is called without any positional argument,
        or if the first positional argument is not a string, pathlib.Path,
        or MTH5 object.

    Notes
    -----
    The decorated function can be called with either:
    - A file path string or pathlib.Path
    - An MTH5 object

    When given a file path, the file is opened with ``mode='a'`` unless a
    ``mode`` keyword argument is supplied.  The ``mode`` keyword is only
    read, not removed, so it is also forwarded to ``func``; overriding the
    mode therefore requires ``func`` to accept a ``mode`` keyword.

    In the path case ``func`` runs inside ``with MTH5() as m`` and the result
    is returned after that block exits, so objects in the result that depend
    on the open file may no longer be usable (presumably the context manager
    closes the file -- confirm against ``MTH5.__exit__``).

    The first argument must be passed positionally; it is not looked up in
    the keyword arguments.

    TODO: add support for file_version in kwargs

    Examples
    --------
    Decorate a function to work with both paths and objects::

        @path_or_mth5_object
        def get_metadata(m: MTH5) -> dict:
            return m.survey_group.metadata.to_dict()

        # Call with file path
        metadata = get_metadata('/path/to/file.mth5')

        # Call with MTH5 object
        with MTH5() as m:
            m.open_mth5('/path/to/file.mth5', mode='r')
            metadata = get_metadata(m)
    """
    # Unwrap a staticmethod object once, at decoration time, instead of
    # re-checking on every call.
    if isinstance(func, staticmethod):
        target = func.__get__(None, object)
    else:
        target = func

    @functools.wraps(func)
    def wrapper_decorator(*args: Any, **kwargs: Any) -> T:
        # Guard against a bare IndexError on args[0] when the caller passes
        # everything by keyword (or nothing at all).
        if not args:
            msg = "expected h5 as first positional argument, got no positional arguments"
            logger.error(msg)
            raise TypeError(msg)

        first = args[0]
        if isinstance(first, (pathlib.Path, str)):
            mode = kwargs.get("mode", "a")
            with MTH5() as m:
                m.open_mth5(first, mode=mode)
                # Swap the path for the opened MTH5 object, keep the rest.
                new_args = list(args)
                new_args[0] = m
                result = target(*new_args, **kwargs)
        elif isinstance(first, MTH5):
            result = target(*args, **kwargs)
        else:
            msg = f"expected h5, got {type(first)}"
            logger.error(msg)
            raise TypeError(msg)

        return result

    return wrapper_decorator  # type: ignore

126 

127 

@path_or_mth5_object
def get_version(m: str | pathlib.Path | MTH5) -> str:
    """
    Return the ``file_version`` attribute of an MTH5 file.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to MTH5 file or MTH5 object.  A path is converted to an MTH5
        object by the ``path_or_mth5_object`` decorator before this body runs.

    Returns
    -------
    str
        File version string (e.g., '0.1.0', '0.2.0').

    Examples
    --------
    Get version from file path::

        >>> version = get_version('/path/to/file.mth5')
        >>> print(version)
        '0.2.0'

    Get version from MTH5 object::

        >>> with MTH5() as m:
        ...     m.open_mth5('/path/to/file.mth5')
        ...     version = get_version(m)
    """
    # ``m`` is always an MTH5 object here; the decorator handled any path.
    file_version = m.file_version  # type: ignore
    return file_version

158 

159 

@path_or_mth5_object
def get_channel_summary(m: str | pathlib.Path | MTH5, show: bool = True) -> Any:
    """
    Return the channel summary of an MTH5 file as a DataFrame.

    Reads ``m.channel_summary.to_dataframe()``.  If the result has one row
    or fewer, the summary table is rebuilt with
    ``m.channel_summary.summarize()`` and read again.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to MTH5 file or MTH5 object.
    show : bool, default True
        Whether to log the summary DataFrame.

    Returns
    -------
    pandas.DataFrame
        Channel summary as returned by ``channel_summary.to_dataframe()``.

    Warnings
    --------
    When the summary has one row or fewer it is re-summarized, which may
    take time for large files.

    Examples
    --------
    Get channel summary from file path::

        >>> df = get_channel_summary('/path/to/file.mth5')
        >>> print(df.shape)
        (42, 8)

    Get summary without logging::

        >>> df = get_channel_summary('/path/to/file.mth5', show=False)
    """
    logger.info(f"{m.filename} channel summary")  # type: ignore

    summary_df = m.channel_summary.to_dataframe()  # type: ignore
    looks_incomplete = len(summary_df) <= 1
    if looks_incomplete:
        logger.warning("channel summary smaller than expected -- re-summarizing")
        # ``m.channel_summary`` is deliberately re-read on each access.
        m.channel_summary.summarize()  # type: ignore
        summary_df = m.channel_summary.to_dataframe()  # type: ignore

    if show:
        logger.info(f"{summary_df}")

    return summary_df

206 

207 

@path_or_mth5_object
def add_filters(
    m: str | pathlib.Path | MTH5,
    filters_list: list[Any],
    survey_id: str = "",
) -> None:
    """
    Add filter objects to an MTH5 file.

    Selects a filters group based on the file version and adds every filter
    from ``filters_list`` whose ``name`` is not already a key of that
    group's ``filter_dict``.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to MTH5 file or MTH5 object.
    filters_list : list
        List of filter objects to add. Each filter must have a ``name``
        attribute and be accepted by the group's ``add_filter``.
    survey_id : str, default ''
        Survey ID passed to ``m.get_survey`` when the file version is not
        '0.1.0'.  Ignored for version '0.1.0'.

    Raises
    ------
    ValueError
        If the file version is '0.1.0' and ``m.filters_group`` is ``None``.
    AttributeError
        If filter objects lack a ``name`` attribute.

    Notes
    -----
    File version '0.1.0' uses the file-level ``m.filters_group``.
    Any other file version is treated like '0.2.0' and uses the
    ``filters_group`` of the survey returned by ``m.get_survey(survey_id)``;
    errors raised by ``get_survey`` (e.g. unknown survey) propagate.

    Examples
    --------
    Add filters to MTH5 file::

        >>> from mth5.timeseries import Filter
        >>> filters = [Filter(name='test_filter')]
        >>> add_filters('/path/to/file.mth5', filters)

    Add survey-specific filters (version 0.2.0)::

        >>> add_filters('/path/to/file.mth5', filters, survey_id='MT01')
    """
    if m.file_version == "0.1.0":  # type: ignore
        fg = m.filters_group  # type: ignore
        # Explicit check instead of ``assert``: asserts are stripped under -O.
        if fg is None:
            msg = "MTH5 filters_group is None, cannot add filters"
            logger.error(msg)
            raise ValueError(msg)
    else:
        # m.file_version == "0.2.0":
        survey = m.get_survey(survey_id)  # type: ignore
        fg = survey.filters_group

    for filt3r in filters_list:
        # ``filter_dict`` is re-read every iteration so a name added earlier
        # in this loop is seen (assuming add_filter updates it -- confirm).
        if filt3r.name not in fg.filter_dict:  # type: ignore
            fg.add_filter(filt3r)  # type: ignore

267 

268 

def initialize_mth5(
    h5_path: str | pathlib.Path,
    mode: str = "a",
    file_version: str = "0.1.0",
) -> MTH5:
    """
    Create an MTH5 object and open ``h5_path`` with it.

    When ``mode`` is 'w' and the file already exists, a warning is logged,
    ``close_open_files()`` is called, and the file is deleted before opening.

    Parameters
    ----------
    h5_path : str | pathlib.Path
        Path to MTH5 file.
    mode : {'r', 'w', 'a'}, default 'a'
        File access mode passed to ``MTH5.open_mth5``:
        - 'r': read-only
        - 'w': write (an existing file is removed first)
        - 'a': append/read-write
    file_version : {'0.1.0', '0.2.0'}, default '0.1.0'
        MTH5 file format version passed to the ``MTH5`` constructor.

    Returns
    -------
    MTH5
        The MTH5 object after ``open_mth5`` has been called.  The caller is
        responsible for closing it.

    Warnings
    --------
    When mode='w' and the file exists, all open h5 files are closed before
    removal. This may affect other objects using HDF5 files.

    Examples
    --------
    Create a new MTH5 file::

        >>> m = initialize_mth5('/path/to/file.mth5', mode='w')
        >>> m.file_version
        '0.1.0'
        >>> m.close_mth5()

    Open existing file for appending::

        >>> m = initialize_mth5('/path/to/file.mth5', mode='a')
        >>> m.add_station('MT001')
        >>> m.close_mth5()

    Open file with version 0.2.0 schema::

        >>> m = initialize_mth5('/path/to/file.mth5', file_version='0.2.0')
    """
    target = pathlib.Path(h5_path)

    # Only an explicit overwrite request removes an existing file.
    if mode == "w" and target.exists():
        logger.warning(
            f"File {target} exists, removing from file system."
            "\n closing all open h5 files before removal"
        )
        close_open_files()
        target.unlink()

    mth5_obj = MTH5(file_version=file_version)
    mth5_obj.open_mth5(str(target), mode=mode)
    return mth5_obj

333 

334 

def read_back_data(
    mth5_path: str | pathlib.Path,
    station_id: str,
    run_id: str,
    survey: str | None = None,
    close_mth5: bool = True,
    return_objects: list[str] | None = None,
) -> dict[str, Any]:
    """
    Read station/run data from an MTH5 file for testing and validation.

    Opens the file read-only via ``initialize_mth5``, fetches the run with
    ``get_run``, converts it with ``to_runts()``, and logs the shape of
    ``dataset.to_array()``.

    Parameters
    ----------
    mth5_path : str | pathlib.Path
        Full path to MTH5 file to read.
    station_id : str
        Station identifier (e.g., 'PKD', 'MT001').
    run_id : str
        Run identifier (e.g., '001', '1').
    survey : str, optional
        Survey identifier, forwarded to ``get_run`` as ``survey=``.
    close_mth5 : bool, default True
        Whether to close the MTH5 object after reading.
        Set to False if you need to access the object later.
    return_objects : list of str, optional
        Which objects to include in the result. Options:
        - 'run': the object returned by ``get_run``
        - 'run_ts': the object returned by ``to_runts()``
        ``None`` is treated as an empty list.

    Returns
    -------
    dict
        Dictionary containing requested objects:
        - 'run' (if 'run' in return_objects)
        - 'run_ts' (if 'run_ts' in return_objects)
        - 'mth5_obj': the open MTH5 object (only if close_mth5=False)

    Raises
    ------
    Exception
        Any error raised while reading the run is re-raised after the MTH5
        object has been closed.

    Warnings
    --------
    If close_mth5=False, the MTH5 object must be manually closed
    to avoid resource leaks.

    Notes
    -----
    This is primarily a testing utility. Data shape is logged.

    TODO: add path_or_mth5_decorator to this function

    Examples
    --------
    Read run data and close immediately::

        >>> result = read_back_data(
        ...     '/path/to/file.mth5',
        ...     'PKD',
        ...     '001',
        ...     return_objects=['run_ts']
        ... )
        >>> ts = result['run_ts']
        >>> print(ts.dataset.shape)

    Read data and keep MTH5 object open::

        >>> result = read_back_data(
        ...     '/path/to/file.mth5',
        ...     'MT001',
        ...     '1',
        ...     survey='survey_01',
        ...     close_mth5=False,
        ...     return_objects=['run', 'run_ts']
        ... )
        >>> run = result['run']
        >>> m = result['mth5_obj']
        >>> # ... use objects ...
        >>> m.close_mth5()
    """
    if return_objects is None:
        return_objects = []

    m = initialize_mth5(str(mth5_path), mode="r")
    try:
        local_run_obj = m.get_run(station_id, run_id, survey=survey)
        local_run_ts = local_run_obj.to_runts()
        data_array = local_run_ts.dataset.to_array()
        logger.info(f"data shape = {data_array.shape}")
    except Exception:
        # On failure the caller never receives the handle, so close it here
        # regardless of ``close_mth5`` to avoid leaking the open file.
        m.close_mth5()
        raise

    return_dict: dict[str, Any] = {}
    if "run" in return_objects:
        return_dict["run"] = local_run_obj
    if "run_ts" in return_objects:
        return_dict["run_ts"] = local_run_ts

    if close_mth5:
        m.close_mth5()
    else:
        # Hand ownership of the open file to the caller.
        return_dict["mth5_obj"] = m
    return return_dict

437 

438 

def get_compare_dict(input_dict: dict[str, Any]) -> dict[str, Any]:
    """
    Strip MTH5-internal keys from a dictionary so it can be compared.

    Parameters
    ----------
    input_dict : dict
        Dictionary to clean, typically a metadata dictionary.

    Returns
    -------
    dict
        The same dictionary object that was passed in, with the internal
        keys removed.  The input is modified in place; no copy is made.

    Notes
    -----
    Removed keys (missing keys are silently ignored):
    - hdf5_reference
    - mth5_type

    Examples
    --------
    Clean metadata dictionary before comparison::

        >>> metadata = {
        ...     'id': 'station_001',
        ...     'latitude': 45.5,
        ...     'hdf5_reference': None,
        ...     'mth5_type': 'Station'
        ... }
        >>> get_compare_dict(metadata)
        {'id': 'station_001', 'latitude': 45.5}

    Safe to call with incomplete dicts::

        >>> get_compare_dict({'id': 'station_001'})
        {'id': 'station_001'}
    """
    internal_keys = ("hdf5_reference", "mth5_type")
    for internal_key in internal_keys:
        # A default makes ``pop`` a no-op when the key is absent.
        input_dict.pop(internal_key, None)
    return input_dict

489 

490 

@path_or_mth5_object
def station_in_mth5(
    m: str | pathlib.Path | MTH5,
    station_id: str,
    survey_id: str | None = None,
) -> bool:
    """
    Report whether a station ID is listed in an MTH5 file.

    Looks for ``station_id`` in the ``groups_list`` of the relevant
    stations group, chosen by file version.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to MTH5 file or MTH5 object.
    station_id : str
        Station identifier (e.g., 'PKD', 'MT001').
    survey_id : str, optional
        Survey identifier passed to ``m.get_survey`` for file version
        0.2.0; not used for version 0.1.0.

    Returns
    -------
    bool
        True if station exists, False otherwise.

    Raises
    ------
    NotImplementedError
        If file version is not 0.1.0 or 0.2.0.

    Notes
    -----
    File version 0.1.0 checks ``m.stations_group``.
    File version 0.2.0 checks the ``stations_group`` of the survey returned
    by ``m.get_survey(survey_id)``.

    Alternative method: Use channel_summary DataFrame::

        df = m.channel_summary.to_dataframe()
        station_exists = station_id in df['Station'].unique()

    Examples
    --------
    Check if station exists (file version 0.1.0)::

        >>> exists = station_in_mth5('/path/to/file.mth5', 'PKD')
        >>> print(exists)
        True

    Check in version 0.2.0 with survey ID::

        >>> exists = station_in_mth5(
        ...     '/path/to/file.mth5',
        ...     'MT001',
        ...     survey_id='survey_01'
        ... )
    """
    # ``m`` is an MTH5 object here: decorated by path_or_mth5_object.
    version = m.file_version  # type: ignore

    if version == "0.1.0":
        return station_id in m.stations_group.groups_list  # type: ignore

    if version == "0.2.0":
        survey_group = m.get_survey(survey_id)  # type: ignore
        return station_id in survey_group.stations_group.groups_list

    msg = f"MTH5 file_version {version} not understood"
    logger.error(msg)
    raise NotImplementedError(msg)

560 

561 

@path_or_mth5_object
def survey_in_mth5(m: str | pathlib.Path | MTH5, survey_id: str | None = None) -> bool:
    """
    Report whether a survey ID is present in an MTH5 file.

    The check depends on file version: 0.1.0 compares against the ID in
    the single survey's metadata, while 0.2.0 looks the ID up in the
    surveys group.

    Parameters
    ----------
    m : str | pathlib.Path | MTH5
        Path to MTH5 file or MTH5 object.
    survey_id : str, optional
        Survey identifier. For file version 0.1.0, compared for equality
        with ``m.survey_group.metadata.id``. For version 0.2.0, looked up
        in ``m.surveys_group.groups_list``.

    Returns
    -------
    bool
        True if survey exists, False otherwise.

    Raises
    ------
    NotImplementedError
        If file version is not 0.1.0 or 0.2.0.

    Notes
    -----
    Alternative method: Use channel_summary DataFrame::

        df = m.channel_summary.to_dataframe()
        surveys = df['Survey'].unique()
        survey_exists = survey_id in surveys

    Examples
    --------
    Check if survey exists (file version 0.1.0)::

        >>> exists = survey_in_mth5('/path/to/file.mth5', 'survey_01')
        >>> print(exists)
        True

    Check in version 0.2.0::

        >>> exists = survey_in_mth5('/path/to/file.mth5', survey_id='MT')
        >>> if exists:
        ...     print(f"Survey MT found in file")
    """
    # ``m`` is an MTH5 object here: decorated by path_or_mth5_object.
    version = m.file_version  # type: ignore

    if version == "0.1.0":
        return m.survey_group.metadata.id == survey_id  # type: ignore

    if version == "0.2.0":
        return survey_id in m.surveys_group.groups_list  # type: ignore

    msg = f"MTH5 file_version {version} not understood"
    logger.error(msg)
    raise NotImplementedError(msg)

624 return survey_exists