Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ groups \ run.py: 78%

270 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Created on Sat May 27 09:59:03 2023 

4 

5@author: jpeacock 

6""" 

7 

8# ============================================================================= 

9# Imports 

10# ============================================================================= 

11from __future__ import annotations 

12 

13import inspect 

14from typing import Any, Optional 

15 

16import h5py 

17import numpy as np 

18import pandas as pd 

19from mt_metadata import timeseries as metadata 

20 

21from mth5 import CHUNK_SIZE 

22from mth5.groups import ( 

23 AuxiliaryDataset, 

24 BaseGroup, 

25 ChannelDataset, 

26 ElectricDataset, 

27 MagneticDataset, 

28) 

29from mth5.helpers import read_attrs_to_dict, to_numpy_type, validate_name 

30from mth5.timeseries import ChannelTS, RunTS 

31from mth5.utils.exceptions import MTH5Error 

32 

33 

# Lookup table from class name to class object for every class exposed by
# ``mt_metadata.timeseries``; used to build a metadata container from the
# capitalized ``type`` attribute stored on a channel dataset.
meta_classes = {
    class_name: class_obj
    for class_name, class_obj in inspect.getmembers(metadata, inspect.isclass)
}

35# ============================================================================= 

36 

37 

38# ============================================================================= 

39# Run Group 

40# ============================================================================= 

41class RunGroup(BaseGroup): 

42 """ 

43 Container for a single MT measurement run with multiple channels. 

44 

45 Manages time series data and metadata for one measurement run within a station. 

46 A run can contain multiple channels of electric, magnetic, and auxiliary data. 

47 This class provides methods to add, retrieve, and manage individual channels, 

48 along with convenient access to station and survey metadata. 

49 

50 The run group is located at ``/Survey/Stations/{station_name}/{run_name}`` in 

51 the HDF5 file hierarchy. 

52 

53 Attributes 

54 ---------- 

55 metadata : mt_metadata.timeseries.Run 

56 Run metadata including sample rate, time period, and channel information. 

57 channel_summary : pd.DataFrame 

58 Summary table of all channels in the run. 

59 groups_list : list[str] 

60 List of channel names in the run. 

61 

62 Parameters 

63 ---------- 

64 group : h5py.Group 

65 HDF5 group for the run, should have path like 

66 ``/Survey/Stations/{station_name}/{run_name}`` 

67 run_metadata : mt_metadata.timeseries.Run, optional 

68 Metadata container for the run. Default is None. 

69 **kwargs : Any 

70 Additional keyword arguments passed to BaseGroup. 

71 

72 Notes 

73 ----- 

74 Key behaviors: 

75 

76 - Channels can be of type: electric, magnetic, or auxiliary 

77 - All metadata updates should use the metadata object for validation 

78 - Call write_metadata() after modifying metadata to persist changes 

79 - Channel metadata is cached for performance during repeated access 

80 - Deleting a channel removes the reference but doesn't reduce file size 

81 

82 Examples 

83 -------- 

84 Access run from an open MTH5 file: 

85 

86 >>> from mth5 import mth5 

87 >>> mth5_obj = mth5.MTH5() 

88 >>> mth5_obj.open_mth5(r"/test.mth5", mode='a') 

89 >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a') 

90 

91 Check available channels: 

92 

93 >>> run.groups_list 

94 ['Ex', 'Ey', 'Hx', 'Hy'] 

95 

96 Access HDF5 group directly: 

97 

98 >>> run.hdf5_group.ref 

99 <HDF5 Group Reference> 

100 

101 Update metadata and persist to file: 

102 

103 >>> run.metadata.sample_rate = 512.0 

104 >>> run.write_metadata() 

105 

106 Add a channel: 

107 

108 >>> import numpy as np 

109 >>> data = np.random.rand(4096) 

110 >>> ex = run.add_channel('Ex', 'electric', data=data) 

111 

112 This class provides methods to add and get channels. A summary table of 

113 all existing channels in the run is also provided as a convenience look up 

114 table to make searching easier. 

115 

116 :param group: HDF5 group for a station, should have a path 

117 ``/Survey/Stations/station_name/run_name`` 

118 :type group: :class:`h5py.Group` 

119 :param station_metadata: metadata container, defaults to None 

120 :type station_metadata: :class:`mth5.metadata.Station`, optional 

121 

122 :Access RunGroup from an open MTH5 file: 

123 

124 >>> from mth5 import mth5 

125 >>> mth5_obj = mth5.MTH5() 

126 >>> mth5_obj.open_mth5(r"/test.mth5", mode='a') 

127 >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a') 

128 

129 :Check what channels exist: 

130 

131 >>> station.groups_list 

132 ['Ex', 'Ey', 'Hx', 'Hy'] 

133 

134 To access the hdf5 group directly use `RunGroup.hdf5_group` 

135 

136 >>> station.hdf5_group.ref 

137 <HDF5 Group Reference> 

138 

139 .. note:: All attributes should be input into the metadata object; that

140 way all input will be validated against the metadata standards.

141 If you change attributes in the metadata object, you should run the

142 `RunGroup.write_metadata()` method. This is a temporary

143 solution; an automatic updater for changed metadata is in progress.

144 

145 >>> run.metadata.existing_attribute = 'update_existing_attribute' 

146 >>> run.write_metadata() 

147 

148 If you want to add a new attribute this should be done using the 

149 `metadata.add_base_attribute` method. 

150 

151 >>> station.metadata.add_base_attribute('new_attribute', 

152 >>> ... 'new_attribute_value', 

153 >>> ... {'type':str, 

154 >>> ... 'required':True, 

155 >>> ... 'style':'free form', 

156 >>> ... 'description': 'new attribute desc.', 

157 >>> ... 'units':None, 

158 >>> ... 'options':[], 

159 >>> ... 'alias':[], 

160 >>> ... 'example':'new attribute 

161 

162 :Add a channel: 

163 

164 >>> new_channel = run.add_channel('Ex', 'electric', 

165 >>> ... data=numpy.random.rand(4096)) 

166 >>> new_run 

167 /Survey/Stations/MT001/MT001a: 

168 ======================================= 

169 --> Dataset: summary 

170 ...................... 

171 --> Dataset: Ex 

172 ...................... 

173 --> Dataset: Ey 

174 ...................... 

175 --> Dataset: Hx 

176 ...................... 

177 --> Dataset: Hy 

178 ...................... 

179 

180 

181 :Add a channel with metadata: 

182 

183 >>> from mth5.metadata import Electric 

184 >>> ex_metadata = Electric() 

185 >>> ex_metadata.time_period.start = '2020-01-01T12:30:00' 

186 >>> ex_metadata.time_period.end = '2020-01-03T16:30:00' 

187 >>> new_ex = run.add_channel('Ex', 'electric', 

188 >>> ... channel_metadata=ex_metadata) 

189 >>> # to look at the metadata 

190 >>> new_ex.metadata 

191 { 

192 "electric": { 

193 "ac.end": 1.2, 

194 "ac.start": 2.3, 

195 ... 

196 } 

197 } 

198 

199 

200 .. seealso:: `mth5.metadata` for details on how to add metadata from 

201 various files and python objects. 

202 

203 :Remove a channel: 

204 

205 >>> run.remove_channel('Ex') 

206 >>> station 

207 /Survey/Stations/MT001/MT001a: 

208 ======================================= 

209 --> Dataset: summary 

210 ...................... 

211 --> Dataset: Ey 

212 ...................... 

213 --> Dataset: Hx 

214 ...................... 

215 --> Dataset: Hy 

216 ...................... 

217 

218 .. note:: Deleting a channel is not as simple as del(channel). In HDF5

219 this does not free up memory, it simply removes the reference

220 to that channel. The common way to get around this is to

221 copy what you want into a new file, or overwrite the channel.

222 

223 :Get a channel: 

224 

225 >>> existing_ex = stations.get_channel('Ex') 

226 >>> existing_ex 

227 Channel Electric: 

228 ------------------- 

229 data type: Ex 

230 data type: electric 

231 data format: float32 

232 data shape: (4096,) 

233 start: 1980-01-01T00:00:00+00:00 

234 end: 1980-01-01T00:32:+08:00 

235 sample rate: 8 

236 

237 

238 :summary Table: 

239 

240 A summary table is provided to make searching easier. The table

241 summarizes all channels within a run. To see what names are in the

242 summary table:

243 

244 >>> run.summary_table.dtype.descr 

245 [('component', ('|S5', {'h5py_encoding': 'ascii'})), 

246 ('start', ('|S32', {'h5py_encoding': 'ascii'})), 

247 ('end', ('|S32', {'h5py_encoding': 'ascii'})), 

248 ('n_samples', '<i4'), 

249 ('measurement_type', ('|S12', {'h5py_encoding': 'ascii'})), 

250 ('units', ('|S25', {'h5py_encoding': 'ascii'})), 

251 ('hdf5_reference', ('|O', {'ref': h5py.h5r.Reference}))] 

252 

253 

254 .. note:: When a channel is added an entry is added to the summary table,

255 where the information is pulled from the metadata.

256 

257 >>> new_run.summary_table 

258 index | component | start | end | n_samples | measurement_type | units | 

259 hdf5_reference 

260 -------------------------------------------------------------------------- 

261 ------------- 

262 """ 

263 

def __init__(
    self,
    group: h5py.Group,
    run_metadata: Optional[metadata.Run] = None,
    **kwargs: Any,
) -> None:
    """
    Initialize RunGroup.

    Parameters
    ----------
    group : h5py.Group
        HDF5 group for the run.
    run_metadata : mt_metadata.timeseries.Run, optional
        Metadata container for the run. Default is None.
    **kwargs : Any
        Additional keyword arguments passed to BaseGroup.
    """
    # Names of sub-groups that live inside a run but are not channels.
    self._non_channel_groups = ["Features"]
    # Channel metadata cache shared between add_channel and the metadata
    # property. It is created *before* the base-class initializer because
    # the overridden ``metadata`` getter reads it.
    # NOTE(review): BaseGroup.__init__ is not visible here; if it touches
    # ``self.metadata`` (directly or through write_metadata) on a group that
    # already contains channels, the cache must exist at that point -- confirm.
    self._channel_metadata_cache: dict[
        str, metadata.Electric | metadata.Magnetic | metadata.Auxiliary
    ] = {}
    super().__init__(group, group_metadata=run_metadata, **kwargs)

288 

@property
def station_metadata(self) -> metadata.Station:
    """
    Build station metadata that contains this run.

    The station attributes are read from the parent of this run's HDF5
    group, ``run_list`` is set to this run's id, and this run's metadata
    is then added to the resulting station object.

    Returns
    -------
    metadata.Station
        Station metadata object containing this run's information.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='r')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> station_meta = run.station_metadata
    >>> print(station_meta.id)
    MT001
    """
    parent_attrs = dict(self.hdf5_group.parent.attrs)
    parent_attrs["run_list"] = [self.metadata.id]

    station = metadata.Station()
    # A second, throw-away Station instance is handed to the attribute reader.
    station.from_dict(
        {"station": read_attrs_to_dict(parent_attrs, metadata.Station())}
    )
    station.add_run(self.metadata)
    return station

317 

@property
def survey_metadata(self) -> metadata.Survey:
    """
    Build survey metadata that contains this run's station and run.

    The survey attributes are read from the HDF5 group three levels above
    this run (``hdf5_group.parent.parent.parent``); the station metadata
    from :attr:`station_metadata` is then added to it.

    Returns
    -------
    metadata.Survey
        Survey metadata object containing the full hierarchy.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5("example.h5", mode='r')
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> survey_meta = run.survey_metadata
    >>> print(survey_meta.id)
    CONUS_South
    """
    survey_group = self.hdf5_group.parent.parent.parent
    survey_attrs = read_attrs_to_dict(dict(survey_group.attrs), metadata.Survey())

    survey = metadata.Survey()
    survey.from_dict({"survey": survey_attrs})
    survey.add_station(self.station_metadata)
    return survey

345 

def _read_channel_metadata_from_hdf5(
    self, channel_name: str
) -> metadata.Electric | metadata.Magnetic | metadata.Auxiliary:
    """
    Build a channel metadata object from a dataset's HDF5 attributes.

    The metadata class is selected from ``meta_classes`` using the
    capitalized ``type`` attribute of the dataset.

    Parameters
    ----------
    channel_name : str
        Name of the channel to read metadata for.

    Returns
    -------
    metadata.Electric | metadata.Magnetic | metadata.Auxiliary
        Channel metadata object of appropriate type.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> ex_meta = run._read_channel_metadata_from_hdf5("ex")
    >>> print(ex_meta.type)
    electric
    """
    raw_attrs = dict(self.hdf5_group[channel_name].attrs)
    template = meta_classes[raw_attrs["type"].capitalize()]()
    converted_attrs = read_attrs_to_dict(raw_attrs, template)

    # The class is looked up a second time, from the converted attributes,
    # and a fresh instance is populated and returned.
    channel_meta = meta_classes[converted_attrs["type"].capitalize()]()
    channel_meta.from_dict(converted_attrs)
    return channel_meta

377 

def recache_channel_metadata(self) -> None:
    """
    Discard the channel metadata cache and refill it from HDF5.

    Every name in ``groups_list`` that is not listed in
    ``_non_channel_groups`` is read again with
    ``_read_channel_metadata_from_hdf5`` and stored in the cache under
    that name.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.recache_channel_metadata()
    >>> # Cache is now synchronized with HDF5 storage
    """
    self._channel_metadata_cache = {}
    channel_names = (
        name for name in self.groups_list if name not in self._non_channel_groups
    )
    for name in channel_names:
        self._channel_metadata_cache[name] = self._read_channel_metadata_from_hdf5(
            name
        )

398 

399 @BaseGroup.metadata.getter

400 def metadata(self) -> metadata.Run:

401 """

402 Get run metadata including all channel information.

403

404 On first access the stored metadata is loaded with ``read_metadata()``. If the run's time period differs from that of its first channel, the channel metadata cache is rebuilt with ``recache_channel_metadata()``.

405 When the channel list is rebuilt, an entry is added for each name in ``groups_list`` (skipping ``_non_channel_groups``), reusing cached channel metadata where present and reading it from HDF5 otherwise. ``hdf5_reference`` is set to this group's reference before returning.

406

407 Returns

408 -------

409 metadata.Run

410 Run metadata object with all channels included.

411

412 Examples

413 --------

414 >>> run = mth5_obj.get_run("MT001", "MT001a")

415 >>> run_meta = run.metadata

416 >>> print(run_meta.channels_recorded_electric)

417 ['ex', 'ey']

418 >>> print(run_meta.sample_rate)

419 256.0

420 """

421 if not self._has_read_metadata:

422 self.read_metadata()

423 self._has_read_metadata = True

424

425 if len(self._metadata.channels) > 0:

426 if (

427 self._metadata.time_period.start

428 != self._metadata.channels[0].time_period.start

429 ) or (

430 self._metadata.time_period.end

431 != self._metadata.channels[0].time_period.end

432 ):

433 self.recache_channel_metadata()

434

435 # Clear and rebuild the channels list

436 self._metadata._empty_channels_recorded()

437 self._metadata.channels = []

438

439 for ch in self.groups_list:

440 if ch in self._non_channel_groups:

441 continue

442 if ch in self._channel_metadata_cache:

443 # Reuse cached metadata to prevent duplicate processing

444 cached_metadata = self._channel_metadata_cache[ch]

445 self._metadata.add_channel(cached_metadata)

446 else:

447 # Create new metadata if not cached

448 ch_metadata = self._read_channel_metadata_from_hdf5(ch)

449 # Cache the metadata for future use

450 self._channel_metadata_cache[ch] = ch_metadata

451 self._metadata.add_channel(ch_metadata)

452

453 # Only rebuild channels if they haven't been built yet or if the group list has changed

454 if not self._metadata.channels or len(self._metadata.channels) != len(

455 self.groups_list

456 ):

457 # Get current channel names from the groups and existing channels

458 current_group_names = set(self.groups_list)

459 existing_channel_names = set(ch.component for ch in self._metadata.channels)

460

461 # Only rebuild if there's actually a difference in the channel sets. NOTE(review): current_group_names is not filtered by _non_channel_groups, so presumably the sets always differ when such a group (e.g. 'Features') exists - confirm this is intended.

462 if current_group_names != existing_channel_names:

463 # Clear and rebuild the channels list

464 self._metadata._empty_channels_recorded()

465 self._metadata.channels = []

466

467 # Re-add every channel found in the run group

468 for ch in self.groups_list:

469 # Skip non-channel groups

470 if ch in self._non_channel_groups:

471 continue

472 if ch in self._channel_metadata_cache:

473 # Reuse cached metadata to prevent duplicate processing

474 cached_metadata = self._channel_metadata_cache[ch]

475 self._metadata.add_channel(cached_metadata)

476 else:

477 # Create new metadata if not cached

478 ch_metadata = self._read_channel_metadata_from_hdf5(ch)

479 # Cache the metadata for future use

480 self._channel_metadata_cache[ch] = ch_metadata

481 self._metadata.add_channel(ch_metadata)

482 # If channel sets are identical, skip rebuilding to prevent duplicates

483 self._metadata.hdf5_reference = self.hdf5_group.ref

484 return self._metadata

485 

@property
def channel_summary(self) -> pd.DataFrame:
    """
    Summarize every channel stored in the run as a DataFrame.

    Each member of the run's HDF5 group whose ``type`` attribute is
    ``electric``, ``magnetic`` or ``auxiliary`` contributes one row.
    Members that lack any of the attributes read here are skipped. For the
    start and end times only the text before the first ``+`` is kept
    (presumably dropping a UTC offset) before conversion to
    ``datetime64[ns]``.

    Returns
    -------
    pandas.DataFrame
        DataFrame with columns: component, start, end, n_samples,
        sample_rate, measurement_type, units, hdf5_reference.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> summary = run.channel_summary
    >>> print(summary[['component', 'sample_rate', 'n_samples']])
      component  sample_rate  n_samples
    0        ex        256.0      65536
    1        ey        256.0      65536
    2        hx        256.0      65536
    3        hy        256.0      65536
    """
    channel_types = ["electric", "magnetic", "auxiliary"]
    rows = []
    for member in self.hdf5_group.values():
        try:
            attrs = member.attrs
            measurement_type = attrs["type"]
            if measurement_type not in channel_types:
                continue
            rows.append(
                (
                    attrs["component"],
                    attrs["time_period.start"].split("+")[0],
                    attrs["time_period.end"].split("+")[0],
                    member.size,
                    attrs["sample_rate"],
                    measurement_type,
                    attrs["units"],
                    member.ref,
                )
            )
        except KeyError:
            # Member is missing an expected attribute; leave it out.
            continue

    summary_dtype = np.dtype(
        [
            ("component", "U20"),
            ("start", "datetime64[ns]"),
            ("end", "datetime64[ns]"),
            ("n_samples", int),
            ("sample_rate", float),
            ("measurement_type", "U12"),
            ("units", "U25"),
            ("hdf5_reference", h5py.ref_dtype),
        ]
    )
    return pd.DataFrame(np.array(rows, dtype=summary_dtype))

544 

def write_metadata(self) -> None:
    """
    Persist the run metadata as attributes of the HDF5 group.

    The metadata object is flattened with ``to_dict(single=True)``; each
    value is converted with ``to_numpy_type`` and stored under its key
    using ``attrs.create``.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.metadata.sample_rate = 512.0
    >>> run.write_metadata()
    >>> # Metadata is now persisted to HDF5 file
    """
    flat_metadata = self.metadata.to_dict(single=True)
    for attr_name, attr_value in flat_metadata.items():
        self.hdf5_group.attrs.create(attr_name, to_numpy_type(attr_value))

562 

563 def add_channel(

564 self,

565 channel_name,

566 channel_type,

567 data,

568 channel_dtype="int32",

569 shape=None,

570 max_shape=(None,),

571 chunks=True,

572 channel_metadata=None,

573 **kwargs,

574 ):

575 """

576 Add a channel to the run.

577 The name is lower-cased and passed through ``validate_name``. If creating the dataset or its wrapper raises OSError, RuntimeError or ValueError, the channel is assumed to exist already and is fetched with ``get_channel`` instead. The channel's metadata is stored in the run's channel metadata cache before returning.

578 Parameters

579 ----------

580 channel_name : str

581 Name of the channel (e.g., 'ex', 'ey', 'hx', 'hy', 'hz').

582 channel_type : str

583 Type of channel: 'electric', 'magnetic', or 'auxiliary'.

584 data : numpy.ndarray or None

585 Time series data for the channel. If None, an empty resizable

586 dataset will be created.

587 channel_dtype : str, optional

588 Data type for the channel if data is None, by default "int32".

589 shape : tuple of int, optional

590 Initial shape of the dataset. If None and data is None, shape

591 is estimated from metadata or set to (1,), by default None.

592 max_shape : tuple of int or None, optional

593 Maximum shape the dataset can be resized to. Use None for

594 unlimited growth in that dimension, by default (None,).

595 chunks : bool or int, optional

596 Enable chunked storage. If True, uses automatic chunking.

597 If int, uses that chunk size, by default True. Set to None when ``data`` has fewer than 1024 samples; may be replaced by ``CHUNK_SIZE`` when the size has to be estimated.

598 channel_metadata : mt_metadata.timeseries.Electric, Magnetic, or Auxiliary, optional

599 Metadata object for the channel, by default None.

600 **kwargs : dict

601 Each key/value pair is set as an attribute on this RunGroup with ``setattr``.

602

603 Returns

604 -------

605 ElectricDataset or MagneticDataset or AuxiliaryDataset

606 The created channel dataset object.

607

608 Raises

609 ------

610 MTH5Error

611 If channel_type is not one of: electric, magnetic, auxiliary.

612

613 Examples

614 --------

615 Add a channel with data:

616

617 >>> import numpy as np

618 >>> from mth5 import mth5

619 >>> mth5_obj = mth5.MTH5()

620 >>> mth5_obj.open_mth5("example.h5", mode='a')

621 >>> run = mth5_obj.get_run("MT001", "MT001a")

622 >>> data = np.random.rand(4096)

623 >>> ex = run.add_channel('ex', 'electric', data)

624 >>> print(ex.metadata.component)

625 ex

626

627 Add a channel with metadata:

628

629 >>> from mt_metadata.timeseries import Electric

630 >>> ex_meta = Electric()

631 >>> ex_meta.time_period.start = '2020-01-01T12:30:00'

632 >>> ex_meta.sample_rate = 256.0

633 >>> ex = run.add_channel('ex', 'electric', None,

634 ... channel_metadata=ex_meta)

635 >>> print(ex.metadata.sample_rate)

636 256.0

637

638 Add a channel with custom shape:

639

640 >>> ex = run.add_channel('ex', 'electric', None,

641 ... shape=(8192,), channel_dtype='float32')

642 >>> print(ex.hdf5_dataset.shape)

643 (8192,)

644 """

645 channel_name = validate_name(channel_name.lower())

646 estimate_size = (1,)

647 for key, value in kwargs.items():

648 setattr(self, key, value)

649 if data is not None:

650 if data.size < 1024:

651 chunks = None

652 try:

653 if data is not None:

654 channel_group = self.hdf5_group.create_dataset(

655 channel_name,

656 data=data,

657 dtype=data.dtype,

658 chunks=chunks,

659 maxshape=max_shape,

660 **self.dataset_options,

661 )

662 # initialize a resizable data array

663 # need to set the chunk size to something useful, if the chunk

664 # size is 1 this causes performance issues and bloating of the

665 # hdf5 file. ``CHUNK_SIZE`` (imported from ``mth5``) is used in the fallbacks below.

666 else:

667 if shape is None:

668 if channel_metadata is not None:

669 # can estimate a size, this will help with allocating

670 # and set the chunk size to a realistic value

671 if (

672 channel_metadata.time_period.start

673 != channel_metadata.time_period.end

674 ):

675 if channel_metadata.sample_rate > 0:

676 estimate_size = (

677 int(

678 (

679 channel_metadata.time_period.end

680 - channel_metadata.time_period.start

681 )

682 * channel_metadata.sample_rate

683 ),

684 )

685 else:

686 estimate_size = (1,)

687 chunks = CHUNK_SIZE

688 else:

689 estimate_size = (1,)

690 chunks = CHUNK_SIZE

691 if estimate_size[0] > 2**31:

692 estimate_size = (1,)

693 self.logger.warning(

694 "Estimated size is too large. Check start and end "

695 "times, initializing with size (1,)"

696 )

697 else:

698 estimate_size = shape

699 ## Create the dataset

700 channel_group = self.hdf5_group.create_dataset(

701 channel_name,

702 shape=estimate_size,

703 maxshape=max_shape,

704 dtype=channel_dtype,

705 chunks=chunks,

706 **self.dataset_options,

707 )

708 if channel_metadata:

709 if channel_metadata.component != channel_name:

710 self.logger.warning(

711 f"Channel name {channel_name} != "

712 f"channel_metadata.component "

713 f"{channel_metadata.component}, setting to {channel_name}"

714 )

715 channel_metadata.component = channel_name

716 if channel_type.lower() in ["magnetic"]:

717 channel_obj = MagneticDataset(

718 channel_group, dataset_metadata=channel_metadata

719 )

720 elif channel_type.lower() in ["electric"]:

721 channel_obj = ElectricDataset(

722 channel_group, dataset_metadata=channel_metadata

723 )

724 elif channel_type.lower() in ["auxiliary"]:

725 channel_obj = AuxiliaryDataset(

726 channel_group, dataset_metadata=channel_metadata

727 )

728 else:

729 msg = (

730 "`channel_type` must be in [ electric | magnetic | "

731 f"auxiliary ]. Input was {channel_type}"

732 )

733 self.logger.error(msg)

734 raise MTH5Error(msg)

735 except (OSError, RuntimeError, ValueError):

736 msg = f"channel {channel_name} already exists, returning existing group."

737 self.logger.debug(msg)

738 channel_obj = self.get_channel(channel_name)

739

740 if data is not None:

741 self.logger.debug(f"Replacing data with new shape {data.shape}")

742 channel_obj.replace_dataset(data)

743

744 self.logger.debug("Updating metadata")

745 channel_obj.metadata.update(channel_metadata)

746 channel_obj.write_metadata()

747 self.logger.debug(f"Done with {channel_name}")

748 # need to make sure the channel name is passed. NOTE(review): metadata.update() above receives channel_metadata even when it is None - confirm that is accepted.

749 if channel_obj.metadata.component != channel_name:

750 channel_obj.metadata.component = channel_name

751 channel_obj.write_metadata()

752

753 # Cache the processed channel metadata to prevent duplicate processing in metadata property

754 # Use the channel object's metadata which has already been processed through from_dict

755 self._channel_metadata_cache[channel_name] = channel_obj.metadata

756

757 return channel_obj

758 

759 def get_channel(

760 self, channel_name: str

761 ) -> ElectricDataset | MagneticDataset | AuxiliaryDataset | ChannelDataset:

762 """

763 Get a channel from an existing name.

764

765 The name is lower-cased and passed through ``validate_name`` before lookup. The container

766 class is chosen from the dataset's ``mth5_type`` attribute (electric, magnetic, or auxiliary); any other value falls back to ``ChannelDataset``.

767

768 Parameters

769 ----------

770 channel_name : str

771 Name of the channel to retrieve (e.g., 'ex', 'ey', 'hx').

772

773 Returns

774 -------

775 ElectricDataset or MagneticDataset or AuxiliaryDataset or ChannelDataset

776 Channel dataset object containing the channel data and metadata.

777

778 Raises

779 ------

780 MTH5Error

781 If the channel does not exist in the run.

782

783 Examples

784 --------

785 Attempting to get a non-existent channel raises MTH5Error:

786

787 >>> from mth5 import mth5

788 >>> mth5_obj = mth5.MTH5()

789 >>> mth5_obj.open_mth5("example.h5", mode='r')

790 >>> run = mth5_obj.get_run("MT001", "MT001a")

791 >>> ex = run.get_channel('ex')

792 MTH5Error: ex does not exist, check groups_list for existing names

793

794 Check available channels first:

795

796 >>> run.groups_list

797 ['ey', 'hx', 'hz']

798

799 Get an existing channel:

800

801 >>> ey = run.get_channel('ey')

802 >>> print(ey)

803 Channel Electric:

804 -------------------

805 component: ey

806 data type: electric

807 data format: float32

808 data shape: (4096,)

809 start: 1980-01-01T00:00:00+00:00

810 end: 1980-01-01T00:00:01+00:00

811 sample rate: 4096

812 """

813

814 channel_name = validate_name(channel_name.lower())

815 try:

816 ch_dataset = self.hdf5_group[channel_name]

817 except KeyError:

818 msg = (

819 f"{channel_name} does not exist, check groups_list "

820 "for existing names"

821 )

822 self.logger.debug(msg)

823 raise MTH5Error(msg)

824 if ch_dataset.attrs["mth5_type"].lower() in ["electric"]:

825 channel = ElectricDataset(

826 ch_dataset,

827 )

828 elif ch_dataset.attrs["mth5_type"].lower() in ["magnetic"]:

829 channel = MagneticDataset(

830 ch_dataset,

831 )

832 elif ch_dataset.attrs["mth5_type"].lower() in ["auxiliary"]:

833 channel = AuxiliaryDataset(

834 ch_dataset,

835 )

836 else:

837 channel = ChannelDataset(ch_dataset)

838 channel.read_metadata()

839

840 return channel

841 

def remove_channel(self, channel_name: str) -> None:
    """
    Remove a channel from the run.

    The name is lower-cased and passed through ``validate_name``, the
    dataset is deleted from the run's HDF5 group, and any entry for it in
    the channel metadata cache is dropped.

    Parameters
    ----------
    channel_name : str
        Name of the existing channel to remove.

    Raises
    ------
    MTH5Error
        If no member with that name exists in the run group.

    Notes
    -----
    Deleting a channel does not reduce the HDF5 file size. It simply
    removes the reference. If file size reduction is your goal, copy
    what you want into another file.

    Todo: Need to remove summary table entry as well.

    Examples
    --------
    >>> from mth5 import mth5
    >>> mth5_obj = mth5.MTH5()
    >>> mth5_obj.open_mth5(r"/test.mth5", mode='a')
    >>> run = mth5_obj.stations_group.get_station('MT001').get_run('MT001a')
    >>> run.remove_channel('ex')
    """
    channel_name = validate_name(channel_name.lower())

    # Only the deletion itself can signal "no such channel", so it is the
    # only statement guarded by the try.
    try:
        del self.hdf5_group[channel_name]
    except KeyError as error:
        msg = f"{channel_name} does not exist, " "check groups_list for existing names"
        self.logger.debug("Error: " + msg)
        raise MTH5Error(msg) from error

    # Forget any cached metadata for the removed channel.
    self._channel_metadata_cache.pop(channel_name, None)
    self.logger.info(
        "Deleting a channel does not reduce the HDF5 file size, it simply "
        "removes the reference. If file size reduction is your goal, simply "
        "copy what you want into another file."
    )

893 

def has_data(self) -> bool:
    """
    Check whether every channel in the run reports that it has data.

    Each channel is fetched with ``get_channel`` and asked through its own
    ``has_data()`` method. Members named ``summary`` and any name listed in
    ``_non_channel_groups`` are not channels and are skipped, matching the
    filtering used when the run metadata is built.

    Returns
    -------
    bool
        True if no inspected channel reports missing data (including the
        case where the run has no channels), False otherwise.

    Notes
    -----
    When the result is False, the per-channel results are written to the
    logger at info level as ``"<component>: <bool>"`` pairs.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> if run.has_data():
    ...     print("Run contains valid data")
    ...     runts = run.to_runts()
    """
    skipped = {"summary", *self._non_channel_groups}
    report = []
    all_have_data = True
    for name in self.groups_list:
        if name in skipped:
            continue
        ch_obj = self.get_channel(name)
        # Ask once per channel; the answer may require reading the dataset.
        channel_has_data = ch_obj.has_data()
        report.append(f"{ch_obj.metadata.component}: {channel_has_data}")
        if not channel_has_data:
            all_have_data = False

    if not all_have_data:
        self.logger.info(", ".join(report))
    return all_have_data

932 

def to_runts(
    self,
    start: Optional[str] = None,
    end: Optional[str] = None,
    n_samples: Optional[int] = None,
) -> RunTS:
    """
    Convert run to a RunTS timeseries object.

    Every channel in the run is converted to a time series object and the
    collection is wrapped in a RunTS together with the run, station and
    survey metadata.

    Parameters
    ----------
    start : str, optional
        Start time for time slice in ISO format (e.g., '2023-01-01T12:00:00').
        If None, each channel is converted in full with ``to_channel_ts()``.
        Default is None.
    end : str, optional
        End time for the time slice. Only used if start is specified.
        Default is None.
    n_samples : int, optional
        Number of samples for the time slice. Only used if start is
        specified; it is forwarded to each channel's ``time_slice`` along
        with ``end``. Default is None.

    Returns
    -------
    RunTS
        RunTS object containing all channels with run, station and survey
        metadata.

    Notes
    -----
    - Skips the 'summary' member and every name in ``_non_channel_groups``,
      none of which are channels
    - If start is specified, performs time slicing; otherwise returns full data

    Examples
    --------
    Convert entire run to RunTS:

    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> runts = run.to_runts()
    >>> print(runts.channels)
    ['ex', 'ey', 'hx', 'hy']

    Time slice the run:

    >>> runts = run.to_runts(start='2023-01-01T12:00:00',
    ...                       end='2023-01-01T13:00:00')
    >>> print(runts.ex.ts.shape)
    (1024,)
    """
    skipped = {"summary", *self._non_channel_groups}
    ch_list = []
    for name in self.groups_list:
        if name in skipped:
            continue
        ch_obj = self.get_channel(name)

        if start is not None:
            ts_obj = ch_obj.time_slice(start, end=end, n_samples=n_samples)
        else:
            ts_obj = ch_obj.to_channel_ts()
        ch_list.append(ts_obj)

    return RunTS(
        ch_list,
        run_metadata=self.metadata,
        station_metadata=self.station_metadata,
        survey_metadata=self.survey_metadata,
    )

1001 

def from_runts(
    self, run_ts_obj: RunTS, **kwargs: Any
) -> list[ElectricDataset | MagneticDataset | AuxiliaryDataset]:
    """
    Write every channel of a RunTS object into this run group.

    The run metadata held by this group is first updated from
    ``run_ts_obj.run_metadata``. Each channel is then passed to
    ``from_channel_ts`` and, once all channels are written,
    ``update_metadata()`` is called.

    Parameters
    ----------
    run_ts_obj : RunTS
        Source object; its ``channels`` attribute names the channels
        that are read from it with ``getattr``.
    **kwargs : Any
        Accepted for interface compatibility; not used by this method.

    Returns
    -------
    list[ElectricDataset | MagneticDataset | AuxiliaryDataset]
        The datasets returned by ``from_channel_ts``, in the order of
        ``run_ts_obj.channels``.

    Raises
    ------
    MTH5Error
        If ``run_ts_obj`` is not a RunTS instance.

    Notes
    -----
    A channel whose station (or run) id is set, differs from the id of
    this group and is not the placeholder ``"0"`` gets a warning logged
    and its id overwritten with the id of this group.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> channels = run.from_runts(runts)
    """
    if not isinstance(run_ts_obj, RunTS):
        msg = f"Input must be a mth5.timeseries.RunTS object not {type(run_ts_obj)}"
        self.logger.error(msg)
        raise MTH5Error(msg)

    self._metadata.update(run_ts_obj.run_metadata)

    written = []
    for name in run_ts_obj.channels:
        channel_ts = getattr(run_ts_obj, name)

        # Station id: only a real (non-None, non-"0") mismatch is corrected.
        if (
            channel_ts.station_metadata.id is not None
            and channel_ts.station_metadata.id != self.station_metadata.id
            and channel_ts.station_metadata.id != "0"
        ):
            self.logger.warning(
                f"Channel station.id {channel_ts.station_metadata.id} != "
                f" group station.id {self.station_metadata.id}. "
                f"Setting to ch.station_metadata.id to {self.station_metadata.id}"
            )
            channel_ts.station_metadata.id = self.station_metadata.id

        # Run id: same rule as for the station id.
        if (
            channel_ts.run_metadata.id is not None
            and channel_ts.run_metadata.id != self.metadata.id
            and channel_ts.run_metadata.id != "0"
        ):
            self.logger.warning(
                f"Channel run.id {channel_ts.run_metadata.id} != "
                f" group run.id {self.metadata.id}. "
                f"Setting to ch.run_metadata.id to {self.metadata.id}"
            )
            channel_ts.run_metadata.id = self.metadata.id

        written.append(self.from_channel_ts(channel_ts))

    self.update_metadata()
    return written

1078 

def from_channel_ts(
    self, channel_ts_obj: ChannelTS
) -> ElectricDataset | MagneticDataset | AuxiliaryDataset:
    """
    Write a single ChannelTS object into this run as a channel dataset.

    Parameters
    ----------
    channel_ts_obj : ChannelTS
        Source of the component name, channel type, time series
        (``ts``), channel metadata and channel response.

    Returns
    -------
    ElectricDataset | MagneticDataset | AuxiliaryDataset
        The dataset returned by ``add_channel``.

    Raises
    ------
    MTH5Error
        If ``channel_ts_obj`` is not a ChannelTS instance.

    Notes
    -----
    - When the channel response carries filters, each one is added to
      the ``Filters`` group found three levels above this run's HDF5
      group.
    - For channel types ``electric``, ``magnetic`` and ``auxiliary`` the
      component is added to the matching ``channels_recorded_*`` list of
      the run metadata if it is not already listed. Other types leave
      those lists untouched.
    - The run metadata is not written to file here.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> ex = run.from_channel_ts(channel)
    >>> print(ex.metadata.component)
    ex
    """
    if not isinstance(channel_ts_obj, ChannelTS):
        msg = f"Input must be a mth5.timeseries.ChannelTS object not {type(channel_ts_obj)}"
        self.logger.error(msg)
        raise MTH5Error(msg)

    # Register the filters before the channel that refers to them is created.
    if channel_ts_obj.channel_response.filters_list != []:
        from mth5.groups import FiltersGroup

        filters_group = FiltersGroup(
            self.hdf5_group.parent.parent.parent["Filters"]
        )
        for channel_filter in channel_ts_obj.channel_response.filters_list:
            filters_group.add_filter(channel_filter)

    dataset = self.add_channel(
        channel_ts_obj.component,
        channel_ts_obj.channel_metadata.type,
        channel_ts_obj.ts,
        channel_metadata=channel_ts_obj.channel_metadata,
    )

    # Keep the run's list of recorded channels in step with what was written.
    channel_type = channel_ts_obj.channel_metadata.type
    if channel_type in ("electric", "magnetic", "auxiliary"):
        list_name = f"channels_recorded_{channel_type}"
        component = channel_ts_obj.component
        if getattr(self.metadata, list_name) is None:
            setattr(self.metadata, list_name, [component])
        elif component not in getattr(self.metadata, list_name):
            getattr(self.metadata, list_name).append(component)

    return dataset

1169 

def update_run_metadata(self) -> None:
    """
    Deprecated entry point kept only to point callers at the replacement.

    .. deprecated::
        Use update_metadata() instead.

    Raises
    ------
    DeprecationWarning
        Always; the method performs no other work.
    """
    message = "'update_run_metadata' has been deprecated use 'update_metadata()'"
    raise DeprecationWarning(message)

1184 

def update_metadata(self) -> None:
    """
    Refresh the run metadata from the channel summary and write it to file.

    Reads ``self.channel_summary`` and copies the overall time span and
    the sample rate into the run metadata, then calls
    ``write_metadata()``.

    Notes
    -----
    With at least one row in the channel summary:

    - ``time_period.start`` is set to the earliest channel start
    - ``time_period.end`` is set to the latest channel end
    - ``sample_rate`` is set to the first unique sample rate found
      (channels are assumed to share one rate)

    With an empty channel summary a critical message is logged,
    ``sample_rate`` is set to 0 and the time period is left unchanged.
    The emptiness check is done first because the minimum / maximum of
    an empty datetime column is ``NaT``, which would otherwise be
    written into the time period as the string "NaT".

    Should be called after adding or removing channels so that run and
    channel metadata stay consistent.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> run.add_channel('ex', 'electric', data=ex_data)
    >>> run.add_channel('ey', 'electric', data=ey_data)
    >>> run.update_metadata()  # Updates time period and sample rate
    """
    summary = self.channel_summary

    if summary.empty:
        msg = "There maybe no channels associated with this run -- setting sample_rate to 0"
        self.logger.critical(msg)
        self._metadata.sample_rate = 0
    else:
        self._metadata.time_period.start = summary["start"].min().isoformat()
        self._metadata.time_period.end = summary["end"].max().isoformat()
        self._metadata.sample_rate = summary["sample_rate"].unique()[0]

    self.write_metadata()

1226 

def plot(
    self,
    start: Optional[str] = None,
    end: Optional[str] = None,
    n_samples: Optional[int] = None,
) -> Any:
    """
    Plot the channels of this run.

    The run is converted with ``to_runts`` using the given window
    arguments and the ``plot()`` method of the resulting object is
    called.

    Parameters
    ----------
    start : str, optional
        Start time of the window to plot; passed to ``to_runts``.
        Default is None.
    end : str, optional
        End time of the window to plot; passed to ``to_runts``.
        Default is None.
    n_samples : int, optional
        Number of samples to plot from ``start``; passed to
        ``to_runts``. Default is None.

    Returns
    -------
    Any
        Whatever the ``plot()`` call on the converted run returns.

    Examples
    --------
    >>> run = mth5_obj.get_run("MT001", "MT001a")
    >>> fig = run.plot()
    >>> fig = run.plot(start='2023-01-01T12:00:00',
    ...                end='2023-01-01T13:00:00')
    """
    return self.to_runts(start=start, end=end, n_samples=n_samples).plot()