Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\clients\fdsn.py: 63%

338 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3FDSN 

4========= 

5 

6Module for working with FDSN clients using Obspy 

7 

8Created on Fri Feb 4 15:53:21 2022 

9 

10@author: jpeacock 

11""" 

12# ============================================================================= 

13# Imports 

14# ============================================================================= 

15import copy 

16import time 

17from gzip import BadGzipFile 

18from pathlib import Path 

19 

20import numpy as np 

21import obspy 

22import pandas as pd 

23from loguru import logger 

24from mt_metadata.timeseries.stationxml import XMLInventoryMTExperiment 

25from obspy.clients.fdsn import Client as FDSNClient 

26 

27from mth5.clients.base import ClientBase 

28from mth5.mth5 import MTH5 

29from mth5.timeseries import RunTS 

30 

31 

32# ============================================================================= 

33 

34 

35class FDSN(ClientBase): 

def __init__(self, client: str = "IRIS", **kwargs) -> None:
    """
    Set up the FDSN client wrapper.

    Parameters
    ----------
    client : str, optional
        Name of the FDSN data center, stored on ``self.client``
        (default: "IRIS").
    **kwargs
        Forwarded to the parent initializer (after the current working
        directory, which is passed as first positional argument) and then
        also set as attributes on the instance.
    """
    self.logger = logger
    super().__init__(Path.cwd(), **kwargs)

    # column layout every request DataFrame has to follow
    self.request_columns = [
        "network",
        "station",
        "location",
        "channel",
        "start",
        "end",
    ]
    self.client = client
    self._streams = None

    # keyword arguments are applied last, so they override the defaults above
    for attr_name in kwargs:
        setattr(self, attr_name, kwargs[attr_name])

56 

57 def _validate_dataframe(self, df: pd.DataFrame | str | Path) -> pd.DataFrame: 

58 if not isinstance(df, pd.DataFrame): 

59 if isinstance(df, (str, Path)): 

60 fn = Path(df) 

61 if not fn.exists(): 

62 raise IOError(f"File {fn} does not exist. Check path") 

63 df = pd.read_csv(fn) 

64 df = df.fillna("") 

65 else: 

66 raise ValueError(f"Input must be a pandas.Dataframe not {type(df)}") 

67 if df.columns.to_list() != self.request_columns: 

68 raise ValueError( 

69 f"column names in file {df.columns} are not the expected " 

70 f"{self.request_columns}" 

71 ) 

72 return df 

73 

@property
def run_list_ne_stream_intervals_message(self) -> str:
    """Message explaining that requested runs and metadata runs differ in number."""
    fragments = (
        "More or less runs have been requested by the user",
        "than are defined in the metadata. Runs will be",
        "defined but only the requested run extents contain",
        "time series data based on the users request.",
    )
    return " ".join(fragments)

83 

84 def _loop_stations(self, stations: list[str], m: MTH5, survey_group=None) -> None: 

85 """ 

86 loop over stations 

87 """ 

88 for station_id in stations: 

89 self.wrangle_runs_into_containers(m, station_id, survey_group=survey_group) 

90 

91 def _run_010(self, unique_list: list[dict], m: MTH5, **kwargs) -> None: 

92 """ 

93 kwargs are supported just to make this a general function that can be 

94 kept in a dict and used as in process_list 

95 

96 Parameters 

97 ---------- 

98 unique_list 

99 m 

100 kwargs 

101 

102 Returns 

103 ------- 

104 

105 """ 

106 station_list = unique_list[0]["stations"] 

107 self._loop_stations(station_list, m) 

108 

109 def _run_020(self, unique_list: list[dict], m: MTH5, experiment=None) -> None: 

110 """ 

111 mt_metadata translates mt survey id into survey id if it (which?) is 

112 provided which will be different from the fdsn network id, so we need 

113 to map the fdsn networks onto the survey id. 

114 

115 Parameters 

116 ---------- 

117 unique_list 

118 m 

119 experiment 

120 

121 Returns 

122 ------- 

123 

124 """ 

125 survey_map = dict([(s.fdsn.network, s.id) for s in experiment.surveys]) 

126 

127 for survey_dict in unique_list: 

128 # get the mt survey id that maps to the fdsn network 

129 fdsn_network = survey_dict["network"] 

130 survey_id = survey_map[fdsn_network] 

131 survey_group = m.get_survey(survey_id) 

132 stations_list = survey_dict["stations"] 

133 self._loop_stations(stations_list, m, survey_group=survey_group) 

134 

135 def _process_list(self, experiment, unique_list: list[dict], m: MTH5) -> None: 

136 """ 

137 Routes job to correct processing based on mth5_version 

138 Maintainable way to handle future file versions and send them to their 

139 own processing functions if needed 

140 

141 Parameters 

142 ---------- 

143 experiment 

144 unique_list 

145 m 

146 

147 Returns 

148 ------- 

149 

150 """ 

151 

152 version_dict = {"0.1.0": self._run_010, "0.2.0": self._run_020} 

153 

154 process_run = version_dict[self.mth5_version] 

155 process_run(unique_list, m, experiment=experiment) 

156 

def get_run_list_from_station_id(
    self, m: MTH5, station_id: str, survey_id: str | None = None
) -> list[str]:
    """
    List the run groups of a station.

    Groups that live next to the runs but are not runs themselves are
    filtered out (introduced for issue #153; this might be better placed
    closer to the core of mth5).

    Parameters
    ----------
    m : MTH5
        Open MTH5 object.
    station_id : str
        Station identifier.
    survey_id : str, optional
        Survey identifier, passed on to ``m.get_station``.

    Returns
    -------
    run_list : list of str
        Names in the station's ``groups_list`` minus the ignored groups.
    """
    non_run_groups = (
        "Fourier_Coefficients",
        "Transfer_Functions",
        "Features",
    )
    station_group = m.get_station(station_id, survey_id)
    return [name for name in station_group.groups_list if name not in non_run_groups]

181 

def stream_boundaries(
    self, streams: obspy.Stream
) -> tuple[list[obspy.UTCDateTime], list[obspy.UTCDateTime]]:
    """
    Collect the distinct start and end times of the traces in a stream.

    Parameters
    ----------
    streams : obspy.core.stream.Stream
        Traces to inspect.

    Returns
    -------
    start_times, end_times : list of obspy.UTCDateTime
        Sorted, de-duplicated start times and end times.

    Raises
    ------
    ValueError
        If the number of distinct start times differs from the number of
        distinct end times.
    """
    # de-duplicate on the ISO strings, then sort them
    unique_starts = sorted({trace.stats.starttime.isoformat() for trace in streams})
    unique_ends = sorted({trace.stats.endtime.isoformat() for trace in streams})

    if len(unique_starts) != len(unique_ends):
        raise ValueError(
            f"Do not have the same number of start {len(unique_starts)}"
            f" and end times {len(unique_ends)} from streams"
        )

    return (
        [obspy.UTCDateTime(stamp) for stamp in unique_starts],
        [obspy.UTCDateTime(stamp) for stamp in unique_ends],
    )

208 

def get_station_streams(self, station_id: str) -> obspy.Stream:
    """
    Select the traces of one station from the stored streams.

    Parameters
    ----------
    station_id : str
        Station code passed to ``select(station=...)``.

    Returns
    -------
    obspy.Stream
        Result of the selection on ``self._streams``.
    """
    all_streams = self._streams
    return all_streams.select(station=station_id)

212 

def get_run_group(self, mth5_obj_or_survey, station_id: str, run_id: str):
    """
    Add (or fetch) a run group below a station.

    A v0.1.0 MTH5 object and a v0.2.0 survey group both expose
    ``stations_group``, so one code path serves both file versions; this
    is what lets a single ``wrangle_runs_into_containers`` replace the
    former v1/v2 pair.

    Parameters
    ----------
    mth5_obj_or_survey : mth5.mth5.MTH5 or mth5.groups.survey.SurveyGroup
        Container that owns the stations.
    station_id : str
        Station to look up.
    run_id : str
        Run passed to the station's ``add_run``.

    Returns
    -------
    run_group
        Whatever ``add_run`` returns for ``run_id``.
    """
    station_group = mth5_obj_or_survey.stations_group.get_station(station_id)
    return station_group.add_run(run_id)

238 

def pack_stream_into_run_group(self, run_group, run_stream: obspy.Stream):
    """
    Store an ObsPy stream in a run group.

    The stream is converted to a ``RunTS`` using the run group's existing
    metadata and the result is written into the group via ``from_runts``.

    Parameters
    ----------
    run_group
        Run group to fill.
    run_stream : obspy.Stream
        Waveform data for the run.

    Returns
    -------
    run_group
        The same run group that was passed in.
    """
    run_timeseries = RunTS()
    run_timeseries.from_obspy_stream(run_stream, run_group.metadata)
    run_group.from_runts(run_timeseries)
    return run_group

246 

def run_timings_match_stream_timing(
    self, run_group, stream_start: obspy.UTCDateTime, stream_end: obspy.UTCDateTime
) -> bool:
    """
    Check whether a stream interval lies inside a run's time period.

    Parameters
    ----------
    run_group : mth5.groups.run.RunGroup
        Run whose ``metadata.time_period`` is compared.
    stream_start : obspy.UTCDateTime
        Start of the stream interval.
    stream_end : obspy.UTCDateTime
        End of the stream interval.

    Returns
    -------
    bool
        True when ``stream_start`` is not before the run start and
        ``stream_end`` is not after the run end, False otherwise.
    """
    run_start = run_group.metadata.time_period.start
    run_end = run_group.metadata.time_period.end

    # time objects exposing isoformat() are converted to strings first
    if hasattr(run_start, "isoformat"):
        run_start = run_start.isoformat()
    if hasattr(run_end, "isoformat"):
        run_end = run_end.isoformat()

    starts_inside = stream_start >= obspy.UTCDateTime(run_start)
    ends_inside = stream_end <= obspy.UTCDateTime(run_end)
    return True if (starts_inside and ends_inside) else False

283 

def wrangle_runs_into_containers(
    self, m: MTH5, station_id: str, survey_group=None
) -> None:
    """
    Pack the waveform data of one station into its run groups.

    Run groups are taken from ``survey_group`` when one is given (file
    version 0.2.0) and from ``m`` otherwise (0.1.0). The run group is
    always added first so that metadata already present in the file is
    used when the stream is converted. Three situations are handled:

    1. The number of runs in the metadata equals the number of stream
       intervals: runs and intervals are paired in order.
    2. The metadata holds exactly one run but the data hold a different
       number of intervals: runs "001", "002", ... are created and the
       run and channel metadata of the first listed run (minus id and
       time period) are copied into every run after the first.
    3. Any other mismatch: each interval is stored in the first run whose
       time period contains it; an interval without such a run is
       reported with a warning and not stored.

    Parameters
    ----------
    m : MTH5
        Open MTH5 object.
    station_id : str
        Station whose streams are packed.
    survey_group : optional
        Survey group that owns the station (v0.2.0 files).
    """
    if survey_group is None:
        survey_id = None
        run_group_source = m
    else:
        survey_id = survey_group.metadata.id
        run_group_source = survey_group

    # get the streams for the given station
    msstreams = self.get_station_streams(station_id)
    trace_start_times, trace_end_times = self.stream_boundaries(msstreams)
    run_list = self.get_run_list_from_station_id(m, station_id, survey_id=survey_id)
    num_streams = len(trace_start_times)

    if len(run_list) == num_streams:
        # metadata matches the data: an easy pack
        for run_id, start, end in zip(run_list, trace_start_times, trace_end_times):
            run_group = self.get_run_group(run_group_source, station_id, run_id)
            self.pack_stream_into_run_group(run_group, msstreams.slice(start, end))

    elif len(run_list) == 1:
        # metadata is missing for all but one run: create runs
        # sequentially and reuse the metadata of the first run
        self.logger.warning(
            "Only one run in the StationXML, but multiple runs identified "
            "from the data. Using first run metadata and channel metadata "
            "for the other channels and runs except time periods."
        )
        og_run_group = self.get_run_group(run_group_source, station_id, run_list[0])
        intervals = zip(trace_start_times, trace_end_times)
        for run_num, (start, end) in enumerate(intervals, 1):
            run_group = self.get_run_group(
                run_group_source, station_id, f"{run_num:03}"
            )
            if run_num > 1:
                og_run_group_metadata_dict = og_run_group.metadata.to_dict()
                for key in ["id", "time_period.start", "time_period.end"]:
                    og_run_group_metadata_dict["run"].pop(key)
                run_group.metadata.from_dict(og_run_group_metadata_dict)
                run_group.write_metadata()

            run_group = self.pack_stream_into_run_group(
                run_group, msstreams.slice(start, end)
            )

            # update channels from run 1 metadata
            if run_num > 1:
                for ch in run_group.groups_list:
                    og_ch = og_run_group.get_channel(ch)
                    og_ch_metadata_dict = og_ch.metadata.to_dict(single=True)
                    # skip the start and end times
                    for key in ["time_period.start", "time_period.end"]:
                        og_ch_metadata_dict.pop(key)

                    new_ch = run_group.get_channel(ch)
                    new_ch.metadata.from_dict(og_ch_metadata_dict)
                    new_ch.write_metadata()

                # NOTE(review): nesting reconstructed from a listing that
                # lost its indentation; assumed to run once per copied run
                # after its channels were updated -- confirm.
                run_group.update_metadata()

    else:
        # the counts differ, so data or metadata are missing; match each
        # interval to a run by time span
        self.logger.warning(self.run_list_ne_stream_intervals_message)
        for start, end in zip(trace_start_times, trace_end_times):
            for run in run_list:
                run_group = self.get_run_group(run_group_source, station_id, run)
                if self.run_timings_match_stream_timing(run_group, start, end):
                    self.pack_stream_into_run_group(
                        run_group, msstreams.slice(start, end)
                    )
                    break
            else:
                # no run spans this interval: say so instead of dropping
                # the data silently
                self.logger.warning(
                    f"No run of station {station_id} spans the data interval "
                    f"{start} - {end}. The data were not stored."
                )

387 

388 def make_mth5_from_fdsn_client( 

389 self, 

390 df: pd.DataFrame | str | Path, 

391 path: str | Path | None = None, 

392 client: str | None = None, 

393 interact: bool = False, 

394 ) -> Path: 

395 """ 

396 Create an MTH5 file from an FDSN data center request. 

397 

398 Parameters 

399 ---------- 

400 df : pandas.DataFrame or str or Path 

401 DataFrame or path to CSV with columns: 

402 - 'network' : FDSN Network code 

403 - 'station' : FDSN Station code 

404 - 'location' : FDSN Location code 

405 - 'channel' : FDSN Channel code 

406 - 'start' : Start time YYYY-MM-DDThh:mm:ss 

407 - 'end' : End time YYYY-MM-DDThh:mm:ss 

408 path : str or Path, optional 

409 Path to save MTH5 file (default: current directory). 

410 client : str, optional 

411 FDSN client name (default: "IRIS"). 

412 interact : bool, optional 

413 Deprecated. If True, logs a warning (default: False). 

414 

415 Returns 

416 ------- 

417 file_name : Path 

418 Path to the created MTH5 file. 

419 

420 Raises 

421 ------ 

422 AttributeError 

423 If the input DataFrame is not properly formatted. 

424 ValueError 

425 If the values of the DataFrame are not correct. 

426 

427 Examples 

428 -------- 

429 >>> from mth5.clients.fdsn import FDSN 

430 >>> import pandas as pd 

431 >>> df = pd.DataFrame({ 

432 ... 'network': ['XX'], 

433 ... 'station': ['1234'], 

434 ... 'location': [''], 

435 ... 'channel': ['LHZ'], 

436 ... 'start': ['2022-01-01T00:00:00'], 

437 ... 'end': ['2022-01-02T00:00:00'] 

438 ... }) 

439 >>> client = FDSN() 

440 >>> file_path = client.make_mth5_from_fdsn_client(df) 

441 """ 

442 

443 if client is not None: 

444 self.client = client 

445 df = self._validate_dataframe(df) 

446 

447 unique_list = self.get_unique_networks_and_stations(df) 

448 if self.mth5_version in ["0.1.0"]: 

449 if len(unique_list) != 1: 

450 raise AttributeError("MTH5 supports one survey/network per container.") 

451 

452 # read in inventory and streams 

453 inv, streams = self.get_inventory_from_df(df, self.client) 

454 if interact: 

455 self.logger.warning( 

456 "Interact is deprecated. Open the returned file path. \n\t" 

457 "> with MTH5() as m:\n\t\tm.open_mth5(filepath)\n\t\tdo something." 

458 ) 

459 return self.make_mth5_from_inventory_and_streams(inv, streams, save_path=path) 

460 

@property
def streams(self):
    """obspy.Stream object holding the waveform data, or None if unset."""
    return self._streams

@streams.setter
def streams(self, streams):
    """
    Set the waveform data.

    Parameters
    ----------
    streams : obspy.Stream, str, Path, list or tuple, or None
        * ``obspy.Stream`` -- stored as is.
        * ``str`` or ``Path`` -- read with ``obspy.read``.
        * list/tuple whose items are file names and/or ``obspy.Stream``
          objects -- combined into one stream; an empty sequence gives
          an empty stream.
        * ``None`` -- the stored streams are left untouched.

    Raises
    ------
    TypeError
        If the input, or one of its items, is of any other type.
    """
    if streams is None:
        return
    if isinstance(streams, obspy.Stream):
        self._streams = streams
        return

    if isinstance(streams, (str, Path)):
        streams = [streams]
    if not isinstance(streams, (list, tuple)):
        raise TypeError("Cannot understand streams input.")

    # Start from an empty Stream: obspy.read() without a file name returns
    # ObsPy's example data, which would end up mixed into the user's data.
    combined = obspy.Stream()
    for item in streams:
        if isinstance(item, obspy.Stream):
            combined += item
        elif isinstance(item, (str, Path)):
            combined += obspy.read(item)
        else:
            raise TypeError("Cannot understand streams input.")
    self._streams = combined

482 

def make_mth5_from_inventory_and_streams(
    self,
    inventory: obspy.Inventory | str | Path,
    streams: obspy.Stream | list[str | Path],
    save_path: str | Path | None = None,
) -> Path:
    """
    Create an MTH5 file from an ObsPy Inventory and waveform streams.

    Parameters
    ----------
    inventory : obspy.Inventory or str or Path
        ObsPy Inventory object, or a path read with
        ``obspy.read_inventory``.
    streams : obspy.Stream or list of str or Path
        Waveform data, assigned to ``self.streams``.
    save_path : str or Path, optional
        Directory for the MTH5 file (default: current working directory).

    Returns
    -------
    file_name : Path
        ``filename`` reported by the MTH5 object.

    Raises
    ------
    TypeError
        If ``inventory`` is neither an Inventory nor a path.

    Examples
    --------
    >>> from mth5.clients.fdsn import FDSN
    >>> inv = ...  # ObsPy Inventory
    >>> streams = ...  # ObsPy Stream
    >>> client = FDSN()
    >>> file_path = client.make_mth5_from_inventory_and_streams(inv, streams)
    """
    if not isinstance(inventory, obspy.Inventory):
        if not isinstance(inventory, (str, Path)):
            raise TypeError(f"Cannot understand inventory type {type(inventory)}")
        inventory = obspy.read_inventory(inventory)

    save_path = Path.cwd() if save_path is None else Path(save_path)

    self.streams = streams

    # translate obspy.core.Inventory to an mt_metadata.timeseries.Experiment
    experiment = XMLInventoryMTExperiment().xml_to_mt(inventory)

    # the file name and the processing list come from the inventory itself
    retrieved_df = self.get_df_from_inventory(inventory)
    retrieved_unique_list = self.get_unique_networks_and_stations(retrieved_df)
    file_name = save_path.joinpath(self.make_filename(retrieved_df))

    # initiate MTH5 file
    with MTH5(**self.h5_kwargs) as m:
        m.open_mth5(file_name, self.mth5_file_mode)
        m.from_experiment(experiment)
        self._process_list(experiment, retrieved_unique_list, m)

    # NOTE(review): nesting reconstructed from a listing without
    # indentation; the return is assumed to follow the ``with`` block --
    # confirm against the original file.
    return m.filename

543 

def build_network_dict(self, df: pd.DataFrame, client: FDSNClient) -> dict:
    """
    Build a dictionary of networks keyed by network_id and start_time.

    One network-level inventory request is made for every distinct
    (network, start) pair in ``df``; later rows with the same pair are
    skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Request DataFrame.
    client : obspy.clients.fdsn.Client
        FDSN client instance.

    Returns
    -------
    networks : dict
        ``networks[network_code][start]`` is the network object returned
        for that request.

    Raises
    ------
    NotImplementedError
        If a request does not return exactly one network.

    Examples
    --------
    >>> networks = client.build_network_dict(df, client)
    """
    networks = {}
    for row in df.itertuples():
        by_start = networks.setdefault(row.network, {})
        if row.start in by_start:
            continue

        net_inv = _fdsn_client_get_inventory(client, row, response_level="network")
        # validate before indexing so that an empty reply raises the
        # descriptive error below rather than an IndexError
        if len(net_inv.networks) != 1:
            raise NotImplementedError(
                f"Expected a unique network associated with "
                f"{row.start}--{row.end}. "
                f"Instead found {len(net_inv.networks)} networks"
            )
        by_start[row.start] = net_inv.networks[0]
    return networks

589 

590 # def add_network_objects_to_request_df(self, df): 

591 # networks_dict = self.build_network_dict(df, client) 

592 # network_column = [networks[x.netork][x.start] for x in df.itertuples()] 

593 # df["network_object"] = network_column 

594 # return df 

595 

def build_station_dict(
    self,
    df: pd.DataFrame,
    client: FDSNClient,
    networks_dict: dict,
    max_tries: int = 10,
) -> dict:
    """
    Build a dictionary of stations keyed by network_id and start_time.

    For every (network, start) pair in ``networks_dict`` the matching
    rows of ``df`` are reduced to one row per station and a station-level
    inventory request is made for each of them.

    Parameters
    ----------
    df : pandas.DataFrame
        Request DataFrame. It is not modified.
    client : obspy.clients.fdsn.Client
        FDSN client instance.
    networks_dict : dict
        Dictionary of networks, ``{network: {start: ...}}``. Only its keys
        are used.
    max_tries : int, optional
        Maximum number of attempts per station request (default: 10).

    Returns
    -------
    stations : dict
        ``stations[network][start][station_code]`` is the first station
        of the first network in the returned inventory.

    Examples
    --------
    >>> stations = client.build_station_dict(df, client, networks_dict)
    """
    stations_dict = {}
    for network_id, by_start in networks_dict.items():
        stations_dict[network_id] = {}
        for start_time in by_start:
            wanted = (df.network == network_id) & (df.start == start_time)
            # work on a new frame: in-place edits on a slice of ``df``
            # trigger pandas' SettingWithCopy machinery
            sub_df = df[wanted].drop_duplicates("station").reset_index(drop=True)

            stations = {}
            for station_row in sub_df.itertuples():
                sta_inv = _fdsn_client_get_inventory(
                    client,
                    station_row,
                    response_level="station",
                    max_tries=max_tries,
                )
                stations[station_row.station] = sta_inv.networks[0].stations[0]
            stations_dict[network_id][start_time] = stations
    return stations_dict

645 

def get_waveforms_from_request_row(self, client: FDSNClient, row) -> obspy.Stream:
    """
    Retrieve waveform data for a request row.

    Parameters
    ----------
    client : obspy.clients.fdsn.Client
        FDSN client instance.
    row
        Request row exposing ``network``, ``station``, ``location``,
        ``channel``, ``start`` and ``end`` attributes.

    Returns
    -------
    streams : obspy.Stream
        Result of ``client.get_waveforms`` for the row.

    Examples
    --------
    >>> streams = client.get_waveforms_from_request_row(client, row)
    """
    time_window = (obspy.UTCDateTime(row.start), obspy.UTCDateTime(row.end))
    return client.get_waveforms(
        row.network, row.station, row.location, row.channel, *time_window
    )

672 

def get_inventory_from_df(
    self,
    df: pd.DataFrame | str | Path,
    client: str | None = None,
    data: bool = True,
    max_tries: int = 10,
) -> tuple[obspy.Inventory, obspy.Stream]:
    """
    Get an ObsPy Inventory and Stream from a DataFrame request.

    Parameters
    ----------
    df : pandas.DataFrame or str or Path
        DataFrame or path to CSV with columns:
            - 'network' : FDSN Network code
            - 'station' : FDSN Station code
            - 'location' : FDSN Location code
            - 'channel' : FDSN Channel code
            - 'start' : Start time YYYY-MM-DDThh:mm:ss
            - 'end' : End time YYYY-MM-DDThh:mm:ss
    client : str, optional
        FDSN client name; when given it replaces ``self.client``.
    data : bool, optional
        If True, retrieves waveform data (default: True).
    max_tries : int, optional
        Maximum number of attempts for each channel-level metadata
        request made by this method (default: 10).

    Returns
    -------
    inventory : obspy.Inventory
        Inventory of metadata requested.
    streams : obspy.Stream
        Stream of waveform data (empty when ``data`` is False).

    Examples
    --------
    >>> from mth5.clients.fdsn import FDSN
    >>> import pandas as pd
    >>> df = pd.DataFrame({
    ...     'network': ['XX'],
    ...     'station': ['1234'],
    ...     'location': [''],
    ...     'channel': ['LHZ'],
    ...     'start': ['2022-01-01T00:00:00'],
    ...     'end': ['2022-01-02T00:00:00']
    ... })
    >>> client = FDSN()
    >>> inv, streams = client.get_inventory_from_df(df)
    """
    if client is not None:
        self.client = client
    df = self._validate_dataframe(df)

    # get the metadata from an obspy client
    fdsn_client = FDSNClient(self.client)

    # empty containers to add to
    streams = obspy.Stream()
    inv = obspy.Inventory(networks=[], source="MTH5")

    # sort the values to be logically ordered; sort_values returns a new
    # frame, so the result has to be kept
    df = df.sort_values(self.request_columns[:-1])

    # Build helper dictionaries of networks and stations
    networks_dict = self.build_network_dict(df, fdsn_client)
    stations_dict = self.build_station_dict(df, fdsn_client, networks_dict)

    # Pack channels into stations
    for ch_row in df.itertuples():
        station_obj = stations_dict[ch_row.network][ch_row.start][ch_row.station]
        cha_inv = _fdsn_client_get_inventory(
            fdsn_client, ch_row, response_level="response", max_tries=max_tries
        )
        station_obj.channels.extend(cha_inv.networks[0].stations[0].channels)

        # get data if desired
        if data:
            streams += self.get_waveforms_from_request_row(fdsn_client, ch_row)

    # Pack the stations into networks
    for network_key, stations_by_start in stations_dict.items():
        for start_key, stations in stations_by_start.items():
            networks_dict[network_key][start_key].stations.extend(stations.values())

    # Pack the networks into the inventory
    for networks_by_start in networks_dict.values():
        inv.networks.extend(networks_by_start.values())
    return inv, streams

771 

def get_df_from_inventory(self, inventory: obspy.Inventory) -> pd.DataFrame:
    """
    Create a DataFrame from an ObsPy Inventory object.

    Parameters
    ----------
    inventory : obspy.Inventory
        ObsPy Inventory object.

    Returns
    -------
    df : pandas.DataFrame
        One row per channel with the columns of ``self.request_columns``:
        network code, station code, location code, channel code, channel
        start date and channel end date.

    Examples
    --------
    >>> df = client.get_df_from_inventory(inventory)
    """
    records = [
        (
            network.code,
            station.code,
            channel.location_code,
            channel.code,
            channel.start_date,
            channel.end_date,
        )
        for network in inventory.networks
        for station in network.stations
        for channel in station.channels
    ]
    return pd.DataFrame(records, columns=self.request_columns)

805 

def get_unique_networks_and_stations(self, df: pd.DataFrame) -> list[dict]:
    """
    Get unique networks and stations from a request DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Request DataFrame with "network" and "station" columns.

    Returns
    -------
    unique_list : list of dict
        One ``{"network": code, "stations": [...]}`` dictionary per
        distinct network, networks and stations in order of first
        appearance.

    Examples
    --------
    >>> unique_list = client.get_unique_networks_and_stations(df)
    """
    return [
        {
            "network": network_code,
            "stations": df[df.network == network_code].station.unique().tolist(),
        }
        for network_code in df["network"].unique()
    ]

833 

def make_filename(self, df: pd.DataFrame) -> str:
    """
    Make a filename from a request DataFrame of networks and stations.

    A user-defined ``self.mth5_filename`` (anything other than None or
    the default "from_client.h5") takes precedence and is returned
    unchanged.

    Parameters
    ----------
    df : pandas.DataFrame
        Request DataFrame.

    Returns
    -------
    filename : str
        Filename in the format network_01+stations_network_02+stations.h5.
        Station codes are converted with ``str`` so numeric codes (for
        example from a CSV file) are accepted.

    Examples
    --------
    >>> filename = client.make_filename(df)
    """
    custom_name = self.mth5_filename
    if custom_name is not None and custom_name != "from_client.h5":
        self.logger.info(f"Using user defined mth5 file name {custom_name}")
        return custom_name

    unique_list = self.get_unique_networks_and_stations(df)
    per_network = [
        f"{entry['network']}_{'_'.join(map(str, entry['stations']))}"
        for entry in unique_list
    ]
    return "_".join(per_network) + ".h5"

866 

def get_fdsn_channel_map(self) -> dict[str, str]:
    """
    Get mapping of FDSN channel codes to internal codes.

    Returns
    -------
    FDSN_CHANNEL_MAP : dict
        Newly built dictionary mapping FDSN channel codes (keys) to the
        codes used internally (values).

    Examples
    --------
    >>> channel_map = client.get_fdsn_channel_map()
    """
    return {
        "BQ2": "BQ1",
        "BQ3": "BQ2",
        "BQN": "BQ1",
        "BQE": "BQ2",
        "BQZ": "BQ3",
        "BT1": "BF1",
        "BT2": "BF2",
        "BT3": "BF3",
        "LQ2": "LQ1",
        "LQ3": "LQ2",
        "LT1": "LF1",
        "LT2": "LF2",
        "LT3": "LF3",
        "LFE": "LF1",
        "LFN": "LF2",
        "LFZ": "LF3",
        "LQE": "LQ1",
        "LQN": "LQ2",
    }

901 

902 

def _fdsn_client_get_inventory(client, row, response_level, max_tries=10):
    """
    Attempt to retrieve inventory from FDSN client with retries.

    The request is repeated, with a random pause of less than a second
    between attempts, while it fails with ``BadGzipFile``,
    ``XMLSyntaxError`` or ``ValueError``. All three response levels use
    the same retry loop.

    Parameters
    ----------
    client : obspy.clients.fdsn.Client
        FDSN client instance (e.g., EarthScope).
    row : pandas.core.frame.Pandas
        Row of a DataFrame specifying start/end times, station, network.
    response_level : {"network", "station", "response"}
        Level of response to request from FDSN client.
    max_tries : int, optional
        Maximum number of attempts (default: 10). At least one attempt
        is always made.

    Returns
    -------
    inventory : obspy.Inventory
        Retrieved inventory object.

    Raises
    ------
    ValueError
        If ``response_level`` is not one of the supported levels.
    BadGzipFile, XMLSyntaxError, ValueError
        The error of the last attempt, when every attempt failed.

    Examples
    --------
    >>> from obspy.clients.fdsn import Client
    >>> client = Client("IRIS")
    >>> row = ...  # DataFrame row with required fields
    >>> inv = _fdsn_client_get_inventory(client, row, "network")
    """
    from lxml.etree import XMLSyntaxError

    # what to ask for, and how to name the request in log messages
    if response_level == "network":
        label = f"Network {row.network}"
        query = {"network": row.network}
    elif response_level == "station":
        label = f"Station {row.network}-{row.station}"
        query = {"network": row.network, "station": row.station}
    elif response_level == "response":  # channel level
        label = f"Channel {row.network}-{row.station}-{row.channel}"
        query = {
            "network": row.network,
            "station": row.station,
            "loc": row.location,
            "channel": row.channel,
        }
    else:
        raise ValueError(
            f"Unsupported response_level {response_level!r}. "
            "Expected 'network', 'station' or 'response'."
        )

    attempts = max(1, max_tries)
    for attempt in range(1, attempts + 1):
        try:
            return client.get_stations(
                row.start, row.end, level=response_level, **query
            )
        except (BadGzipFile, XMLSyntaxError, ValueError) as error:
            logger.error(f"{error}")
            logger.warning(
                f"Failed to get {label} inventory try {attempt} of {attempts}"
            )
            if attempt == attempts:
                # out of attempts: surface the real failure instead of
                # falling through to an unbound return value
                raise
            # sleep for a fraction of a second before trying again
            sleep_time = np.random.randint(0, 100) * 0.01
            logger.info(f"Sleeping for {sleep_time}s")
            time.sleep(sleep_time)