Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\survey.py: 68%

154 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2from __future__ import annotations 

3 

4 

5"""Survey-level HDF5 helpers for MTH5.""" 

6 

7from typing import Any 

8 

9import h5py 

10 

11# ============================================================================= 

12# Imports 

13# ============================================================================= 

14import numpy as np 

15import pandas as pd 

16from mt_metadata.timeseries import Survey 

17 

18from mth5.groups import ( 

19 BaseGroup, 

20 FiltersGroup, 

21 MasterStationGroup, 

22 ReportsGroup, 

23 StandardsGroup, 

24) 

25from mth5.helpers import to_numpy_type, validate_name 

26from mth5.utils.exceptions import MTH5Error 

27 

28 

29# ============================================================================= 

30# Survey Group 

31# ============================================================================= 

class MasterSurveyGroup(BaseGroup):
    """Collection helper for surveys under ``Experiment/Surveys``.

    Provides helpers to add, fetch, or remove surveys and to summarize all
    channels in the experiment.

    Examples
    --------
    >>> from mth5 import mth5
    >>> m5 = mth5.MTH5()
    >>> _ = m5.open_mth5("/tmp/example.mth5", mode="a")
    >>> surveys = m5.surveys_group
    >>> _ = surveys.add_survey("survey_01")
    >>> surveys.channel_summary.head()  # doctest: +SKIP
    """

    def __init__(self, group: h5py.Group, **kwargs: Any) -> None:
        super().__init__(group, **kwargs)

    @property
    def channel_summary(self) -> pd.DataFrame:
        """Return a DataFrame summarizing all channels across surveys.

        Walks every survey -> station -> run -> channel and emits one row per
        channel.  The column schema is fixed, so a file without any channels
        yields an empty DataFrame that still carries the documented columns.

        Returns
        -------
        pandas.DataFrame
            Columns include survey, station, run, location, component,
            start/end, sample info, orientation, units, and HDF5 reference.

        Examples
        --------
        >>> summary = surveys.channel_summary
        >>> set(summary.columns) >= {"survey", "station", "run", "component"}
        True
        """
        # Built once instead of once per channel.
        # NOTE(review): the fixed-width unicode fields (U10/U11/...) make
        # NumPy silently truncate longer ids -- confirm the widths are
        # sufficient for real survey/station/run names.
        entry_dtype = np.dtype(
            [
                ("survey", "U10"),
                ("station", "U10"),
                ("run", "U11"),
                ("latitude", float),
                ("longitude", float),
                ("elevation", float),
                ("component", "U20"),
                ("start", "datetime64[ns]"),
                ("end", "datetime64[ns]"),
                ("n_samples", int),
                ("sample_rate", float),
                ("measurement_type", "U12"),
                ("azimuth", float),
                ("tilt", float),
                ("units", "U25"),
                ("hdf5_reference", h5py.ref_dtype),
            ]
        )

        rows = []
        for survey in self.groups_list:
            survey_group = self.get_survey(survey)
            # Metadata lookups are hoisted to the level at which they are
            # invariant so they are not repeated for every channel.
            survey_id = survey_group.metadata.id
            stations_group = survey_group.stations_group
            for station in stations_group.groups_list:
                station_group = stations_group.get_station(station)
                station_metadata = station_group.metadata
                station_id = station_metadata.id
                location = station_metadata.location
                for run in station_group.groups_list:
                    run_group = station_group.get_run(run)
                    run_id = run_group.metadata.id
                    for ch in run_group.groups_list:
                        ch_dataset = run_group.get_channel(ch)
                        ch_metadata = ch_dataset.metadata
                        rows.append(
                            (
                                survey_id,
                                station_id,
                                run_id,
                                location.latitude,
                                location.longitude,
                                location.elevation,
                                ch_metadata.component,
                                ch_metadata.time_period.start,
                                ch_metadata.time_period.end,
                                ch_dataset.hdf5_dataset.size,
                                ch_metadata.sample_rate,
                                ch_metadata.type,
                                ch_metadata.measurement_azimuth,
                                ch_metadata.measurement_tilt,
                                ch_metadata.units,
                                ch_dataset.hdf5_dataset.ref,
                            )
                        )

        # A single structured array keeps the column names even when
        # ``rows`` is empty.
        return pd.DataFrame(np.array(rows, dtype=entry_dtype))

    def add_survey(
        self, survey_name: str, survey_metadata: Survey | None = None
    ) -> "SurveyGroup":
        """Add or fetch a survey at ``/Experiment/Surveys/<name>``.

        Parameters
        ----------
        survey_name : str
            Survey identifier; validated with ``validate_name``.
        survey_metadata : Survey, optional
            Metadata container used to seed the survey attributes.

        Returns
        -------
        SurveyGroup
            Wrapper for the created or existing survey.

        Raises
        ------
        ValueError
            If ``survey_name`` is empty.
        MTH5Error
            If the provided metadata id conflicts with the group name of a
            survey that is about to be created.

        Examples
        --------
        >>> survey = surveys.add_survey("survey_01")
        >>> survey.metadata.id
        'survey_01'
        """
        if not survey_name:
            raise ValueError("survey name is None, do not know what to name it")
        survey_name = validate_name(survey_name)

        # Check the metadata id *before* creating anything: failing after
        # ``create_group`` would leave an empty, uninitialized survey group in
        # the file that later calls would return as "already existing".
        # An already existing survey is still returned as before, regardless
        # of the metadata passed in.
        if survey_metadata is not None and survey_name not in self.hdf5_group:
            if validate_name(survey_metadata.id) != survey_name:
                msg = (
                    f"survey group name {survey_name} must be same as "
                    f"survey id {survey_metadata.id.replace(' ', '_')}"
                )
                self.logger.error(msg)
                raise MTH5Error(msg)

        try:
            survey_group = self.hdf5_group.create_group(survey_name)
            self.logger.debug(f"Created group {survey_group.name}")

            if survey_metadata is None:
                survey_metadata = Survey(id=survey_name)
            survey_obj = SurveyGroup(
                survey_group,
                survey_metadata=survey_metadata,
                **self.dataset_options,
            )
            survey_obj.initialize_group()
        except ValueError:
            # ``create_group`` raised (treated as "name already taken"); fall
            # back to the existing group.
            msg = f"survey {survey_name} already exists, returning existing group."
            self.logger.info(msg)
            survey_obj = self.get_survey(survey_name)
        return survey_obj

    def get_survey(self, survey_name: str) -> "SurveyGroup":
        """Return an existing survey by name.

        Parameters
        ----------
        survey_name : str
            Existing survey name.

        Returns
        -------
        SurveyGroup
            Wrapper for the requested survey.

        Raises
        ------
        MTH5Error
            If the survey does not exist.

        Examples
        --------
        >>> existing = surveys.get_survey("survey_01")
        >>> existing.metadata.id
        'survey_01'
        """
        survey_name = validate_name(survey_name)

        try:
            return SurveyGroup(self.hdf5_group[survey_name], **self.dataset_options)
        except KeyError as error:
            msg = f"{survey_name} does not exist, check survey_list for existing names"
            self.logger.exception(msg)
            raise MTH5Error(msg) from error

    def remove_survey(self, survey_name: str) -> None:
        """Delete a survey reference from the file.

        Parameters
        ----------
        survey_name : str
            Existing survey name.

        Raises
        ------
        MTH5Error
            If the survey does not exist.

        Notes
        -----
        HDF5 deletion removes the reference only; storage is not reclaimed.

        Examples
        --------
        >>> surveys.remove_survey("survey_01")
        """
        survey_name = validate_name(survey_name)

        try:
            del self.hdf5_group[survey_name]
            self.logger.info(
                "Deleting a survey does not reduce the HDF5 "
                "file size it simply remove the reference. If "
                "file size reduction is your goal, simply copy"
                " what you want into another file."
            )
        except KeyError as error:
            msg = f"{survey_name} does not exist, check survey_list for existing names"
            self.logger.exception(msg)
            raise MTH5Error(msg) from error

249 

250 

class SurveyGroup(BaseGroup):
    """Wrapper for a single survey at ``Experiment/Surveys/<id>``.

    Handles survey-level metadata, child groups (stations, reports, filters,
    standards), and synchronization utilities.

    Examples
    --------
    >>> survey = surveys.add_survey("survey_01")
    >>> survey.metadata.id
    'survey_01'
    """

    def __init__(
        self,
        group: h5py.Group,
        survey_metadata: Survey | None = None,
        **kwargs: Any,
    ) -> None:
        super().__init__(group, group_metadata=survey_metadata, **kwargs)

        # Child groups created by ``initialize_group``; each name maps to a
        # ``<name.lower()>_group`` accessor on this class.
        self._default_subgroup_names = [
            "Stations",
            "Reports",
            "Filters",
            "Standards",
        ]

    def initialize_group(self, **kwargs: Any) -> None:
        """Create default subgroups and write survey metadata.

        Parameters
        ----------
        **kwargs
            Additional attributes to set on the instance before the metadata
            is written.

        Examples
        --------
        >>> survey.initialize_group()
        """
        # need to make groups first because metadata pulls from them.
        for group_name in self._default_subgroup_names:
            self.hdf5_group.create_group(group_name)
            m5_grp = getattr(self, f"{group_name.lower()}_group")
            m5_grp.initialize_group()

        for key, value in kwargs.items():
            setattr(self, key, value)
        self.write_metadata()

    @BaseGroup.metadata.getter
    def metadata(self) -> Survey:
        """Survey metadata enriched with station and filter information.

        Reads the stored metadata on first access, then adds any station and
        filter found in the child groups that the metadata object does not
        hold yet.  A missing ``Stations`` or ``Filters`` child group is only
        logged at debug level.
        """
        if not self._has_read_metadata:
            self.read_metadata()
            self._has_read_metadata = True

        try:
            # Build the wrapper and list its children once per access.
            stations_group = self.stations_group
            station_keys = stations_group.groups_list
            if station_keys != self._metadata.station_names:
                for key in station_keys:
                    try:
                        station_metadata = stations_group.get_station(key).metadata
                        if station_metadata.id in self._metadata.stations.keys():
                            continue
                        # skip non-station groups like Features, FCs,
                        # TransferFunction
                        if station_metadata.mth5_type.lower() != "station":
                            continue
                        self._metadata.add_station(station_metadata)
                    except MTH5Error:
                        self.logger.warning(f"Could not find station {key}")
        except KeyError:
            self.logger.debug(
                "Stations Group does not exists yet. Metadata contains no station information"
            )

        try:
            filters_group = self.filters_group
            # Fetch the filter dictionary once instead of per comparison/loop.
            filter_dict = filters_group.filter_dict
            if list(filter_dict.keys()) != list(self._metadata.filters.keys()):
                for key in filter_dict.keys():
                    try:
                        if key in self._metadata.filters.keys():
                            continue
                        filter_obj = filters_group.to_filter_object(key)
                        self._metadata.filters[key] = filter_obj
                    except MTH5Error:
                        self.logger.warning(f"Could not find filter {key}")

        except KeyError:
            self.logger.debug(
                "Filters Group does not exists yet. Metadata contains no filter information"
            )
        return self._metadata

    def write_metadata(self) -> None:
        """Write HDF5 attributes from the survey metadata object.

        Each flattened metadata entry is converted with ``to_numpy_type`` and
        stored as an attribute on the survey group.  Errors whose message
        indicates a read-only file are downgraded to a warning.

        Raises
        ------
        KeyError, ValueError
            Re-raised unchanged when the error is not one of the recognized
            read-only messages.
        """
        try:
            for key, value in self._metadata.to_dict(single=True).items():
                value = to_numpy_type(value)
                self.logger.debug(f"wrote metadata {key} = {value}")
                self.hdf5_group.attrs.create(key, value)
            self._has_read_metadata = True
        except KeyError as key_error:
            if "no write intent" in str(key_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                # bare ``raise`` keeps the original exception and traceback
                raise
        except ValueError as value_error:
            if "Unable to synchronously create group" in str(value_error):
                self.logger.warning("File is in read-only mode, cannot write metadata.")
            else:
                raise

    @property
    def stations_group(self) -> MasterStationGroup:
        """Convenience accessor for ``/Survey/Stations`` group."""
        # Forward the dataset options like the other child-group accessors.
        # NOTE(review): assumes MasterStationGroup accepts these keyword
        # arguments the same way the sibling groups do -- confirm.
        return MasterStationGroup(self.hdf5_group["Stations"], **self.dataset_options)

    @property
    def filters_group(self) -> FiltersGroup:
        """Convenience accessor for ``/Survey/Filters`` group."""
        return FiltersGroup(self.hdf5_group["Filters"], **self.dataset_options)

    @property
    def reports_group(self) -> ReportsGroup:
        """Convenience accessor for ``/Survey/Reports`` group."""
        return ReportsGroup(self.hdf5_group["Reports"], **self.dataset_options)

    @property
    def standards_group(self) -> StandardsGroup:
        """Convenience accessor for ``/Survey/Standards`` group."""
        return StandardsGroup(self.hdf5_group["Standards"], **self.dataset_options)

    def update_survey_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Deprecated alias for :py:meth:`update_metadata`.

        Raises
        ------
        DeprecationWarning
            Always raised to direct callers to ``update_metadata``.

        Examples
        --------
        >>> survey.update_survey_metadata()  # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        DeprecationWarning: 'update_survey_metadata' has been deprecated use 'update_metadata()'
        """
        raise DeprecationWarning(
            "'update_survey_metadata' has been deprecated use 'update_metadata()'"
        )

    def update_metadata(self, survey_dict: dict[str, Any] | None = None) -> None:
        """Synchronize survey metadata from station summaries.

        Parameters
        ----------
        survey_dict : dict, optional
            Additional metadata values to merge before synchronization.

        Notes
        -----
        Updates survey start/end dates and bounding box from station summaries,
        then writes metadata to HDF5.  When the station summary is empty only
        the (optionally merged) metadata is written.

        Examples
        --------
        >>> _ = survey.update_metadata()
        >>> survey.metadata.time_period.start_date  # doctest: +SKIP
        '2020-01-01'
        """
        station_summary = self.stations_group.station_summary.copy()
        self.logger.debug("Updating survey metadata from stations summary table")

        if survey_dict:
            self.metadata.from_dict(survey_dict, skip_none=True)

        if not len(station_summary):  # if station info is empty df, skip parsing
            self.write_metadata()
            return

        # Keep only the date part (before the "T") of the ISO timestamps.
        self._metadata.time_period.start_date = (
            station_summary.start.min().isoformat().split("T")[0]
        )
        self._metadata.time_period.end_date = (
            station_summary.end.max().isoformat().split("T")[0]
        )

        # Bounding box: north-west is (max latitude, min longitude),
        # south-east is (min latitude, max longitude).
        self._metadata.northwest_corner.latitude = station_summary.latitude.max()
        self._metadata.northwest_corner.longitude = station_summary.longitude.min()
        self._metadata.southeast_corner.latitude = station_summary.latitude.min()
        self._metadata.southeast_corner.longitude = station_summary.longitude.max()

        # Persist the refreshed time period and bounding box.
        self.write_metadata()