Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\standards.py: 58%

84 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Created on Wed Dec 23 17:05:33 2020 

4 

5:copyright: 

6 Jared Peacock (jpeacock@usgs.gov) 

7 

8:license: MIT 

9 

10""" 

11 

12# ============================================================================= 

13# Imports 

14# ============================================================================= 

15from __future__ import annotations 

16 

17import inspect 

18from typing import Any, Optional 

19 

20import numpy as np 

21from mt_metadata import timeseries 

22from mt_metadata.base import BaseDict 

23from mt_metadata.timeseries import filters 

24from mt_metadata.utils.summarize import summarize_standards 

25from mt_metadata.utils.validators import validate_attribute 

26 

27from mth5 import STANDARDS_DTYPE 

28from mth5.groups.base import BaseGroup 

29from mth5.tables import MTH5Table 

30from mth5.utils.exceptions import MTH5TableError 

31 

32 

# Name -> class lookup tables built from every class that ``inspect`` finds
# in the ``timeseries`` and ``filters`` namespaces imported above.
ts_classes = {
    name: klass for name, klass in inspect.getmembers(timeseries, inspect.isclass)
}
flt_classes = {
    name: klass for name, klass in inspect.getmembers(filters, inspect.isclass)
}

35 

36 

37# ============================================================================= 

38# Summarize standards 

39# ============================================================================= 

def summarize_metadata_standards() -> BaseDict:
    """
    Collect the attribute definitions of the metadata classes into one dict.

    A default instance of each time series class (Survey, Station, Run,
    Electric, Magnetic, Auxiliary) and of each filter class
    (CoefficientFilter, FIRFilter, FrequencyResponseTableFilter,
    PoleZeroFilter, TimeDelayFilter) is created, and a copy of its
    ``_attr_dict`` is added to a single :class:`BaseDict`.

    Returns
    -------
    BaseDict
        Dictionary holding the attribute definitions of all the classes
        listed above.  Time series entries are added under the lower-case
        class key (e.g. ``"survey"``); filter entries are added under
        ``validate_attribute(<class name>)``.

    Notes
    -----
    Each ``_attr_dict`` is copied (shallow ``.copy()``) before being added
    so that the merge does not add entries to the instance's own dictionary.

    Examples
    --------
    >>> standards = summarize_metadata_standards()
    >>> 'survey' in standards
    True
    >>> 'electric' in standards
    True
    """
    timeseries_keys = (
        "survey",
        "station",
        "run",
        "electric",
        "magnetic",
        "auxiliary",
    )
    filter_prefixes = (
        "Coefficient",
        "FIR",
        "FrequencyResponseTable",
        "PoleZero",
        "TimeDelay",
    )

    standards = BaseDict()

    # time series classes are registered under their capitalized name
    for ts_key in timeseries_keys:
        instance = ts_classes[ts_key.capitalize()]()
        standards.add_dict(instance._attr_dict.copy(), ts_key)

    # filter classes all share the "Filter" suffix
    for prefix in filter_prefixes:
        class_name = prefix + "Filter"
        instance = flt_classes[class_name]()
        standards.add_dict(
            instance._attr_dict.copy(), validate_attribute(class_name)
        )

    return standards

93 

94 

95# ============================================================================= 

96# Standards Group 

97# ============================================================================= 

98 

99 

class StandardsGroup(BaseGroup):
    """
    Container for metadata standards documentation stored in the HDF5 file.

    The standards are kept in a table stored as a dataset named ``summary``
    inside this group, so that users can look up what a metadata attribute
    means directly from the file without external documentation.

    Attributes
    ----------
    summary_table : MTH5Table
        The standards summary table, wrapping the ``summary`` dataset with
        ``STANDARDS_DTYPE``.

    Notes
    -----
    By default the table is filled from these metadata modules: common,
    timeseries, timeseries.filters, transfer_functions.tf, features,
    features.weights, processing, processing.fourier_coefficients and
    processing.aurora.

    Examples
    --------
    >>> with MTH5('survey.mth5') as mth5_obj:
    ...     standards = mth5_obj.standards_group
    ...     summary = standards.summary_table
    ...     print(summary.array.dtype.names)
    ('attribute', 'type', 'required', 'style', 'units', 'description', ...)

    Get information about a specific attribute:

    >>> standards.get_attribute_information('survey.release_license')
    survey.release_license
    --------------------------
        type:          string
        required:      True
        style:         controlled vocabulary
        ...
    """

    def __init__(self, group: Any, **kwargs: Any) -> None:
        """
        Initialize StandardsGroup.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group to manage standards data.
        **kwargs : Any
            Additional keyword arguments passed to BaseGroup.
        """
        super().__init__(group, **kwargs)

        # parameters used to create the summary dataset in initialize_group
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": STANDARDS_DTYPE,
        }

        # metadata modules summarized when no explicit list is requested
        self._modules = [
            "common",
            "timeseries",
            "timeseries.filters",
            "transfer_functions.tf",
            "features",
            "features.weights",
            "processing",
            "processing.fourier_coefficients",
            "processing.aurora",
        ]

    @property
    def summary_table(self) -> MTH5Table:
        """Standards summary table; a new wrapper is built on each access."""
        return self._get_summary_table()

    def _get_summary_table(self) -> MTH5Table:
        """
        Get the standards summary table from HDF5.

        Returns
        -------
        MTH5Table
            The MTH5Table object wrapping the ``summary`` dataset of this
            group, using ``STANDARDS_DTYPE``.
        """
        return MTH5Table(self.hdf5_group["summary"], STANDARDS_DTYPE)

    def get_attribute_information(self, attribute_name: str) -> None:
        """
        Print detailed information about a metadata attribute.

        Looks up ``attribute_name`` in the ``attribute`` column of the
        summary table and prints every remaining column of the matching row
        as a ``name: value`` line.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute to describe (e.g., 'survey.release_license').

        Raises
        ------
        MTH5TableError
            If the attribute is not found in the standards summary table.

        Notes
        -----
        The printed fields are all columns of the table after the first
        (``attribute``) column.  Byte-string values are decoded before
        printing.

        Examples
        --------
        Illustrative output:

        >>> standards = mth5_obj.standards_group
        >>> standards.get_attribute_information('survey.release_license')
        survey.release_license
        --------------------------
            type:          string
            required:      True
            style:         controlled vocabulary
            units:
            description:   How the data can be used. The options are based on
                           Creative Commons licenses.
            options:       CC-0,CC-BY,CC-BY-SA,CC-BY-ND,CC-BY-NC-SA
            alias:
            example:       CC-0
            default:       CC-0
        """
        # build the table wrapper once instead of once per property access
        summary_table = self.summary_table

        find = summary_table.locate("attribute", attribute_name)
        if len(find) == 0:
            msg = f"Could not find {attribute_name} in standards."
            self.logger.error(msg)
            raise MTH5TableError(msg)

        meta_item = summary_table.array[find]
        lines = ["", attribute_name, "-" * (len(attribute_name) + 4)]
        # skip the first column, it is the attribute name already printed
        for name, value in zip(meta_item.dtype.names[1:], meta_item.item()[1:]):
            if isinstance(value, (bytes, np.bytes_)):
                value = value.decode()
            lines.append("\t{0:<14} {1}".format(name + ":", value))
        print("\n".join(lines))

    def summary_table_from_dict(self, summary_dict: dict[str, Any]) -> None:
        """
        Populate summary table from a dictionary of metadata standards.

        Every item of ``summary_dict`` becomes one row of the summary table.

        Parameters
        ----------
        summary_dict : dict[str, Any]
            Dictionary of metadata standards.  Keys are attribute names,
            values are dictionaries that contain one entry for every column
            of the summary table after the first one.

        Raises
        ------
        KeyError
            If a value dictionary is missing one of the table columns.

        Notes
        -----
        Processes dictionary values:

        - Lists are converted to comma-separated strings (empty list
          becomes an empty string)
        - None values become empty strings

        An empty ``summary_dict`` adds nothing and logs zero rows.

        TODO
        ----
        Adapt method to accept pandas.DataFrame as alternative input.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> metadata = summarize_metadata_standards()
        >>> standards.summary_table_from_dict(metadata)
        """
        # build the wrapper and the column list once, they do not change
        # while rows are appended
        summary_table = self.summary_table
        table_dtype = summary_table.dtype
        column_names = table_dtype.names[1:]

        n_rows = 0
        for key, v_dict in summary_dict.items():
            row = [key]
            for dkey in column_names:
                value = v_dict[dkey]

                if isinstance(value, list):
                    # an empty list joins to an empty string
                    value = ",".join(f"{ii}" for ii in value)
                elif value is None:
                    value = ""
                row.append(value)

            summary_table.add_row(np.array([tuple(row)], table_dtype))
            n_rows += 1

        self.logger.debug(f"Added {n_rows} rows to Standards Group")

    def get_standards_summary(self, modules: Optional[list[str]] = None) -> np.ndarray:
        """
        Get standards for specified metadata modules.

        Calls ``summarize_standards`` for each module and concatenates the
        resulting arrays.

        Parameters
        ----------
        modules : list[str], optional
            List of module names to include (e.g., 'timeseries').
            If None, uses default modules: common, timeseries, timeseries.filters,
            transfer_functions.tf, features, features.weights, processing,
            processing.fourier_coefficients, processing.aurora.
            Default is None.

        Returns
        -------
        np.ndarray
            Concatenated array of the per-module summaries, requested with
            ``dtype=STANDARDS_DTYPE``.  An empty ``modules`` list gives an
            empty array of ``STANDARDS_DTYPE``.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> ts_standards = standards.get_standards_summary(['timeseries'])

        Get all default modules:

        >>> all_standards = standards.get_standards_summary()
        """
        if modules is None:
            modules = self._modules

        summaries = [
            summarize_standards(module, output_type="array", dtype=STANDARDS_DTYPE)
            for module in modules
        ]

        # np.concatenate raises ValueError on an empty sequence
        if not summaries:
            return np.empty(0, dtype=STANDARDS_DTYPE)

        return np.concatenate(summaries)

    def summary_table_from_array(self, array: np.ndarray) -> None:
        """
        Populate summary table from a numpy structured array.

        Parameters
        ----------
        array : np.ndarray
            Structured numpy array, expected to have a dtype matching
            STANDARDS_DTYPE.  Each element is added as one row.

        Notes
        -----
        Iterates through all elements of the array with ``np.nditer`` and
        adds them sequentially to the summary table using add_row().  A
        zero-sized array adds nothing and logs zero rows.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> standards_array = standards.get_standards_summary()
        >>> standards.summary_table_from_array(standards_array)
        """
        summary_table = self._get_summary_table()

        n_rows = 0
        # np.nditer refuses zero-sized operands unless told otherwise, so
        # skip the iteration entirely for an empty array
        if array.size > 0:
            for row in np.nditer(array):
                summary_table.add_row(row)
                n_rows += 1

        self.logger.debug(f"Added {n_rows} rows to Standards Group")

    def initialize_group(self) -> None:
        """
        Initialize the standards group and create the summary table.

        Creates the summary dataset in the HDF5 group, fills it with the
        standards of all default modules and writes the group metadata.

        Notes
        -----
        Initialization process:

        1. Creates an empty HDF5 dataset for the summary table with a
           maximum shape of ``(1000,)`` and ``STANDARDS_DTYPE``
        2. Passes ``dataset_options`` to the dataset creation when a
           compression is configured
        3. Sets HDF5 attributes: type, last_updated, reference
        4. Populates table with standards from all default modules
        5. Writes group metadata to HDF5

        Examples
        --------
        >>> standards_group.initialize_group()
        >>> summary_table = standards_group.summary_table
        >>> print(summary_table.array.shape)
        """
        defaults = self._defaults_summary_attrs

        # dataset options are only forwarded when a compression is set
        if self.dataset_options["compression"] is None:
            extra_options = {}
        else:
            extra_options = self.dataset_options

        summary_dataset = self.hdf5_group.create_dataset(
            defaults["name"],
            (0,),
            maxshape=defaults["max_shape"],
            dtype=defaults["dtype"],
            **extra_options,
        )

        summary_dataset.attrs.update(
            {
                "type": "summary table",
                # NOTE(review): "date_time" looks like a placeholder rather
                # than an actual time stamp -- confirm intended value
                "last_updated": "date_time",
                "reference": summary_dataset.ref,
            }
        )

        self.logger.debug(
            f"Created {defaults['name']} table with "
            f"max_shape = {defaults['max_shape']}, "
            f"dtype={defaults['dtype']}"
        )
        self.logger.debug(
            "used options: "
            + "; ".join(f"{k} = {v}" for k, v in self.dataset_options.items())
        )

        self.summary_table_from_array(self.get_standards_summary())

        self.write_metadata()