Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\groups\standards.py: 58%
84 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Created on Wed Dec 23 17:05:33 2020
5:copyright:
6 Jared Peacock (jpeacock@usgs.gov)
8:license: MIT
10"""
12# =============================================================================
13# Imports
14# =============================================================================
15from __future__ import annotations
17import inspect
18from typing import Any, Optional
20import numpy as np
21from mt_metadata import timeseries
22from mt_metadata.base import BaseDict
23from mt_metadata.timeseries import filters
24from mt_metadata.utils.summarize import summarize_standards
25from mt_metadata.utils.validators import validate_attribute
27from mth5 import STANDARDS_DTYPE
28from mth5.groups.base import BaseGroup
29from mth5.tables import MTH5Table
30from mth5.utils.exceptions import MTH5TableError
# Name -> class lookup tables for every class exposed by the metadata
# modules, so a class can be fetched by its string name further down.
ts_classes = {
    name: cls for name, cls in inspect.getmembers(timeseries, inspect.isclass)
}
flt_classes = {
    name: cls for name, cls in inspect.getmembers(filters, inspect.isclass)
}
37# =============================================================================
38# Summarize standards
39# =============================================================================
def summarize_metadata_standards() -> BaseDict:
    """
    Build one flattened dictionary of the metadata standards.

    Instantiates each supported timeseries metadata class and each
    supported filter class with no arguments, then adds a copy of the
    instance's ``_attr_dict`` to a single :class:`BaseDict`.

    Returns
    -------
    BaseDict
        Dictionary holding the attribute definitions of Survey, Station,
        Run, Electric, Magnetic and Auxiliary, followed by those of the
        Coefficient, FIR, FrequencyResponseTable, PoleZero and TimeDelay
        filters.

    Notes
    -----
    Every ``_attr_dict`` is duplicated with ``.copy()`` before it is handed
    to ``add_dict`` so that entries are not inserted into the dictionary
    owned by the metadata object itself.

    Timeseries entries are added under the lower-case level name
    (e.g. ``"survey"``); filter entries are added under
    ``validate_attribute("<Kind>Filter")``.  The final key layout is
    decided by ``BaseDict.add_dict``.

    Examples
    --------
    >>> standards = summarize_metadata_standards()
    """
    timeseries_levels = (
        "survey",
        "station",
        "run",
        "electric",
        "magnetic",
        "auxiliary",
    )
    filter_kinds = (
        "Coefficient",
        "FIR",
        "FrequencyResponseTable",
        "PoleZero",
        "TimeDelay",
    )

    summary_dict = BaseDict()

    # timeseries classes are registered under their capitalized name
    for level in timeseries_levels:
        metadata_obj = ts_classes[level.capitalize()]()
        summary_dict.add_dict(metadata_obj._attr_dict.copy(), level)

    # filter classes all carry a "Filter" suffix on the class name
    for kind in filter_kinds:
        class_name = f"{kind}Filter"
        filter_obj = flt_classes[class_name]()
        summary_dict.add_dict(
            filter_obj._attr_dict.copy(), validate_attribute(class_name)
        )

    return summary_dict
95# =============================================================================
96# Standards Group
97# =============================================================================
class StandardsGroup(BaseGroup):
    """
    Container for metadata standards documentation stored in the HDF5 file.

    Stores metadata standards used throughout the survey in a standardized
    summary table.  This enables users to understand metadata directly from
    the file without requiring external documentation.

    The standards are organized in a summary table at
    ``/Survey/Standards/summary`` with columns for attribute name, type,
    requirements, style, units, and descriptions.

    Attributes
    ----------
    summary_table : MTH5Table
        The standards summary table with metadata definitions.

    Notes
    -----
    The default modules summarized by :meth:`get_standards_summary` are
    ``common``, ``timeseries``, ``timeseries.filters``,
    ``transfer_functions.tf``, ``features``, ``features.weights``,
    ``processing``, ``processing.fourier_coefficients`` and
    ``processing.aurora``.

    Examples
    --------
    >>> with MTH5('survey.mth5') as mth5_obj:
    ...     standards = mth5_obj.standards_group
    ...     summary = standards.summary_table
    ...     print(summary.array.dtype.names)
    ('attribute', 'type', 'required', 'style', 'units', 'description', ...)

    Get information about a specific attribute:

    >>> standards.get_attribute_information('survey.release_license')
    survey.release_license
    --------------------------
        type:          string
        required:      True
        style:         controlled vocabulary
        ...
    """

    def __init__(self, group: Any, **kwargs: Any) -> None:
        """
        Initialize StandardsGroup.

        Parameters
        ----------
        group : h5py.Group
            HDF5 group to manage standards data.
        **kwargs : Any
            Additional keyword arguments passed to BaseGroup.
        """
        super().__init__(group, **kwargs)

        # settings used by initialize_group() when creating the dataset
        self._defaults_summary_attrs = {
            "name": "summary",
            "max_shape": (1000,),
            "dtype": STANDARDS_DTYPE,
        }

        # modules summarized when get_standards_summary() gets no argument
        self._modules = [
            "common",
            "timeseries",
            "timeseries.filters",
            "transfer_functions.tf",
            "features",
            "features.weights",
            "processing",
            "processing.fourier_coefficients",
            "processing.aurora",
        ]

    @property
    def summary_table(self) -> MTH5Table:
        """Standards summary table; a new wrapper is built on every access."""
        return self._get_summary_table()

    def _get_summary_table(self) -> MTH5Table:
        """
        Get the standards summary table from HDF5.

        Returns
        -------
        MTH5Table
            The MTH5Table object wrapping the ``"summary"`` dataset of this
            group, using STANDARDS_DTYPE.
        """
        return MTH5Table(self.hdf5_group["summary"], STANDARDS_DTYPE)

    def get_attribute_information(self, attribute_name: str) -> None:
        """
        Print detailed information about a metadata attribute.

        Looks the attribute up in the ``attribute`` column of the summary
        table and prints every remaining column of the matching row, one
        per line, as ``<column>: <value>``.

        Parameters
        ----------
        attribute_name : str
            Name of the attribute to describe
            (e.g., 'survey.release_license').

        Raises
        ------
        MTH5TableError
            If the attribute is not found in the standards summary table.

        Examples
        --------
        >>> standards = mth5_obj.standards_group
        >>> standards.get_attribute_information('survey.release_license')
        survey.release_license
        --------------------------
            type:          string
            required:      True
            style:         controlled vocabulary
            units:
            description:   How the data can be used. The options are based on
                           Creative Commons licenses.
            options:       CC-0,CC-BY,CC-BY-SA,CC-BY-ND,CC-BY-NC-SA
            alias:
            example:       CC-0
            default:       CC-0
        """
        # The property re-wraps the dataset on each access, so fetch it once.
        summary_table = self.summary_table

        find = summary_table.locate("attribute", attribute_name)
        if len(find) == 0:
            msg = f"Could not find {attribute_name} in standards."
            self.logger.error(msg)
            raise MTH5TableError(msg)

        # NOTE(review): ``.item()`` below only works for a single matching
        # row; presumably attribute names are unique in the table -- confirm.
        meta_item = summary_table.array[find]

        lines = ["", attribute_name, "-" * (len(attribute_name) + 4)]
        # Skip the first column: it is the attribute name printed above.
        for name, value in zip(meta_item.dtype.names[1:], meta_item.item()[1:]):
            if isinstance(value, (bytes, np.bytes_)):
                value = value.decode()
            label = name + ":"
            lines.append(f"\t{label:<14} {value}")
        print("\n".join(lines))

    def summary_table_from_dict(self, summary_dict: dict[str, Any]) -> None:
        """
        Populate summary table from a dictionary of metadata standards.

        Converts a flattened dictionary of metadata standards into rows
        in the HDF5 summary table, one row per dictionary key.

        Parameters
        ----------
        summary_dict : dict[str, Any]
            Flattened dictionary of all metadata standards.  Keys are
            attribute names, values are dictionaries that contain one entry
            for every column of the summary table after the first.

        Raises
        ------
        KeyError
            If a value dictionary is missing one of the table columns.

        Notes
        -----
        Processes dictionary values:

        - Lists are converted to comma-separated strings (an empty list
          becomes an empty string)
        - None values become empty strings

        TODO
        ----
        Adapt method to accept pandas.DataFrame as alternative input.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> metadata = summarize_metadata_standards()
        >>> standards.summary_table_from_dict(metadata)
        """
        # Build the table wrapper once instead of on every loop iteration.
        summary_table = self.summary_table
        table_dtype = summary_table.dtype
        value_columns = table_dtype.names[1:]

        for key, v_dict in summary_dict.items():
            row_values = [key]
            for dkey in value_columns:
                value = v_dict[dkey]
                if isinstance(value, list):
                    # joining an empty list yields "", matching the None case
                    value = ",".join(f"{ii}" for ii in value)
                elif value is None:
                    value = ""
                row_values.append(value)

            row = np.array([tuple(row_values)], table_dtype)
            index = summary_table.add_row(row)
            self.logger.debug(f"Added {index} rows to Standards Group")

    def get_standards_summary(self, modules: Optional[list[str]] = None) -> np.ndarray:
        """
        Get standards for specified metadata modules.

        Calls ``summarize_standards`` for each module and concatenates the
        resulting arrays.

        Parameters
        ----------
        modules : list[str], optional
            List of module names to include (e.g., 'timeseries').
            If None, uses default modules: common, timeseries,
            timeseries.filters, transfer_functions.tf, features,
            features.weights, processing, processing.fourier_coefficients,
            processing.aurora.  Default is None.

        Returns
        -------
        np.ndarray
            Concatenation of the per-module arrays, each requested with
            ``dtype=STANDARDS_DTYPE``.

        Raises
        ------
        ValueError
            Raised by ``np.concatenate`` when ``modules`` is an empty list.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> ts_standards = standards.get_standards_summary(['timeseries'])

        Get all default modules:

        >>> all_standards = standards.get_standards_summary()
        """
        if modules is None:
            modules = self._modules

        summaries = [
            summarize_standards(module, output_type="array", dtype=STANDARDS_DTYPE)
            for module in modules
        ]

        return np.concatenate(summaries)

    def summary_table_from_array(self, array: np.ndarray) -> None:
        """
        Populate summary table from a numpy structured array.

        Adds every element of ``array`` to the HDF5 summary table with
        ``add_row()``, in iteration order.

        Parameters
        ----------
        array : np.ndarray
            Structured numpy array with dtype matching STANDARDS_DTYPE.
            Each row represents one metadata attribute definition.

        Examples
        --------
        >>> standards = StandardsGroup(group)
        >>> standards_array = standards.get_standards_summary()
        >>> standards.summary_table_from_array(standards_array)
        """
        summary_table = self._get_summary_table()

        # np.nditer yields each element as a zero-dimensional ndarray;
        # presumably add_row() requires an ndarray rather than np.void,
        # so the iterator is kept -- verify against MTH5Table.add_row.
        for row in np.nditer(array):
            index = summary_table.add_row(row)
            self.logger.debug(f"Added {index} rows to Standards Group")

    def initialize_group(self) -> None:
        """
        Initialize the standards group and create the summary table.

        Creates the summary table dataset in the HDF5 file and populates it
        with metadata standards from all default modules.  Sets HDF5
        attributes on the dataset and writes the group metadata.

        Notes
        -----
        Initialization process:

        1. Creates an empty, resizable HDF5 dataset for the summary table
        2. Passes ``dataset_options`` to the dataset creation only when
           its ``"compression"`` entry is not None
        3. Sets HDF5 attributes: type, last_updated, reference
        4. Populates table with standards from all default modules
        5. Writes group metadata to HDF5

        The summary table uses STANDARDS_DTYPE and supports up to 1000 rows.

        Examples
        --------
        >>> standards = mth5_obj.standards_group
        >>> standards.initialize_group()
        >>> summary_table = standards.summary_table
        """
        defaults = self._defaults_summary_attrs

        # Only forward the dataset options when compression is configured.
        # They are still expanded next to the explicit keywords, so a
        # duplicate key raises TypeError instead of being silently overridden.
        if self.dataset_options["compression"] is None:
            extra_options = {}
        else:
            extra_options = self.dataset_options

        summary_dataset = self.hdf5_group.create_dataset(
            defaults["name"],
            (0,),
            maxshape=defaults["max_shape"],
            dtype=defaults["dtype"],
            **extra_options,
        )

        # NOTE(review): "date_time" is stored literally as the value of
        # last_updated; it looks like a placeholder -- confirm intent.
        summary_dataset.attrs.update(
            {
                "type": "summary table",
                "last_updated": "date_time",
                "reference": summary_dataset.ref,
            }
        )

        # Every fragment needs the f prefix, otherwise the braces are
        # logged verbatim.
        self.logger.debug(
            f"Created {defaults['name']} table with "
            f"max_shape = {defaults['max_shape']}, "
            f"dtype={defaults['dtype']}"
        )
        # Explicit "+": adjacent string literals are merged before the
        # method call, which would turn the prefix into the join separator.
        self.logger.debug(
            "used options: "
            + "; ".join(f"{k} = {v}" for k, v in self.dataset_options.items())
        )

        self.summary_table_from_array(self.get_standards_summary())

        self.write_metadata()