Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\tables\fc_table.py: 76%
58 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Tabulate Fourier coefficients stored in an MTH5 file.
5This module provides a small utility for summarizing Fourier-coefficient
6datasets (e.g., `FCChannel`) into a structured table and exporting
7to a convenient `pandas.DataFrame` for querying and analysis.
9Notes
10-----
11- A basic test for this module exists under
12 ``mth5/tests/version_1/test_fcs.py``.
13- The table is populated by traversing the HDF5 hierarchy and collecting
14 entries for datasets labeled with the attribute ``mth5_type='FCChannel'``.
16"""
18from __future__ import annotations
20import h5py
21import numpy as np
23# =============================================================================
24# Imports
25# =============================================================================
26import pandas as pd
28from mth5 import FC_DTYPE
29from mth5.tables import MTH5Table
32# =============================================================================
class FCSummaryTable(MTH5Table):
    """
    Summary table for Fourier coefficients.

    This class wraps an HDF5 dataset that stores a summary of Fourier
    coefficient datasets and provides convenience functions such as
    `summarize()` (to populate the table) and `to_dataframe()` (to export
    entries).

    Examples
    --------
    Populate and export a summary from an existing MTH5 file::

    >>> import h5py
    >>> from mth5.tables.fc_table import FCSummaryTable
    >>> f = h5py.File('example.mth5', 'r')
    >>> # Assume the summary dataset already exists at this path
    >>> table_ds = f['Exchange']['FC_Summary']
    >>> fc_table = FCSummaryTable(table_ds)
    >>> fc_table.summarize()  # walk the file and fill entries
    >>> df = fc_table.to_dataframe()
    >>> df.head()

    """

    def __init__(self, hdf5_dataset: h5py.Dataset) -> None:
        """Initialize the base table with ``hdf5_dataset`` and ``FC_DTYPE``."""
        super().__init__(hdf5_dataset, FC_DTYPE)

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert the table to a `pandas.DataFrame` for easier querying.

        Returns
        -------
        pandas.DataFrame
            A dataframe with decoded string columns and parsed start/end
            timestamps.

        Raises
        ------
        ValueError
            If the underlying dataset (``self.array``) is ``None``.

        Examples
        --------
        Export to a dataframe and filter by component::

        >>> df = fc_table.to_dataframe()
        >>> df[df.component == 'ex']
        """

        if self.array is None:
            raise ValueError("Summary table dataset is not initialized.")

        df = pd.DataFrame(self.array[()])

        # These columns are stored as bytes; decode them to str.
        for key in (
            "survey",
            "station",
            "run",
            "component",
            "measurement_type",
            "units",
        ):
            df[key] = df[key].str.decode("utf-8")

        # Decode the timestamp columns once, *before* parsing, so the
        # fallback below always works on the decoded strings and never on a
        # column that was already converted to datetimes.
        start_text = df["start"].str.decode("utf-8")
        end_text = df["end"].str.decode("utf-8")
        try:
            start_times = pd.to_datetime(start_text, format="mixed")
            end_times = pd.to_datetime(end_text, format="mixed")
        except ValueError:
            # NOTE(review): presumably hit on pandas versions that do not
            # accept format="mixed"; retry with default format inference.
            start_times = pd.to_datetime(start_text)
            end_times = pd.to_datetime(end_text)
        df["start"] = start_times
        df["end"] = end_times

        return df

    def summarize(self) -> None:
        """
        Populate the summary table by traversing the HDF5 hierarchy.

        The table is cleared, then the traversal (starting at the parent of
        the summary dataset) searches for datasets with attribute
        ``mth5_type == 'FCChannel'`` and adds a corresponding summary row
        for each.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the summary dataset or its parent is not available.
        TypeError
            If the parent is neither an h5py object nor a mapping-like
            object exposing ``items``.

        Notes
        -----
        - If the table contains rows from a different OS/encoding,
          row insertion can raise a `ValueError`. A warning is logged and
          processing continues for subsequent rows.
        - Datasets without an ``mth5_type`` attribute are skipped silently.
        - If an ``FCChannel`` dataset is missing metadata needed to build
          its row (a `KeyError`), a warning is logged and the dataset is
          skipped.
        - The table is cleared before the parent is validated, so a failed
          validation leaves the table empty.

        Examples
        --------
        Refresh the table entries::

        >>> fc_table.clear_table()
        >>> fc_table.summarize()
        """

        def recursive_get_fc_entry(
            group: h5py.Group | h5py.File | h5py.Dataset,
        ) -> None:
            """Recursively collect FC summary entries from the hierarchy."""
            if isinstance(group, (h5py.Group, h5py.File)):
                for _, node in group.items():
                    recursive_get_fc_entry(node)
                return
            if not isinstance(group, h5py.Dataset):
                return

            try:
                ch_type = group.attrs["mth5_type"]
            except KeyError:
                # Dataset carries no type label; it is not an FC channel.
                return
            if ch_type not in ["FCChannel"]:
                return

            try:
                fc_entry = _get_fc_entry(group)
                try:
                    self.add_row(fc_entry)
                except ValueError as error:
                    msg = (
                        f"{error}. "
                        "it is possible that the OS that made the table is not the OS operating on it."
                    )
                    self.logger.warning(msg)
            except KeyError as error:
                # Report the dropped channel instead of hiding it, but keep
                # the traversal best-effort.
                self.logger.warning(
                    f"Skipping FCChannel dataset "
                    f"{getattr(group, 'name', group)!r}: "
                    f"missing metadata attribute {error}."
                )

        self.clear_table()
        if self.array is None or getattr(self.array, "parent", None) is None:
            raise ValueError("Summary table dataset parent is not available.")
        parent = self.array.parent
        # Allow Mock objects and dictionaries for testing, in addition to h5py types
        if not (
            isinstance(parent, (h5py.Group, h5py.File, h5py.Dataset))
            or hasattr(parent, "items")
            or isinstance(parent, dict)
        ):
            raise TypeError("Unexpected parent type for summary dataset.")
        recursive_get_fc_entry(parent)
def _get_fc_entry(
    group: h5py.Dataset,
    dtype: np.dtype | None = FC_DTYPE,
) -> np.ndarray:
    """
    Assemble the summary-table row describing one FC channel dataset.

    Parameters
    ----------
    group : h5py._hl.dataset.Dataset
        The HDF5 dataset representing a Fourier-coefficient channel
        (i.e., with attribute ``mth5_type='FCChannel'``).
    dtype : numpy.dtype, optional
        The dtype describing the summary table schema. Defaults to
        :data:`mth5.FC_DTYPE`.

    Returns
    -------
    numpy.ndarray
        A 1-row structured array matching the summary table schema.

    Raises
    ------
    KeyError
        If a required attribute is missing on the dataset or on one of the
        ancestor groups it is read from.

    Examples
    --------
    Create a row for an existing FC dataset::

    >>> fc_ds = f['Survey']['station']['run']['FC']['ex']
    >>> row = _get_fc_entry(fc_ds)
    >>> row.dtype == FC_DTYPE
    True
    """

    def _as_bytes(value: object) -> bytes:
        """Return ``value`` as bytes, UTF-8 encoding ``str(value)`` if needed."""
        try:
            if isinstance(value, np.ndarray):
                # Unwrap array-valued attributes: 0-d -> scalar, otherwise
                # take the first element.
                if value.shape == ():
                    value = value.item()
                else:
                    value = value[0]
        except Exception:
            # Best effort: keep the value as-is if unwrapping fails.
            pass
        return value if isinstance(value, bytes) else str(value).encode("utf-8")

    # Ancestors of the channel dataset: one level up holds the decimation
    # level, two the run, four the station, and six the survey.
    decimation_group = group.parent
    run_group = decimation_group.parent
    station_group = run_group.parent.parent
    survey_group = station_group.parent.parent

    # Field order must match the summary-table dtype.
    row = (
        _as_bytes(survey_group.attrs["id"]),
        _as_bytes(station_group.attrs["id"]),
        run_group.attrs["id"],
        decimation_group.attrs["decimation_level"],
        station_group.attrs["location.latitude"],
        station_group.attrs["location.longitude"],
        station_group.attrs["location.elevation"],
        group.attrs["component"],
        group.attrs["time_period.start"],
        group.attrs["time_period.end"],
        group.size,
        group.attrs["sample_rate_window_step"],
        group.attrs["mth5_type"],
        # Azimuth and tilt are intentionally NOT read from the time series:
        # the time series may not be present in the mth5.
        # TODO: add azimuth and tilt on FCChannel creation
        group.attrs["units"],
        group.ref,
        decimation_group.ref,
        run_group.ref,
        station_group.ref,
    )
    return np.array([row], dtype=dtype)