Coverage for C:\Users\peaco\OneDrive\Documents\GitHub\mth5\mth5\tables\fc_table.py: 76%

58 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Tabulate Fourier coefficients stored in an MTH5 file. 

4 

5This module provides a small utility for summarizing Fourier-coefficient 

6datasets (e.g., `FCChannel`) into a structured table and exporting 

7to a convenient `pandas.DataFrame` for querying and analysis. 

8 

9Notes 

10----- 

11- A basic test for this module exists under 

12 ``mth5/tests/version_1/test_fcs.py``. 

13- The table is populated by traversing the HDF5 hierarchy and collecting 

14 entries for datasets labeled with the attribute ``mth5_type='FCChannel'``. 

15 

16""" 

17 

18from __future__ import annotations 

19 

20import h5py 

21import numpy as np 

22 

23# ============================================================================= 

24# Imports 

25# ============================================================================= 

26import pandas as pd 

27 

28from mth5 import FC_DTYPE 

29from mth5.tables import MTH5Table 

30 

31 

32# ============================================================================= 

33 

34 

class FCSummaryTable(MTH5Table):
    """
    Summary table for Fourier coefficients.

    This class wraps an HDF5 dataset that stores a summary of Fourier
    coefficient datasets and provides convenience functions such as
    `summarize()` (to populate the table) and `to_dataframe()` (to export
    entries).

    Examples
    --------
    Populate and export a summary from an existing MTH5 file::

        >>> import h5py
        >>> from mth5.tables.fc_table import FCSummaryTable
        >>> # summarize() clears and refills the table, so open writable
        >>> f = h5py.File('example.mth5', 'r+')
        >>> # Assume the summary dataset already exists at this path
        >>> table_ds = f['Exchange']['FC_Summary']
        >>> fc_table = FCSummaryTable(table_ds)
        >>> fc_table.summarize()  # walk the file and fill entries
        >>> df = fc_table.to_dataframe()
        >>> df.head()

    """

    def __init__(self, hdf5_dataset: h5py.Dataset) -> None:
        """
        Wrap ``hdf5_dataset`` as a summary table using ``FC_DTYPE``.

        Parameters
        ----------
        hdf5_dataset : h5py.Dataset
            The dataset that stores the summary rows.
        """
        super().__init__(hdf5_dataset, FC_DTYPE)

    def to_dataframe(self) -> pd.DataFrame:
        """
        Convert the table to a `pandas.DataFrame` for easier querying.

        Returns
        -------
        pandas.DataFrame
            A dataframe with decoded string columns and parsed start/end
            timestamps.

        Raises
        ------
        ValueError
            If the underlying dataset (``self.array``) is ``None``.

        Examples
        --------
        Export to a dataframe and filter by component::

            >>> df = fc_table.to_dataframe()
            >>> df[df.component == 'ex']
        """

        if self.array is None:
            raise ValueError("Summary table dataset is not initialized.")
        df = pd.DataFrame(self.array[()])

        # Byte-string columns are decoded to ``str``. Bracket indexing is
        # used because attribute-style column access can collide with
        # DataFrame attributes and methods.
        for key in (
            "survey",
            "station",
            "run",
            "component",
            "measurement_type",
            "units",
        ):
            df[key] = df[key].str.decode("utf-8")

        # Decode each time column exactly once and fall back per column.
        # Re-decoding inside the ``except`` branch would fail with
        # ``AttributeError`` if one column had already been converted to
        # datetimes before the other one raised.
        for key in ("start", "end"):
            decoded = df[key].str.decode("utf-8")
            try:
                df[key] = pd.to_datetime(decoded, format="mixed")
            except ValueError:
                # NOTE(review): presumably covers pandas versions that do
                # not understand ``format="mixed"`` -- confirm.
                df[key] = pd.to_datetime(decoded)

        return df

    def summarize(self) -> None:
        """
        Populate the summary table by traversing the HDF5 hierarchy.

        The table is cleared first. The traversal then starts at the parent
        of the summary dataset, searches for datasets with attribute
        ``mth5_type == 'FCChannel'`` and adds a corresponding summary row
        for each.

        Returns
        -------
        None

        Raises
        ------
        ValueError
            If the summary dataset or its parent is not available.
        TypeError
            If the parent is neither an h5py object nor mapping-like.

        Notes
        -----
        - If the table contains rows from a different OS/encoding,
          row insertion can raise a `ValueError`. A warning is logged and
          processing continues for subsequent rows.
        - Datasets without an ``mth5_type`` attribute are skipped silently.
          An FCChannel dataset whose entry cannot be built or added because
          of a `KeyError` (e.g. a missing attribute) is skipped with a
          logged warning.

        Examples
        --------
        Refresh the table entries::

            >>> fc_table.clear_table()
            >>> fc_table.summarize()
        """

        def recursive_get_fc_entry(
            group: h5py.Group | h5py.File | h5py.Dataset,
        ) -> None:
            """Recursively collect FC summary entries from the hierarchy."""
            if isinstance(group, (h5py.Group, h5py.File)):
                for _, node in group.items():
                    recursive_get_fc_entry(node)
                return
            if not isinstance(group, h5py.Dataset):
                return

            # Untagged datasets are simply not FC channels.
            try:
                ch_type = group.attrs["mth5_type"]
            except KeyError:
                return
            if ch_type not in ("FCChannel",):
                return

            try:
                fc_entry = _get_fc_entry(group)
                try:
                    self.add_row(fc_entry)
                except ValueError as error:
                    msg = (
                        f"{error}. "
                        "it is possible that the OS that made the table is not the OS operating on it."
                    )
                    self.logger.warning(msg)
            except KeyError as error:
                # Keep the best-effort behaviour (skip this dataset), but
                # leave a trace instead of dropping the channel silently.
                self.logger.warning(
                    f"Skipping FC dataset {group.name}: missing metadata {error}"
                )

        self.clear_table()
        if self.array is None or getattr(self.array, "parent", None) is None:
            raise ValueError("Summary table dataset parent is not available.")
        parent = self.array.parent
        # Allow Mock objects and dictionaries for testing, in addition to h5py types
        if not (
            isinstance(parent, (h5py.Group, h5py.File, h5py.Dataset))
            or hasattr(parent, "items")
            or isinstance(parent, dict)
        ):
            raise TypeError("Unexpected parent type for summary dataset.")
        recursive_get_fc_entry(parent)

173 

174 

def _get_fc_entry(
    group: h5py.Dataset,
    dtype: np.dtype | None = FC_DTYPE,
) -> np.ndarray:
    """
    Build a single FC summary table row.

    Parameters
    ----------
    group : h5py._hl.dataset.Dataset
        The HDF5 dataset representing a Fourier-coefficient channel
        (i.e., with attribute ``mth5_type='FCChannel'``).
    dtype : numpy.dtype, optional
        The dtype describing the summary table schema. Defaults to
        :data:`mth5.FC_DTYPE`; ``None`` is treated as the default.

    Returns
    -------
    numpy.ndarray
        A 1-row structured array matching the summary table schema.

    Raises
    ------
    KeyError
        If one of the attributes read below is missing from the dataset
        or from one of its ancestor groups.

    Examples
    --------
    Create a row for an existing FC dataset::

        >>> fc_ds = f['Survey']['station']['run']['FC']['ex']
        >>> row = _get_fc_entry(fc_ds)
        >>> row.dtype == FC_DTYPE
        True
    """

    def _as_bytes(value: object) -> bytes:
        """Return ``value`` as bytes, unwrapping a NumPy array first."""
        if isinstance(value, np.ndarray):
            if value.shape == ():
                value = value.item()
            elif value.shape[0] > 0:
                value = value[0]
            # An array that is empty along its first axis has no element
            # to unwrap; it falls through and is stringified as-is.
        if isinstance(value, bytes):
            return value
        return str(value).encode("utf-8")

    if dtype is None:
        dtype = FC_DTYPE

    # Resolve each ancestor once instead of re-walking ``.parent`` chains
    # for every field. Levels counted from the FCChannel dataset:
    # 1 = decimation level, 2 = run, 4 = station, 6 = survey.
    decimation_level_group = group.parent
    run_group = decimation_level_group.parent
    station_group = run_group.parent.parent
    survey_group = station_group.parent.parent

    station_attrs = station_group.attrs
    channel_attrs = group.attrs

    fc_entry = np.array(
        [
            (
                _as_bytes(survey_group.attrs["id"]),  # survey
                _as_bytes(station_attrs["id"]),  # station
                run_group.attrs["id"],  # run
                decimation_level_group.attrs["decimation_level"],
                station_attrs["location.latitude"],
                station_attrs["location.longitude"],
                station_attrs["location.elevation"],
                channel_attrs["component"],
                channel_attrs["time_period.start"],
                channel_attrs["time_period.end"],
                group.size,
                channel_attrs["sample_rate_window_step"],
                channel_attrs["mth5_type"],
                # group.attrs["measurement_azimuth"],  # DO NOT go to the time series to access this info
                # group.attrs["measurement_tilt"],  # the time series may not be in the mth5
                # TODO: add azimuth and tilt on FCChannel creation
                channel_attrs["units"],
                group.ref,
                decimation_level_group.ref,
                run_group.ref,
                station_group.ref,
            )
        ],
        dtype=dtype,
    )
    return fc_entry