Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ timeseries \ ts_helpers.py: 94%

48 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-27 20:09 -0800

1# -*- coding: utf-8 -*- 

2""" 

3Time series helper functions. 

4 

5Provides utilities for time series processing including decimation planning, 

6significant figure counting, and datetime coordinate generation. 

7 

8Notes 

9----- 

10Created on Wed Mar 29 14:27:22 2023 

11 

12Author: jpeacock 

13""" 

14 

15from __future__ import annotations 

16 

17# ============================================================================= 

18# Imports 

19# ============================================================================= 

20import numpy as np 

21import pandas as pd 

22from loguru import logger 

23from mt_metadata.common.mttime import MTime 

24 

25 

26# ============================================================================= 

27 

28 

29def get_decimation_sample_rates( 

30 old_sample_rate: float, new_sample_rate: float, max_decimation: int 

31) -> list[float]: 

32 """ 

33 Compute decimation steps from old to new sample rate. 

34 

35 Generates a list of intermediate sample rates to decimate from 

36 `old_sample_rate` to `new_sample_rate` without exceeding 

37 `max_decimation` at any single step. Uses geometric series to 

38 distribute decimation evenly. 

39 

40 Parameters 

41 ---------- 

42 old_sample_rate : float 

43 Original sample rate in samples per second. 

44 new_sample_rate : float 

45 Target sample rate in samples per second. 

46 max_decimation : int 

47 Maximum allowed decimation factor per step. 

48 

49 Returns 

50 ------- 

51 list[float] 

52 List of intermediate sample rates for multi-stage decimation. 

53 If total decimation <= max_decimation, returns [new_sample_rate]. 

54 

55 Examples 

56 -------- 

57 Single-step decimation (factor <= max):: 

58 

59 >>> get_decimation_sample_rates(100, 10, 13) 

60 [10] 

61 

62 Multi-step decimation (factor > max):: 

63 

64 >>> get_decimation_sample_rates(1000, 10, 13) 

65 [77, 10] 

66 >>> # Decimates 1000 -> 77 -> 10 

67 

68 Three-step decimation:: 

69 

70 >>> get_decimation_sample_rates(10000, 10, 13) 

71 [770, 60, 10] 

72 """ 

73 if (old_sample_rate / new_sample_rate) > max_decimation: 

74 # get the number of entries in a geometric series 

75 n_levels = int( 

76 np.ceil(np.log(old_sample_rate / new_sample_rate) / np.log(max_decimation)) 

77 ) 

78 # make a geometric series 

79 sr_list = [ 

80 int(np.ceil(old_sample_rate / max_decimation ** (ii))) 

81 for ii in range(1, n_levels) 

82 ] + [new_sample_rate] 

83 

84 return sr_list 

85 return [new_sample_rate] 

86 

87 

88def _count_decimal_sig_figs(digits: float | int | str) -> int: 

89 """ 

90 Count decimal significant figures. 

91 

92 Returns the number of significant figures in the fractional part 

93 of a number. Trailing zeros are ignored. 

94 

95 Parameters 

96 ---------- 

97 digits : float | int | str 

98 Number or string representation of a number. 

99 

100 Returns 

101 ------- 

102 int 

103 Number of significant decimal places (excluding trailing zeros). 

104 

105 Examples 

106 -------- 

107 Count decimal places:: 

108 

109 >>> _count_decimal_sig_figs(1.234) 

110 3 

111 >>> _count_decimal_sig_figs(0.00100) 

112 3 

113 >>> _count_decimal_sig_figs("1.500") 

114 1 

115 >>> _count_decimal_sig_figs(42) 

116 0 

117 """ 

118 

119 _, _, fractional = str(digits).partition(".") 

120 

121 return len(fractional.rstrip("0")) 

122 

123 

124def make_dt_coordinates( 

125 start_time: str | MTime | None, 

126 sample_rate: float | None, 

127 n_samples: int, 

128 end_time: str | MTime | None = None, 

129) -> pd.DatetimeIndex: 

130 """ 

131 Generate datetime coordinates for time series data. 

132 

133 Creates a pandas DatetimeIndex with proper rounding based on 

134 sample rate precision. Handles edge cases like invalid sample 

135 rates and missing start times with sensible defaults. 

136 

137 Parameters 

138 ---------- 

139 start_time : str | MTime | None 

140 Start time in ISO format or MTime object. If None, defaults 

141 to '1980-01-01T00:00:00'. 

142 sample_rate : float | None 

143 Sample rate in samples per second. If None or 0, defaults to 1 

144 and issues a warning. 

145 n_samples : int 

146 Number of samples in the time series. Must be >= 1. 

147 end_time : str | MTime | None, default None 

148 End time in ISO format or MTime object. If None, computed from 

149 start_time, sample_rate, and n_samples. 

150 

151 Returns 

152 ------- 

153 pandas.DatetimeIndex 

154 DatetimeIndex with `n_samples` timestamps, rounded to appropriate 

155 precision (ms, us, or ns) based on significant figures. 

156 

157 Raises 

158 ------ 

159 ValueError 

160 If `n_samples` < 1. 

161 

162 Warnings 

163 -------- 

164 UserWarning 

165 If sample_rate is invalid (None or 0) or start_time is None. 

166 

167 Notes 

168 ----- 

169 - End time is calculated as: start_time + (n_samples - 1) / sample_rate 

170 - Rounding precision determined by significant figures in start time 

171 and sample period (1 / sample_rate): 

172 * < 3 sig figs: millisecond rounding 

173 * 3-5 sig figs: microsecond rounding 

174 * 6-8 sig figs: nanosecond rounding 

175 * >= 9 sig figs: no rounding 

176 

177 Examples 

178 -------- 

179 Create 100 Hz time index for 1000 samples:: 

180 

181 >>> dt = make_dt_coordinates('2023-01-01T00:00:00', 100.0, 1000) 

182 >>> len(dt) 

183 1000 

184 >>> dt[0] 

185 Timestamp('2023-01-01 00:00:00') 

186 >>> dt[-1] 

187 Timestamp('2023-01-01 00:00:09.990000') 

188 

189 With explicit end time:: 

190 

191 >>> dt = make_dt_coordinates( 

192 ... '2023-01-01T00:00:00', 

193 ... 10.0, 

194 ... 100, 

195 ... end_time='2023-01-01T00:00:09.9' 

196 ... ) 

197 

198 Handle invalid sample rate (uses default of 1 Hz):: 

199 

200 >>> dt = make_dt_coordinates('2023-01-01T00:00:00', None, 10) 

201 # Warning: Need to input a valid sample rate... 

202 """ 

203 

204 if sample_rate in [0, None]: 

205 msg = ( 

206 f"Need to input a valid sample rate. Not {sample_rate}, " 

207 "returning a time index assuming a sample rate of 1" 

208 ) 

209 logger.warning(msg) 

210 sample_rate = 1.0 

211 if start_time is None: 

212 msg = ( 

213 f"Need to input a start time. Not {start_time}, " 

214 "returning a time index with start time of " 

215 "1980-01-01T00:00:00" 

216 ) 

217 logger.warning(msg) 

218 start_time = "1980-01-01T00:00:00" 

219 if n_samples < 1: 

220 msg = f"Need to input a valid n_samples. Not {n_samples}" 

221 logger.error(msg) 

222 raise ValueError(msg) 

223 if not isinstance(start_time, MTime): 

224 start_time = MTime(time_stamp=start_time) 

225 

226 # At this point, sample_rate is guaranteed to be float (not None) 

227 # and start_time is guaranteed to be MTime 

228 assert sample_rate is not None # Type narrowing for static analysis 

229 

230 # there is something screwy that happens when your sample rate is not a 

231 # nice value that can easily fit into the 60 base. For instance if you 

232 # have a sample rate of 24000 the dt_freq will be '41667N', but that is 

233 # not quite right since the rounding clips some samples and your 

234 # end time will be incorrect (short).\n # FIX: therefore estimate the end time based on the decimal sample rate. 

235 # need to account for the fact that the start time is the first sample 

236 # need n_samples - 1 

237 if end_time is None: 

238 end_time = start_time + (n_samples - 1) / sample_rate 

239 else: 

240 if not isinstance(end_time, MTime): 

241 end_time = MTime(time_stamp=end_time) 

242 # dt_freq = "{0:.0f}N".format(1.0e9 / (sample_rate)) 

243 

244 dt_index = pd.date_range( 

245 start=start_time.iso_no_tz, 

246 end=end_time.iso_no_tz, 

247 periods=int(round(n_samples)), 

248 ) 

249 

250 ## need to enforce some rounding errors otherwise an expected time step 

251 ## will have a rounding error, messes things up when reindexing. 

252 start_sig_figs = _count_decimal_sig_figs(str(start_time)) 

253 sr_sig_figs = _count_decimal_sig_figs(1 / sample_rate) 

254 if start_sig_figs > sr_sig_figs: 

255 test_sf = start_sig_figs 

256 else: 

257 test_sf = sr_sig_figs 

258 if test_sf < 3: 

259 dt_index = dt_index.round(freq="ms") 

260 elif test_sf >= 3 and test_sf < 6: 

261 dt_index = dt_index.round(freq="us") 

262 elif test_sf >= 6 and test_sf < 9: 

263 dt_index = dt_index.round(freq="ns") 

264 else: 

265 pass 

266 return dt_index