Coverage for C: \ Users \ peaco \ OneDrive \ Documents \ GitHub \ mth5 \ mth5 \ timeseries \ ts_helpers.py: 94%
48 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-27 20:09 -0800
1# -*- coding: utf-8 -*-
2"""
3Time series helper functions.
5Provides utilities for time series processing including decimation planning,
6significant figure counting, and datetime coordinate generation.
8Notes
9-----
10Created on Wed Mar 29 14:27:22 2023
12Author: jpeacock
13"""
15from __future__ import annotations
17# =============================================================================
18# Imports
19# =============================================================================
20import numpy as np
21import pandas as pd
22from loguru import logger
23from mt_metadata.common.mttime import MTime
26# =============================================================================
29def get_decimation_sample_rates(
30 old_sample_rate: float, new_sample_rate: float, max_decimation: int
31) -> list[float]:
32 """
33 Compute decimation steps from old to new sample rate.
35 Generates a list of intermediate sample rates to decimate from
36 `old_sample_rate` to `new_sample_rate` without exceeding
37 `max_decimation` at any single step. Uses geometric series to
38 distribute decimation evenly.
40 Parameters
41 ----------
42 old_sample_rate : float
43 Original sample rate in samples per second.
44 new_sample_rate : float
45 Target sample rate in samples per second.
46 max_decimation : int
47 Maximum allowed decimation factor per step.
49 Returns
50 -------
51 list[float]
52 List of intermediate sample rates for multi-stage decimation.
53 If total decimation <= max_decimation, returns [new_sample_rate].
55 Examples
56 --------
57 Single-step decimation (factor <= max)::
59 >>> get_decimation_sample_rates(100, 10, 13)
60 [10]
62 Multi-step decimation (factor > max)::
64 >>> get_decimation_sample_rates(1000, 10, 13)
65 [77, 10]
66 >>> # Decimates 1000 -> 77 -> 10
68 Three-step decimation::
70 >>> get_decimation_sample_rates(10000, 10, 13)
71 [770, 60, 10]
72 """
73 if (old_sample_rate / new_sample_rate) > max_decimation:
74 # get the number of entries in a geometric series
75 n_levels = int(
76 np.ceil(np.log(old_sample_rate / new_sample_rate) / np.log(max_decimation))
77 )
78 # make a geometric series
79 sr_list = [
80 int(np.ceil(old_sample_rate / max_decimation ** (ii)))
81 for ii in range(1, n_levels)
82 ] + [new_sample_rate]
84 return sr_list
85 return [new_sample_rate]
88def _count_decimal_sig_figs(digits: float | int | str) -> int:
89 """
90 Count decimal significant figures.
92 Returns the number of significant figures in the fractional part
93 of a number. Trailing zeros are ignored.
95 Parameters
96 ----------
97 digits : float | int | str
98 Number or string representation of a number.
100 Returns
101 -------
102 int
103 Number of significant decimal places (excluding trailing zeros).
105 Examples
106 --------
107 Count decimal places::
109 >>> _count_decimal_sig_figs(1.234)
110 3
111 >>> _count_decimal_sig_figs(0.00100)
112 3
113 >>> _count_decimal_sig_figs("1.500")
114 1
115 >>> _count_decimal_sig_figs(42)
116 0
117 """
119 _, _, fractional = str(digits).partition(".")
121 return len(fractional.rstrip("0"))
124def make_dt_coordinates(
125 start_time: str | MTime | None,
126 sample_rate: float | None,
127 n_samples: int,
128 end_time: str | MTime | None = None,
129) -> pd.DatetimeIndex:
130 """
131 Generate datetime coordinates for time series data.
133 Creates a pandas DatetimeIndex with proper rounding based on
134 sample rate precision. Handles edge cases like invalid sample
135 rates and missing start times with sensible defaults.
137 Parameters
138 ----------
139 start_time : str | MTime | None
140 Start time in ISO format or MTime object. If None, defaults
141 to '1980-01-01T00:00:00'.
142 sample_rate : float | None
143 Sample rate in samples per second. If None or 0, defaults to 1
144 and issues a warning.
145 n_samples : int
146 Number of samples in the time series. Must be >= 1.
147 end_time : str | MTime | None, default None
148 End time in ISO format or MTime object. If None, computed from
149 start_time, sample_rate, and n_samples.
151 Returns
152 -------
153 pandas.DatetimeIndex
154 DatetimeIndex with `n_samples` timestamps, rounded to appropriate
155 precision (ms, us, or ns) based on significant figures.
157 Raises
158 ------
159 ValueError
160 If `n_samples` < 1.
162 Warnings
163 --------
164 UserWarning
165 If sample_rate is invalid (None or 0) or start_time is None.
167 Notes
168 -----
169 - End time is calculated as: start_time + (n_samples - 1) / sample_rate
170 - Rounding precision determined by significant figures in start time
171 and sample period (1 / sample_rate):
172 * < 3 sig figs: millisecond rounding
173 * 3-5 sig figs: microsecond rounding
174 * 6-8 sig figs: nanosecond rounding
175 * >= 9 sig figs: no rounding
177 Examples
178 --------
179 Create 100 Hz time index for 1000 samples::
181 >>> dt = make_dt_coordinates('2023-01-01T00:00:00', 100.0, 1000)
182 >>> len(dt)
183 1000
184 >>> dt[0]
185 Timestamp('2023-01-01 00:00:00')
186 >>> dt[-1]
187 Timestamp('2023-01-01 00:00:09.990000')
189 With explicit end time::
191 >>> dt = make_dt_coordinates(
192 ... '2023-01-01T00:00:00',
193 ... 10.0,
194 ... 100,
195 ... end_time='2023-01-01T00:00:09.9'
196 ... )
198 Handle invalid sample rate (uses default of 1 Hz)::
200 >>> dt = make_dt_coordinates('2023-01-01T00:00:00', None, 10)
201 # Warning: Need to input a valid sample rate...
202 """
204 if sample_rate in [0, None]:
205 msg = (
206 f"Need to input a valid sample rate. Not {sample_rate}, "
207 "returning a time index assuming a sample rate of 1"
208 )
209 logger.warning(msg)
210 sample_rate = 1.0
211 if start_time is None:
212 msg = (
213 f"Need to input a start time. Not {start_time}, "
214 "returning a time index with start time of "
215 "1980-01-01T00:00:00"
216 )
217 logger.warning(msg)
218 start_time = "1980-01-01T00:00:00"
219 if n_samples < 1:
220 msg = f"Need to input a valid n_samples. Not {n_samples}"
221 logger.error(msg)
222 raise ValueError(msg)
223 if not isinstance(start_time, MTime):
224 start_time = MTime(time_stamp=start_time)
226 # At this point, sample_rate is guaranteed to be float (not None)
227 # and start_time is guaranteed to be MTime
228 assert sample_rate is not None # Type narrowing for static analysis
230 # there is something screwy that happens when your sample rate is not a
231 # nice value that can easily fit into the 60 base. For instance if you
232 # have a sample rate of 24000 the dt_freq will be '41667N', but that is
233 # not quite right since the rounding clips some samples and your
234 # end time will be incorrect (short).\n # FIX: therefore estimate the end time based on the decimal sample rate.
235 # need to account for the fact that the start time is the first sample
236 # need n_samples - 1
237 if end_time is None:
238 end_time = start_time + (n_samples - 1) / sample_rate
239 else:
240 if not isinstance(end_time, MTime):
241 end_time = MTime(time_stamp=end_time)
242 # dt_freq = "{0:.0f}N".format(1.0e9 / (sample_rate))
244 dt_index = pd.date_range(
245 start=start_time.iso_no_tz,
246 end=end_time.iso_no_tz,
247 periods=int(round(n_samples)),
248 )
250 ## need to enforce some rounding errors otherwise an expected time step
251 ## will have a rounding error, messes things up when reindexing.
252 start_sig_figs = _count_decimal_sig_figs(str(start_time))
253 sr_sig_figs = _count_decimal_sig_figs(1 / sample_rate)
254 if start_sig_figs > sr_sig_figs:
255 test_sf = start_sig_figs
256 else:
257 test_sf = sr_sig_figs
258 if test_sf < 3:
259 dt_index = dt_index.round(freq="ms")
260 elif test_sf >= 3 and test_sf < 6:
261 dt_index = dt_index.round(freq="us")
262 elif test_sf >= 6 and test_sf < 9:
263 dt_index = dt_index.round(freq="ns")
264 else:
265 pass
266 return dt_index