Coverage for src / tracekit / analyzers / statistics / distribution.py: 100%
78 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""Distribution analysis functions.
3This module provides histogram generation and distribution
4shape metrics for signal analysis.
7Example:
8 >>> from tracekit.analyzers.statistics.distribution import histogram, distribution_metrics
9 >>> counts, edges = histogram(trace, bins=50)
10 >>> metrics = distribution_metrics(trace)
11 >>> print(f"Skewness: {metrics['skewness']}")
13References:
14 scipy.stats for distribution analysis
15"""
17from __future__ import annotations
19from typing import TYPE_CHECKING, Any, Literal
21import numpy as np
22from scipy import stats as sp_stats
24from tracekit.core.types import WaveformTrace
26if TYPE_CHECKING:
27 from numpy.typing import NDArray
def histogram(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    bins: int | str | NDArray[np.floating[Any]] = "auto",
    *,
    density: bool = False,
    range: tuple[float, float] | None = None,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Build an amplitude histogram of the trace samples.

    Args:
        trace: Input trace or raw numpy array.
        bins: Bin count, a binning strategy name ("auto", "fd",
            "sturges"), or an explicit array of bin edges.
        density: When True, normalize counts to a probability density.
        range: Optional (min, max) span covered by the histogram.

    Returns:
        (counts, bin_edges) - Histogram counts and bin edges, both
        as float64 arrays.

    Example:
        >>> counts, edges = histogram(trace, bins=50)
        >>> centers = (edges[:-1] + edges[1:]) / 2
        >>> plt.bar(centers, counts, width=np.diff(edges))
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Delegate binning entirely to numpy; we only normalize the dtypes.
    hist_counts, hist_edges = np.histogram(
        samples, bins=bins, density=density, range=range
    )
    return hist_counts.astype(np.float64), hist_edges.astype(np.float64)
def distribution_metrics(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> dict[str, float]:
    """Compute distribution shape metrics.

    Calculates skewness, kurtosis, and crest factor.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Dictionary with distribution metrics:
            - skewness: Distribution asymmetry (-=left, +=right tail)
            - kurtosis: Tail weight (>3=heavy, <3=light tails)
            - excess_kurtosis: Kurtosis - 3 (0 for normal)
            - crest_factor: Peak / RMS ratio
            - crest_factor_db: Crest factor in dB

    Example:
        >>> metrics = distribution_metrics(trace)
        >>> print(f"Skewness: {metrics['skewness']:.3f}")
        >>> print(f"Kurtosis: {metrics['kurtosis']:.3f}")

    References:
        Fisher's definitions for skewness and kurtosis
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Fisher's definitions: kurtosis(fisher=True) yields excess kurtosis,
    # from which Pearson's kurtosis is recovered by adding 3.
    skewness = float(sp_stats.skew(samples))
    excess = float(sp_stats.kurtosis(samples, fisher=True))

    rms_level = np.sqrt(np.mean(samples**2))
    peak_level = np.max(np.abs(samples))

    # Crest factor is undefined for an all-zero signal; report infinity.
    if rms_level > 0:
        crest = float(peak_level / rms_level)
        crest_db = float(20 * np.log10(crest))
    else:
        crest = float("inf")
        crest_db = float("inf")

    return {
        "skewness": skewness,
        "kurtosis": excess + 3,
        "excess_kurtosis": excess,
        "crest_factor": crest,
        "crest_factor_db": crest_db,
    }
def moment(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    order: int,
    *,
    central: bool = True,
) -> float:
    """Compute statistical moment.

    Args:
        trace: Input trace or numpy array.
        order: Moment order (1=mean, 2=variance, 3=skewness, 4=kurtosis).
        central: If True, compute central moment (about mean).

    Returns:
        Moment value.

    Example:
        >>> m2 = moment(trace, 2)  # Variance
        >>> m3 = moment(trace, 3)  # Third central moment
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Raw moment: E[x^k].  Central moment: delegated to scipy, which
    # computes E[(x - mean)^k].
    if not central:
        return float(np.mean(samples**order))
    return float(sp_stats.moment(samples, moment=order))
def fit_distribution(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    distribution: Literal["normal", "lognormal", "exponential", "uniform"] = "normal",
) -> dict[str, float]:
    """Fit distribution to data and compute goodness of fit.

    Args:
        trace: Input trace or numpy array.
        distribution: Distribution to fit.

    Returns:
        Dictionary with the fitted parameters (e.g. "loc", "scale",
        and "shape" for lognormal) plus fit quality:
            - ks_statistic: Kolmogorov-Smirnov test statistic
            - p_value: KS test p-value

    Raises:
        ValueError: If distribution is not one of the supported types.

    Example:
        >>> fit = fit_distribution(trace, "normal")
        >>> print(f"Mean: {fit['loc']}, Std: {fit['scale']}")
        >>> print(f"Normality p-value: {fit['p_value']}")
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Each supported name maps to (scipy distribution, fixed fit kwargs,
    # labels for the fitted parameter tuple, in scipy's order).
    # Lognormal is fit with floc=0 (two-parameter lognormal).
    specs: dict[str, tuple[Any, dict[str, float], tuple[str, ...]]] = {
        "normal": (sp_stats.norm, {}, ("loc", "scale")),
        "lognormal": (sp_stats.lognorm, {"floc": 0}, ("shape", "loc", "scale")),
        "exponential": (sp_stats.expon, {}, ("loc", "scale")),
        "uniform": (sp_stats.uniform, {}, ("loc", "scale")),
    }

    if distribution not in specs:
        raise ValueError(f"Unknown distribution: {distribution}")

    dist, fit_kwargs, labels = specs[distribution]
    params = dist.fit(samples, **fit_kwargs)

    result: dict[str, float] = {
        label: float(value) for label, value in zip(labels, params)
    }

    # Goodness of fit: one-sample KS test against the fitted distribution.
    ks_stat, p_value = sp_stats.kstest(samples, dist.name, args=tuple(params))
    result["ks_statistic"] = float(ks_stat)
    result["p_value"] = float(p_value)

    return result
def normality_test(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    method: Literal["shapiro", "dagostino", "anderson"] = "shapiro",
) -> dict[str, float]:
    """Test for normality.

    Args:
        trace: Input trace or numpy array.
        method: Test method:
            - "shapiro": Shapiro-Wilk test (best for small samples)
            - "dagostino": D'Agostino-Pearson test
            - "anderson": Anderson-Darling test

    Returns:
        Dictionary with test results:
            - statistic: Test statistic
            - p_value: P-value (probability data is normal; for
              "anderson" this is only a coarse approximation, see below)
            - is_normal: True if p_value > 0.05

    Raises:
        ValueError: If method is not one of the supported types.

    Example:
        >>> result = normality_test(trace)
        >>> if result['is_normal']:
        ...     print("Data appears normally distributed")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    if method == "shapiro":
        # Shapiro-Wilk is only reliable up to ~5000 samples, so subsample.
        # Use a local Generator (modern numpy API) rather than the legacy
        # np.random global so this call does not perturb global RNG state.
        if len(data) > 5000:
            rng = np.random.default_rng()
            data = rng.choice(data, 5000, replace=False)
        stat, p_value = sp_stats.shapiro(data)

    elif method == "dagostino":
        stat, p_value = sp_stats.normaltest(data)

    elif method == "anderson":
        # Anderson-Darling returns critical values instead of a p-value.
        # Approximate at the 5% significance level: above the critical
        # value we only know p <= 0.05, below it only that p > 0.05.
        result = sp_stats.anderson(data, dist="norm")
        stat = result.statistic
        crit_5pct = result.critical_values[2]  # index 2 == 5% level
        p_value = 0.05 if stat > crit_5pct else 0.10

    else:
        raise ValueError(f"Unknown method: {method}")

    return {
        "statistic": float(stat),
        "p_value": float(p_value),
        # Cast so callers get a plain Python bool, not numpy.bool_.
        "is_normal": bool(p_value > 0.05),
    }
def bimodality_coefficient(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> float:
    """Compute bimodality coefficient.

    Values > 0.555 suggest bimodal distribution.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Bimodality coefficient (0-1).

    Raises:
        ValueError: If the input has fewer than 4 samples; the
            finite-sample correction term is undefined for n <= 3.

    Example:
        >>> bc = bimodality_coefficient(trace)
        >>> if bc > 0.555:
        ...     print("Distribution appears bimodal")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    n = len(data)
    # The correction factor divides by (n - 2) * (n - 3), which is zero
    # at n == 3 (ZeroDivisionError) and negative/meaningless below that.
    if n < 4:
        raise ValueError(
            f"bimodality_coefficient requires at least 4 samples, got {n}"
        )

    skewness = sp_stats.skew(data)
    excess_kurtosis = sp_stats.kurtosis(data, fisher=True)

    # Bimodality coefficient with finite-sample correction:
    # BC = (g^2 + 1) / (k + 3(n-1)^2 / ((n-2)(n-3)))
    bc = (skewness**2 + 1) / (
        excess_kurtosis + 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
    )

    return float(bc)
# Public API of this module, kept in alphabetical order.
__all__ = [
    "bimodality_coefficient",
    "distribution_metrics",
    "fit_distribution",
    "histogram",
    "moment",
    "normality_test",
]