Coverage report for src/tracekit/analyzers/statistics/distribution.py: 100% of 78 statements covered (coverage.py v7.13.1, generated 2026-01-11 23:04 +0000).

1"""Distribution analysis functions. 

2 

3This module provides histogram generation and distribution 

4shape metrics for signal analysis. 

5 

6 

7Example: 

8 >>> from tracekit.analyzers.statistics.distribution import histogram, distribution_metrics 

9 >>> counts, edges = histogram(trace, bins=50) 

10 >>> metrics = distribution_metrics(trace) 

11 >>> print(f"Skewness: {metrics['skewness']}") 

12 

13References: 

14 scipy.stats for distribution analysis 

15""" 

16 

17from __future__ import annotations 

18 

19from typing import TYPE_CHECKING, Any, Literal 

20 

21import numpy as np 

22from scipy import stats as sp_stats 

23 

24from tracekit.core.types import WaveformTrace 

25 

26if TYPE_CHECKING: 

27 from numpy.typing import NDArray 

28 

29 

def histogram(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    bins: int | str | NDArray[np.floating[Any]] = "auto",
    *,
    density: bool = False,
    range: tuple[float, float] | None = None,
) -> tuple[NDArray[np.float64], NDArray[np.float64]]:
    """Compute the amplitude histogram of a trace.

    Thin wrapper around :func:`numpy.histogram` that accepts either a
    ``WaveformTrace`` or a bare array and always returns float64 output.

    Args:
        trace: Input trace or numpy array.
        bins: Number of bins, a binning strategy name ("auto", "fd",
            "sturges"), or explicit bin edges.
        density: If True, normalize counts to a probability density.
        range: Optional (min, max) span covered by the histogram.

    Returns:
        (counts, bin_edges) - Histogram counts and bin edges.

    Example:
        >>> counts, edges = histogram(trace, bins=50)
        >>> centers = (edges[:-1] + edges[1:]) / 2
        >>> plt.bar(centers, counts, width=np.diff(edges))
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    counts, edges = np.histogram(samples, bins=bins, range=range, density=density)

    # Normalize dtype so callers get float64 regardless of input dtype.
    return counts.astype(np.float64), edges.astype(np.float64)

59 

60 

def distribution_metrics(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> dict[str, float]:
    """Characterize the shape of a signal's amplitude distribution.

    Computes skewness, kurtosis (Pearson and excess forms), and the
    crest factor (peak-to-RMS ratio) in linear and dB form.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Dictionary with distribution metrics:
            - skewness: Distribution asymmetry (-=left, +=right tail)
            - kurtosis: Tail weight (>3=heavy, <3=light tails)
            - excess_kurtosis: Kurtosis - 3 (0 for normal)
            - crest_factor: Peak / RMS ratio
            - crest_factor_db: Crest factor in dB

    Example:
        >>> metrics = distribution_metrics(trace)
        >>> print(f"Skewness: {metrics['skewness']:.3f}")
        >>> print(f"Kurtosis: {metrics['kurtosis']:.3f}")

    References:
        Fisher's definitions for skewness and kurtosis
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Fisher's skewness; Fisher's kurtosis is the excess form, so
    # Pearson's kurtosis is recovered by adding 3.
    skewness = float(sp_stats.skew(samples))
    excess_kurtosis = float(sp_stats.kurtosis(samples, fisher=True))

    # Crest factor: ratio of peak absolute amplitude to RMS level.
    rms = np.sqrt(np.mean(samples**2))
    peak = np.max(np.abs(samples))

    if rms > 0:
        crest = peak / rms
        crest_db = 20 * np.log10(crest)
    else:
        # Degenerate all-zero signal: the ratio is unbounded.
        crest = float("inf")
        crest_db = float("inf")

    return {
        "skewness": skewness,
        "kurtosis": excess_kurtosis + 3,
        "excess_kurtosis": excess_kurtosis,
        "crest_factor": float(crest),
        "crest_factor_db": float(crest_db),
    }

114 

115 

def moment(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    order: int,
    *,
    central: bool = True,
) -> float:
    """Compute a raw or central statistical moment.

    Args:
        trace: Input trace or numpy array.
        order: Moment order (1=mean, 2=variance, 3=skewness, 4=kurtosis).
        central: If True, compute the central moment (about the mean);
            otherwise the raw moment about zero.

    Returns:
        Moment value.

    Example:
        >>> m2 = moment(trace, 2)  # Variance
        >>> m3 = moment(trace, 3)  # Third central moment
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    if not central:
        # Raw moment about zero: E[x**order].
        return float(np.mean(samples**order))
    # Central moment about the mean, delegated to scipy.
    return float(sp_stats.moment(samples, moment=order))

142 

143 

def fit_distribution(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    distribution: Literal["normal", "lognormal", "exponential", "uniform"] = "normal",
) -> dict[str, float]:
    """Fit distribution to data and compute goodness of fit.

    Args:
        trace: Input trace or numpy array.
        distribution: Distribution to fit.

    Returns:
        Dictionary with fitted parameters and fit quality:
            - params: Distribution parameters
            - ks_statistic: Kolmogorov-Smirnov test statistic
            - p_value: KS test p-value

    Raises:
        ValueError: If distribution is not one of the supported types.

    Example:
        >>> fit = fit_distribution(trace, "normal")
        >>> print(f"Mean: {fit['loc']}, Std: {fit['scale']}")
        >>> print(f"Normality p-value: {fit['p_value']}")
    """
    samples = trace.data if isinstance(trace, WaveformTrace) else trace

    # Fit the requested family; record its scipy name and the labels for
    # the fitted parameter tuple so the KS test can be run uniformly below.
    if distribution == "normal":
        scipy_name = "norm"
        param_names = ("loc", "scale")
        params = sp_stats.norm.fit(samples)
    elif distribution == "lognormal":
        scipy_name = "lognorm"
        param_names = ("shape", "loc", "scale")
        # Location fixed at zero for the standard lognormal parameterization.
        params = sp_stats.lognorm.fit(samples, floc=0)
    elif distribution == "exponential":
        scipy_name = "expon"
        param_names = ("loc", "scale")
        params = sp_stats.expon.fit(samples)
    elif distribution == "uniform":
        scipy_name = "uniform"
        param_names = ("loc", "scale")
        params = sp_stats.uniform.fit(samples)
    else:
        raise ValueError(f"Unknown distribution: {distribution}")

    result: dict[str, float] = {
        name: float(value) for name, value in zip(param_names, params)
    }

    # Goodness of fit: one-sample Kolmogorov-Smirnov against the fitted CDF.
    ks_stat, p_value = sp_stats.kstest(samples, scipy_name, args=params)
    result["ks_statistic"] = float(ks_stat)
    result["p_value"] = float(p_value)

    return result

204 

205 

def normality_test(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    method: Literal["shapiro", "dagostino", "anderson"] = "shapiro",
) -> dict[str, float]:
    """Test for normality.

    Args:
        trace: Input trace or numpy array.
        method: Test method:
            - "shapiro": Shapiro-Wilk test (best for small samples)
            - "dagostino": D'Agostino-Pearson test
            - "anderson": Anderson-Darling test

    Returns:
        Dictionary with test results:
            - statistic: Test statistic
            - p_value: P-value (probability data is normal)
            - is_normal: True if p_value > 0.05

    Raises:
        ValueError: If method is not one of the supported types.

    Example:
        >>> result = normality_test(trace)
        >>> if result['is_normal']:
        ...     print("Data appears normally distributed")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    if method == "shapiro":
        # Shapiro-Wilk loses accuracy beyond ~5000 samples, so test a
        # random subsample. A locally seeded Generator keeps repeated calls
        # reproducible and avoids mutating the global numpy RNG state
        # (the previous np.random.choice did both).
        if len(data) > 5000:
            rng = np.random.default_rng(0)
            data = rng.choice(data, 5000, replace=False)
        stat, p_value = sp_stats.shapiro(data)

    elif method == "dagostino":
        stat, p_value = sp_stats.normaltest(data)

    elif method == "anderson":
        # Anderson-Darling reports critical values, not a p-value; use a
        # coarse two-level approximation at the 5% significance level.
        result = sp_stats.anderson(data, dist="norm")
        stat = result.statistic
        crit_5pct = result.critical_values[2]  # index 2 = 5% level
        p_value = 0.05 if stat > crit_5pct else 0.10

    else:
        raise ValueError(f"Unknown method: {method}")

    return {
        "statistic": float(stat),
        "p_value": float(p_value),
        # bool() prevents numpy.bool_ leaking into the result dict
        # (matters for JSON serialization and strict type checks).
        "is_normal": bool(p_value > 0.05),
    }

260 

261 

def bimodality_coefficient(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> float:
    """Compute bimodality coefficient.

    Values > 0.555 suggest bimodal distribution.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Bimodality coefficient (0-1).

    Raises:
        ValueError: If the input has fewer than 4 samples (the
            finite-sample correction is undefined below n=4).

    Example:
        >>> bc = bimodality_coefficient(trace)
        >>> if bc > 0.555:
        ...     print("Distribution appears bimodal")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    n = len(data)
    # The correction term divides by (n-2)(n-3): n of 2 or 3 would raise a
    # bare ZeroDivisionError, and the statistic is meaningless for n < 4.
    if n < 4:
        raise ValueError(f"bimodality_coefficient requires at least 4 samples, got {n}")

    skewness = sp_stats.skew(data)
    excess_kurtosis = sp_stats.kurtosis(data, fisher=True)

    # SAS bimodality coefficient:
    # (g1^2 + 1) / (g2 + 3(n-1)^2 / ((n-2)(n-3)))
    correction = 3 * (n - 1) ** 2 / ((n - 2) * (n - 3))
    bc = (skewness**2 + 1) / (excess_kurtosis + correction)

    return float(bc)

290 

291 

# Public API of this module, listed alphabetically.
__all__ = [
    "bimodality_coefficient",
    "distribution_metrics",
    "fit_distribution",
    "histogram",
    "moment",
    "normality_test",
]