Coverage for src / tracekit / analyzers / statistics / basic.py: 100%

55 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Basic statistical analysis functions. 

2 

3This module provides basic statistical measures for signal analysis, 

4including mean, variance, percentiles, and moment statistics. 

5 

6 

7Example: 

8 >>> from tracekit.analyzers.statistics.basic import basic_stats, percentiles 

9 >>> stats = basic_stats(trace) 

10 >>> print(f"Mean: {stats['mean']}, Std: {stats['std']}") 

11 >>> pct = percentiles(trace, [25, 50, 75]) 

12 

13References: 

14 IEEE 1241-2010 Statistical analysis methods 

15""" 

16 

17from __future__ import annotations 

18 

19from typing import TYPE_CHECKING, Any 

20 

21import numpy as np 

22 

23from tracekit.core.types import WaveformTrace 

24 

25if TYPE_CHECKING: 

26 from numpy.typing import NDArray 

27 

28 

def basic_stats(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    *,
    ddof: int = 0,
) -> dict[str, float]:
    """Compute basic statistical measures.

    Calculates mean, variance, standard deviation, min, max, and range.

    Args:
        trace: Input trace or numpy array.
        ddof: Delta degrees of freedom for variance (default 0).

    Returns:
        Dictionary with statistics:
            - mean: Arithmetic mean
            - variance: Sample variance
            - std: Standard deviation
            - min: Minimum value
            - max: Maximum value
            - range: Max - min
            - count: Number of samples

    Example:
        >>> stats = basic_stats(trace)
        >>> print(f"Mean: {stats['mean']:.6f}")
        >>> print(f"Range: {stats['range']:.3f}")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    # Hoist min/max so each reduction runs once; "range" reuses both
    # (the original recomputed np.min and np.max a second time for it).
    minimum = float(np.min(data))
    maximum = float(np.max(data))

    return {
        "mean": float(np.mean(data)),
        "variance": float(np.var(data, ddof=ddof)),
        "std": float(np.std(data, ddof=ddof)),
        "min": minimum,
        "max": maximum,
        "range": maximum - minimum,
        "count": len(data),
    }

68 

69 

def percentiles(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    p: list[float] | None = None,
) -> dict[str, float]:
    """Compute percentiles and quartiles.

    Args:
        trace: Input trace or numpy array.
        p: List of percentile values (0-100). If None, computes standard
            quartiles [0, 25, 50, 75, 100].

    Returns:
        Dictionary mapping percentile names to values:
            - p0, p25, p50, p75, p100 for quartiles
            - p{n} for custom percentiles

    Example:
        >>> pct = percentiles(trace)
        >>> print(f"Median: {pct['p50']}")
        >>> print(f"IQR: {pct['p75'] - pct['p25']}")

        >>> custom = percentiles(trace, [1, 10, 90, 99])
        >>> print(f"1st percentile: {custom['p1']}")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    requested = [0, 25, 50, 75, 100] if p is None else p
    computed = np.percentile(data, requested)

    def _label(q: float) -> str:
        # Integral percentiles are rendered without a decimal point
        # (e.g. "p25" rather than "p25.0"); fractional ones keep it.
        return f"p{int(q)}" if q == int(q) else f"p{q}"

    return {_label(q): float(v) for q, v in zip(requested, computed, strict=False)}

107 

108 

def quartiles(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> dict[str, float]:
    """Compute quartiles and IQR.

    Convenience function for quartile analysis.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Dictionary with quartile statistics:
            - q1: First quartile (25th percentile)
            - median: Median (50th percentile)
            - q3: Third quartile (75th percentile)
            - iqr: Interquartile range (Q3 - Q1)
            - lower_fence: Q1 - 1.5 * IQR
            - upper_fence: Q3 + 1.5 * IQR

    Example:
        >>> q = quartiles(trace)
        >>> print(f"IQR: {q['iqr']}")
    """
    pct = percentiles(trace, [25, 50, 75])
    q1, med, q3 = pct["p25"], pct["p50"], pct["p75"]
    spread = q3 - q1  # interquartile range

    # Fences use the conventional Tukey 1.5 * IQR outlier bounds.
    return {
        "q1": q1,
        "median": med,
        "q3": q3,
        "iqr": spread,
        "lower_fence": q1 - 1.5 * spread,
        "upper_fence": q3 + 1.5 * spread,
    }

147 

148 

def weighted_mean(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    weights: NDArray[np.floating[Any]] | None = None,
) -> float:
    """Compute weighted mean.

    Args:
        trace: Input trace or numpy array.
        weights: Weight array (same length as data). If None, equal weights.

    Returns:
        Weighted mean value.

    Example:
        >>> weights = np.linspace(0.5, 1.0, len(trace.data))
        >>> wm = weighted_mean(trace, weights)
    """
    values = trace.data if isinstance(trace, WaveformTrace) else trace

    # With no weights the result degenerates to the arithmetic mean.
    result = np.mean(values) if weights is None else np.average(values, weights=weights)
    return float(result)

172 

173 

def running_stats(
    trace: WaveformTrace | NDArray[np.floating[Any]],
    window_size: int,
) -> dict[str, NDArray[np.float64]]:
    """Compute running (rolling) statistics.

    Args:
        trace: Input trace or numpy array.
        window_size: Rolling window size in samples. Clamped to the data
            length if larger (yielding a single full-span window).

    Returns:
        Dictionary with running statistics arrays (length
        ``len(data) - window_size + 1``):
            - mean: Running mean
            - std: Running standard deviation
            - min: Running minimum
            - max: Running maximum

    Raises:
        ValueError: If window_size < 1 or the input is empty.

    Example:
        >>> running = running_stats(trace, window_size=100)
        >>> plt.plot(running['mean'])
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace
    n = len(data)

    # Previously, window_size <= 0 or empty input silently produced NaN
    # arrays (mean/std of empty windows) with RuntimeWarnings; fail fast.
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if n == 0:
        raise ValueError("input trace is empty")

    # Clamp so a window larger than the data yields one full-span window.
    window_size = min(window_size, n)

    # Zero-copy rolling windows: shape (n - window_size + 1, window_size).
    # Reducing along axis 1 replaces the per-window Python loop.
    windows = np.lib.stride_tricks.sliding_window_view(data, window_size)

    return {
        "mean": windows.mean(axis=1, dtype=np.float64),
        "std": windows.std(axis=1, dtype=np.float64),
        "min": windows.min(axis=1).astype(np.float64),
        "max": windows.max(axis=1).astype(np.float64),
    }

222 

223 

def summary_stats(
    trace: WaveformTrace | NDArray[np.floating[Any]],
) -> dict[str, Any]:
    """Compute comprehensive statistical summary.

    Combines basic stats, percentiles, and additional measures.

    Args:
        trace: Input trace or numpy array.

    Returns:
        Dictionary with comprehensive statistics.

    Example:
        >>> summary = summary_stats(trace)
        >>> for key, value in summary.items():
        ...     print(f"{key}: {value}")
    """
    data = trace.data if isinstance(trace, WaveformTrace) else trace

    # Merge basic measures with quartile statistics, then append extras.
    summary: dict[str, Any] = {**basic_stats(trace), **quartiles(trace)}

    # Median absolute deviation: robust (outlier-resistant) spread estimate.
    summary["median_abs_dev"] = float(np.median(np.abs(data - np.median(data))))

    rms = float(np.sqrt(np.mean(data**2)))
    summary["rms"] = rms
    # Crest factor; NaN when the signal is identically zero (rms == 0).
    summary["peak_to_rms"] = summary["max"] / rms if rms > 0 else float("nan")

    return summary

254 

255 

# Public API of this module, sorted alphabetically.
__all__ = [
    "basic_stats",
    "percentiles",
    "quartiles",
    "running_stats",
    "summary_stats",
    "weighted_mean",
]