Coverage for src / tracekit / analyzers / statistical / __init__.py: 100%

13 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Statistical analysis module for reverse engineering. 

2 

3This module combines general statistical analysis (from tracekit.analyzers.statistics) 

4with additional entropy-based and binary data analysis functions for protocol 

5reverse engineering. 

6 

7Use cases: 

8- Binary data analysis: shannon_entropy, byte_frequency_distribution 

9- Checksum detection: detect_checksum_fields, verify_checksums 

10- Data classification: classify_data_type, detect_encrypted_regions 

11- Plus all functions from tracekit.analyzers.statistics 

12 

13For general signal statistics without reverse engineering features, use 

14tracekit.analyzers.statistics instead. See IMPORT-PATHS.md for details. 

15 

16Requirements: 

17- RE-ENT-002: Byte Frequency Distribution 

18""" 

19 

20from typing import TYPE_CHECKING, Union 

21 

22import numpy as np 

23 

24if TYPE_CHECKING: 

25 from numpy.typing import NDArray 

26 

27from tracekit.analyzers.statistics import ( 

28 ChangePointResult, 

29 CoherenceResult, 

30 CrossCorrelationResult, 

31 DecompositionResult, 

32 IsolationForestResult, 

33 KDEResult, 

34 LOFResult, 

35 OutlierResult, 

36 TrendResult, 

37 autocorrelation, 

38 basic_stats, 

39 change_point_detection, 

40 coherence, 

41 correlation_coefficient, 

42 cross_correlation, 

43 detect_change_points, 

44 detect_drift_segments, 

45 detect_outliers, 

46 detect_trend, 

47 detrend, 

48 find_periodicity, 

49 iqr_outliers, 

50 isolation_forest_outliers, 

51 kernel_density, 

52 local_outlier_factor, 

53 modified_zscore_outliers, 

54 moving_average, 

55 percentiles, 

56 phase_coherence, 

57 piecewise_linear_fit, 

58 quartiles, 

59 remove_outliers, 

60 running_stats, 

61 seasonal_decompose, 

62 summary_stats, 

63 weighted_mean, 

64 zscore_outliers, 

65) 

66 

67from .checksum import ( 

68 ChecksumCandidate, 

69 ChecksumDetectionResult, 

70 ChecksumDetector, 

71 ChecksumMatch, 

72 compute_checksum, 

73 crc8, 

74 crc16_ccitt, 

75 crc16_ibm, 

76 crc32, 

77 detect_checksum_fields, 

78 identify_checksum_algorithm, 

79 sum8, 

80 sum16, 

81 verify_checksums, 

82 xor_checksum, 

83) 

84from .classification import ( 

85 ClassificationResult, 

86 DataClassifier, 

87 RegionClassification, 

88 classify_data_type, 

89 detect_compressed_regions, 

90 detect_encrypted_regions, 

91 detect_padding_regions, 

92 detect_text_regions, 

93 segment_by_type, 

94) 

95from .entropy import ( 

96 ByteFrequencyResult, 

97 CompressionIndicator, 

98 EntropyAnalyzer, 

99 EntropyResult, 

100 EntropyTransition, 

101 FrequencyAnomalyResult, 

102 bit_entropy, 

103 byte_frequency_distribution, 

104 classify_by_entropy, 

105 compare_byte_distributions, 

106 detect_compression_indicators, 

107 detect_entropy_transitions, 

108 detect_frequency_anomalies, 

109 entropy_histogram, 

110 entropy_profile, 

111 shannon_entropy, 

112 sliding_byte_frequency, 

113 sliding_entropy, 

114) 

115from .ngrams import ( 

116 NGramAnalyzer, 

117 NgramComparison, 

118 NgramProfile, 

119 compare_ngram_profiles, 

120 find_unusual_ngrams, 

121 ngram_entropy, 

122 ngram_frequency, 

123 ngram_heatmap, 

124) 

125 

# Alternate public names for shannon_entropy, kept for test compatibility.
# Both names are bound to the very same function object.
entropy = shannon_entropy
calculate_entropy = shannon_entropy

# Accepted input-data type (matching entropy.py). The NDArray member is
# written as a string because NDArray is imported only under TYPE_CHECKING
# and therefore does not exist as a name at runtime.
DataType = Union[bytes, bytearray, "NDArray[np.uint8]"]

132 

133 

def entropy_windowed(
    data: DataType, window_size: int = 256, step: int = 1
) -> "NDArray[np.float64]":
    """Compute windowed entropy by delegating to ``sliding_entropy``.

    This is a convenience alias: it adds no logic of its own and simply
    forwards its arguments.

    Args:
        data: Input buffer to analyze.
        window_size: Passed through to ``sliding_entropy`` as ``window_size``.
        step: Passed through to ``sliding_entropy`` as ``step``.

    Returns:
        Whatever ``sliding_entropy`` returns for the same arguments.
    """
    windowed = sliding_entropy(data, window_size=window_size, step=step)
    return windowed

139 

140 

# Public API of this package: every name listed here is imported or defined
# above in this module. Entries are kept in sorted order (capitalized class
# names first, then lowercase function names).
# NOTE(review): because of that ordering, most inline section labels below sit
# above a single entry rather than a contiguous group — presumably leftovers
# from an earlier grouped layout; confirm before relying on them.
__all__ = [
    # RE-ENT-002: Byte Frequency Distribution
    "ByteFrequencyResult",
    # Result types
    "ChangePointResult",
    "ChecksumCandidate",
    "ChecksumDetectionResult",
    "ChecksumDetector",
    "ChecksumMatch",
    "ClassificationResult",
    "CoherenceResult",
    "CompressionIndicator",
    "CrossCorrelationResult",
    "DataClassifier",
    "DecompositionResult",
    "EntropyAnalyzer",
    "EntropyResult",
    "EntropyTransition",
    "FrequencyAnomalyResult",
    "IsolationForestResult",
    "KDEResult",
    "LOFResult",
    "NGramAnalyzer",
    "NgramComparison",
    "NgramProfile",
    "OutlierResult",
    "RegionClassification",
    "TrendResult",
    # Correlation
    "autocorrelation",
    # Basic statistics
    "basic_stats",
    "bit_entropy",
    "byte_frequency_distribution",
    "calculate_entropy",
    "change_point_detection",
    "classify_by_entropy",
    "classify_data_type",
    "coherence",
    "compare_byte_distributions",
    "compare_ngram_profiles",
    "compute_checksum",
    "correlation_coefficient",
    "crc8",
    "crc16_ccitt",
    "crc16_ibm",
    "crc32",
    "cross_correlation",
    # Advanced (STAT-014)
    "detect_change_points",
    "detect_checksum_fields",
    "detect_compressed_regions",
    "detect_compression_indicators",
    "detect_drift_segments",
    "detect_encrypted_regions",
    "detect_entropy_transitions",
    "detect_frequency_anomalies",
    "detect_outliers",
    "detect_padding_regions",
    "detect_text_regions",
    # Trend
    "detect_trend",
    "detrend",
    "entropy",
    "entropy_histogram",
    "entropy_profile",
    "entropy_windowed",
    "find_periodicity",
    "find_unusual_ngrams",
    "identify_checksum_algorithm",
    "iqr_outliers",
    # Advanced (STAT-011)
    "isolation_forest_outliers",
    # Advanced (STAT-016)
    "kernel_density",
    # Advanced (STAT-012)
    "local_outlier_factor",
    "modified_zscore_outliers",
    "moving_average",
    "ngram_entropy",
    "ngram_frequency",
    "ngram_heatmap",
    "percentiles",
    # Advanced (STAT-015)
    "phase_coherence",
    "piecewise_linear_fit",
    "quartiles",
    "remove_outliers",
    "running_stats",
    # Advanced (STAT-013)
    "seasonal_decompose",
    "segment_by_type",
    "shannon_entropy",
    "sliding_byte_frequency",
    "sliding_entropy",
    "sum8",
    "sum16",
    "summary_stats",
    "verify_checksums",
    "weighted_mean",
    "xor_checksum",
    # Outlier detection
    "zscore_outliers",
]