Coverage for src / tracekit / analyzers / statistical / __init__.py: 100%
13 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""Statistical analysis module for reverse engineering.
3This module combines general statistical analysis (from tracekit.analyzers.statistics)
4with additional entropy-based and binary data analysis functions for protocol
5reverse engineering.
7Use cases:
8- Binary data analysis: shannon_entropy, byte_frequency_distribution
9- Checksum detection: detect_checksum_fields, verify_checksums
10- Data classification: classify_data_type, detect_encrypted_regions
11- Plus all functions from tracekit.analyzers.statistics
13For general signal statistics without reverse engineering features, use
14tracekit.analyzers.statistics instead. See IMPORT-PATHS.md for details.
16Requirements:
17- RE-ENT-002: Byte Frequency Distribution
18"""
20from typing import TYPE_CHECKING, Union
22import numpy as np
24if TYPE_CHECKING:
25 from numpy.typing import NDArray
27from tracekit.analyzers.statistics import (
28 ChangePointResult,
29 CoherenceResult,
30 CrossCorrelationResult,
31 DecompositionResult,
32 IsolationForestResult,
33 KDEResult,
34 LOFResult,
35 OutlierResult,
36 TrendResult,
37 autocorrelation,
38 basic_stats,
39 change_point_detection,
40 coherence,
41 correlation_coefficient,
42 cross_correlation,
43 detect_change_points,
44 detect_drift_segments,
45 detect_outliers,
46 detect_trend,
47 detrend,
48 find_periodicity,
49 iqr_outliers,
50 isolation_forest_outliers,
51 kernel_density,
52 local_outlier_factor,
53 modified_zscore_outliers,
54 moving_average,
55 percentiles,
56 phase_coherence,
57 piecewise_linear_fit,
58 quartiles,
59 remove_outliers,
60 running_stats,
61 seasonal_decompose,
62 summary_stats,
63 weighted_mean,
64 zscore_outliers,
65)
67from .checksum import (
68 ChecksumCandidate,
69 ChecksumDetectionResult,
70 ChecksumDetector,
71 ChecksumMatch,
72 compute_checksum,
73 crc8,
74 crc16_ccitt,
75 crc16_ibm,
76 crc32,
77 detect_checksum_fields,
78 identify_checksum_algorithm,
79 sum8,
80 sum16,
81 verify_checksums,
82 xor_checksum,
83)
84from .classification import (
85 ClassificationResult,
86 DataClassifier,
87 RegionClassification,
88 classify_data_type,
89 detect_compressed_regions,
90 detect_encrypted_regions,
91 detect_padding_regions,
92 detect_text_regions,
93 segment_by_type,
94)
95from .entropy import (
96 ByteFrequencyResult,
97 CompressionIndicator,
98 EntropyAnalyzer,
99 EntropyResult,
100 EntropyTransition,
101 FrequencyAnomalyResult,
102 bit_entropy,
103 byte_frequency_distribution,
104 classify_by_entropy,
105 compare_byte_distributions,
106 detect_compression_indicators,
107 detect_entropy_transitions,
108 detect_frequency_anomalies,
109 entropy_histogram,
110 entropy_profile,
111 shannon_entropy,
112 sliding_byte_frequency,
113 sliding_entropy,
114)
115from .ngrams import (
116 NGramAnalyzer,
117 NgramComparison,
118 NgramProfile,
119 compare_ngram_profiles,
120 find_unusual_ngrams,
121 ngram_entropy,
122 ngram_frequency,
123 ngram_heatmap,
124)
# Compatibility aliases: both names are bound to the very same function
# object as shannon_entropy (kept so existing tests/imports keep working).
calculate_entropy = entropy = shannon_entropy

# Input type accepted by the byte-oriented helpers (mirrors entropy.py).
# The NDArray member is a string forward reference because NDArray is only
# imported under TYPE_CHECKING.
DataType = Union[bytes, bytearray, "NDArray[np.uint8]"]
def entropy_windowed(
    data: DataType,
    window_size: int = 256,
    step: int = 1,
) -> "NDArray[np.float64]":
    """Compute windowed entropy by delegating to ``sliding_entropy``.

    This is an alternate name for ``sliding_entropy``; it adds no logic of
    its own and simply forwards its arguments.

    Args:
        data: Input buffer (bytes, bytearray, or uint8 array), passed
            through unchanged.
        window_size: Forwarded as the ``window_size`` keyword (default 256).
        step: Forwarded as the ``step`` keyword (default 1).

    Returns:
        Whatever ``sliding_entropy`` returns for the same arguments.
    """
    forwarded = {"window_size": window_size, "step": step}
    return sliding_entropy(data, **forwarded)
# Public API of this package: names re-exported from
# tracekit.analyzers.statistics and from the .checksum, .classification,
# .entropy and .ngrams submodules, plus the local aliases defined above
# (calculate_entropy, entropy, entropy_windowed).
#
# Entries form one alphabetical run: CamelCase names first, then snake_case
# names, with numeric suffixes ordered naturally (crc8 before crc16).
# The short tags inside the list are NOT section headers -- each one
# annotates only the entry directly beneath it.
#
# NOTE(review): DataType is defined in this module but is not listed here;
# confirm whether it should be exported.
__all__ = [
    # RE-ENT-002: Byte Frequency Distribution
    "ByteFrequencyResult",
    # Result types
    "ChangePointResult",
    "ChecksumCandidate",
    "ChecksumDetectionResult",
    "ChecksumDetector",
    "ChecksumMatch",
    "ClassificationResult",
    "CoherenceResult",
    "CompressionIndicator",
    "CrossCorrelationResult",
    "DataClassifier",
    "DecompositionResult",
    "EntropyAnalyzer",
    "EntropyResult",
    "EntropyTransition",
    "FrequencyAnomalyResult",
    "IsolationForestResult",
    "KDEResult",
    "LOFResult",
    "NGramAnalyzer",
    "NgramComparison",
    "NgramProfile",
    "OutlierResult",
    "RegionClassification",
    "TrendResult",
    # Correlation
    "autocorrelation",
    # Basic statistics
    "basic_stats",
    "bit_entropy",
    "byte_frequency_distribution",
    "calculate_entropy",
    "change_point_detection",
    "classify_by_entropy",
    "classify_data_type",
    "coherence",
    "compare_byte_distributions",
    "compare_ngram_profiles",
    "compute_checksum",
    "correlation_coefficient",
    "crc8",
    "crc16_ccitt",
    "crc16_ibm",
    "crc32",
    "cross_correlation",
    # Advanced (STAT-014)
    "detect_change_points",
    "detect_checksum_fields",
    "detect_compressed_regions",
    "detect_compression_indicators",
    "detect_drift_segments",
    "detect_encrypted_regions",
    "detect_entropy_transitions",
    "detect_frequency_anomalies",
    "detect_outliers",
    "detect_padding_regions",
    "detect_text_regions",
    # Trend
    "detect_trend",
    "detrend",
    "entropy",
    "entropy_histogram",
    "entropy_profile",
    "entropy_windowed",
    "find_periodicity",
    "find_unusual_ngrams",
    "identify_checksum_algorithm",
    "iqr_outliers",
    # Advanced (STAT-011)
    "isolation_forest_outliers",
    # Advanced (STAT-016)
    "kernel_density",
    # Advanced (STAT-012)
    "local_outlier_factor",
    "modified_zscore_outliers",
    "moving_average",
    "ngram_entropy",
    "ngram_frequency",
    "ngram_heatmap",
    "percentiles",
    # Advanced (STAT-015)
    "phase_coherence",
    "piecewise_linear_fit",
    "quartiles",
    "remove_outliers",
    "running_stats",
    # Advanced (STAT-013)
    "seasonal_decompose",
    "segment_by_type",
    "shannon_entropy",
    "sliding_byte_frequency",
    "sliding_entropy",
    "sum8",
    "sum16",
    "summary_stats",
    "verify_checksums",
    "weighted_mean",
    "xor_checksum",
    # Outlier detection
    "zscore_outliers",
]