Coverage for src / tracekit / analyzers / patterns / __init__.py: 75%
45 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-11 23:04 +0000
1"""Pattern Detection & Analysis module for TraceKit.
3This module provides comprehensive pattern detection and analysis capabilities
4for digital signals and binary data, including:
6- Periodic pattern detection (autocorrelation, FFT, suffix array)
7- Repeating sequence detection (n-grams, LRS, approximate matching)
8- Automatic signature discovery (headers, delimiters, magic bytes)
9- Pattern clustering by similarity (Hamming, edit distance, hierarchical)
10- Binary regex pattern matching
11- Multi-pattern search (Aho-Corasick)
12- Fuzzy/approximate pattern matching
13- Pattern learning and discovery
15 - RE-PAT-001: Binary Regex Pattern Matching
16 - RE-PAT-002: Multi-Pattern Search (Aho-Corasick)
17 - RE-PAT-003: Fuzzy Pattern Matching
18 - RE-PAT-004: Pattern Learning and Discovery
20Author: TraceKit Development Team
21"""
23# Periodic pattern detection (PAT-001)
24# Pattern clustering (PAT-004)
25from .clustering import (
26 ClusteringResult,
27 ClusterResult,
28 analyze_cluster,
29 cluster_by_edit_distance,
30 cluster_by_hamming,
31 cluster_hierarchical,
32 compute_distance_matrix,
33)
35# Signature discovery (PAT-003)
36from .discovery import (
37 CandidateSignature,
38 SignatureDiscovery,
39 discover_signatures,
40 find_delimiter_candidates,
41 find_header_candidates,
42)
44# RE-PAT-004: Pattern Learning and Discovery
45from .learning import (
46 LearnedPattern,
47 NgramModel,
48 PatternLearner,
49 StructureHypothesis,
50 find_recurring_structures,
51 infer_structure,
52 learn_patterns_from_data,
53)
55# RE-PAT-001, RE-PAT-002, RE-PAT-003: Advanced pattern matching
56from .matching import (
57 AhoCorasickMatcher,
58 # Classes
59 BinaryRegex,
60 FuzzyMatcher,
61 FuzzyMatchResult,
62 # Data classes
63 PatternMatchResult,
64 # RE-PAT-001: Binary Regex
65 binary_regex_search,
66 count_pattern_occurrences,
67 # Utilities
68 find_pattern_positions,
69 find_similar_sequences,
70 # RE-PAT-003: Fuzzy Matching
71 fuzzy_search,
72 # RE-PAT-002: Multi-Pattern Search
73 multi_pattern_search,
74)
75from .periodic import (
76 PeriodicPatternDetector,
77 PeriodResult,
78 detect_period,
79 detect_periods_autocorr,
80 detect_periods_fft,
81 validate_period,
82)
# Aliases for backward compatibility: the singular names are bound to the very
# same function objects as the plural ``detect_periods_*`` functions.
detect_period_autocorr = detect_periods_autocorr
detect_period_fft = detect_periods_fft
88# Repeating sequence detection (PAT-002)
89# Motif detection functions (aliases for test compatibility)
90from typing import TYPE_CHECKING, Any, cast
92from .sequences import (
93 NgramResult,
94 RepeatingSequence,
95 find_approximate_repeats,
96 find_frequent_ngrams,
97 find_longest_repeat,
98 find_repeating_sequences,
99)
101if TYPE_CHECKING:
102 import numpy as np
103 from numpy.typing import NDArray
def find_motifs(
    data: Any, motif_length: int = 8, max_distance: float = 0.1
) -> list[RepeatingSequence]:
    """Locate motifs (repeating patterns) in ``data``.

    Thin compatibility wrapper kept for the test suite: the input is converted
    with ``np.asarray`` and forwarded to ``find_repeating_sequences``.

    Args:
        data: Input data; anything ``np.asarray`` accepts.
        motif_length: Forwarded to ``find_repeating_sequences`` as
            ``min_length``.
        max_distance: Maximum distance for fuzzy matching. Accepted only for
            signature compatibility; it is not used.

    Returns:
        The list of RepeatingSequence objects returned by
        ``find_repeating_sequences`` (called with ``min_count=2``).
    """
    import numpy as np

    return find_repeating_sequences(
        np.asarray(data),
        min_length=motif_length,
        min_count=2,
    )
def extract_motif(data: Any, start: int, length: int) -> "NDArray[np.generic]":
    """Slice a motif of ``length`` elements out of ``data``.

    Args:
        data: Input data; converted with ``np.asarray`` before slicing.
        start: Index of the first element of the motif.
        length: Number of elements to take, i.e. the slice is
            ``[start, start + length)``.

    Returns:
        The selected slice as a numpy array. Standard slicing rules apply, so
        a window that runs past the end of the data is truncated.
    """
    import numpy as np
    from numpy.typing import NDArray

    window = slice(start, start + length)
    motif: NDArray[np.generic] = np.asarray(data)[window]
    return motif
def detect_anomalies(data: Any, threshold: float = 3.0) -> list[int]:
    """Detect anomalies in data using z-score.

    Each element's z-score is computed from the mean and standard deviation
    (``np.std`` default, i.e. population standard deviation) of the whole
    input. Elements whose absolute z-score is strictly greater than
    ``threshold`` are reported.

    Args:
        data: Input data; anything ``np.asarray`` accepts.
        threshold: Z-score threshold for anomaly detection.

    Returns:
        List of anomaly indices. Empty when the input is empty or when its
        standard deviation is zero (all values equal). Only the first index
        array of ``np.where`` is used, so for multi-dimensional input the
        indices refer to the first axis.
    """
    import numpy as np

    data_arr = np.asarray(data)

    # np.mean / np.std on an empty array emit RuntimeWarnings and yield NaN;
    # answer directly instead of relying on NaN comparisons being False.
    if data_arr.size == 0:
        return []

    std = np.std(data_arr)
    if std == 0:
        # Constant data: z-scores are undefined, so nothing is anomalous.
        return []

    z_scores = np.abs((data_arr - np.mean(data_arr)) / std)
    indices = np.where(z_scores > threshold)[0].tolist()
    return cast("list[int]", indices)
def cluster_patterns(patterns: Any, method: str = "hamming") -> ClusteringResult:
    """Group patterns by similarity using the selected distance.

    Args:
        patterns: Patterns to cluster; passed unchanged to the chosen
            clustering function.
        method: Clustering method ('hamming' or 'edit'). ``"hamming"`` selects
            ``cluster_by_hamming``; any other value selects
            ``cluster_by_edit_distance``.

    Returns:
        ClusteringResult with cluster assignments.
    """
    strategy = cluster_by_hamming if method == "hamming" else cluster_by_edit_distance
    return strategy(patterns)
def pattern_similarity(pattern1: Any, pattern2: Any) -> float:
    """Calculate similarity between two patterns.

    The score is the fraction of element positions at which the two patterns
    hold equal values.

    Args:
        pattern1: First pattern; anything ``np.asarray`` accepts.
        pattern2: Second pattern; anything ``np.asarray`` accepts.

    Returns:
        Similarity score (0-1, 1 = identical). Patterns whose shapes differ
        score 0.0; two empty patterns of the same shape score 1.0.
    """
    import numpy as np

    p1 = np.asarray(pattern1)
    p2 = np.asarray(pattern2)

    # Compare full shapes rather than only len(): equal first dimensions do
    # not guarantee the element-wise comparison below is meaningful.
    if p1.shape != p2.shape:
        return 0.0

    if p1.size == 0:
        return 1.0

    # Normalise by the total element count so multi-dimensional patterns stay
    # within the documented 0-1 range (dividing by len() could exceed 1).
    matches = int(np.count_nonzero(p1 == p2))
    return float(matches / p1.size)
# Public API of the package. Entries are kept in sorted order (capitalised
# class names first, then functions); each inline comment tags only the entry
# directly beneath it.
__all__ = [
    # RE-PAT-002: Multi-Pattern Search
    "AhoCorasickMatcher",
    # RE-PAT-001: Binary Regex Pattern Matching
    "BinaryRegex",
    "CandidateSignature",
    "ClusterResult",
    "ClusteringResult",
    "FuzzyMatchResult",
    # RE-PAT-003: Fuzzy Pattern Matching
    "FuzzyMatcher",
    # RE-PAT-004: Pattern Learning and Discovery
    "LearnedPattern",
    "NgramModel",
    "NgramResult",
    "PatternLearner",
    "PatternMatchResult",
    "PeriodResult",
    "PeriodicPatternDetector",
    "RepeatingSequence",
    "SignatureDiscovery",
    "StructureHypothesis",
    "analyze_cluster",
    "binary_regex_search",
    "cluster_by_edit_distance",
    "cluster_by_hamming",
    "cluster_hierarchical",
    "cluster_patterns",
    "compute_distance_matrix",
    "count_pattern_occurrences",
    # Motif detection (compatibility)
    "detect_anomalies",
    "detect_period",
    "detect_period_autocorr",
    "detect_period_fft",
    "detect_periods_autocorr",
    "detect_periods_fft",
    "discover_signatures",
    "extract_motif",
    "find_approximate_repeats",
    "find_delimiter_candidates",
    "find_frequent_ngrams",
    "find_header_candidates",
    "find_longest_repeat",
    "find_motifs",
    "find_pattern_positions",
    "find_recurring_structures",
    "find_repeating_sequences",
    "find_similar_sequences",
    "fuzzy_search",
    "infer_structure",
    "learn_patterns_from_data",
    "multi_pattern_search",
    "pattern_similarity",
    "validate_period",
]