Coverage for session_buddy / utils / regex_patterns.py: 100.00%
2 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-04 00:43 -0800
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-04 00:43 -0800
1#!/usr/bin/env python3
2"""Local regex patterns for session-mgmt-mcp using crackerjack's SAFE_PATTERNS architecture.
4This module contains all validated regex patterns used throughout the session-mgmt-mcp
5codebase, following crackerjack's centralized pattern approach for security and reliability.
7All patterns are validated with comprehensive test cases and use proper replacement syntax.
8"""
10import re
12from crackerjack.services.regex_patterns import ValidatedPattern
14# Session Management MCP Validated Patterns Registry
15SAFE_PATTERNS: dict[str, ValidatedPattern] = {
16 # Code extraction patterns for search_enhanced.py
17 "python_code_block": ValidatedPattern(
18 name="python_code_block",
19 pattern=r"```python\n(.*?)\n```",
20 replacement=r"\1",
21 description="Extract Python code from markdown code blocks",
22 flags=re.DOTALL,
23 test_cases=[
24 ("```python\nprint('hello')\n```", "print('hello')"),
25 ("```python\ndef func():\n pass\n```", "def func():\n pass"),
26 ("```python\n\n```", ""),
27 ("no code here", "no code here"), # No change
28 ],
29 ),
30 "generic_code_block": ValidatedPattern(
31 name="generic_code_block",
32 pattern=r"```\n(.*?)\n```",
33 replacement=r"\1",
34 description="Extract code from generic markdown code blocks",
35 flags=re.DOTALL,
36 test_cases=[
37 ("```\nsome code\n```", "some code"),
38 ("```\nline1\nline2\n```", "line1\nline2"),
39 ("```\n\n```", ""),
40 ("no fenced code", "no fenced code"), # No change
41 ],
42 ),
43 # Error pattern matching for search_enhanced.py
44 "python_traceback": ValidatedPattern(
45 name="python_traceback",
46 pattern=r"Traceback \(most recent call last\):.*?(?=\n\n|\Z)",
47 replacement=r"<TRACEBACK_MASKED>",
48 description="Match Python traceback blocks with safe termination",
49 flags=re.MULTILINE | re.DOTALL,
50 test_cases=[
51 (
52 "Traceback (most recent call last):\n File test.py\nError: msg\n\nNext line",
53 "<TRACEBACK_MASKED>\n\nNext line",
54 ),
55 (
56 "Traceback (most recent call last):\n File test.py\nError: msg",
57 "<TRACEBACK_MASKED>",
58 ),
59 ("No traceback here", "No traceback here"), # No change
60 ],
61 ),
62 "python_exception": ValidatedPattern(
63 name="python_exception",
64 pattern=r"\b(ValueError|TypeError|RuntimeError|SyntaxError|ImportError|AttributeError|KeyError|IndexError|FileNotFoundError|PermissionError|ConnectionError|TimeoutError|AssertionError|Exception|BaseException): (.+)",
65 replacement=r"\1: <ERROR_MESSAGE_MASKED>",
66 description="Match Python exception patterns safely",
67 test_cases=[
68 ("ValueError: invalid input", "ValueError: <ERROR_MESSAGE_MASKED>"),
69 (
70 "RuntimeError: something went wrong",
71 "RuntimeError: <ERROR_MESSAGE_MASKED>",
72 ),
73 (
74 "NotAnError: this should not match",
75 "NotAnError: this should not match",
76 ), # No change
77 ("SyntaxError: bad syntax", "SyntaxError: <ERROR_MESSAGE_MASKED>"),
78 ],
79 ),
80 "javascript_error": ValidatedPattern(
81 name="javascript_error",
82 pattern=r"\b(Error|TypeError|ReferenceError): (.+)",
83 replacement=r"\1: <JS_ERROR_MASKED>",
84 description="Match JavaScript error patterns",
85 test_cases=[
86 ("TypeError: Cannot read property", "TypeError: <JS_ERROR_MASKED>"),
87 ("Error: Something failed", "Error: <JS_ERROR_MASKED>"),
88 ("ReferenceError: x is not defined", "ReferenceError: <JS_ERROR_MASKED>"),
89 ("CustomError: not matched", "CustomError: not matched"), # No change
90 ],
91 ),
92 "compile_error": ValidatedPattern(
93 name="compile_error",
94 pattern=r"(error|Error): (.+) at line (\d+)",
95 replacement=r"\1: <COMPILE_ERROR_MASKED> at line \3",
96 description="Match compilation error patterns with line numbers",
97 test_cases=[
98 (
99 "error: syntax error at line 42",
100 "error: <COMPILE_ERROR_MASKED> at line 42",
101 ),
102 (
103 "Error: missing semicolon at line 10",
104 "Error: <COMPILE_ERROR_MASKED> at line 10",
105 ),
106 (
107 "warning: deprecated at line 5",
108 "warning: deprecated at line 5",
109 ), # No change
110 ],
111 ),
112 "warning_pattern": ValidatedPattern(
113 name="warning_pattern",
114 pattern=r"(warning|Warning): (.+)",
115 replacement=r"\1: <WARNING_MASKED>",
116 description="Match warning message patterns",
117 test_cases=[
118 ("warning: deprecated function", "warning: <WARNING_MASKED>"),
119 ("Warning: potential issue", "Warning: <WARNING_MASKED>"),
120 ("info: just information", "info: just information"), # No change
121 ],
122 ),
123 "assertion_error": ValidatedPattern(
124 name="assertion_error",
125 pattern=r"AssertionError: (.+)",
126 replacement=r"AssertionError: <ASSERTION_MASKED>",
127 description="Match assertion error patterns",
128 test_cases=[
129 ("AssertionError: expected True", "AssertionError: <ASSERTION_MASKED>"),
130 (
131 "AssertionError: values don't match",
132 "AssertionError: <ASSERTION_MASKED>",
133 ),
134 ("ValueError: not assertion", "ValueError: not assertion"), # No change
135 ],
136 ),
137 "import_error": ValidatedPattern(
138 name="import_error",
139 pattern=r"ImportError: (.+)",
140 replacement=r"ImportError: <IMPORT_ERROR_MASKED>",
141 description="Match import error patterns",
142 test_cases=[
143 (
144 "ImportError: No module named 'xyz'",
145 "ImportError: <IMPORT_ERROR_MASKED>",
146 ),
147 ("ImportError: cannot import name", "ImportError: <IMPORT_ERROR_MASKED>"),
148 (
149 "ModuleNotFoundError: different",
150 "ModuleNotFoundError: different",
151 ), # No change
152 ],
153 ),
154 "module_not_found": ValidatedPattern(
155 name="module_not_found",
156 pattern=r"ModuleNotFoundError: (.+)",
157 replacement=r"ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
158 description="Match module not found error patterns",
159 test_cases=[
160 (
161 "ModuleNotFoundError: No module named 'test'",
162 "ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
163 ),
164 (
165 "ModuleNotFoundError: missing dependency",
166 "ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
167 ),
168 (
169 "ImportError: different error",
170 "ImportError: different error",
171 ), # No change
172 ],
173 ),
174 "file_not_found": ValidatedPattern(
175 name="file_not_found",
176 pattern=r"FileNotFoundError: (.+)",
177 replacement=r"FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
178 description="Match file not found error patterns",
179 test_cases=[
180 (
181 "FileNotFoundError: [Errno 2] No such file",
182 "FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
183 ),
184 (
185 "FileNotFoundError: file missing",
186 "FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
187 ),
188 ("PermissionError: different", "PermissionError: different"), # No change
189 ],
190 ),
191 "permission_denied": ValidatedPattern(
192 name="permission_denied",
193 pattern=r"PermissionError: (.+)",
194 replacement=r"PermissionError: <PERMISSION_ERROR_MASKED>",
195 description="Match permission error patterns",
196 test_cases=[
197 (
198 "PermissionError: [Errno 13] Permission denied",
199 "PermissionError: <PERMISSION_ERROR_MASKED>",
200 ),
201 (
202 "PermissionError: access denied",
203 "PermissionError: <PERMISSION_ERROR_MASKED>",
204 ),
205 (
206 "FileNotFoundError: different",
207 "FileNotFoundError: different",
208 ), # No change
209 ],
210 ),
211 "network_error": ValidatedPattern(
212 name="network_error",
213 pattern=r"(ConnectionError|TimeoutError|HTTPError): (.+)",
214 replacement=r"\1: <NETWORK_ERROR_MASKED>",
215 description="Match network-related error patterns",
216 test_cases=[
217 (
218 "ConnectionError: Failed to connect",
219 "ConnectionError: <NETWORK_ERROR_MASKED>",
220 ),
221 ("TimeoutError: Request timed out", "TimeoutError: <NETWORK_ERROR_MASKED>"),
222 ("HTTPError: 404 Not Found", "HTTPError: <NETWORK_ERROR_MASKED>"),
223 ("ValueError: not network", "ValueError: not network"), # No change
224 ],
225 ),
226 # Context pattern matching (boolean search)
227 "debugging_context": ValidatedPattern(
228 name="debugging_context",
229 pattern=r"\b(debug|debugging|breakpoint|pdb|print\(\))\b",
230 replacement=r"<DEBUG_CONTEXT>",
231 description="Match debugging-related context patterns",
232 flags=re.IGNORECASE,
233 test_cases=[
234 ("let's debug this issue", "let's <DEBUG_CONTEXT> this issue"),
235 ("debugging the problem", "<DEBUG_CONTEXT> the problem"),
236 ("set a breakpoint here", "set a <DEBUG_CONTEXT> here"),
237 ("regular code", "regular code"), # No change
238 ],
239 ),
240 "testing_context": ValidatedPattern(
241 name="testing_context",
242 pattern=r"(test|pytest|unittest|assert|mock)",
243 replacement=r"<TEST_CONTEXT>",
244 description="Match testing-related context patterns",
245 flags=re.IGNORECASE,
246 test_cases=[
247 ("run the test suite", "run the <TEST_CONTEXT> suite"),
248 ("pytest configuration", "<TEST_CONTEXT> configuration"),
249 ("unittest framework", "<TEST_CONTEXT> framework"),
250 ("regular text", "regular text"), # No change
251 ],
252 ),
253 "error_handling_context": ValidatedPattern(
254 name="error_handling_context",
255 pattern=r"(try|except|finally|raise|catch)",
256 replacement=r"<ERROR_HANDLING_CONTEXT>",
257 description="Match error handling context patterns",
258 flags=re.IGNORECASE,
259 test_cases=[
260 ("try to handle", "<ERROR_HANDLING_CONTEXT> to handle"),
261 ("except ValueError", "<ERROR_HANDLING_CONTEXT> ValueError"),
262 ("finally block", "<ERROR_HANDLING_CONTEXT> block"),
263 ("normal flow", "normal flow"), # No change
264 ],
265 ),
266 "performance_context": ValidatedPattern(
267 name="performance_context",
268 pattern=r"(slow|performance|benchmark|optimize|profil)",
269 replacement=r"<PERFORMANCE_CONTEXT>",
270 description="Match performance-related context patterns",
271 flags=re.IGNORECASE,
272 test_cases=[
273 ("this is slow", "this is <PERFORMANCE_CONTEXT>"),
274 ("performance improvement", "<PERFORMANCE_CONTEXT> improvement"),
275 ("benchmark results", "<PERFORMANCE_CONTEXT> results"),
276 ("fast code", "fast code"), # No change
277 ],
278 ),
279 "security_context": ValidatedPattern(
280 name="security_context",
281 pattern=r"(security|authentication|authorization|token|password)",
282 replacement=r"<SECURITY_CONTEXT>",
283 description="Match security-related context patterns",
284 flags=re.IGNORECASE,
285 test_cases=[
286 ("security audit", "<SECURITY_CONTEXT> audit"),
287 ("authentication required", "<SECURITY_CONTEXT> required"),
288 ("token validation", "<SECURITY_CONTEXT> validation"),
289 ("regular text", "regular text"), # No change
290 ],
291 ),
292 # Time parsing patterns for search_enhanced.py
293 "time_ago_pattern": ValidatedPattern(
294 name="time_ago_pattern",
295 pattern=r"(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago",
296 replacement=r"\1 \2 ago",
297 description="Match time ago expressions for parsing",
298 test_cases=[
299 ("5 minutes ago", "5 minute ago"),
300 ("2 hours ago", "2 hour ago"),
301 ("1 day ago", "1 day ago"),
302 ("3 weeks ago", "3 week ago"),
303 ("not a time", "not a time"), # No change
304 ],
305 ),
306 "relative_time_pattern": ValidatedPattern(
307 name="relative_time_pattern",
308 pattern=r"(today|yesterday|this\s+week|last\s+week|this\s+month|last\s+month)",
309 replacement=r"<RELATIVE_TIME>",
310 description="Match relative time expressions",
311 flags=re.IGNORECASE,
312 test_cases=[
313 ("today is good", "<RELATIVE_TIME> is good"),
314 ("yesterday we worked", "<RELATIVE_TIME> we worked"),
315 ("this week's plan", "<RELATIVE_TIME>'s plan"),
316 ("some other day", "some other day"), # No change
317 ],
318 ),
319 "since_time_pattern": ValidatedPattern(
320 name="since_time_pattern",
321 pattern=r"since\s+(today|yesterday|this\s+week|last\s+week)",
322 replacement=r"since <TIME_REFERENCE>",
323 description="Match 'since' time expressions",
324 flags=re.IGNORECASE,
325 test_cases=[
326 ("since today", "since <TIME_REFERENCE>"),
327 ("since yesterday", "since <TIME_REFERENCE>"),
328 ("since this week", "since <TIME_REFERENCE>"),
329 ("since forever", "since forever"), # No change
330 ],
331 ),
332 "last_duration_pattern": ValidatedPattern(
333 name="last_duration_pattern",
334 pattern=r"in\s+the\s+last\s+(\d+)\s+(minute|hour|day|week|month|year)s?",
335 replacement=r"in the last \1 \2",
336 description="Match 'in the last X units' patterns",
337 test_cases=[
338 ("in the last 5 minutes", "in the last 5 minute"),
339 ("in the last 2 hours", "in the last 2 hour"),
340 ("in the last 10 days", "in the last 10 day"),
341 ("not a duration", "not a duration"), # No change
342 ],
343 ),
344 "iso_date_pattern": ValidatedPattern(
345 name="iso_date_pattern",
346 pattern=r"(\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01]))",
347 replacement=r"<ISO_DATE>",
348 description="Match valid ISO date format (YYYY-MM-DD)",
349 test_cases=[
350 ("2023-12-25", "<ISO_DATE>"),
351 ("Date: 2024-01-15 is today", "Date: <ISO_DATE> is today"),
352 ("not-a-date", "not-a-date"), # No change
353 (
354 "2023-13-45",
355 "2023-13-45",
356 ), # Invalid date, no change expected
357 ],
358 ),
359 "us_date_pattern": ValidatedPattern(
360 name="us_date_pattern",
361 pattern=r"(\d{1,2}/\d{1,2}/\d{4})",
362 replacement=r"<US_DATE>",
363 description="Match US date format (MM/DD/YYYY or M/D/YYYY)",
364 test_cases=[
365 ("12/25/2023", "<US_DATE>"),
366 ("1/5/2024", "<US_DATE>"),
367 ("Meeting on 3/15/2024 at noon", "Meeting on <US_DATE> at noon"),
368 ("not/a/date", "not/a/date"), # No change
369 ],
370 ),
371 # Crackerjack integration patterns for output parsing
372 "pytest_result": ValidatedPattern(
373 name="pytest_result",
374 pattern=r"(\w+\.py)::\s*(\w+)\s*(PASSED|FAILED|SKIPPED|ERROR|XFAIL|XPASS)\s*(?:\[(\d+%)\])?\s*(?:\((.+)\))?",
375 replacement=r"TEST: \1::\2 -> \3",
376 description="Parse pytest test results with optional percentage and timing",
377 test_cases=[
378 (
379 "test_file.py:: test_function PASSED",
380 "TEST: test_file.py::test_function -> PASSED",
381 ),
382 (
383 "test_example.py:: test_method FAILED [50%] (0.05s)",
384 "TEST: test_example.py::test_method -> FAILED",
385 ),
386 (
387 "test_skip.py:: test_skip SKIPPED",
388 "TEST: test_skip.py::test_skip -> SKIPPED",
389 ),
390 ("not a test result", "not a test result"), # No change
391 ],
392 ),
393 "coverage_summary": ValidatedPattern(
394 name="coverage_summary",
395 pattern=r"TOTAL\s+\d+\s+\d+\s+(\d+)%",
396 replacement=r"COVERAGE: \1%",
397 description="Extract total coverage percentage from coverage reports",
398 test_cases=[
399 ("TOTAL 1000 50 95%", "COVERAGE: 95%"),
400 ("TOTAL 500 25 78%", "COVERAGE: 78%"),
401 ("subtotal 100 5 90%", "subtotal 100 5 90%"), # No change
402 ],
403 ),
404 "ruff_error": ValidatedPattern(
405 name="ruff_error",
406 pattern=r"([^:\s]+):(\d+):(\d+):\s*([A-Z]\d{3,4})\s*(.+)",
407 replacement=r"RUFF: \1 line \2 -> \4: \5",
408 description="Parse Ruff linting errors with file, line, column, code, and message",
409 test_cases=[
410 (
411 "src/main.py:42:10: E501 line too long (88 > 79 characters)",
412 "RUFF: src/main.py line 42 -> E501: line too long (88 > 79 characters)",
413 ),
414 (
415 "test.py:1:1: F401 imported but unused",
416 "RUFF: test.py line 1 -> F401: imported but unused",
417 ),
418 ("not a ruff error", "not a ruff error"), # No change
419 ],
420 ),
421 "mypy_error": ValidatedPattern(
422 name="mypy_error",
423 pattern=r"([^:\s]+):(\d+):\s*error:\s*(.+)",
424 replacement=r"MYPY: \1 line \2 -> \3",
425 description="Parse mypy type checking errors with file, line, and message",
426 test_cases=[
427 (
428 "src/module.py:15: error: Argument 1 has incompatible type",
429 "MYPY: src/module.py line 15 -> Argument 1 has incompatible type",
430 ),
431 (
432 "main.py:8: error: Name 'x' is not defined",
433 "MYPY: main.py line 8 -> Name 'x' is not defined",
434 ),
435 ("not a mypy error", "not a mypy error"), # No change
436 ],
437 ),
438 "bandit_finding": ValidatedPattern(
439 name="bandit_finding",
440 pattern=r">> Issue: \[([A-Z]\d+):([a-z_]+)\]\s*(.+)",
441 replacement=r"BANDIT: \1 (\2) -> \3",
442 description="Parse Bandit security findings with code, severity, and description",
443 test_cases=[
444 (
445 ">> Issue: [B602:subprocess_popen_with_shell_equals_true] Possible shell injection",
446 "BANDIT: B602 (subprocess_popen_with_shell_equals_true) -> Possible shell injection",
447 ),
448 (
449 ">> Issue: [B108:hardcoded_tmp_directory] Use of insecure temp",
450 "BANDIT: B108 (hardcoded_tmp_directory) -> Use of insecure temp",
451 ),
452 ("not a bandit finding", "not a bandit finding"), # No change
453 ],
454 ),
455 "quality_score": ValidatedPattern(
456 name="quality_score",
457 pattern=r"Quality Score:\s*(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)\s*\((\d+(?:\.\d+)?)%\)",
458 replacement=r"QUALITY: \3% (\1/\2)",
459 description="Parse quality score with percentage calculation",
460 test_cases=[
461 ("Quality Score: 85.5/100 (85.5%)", "QUALITY: 85.5% (85.5/100)"),
462 ("Quality Score: 90/100 (90%)", "QUALITY: 90% (90/100)"),
463 ("Final Score: 95/100", "Final Score: 95/100"), # No change
464 ],
465 ),
466 "execution_time": ValidatedPattern(
467 name="execution_time",
468 pattern=r"(\d+(?:\.\d+)?)\s*(s|ms|seconds?|milliseconds?)",
469 replacement=r"\1\2",
470 description="Normalize execution time formats",
471 test_cases=[
472 ("2.5 seconds", "2.5seconds"),
473 ("150 ms", "150ms"),
474 ("0.05 s", "0.05s"),
475 ("fast enough", "fast enough"), # No change
476 ],
477 ),
478 "progress_indicator": ValidatedPattern(
479 name="progress_indicator",
480 pattern=r"\[([=>\s]*)\]\s*(\d+)%",
481 replacement=r"PROGRESS: \2%",
482 description="Parse progress bars with percentage",
483 test_cases=[
484 ("[=====> ] 50%", "PROGRESS: 50%"),
485 ("[=========>] 90%", "PROGRESS: 90%"),
486 ("[ ] 0%", "PROGRESS: 0%"),
487 ("not progress", "not progress"), # No change
488 ],
489 ),
490 "git_commit_hash": ValidatedPattern(
491 name="git_commit_hash",
492 pattern=r"\b([a-f0-9]{7,40})\b",
493 replacement=r"<COMMIT:\1>",
494 description="Match Git commit hashes (7-40 hex characters)",
495 test_cases=[
496 ("commit abc1234 was merged", "commit <COMMIT:abc1234> was merged"),
497 (
498 "long hash abcdef1234567890abcdef1234567890abcdef12",
499 "long hash <COMMIT:abcdef1234567890abcdef1234567890abcdef12>",
500 ),
501 ("short ab12", "short ab12"), # Too short
502 ("not hex ghi1234", "not hex ghi1234"), # Contains non-hex
503 ],
504 ),
505 "file_path_with_line": ValidatedPattern(
506 name="file_path_with_line",
507 pattern=r"([A-Za-z_][A-Za-z0-9_/.-]*\.py):(\d+)",
508 replacement=r"\1 line \2",
509 description="Match file paths with line numbers (file.py:123)",
510 test_cases=[
511 ("src/main.py:42", "src/main.py line 42"),
512 ("test_file.py:1", "test_file.py line 1"),
513 ("sub/dir/module.py:100", "sub/dir/module.py line 100"),
514 ("not-python.txt:50", "not-python.txt:50"), # No change
515 ],
516 ),
517 # Memory optimizer patterns
518 "sentence_split": ValidatedPattern(
519 name="sentence_split",
520 pattern=r"[.!\?]+",
521 replacement=r" ",
522 description="Replace sentence-ending punctuation with spaces for splitting",
523 test_cases=[
524 ("Hello world.", "Hello world "),
525 ("How are you?", "How are you "),
526 ("Multiple!!!", "Multiple "),
527 ],
528 ),
529 "code_block_cleanup": ValidatedPattern(
530 name="code_block_cleanup",
531 pattern=r"```.*?```",
532 replacement=r"",
533 description="Remove code blocks from text for keyword extraction",
534 flags=re.DOTALL,
535 test_cases=[
536 ("Text ```python\ncode\n``` more text", "Text more text"),
537 ("```\njust code\n```", ""),
538 ("no code blocks", "no code blocks"),
539 ],
540 ),
541 "inline_code_cleanup": ValidatedPattern(
542 name="inline_code_cleanup",
543 pattern=r"`[^`]+`",
544 replacement=r"",
545 description="Remove inline code from text",
546 test_cases=[
547 ("Use `function()` to call", "Use to call"),
548 ("No inline code", "No inline code"),
549 ("`code`", ""),
550 ],
551 ),
552 "word_extraction": ValidatedPattern(
553 name="word_extraction",
554 pattern=r"\b[a-zA-Z]{1,2}\b",
555 replacement=r"",
556 description="Remove short words (1-2 chars) for keyword analysis",
557 test_cases=[
558 ("short words", "short words"),
559 ("a test", " test"),
560 ("123 in 456", "123 456"),
561 ],
562 ),
563 "word_boundary": ValidatedPattern(
564 name="word_boundary",
565 pattern=r"[^\w\s]+",
566 replacement=r" ",
567 description="Replace non-word characters with spaces for word boundary detection",
568 test_cases=[
569 ("hello-world", "hello world"),
570 ("test@example", "test example"),
571 ("underscored_var", "underscored_var"),
572 ],
573 ),
574 # File extension patterns for memory optimizer
575 "python_files": ValidatedPattern(
576 name="python_files",
577 pattern=r"(\w+\.py)",
578 replacement=r"[\1]",
579 description="Wrap Python file references in brackets",
580 test_cases=[
581 ("main.py script", "[main.py] script"),
582 ("test_file.py found", "[test_file.py] found"),
583 ("no files here", "no files here"),
584 ],
585 ),
586 "javascript_files": ValidatedPattern(
587 name="javascript_files",
588 pattern=r"(\w+\.js)",
589 replacement=r"[\1]",
590 description="Wrap JavaScript file references in brackets",
591 test_cases=[
592 ("app.js file", "[app.js] file"),
593 ("script.js found", "[script.js] found"),
594 ("no files", "no files"),
595 ],
596 ),
597 "typescript_files": ValidatedPattern(
598 name="typescript_files",
599 pattern=r"(\w+\.ts)",
600 replacement=r"[\1]",
601 description="Wrap TypeScript file references in brackets",
602 test_cases=[
603 ("index.ts file", "[index.ts] file"),
604 ("component.ts interface", "[component.ts] interface"),
605 ("other files", "other files"),
606 ],
607 ),
608 "json_files": ValidatedPattern(
609 name="json_files",
610 pattern=r"(\w+\.json)",
611 replacement=r"[\1]",
612 description="Match JSON file references",
613 test_cases=[
614 ("config.json settings", "[config.json] settings"),
615 ("package.json file", "[package.json] file"),
616 ("no json", "no json"),
617 ],
618 ),
619 "markdown_files": ValidatedPattern(
620 name="markdown_files",
621 pattern=r"(\w+\.md)",
622 replacement=r"[\1]",
623 description="Match Markdown file references",
624 test_cases=[
625 ("README.md documentation", "[README.md] documentation"),
626 ("docs.md file", "[docs.md] file"),
627 ("no markdown", "no markdown"),
628 ],
629 ),
630 # Advanced search patterns
631 "function_definition": ValidatedPattern(
632 name="function_definition",
633 pattern=r"\bdef\s+(\w+)",
634 replacement=r"function:\1",
635 description="Extract Python function definitions",
636 test_cases=[
637 ("def main():", "function:main():"),
638 ("def get_data(param):", "function:get_data(param):"),
639 ("no functions here", "no functions here"),
640 ],
641 ),
642 "class_definition": ValidatedPattern(
643 name="class_definition",
644 pattern=r"\bclass\s+(\w+)",
645 replacement=r"class:\1",
646 description="Extract Python class definitions",
647 test_cases=[
648 ("class MyClass:", "class:MyClass:"),
649 ("class SearchEngine(Base):", "class:SearchEngine(Base):"),
650 ("no classes here", "no classes here"),
651 ],
652 ),
653 "file_extension": ValidatedPattern(
654 name="file_extension",
655 pattern=r"\.(\w{2,4})\b",
656 replacement=r"filetype:\1",
657 description="Extract file extensions for categorization",
658 test_cases=[
659 ("file.py and test.json", "filefiletype:py and test.json"),
660 ("config.yaml setup", "configfiletype:yaml setup"),
661 ("no extensions", "no extensions"),
662 ],
663 ),
664 # Language detection patterns
665 "python_code": ValidatedPattern(
666 name="python_code",
667 pattern=r"\b(def|class|import|from|if __name__|async|await|yield)\b",
668 replacement=r"python",
669 description="Detect Python code patterns",
670 flags=re.IGNORECASE,
671 test_cases=[
672 ("def function():", "python function():"),
673 ("import os", "python os"),
674 ("if __name__ == '__main__'", "python == '__main__'"),
675 ("regular text", "regular text"),
676 ],
677 ),
678 "javascript_code": ValidatedPattern(
679 name="javascript_code",
680 pattern=r"\b(function|var|let|const|=>|require|module\.exports|console\.log)\b",
681 replacement=r"javascript",
682 description="Detect JavaScript code patterns",
683 flags=re.IGNORECASE,
684 test_cases=[
685 ("function myFunc()", "javascript myFunc()"),
686 ("const data = []", "javascript data = []"),
687 ("console.log('hello')", "javascript('hello')"),
688 ("regular text", "regular text"),
689 ],
690 ),
691 "sql_code": ValidatedPattern(
692 name="sql_code",
693 pattern=r"\b(SELECT|FROM|WHERE|JOIN|INSERT|UPDATE|DELETE|CREATE|TABLE)\b",
694 replacement=r"sql",
695 description="Detect SQL code patterns",
696 flags=re.IGNORECASE,
697 test_cases=[
698 ("SELECT * FROM users", "sql * FROM users"),
699 ("INSERT INTO table", "sql INTO table"),
700 ("CREATE TABLE test", "sql TABLE test"),
701 ("regular text", "regular text"),
702 ],
703 ),
704 "error_keywords": ValidatedPattern(
705 name="error_keywords",
706 pattern=r"\b(Error|Exception|Traceback|Failed|TypeError|ValueError)\b",
707 replacement=r"error",
708 description="Detect error-related keywords",
709 flags=re.IGNORECASE,
710 test_cases=[
711 ("ValueError occurred", "error occurred"),
712 ("Exception raised", "error raised"),
713 ("Traceback found", "error found"),
714 ("regular text", "regular text"),
715 ],
716 ),
717 # Crackerjack-specific patterns for tools integration
718 "crackerjack_command": ValidatedPattern(
719 name="crackerjack_command",
720 pattern=r"crackerjack\s+(\w+)",
721 replacement=r"[\1]",
722 description="Extract command from crackerjack execution logs",
723 flags=0,
724 test_cases=[
725 ("crackerjack lint", "[lint]"),
726 ("running crackerjack test now", "running [test] now"),
727 ("crackerjack analyze completed", "[analyze] completed"),
728 ("just crackerjack", "just crackerjack"), # No change - no command
729 ],
730 ),
731 # Git and system security patterns
732 "safe_branch_name": ValidatedPattern(
733 name="safe_branch_name",
734 pattern=r"^[a-zA-Z0-9_/-]+$",
735 replacement=r"\g<0>",
736 description="Validate safe Git branch names (alphanumeric, dashes, underscores, slashes)",
737 flags=0,
738 test_cases=[
739 ("main", "main"),
740 ("feature/new-feature", "feature/new-feature"),
741 ("dev_branch", "dev_branch"),
742 ("hotfix-123", "hotfix-123"),
743 ("origin/main", "origin/main"),
744 ("invalid;branch", "invalid;branch"), # No match expected for invalid chars
745 ("branch with spaces", "branch with spaces"), # No match expected
746 ],
747 ),
748 # Token optimization patterns
749 "whitespace_normalize": ValidatedPattern(
750 name="whitespace_normalize",
751 pattern=r"\s+",
752 replacement=r" ",
753 description="Normalize whitespace for content hashing",
754 flags=0,
755 global_replace=True,
756 test_cases=[
757 (" multiple spaces ", " multiple spaces "),
758 ("tabs\t\tand\nnewlines\n\n", "tabs and newlines "),
759 ("normal text", "normal text"),
760 ],
761 ),
762}