Coverage for session_buddy / utils / regex_patterns.py: 100.00%

2 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-04 00:43 -0800

1#!/usr/bin/env python3 

2"""Local regex patterns for session-mgmt-mcp using crackerjack's SAFE_PATTERNS architecture. 

3 

4This module contains all validated regex patterns used throughout the session-mgmt-mcp 

5codebase, following crackerjack's centralized pattern approach for security and reliability. 

6 

7All patterns are validated with comprehensive test cases and use proper replacement syntax. 

8""" 

9 

10import re 

11 

12from crackerjack.services.regex_patterns import ValidatedPattern 

13 

14# Session Management MCP Validated Patterns Registry 

# Registry of every regex used by the package, keyed by pattern name.
#
# Each entry bundles the regex, its replacement template, optional ``re`` flags
# and a list of ``(input, expected_output)`` test cases.
#
# NOTE(review): the expected outputs below replace only the FIRST match for
# every entry except ``whitespace_normalize`` (see e.g. ``file_extension`` and
# ``sql_code``). This presumes ValidatedPattern substitutes once unless
# ``global_replace=True`` is passed -- confirm against crackerjack.
SAFE_PATTERNS: dict[str, ValidatedPattern] = {
    # Code extraction patterns for search_enhanced.py
    "python_code_block": ValidatedPattern(
        name="python_code_block",
        pattern=r"```python\n(.*?)\n```",
        replacement=r"\1",
        description="Extract Python code from markdown code blocks",
        flags=re.DOTALL,
        test_cases=[
            ("```python\nprint('hello')\n```", "print('hello')"),
            ("```python\ndef func():\n pass\n```", "def func():\n pass"),
            ("```python\n\n```", ""),
            ("no code here", "no code here"),  # No change
        ],
    ),
    "generic_code_block": ValidatedPattern(
        name="generic_code_block",
        pattern=r"```\n(.*?)\n```",
        replacement=r"\1",
        description="Extract code from generic markdown code blocks",
        flags=re.DOTALL,
        test_cases=[
            ("```\nsome code\n```", "some code"),
            ("```\nline1\nline2\n```", "line1\nline2"),
            ("```\n\n```", ""),
            ("no fenced code", "no fenced code"),  # No change
        ],
    ),
    # Error pattern matching for search_enhanced.py
    "python_traceback": ValidatedPattern(
        name="python_traceback",
        # Lazy match that stops at the first blank line or at end of input.
        pattern=r"Traceback \(most recent call last\):.*?(?=\n\n|\Z)",
        replacement=r"<TRACEBACK_MASKED>",
        description="Match Python traceback blocks with safe termination",
        flags=re.MULTILINE | re.DOTALL,
        test_cases=[
            (
                "Traceback (most recent call last):\n File test.py\nError: msg\n\nNext line",
                "<TRACEBACK_MASKED>\n\nNext line",
            ),
            (
                "Traceback (most recent call last):\n File test.py\nError: msg",
                "<TRACEBACK_MASKED>",
            ),
            ("No traceback here", "No traceback here"),  # No change
        ],
    ),
    "python_exception": ValidatedPattern(
        name="python_exception",
        pattern=r"\b(ValueError|TypeError|RuntimeError|SyntaxError|ImportError|AttributeError|KeyError|IndexError|FileNotFoundError|PermissionError|ConnectionError|TimeoutError|AssertionError|Exception|BaseException): (.+)",
        replacement=r"\1: <ERROR_MESSAGE_MASKED>",
        description="Match Python exception patterns safely",
        test_cases=[
            ("ValueError: invalid input", "ValueError: <ERROR_MESSAGE_MASKED>"),
            (
                "RuntimeError: something went wrong",
                "RuntimeError: <ERROR_MESSAGE_MASKED>",
            ),
            (
                "NotAnError: this should not match",
                "NotAnError: this should not match",
            ),  # No change
            ("SyntaxError: bad syntax", "SyntaxError: <ERROR_MESSAGE_MASKED>"),
        ],
    ),
    "javascript_error": ValidatedPattern(
        name="javascript_error",
        pattern=r"\b(Error|TypeError|ReferenceError): (.+)",
        replacement=r"\1: <JS_ERROR_MASKED>",
        description="Match JavaScript error patterns",
        test_cases=[
            ("TypeError: Cannot read property", "TypeError: <JS_ERROR_MASKED>"),
            ("Error: Something failed", "Error: <JS_ERROR_MASKED>"),
            ("ReferenceError: x is not defined", "ReferenceError: <JS_ERROR_MASKED>"),
            ("CustomError: not matched", "CustomError: not matched"),  # No change
        ],
    ),
    "compile_error": ValidatedPattern(
        name="compile_error",
        pattern=r"(error|Error): (.+) at line (\d+)",
        replacement=r"\1: <COMPILE_ERROR_MASKED> at line \3",
        description="Match compilation error patterns with line numbers",
        test_cases=[
            (
                "error: syntax error at line 42",
                "error: <COMPILE_ERROR_MASKED> at line 42",
            ),
            (
                "Error: missing semicolon at line 10",
                "Error: <COMPILE_ERROR_MASKED> at line 10",
            ),
            (
                "warning: deprecated at line 5",
                "warning: deprecated at line 5",
            ),  # No change
        ],
    ),
    "warning_pattern": ValidatedPattern(
        name="warning_pattern",
        pattern=r"(warning|Warning): (.+)",
        replacement=r"\1: <WARNING_MASKED>",
        description="Match warning message patterns",
        test_cases=[
            ("warning: deprecated function", "warning: <WARNING_MASKED>"),
            ("Warning: potential issue", "Warning: <WARNING_MASKED>"),
            ("info: just information", "info: just information"),  # No change
        ],
    ),
    "assertion_error": ValidatedPattern(
        name="assertion_error",
        pattern=r"AssertionError: (.+)",
        replacement=r"AssertionError: <ASSERTION_MASKED>",
        description="Match assertion error patterns",
        test_cases=[
            ("AssertionError: expected True", "AssertionError: <ASSERTION_MASKED>"),
            (
                "AssertionError: values don't match",
                "AssertionError: <ASSERTION_MASKED>",
            ),
            ("ValueError: not assertion", "ValueError: not assertion"),  # No change
        ],
    ),
    "import_error": ValidatedPattern(
        name="import_error",
        pattern=r"ImportError: (.+)",
        replacement=r"ImportError: <IMPORT_ERROR_MASKED>",
        description="Match import error patterns",
        test_cases=[
            (
                "ImportError: No module named 'xyz'",
                "ImportError: <IMPORT_ERROR_MASKED>",
            ),
            ("ImportError: cannot import name", "ImportError: <IMPORT_ERROR_MASKED>"),
            (
                "ModuleNotFoundError: different",
                "ModuleNotFoundError: different",
            ),  # No change
        ],
    ),
    "module_not_found": ValidatedPattern(
        name="module_not_found",
        pattern=r"ModuleNotFoundError: (.+)",
        replacement=r"ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
        description="Match module not found error patterns",
        test_cases=[
            (
                "ModuleNotFoundError: No module named 'test'",
                "ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
            ),
            (
                "ModuleNotFoundError: missing dependency",
                "ModuleNotFoundError: <MODULE_NOT_FOUND_MASKED>",
            ),
            (
                "ImportError: different error",
                "ImportError: different error",
            ),  # No change
        ],
    ),
    "file_not_found": ValidatedPattern(
        name="file_not_found",
        pattern=r"FileNotFoundError: (.+)",
        replacement=r"FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
        description="Match file not found error patterns",
        test_cases=[
            (
                "FileNotFoundError: [Errno 2] No such file",
                "FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
            ),
            (
                "FileNotFoundError: file missing",
                "FileNotFoundError: <FILE_NOT_FOUND_MASKED>",
            ),
            ("PermissionError: different", "PermissionError: different"),  # No change
        ],
    ),
    "permission_denied": ValidatedPattern(
        name="permission_denied",
        pattern=r"PermissionError: (.+)",
        replacement=r"PermissionError: <PERMISSION_ERROR_MASKED>",
        description="Match permission error patterns",
        test_cases=[
            (
                "PermissionError: [Errno 13] Permission denied",
                "PermissionError: <PERMISSION_ERROR_MASKED>",
            ),
            (
                "PermissionError: access denied",
                "PermissionError: <PERMISSION_ERROR_MASKED>",
            ),
            (
                "FileNotFoundError: different",
                "FileNotFoundError: different",
            ),  # No change
        ],
    ),
    "network_error": ValidatedPattern(
        name="network_error",
        pattern=r"(ConnectionError|TimeoutError|HTTPError): (.+)",
        replacement=r"\1: <NETWORK_ERROR_MASKED>",
        description="Match network-related error patterns",
        test_cases=[
            (
                "ConnectionError: Failed to connect",
                "ConnectionError: <NETWORK_ERROR_MASKED>",
            ),
            ("TimeoutError: Request timed out", "TimeoutError: <NETWORK_ERROR_MASKED>"),
            ("HTTPError: 404 Not Found", "HTTPError: <NETWORK_ERROR_MASKED>"),
            ("ValueError: not network", "ValueError: not network"),  # No change
        ],
    ),
    # Context pattern matching (boolean search)
    "debugging_context": ValidatedPattern(
        name="debugging_context",
        # ``print()`` ends in a non-word character, so a trailing ``\b`` would
        # only succeed when a word character follows it. The keyword branch
        # keeps both boundaries; the ``print()`` branch only anchors its start.
        pattern=r"(\b(?:debug|debugging|breakpoint|pdb)\b|\bprint\(\))",
        replacement=r"<DEBUG_CONTEXT>",
        description="Match debugging-related context patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("let's debug this issue", "let's <DEBUG_CONTEXT> this issue"),
            ("debugging the problem", "<DEBUG_CONTEXT> the problem"),
            ("set a breakpoint here", "set a <DEBUG_CONTEXT> here"),
            ("remove the print() call", "remove the <DEBUG_CONTEXT> call"),
            ("regular code", "regular code"),  # No change
        ],
    ),
    "testing_context": ValidatedPattern(
        name="testing_context",
        pattern=r"(test|pytest|unittest|assert|mock)",
        replacement=r"<TEST_CONTEXT>",
        description="Match testing-related context patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("run the test suite", "run the <TEST_CONTEXT> suite"),
            ("pytest configuration", "<TEST_CONTEXT> configuration"),
            ("unittest framework", "<TEST_CONTEXT> framework"),
            ("regular text", "regular text"),  # No change
        ],
    ),
    "error_handling_context": ValidatedPattern(
        name="error_handling_context",
        pattern=r"(try|except|finally|raise|catch)",
        replacement=r"<ERROR_HANDLING_CONTEXT>",
        description="Match error handling context patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("try to handle", "<ERROR_HANDLING_CONTEXT> to handle"),
            ("except ValueError", "<ERROR_HANDLING_CONTEXT> ValueError"),
            ("finally block", "<ERROR_HANDLING_CONTEXT> block"),
            ("normal flow", "normal flow"),  # No change
        ],
    ),
    "performance_context": ValidatedPattern(
        name="performance_context",
        pattern=r"(slow|performance|benchmark|optimize|profil)",
        replacement=r"<PERFORMANCE_CONTEXT>",
        description="Match performance-related context patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("this is slow", "this is <PERFORMANCE_CONTEXT>"),
            ("performance improvement", "<PERFORMANCE_CONTEXT> improvement"),
            ("benchmark results", "<PERFORMANCE_CONTEXT> results"),
            ("fast code", "fast code"),  # No change
        ],
    ),
    "security_context": ValidatedPattern(
        name="security_context",
        pattern=r"(security|authentication|authorization|token|password)",
        replacement=r"<SECURITY_CONTEXT>",
        description="Match security-related context patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("security audit", "<SECURITY_CONTEXT> audit"),
            ("authentication required", "<SECURITY_CONTEXT> required"),
            ("token validation", "<SECURITY_CONTEXT> validation"),
            ("regular text", "regular text"),  # No change
        ],
    ),
    # Time parsing patterns for search_enhanced.py
    "time_ago_pattern": ValidatedPattern(
        name="time_ago_pattern",
        pattern=r"(\d+)\s+(minute|hour|day|week|month|year)s?\s+ago",
        replacement=r"\1 \2 ago",
        description="Match time ago expressions for parsing",
        test_cases=[
            ("5 minutes ago", "5 minute ago"),
            ("2 hours ago", "2 hour ago"),
            ("1 day ago", "1 day ago"),
            ("3 weeks ago", "3 week ago"),
            ("not a time", "not a time"),  # No change
        ],
    ),
    "relative_time_pattern": ValidatedPattern(
        name="relative_time_pattern",
        pattern=r"(today|yesterday|this\s+week|last\s+week|this\s+month|last\s+month)",
        replacement=r"<RELATIVE_TIME>",
        description="Match relative time expressions",
        flags=re.IGNORECASE,
        test_cases=[
            ("today is good", "<RELATIVE_TIME> is good"),
            ("yesterday we worked", "<RELATIVE_TIME> we worked"),
            ("this week's plan", "<RELATIVE_TIME>'s plan"),
            ("some other day", "some other day"),  # No change
        ],
    ),
    "since_time_pattern": ValidatedPattern(
        name="since_time_pattern",
        pattern=r"since\s+(today|yesterday|this\s+week|last\s+week)",
        replacement=r"since <TIME_REFERENCE>",
        description="Match 'since' time expressions",
        flags=re.IGNORECASE,
        test_cases=[
            ("since today", "since <TIME_REFERENCE>"),
            ("since yesterday", "since <TIME_REFERENCE>"),
            ("since this week", "since <TIME_REFERENCE>"),
            ("since forever", "since forever"),  # No change
        ],
    ),
    "last_duration_pattern": ValidatedPattern(
        name="last_duration_pattern",
        pattern=r"in\s+the\s+last\s+(\d+)\s+(minute|hour|day|week|month|year)s?",
        replacement=r"in the last \1 \2",
        description="Match 'in the last X units' patterns",
        test_cases=[
            ("in the last 5 minutes", "in the last 5 minute"),
            ("in the last 2 hours", "in the last 2 hour"),
            ("in the last 10 days", "in the last 10 day"),
            ("not a duration", "not a duration"),  # No change
        ],
    ),
    "iso_date_pattern": ValidatedPattern(
        name="iso_date_pattern",
        # Month is limited to 01-12 and day to 01-31; no per-month day check.
        pattern=r"(\d{4}-(0[1-9]|1[0-2])-(0[1-9]|[12]\d|3[01]))",
        replacement=r"<ISO_DATE>",
        description="Match valid ISO date format (YYYY-MM-DD)",
        test_cases=[
            ("2023-12-25", "<ISO_DATE>"),
            ("Date: 2024-01-15 is today", "Date: <ISO_DATE> is today"),
            ("not-a-date", "not-a-date"),  # No change
            (
                "2023-13-45",
                "2023-13-45",
            ),  # Invalid date, no change expected
        ],
    ),
    "us_date_pattern": ValidatedPattern(
        name="us_date_pattern",
        pattern=r"(\d{1,2}/\d{1,2}/\d{4})",
        replacement=r"<US_DATE>",
        description="Match US date format (MM/DD/YYYY or M/D/YYYY)",
        test_cases=[
            ("12/25/2023", "<US_DATE>"),
            ("1/5/2024", "<US_DATE>"),
            ("Meeting on 3/15/2024 at noon", "Meeting on <US_DATE> at noon"),
            ("not/a/date", "not/a/date"),  # No change
        ],
    ),
    # Crackerjack integration patterns for output parsing
    "pytest_result": ValidatedPattern(
        name="pytest_result",
        # The optional "[NN%]" and "(...)" suffixes are consumed by the match
        # but dropped by the replacement.
        pattern=r"(\w+\.py)::\s*(\w+)\s*(PASSED|FAILED|SKIPPED|ERROR|XFAIL|XPASS)\s*(?:\[(\d+%)\])?\s*(?:\((.+)\))?",
        replacement=r"TEST: \1::\2 -> \3",
        description="Parse pytest test results with optional percentage and timing",
        test_cases=[
            (
                "test_file.py:: test_function PASSED",
                "TEST: test_file.py::test_function -> PASSED",
            ),
            (
                "test_example.py:: test_method FAILED [50%] (0.05s)",
                "TEST: test_example.py::test_method -> FAILED",
            ),
            (
                "test_skip.py:: test_skip SKIPPED",
                "TEST: test_skip.py::test_skip -> SKIPPED",
            ),
            ("not a test result", "not a test result"),  # No change
        ],
    ),
    "coverage_summary": ValidatedPattern(
        name="coverage_summary",
        pattern=r"TOTAL\s+\d+\s+\d+\s+(\d+)%",
        replacement=r"COVERAGE: \1%",
        description="Extract total coverage percentage from coverage reports",
        test_cases=[
            ("TOTAL 1000 50 95%", "COVERAGE: 95%"),
            ("TOTAL 500 25 78%", "COVERAGE: 78%"),
            ("subtotal 100 5 90%", "subtotal 100 5 90%"),  # No change
        ],
    ),
    "ruff_error": ValidatedPattern(
        name="ruff_error",
        # The column number (group 3) is matched but omitted from the output.
        pattern=r"([^:\s]+):(\d+):(\d+):\s*([A-Z]\d{3,4})\s*(.+)",
        replacement=r"RUFF: \1 line \2 -> \4: \5",
        description="Parse Ruff linting errors with file, line, column, code, and message",
        test_cases=[
            (
                "src/main.py:42:10: E501 line too long (88 > 79 characters)",
                "RUFF: src/main.py line 42 -> E501: line too long (88 > 79 characters)",
            ),
            (
                "test.py:1:1: F401 imported but unused",
                "RUFF: test.py line 1 -> F401: imported but unused",
            ),
            ("not a ruff error", "not a ruff error"),  # No change
        ],
    ),
    "mypy_error": ValidatedPattern(
        name="mypy_error",
        pattern=r"([^:\s]+):(\d+):\s*error:\s*(.+)",
        replacement=r"MYPY: \1 line \2 -> \3",
        description="Parse mypy type checking errors with file, line, and message",
        test_cases=[
            (
                "src/module.py:15: error: Argument 1 has incompatible type",
                "MYPY: src/module.py line 15 -> Argument 1 has incompatible type",
            ),
            (
                "main.py:8: error: Name 'x' is not defined",
                "MYPY: main.py line 8 -> Name 'x' is not defined",
            ),
            ("not a mypy error", "not a mypy error"),  # No change
        ],
    ),
    "bandit_finding": ValidatedPattern(
        name="bandit_finding",
        pattern=r">> Issue: \[([A-Z]\d+):([a-z_]+)\]\s*(.+)",
        replacement=r"BANDIT: \1 (\2) -> \3",
        description="Parse Bandit security findings with code, severity, and description",
        test_cases=[
            (
                ">> Issue: [B602:subprocess_popen_with_shell_equals_true] Possible shell injection",
                "BANDIT: B602 (subprocess_popen_with_shell_equals_true) -> Possible shell injection",
            ),
            (
                ">> Issue: [B108:hardcoded_tmp_directory] Use of insecure temp",
                "BANDIT: B108 (hardcoded_tmp_directory) -> Use of insecure temp",
            ),
            ("not a bandit finding", "not a bandit finding"),  # No change
        ],
    ),
    "quality_score": ValidatedPattern(
        name="quality_score",
        pattern=r"Quality Score:\s*(\d+(?:\.\d+)?)/(\d+(?:\.\d+)?)\s*\((\d+(?:\.\d+)?)%\)",
        replacement=r"QUALITY: \3% (\1/\2)",
        description="Parse quality score with percentage calculation",
        test_cases=[
            ("Quality Score: 85.5/100 (85.5%)", "QUALITY: 85.5% (85.5/100)"),
            ("Quality Score: 90/100 (90%)", "QUALITY: 90% (90/100)"),
            ("Final Score: 95/100", "Final Score: 95/100"),  # No change
        ],
    ),
    "execution_time": ValidatedPattern(
        name="execution_time",
        # Longest unit first: with ``s`` leading the alternation, "seconds"
        # would be captured as just "s" and ``seconds?`` could never match.
        pattern=r"(\d+(?:\.\d+)?)\s*(milliseconds?|seconds?|ms|s)",
        replacement=r"\1\2",
        description="Normalize execution time formats",
        test_cases=[
            ("2.5 seconds", "2.5seconds"),
            ("150 ms", "150ms"),
            ("0.05 s", "0.05s"),
            ("12 milliseconds", "12milliseconds"),
            ("fast enough", "fast enough"),  # No change
        ],
    ),
    "progress_indicator": ValidatedPattern(
        name="progress_indicator",
        pattern=r"\[([=>\s]*)\]\s*(\d+)%",
        replacement=r"PROGRESS: \2%",
        description="Parse progress bars with percentage",
        test_cases=[
            ("[=====> ] 50%", "PROGRESS: 50%"),
            ("[=========>] 90%", "PROGRESS: 90%"),
            ("[ ] 0%", "PROGRESS: 0%"),
            ("not progress", "not progress"),  # No change
        ],
    ),
    "git_commit_hash": ValidatedPattern(
        name="git_commit_hash",
        pattern=r"\b([a-f0-9]{7,40})\b",
        replacement=r"<COMMIT:\1>",
        description="Match Git commit hashes (7-40 hex characters)",
        test_cases=[
            ("commit abc1234 was merged", "commit <COMMIT:abc1234> was merged"),
            (
                "long hash abcdef1234567890abcdef1234567890abcdef12",
                "long hash <COMMIT:abcdef1234567890abcdef1234567890abcdef12>",
            ),
            ("short ab12", "short ab12"),  # Too short
            ("not hex ghi1234", "not hex ghi1234"),  # Contains non-hex
        ],
    ),
    "file_path_with_line": ValidatedPattern(
        name="file_path_with_line",
        pattern=r"([A-Za-z_][A-Za-z0-9_/.-]*\.py):(\d+)",
        replacement=r"\1 line \2",
        description="Match file paths with line numbers (file.py:123)",
        test_cases=[
            ("src/main.py:42", "src/main.py line 42"),
            ("test_file.py:1", "test_file.py line 1"),
            ("sub/dir/module.py:100", "sub/dir/module.py line 100"),
            ("not-python.txt:50", "not-python.txt:50"),  # No change
        ],
    ),
    # Memory optimizer patterns
    "sentence_split": ValidatedPattern(
        name="sentence_split",
        pattern=r"[.!\?]+",
        replacement=r" ",
        description="Replace sentence-ending punctuation with spaces for splitting",
        test_cases=[
            ("Hello world.", "Hello world "),
            ("How are you?", "How are you "),
            ("Multiple!!!", "Multiple "),
        ],
    ),
    "code_block_cleanup": ValidatedPattern(
        name="code_block_cleanup",
        pattern=r"```.*?```",
        replacement=r"",
        description="Remove code blocks from text for keyword extraction",
        flags=re.DOTALL,
        test_cases=[
            # Only the fenced block is removed, so the spaces that surrounded
            # it both survive and end up adjacent.
            ("Text ```python\ncode\n``` more text", "Text  more text"),
            ("```\njust code\n```", ""),
            ("no code blocks", "no code blocks"),
        ],
    ),
    "inline_code_cleanup": ValidatedPattern(
        name="inline_code_cleanup",
        pattern=r"`[^`]+`",
        replacement=r"",
        description="Remove inline code from text",
        test_cases=[
            # The surrounding spaces are kept, leaving two in a row.
            ("Use `function()` to call", "Use  to call"),
            ("No inline code", "No inline code"),
            ("`code`", ""),
        ],
    ),
    "word_extraction": ValidatedPattern(
        name="word_extraction",
        pattern=r"\b[a-zA-Z]{1,2}\b",
        replacement=r"",
        description="Remove short words (1-2 chars) for keyword analysis",
        test_cases=[
            ("short words", "short words"),
            ("a test", " test"),
            # Removing "in" leaves the spaces on both sides of it.
            ("123 in 456", "123  456"),
        ],
    ),
    "word_boundary": ValidatedPattern(
        name="word_boundary",
        pattern=r"[^\w\s]+",
        replacement=r" ",
        description="Replace non-word characters with spaces for word boundary detection",
        test_cases=[
            ("hello-world", "hello world"),
            ("test@example", "test example"),
            ("underscored_var", "underscored_var"),
        ],
    ),
    # File extension patterns for memory optimizer
    "python_files": ValidatedPattern(
        name="python_files",
        pattern=r"(\w+\.py)",
        replacement=r"[\1]",
        description="Wrap Python file references in brackets",
        test_cases=[
            ("main.py script", "[main.py] script"),
            ("test_file.py found", "[test_file.py] found"),
            ("no files here", "no files here"),
        ],
    ),
    "javascript_files": ValidatedPattern(
        name="javascript_files",
        pattern=r"(\w+\.js)",
        replacement=r"[\1]",
        description="Wrap JavaScript file references in brackets",
        test_cases=[
            ("app.js file", "[app.js] file"),
            ("script.js found", "[script.js] found"),
            ("no files", "no files"),
        ],
    ),
    "typescript_files": ValidatedPattern(
        name="typescript_files",
        pattern=r"(\w+\.ts)",
        replacement=r"[\1]",
        description="Wrap TypeScript file references in brackets",
        test_cases=[
            ("index.ts file", "[index.ts] file"),
            ("component.ts interface", "[component.ts] interface"),
            ("other files", "other files"),
        ],
    ),
    "json_files": ValidatedPattern(
        name="json_files",
        pattern=r"(\w+\.json)",
        replacement=r"[\1]",
        description="Match JSON file references",
        test_cases=[
            ("config.json settings", "[config.json] settings"),
            ("package.json file", "[package.json] file"),
            ("no json", "no json"),
        ],
    ),
    "markdown_files": ValidatedPattern(
        name="markdown_files",
        pattern=r"(\w+\.md)",
        replacement=r"[\1]",
        description="Match Markdown file references",
        test_cases=[
            ("README.md documentation", "[README.md] documentation"),
            ("docs.md file", "[docs.md] file"),
            ("no markdown", "no markdown"),
        ],
    ),
    # Advanced search patterns
    "function_definition": ValidatedPattern(
        name="function_definition",
        pattern=r"\bdef\s+(\w+)",
        replacement=r"function:\1",
        description="Extract Python function definitions",
        test_cases=[
            ("def main():", "function:main():"),
            ("def get_data(param):", "function:get_data(param):"),
            ("no functions here", "no functions here"),
        ],
    ),
    "class_definition": ValidatedPattern(
        name="class_definition",
        pattern=r"\bclass\s+(\w+)",
        replacement=r"class:\1",
        description="Extract Python class definitions",
        test_cases=[
            ("class MyClass:", "class:MyClass:"),
            ("class SearchEngine(Base):", "class:SearchEngine(Base):"),
            ("no classes here", "no classes here"),
        ],
    ),
    "file_extension": ValidatedPattern(
        name="file_extension",
        pattern=r"\.(\w{2,4})\b",
        replacement=r"filetype:\1",
        description="Extract file extensions for categorization",
        test_cases=[
            ("file.py and test.json", "filefiletype:py and test.json"),
            ("config.yaml setup", "configfiletype:yaml setup"),
            ("no extensions", "no extensions"),
        ],
    ),
    # Language detection patterns
    "python_code": ValidatedPattern(
        name="python_code",
        pattern=r"\b(def|class|import|from|if __name__|async|await|yield)\b",
        replacement=r"python",
        description="Detect Python code patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("def function():", "python function():"),
            ("import os", "python os"),
            ("if __name__ == '__main__'", "python == '__main__'"),
            ("regular text", "regular text"),
        ],
    ),
    "javascript_code": ValidatedPattern(
        name="javascript_code",
        # ``=>`` is made of non-word characters, so wrapping it in ``\b`` would
        # require word characters on both sides (``x=>y``) and miss the usual
        # ``(x) => y``. It is therefore matched outside the word-boundary
        # branch; group 1 still captures whichever token matched.
        pattern=r"(\b(?:function|var|let|const|require|module\.exports|console\.log)\b|=>)",
        replacement=r"javascript",
        description="Detect JavaScript code patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("function myFunc()", "javascript myFunc()"),
            ("const data = []", "javascript data = []"),
            ("console.log('hello')", "javascript('hello')"),
            ("(x) => x + 1", "(x) javascript x + 1"),
            ("regular text", "regular text"),
        ],
    ),
    "sql_code": ValidatedPattern(
        name="sql_code",
        pattern=r"\b(SELECT|FROM|WHERE|JOIN|INSERT|UPDATE|DELETE|CREATE|TABLE)\b",
        replacement=r"sql",
        description="Detect SQL code patterns",
        flags=re.IGNORECASE,
        test_cases=[
            ("SELECT * FROM users", "sql * FROM users"),
            ("INSERT INTO table", "sql INTO table"),
            ("CREATE TABLE test", "sql TABLE test"),
            ("regular text", "regular text"),
        ],
    ),
    "error_keywords": ValidatedPattern(
        name="error_keywords",
        pattern=r"\b(Error|Exception|Traceback|Failed|TypeError|ValueError)\b",
        replacement=r"error",
        description="Detect error-related keywords",
        flags=re.IGNORECASE,
        test_cases=[
            ("ValueError occurred", "error occurred"),
            ("Exception raised", "error raised"),
            ("Traceback found", "error found"),
            ("regular text", "regular text"),
        ],
    ),
    # Crackerjack-specific patterns for tools integration
    "crackerjack_command": ValidatedPattern(
        name="crackerjack_command",
        pattern=r"crackerjack\s+(\w+)",
        replacement=r"[\1]",
        description="Extract command from crackerjack execution logs",
        flags=0,
        test_cases=[
            ("crackerjack lint", "[lint]"),
            ("running crackerjack test now", "running [test] now"),
            ("crackerjack analyze completed", "[analyze] completed"),
            ("just crackerjack", "just crackerjack"),  # No change - no command
        ],
    ),
    # Git and system security patterns
    "safe_branch_name": ValidatedPattern(
        name="safe_branch_name",
        pattern=r"^[a-zA-Z0-9_/-]+$",
        # Replacing the whole match with itself leaves the text unchanged
        # whether or not it matches.
        replacement=r"\g<0>",
        description="Validate safe Git branch names (alphanumeric, dashes, underscores, slashes)",
        flags=0,
        test_cases=[
            ("main", "main"),
            ("feature/new-feature", "feature/new-feature"),
            ("dev_branch", "dev_branch"),
            ("hotfix-123", "hotfix-123"),
            ("origin/main", "origin/main"),
            ("invalid;branch", "invalid;branch"),  # No match expected for invalid chars
            ("branch with spaces", "branch with spaces"),  # No match expected
        ],
    ),
    # Token optimization patterns
    "whitespace_normalize": ValidatedPattern(
        name="whitespace_normalize",
        pattern=r"\s+",
        replacement=r" ",
        description="Normalize whitespace for content hashing",
        flags=0,
        global_replace=True,
        test_cases=[
            # Runs of spaces must actually be present for this case to
            # exercise the collapse.
            ("  multiple   spaces  ", " multiple spaces "),
            ("tabs\t\tand\nnewlines\n\n", "tabs and newlines "),
            ("normal text", "normal text"),
        ],
    ),
}