Coverage for src \ truenex_memory \ core \ indexer.py: 84%
31 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
1"""Local file indexing."""
3from __future__ import annotations
5from pathlib import Path
7from truenex_memory.core.chunker import chunk_text
8from truenex_memory.store.repository import MemoryRepository
11INDEX_EXTENSIONS = {".md", ".markdown", ".txt", ".py", ".toml", ".yaml", ".yml", ".json"}
12EXCLUDED_DIRS = {".git", ".venv", "venv", "__pycache__", ".pytest_cache", ".truenex-memory"}
def index_path(path: Path, *, project_root: Path, repository: MemoryRepository) -> int:
    """Index supported files under a path into the local SQLite store.

    Args:
        path: A single file, or a directory that is scanned recursively.
        project_root: Base directory. Each document is recorded under its
            path relative to this root when the file lies inside it, and
            under its absolute path otherwise.
        repository: Receives one ``upsert_document`` call per indexed file.

    Returns:
        The number of files handed to ``repository.upsert_document``. Files
        with an unsupported suffix, or for which ``chunk_text`` returns
        nothing, are not counted.
    """
    target = path.resolve()
    # The root never changes during the run, so resolve it once instead of
    # once per file.
    resolved_root = project_root.resolve()
    # An explicitly named file is taken as-is; only directory scans go
    # through the excluded-directory filter.
    candidates = [target] if target.is_file() else list(_iter_indexable_files(target))

    indexed = 0
    for file_path in candidates:
        if file_path.suffix.lower() not in INDEX_EXTENSIONS:
            continue
        # errors="replace" keeps a stray non-UTF-8 byte from aborting the run.
        text = file_path.read_text(encoding="utf-8", errors="replace")
        chunks = chunk_text(text)
        if not chunks:
            continue
        resolved_file = file_path.resolve()
        try:
            relative_path = str(resolved_file.relative_to(resolved_root))
        except ValueError:
            # The file lives outside project_root: fall back to its
            # absolute path as the stored name.
            relative_path = str(resolved_file)
        repository.upsert_document(file_path, relative_path, chunks)
        indexed += 1
    return indexed
def _iter_indexable_files(root: Path):
    """Yield the files below ``root`` that are not in an excluded directory.

    Exclusion is decided from the path *relative to* ``root``. Directories
    above ``root`` are ignored, so a project that happens to live under a
    folder named e.g. ``venv`` is still scanned.

    Args:
        root: Directory to walk recursively.

    Yields:
        Paths of existing regular files (or symlinks to them) whose
        components below ``root`` contain no name from ``EXCLUDED_DIRS``.
    """
    for path in root.rglob("*"):
        # Cheap, syscall-free name check first; only components below root
        # are considered.
        if any(part in EXCLUDED_DIRS for part in path.relative_to(root).parts):
            continue
        # is_file() skips directories as well as broken symlinks, FIFOs and
        # sockets, none of which the caller could read as text.
        if not path.is_file():
            continue
        yield path