Coverage for src \ truenex_memory \ core \ indexer.py: 84%

31 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 10:21 +0200

1"""Local file indexing.""" 

2 

3from __future__ import annotations 

4 

5from pathlib import Path 

6 

7from truenex_memory.core.chunker import chunk_text 

8from truenex_memory.store.repository import MemoryRepository 

9 

10 

# File suffixes eligible for indexing; index_path compares each file's
# lower-cased suffix against this set.
11INDEX_EXTENSIONS = {".md", ".markdown", ".txt", ".py", ".toml", ".yaml", ".yml", ".json"} 

# Names of path components that cause a file to be skipped during directory traversal.
12EXCLUDED_DIRS = {".git", ".venv", "venv", "__pycache__", ".pytest_cache", ".truenex-memory"} 

13 

14 

def index_path(path: Path, *, project_root: Path, repository: MemoryRepository) -> int:
    """Index supported files under a path into the local SQLite store.

    Args:
        path: A single file or a directory. A directory is walked with
            ``_iter_indexable_files``; a file is considered on its own.
        project_root: Base directory used to derive the relative path that
            is stored alongside each document.
        repository: Receives one ``upsert_document`` call per indexed file.

    Returns:
        The number of files passed to ``repository.upsert_document``. Files
        with an unsupported suffix or that produce no chunks are not counted.
    """
    target = path.resolve()
    # Loop-invariant: resolve the project root once instead of once per file.
    resolved_root = project_root.resolve()
    files = [target] if target.is_file() else list(_iter_indexable_files(target))
    indexed = 0
    for file_path in files:
        if file_path.suffix.lower() not in INDEX_EXTENSIONS:
            continue
        # Undecodable bytes are replaced rather than aborting the whole run.
        text = file_path.read_text(encoding="utf-8", errors="replace")
        chunks = chunk_text(text)
        if not chunks:
            continue
        resolved_file = file_path.resolve()
        try:
            relative_path = str(resolved_file.relative_to(resolved_root))
        except ValueError:
            # The file lies outside project_root; fall back to its absolute path.
            relative_path = str(resolved_file)
        repository.upsert_document(file_path, relative_path, chunks)
        indexed += 1
    return indexed

35 

36 

def _iter_indexable_files(root: Path):
    """Yield every non-directory path under ``root`` that is not excluded.

    A path is skipped when any of its components *below* ``root`` is named
    in ``EXCLUDED_DIRS``. The components of ``root`` itself are deliberately
    ignored, so a project that lives under a directory such as
    ``~/venv/project`` is still traversed instead of being silently dropped.
    """
    for path in root.rglob("*"):
        if path.is_dir():
            continue
        # rglob builds each result from root, so relative_to(root) always succeeds.
        if any(part in EXCLUDED_DIRS for part in path.relative_to(root).parts):
            continue
        yield path