Coverage for src \ truenex_memory \ retrieval \ semantic.py: 67%

48 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-19 10:21 +0200

1"""Local semantic retrieval primitives.""" 

2 

3from __future__ import annotations 

4 

5from dataclasses import dataclass 

6import hashlib 

7import math 

8import uuid 

9from typing import Protocol 

10 

11from truenex_memory.core.embedder import HashingEmbedder 

12 

13 

@dataclass(frozen=True)
class VectorPoint:
    """Immutable record pairing a chunk embedding with its vector-store id.

    Attributes:
        point_id: Identifier under which the point is stored.
        vector: Embedding values for the chunk.
        payload: String-keyed mapping of extra values carried with the point.
    """

    point_id: str
    vector: list[float]
    payload: dict[str, object]

21 

22 

@dataclass(frozen=True)
class VectorMatch:
    """Immutable search hit produced by a vector store.

    Attributes:
        point_id: Identifier of the stored point that matched.
        score: Similarity score assigned to the match.
    """

    point_id: str
    score: float

29 

30 

class Embedder(Protocol):
    """Structural interface for the embedders used by local semantic retrieval."""

    @property
    def model_name(self) -> str:
        """Name of the model/backend, stored together with persisted vectors."""
        ...

    def embed(self, text: str) -> list[float]:
        """Compute and return the embedding vector for ``text``."""
        ...

40 

41 

class VectorStore(Protocol):
    """Structural interface for the vector stores the repository talks to."""

    def upsert(self, points: list[VectorPoint]) -> None:
        """Insert the given points, replacing any that are already stored."""
        ...

    def search(self, vector: list[float], *, top_k: int) -> list[VectorMatch]:
        """Find the points nearest to ``vector``.

        ``top_k`` is keyword-only.
        """
        ...

50 

51 

class InMemoryVectorStore:
    """Dictionary-backed, deterministic vector store meant for local tests.

    Points live in ``self.points`` keyed by their ``point_id``; writing an id
    that is already present overwrites the stored point.
    """

    def __init__(self) -> None:
        self.points: dict[str, VectorPoint] = dict()

    def upsert(self, points: list[VectorPoint]) -> None:
        """Store every point, replacing entries that share a ``point_id``."""
        # Feeding pairs lazily keeps the one-at-a-time insertion semantics.
        self.points.update((entry.point_id, entry) for entry in points)

    def search(self, vector: list[float], *, top_k: int) -> list[VectorMatch]:
        """Return up to ``top_k`` stored points ranked by similarity to ``vector``.

        Scores are rounded to four decimals; points whose rounded score is not
        strictly positive are left out. Equal scores keep insertion order
        because the sort is stable.

        Raises:
            ValueError: If ``top_k`` is smaller than one.
        """
        if top_k < 1:
            raise ValueError("top_k must be greater than zero")

        hits: list[VectorMatch] = []
        for stored in self.points.values():
            similarity = round(_cosine(vector, stored.vector), 4)
            if similarity > 0:
                hits.append(VectorMatch(point_id=stored.point_id, score=similarity))

        ranked = sorted(hits, key=lambda hit: hit.score, reverse=True)
        return ranked[:top_k]

72 

73 

def chunk_point_id(chunk_id: str) -> str:
    """Derive a stable Qdrant-compatible point id from an indexed chunk's id.

    The id is the UUID formed from the first 16 bytes of the SHA-256 digest
    of the UTF-8 encoded ``chunk_id``, rendered in canonical string form.
    """
    leading_bytes = hashlib.sha256(chunk_id.encode("utf-8")).digest()[:16]
    point_uuid = uuid.UUID(bytes=leading_bytes)
    return str(point_uuid)

79 

80 

81def _normalize(vector: list[float]) -> list[float]: 

82 magnitude = math.sqrt(sum(value * value for value in vector)) 

83 if magnitude == 0: 

84 return vector 

85 return [value / magnitude for value in vector] 

86 

87 

88def _cosine(left: list[float], right: list[float]) -> float: 

89 if len(left) != len(right): 

90 return 0.0 

91 return sum(a * b for a, b in zip(left, right, strict=True))