Coverage for src \ truenex_memory \ retrieval \ semantic.py: 67%
48 statements
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
« prev ^ index » next coverage.py v7.14.0, created at 2026-05-19 10:21 +0200
1"""Local semantic retrieval primitives."""
3from __future__ import annotations
5from dataclasses import dataclass
6import hashlib
7import math
8import uuid
9from typing import Protocol
11from truenex_memory.core.embedder import HashingEmbedder
@dataclass(frozen=True)
class VectorPoint:
    """One embedded chunk, packaged for a vector-store upsert.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Identifier the point is stored under.
    point_id: str
    # Embedding values for the chunk.
    vector: list[float]
    # Additional data carried alongside the vector.
    payload: dict[str, object]
@dataclass(frozen=True)
class VectorMatch:
    """A single hit produced by a semantic (vector-store) search.

    Instances are frozen: fields cannot be reassigned after construction.
    """

    # Identifier of the stored point that matched.
    point_id: str
    # Similarity score assigned to the match by the store.
    score: float
class Embedder(Protocol):
    """Structural interface for the embedding backend used by local semantic retrieval.

    Any object exposing a ``model_name`` property and an ``embed`` method
    with these signatures satisfies the protocol.
    """

    @property
    def model_name(self) -> str:
        """Name of the model/backend, stored together with persisted vectors."""
        ...

    def embed(self, text: str) -> list[float]:
        """Produce the embedding vector for ``text``."""
        ...
class VectorStore(Protocol):
    """Structural interface for the vector store the repository relies on.

    Implementations provide ``upsert`` for writing points and ``search``
    for nearest-point lookups.
    """

    def upsert(self, points: list[VectorPoint]) -> None:
        """Insert the given points, replacing any that are already stored."""
        ...

    def search(self, vector: list[float], *, top_k: int) -> list[VectorMatch]:
        """Find the stored points nearest to ``vector``.

        ``top_k`` is keyword-only.
        """
        ...
class InMemoryVectorStore:
    """Dictionary-backed, deterministic vector store intended for local tests.

    Points live in ``self.points``, keyed by their ``point_id``.
    """

    def __init__(self) -> None:
        self.points: dict[str, VectorPoint] = {}

    def upsert(self, points: list[VectorPoint]) -> None:
        """Store every point, overwriting any entry that shares its ``point_id``."""
        self.points.update((entry.point_id, entry) for entry in points)

    def search(self, vector: list[float], *, top_k: int) -> list[VectorMatch]:
        """Return up to ``top_k`` stored points ranked by similarity to ``vector``.

        Each stored point is scored with the module's ``_cosine`` helper and
        the score is rounded to four decimal places. Points whose rounded
        score is not strictly positive are left out. Results are ordered from
        highest to lowest score; the sort is stable, so equal scores keep
        their insertion order.

        Raises:
            ValueError: If ``top_k`` is less than 1.
        """
        if top_k < 1:
            raise ValueError("top_k must be greater than zero")

        scored: list[VectorMatch] = []
        for stored in self.points.values():
            similarity = round(_cosine(vector, stored.vector), 4)
            if similarity > 0:
                scored.append(VectorMatch(point_id=stored.point_id, score=similarity))

        ranked = sorted(scored, key=lambda hit: hit.score, reverse=True)
        return ranked[:top_k]
def chunk_point_id(chunk_id: str) -> str:
    """Derive a stable, Qdrant-compatible point id from an indexed chunk's id.

    The id is the canonical UUID string built from the first 16 bytes of the
    SHA-256 digest of the UTF-8 encoded ``chunk_id``, so the same chunk id
    always yields the same point id.
    """
    sha = hashlib.sha256(chunk_id.encode("utf-8"))
    # 16 digest bytes carry the same 128 bits as the first 32 hex characters.
    leading_bytes = sha.digest()[:16]
    return str(uuid.UUID(bytes=leading_bytes))
def _normalize(vector: list[float]) -> list[float]:
    """Scale ``vector`` to unit Euclidean length.

    A vector whose magnitude is zero (which includes an empty vector) cannot
    be scaled; in that case the input list object itself is returned.
    Otherwise a new list is returned.
    """
    squared_total = sum(component * component for component in vector)
    length = math.sqrt(squared_total)
    if length != 0:
        return [component / length for component in vector]
    return vector
88def _cosine(left: list[float], right: list[float]) -> float:
89 if len(left) != len(right):
90 return 0.0
91 return sum(a * b for a, b in zip(left, right, strict=True))