Coverage for src/dataknobs_data/vector/stores/factory.py: 31%
29 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
1"""Factory for creating vector store backends."""
3import logging
4from typing import Any
6from dataknobs_config import FactoryBase
8from .base import VectorStore
10logger = logging.getLogger(__name__)
class VectorStoreFactory(FactoryBase):
    """Factory for creating vector store backends dynamically.

    This factory allows creating different vector store implementations
    based on configuration, supporting specialized vector databases.

    Configuration Options:
        backend (str): Backend type (faiss, chroma, memory). Matched
            case-insensitively; "chromadb" is accepted as an alias for
            "chroma". Defaults to "memory" when omitted.
        dimensions (int): Vector dimensions (required for some backends)
        **kwargs: Backend-specific configuration options

    Example Configuration:
        vector_stores:
          - name: main_vectors
            factory: vector_store
            backend: faiss
            dimensions: 768
            index_type: ivfflat
            persist_path: ./vectors/main

          - name: doc_search
            factory: vector_store
            backend: chroma
            collection_name: documents
            persist_path: ./chroma_db
    """

    # Alternate spellings of a backend name, mapped to the canonical name.
    # Shared by create() and get_backend_info() so both accept the same names.
    _BACKEND_ALIASES = {"chromadb": "chroma"}

    def create(self, **config) -> VectorStore:
        """Create a vector store instance based on configuration.

        The 'backend' entry is removed from ``config``; the remaining
        entries are passed to the selected backend class as a single
        dictionary argument.

        Args:
            **config: Configuration including 'backend' field and
                backend-specific options

        Returns:
            Instance of appropriate vector store backend

        Raises:
            ValueError: If the backend type is not a string, is not
                recognized, or its optional dependency cannot be imported
        """
        backend = config.pop("backend", "memory")
        if not isinstance(backend, str):
            # A non-string value (e.g. an explicit null in a config file)
            # used to fail with AttributeError on .lower(); report it with
            # the exception type this method documents instead.
            raise ValueError(
                f"Backend type must be a string, got {type(backend).__name__}. "
                "Available backends: memory, faiss, chroma"
            )
        backend_type = backend.lower()

        # Logged before alias resolution so the message shows the name given.
        logger.info("Creating vector store with backend: %s", backend_type)

        backend_type = self._BACKEND_ALIASES.get(backend_type, backend_type)

        # Backend modules are imported only when selected, so an optional
        # dependency is needed only for the backend actually requested.
        if backend_type == "memory":
            # Simple in-memory implementation
            from .memory import MemoryVectorStore
            return MemoryVectorStore(config)

        if backend_type == "faiss":
            # The try covers both the import and the construction: an
            # ImportError from either becomes a ValueError with install help.
            try:
                from .faiss import FaissVectorStore
                return FaissVectorStore(config)
            except ImportError as e:
                raise ValueError(
                    "Faiss backend requires faiss-cpu. "
                    "Install with: pip install faiss-cpu"
                ) from e

        if backend_type == "chroma":
            try:
                from .chroma import ChromaVectorStore
                return ChromaVectorStore(config)
            except ImportError as e:
                raise ValueError(
                    "Chroma backend requires chromadb. "
                    "Install with: pip install chromadb"
                ) from e

        raise ValueError(
            f"Unknown backend type: {backend_type}. "
            "Available backends: memory, faiss, chroma"
        )

    def get_backend_info(self, backend_type: str) -> dict[str, Any]:
        """Get information about a specific backend.

        The name is matched case-insensitively and the same aliases that
        create() accepts (e.g. "chromadb" for "chroma") are resolved.

        Args:
            backend_type: Name of the backend

        Returns:
            Dictionary with backend information. Known backends provide
            'description', 'persistent', 'requires_install' (False, or the
            install command as a string) and 'config_options'. Unknown
            backends provide 'description' and 'error' instead.
        """
        # Built on every call, so callers receive a fresh dictionary.
        info = {
            "memory": {
                "description": "In-memory vector storage for testing",
                "persistent": False,
                "requires_install": False,
                "config_options": {
                    "dimensions": "Vector dimensions (required)",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                },
            },
            "faiss": {
                "description": "Facebook AI Similarity Search - efficient vector search",
                "persistent": True,
                "requires_install": "pip install faiss-cpu",
                "config_options": {
                    "dimensions": "Vector dimensions (required)",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                    "index_type": "Index type: flat, ivfflat, hnsw, auto",
                    "persist_path": "Path to save/load index",
                    "nlist": "Number of clusters for IVF index",
                    "m": "Number of connections for HNSW",
                },
            },
            "chroma": {
                "description": "ChromaDB - AI-native vector database",
                "persistent": True,
                "requires_install": "pip install chromadb",
                "config_options": {
                    "collection_name": "Name of the collection",
                    "persist_path": "Path for persistent storage",
                    "embedding_function": "Embedding function name or object",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                },
            },
        }

        key = backend_type.lower()
        # Resolve aliases so names accepted by create() are recognized here.
        key = self._BACKEND_ALIASES.get(key, key)

        return info.get(key, {
            "description": "Unknown backend",
            "error": f"Backend '{backend_type}' not recognized",
        })
# Module-level singleton instance, created when this module is imported,
# for registration.
vector_store_factory = VectorStoreFactory()