Coverage for src / dataknobs_data / vector / stores / factory.py: 32%
31 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-26 15:45 -0700
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-26 15:45 -0700
1"""Factory for creating vector store backends."""
3import logging
4from typing import Any
6from dataknobs_config import FactoryBase
8from . import vector_backends
9from .base import VectorStore
11logger = logging.getLogger(__name__)
class VectorStoreFactory(FactoryBase):
    """Factory for creating vector store backends dynamically.

    This factory allows creating different vector store implementations
    based on configuration, supporting specialized vector databases.
    Backend classes are looked up by name in the ``vector_backends``
    registry.

    Configuration Options:
        backend (str): Backend type (faiss, chroma, memory). Matched
            case-insensitively; defaults to "memory" when omitted.
        dimensions (int): Vector dimensions (required for some backends)
        **kwargs: Backend-specific configuration options

    Example Configuration:
        vector_stores:
          - name: main_vectors
            factory: vector_store
            backend: faiss
            dimensions: 768
            index_type: ivfflat
            persist_path: ./vectors/main

          - name: doc_search
            factory: vector_store
            backend: chroma
            collection_name: documents
            persist_path: ./chroma_db
    """

    def create(self, **config: Any) -> VectorStore:
        """Create a vector store instance based on configuration.

        Args:
            **config: Configuration including 'backend' field and
                backend-specific options. The 'backend' entry is removed
                and every remaining option is handed to the backend class
                as a single dict argument.

        Returns:
            Instance of appropriate vector store backend

        Raises:
            ValueError: If the backend type is not a string, is not
                recognized, or the backend cannot be constructed because
                an optional dependency is missing.
        """
        backend_type = config.pop("backend", "memory")
        # A non-string value (e.g. ``None`` from an empty config key) would
        # otherwise fail on ``.lower()`` with an AttributeError; report it
        # with the exception type this method documents instead.
        if not isinstance(backend_type, str):
            raise ValueError(
                "Backend type must be a string, got "
                f"{type(backend_type).__name__}"
            )
        backend_type = backend_type.lower()

        # Lazy %-style arguments: the message is only formatted when the
        # INFO level is actually enabled.
        logger.info("Creating vector store with backend: %s", backend_type)

        # Get backend class from registry
        try:
            backend_class = vector_backends.get(backend_type)
        except Exception as e:
            # The registry's lookup-failure exception type is not visible
            # from this module, so any failure is reported as an unknown
            # backend together with the names that are registered.
            available = vector_backends.list_keys()
            raise ValueError(
                f"Unknown backend type: {backend_type}. "
                f"Available backends: {', '.join(sorted(set(available)))}"
            ) from e

        # Create and return backend instance
        try:
            return backend_class(config)
        except ImportError as e:
            # Convert ImportError to ValueError with expected format
            # Extract package name from "pip install X" in error message
            import re

            match = re.search(r'pip install ([\w-]+)', str(e))
            if match:
                package = match.group(1)
                raise ValueError(
                    f"{backend_type.capitalize()} backend requires {package}"
                ) from e
            # Fallback if pattern doesn't match
            raise ValueError(
                f"Backend '{backend_type}' has missing dependencies"
            ) from e

    def get_backend_info(self, backend_type: str) -> dict[str, Any]:
        """Get information about a specific backend.

        Args:
            backend_type: Name of the backend (case-insensitive)

        Returns:
            Dictionary with backend information from registry metadata.
            For a name the registry does not have, a dictionary with
            "description" and "error" entries is returned instead of
            raising. An empty dictionary is returned when the registry
            metrics carry no "metadata" entry.
        """
        # Normalize to lowercase for case-insensitive lookup
        backend_type = backend_type.lower()

        # Check if backend exists first
        if not vector_backends.has(backend_type):
            return {
                "description": "Unknown backend",
                "error": f"Backend '{backend_type}' not recognized",
            }

        # Get metadata from registry
        metrics = vector_backends.get_metrics(backend_type)
        return metrics.get("metadata", {})
# Shared module-level factory instance, built once at import time and
# intended to be the object that gets registered.
vector_store_factory = VectorStoreFactory()