Coverage for src/dataknobs_data/vector/stores/factory.py: 31%

29 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-29 14:14 -0600

1"""Factory for creating vector store backends.""" 

2 

3import logging 

4from typing import Any 

5 

6from dataknobs_config import FactoryBase 

7 

8from .base import VectorStore 

9 

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)

11 

12 

class VectorStoreFactory(FactoryBase):
    """Factory for creating vector store backends dynamically.

    This factory allows creating different vector store implementations
    based on configuration, supporting specialized vector databases.

    Configuration Options:
        backend (str): Backend type (memory, faiss, chroma). "chromadb" is
            accepted as an alias for "chroma". Matching is case-insensitive
            and ignores surrounding whitespace. Defaults to "memory".
        dimensions (int): Vector dimensions (required for some backends)
        **kwargs: Backend-specific configuration options

    Example Configuration:
        vector_stores:
          - name: main_vectors
            factory: vector_store
            backend: faiss
            dimensions: 768
            index_type: ivfflat
            persist_path: ./vectors/main

          - name: doc_search
            factory: vector_store
            backend: chroma
            collection_name: documents
            persist_path: ./chroma_db
    """

    # Alternate spellings mapped to their canonical backend name. Shared by
    # create() and get_backend_info() so both accept the same names.
    _BACKEND_ALIASES = {"chromadb": "chroma"}

    def create(self, **config: Any) -> VectorStore:
        """Create a vector store instance based on configuration.

        Args:
            **config: Configuration including 'backend' field and
                backend-specific options. The 'backend' key is removed and
                the remaining options are passed to the backend constructor
                as a single dict.

        Returns:
            Instance of appropriate vector store backend

        Raises:
            ValueError: If backend is not a string, is not recognized, or
                its optional dependency is not installed.
        """
        raw_backend = config.pop("backend", "memory")
        if not isinstance(raw_backend, str):
            # e.g. an empty YAML value yields None; report it as a config
            # error instead of an AttributeError from .lower().
            raise ValueError(
                f"Backend type must be a string, got {type(raw_backend).__name__}. "
                "Available backends: memory, faiss, chroma"
            )
        backend_type = raw_backend.strip().lower()

        logger.info("Creating vector store with backend: %s", backend_type)

        canonical = self._BACKEND_ALIASES.get(backend_type, backend_type)

        if canonical == "memory":
            # Simple in-memory implementation
            from .memory import MemoryVectorStore
            return MemoryVectorStore(config)

        if canonical == "faiss":
            # Construction stays inside the try so that an ImportError raised
            # while building the store is reported the same way as a failed
            # module import.
            try:
                from .faiss import FaissVectorStore
                return FaissVectorStore(config)
            except ImportError as e:
                raise ValueError(
                    "Faiss backend requires faiss-cpu. "
                    "Install with: pip install faiss-cpu"
                ) from e

        if canonical == "chroma":
            try:
                from .chroma import ChromaVectorStore
                return ChromaVectorStore(config)
            except ImportError as e:
                raise ValueError(
                    "Chroma backend requires chromadb. "
                    "Install with: pip install chromadb"
                ) from e

        raise ValueError(
            f"Unknown backend type: {backend_type}. "
            "Available backends: memory, faiss, chroma"
        )

    def get_backend_info(self, backend_type: str) -> dict[str, Any]:
        """Get information about a specific backend.

        Args:
            backend_type: Name of the backend. Case-insensitive; accepts the
                same aliases as create() (e.g. "chromadb" for "chroma").

        Returns:
            Dictionary with backend information. For an unrecognized name the
            dictionary has a "description" of "Unknown backend" and an
            "error" message naming the requested backend.
        """
        # Built fresh on every call, so callers may mutate the result freely.
        info = {
            "memory": {
                "description": "In-memory vector storage for testing",
                "persistent": False,
                "requires_install": False,
                "config_options": {
                    "dimensions": "Vector dimensions (required)",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                },
            },
            "faiss": {
                "description": "Facebook AI Similarity Search - efficient vector search",
                "persistent": True,
                "requires_install": "pip install faiss-cpu",
                "config_options": {
                    "dimensions": "Vector dimensions (required)",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                    "index_type": "Index type: flat, ivfflat, hnsw, auto",
                    "persist_path": "Path to save/load index",
                    "nlist": "Number of clusters for IVF index",
                    "m": "Number of connections for HNSW",
                },
            },
            "chroma": {
                "description": "ChromaDB - AI-native vector database",
                "persistent": True,
                "requires_install": "pip install chromadb",
                "config_options": {
                    "collection_name": "Name of the collection",
                    "persist_path": "Path for persistent storage",
                    "embedding_function": "Embedding function name or object",
                    "metric": "Distance metric: cosine, euclidean, dot_product",
                },
            },
        }

        key = backend_type.strip().lower()
        key = self._BACKEND_ALIASES.get(key, key)

        return info.get(key, {
            "description": "Unknown backend",
            "error": f"Backend '{backend_type}' not recognized",
        })

136 

137 

# Create singleton instance for registration.
# NOTE(review): the registration mechanism is not visible in this module;
# presumably dataknobs_config looks this instance up — confirm.
vector_store_factory = VectorStoreFactory()