Coverage for src/dataknobs_data/vector/stores/factory.py: 32%

31 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2025-11-13 11:23 -0700

1"""Factory for creating vector store backends.""" 

2 

3import logging 

4from typing import Any 

5 

6from dataknobs_config import FactoryBase 

7 

8from . import vector_backends 

9from .base import VectorStore 

10 

# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)

12 

13 

class VectorStoreFactory(FactoryBase):
    """Factory for creating vector store backends dynamically.

    This factory allows creating different vector store implementations
    based on configuration, supporting specialized vector databases.
    Backend classes are looked up by name in the ``vector_backends``
    registry; names are lowercased before lookup, so they are
    case-insensitive.

    Configuration Options:
        backend (str): Backend type (faiss, chroma, memory). Defaults to
            ``memory`` when omitted.
        dimensions (int): Vector dimensions (required for some backends)
        **kwargs: Backend-specific configuration options

    Example Configuration:
        vector_stores:
          - name: main_vectors
            factory: vector_store
            backend: faiss
            dimensions: 768
            index_type: ivfflat
            persist_path: ./vectors/main

          - name: doc_search
            factory: vector_store
            backend: chroma
            collection_name: documents
            persist_path: ./chroma_db
    """

    def create(self, **config: Any) -> VectorStore:
        """Create a vector store instance based on configuration.

        The ``backend`` entry is removed from ``config`` and used to select
        the backend class; everything that remains is handed to the backend
        constructor as a single dict argument.

        Args:
            **config: Configuration including 'backend' field and
                backend-specific options

        Returns:
            Instance of appropriate vector store backend

        Raises:
            ValueError: If the backend type is not a string, is not
                recognized, or its dependencies are not available
        """
        backend = config.pop("backend", "memory")
        if not isinstance(backend, str):
            # Without this guard a non-string value (e.g. ``backend: null``
            # in YAML) would fail on ``.lower()`` with an AttributeError
            # instead of the documented ValueError.
            raise ValueError(
                f"Backend type must be a string, got {type(backend).__name__}"
            )
        backend_type = backend.lower()

        # Lazy %-style arguments: the message is only formatted when the
        # INFO level is actually enabled.
        logger.info("Creating vector store with backend: %s", backend_type)

        # Get backend class from registry.
        # NOTE(review): the registry's lookup-failure exception type is not
        # visible from this file, so the broad catch is kept as-is.
        try:
            backend_class = vector_backends.get(backend_type)
        except Exception as e:
            # Backend not found - provide helpful error message
            available = vector_backends.list_keys()
            raise ValueError(
                f"Unknown backend type: {backend_type}. "
                f"Available backends: {', '.join(sorted(set(available)))}"
            ) from e

        # Create and return backend instance
        try:
            return backend_class(config)
        except ImportError as e:
            # Convert ImportError to ValueError with expected format.
            # Extract package name from "pip install X" in error message.
            import re

            match = re.search(r"pip install ([\w-]+)", str(e))
            if match:
                package = match.group(1)
                raise ValueError(
                    f"{backend_type.capitalize()} backend requires {package}"
                ) from e
            # Fallback if the message carries no "pip install" hint
            raise ValueError(
                f"Backend '{backend_type}' has missing dependencies"
            ) from e

    def get_backend_info(self, backend_type: str) -> dict[str, Any]:
        """Get information about a specific backend.

        Args:
            backend_type: Name of the backend (matched case-insensitively)

        Returns:
            Dictionary with backend information from registry metadata.
            For a name the registry does not know, a dictionary with
            ``description`` and ``error`` entries is returned instead of
            raising.
        """
        # Normalize to lowercase for case-insensitive lookup
        backend_type = backend_type.lower()

        # Check if backend exists first
        if not vector_backends.has(backend_type):
            return {
                "description": "Unknown backend",
                "error": f"Backend '{backend_type}' not recognized",
            }

        # Get metadata from registry; an absent "metadata" entry yields {}
        metrics = vector_backends.get_metrics(backend_type)
        return metrics.get("metadata", {})

109 

110 

# Module-level singleton instance, created at import time and intended for
# registration (the registration site itself is not shown in this file).
vector_store_factory = VectorStoreFactory()