Coverage for src/dataknobs_data/factory.py: 19%
72 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-29 14:14 -0600
1"""Backend factory for dynamic database creation."""
3import logging
4from typing import Any
6from dataknobs_config import FactoryBase
8from dataknobs_data.database import SyncDatabase
10# Import the VectorStoreFactory from vector.stores.factory
11from dataknobs_data.vector.stores.factory import VectorStoreFactory
14logger = logging.getLogger(__name__)
class DatabaseFactory(FactoryBase):
    """Factory for creating synchronous database backends dynamically.

    The backend implementation is chosen from the ``backend`` configuration
    value. Backend modules are imported lazily inside ``create`` so that
    optional dependencies are only needed for the backends actually used.

    Configuration Options:
        backend (str): Backend type (memory, file, postgres, elasticsearch,
            sqlite, s3). Matching is case-insensitive and the aliases
            ``mem``, ``postgresql``, ``pg`` and ``es`` are accepted.
            Defaults to ``memory``.
        **kwargs: Backend-specific configuration options

    Example Configuration:
        databases:
          - name: main
            factory: database
            backend: postgres
            host: localhost
            database: myapp

          - name: cache
            factory: database
            backend: memory

          - name: archive
            factory: database
            backend: s3
            bucket: my-archive-bucket
            prefix: archives/
    """

    # Alternate spellings mapped to the canonical backend name. Shared by
    # create() and get_backend_info() so both accept the same names.
    _BACKEND_ALIASES = {
        "mem": "memory",
        "postgresql": "postgres",
        "pg": "postgres",
        "es": "elasticsearch",
    }

    def _canonical_backend(self, backend_type: str) -> str:
        """Return the lower-cased canonical name for ``backend_type``.

        Names that are not known aliases are returned lower-cased and
        otherwise unchanged, so unknown backends still reach the callers'
        own "unknown backend" handling.
        """
        name = backend_type.lower()
        return self._BACKEND_ALIASES.get(name, name)

    def create(self, **config) -> SyncDatabase:
        """Create a database instance based on configuration.

        Args:
            **config: Configuration including 'backend' field and
                backend-specific options. The 'backend' entry is removed
                before the remaining options are handed to the backend's
                ``from_config``.

        Returns:
            Instance of appropriate database backend

        Raises:
            ValueError: If backend type is not a string, is not recognized,
                or its optional dependency is not installed
        """
        backend_type = config.pop("backend", "memory")
        if not isinstance(backend_type, str):
            # Fail with the documented exception type instead of an
            # AttributeError from calling .lower() on e.g. None.
            raise ValueError(
                f"Backend type must be a string, got {type(backend_type).__name__}"
            )
        backend_type = backend_type.lower()

        logger.info("Creating database with backend: %s", backend_type)

        # vector_enabled is only reported here; it stays in config and is
        # passed through to the backend untouched.
        if config.get("vector_enabled", False):
            logger.debug("Vector support enabled for backend: %s", backend_type)

        backend = self._canonical_backend(backend_type)

        if backend == "memory":
            from dataknobs_data.backends.memory import SyncMemoryDatabase
            return SyncMemoryDatabase.from_config(config)

        elif backend == "file":
            from dataknobs_data.backends.file import SyncFileDatabase
            return SyncFileDatabase.from_config(config)

        elif backend == "postgres":
            # An ImportError from the import or from from_config is
            # reported as a ValueError carrying the install hint.
            try:
                from dataknobs_data.backends.postgres import SyncPostgresDatabase
                return SyncPostgresDatabase.from_config(config)
            except ImportError as e:
                raise ValueError(
                    "PostgreSQL backend requires psycopg2. "
                    "Install with: pip install dataknobs-data[postgres]"
                ) from e

        elif backend == "elasticsearch":
            try:
                from dataknobs_data.backends.elasticsearch import SyncElasticsearchDatabase
                return SyncElasticsearchDatabase.from_config(config)
            except ImportError as e:
                raise ValueError(
                    "Elasticsearch backend requires elasticsearch package. "
                    "Install with: pip install dataknobs-data[elasticsearch]"
                ) from e

        elif backend == "sqlite":
            from dataknobs_data.backends.sqlite import SyncSQLiteDatabase
            return SyncSQLiteDatabase.from_config(config)

        elif backend == "s3":
            try:
                from dataknobs_data.backends.s3 import SyncS3Database
                return SyncS3Database.from_config(config)
            except ImportError as e:
                raise ValueError(
                    "S3 backend requires boto3. "
                    "Install with: pip install dataknobs-data[s3]"
                ) from e

        else:
            raise ValueError(
                f"Unknown backend type: {backend_type}. "
                "Available backends: memory, file, postgres, elasticsearch, sqlite, s3"
            )

    def get_backend_info(self, backend_type: str) -> dict[str, Any]:
        """Get information about a specific backend.

        Args:
            backend_type: Name of the backend. Case-insensitive; the same
                aliases accepted by ``create`` (mem, postgresql, pg, es)
                are resolved to their canonical backend.

        Returns:
            Dictionary with backend information. For an unrecognized name
            the dictionary holds a generic "description" and an "error"
            message instead.
        """
        info: dict[str, dict[str, Any]] = {
            "memory": {
                "description": "In-memory storage for testing and caching",
                "persistent": False,
                "requires_install": False,
                "config_options": {
                    "initial_data": "Optional initial data dictionary"
                }
            },
            "file": {
                "description": "File-based storage (JSON, CSV, Parquet)",
                "persistent": True,
                "requires_install": False,
                "config_options": {
                    "path": "Path to the file (required)",
                    "format": "File format: json, csv, parquet (default: json)",
                    "compression": "Optional compression: gzip, bz2, xz"
                }
            },
            "postgres": {
                "description": "PostgreSQL database backend with native vector support (pgvector)",
                "persistent": True,
                "requires_install": "pip install dataknobs-data[postgres]",
                "vector_support": True,
                "config_options": {
                    "host": "Database host (required)",
                    "port": "Database port (default: 5432)",
                    "database": "Database name (required)",
                    "user": "Username (required)",
                    "password": "Password (required)",
                    "table": "Table name (default: records)",
                    "vector_enabled": "Enable vector support (default: False)",
                    "vector_metric": "Distance metric for vectors: cosine, euclidean, dot_product (default: cosine)"
                }
            },
            "elasticsearch": {
                "description": "Elasticsearch search engine backend with native KNN vector support",
                "persistent": True,
                "requires_install": "pip install dataknobs-data[elasticsearch]",
                "vector_support": True,
                "config_options": {
                    "hosts": "List of host URLs (required)",
                    "index": "Index name (required)",
                    "doc_type": "Document type (default: _doc)",
                    "username": "Optional username",
                    "password": "Optional password",
                    "vector_enabled": "Enable vector support (default: False)",
                    "vector_metric": "Distance metric for vectors: cosine, euclidean, dot_product (default: cosine)"
                }
            },
            "sqlite": {
                "description": "SQLite database backend with Python-based vector support",
                "persistent": True,
                "requires_install": False,
                "vector_support": True,
                "config_options": {
                    "path": "Path to database file (required)",
                    "table": "Table name (default: records)",
                    "vector_enabled": "Enable vector support (default: False)",
                    "vector_metric": "Distance metric for vectors: cosine, euclidean, dot_product (default: cosine)"
                }
            },
            "s3": {
                "description": "AWS S3 object storage backend",
                "persistent": True,
                "requires_install": "pip install dataknobs-data[s3]",
                "config_options": {
                    "bucket": "S3 bucket name (required)",
                    "prefix": "Object key prefix (default: records/)",
                    "region": "AWS region (default: us-east-1)",
                    "endpoint_url": "Custom endpoint for S3-compatible services",
                    "access_key_id": "AWS access key (or use IAM role)",
                    "secret_access_key": "AWS secret key (or use IAM role)"
                }
            }
        }

        # Resolve aliases first so that e.g. "pg" describes the postgres
        # backend; the error text keeps the name exactly as the caller gave it.
        return info.get(self._canonical_backend(backend_type), {
            "description": "Unknown backend",
            "error": f"Backend '{backend_type}' not recognized"
        })
class AsyncDatabaseFactory(FactoryBase):
    """Factory that builds the async variant of a database backend.

    Note: a backend name without an async implementation in this factory
    is rejected with a ValueError.
    """

    def create(self, **config) -> Any:
        """Build an async database instance from keyword configuration.

        Args:
            **config: Configuration including the 'backend' field. That
                field is removed (default: "memory") and lower-cased; the
                remaining options go to the backend's ``from_config``.

        Returns:
            Instance of appropriate async database backend

        Raises:
            ValueError: If backend doesn't support async operations
        """
        backend_type = config.pop("backend", "memory").lower()

        # Only reported here; the flag remains in config for the backend.
        if config.get("vector_enabled", False):
            logger.debug(f"Vector support enabled for async backend: {backend_type}")

        # Pick the implementation class first (importing its module only
        # when that backend is requested), then construct it once below.
        if backend_type in ("memory", "mem"):
            from dataknobs_data.backends.memory import AsyncMemoryDatabase as backend_cls
        elif backend_type == "file":
            from dataknobs_data.backends.file import AsyncFileDatabase as backend_cls
        elif backend_type in ("postgres", "postgresql", "pg"):
            from dataknobs_data.backends.postgres import AsyncPostgresDatabase as backend_cls
        elif backend_type in ("elasticsearch", "es"):
            from dataknobs_data.backends.elasticsearch_async import (
                AsyncElasticsearchDatabase as backend_cls,
            )
        elif backend_type == "s3":
            from dataknobs_data.backends.s3_async import AsyncS3Database as backend_cls
        elif backend_type == "sqlite":
            from dataknobs_data.backends.sqlite_async import AsyncSQLiteDatabase as backend_cls
        else:
            raise ValueError(
                f"Backend '{backend_type}' does not support async operations yet. "
                "Available async backends: memory, file, postgres, elasticsearch, s3, sqlite"
            )

        return backend_cls.from_config(config)
# TODO: Add an AsyncVectorStoreFactory once async vector stores exist.
# That first requires the async base class (AsyncVectorStore) and the async
# implementations (AsyncFaissVectorStore, AsyncChromaVectorStore,
# AsyncMemoryVectorStore), which have not been created yet.


# Module-level singleton factory instances, created at import time for
# registration.
database_factory = DatabaseFactory()
async_database_factory = AsyncDatabaseFactory()
vector_store_factory = VectorStoreFactory()
277# TODO: add an 'async_vector_store_factory = AsyncVectorStoreFactory()' when async vector stores are implemented