Coverage for intelligence_toolkit/tests/unit/AI/test_vector_store.py: 100%
78 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
3#
4import tempfile
5from pathlib import Path
6from unittest.mock import MagicMock, patch
8import pyarrow as pa
9import pytest
11from intelligence_toolkit.AI.vector_store import VectorStore
# Arrow schema shared by the fixtures and tests below: three string columns
# plus a variable-length list of float64 values for the embedding vector.
schema = pa.schema(
    [
        pa.field(column_name, column_type)
        for column_name, column_type in (
            ("hash", pa.string()),
            ("text", pa.string()),
            ("vector", pa.list_(pa.float64())),
            ("additional_details", pa.string()),
        )
    ]
)
@pytest.fixture
def temp_db_path():
    """Yield the path of a temporary directory that lasts for one test.

    The directory is created by ``tempfile.TemporaryDirectory`` and removed
    when the context manager exits after the test has finished.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        yield scratch_dir
@pytest.fixture
def vector_store(temp_db_path):
    """Return a ``VectorStore`` for ``test_table`` rooted at ``temp_db_path``.

    The store is built with the module-level ``schema``.
    """
    store = VectorStore("test_table", temp_db_path, schema)
    return store
def test_vector_store_initialization_without_table():
    """A store built from a path alone has no table and no DuckDB data."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)

        # No table name was supplied, so both attributes must be unset.
        assert tableless_store.table is None
        assert tableless_store.duckdb_data is None
def test_vector_store_initialization_with_table(temp_db_path):
    """Supplying a table name and schema sets both ``table`` and ``duckdb_data``."""
    named_store = VectorStore("test_table", temp_db_path, schema)

    assert named_store.table is not None
    assert named_store.duckdb_data is not None
def test_vector_store_save(vector_store):
    """Rows passed to ``save`` must be readable back from the store.

    The previous version ended in ``assert True``, which passes even if
    ``save`` silently discards its input. This version reads the rows back
    through ``search_by_column`` and compares their text with what was saved.
    """
    items = [
        {
            "hash": "hash1",
            "text": "test text 1",
            "vector": [0.1, 0.2, 0.3],
            "additional_details": "{}",
        },
        {
            "hash": "hash2",
            "text": "test text 2",
            "vector": [0.4, 0.5, 0.6],
            "additional_details": "{}",
        },
    ]

    vector_store.save(items)

    # Refresh the DuckDB data before querying, as the search tests in this
    # module do after every save.
    vector_store.update_duckdb_data()
    stored = vector_store.search_by_column([item["hash"] for item in items], "hash")

    # Map each returned hash to its text so the check is order-independent.
    text_by_hash = {
        stored.iloc[row]["hash"]: stored.iloc[row]["text"]
        for row in range(len(stored))
    }
    for item in items:
        assert text_by_hash.get(item["hash"]) == item["text"]
def test_vector_store_save_without_table():
    """``save`` on a store without a table raises ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)
        rows = [{"hash": "test"}]

        with pytest.raises(ValueError, match="Table not initialized"):
            tableless_store.save(rows)
def test_vector_store_search_by_column(vector_store):
    """Searching the ``hash`` column for one value returns the matching row first."""
    first_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }
    second_row = {
        "hash": "hash2",
        "text": "test text 2",
        "vector": [0.4, 0.5, 0.6],
        "additional_details": "{}",
    }

    vector_store.save([first_row, second_row])
    # Refresh the DuckDB data after saving, before running the column search.
    vector_store.update_duckdb_data()

    matches = vector_store.search_by_column("hash1", "hash")

    assert len(matches) > 0
    top_match = matches.iloc[0]
    assert top_match["hash"] == "hash1"
def test_vector_store_search_by_column_multiple(vector_store):
    """Searching with a list of values returns a row for every requested value.

    The original count-only check (``len(result) >= 2``) would also pass if
    the same row came back twice. The row count is still asserted, and the
    test now additionally requires both requested hashes to be present.
    """
    items = [
        {
            "hash": "hash1",
            "text": "test text 1",
            "vector": [0.1, 0.2, 0.3],
            "additional_details": "{}",
        },
        {
            "hash": "hash2",
            "text": "test text 2",
            "vector": [0.4, 0.5, 0.6],
            "additional_details": "{}",
        },
    ]

    vector_store.save(items)
    vector_store.update_duckdb_data()

    wanted = ["hash1", "hash2"]
    result = vector_store.search_by_column(wanted, "hash")

    assert len(result) >= 2
    # Collect the hashes actually returned; both requested values must appear.
    returned_hashes = {result.iloc[row]["hash"] for row in range(len(result))}
    assert set(wanted) <= returned_hashes
def test_vector_store_search_by_column_without_table():
    """``search_by_column`` on a store without a table raises ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)

        with pytest.raises(ValueError, match="Table not initialized"):
            tableless_store.search_by_column("test", "hash")
def test_vector_store_search_by_vector(temp_db_path):
    """A vector search with ``k=1`` returns at least one hit that has a ``hash`` key."""
    query_vector = [0.1, 0.2, 0.3]

    # LanceDB needs a fixed-size list for vector columns, so this test builds
    # its own schema instead of reusing the variable-length module-level one.
    fixed_size_vector = pa.list_(pa.float64(), len(query_vector))
    vector_schema = pa.schema(
        [
            pa.field("hash", pa.string()),
            pa.field("text", pa.string()),
            pa.field("vector", fixed_size_vector),
            pa.field("additional_details", pa.string()),
        ]
    )
    store = VectorStore("test_vector_table", temp_db_path, vector_schema)

    store.save(
        [
            {
                "hash": "hash1",
                "text": "test text 1",
                "vector": [0.1, 0.2, 0.3],
                "additional_details": "{}",
            },
        ]
    )

    hits = store.search_by_vector(query_vector, k=1)

    assert len(hits) > 0
    assert "hash" in hits[0]
def test_vector_store_search_by_vector_without_table():
    """``search_by_vector`` on a store without a table raises ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)
        query_vector = [0.1, 0.2, 0.3]

        with pytest.raises(ValueError, match="Table not initialized"):
            tableless_store.search_by_vector(query_vector)
def test_vector_store_update_duckdb_data(vector_store):
    """Refreshing the DuckDB data after a save completes and leaves it set."""
    single_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }

    vector_store.save([single_row])
    vector_store.update_duckdb_data()

    # Getting here means the refresh did not raise; the attribute must be set.
    assert vector_store.duckdb_data is not None
def test_vector_store_update_duckdb_data_without_table():
    """``update_duckdb_data`` on a store without a table raises ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)

        with pytest.raises(ValueError, match="Table not initialized"):
            tableless_store.update_duckdb_data()
def test_vector_store_drop_table_without_table():
    """``drop_table`` on a store without a table raises ``ValueError``."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        tableless_store = VectorStore(path=scratch_dir)

        with pytest.raises(ValueError, match="Table not initialized"):
            tableless_store.drop_table()