Coverage for intelligence_toolkit/tests/unit/AI/test_vector_store.py: 100%

78 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3# 

4import tempfile 

5from pathlib import Path 

6from unittest.mock import MagicMock, patch 

7 

8import pyarrow as pa 

9import pytest 

10 

11from intelligence_toolkit.AI.vector_store import VectorStore 

12 

# Arrow schema handed to VectorStore by the fixtures and tests in this module:
# three string columns plus a variable-length list of float64 for "vector".
schema = pa.schema(
    [
        ("hash", pa.string()),
        ("text", pa.string()),
        ("vector", pa.list_(pa.float64())),
        ("additional_details", pa.string()),
    ]
)

21 

22 

@pytest.fixture
def temp_db_path():
    """Yield the path of a temporary directory that exists while the test runs."""
    scratch = tempfile.TemporaryDirectory()
    # Entering the context manager hands back the directory path; leaving it
    # (after the test finishes) cleans the directory up.
    with scratch as directory:
        yield directory

27 

28 

@pytest.fixture
def vector_store(temp_db_path):
    """Return a VectorStore for "test_table" rooted at the temporary path."""
    store = VectorStore("test_table", temp_db_path, schema)
    return store

32 

33 

def test_vector_store_initialization_without_table():
    """A store built from a path alone exposes neither a table nor duckdb data."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)

        assert bare_store.table is None
        assert bare_store.duckdb_data is None

39 

40 

def test_vector_store_initialization_with_table(temp_db_path):
    """A store built with a table name and schema has table and duckdb data set."""
    named_store = VectorStore("test_table", temp_db_path, schema)

    assert named_store.table is not None
    assert named_store.duckdb_data is not None

45 

46 

def test_vector_store_save(vector_store):
    """Save two rows and confirm both can be read back by their hash.

    The previous version ended in ``assert True``, which passes regardless of
    what ``save`` did; this version checks that the rows are retrievable.
    """
    items = [
        {
            "hash": "hash1",
            "text": "test text 1",
            "vector": [0.1, 0.2, 0.3],
            "additional_details": "{}",
        },
        {
            "hash": "hash2",
            "text": "test text 2",
            "vector": [0.4, 0.5, 0.6],
            "additional_details": "{}",
        },
    ]

    vector_store.save(items)

    # Refresh the duckdb data before querying, mirroring what the
    # search_by_column tests in this module do after saving.
    vector_store.update_duckdb_data()
    stored = vector_store.search_by_column(["hash1", "hash2"], "hash")

    assert len(stored) >= 2
    assert {"hash1", "hash2"} <= set(stored["hash"])

66 

67 

def test_vector_store_save_without_table():
    """Saving through a store that has no table raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)
        expected_failure = pytest.raises(ValueError, match="Table not initialized")

        with expected_failure:
            bare_store.save([{"hash": "test"}])

74 

75 

def test_vector_store_search_by_column(vector_store):
    """Looking up a single hash value returns a row carrying that hash."""
    first_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }
    second_row = {
        "hash": "hash2",
        "text": "test text 2",
        "vector": [0.4, 0.5, 0.6],
        "additional_details": "{}",
    }

    vector_store.save([first_row, second_row])
    vector_store.update_duckdb_data()

    matches = vector_store.search_by_column("hash1", "hash")

    assert len(matches) > 0
    top_match = matches.iloc[0]
    assert top_match["hash"] == "hash1"

98 

99 

def test_vector_store_search_by_column_multiple(vector_store):
    """Looking up a list of hash values returns at least one row per value saved."""
    first_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }
    second_row = {
        "hash": "hash2",
        "text": "test text 2",
        "vector": [0.4, 0.5, 0.6],
        "additional_details": "{}",
    }

    vector_store.save([first_row, second_row])
    vector_store.update_duckdb_data()

    wanted_hashes = ["hash1", "hash2"]
    matches = vector_store.search_by_column(wanted_hashes, "hash")

    assert len(matches) >= 2

121 

122 

def test_vector_store_search_by_column_without_table():
    """Column search on a store that has no table raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)
        expected_failure = pytest.raises(ValueError, match="Table not initialized")

        with expected_failure:
            bare_store.search_by_column("test", "hash")

129 

130 

def test_vector_store_search_by_vector(temp_db_path):
    """A vector query with k=1 against one saved row returns a hit with a "hash"."""
    # LanceDB requires fixed_size_list for vector columns, not variable list
    fixed_width_vector = pa.list_(pa.float64(), 3)  # Fixed size for vector search
    vector_schema = pa.schema(
        [
            pa.field("hash", pa.string()),
            pa.field("text", pa.string()),
            pa.field("vector", fixed_width_vector),
            pa.field("additional_details", pa.string()),
        ]
    )
    store = VectorStore("test_vector_table", temp_db_path, vector_schema)

    only_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }
    store.save([only_row])

    hits = store.search_by_vector([0.1, 0.2, 0.3], k=1)

    assert len(hits) > 0
    assert "hash" in hits[0]

158 

159 

def test_vector_store_search_by_vector_without_table():
    """Vector search on a store that has no table raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)
        expected_failure = pytest.raises(ValueError, match="Table not initialized")

        with expected_failure:
            bare_store.search_by_vector([0.1, 0.2, 0.3])

166 

167 

def test_vector_store_update_duckdb_data(vector_store):
    """Refreshing duckdb data after a save completes and leaves duckdb_data set."""
    only_row = {
        "hash": "hash1",
        "text": "test text 1",
        "vector": [0.1, 0.2, 0.3],
        "additional_details": "{}",
    }

    vector_store.save([only_row])
    vector_store.update_duckdb_data()

    # Reaching this point means the refresh did not raise.
    assert vector_store.duckdb_data is not None

182 

183 

def test_vector_store_update_duckdb_data_without_table():
    """Refreshing duckdb data on a store that has no table raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)
        expected_failure = pytest.raises(ValueError, match="Table not initialized")

        with expected_failure:
            bare_store.update_duckdb_data()

190 

191 

def test_vector_store_drop_table_without_table():
    """Dropping the table on a store that has no table raises ValueError."""
    with tempfile.TemporaryDirectory() as scratch_dir:
        bare_store = VectorStore(path=scratch_dir)
        expected_failure = pytest.raises(ValueError, match="Table not initialized")

        with expected_failure:
            bare_store.drop_table()