Coverage for repo_ctx / storage.py: 94%

126 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-25 17:42 +0100

1"""Storage layer using SQLite.""" 

2import aiosqlite 

3from pathlib import Path 

4from typing import Optional 

5from .models import Library, Version, Document, SearchResult, FuzzySearchResult 

6 

7 

8def levenshtein_distance(s1: str, s2: str) -> int: 

9 """Calculate Levenshtein distance between two strings.""" 

10 if len(s1) < len(s2): 

11 return levenshtein_distance(s2, s1) 

12 if len(s2) == 0: 

13 return len(s1) 

14 

15 previous_row = range(len(s2) + 1) 

16 for i, c1 in enumerate(s1): 

17 current_row = [i + 1] 

18 for j, c2 in enumerate(s2): 

19 insertions = previous_row[j + 1] + 1 

20 deletions = current_row[j] + 1 

21 substitutions = previous_row[j] + (c1 != c2) 

22 current_row.append(min(insertions, deletions, substitutions)) 

23 previous_row = current_row 

24 

25 return previous_row[-1] 

26 

27 

28class Storage: 

29 def __init__(self, db_path: str): 

30 self.db_path = db_path 

31 Path(db_path).parent.mkdir(parents=True, exist_ok=True) 

32 

33 async def init_db(self): 

34 """Initialize database schema.""" 

35 async with aiosqlite.connect(self.db_path) as db: 

36 await db.execute(""" 

37 CREATE TABLE IF NOT EXISTS libraries ( 

38 id INTEGER PRIMARY KEY AUTOINCREMENT, 

39 group_name TEXT NOT NULL, 

40 project_name TEXT NOT NULL, 

41 description TEXT, 

42 default_version TEXT, 

43 last_indexed TIMESTAMP DEFAULT CURRENT_TIMESTAMP, 

44 UNIQUE(group_name, project_name) 

45 ) 

46 """) 

47 await db.execute(""" 

48 CREATE TABLE IF NOT EXISTS versions ( 

49 id INTEGER PRIMARY KEY AUTOINCREMENT, 

50 library_id INTEGER NOT NULL, 

51 version_tag TEXT NOT NULL, 

52 commit_sha TEXT, 

53 UNIQUE(library_id, version_tag), 

54 FOREIGN KEY (library_id) REFERENCES libraries(id) 

55 ) 

56 """) 

57 await db.execute(""" 

58 CREATE TABLE IF NOT EXISTS documents ( 

59 id INTEGER PRIMARY KEY AUTOINCREMENT, 

60 version_id INTEGER NOT NULL, 

61 file_path TEXT NOT NULL, 

62 content TEXT NOT NULL, 

63 content_type TEXT DEFAULT 'markdown', 

64 tokens INTEGER DEFAULT 0, 

65 UNIQUE(version_id, file_path), 

66 FOREIGN KEY (version_id) REFERENCES versions(id) 

67 ) 

68 """) 

69 await db.execute("CREATE INDEX IF NOT EXISTS idx_libraries_search ON libraries(group_name, project_name)") 

70 await db.execute("CREATE INDEX IF NOT EXISTS idx_documents_version ON documents(version_id)") 

71 await db.commit() 

72 

73 async def save_library(self, library: Library) -> int: 

74 """Save or update library.""" 

75 async with aiosqlite.connect(self.db_path) as db: 

76 cursor = await db.execute( 

77 """INSERT OR REPLACE INTO libraries (group_name, project_name, description, default_version) 

78 VALUES (?, ?, ?, ?)""", 

79 (library.group_name, library.project_name, library.description, library.default_version) 

80 ) 

81 await db.commit() 

82 return cursor.lastrowid 

83 

84 async def save_version(self, version: Version) -> int: 

85 """Save version.""" 

86 async with aiosqlite.connect(self.db_path) as db: 

87 cursor = await db.execute( 

88 """INSERT OR REPLACE INTO versions (library_id, version_tag, commit_sha) 

89 VALUES (?, ?, ?)""", 

90 (version.library_id, version.version_tag, version.commit_sha) 

91 ) 

92 await db.commit() 

93 return cursor.lastrowid 

94 

95 async def save_document(self, doc: Document): 

96 """Save document.""" 

97 async with aiosqlite.connect(self.db_path) as db: 

98 await db.execute( 

99 """INSERT OR REPLACE INTO documents (version_id, file_path, content, content_type, tokens) 

100 VALUES (?, ?, ?, ?, ?)""", 

101 (doc.version_id, doc.file_path, doc.content, doc.content_type, doc.tokens) 

102 ) 

103 await db.commit() 

104 

105 async def search(self, query: str) -> list[SearchResult]: 

106 """Search libraries by name.""" 

107 async with aiosqlite.connect(self.db_path) as db: 

108 db.row_factory = aiosqlite.Row 

109 cursor = await db.execute( 

110 """SELECT l.id, l.group_name, l.project_name, l.description, 

111 GROUP_CONCAT(v.version_tag) as versions 

112 FROM libraries l 

113 LEFT JOIN versions v ON l.id = v.library_id 

114 WHERE l.group_name LIKE ? OR l.project_name LIKE ? OR l.description LIKE ? 

115 GROUP BY l.id""", 

116 (f"%{query}%", f"%{query}%", f"%{query}%") 

117 ) 

118 rows = await cursor.fetchall() 

119 

120 results = [] 

121 for row in rows: 

122 versions = row["versions"].split(",") if row["versions"] else [] 

123 results.append(SearchResult( 

124 library_id=f"/{row['group_name']}/{row['project_name']}", 

125 name=f"{row['group_name']}/{row['project_name']}", 

126 description=row["description"] or "", 

127 versions=versions, 

128 score=1.0 

129 )) 

130 return results 

131 

132 async def get_library(self, group: str, project: str) -> Optional[Library]: 

133 """Get library by group and project.""" 

134 async with aiosqlite.connect(self.db_path) as db: 

135 db.row_factory = aiosqlite.Row 

136 cursor = await db.execute( 

137 "SELECT * FROM libraries WHERE group_name = ? AND project_name = ?", 

138 (group, project) 

139 ) 

140 row = await cursor.fetchone() 

141 if not row: 

142 return None 

143 return Library( 

144 id=row["id"], 

145 group_name=row["group_name"], 

146 project_name=row["project_name"], 

147 description=row["description"], 

148 default_version=row["default_version"] 

149 ) 

150 

151 async def get_version_id(self, library_id: int, version_tag: str) -> Optional[int]: 

152 """Get version ID.""" 

153 async with aiosqlite.connect(self.db_path) as db: 

154 cursor = await db.execute( 

155 "SELECT id FROM versions WHERE library_id = ? AND version_tag = ?", 

156 (library_id, version_tag) 

157 ) 

158 row = await cursor.fetchone() 

159 return row[0] if row else None 

160 

161 async def get_documents(self, version_id: int, topic: Optional[str] = None, page: int = 1, page_size: int = 10) -> list[Document]: 

162 """Get documents for a version.""" 

163 offset = (page - 1) * page_size 

164 async with aiosqlite.connect(self.db_path) as db: 

165 db.row_factory = aiosqlite.Row 

166 if topic: 

167 cursor = await db.execute( 

168 """SELECT * FROM documents  

169 WHERE version_id = ? AND (file_path LIKE ? OR content LIKE ?) 

170 LIMIT ? OFFSET ?""", 

171 (version_id, f"%{topic}%", f"%{topic}%", page_size, offset) 

172 ) 

173 else: 

174 cursor = await db.execute( 

175 "SELECT * FROM documents WHERE version_id = ? LIMIT ? OFFSET ?", 

176 (version_id, page_size, offset) 

177 ) 

178 rows = await cursor.fetchall() 

179 return [Document( 

180 id=row["id"], 

181 version_id=row["version_id"], 

182 file_path=row["file_path"], 

183 content=row["content"], 

184 content_type=row["content_type"], 

185 tokens=row["tokens"] 

186 ) for row in rows] 

187 

188 async def get_all_libraries(self) -> list[Library]: 

189 """Get all indexed libraries with metadata.""" 

190 async with aiosqlite.connect(self.db_path) as db: 

191 db.row_factory = aiosqlite.Row 

192 cursor = await db.execute( 

193 "SELECT * FROM libraries ORDER BY last_indexed DESC" 

194 ) 

195 rows = await cursor.fetchall() 

196 

197 return [Library( 

198 id=row["id"], 

199 group_name=row["group_name"], 

200 project_name=row["project_name"], 

201 description=row["description"], 

202 default_version=row["default_version"], 

203 last_indexed=row["last_indexed"] 

204 ) for row in rows] 

205 

206 async def fuzzy_search(self, query: str, limit: int = 10) -> list[FuzzySearchResult]: 

207 """Fuzzy search across libraries.""" 

208 query_lower = query.lower() 

209 results = [] 

210 

211 async with aiosqlite.connect(self.db_path) as db: 

212 db.row_factory = aiosqlite.Row 

213 cursor = await db.execute("SELECT * FROM libraries") 

214 rows = await cursor.fetchall() 

215 

216 for row in rows: 

217 name = row["project_name"].lower() 

218 group = row["group_name"].lower() 

219 desc = (row["description"] or "").lower() 

220 

221 score = 0.0 

222 match_type = "" 

223 matched_field = "" 

224 

225 # Exact match 

226 if query_lower == name: 

227 score = 1.0 

228 match_type = "exact" 

229 matched_field = "name" 

230 # Starts with 

231 elif name.startswith(query_lower): 

232 score = 0.9 

233 match_type = "starts_with" 

234 matched_field = "name" 

235 # Contains in name 

236 elif query_lower in name: 

237 score = 0.8 

238 match_type = "contains" 

239 matched_field = "name" 

240 # Description match 

241 elif query_lower in desc: 

242 score = 0.6 

243 match_type = "contains" 

244 matched_field = "description" 

245 # Group match 

246 elif query_lower in group: 

247 score = 0.5 

248 match_type = "contains" 

249 matched_field = "group" 

250 # Fuzzy match (Levenshtein) 

251 else: 

252 distance = levenshtein_distance(query_lower, name) 

253 if distance <= 3 and len(name) > 0: 

254 score = max(0.4, 1.0 - (distance / len(name))) 

255 match_type = "fuzzy" 

256 matched_field = "name" 

257 

258 if score > 0: 

259 results.append(FuzzySearchResult( 

260 library_id=f"/{row['group_name']}/{row['project_name']}", 

261 name=row["project_name"], 

262 group=row["group_name"], 

263 description=row["description"] or "", 

264 score=score, 

265 match_type=match_type, 

266 matched_field=matched_field 

267 )) 

268 

269 # Sort by score descending 

270 results.sort(key=lambda x: x.score, reverse=True) 

271 return results[:limit]