Coverage for repo_ctx / storage.py: 94%
126 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
1"""Storage layer using SQLite."""
2import aiosqlite
3from pathlib import Path
4from typing import Optional
5from .models import Library, Version, Document, SearchResult, FuzzySearchResult
def levenshtein_distance(s1: str, s2: str) -> int:
    """Return the Levenshtein (edit) distance between ``s1`` and ``s2``.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.
    Only two rows of the dynamic-programming table are kept at a time.
    """
    # Iterate over the longer string so each row has the shorter one's length.
    if len(s1) >= len(s2):
        longer, shorter = s1, s2
    else:
        longer, shorter = s2, s1

    # Turning a string into the empty string costs one deletion per character.
    if not shorter:
        return len(longer)

    prev = list(range(len(shorter) + 1))
    for row_no, ch_long in enumerate(longer, start=1):
        curr = [row_no]
        for col, ch_short in enumerate(shorter):
            cost_insert = prev[col + 1] + 1
            cost_delete = curr[col] + 1
            cost_substitute = prev[col] + (0 if ch_long == ch_short else 1)
            curr.append(min(cost_insert, cost_delete, cost_substitute))
        prev = curr

    return prev[-1]
class Storage:
    """SQLite-backed persistence for libraries, versions and documents.

    Every method opens its own ``aiosqlite`` connection to ``db_path`` and
    closes it again when the method returns.
    """

    def __init__(self, db_path: str):
        """Remember the database location and create its parent directory.

        Args:
            db_path: Filesystem path of the SQLite database file.
        """
        self.db_path = db_path
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _escape_like(text: str) -> str:
        r"""Escape ``\``, ``%`` and ``_`` so ``text`` matches literally in LIKE.

        The result is meant for patterns used together with ``ESCAPE '\'``.
        """
        # The backslash must be doubled first, otherwise the escapes added
        # for ``%`` and ``_`` would themselves be escaped again.
        return (
            text.replace("\\", "\\\\")
            .replace("%", "\\%")
            .replace("_", "\\_")
        )

    async def init_db(self):
        """Create the tables and indexes if they do not exist yet."""
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute("""
                CREATE TABLE IF NOT EXISTS libraries (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    group_name TEXT NOT NULL,
                    project_name TEXT NOT NULL,
                    description TEXT,
                    default_version TEXT,
                    last_indexed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(group_name, project_name)
                )
            """)
            await db.execute("""
                CREATE TABLE IF NOT EXISTS versions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    library_id INTEGER NOT NULL,
                    version_tag TEXT NOT NULL,
                    commit_sha TEXT,
                    UNIQUE(library_id, version_tag),
                    FOREIGN KEY (library_id) REFERENCES libraries(id)
                )
            """)
            await db.execute("""
                CREATE TABLE IF NOT EXISTS documents (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    version_id INTEGER NOT NULL,
                    file_path TEXT NOT NULL,
                    content TEXT NOT NULL,
                    content_type TEXT DEFAULT 'markdown',
                    tokens INTEGER DEFAULT 0,
                    UNIQUE(version_id, file_path),
                    FOREIGN KEY (version_id) REFERENCES versions(id)
                )
            """)
            await db.execute("CREATE INDEX IF NOT EXISTS idx_libraries_search ON libraries(group_name, project_name)")
            await db.execute("CREATE INDEX IF NOT EXISTS idx_documents_version ON documents(version_id)")
            await db.commit()

    async def save_library(self, library: Library) -> int:
        """Insert a library, or update the existing row in place.

        A library is identified by ``(group_name, project_name)``. When that
        pair already exists, its description and default version are updated
        and ``last_indexed`` is refreshed, while the row id is kept so that
        rows in ``versions`` referencing it stay attached.

        Returns:
            The id of the inserted or updated library row.
        """
        async with aiosqlite.connect(self.db_path) as db:
            # A plain INSERT OR REPLACE would delete the old row and hand out
            # a new AUTOINCREMENT id, detaching every version of the library.
            await db.execute(
                """INSERT INTO libraries (group_name, project_name, description, default_version)
                   VALUES (?, ?, ?, ?)
                   ON CONFLICT(group_name, project_name) DO UPDATE SET
                       description = excluded.description,
                       default_version = excluded.default_version,
                       last_indexed = CURRENT_TIMESTAMP""",
                (library.group_name, library.project_name, library.description, library.default_version)
            )
            # lastrowid is not meaningful when the upsert took the UPDATE
            # branch, so read the id back explicitly.
            cursor = await db.execute(
                "SELECT id FROM libraries WHERE group_name = ? AND project_name = ?",
                (library.group_name, library.project_name)
            )
            row = await cursor.fetchone()
            await db.commit()
            return row[0]

    async def save_version(self, version: Version) -> int:
        """Insert a version, or update the commit of an existing one.

        A version is identified by ``(library_id, version_tag)``. When that
        pair already exists only ``commit_sha`` is updated; the row id is
        kept so that rows in ``documents`` referencing it stay attached.

        Returns:
            The id of the inserted or updated version row.
        """
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """INSERT INTO versions (library_id, version_tag, commit_sha)
                   VALUES (?, ?, ?)
                   ON CONFLICT(library_id, version_tag) DO UPDATE SET
                       commit_sha = excluded.commit_sha""",
                (version.library_id, version.version_tag, version.commit_sha)
            )
            cursor = await db.execute(
                "SELECT id FROM versions WHERE library_id = ? AND version_tag = ?",
                (version.library_id, version.version_tag)
            )
            row = await cursor.fetchone()
            await db.commit()
            return row[0]

    async def save_document(self, doc: Document):
        """Store a document, replacing any row with the same version and path."""
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                """INSERT OR REPLACE INTO documents (version_id, file_path, content, content_type, tokens)
                   VALUES (?, ?, ?, ?, ?)""",
                (doc.version_id, doc.file_path, doc.content, doc.content_type, doc.tokens)
            )
            await db.commit()

    async def search(self, query: str) -> list[SearchResult]:
        """Find libraries whose group, project name or description contains ``query``.

        ``query`` is matched as a literal substring; ``%`` and ``_`` in it are
        not treated as LIKE wildcards.

        Returns:
            One ``SearchResult`` per matching library, each with the list of
            its version tags and a fixed score of ``1.0``.
        """
        pattern = f"%{self._escape_like(query)}%"
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            cursor = await db.execute(
                """SELECT l.id, l.group_name, l.project_name, l.description,
                          GROUP_CONCAT(v.version_tag) as versions
                   FROM libraries l
                   LEFT JOIN versions v ON l.id = v.library_id
                   WHERE l.group_name LIKE ? ESCAPE '\\'
                      OR l.project_name LIKE ? ESCAPE '\\'
                      OR l.description LIKE ? ESCAPE '\\'
                   GROUP BY l.id""",
                (pattern, pattern, pattern)
            )
            rows = await cursor.fetchall()

            results = []
            for row in rows:
                # GROUP_CONCAT yields NULL for a library without versions.
                versions = row["versions"].split(",") if row["versions"] else []
                results.append(SearchResult(
                    library_id=f"/{row['group_name']}/{row['project_name']}",
                    name=f"{row['group_name']}/{row['project_name']}",
                    description=row["description"] or "",
                    versions=versions,
                    score=1.0
                ))
            return results

    async def get_library(self, group: str, project: str) -> Optional[Library]:
        """Return the library stored for ``group``/``project``, or ``None``."""
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            cursor = await db.execute(
                "SELECT * FROM libraries WHERE group_name = ? AND project_name = ?",
                (group, project)
            )
            row = await cursor.fetchone()
            if not row:
                return None
            return Library(
                id=row["id"],
                group_name=row["group_name"],
                project_name=row["project_name"],
                description=row["description"],
                default_version=row["default_version"],
                # Populated here as well so the result carries the same
                # fields as the entries returned by get_all_libraries().
                last_indexed=row["last_indexed"]
            )

    async def get_version_id(self, library_id: int, version_tag: str) -> Optional[int]:
        """Return the id of the given version of a library, or ``None``."""
        async with aiosqlite.connect(self.db_path) as db:
            cursor = await db.execute(
                "SELECT id FROM versions WHERE library_id = ? AND version_tag = ?",
                (library_id, version_tag)
            )
            row = await cursor.fetchone()
            return row[0] if row else None

    async def get_documents(self, version_id: int, topic: Optional[str] = None, page: int = 1, page_size: int = 10) -> list[Document]:
        """Return one page of the documents stored for a version.

        Args:
            version_id: Id of the version whose documents are wanted.
            topic: Optional literal substring that must occur in the file
                path or the content; an empty or ``None`` topic disables
                the filter.
            page: 1-based page number.
            page_size: Maximum number of documents per page.

        Returns:
            The documents of the requested page, ordered by row id.
        """
        offset = (page - 1) * page_size
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            # ORDER BY makes LIMIT/OFFSET paging deterministic; without it
            # the row order, and thus the page contents, is unspecified.
            if topic:
                pattern = f"%{self._escape_like(topic)}%"
                cursor = await db.execute(
                    """SELECT * FROM documents
                       WHERE version_id = ?
                         AND (file_path LIKE ? ESCAPE '\\' OR content LIKE ? ESCAPE '\\')
                       ORDER BY id
                       LIMIT ? OFFSET ?""",
                    (version_id, pattern, pattern, page_size, offset)
                )
            else:
                cursor = await db.execute(
                    "SELECT * FROM documents WHERE version_id = ? ORDER BY id LIMIT ? OFFSET ?",
                    (version_id, page_size, offset)
                )
            rows = await cursor.fetchall()
            return [Document(
                id=row["id"],
                version_id=row["version_id"],
                file_path=row["file_path"],
                content=row["content"],
                content_type=row["content_type"],
                tokens=row["tokens"]
            ) for row in rows]

    async def get_all_libraries(self) -> list[Library]:
        """Return every stored library, most recently indexed first."""
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            cursor = await db.execute(
                "SELECT * FROM libraries ORDER BY last_indexed DESC"
            )
            rows = await cursor.fetchall()

            return [Library(
                id=row["id"],
                group_name=row["group_name"],
                project_name=row["project_name"],
                description=row["description"],
                default_version=row["default_version"],
                last_indexed=row["last_indexed"]
            ) for row in rows]

    async def fuzzy_search(self, query: str, limit: int = 10) -> list[FuzzySearchResult]:
        """Rank libraries by how well they match ``query``, case-insensitively.

        Each library gets the score of the first rule that applies:

        * ``1.0`` - project name equals the query
        * ``0.9`` - project name starts with the query
        * ``0.8`` - project name contains the query
        * ``0.6`` - description contains the query
        * ``0.5`` - group name contains the query
        * otherwise, if the Levenshtein distance between the query and the
          project name is at most 3: ``max(0.4, 1 - distance / len(name))``

        Libraries matching none of the rules are left out.

        Args:
            query: Text to look for.
            limit: Maximum number of results to return.

        Returns:
            Up to ``limit`` results, highest score first.
        """
        query_lower = query.lower()
        results = []

        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            cursor = await db.execute("SELECT * FROM libraries")
            rows = await cursor.fetchall()

            for row in rows:
                name = row["project_name"].lower()
                group = row["group_name"].lower()
                desc = (row["description"] or "").lower()

                score = 0.0
                match_type = ""
                matched_field = ""

                # Exact match
                if query_lower == name:
                    score = 1.0
                    match_type = "exact"
                    matched_field = "name"
                # Starts with
                elif name.startswith(query_lower):
                    score = 0.9
                    match_type = "starts_with"
                    matched_field = "name"
                # Contains in name
                elif query_lower in name:
                    score = 0.8
                    match_type = "contains"
                    matched_field = "name"
                # Description match
                elif query_lower in desc:
                    score = 0.6
                    match_type = "contains"
                    matched_field = "description"
                # Group match
                elif query_lower in group:
                    score = 0.5
                    match_type = "contains"
                    matched_field = "group"
                # Fuzzy match (Levenshtein)
                else:
                    distance = levenshtein_distance(query_lower, name)
                    # The length guard avoids dividing by zero for an empty name.
                    if distance <= 3 and len(name) > 0:
                        score = max(0.4, 1.0 - (distance / len(name)))
                        match_type = "fuzzy"
                        matched_field = "name"

                if score > 0:
                    results.append(FuzzySearchResult(
                        library_id=f"/{row['group_name']}/{row['project_name']}",
                        name=row["project_name"],
                        group=row["group_name"],
                        description=row["description"] or "",
                        score=score,
                        match_type=match_type,
                        matched_field=matched_field
                    ))

        # Sort by score descending
        results.sort(key=lambda x: x.score, reverse=True)
        return results[:limit]