Coverage for repo_ctx / core.py: 0%

137 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-25 17:42 +0100

1"""Core business logic.""" 

import logging
from typing import Optional, Dict

from .config import Config
from .storage import Storage
from .parser import Parser
from .models import Library, Version, Document, SearchResult
from .providers import (
    GitProvider,
    ProviderFactory,
    ProviderDetector,
    ProviderProject,
    ProviderNotFoundError
)

14 

15 

class RepositoryContext:
    """
    Core repository indexing and search functionality.

    Supports multiple repository providers (GitLab, GitHub, local, etc.)
    Remote providers are created once from the configuration; the local
    provider is created on demand for a specific repository path.
    """

    # Used for best-effort failures that are skipped instead of raised.
    _logger = logging.getLogger(__name__)

    def __init__(self, config: Config):
        """
        Initialize repository context with providers.

        Args:
            config: Configuration object with provider settings
        """
        self.config = config
        self.storage = Storage(config.storage_path)
        self.parser = Parser()

        # Remote providers keyed by provider type ("gitlab", "github").
        self.providers: Dict[str, GitProvider] = {}

        # Legacy fallback (for backward compatibility). It must be assigned
        # BEFORE _init_providers() runs: that method picks the default from
        # the providers that were actually configured, and assigning the
        # fallback afterwards would overwrite that choice with "gitlab".
        self.default_provider = "gitlab"
        self._init_providers()

    def _init_providers(self):
        """
        Initialize configured providers and select the default one.

        Provider construction is best-effort: a provider that fails to
        initialize is logged and left out of ``self.providers`` instead of
        aborting start-up.
        """
        # GitLab needs both a URL and a token.
        if self.config.gitlab_url and self.config.gitlab_token:
            try:
                self.providers["gitlab"] = ProviderFactory.create_gitlab(
                    url=self.config.gitlab_url,
                    token=self.config.gitlab_token
                )
            except Exception as exc:
                # Continue without GitLab, but leave a trace of the reason.
                self._logger.warning(
                    "GitLab provider initialization failed: %s", exc
                )

        # GitHub is enabled when either a URL or a token is configured.
        if self.config.github_url or self.config.github_token:
            try:
                github_url = self.config.github_url or "https://api.github.com"
                self.providers["github"] = ProviderFactory.create_github(
                    url=github_url,
                    token=self.config.github_token
                )
            except Exception as exc:
                # Continue without GitHub, but leave a trace of the reason.
                self._logger.warning(
                    "GitHub provider initialization failed: %s", exc
                )

        # Local provider is always available (no config needed).
        # It is instantiated on demand in get_provider() for a specific path.
        self.local_provider_available = True

        # Default provider preference: GitLab, then GitHub, then local.
        if "gitlab" in self.providers:
            self.default_provider = "gitlab"
        elif "github" in self.providers:
            self.default_provider = "github"
        else:
            # No remote provider configured
            self.default_provider = "local"

    def get_provider(self, provider_type: Optional[str] = None, repo_path: Optional[str] = None) -> GitProvider:
        """
        Get provider instance.

        Args:
            provider_type: Provider type, or None to use default
            repo_path: Repository path (required for local provider)

        Returns:
            Provider instance

        Raises:
            ValueError: Provider not configured, or local provider requested
                without ``repo_path``
        """
        provider_type = provider_type or self.default_provider

        # The local provider is bound to one repository path, so a new
        # instance is created per call instead of being cached.
        if provider_type == "local":
            if not repo_path:
                raise ValueError("Local provider requires repo_path parameter")
            from .providers.local import LocalGitProvider
            return LocalGitProvider(repo_path)

        if provider_type not in self.providers:
            raise ValueError(
                f"Provider '{provider_type}' not configured. "
                f"Available: {list(self.providers.keys()) + ['local']}"
            )

        return self.providers[provider_type]

    async def init(self):
        """Initialize storage."""
        await self.storage.init_db()

    async def search_libraries(self, query: str) -> list[SearchResult]:
        """
        Search for libraries by name.

        Results come from storage and are re-ranked in place: a
        case-insensitive exact name match scores 3.0, a case-insensitive
        substring match scores 2.0, anything else keeps its stored score.

        Args:
            query: Search text

        Returns:
            Results sorted by descending score
        """
        results = await self.storage.search(query)
        needle = query.lower()
        for result in results:
            name = result.name.lower()
            if needle == name:
                result.score = 3.0
            elif needle in name:
                result.score = 2.0
        # list.sort is stable, so equally scored results keep storage order.
        results.sort(key=lambda item: item.score, reverse=True)
        return results

    async def fuzzy_search_libraries(self, query: str, limit: int = 10) -> list:
        """Fuzzy search for libraries (delegates to storage)."""
        return await self.storage.fuzzy_search(query, limit)

    async def get_documentation(
        self,
        library_id: str,
        topic: Optional[str] = None,
        page: int = 1
    ) -> dict:
        """
        Get documentation for a library.

        Args:
            library_id: Library identifier (format: /group/project or
                /group/project/version, or a provider URI such as
                gitlab://group/project)
            topic: Optional topic filter
            page: Page number for pagination

        Returns:
            Dict with ``content`` (formatted text) and ``metadata``
            (library, version, page, documents_count)

        Raises:
            ValueError: Malformed library_id, unknown library or unknown
                version
        """
        if "://" in library_id:
            # URI format: only the path part is needed for the lookup.
            _provider_type, path = ProviderDetector.from_library_id(library_id)
            parts = path.strip("/").split("/")
        else:
            # Legacy format: /group/project
            parts = library_id.strip("/").split("/")

        if len(parts) < 2:
            raise ValueError(f"Invalid library_id: {library_id}")

        # First attempt: the whole path is group(s)/project, no version.
        project = parts[-1]
        group = "/".join(parts[:-1])
        version = None
        library = await self.storage.get_library(group, project)

        # Second attempt: with 3+ parts the last one may be a version.
        if not library and len(parts) >= 3:
            version = parts[-1]
            project = parts[-2]
            group = "/".join(parts[:-2])
            library = await self.storage.get_library(group, project)

        if not library:
            raise ValueError(f"Library not found: {group}/{project}")

        # Use default version if not specified
        if not version:
            version = library.default_version

        version_id = await self.storage.get_version_id(library.id, version)
        if not version_id:
            raise ValueError(f"Version not found: {version}")

        documents = await self.storage.get_documents(version_id, topic, page)

        # Format for LLM
        content = self.parser.format_for_llm(documents, library_id)

        return {
            "content": [{"type": "text", "text": content}],
            "metadata": {
                "library": f"{group}/{project}",
                "version": version,
                "page": page,
                "documents_count": len(documents)
            }
        }

    async def index_repository(
        self,
        group: str,
        project: str,
        provider_type: Optional[str] = None
    ):
        """
        Index a repository from any provider.

        Saves the library record, then indexes the default branch followed
        by the tags returned by the provider (requested with ``limit=5``).

        Args:
            group: Group/organization path (or full path for local repos)
            project: Project/repository name (or empty for local repos)
            provider_type: Provider type (gitlab, github, local) or None for auto-detect

        Raises:
            ValueError: Provider not configured
            ProviderNotFoundError: Repository not found
        """
        # Path with the project segment omitted when it is empty; used for
        # auto-detection and as the repository path of local repos.
        joined_path = f"{group}/{project}" if project else group

        # Auto-detect provider if not specified
        if provider_type is None:
            provider_type = ProviderDetector.detect(joined_path, default=self.default_provider)

        if provider_type == "local":
            # For local provider, group contains the full path
            provider = self.get_provider(provider_type, repo_path=joined_path)
            project_path = joined_path
        else:
            provider = self.get_provider(provider_type)
            project_path = f"{group}/{project}"

        proj = await provider.get_project(project_path)
        default_branch = await provider.get_default_branch(proj)

        # Repo-level config may be absent; it can override the description.
        config = await provider.read_config(proj, default_branch)
        description = config.get("description", proj.description) if config else proj.description

        library = Library(
            group_name=group,
            project_name=project,
            description=description or "",
            default_version=default_branch
        )
        db_library_id = await self.storage.save_library(library)

        # Index default branch
        await self._index_version(
            provider,
            proj,
            db_library_id,
            default_branch,
            config
        )

        # Index tags
        tags = await provider.get_tags(proj, limit=5)
        for tag in tags:
            await self._index_version(
                provider,
                proj,
                db_library_id,
                tag,
                config
            )

    async def index_group(
        self,
        group_path: str,
        include_subgroups: bool = True,
        provider_type: Optional[str] = None
    ) -> dict:
        """
        Index all projects in a group/organization.

        Args:
            group_path: Group path
            include_subgroups: Include nested subgroups (GitLab only)
            provider_type: Provider type or None for default

        Returns:
            Summary dict with ``total`` (number of listed projects),
            ``indexed`` (paths indexed successfully) and ``failed``
            (``{"path", "error"}`` entries)
        """
        provider_type = provider_type or self.default_provider
        provider = self.get_provider(provider_type)

        projects = await provider.list_projects_in_group(
            group_path,
            include_subgroups
        )

        results = {
            "total": len(projects),
            "indexed": [],
            "failed": []
        }

        for proj in projects:
            # Split "group/sub/project" into group path and project name.
            parts = proj.path.split("/")
            if len(parts) < 2:
                # A path without a group segment is skipped; it is counted
                # in "total" but reported in neither "indexed" nor "failed".
                continue

            project_name = parts[-1]
            group_name = "/".join(parts[:-1])

            try:
                await self.index_repository(
                    group_name,
                    project_name,
                    provider_type
                )
                results["indexed"].append(proj.path)
            except Exception as e:
                # One failing project must not abort the whole group run.
                results["failed"].append({
                    "path": proj.path,
                    "error": str(e)
                })

        return results

    async def _index_version(
        self,
        provider: GitProvider,
        project: ProviderProject,
        library_id: int,
        ref: str,
        config: Optional[dict]
    ):
        """
        Index a specific version/branch/tag.

        Saves a version record, then stores one document per file in the
        ref's tree that the parser accepts. Files that cannot be read,
        parsed or saved are logged and skipped.

        Args:
            provider: Provider instance
            project: Project metadata
            library_id: Database library ID
            ref: Branch, tag, or commit SHA
            config: Optional repo-ctx configuration
        """
        # TODO: Get actual commit SHA for the ref.
        # For now the ref itself is stored as the commit SHA.
        commit_sha = ref

        version = Version(
            library_id=library_id,
            version_tag=ref,
            commit_sha=commit_sha
        )
        version_id = await self.storage.save_version(version)

        file_paths = await provider.get_file_tree(project, ref, recursive=True)

        for path in file_paths:
            if not self.parser.should_include_file(path, config):
                continue

            try:
                file = await provider.read_file(project, path, ref)
                parsed_content = self.parser.parse_markdown(file.content)
                tokens = self.parser.count_tokens(parsed_content)

                doc = Document(
                    version_id=version_id,
                    file_path=path,
                    content=parsed_content,
                    tokens=tokens
                )
                await self.storage.save_document(doc)
            except Exception as e:
                # Best-effort: one bad file must not abort the whole version.
                self._logger.warning("Could not read %s: %s", path, e)
                continue


# Backward compatibility alias
GitLabContext = RepositoryContext