Coverage for repo_ctx / core.py: 0%
137 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
1"""Core business logic."""
import logging
from typing import Optional, Dict

from .config import Config
from .storage import Storage
from .parser import Parser
from .models import Library, Version, Document, SearchResult
from .providers import (
    GitProvider,
    ProviderFactory,
    ProviderDetector,
    ProviderProject,
    ProviderNotFoundError
)
logger = logging.getLogger(__name__)


class RepositoryContext:
    """
    Core repository indexing and search functionality.

    Supports multiple repository providers (GitLab, GitHub, local, etc.)

    Remote providers are built once from the configuration and kept in
    ``self.providers``; the local provider is created on demand for a
    specific repository path (see ``get_provider``).
    """

    def __init__(self, config: Config):
        """
        Initialize repository context with providers.

        Args:
            config: Configuration object with provider settings
        """
        self.config = config
        self.storage = Storage(config.storage_path)
        self.parser = Parser()

        # Legacy fallback (for backward compatibility). It must be assigned
        # BEFORE _init_providers() runs: that method picks the real default
        # from the providers that are actually configured, and assigning
        # "gitlab" afterwards would clobber its choice.
        self.default_provider = "gitlab"

        # Initialize providers based on config
        self.providers: Dict[str, GitProvider] = {}
        self._init_providers()

    def _init_providers(self) -> None:
        """
        Initialize configured providers and choose the default one.

        A provider whose construction fails is logged and skipped so that the
        remaining providers stay usable. The default provider is the first
        available of ``gitlab``, ``github``; ``local`` is used when neither
        remote provider could be created.
        """
        config = self.config

        # GitLab needs both a URL and a token.
        if config.gitlab_url and config.gitlab_token:
            try:
                self.providers["gitlab"] = ProviderFactory.create_gitlab(
                    url=config.gitlab_url,
                    token=config.gitlab_token
                )
            except Exception as exc:
                # Best effort: continue without GitLab, but leave a trace.
                logger.warning("GitLab provider initialization failed: %s", exc)

        # GitHub is enabled by either a URL or a token; the URL falls back to
        # the public API endpoint.
        if config.github_url or config.github_token:
            try:
                github_url = config.github_url or "https://api.github.com"
                self.providers["github"] = ProviderFactory.create_github(
                    url=github_url,
                    token=config.github_token
                )
            except Exception as exc:
                # Best effort: continue without GitHub, but leave a trace.
                logger.warning("GitHub provider initialization failed: %s", exc)

        # Local provider is always available (no config needed).
        # It is instantiated on demand when indexing a specific path.
        self.local_provider_available = True

        # Set default provider based on what's available.
        for candidate in ("gitlab", "github"):
            if candidate in self.providers:
                self.default_provider = candidate
                break
        else:
            # If no remote providers are configured, default to local.
            self.default_provider = "local"

    def get_provider(self, provider_type: Optional[str] = None, repo_path: Optional[str] = None) -> GitProvider:
        """
        Get provider instance.

        Args:
            provider_type: Provider type, or None to use default
            repo_path: Repository path (required for local provider)

        Returns:
            Provider instance

        Raises:
            ValueError: Provider not configured, or ``repo_path`` missing for
                the local provider
        """
        provider_type = provider_type or self.default_provider

        # Special handling for local provider (created on demand)
        if provider_type == "local":
            if not repo_path:
                raise ValueError("Local provider requires repo_path parameter")
            # Imported lazily, exactly where it is needed.
            from .providers.local import LocalGitProvider
            return LocalGitProvider(repo_path)

        if provider_type not in self.providers:
            raise ValueError(
                f"Provider '{provider_type}' not configured. "
                f"Available: {list(self.providers.keys()) + ['local']}"
            )

        return self.providers[provider_type]

    async def init(self) -> None:
        """Initialize storage."""
        await self.storage.init_db()

    async def search_libraries(self, query: str) -> list[SearchResult]:
        """
        Search for libraries by name.

        Results come from storage and are re-scored in place with a simple
        case-insensitive ranking: an exact name match gets 3.0, a substring
        match gets 2.0, anything else keeps the score storage assigned.

        Args:
            query: Text to search for

        Returns:
            The storage results, sorted by descending score
        """
        results = await self.storage.search(query)
        needle = query.lower()  # hoisted: invariant across the loop
        for result in results:
            name = result.name.lower()
            if name == needle:
                result.score = 3.0
            elif needle in name:
                result.score = 2.0
        results.sort(key=lambda item: item.score, reverse=True)
        return results

    async def fuzzy_search_libraries(self, query: str, limit: int = 10) -> list:
        """
        Fuzzy search for libraries.

        Args:
            query: Text to search for
            limit: Maximum number of results, forwarded to storage

        Returns:
            Whatever ``storage.fuzzy_search`` returns for the query
        """
        return await self.storage.fuzzy_search(query, limit)

    async def get_documentation(
        self,
        library_id: str,
        topic: Optional[str] = None,
        page: int = 1
    ) -> dict:
        """
        Get documentation for a library.

        Args:
            library_id: Library identifier. Either the legacy path format
                (/group/project, /group/subgroup/project or
                /group/project/version) or a URI such as
                gitlab://group/project, github://owner/repo, local:///path
            topic: Optional topic filter
            page: Page number for pagination

        Returns:
            Documentation content and metadata

        Raises:
            ValueError: ``library_id`` has fewer than two path segments, the
                library is unknown, or the requested version is unknown
        """
        if "://" in library_id:
            # URI format: only the path part is needed for the lookup.
            _, path = ProviderDetector.from_library_id(library_id)
        else:
            # Legacy format: /group/project
            path = library_id
        parts = path.strip("/").split("/")

        if len(parts) < 2:
            raise ValueError(f"Invalid library_id: {library_id}")

        # First attempt: the whole path names the library
        # (last segment = project, everything before it = group).
        project = parts[-1]
        group = "/".join(parts[:-1])
        version = None
        library = await self.storage.get_library(group, project)

        # Second attempt: with 3+ segments the last one may be a version.
        if not library and len(parts) >= 3:
            version = parts[-1]
            project = parts[-2]
            group = "/".join(parts[:-2])
            library = await self.storage.get_library(group, project)

        if not library:
            raise ValueError(f"Library not found: {group}/{project}")

        # Use default version if not specified
        if not version:
            version = library.default_version

        version_id = await self.storage.get_version_id(library.id, version)
        if not version_id:
            raise ValueError(f"Version not found: {version}")

        documents = await self.storage.get_documents(version_id, topic, page)

        # Format for LLM
        content = self.parser.format_for_llm(documents, library_id)

        return {
            "content": [{"type": "text", "text": content}],
            "metadata": {
                "library": f"{group}/{project}",
                "version": version,
                "page": page,
                "documents_count": len(documents)
            }
        }

    async def index_repository(
        self,
        group: str,
        project: str,
        provider_type: Optional[str] = None
    ) -> None:
        """
        Index a repository from any provider.

        Saves the library record, then indexes the default branch followed by
        up to five tags returned by the provider.

        Args:
            group: Group/organization path (or full path for local repos)
            project: Project/repository name (or empty for local repos)
            provider_type: Provider type (gitlab, github, local) or None for auto-detect

        Raises:
            ValueError: Provider not configured
            ProviderNotFoundError: Repository not found (presumably raised by
                the provider; not raised directly in this method)
        """
        # Auto-detect provider if not specified
        if provider_type is None:
            path = f"{group}/{project}" if project else group
            provider_type = ProviderDetector.detect(path, default=self.default_provider)

        if provider_type == "local":
            # For local provider, group contains the full path
            repo_path = f"{group}/{project}" if project else group
            provider = self.get_provider(provider_type, repo_path=repo_path)
            project_path = repo_path
        else:
            provider = self.get_provider(provider_type)
            project_path = f"{group}/{project}"

        # Get project via provider interface
        proj = await provider.get_project(project_path)
        default_branch = await provider.get_default_branch(proj)

        # Read config file if exists; a description in it wins over the
        # provider-reported one.
        config = await provider.read_config(proj, default_branch)
        description = config.get("description", proj.description) if config else proj.description

        # NOTE(review): the provider URI is computed but never persisted;
        # Library below only receives group/project. Kept because the call's
        # side effects (e.g. validation) are not visible from here.
        library_id_uri = ProviderDetector.to_library_id(project_path, provider_type)

        library = Library(
            group_name=group,
            project_name=project,
            description=description or "",
            default_version=default_branch
        )
        db_library_id = await self.storage.save_library(library)

        # Index default branch first, then the tags.
        await self._index_version(provider, proj, db_library_id, default_branch, config)

        tags = await provider.get_tags(proj, limit=5)
        for tag in tags:
            await self._index_version(provider, proj, db_library_id, tag, config)

    async def index_group(
        self,
        group_path: str,
        include_subgroups: bool = True,
        provider_type: Optional[str] = None
    ) -> dict:
        """
        Index all projects in a group/organization.

        A failure while indexing one project is recorded and does not stop
        the remaining projects from being indexed.

        Args:
            group_path: Group path
            include_subgroups: Include nested subgroups (GitLab only)
            provider_type: Provider type or None for default

        Returns:
            Summary of indexing results: ``total`` (number of projects
            listed), ``indexed`` (paths indexed successfully) and ``failed``
            (dicts with ``path`` and ``error``)
        """
        provider_type = provider_type or self.default_provider
        provider = self.get_provider(provider_type)

        projects = await provider.list_projects_in_group(
            group_path,
            include_subgroups
        )

        results = {
            "total": len(projects),
            "indexed": [],
            "failed": []
        }

        for proj in projects:
            # Parse path to extract group and project
            parts = proj.path.split("/")
            if len(parts) < 2:
                # Cannot split into group/project. The project is still
                # counted in "total", so leave a trace of why it is missing
                # from both result lists.
                logger.warning(
                    "Skipping project without a group component: %s", proj.path
                )
                continue

            project_name = parts[-1]
            group_name = "/".join(parts[:-1])

            try:
                await self.index_repository(
                    group_name,
                    project_name,
                    provider_type
                )
            except Exception as exc:
                results["failed"].append({
                    "path": proj.path,
                    "error": str(exc)
                })
            else:
                results["indexed"].append(proj.path)

        return results

    async def _index_version(
        self,
        provider: GitProvider,
        project: ProviderProject,
        library_id: int,
        ref: str,
        config: Optional[dict]
    ) -> None:
        """
        Index a specific version/branch/tag.

        Saves a version record, then reads, parses and stores every file in
        the ref's tree that the parser accepts. Files that cannot be
        processed are logged and skipped.

        Args:
            provider: Provider instance
            project: Project metadata
            library_id: Database library ID
            ref: Branch, tag, or commit SHA
            config: Optional repo-ctx configuration
        """
        # TODO: Get actual commit SHA for the ref. For now the ref itself is
        # stored as the commit SHA (simplification).
        commit_sha = ref

        version = Version(
            library_id=library_id,
            version_tag=ref,
            commit_sha=commit_sha
        )
        version_id = await self.storage.save_version(version)

        file_paths = await provider.get_file_tree(project, ref, recursive=True)

        for path in file_paths:
            if not self.parser.should_include_file(path, config):
                continue

            try:
                file = await provider.read_file(project, path, ref)
                parsed_content = self.parser.parse_markdown(file.content)
                tokens = self.parser.count_tokens(parsed_content)

                doc = Document(
                    version_id=version_id,
                    file_path=path,
                    content=parsed_content,
                    tokens=tokens
                )
                await self.storage.save_document(doc)
            except Exception as exc:
                # Best effort: one bad file must not abort the whole version.
                logger.warning("Could not read %s: %s", path, exc)
                continue


# Backward compatibility alias
GitLabContext = RepositoryContext