Coverage for repo_ctx / providers / github.py: 11%
134 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-25 17:42 +0100
1"""GitHub provider implementation."""
2import base64
3import json
4from typing import Optional, List
5from github import Github, GithubException, UnknownObjectException, BadCredentialsException
6from .base import GitProvider, ProviderProject, ProviderFile
7from .exceptions import (
8 ProviderNotFoundError,
9 ProviderAuthError,
10 ProviderFileNotFoundError,
11 ProviderError,
12 ProviderRateLimitError
13)
class GitHubProvider(GitProvider):
    """GitHub repository provider using PyGithub.

    Wraps a PyGithub ``Github`` client and translates its exceptions into the
    provider exception hierarchy (``ProviderError`` and friends).

    NOTE(review): the coroutine methods call the PyGithub client directly and
    never await it, so network calls presumably block the event loop while
    they run -- confirm whether callers need them offloaded to a thread.
    """

    # API root of public github.com; any other URL is treated as an
    # enterprise endpoint.
    _PUBLIC_API_URL = "https://api.github.com"

    # Candidate config file locations, probed in this order by read_config().
    _CONFIG_FILENAMES = (
        "git_context.json",
        ".git_context.json",
        "repo_context.json",
        ".repo-ctx.json",
        ".github/repo-ctx.json",  # GitHub-specific location
    )

    def __init__(self, url: str = "https://api.github.com", token: Optional[str] = None):
        """
        Initialize GitHub provider.

        Args:
            url: GitHub API root URL. Defaults to public GitHub. For GitHub
                Enterprise Server pass the full API root, e.g.
                ``https://hostname/api/v3``; it is handed to PyGithub as
                ``base_url`` with only trailing slashes removed.
            token: Personal access token (optional for public repos)

        Raises:
            ProviderAuthError: The token was rejected by GitHub
            ProviderError: Any other failure while creating/verifying the client
        """
        self.url = url
        self.token = token

        try:
            # Normalise trailing slashes so "https://api.github.com/" is still
            # recognised as public GitHub.
            api_url = url.rstrip("/")

            if api_url == self._PUBLIC_API_URL:
                # Public GitHub
                self.client = Github(token) if token else Github()
            else:
                # GitHub Enterprise: PyGithub uses the path component of
                # base_url as the request prefix, so the "/api/v3" suffix must
                # be preserved rather than stripped.
                self.client = Github(base_url=api_url, login_or_token=token)

            # Verify authentication. Reading ``login`` forces PyGithub to
            # actually fetch the authenticated user.
            if token:
                try:
                    self.client.get_user().login
                except BadCredentialsException as e:
                    raise ProviderAuthError(f"GitHub authentication failed: {e}") from e

        except ProviderAuthError:
            # Already translated above; do not re-wrap as a generic error.
            raise
        except Exception as e:
            raise ProviderError(f"Failed to initialize GitHub client: {e}") from e

    @staticmethod
    def _to_project(repo) -> ProviderProject:
        """Build a ProviderProject from a PyGithub repository object."""
        return ProviderProject(
            id=str(repo.id),
            name=repo.name,
            path=repo.full_name,
            description=repo.description,
            default_branch=repo.default_branch,
            web_url=repo.html_url,
        )

    @staticmethod
    def _is_rate_limited(error: Exception) -> bool:
        """Return True when the error text mentions a rate limit."""
        return "rate limit" in str(error).lower()

    async def get_project(self, path: str) -> ProviderProject:
        """
        Get project metadata from GitHub.

        Args:
            path: Repository path (format: owner/repo)

        Returns:
            ProviderProject with normalized metadata

        Raises:
            ProviderNotFoundError: Repository doesn't exist
            ProviderAuthError: Authentication failed or access denied
            ProviderRateLimitError: A 403 response mentioned a rate limit
            ProviderError: Any other failure
        """
        try:
            return self._to_project(self.client.get_repo(path))

        except UnknownObjectException as e:
            raise ProviderNotFoundError(f"GitHub repository not found: {path}") from e
        except BadCredentialsException as e:
            raise ProviderAuthError(f"Authentication failed for repository: {path}") from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderNotFoundError(f"GitHub repository not found: {path}") from e
            if e.status == 401:
                raise ProviderAuthError(f"Authentication failed for repository: {path}") from e
            if e.status == 403:
                # 403 is used for both rate limiting and missing permissions;
                # the error text is the only discriminator available here.
                if self._is_rate_limited(e):
                    raise ProviderRateLimitError("GitHub rate limit exceeded") from e
                raise ProviderAuthError(f"Access denied to repository: {path}") from e
            raise ProviderError(f"Error getting GitHub repository {path}: {e}") from e
        except Exception as e:
            raise ProviderError(f"Unexpected error getting repository {path}: {e}") from e

    async def get_default_branch(self, project: ProviderProject) -> str:
        """
        Get default branch name.

        Args:
            project: Project to query

        Returns:
            Default branch name (e.g., "main", "master")

        Raises:
            ProviderError: The fallback API lookup failed
        """
        # Already in ProviderProject
        if project.default_branch:
            return project.default_branch

        # Fallback: fetch from API
        try:
            return self.client.get_repo(project.path).default_branch
        except Exception as e:
            raise ProviderError(f"Error getting default branch: {e}") from e

    async def get_file_tree(
        self,
        project: ProviderProject,
        ref: str,
        recursive: bool = True
    ) -> List[str]:
        """
        Get list of all file paths in repository.

        Args:
            project: Project to query
            ref: Branch, tag, or commit SHA
            recursive: Include subdirectories

        Returns:
            List of file paths relative to repo root. An empty list is
            returned when GitHub answers 409 (treated as an empty repository).

        Raises:
            ProviderError: Error accessing file tree

        Note:
            NOTE(review): GitHub's tree API can truncate very large recursive
            listings; this method does not detect that case.
        """
        try:
            repo = self.client.get_repo(project.path)
            tree = repo.get_git_tree(ref, recursive=recursive)

            # Keep blobs (files) only; "tree" entries are directories.
            return [item.path for item in tree.tree if item.type == "blob"]

        except GithubException as e:
            if e.status == 404:
                raise ProviderError(
                    f"Branch/tag '{ref}' not found in repository {project.path}"
                ) from e
            if e.status == 409:
                # Empty repository
                return []
            raise ProviderError(f"Error getting file tree: {e}") from e
        except Exception as e:
            raise ProviderError(f"Unexpected error getting file tree: {e}") from e

    async def read_file(
        self,
        project: ProviderProject,
        path: str,
        ref: str
    ) -> ProviderFile:
        """
        Read file contents from GitHub.

        Args:
            project: Project to query
            path: File path relative to repo root
            ref: Branch, tag, or commit SHA

        Returns:
            ProviderFile with UTF-8 decoded content and size

        Raises:
            ProviderFileNotFoundError: File doesn't exist at ref
            ProviderError: Path is a directory, content is not valid UTF-8,
                or any other failure
        """
        try:
            repo = self.client.get_repo(project.path)
            file_content = repo.get_contents(path, ref=ref)

            # get_contents returns a list when the path is a directory.
            if isinstance(file_content, list):
                raise ProviderError(f"Path '{path}' is a directory, not a file")

            # Decode content
            content = file_content.decoded_content.decode('utf-8')

            return ProviderFile(
                path=path,
                content=content,
                size=file_content.size
            )

        except UnknownObjectException as e:
            raise ProviderFileNotFoundError(
                f"File '{path}' not found in {project.path} at ref '{ref}'"
            ) from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderFileNotFoundError(
                    f"File '{path}' not found in {project.path} at ref '{ref}'"
                ) from e
            raise ProviderError(f"Error reading file {path}: {e}") from e
        except UnicodeDecodeError as e:
            raise ProviderError(
                f"File '{path}' is not valid UTF-8 (binary file?)"
            ) from e
        except ProviderError:
            # Raised deliberately above (directory case); keep its message
            # instead of re-wrapping it as an "unexpected" error.
            raise
        except Exception as e:
            raise ProviderError(f"Unexpected error reading file {path}: {e}") from e

    async def read_config(
        self,
        project: ProviderProject,
        ref: str
    ) -> Optional[dict]:
        """
        Read repo-ctx configuration file if it exists.

        Searches for configuration files in this order:
        1. git_context.json (current name)
        2. .git_context.json
        3. repo_context.json
        4. .repo-ctx.json
        5. .github/repo-ctx.json (GitHub-specific)

        Args:
            project: Project to query
            ref: Branch, tag, or commit SHA

        Returns:
            Parsed JSON of the first candidate that can be read, or None if
            none could be read

        Raises:
            ProviderError: A candidate file was read but contains invalid JSON
        """
        for filename in self._CONFIG_FILENAMES:
            try:
                file = await self.read_file(project, filename, ref)
                return json.loads(file.content)
            except ProviderFileNotFoundError:
                # Try next filename
                continue
            except json.JSONDecodeError as e:
                raise ProviderError(
                    f"Invalid JSON in config file {filename}: {e}"
                ) from e
            except Exception:
                # Deliberate best-effort: any other read failure for this
                # candidate is ignored and the next filename is tried.
                continue

        # No config file found
        return None

    async def get_tags(
        self,
        project: ProviderProject,
        limit: int = 5
    ) -> List[str]:
        """
        Get repository tag names.

        Args:
            project: Project to query
            limit: Maximum number of tags to return

        Returns:
            List of at most ``limit`` tag names, in the order the GitHub API
            returns them

        Raises:
            ProviderError: Error accessing tags
        """
        try:
            repo = self.client.get_repo(project.path)
            tags = repo.get_tags()

            # Take the first N tags as returned by the API.
            # NOTE(review): no sorting happens here; "most recent first"
            # depends on the API's ordering -- confirm if callers rely on it.
            return [tag.name for tag in tags[:limit]]

        except GithubException as e:
            raise ProviderError(f"Error getting tags: {e}") from e
        except Exception as e:
            raise ProviderError(f"Unexpected error getting tags: {e}") from e

    async def list_projects_in_group(
        self,
        group_path: str,
        include_subgroups: bool = True
    ) -> List[ProviderProject]:
        """
        List all projects in a GitHub organization.

        Args:
            group_path: Organization name (e.g., "fastapi")
            include_subgroups: Ignored for GitHub (no nested orgs)

        Returns:
            List of the organization's repositories as returned by the API
            for this client (no visibility filtering is applied here)

        Raises:
            ProviderNotFoundError: Organization not found
            ProviderAuthError: Authentication failed or access denied
            ProviderRateLimitError: A 403 response mentioned a rate limit
            ProviderError: Error accessing organization

        Note:
            GitHub doesn't have nested organizations like GitLab,
            so include_subgroups parameter is ignored.
        """
        try:
            org = self.client.get_organization(group_path)

            # Iterate inside the try block: the repository listing is fetched
            # while iterating, so API errors can surface here too.
            return [self._to_project(repo) for repo in org.get_repos()]

        except UnknownObjectException as e:
            raise ProviderNotFoundError(
                f"GitHub organization not found: {group_path}"
            ) from e
        except BadCredentialsException as e:
            raise ProviderAuthError(
                f"Authentication failed for organization: {group_path}"
            ) from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderNotFoundError(
                    f"GitHub organization not found: {group_path}"
                ) from e
            if e.status == 401:
                raise ProviderAuthError(
                    f"Authentication failed for organization: {group_path}"
                ) from e
            if e.status == 403:
                # 403 is used for both rate limiting and missing permissions.
                if self._is_rate_limited(e):
                    raise ProviderRateLimitError(
                        f"GitHub rate limit exceeded for organization: {group_path}"
                    ) from e
                raise ProviderAuthError(
                    f"Access denied to organization: {group_path}"
                ) from e
            raise ProviderError(f"Error getting organization {group_path}: {e}") from e
        except Exception as e:
            raise ProviderError(
                f"Unexpected error listing projects in organization {group_path}: {e}"
            ) from e