Coverage for repo_ctx / providers / github.py: 11%

134 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-25 17:42 +0100

1"""GitHub provider implementation.""" 

2import base64 

3import json 

4from typing import Optional, List 

5from github import Github, GithubException, UnknownObjectException, BadCredentialsException 

6from .base import GitProvider, ProviderProject, ProviderFile 

7from .exceptions import ( 

8 ProviderNotFoundError, 

9 ProviderAuthError, 

10 ProviderFileNotFoundError, 

11 ProviderError, 

12 ProviderRateLimitError 

13) 

14 

15 

class GitHubProvider(GitProvider):
    """GitHub repository provider using PyGithub.

    The public methods are declared ``async`` to satisfy the provider
    interface, but the PyGithub calls inside them are not awaited; each
    call completes before the coroutine returns.
    """

    def __init__(self, url: str = "https://api.github.com", token: Optional[str] = None):
        """
        Initialize GitHub provider.

        Args:
            url: GitHub API URL. The default targets public GitHub; any other
                value is handed to PyGithub as ``base_url`` (for GitHub
                Enterprise Server this is normally ``https://<host>/api/v3``).
            token: Personal access token (optional for public repos)

        Raises:
            ProviderAuthError: The token was rejected by GitHub
            ProviderError: Any other failure while creating the client
        """
        self.url = url
        self.token = token

        try:
            if url == "https://api.github.com":
                # Public GitHub
                self.client = Github(token) if token else Github()
            else:
                # GitHub Enterprise: PyGithub uses the *path* of base_url as
                # the prefix of every request, so the "/api/v3" part must be
                # kept. Only a trailing slash is trimmed.
                base_url = url.rstrip("/")
                self.client = Github(base_url=base_url, login_or_token=token)

            # Verify authentication
            if token:
                try:
                    # Reading ``login`` forces a request with the token.
                    self.client.get_user().login
                except BadCredentialsException as e:
                    raise ProviderAuthError(
                        f"GitHub authentication failed: {e}"
                    ) from e

        except ProviderAuthError:
            # Already translated above; do not re-wrap as a generic error.
            raise
        except Exception as e:
            raise ProviderError(f"Failed to initialize GitHub client: {e}") from e

    @staticmethod
    def _project_from_repo(repo) -> ProviderProject:
        """Convert a PyGithub repository object into a ProviderProject."""
        return ProviderProject(
            id=str(repo.id),
            name=repo.name,
            path=repo.full_name,
            description=repo.description,
            default_branch=repo.default_branch,
            web_url=repo.html_url,
        )

    async def get_project(self, path: str) -> ProviderProject:
        """
        Get project metadata from GitHub.

        Args:
            path: Repository path (format: owner/repo)

        Returns:
            ProviderProject with normalized metadata

        Raises:
            ProviderNotFoundError: Repository doesn't exist
            ProviderAuthError: Authentication failed or access denied
            ProviderRateLimitError: GitHub answered 403 with a rate-limit message
            ProviderError: Any other failure
        """
        try:
            return self._project_from_repo(self.client.get_repo(path))

        except UnknownObjectException as e:
            raise ProviderNotFoundError(f"GitHub repository not found: {path}") from e
        except BadCredentialsException as e:
            raise ProviderAuthError(
                f"Authentication failed for repository: {path}"
            ) from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderNotFoundError(
                    f"GitHub repository not found: {path}"
                ) from e
            if e.status == 401:
                raise ProviderAuthError(
                    f"Authentication failed for repository: {path}"
                ) from e
            if e.status == 403:
                # Could be rate limit or permissions
                if "rate limit" in str(e).lower():
                    raise ProviderRateLimitError("GitHub rate limit exceeded") from e
                raise ProviderAuthError(f"Access denied to repository: {path}") from e
            raise ProviderError(f"Error getting GitHub repository {path}: {e}") from e
        except Exception as e:
            raise ProviderError(
                f"Unexpected error getting repository {path}: {e}"
            ) from e

    async def get_default_branch(self, project: ProviderProject) -> str:
        """
        Get default branch name.

        Args:
            project: Project to query

        Returns:
            Default branch name (e.g., "main", "master")

        Raises:
            ProviderError: The fallback API lookup failed
        """
        # Already in ProviderProject
        if project.default_branch:
            return project.default_branch

        # Fallback: fetch from API
        try:
            return self.client.get_repo(project.path).default_branch
        except Exception as e:
            raise ProviderError(f"Error getting default branch: {e}") from e

    async def get_file_tree(
        self,
        project: ProviderProject,
        ref: str,
        recursive: bool = True
    ) -> List[str]:
        """
        Get list of all file paths in repository.

        Args:
            project: Project to query
            ref: Branch, tag, or commit SHA
            recursive: Include subdirectories

        Returns:
            List of file paths relative to repo root; an empty list when
            GitHub answers 409 (treated as an empty repository)

        Raises:
            ProviderError: Error accessing file tree
        """
        try:
            repo = self.client.get_repo(project.path)
            tree = repo.get_git_tree(ref, recursive=recursive)

            # Keep blobs (files) only; "tree" entries are directories.
            # NOTE(review): very large recursive trees may be truncated by
            # GitHub; truncation is not detected here - confirm if relevant.
            return [item.path for item in tree.tree if item.type == "blob"]

        except GithubException as e:
            if e.status == 404:
                raise ProviderError(
                    f"Branch/tag '{ref}' not found in repository {project.path}"
                ) from e
            if e.status == 409:
                # Empty repository
                return []
            raise ProviderError(f"Error getting file tree: {e}") from e
        except Exception as e:
            raise ProviderError(f"Unexpected error getting file tree: {e}") from e

    async def read_file(
        self,
        project: ProviderProject,
        path: str,
        ref: str
    ) -> ProviderFile:
        """
        Read file contents from GitHub.

        Args:
            project: Project to query
            path: File path relative to repo root
            ref: Branch, tag, or commit SHA

        Returns:
            ProviderFile with content and metadata

        Raises:
            ProviderFileNotFoundError: File doesn't exist at ref
            ProviderError: Path is a directory, content is not valid UTF-8,
                or any other failure
        """
        try:
            repo = self.client.get_repo(project.path)
            file_content = repo.get_contents(path, ref=ref)

            # get_contents returns a list when the path is a directory
            if isinstance(file_content, list):
                raise ProviderError(f"Path '{path}' is a directory, not a file")

            # Decode content
            content = file_content.decoded_content.decode('utf-8')

            return ProviderFile(
                path=path,
                content=content,
                size=file_content.size
            )

        except UnknownObjectException as e:
            raise ProviderFileNotFoundError(
                f"File '{path}' not found in {project.path} at ref '{ref}'"
            ) from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderFileNotFoundError(
                    f"File '{path}' not found in {project.path} at ref '{ref}'"
                ) from e
            raise ProviderError(f"Error reading file {path}: {e}") from e
        except UnicodeDecodeError as e:
            raise ProviderError(
                f"File '{path}' is not valid UTF-8 (binary file?)"
            ) from e
        except ProviderError:
            # Raised deliberately above (directory case); keep its message
            # instead of re-wrapping it as an "unexpected" error.
            raise
        except Exception as e:
            raise ProviderError(f"Unexpected error reading file {path}: {e}") from e

    async def read_config(
        self,
        project: ProviderProject,
        ref: str
    ) -> Optional[dict]:
        """
        Read repo-ctx configuration file if it exists.

        Searches for configuration files in this order:
        1. git_context.json (current name)
        2. .git_context.json
        3. repo_context.json
        4. .repo-ctx.json
        5. .github/repo-ctx.json (GitHub-specific)

        Args:
            project: Project to query
            ref: Branch, tag, or commit SHA

        Returns:
            Parsed JSON config or None if not found

        Raises:
            ProviderError: A config file was found but contains invalid JSON
        """
        config_filenames = [
            "git_context.json",
            ".git_context.json",
            "repo_context.json",
            ".repo-ctx.json",
            ".github/repo-ctx.json",  # GitHub-specific location
        ]

        for filename in config_filenames:
            try:
                config_file = await self.read_file(project, filename, ref)
                return json.loads(config_file.content)
            except ProviderFileNotFoundError:
                # Try next filename
                continue
            except json.JSONDecodeError as e:
                raise ProviderError(
                    f"Invalid JSON in config file {filename}: {e}"
                ) from e
            except Exception:
                # Best-effort lookup: any other failure for this candidate
                # is ignored and the next filename is tried.
                continue

        # No config file found
        return None

    async def get_tags(
        self,
        project: ProviderProject,
        limit: int = 5
    ) -> List[str]:
        """
        Get repository tags.

        Tags are returned in the order the GitHub API lists them.
        NOTE(review): callers appear to expect "most recent first"; that
        ordering is not enforced here - confirm against the API.

        Args:
            project: Project to query
            limit: Maximum number of tags to return

        Returns:
            List of tag names

        Raises:
            ProviderError: Error accessing tags
        """
        try:
            repo = self.client.get_repo(project.path)

            # Take the first N tags of the listing
            return [tag.name for tag in repo.get_tags()[:limit]]

        except GithubException as e:
            raise ProviderError(f"Error getting tags: {e}") from e
        except Exception as e:
            raise ProviderError(f"Unexpected error getting tags: {e}") from e

    async def list_projects_in_group(
        self,
        group_path: str,
        include_subgroups: bool = True
    ) -> List[ProviderProject]:
        """
        List all projects in a GitHub organization.

        Args:
            group_path: Organization name (e.g., "fastapi")
            include_subgroups: Ignored for GitHub (no nested orgs)

        Returns:
            List of the organization's repositories as returned by the API
            for the current credentials

        Raises:
            ProviderNotFoundError: Organization not found
            ProviderAuthError: Authentication failed or access denied
            ProviderRateLimitError: GitHub answered 403 with a rate-limit message
            ProviderError: Error accessing organization

        Note:
            GitHub doesn't have nested organizations like GitLab,
            so include_subgroups parameter is ignored.
        """
        try:
            org = self.client.get_organization(group_path)
            return [self._project_from_repo(repo) for repo in org.get_repos()]

        except UnknownObjectException as e:
            raise ProviderNotFoundError(
                f"GitHub organization not found: {group_path}"
            ) from e
        except BadCredentialsException as e:
            raise ProviderAuthError(
                f"Authentication failed for organization: {group_path}"
            ) from e
        except GithubException as e:
            if e.status == 404:
                raise ProviderNotFoundError(
                    f"GitHub organization not found: {group_path}"
                ) from e
            if e.status == 401:
                raise ProviderAuthError(
                    f"Authentication failed for organization: {group_path}"
                ) from e
            if e.status == 403:
                # Could be rate limit or permissions
                if "rate limit" in str(e).lower():
                    raise ProviderRateLimitError(
                        f"GitHub rate limit exceeded for organization: {group_path}"
                    ) from e
                raise ProviderAuthError(
                    f"Access denied to organization: {group_path}"
                ) from e
            raise ProviderError(
                f"Error getting organization {group_path}: {e}"
            ) from e
        except Exception as e:
            raise ProviderError(
                f"Unexpected error listing projects in organization {group_path}: {e}"
            ) from e

368 f"Unexpected error listing projects in organization {group_path}: {e}" 

369 )