Coverage for frappe_manager / site_manager / modules / app_cloner.py: 10%

172 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-07-02 18:13 +0530

"""
AppCloner - Parallel Git Cloning Module

This module handles parallel cloning of Frappe apps with multi-auth fallback.
Runs on the host machine (not in container) to access SSH keys and avoid Docker overhead.

Key Features:
- Parallel cloning using ThreadPoolExecutor (2-3x faster)
- Smart auth prioritization: Token first when provided, HTTPS fallback
- Support for subdirectory apps (monorepos)
- Shallow clones for speed (--depth 1)
- Repository validation before cloning
"""

14 

import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlsplit, urlunsplit

from git import GitCommandError, Repo  # type: ignore

from frappe_manager.logger.contextual import ContextualLogger
from frappe_manager.output_manager import OutputHandler
from frappe_manager.site_manager.bench_config import AppConfig, extract_app_python_module_name

24 

25 

class AppClonerError(Exception):
    """Raised when one or more app clone operations fail.

    ``AppCloner.clone_apps_parallel`` raises this with a message listing every
    app that could not be cloned together with the reason for each failure.
    """

28 

29 

class AppCloner:
    """
    Handles parallel Git cloning with authentication fallback.

    Runs on HOST MACHINE (not in container) to:
    - Access SSH keys (~/.ssh/)
    - Avoid Docker overhead
    - Enable parallel operations

    Authentication order is delegated to ``AppConfig.get_auth_methods()``.
    As documented on ``_get_auth_methods`` in this class:
    - With a GitHub token:    Token -> HTTPS -> SSH
    - Without a GitHub token: HTTPS -> SSH
    """

    def __init__(
        self,
        logger: ContextualLogger,
        apps_dir: Path,
        github_token: str | None = None,
        output_handler: OutputHandler | None = None,
    ):
        """
        Initialize AppCloner.

        Creates ``apps_dir`` (including parents) if it does not exist yet.

        Args:
            logger: Contextual logger for audit/debug logging
            apps_dir: Path to bench apps directory (e.g., /benches/mybench/workspace/frappe-bench/apps)
            github_token: Optional GitHub personal access token for private repos
            output_handler: Optional output handler for progress updates
        """
        self.logger = logger.child(component="app_cloner")
        self.apps_dir = Path(apps_dir)
        self.github_token = github_token
        self.output = output_handler

        self.apps_dir.mkdir(parents=True, exist_ok=True)

    def clone_apps_parallel(self, apps: list[AppConfig], max_workers: int = 5) -> dict[str, Path]:
        """
        Clone multiple apps, running standalone clones in parallel.

        Optimizes for monorepos: If multiple apps come from the same repo+ref with
        subdirectories, the monorepo is cloned once and shared.

        All apps are attempted before failures are reported, so a single bad
        repository does not prevent the others from being cloned.

        Args:
            apps: List of AppConfig objects to clone
            max_workers: Maximum number of parallel workers (default: 5)

        Returns:
            Dict mapping app_name to clone_path

        Raises:
            AppClonerError: If any clone operation fails
        """
        if not apps:
            return {}

        self.logger.info(f"Starting parallel clone of {len(apps)} apps")

        # Group apps by monorepo (same repo+ref with subdirs)
        monorepo_groups: dict[str, list[AppConfig]] = {}
        standalone_apps: list[AppConfig] = []

        for app in apps:
            if app.subdir_path:
                # This is a monorepo app - group by repo+ref
                key = f"{app.repo}:{app.ref or 'default'}"
                monorepo_groups.setdefault(key, []).append(app)
            else:
                standalone_apps.append(app)

        cloned_apps: dict[str, Path] = {}
        failed_apps: list[tuple[str, str]] = []

        # Clone standalone apps in parallel
        if standalone_apps:
            with ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_app = {executor.submit(self._clone_app, app): app for app in standalone_apps}

                for future in as_completed(future_to_app):
                    app = future_to_app[future]
                    try:
                        app_name, clone_path = future.result()
                        cloned_apps[app_name] = clone_path
                        if self.output:
                            self.output.print(f"Cloned {app_name}", emoji_code=":white_check_mark:")
                        self.logger.info(f"Successfully cloned {app_name} to {clone_path}")
                    except Exception as e:
                        failed_apps.append((app.name, str(e)))
                        self.logger.error(f"Failed to clone {app.name}: {e}")

        # Monorepo groups are processed one after another; within a group the
        # repository is cloned once and each app is extracted sequentially.
        for repo_key, group_apps in monorepo_groups.items():
            try:
                self.logger.info(f"Processing monorepo {repo_key} with {len(group_apps)} apps")
                cloned_apps.update(self._clone_monorepo_apps(group_apps))
            except Exception as e:
                # A group fails as a unit: every app in it is reported as failed.
                for app in group_apps:
                    failed_apps.append((app.name, str(e)))
                    self.logger.error(f"Failed to clone {app.name} from monorepo: {e}")

        # Raise exception if any clones failed
        if failed_apps:
            error_msg = "Failed to clone apps:\n" + "\n".join(f"  - {name}: {error}" for name, error in failed_apps)
            raise AppClonerError(error_msg)

        return cloned_apps

    def _clone_monorepo_apps(self, apps: list[AppConfig]) -> dict[str, Path]:
        """
        Clone multiple apps from the same monorepo efficiently.

        Strategy:
        1. Clone the monorepo once to a temporary location under ``apps_dir``
        2. Extract each subdirectory to its respective app directory
        3. Clean up the shared monorepo clone (also when extraction fails)

        Args:
            apps: List of AppConfig objects from the same repo

        Returns:
            Dict mapping app_name to clone_path

        Raises:
            AppClonerError: If every authentication method fails, or if a
                requested subdirectory is missing from the repository.
            Exception: Any error raised while copying/renaming an app directory.
        """
        if not apps:
            return {}

        # Use the first app's config for cloning (they all share repo+ref)
        first_app = apps[0]
        repo_name = first_app.repo.replace("/", "_")

        # Clone monorepo to a temporary shared location
        shared_clone_path = self.apps_dir / f".tmp_monorepo_{repo_name}"

        # Remove leftovers from a previous interrupted run.
        if shared_clone_path.exists():
            shutil.rmtree(shared_clone_path)

        self.logger.info(f"Cloning shared monorepo {first_app.repo} to {shared_clone_path}")

        last_error: Exception | None = None

        # try/finally guarantees the temporary clone is removed even when the
        # clone or a later extraction step raises.
        try:
            for method_name, repo_url in self._get_auth_methods(first_app):
                try:
                    self.logger.debug(f"Trying {method_name} for monorepo {first_app.repo}")
                    self._git_clone(repo_url, shared_clone_path, first_app)
                except Exception as e:
                    last_error = e
                    self.logger.debug(f"{method_name} failed for monorepo: {e}")
                    # Clean up the failed attempt so the next method starts fresh.
                    if shared_clone_path.exists():
                        shutil.rmtree(shared_clone_path)
                    continue

                self.logger.info(f"Successfully cloned monorepo using {method_name}")
                break
            else:
                # Loop finished without a successful clone.
                raise AppClonerError(
                    f"Failed to clone monorepo {first_app.repo}. Last error: {last_error}",
                ) from last_error

            # Extract each app's subdirectory
            result: dict[str, Path] = {}
            for app in apps:
                try:
                    app_name, app_path = self._extract_monorepo_app(app, shared_clone_path)
                except Exception as e:
                    self.logger.error(f"Failed to extract {app.name}: {e}")
                    raise
                result[app_name] = app_path

            return result
        finally:
            # Clean up shared monorepo
            if shared_clone_path.exists():
                self.logger.debug(f"Cleaning up shared monorepo at {shared_clone_path}")
                shutil.rmtree(shared_clone_path)

    def _extract_monorepo_app(self, app: AppConfig, shared_clone_path: Path) -> tuple[str, Path]:
        """
        Copy one app's subdirectory out of a shared monorepo clone.

        If the app directory already exists it is left untouched and returned
        as-is. On failure the partially extracted directory is removed so a
        later run does not mistake it for a finished app.

        Args:
            app: AppConfig whose ``subdir_path`` locates the app in the monorepo
            shared_clone_path: Path of the already cloned monorepo

        Returns:
            Tuple of (app_name, app_path)

        Raises:
            AppClonerError: If the subdirectory does not exist in the monorepo
        """
        # The configured name is used as the initial directory name.
        temp_app_path = self.apps_dir / app.name

        if temp_app_path.exists():
            self.logger.info(f"App {app.name} already exists, skipping")
            return (app.name, temp_app_path)

        subdir_path = shared_clone_path / (app.subdir_path or "")

        if not subdir_path.is_dir():
            available = [d.name for d in shared_clone_path.iterdir() if d.is_dir() and not d.name.startswith(".")]
            raise AppClonerError(
                f"Subdirectory '{app.subdir_path}' not found in monorepo. Available: {available}",
            )

        self.logger.info(f"Extracting {app.name} from {app.subdir_path}")
        try:
            # symlinks=True copies symlinks as symlinks instead of following them.
            shutil.copytree(subdir_path, temp_app_path, symlinks=True)
            app_name, app_path = self._finalize_app_dir(app, temp_app_path, "subdirectory name")
        except Exception:
            # Do not leave a half-copied directory behind.
            if temp_app_path.exists():
                shutil.rmtree(temp_app_path)
            raise

        if self.output:
            self.output.print(f"Extracted {app_name}", emoji_code=":white_check_mark:")

        return (app_name, app_path)

    def _finalize_app_dir(self, app: AppConfig, app_path: Path, fallback_desc: str) -> tuple[str, Path]:
        """
        Rename a freshly created app directory to its Python module name.

        The module name is obtained from ``extract_app_python_module_name``.
        When it differs from ``app.name`` and no directory with that name exists
        yet, the directory is moved and ``app.name`` is updated in place.
        Otherwise the directory and ``app.name`` are left unchanged.

        Args:
            app: AppConfig for the app (``name`` may be modified in place)
            app_path: Current location of the app directory
            fallback_desc: Wording used in the warning when the target name is
                already taken (e.g. "repo name", "subdirectory name")

        Returns:
            Tuple of (final_app_name, final_app_path)
        """
        actual_app_name = extract_app_python_module_name(app_path)

        if actual_app_name == app.name:
            return (app.name, app_path)

        final_path = self.apps_dir / actual_app_name
        if final_path.exists():
            # Target already exists - should not happen, but handle it
            self.logger.warning(f"App directory {actual_app_name} already exists. Using {fallback_desc} instead.")
            return (app.name, app_path)

        self.logger.info(
            f"Renaming app directory from '{app.name}' to '{actual_app_name}' (Python module name)",
        )
        shutil.move(str(app_path), str(final_path))

        # Update the AppConfig with the correct name
        app.name = actual_app_name
        return (actual_app_name, final_path)

    def _clone_app(self, app: AppConfig) -> tuple[str, Path]:
        """
        Clone a single standalone app (non-subdirectory) with authentication fallback.

        NOTE: This method should ONLY be called for standalone apps (app.subdir_path is None).
        Subdirectory apps are handled by _clone_monorepo_apps() for efficiency.

        Each (method, url) pair returned by ``_get_auth_methods`` is tried in
        order until one clone succeeds. Only the clone itself is retried; a
        failure while renaming the directory afterwards is raised directly
        instead of being treated as an authentication failure.

        Args:
            app: AppConfig object with repo details (must NOT have subdir_path)

        Returns:
            Tuple of (app_name, clone_path)

        Raises:
            ValueError: If ``app`` has a ``subdir_path``
            AppClonerError: If all authentication methods fail
        """
        if app.subdir_path:
            raise ValueError(
                f"_clone_app() called with subdirectory app {app.name}. "
                "Subdirectory apps must be handled by _clone_monorepo_apps().",
            )

        clone_path = self.apps_dir / app.name

        # Skip if already cloned
        if clone_path.exists():
            self.logger.info(f"App {app.name} already exists at {clone_path}, skipping")
            return (app.name, clone_path)

        last_error: Exception | None = None

        # Order of attempts is decided by AppConfig.get_auth_methods().
        # NOTE(review): presumably a validated app.repo_url is tried first - confirm in AppConfig.
        for method_name, repo_url in self._get_auth_methods(app):
            try:
                # Credentials embedded in the URL are masked before logging.
                self.logger.debug(f"Trying {method_name} for {app.name}: {self._redact_url(repo_url)}")
                self._git_clone(repo_url, clone_path, app)
            except Exception as e:
                last_error = e
                self.logger.debug(f"{method_name} failed for {app.name}: {e}")
                # Clean up failed clone attempt
                if clone_path.exists():
                    shutil.rmtree(clone_path)
                continue

            self.logger.info(f"Successfully cloned {app.name} using {method_name}")
            break
        else:
            # All methods failed
            raise AppClonerError(
                f"Failed to clone {app.name} from {app.repo}. Tried all authentication methods. "
                f"Last error: {last_error}",
            ) from last_error

        try:
            # Detect actual Python module name and rename the directory if needed.
            return self._finalize_app_dir(app, clone_path, "repo name")
        except Exception:
            # Remove the clone so a later run does not skip it as "already exists".
            if clone_path.exists():
                shutil.rmtree(clone_path)
            raise

    def _get_auth_methods(self, app: AppConfig) -> list[tuple[str, str]]:
        """
        Get list of authentication methods to try in order.

        Delegates to AppConfig.get_auth_methods() for consistency with validation.
        Priority order:
        - With token: Token → HTTPS → SSH
        - Without token: HTTPS → SSH

        Returns:
            List of (method_name, repo_url) tuples
        """
        return app.get_auth_methods(github_token=self.github_token)

    @staticmethod
    def _redact_url(repo_url: str) -> str:
        """
        Return ``repo_url`` with any HTTP(S) userinfo replaced by ``***``.

        Used for log output only. URLs without a userinfo part, and non-HTTP(S)
        URLs (e.g. ``git@host:org/repo.git``), are returned unchanged.
        """
        try:
            parts = urlsplit(repo_url)
        except ValueError:
            return repo_url

        if parts.scheme not in ("http", "https") or "@" not in parts.netloc:
            return repo_url

        host = parts.netloc.rpartition("@")[2]
        return urlunsplit(parts._replace(netloc=f"***@{host}"))

    def _git_clone(self, repo_url: str, clone_path: Path, app: AppConfig) -> None:
        """
        Execute git clone operation.

        Branch/tag refs are cloned directly (shallow when ``app.shallow_clone``
        is set). Commit refs need a full clone followed by an explicit checkout.

        Args:
            repo_url: Git repository URL
            clone_path: Destination path for clone
            app: AppConfig object with clone options
        """
        # Never block on interactive credential prompts: fail fast so the
        # caller can move on to the next authentication method.
        env = os.environ.copy()
        env["GIT_TERMINAL_PROMPT"] = "0"
        env["GIT_ASKPASS"] = "echo"

        clone_kwargs = {"env": env}
        if not app.is_commit:
            if app.ref:
                clone_kwargs["branch"] = app.ref
            if app.shallow_clone:
                clone_kwargs["depth"] = 1

        clone_type = "shallow" if "depth" in clone_kwargs else "full"
        ref_info = f" (ref: {app.ref})" if app.ref else ""
        self.logger.debug(
            f"Cloning {app.name} from {self._redact_url(repo_url)}{ref_info} [{clone_type} clone]",
        )

        repo = Repo.clone_from(repo_url, clone_path, **clone_kwargs)

        if app.is_commit:
            self.logger.debug(f"Checking out commit {app.ref} for {app.name}")
            repo.git.checkout(app.ref)

    @staticmethod
    def validate_repos_exist(apps: list[AppConfig], github_token: str | None = None) -> tuple[bool, list[str]]:
        """
        Validate that all app repositories exist before attempting to clone.

        DEPRECATED: This method now delegates to AppConfig.validate_repos_batch().
        New code should call AppConfig.validate_repos_batch() directly.

        Args:
            apps: List of AppConfig objects to validate (modified in-place)
            github_token: Optional GitHub token for private repos

        Returns:
            Tuple of (all_valid: bool, messages: List[str])
            Messages include both success (✓) and error (❌) messages with auth method details
        """
        result = AppConfig.validate_repos_batch(apps, github_token)
        return (result.all_valid, result.messages)