Coverage for frappe_manager / site_manager / modules / app_cloner.py: 10%
172 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-07-02 18:13 +0530
« prev ^ index » next coverage.py v7.13.5, created at 2026-07-02 18:13 +0530
"""
AppCloner - Parallel Git Cloning Module

This module handles parallel cloning of Frappe apps with multi-auth fallback.
It runs on the host machine (not in a container) to access SSH keys and avoid Docker overhead.

Key Features:
- Parallel cloning using ThreadPoolExecutor (2-3x faster)
- Smart auth prioritization: token first when provided, with HTTPS fallback
- Support for subdirectory apps (monorepos)
- Shallow clones for speed (--depth 1)
- Repository validation before cloning
"""
import os
import shutil
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

from git import GitCommandError, Repo  # type: ignore

from frappe_manager.logger.contextual import ContextualLogger
from frappe_manager.output_manager import OutputHandler
from frappe_manager.site_manager.bench_config import AppConfig, extract_app_python_module_name
class AppClonerError(Exception):
    """Error type raised when cloning one or more apps fails."""
class AppCloner:
    """
    Clone Frappe apps from Git, in parallel, with authentication fallback.

    Intended to run on the HOST MACHINE (not in a container) to:
    - Access SSH keys (~/.ssh/)
    - Avoid Docker overhead
    - Enable parallel operations

    Authentication:
        The candidate ``(method_name, repo_url)`` pairs, and their order, come
        from ``AppConfig.get_auth_methods()``; this class simply tries them in
        sequence until one clone succeeds. The documented order is
        Token -> HTTPS -> SSH when a GitHub token is supplied and HTTPS -> SSH
        otherwise (defined in AppConfig; not verifiable from this module).

    Secrets:
        Any URL or error text that is logged or placed in a raised message is
        passed through ``_redact()`` so the GitHub token does not end up in
        log output.
    """

    # Placeholder written in place of the GitHub token in logs and error messages.
    _REDACTED = "***"

    def __init__(
        self,
        logger: ContextualLogger,
        apps_dir: Path,
        github_token: str | None = None,
        output_handler: OutputHandler | None = None,
    ):
        """
        Initialize AppCloner.

        Args:
            logger: Contextual logger for audit/debug logging
            apps_dir: Path to bench apps directory (e.g., /benches/mybench/workspace/frappe-bench/apps)
            github_token: Optional GitHub personal access token for private repos
            output_handler: Optional output handler for progress updates
        """
        self.logger = logger.child(component="app_cloner")
        self.apps_dir = Path(apps_dir)
        self.github_token = github_token
        self.output = output_handler

        # The apps directory is created eagerly so clone targets always have a parent.
        self.apps_dir.mkdir(parents=True, exist_ok=True)

    def _redact(self, text: object) -> str:
        """
        Return ``str(text)`` with the configured GitHub token masked.

        Args:
            text: Any object whose string form may contain the token (URL, exception, ...)

        Returns:
            The string form of ``text``; unchanged when no token is configured.
        """
        rendered = str(text)
        if self.github_token:
            rendered = rendered.replace(self.github_token, self._REDACTED)
        return rendered

    def clone_apps_parallel(self, apps: list[AppConfig], max_workers: int = 5) -> dict[str, Path]:
        """
        Clone multiple apps in parallel.

        Optimizes for monorepos: If multiple apps come from the same repo+ref with
        subdirectories, the monorepo is cloned once and shared.

        Args:
            apps: List of AppConfig objects to clone
            max_workers: Maximum number of parallel workers (default: 5).
                Values below 1 are treated as 1.

        Returns:
            Dict mapping app_name to clone_path

        Raises:
            AppClonerError: If any clone operation fails
        """
        if not apps:
            return {}

        self.logger.info(f"Starting parallel clone of {len(apps)} apps")

        # Apps with a subdirectory are grouped by repo+ref so each monorepo is cloned once.
        monorepo_groups: dict[str, list[AppConfig]] = {}
        standalone_apps: list[AppConfig] = []
        for app in apps:
            if app.subdir_path:
                key = f"{app.repo}:{app.ref or 'default'}"
                monorepo_groups.setdefault(key, []).append(app)
            else:
                standalone_apps.append(app)

        cloned_apps: dict[str, Path] = {}
        failed_apps: list[tuple[str, str]] = []

        # Standalone apps are cloned concurrently.
        if standalone_apps:
            # ThreadPoolExecutor rejects max_workers <= 0, so clamp instead of crashing.
            with ThreadPoolExecutor(max_workers=max(1, max_workers)) as executor:
                future_to_app = {executor.submit(self._clone_app, app): app for app in standalone_apps}

                for future in as_completed(future_to_app):
                    app = future_to_app[future]
                    try:
                        app_name, clone_path = future.result()
                    except Exception as e:
                        reason = self._redact(e)
                        failed_apps.append((app.name, reason))
                        self.logger.error(f"Failed to clone {app.name}: {reason}")
                        continue

                    cloned_apps[app_name] = clone_path
                    if self.output:
                        self.output.print(f"Cloned {app_name}", emoji_code=":white_check_mark:")
                    self.logger.info(f"Successfully cloned {app_name} to {clone_path}")

        # Monorepo groups are processed one after another; extraction inside a
        # group is sequential as well.
        for repo_key, group_apps in monorepo_groups.items():
            try:
                self.logger.info(f"Processing monorepo {repo_key} with {len(group_apps)} apps")
                cloned_apps.update(self._clone_monorepo_apps(group_apps))
            except Exception as e:
                reason = self._redact(e)
                # One failure fails the whole group, because the shared clone is gone.
                for app in group_apps:
                    failed_apps.append((app.name, reason))
                    self.logger.error(f"Failed to clone {app.name} from monorepo: {reason}")

        # All failures are reported together once every app has been attempted.
        if failed_apps:
            error_msg = "Failed to clone apps:\n" + "\n".join(f"  - {name}: {error}" for name, error in failed_apps)
            raise AppClonerError(error_msg)

        return cloned_apps

    def _clone_monorepo_apps(self, apps: list[AppConfig]) -> dict[str, Path]:
        """
        Clone multiple apps from the same monorepo efficiently.

        Strategy:
        1. Clone the monorepo once to a temporary location
        2. Extract each subdirectory to its respective app directory
        3. Clean up the shared monorepo clone (also when a step fails)

        Args:
            apps: List of AppConfig objects from the same repo

        Returns:
            Dict mapping app_name to clone_path

        Raises:
            AppClonerError: If the monorepo cannot be cloned or a subdirectory is missing
        """
        if not apps:
            return {}

        # All apps in the group share repo+ref, so the first one drives the clone.
        first_app = apps[0]
        repo_name = first_app.repo.replace("/", "_")
        shared_clone_path = self.apps_dir / f".tmp_monorepo_{repo_name}"

        # A leftover from an earlier run would make the clone fail; start clean.
        if shared_clone_path.exists():
            shutil.rmtree(shared_clone_path)

        self.logger.info(f"Cloning shared monorepo {first_app.repo} to {shared_clone_path}")

        try:
            self._clone_shared_monorepo(first_app, shared_clone_path)

            result: dict[str, Path] = {}
            for app in apps:
                try:
                    app_name, app_path = self._extract_monorepo_app(app, shared_clone_path)
                except Exception as e:
                    self.logger.error(f"Failed to extract {app.name}: {e}")
                    raise
                result[app_name] = app_path
            return result
        finally:
            # Runs on success and failure alike so the temporary clone never
            # lingers in the apps directory.
            if shared_clone_path.exists():
                self.logger.debug(f"Cleaning up shared monorepo at {shared_clone_path}")
                try:
                    shutil.rmtree(shared_clone_path)
                except OSError as cleanup_error:
                    # Do not let a cleanup problem mask the real outcome.
                    self.logger.warning(f"Could not remove {shared_clone_path}: {cleanup_error}")

    def _clone_shared_monorepo(self, app: AppConfig, destination: Path) -> None:
        """
        Clone the monorepo behind ``app`` into ``destination``, trying each auth method.

        Args:
            app: AppConfig used for repo, ref and clone options
            destination: Temporary directory that receives the clone

        Raises:
            AppClonerError: If every authentication method fails
        """
        last_error = None

        for method_name, repo_url in self._get_auth_methods(app):
            try:
                self.logger.debug(f"Trying {method_name} for monorepo {app.repo}")
                self._git_clone(repo_url, destination, app)
            except Exception as e:
                last_error = e
                self.logger.debug(f"{method_name} failed for monorepo: {self._redact(e)}")
                # Remove the partial clone so the next method starts from scratch.
                if destination.exists():
                    shutil.rmtree(destination)
                continue

            self.logger.info(f"Successfully cloned monorepo using {method_name}")
            return

        # Not chained with "from": the original error text may contain the token.
        raise AppClonerError(f"Failed to clone monorepo {app.repo}. Last error: {self._redact(last_error)}")

    def _extract_monorepo_app(self, app: AppConfig, shared_clone_path: Path) -> tuple[str, Path]:
        """
        Copy one app's subdirectory out of the shared monorepo clone.

        Args:
            app: AppConfig of the app to extract (its ``name`` may be updated)
            shared_clone_path: Location of the shared monorepo clone

        Returns:
            Tuple of (app_name, app_path) after any rename to the Python module name

        Raises:
            AppClonerError: If the subdirectory does not exist in the clone
        """
        target_path = self.apps_dir / app.name

        if target_path.exists():
            self.logger.info(f"App {app.name} already exists, skipping")
            return (app.name, target_path)

        subdir_path = shared_clone_path / (app.subdir_path or "")
        if not subdir_path.exists():
            available = [d.name for d in shared_clone_path.iterdir() if d.is_dir() and not d.name.startswith(".")]
            raise AppClonerError(
                f"Subdirectory '{app.subdir_path}' not found in monorepo. Available: {available}",
            )

        self.logger.info(f"Extracting {app.name} from {app.subdir_path}")
        try:
            shutil.copytree(subdir_path, target_path, symlinks=True)
            # Module name is read from the copied app (pyproject.toml or hooks.py).
            module_name = extract_app_python_module_name(target_path)
            app_name, app_path = self._settle_app_directory(app, target_path, module_name, "subdirectory name")
        except Exception:
            # A half-copied directory would be mistaken for an existing app on
            # the next run ("already exists, skipping"), so remove it.
            shutil.rmtree(target_path, ignore_errors=True)
            raise

        if self.output:
            self.output.print(f"Extracted {app_name}", emoji_code=":white_check_mark:")
        return (app_name, app_path)

    def _settle_app_directory(
        self,
        app: AppConfig,
        current_path: Path,
        module_name: str,
        fallback_label: str,
    ) -> tuple[str, Path]:
        """
        Rename an app directory to its Python module name when the two differ.

        Args:
            app: AppConfig whose ``name`` is updated to the final name
            current_path: Directory the app currently lives in
            module_name: Python module name detected for the app
            fallback_label: Words used in the warning when the rename is skipped

        Returns:
            Tuple of (final_app_name, final_path)
        """
        if module_name != app.name:
            desired_path = self.apps_dir / module_name
            if desired_path.exists():
                # Target already exists - should not happen, but keep the current directory.
                self.logger.warning(f"App directory {module_name} already exists. Using {fallback_label} instead.")
                module_name = app.name
            else:
                self.logger.info(
                    f"Renaming app directory from '{app.name}' to '{module_name}' (Python module name)",
                )
                shutil.move(str(current_path), str(desired_path))
                current_path = desired_path

        # Later steps look the app up by this name, so keep the config in sync.
        app.name = module_name
        return (module_name, current_path)

    def _clone_app(self, app: AppConfig) -> tuple[str, Path]:
        """
        Clone a single standalone app (non-subdirectory) with authentication fallback.

        NOTE: This method should ONLY be called for standalone apps (app.subdir_path is None).
        Subdirectory apps are handled by _clone_monorepo_apps() for efficiency.

        Only clone failures trigger the next authentication method. A failure
        while detecting the module name or renaming the directory is not an
        auth problem, so it removes the clone and propagates without retrying.

        Args:
            app: AppConfig object with repo details (must NOT have subdir_path)

        Returns:
            Tuple of (app_name, clone_path)

        Raises:
            ValueError: If ``app`` has a subdir_path
            AppClonerError: If all authentication methods fail
        """
        if app.subdir_path:
            raise ValueError(
                f"_clone_app() called with subdirectory app {app.name}. "
                "Subdirectory apps must be handled by _clone_monorepo_apps().",
            )

        clone_path = self.apps_dir / app.name

        # Skip if already cloned
        if clone_path.exists():
            self.logger.info(f"App {app.name} already exists at {clone_path}, skipping")
            return (app.name, clone_path)

        # Candidate methods and their order are decided by AppConfig.get_auth_methods().
        last_error = None
        for method_name, repo_url in self._get_auth_methods(app):
            try:
                self.logger.debug(f"Trying {method_name} for {app.name}: {self._redact(repo_url)}")
                self._git_clone(repo_url, clone_path, app)
            except Exception as e:
                last_error = e
                self.logger.debug(f"{method_name} failed for {app.name}: {self._redact(e)}")
                # Clean up failed clone attempt
                if clone_path.exists():
                    shutil.rmtree(clone_path)
                continue

            self.logger.info(f"Successfully cloned {app.name} using {method_name}")
            break
        else:
            # Loop ended without a successful clone (or there were no methods).
            # Not chained with "from": the original error text may contain the token.
            raise AppClonerError(
                f"Failed to clone {app.name} from {app.repo}. Tried all authentication methods. "
                f"Last error: {self._redact(last_error)}",
            )

        try:
            # Detect actual Python module name from pyproject.toml or hooks.py
            module_name = extract_app_python_module_name(clone_path)
            return self._settle_app_directory(app, clone_path, module_name, "repo name")
        except Exception:
            # Leave nothing behind that a later run would treat as a finished clone.
            shutil.rmtree(clone_path, ignore_errors=True)
            raise

    def _get_auth_methods(self, app: AppConfig) -> list[tuple[str, str]]:
        """
        Get list of authentication methods to try in order.

        Delegates to AppConfig.get_auth_methods() for consistency with validation.
        Documented priority order (implemented in AppConfig, not in this class):
        - With token: Token → HTTPS → SSH
        - Without token: HTTPS → SSH

        Returns:
            List of (method_name, repo_url) tuples
        """
        return app.get_auth_methods(github_token=self.github_token)

    def _git_clone(self, repo_url: str, clone_path: Path, app: AppConfig) -> None:
        """
        Execute git clone operation.

        A branch/tag ref is passed to ``git clone`` directly. A commit ref is
        checked out after a full clone, so ``depth`` is not applied to commits.

        Args:
            repo_url: Git repository URL
            clone_path: Destination path for clone
            app: AppConfig object with clone options
        """
        # Never let git block on an interactive credential prompt: a failed
        # auth method must fail fast so the caller can try the next one.
        env = os.environ.copy()
        env["GIT_TERMINAL_PROMPT"] = "0"
        env["GIT_ASKPASS"] = "echo"

        clone_kwargs: dict[str, object] = {"env": env}
        if app.ref and not app.is_commit:
            clone_kwargs["branch"] = app.ref
        if app.shallow_clone and not app.is_commit:
            clone_kwargs["depth"] = 1

        clone_type = "shallow" if clone_kwargs.get("depth") == 1 else "full"
        ref_info = f" (ref: {app.ref})" if app.ref else ""
        self.logger.debug(f"Cloning {app.name} from {self._redact(repo_url)}{ref_info} [{clone_type} clone]")

        repo = Repo.clone_from(repo_url, clone_path, **clone_kwargs)
        try:
            if app.is_commit:
                self.logger.debug(f"Checking out commit {app.ref} for {app.name}")
                repo.git.checkout(app.ref)
        finally:
            # Release the git processes/handles GitPython keeps for the repo.
            repo.close()

    @staticmethod
    def validate_repos_exist(apps: list[AppConfig], github_token: str | None = None) -> tuple[bool, list[str]]:
        """
        Validate that all app repositories exist before attempting to clone.

        DEPRECATED: This method now delegates to AppConfig.validate_repos_batch().
        New code should call AppConfig.validate_repos_batch() directly.

        Args:
            apps: List of AppConfig objects to validate (modified in-place)
            github_token: Optional GitHub token for private repos

        Returns:
            Tuple of (all_valid: bool, messages: List[str])
            Messages include both success (✓) and error (❌) messages with auth method details
        """
        result = AppConfig.validate_repos_batch(apps, github_token)
        return (result.all_valid, result.messages)