#!/usr/bin/env python3
"""Clone Daylily analysis repositories into the shared FSx workspace."""

import argparse
import os
import re
import subprocess
import sys
from typing import Any, Dict, Iterable, List, Optional, Tuple

import yaml


# Per-user configuration directory and the two YAML files this script reads:
# the global CLI settings and the catalog of clonable repositories.
CONFIG_DIR = os.path.expanduser("~/.config/daylily")
GLOBAL_CONFIG_PATH = os.path.join(CONFIG_DIR, "daylily_cli_global.yaml")
AVAILABLE_REPOS_PATH = os.path.join(CONFIG_DIR, "daylily_pipeline_command_catalog.yaml")
# A single path segment: one ASCII letter or digit followed by any number of
# ASCII letters, digits, dots, underscores or hyphens.
SAFE_SEGMENT_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
# Environment variables consulted, in this order, when looking for the
# cluster name; the first one with a non-empty value is used.
CLUSTER_NAME_ENV_KEYS = (
    "DAYLILY_CLUSTER_NAME",
    "DAY_EC_CLUSTER_NAME",
    "PCLUSTER_CLUSTER_NAME",
    "PARALLELCLUSTER_CLUSTER_NAME",
    "stack_name",
    "STACK_NAME",
    "cfn_cluster_name",
    "CFN_CLUSTER_NAME",
)
# Files scanned for ``key=value`` lines when no environment variable
# provides the cluster name.
CLUSTER_NAME_CONFIG_PATHS = (
    "/etc/parallelcluster/cfnconfig",
)
# Keys in those files whose value is accepted as the cluster name.
CLUSTER_NAME_CONFIG_KEYS = {
    "stack_name",
    "cluster_name",
    "clusterName",
    "ClusterName",
    "cfn_cluster_name",
}


class ConfigError(RuntimeError):
    """Signal that required configuration is absent or invalid.

    Raised by the loaders and validators in this module so that ``main`` can
    report the problem as a one-line error instead of a traceback.
    """


def _load_yaml_mapping(path: str) -> Dict[str, Any]:
    """Load a YAML file that must contain a mapping at the top level.

    Args:
        path: Filesystem path of the YAML file to read.

    Returns:
        The parsed top-level mapping.

    Raises:
        ConfigError: If the file is missing, cannot be read or decoded as
            UTF-8, is not valid YAML, or its top-level value is not a mapping
            (an empty file parses to ``None`` and is rejected as well).
    """
    # Open directly instead of checking existence first: this avoids a race
    # between the check and the open, and lets every I/O failure be reported
    # as a ConfigError rather than an unhandled traceback.
    try:
        with open(path, "r", encoding="utf-8") as handle:
            payload = yaml.safe_load(handle)
    except FileNotFoundError as exc:
        raise ConfigError(f"Configuration file not found: {path}") from exc
    except (OSError, UnicodeDecodeError) as exc:
        raise ConfigError(f"Could not read configuration file {path}: {exc}") from exc
    except yaml.YAMLError as exc:
        raise ConfigError(f"Invalid YAML in {path}: {exc}") from exc

    if not isinstance(payload, dict):
        raise ConfigError(f"Configuration file must contain a YAML mapping: {path}")
    return payload


def load_global_config() -> Dict[str, Any]:
    """Return the ``daylily`` section of the global CLI configuration.

    Returns:
        The mapping stored under the ``daylily`` key of ``GLOBAL_CONFIG_PATH``.
        A key that is present but empty (parsed as ``None``) yields an empty
        mapping so callers can always use ``.get``.

    Raises:
        ConfigError: If the file cannot be loaded, the ``daylily`` key is
            absent, or its value is neither empty nor a mapping.
    """
    config = _load_yaml_mapping(GLOBAL_CONFIG_PATH)
    if "daylily" not in config:
        raise ConfigError(f"Missing 'daylily' section in {GLOBAL_CONFIG_PATH}.")

    section = config["daylily"]
    if section is None:
        # "daylily:" with nothing under it parses to None; treat it as empty.
        return {}
    if not isinstance(section, dict):
        raise ConfigError(
            f"The 'daylily' section in {GLOBAL_CONFIG_PATH} must be a YAML mapping."
        )
    return section


def load_available_repos() -> Tuple[str, Dict[str, Dict[str, Any]]]:
    """Read the repository catalog and return its default key and entries.

    Returns:
        A ``(default_repository, repositories)`` pair taken from
        ``AVAILABLE_REPOS_PATH``.

    Raises:
        ConfigError: If the catalog cannot be loaded, ``repositories`` is not
            a non-empty mapping, or ``default_repository`` is missing/empty.
    """
    catalog = _load_yaml_mapping(AVAILABLE_REPOS_PATH)

    repo_entries = catalog.get("repositories")
    has_entries = isinstance(repo_entries, dict) and bool(repo_entries)
    if not has_entries:
        raise ConfigError(f"No repositories defined in {AVAILABLE_REPOS_PATH}.")

    default_key = catalog.get("default_repository")
    if default_key:
        return default_key, repo_entries
    raise ConfigError(f"Missing default_repository in {AVAILABLE_REPOS_PATH}.")


def ensure_directory(path: str) -> None:
    """Create ``path`` and any missing parents; do nothing if it already exists."""
    os.makedirs(name=path, exist_ok=True)


def safe_path_segment(value: str, *, field_name: str) -> str:
    """Validate ``value`` as a single directory-name segment and return it.

    Args:
        value: Candidate segment; falsy values are treated as empty and
            surrounding whitespace is ignored.
        field_name: Label used in error messages to identify the input.

    Returns:
        The whitespace-stripped segment.

    Raises:
        ConfigError: If the segment is empty, does not match
            ``SAFE_SEGMENT_RE``, or contains a traversal marker.
    """
    candidate = ("" if not value else str(value)).strip()

    if candidate == "":
        raise ConfigError(f"{field_name} is required.")

    if SAFE_SEGMENT_RE.fullmatch(candidate) is None:
        raise ConfigError(
            f"{field_name} must be a single path-safe segment matching "
            f"{SAFE_SEGMENT_RE.pattern!r}: {candidate!r}"
        )

    # A ".." anywhere (e.g. "a..b") still passes the pattern, so reject it
    # explicitly together with the other traversal-related characters.
    forbidden = ("..", "/", "%")
    if candidate == "." or any(token in candidate for token in forbidden):
        raise ConfigError(f"{field_name} must not contain path traversal.")

    return candidate


def _strip_shell_value(value: str) -> str:
    """Trim whitespace and remove one pair of matching surrounding quotes.

    Only a value that both starts and ends with the same quote character
    (single or double) is unquoted; anything else is returned trimmed.
    """
    trimmed = value.strip()
    if len(trimmed) < 2:
        return trimmed

    opening, closing = trimmed[0], trimmed[-1]
    is_quoted = opening == closing and opening in ("'", '"')
    return trimmed[1:-1] if is_quoted else trimmed


def _cluster_name_from_env(keys: Iterable[str] = CLUSTER_NAME_ENV_KEYS) -> Optional[str]:
    """Return the cluster name from the first non-empty environment variable.

    Args:
        keys: Environment variable names to try, in priority order.

    Returns:
        The validated value of the first variable that is set and non-empty,
        or ``None`` when none of them is.

    Raises:
        ConfigError: If that first non-empty value is not a safe path segment
            (later variables are not tried in that case).
    """
    first_hit = next(
        ((env_key, raw) for env_key in keys if (raw := os.environ.get(env_key))),
        None,
    )
    if first_hit is None:
        return None
    env_key, raw = first_hit
    return safe_path_segment(raw, field_name=env_key)


def _cluster_name_from_cfnconfig(path: str) -> Optional[str]:
    """Extract the cluster name from a ``key=value`` style config file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. The first line whose key is in ``CLUSTER_NAME_CONFIG_KEYS``
    decides the result.

    Args:
        path: Location of the config file.

    Returns:
        The validated cluster name, or ``None`` if the file does not exist or
        contains no recognised key.

    Raises:
        ConfigError: If the file cannot be read, or the matching value is not
            a safe path segment.
    """
    if not os.path.exists(path):
        return None

    try:
        with open(path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                entry = raw_line.strip()
                if entry.startswith("#"):
                    continue
                # An empty separator means the line had no "=" (this also
                # covers blank lines).
                name, separator, raw_value = entry.partition("=")
                if not separator:
                    continue
                name = name.strip()
                if name not in CLUSTER_NAME_CONFIG_KEYS:
                    continue
                return safe_path_segment(
                    _strip_shell_value(raw_value),
                    field_name=f"{path}:{name}",
                )
    except OSError as exc:
        raise ConfigError(f"Could not read ParallelCluster config {path}: {exc}") from exc

    return None


def resolve_executing_entity(explicit_value: Optional[str]) -> str:
    """Determine the entity directory name used under the clone root.

    Sources are tried in order: the explicit value, the cluster-name
    environment variables, then each file in ``CLUSTER_NAME_CONFIG_PATHS``.

    Args:
        explicit_value: Name supplied by the caller; used when non-empty.

    Returns:
        A validated single path segment.

    Raises:
        ConfigError: If a candidate value is invalid or no source provides one.
    """
    if explicit_value:
        return safe_path_segment(explicit_value, field_name="executing_entity")

    detected = _cluster_name_from_env()
    if not detected:
        # Fall back to the on-disk ParallelCluster config files.
        for config_path in CLUSTER_NAME_CONFIG_PATHS:
            detected = _cluster_name_from_cfnconfig(config_path)
            if detected:
                break

    if detected:
        return detected

    raise ConfigError(
        "executing_entity is required when ParallelCluster cluster identity is unavailable. "
        "Pass -u/--executing-entity, export DAYLILY_CLUSTER_NAME, or run on a headnode "
        "with /etc/parallelcluster/cfnconfig containing stack_name."
    )


def clone_repository(
    git_url: str,
    destination: str,
    ref: Optional[str],
) -> None:
    """Run ``git clone`` for ``git_url`` into ``destination``.

    Args:
        git_url: Repository URL (or path) handed to git.
        destination: Directory git should create the clone in.
        ref: Branch or tag passed via ``--branch``; omitted when falsy.

    Raises:
        RuntimeError: If git exits with a non-zero status, or if the git
            executable cannot be started at all (for example, not installed
            or not on ``PATH``).
    """
    cmd = ["git", "clone"]
    if ref:
        cmd.extend(["--branch", ref])
    # "--" ends option parsing, so a URL or destination that begins with "-"
    # is always treated as a positional argument and never as a git option.
    cmd.extend(["--", git_url, destination])
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        raise RuntimeError(f"Git clone failed with exit code {exc.returncode}.") from exc
    except OSError as exc:
        # Raised when the executable itself cannot be launched; surface it as
        # the same exception type callers already handle.
        raise RuntimeError(f"Could not run git: {exc}") from exc


def derive_default_path(repo_url: str) -> str:
    """Guess a directory name for a clone from its repository URL.

    The last path component is used, ignoring trailing slashes and a
    ``.git`` suffix. A colon is also treated as a separator so scp-style
    addresses such as ``git@host:repo.git`` yield ``repo``.

    Args:
        repo_url: Repository URL or path.

    Returns:
        The derived name, or ``"repository"`` when nothing usable remains
        (empty input, or a component of ``.``/``..``).
    """
    # Without this, "https://host/org/repo/" would have an empty basename.
    trimmed = repo_url.rstrip("/")
    basename = os.path.basename(trimmed)
    # scp-like syntax has no "/" before the repository: "user@host:repo.git".
    basename = basename.rsplit(":", 1)[-1]
    if basename.endswith(".git"):
        basename = basename[:-4]
    if basename in {"", ".", ".."}:
        return "repository"
    return basename


def print_available_repositories(repositories: Dict[str, Dict[str, Any]]) -> None:
    """Print a human-readable listing of the repository catalog to stdout.

    Each entry shows its key and display name (the key itself when no
    ``display_name`` is set), followed by the optional description and
    default ref, and is terminated by a blank line.
    """
    print("Available repositories:\n")
    for repo_key, details in repositories.items():
        entry_lines = [f"- {repo_key}: {details.get('display_name', repo_key)}"]

        summary = details.get("description")
        if summary:
            entry_lines.append(f"    {summary}")

        ref = details.get("default_ref")
        if ref:
            entry_lines.append(f"    Default ref: {ref}")

        # One trailing newline ends the last line, the second is the blank
        # separator between entries.
        print("\n".join(entry_lines), end="\n\n")


def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the clone tool."""
    parser = argparse.ArgumentParser(
        description="Clone Daylily analysis repositories into the FSx analysis workspace.",
    )
    parser.add_argument(
        "-d",
        "--destination",
        help="Name of the analysis workspace directory to create under the user-specific root.",
    )
    parser.add_argument(
        "-t",
        "--git-tag",
        help="Git branch or tag to clone. Defaults to the repository's configured default.",
    )
    parser.add_argument(
        "-r",
        "--git-repo",
        help="Override the git repository URL to clone. Overrides --repository.",
    )
    parser.add_argument(
        "-w",
        "--which-one",
        choices=("https", "ssh"),
        default="https",
        help="Repository URL transport to use from the registry. Defaults to https.",
    )
    parser.add_argument(
        "-c",
        "--clone-root",
        help="Root directory where analysis workspaces are created. Defaults to the configured analysis_root.",
    )
    parser.add_argument(
        "-u",
        "--executing-entity",
        help=(
            "Entity directory to create within the clone root. Defaults to the cluster "
            "name from exported cluster env or /etc/parallelcluster/cfnconfig."
        ),
    )
    parser.add_argument(
        "--repository",
        help="Key of the repository defined in daylily_pipeline_command_catalog.yaml to clone.",
    )
    parser.add_argument(
        "--list",
        action="store_true",
        help="List available repositories and exit.",
    )
    return parser


def main(argv: List[str]) -> int:
    """Parse arguments, resolve the target location and clone the repository.

    The clone is placed at
    ``<clone_root>/<executing_entity>/<destination>/<relative_path>``.

    Args:
        argv: Command-line arguments without the program name.

    Returns:
        Process exit status: ``0`` on success (including ``--list``), ``2``
        when configuration cannot be loaded or ``analysis_root`` is not
        configured, and ``1`` for every other reported error.

    Raises:
        SystemExit: Raised by argparse for ``--help`` and invalid arguments.
    """
    # Parse first so that --help and usage errors work even when the
    # configuration files are missing or broken.
    args = _build_arg_parser().parse_args(argv)

    try:
        global_cfg = load_global_config()
        default_repo_key, available_repos = load_available_repos()
    except ConfigError as err:
        print(f"Error: {err}", file=sys.stderr)
        return 2

    if args.list:
        print_available_repositories(available_repos)
        return 0

    if not args.destination:
        print(
            "Error: --destination (-d) is required when cloning a repository.",
            file=sys.stderr,
        )
        return 1

    # Guard against a global section that is not a mapping so that a
    # malformed config produces the error below rather than a crash.
    configured_root = global_cfg.get("analysis_root") if isinstance(global_cfg, dict) else None
    clone_root = args.clone_root or configured_root
    if not clone_root:
        print(
            "Error: analysis_root is not configured in daylily_cli_global.yaml.",
            file=sys.stderr,
        )
        return 2

    clone_root = os.path.abspath(os.path.expanduser(str(clone_root)))
    if not os.path.isdir(clone_root):
        print(
            f"Error: clone_root directory '{clone_root}' does not exist.",
            file=sys.stderr,
        )
        return 1

    try:
        executing_entity = resolve_executing_entity(args.executing_entity)
        destination = safe_path_segment(args.destination, field_name="destination")
    except ConfigError as err:
        print(f"Error: {err}", file=sys.stderr)
        return 1

    user_root = os.path.join(clone_root, executing_entity)
    try:
        ensure_directory(user_root)
    except OSError as err:
        print(f"Error: could not create directory '{user_root}': {err}", file=sys.stderr)
        return 1

    destination_root = os.path.join(user_root, destination)
    if os.path.exists(destination_root):
        print(
            f"Error: destination '{destination_root}' already exists.",
            file=sys.stderr,
        )
        return 1

    if args.git_repo:
        # An explicit URL bypasses the catalog entirely.
        git_url = args.git_repo
        relative_path = derive_default_path(git_url)
        default_ref = None
    else:
        repo_key = args.repository or default_repo_key
        repo_config = available_repos.get(repo_key)
        if repo_config is None:
            print(
                f"Error: repository '{repo_key}' is not defined in daylily_pipeline_command_catalog.yaml.",
                file=sys.stderr,
            )
            return 1
        if not isinstance(repo_config, dict):
            print(
                f"Error: repository '{repo_key}' in daylily_pipeline_command_catalog.yaml "
                "must be a YAML mapping.",
                file=sys.stderr,
            )
            return 1

        url_key = f"{args.which_one}_url"
        git_url = repo_config.get(url_key, "")
        if not git_url:
            print(
                f"Error: repository '{repo_key}' does not define a {url_key}.",
                file=sys.stderr,
            )
            return 1
        relative_path = repo_config.get("relative_path") or repo_key
        default_ref = repo_config.get("default_ref")

    git_ref = args.git_tag or default_ref

    # Keep only the final component so the clone always lands directly
    # inside destination_root.
    relative_path = os.path.basename(str(relative_path).rstrip("/"))
    target_repo_dir = os.path.join(destination_root, relative_path)
    try:
        ensure_directory(destination_root)
    except OSError as err:
        print(
            f"Error: could not create directory '{destination_root}': {err}",
            file=sys.stderr,
        )
        return 1

    print("Cloning repository...")
    try:
        clone_repository(git_url, target_repo_dir, git_ref)
    except (RuntimeError, OSError) as err:
        print(f"Error: {err}", file=sys.stderr)
        # destination_root was created just above; remove it again if it is
        # still empty so a retry is not rejected with "already exists".
        try:
            os.rmdir(destination_root)
        except OSError:
            # Not empty or already gone: leave whatever is there untouched.
            pass
        return 1

    print()
    print("Great success! Daylily repository cloned.")
    print(f"Repository: {git_url}")
    if git_ref:
        print(f"Reference : {git_ref}")
    print(f"Location  : {target_repo_dir}")
    print()
    print("To get started:")
    print(f"  cd {target_repo_dir}")
    print("  # initialize and run the analysis repository per its documentation")

    return 0


# Script entry point: pass the command-line arguments (without the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
