#!/usr/bin/env python3
# /// script
# requires-python = ">=3.10"
# ///
"""Install vendor-native Fabric agent profiles into a target repository."""

from __future__ import annotations

import argparse
import datetime as dt
import os
import re
import shutil
import subprocess
import sys
from pathlib import Path

# Parent of the directory that contains this script; profile sources live under it.
ROOT = Path(__file__).resolve().parents[1]
PROFILES = ROOT / "profiles"
# Markers wrapped around rendered markdown (and suffix-less) files by render_content();
# has_managed_marker() looks for both of them to decide a file may be overwritten.
MANAGED_BEGIN = "<!-- BEGIN MANAGED BY fabric-skills-settings -->"
MANAGED_END = "<!-- END MANAGED BY fabric-skills-settings -->"
# Markers delimiting the block that merge_gitignore() maintains inside .gitignore.
GITIGNORE_BEGIN = "# BEGIN MANAGED BY fabric-skills-settings"
GITIGNORE_END = "# END MANAGED BY fabric-skills-settings"
# Unmanaged files that may still be refreshed while every KEY=value line holds a
# placeholder (see can_refresh_unmanaged_placeholder()).
REFRESHABLE_PLACEHOLDER_FILES = {Path(".env.example")}
# Relative target path -> text expected somewhere in the existing file. An unmanaged
# file at one of these paths may be refreshed when it still contains that text
# (see can_refresh_unmanaged_scaffold()).
REFRESHABLE_SCAFFOLD_MARKERS = {
    Path("tool/data/mock-data-generator.py"):              "Generate deterministic sandbox CSV data for a Fabric topic",
    Path("tool/lakehouse/list-tables.py"):                "List Microsoft Fabric Lakehouse tables and column schemas",
    Path("tool/semantic-model/inspect.py"):              "Inspect Microsoft Fabric Semantic Models",
    Path("tool/pipeline/manage.py"):                      "Create, deploy, and test a Fabric Data Pipeline that chains topic notebooks",
    Path("tool/mcp/server.py"):                           "Minimal MCP server that exposes selected Microsoft Fabric CLI commands",
    Path("tool/mcp/graph-server.py"):                     "MCP server exposing the knowledge graph through the `fabric-graph` MCP `graph_*` tools",
    Path("tool/graph/__init__.py"):                       "Knowledge graph over the Fabric agent profile vault",
    Path("tool/graph/schema.py"):                         "Node, Edge, frontmatter parser, and path -> id mapping for the graph",
    Path("tool/graph/store.py"):                          "networkx-backed graph store with atomic JSON save/load",
    Path("tool/graph/builder.py"):                        "Discover markdown files, parse them, and assemble a graph",
    Path("tool/graph/search.py"):                         "BM25 + edge-aware re-rank over the knowledge graph",
    Path("tool/graph/extract.py"):                        "Auto-edge extraction from markdown prose",
    Path("tool/graph/lock.py"):                           "Cross-platform exclusive file lock for atomic graph writes",
    Path("tool/graph/writes.py"):                         "CRUD writes for the knowledge graph",
    Path("tool/notebook/build.py"):                       "Build simple Fabric .Notebook folders",
    Path("tool/notebook/deploy.py"):                      "Deploy, run, and monitor Fabric notebooks via REST API",
    Path("tool/notebook/smoke-test.sh"):                  "smoke-test.sh",
    Path("tool/setup/fab-sandbox"):                       "Sandbox wrapper for Microsoft Fabric CLI",
    Path("tool/setup/fabric-inventory-readonly"):         "Human-run read-only Fabric inventory helper",
    Path("tool/setup/setup.ps1"):                         "setup.ps1 - idempotent target repository setup for Fabric agent projects",
    Path("tool/setup/setup.sh"):                          "setup.sh - idempotent target repository setup for Fabric agent projects",
    Path("tool/validate/pipeline-lineage.py"):            "Verify staging-path consistency across notebooks in the same pipeline topic",
    Path("tool/workspace/init.py"):                       "Discover all Fabric workspaces and resources from the API into workspaces.json",
    Path("tool/workspace/switch.py"):                     "Switch the active Fabric workspace and write resource IDs to .env",
    Path("tool/workspace/transfer.py"):                   "Transfer notebooks and pipelines across Fabric workspaces by name-matching",
}
# Values (after comment and quote stripping) that has_non_placeholder_env_values()
# treats as untouched template defaults rather than local configuration.
PLACEHOLDER_VALUES = {
    "",
    "sandbox",
    "dev",
    "prod",
    "file",
    "<workspace-uuid>",
    "<lakehouse-uuid>",
    "<server>.<tenant>.fabric.microsoft.com",
    "<warehouse-or-sql-endpoint-db-name>",
}


def parse_args() -> argparse.Namespace:
    """Parse the installer's command-line options from ``sys.argv``.

    ``--profile`` and ``--target`` are mandatory; every remaining option is a
    boolean switch that is ``False`` unless given.

    Returns:
        The populated ``argparse.Namespace``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument("--profile", required=True, choices=("codex", "claude", "all"))
    cli.add_argument("--target", required=True, help="Target git repository path")

    # Registered in this order so the generated --help listing stays stable.
    switches = (
        ("--dry-run", "Print planned changes without writing"),
        ("--check", "Verify target state without writing"),
        ("--force", "Overwrite non-managed existing files"),
        ("--backup", "Back up replaced files"),
        ("--self-test", "Allow targeting this source repository"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)
    return cli.parse_args()


def is_git_repo(path: Path) -> bool:
    """Return whether ``path`` lies inside a git working tree.

    Runs ``git -C <path> rev-parse --show-toplevel`` and maps a zero exit
    status to ``True``. Git's own output is discarded.

    Args:
        path: Directory to probe.

    Returns:
        ``True`` when git exits successfully, ``False`` otherwise.

    Raises:
        SystemExit: If the ``git`` executable cannot be launched at all
            (for example, it is not installed or not on ``PATH``). Without
            this the caller would see a raw ``OSError`` traceback.
    """
    try:
        result = subprocess.run(
            ["git", "-C", str(path), "rev-parse", "--show-toplevel"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
    except OSError as exc:
        raise SystemExit(f"Unable to run git (is it installed and on PATH?): {exc}") from None
    return result.returncode == 0


def planned_profiles(profile: str) -> list[str]:
    """Expand the ``--profile`` choice into the list of profiles to install.

    ``"all"`` becomes ``["codex", "claude"]``; any other value is returned as
    a single-element list.
    """
    return ["codex", "claude"] if profile == "all" else [profile]


def _skill_entries(skills_dest: Path) -> list[tuple[Path, Path, bool]]:
    """List each shared ``SKILL.md`` followed by its ``sections/*.md`` files.

    Every target path is rooted at ``skills_dest`` and every entry is flagged
    as managed.
    """
    found: list[tuple[Path, Path, bool]] = []
    for skill_md in sorted((PROFILES / "skills").glob("*/SKILL.md")):
        skill_dir = skill_md.parent
        skill_dest = skills_dest / skill_dir.name
        found.append((skill_md, skill_dest / "SKILL.md", True))
        sections_dir = skill_dir / "sections"
        if sections_dir.is_dir():
            found.extend(
                (section, skill_dest / "sections" / section.name, True)
                for section in sorted(sections_dir.glob("*.md"))
            )
    return found


def collect_files(profile: str) -> list[tuple[Path, Path, bool]]:
    """Return (source, relative target, managed_marker) entries for one profile.

    The order is: the profile's top-level instruction file, its settings file,
    the shared skills (each ``SKILL.md`` followed by its sections), then the
    profile's agent definitions.

    Raises:
        ValueError: If ``profile`` is neither ``"codex"`` nor ``"claude"``.
    """
    if profile not in ("codex", "claude"):
        raise ValueError(f"Unknown profile: {profile}")

    profile_src = PROFILES / profile
    if profile == "codex":
        entries = [
            (profile_src / "AGENTS.md", Path("AGENTS.md"), True),
            (profile_src / "config.toml", Path(".codex/config.toml"), False),
        ]
        entries += _skill_entries(Path(".agents/skills"))
        entries += [
            (agent, Path(".codex/agents") / agent.name, False)
            for agent in sorted((profile_src / "agents").glob("*.toml"))
        ]
        return entries

    entries = [
        (profile_src / "CLAUDE.md", Path("CLAUDE.md"), True),
        (profile_src / "settings.local.json", Path(".claude/settings.local.json"), False),
    ]
    entries += _skill_entries(Path(".claude/skills"))
    entries += [
        (agent, Path(".claude/agents") / agent.name, True)
        for agent in sorted((profile_src / "agents").glob("*.md"))
    ]
    return entries


def collect_shared_files() -> list[tuple[Path, Path, bool]]:
    """Return (source, relative target, managed_marker) entries shared by all profiles.

    The order is: files under ``shared/memory`` (managed, placed under
    ``memory/``), files under ``shared/graph-content`` when that path exists
    (managed, placed under ``memory/graph-content/``), files under
    ``shared/project-layout`` excluding bytecode artefacts (unmanaged, same
    relative path), and finally ``shared/.env.example`` (unmanaged).
    """
    shared_root = PROFILES / "shared"
    memory_src = shared_root / "memory"
    graph_src = shared_root / "graph-content"
    layout_src = shared_root / "project-layout"
    compiled_suffixes = {".pyc", ".pyo", ".pyd"}

    collected: list[tuple[Path, Path, bool]] = [
        (item, Path("memory") / item.relative_to(memory_src), True)
        for item in sorted(memory_src.rglob("*"))
        if item.is_file()
    ]
    if graph_src.exists():
        collected += [
            (item, Path("memory/graph-content") / item.relative_to(graph_src), True)
            for item in sorted(graph_src.rglob("*"))
            if item.is_file()
        ]
    collected += [
        (item, item.relative_to(layout_src), False)
        for item in sorted(layout_src.rglob("*"))
        if item.is_file()
        and "__pycache__" not in item.parts
        and item.suffix not in compiled_suffixes
    ]
    collected.append((shared_root / ".env.example", Path(".env.example"), False))
    return collected


def render_content(src: Path, managed: bool) -> str:
    """Read ``src`` as UTF-8 and return the text that should be installed.

    Unmanaged files, and managed files whose suffix is neither ``.md`` nor
    empty, are returned verbatim. Otherwise the text is wrapped in the managed
    BEGIN/END markers with trailing whitespace trimmed. When the file opens
    with a ``---`` front matter block, the markers wrap only the body, so the
    front matter stays at the top of the file. The closing fence is searched
    from offset 4, so an empty front matter block is not recognised.
    """
    text = src.read_text(encoding="utf-8")
    if not (managed and src.suffix in {".md", ""}):
        return text

    fence = "\n---\n"
    frontmatter, body = "", text
    if text.startswith("---\n"):
        closing = text.find(fence, 4)
        if closing != -1:
            split_at = closing + len(fence)  # just past the closing fence line
            frontmatter, body = text[:split_at], text[split_at:]
    return f"{frontmatter}{MANAGED_BEGIN}\n{body.rstrip()}\n{MANAGED_END}\n"


def has_managed_marker(path: Path) -> bool:
    """Return whether ``path`` is a regular file containing both managed markers.

    Missing paths and non-files yield ``False``. The file is decoded as UTF-8
    with undecodable bytes ignored.
    """
    if not path.is_file():  # also False when the path does not exist
        return False
    contents = path.read_text(encoding="utf-8", errors="ignore")
    return all(marker in contents for marker in (MANAGED_BEGIN, MANAGED_END))


def strip_inline_comment(value: str) -> str:
    """Remove shell-style comments from simple KEY=value template lines.

    The first ``#`` found outside a quoted span starts the comment; everything
    from there on is dropped. A quoted span runs from an opening ``'`` or ``"``
    to the next occurrence of that same character, so the other quote
    character inside it is ordinary text (``"it's" # note`` -> ``"it's"``).

    Args:
        value: The text to the right of ``=`` on a template line.

    Returns:
        ``value`` without its trailing comment, stripped of surrounding
        whitespace. Quote characters themselves are preserved.
    """
    open_quote: str | None = None
    for idx, char in enumerate(value):
        if open_quote is not None:
            # Inside quotes only the matching quote matters; a different quote
            # character must not replace the tracked one.
            if char == open_quote:
                open_quote = None
        elif char in ("'", '"'):
            open_quote = char
        elif char == "#":
            return value[:idx].strip()
    return value.strip()


def has_non_placeholder_env_values(path: Path) -> bool:
    """Return true if a refreshable env template appears to contain local values.

    Blank lines, ``#`` comment lines, and lines without ``=`` are skipped. For
    every other line the value (inline comment removed, surrounding quotes
    stripped) is compared against ``PLACEHOLDER_VALUES``; a value outside that
    set makes the function return ``True`` when it is non-empty, matches a
    suspicious pattern, or belongs to a sensitive-looking key.
    """
    url_or_token_patterns = [
        r"https?://",
        r"abfss://",
        r"jdbc:",
        r"AccountKey=",
        r"SharedAccessSignature=",
        r"eyJ[A-Za-z0-9_-]+",
    ]
    secret_name = re.compile(r"(SECRET|PASSWORD|TOKEN|KEY|CONNECTION_STRING)", re.IGNORECASE)

    contents = path.read_text(encoding="utf-8", errors="ignore")
    for raw_line in contents.splitlines():
        entry = raw_line.strip()
        if entry == "" or entry.startswith("#") or "=" not in entry:
            continue

        name, _, remainder = entry.partition("=")
        value = strip_inline_comment(remainder).strip().strip('"').strip("'")
        if value in PLACEHOLDER_VALUES:
            continue

        # NOTE(review): the final `value` test already covers every non-empty
        # value, so the two more specific checks cannot change the outcome.
        if (
            (secret_name.search(name) and value)
            or any(re.search(pattern, value) for pattern in url_or_token_patterns)
            or value
        ):
            return True
    return False


def can_refresh_unmanaged_placeholder(rel: Path, dest: Path) -> bool:
    """Return whether an unmanaged placeholder file may be overwritten.

    True only when ``rel`` is listed in ``REFRESHABLE_PLACEHOLDER_FILES``,
    ``dest`` exists, and ``dest`` holds nothing but placeholder values.
    """
    eligible = rel in REFRESHABLE_PLACEHOLDER_FILES and dest.exists()
    return eligible and not has_non_placeholder_env_values(dest)


def can_refresh_unmanaged_scaffold(rel: Path, dest: Path) -> bool:
    """Return whether an unmanaged scaffold file may be overwritten.

    True only when ``rel`` has an entry in ``REFRESHABLE_SCAFFOLD_MARKERS``,
    ``dest`` exists, and the marker text for ``rel`` occurs in ``dest``
    (decoded as UTF-8 with undecodable bytes ignored).
    """
    expected = REFRESHABLE_SCAFFOLD_MARKERS.get(rel)
    if expected is not None and dest.exists():
        return expected in dest.read_text(encoding="utf-8", errors="ignore")
    return False


def backup_file(path: Path) -> Path:
    """Copy ``path`` to a timestamped sibling and return the copy's path.

    The backup is named ``<name>.<YYYYmmddTHHMMSSZ>.bak`` using the current
    UTC time and is created with ``shutil.copy2``.

    Args:
        path: Existing file to back up.

    Returns:
        Path of the newly written backup file.
    """
    # datetime.utcnow() is deprecated since Python 3.12; a timezone-aware "now"
    # in UTC formats to the same stamp.
    stamp = dt.datetime.now(dt.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    backup = path.with_name(f"{path.name}.{stamp}.bak")
    shutil.copy2(path, backup)
    return backup


def write_file(src: Path, dest: Path, managed: bool, args: argparse.Namespace, rel: Path | None = None) -> str:
    """Install one rendered source file at ``dest`` and describe what happened.

    Args:
        src: Source file inside the profiles tree.
        dest: Absolute destination path in the target repository.
        managed: Whether the rendered content carries the managed markers.
        args: Parsed CLI options; ``check``, ``force``, ``dry_run`` and
            ``backup`` are consulted.
        rel: Destination path relative to the target, used to look up the
            refreshable placeholder/scaffold exceptions.

    Returns:
        A status line: ``MISSING``/``DIFF``/``OK`` in check mode, otherwise
        ``UNCHANGED``, ``CREATE`` or ``UPDATE``, each followed by ``dest``.

    Raises:
        SystemExit: When ``dest`` exists with different content and is not
            forced, managed, or covered by a refresh exception.
    """
    rendered = render_content(src, managed)
    dest_present = dest.exists()

    # --check only reports; it never writes.
    if args.check:
        if not dest_present:
            return f"MISSING {dest}"
        on_disk = dest.read_text(encoding="utf-8", errors="ignore")
        return f"OK {dest}" if on_disk == rendered else f"DIFF {dest}"

    verb = "CREATE"
    if dest_present:
        if dest.read_text(encoding="utf-8", errors="ignore") == rendered:
            return f"UNCHANGED {dest}"
        lookup = rel or Path()
        overwrite_allowed = (
            args.force
            or has_managed_marker(dest)
            or can_refresh_unmanaged_placeholder(lookup, dest)
            or can_refresh_unmanaged_scaffold(lookup, dest)
        )
        if not overwrite_allowed:
            raise SystemExit(f"Refusing to overwrite non-managed file: {dest}")
        verb = "UPDATE"

    summary = f"{verb} {dest}"
    if args.dry_run:
        return summary

    dest.parent.mkdir(parents=True, exist_ok=True)
    if dest_present and args.backup:
        saved_copy = backup_file(dest)
        print(f"BACKUP {dest} -> {saved_copy}")
    dest.write_text(rendered, encoding="utf-8")
    # Carry the executable bit over for scripts shipped as executables.
    if os.access(src, os.X_OK):
        dest.chmod(0o755)
    return summary


# Ignore patterns that merge_gitignore() appends to the managed .gitignore block,
# keyed by profile name; the "shared" entries are always included.
_PROFILE_IGNORES: dict[str, list[str]] = {
    "shared": ["tool/", "contracts/", "data/", "memory/"],
    "codex": ["AGENTS.md", ".agents/", ".codex/"],
    "claude": ["CLAUDE.md", ".claude/"],
}


def merge_gitignore(target: Path, profiles: list[str], args: argparse.Namespace) -> str:
    """Create or refresh the managed ignore block in ``<target>/.gitignore``.

    The block consists of the shared ``.gitignore.fragment`` followed by the
    ignore entries for ``"shared"`` and each installed profile, wrapped in the
    ``GITIGNORE_BEGIN`` / ``GITIGNORE_END`` markers. An existing block is
    replaced in place; otherwise the block is appended, separated from earlier
    content by one blank line. No leading blank lines are added when nothing
    precedes the block.

    Args:
        target: Root of the target repository.
        profiles: Profiles being installed (``"shared"`` is added implicitly).
        args: Parsed CLI options; ``check`` and ``dry_run`` are consulted.

    Returns:
        A status line: ``MISSING``/``DIFF``/``OK`` in check mode, otherwise
        ``UNCHANGED``, ``UPDATE`` or ``CREATE``.

    Raises:
        SystemExit: When a BEGIN marker is present but the only END marker
            precedes it, since the block cannot be replaced safely.
    """
    src = PROFILES / "shared" / ".gitignore.fragment"
    lines = [src.read_text(encoding="utf-8").rstrip()]
    profile_entries = [e for p in ["shared"] + profiles for e in _PROFILE_IGNORES.get(p, [])]
    if profile_entries:
        lines.append("\n# Installed agent profiles")
        lines.extend(profile_entries)
    fragment = "\n".join(lines)
    block = f"{GITIGNORE_BEGIN}\n{fragment}\n{GITIGNORE_END}\n"
    dest = target / ".gitignore"

    if args.check:
        if not dest.exists():
            return "MISSING .gitignore managed block"
        text = dest.read_text(encoding="utf-8", errors="ignore")
        return "OK .gitignore" if block in text else "DIFF .gitignore managed block"

    if dest.exists():
        text = dest.read_text(encoding="utf-8", errors="ignore")
        if block in text:
            return "UNCHANGED .gitignore"
        action = "UPDATE .gitignore"

        # Locate the first BEGIN marker and the first END marker after it.
        begin = text.find(GITIGNORE_BEGIN)
        end = -1 if begin == -1 else text.find(GITIGNORE_END, begin + len(GITIGNORE_BEGIN))
        if end != -1:
            before = text[:begin].rstrip()
            after = text[end + len(GITIGNORE_END):].lstrip("\n")
            # Only add the separating newline when something precedes the block.
            new_text = (f"{before}\n" if before else "") + block + after
        elif begin != -1 and GITIGNORE_END in text:
            # END exists only before BEGIN: refuse rather than guess the extent.
            raise SystemExit(f"Malformed managed block in {dest}: END marker precedes BEGIN marker")
        else:
            existing = text.rstrip()
            new_text = (f"{existing}\n\n" if existing else "") + block
    else:
        new_text = block
        action = "CREATE .gitignore"

    if args.dry_run:
        return action
    dest.write_text(new_text, encoding="utf-8")
    return action


def remove_obsolete_profile_files(target: Path, profiles: list[str], args: argparse.Namespace) -> list[str]:
    """Clean up the legacy ``.claude/settings.json`` when it is still pristine.

    Nothing happens unless the ``claude`` profile is being installed and the
    legacy file exists. A legacy file whose content differs from the shipped
    ``settings.local.json`` is kept. A matching one is reported as
    ``OBSOLETE`` in check mode, reported as ``DELETE`` in dry-run mode, and
    otherwise deleted.

    Returns:
        Zero or one status lines describing the outcome.
    """
    legacy = target / ".claude" / "settings.json"
    if "claude" not in profiles or not legacy.exists():
        return []

    shipped = render_content(PROFILES / "claude" / "settings.local.json", False)
    if legacy.read_text(encoding="utf-8", errors="ignore") != shipped:
        return [f"KEEP custom obsolete path {legacy}"]

    if args.check:
        return [f"OBSOLETE {legacy}"]
    if not args.dry_run:
        legacy.unlink()
    return [f"DELETE {legacy}"]


def main() -> int:
    """Run the installer and return the process exit status.

    Validates the target, processes the profile-specific files, then the
    shared files, then the obsolete-file cleanup and the ``.gitignore`` merge.
    All status lines are printed once the run completes.

    Returns:
        ``1`` when ``--check`` found a ``MISSING``, ``DIFF`` or ``OBSOLETE``
        entry, otherwise ``0``.

    Raises:
        SystemExit: When the target is missing, is not a directory, is not a
            git repository, or is this source repository without
            ``--self-test``.
    """
    args = parse_args()
    target = Path(args.target).expanduser().resolve()

    if not (target.exists() and target.is_dir()):
        raise SystemExit(f"Target does not exist or is not a directory: {target}")
    if not is_git_repo(target):
        raise SystemExit(f"Target is not a git repository: {target}")
    if target == ROOT and not args.self_test:
        raise SystemExit("Refusing to install into this source repository without --self-test")

    profiles = planned_profiles(args.profile)
    report: list[str] = [
        write_file(src, target / rel, managed, args, rel)
        for profile in profiles
        for src, rel, managed in collect_files(profile)
    ]

    for src, rel, managed in collect_shared_files():
        dest = target / rel
        # Existing files under memory/ without the managed markers are left untouched.
        if dest.exists() and rel.parts[:1] == ("memory",) and not has_managed_marker(dest):
            report.append(f"KEEP existing {dest}")
        else:
            report.append(write_file(src, dest, managed, args, rel))

    report.extend(remove_obsolete_profile_files(target, profiles, args))
    report.append(merge_gitignore(target, profiles, args))

    for line in report:
        print(line)

    failing_prefixes = ("MISSING", "DIFF", "OBSOLETE")
    drift_found = args.check and any(line.startswith(failing_prefixes) for line in report)
    return 1 if drift_found else 0


# Run the installer only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
