#!/usr/bin/env python3
"""Merge DIVERGENT recovered jsonls into their live profile counterparts.

Reads the latest verify-manifest. For each entry:
  - SUBSET    : delete recovered (it's a true duplicate by record key).
  - DIVERGENT : append recovered-only records to the live <UUID>.jsonl,
                then delete recovered.
  - ORPHAN    : leave alone.
  - IDENTICAL : delete recovered.

Atomicity & safety:
  - Live file backed up to data/merge-backups/<ts>/<profile>/<encoded-cwd>/<UUID>.jsonl.bak
    before modification.
  - Append happens via temp-file-in-same-dir + os.replace() (atomic on POSIX).
  - Records reserialized in compact JSON to match live-profile formatting
    ({"key":"value"} with no whitespace).
  - Recovered records sorted by `timestamp` field (when present) before append,
    so the resulting file is roughly chronological.
  - Per-file failures are logged but don't abort the run.

Without --apply, dry-run only: prints planned actions, writes nothing.
"""
from __future__ import annotations

import argparse
import datetime as _dt
import json
import os
import shutil
import sys
from pathlib import Path

from claude_timeline.recovery import (
    discover_real_profiles,
    profile_of,
    record_key,
)

# Directory two levels above this file's resolved location; the data/
# directories below are anchored here.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Where latest_manifest() looks for verify-*.json manifests.
MANIFEST_DIR = PROJECT_ROOT / "data" / "verify-manifests"
# Parent of the per-run timestamped directories that main() creates for
# backups and the merge log.
BACKUP_ROOT = PROJECT_ROOT / "data" / "merge-backups"
# Evaluated once at import time; not referenced anywhere else in this file.
# NOTE(review): presumably kept for external users or for a side effect of
# discovery -- confirm before removing.
REAL_PROFILES = discover_real_profiles()


def latest_manifest() -> Path:
    """Return the verify-manifest in MANIFEST_DIR whose path sorts last.

    Candidates are the files matching ``verify-*.json``. They are compared
    as paths, so "latest" means the greatest path in sort order.

    Raises:
        SystemExit: if no file in MANIFEST_DIR matches the pattern.
    """
    newest = max(MANIFEST_DIR.glob("verify-*.json"), default=None)
    if newest is None:
        raise SystemExit(f"no manifests in {MANIFEST_DIR}")
    return newest


def parse_jsonl(p: Path) -> list[dict]:
    """Load every JSON-object line of the file at `p`.

    The file is read in binary mode, one line at a time. Lines that are
    blank after stripping, that fail to decode or parse as JSON, or whose
    top-level value is not an object are silently skipped.

    Returns:
        The parsed dicts, in file order.
    """
    def decode(payload: bytes):
        # Returns None for anything unparseable so the caller can filter on
        # type alone (None is not a dict).
        try:
            return json.loads(payload)
        except (UnicodeDecodeError, json.JSONDecodeError):
            return None

    records: list[dict] = []
    with p.open("rb") as handle:
        for line in handle:
            payload = line.strip()
            if payload:
                parsed = decode(payload)
                if isinstance(parsed, dict):
                    records.append(parsed)
    return records


def keys_of(records: list[dict]) -> set[str]:
    """Return the set of truthy record_key() values found in `records`.

    Records for which record_key() yields a falsy value (e.g. None or an
    empty string) contribute nothing to the result.
    """
    return {key for rec in records if (key := record_key(rec))}


def backup(real: Path, run_root: Path) -> Path:
    """Copy `real` into `run_root`, keeping its profile-relative layout.

    The copy lands at ``run_root / <profile dir name> / <path of real
    relative to the profile>``; missing parent directories are created.

    Returns:
        The path of the backup copy.

    Raises:
        RuntimeError: if profile_of() reports no profile for `real`.
    """
    profile_dir = profile_of(real)
    if profile_dir is None:
        raise RuntimeError(f"path {real} is not under any real profile")

    target = run_root.joinpath(profile_dir.name, real.relative_to(profile_dir))
    target.parent.mkdir(parents=True, exist_ok=True)
    # copy2 (rather than copy) so the backup keeps the original's metadata
    # such as mtime and permission bits.
    shutil.copy2(real, target)
    return target


def append_records(real: Path, new_records: list[dict]) -> int:
    """Atomically append `new_records` to the JSONL file `real`.

    The new content (existing bytes + appended lines) is written to a temp
    file next to `real`, flushed and fsync'ed, and then moved over `real`
    with os.replace(), so readers see either the old file or the complete
    new one.

    Args:
        real: Existing JSONL file to extend. It is read fully into memory.
        new_records: Records to append. They are written sorted by their
            string `timestamp` field; records whose `timestamp` is missing
            or not a string sort first. The sort is stable.

    Returns:
        The number of records appended (0 if `new_records` is empty, in
        which case `real` is not touched at all).

    Raises:
        OSError: if reading `real` or writing/replacing the temp file fails.
            The temp file is removed on a best-effort basis.
    """
    if not new_records:
        return 0

    # Sort by timestamp if available -- keeps the appended tail chronological.
    def ts(r: dict) -> str:
        v = r.get("timestamp")
        return v if isinstance(v, str) else ""
    ordered = sorted(new_records, key=ts)

    # Read existing bytes; a missing final newline must be supplied so the
    # first appended record does not fuse with the last existing line.
    existing = real.read_bytes()
    needs_nl = bool(existing) and not existing.endswith(b"\n")

    # The temp file lives in the same directory so os.replace() is a rename
    # within one filesystem. The PID suffix keeps concurrent runs apart.
    tmp = real.with_suffix(real.suffix + f".tmp.{os.getpid()}")
    try:
        with tmp.open("wb") as f:
            f.write(existing)
            if needs_nl:
                f.write(b"\n")
            for r in ordered:
                # Compact separators match the live files' formatting.
                line = json.dumps(r, separators=(",", ":"), ensure_ascii=False)
                f.write(line.encode("utf-8"))
                f.write(b"\n")
            # Force the data to disk before the rename. Without this, a crash
            # shortly after os.replace() could leave `real` pointing at an
            # empty or truncated file.
            f.flush()
            os.fsync(f.fileno())
        # Preserve the original's permission bits. Ownership and timestamps
        # are those of the newly written file.
        os.chmod(tmp, real.stat().st_mode)
        os.replace(tmp, real)
    finally:
        # After a successful replace the temp name no longer exists; after a
        # failure this removes the partial file. Cleanup errors are ignored
        # so they cannot mask the original exception.
        try:
            tmp.unlink(missing_ok=True)
        except OSError:
            pass
    return len(ordered)


def main() -> int:
    """Run the merge described by a verify-manifest.

    Command-line flags:
        --apply     Perform the merge and deletions. Without it, only the
                    plan summary (and up to three sample DIVERGENT targets)
                    is printed and nothing is written.
        --manifest  Manifest JSON to use; defaults to latest_manifest().

    With --apply, in order:
        1. DIVERGENT entries: back up the live file, append the records
           whose key is absent from it, then delete the recovered file.
        2. SUBSET and IDENTICAL entries: delete the recovered file.
        3. Remove empty subdirectories of ~/.claude-lost/projects/.
        4. Write merge-log.json into this run's backup directory.
    ORPHAN entries are counted in the plan but never touched.

    Per-entry failures are recorded in the log and do not stop the run.

    Returns:
        0 for a dry run or a run without failures, 2 if any entry failed.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument(
        "--apply", action="store_true",
        help="Actually merge + delete. Without this flag, dry-run only.",
    )
    ap.add_argument(
        "--manifest", type=Path, default=None,
        help="Path to a verify-manifest JSON (default: latest in data/verify-manifests/).",
    )
    args = ap.parse_args()

    m_path = args.manifest or latest_manifest()
    print(f"[merge] manifest: {m_path}", file=sys.stderr)
    data = json.loads(m_path.read_text())
    results = data["results"]

    # Bucket manifest entries by class. An entry with a class outside these
    # four raises KeyError here, before anything is modified.
    plan = {"SUBSET": [], "DIVERGENT": [], "IDENTICAL": [], "ORPHAN": []}
    for r in results:
        plan[r["class"]].append(r)

    print(f"[merge] plan: "
          f"DIVERGENT={len(plan['DIVERGENT'])} merge+delete, "
          f"SUBSET={len(plan['SUBSET'])} delete, "
          f"IDENTICAL={len(plan['IDENTICAL'])} delete, "
          f"ORPHAN={len(plan['ORPHAN'])} keep",
          file=sys.stderr)

    if not args.apply:
        # Show a sample.
        if plan["DIVERGENT"][:3]:
            print("\nFirst few DIVERGENT plans:", file=sys.stderr)
            for r in plan["DIVERGENT"][:3]:
                target = profile_of(Path(r["real_match"]))
                print(f"  {r['uuid']:<40} -> {target.name if target else '?'}", file=sys.stderr)
        print("\nDry run -- pass --apply to execute.", file=sys.stderr)
        return 0

    # One backup/log directory per run, named by a UTC timestamp with
    # microseconds so that separate runs get separate directories.
    ts = _dt.datetime.now(_dt.timezone.utc).strftime("%Y%m%dT%H%M%S_%fZ")
    run_root = BACKUP_ROOT / ts
    run_root.mkdir(parents=True, exist_ok=True)
    print(f"[merge] backups -> {run_root}", file=sys.stderr)

    log = []
    appended_total = 0
    deleted_total = 0
    failed_total = 0

    # Live files already backed up in this run. Several recovered files may
    # share one live counterpart; backup() writes to a path derived only
    # from the live file, so a second backup would overwrite the pristine
    # copy with the already-merged content.
    backed_up: set[Path] = set()

    # 1) DIVERGENT: merge then delete.
    for r in plan["DIVERGENT"]:
        rec = Path(r["recovered"])
        real = Path(r["real_match"])
        try:
            # The live file is re-read for every entry, so records appended
            # by an earlier entry in this run are seen as already present.
            real_records = parse_jsonl(real)
            real_keys = keys_of(real_records)
            rec_records = parse_jsonl(rec)
            # Records without a key cannot be compared and are not merged.
            new_records = [
                obj for obj in rec_records
                if (k := record_key(obj)) is not None and k not in real_keys
            ]
            if real not in backed_up:
                backup(real, run_root)
                backed_up.add(real)
            n = append_records(real, new_records)
            # Only reached when backup and append both succeeded.
            rec.unlink()
            log.append({
                "class": "DIVERGENT",
                "uuid": r["uuid"],
                "appended": n,
                "real": str(real),
                "recovered": str(rec),
            })
            appended_total += n
            deleted_total += 1
        except Exception as e:  # pragma: no cover -- defensive
            # Broad on purpose: one bad entry must not abort the whole run.
            failed_total += 1
            log.append({
                "class": "DIVERGENT",
                "uuid": r["uuid"],
                "error": str(e),
                "real": str(real),
                "recovered": str(rec),
            })
            print(f"  FAIL {r['uuid']}: {e}", file=sys.stderr)

    # 2) SUBSET + IDENTICAL: just delete.
    for r in plan["SUBSET"] + plan["IDENTICAL"]:
        rec = Path(r["recovered"])
        try:
            rec.unlink()
            log.append({
                "class": r["class"], "uuid": r["uuid"],
                "appended": 0, "recovered": str(rec),
            })
            deleted_total += 1
        except FileNotFoundError:
            # Already removed (e.g. by a previous run): logged, not a failure.
            log.append({
                "class": r["class"], "uuid": r["uuid"],
                "appended": 0, "skipped": "already gone", "recovered": str(rec),
            })
        except Exception as e:
            failed_total += 1
            log.append({
                "class": r["class"], "uuid": r["uuid"],
                "error": str(e), "recovered": str(rec),
            })
            print(f"  FAIL delete {r['uuid']}: {e}", file=sys.stderr)

    # 3) Prune empty per-project subdirs under ~/.claude-lost/projects/
    lost_projects = Path.home() / ".claude-lost" / "projects"
    pruned_dirs = 0
    if lost_projects.is_dir():
        for sub in sorted(lost_projects.iterdir(), reverse=True):
            if sub.is_dir() and not any(sub.iterdir()):
                try:
                    sub.rmdir()
                    pruned_dirs += 1
                except OSError:
                    # Best-effort cleanup; a directory that cannot be
                    # removed is simply left in place.
                    pass

    # 4) Write the run log.
    log_path = run_root / "merge-log.json"
    log_path.write_text(json.dumps({
        "manifest": str(m_path),
        "appended_records": appended_total,
        "deleted_files": deleted_total,
        "failed": failed_total,
        "pruned_dirs": pruned_dirs,
        "entries": log,
    }, indent=2))

    print("\n[merge] done.")
    print(f"  records appended : {appended_total}")
    print(f"  files deleted    : {deleted_total}")
    print(f"  failures         : {failed_total}")
    print(f"  empty dirs pruned: {pruned_dirs}")
    print(f"  log              : {log_path}")
    print(f"  backups          : {run_root}")
    return 0 if failed_total == 0 else 2


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())
