#!/usr/bin/env python3
"""Adopt the truly-orphan recovered sessions (no <UUID>/ dir in any real
profile) into the most-likely profile based on their cwd.

Heuristic: for each orphan, encode its cwd to the project-dir form
(`/` -> `-`) and look up that encoded path in every real profile's
projects/ directory. The profile with the MOST existing sessions for
that project is the winner. Tie -> first match in profile order.
Zero matches -> NO_OWNER (left in ~/.claude-lost/).

Read-only against ~/.claude-* until --apply. Writes audit manifests to
data/restore-manifests/ like its sibling restore-to-profiles script.

Usage:
    uv run scripts/adopt-orphans            # dry-run, prints attribution plan
    uv run scripts/adopt-orphans --apply    # actually move
"""

from __future__ import annotations

import argparse
import glob
import hashlib
import json
import os
import sys
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from typing import NamedTuple

# Root of the invoking user's home directory; every profile path hangs off it.
HOME = Path.home()

# Holding area for recovered sessions that have not been attributed yet.
LOST = HOME.joinpath(".claude-lost")

# Candidate destination profiles. List order is the profile order.
REAL_PROFILES = [
    HOME / dir_name
    for dir_name in (".claude-personal", ".claude-work", ".claude-the-third")
]

# Repository root: this file lives one directory below it.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
MANIFEST_DIR = PROJECT_ROOT.joinpath("data", "restore-manifests")

# Permission bits applied to every adopted session file (owner read/write).
TARGET_FILE_MODE = 0o600


def is_uuid_name(name: str) -> bool:
    """Return True if *name* has the canonical 8-4-4-4-12 hex UUID layout.

    Checking only the length and the number of dashes would also accept
    strings such as ``"----" + "a" * 32`` or names containing non-hex
    characters. Here the four dashes must sit at offsets 8, 13, 18 and 23
    and every other character must be a hexadecimal digit (either case).
    """
    if len(name) != 36:
        return False
    for pos, ch in enumerate(name):
        if pos in (8, 13, 18, 23):
            if ch != "-":
                return False
        elif ch not in "0123456789abcdefABCDEF":
            return False
    return True


def sha256_file(p: Path) -> str:
    """Return the hex SHA-256 digest of the file at *p*.

    The file is streamed in 64 KiB blocks so large session logs are never
    held in memory all at once.
    """
    digest = hashlib.sha256()
    with p.open("rb") as stream:
        while block := stream.read(64 * 1024):
            digest.update(block)
    return digest.hexdigest()


def encode_cwd(cwd: str) -> str:
    """Turn a working-directory path into its project-dir name (`/` -> `-`)."""
    return "-".join(cwd.split("/"))


def first_line_meta(path: Path) -> dict:
    """Return the first line of *path* that parses as a JSON object.

    Blank lines, malformed lines, and lines that decode to a non-object
    JSON value (list, number, string, ...) are skipped, so the result is
    always a dict and callers can use ``.get`` on it safely. Returns ``{}``
    when no line qualifies.

    The file is read as UTF-8 with undecodable bytes replaced, rather than
    with the locale's default encoding, so a stray bad byte in a recovered
    file cannot abort the scan.
    """
    with path.open(encoding="utf-8", errors="replace") as f:
        for line in f:
            try:
                record = json.loads(line)
            except json.JSONDecodeError:
                continue
            # json.loads also accepts bare scalars and arrays; only an
            # object can carry the metadata keys callers look up.
            if isinstance(record, dict):
                return record
    return {}


# ─────────────────────────────────────────────────────────────────────────


def collect_real_state() -> tuple[set[str], dict[str, dict[str, int]]]:
    """Survey the real profiles' projects/ trees.

    Returns ``(uuids_present_in_any_real, per_profile_project_session_counts)``.

    ``uuids_present_in_any_real`` is the union of all ``<UUID>.jsonl`` file
    stems AND ``<UUID>/`` directory names across the profiles in
    ``REAL_PROFILES``. It distinguishes true orphans (not in this set) from
    sessions that already exist somewhere (in this set).

    ``per_profile_project_session_counts`` maps profile_name -> encoded_dir
    -> number of DISTINCT session UUIDs inside that encoded dir. A session
    that has both a ``<UUID>.jsonl`` file and a ``<UUID>/`` directory is
    counted once, so the count really is a number of sessions. Encoded dirs
    with no sessions are omitted. The outer dict is keyed in
    ``REAL_PROFILES`` order, including profiles with no projects/ directory.
    """
    uuids_anywhere: set[str] = set()
    counts: dict[str, dict[str, int]] = {p.name: {} for p in REAL_PROFILES}

    for prof in REAL_PROFILES:
        proj_root = prof / "projects"
        if not proj_root.is_dir():
            continue
        for enc_dir in proj_root.iterdir():
            if not enc_dir.is_dir():
                continue
            # Distinct session UUIDs seen in this project dir; a set keeps a
            # jsonl + same-named directory pair from being counted twice.
            seen: set[str] = set()
            for child in enc_dir.iterdir():
                base = child.name
                if child.is_file() and base.endswith(".jsonl"):
                    uuid = base[:-len(".jsonl")]
                    if is_uuid_name(uuid):
                        seen.add(uuid)
                elif child.is_dir() and is_uuid_name(base):
                    seen.add(base)
            if seen:
                uuids_anywhere |= seen
                counts[prof.name][enc_dir.name] = len(seen)

    return uuids_anywhere, counts


# ─────────────────────────────────────────────────────────────────────────


class Adoption(NamedTuple):
    """Planned attribution of one orphaned session file to a profile."""

    uuid: str            # session UUID: the source file name without ".jsonl"
    cwd: str             # "cwd" from the session metadata, or "/_unknown_cwd"
    src: Path            # current location of the .jsonl under ~/.claude-lost/projects/
    encoded: str         # project-dir name that matched in the winner profile;
                         # the encoding of cwd itself when nothing matched
    winner_profile: str  # profile directory name; may be "NO_OWNER"
    evidence: str        # human-readable rationale


def _candidate_forms(cwd: str) -> list[str]:
    """Return the encoded project-dir names to try for *cwd*, most specific first."""
    forms = [encode_cwd(cwd)]
    # Also try the parent-segment forms (drop trailing path components),
    # since CC sometimes encodes a parent project for sessions that ran in
    # subdirs (e.g., worktrees).
    parts = cwd.strip("/").split("/")
    while len(parts) > 1:
        parts.pop()
        forms.append("-" + "-".join(parts))
    return forms


def _pick_profile(cwd: str, counts: dict[str, dict[str, int]]) -> tuple[str, str, str]:
    """Return ``(winner_profile, matched_form, evidence)`` for *cwd*.

    The first candidate form hosted by any profile decides the outcome; the
    profile with the most sessions for that form wins.
    """
    forms = _candidate_forms(cwd)
    for form in forms:
        ranked = [
            (prof_name, per_project[form])
            for prof_name, per_project in counts.items()
            if form in per_project
        ]
        if not ranked:
            continue
        # Highest session count first. list.sort is stable, so tied
        # profiles keep the iteration order of `counts` (profile order when
        # built by collect_real_state) instead of being ordered by name.
        ranked.sort(key=lambda t: -t[1])
        winner, n = ranked[0]
        if len(ranked) == 1:
            evidence = f"only {winner} hosts {form} ({n} sessions)"
        else:
            runners = ", ".join(f"{p}:{c}" for p, c in ranked)
            evidence = f"winner by session count: {runners}"
        return winner, form, evidence
    return "NO_OWNER", forms[0], "no profile hosts this project"


def plan_adoptions(uuids_anywhere: set[str], counts: dict[str, dict[str, int]]) -> list[Adoption]:
    """Build the attribution plan for every true orphan under LOST/projects.

    Args:
        uuids_anywhere: session UUIDs already present in some real profile;
            files with these UUIDs are not orphans and are skipped.
        counts: profile_name -> encoded_dir -> session count. Its iteration
            order is the tie-break order between profiles.

    Returns:
        One ``Adoption`` per orphaned ``<UUID>.jsonl``. Orphans with no
        matching profile, or whose metadata cannot be read, get
        ``winner_profile == "NO_OWNER"``. Nothing is moved here.
    """
    adoptions: list[Adoption] = []
    lost_projects = LOST / "projects"
    if not lost_projects.is_dir():
        return adoptions

    for enc_dir in lost_projects.iterdir():
        if not enc_dir.is_dir():
            continue
        for child in enc_dir.iterdir():
            if not (child.is_file() and child.name.endswith(".jsonl")):
                continue
            uuid = child.name[:-len(".jsonl")]
            if not is_uuid_name(uuid):
                continue
            if uuid in uuids_anywhere:
                # in_both or has-dir-elsewhere: not an orphan, skip.
                continue

            try:
                meta = first_line_meta(child)
            except (OSError, UnicodeDecodeError) as e:
                # One unreadable file must not abort planning for the rest;
                # record it as unowned so it stays where it is.
                adoptions.append(Adoption(
                    uuid=uuid,
                    cwd="/_unknown_cwd",
                    src=child,
                    encoded=encode_cwd("/_unknown_cwd"),
                    winner_profile="NO_OWNER",
                    evidence=f"could not read session metadata: {e}",
                ))
                continue

            # Tolerate metadata that is not an object or whose cwd is not a
            # usable string; both fall back to the unknown-cwd placeholder.
            raw_cwd = meta.get("cwd") if isinstance(meta, dict) else None
            cwd = raw_cwd if isinstance(raw_cwd, str) and raw_cwd else "/_unknown_cwd"

            winner, form, evidence = _pick_profile(cwd, counts)
            adoptions.append(Adoption(
                uuid=uuid,
                cwd=cwd,
                src=child,
                encoded=form,
                winner_profile=winner,
                evidence=evidence,
            ))
    return adoptions


# ─────────────────────────────────────────────────────────────────────────


def apply_adoptions(adoptions: list[Adoption]) -> tuple[list[Adoption], list[str]]:
    """Move every owned orphan into its winner profile.

    Each file is hard-linked to its destination, chmod-ed, and only then
    removed from its source, so an existing destination is never
    overwritten. Entries whose winner is "NO_OWNER" are ignored.

    Returns ``(successful, errors)``. A chmod failure is reported in
    ``errors`` but does not stop the move from completing.
    """
    failures: list[str] = []
    adopted: list[Adoption] = []

    def relocate(item: Adoption) -> bool:
        # True when the source was linked into place and then removed.
        dest_dir = HOME / item.winner_profile / "projects" / item.encoded
        dest = dest_dir / f"{item.uuid}.jsonl"

        try:
            dest_dir.mkdir(parents=True, exist_ok=True, mode=0o700)
        except OSError as exc:
            failures.append(f"mkdir {dest_dir}: {exc}")
            return False

        # os.link fails if dest exists, which is the no-overwrite guarantee.
        try:
            os.link(item.src, dest)
        except FileExistsError:
            failures.append(f"refuse to overwrite existing {dest}")
            return False
        except OSError as exc:
            failures.append(f"link {item.src} -> {dest}: {exc}")
            return False

        # Best effort: a failed chmod is reported but the move continues.
        try:
            os.chmod(dest, TARGET_FILE_MODE)
        except OSError as exc:
            failures.append(f"chmod {dest}: {exc}")

        try:
            os.unlink(item.src)
        except OSError as exc:
            failures.append(f"unlink {item.src}: {exc}")
            return False
        return True

    for item in adoptions:
        if item.winner_profile != "NO_OWNER" and relocate(item):
            adopted.append(item)

    try:
        os.sync()
    except OSError:
        pass
    return adopted, failures


# ─────────────────────────────────────────────────────────────────────────


def _write_manifest(successful: list[Adoption], no_owner: list[Adoption],
                    errors: list[str]) -> Path:
    """Write a JSON audit record of one --apply run and return its path.

    NOTE(review): the schema below is specific to this script; align it
    with the restore-to-profiles manifests if tooling consumes both.

    Raises:
        OSError: if the manifest directory or file cannot be created.
    """
    now = datetime.now(timezone.utc)
    adopted_records = []
    for a in successful:
        # Must mirror the destination path computed in apply_adoptions.
        target = HOME / a.winner_profile / "projects" / a.encoded / f"{a.uuid}.jsonl"
        try:
            digest = sha256_file(target)
        except OSError:
            digest = None  # the move is still recorded without a digest
        adopted_records.append({
            "uuid": a.uuid,
            "cwd": a.cwd,
            "src": str(a.src),
            "dst": str(target),
            "profile": a.winner_profile,
            "evidence": a.evidence,
            "sha256": digest,
        })
    manifest = {
        "tool": "adopt-orphans",
        "applied_at": now.isoformat(),
        "adopted": adopted_records,
        "no_owner": [
            {"uuid": a.uuid, "cwd": a.cwd, "src": str(a.src)} for a in no_owner
        ],
        "errors": list(errors),
    }
    MANIFEST_DIR.mkdir(parents=True, exist_ok=True)
    out = MANIFEST_DIR / f"adopt-orphans-{now.strftime('%Y%m%dT%H%M%SZ')}.json"
    # Mode "x" refuses to overwrite an earlier audit record.
    with out.open("x", encoding="utf-8") as f:
        json.dump(manifest, f, indent=2)
        f.write("\n")
    return out


def main() -> int:
    """CLI entry point: survey, plan, print the plan, and optionally apply.

    Without --apply this is a dry run that only prints the attribution
    plan. With --apply (and a typed confirmation unless --yes is given) the
    owned orphans are moved and a JSON audit manifest is written to
    MANIFEST_DIR.

    Returns:
        Process exit status: 1 if HOME is not the expected directory, if
        any move reported an error, or if the manifest could not be
        written; 0 otherwise (including dry runs and user aborts).
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--apply", action="store_true",
                    help="Actually move files (default is dry-run)")
    ap.add_argument("--yes", action="store_true",
                    help="Skip the interactive confirmation prompt")
    args = ap.parse_args()

    # Safety guard: refuse to run under any other home directory.
    if HOME != Path("/home/m"):
        print(f"ERROR: HOME is {HOME}, expected /home/m", file=sys.stderr)
        return 1

    print("[1/3] surveying real profiles + ~/.claude-lost/")
    uuids_anywhere, counts = collect_real_state()
    print(f"      uuids in real profiles (jsonl OR dir): {len(uuids_anywhere):,}")
    for prof_name, m in counts.items():
        if m:
            print(f"      {prof_name}: {len(m)} project dirs, "
                  f"{sum(m.values())} total session UUIDs across them")

    print("\n[2/3] planning adoptions")
    adoptions = plan_adoptions(uuids_anywhere, counts)
    by_profile = Counter(a.winner_profile for a in adoptions)

    print(f"\n=== adoption plan ({len(adoptions)} orphans) ===")
    for prof, n in by_profile.most_common():
        print(f"  -> {prof:<22} {n:>3}")
    print()
    print("Per-orphan attribution:")
    for a in adoptions:
        marker = "  " if a.winner_profile != "NO_OWNER" else "??"
        print(f"  {marker} {a.uuid}  cwd={a.cwd}")
        print(f"        -> {a.winner_profile}  ({a.evidence})")

    if not args.apply:
        print("\nDry-run: no changes made. Re-run with --apply to execute.")
        return 0

    moveable = [a for a in adoptions if a.winner_profile != "NO_OWNER"]
    no_owner = [a for a in adoptions if a.winner_profile == "NO_OWNER"]
    if not moveable:
        print("\nNo adoptable orphans (all NO_OWNER). Nothing to apply.")
        return 0

    if not args.yes:
        try:
            answer = input(f"\nType 'yes' to adopt {len(moveable)} orphans: ").strip().lower()
        except EOFError:
            # No stdin (e.g. non-interactive run): treat as a refusal.
            answer = ""
        if answer != "yes":
            print("Aborted (no files moved).")
            return 0

    print("\n[3/3] APPLY")
    successful, errs = apply_adoptions(adoptions)
    print(f"      adopted: {len(successful)}/{len(moveable)}")
    for e in errs[:20]:
        print(f"      ERROR: {e}")
    if len(errs) > 20:
        print(f"      ... and {len(errs) - 20} more errors")

    # Record what was actually done, as the module docstring promises.
    manifest_failed = False
    try:
        manifest_path = _write_manifest(successful, no_owner, errs)
    except OSError as e:
        manifest_failed = True
        print(f"      ERROR: could not write audit manifest: {e}")
    else:
        print(f"      manifest: {manifest_path}")

    if no_owner:
        print(f"\n{len(no_owner)} orphan(s) with no candidate profile remain in ~/.claude-lost/:")
        for a in no_owner:
            print(f"  - {a.uuid}  cwd={a.cwd}")

    print("\nNext: handle ~/.claude-lost/ residue, then `uv run scripts/build-db`")
    return 0 if not errs and not manifest_failed else 1


# Run the CLI only when executed as a script; main()'s return value becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
