#!/usr/bin/env python3
"""Show which real profile each DIVERGENT recovered file maps to as a merge
target. Reports counts by profile and flags any UUIDs that appear in
multiple real profiles (which would need a tiebreak).
"""
from __future__ import annotations

import json
from collections import Counter
from pathlib import Path

# Manifest directory: "data/verify-manifests" under the parent of this file's directory.
MANIFEST_DIR = Path(__file__).resolve().parent.parent.joinpath("data", "verify-manifests")
# Profile directories in the user's home that are treated as real merge targets.
REAL_PROFILES = [
    Path.home() / dirname
    for dirname in (".claude-personal", ".claude-work", ".claude-the-third")
]


def latest_manifest() -> Path:
    """Return the greatest ``verify-*.json`` path found in ``MANIFEST_DIR``.

    "Greatest" is by ``Path`` ordering, not by modification time; presumably
    the file names embed a sortable timestamp -- TODO confirm.

    Raises:
        SystemExit: if no file in ``MANIFEST_DIR`` matches the pattern.
    """
    newest = max(MANIFEST_DIR.glob("verify-*.json"), default=None)
    if newest is None:
        raise SystemExit(f"no manifests in {MANIFEST_DIR}")
    return newest


def profile_of(path_str: str) -> str:
    """Return the name of the real profile directory that contains *path_str*.

    The check is purely lexical (no filesystem access): a profile matches
    when it equals the path or is one of its ancestors. Profiles are tried
    in ``REAL_PROFILES`` order and the first match wins.

    Returns:
        The profile directory's final component (e.g. ``".claude-work"``), or
        the literal ``"(unknown)"`` when no profile contains the path.
    """
    candidate = Path(path_str)
    # The path itself plus every ancestor; a profile "contains" the path
    # exactly when it is one of these.
    lineage = (candidate, *candidate.parents)
    return next(
        (root.name for root in REAL_PROFILES if root in lineage),
        "(unknown)",
    )


def real_matches(uuid: str) -> list[Path]:
    """Collect every ``projects/*/<uuid>.jsonl`` file found in the real profiles.

    Profiles are visited in ``REAL_PROFILES`` order; a profile without a
    ``projects`` directory contributes nothing. Only regular files are
    returned.
    """
    pattern = f"*/{uuid}.jsonl"
    project_roots = (root / "projects" for root in REAL_PROFILES)
    return [
        hit
        for projects in project_roots
        if projects.is_dir()
        for hit in projects.glob(pattern)
        if hit.is_file()
    ]


def main() -> None:
    """Print a merge-target report for the DIVERGENT entries of the latest manifest.

    Loads the manifest returned by ``latest_manifest()``, keeps the results
    whose ``class`` is ``"DIVERGENT"``, and prints:

    * how many of them map to each real profile (derived from ``real_match``),
    * every UUID for which more than one matching ``<UUID>.jsonl`` file was
      found under the real profiles, and
    * the summed recovered-only record count per target profile.

    Raises:
        SystemExit: propagated from ``latest_manifest()`` when no manifest exists.
        KeyError: if the manifest or a DIVERGENT result lacks an expected key.
    """
    m_path = latest_manifest()
    # JSON text is UTF-8; decode explicitly rather than relying on the
    # platform's locale default.
    data = json.loads(m_path.read_text(encoding="utf-8"))
    divergent = [r for r in data["results"] if r["class"] == "DIVERGENT"]

    by_target: Counter[str] = Counter()
    rec_only_by_prof: Counter[str] = Counter()
    multi: list[tuple[str, list[str]]] = []

    # One pass gathers everything the report needs, so the merge target of
    # each record is resolved only once.
    for r in divergent:
        uuid = r["uuid"]
        all_matches = real_matches(uuid)
        # NOTE: this counts matching files, so a UUID found twice inside the
        # same profile (different project directories) is listed as well.
        if len(all_matches) > 1:
            multi.append((uuid, [profile_of(str(m)) for m in all_matches]))

        target = profile_of(r["real_match"])
        by_target[target] += 1

        # missing_keys was capped at 10 in the verify script, so prefer the
        # exact count carried by divergent_detail when it is available.
        det = r.get("divergent_detail")
        if det:
            rec_only_by_prof[target] += det["only_in_rec"]
        else:
            # Best-effort fallback: the length of the (possibly capped)
            # missing_keys list, i.e. a lower bound on the real count.
            rec_only_by_prof[target] += len(r.get("missing_keys", []))

    print(f"Manifest: {m_path}")
    print(f"DIVERGENT files: {len(divergent)}")
    print()
    print("Merge target distribution:")
    for prof, n in by_target.most_common():
        print(f"  {prof:<24} {n:>4}")
    print()

    if multi:
        print(f"UUIDs present in MULTIPLE real profiles ({len(multi)}):")
        for uuid, profs in multi:
            print(f"  {uuid}  ->  {profs}")
    else:
        print("No multi-profile collisions: every DIVERGENT UUID exists in exactly one profile.")

    # Also break out by recovered-only record count, per target profile
    print()
    print("Aggregate recovered-only records per target profile (records that")
    print("would be ADDED to that profile's <UUID>.jsonl on merge):")
    for prof, n in rec_only_by_prof.most_common():
        print(f"  {prof:<24} {n:>6}")


if __name__ == "__main__":
    main()
