#!/usr/bin/env python3
"""Re-parse executables column for all bash_commands using latest parser logic."""

import json
import sqlite3
import sys
import time

from claude_timeline.build.pipeline import rebuild_bash_executables
from claude_timeline.config import DB_PATH
from claude_timeline.parsers import extract_executables


def main():
    """Re-parse the ``executables`` column for every ``bash_commands`` row.

    Reads ``bash_commands`` in ascending ``id`` order, 5000 rows at a time,
    recomputes each row's executables with ``extract_executables`` and writes
    the result back as a JSON array (``"[]"`` when the parser returns nothing).
    Each batch is committed on its own. Afterwards ``rebuild_bash_executables``
    is called so the normalized ``bash_executables`` table is re-derived from
    the freshly written JSON.

    Progress and the final summary are printed to stderr. Exits with status 1
    when the database file does not exist. The connection is closed even if
    parsing, an update, or the rebuild raises.
    """
    if not DB_PATH.exists():
        print(f"Database not found at {DB_PATH}", file=sys.stderr)
        sys.exit(1)

    t0 = time.monotonic()
    conn = sqlite3.connect(str(DB_PATH))
    try:
        # FK enforcement is per-connection; on by default elsewhere in the app,
        # set here too for consistency.
        conn.execute("PRAGMA foreign_keys = ON")

        total = conn.execute("SELECT count(*) FROM bash_commands").fetchone()[0]
        print(f"Re-parsing executables for {total:,} bash commands...", file=sys.stderr)

        batch_size = 5000
        updated = 0
        # Keyset pagination: each query resumes after the last id seen, so the
        # scan does not re-read earlier rows the way OFFSET would.
        # NOTE(review): starting at 0 assumes all ids are positive -- confirm.
        last_id = 0

        while True:
            rows = conn.execute(
                "SELECT id, command FROM bash_commands WHERE id > ? ORDER BY id LIMIT ?",
                (last_id, batch_size),
            ).fetchall()
            if not rows:
                break

            updates = []
            for row_id, command in rows:
                exes = extract_executables(command)
                # A falsy parser result is stored as an empty JSON array.
                updates.append((json.dumps(exes) if exes else "[]", row_id))

            conn.executemany("UPDATE bash_commands SET executables = ? WHERE id = ?", updates)
            conn.commit()
            updated += len(updates)
            last_id = rows[-1][0]
            print(f"  {updated:,}/{total:,}", file=sys.stderr)

        # v5: re-derive the normalized bash_executables table so the analytical
        # query layer reflects the freshly-reparsed JSON. Without this step the
        # JSON column would be updated but the indexed table would still hold the
        # old parses, silently skewing top-executables counts and EXISTS filters.
        print("[bash-executables] normalizing executables JSON into table...", file=sys.stderr)
        rebuild_bash_executables(conn)
        conn.commit()
        be_total = conn.execute("SELECT count(*) FROM bash_executables").fetchone()[0]
        print(f"  Bash-executables rows: {be_total:,}", file=sys.stderr)
    finally:
        # Release the database handle on both the success and the failure path.
        conn.close()

    elapsed = time.monotonic() - t0
    print(f"\nDone in {elapsed:.1f}s -- updated {updated:,} rows", file=sys.stderr)

# Run the re-parse only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
