#!/usr/bin/env python3
"""bty live env: flash-on-boot.

Reads ``bty.*`` parameters from ``/proc/cmdline``, fetches the
assigned image from the bty server, runs ``bty flash``, signals
completion, and reboots.

Driven by ``bty-flash-on-boot.service`` after ``network-online.target``.
If any of the required ``bty.*`` parameters is missing from
/proc/cmdline (e.g. the operator is booting the live env interactively
for debugging), the service exits 0 and the live env drops to its
console.

Recognised cmdline parameters:
- ``bty.server=URL``       - base URL of the bty-web server
- ``bty.mac=MAC``          - this machine's MAC (xx-xx-...)
- ``bty.image_url=URL``    - direct URL to the image to flash
- ``bty.mode=interactive`` - short-circuit; ``bty-tui-on-tty1.service``
                             handles the interactive flash flow on
                             tty1 instead. Set by the server's
                             ``ipxe_tui.j2`` for ``boot_policy=tui``
                             (default for unknown MACs).
"""

from __future__ import annotations

import json
import shlex
import subprocess
import sys
import urllib.parse
import urllib.request
from pathlib import Path

# Kernel command line of the running system; read by cmdline_args().
CMDLINE = Path("/proc/cmdline")
# Directory the downloaded image is written into (see local_image_path()).
LOCAL_IMAGE_DIR = Path("/var/tmp")

# Cmdline keys that must all be present before main() attempts a flash;
# if any is absent, main() returns 0 without downloading or flashing.
REQUIRED = ("bty.server", "bty.mac", "bty.image_url")


def local_image_path(image_url: str) -> Path:
    """Return the path under ``LOCAL_IMAGE_DIR`` to download the image to.

    The last component of the URL's path is reused as the local
    filename. ``bty.images.detect_format()`` identifies images by
    extension (.qcow2, .img, .img.{zst,xz,gz,bz2}), so a fixed name
    such as ``bty-flash-on-boot.image`` would always fail validation.

    When the URL path has no final component, the name falls back to
    ``bty-flash-on-boot.img``.
    """
    url_path = urllib.parse.urlparse(image_url).path
    filename = Path(url_path).name or "bty-flash-on-boot.img"
    return LOCAL_IMAGE_DIR / filename


def cmdline_args() -> dict[str, str]:
    """Parse /proc/cmdline into a dict of ``bty.*`` keys -> values.

    The command line is split shell-style so a quoted value may contain
    whitespace. Only tokens of the form ``bty.<name>=<value>`` are kept;
    the value is everything after the first ``=`` (and may be empty).
    When a key appears more than once, the last occurrence wins.

    Returns:
        Mapping from each ``bty.*`` key found on the cmdline to its value.
    """
    raw = CMDLINE.read_text()
    try:
        tokens = shlex.split(raw)
    except ValueError as exc:
        # shlex refuses input with an unbalanced quote (e.g. a stray
        # apostrophe in some unrelated kernel parameter). That must not
        # stop the bty.* keys from being read, so fall back to plain
        # whitespace splitting; quotes are then kept literally in values.
        print(
            f"bty-flash-on-boot: could not shell-split {CMDLINE} ({exc}); "
            "falling back to whitespace splitting",
            file=sys.stderr,
            flush=True,
        )
        tokens = raw.split()

    out: dict[str, str] = {}
    for token in tokens:
        key, sep, value = token.partition("=")
        # ``sep`` is empty when the token has no "=" at all.
        if sep and key.startswith("bty."):
            out[key] = value
    return out


def pick_target() -> str:
    """Return the device path of the first fixed, writable disk.

    The per-MAC plan from bty-web does not yet carry a target hint, so
    the live env takes the first suitable device in the order lsblk
    reports them. That is the right choice for a bare-metal machine
    with a single fixed disk; a future ``bty.target=`` cmdline
    parameter could let the server choose on multi-disk hosts.

    Discovery runs ``lsblk -d -e7 -J`` (``-d`` omits partitions,
    ``-e7`` excludes loop devices, ``-J`` emits JSON), the form the
    docs recommend for listing disks. It replaces the
    ``bty list disks --json`` wrapper that was dropped in v0.8.4.

    Raises:
        subprocess.CalledProcessError: ``lsblk`` exited non-zero.
        SystemExit: no device passed the filters.
    """
    lsblk = subprocess.run(
        ["lsblk", "-d", "-e7", "-J", "-o", "PATH,RM,RO,TYPE"],
        check=True,
        capture_output=True,
        text=True,
    )
    # Current lsblk emits ``rm`` / ``ro`` as JSON booleans; older
    # releases emitted the strings "0" / "1". Accept either form.
    flag_set = (True, "1", 1)

    devices = json.loads(lsblk.stdout).get("blockdevices", [])
    for dev in devices:
        unsuitable = (
            dev.get("type") != "disk"
            or dev.get("rm") in flag_set
            or dev.get("ro") in flag_set
        )
        if unsuitable:
            continue
        dev_path = dev.get("path")
        if dev_path:
            return str(dev_path)
    raise SystemExit("bty-flash-on-boot: no flashable disk found")


def download(url: str, dest: Path) -> None:
    """Stream ``url`` into ``dest``, replacing ``dest`` only on success.

    The body is written to a sibling ``.partial`` file and renamed onto
    ``dest`` once complete, so an interrupted download (server hangup,
    OOM kill, kernel panic) can't leave a half-written file where
    ``bty flash --yes`` would read it as a complete image.

    When the response carries a ``Content-Length`` header, the number of
    bytes received is checked against it before the rename. A sized
    ``read()`` on a connection that closes early can look like an
    ordinary end of stream, so without this check a truncated body
    would be promoted to ``dest``.

    ``timeout=300`` on the HTTP read keeps the boot from hanging
    indefinitely on a misconfigured server.

    Args:
        url: Location of the image; anything ``urllib.request.urlopen``
            accepts.
        dest: Final path of the downloaded file. Its parent directory
            is created if missing.

    Raises:
        OSError: Fewer bytes arrived than ``Content-Length`` announced,
            or the fetch / file I/O itself failed (``URLError`` is an
            ``OSError``).
    """
    print(f"bty-flash-on-boot: downloading {url} -> {dest}", flush=True)
    dest.parent.mkdir(parents=True, exist_ok=True)
    tmp = dest.with_suffix(dest.suffix + ".partial")
    try:
        received = 0
        with urllib.request.urlopen(url, timeout=300) as resp, tmp.open("wb") as f:
            # Content-Length is absent for e.g. chunked responses; an
            # unparsable value is treated the same as a missing one.
            headers = getattr(resp, "headers", None)
            declared = headers.get("Content-Length") if headers is not None else None
            try:
                expected = int(declared) if declared is not None else None
            except ValueError:
                expected = None

            while True:
                chunk = resp.read(1 << 20)
                if not chunk:
                    break
                f.write(chunk)
                received += len(chunk)

        if expected is not None and received < expected:
            raise OSError(
                f"bty-flash-on-boot: download of {url} incomplete: "
                f"got {received} of {expected} bytes"
            )
        tmp.replace(dest)
    except BaseException:
        # Clean up the partial on any failure -- including
        # KeyboardInterrupt -- so a re-attempted boot starts fresh.
        try:
            tmp.unlink()
        except FileNotFoundError:
            pass
        raise


def signal_done(server: str, mac: str) -> None:
    """Tell the server this machine finished flashing (best effort).

    Sends an empty-bodied POST to ``<server>/pxe/<mac>/done`` with a
    10 second timeout. Any ``Exception`` raised along the way is
    reported on stderr and swallowed, so a failed signal never blocks
    the reboot that follows. The endpoint itself lands in D-3.
    """
    endpoint = f"{server.rstrip('/')}/pxe/{mac}/done"
    try:
        request = urllib.request.Request(endpoint, data=b"", method="POST")
        with urllib.request.urlopen(request, timeout=10) as _:
            success = f"bty-flash-on-boot: completion signalled to {endpoint}"
            print(success, flush=True)
    except Exception as err:  # noqa: BLE001
        failure = (
            f"bty-flash-on-boot: completion signal to {endpoint} failed: {err}; "
            "continuing to reboot anyway"
        )
        print(failure, file=sys.stderr, flush=True)

def main() -> int:
    """Run the flash-on-boot sequence and return the process exit code.

    Returns 0 without flashing when ``bty.mode=interactive`` is set or
    when any key in ``REQUIRED`` is missing from the cmdline. Otherwise
    it downloads the image, picks a target disk, runs ``bty flash``,
    signals completion to the server and reboots. Failures in the
    download, the flash or the reboot propagate as exceptions.
    """
    params = cmdline_args()

    # boot_policy=tui on the server side sets bty.mode=interactive.
    # bty-tui-on-tty1.service owns tty1 for that boot and runs
    # bty-tui --server URL --mac MAC for the operator; flashing from
    # here as well would race the TUI session.
    if params.get("bty.mode") == "interactive":
        print(
            "bty-flash-on-boot: bty.mode=interactive on cmdline; "
            "bty-tui-on-tty1.service handles this boot",
            flush=True,
        )
        return 0

    absent = [key for key in REQUIRED if key not in params]
    if absent:
        absent_list = ", ".join(absent)
        print(
            "bty-flash-on-boot: cmdline missing required keys "
            f"({absent_list}); not flashing - dropping to console",
            file=sys.stderr,
            flush=True,
        )
        # Nothing to do is not a failure.
        return 0

    server = params["bty.server"].rstrip("/")
    mac = params["bty.mac"]
    image_url = params["bty.image_url"]
    print(f"bty-flash-on-boot: server={server} mac={mac}", flush=True)

    image_file = local_image_path(image_url)
    download(image_url, image_file)

    disk = pick_target()
    print(f"bty-flash-on-boot: target disk {disk}", flush=True)

    flash_cmd = ["bty", "flash", str(image_file), disk, "--yes"]
    subprocess.run(flash_cmd, check=True)

    signal_done(server, mac)

    print("bty-flash-on-boot: flash complete; rebooting in 5s", flush=True)
    subprocess.run(["sleep", "5"], check=False)
    # The reboot uses ``check=True`` on purpose: if ``systemctl reboot``
    # fails (no systemd, permission denied, transition blocked, ...)
    # the script should crash loudly rather than leave "flash complete;
    # rebooting in 5s" on the serial console with nothing happening
    # afterwards.
    subprocess.run(["systemctl", "reboot"], check=True)
    return 0


# Script entry point: run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
