#!/bin/sh
# bty-state-migrate - move bty's durable state onto a dedicated disk.
#
# Usage: bty-state-migrate [--yes] /dev/sdX
#
# Relocates the WHOLE bty state directory -- ``/var/lib/bty``
# (images, netboot artifacts under ``boot/``, the content-addressed
# ``cache/``, ``state.db``, ``workflows/``, ``session-secret``) -- onto
# the given block device, then mounts that device AT ``/var/lib/bty``
# so the state survives an OS reflash.
#
# Why the whole state dir, not just images/: the heavy data
# (multi-GB content cache + the machine/catalog DB) live as SIBLINGS
# of ``images/`` under ``/var/lib/bty``. Mounting only ``images/`` (the
# pre-v0.22.17 behaviour) left the cache + DB on the rootfs, so the
# rootfs stayed full and a reflash lost the inventory. Mounting the
# state dir itself fixes that: reflash upgrades the OS + venv (which
# stay on the rootfs at ``/opt/bty/venv``) while the disk preserves
# images, netboot artifacts, and the machine inventory -- so the
# appliance comes back without re-inventorying machines or
# re-downloading images.
#
# The device is formatted ext4 with label ``BTY_IMAGE_STORE`` (the
# bake ships a matching ``nofail`` /etc/fstab line at ``/var/lib/bty``,
# so a freshly-reflashed appliance auto-mounts an already-prepared
# disk on boot).
#
# Safety rails:
#   - Refuses to format the rootfs disk.
#   - Refuses a device with a currently-mounted partition.
#   - No-ops if /var/lib/bty is ALREADY a separate mount.
#   - Confirmation prompt unless ``--yes`` / ``-y`` is given.
#   - Copies onto the new disk and verifies BEFORE removing the
#     rootfs copy, so an interrupted run never loses data (the disk
#     auto-mounts on the next boot via the fstab line regardless).

# Fail fast: abort on the first unhandled command failure (-e) and on
# any expansion of an unset variable (-u).
set -e -u

# Filled in by the command-line parser further down.
DEVICE=''        # block device that will receive the state
ASSUME_YES=''    # non-empty once --yes / -y has been seen

# Directory being migrated, and the filesystem label given to the disk.
STATE_DIR='/var/lib/bty'
LABEL='BTY_IMAGE_STORE'

# Print the command-line help text on stdout, one argument per output
# line. Callers that report an error redirect it to stderr themselves.
usage() {
    printf '%s\n' \
        'Usage: bty-state-migrate [--yes|-y] DEVICE' \
        '' \
        "Move bty's durable state (/var/lib/bty: images, netboot artifacts," \
        'content cache, machine DB) onto DEVICE (e.g. /dev/sdb, /dev/nvme1n1)' \
        'and mount it there so the state survives an OS reflash.' \
        '' \
        'DEVICE is wiped, partitioned (GPT, single partition), formatted ext4' \
        'with label BTY_IMAGE_STORE, populated from the current /var/lib/bty,' \
        'and mounted at /var/lib/bty (auto-mounts on subsequent boots,' \
        'including after reflashing the appliance).' \
        '' \
        '  --yes, -y    skip the confirmation prompt' \
        '  -h, --help   show this help and exit'
}

# Parse the command line.
#
# Flags are accepted before or after DEVICE. ``--`` ends flag parsing;
# whatever follows it is taken verbatim as an operand. Exactly one
# DEVICE operand is allowed: because this tool wipes the device it is
# given, a surplus argument is treated as a usage error rather than
# being silently dropped.
while [ $# -gt 0 ]; do
    case "$1" in
        --yes|-y) ASSUME_YES=1; shift ;;
        --help|-h) usage; exit 0 ;;
        --) shift; break ;;
        -*) echo "bty-state-migrate: unknown flag: $1" >&2; usage >&2; exit 2 ;;
        *)
            if [ -n "${DEVICE:-}" ]; then
                echo "bty-state-migrate: unexpected extra argument: $1" >&2
                usage >&2
                exit 2
            fi
            DEVICE="$1"
            shift
            ;;
    esac
done

# Operands left over after an explicit ``--`` (the loop above stops
# there without consuming them).
while [ $# -gt 0 ]; do
    if [ -n "${DEVICE:-}" ]; then
        echo "bty-state-migrate: unexpected extra argument: $1" >&2
        usage >&2
        exit 2
    fi
    DEVICE="$1"
    shift
done

if [ -z "${DEVICE:-}" ]; then
    echo "bty-state-migrate: DEVICE argument required" >&2
    usage >&2
    exit 2
fi

# --- Preflight ---------------------------------------------------------
# Everything below needs root, so bail out early for anyone else.
[ "$(id -u)" -ne 0 ] && {
    echo "bty-state-migrate: must be run as root (try sudo)" >&2
    exit 2
}

# DEVICE has to name an existing block special file.
[ -b "$DEVICE" ] || {
    echo "bty-state-migrate: $DEVICE is not a block device" >&2
    exit 2
}

# Idempotence guard: when the state directory is already a mount point
# the migration has been done before. Report where it lives and leave
# successfully instead of wiping the disk it is sitting on.
mountpoint -q "$STATE_DIR" && {
    echo "bty-state-migrate: $STATE_DIR is already a separate mount:"
    findmnt -no SOURCE,FSTYPE "$STATE_DIR"
    echo "Nothing to do (the state already lives on its own disk)."
    exit 0
}

# Map a partition device path to its parent disk by stripping the
# trailing partition suffix: ``pN`` for nvme/mmcblk names
# (/dev/nvme0n1p2 -> /dev/nvme0n1), a bare ``N`` for every other /dev
# path (/dev/sda2 -> /dev/sda). Prints nothing when the argument is not
# a /dev path (e.g. an overlay root). Uses only POSIX basic regular
# expressions -- ``\+`` is a GNU extension.
parent_disk_of() {
    case "$1" in
        /dev/nvme*|/dev/mmcblk*)
            printf '%s\n' "$1" | sed 's/p[0-9][0-9]*$//'
            ;;
        /dev/*)
            printf '%s\n' "$1" | sed 's/[0-9][0-9]*$//'
            ;;
    esac
}

# Refuse to format the rootfs disk. ``findmnt`` returns the rootfs
# partition; the helper above turns that into the parent disk.
ROOTFS_PART=$(findmnt -no SOURCE / 2>/dev/null || true)
ROOTFS_DISK=$(parent_disk_of "$ROOTFS_PART")

# Compare against the canonical path of DEVICE so that a symlink alias
# (/dev/disk/by-id/...) or a relative path to the rootfs disk is still
# recognised. Fall back to the path as given if it cannot be resolved.
DEVICE_REAL=$(readlink -f -- "$DEVICE" 2>/dev/null || true)
[ -n "$DEVICE_REAL" ] || DEVICE_REAL=$DEVICE

if [ -n "$ROOTFS_DISK" ] && [ "$DEVICE_REAL" = "$ROOTFS_DISK" ]; then
    echo "bty-state-migrate: $DEVICE is the rootfs disk; refusing to format" >&2
    exit 2
fi

# Refuse if any partition of DEVICE is currently mounted somewhere.
# ``lsblk`` prints one MOUNTPOINT line per node (blank when unmounted);
# the sed drops the blank ones so MOUNTED is empty iff nothing is in use.
MOUNTED=$(lsblk -no MOUNTPOINT "$DEVICE" 2>/dev/null | sed '/^[[:space:]]*$/d' || true)
if [ -n "$MOUNTED" ]; then
    echo "bty-state-migrate: $DEVICE has currently-mounted partitions:" >&2
    # Indent line by line instead of expanding $MOUNTED unquoted: an
    # unquoted expansion is word-split and glob-expanded, which mangles
    # mount points containing spaces and treats lsblk's ``[SWAP]``
    # marker as a bracket glob.
    printf '%s\n' "$MOUNTED" | sed 's/^/  /' >&2
    echo "Unmount them first, then re-run." >&2
    exit 2
fi

# Human-readable size of the state directory, for the summary only.
# The pipeline's exit status is ``cut``'s, so a failing ``du`` cannot
# be caught with ``||``; detect the empty result instead and show '?'.
STATE_BYTES=$(du -sh "$STATE_DIR" 2>/dev/null | cut -f1)
[ -n "$STATE_BYTES" ] || STATE_BYTES='?'

# Spell out every destructive step before asking for confirmation.
cat <<EOF

About to:
  - STOP bty-web.service for the duration of the migration
  - WIPE all existing data on $DEVICE
  - Create a GPT label + single partition spanning the whole disk
  - Format with ext4, label $LABEL
  - Copy the current $STATE_DIR (~${STATE_BYTES}) onto the new disk
  - Mount the disk at $STATE_DIR (and on every subsequent boot)
  - Remove the now-redundant rootfs copy + restart bty-web.service

EOF

if [ -z "$ASSUME_YES" ]; then
    printf 'Type "yes" to continue: '
    CONFIRM=
    # A failed read (EOF / closed stdin) counts as "no". Testing it
    # inside the condition keeps errexit from killing the script
    # before the "aborted." message is printed.
    if ! read -r CONFIRM || [ "$CONFIRM" != "yes" ]; then
        echo "aborted." >&2
        exit 1
    fi
fi

# --- Point of no return: everything from here on modifies DEVICE. ---

# Best effort: stderr is discarded and a failure to stop the unit is
# ignored, so the migration still proceeds if the stop fails.
echo "Stopping bty-web.service ..."
systemctl stop bty-web.service 2>/dev/null || true

# Erase existing filesystem / partition-table signatures on the whole
# device so the new GPT starts from a clean slate.
echo "Wiping $DEVICE ..."
wipefs -a "$DEVICE" >/dev/null

# New GPT with one partition named ``bty-state`` covering 0%-100% of
# the disk. ``partprobe`` (best effort) asks the kernel to re-read the
# table and ``udevadm settle`` waits for udev to finish processing, so
# the partition node exists before it is looked up below.
echo "Partitioning $DEVICE (GPT, single partition) ..."
parted --script "$DEVICE" mklabel gpt
parted --script "$DEVICE" mkpart bty-state ext4 0% 100%
partprobe "$DEVICE" 2>/dev/null || true
udevadm settle

# Resolve partition device path (``/dev/sda1`` vs ``/dev/nvme0n1p1``).
# Ask lsblk rather than guessing the naming scheme: take the first row
# whose TYPE is ``part`` and prefix its kernel name with /dev/.
PART=$(lsblk -lno NAME,TYPE "$DEVICE" | awk '$2 == "part" {print "/dev/" $1; exit}')
if [ -z "${PART:-}" ] || [ ! -b "$PART" ]; then
    echo "bty-state-migrate: cannot find partition 1 on $DEVICE after partprobe" >&2
    exit 2
fi

# ``-F`` forces mkfs to proceed without prompting; ``-L`` sets the
# label that the fstab entry mounts by.
echo "Formatting $PART as ext4 (label $LABEL) ..."
mkfs.ext4 -F -L "$LABEL" "$PART" >/dev/null

# Copy the live state onto the new disk via a temp mount, then verify
# BEFORE touching the rootfs copy. ``cp -a`` preserves ownership /
# perms / timestamps and needs no extra package (rsync isn't baked).
TMP_MNT=$(mktemp -d /mnt/bty-migrate.XXXXXX)

# Undo the temporary mount and remove its directory. Every step is
# best effort so that cleanup itself never masks the real exit status.
cleanup() {
    if mountpoint -q "$TMP_MNT"; then
        umount "$TMP_MNT" || true
    fi
    rmdir "$TMP_MNT" 2>/dev/null || true
}

# Cleanup runs exactly once, from the EXIT trap. The signal traps only
# turn the signal into an exit: a handler that merely cleaned up would
# let the script carry on afterwards with the temp mount already gone.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
mount "$PART" "$TMP_MNT"

echo "Copying $STATE_DIR -> new disk (this can take a while) ..."
cp -a "$STATE_DIR"/. "$TMP_MNT"/

# Verify the copy looks complete before we remove the original.
# (A failing ``cp`` has already aborted the script via errexit; this
# catches a copy that "succeeded" without bringing the database over.)
if [ -e "$STATE_DIR/state.db" ] && [ ! -e "$TMP_MNT/state.db" ]; then
    echo "bty-state-migrate: copy verification failed (state.db missing on disk)" >&2
    echo "Left $STATE_DIR untouched; bty-web not restarted." >&2
    exit 1
fi

# Temp mount no longer needed: unmount, drop the traps, remove the dir.
umount "$TMP_MNT"
trap - EXIT INT TERM
rmdir "$TMP_MNT" 2>/dev/null || true

# Make sure /etc/fstab mounts the labelled filesystem at $STATE_DIR.
# An existing ``LABEL=$LABEL`` line (the bake ships one, so this is the
# normal case on a reflashed appliance) short-circuits the append,
# which keeps re-runs idempotent. ``nofail`` lets an appliance without
# the disk still boot using the $STATE_DIR on its rootfs.
grep -qE "^LABEL=$LABEL[[:space:]]" /etc/fstab || {
    echo "Adding fstab line ..."
    printf 'LABEL=%s %s ext4 nofail,x-systemd.device-timeout=10s 0 2\n' \
        "$LABEL" "$STATE_DIR" >> /etc/fstab
}

# Have systemd re-read fstab so its generated mount units are current.
systemctl daemon-reload

# Swap the rootfs copy out for the disk: move it aside, recreate an
# empty mountpoint, mount the disk (via its fstab entry), verify, then
# free the old copy. Until the final ``rm`` the data exists twice: on
# the disk and in the .pre-migrate copy.
PRE_MIGRATE="${STATE_DIR}.pre-migrate.$$"
mv "$STATE_DIR" "$PRE_MIGRATE"

# The mount steps run inside the ``if`` condition on purpose: executed
# bare under ``set -e``, a failing ``mount`` would terminate the script
# on the spot and skip the rollback, leaving the state directory moved
# aside and bty-web stopped.
if ! { mkdir -p "$STATE_DIR" && mount "$STATE_DIR" && mountpoint -q "$STATE_DIR"; }; then
    echo "bty-state-migrate: $STATE_DIR did not mount; restoring rootfs copy" >&2
    # Put the original directory back where it was and bring the
    # service up again on the rootfs copy.
    rmdir "$STATE_DIR" 2>/dev/null || true
    mv "$PRE_MIGRATE" "$STATE_DIR"
    systemctl start bty-web.service 2>/dev/null || true
    exit 1
fi

echo "Removing redundant rootfs copy ..."
# ``:?`` aborts rather than running ``rm -rf`` on an empty path.
rm -rf -- "${PRE_MIGRATE:?}"

echo "Restarting bty-web.service ..."
systemctl start bty-web.service 2>/dev/null || true

echo
echo "Done. $STATE_DIR now lives on $DEVICE (label $LABEL)."
echo "It auto-mounts on every boot, including after the appliance is"
echo "reflashed -- images, netboot artifacts, and the machine"
echo "inventory persist while the OS + venv upgrade with the reflash."
