# Justfile for local development of agentguides.
#
# Dev install: `uv sync` (creates .venv from pyproject.toml + uv.lock).
# End-user install: `uv tool install agentguides` puts `guide` on PATH.
#
# `just launch` starts a Claude Code session against an example Guide with the
# `guide` MCP server attached. The server is the v0.1 reference runtime
# (FastMCP-backed) and owns all state mutations (atomic markdown writes).

# List every available recipe; as the first recipe in the file, a bare `just` runs this.
default:
    @just --list

# Runs `uv sync`, which creates .venv from pyproject.toml + uv.lock.
# One-time setup: sync the uv-managed venv.
setup:
    uv sync

# `--editable .` installs from this checkout rather than a published release;
# `--reinstall` asks uv to redo the install instead of reusing an existing one.
# Working-tree edits to src/agentguides/ take effect immediately — no reinstall.
# Install `guide` globally as an editable uv tool pointing at this checkout.
install-dev:
    uv tool install --editable . --reinstall

# The tool is registered under the package name `agentguides`; `guide` is the
# executable it puts on PATH, so the uninstall targets the package name.
# Remove the globally installed `guide` tool (paired with `install-dev`).
uninstall-dev:
    uv tool uninstall agentguides

# Runs `guide validate` once per example Guide, grouped by the book directory
# that contains it; the first failing validation aborts the recipe (set -e).
# Validate: schema + DAG + refs across every example Guide.
validate:
    #!/usr/bin/env bash
    set -euo pipefail
    # Validate every named Guide that lives under one book directory.
    #   $1     book directory (passed as --book)
    #   $2...  Guide directory names inside that book (each becomes --root)
    validate_book() {
        local book="$1" name
        shift
        for name in "$@"; do
            echo "== $book/$name =="
            uv run guide validate --root "$book/$name" --book "$book"
        done
    }
    validate_book examples/guides \
        hello-walk api-latency-spike-triage wildland-fire-incident-command
    validate_book examples/books/postgres-upgrade \
        postgres-major-upgrade postgres-major-upgrade-rescue
    validate_book examples/books/db-ops \
        db-backup db-restore db-rollback

# pytest is invoked through `uv run`, i.e. inside the uv-managed environment.
# Run the Python test suite.
test:
    uv run pytest

# === Release / packaging — local mechanics a CI runner would run (no GHA yet) ===

# Runs `ruff format` over `.` (the whole tree), rewriting files in place.
# NOTE: first full run is a large diff (pre-commit formats only changed files).
# Format the whole tree with ruff.
[group('release')]
fmt:
    uv run ruff format .

# Runs `ruff check` over `.` (the whole tree); no `--fix` is passed.
# Legacy debt exists; pre-commit enforces clean on changed files going forward.
# Lint with ruff (lean high-signal rules).
[group('release')]
lint:
    uv run ruff check .

# dist/ is removed first so it only ever holds the artifacts of this build —
# `check-dist` globs `dist/*` and `dist/*.whl` and relies on that.
# Build wheel + sdist into dist/.
[group('release')]
build:
    rm -rf dist
    uv build

# Depends on `build`, so dist/ always holds a fresh wheel + sdist when this runs.
# Verify the built dist: twine metadata + schemas hoisted + version + `guide` entry point.
[group('release')]
check-dist: build
    #!/usr/bin/env bash
    set -euo pipefail
    # 1. Metadata check over everything in dist/.
    uvx twine check dist/*
    # 2. Project-specific wheel inspection.
    built_wheel=$(ls dist/*.whl)
    uv run python scripts/check_wheel.py "$built_wheel"
    # 3. Install the wheel into a throwaway venv and run the `guide` entry point.
    #    The scratch dir is removed on exit, whether the recipe passes or fails.
    scratch=$(mktemp -d)
    trap 'rm -rf "$scratch"' EXIT
    uv venv -q "$scratch/v"
    VIRTUAL_ENV="$scratch/v" uv pip install -q "$built_wheel"
    # Last command: its status is the recipe's status, so a broken entry point fails the check.
    "$scratch/v/bin/guide" --help >/dev/null && echo "  guide --help: OK"

# Thin wrapper over `cz bump --dry-run`; the same command is the last gate in `release-dryrun`.
# Show the next version + CHANGELOG cz would produce from the commit history
# (no writes). Conventional commits drive the bump; commitizen owns version<->CHANGELOG.
[group('release')]
release-check:
    uv run cz bump --dry-run

# Two checks run in order: scripts/check_spec_version.py, then
# scripts/gen_schemas.py in `--check` mode (the schema drift guard).
# Every version that lives in >1 place must agree: the spec_version mirrors
# (schema enums, models Literal, SKILL template) <-> agentguides.SPEC_VERSION, and
# the generated schemas are in sync with their Pydantic source. (Package version
# <-> CHANGELOG is owned by commitizen.) Runs on pre-commit and inside release-dryrun.
[group('release')]
check-versions:
    uv run python scripts/check_spec_version.py
    uv run python scripts/gen_schemas.py --check

# Same script as the drift guard in `check-versions`, but without `--check`.
# Regenerate the generate-all JSON schemas from their Pydantic source models
# (agentguides.spec). Edit the models, not schemas/*.json. Drift-guarded by
# `check-versions` (--check) and pre-commit.
[group('release')]
gen-schemas:
    uv run python scripts/gen_schemas.py

# Gates run in order and the first failure aborts (set -e): clean tree,
# `check-versions`, pytest, `check-dist`, then the commitizen dry run.
# 'Ready to publish' gate: clean tree + spec/schema consistency + tests + dist
# checks, then PRINT the bump+publish command (never publishes — no secrets).
[group('release')]
release-dryrun:
    #!/usr/bin/env bash
    set -euo pipefail
    # Refuse to continue when git reports any modified or untracked file.
    if [ -n "$(git status --porcelain)" ]; then
        echo "working tree not clean — commit or stash first"
        exit 1
    fi
    just check-versions
    uv run python -m pytest -q
    just check-dist
    uv run cz bump --dry-run
    # Every gate passed: print (never run) the commands that perform the release.
    printf '%s\n' \
        "" \
        "READY — bump + tag from the commit history, then push to publish:" \
        "    just bump                 # cz: writes version + CHANGELOG, creates the v* tag" \
        "    git push --follow-tags    # the v* tag triggers release.yml (Trusted Publishing)"

# `*ARGS` is variadic and is interpolated unquoted on purpose, so each forwarded
# flag reaches cz as its own shell word.
# Bump version + CHANGELOG from Conventional Commits and create the annotated tag.
# commitizen with version_provider=uv updates pyproject.toml + uv.lock. Forward
# flags to cz, e.g. `just bump -- --increment patch` or `just bump -- --prerelease rc`.
[group('release')]
bump *ARGS:
    uv run cz bump {{ARGS}}

# pre-commit is run through `uvx`, so it does not need to be installed in the project venv.
# Run the local pre-commit hooks across all files (full pass; first run reformats).
[group('release')]
precommit:
    uvx pre-commit run --all-files

# Compat gate: run each sibling plugin's `verify-runtime` against THIS runtime
# source — the answer to "does my runtime change force a plugin update?". A
# failure (render-parity drift or an integration/harness break) means the plugin
# must re-render / bump / re-tag. Each plugin rebuilds the wheel from ../runtime
# itself (a few extra seconds; keeps each recipe self-contained).
[group('release')]
verify-plugins:
    #!/usr/bin/env bash
    set -euo pipefail
    for plugin in claude-plugin hermes-plugin; do
        checkout="../$plugin"
        # A plugin that is not checked out next to this repo is skipped, not failed.
        if [ ! -d "$checkout" ]; then
            echo "skip $plugin (not checked out at $checkout)"
            continue
        fi
        echo "== verify $plugin against current runtime =="
        # Subshell keeps the `cd` from leaking into the next iteration.
        ( cd "$checkout" && just verify-runtime )
    done
    echo "all checked-out plugins verified against the current runtime"

# Parameters are positional: NAME is required, INTO (the parent directory) is the
# optional second argument and defaults to ".". A `KEY=value` word placed after
# the recipe name is passed through literally, so do not write `INTO=guides`.
#   just new my-guide                       # scaffolds ./my-guide/
#   just new my-guide guides                # scaffolds ./guides/my-guide/
# Both values are quoted so names and paths containing spaces stay one argument.
# Scaffold a new Guide skeleton (passes `guide validate` immediately).
new NAME INTO=".":
    uv run guide new "{{NAME}}" --into "{{INTO}}"

# DIR is the Guide directory; OUT (optional second argument, default ".") is
# passed as --out. Both are quoted so paths containing spaces stay one argument.
# Package a Guide directory into a .guide bundle (M5).
pack DIR OUT=".":
    uv run guide pack "{{DIR}}" --out "{{OUT}}"

# BUNDLE is the .guide file, DEST the target directory; both are required and
# quoted so paths containing spaces stay one argument.
# Extract a .guide bundle into a directory (M5).
unpack BUNDLE DEST:
    uv run guide unpack "{{BUNDLE}}" "{{DEST}}"

# Launch: start Claude Code with the example plugin(s) loaded and the
# `guide` MCP server attached. Run state goes to a fresh /tmp dir.
#
#   just launch                          # default = postgres-major-upgrade
#   just launch postgres-major-upgrade-rescue
launch GUIDE="postgres-major-upgrade":
    #!/usr/bin/env bash
    set -euo pipefail
    # v0.4.6: the walk Skill and walk-observer plugin live under
    # ~/.claude/skills/ via `guide setup`. Idempotent — re-runs are no-ops
    # once the install matches.
    uv run guide setup claude-code --mode observer

    # REPO_ROOT / STATE_DIR / GUIDE are exported because envsubst reads them
    # from the environment when rendering the prompt template below.
    export REPO_ROOT STATE_DIR GUIDE
    STATE_DIR="/tmp/guide-test-$(date -u +%Y-%m-%dT%H-%M-%SZ)"
    mkdir -p "$STATE_DIR"
    # Record the state dir so `watch`, `inspect`, `state-path` and `clean` can find it.
    echo "$STATE_DIR" > .last-state-path
    REPO_ROOT="$(pwd)"
    GUIDE="{{GUIDE}}"

    printf '%s\n' \
        "" \
        "=== Guide test session prepared ===" \
        "Guide:       $GUIDE" \
        "Repo root:   $REPO_ROOT" \
        "State path:  $STATE_DIR" \
        "MCP server:  guide (uv run guide mcp)" \
        ""

    session_prompt="$(envsubst < scripts/test-mode-prompt.tmpl)"

    # The MCP config goes into a real file in the ephemeral state dir; some
    # `claude` builds resolve --mcp-config via Node's fs.readFile which chokes
    # on bash process substitution paths.
    mcp_json="$STATE_DIR/mcp-config.json"
    printf '{"mcpServers":{"guide":{"command":"uv","args":["run","--directory","%s","guide","mcp"]}}}\n' \
        "$REPO_ROOT" > "$mcp_json"

    # Flag order matters: --mcp-config is variadic in claude (`<configs...>`)
    # and would otherwise swallow the trailing prompt positional as another
    # config path, hitting ENAMETOOLONG on the multi-KB prompt.
    # --strict-mcp-config is the next flag and stops the consumption; it also
    # isolates the session to only the guide server, which is what test mode
    # wants anyway.
    books="$REPO_ROOT/examples/books/postgres-upgrade"
    exec claude \
        --plugin-dir "$books/postgres-major-upgrade" \
        --plugin-dir "$books/postgres-major-upgrade-rescue" \
        --mcp-config "$mcp_json" \
        --strict-mcp-config \
        "$session_prompt"

# Reads the state dir recorded by `launch` in .last-state-path, blocks until the
# first `*/runs/*.md` file appears under it, then follows the newest one.
# Tail the most recent run state file from the last `launch`.
watch:
    #!/usr/bin/env bash
    set -euo pipefail
    if [[ ! -f .last-state-path ]]; then
        echo "no .last-state-path - run 'just launch' first" >&2
        exit 1
    fi
    state_dir="$(cat .last-state-path)"
    if [[ ! -d "$state_dir" ]]; then
        echo "state dir $state_dir is gone - run 'just launch' to start a new one" >&2
        exit 1
    fi
    echo "Waiting for first run file under $state_dir/*/runs/..."
    # Poll once a second until the glob matches at least one run file.
    until compgen -G "$state_dir/*/runs/*.md" > /dev/null; do sleep 1; done
    # Quote the directory part so a path containing whitespace is not
    # word-split; the glob itself must stay outside the quotes to expand.
    latest="$(ls -t "$state_dir"/*/runs/*.md | head -1)"
    echo "Tailing: $latest"
    echo "---"
    # -F keeps following by name, so the tail survives the file being replaced.
    tail -F "$latest"

# Uses the state dir recorded by `launch` in .last-state-path and falls back to
# `less` when $EDITOR is unset or empty.
# Open the most recent run file in $EDITOR.
inspect:
    #!/usr/bin/env bash
    set -euo pipefail
    if [[ ! -f .last-state-path ]]; then
        echo "no .last-state-path - run 'just launch' first" >&2
        exit 1
    fi
    state_dir="$(cat .last-state-path)"
    # Quote the directory part so a path containing whitespace is not
    # word-split; the glob stays outside the quotes so it still expands.
    # `|| true` plus the hidden stderr turn "no match" into an empty result
    # instead of an abort under `set -e` / pipefail.
    latest="$(ls -t "$state_dir"/*/runs/*.md 2>/dev/null | head -1 || true)"
    if [[ -z "$latest" ]]; then
        echo "no run files under $state_dir yet" >&2
        exit 1
    fi
    "${EDITOR:-less}" "$latest"

# Prints the path that `launch` recorded in .last-state-path; when the file is
# missing, prints a hint on stderr and exits 1.
# Print the active test state directory.
state-path:
    @cat .last-state-path 2>/dev/null || (echo "no .last-state-path - run 'just launch' first" >&2; exit 1)

# Removes the dir recorded in .last-state-path (and the marker file itself),
# then sweeps EVERY remaining /tmp/guide-test-* directory, not just the last one.
# Cleanup: wipe the temp state dir(s) created by `launch`.
clean:
    #!/usr/bin/env bash
    set -euo pipefail
    marker=.last-state-path
    if [[ -f "$marker" ]]; then
        recorded="$(cat "$marker")"
        if [[ -d "$recorded" ]]; then
            rm -rf "$recorded"
            echo "removed $recorded"
        fi
        rm "$marker"
    fi
    # nullglob: with no matching directories the loop body simply never runs.
    shopt -s nullglob
    for leftover in /tmp/guide-test-*; do
        rm -rf "$leftover"
        echo "removed $leftover"
    done

# --- v0.3 exit-criterion smoke check --------------------------------------
# Runbook: .planning/plans/v0.3-exit-check.md
# Combined:    just smoke
# Step-by-step: smoke-setup → smoke-review → smoke-validate → smoke-clean

# No body of its own: runs the `smoke-setup` and `smoke-review` dependencies in that order.
# TUI-based manual review: set up the smoke fixture and print Claude Code launch instructions.
[group('smoke')]
smoke: smoke-setup smoke-review

# No body of its own: runs `smoke-setup`, `smoke-review-auto`, then `smoke-verify` in that order.
# Fully-automated programmatic review: drives the LLM via `claude -p` (uses your Claude subscription, not the API key).
[group('smoke')]
smoke-auto: smoke-setup smoke-review-auto smoke-verify

# Writes the fixture location to .last-smoke-path and the two run ids
# (comma-separated) to .last-smoke-runs for the later smoke-* recipes.
# Build a fixture corpus: two hello-walk runs with a recurring step.failed pattern.
[group('smoke')]
smoke-setup:
    #!/usr/bin/env bash
    set -euo pipefail
    SP=$(mktemp -d -t guide-v0.3-smoke.XXXXXX)
    # Create one failed hello-walk run under $SP and print its run id.
    # The id is the ONLY thing written to stdout, because callers capture it
    # with $(mk_walk); every other command's stdout is redirected away.
    mk_walk() {
        # bash clears errexit inside command-substitution subshells, and this
        # function is only ever called as $(mk_walk). Re-enable it so a failing
        # `uv run` aborts the recipe instead of yielding a half-built fixture
        # that is still reported as "ready".
        set -e
        uv run guide state start-run --guide hello-walk --state-path "$SP" > /tmp/guide-smoke-start.json
        local rid
        rid=$(uv run python -c "import json; print(json.load(open('/tmp/guide-smoke-start.json'))['frontmatter']['id'])")
        # Same four events for every walk: that repetition is the "recurring pattern".
        for ev in \
            '{"type":"step.start","timestamp":"2026-05-12T14:30:00+00:00","step_id":"greet","prose":"running greet"}' \
            '{"type":"tool.call.start","timestamp":"2026-05-12T14:30:01+00:00","step_id":"greet","prose":"bash scripts/say-hello.sh"}' \
            '{"type":"tool.call.failed","timestamp":"2026-05-12T14:30:02+00:00","step_id":"greet","prose":"exit 127 - command not found"}' \
            '{"type":"step.failed","timestamp":"2026-05-12T14:30:02+00:00","step_id":"greet","prose":"greet failed: script missing"}'; do
            uv run guide state append-event --run "$rid" --state-path "$SP" "$ev" > /dev/null
        done
        uv run guide state update-step --run "$rid" --step greet --status failed --state-path "$SP" > /dev/null
        uv run guide state set-status --run "$rid" --status failed --state-path "$SP" > /dev/null
        echo "$rid"
    }
    RID_A=$(mk_walk)
    RID_B=$(mk_walk)
    echo "$SP" > .last-smoke-path
    echo "$RID_A,$RID_B" > .last-smoke-runs
    echo ""
    echo "=== v0.3 smoke fixture ready ==="
    echo "state path: $SP"
    echo "runs:       $RID_A, $RID_B"

# Reads the fixture recorded by `smoke-setup` (.last-smoke-path / .last-smoke-runs)
# and records the expected proposal path in .last-smoke-proposal for `smoke-verify`.
# Write the MCP config + prompt for the fixture, then print Claude Code launch instructions.
[group('smoke')]
smoke-review:
    #!/usr/bin/env bash
    set -euo pipefail
    [[ -f .last-smoke-path ]] || {
        echo "no .last-smoke-path - run 'just smoke-setup' first" >&2
        exit 1
    }
    # SP / RID_A / RID_B / REPO_ROOT are exported: envsubst reads them from the
    # environment when rendering the prompt template.
    export SP RID_A RID_B REPO_ROOT
    SP=$(cat .last-smoke-path)
    run_ids=$(cat .last-smoke-runs)
    RID_A="${run_ids%%,*}"   # text before the first comma
    RID_B="${run_ids#*,}"    # text after the first comma
    REPO_ROOT="$(pwd)"

    # MCP config so Claude Code can call guide tools (guide.read_events, …).
    export MCP_CONFIG="$SP/mcp-config.json"
    printf '{"mcpServers":{"guide":{"command":"uv","args":["run","--directory","%s","guide","mcp"]}}}\n' \
        "$REPO_ROOT" > "$MCP_CONFIG"

    # Prompt: tells Claude where the walks are and what JSON to write.
    export PROMPT_FILE="$SP/smoke-prompt.txt"
    envsubst < scripts/smoke-review-prompt.tmpl > "$PROMPT_FILE"

    # The proposal path the verify step will look for.
    echo "$SP/proposal.json" > .last-smoke-proposal

    # Render and print the launch instructions; the template may reference
    # MCP_CONFIG, PROMPT_FILE and PROMPT_BODY.
    export PROMPT_BODY="$(cat "$PROMPT_FILE")"
    envsubst < scripts/smoke-launch-instructions.tmpl

# Writes the same MCP config, prompt file and .last-smoke-proposal marker as
# `smoke-review`, then runs claude itself and tees its output to a transcript.
# Programmatic equivalent of smoke-review: runs `claude -p` headlessly (uses your Claude Code subscription; needs `claude` on PATH).
[group('smoke')]
smoke-review-auto:
    #!/usr/bin/env bash
    set -euo pipefail
    [[ -f .last-smoke-path ]] || {
        echo "no .last-smoke-path - run 'just smoke-setup' first" >&2
        exit 1
    }
    command -v claude >/dev/null 2>&1 || {
        echo "claude not found on PATH - install Claude Code first" >&2
        exit 1
    }
    # Exported because envsubst reads them when rendering the prompt template.
    export SP RID_A RID_B REPO_ROOT
    SP=$(cat .last-smoke-path)
    run_ids=$(cat .last-smoke-runs)
    RID_A="${run_ids%%,*}"   # text before the first comma
    RID_B="${run_ids#*,}"    # text after the first comma
    REPO_ROOT="$(pwd)"

    mcp_json="$SP/mcp-config.json"
    printf '{"mcpServers":{"guide":{"command":"uv","args":["run","--directory","%s","guide","mcp"]}}}\n' \
        "$REPO_ROOT" > "$mcp_json"

    rendered_prompt="$SP/smoke-prompt.txt"
    envsubst < scripts/smoke-review-prompt.tmpl > "$rendered_prompt"
    # The proposal path the verify step will look for.
    echo "$SP/proposal.json" > .last-smoke-proposal

    printf '%s\n' \
        "=== Running claude -p (programmatic Claude Code, using your subscription) ===" \
        "    State path: $SP" \
        "    Transcript: $SP/claude-transcript.log" \
        ""
    # Prompt comes via stdin — `--allowedTools <tools...>` is variadic and
    # would otherwise gobble a positional prompt as an extra tool name.
    claude -p \
        --mcp-config "$mcp_json" \
        --strict-mcp-config \
        --allowedTools "Read,Write,mcp__guide__guide.read_events" \
        < "$rendered_prompt" \
        | tee "$SP/claude-transcript.log"
    printf '%s\n' \
        "" \
        "=== claude -p complete ===" \
        "    Proposal expected at: $SP/proposal.json"

# Reads the expected proposal path from .last-smoke-proposal (written by
# `smoke-review` / `smoke-review-auto`); either check failing aborts before PASS.
# Verify the Claude-written proposal: Pydantic schema check + `guide apply-proposal --dry-run`.
[group('smoke')]
smoke-verify:
    #!/usr/bin/env bash
    set -euo pipefail
    if [[ ! -f .last-smoke-proposal ]]; then
        echo "no .last-smoke-proposal - run 'just smoke-review' first" >&2
        exit 1
    fi
    PROP=$(cat .last-smoke-proposal)
    if [[ ! -f "$PROP" ]]; then
        echo "expected proposal at $PROP but it doesn't exist yet." >&2
        echo "did Claude finish writing it? (See 'just smoke-review' output for the prompt.)" >&2
        exit 1
    fi

    echo "--- schema check (Proposal pydantic) ---"
    # The proposal path is handed to Python as argv[1] instead of being spliced
    # into the source text, so a quote or backslash in the path cannot break
    # (or inject into) the inline script.
    uv run python -c "
    from agentguides.models import Proposal
    import json, sys
    with open(sys.argv[1]) as fh:
        p = Proposal.model_validate_json(fh.read())
    print(json.dumps({
        'proposal_id': p.proposal_id,
        'guide_id': p.guide_id,
        'summary': p.summary,
        'source_run_ids': p.source_run_ids,
        'has_patch': len(p.patch) > 0,
    }, indent=2))
    " "$PROP"
    echo ""
    echo "--- guide apply-proposal --dry-run ---"
    uv run guide apply-proposal --proposal "$PROP" --guide examples/guides/hello-walk --dry-run
    echo ""
    echo "=== v0.3 exit criterion: PASS ==="

# Also removes /tmp/guide-smoke-start.json, the scratch file `smoke-setup` writes.
# Wipe the v0.3 smoke fixture (state dir + .last-smoke-* dotfiles).
[group('smoke')]
smoke-clean:
    #!/usr/bin/env bash
    set -euo pipefail
    if [[ -f .last-smoke-path ]]; then
        fixture_dir="$(cat .last-smoke-path)"
        if [[ -d "$fixture_dir" ]]; then
            rm -rf "$fixture_dir"
            echo "removed $fixture_dir"
        fi
    fi
    # -f: any of these may already be absent; that is not an error.
    rm -f \
        .last-smoke-path \
        .last-smoke-runs \
        .last-smoke-proposal \
        /tmp/guide-smoke-start.json

# --- v0.4 ACP sidecar / bridge / drain demos ------------------------------
# These are manual sanity-checks complementary to `just test` (which runs
# the recorded-fixture CI signal for v0.4). Each recipe shows you visible
# output for one segment of the v0.4 audit pipeline so you can sign off
# end-to-end before tagging the release.

# The subset is an explicit list: one test directory plus seven test modules.
# A test file added for v0.4 is NOT picked up here until it is listed below.
# Run the v0.4-specific test subset with verbose output.
[group('acp')]
acp-tests:
    uv run pytest \
        tests/adapters/acp/ \
        tests/test_acp_bridge.py \
        tests/test_transcript_reader.py \
        tests/test_jsonl_drain.py \
        tests/test_audit_sinks.py \
        tests/test_audit_observer_config.py \
        tests/test_acp_bridge_drain_hook.py \
        tests/test_docs_v04_adapters.py \
        -v

# Thin wrapper: all demo logic lives in scripts/acp_sidecar_demo.py.
# Demo: spawn the sidecar wrapping a fixture ACP Agent and show captured audit.
[group('acp')]
acp-sidecar-demo:
    uv run python scripts/acp_sidecar_demo.py

# Feeds four hook payloads (PreToolUse, PostToolUse, UserPromptSubmit, Stop)
# through the bridge one at a time, then prints the JSONL the bridge wrote.
# Demo: pipe a synthesized Claude Code hook payload through acp_bridge.py and show ACP JSONL.
[group('acp')]
acp-bridge-demo:
    #!/usr/bin/env bash
    set -euo pipefail
    OUT=$(mktemp -d -t guide-v0.4-bridge.XXXXXX)
    # Remove the scratch dir on every exit path. A plain `rm` at the end would
    # be skipped whenever `set -e` aborts on a failing step, leaking the dir.
    trap 'rm -rf "$OUT"' EXIT
    # The bridge script is given its output path through this environment variable.
    export GUIDE_ACP_BRIDGE_JSONL="$OUT/updates.jsonl"
    echo "out jsonl: $GUIDE_ACP_BRIDGE_JSONL"
    echo ""

    # feed <json-payload> <label>: announce the label, then pipe the payload
    # into the bridge script on stdin.
    feed() {
        local payload="$1" label="$2"
        echo "→ feeding $label"
        echo "$payload" | uv run python adapters/claude-code/hooks/acp_bridge.py
    }
    feed '{"tool_name":"Bash","tool_input":{"command":"ls -la /tmp"},"hook_event_name":"PreToolUse"}' "PreToolUse"
    feed '{"tool_name":"Bash","tool_response":"ok","hook_event_name":"PostToolUse"}' "PostToolUse"
    feed '{"prompt":"hi there","hook_event_name":"UserPromptSubmit"}' "UserPromptSubmit"
    feed '{"hook_event_name":"Stop"}' "Stop (no SessionUpdate expected)"

    echo ""
    echo "=== ACP SessionUpdate JSONL ==="
    # Pretty-print through jq when possible; if jq is missing or fails, fall
    # back to dumping the raw JSONL.
    cat "$GUIDE_ACP_BRIDGE_JSONL" | jq -c '{kind: (.session_update // .sessionUpdate), title, status, content}' 2>/dev/null \
        || cat "$GUIDE_ACP_BRIDGE_JSONL"

# Thin wrapper: all demo logic lives in scripts/acp_drain_demo.py.
# Demo: drain a fixture JSONL into a temp StateBackend and show the captured AuditEvents.
[group('acp')]
acp-drain-demo:
    uv run python scripts/acp_drain_demo.py

# Thin wrapper: all demo logic lives in scripts/acp_resilience_demo.py.
# Demo (v0.4.2): sidecar wrapping a fixture Agent that dies mid-prompt; show death-drain + exit code.
[group('acp')]
acp-resilience-demo:
    uv run python scripts/acp_resilience_demo.py

# Thin wrapper: all demo logic lives in scripts/acp_replay_demo.py.
# Demo (v0.4.3): round-trip a stored walk through ACP JSONL — export + replay parity check.
[group('acp')]
acp-replay-demo:
    uv run python scripts/acp_replay_demo.py

# No body of its own: runs `acp-tests` first, then the five demo recipes, as dependencies.
# Run every v0.4 ACP smoke demo in sequence.
[group('acp')]
acp-smoke: acp-tests acp-sidecar-demo acp-bridge-demo acp-drain-demo acp-resilience-demo acp-replay-demo

# --- v0.5.0 walk-efficiency analytics -------------------------------------
# Build a typed GuideDAG, evaluate completed walks against it, and surface
# friction signals. Every recipe here is a thin wrapper over `guide eval`;
# see `docs/cli/eval.md` for the full CLI walkthrough.

# The subset is everything pytest collects under tests/eval/.
# Run the v0.5.0 eval test subset with verbose output.
[group('eval')]
eval-tests:
    uv run pytest tests/eval/ -v

# Thin wrapper: the walk synthesis and every `guide eval` call happen inside
# scripts/eval_demo.py.
# End-to-end demo: synthesize four walks against examples/books/postgres-upgrade/postgres-major-upgrade
# (clean / failed / recovered / heavy-tool-density) and drive every
# `guide eval` subcommand against the corpus. Prints the actual output so you
# can see the typed graph, the worst-case failure trajectories, per-walk
# narratives across every outcome literal, the GuideHeatmap aggregate, and
# the ACP round-trip identity check.
[group('eval')]
eval-demo:
    uv run python scripts/eval_demo.py

# Thin wrapper: all regeneration logic lives in scripts/regen_web_demo.py.
# Regenerate the static-mode demo data shipped with `guide web`
# (src/agentguides/web/static/data/*.json) plus a local
# .dev-state/web-demo state corpus for dynamic-mode testing.
[group('eval')]
regen-web-demo:
    uv run python scripts/regen_web_demo.py

# The Guide path is fixed here; `eval-graph-svg` takes a path argument instead.
# Print the typed GuideDAG for examples/books/postgres-upgrade/postgres-major-upgrade as JSON.
[group('eval')]
eval-graph:
    uv run guide eval graph --guide examples/books/postgres-upgrade/postgres-major-upgrade

# Emit Graphviz DOT for examples/books/postgres-upgrade/postgres-major-upgrade and render it through
# `dot` to a real .svg file. GUIDE is a positional argument: run
# `just eval-graph-svg <path>` to render a different Guide.
# Requires Graphviz (`brew install graphviz`).
[group('eval')]
eval-graph-svg GUIDE="examples/books/postgres-upgrade/postgres-major-upgrade":
    #!/usr/bin/env bash
    set -euo pipefail
    command -v dot >/dev/null 2>&1 || {
        echo "error: \`dot\` not on PATH. \`brew install graphviz\` or equivalent." >&2
        exit 2
    }
    # mktemp -t on macOS appends its random suffix AFTER the template, which
    # would bury the extension — so the name is built by hand: epoch seconds
    # plus this shell's PID, with an explicit .svg suffix.
    svg_path="${TMPDIR:-/tmp}/guide-eval-graph-$(date +%s)-$$.svg"
    uv run guide eval graph --guide "{{GUIDE}}" --format dot | dot -Tsvg > "$svg_path"
    echo "wrote $svg_path"
    echo "view it: open \"$svg_path\"   # macOS: opens in default SVG viewer"

# Only the main Guide path is passed on the command line; `--worst-case` selects
# the failure-trajectory enumeration.
# Compose examples/books/postgres-upgrade/postgres-major-upgrade + its rescue sibling into a
# GuideUniverse and enumerate worst-case failure trajectories.
[group('eval')]
eval-universe:
    uv run guide eval universe --guide examples/books/postgres-upgrade/postgres-major-upgrade --worst-case

# --- v0.4.4 adapter test harnesses ----------------------------------------
# Validate the v0.4 ACP pipeline against real host binaries (Claude Code,
# Hermes). Each recipe writes the host's config files into a workdir,
# launches the host, and prints a HarnessReport.

# The subset is everything pytest collects under tests/harness/.
# Run the v0.4.4 harness test subset with verbose output.
[group('harness')]
harness-tests:
    uv run pytest tests/harness/ -v

# Walk examples/guides/hello-walk through the Claude Code harness (observer mode).
# Needs `claude` on PATH (skips cleanly if absent). MODEL is a positional
# argument defaulting to the sonnet-4-6 alias; run
# `just harness-walk-cc claude-haiku-4-5` for the cheap path or
# `just harness-walk-cc ""` to defer to the claude account default.
#
# v0.6: the `guide` plugin loads per-session via `claude -p --plugin-dir <src>`.
# No `guide setup` step — `_require_setup` just verifies the --plugin-dir
# source. AG_CLAUDE_HOME isolates the run from your real ~/.claude/;
# GUIDE_CLAUDE_PLUGIN_SRC points --plugin-dir at the sibling claude-plugin checkout.
[group('harness')]
harness-walk-cc MODEL="claude-sonnet-4-6":
    #!/usr/bin/env bash
    set -euo pipefail
    command -v claude >/dev/null 2>&1 || {
        echo "claude not on PATH — skipping. See docs/adapters/claude-code.md."
        exit 0
    }
    claude_home="$(pwd)/.dev-state/claude-home"
    export AG_CLAUDE_HOME="$claude_home"
    export GUIDE_CLAUDE_PLUGIN_SRC="$(pwd)/../claude-plugin"
    workdir=$(mktemp -d -t guide-harness-cc.XXXXXX)
    echo "workdir: $workdir"
    echo "claude home (isolated): $claude_home"
    # v0.4.9 M5: pass --mode at run time so the walk is authoritative on its own
    # mode regardless of the router default; lets harness-walk-all fan out.
    #
    # An empty MODEL yields no flag at all. $model_flag is expanded unquoted
    # below on purpose so "--model NAME" splits into two words.
    model="{{MODEL}}"
    model_flag="${model:+--model $model}"
    uv run guide harness walk --adapter claude-code --mode observer \
        --guide examples/guides/hello-walk \
        --workdir "$workdir" \
        $model_flag \
        --exec
    echo ""
    echo "=== Report ==="
    uv run guide harness report --workdir "$workdir"

# Walk examples/guides/hello-walk through the Claude Code harness (inline mode).
# Smoke pass for the inline registry. See harness-walk-cc for MODEL + fixture
# semantics — both modes share the isolated home; --mode is per-walk (v0.5.8:
# the cc plugin is install-invariant across mode, so no --force re-setup).
[group('harness')]
harness-walk-cc-inline MODEL="claude-sonnet-4-6":
    #!/usr/bin/env bash
    set -euo pipefail
    command -v claude >/dev/null 2>&1 || {
        echo "claude not on PATH — skipping. See docs/adapters/claude-code.md."
        exit 0
    }
    claude_home="$(pwd)/.dev-state/claude-home"
    export AG_CLAUDE_HOME="$claude_home"
    export GUIDE_CLAUDE_PLUGIN_SRC="$(pwd)/../claude-plugin"
    workdir=$(mktemp -d -t guide-harness-cc-inline.XXXXXX)
    echo "workdir: $workdir"
    echo "claude home (isolated): $claude_home"
    # An empty MODEL yields no flag at all. $model_flag is expanded unquoted
    # below on purpose so "--model NAME" splits into two words.
    model="{{MODEL}}"
    model_flag="${model:+--model $model}"
    uv run guide harness walk --adapter claude-code --mode inline \
        --guide examples/guides/hello-walk \
        --workdir "$workdir" \
        $model_flag \
        --exec
    echo ""
    echo "=== Report ==="
    uv run guide harness report --workdir "$workdir"

# Walk examples/guides/hello-walk through the Hermes harness (observer mode).
# Needs `hermes` on PATH. Hermes IS the ACP Agent (not the Client) —
# the sidecar wraps `hermes acp` and our embedded ACP driver plays
# the editor role. Hermes also needs its `[acp]` extras installed
# (`pipx install --force 'hermes-agent[acp,mcp]'`).
#
# Uses an ISOLATED `.dev-state/hermes-home/` fixture as $HERMES_HOME +
# $AG_HERMES_HOME, so the walk never touches your real ~/.hermes/. Run
# `just hermes-test-home-bootstrap` once to populate the fixture; the
# recipe bails with a hint if it's missing.
#
# MODEL records the requested model in launch notes only — `hermes acp`
# doesn't accept --model; selection lives in the fixture's config.yaml
# (set there via AG_HERMES_MODEL_ID at bootstrap time).
[group('harness')]
harness-walk-hermes MODEL="":
    #!/usr/bin/env bash
    set -euo pipefail
    command -v hermes >/dev/null 2>&1 || {
        echo "hermes not on PATH — skipping. See docs/adapters/hermes.md."
        exit 0
    }
    hermes_home="$(pwd)/.dev-state/hermes-home"
    # The fixture counts as bootstrapped once its config.yaml exists.
    if [[ ! -f "$hermes_home/config.yaml" ]]; then
        printf '%s\n' \
            "error: no fixture Hermes home at $hermes_home." \
            "  Bootstrap it once with:" \
            "    cp .env.example .env  # fill in AG_HERMES_INFERENCE_BASE_URL + AG_HERMES_MODEL_ID" \
            "    just hermes-test-home-bootstrap" >&2
        exit 2
    fi
    export HERMES_HOME="$hermes_home"
    export AG_HERMES_HOME="$hermes_home"
    # v0.4.6: harness setup is a hard prereq. Idempotent — re-runs are no-ops.
    uv run guide setup hermes --mode observer
    workdir=$(mktemp -d -t guide-harness-hermes.XXXXXX)
    echo "workdir: $workdir"
    echo "hermes home (isolated): $hermes_home"
    # v0.4.9 M5: per-walk --mode override eliminates the TOCTOU window
    # between this recipe's setup and harness-walk-hermes-inline's --force
    # setup when both run in parallel via harness-walk-all.
    #
    # An empty MODEL yields no flag at all. $model_flag is expanded unquoted
    # below on purpose so "--model NAME" splits into two words.
    model="{{MODEL}}"
    model_flag="${model:+--model $model}"
    uv run guide harness walk --adapter hermes --mode observer \
        --guide examples/guides/hello-walk \
        --workdir "$workdir" \
        $model_flag \
        --exec
    echo ""
    echo "=== Report ==="
    uv run guide harness report --workdir "$workdir"

# Walk examples/guides/hello-walk through the Hermes harness (inline mode).
# Smoke pass for the inline registry (v0.4.7 M3). See harness-walk-hermes
# for fixture + MODEL semantics — both modes share the isolated home.
[group('harness')]
harness-walk-hermes-inline MODEL="":
    #!/usr/bin/env bash
    set -euo pipefail
    command -v hermes >/dev/null 2>&1 || {
        echo "hermes not on PATH — skipping. See docs/adapters/hermes.md."
        exit 0
    }
    hermes_home="$(pwd)/.dev-state/hermes-home"
    # The fixture counts as bootstrapped once its config.yaml exists.
    if [[ ! -f "$hermes_home/config.yaml" ]]; then
        printf '%s\n' \
            "error: no fixture Hermes home at $hermes_home." \
            "  Bootstrap it once with:" \
            "    cp .env.example .env  # fill in AG_HERMES_INFERENCE_BASE_URL + AG_HERMES_MODEL_ID" \
            "    just hermes-test-home-bootstrap" >&2
        exit 2
    fi
    export HERMES_HOME="$hermes_home"
    export AG_HERMES_HOME="$hermes_home"
    # Unlike the observer recipe, setup here is forced and requests full capability.
    uv run guide setup hermes --mode inline --capability full --force
    workdir=$(mktemp -d -t guide-harness-hermes-inline.XXXXXX)
    echo "workdir: $workdir"
    echo "hermes home (isolated): $hermes_home"
    # v0.4.9 M5: per-walk --mode override; see harness-walk-hermes comment.
    #
    # An empty MODEL yields no flag at all. $model_flag is expanded unquoted
    # below on purpose so "--model NAME" splits into two words.
    model="{{MODEL}}"
    model_flag="${model:+--model $model}"
    uv run guide harness walk --adapter hermes --mode inline \
        --guide examples/guides/hello-walk \
        --workdir "$workdir" \
        $model_flag \
        --exec
    echo ""
    echo "=== Report ==="
    uv run guide harness report --workdir "$workdir"

# Build an ISOLATED hermes-agent[acp,mcp] venv under .dev-state/ and print the
# AG_HERMES_BIN export to use it (v0.5.8 M5). The system / pipx hermes often
# lacks the `mcp` Python SDK (→ silent 0-tools walks); this pins a known-good
# install so `harness check` and hermes walks are reproducible. Idempotent:
# re-running with the same VERSION reuses the venv. The default VERSION maps to
# `.dev-state/hermes-venv-016` (major.minor, dots stripped) to match the
# fixture the hermes walk recipes reference.
[group('harness')]
hermes-venv-bootstrap VERSION="0.16.0":
    #!/usr/bin/env bash
    set -euo pipefail
    # Recipe parameters are not shell variables: bind VERSION through just's
    # interpolation. Reading an unset $VERSION would abort under `set -u`.
    version="{{VERSION}}"
    slug="${version%.*}"; slug="${slug//./}"   # 0.16.0 -> 0.16 -> 016
    venv=".dev-state/hermes-venv-${slug}"
    # An existing hermes executable means the venv was already built: reuse it.
    if [[ ! -x "$venv/bin/hermes" ]]; then
        echo "creating $venv (hermes-agent[acp,mcp]==${version}) …"
        python3 -m venv "$venv"
        "$venv/bin/pip" install --quiet --upgrade pip
        "$venv/bin/pip" install --quiet "hermes-agent[acp,mcp]==${version}"
    fi
    # The whole point of the isolated venv: the mcp SDK must import.
    if ! "$venv/bin/python" -c "import mcp" 2>/dev/null; then
        echo "error: mcp SDK missing in $venv (install incomplete)" >&2
        exit 1
    fi
    echo "ready. Use it with:"
    echo "  export AG_HERMES_BIN=\"\$(pwd)/$venv/bin/hermes\""

# The check is run against the examples/guides/hello-walk Guide.
# Run is_available + dry prepare() for every registered harness.
# One-line PASS/SKIP/FAIL/NEEDS-SETUP per host; non-zero exit when any FAIL.
[group('harness')]
harness-check:
    uv run guide harness check --guide examples/guides/hello-walk

# Full v0.4.7 end-to-end verification: harness-check first (cheap, no LLM),
# then the 2×2 (adapter × mode) walk matrix. All four walks launch in
# parallel: since v0.4.8 the per-walk PID-keyed marker dirs replace the
# shared `active-walk.env` overlay, whose single stable path was what forced
# the two Hermes walks to run serially in v0.4.6. Output is captured per walk
# and printed sequentially.
# Hermes walks skip automatically when `hermes` isn't on PATH.
#
# CC_MODEL / HERMES_MODEL override per-adapter; defaults match the
# child recipes (sonnet-4-6 alias for cc, empty → config.yaml for hermes).
[group('harness')]
harness-walk-all CC_MODEL="claude-sonnet-4-6" HERMES_MODEL="":
    #!/usr/bin/env bash
    set -uo pipefail   # NOT -e: we want every walk to run even if one fails
    echo "=== harness-check ==="
    if ! just harness-check; then
        echo "  harness-check failed; aborting before any walk."
        exit 1
    fi
    cc_obs_log=$(mktemp -t guide-harness-walk-cc-observer.XXXXXX.log)
    cc_in_log=$(mktemp -t guide-harness-walk-cc-inline.XXXXXX.log)
    h_obs_log=$(mktemp -t guide-harness-walk-hermes-observer.XXXXXX.log)
    h_in_log=$(mktemp -t guide-harness-walk-hermes-inline.XXXXXX.log)

    echo ""
    echo "=== launching all four walks in parallel ==="
    echo "  cc model:     {{CC_MODEL}}"
    echo "  hermes model: {{ if HERMES_MODEL == '' { '(config.yaml default)' } else { HERMES_MODEL } }}"
    # v0.4.8 M2: per-walk PID-keyed marker dirs replace the v0.4.6 shared
    # `active-walk.env` overlay so same-mode concurrent walks against the
    # same Hermes profile are race-free.
    #
    # v0.4.8 M6: setup installs the walk Skill triple (router + walk-observer
    # + walk-inline) side-by-side, so cross-mode walks no longer race on the
    # install paths either. `guide setup --mode X` only swaps the router's
    # default_mode; both concrete surfaces are always present.
    (just harness-walk-cc            "{{CC_MODEL}}"     > "$cc_obs_log" 2>&1; echo "__EXIT__=$?" >> "$cc_obs_log") &
    cc_obs_pid=$!
    (just harness-walk-cc-inline     "{{CC_MODEL}}"     > "$cc_in_log"  2>&1; echo "__EXIT__=$?" >> "$cc_in_log") &
    cc_in_pid=$!
    (just harness-walk-hermes        "{{HERMES_MODEL}}" > "$h_obs_log" 2>&1; echo "__EXIT__=$?" >> "$h_obs_log") &
    h_obs_pid=$!
    (just harness-walk-hermes-inline "{{HERMES_MODEL}}" > "$h_in_log"  2>&1; echo "__EXIT__=$?" >> "$h_in_log") &
    h_in_pid=$!
    echo "  cc-observer     pid=$cc_obs_pid → $cc_obs_log"
    echo "  cc-inline       pid=$cc_in_pid  → $cc_in_log"
    echo "  hermes-observer pid=$h_obs_pid → $h_obs_log"
    echo "  hermes-inline   pid=$h_in_pid   → $h_in_log"
    wait $cc_obs_pid
    wait $cc_in_pid
    wait $h_obs_pid
    wait $h_in_pid

    print_section() {
        local label=$1 log=$2
        echo ""
        echo "=== $label ==="
        sed '/^__EXIT__=/d' "$log"
    }
    print_section "cc-observer"      "$cc_obs_log"
    print_section "cc-inline"        "$cc_in_log"
    print_section "hermes-observer"  "$h_obs_log"
    print_section "hermes-inline"    "$h_in_log"
    cc_obs_exit=$(grep '^__EXIT__=' "$cc_obs_log" | tail -1 | cut -d= -f2)
    cc_in_exit=$(grep '^__EXIT__=' "$cc_in_log" | tail -1 | cut -d= -f2)
    h_obs_exit=$(grep '^__EXIT__=' "$h_obs_log" | tail -1 | cut -d= -f2)
    h_in_exit=$(grep '^__EXIT__=' "$h_in_log" | tail -1 | cut -d= -f2)
    echo ""
    echo "=== summary ==="
    echo "  cc-observer     exit=$cc_obs_exit"
    echo "  cc-inline       exit=$cc_in_exit"
    echo "  hermes-observer exit=$h_obs_exit"
    echo "  hermes-inline   exit=$h_in_exit"
    rm -f "$cc_obs_log" "$cc_in_log" "$h_obs_log" "$h_in_log"
    if [ "$cc_obs_exit" -ne 0 ] || [ "$cc_in_exit" -ne 0 ] || [ "$h_obs_exit" -ne 0 ] || [ "$h_in_exit" -ne 0 ]; then
        exit 1
    fi

# Thin wrapper over `guide harness list`; takes no arguments.
# List every registered harness with availability.
[group('harness')]
harness-list:
    uv run guide harness list

# Side-install a local LLM (Ollama + qwen3:8b) so the Hermes harness has
# a provider to talk to. Tuned for 16GB Apple Silicon. Idempotent.
# All of the work happens in scripts/install-local-llm.sh; this recipe
# only invokes it with bash.
# Prints Hermes-side config follow-ups when done.
[group('harness')]
install-local-llm:
    bash scripts/install-local-llm.sh

# The fixture lives under .dev-state/hermes-home/ and is handed to the
# installer as HERMES_HOME, so ~/.hermes/ is never touched. When a .env
# file is present it is sourced with auto-export first (so contributors can
# `cp .env.example .env` and fill in their endpoint without long arg
# lists); scripts/install-local-llm.sh then runs in remote-mode against the
# fixture path.
#
# Required (typically via `.env`):
#   AG_HERMES_INFERENCE_BASE_URL  (e.g. http://localhost:11434/v1)
#   AG_HERMES_MODEL_ID            (e.g. qwen3:8b)
# Optional:
#   AG_HERMES_INFERENCE_API_KEY
#
# Exits 2 when either required variable is unset or empty.
# Re-run any time the endpoint moves. Wipe + rebuild with
# `rm -rf .dev-state/hermes-home && just hermes-test-home-bootstrap`.
# Bootstrap an ISOLATED Hermes home for the harness-walk-hermes recipes.
[group('harness')]
hermes-test-home-bootstrap:
    #!/usr/bin/env bash
    set -euo pipefail
    # Everything .env defines is auto-exported so the installer inherits it.
    if [[ -f .env ]]; then
        set -a
        # shellcheck disable=SC1091
        source .env
        set +a
    fi
    # Guard clause: both endpoint settings must be non-empty to continue.
    [[ -n "${AG_HERMES_INFERENCE_BASE_URL:-}" && -n "${AG_HERMES_MODEL_ID:-}" ]] || {
        echo "error: AG_HERMES_INFERENCE_BASE_URL and AG_HERMES_MODEL_ID required." >&2
        echo "  cp .env.example .env, fill them in, and re-run." >&2
        exit 2
    }
    hermes_home="$(pwd)/.dev-state/hermes-home"
    echo "bootstrapping isolated Hermes home at $hermes_home"
    # HERMES_HOME is scoped to the installer; nothing runs after it.
    HERMES_HOME="$hermes_home" bash scripts/install-local-llm.sh

# v0.5.5 — Install the hermes plugin INTO the fixture Hermes home and verify
# it loads. Since v0.5.9 the plugin lives in its own repo: the source is
# the sibling checkout ../hermes-plugin unless GUIDE_HERMES_PLUGIN_SRC is
# already set in the environment. The default SOURCE ("file") uses a
# file:// source pointing at packed examples in .dev-state/dist/.
# Idempotent — wipe with `rm -rf .dev-state/guide-home` to force a fresh
# first-run.
#
# SOURCE is forwarded as `--source` to scripts/hermes_plugin_live.py.
#
# Prereq: `just hermes-test-home-bootstrap` populated .dev-state/hermes-home/.
# `guide` and `hermes` on PATH (the recipe puts .venv/bin first to use the
# editable build). Skips with exit 0 when `hermes` is not on PATH; exits 2
# when the fixture home has no config.yaml.
#
# After install, the verification script asserts:
#   - the plugin's `.local/profile.toml` exists,
#   - `.local/library-view/{books,guides}/<id>` symlinks point at the
#     master library at .dev-state/guide-home/library/,
#   - mcp_servers.guide is written with the right GUIDE_* env block,
#   - cron/jobs.json carries the `guide-sync` entry.
# Install the hermes plugin into the fixture Hermes home and verify it.
[group('harness')]
hermes-plugin-install SOURCE="file":
    #!/usr/bin/env bash
    set -euo pipefail
    export PATH="$(pwd)/.venv/bin:$PATH"
    # v0.5.9: hermes-plugin lives in its own repo now; consume the sibling
    # checkout (../hermes-plugin) instead of the removed in-repo plugins/ tree.
    export GUIDE_HERMES_PLUGIN_SRC="${GUIDE_HERMES_PLUGIN_SRC:-$(pwd)/../hermes-plugin}"
    if ! command -v hermes >/dev/null 2>&1; then
        echo "hermes not on PATH — skipping. See docs/adapters/hermes.md."
        exit 0
    fi
    fixture="$(pwd)/.dev-state/hermes-home"
    if [[ ! -f "$fixture/config.yaml" ]]; then
        echo "error: no fixture Hermes home at $fixture." >&2
        # Single quotes: inside double quotes the backticks are a command
        # substitution and would actually run the bootstrap recipe here.
        echo '  Run `just hermes-test-home-bootstrap` first.' >&2
        exit 2
    fi
    uv run python scripts/hermes_plugin_live.py install --source "{{SOURCE}}"

# v0.5.5 — Same as hermes-plugin-install but seeds an https-layout catalog
# served by a local http.server (exercises the M2 https-resolution path
# end-to-end inside the plugin bootstrap). Implemented by re-invoking
# hermes-plugin-install with SOURCE=http.
# Install the hermes plugin from a locally served http catalog.
[group('harness')]
hermes-plugin-install-http:
    just hermes-plugin-install http

# v0.5.5 — Drive a walk end-to-end through the plugin's MCP server using the
# official mcp Python SDK as the client (the same protocol Hermes itself
# uses). Confirms the full chain: wrapper script → exec guide mcp → walk_*
# tools resolve → walk record lands in $GUIDE_HOME/state/ tagged with
# harness=hermes scope=<derived>.
#
# GUIDE is forwarded as `--guide` to scripts/hermes_plugin_walk.py.
# Prereq: `just hermes-plugin-install` must have been run. Skips with exit 0
# when `hermes` is not on PATH; exits 2 when the installed plugin's
# .local/profile.toml is missing.
# Drive one walk through the hermes plugin's MCP server.
[group('harness')]
hermes-plugin-walk GUIDE="hello-walk":
    #!/usr/bin/env bash
    set -euo pipefail
    export PATH="$(pwd)/.venv/bin:$PATH"
    if ! command -v hermes >/dev/null 2>&1; then
        echo "hermes not on PATH — skipping. See docs/adapters/hermes.md."
        exit 0
    fi
    plugin=".dev-state/hermes-home/plugins/guide"
    if [[ ! -e "$plugin/.local/profile.toml" ]]; then
        # Single quotes: inside double quotes the backticks are a command
        # substitution and would actually run the install recipe here.
        echo 'error: plugin not installed; run `just hermes-plugin-install` first.' >&2
        exit 2
    fi
    uv run python scripts/hermes_plugin_walk.py --guide "{{GUIDE}}"

# v0.5.5 — Use between iterations when debugging first-run behavior: the
# fixture Hermes home + ~/.guide/ are left pristine. Removes the
# .dev-state/{guide-home,dist,catalog} trees, the sibling plugin checkout's
# .local scratch, the plugins/guide symlink in the fixture home, and the
# fixture's cron/jobs.json.
# NOTE(review): hermes-plugin-install honours GUIDE_HERMES_PLUGIN_SRC, but
# this recipe only cleans the default ../hermes-plugin path — confirm
# whether an overridden checkout should be cleaned too.
# Wipe every artifact the hermes plugin install creates.
[group('harness')]
hermes-plugin-clean:
    #!/usr/bin/env bash
    set -euo pipefail
    # v0.5.9: hermes-plugin moved to a sibling repo, so its install scratch
    # (../hermes-plugin/.local) is cleaned there; rm -rf is a no-op when the
    # sibling checkout isn't alongside.
    for scratch in \
        .dev-state/guide-home \
        .dev-state/dist \
        .dev-state/catalog \
        ../hermes-plugin/.local
    do
        rm -rf "$scratch"
    done
    # Only ever remove the plugin entry when it is a symlink.
    guide_link=".dev-state/hermes-home/plugins/guide"
    [[ ! -L "$guide_link" ]] || rm "$guide_link"
    cron_jobs=".dev-state/hermes-home/cron/jobs.json"
    [[ ! -f "$cron_jobs" ]] || rm "$cron_jobs"
    echo "cleaned plugin install artifacts."

# v0.5.9 — Regenerate each plugin's walk Skill triple (walk / walk-observer /
# walk-inline SKILL.md) from the runtime renderer. These files are derived
# artifacts of `agentguides.resources.render_*`. The plugins now live in their own
# repos, so the drift guard + canonical committed copies live there; this recipe
# is the reproducible regen step that renders into the SIBLING plugin checkouts.
# No args = ../claude-plugin and ../hermes-plugin (the default sibling layout);
# pass explicit plugin-checkout dirs to target other locations. Explicit dirs
# are split on whitespace, so each one must be free of spaces.
# Regenerate the walk Skill triple in each plugin checkout.
[group('harness')]
render-plugin-skills *PLUGINS:
    #!/usr/bin/env bash
    set -euo pipefail
    export PATH="$(pwd)/.venv/bin:$PATH"
    plugins="{{PLUGINS}}"
    if [[ -n "$plugins" ]]; then
        # Explicit targets arrive as one space-joined string; word splitting
        # is what turns them back into separate arguments.
        # shellcheck disable=SC2086
        uv run python scripts/render_plugin_skills.py $plugins
    else
        # Defaults are passed individually quoted so a checkout path that
        # contains whitespace still yields exactly two arguments.
        uv run python scripts/render_plugin_skills.py \
            "$(pwd)/../claude-plugin" \
            "$(pwd)/../hermes-plugin"
    fi

# v0.6 — Two steps, both run with AG_CLAUDE_HOME pointing at the fixture
# ~/.claude/ rooted at .dev-state/claude-home/:
#   1. the real `guide setup claude-code` wrapper, which shells out to
#      `claude plugin marketplace add` + `claude plugin install`;
#      GUIDE_CLAUDE_PLUGIN_SRC points the marketplace at the sibling
#      claude-plugin checkout for this step only,
#   2. `guide setup-status claude-code` to verify the result.
# Needs `claude` on PATH.
# Provision the `guide` plugin into a fixture Claude home and verify it.
[group('harness')]
claude-plugin-install:
    #!/usr/bin/env bash
    set -euo pipefail
    here="$(pwd)"
    # Shared by both commands below.
    export AG_CLAUDE_HOME="$here/.dev-state/claude-home"
    GUIDE_CLAUDE_PLUGIN_SRC="$here/../claude-plugin" uv run guide setup claude-code
    uv run guide setup-status claude-code

# v0.5.6 — Drive a walk through the plugin's MCP server end-to-end. Spawns
# `guide mcp` directly via the mcp Python SDK with the env block from the
# plugin's `.mcp.json` plus CLAUDE_PROJECT_DIR (so the runtime's scope
# derivation fires), runs walk_start_run against examples/guides/hello-walk,
# and asserts the run record carries `harness=claude-code scope=<derived>`.
#
# GUIDE is forwarded as `--guide` to scripts/claude_plugin_walk.py.
# Prereq: `just claude-plugin-install` must have been run; exits 2 when the
# fixture's plugins/guide/.claude-plugin/plugin.json is missing.
# Drive one walk through the claude plugin's MCP server.
[group('harness')]
claude-plugin-walk GUIDE="examples/guides/hello-walk":
    #!/usr/bin/env bash
    set -euo pipefail
    export PATH="$(pwd)/.venv/bin:$PATH"
    plugin=".dev-state/claude-home/plugins/guide"
    if [[ ! -e "$plugin/.claude-plugin/plugin.json" ]]; then
        # Single quotes: inside double quotes the backticks are a command
        # substitution and would actually run the install recipe here.
        echo 'error: plugin not installed; run `just claude-plugin-install` first.' >&2
        exit 2
    fi
    uv run python scripts/claude_plugin_walk.py --guide "{{GUIDE}}"

# v0.5.6 — Removes .dev-state/claude-home and .dev-state/guide-home-cc;
# absent directories are not an error.
# Wipe the claude-plugin fixture artifacts.
[group('harness')]
claude-plugin-clean:
    #!/usr/bin/env bash
    set -euo pipefail
    for fixture_dir in .dev-state/claude-home .dev-state/guide-home-cc; do
        rm -rf "$fixture_dir"
    done
    echo "cleaned claude-plugin install artifacts."

# Thin wrapper over `guide scenarios list`; takes no arguments.
# List committed scenario idea specs under scenarios/.
[group('scenarios')]
scenarios-list:
    uv run guide scenarios list

# SLUG is the spec's file name without extension: the recipe passes
# scenarios/<SLUG>.md to `guide scenarios validate`.
# Validate one idea spec without touching the file system.
[group('scenarios')]
scenario-validate SLUG:
    uv run guide scenarios validate scenarios/{{SLUG}}.md

# SLUG names the idea spec (scenarios/<SLUG>.md, passed as --idea). WALKS
# controls the extra-walk count layered on top of the cover plan (default
# 1); COVERAGE is the greedy edge-cover target (0.0–1.0, default 0.8);
# DIST is forwarded as --distribution (default happy_heavy).
# Generate walks for an already-realized scenario.
[group('scenarios')]
scenario-generate SLUG WALKS="1" COVERAGE="0.8" DIST="happy_heavy":
    uv run guide scenarios generate \
        --idea scenarios/{{SLUG}}.md \
        --walks {{WALKS}} \
        --coverage {{COVERAGE}} \
        --distribution {{DIST}}

# Runs `guide web` with GUIDE_WEB_ENABLED=1 and examples/ as the
# fixtures-root. Every example with a state/ subdir surfaces in the backend
# selector. The SPA assets ship with the wheel; --static-dir is no longer
# needed.
# Boot guide web against the committed examples/ as the fixtures-root.
[group('scenarios')]
web-examples:
    GUIDE_WEB_ENABLED=1 uv run guide web --fixtures-root examples
