#!/usr/bin/env bash
# =============================================================================
# morie — Multi-domain Open Research and Inferential Estimation
# =============================================================================
# Native UNIX executive for the MORIE scientific computing toolkit.
#
# This is the primary command interface — a real UNIX tool, not a Python
# trampoline.  System-level operations (install, config, service management,
# completions) run natively in bash without requiring Python.  Computation
# commands (pipeline, chat, tui, run-module) dispatch to the Python runtime.
#
# Usage:
#   morie                    # launch TUI (interactive) or show help (piped)
#   morie chat               # streaming LLM chat REPL
#   morie pipeline --all -y  # run analysis pipeline with rich progress
#   morie install             # self-bootstrap (install deps, create venv)
#   morie doctor              # environment diagnostics
#   morie config              # show/set configuration
#   morie ollama start        # manage Ollama service
#   morie completions bash    # generate shell completions
#
# Environment:
#   ESML_HOME      data root for logs and the fallback venv (default: ~/.morie);
#                  the config file lives in ${XDG_CONFIG_HOME:-~/.config}/morie
#   ESML_PYTHON    override Python interpreter
#   ESML_DEBUG     set to 1 for verbose diagnostics
#   ESML_NO_COLOR  set to 1 to disable colors
# =============================================================================

set -euo pipefail

readonly ESML_SCRIPT_VERSION="0.9.8"

# ---------------------------------------------------------------------------
# XDG / Home directory
# ---------------------------------------------------------------------------
# Data root.  An explicit ESML_HOME always wins.  Otherwise use a dedicated
# "morie" directory below XDG_DATA_HOME when that is set, and ~/.morie when it
# is not.  (Pointing ESML_HOME at XDG_DATA_HOME itself would drop logs/ and
# .venv/ straight into the directory shared by every other application.)
if [[ -z "${ESML_HOME:-}" ]]; then
    if [[ -n "${XDG_DATA_HOME:-}" ]]; then
        ESML_HOME="$XDG_DATA_HOME/morie"
    else
        ESML_HOME="$HOME/.morie"
    fi
fi
# Config and cache follow the XDG layout, each in its own "morie" directory.
ESML_CONFIG_DIR="${XDG_CONFIG_HOME:-$HOME/.config}/morie"
ESML_CACHE_DIR="${XDG_CACHE_HOME:-$HOME/.cache}/morie"
ESML_LOG_DIR="$ESML_HOME/logs"
# User rc file; sourced as bash on every invocation.
ESML_RC="$ESML_CONFIG_DIR/esmlrc"

# ---------------------------------------------------------------------------
# Colors (ANSI, respects NO_COLOR / ESML_NO_COLOR / non-TTY)
# ---------------------------------------------------------------------------
# Define the colour variables used by every message helper.
#
# Colours are enabled only when stdout is a terminal and neither NO_COLOR nor
# ESML_NO_COLOR is set; otherwise every variable is the empty string.
#
# The values are stored as real ESC bytes (ANSI-C quoting, $'...') rather than
# the literal text "\033[...".  Literal backslash sequences are only decoded
# when they appear in a printf format or a %b argument, so they showed up as
# raw "\033[1m" text wherever a variable was passed through %s, echo or a
# here-document.  Real ESC bytes render correctly in all of those contexts.
_setup_colors() {
    if [[ -t 1 && -z "${NO_COLOR:-}" && -z "${ESML_NO_COLOR:-}" ]]; then
        RED=$'\033[0;31m'
        GREEN=$'\033[0;32m'
        YELLOW=$'\033[1;33m'
        BLUE=$'\033[0;34m'
        CYAN=$'\033[0;36m'
        MAGENTA=$'\033[0;35m'
        BOLD=$'\033[1m'
        DIM=$'\033[2m'
        RESET=$'\033[0m'
    else
        RED='' GREEN='' YELLOW='' BLUE='' CYAN='' MAGENTA='' BOLD='' DIM='' RESET=''
    fi
}
_setup_colors

# ---------------------------------------------------------------------------
# Logging
# ---------------------------------------------------------------------------
# _log TAG MESSAGE — print "TAG HH:MM:SS MESSAGE" on stdout.
# TAG is rendered with %b so colour escapes embedded in it are decoded (with
# %s a tag such as '\033[0;36m[morie]\033[0m' was printed as literal text).
# MESSAGE stays on %s so arbitrary user text is never interpreted.
_log()    { printf "%b ${DIM}%s${RESET} %s\n" "$1" "$(date +%H:%M:%S)" "$2"; }
# Level helpers: info/success write to stdout, warn/error/debug to stderr.
info()    { _log "${CYAN}[morie]${RESET}" "$*"; }
success() { _log "${GREEN}[morie]${RESET}" "$*"; }
warn()    { _log "${YELLOW}[morie]${RESET}" "$*" >&2; }
error()   { _log "${RED}[morie]${RESET}" "$*" >&2; }
# debug is silent unless ESML_DEBUG=1 and always returns 0 so it is safe
# under `set -e`.
debug()   { [[ "${ESML_DEBUG:-0}" == "1" ]] && _log "${MAGENTA}[debug]${RESET}" "$*" >&2 || true; }
# die MESSAGE — report an error and terminate the script with status 1.
die()     { error "$*"; exit 1; }

# ---------------------------------------------------------------------------
# Signal handling
# ---------------------------------------------------------------------------
# EXIT handler: terminate background jobs still attached to this shell, then
# exit with the status that was current when the handler was entered.
_cleanup() {
    local exit_code=$?
    local pids
    # Collect the PIDs with the builtin and signal them directly.  The former
    # `jobs -p | xargs -r kill` depended on `-r`, a GNU extension that some
    # xargs implementations reject; the error was discarded, so the jobs were
    # silently left running on those systems.
    pids="$(jobs -p 2>/dev/null)" || pids=""
    if [[ -n "$pids" ]]; then
        # shellcheck disable=SC2086 # one PID per word is intended
        kill $pids 2>/dev/null || true
    fi
    debug "Cleanup complete (exit=$exit_code)"
    exit "$exit_code"
}
trap _cleanup EXIT
# 130 = 128 + SIGINT(2), 143 = 128 + SIGTERM(15).  Both exits pass through
# _cleanup via the EXIT trap.
trap 'echo ""; warn "Interrupted."; exit 130' INT
trap 'warn "Terminated."; exit 143' TERM
trap '' HUP  # Ignore hangup — let background jobs survive

# ---------------------------------------------------------------------------
# Resolve project root
# ---------------------------------------------------------------------------
# Print the absolute, symlink-free directory that contains this script.
#
# Follows a chain of symlinks by hand (plain `readlink`, no `-f`), resolving
# relative link targets against the directory of the link itself.
# Outputs: the directory on stdout.  Returns non-zero if a directory cannot be
# entered or a link cannot be read.
# Call it inside $(...) — the final `cd` would otherwise change the caller's
# working directory.
_resolve_root() {
    local source="${BASH_SOURCE[0]}" dir
    while [[ -L "$source" ]]; do
        # Declaration and assignment are separate so a failing cd is not
        # masked by `local`.  CDPATH is cleared because a CDPATH hit makes cd
        # print the directory, which would corrupt the captured output; `--`
        # protects against paths that begin with a dash.
        dir="$(CDPATH='' cd -P -- "$(dirname -- "$source")" && pwd)" || return 1
        source="$(readlink -- "$source")" || return 1
        if [[ "$source" != /* ]]; then
            source="$dir/$source"
        fi
    done
    CDPATH='' cd -P -- "$(dirname -- "$source")" && pwd
}
# Directory holding the real (symlink-resolved) script; used below to locate
# the project venv, the Python package sources and the Docker files.
ROOT="$(_resolve_root)"
debug "Project root: $ROOT"

# ---------------------------------------------------------------------------
# Source user config
# ---------------------------------------------------------------------------
# The rc file is executed as bash in this shell, so any assignment in it is
# visible to everything that runs afterwards.
if [[ -f "$ESML_RC" ]]; then
    debug "Loading config: $ESML_RC"
    # shellcheck source=/dev/null
    source "$ESML_RC"
fi

# ---------------------------------------------------------------------------
# Python resolver
# ---------------------------------------------------------------------------
# Print the Python interpreter to use and return 0, or return 1 if none exists.
#
# Lookup order:
#   1. $ESML_PYTHON (must be executable, otherwise this is an error)
#   2. $ROOT/.venv/bin/python
#   3. $ESML_HOME/.venv/bin/python
#   4. python3, then python, from PATH (printed as the bare command name)
_resolve_python() {
    local interp name

    # An explicit override is authoritative: use it or fail, never fall through.
    if [[ -n "${ESML_PYTHON:-}" ]]; then
        if [[ ! -x "$ESML_PYTHON" ]]; then
            error "ESML_PYTHON='$ESML_PYTHON' is not executable."
            return 1
        fi
        debug "Using ESML_PYTHON=$ESML_PYTHON"
        echo "$ESML_PYTHON"
        return 0
    fi

    interp="$ROOT/.venv/bin/python"
    if [[ -x "$interp" ]]; then
        debug "Using venv: $interp"
        echo "$interp"
        return 0
    fi

    interp="$ESML_HOME/.venv/bin/python"
    if [[ -x "$interp" ]]; then
        debug "Using home venv: $interp"
        echo "$interp"
        return 0
    fi

    for name in python3 python; do
        if command -v "$name" &>/dev/null; then
            debug "Using system $name"
            echo "$name"
            return 0
        fi
    done
    return 1
}

# Succeed when the interpreter given as $1 reports a version of 3.10 or newer.
# Returns 1 when the interpreter cannot be run or is too old.
_check_python_version() {
    local interpreter="$1" reported maj min
    reported="$("$interpreter" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")' 2>/dev/null)" || return 1
    maj="${reported%%.*}"
    min="${reported#*.}"

    # Anything below Python 3 is rejected outright ...
    [[ "$maj" -lt 3 ]] && return 1
    # ... and within the 3.x series the minor version must be at least 10.
    if [[ "$maj" -eq 3 ]] && [[ "$min" -lt 10 ]]; then
        return 1
    fi

    debug "Python version: $reported"
    return 0
}

# Resolve a usable interpreter into the global PYTHON, or terminate the script.
# Exits with status 1 (after printing remediation hints on stderr) when no
# interpreter is found, and dies when the one found is older than 3.10.
_require_python() {
    if ! PYTHON="$(_resolve_python)"; then
        error "No Python interpreter found."
        {
            echo ""
            echo "  Quick fix:"
            echo "    morie install              # auto-setup everything"
            echo "    # -- or --"
            echo "    pip install morie           # if Python is available elsewhere"
            echo "    export ESML_PYTHON=/path/to/python3"
        } >&2
        exit 1
    fi

    if ! _check_python_version "$PYTHON"; then
        die "Python >= 3.10 required (found $("$PYTHON" --version 2>&1))."
    fi
}

# Ensure `morie.runner` is importable with the resolved interpreter.
# Returns 0 when it is; otherwise prints a hint (which one depends on whether
# this is a source checkout) and exits the script with status 1.
_require_esml_package() {
    _require_python

    # Happy path first: the package imports, nothing more to do.
    "$PYTHON" -c "import morie.runner" 2>/dev/null && return 0

    if [[ -d "$ROOT/py-package/morie" ]]; then
        warn "morie package not installed. Run: morie install"
    else
        error "morie package not found. Run: pip install morie"
    fi
    exit 1
}

# =========================================================================
# NATIVE SUBCOMMANDS (no Python required)
# =========================================================================

# ---------------------------------------------------------------------------
# morie install — self-bootstrapping
# ---------------------------------------------------------------------------
# Print a short platform identifier derived from `uname -s`:
# linux, wsl, macos, windows or unknown.
_detect_os() {
    local kernel
    kernel="$(uname -s)"

    if [[ "$kernel" == Linux* ]]; then
        # A Linux kernel whose /proc/version mentions Microsoft is reported as WSL.
        if grep -qi microsoft /proc/version 2>/dev/null; then
            echo "wsl"
        else
            echo "linux"
        fi
    elif [[ "$kernel" == Darwin* ]]; then
        echo "macos"
    elif [[ "$kernel" == MINGW* || "$kernel" == MSYS* || "$kernel" == CYGWIN* ]]; then
        echo "windows"
    else
        echo "unknown"
    fi
}

# Print the machine architecture from `uname -m`, folding the common aliases
# into x86_64 and arm64; any other value is printed unchanged.
_detect_arch() {
    local machine
    machine="$(uname -m)"

    if [[ "$machine" == "x86_64" || "$machine" == "amd64" ]]; then
        echo "x86_64"
    elif [[ "$machine" == "aarch64" || "$machine" == "arm64" ]]; then
        echo "arm64"
    else
        echo "$machine"
    fi
}

# Print the first available package manager, probing in priority order:
# brew, apt (detected through apt-get), dnf, pacman, apk, zypper.
# Prints "none" when no candidate is on PATH.
_detect_pkg_manager() {
    local tool
    for tool in brew apt-get dnf pacman apk zypper; do
        if command -v "$tool" &>/dev/null; then
            # apt is probed via its apt-get binary but reported by its short name.
            if [[ "$tool" == "apt-get" ]]; then
                echo "apt"
            else
                echo "$tool"
            fi
            return
        fi
    done
    echo "none"
}

# _ask_yn PROMPT [DEFAULT] — ask a yes/no question.
#
# Arguments: $1 question text; $2 default answer, "y" or "n" (default "n").
# Returns:   0 for yes, 1 for no.
# When stdin is not a terminal nothing is asked and the default decides.
#
# The prompt is written to stderr: callers such as _install_python run inside
# $(...), where a prompt on stdout would be captured instead of shown.  An EOF
# on read (Ctrl-D) selects the default rather than aborting under `set -e`.
_ask_yn() {
    local prompt="$1" default="${2:-n}" yn=""
    if [[ ! -t 0 ]]; then
        [[ "$default" == "y" ]]
        return
    fi
    if [[ "$default" == "y" ]]; then
        prompt="$prompt [Y/n]: "
    else
        prompt="$prompt [y/N]: "
    fi
    printf '%b%s%b' "$BOLD" "$prompt" "$RESET" >&2
    read -r yn || yn=""
    yn="${yn:-$default}"
    [[ "$yn" =~ ^[Yy] ]]
}

# _pkg_install PACKAGE — install PACKAGE with whichever manager
# _detect_pkg_manager reports.  Every manager except Homebrew is run through
# sudo.  Returns the installer's status, or 1 when no manager is supported.
_pkg_install() {
    local pkg="$1" mgr
    mgr="$(_detect_pkg_manager)"

    if [[ "$mgr" == "brew" ]]; then
        info "Installing $pkg via Homebrew..."
        brew install "$pkg"
    elif [[ "$mgr" == "apt" ]]; then
        info "Installing $pkg via apt..."
        # Refresh the package index first; install only if that worked.
        sudo apt-get update -qq && sudo apt-get install -y -qq "$pkg"
    elif [[ "$mgr" == "dnf" ]]; then
        info "Installing $pkg via dnf..."
        sudo dnf install -y -q "$pkg"
    elif [[ "$mgr" == "pacman" ]]; then
        info "Installing $pkg via pacman..."
        sudo pacman -S --noconfirm "$pkg"
    elif [[ "$mgr" == "apk" ]]; then
        info "Installing $pkg via apk..."
        sudo apk add "$pkg"
    elif [[ "$mgr" == "zypper" ]]; then
        info "Installing $pkg via zypper..."
        sudo zypper install -y "$pkg"
    else
        error "No supported package manager. Install $pkg manually."
        return 1
    fi
}

# Find, or offer to install, a Python >= 3.10 interpreter.
#
# Outputs: exactly one line on stdout — the interpreter command name.  All
#          progress messages, the prompt and package-manager output go to
#          stderr, because callers capture stdout with $(...).
# Returns: 0 with the interpreter printed, 1 when none is available.
#
# Version checks go through _check_python_version so this function and
# _require_python agree on what "supported" means (the former inline test
# `major >= 3 && minor >= 10` would have rejected a 4.0 interpreter).
_install_python() {
    local py="" candidate os

    for candidate in python3.12 python3.11 python3.10 python3 python; do
        if command -v "$candidate" &>/dev/null && _check_python_version "$candidate"; then
            py="$candidate"
            break
        fi
    done
    if [[ -n "$py" ]]; then
        success "Python found: $("$py" --version 2>&1)" >&2
        echo "$py"
        return 0
    fi

    warn "Python >= 3.10 not found."
    if _ask_yn "Install Python via package manager?" >&2; then
        os="$(_detect_os)"
        {
            case "$os" in
                macos)
                    if command -v brew &>/dev/null; then
                        brew install python@3.12
                    else
                        error "Install Homebrew first: https://brew.sh/"
                        return 1
                    fi ;;
                linux|wsl)
                    if command -v apt-get &>/dev/null; then
                        sudo apt-get update -qq
                        # Prefer 3.12; fall back to the distribution default.
                        sudo apt-get install -y -qq python3.12 python3.12-venv python3-pip || \
                        sudo apt-get install -y -qq python3 python3-venv python3-pip
                    elif command -v dnf &>/dev/null; then
                        sudo dnf install -y -q python3.12 || sudo dnf install -y -q python3
                    else
                        _pkg_install python3
                    fi ;;
                *)
                    error "Automatic Python install is not supported on '$os'." ;;
            esac
        } >&2 || warn "Package manager reported an error."

        # Re-scan, and re-check the version: the package manager may have
        # installed an interpreter that is still older than 3.10.
        for candidate in python3.12 python3.11 python3.10 python3 python; do
            if command -v "$candidate" &>/dev/null && _check_python_version "$candidate"; then
                py="$candidate"
                break
            fi
        done
    fi

    if [[ -z "$py" ]]; then
        error "Python >= 3.10 is required."
        return 1
    fi
    success "Python installed: $("$py" --version 2>&1)" >&2
    echo "$py"
}

# Check for R and, when it is missing, offer to install it.
#
# With Rscript present: report the version and install the R packages
# 'testthat' and 'survey' if they are missing.  Without it: ask (default "no")
# and install through Homebrew, apt-get or dnf.
#
# R is optional, so a failed install only produces a warning.  The installer
# is selected with if/elif: the former one-liner
#     A && apt-install || B && dnf-install
# parses as ((A && apt-install) || B) && dnf-install, so `sudo dnf install`
# ran right after a successful apt install and its failure aborted the script
# under `set -e`.
_install_r() {
    if command -v Rscript &>/dev/null; then
        local r_ver
        r_ver="$(Rscript -e 'cat(paste(R.version$major, R.version$minor, sep="."))' 2>/dev/null)" || r_ver="?"
        success "R found: $r_ver"
        info "Checking R packages..."
        Rscript -e "
            pkgs <- c('testthat', 'survey')
            missing <- pkgs[!pkgs %in% rownames(installed.packages())]
            if (length(missing) > 0) { install.packages(missing, repos='https://cloud.r-project.org/', quiet=TRUE); cat('Installed:', paste(missing, collapse=', '), '\n') } else { cat('R packages up to date.\n') }
        " 2>/dev/null || warn "R package install had warnings"
        return 0
    fi

    info "R not found (optional)."
    if ! _ask_yn "Install R for R package components?" "n"; then
        info "Skipping R. Python-only mode is fully functional."
        return 0
    fi

    local os
    os="$(_detect_os)"
    case "$os" in
        macos)
            if command -v brew &>/dev/null; then
                brew install r || warn "Homebrew could not install R."
            else
                warn "Homebrew not found; install R manually."
            fi ;;
        linux|wsl)
            if command -v apt-get &>/dev/null; then
                sudo apt-get install -y -qq r-base || warn "apt-get could not install r-base."
            elif command -v dnf &>/dev/null; then
                sudo dnf install -y -q R || warn "dnf could not install R."
            else
                warn "No supported package manager found; install R manually."
            fi ;;
        *)
            warn "Automatic R install is not supported on '$os'." ;;
    esac

    if command -v Rscript &>/dev/null; then
        success "R installed."
    else
        warn "R install may need shell restart."
    fi
}

# Offer to install Ollama (default answer "no") when it is not on PATH.
# After a successful install the default model qwen2.5:7b is pulled; a failed
# pull only produces a hint on how to retry.
_install_ollama() {
    if command -v ollama &>/dev/null; then
        success "Ollama found."
        return 0
    fi

    info "Ollama enables local, private LLM inference."
    if ! _ask_yn "Install Ollama for local AI assistant?" "n"; then
        info "Skipping Ollama."
        return
    fi

    # The upstream installer script is piped straight into sh.
    if ! curl -fsSL https://ollama.com/install.sh | sh 2>/dev/null; then
        warn "Ollama install failed. Visit: https://ollama.com"
        return
    fi

    success "Ollama installed."
    info "Pulling default model (qwen2.5:7b)..."
    if ollama pull qwen2.5:7b 2>/dev/null && success "Model ready."; then
        :
    else
        warn "Pull model later: ollama pull qwen2.5:7b"
    fi
}

# Create the rc file with a fully commented-out template.
# An existing rc file is never touched; the parent directory is created on
# demand.
_write_default_config() {
    if [[ -f "$ESML_RC" ]]; then
        return 0
    fi

    local rc_dir
    rc_dir="$(dirname "$ESML_RC")"
    mkdir -p "$rc_dir"

    # One argument per template line; single quotes keep every line literal.
    printf '%s\n' \
        '# =============================================================================' \
        '# MORIE Configuration — sourced on every morie invocation' \
        '# =============================================================================' \
        '# ESML_PYTHON="/path/to/python3"' \
        '# ESML_DEBUG=1' \
        '# OLLAMA_BASE_URL="http://localhost:11434"' \
        '# GEMINI_API_KEY=""' \
        '# GEMINI_MODEL="gemini-2.0-flash"' \
        '# LLM_API_BASE_URL=""' \
        '# LLM_API_KEY=""' \
        '# OPENAI_API_KEY=""' \
        '# ESML_DOCKER_IMAGE="morie:latest"' \
        > "$ESML_RC"

    success "Config written: $ESML_RC"
}

# Print a two-column "Component / Status" table for Python, morie, R, Ollama,
# Quarto and Docker.  Each row shows the tool's own version string when the
# tool is on PATH and a coloured placeholder otherwise.
_verify_install() {
    local py_status esml_status r_status ollama_status quarto_status docker_status

    echo ""
    printf "  ${BOLD}%-20s %-40s${RESET}\n" "Component" "Status"
    printf "  ${DIM}%-20s %-40s${RESET}\n" "─────────" "──────"

    if ! { command -v python3 &>/dev/null && py_status="$(python3 --version 2>&1)"; }; then
        py_status="${RED}not found${RESET}"
    fi
    printf "  %-20s %b\n" "Python" "$py_status"

    # When morie is not on PATH, show where the script lives instead.
    if command -v morie &>/dev/null; then
        esml_status="${GREEN}on PATH${RESET}"
    else
        esml_status="${YELLOW}$ROOT/morie${RESET}"
    fi
    printf "  %-20s %b\n" "morie" "$esml_status"

    if ! { command -v Rscript &>/dev/null && r_status="$(Rscript -e 'cat(R.version.string)' 2>/dev/null)"; }; then
        r_status="${DIM}not installed${RESET}"
    fi
    printf "  %-20s %b\n" "R" "$r_status"

    # Ollama has three states: absent, installed, and answering on its API port.
    if ! command -v ollama &>/dev/null; then
        ollama_status="${DIM}not installed${RESET}"
    elif curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
        ollama_status="${GREEN}running${RESET}"
    else
        ollama_status="${YELLOW}installed${RESET}"
    fi
    printf "  %-20s %b\n" "Ollama" "$ollama_status"

    if ! { command -v quarto &>/dev/null && quarto_status="$(quarto --version 2>&1 | head -1)"; }; then
        quarto_status="${DIM}not installed${RESET}"
    fi
    printf "  %-20s %b\n" "Quarto" "$quarto_status"

    if ! { command -v docker &>/dev/null && docker_status="$(docker --version 2>&1 | sed 's/Docker version //')"; }; then
        docker_status="${DIM}not installed${RESET}"
    fi
    printf "  %-20s %b\n" "Docker" "$docker_status"
    echo ""
}

# morie install — bootstrap the whole toolchain.
#
# Steps: create the data/config/cache/log directories, locate or install
# Python, create or reuse $ROOT/.venv in a source checkout, upgrade pip,
# install morie (editable from the checkout, otherwise from PyPI), then the
# optional R and Ollama steps, the default rc file, shell completions and a
# final status table.
#
# Changes from the previous version:
#   * A failed PyPI install is reported as a warning instead of the
#     unconditional "morie installed".
#   * Only the last line returned by _install_python is used as the
#     interpreter, so stray status output on its stdout cannot corrupt it.
#   * The "Next steps" heading is printed with %b so the colour codes render.
cmd_install() {
    local os arch pkg_mgr py
    os="$(_detect_os)"
    arch="$(_detect_arch)"
    pkg_mgr="$(_detect_pkg_manager)"
    info "MORIE self-bootstrap"
    info "OS: $os ($arch) | Package manager: $pkg_mgr"
    echo ""
    mkdir -p "$ESML_HOME" "$ESML_CONFIG_DIR" "$ESML_CACHE_DIR" "$ESML_LOG_DIR"

    py="$(_install_python)" || exit 1
    py="${py##*$'\n'}"   # interpreter name is the final line of the output

    if [[ -d "$ROOT/py-package/morie" && ! -d "$ROOT/.venv" ]]; then
        info "Creating virtual environment..."
        "$py" -m venv "$ROOT/.venv"
        py="$ROOT/.venv/bin/python"
        success "venv created"
    elif [[ -d "$ROOT/.venv" ]]; then
        py="$ROOT/.venv/bin/python"
        success "venv exists"
    fi

    # Best effort: an outdated pip is not fatal.
    info "Upgrading pip..."
    "$py" -m pip install --upgrade pip --quiet 2>&1 | tail -1 || true

    if [[ -d "$ROOT/py-package/morie" ]]; then
        info "Installing morie (editable dev mode)..."
        # These pipelines report pip's failure because the script runs with
        # `set -o pipefail`.
        if ! "$py" -m pip install --no-build-isolation -e "$ROOT[test,docs]" --quiet 2>&1 | tail -3; then
            warn "Full install failed, trying minimal..."
            "$py" -m pip install --no-build-isolation -e "$ROOT" --quiet 2>&1 | tail -3 \
                || die "Editable install of morie failed."
        fi
        success "morie installed (editable)"
    else
        info "Installing morie from PyPI..."
        if "$py" -m pip install morie --quiet 2>&1 | tail -3; then
            success "morie installed"
        else
            warn "pip could not install morie; retry with: $py -m pip install morie"
        fi
    fi

    _install_r
    _install_ollama
    _write_default_config
    cmd_completions install 2>/dev/null || true
    _verify_install

    echo ""
    success "Bootstrap complete."
    echo ""
    printf '  %b\n' "${BOLD}Next steps:${RESET}"
    echo "    morie doctor              # check environment health"
    echo "    morie chat                # interactive AI assistant"
    echo "    morie list-modules        # see analysis modules"
    echo "    morie pipeline --all -y   # run full pipeline"
    echo "    morie tui                 # full-screen terminal IDE"
    echo ""
}

# =========================================================================
# morie container — Docker container management (native)
# =========================================================================

# Default image tag for `morie container`; a value already present in the
# environment (or set by the sourced rc file) takes precedence.
readonly ESML_DOCKER_IMAGE="${ESML_DOCKER_IMAGE:-morie:latest}"
# Compose file and Dockerfile are looked up at the project root.
readonly ESML_DOCKER_COMPOSE="${ROOT}/docker-compose.yml"
readonly ESML_DOCKERFILE="${ROOT}/Dockerfile"

# Succeed when the docker CLI is installed and its daemon answers `docker info`.
# Otherwise print the reason on the error channel and return 1.
_docker_available() {
    if ! command -v docker &>/dev/null; then
        error "Docker not installed."
        return 1
    fi
    if ! docker info &>/dev/null; then
        error "Docker daemon not running."
        return 1
    fi
    return 0
}

# Print the command that drives Compose: "docker compose" when the plugin
# responds, "docker-compose" when only the standalone binary exists, and an
# empty line when neither is available.
_compose_cmd() {
    if docker compose version &>/dev/null 2>&1; then
        echo "docker compose"
    elif command -v docker-compose &>/dev/null; then
        echo "docker-compose"
    else
        echo ""
    fi
}

# _image_exists [IMAGE] — succeed when IMAGE is present in the local image
# store.  IMAGE defaults to $ESML_DOCKER_IMAGE, so existing zero-argument
# callers behave as before; passing a tag lets callers check a custom image.
_image_exists() { docker image inspect "${1:-$ESML_DOCKER_IMAGE}" &>/dev/null; }

# morie container <subcommand> [args...] — Docker image, container and compose
# management.  An unknown or missing subcommand prints the help text.
#
# Subcommands and their arguments:
#   build [FLAGS] [TAG]  words starting with "-" are passed to `docker build`
#                        (use the --flag=value form); the first other word is
#                        the tag
#   run [CMD...]         run CMD in a throw-away container
#   inspect|verify [IMAGE], export [DEST], shell, up, down, logs, list, clean
#
# Changes from the previous version:
#   * run passes its arguments through as an array, so quoting survives
#     (`run echo "a b"` no longer becomes three words).  A single argument is
#     still split on whitespace for backward compatibility.
#   * build and verify check the image they were asked about instead of always
#     checking $ESML_DOCKER_IMAGE, and `build --no-cache` is no longer taken
#     as the tag.
#   * Exit statuses are captured with `if` / `|| rc=$?`, so the error branches
#     are reachable under `set -e`.
#   * export creates DEST, reports a failed copy and returns non-zero.
#   * Coloured headings use printf %b so the escape codes render.
cmd_container() {
    local subcmd="${1:-help}"
    if [[ $# -gt 0 ]]; then shift; fi
    case "$subcmd" in
        build)
            _docker_available || return 1
            if [[ ! -f "$ESML_DOCKERFILE" ]]; then
                error "Dockerfile not found: $ESML_DOCKERFILE"
                return 1
            fi
            local tag="" arg size_mb
            local -a build_flags=()
            for arg in "$@"; do
                if [[ "$arg" == -* ]]; then
                    build_flags+=("$arg")
                elif [[ -z "$tag" ]]; then
                    tag="$arg"
                fi
            done
            tag="${tag:-$ESML_DOCKER_IMAGE}"
            info "Building MORIE container: $tag"
            # ${arr[@]+...} keeps an empty array safe under `set -u` on old bash.
            if ! DOCKER_BUILDKIT=1 docker build ${build_flags[@]+"${build_flags[@]}"} \
                    --tag "$tag" --file "$ESML_DOCKERFILE" \
                    --label "org.morie.build-date=$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$ROOT"; then
                error "Build failed."
                return 1
            fi
            if ! docker image inspect "$tag" &>/dev/null; then
                error "Build failed."
                return 1
            fi
            size_mb="$(docker image inspect "$tag" --format '{{.Size}}' | awk '{printf "%dMB", $1/1024/1024}')" || size_mb="?"
            success "Image built: $tag ($size_mb)"
            ;;
        run)
            _docker_available || return 1
            _image_exists || { warn "Image not found. Building..."; cmd_container build || return 1; }
            local -a run_cmd
            if [[ $# -eq 0 ]]; then
                run_cmd=(morie list-modules)
            elif [[ $# -eq 1 ]]; then
                # Legacy form: the whole command given as one quoted string.
                read -r -a run_cmd <<< "$1"
            else
                run_cmd=("$@")
            fi
            info "Running: ${run_cmd[*]}"
            docker run --rm -v "${ROOT}/data:/app/data" -e "ESML_ENV=container" \
                -e "OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}" \
                -e "GEMINI_API_KEY=${GEMINI_API_KEY:-}" -e "LLM_API_KEY=${LLM_API_KEY:-}" -e "OPENAI_API_KEY=${OPENAI_API_KEY:-}" \
                "$ESML_DOCKER_IMAGE" "${run_cmd[@]}"
            ;;
        shell)
            _docker_available || return 1
            _image_exists || { warn "Image not found. Building..."; cmd_container build || return 1; }
            info "Starting interactive shell..."
            docker run --rm -it -v "${ROOT}/data:/app/data" -e "ESML_ENV=container" "$ESML_DOCKER_IMAGE" /bin/bash
            ;;
        inspect)
            _docker_available || return 1
            local tag="${1:-$ESML_DOCKER_IMAGE}"
            docker image inspect "$tag" &>/dev/null || { error "Image not found: $tag"; return 1; }
            info "Inspecting: $tag"
            echo ""
            printf "  ${BOLD}%-15s${RESET} %s\n" "Image" "$tag"
            printf "  ${BOLD}%-15s${RESET} %s\n" "ID" "$(docker image inspect "$tag" --format '{{.Id}}' | cut -c8-19)"
            printf "  ${BOLD}%-15s${RESET} %s\n" "Created" "$(docker image inspect "$tag" --format '{{.Created}}' | cut -c1-19)"
            printf "  ${BOLD}%-15s${RESET} %sMB\n" "Size" "$(docker image inspect "$tag" --format '{{.Size}}' | awk '{printf "%d", $1/1024/1024}')"
            printf "  ${BOLD}%-15s${RESET} %s\n" "OS/Arch" "$(docker image inspect "$tag" --format '{{.Os}}/{{.Architecture}}')"
            printf "  ${BOLD}%-15s${RESET} %s\n" "Layers" "$(docker image inspect "$tag" --format '{{len .RootFS.Layers}}')"
            echo ""; info "Health check:"
            # Import the core packages inside the image and print their versions.
            docker run --rm "$tag" python3 -c "
import sys; print(f'  Python: {sys.version.split()[0]}')
import morie; print(f'  morie:   {morie.__version__}')
import pandas, numpy, scipy; print(f'  pandas={pandas.__version__} numpy={numpy.__version__} scipy={scipy.__version__}')
print(f'  modules: {len(morie.list_modules())}'); print('  Status: OK')
" 2>&1 || warn "Health check failed."
            ;;
        verify)
            _docker_available || return 1
            local tag="${1:-$ESML_DOCKER_IMAGE}" rc=0
            docker image inspect "$tag" &>/dev/null || { error "Image not found: $tag"; return 1; }
            info "Running pipeline verification in container..."
            docker run --rm -e "ESML_ENV=container" "$tag" python3 -m morie.runner pipeline --all -y || rc=$?
            echo ""
            if [[ $rc -eq 0 ]]; then
                success "Pipeline verification passed."
            else
                error "Pipeline verification failed."
            fi
            return "$rc"
            ;;
        up)
            _docker_available || return 1
            local cc
            cc="$(_compose_cmd)"
            if [[ -z "$cc" ]]; then error "docker compose not available."; return 1; fi
            if [[ ! -f "$ESML_DOCKER_COMPOSE" ]]; then error "docker-compose.yml not found."; return 1; fi
            info "Starting MORIE services..."
            # $cc stays unquoted: it may be the two words "docker compose".
            # shellcheck disable=SC2086
            $cc -f "$ESML_DOCKER_COMPOSE" up -d || { error "Failed to start services."; return 1; }
            success "Services started."
            ;;
        down)
            local cc
            cc="$(_compose_cmd)"
            if [[ -z "$cc" ]]; then error "docker compose not available."; return 1; fi
            if [[ ! -f "$ESML_DOCKER_COMPOSE" ]]; then error "docker-compose.yml not found."; return 1; fi
            info "Stopping services..."
            # shellcheck disable=SC2086
            $cc -f "$ESML_DOCKER_COMPOSE" down || { error "Failed to stop services."; return 1; }
            success "Stopped."
            ;;
        logs)
            local cc
            cc="$(_compose_cmd)"
            if [[ -z "$cc" ]]; then error "docker compose not available."; return 1; fi
            if [[ ! -f "$ESML_DOCKER_COMPOSE" ]]; then error "docker-compose.yml not found."; return 1; fi
            # shellcheck disable=SC2086
            $cc -f "$ESML_DOCKER_COMPOSE" logs --follow --tail=50
            ;;
        clean)
            _docker_available || return 1
            info "Cleaning up..."
            local containers dangling
            containers="$(docker ps -a --filter "ancestor=$ESML_DOCKER_IMAGE" -q 2>/dev/null)" || containers=""
            if [[ -n "$containers" ]]; then
                if printf '%s\n' "$containers" | xargs docker rm -f 2>/dev/null; then
                    success "Removed containers."
                else
                    warn "Some containers could not be removed."
                fi
            fi
            dangling="$(docker images -f "dangling=true" -q 2>/dev/null)" || dangling=""
            if [[ -n "$dangling" ]]; then
                if printf '%s\n' "$dangling" | xargs docker rmi 2>/dev/null; then
                    success "Removed dangling images."
                else
                    warn "Some dangling images could not be removed."
                fi
            fi
            info "Cleanup complete."
            ;;
        list|ls)
            _docker_available || return 1
            printf '%b\n' "${BOLD}MORIE Images:${RESET}"
            docker images --filter "reference=morie*" --format "table {{.Repository}}\t{{.Tag}}\t{{.Size}}\t{{.CreatedSince}}" 2>/dev/null || echo "  (none)"
            echo ""
            printf '%b\n' "${BOLD}MORIE Containers:${RESET}"
            docker ps -a --filter "ancestor=$ESML_DOCKER_IMAGE" --format "table {{.ID}}\t{{.Status}}\t{{.Names}}" 2>/dev/null || echo "  (none)"
            ;;
        export)
            _docker_available || return 1
            local dest="${1:-.}" cid pipeline_rc rc=0
            info "Running pipeline and exporting results..."
            cid="$(docker run -d -e "ESML_ENV=container" "$ESML_DOCKER_IMAGE" python3 -m morie.runner pipeline --all -y 2>/dev/null)" || cid=""
            if [[ -z "$cid" ]]; then
                error "Failed to start container."
                return 1
            fi
            info "Waiting for pipeline (container: ${cid:0:12})..."
            # `docker wait` prints the container's exit status.
            pipeline_rc="$(docker wait "$cid" 2>/dev/null)" || pipeline_rc="?"
            if [[ "$pipeline_rc" != "0" ]]; then
                warn "Pipeline exited with status $pipeline_rc; outputs may be incomplete."
            fi
            mkdir -p "$dest"
            if docker cp "$cid:/app/data/public/outputs" "$dest/" 2>/dev/null; then
                success "Results exported to $dest/outputs/"
            else
                error "Could not copy results from container ${cid:0:12}."
                rc=1
            fi
            docker rm "$cid" >/dev/null 2>&1 || true
            return "$rc"
            ;;
        help|*)
            printf '%b\n\n' "${BOLD}morie container${RESET} — Docker container management"
            printf '%b\n' "${BOLD}COMMANDS${RESET}"
            cat << 'CEOF'
    build [--no-cache]     Build the MORIE Docker image
    run [CMD]              Run a command in container
    shell                  Interactive bash in container
    inspect [IMAGE]        Image details and health check
    verify [IMAGE]         Run full pipeline and validate
    up                     Start services (morie + ollama) via compose
    down                   Stop services
    logs                   Follow service logs
    list                   List MORIE images and containers
    export [DEST]          Run pipeline and copy results to host
    clean                  Remove stopped containers and images
CEOF
            ;;
    esac
}

# ---------------------------------------------------------------------------
# morie config — show / set configuration
# ---------------------------------------------------------------------------
# morie config {show|edit|path|set <KEY> <VALUE>}
#
#   show  print the effective paths, the resolved interpreter and the rc file
#   edit  open the rc file in $EDITOR / $VISUAL / vi (creating it if missing)
#   path  print the rc file location
#   set   add or replace KEY="VALUE" in the rc file
#
# Changes from the previous version:
#   * edit creates a missing rc file with _write_default_config; it used to
#     run the whole `cmd_install` bootstrap (venv, pip, R/Ollama prompts) with
#     all output hidden.
#   * set validates KEY as a shell variable name and escapes VALUE for a
#     double-quoted string, because the rc file is sourced as bash.  The line
#     is replaced with awk, not a sed expression built from user text, so
#     values containing | & \ " $ or backticks are stored verbatim.
#   * Coloured headings use printf %b so the escape codes render.
cmd_config() {
    local subcmd="${1:-show}"
    if [[ $# -gt 0 ]]; then shift; fi

    case "$subcmd" in
        show)
            printf '%b\n' "${BOLD}MORIE Configuration${RESET}"
            echo ""
            echo "  ESML_HOME       = $ESML_HOME"
            echo "  ESML_CONFIG_DIR = $ESML_CONFIG_DIR"
            echo "  ESML_CACHE_DIR  = $ESML_CACHE_DIR"
            echo "  ESML_LOG_DIR    = $ESML_LOG_DIR"
            echo "  ESML_RC         = $ESML_RC"
            echo "  ESML_PYTHON     = ${ESML_PYTHON:-<auto-detect>}"
            echo "  ESML_DEBUG      = ${ESML_DEBUG:-0}"
            echo ""
            echo "  Project root    = $ROOT"
            local py
            py="$(_resolve_python 2>/dev/null)" || py="<not found>"
            echo "  Python          = $py"
            echo ""
            if [[ -f "$ESML_RC" ]]; then
                # Path goes through %s so backslashes in it are not interpreted.
                printf '%b%s%b\n' "$DIM" "Config file ($ESML_RC):" "$RESET"
                cat "$ESML_RC"
            else
                printf '%b\n' "${DIM}No config file. Run: morie config edit${RESET}"
            fi
            ;;
        edit)
            local editor="${EDITOR:-${VISUAL:-vi}}"
            mkdir -p "$(dirname "$ESML_RC")"
            [[ -f "$ESML_RC" ]] || _write_default_config > /dev/null
            exec "$editor" "$ESML_RC"
            ;;
        path)
            echo "$ESML_RC"
            ;;
        set)
            if [[ $# -lt 2 ]]; then
                die "Usage: morie config set <KEY> <VALUE>"
            fi
            local key="$1" value="$2" escaped line tmp
            if [[ ! "$key" =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
                die "Invalid config key '$key' (expected a shell variable name)."
            fi
            if [[ "$value" == *$'\n'* ]]; then
                die "Config values must be a single line."
            fi
            # Escape the four characters that are special inside double quotes.
            escaped=$value
            escaped=${escaped//\\/\\\\}
            escaped=${escaped//\"/\\\"}
            escaped=${escaped//\$/\\\$}
            escaped=${escaped//\`/\\\`}
            line="${key}=\"${escaped}\""

            mkdir -p "$(dirname "$ESML_RC")"
            if grep -q "^${key}=" "$ESML_RC" 2>/dev/null; then
                tmp="$(mktemp "${ESML_RC}.XXXXXX")" || die "Cannot create a temporary file next to $ESML_RC"
                # The new line travels through the environment: awk -v would
                # interpret backslash escapes in it.
                if MORIE_CFG_LINE="$line" awk -v prefix="${key}=" \
                        'index($0, prefix) == 1 { print ENVIRON["MORIE_CFG_LINE"]; next } { print }' \
                        "$ESML_RC" > "$tmp"; then
                    # Write back in place so the file's permissions are kept.
                    cat "$tmp" > "$ESML_RC"
                    rm -f "$tmp"
                else
                    rm -f "$tmp"
                    die "Failed to update $ESML_RC"
                fi
            else
                printf '%s\n' "$line" >> "$ESML_RC"
            fi
            success "Set ${key}=${value} in $ESML_RC"
            ;;
        *)
            echo "Usage: morie config {show|edit|path|set <KEY> <VALUE>}"
            ;;
    esac
}

# ---------------------------------------------------------------------------
# morie ollama — Ollama service management
# ---------------------------------------------------------------------------
# morie ollama {start|stop|status|pull [model]|models}
#
#   start   launch `ollama serve` in the background unless the API answers
#   stop    stop `ollama serve` (matched with pkill -f)
#   status  report installed / running state and the pulled models (default)
#   pull    pull a model (default: qwen2.5:7b)
#   models  list local models (alias: list)
#
# "Running" means http://localhost:11434/api/tags answers within two seconds.
#
# Changes from the previous version: the coloured status lines are printed
# with printf %b so the escape codes render (echo printed them literally), and
# the server PID is captured right after launch instead of after `disown`.
cmd_ollama() {
    local subcmd="${1:-status}"
    if [[ $# -gt 0 ]]; then shift; fi

    case "$subcmd" in
        start)
            if ! command -v ollama &>/dev/null; then
                die "Ollama not installed. Install: curl -fsSL https://ollama.com/install.sh | sh"
            fi
            if curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
                success "Ollama already running."
            else
                info "Starting Ollama..."
                ollama serve &>/dev/null &
                local pid=$!
                # Drop the job from the job table so the EXIT handler leaves it alone.
                disown
                sleep 2
                if curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
                    success "Ollama started (PID $pid)."
                else
                    warn "Ollama may still be starting. Check: curl http://localhost:11434/api/tags"
                fi
            fi
            ;;
        stop)
            if pkill -f "ollama serve" 2>/dev/null; then
                success "Ollama stopped."
            else
                info "Ollama was not running."
            fi
            ;;
        status)
            if ! command -v ollama &>/dev/null; then
                echo "Ollama: not installed"
                return
            fi
            if curl -s --max-time 2 http://localhost:11434/api/tags &>/dev/null; then
                local models
                # Extract the model names from the JSON reply; any failure
                # degrades to "unknown" / "?" instead of aborting.
                models="$(curl -s http://localhost:11434/api/tags | "$(_resolve_python 2>/dev/null || echo python3)" -c "
import sys, json
try:
    data = json.load(sys.stdin)
    names = [m.get('name','?') for m in data.get('models',[])]
    print(', '.join(names) if names else 'no models pulled')
except: print('unknown')
" 2>/dev/null || echo "?")"
                # Model names go through %s so they are never interpreted.
                printf '%b%s\n' "Ollama: ${GREEN}running${RESET}" " (models: $models)"
            else
                printf '%b\n' "Ollama: ${YELLOW}stopped${RESET}"
            fi
            ;;
        pull)
            local model="${1:-qwen2.5:7b}"
            if ! command -v ollama &>/dev/null; then
                die "Ollama not installed."
            fi
            info "Pulling model: $model"
            ollama pull "$model"
            ;;
        models|list)
            if ! command -v ollama &>/dev/null; then
                die "Ollama not installed."
            fi
            ollama list 2>/dev/null || echo "Ollama not running. Start with: morie ollama start"
            ;;
        *)
            echo "Usage: morie ollama {start|stop|status|pull [model]|models}"
            ;;
    esac
}

# ---------------------------------------------------------------------------
# morie update — self-update
# ---------------------------------------------------------------------------
cmd_update() {
    # Update morie in place.
    #
    # In a dev checkout ($ROOT has py-package/morie and .git) this runs
    # `git pull --ff-only` followed by an editable pip reinstall; otherwise
    # it upgrades the installed `morie` package through pip.
    #
    # Globals:  ROOT (read), PYTHON (read)
    # Outputs:  progress via info/warn/success, the last lines of pip output,
    #           and the resulting morie version.
    # Returns:  does not report success when pip fails; die is called instead.
    _require_python
    info "Updating morie..."

    local -a pip_args
    local done_msg
    if [[ -d "$ROOT/py-package/morie" && -d "$ROOT/.git" ]]; then
        # Dev checkout: git pull + pip install.
        info "Dev checkout detected. Pulling latest..."
        (cd "$ROOT" && git pull --ff-only 2>&1) || warn "git pull failed (non-fatal)"
        pip_args=(install --no-build-isolation -e "$ROOT[test,docs]" --quiet)
        done_msg="Updated from git + reinstalled."
    else
        # Installed via pip: upgrade.
        pip_args=(install --upgrade morie --quiet)
        done_msg="Updated via pip."
    fi

    # Capture pip's output and exit status separately.  Piping straight into
    # `tail ... || true` would discard the status and report success even
    # when the install failed.
    local pip_out pip_rc=0
    pip_out="$("$PYTHON" -m pip "${pip_args[@]}" 2>&1)" || pip_rc=$?
    if [[ -n "$pip_out" ]]; then
        printf '%s\n' "$pip_out" | tail -n 3
    fi
    if (( pip_rc != 0 )); then
        die "pip install failed (exit code ${pip_rc}); morie was not updated."
    fi
    success "$done_msg"

    # Show new version.
    local ver
    ver="$("$PYTHON" -c "import morie; print(morie.__version__)" 2>/dev/null)" || ver="?"
    info "Current version: $ver"
}

# ---------------------------------------------------------------------------
# morie completions — shell completion generation
# ---------------------------------------------------------------------------
cmd_completions() {
    # Print or install shell tab-completion scripts for morie.
    #
    # Arguments:
    #   $1  bash | zsh | fish  -> print that shell's completion script to stdout
    #       install            -> write the script for the login shell ($SHELL)
    #                             into its per-user completion directory
    #       (omitted)          -> defaults to the basename of $SHELL
    # Outputs:
    #   The completion script, or a usage message for an unrecognized value.
    #
    # The heredocs use quoted delimiters, so their contents are emitted
    # literally with no expansion by this script.

    # $SHELL may be unset (cron, minimal containers).  Default it to empty so
    # `set -u` does not abort; an empty value falls through to the usage text.
    local login_shell="${SHELL:-}"
    login_shell="${login_shell##*/}"
    local shell="${1:-$login_shell}"

    case "$shell" in
        bash)
            cat << 'BASH_COMP'
# MORIE bash completion — add to ~/.bashrc or /etc/bash_completion.d/morie
_esml_completions() {
    local cur prev commands
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    commands="install update config doctor chat tui pipeline list-modules run-module run-modules ask agent assistant profile-dataset sample inspect verify percysuits convert-checkpoint crypto list-datasets download-bootstrap ollama completions help lint check benchmark db tree-view hash size path changelog graph fmt sec sync migrate profile-code serve watch diff-outputs audit catalog schema sample-data validate-data translate health stat causal survival match-cli did-cli rdd-cli iv-cli effect-cli data log backup env test man status r quarto ci release deps perf net cron docs init clean info repl run version"

    # First argument: complete the command name however morie was invoked
    # (morie, ./morie, /path/to/morie).
    if (( COMP_CWORD == 1 )); then
        COMPREPLY=($(compgen -W "$commands" -- "$cur"))
        return
    fi

    case "$prev" in
        run-module)
            local modules="data-wrangling descriptive-statistics distribution-tests frequentist-inference bayesian-inference power-design logistic-models model-comparison regression-models propensity-scores causal-estimators treatment-effects dag-specification meta-synthesis ebac-core ebac-selection-adjustment-ipw ebac-integrations ebac-gender-smote-sensitivity figures tables final-report"
            COMPREPLY=($(compgen -W "$modules" -- "$cur"))
            ;;
        ollama)
            COMPREPLY=($(compgen -W "start stop status pull models" -- "$cur"))
            ;;
        config)
            COMPREPLY=($(compgen -W "show edit path set" -- "$cur"))
            ;;
        completions)
            COMPREPLY=($(compgen -W "bash zsh fish install" -- "$cur"))
            ;;
        inspect|verify)
            COMPREPLY=($(compgen -f -- "$cur"))
            ;;
        --agent)
            local agents="morie-architect morie-chief-orchestrator morie-code-quality-guardian morie-pathfinder morie-statistical-scientist security-secrets-architect"
            COMPREPLY=($(compgen -W "$agents" -- "$cur"))
            ;;
        lint)
            COMPREPLY=($(compgen -W "py r all fix report mypy check help" -- "$cur"))
            ;;
        check)
            COMPREPLY=($(compgen -W "all git tests docs deps data r imports help" -- "$cur"))
            ;;
        benchmark)
            COMPREPLY=($(compgen -W "import startup pytest modules all help" -- "$cur"))
            ;;
        db)
            COMPREPLY=($(compgen -W "tables schema rows query info export compare columns sample stats help" -- "$cur"))
            ;;
        tree-view)
            COMPREPLY=($(compgen -W "default code data docs full help" -- "$cur"))
            ;;
        hash)
            COMPREPLY=($(compgen -W "db data all verify file help" -- "$cur"))
            ;;
        size)
            COMPREPLY=($(compgen -W "all data db venv cache code total help" -- "$cur"))
            ;;
        path)
            COMPREPLY=($(compgen -W "all root python r db venv cache config help" -- "$cur"))
            ;;
        changelog)
            COMPREPLY=($(compgen -W "recent since-tag since-date authors save help" -- "$cur"))
            ;;
        graph)
            COMPREPLY=($(compgen -W "modules imports data packages help" -- "$cur"))
            ;;
        fmt)
            COMPREPLY=($(compgen -W "py r check diff help" -- "$cur"))
            ;;
        sec)
            COMPREPLY=($(compgen -W "deps code secrets all help" -- "$cur"))
            ;;
        sync)
            COMPREPLY=($(compgen -W "status push pull diff help" -- "$cur"))
            ;;
        migrate)
            COMPREPLY=($(compgen -W "check status db config help" -- "$cur"))
            ;;
        profile-code)
            COMPREPLY=($(compgen -W "run import top module help" -- "$cur"))
            ;;
        serve)
            COMPREPLY=($(compgen -W "docs data help" -- "$cur"))
            ;;
        watch)
            COMPREPLY=($(compgen -W "docs tests lint help" -- "$cur"))
            ;;
        diff-outputs)
            COMPREPLY=($(compgen -W "compare report latest help" -- "$cur"))
            ;;
        audit)
            COMPREPLY=($(compgen -W "all code data docs deps summary help" -- "$cur"))
            ;;
        catalog)
            COMPREPLY=($(compgen -W "list search info preview sources columns help" -- "$cur"))
            ;;
        schema)
            COMPREPLY=($(compgen -W "csv db compare validate help" -- "$cur"))
            ;;
        sample-data)
            COMPREPLY=($(compgen -W "random head tail stratified help" -- "$cur"))
            ;;
        validate-data)
            COMPREPLY=($(compgen -W "completeness duplicates ranges types summary help" -- "$cur"))
            ;;
        translate)
            COMPREPLY=($(compgen -W "examples py2r r2py cheatsheet help" -- "$cur"))
            ;;
        health)
            COMPREPLY=($(compgen -W "status resources network python r help" -- "$cur"))
            ;;
        stat)
            COMPREPLY=($(compgen -W "list count search categories run info help" -- "$cur"))
            ;;
        causal)
            COMPREPLY=($(compgen -W "ate att aipw dml irm ipw propensity match evalue sensitivity cate gate late gcomp plr pliv help" -- "$cur"))
            ;;
        survival)
            COMPREPLY=($(compgen -W "km cox logrank nelson-aalen aft rmst ph-test concordance help" -- "$cur"))
            ;;
        match-cli)
            COMPREPLY=($(compgen -W "nn exact mahalanobis cem full optimal subclass entropy balance compare help" -- "$cur"))
            ;;
        did-cli)
            COMPREPLY=($(compgen -W "2x2 event staggered bacon parallel dr triple fuzzy synthetic help" -- "$cur"))
            ;;
        rdd-cli)
            COMPREPLY=($(compgen -W "sharp fuzzy mccrary bandwidth donut placebo kink plot help" -- "$cur"))
            ;;
        iv-cli)
            COMPREPLY=($(compgen -W "tsls liml gmm wald first-stage hausman sargan ar-test ar-ci help" -- "$cur"))
            ;;
        effect-cli)
            COMPREPLY=($(compgen -W "cohens-d hedges-g glass-delta odds-ratio risk-ratio nnt cramers-v eta-sq meta-fe meta-re convert help" -- "$cur"))
            ;;
        man)
            COMPREPLY=($(compgen -W "morie install chat data container pipeline modules causal survival matching did rdd iv" -- "$cur"))
            ;;
        *)
            COMPREPLY=()
            ;;
    esac
}
complete -F _esml_completions morie
BASH_COMP
            ;;
        zsh)
            cat << 'ZSH_COMP'
#compdef morie
# MORIE zsh completion — add to ~/.zshrc or place in fpath

_esml() {
    local -a commands modules agents
    commands=(
        'install:Self-bootstrap (install deps, create venv)'
        'update:Update morie to latest version'
        'config:Show or edit configuration'
        'doctor:Run environment diagnostics'
        'chat:Launch interactive chat REPL'
        'tui:Launch full-screen terminal IDE'
        'pipeline:Run the analysis pipeline'
        'list-modules:List available analysis modules'
        'run-module:Run a single module'
        'run-modules:Run multiple modules'
        'ask:Ask the assistant (one-shot)'
        'assistant:Ask the assistant'
        'profile-dataset:Profile a dataset'
        'sample:Draw a sample from a dataset'
        'inspect:Browse output CSVs'
        'verify:Validate statistical outputs'
        'ollama:Manage Ollama LLM service'
        'completions:Generate shell completions'
        'help:Show help'
        'lint:Code linting (py/r/fix/report)'
        'check:Pre-flight validation checks'
        'benchmark:Performance benchmarks'
        'db:SQLite database introspection'
        'tree-view:Project directory tree'
        'hash:SHA256 checksums'
        'size:Disk usage reports'
        'path:Show resolved paths'
        'changelog:Git changelog'
        'graph:Module dependency graph'
        'fmt:Code formatting'
        'sec:Security audit'
        'sync:Data synchronization'
        'migrate:Version migration'
        'profile-code:Python profiling'
        'serve:Local HTTP server'
        'watch:File watcher auto-rebuild'
        'diff-outputs:Compare analysis outputs'
        'audit:Full project audit'
        'catalog:Dataset catalog browser'
        'schema:Schema inspection'
        'sample-data:Sample from datasets'
        'validate-data:Data validation'
        'translate:Code translation Py↔R'
        'health:System health monitoring'
        'stat:Statistical command registry'
        'causal:Causal inference CLI'
        'survival:Survival analysis CLI'
        'match-cli:PS matching CLI'
        'did-cli:Difference-in-differences CLI'
        'rdd-cli:Regression discontinuity CLI'
        'iv-cli:Instrumental variables CLI'
        'effect-cli:Effect sizes CLI'
        'data:Native CSV data toolkit'
        'log:View event log'
        'backup:Backup project data'
        'env:Manage environment variables'
        'test:Run tests'
        'man:Built-in manual pages'
        'status:Show project status'
        'r:R package management'
        'quarto:Quarto site management'
        'ci:CI/CD configuration'
        'release:Release management'
        'deps:Dependency management'
        'perf:Performance monitoring'
        'net:Network diagnostics'
        'cron:Scheduled tasks'
        'docs:Documentation build'
        'init:Initialize new project'
        'clean:Clean build artifacts'
        'info:Show project info'
        'repl:Python REPL with morie'
        'run:Run a script'
        'version:Show version info'
    )
    modules=(data-wrangling descriptive-statistics distribution-tests frequentist-inference bayesian-inference power-design logistic-models model-comparison regression-models propensity-scores causal-estimators treatment-effects dag-specification meta-synthesis ebac-core ebac-selection-adjustment-ipw ebac-integrations ebac-gender-smote-sensitivity figures tables final-report)
    agents=(morie-architect morie-chief-orchestrator morie-code-quality-guardian morie-pathfinder morie-statistical-scientist security-secrets-architect)

    case "$words[2]" in
        run-module)    _describe 'module' modules ;;
        ollama)        _values 'subcommand' start stop status pull models ;;
        config)        _values 'subcommand' show edit path set ;;
        completions)   _values 'shell' bash zsh fish install ;;
        inspect|verify) _files ;;
        chat)          _arguments '--agent[Agent persona]:agent:($agents)' ;;
        lint)          _values 'subcommand' py r all fix report mypy check ;;
        check)         _values 'subcommand' all git tests docs deps data r imports ;;
        benchmark)     _values 'subcommand' import startup pytest modules all ;;
        db)            _values 'subcommand' tables schema rows query info export compare columns sample stats ;;
        tree-view)     _values 'subcommand' default code data docs full ;;
        hash)          _values 'subcommand' db data all verify file ;;
        size)          _values 'subcommand' all data db venv cache code total ;;
        path)          _values 'subcommand' all root python r db venv cache config ;;
        changelog)     _values 'subcommand' recent since-tag since-date authors save ;;
        graph)         _values 'subcommand' modules imports data packages ;;
        fmt)           _values 'subcommand' py r check diff ;;
        sec)           _values 'subcommand' deps code secrets all ;;
        sync)          _values 'subcommand' status push pull diff ;;
        migrate)       _values 'subcommand' check status db config ;;
        profile-code)  _values 'subcommand' run import top module ;;
        diff-outputs)  _values 'subcommand' compare report latest ;;
        audit)         _values 'subcommand' all code data docs deps summary ;;
        catalog)       _values 'subcommand' list search info preview sources columns ;;
        schema)        _values 'subcommand' csv db compare validate ;;
        translate)     _values 'subcommand' examples py2r r2py cheatsheet ;;
        health)        _values 'subcommand' status resources network python r ;;
        stat)          _values 'subcommand' list count search categories run info ;;
        causal)        _values 'subcommand' ate att aipw dml irm ipw propensity match evalue sensitivity cate gate late gcomp plr pliv ;;
        survival)      _values 'subcommand' km cox logrank nelson-aalen aft rmst ph-test concordance ;;
        match-cli)     _values 'subcommand' nn exact mahalanobis cem full optimal subclass entropy balance compare ;;
        did-cli)       _values 'subcommand' 2x2 event staggered bacon parallel dr triple fuzzy synthetic ;;
        rdd-cli)       _values 'subcommand' sharp fuzzy mccrary bandwidth donut placebo kink plot ;;
        iv-cli)        _values 'subcommand' tsls liml gmm wald first-stage hausman sargan ar-test ar-ci ;;
        effect-cli)    _values 'subcommand' cohens-d hedges-g glass-delta odds-ratio risk-ratio nnt cramers-v eta-sq meta-fe meta-re convert ;;
        man)           _values 'topic' morie install chat data container pipeline modules causal survival matching did rdd iv ;;
        serve)         _values 'subcommand' docs data ;;
        watch)         _values 'subcommand' docs tests lint ;;
        validate-data) _values 'subcommand' completeness duplicates ranges types summary ;;
        sample-data)   _values 'subcommand' random head tail stratified ;;
        *)             _describe 'command' commands ;;
    esac
}
_esml "$@"
ZSH_COMP
            ;;
        fish)
            cat << 'FISH_COMP'
# MORIE fish completion — save to ~/.config/fish/completions/morie.fish
set -l commands install update config doctor chat tui pipeline list-modules run-module run-modules ask assistant profile-dataset sample inspect verify ollama completions help lint check benchmark db tree-view hash size path changelog graph fmt sec sync migrate profile-code serve watch diff-outputs audit catalog schema sample-data validate-data translate health stat causal survival match-cli did-cli rdd-cli iv-cli effect-cli data log backup env test man status r quarto ci release deps perf net cron docs init clean info repl run version
set -l modules data-wrangling descriptive-statistics distribution-tests frequentist-inference bayesian-inference power-design logistic-models model-comparison regression-models propensity-scores causal-estimators treatment-effects dag-specification meta-synthesis ebac-core ebac-selection-adjustment-ipw ebac-integrations ebac-gender-smote-sensitivity figures tables final-report

complete -c morie -f -n "not __fish_seen_subcommand_from $commands" -a "$commands"
complete -c morie -f -n "__fish_seen_subcommand_from run-module" -a "$modules"
complete -c morie -f -n "__fish_seen_subcommand_from ollama" -a "start stop status pull models"
complete -c morie -f -n "__fish_seen_subcommand_from config" -a "show edit path set"
complete -c morie -f -n "__fish_seen_subcommand_from completions" -a "bash zsh fish install"
complete -c morie -f -n "__fish_seen_subcommand_from lint" -a "py r all fix report mypy check help"
complete -c morie -f -n "__fish_seen_subcommand_from check" -a "all git tests docs deps data r imports help"
complete -c morie -f -n "__fish_seen_subcommand_from benchmark" -a "import startup pytest modules all help"
complete -c morie -f -n "__fish_seen_subcommand_from db" -a "tables schema rows query info export compare columns sample stats help"
complete -c morie -f -n "__fish_seen_subcommand_from tree-view" -a "default code data docs full help"
complete -c morie -f -n "__fish_seen_subcommand_from hash" -a "db data all verify file help"
complete -c morie -f -n "__fish_seen_subcommand_from size" -a "all data db venv cache code total help"
complete -c morie -f -n "__fish_seen_subcommand_from path" -a "all root python r db venv cache config help"
complete -c morie -f -n "__fish_seen_subcommand_from changelog" -a "recent since-tag since-date authors save help"
complete -c morie -f -n "__fish_seen_subcommand_from graph" -a "modules imports data packages help"
complete -c morie -f -n "__fish_seen_subcommand_from fmt" -a "py r check diff help"
complete -c morie -f -n "__fish_seen_subcommand_from sec" -a "deps code secrets all help"
complete -c morie -f -n "__fish_seen_subcommand_from sync" -a "status push pull diff help"
complete -c morie -f -n "__fish_seen_subcommand_from migrate" -a "check status db config help"
complete -c morie -f -n "__fish_seen_subcommand_from profile-code" -a "run import top module help"
complete -c morie -f -n "__fish_seen_subcommand_from diff-outputs" -a "compare report latest help"
complete -c morie -f -n "__fish_seen_subcommand_from audit" -a "all code data docs deps summary help"
complete -c morie -f -n "__fish_seen_subcommand_from catalog" -a "list search info preview sources columns help"
complete -c morie -f -n "__fish_seen_subcommand_from schema" -a "csv db compare validate help"
complete -c morie -f -n "__fish_seen_subcommand_from sample-data" -a "random head tail stratified help"
complete -c morie -f -n "__fish_seen_subcommand_from validate-data" -a "completeness duplicates ranges types summary help"
complete -c morie -f -n "__fish_seen_subcommand_from stat" -a "list count search categories run info help"
complete -c morie -f -n "__fish_seen_subcommand_from causal" -a "ate att aipw dml irm ipw propensity match evalue sensitivity cate gate late gcomp plr pliv help"
complete -c morie -f -n "__fish_seen_subcommand_from survival" -a "km cox logrank nelson-aalen aft rmst ph-test concordance help"
complete -c morie -f -n "__fish_seen_subcommand_from match-cli" -a "nn exact mahalanobis cem full optimal subclass entropy balance compare help"
complete -c morie -f -n "__fish_seen_subcommand_from did-cli" -a "2x2 event staggered bacon parallel dr triple fuzzy synthetic help"
complete -c morie -f -n "__fish_seen_subcommand_from rdd-cli" -a "sharp fuzzy mccrary bandwidth donut placebo kink plot help"
complete -c morie -f -n "__fish_seen_subcommand_from iv-cli" -a "tsls liml gmm wald first-stage hausman sargan ar-test ar-ci help"
complete -c morie -f -n "__fish_seen_subcommand_from effect-cli" -a "cohens-d hedges-g glass-delta odds-ratio risk-ratio nnt cramers-v eta-sq meta-fe meta-re convert help"
complete -c morie -f -n "__fish_seen_subcommand_from health" -a "status resources network python r help"
complete -c morie -f -n "__fish_seen_subcommand_from translate" -a "examples py2r r2py cheatsheet help"
complete -c morie -f -n "__fish_seen_subcommand_from serve" -a "docs data help"
complete -c morie -f -n "__fish_seen_subcommand_from watch" -a "docs tests lint help"
complete -c morie -f -n "__fish_seen_subcommand_from man" -a "morie install chat data container pipeline modules causal survival matching did rdd iv"
FISH_COMP
            ;;
        install)
            # Install completions for the current shell.
            local target
            case "$login_shell" in
                bash)
                    target="${BASH_COMPLETION_USER_DIR:-$HOME/.local/share/bash-completion/completions}/morie"
                    mkdir -p "$(dirname "$target")"
                    cmd_completions bash > "$target"
                    success "Bash completions installed: $target"
                    info "Restart your shell or run: source $target"
                    ;;
                zsh)
                    target="${HOME}/.zfunc/_esml"
                    mkdir -p "$(dirname "$target")"
                    cmd_completions zsh > "$target"
                    success "Zsh completions installed: $target"
                    info "Ensure fpath includes ~/.zfunc and run: compinit"
                    ;;
                fish)
                    target="$HOME/.config/fish/completions/morie.fish"
                    mkdir -p "$(dirname "$target")"
                    cmd_completions fish > "$target"
                    success "Fish completions installed: $target"
                    ;;
                *)
                    warn "Unknown shell: ${SHELL:-<unset>}. Generate manually: morie completions {bash|zsh|fish}"
                    ;;
            esac
            ;;
        *)
            echo "Usage: morie completions {bash|zsh|fish|install}"
            echo ""
            echo "  morie completions bash     # print bash completions to stdout"
            echo "  morie completions zsh      # print zsh completions to stdout"
            echo "  morie completions fish     # print fish completions to stdout"
            echo "  morie completions install  # auto-install for current shell"
            ;;
    esac
}

# ---------------------------------------------------------------------------
# morie help — show usage
# ---------------------------------------------------------------------------
cmd_help() {
    cat << EOF
${BOLD}morie${RESET} — Multi-domain Open Research and Inferential Estimation

${BOLD}USAGE${RESET}
    morie [command] [options]

${BOLD}SYSTEM COMMANDS${RESET} ${DIM}(native, no Python required)${RESET}
    install              Self-bootstrap: install Python, venv, deps, config
    update               Update morie to the latest version
    config [show|edit|set]  Show, edit, or set configuration
    ollama [start|stop|status|pull]  Manage local Ollama LLM service
    container [build|run|verify|shell]  Docker container management
    data [profile|head|stats|missing|...]  Native CSV data toolkit
    test [all|quick|module|coverage]  Test runner
    backup [create|restore|list]  Output backup/restore
    log [show|clear]     Log management
    env [show|export|check]  Environment management
    status               Quick project status overview
    man <topic>          Built-in manual pages
    doctor-native        Native diagnostics (no Python)
    completions [bash|zsh|fish|install]  Shell tab-completion
    help                 Show this help

${BOLD}IDE COMMANDS${RESET} ${DIM}(Python runtime)${RESET}
    chat [--agent NAME]  Interactive streaming chat REPL (Claude Code-like)
    tui                  Full-screen terminal IDE (requires textual)
    exec CODE            Execute Python/R code inline (or pipe via stdin)
    exec co FILE         Create-and-open file in cofs/ with built-in editor
    edit FILE            Open file in built-in editor (ctrl+s/r/q)
    repl [--lang r]      Python/R REPL with morie preloaded (1753 functions)

${BOLD}ANALYSIS COMMANDS${RESET} ${DIM}(Python runtime)${RESET}
    doctor               Environment diagnostics
    pipeline [--all] [-y]  Run analysis module pipeline with live progress
    list-modules         List available analysis modules
    run-module MODULE    Run a single module
    run-modules [--modules ...]  Run multiple modules
    profile-dataset --csv FILE  Profile a dataset
    sample --csv FILE --n N  Draw a sample (SRS, stratified, cluster, PPS)

${BOLD}VALIDATION COMMANDS${RESET} ${DIM}(Python runtime)${RESET}
    inspect PATH         Browse output CSVs (schema, stats, head)
    verify PATH          Validate statistical outputs (p-values, CIs, SEs)

${BOLD}ASSISTANT COMMANDS${RESET} ${DIM}(Python runtime, LLM)${RESET}
    ask QUESTION         One-shot question to the LLM assistant
    assistant QUESTION   Ask assistant (with --stream option)

${BOLD}DEVELOPER COMMANDS${RESET} ${DIM}(native + Python hybrid)${RESET}
    lint                 Code linting (py/r/fix/report/mypy)
    check                Pre-flight checks (git/tests/docs/deps/data/imports)
    benchmark            Performance benchmarks (import/startup/pytest/modules)
    db                   SQLite introspection (tables/schema/rows/query/export)
    tree-view            Project directory tree (code/data/docs/full)
    hash                 SHA256 checksums (db/data/file/verify)
    size                 Disk usage reports (data/db/venv/cache/code)
    path                 Show resolved project paths
    changelog            Git changelog (recent/since-tag/since-date/authors)
    graph                Module dependency graph (modules/imports/data)
    fmt                  Code formatting (py/r/check/diff)
    sec                  Security audit (deps/code/secrets)
    sync                 Data synchronization (push/pull/status)
    migrate              Version migration helper (check/status/db/config)
    profile-code         Python profiling (run/import/module)
    serve                Local HTTP server (docs/data)
    watch                File watcher auto-rebuild (docs/tests/lint)
    diff-outputs         Compare analysis outputs between runs
    audit                Project audit (code/data/docs/deps/summary)
    catalog              Dataset catalog browser (list/search/info/preview)
    schema               Schema inspection (csv/db/compare/validate)
    sample-data          Sample rows from datasets (random/head/tail/stratified)
    validate-data        Data validation (completeness/duplicates/ranges/types)
    translate            Code translation Python↔R (examples/py2r/r2py)
    health               System health (status/resources/network/python/r)

${BOLD}STATISTICAL COMMANDS${RESET} ${DIM}(Python runtime, 1200+ methods)${RESET}
    stat                 Statistical command registry (list/search/run/info)
    causal               Causal inference (ate/att/aipw/dml/irm/ipw/match)
    survival             Survival analysis (km/cox/logrank/aft/rmst/ph-test)
    match-cli            PS matching (nn/exact/mahalanobis/cem/subclass/entropy)
    did-cli              Difference-in-differences (2x2/event/staggered/bacon)
    rdd-cli              Regression discontinuity (sharp/fuzzy/mccrary/bandwidth)
    iv-cli               Instrumental variables (tsls/liml/gmm/hausman/sargan)
    effect-cli           Effect sizes (cohens-d/hedges-g/odds-ratio/nnt/meta/convert)

${BOLD}ENVIRONMENT${RESET}
    ESML_HOME            Config/data root (default: ~/.morie)
    ESML_PYTHON          Override Python interpreter
    ESML_DEBUG=1         Verbose diagnostics
    ESML_NO_COLOR=1      Disable colors
    OLLAMA_BASE_URL      Ollama endpoint (default: http://localhost:11434)
    GEMINI_API_KEY       Google AI Studio key
    LLM_API_BASE_URL     OpenAI-compatible endpoint
    LLM_API_KEY          API key for above
    OPENAI_API_KEY       OpenAI API key

${BOLD}EXAMPLES${RESET}
    morie                              # launch TUI or chat
    morie install                      # first-time setup
    morie chat --agent morie-architect  # chat with architect agent
    morie pipeline --all -y            # run all 21 modules
    morie inspect data/public/outputs/ # browse results
    morie verify data/public/outputs/  # validate statistics
    morie ollama start && morie chat    # start local LLM, then chat
    morie completions install          # set up tab completion

${BOLD}CAUSAL INFERENCE EXAMPLES${RESET}
    morie causal ate data.csv outcome treatment age sex      # ATE via IPW-OLS
    morie causal dml data.csv y d x1 x2 x3                  # Double Machine Learning
    morie causal irm data.csv y d x1 x2                     # Interactive Regression Model
    morie causal aipw data.csv y d x1 x2                    # Augmented IPW
    morie causal ipw data.csv treatment x1 x2               # IPW weights
    morie causal propensity data.csv treatment x1 x2        # Propensity scores
    morie causal match data.csv treatment x1 x2             # PS nearest neighbor matching
    morie causal evalue 2.5 1.8                             # E-value for sensitivity
    morie match-cli nn data.csv treatment age sex income     # PS NN matching
    morie match-cli compare data.csv treatment x1 x2 x3     # Compare matching methods

${BOLD}SURVIVAL ANALYSIS EXAMPLES${RESET}
    morie survival km data.csv time event                    # Kaplan-Meier
    morie survival cox data.csv time event age sex           # Cox PH model
    morie survival logrank data.csv time event treatment     # Log-rank test
    morie survival aft data.csv time event weibull           # AFT model
    morie survival rmst data.csv time event                  # RMST
    morie survival ph-test data.csv time event               # PH assumption test

${BOLD}QUASI-EXPERIMENTAL EXAMPLES${RESET}
    morie did-cli 2x2 data.csv outcome treatment post        # DiD 2x2
    morie did-cli event data.csv y unit time treat_time      # Event study
    morie did-cli staggered data.csv y unit time group       # Staggered DiD
    morie rdd-cli sharp data.csv outcome running 0.5         # Sharp RDD
    morie rdd-cli fuzzy data.csv outcome running treat 0.5   # Fuzzy RDD
    morie rdd-cli mccrary data.csv running 0.5               # McCrary density test
    morie iv-cli tsls data.csv y d z x1                      # 2SLS
    morie iv-cli hausman data.csv y d z x1                   # Hausman test

${BOLD}EFFECT SIZE EXAMPLES${RESET}
    morie effect-cli cohens-d data.csv score treatment       # Cohen's d
    morie effect-cli odds-ratio data.csv outcome exposure    # Odds ratio
    morie effect-cli nnt data.csv outcome exposure           # NNT
    morie effect-cli meta-re data.csv yi vi                  # Random-effects meta
    morie effect-cli convert d r 0.5                         # Convert d → r

${BOLD}DEVELOPER TOOL EXAMPLES${RESET}
    morie lint py                                            # Lint Python
    morie check all                                          # Pre-flight checks
    morie db tables                                          # List DB tables
    morie db query "SELECT count(*) FROM cpads_2021_2022"    # SQL query
    morie catalog search cpads                               # Search datasets
    morie catalog preview cpads_2021_2022 10                 # Preview dataset
    morie stat count                                         # Count commands (1200+)
    morie stat search survival                               # Search commands
    morie audit code                                         # Code statistics
    morie health                                             # System health
    morie translate examples                                 # Python↔R translations
    morie schema csv data/files/csv/survey/cpads.csv         # CSV schema
    morie validate-data completeness cpads_2021_2022         # Data completeness
    morie benchmark import                                   # Time import
    morie hash db                                            # DB checksum
    morie size all                                           # Disk usage
    morie path                                               # All paths
    morie changelog recent 20                                # Recent commits
    morie sec all                                            # Security audit

${BOLD}QUICK REFERENCE${RESET}
    Module count:   46 Python modules, 10+ R source files
    Test count:     369+ pytest tests (pytest -q from dev/sphinx/project/)
    Dataset count:  32 built-in Canadian public health datasets in SQLite DB
    Command count:  1200+ statistical/analytical commands via stat_commands.py
    Backend:        620+ public functions across 26 modules
    CLI commands:   53 native bash commands with subcommands
    Sources:        CPADS, CCS, CSADS, CSUS, HealthInfobase, CIHI
    License:        GPL-3.0-or-later
    Documentation:  Sphinx (developer site) + Quarto (analysis site)
    LLM providers:  Ollama → OllamaFreeAPI → Gemini → OpenAI → local fallback
    TUI screens:    Home, Chat, Pipeline, Doctor, Dataset, Help, Debug, Stat, REPL

${BOLD}GETTING STARTED${RESET}
    1. morie install                     # Bootstrap environment
    2. morie health                      # Check system health
    3. morie tui                         # Launch terminal IDE
    4. morie catalog list                # Browse 32 datasets
    5. morie stat count                  # See 1200+ commands
    6. morie causal --help               # Explore causal methods
    7. morie man matching                # Read matching manual
    8. morie translate examples          # Python ↔ R reference

${BOLD}CAUSAL INFERENCE QUICK START${RESET}
    # Step 1: Estimate propensity scores
    morie causal propensity data.csv treatment age sex income education

    # Step 2: Match treated and control units
    morie match-cli nn data.csv treatment age sex income education

    # Step 3: Estimate treatment effect
    morie causal ate data.csv outcome treatment age sex income education

    # Step 4: Sensitivity analysis
    morie causal evalue 2.5 1.8

    # Step 5: Alternative estimators for robustness
    morie causal dml data.csv outcome treatment age sex income education
    morie causal aipw data.csv outcome treatment age sex income education
    morie causal irm data.csv outcome treatment age sex income education

${BOLD}SURVIVAL ANALYSIS QUICK START${RESET}
    # Step 1: Kaplan-Meier survival curve
    morie survival km data.csv time event

    # Step 2: Log-rank test comparing groups
    morie survival logrank data.csv time event treatment

    # Step 3: Cox proportional hazards model
    morie survival cox data.csv time event age sex treatment

    # Step 4: Test proportional hazards assumption
    morie survival ph-test data.csv time event

    # Step 5: Restricted mean survival time
    morie survival rmst data.csv time event

${BOLD}QUASI-EXPERIMENTAL QUICK START${RESET}
    # Difference-in-differences
    morie did-cli 2x2 data.csv outcome treatment post_period
    morie did-cli event data.csv outcome unit_id time treatment_time
    morie did-cli parallel data.csv outcome treatment time_period

    # Regression discontinuity design
    morie rdd-cli sharp data.csv outcome running_var 0.5
    morie rdd-cli mccrary data.csv running_var 0.5
    morie rdd-cli bandwidth data.csv outcome running_var 0.5

    # Instrumental variables
    morie iv-cli tsls data.csv outcome endogenous instrument controls
    morie iv-cli first-stage data.csv endogenous instrument controls
    morie iv-cli hausman data.csv outcome endogenous instrument

${DIM}Version: $ESML_SCRIPT_VERSION | License: GPL-3.0-or-later${RESET}
EOF
}

# =========================================================================
# morie doctor — Native environment diagnostics (no Python required)
# =========================================================================

# Print one doctor row describing whether an executable is on PATH.
# Arguments: $1 - display label
#            $2 - command name to look up
#            $3 - "true" when the tool is required (default: false)
# Outputs:   a single OK / FAIL / WARN row on stdout
# Returns:   0 when the command exists, 1 otherwise
_check_binary() {
    local label="$1" exe="$2" must_have="${3:-false}"
    local ver_line=""

    # Missing tool: the severity shown depends on whether it is required.
    if ! command -v "$exe" &>/dev/null; then
        case "$must_have" in
            true)
                printf "  ${RED} FAIL${RESET} %-25s not found\n" "$label"
                ;;
            *)
                printf "  ${YELLOW} WARN${RESET} %-25s not installed (optional)\n" "$label"
                ;;
        esac
        return 1
    fi

    # Show the first line of `--version`; if that pipeline fails, fall back
    # to a generic marker instead of an empty cell.
    ver_line="$("$exe" --version 2>&1 | head -1)" || ver_line="installed"
    printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "$label" "$ver_line"
    return 0
}

# Print one doctor row describing whether a Python package can be imported.
# Arguments: $1 - importable module name
#            $2 - "true" when the package is required (default: true)
# Outputs:   a single OK / FAIL / WARN row on stdout
# Returns:   0 when the import succeeds, 1 otherwise (also 1, silently, when
#            _resolve_python cannot supply an interpreter)
_check_python_pkg() {
    local pkg="$1" required="${2:-true}"
    local py ver
    py="$(_resolve_python 2>/dev/null)" || return 1

    # The import is tested directly in the `if` condition: a bare assignment
    # followed by a `$?` check would abort the whole script under `set -e`
    # before the FAIL/WARN row could be printed.  The module name travels as
    # an argument rather than being spliced into the Python source.
    if ver="$("$py" -c 'import importlib, sys; m = importlib.import_module(sys.argv[1]); print(getattr(m, "__version__", "installed"))' "$pkg" 2>/dev/null)"; then
        printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "import $pkg" "$ver"
        return 0
    fi

    if [[ "$required" == "true" ]]; then
        printf "  ${RED} FAIL${RESET} %-25s not installed\n" "import $pkg"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s not installed (optional)\n" "import $pkg"
    fi
    return 1
}

# Print one doctor row describing whether a TCP endpoint answers.
# Arguments: $1 - display label, $2 - host, $3 - port
# Outputs:   a single OK / WARN row on stdout
# Returns:   0 when the endpoint is reachable, 1 otherwise
_check_port() {
    local label="$1" addr="$2" portno="$3"
    local reachable=false

    # nc is preferred; curl is consulted only when nc is not installed at all.
    if command -v nc &>/dev/null; then
        if nc -z "$addr" "$portno" 2>/dev/null; then
            reachable=true
        fi
    elif command -v curl &>/dev/null; then
        if curl -s --max-time 2 "http://${addr}:${portno}/" &>/dev/null; then
            reachable=true
        fi
    fi

    if [[ "$reachable" == true ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s %s:%s reachable\n" "$label" "$addr" "$portno"
        return 0
    fi

    printf "  ${YELLOW} WARN${RESET} %-25s %s:%s not reachable\n" "$label" "$addr" "$portno"
    return 1
}

# Print one doctor row comparing free disk space against a minimum.
# Arguments: $1 - path whose filesystem is inspected
#            $2 - minimum free space in MB (default: 500)
# Outputs:   a single OK / FAIL row on stdout
# Returns:   0 when enough space is available, 1 when it is not or when the
#            free space cannot be determined
_check_disk_space() {
    local path="$1" min_mb="${2:-500}"
    local avail_kb avail_mb

    # `df -P` is the POSIX one-line-per-filesystem layout, so the same parse
    # works on Linux and macOS and long device names cannot wrap the row.
    avail_kb="$(df -Pk "$path" 2>/dev/null | awk 'NR == 2 { print $4 }')" || avail_kb=""

    # Guard the arithmetic below: an empty or non-numeric value would be a
    # shell syntax error rather than a readable diagnostic.
    if [[ ! "$avail_kb" =~ ^[0-9]+$ ]]; then
        printf "  ${RED} FAIL${RESET} %-25s unable to determine free space\n" "Disk ($path)"
        return 1
    fi

    avail_mb=$((avail_kb / 1024))
    if [[ "$avail_mb" -ge "$min_mb" ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s %sMB available\n" "Disk ($path)" "$avail_mb"
        return 0
    else
        printf "  ${RED} FAIL${RESET} %-25s %sMB available (need %sMB)\n" "Disk ($path)" "$avail_mb" "$min_mb"
        return 1
    fi
}

# Print one doctor row describing whether a regular file exists.
# Arguments: $1 - display label
#            $2 - file path
#            $3 - "true" when the file is required (default: false)
# Outputs:   a single OK / FAIL / WARN row on stdout
# Returns:   0 when the file exists, 1 otherwise
_check_file_exists() {
    local name="$1" path="$2" required="${3:-false}"
    local size_bytes size_kb

    if [[ -f "$path" ]]; then
        # `wc -c` is POSIX, so no per-OS `stat` flags are needed.  The size
        # only reaches the arithmetic when it is numeric; otherwise "?" is
        # shown instead of triggering a shell syntax error.
        size_bytes="$(wc -c 2>/dev/null < "$path" | tr -d '[:space:]')" || size_bytes=""
        if [[ "$size_bytes" =~ ^[0-9]+$ ]]; then
            size_kb="$((size_bytes / 1024))"
        else
            size_kb="?"
        fi
        printf "  ${GREEN}  OK ${RESET} %-25s %s (%sKB)\n" "$name" "$path" "$size_kb"
        return 0
    fi

    if [[ "$required" == "true" ]]; then
        printf "  ${RED} FAIL${RESET} %-25s not found: %s\n" "$name" "$path"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s not found: %s\n" "$name" "$path"
    fi
    return 1
}

# Run the native (bash-only) environment diagnostics and print a report.
#
# Sections, in order: system info, core tools, Python environment, required
# and optional Python packages, optional tools, LLM providers, data files,
# disk space, MORIE directories, and a one-line summary.
#
# Globals:  ROOT, ESML_HOME, ESML_CONFIG_DIR, ESML_CACHE_DIR, ESML_LOG_DIR
#           and the colour variables (all read only).
# Returns:  always 0; failures are reported in the summary line instead.
#
# The _check_* helpers return 1 when a check does not pass.  Every call here
# therefore ends in `|| all_ok=false` (counts towards the summary) or
# `|| true` (optional item): a bare call would abort the whole report under
# the script's `set -e` at the first missing optional tool.
cmd_doctor_native() {
    local all_ok=true
    local pkg

    echo "${BOLD}MORIE Doctor — Native Environment Diagnostics${RESET}"
    echo "${DIM}$(date)${RESET}"
    echo ""

    # System info.
    echo "${BOLD}System${RESET}"
    printf "  %-28s %s\n" "OS" "$(uname -s) $(uname -r)"
    printf "  %-28s %s\n" "Architecture" "$(uname -m)"
    printf "  %-28s %s\n" "Hostname" "$(hostname 2>/dev/null || echo unknown)"
    printf "  %-28s %s\n" "User" "$(whoami)"
    printf "  %-28s %s\n" "Shell" "${SHELL:-unknown}"
    printf "  %-28s %s\n" "Terminal" "${TERM:-unknown}"
    if [[ "$(uname -s)" == "Darwin" ]]; then
        printf "  %-28s %s\n" "macOS" "$(sw_vers -productVersion 2>/dev/null || echo ?)"
    fi
    echo ""

    # Core tools.
    echo "${BOLD}Core Tools${RESET}"
    _check_binary "Python" python3 true || all_ok=false
    _check_binary "pip" pip3 false || true
    _check_binary "git" git true || all_ok=false
    _check_binary "curl" curl true || all_ok=false
    echo ""

    # Python version check.
    echo "${BOLD}Python Environment${RESET}"
    local py
    # Without the fallback, a failing resolver would abort under `set -e`
    # and the "not found" row below could never be printed.
    py="$(_resolve_python 2>/dev/null)" || py=""
    if [[ -n "$py" ]]; then
        local pyver
        pyver="$("$py" --version 2>&1)" || pyver="unknown"
        printf "  ${GREEN}  OK ${RESET} %-25s %s (%s)\n" "Python interpreter" "$pyver" "$py"

        # Check venv.
        if [[ -d "$ROOT/.venv" ]]; then
            printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "Virtual environment" "$ROOT/.venv"
        else
            printf "  ${YELLOW} WARN${RESET} %-25s not found (run: morie install)\n" "Virtual environment"
        fi

        # morie package.
        if "$py" -c "import morie" 2>/dev/null; then
            local esml_ver
            esml_ver="$("$py" -c "import morie; print(morie.__version__)" 2>/dev/null)" || esml_ver="?"
            printf "  ${GREEN}  OK ${RESET} %-25s v%s\n" "morie package" "$esml_ver"
            local mod_count
            mod_count="$("$py" -c "import morie; print(len(morie.list_modules()))" 2>/dev/null)" || mod_count="?"
            printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "Analysis modules" "$mod_count"
        else
            printf "  ${RED} FAIL${RESET} %-25s not importable (run: morie install)\n" "morie package"
            all_ok=false
        fi
    else
        printf "  ${RED} FAIL${RESET} %-25s not found\n" "Python interpreter"
        all_ok=false
    fi
    echo ""

    # Required Python packages.
    echo "${BOLD}Required Python Packages${RESET}"
    for pkg in pandas numpy scipy sklearn statsmodels httpx rich; do
        _check_python_pkg "$pkg" true || all_ok=false
    done
    echo ""

    # Optional Python packages.
    echo "${BOLD}Optional Python Packages${RESET}"
    for pkg in doubleml openai textual codecarbon matplotlib; do
        _check_python_pkg "$pkg" false || true
    done
    echo ""

    # Optional tools.
    echo "${BOLD}Optional Tools${RESET}"
    _check_binary "R / Rscript" Rscript false || true
    _check_binary "Quarto" quarto false || true
    _check_binary "Docker" docker false || true
    _check_binary "Ollama" ollama false || true
    echo ""

    # LLM providers.
    echo "${BOLD}LLM Providers${RESET}"
    _check_port "Ollama" "localhost" "11434" || true
    if [[ -n "${GEMINI_API_KEY:-}" ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s key set\n" "Gemini API"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s GEMINI_API_KEY not set\n" "Gemini API"
    fi
    if [[ -n "${LLM_API_BASE_URL:-}" ]] && [[ -n "${LLM_API_KEY:-}" ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "OpenAI-compat API" "$LLM_API_BASE_URL"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s not configured\n" "OpenAI-compat API"
    fi
    if [[ -n "${OPENAI_API_KEY:-}" ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s key set\n" "OpenAI"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s OPENAI_API_KEY not set\n" "OpenAI"
    fi
    echo ""

    # Data files.
    echo "${BOLD}Data Files${RESET}"
    _check_file_exists "CPADS CSV" "$ROOT/data/files/csv/survey/cpads-2021-2022-pumf2.csv" false || true
    _check_file_exists "pyproject.toml" "$ROOT/pyproject.toml" true || all_ok=false
    _check_file_exists "CITATION.cff" "$ROOT/CITATION.cff" false || true
    echo ""

    # Disk space.  These rows are printed as FAIL when space is short, so
    # they count towards the summary, which points the user at FAIL rows.
    echo "${BOLD}Resources${RESET}"
    _check_disk_space "$ROOT" 200 || all_ok=false
    _check_disk_space "$HOME" 500 || all_ok=false
    echo ""

    # MORIE directories.
    echo "${BOLD}MORIE Directories${RESET}"
    printf "  %-28s %s\n" "ESML_HOME" "$ESML_HOME"
    printf "  %-28s %s\n" "ESML_CONFIG_DIR" "$ESML_CONFIG_DIR"
    printf "  %-28s %s\n" "ESML_CACHE_DIR" "$ESML_CACHE_DIR"
    printf "  %-28s %s\n" "ESML_LOG_DIR" "$ESML_LOG_DIR"
    printf "  %-28s %s\n" "Project root" "$ROOT"
    echo ""

    # Summary.
    if [[ "$all_ok" == true ]]; then
        success "All required checks passed."
    else
        error "Some required checks failed. See FAIL rows above."
    fi

    return 0
}

# =========================================================================
# morie data — Native CSV data profiling (awk/sed, no Python)
# =========================================================================

# Entry point for `morie data`: forwards to the matching _data_<subcommand>
# helper with the remaining arguments, or prints the usage text for `help`,
# no subcommand, or an unrecognised subcommand.
cmd_data() {
    local action="${1:-help}"
    if (( $# > 0 )); then
        shift
    fi

    # Every known subcommand maps 1:1 onto a helper named _data_<subcommand>.
    case "$action" in
        profile|head|columns|shape|missing|sample|validate|convert|merge|split|stats|unique|freq|search|diff)
            "_data_${action}" "$@"
            return
            ;;
    esac

    cat << DEOF
${BOLD}morie data${RESET} — Native CSV data toolkit (no Python required)

${BOLD}COMMANDS${RESET}
    profile <file>         Quick profile: rows, columns, types, missing
    head <file> [N]        Show first N rows (default: 10)
    columns <file>         List column names with indices
    shape <file>           Show row x column dimensions
    missing <file>         Missing value report per column
    sample <file> [N]      Random sample of N rows (default: 5)
    validate <file>        Check CSV integrity (quoting, delimiters, encoding)
    stats <file> [col]     Basic statistics for numeric column(s)
    unique <file> <col>    Unique values in a column
    freq <file> <col>      Frequency table for a column
    search <file> <term>   Search for a value across all columns
    diff <f1> <f2>         Compare two CSV files
    convert <file> <fmt>   Convert to TSV, JSON-lines, or Markdown
    merge <f1> <f2> <key>  Merge two CSVs on a key column
    split <file> <col>     Split CSV by unique values of a column

${BOLD}EXAMPLES${RESET}
    morie data profile data/cpads.csv
    morie data shape data/cpads.csv
    morie data missing data/cpads.csv
    morie data stats data/cpads.csv age
    morie data freq data/cpads.csv gender
    morie data unique data/cpads.csv province
    morie data sample data/cpads.csv 20
DEOF
}

# Print "<rows> rows x <cols> columns" for a CSV file.
# Arguments: $1 - CSV path
# Returns:   1 (after an error message) when the path is missing or invalid
# Notes:     the first line is treated as the header and is not counted as a
#            row; columns are counted by splitting the header on commas, so
#            quoted fields that contain commas are over-counted.
_data_shape() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}"
    [[ -z "$file" ]] && { error "Usage: morie data shape <file>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }
    local rows cols
    # awk's NR also counts a final line without a trailing newline (which
    # `wc -l` misses); an empty file reports 0 rows rather than -1.
    rows="$(awk 'END { print (NR > 0 ? NR - 1 : 0) }' "$file")"
    cols="$(head -n 1 "$file" | awk -F',' '{ print NF }')"
    echo "$rows rows x ${cols:-0} columns"
}

# List the header fields of a CSV file, one per line, numbered from 1.
# Arguments: $1 - CSV path
# Returns:   1 (after an error message) when the path is missing or invalid
# Notes:     the header is split on every comma; quoted commas are not
#            recognised.
_data_columns() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}"
    [[ -z "$file" ]] && { error "Usage: morie data columns <file>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }
    echo "${BOLD}Columns in $(basename "$file"):${RESET}"
    # Drop CRs so the last column of a CRLF file does not carry a stray \r.
    head -n 1 "$file" | tr -d '\r' | tr ',' '\n' | nl -ba
}

# Show the header plus the first N data rows of a CSV as an aligned table.
# Arguments: $1 - CSV path
#            $2 - number of data rows to show (default: 10)
# Returns:   1 (after an error message) on a missing/invalid path or when N
#            is not a non-negative integer
_data_head() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" n="${2:-10}"
    [[ -z "$file" ]] && { error "Usage: morie data head <file> [N]"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }
    # A non-numeric N would otherwise be evaluated as a variable name inside
    # the arithmetic below and silently become 0.
    [[ "$n" =~ ^[0-9]+$ ]] || { error "N must be a non-negative integer: $n"; return 1; }
    # +1 accounts for the header line; `head -n` is the portable spelling.
    head -n "$((n + 1))" "$file" | column -t -s','
}

# Print a per-column missing-value report for a CSV file.
# Arguments: $1 - CSV path
# Outputs:   one row per header column: index, name, missing count, present
#            count and integer percentage missing (yellow above 0%, red
#            above 20%)
# Returns:   1 (after an error message) when the path is missing or invalid
# Notes:     a cell is "missing" when it is empty or exactly NA, NaN, null
#            or "."; fields are split on every comma (no quote handling).
_data_missing() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}"
    [[ -z "$file" ]] && { error "Usage: morie data missing <file>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    local header ncols total_rows
    header="$(head -n 1 "$file")"
    header="${header%$'\r'}"
    ncols="$(awk -F',' '{ print NF }' <<< "$header")"
    # NR also counts a last line that has no trailing newline, keeping the
    # total consistent with the rows awk tallies below.
    total_rows="$(awk 'END { print (NR > 0 ? NR - 1 : 0) }' "$file")"

    echo "${BOLD}Missing Values Report: $(basename "$file")${RESET}"
    echo "${DIM}$total_rows rows, $ncols columns${RESET}"
    echo ""
    printf "  ${BOLD}%-5s %-30s %10s %10s %10s${RESET}\n" "Idx" "Column" "Missing" "Present" "Pct"
    printf "  ${DIM}%-5s %-30s %10s %10s %10s${RESET}\n" "---" "------" "-------" "-------" "---"

    # Local array: the caller's variables must not be clobbered.
    local -a names=()
    IFS=',' read -ra names <<< "$header"
    if (( ${#names[@]} == 0 )); then
        return 0
    fi

    # A single awk pass tallies every column, instead of re-reading the whole
    # file once per column.  Output: one count per line, in column order.
    local counts
    counts="$(awk -F',' -v n="${#names[@]}" '
        { sub(/\r$/, "") }
        NR > 1 {
            for (i = 1; i <= n; i++)
                if ($i == "" || $i == "NA" || $i == "NaN" || $i == "null" || $i == ".") miss[i]++
        }
        END { for (i = 1; i <= n; i++) print miss[i] + 0 }' "$file")"

    local idx=0 name missing present pct color
    while IFS= read -r missing; do
        name="${names[idx]}"
        name="${name//\"/}"
        name="${name//\'/}"
        idx=$((idx + 1))
        present=$((total_rows - missing))
        pct=0
        if [[ "$total_rows" -gt 0 ]]; then
            pct=$((missing * 100 / total_rows))
        fi
        color=""
        if [[ "$pct" -gt 0 ]]; then color="${YELLOW}"; fi
        if [[ "$pct" -gt 20 ]]; then color="${RED}"; fi
        printf "  %-5s %-30s %b%10s%b %10s %9s%%\n" "$idx" "$name" "$color" "$missing" "${RESET}" "$present" "$pct"
    done <<< "$counts"
}

# Print the header followed by a uniform random sample of data rows.
# Arguments: $1 - CSV path
#            $2 - sample size N (default: 5)
# Outputs:   header line, then exactly min(N, number of data rows) rows
# Returns:   1 (after an error message) on a missing/invalid path or when N
#            is not a non-negative integer
_data_sample() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" n="${2:-5}"
    [[ -z "$file" ]] && { error "Usage: morie data sample <file> [N]"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }
    [[ "$n" =~ ^[0-9]+$ ]] || { error "N must be a non-negative integer: $n"; return 1; }

    # Print header + N random rows.
    head -n 1 "$file"

    # Reservoir sampling: one pass, every data row equally likely, and always
    # exactly min(N, rows) results.  Keeping each row with probability
    # N/total and stopping at N (the previous approach) returned fewer than N
    # rows most of the time and favoured rows near the top of the file.
    # The seed comes from bash so two calls within the same second differ.
    awk -v n="$n" -v seed="$((RANDOM * 32768 + RANDOM))" '
    BEGIN { srand(seed) }
    NR == 1 { next }
    {
        seen++
        if (seen <= n) {
            keep[seen] = $0
        } else {
            slot = int(rand() * seen) + 1
            if (slot <= n) keep[slot] = $0
        }
    }
    END {
        limit = (seen < n) ? seen : n
        for (i = 1; i <= limit; i++) print keep[i]
    }' "$file"
}

# Print a quick profile of a CSV file: dimensions, file size, column list
# and the missing-value report (via _data_shape, _data_columns and
# _data_missing).
# Arguments: $1 - CSV path
# Returns:   1 (after an error message) when the path is missing or invalid
_data_profile() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}"
    [[ -z "$file" ]] && { error "Usage: morie data profile <file>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    # `wc -c` is POSIX, so no per-OS `stat` flags are needed, and the size is
    # validated before it reaches the arithmetic (an empty value there is a
    # shell syntax error).
    local filesize size_kb
    filesize="$(wc -c 2>/dev/null < "$file" | tr -d '[:space:]')" || filesize=""
    if [[ "$filesize" =~ ^[0-9]+$ ]]; then
        size_kb="$((filesize / 1024))"
    else
        size_kb="?"
    fi

    echo "${BOLD}Data Profile: $(basename "$file")${RESET}"
    echo "${DIM}Path: $file${RESET}"
    echo ""
    _data_shape "$file"
    echo "File size: ${size_kb}KB"
    echo ""
    _data_columns "$file"
    echo ""
    _data_missing "$file"
}

# Print basic descriptive statistics for numeric CSV columns.
# Arguments: $1 - CSV path
#            $2 - optional column name; when omitted, every column with more
#                 than 5 numeric values gets one summary row
# Outputs:   n, mean, sample standard deviation, min, max (and range in
#            single-column mode)
# Returns:   1 (after an error message) on a missing/invalid path or an
#            unknown column
# Notes:     empty, NA and NaN cells and non-numeric text are skipped; fields
#            are split on every comma (no quote handling).
_data_stats() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" col="${2:-}"
    [[ -z "$file" ]] && { error "Usage: morie data stats <file> [column]"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    if [[ -n "$col" ]]; then
        # Find column index.  -x -F matches the name literally and in full,
        # so "a.b" can no longer select a column called "axb".  The fallback
        # keeps a no-match (grep exit 1, propagated by pipefail) from
        # aborting the script before the error message below.
        local header idx
        header="$(head -n 1 "$file")"
        header="${header%$'\r'}"
        idx="$(tr ',' '\n' <<< "$header" | grep -nxF -- "$col" | head -n 1 | cut -d: -f1)" || idx=""
        if [[ -z "$idx" ]]; then
            error "Column not found: $col"
            return 1
        fi
        echo "${BOLD}Statistics for column: $col${RESET}"
        awk -F',' -v c="$idx" '
        { sub(/\r$/, "") }
        NR>1 && $c != "" && $c != "NA" && $c != "NaN" && $c+0 == $c {
            v = $c + 0
            n++; sum += v; sumsq += v * v
            if (n == 1 || v < min) min = v
            if (n == 1 || v > max) max = v
        }
        END {
            if (n == 0) { print "  No numeric values found"; exit }
            mean = sum / n
            # The sample variance needs two observations; with one value the
            # n-1 divisor would be a fatal division by zero, so report 0.
            var = (n > 1) ? (sumsq - sum * sum / n) / (n - 1) : 0
            sd = sqrt(var > 0 ? var : 0)
            printf "  %-15s %d\n", "n", n
            printf "  %-15s %.4f\n", "mean", mean
            printf "  %-15s %.4f\n", "std", sd
            printf "  %-15s %.4f\n", "min", min
            printf "  %-15s %.4f\n", "max", max
            printf "  %-15s %.4f\n", "range", max-min
        }' "$file"
    else
        echo "${BOLD}Numeric column statistics:${RESET}"
        echo ""
        printf "  ${BOLD}%-25s %8s %12s %12s %12s %12s${RESET}\n" "Column" "N" "Mean" "Std" "Min" "Max"
        # One pass over the file accumulates every column at once, instead
        # of re-reading the whole file for each column.
        awk -F',' '
        { sub(/\r$/, "") }
        NR == 1 {
            ncol = NF
            for (i = 1; i <= ncol; i++) { label[i] = $i; gsub(/"/, "", label[i]) }
            next
        }
        {
            for (i = 1; i <= ncol; i++) {
                if ($i != "" && $i != "NA" && $i != "NaN" && $i+0 == $i) {
                    v = $i + 0
                    n[i]++; sum[i] += v; sumsq[i] += v * v
                    if (n[i] == 1 || v < lo[i]) lo[i] = v
                    if (n[i] == 1 || v > hi[i]) hi[i] = v
                }
            }
        }
        END {
            for (i = 1; i <= ncol; i++) {
                # Columns with 5 or fewer numeric values are not summarised.
                if (n[i] > 5) {
                    mean = sum[i] / n[i]
                    var = (sumsq[i] - sum[i] * sum[i] / n[i]) / (n[i] - 1)
                    sd = sqrt(var > 0 ? var : 0)
                    printf "  %-25s %8d %12.4f %12.4f %12.4f %12.4f\n", label[i], n[i], mean, sd, lo[i], hi[i]
                }
            }
        }' "$file"
    fi
}

# Print the distinct values of one CSV column (first 100, sorted) followed by
# the total number of distinct values.
# Arguments: $1 - CSV path, $2 - column name
# Returns:   1 (after an error message) on bad arguments or an unknown column
_data_unique() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" col="${2:-}"
    [[ -z "$file" || -z "$col" ]] && { error "Usage: morie data unique <file> <column>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    local header idx values total
    header="$(head -n 1 "$file")"
    header="${header%$'\r'}"
    # -x -F matches the column name literally and in full; the fallback keeps
    # a no-match (grep exit 1, propagated by pipefail) from aborting the
    # script before the error message is printed.
    idx="$(tr ',' '\n' <<< "$header" | grep -nxF -- "$col" | head -n 1 | cut -d: -f1)" || idx=""
    [[ -z "$idx" ]] && { error "Column not found: $col"; return 1; }

    echo "${BOLD}Unique values in '$col':${RESET}"
    # Compute the distinct values once.  The trailing "x" sentinel protects
    # the final newline from command-substitution stripping, so a lone empty
    # value still counts as one line.
    values="$(awk -F',' -v c="$idx" 'NR>1 {print $c}' "$file" | sort -u; printf x)"
    values="${values%x}"
    # awk reads its whole input, unlike `head`, so `sort` is never killed by
    # SIGPIPE (which pipefail would turn into a failure).
    printf '%s' "$values" | awk 'NR <= 100'
    total="$(printf '%s' "$values" | awk 'END { print NR }')"
    echo "${DIM}($total unique values)${RESET}"
}

# Print a frequency table (top 50 values, most frequent first) for one CSV
# column.
# Arguments: $1 - CSV path, $2 - column name
# Returns:   1 (after an error message) on bad arguments or an unknown column
_data_freq() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" col="${2:-}"
    [[ -z "$file" || -z "$col" ]] && { error "Usage: morie data freq <file> <column>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    local header idx
    header="$(head -n 1 "$file")"
    header="${header%$'\r'}"
    # -x -F matches the column name literally and in full; the fallback keeps
    # a no-match (grep exit 1, propagated by pipefail) from aborting the
    # script before the error message is printed.
    idx="$(tr ',' '\n' <<< "$header" | grep -nxF -- "$col" | head -n 1 | cut -d: -f1)" || idx=""
    [[ -z "$idx" ]] && { error "Column not found: $col"; return 1; }

    echo "${BOLD}Frequency table for '$col':${RESET}"
    printf "  ${BOLD}%-30s %10s${RESET}\n" "Value" "Count"
    # The final awk strips the count that `uniq -c` prepends and prints the
    # rest of the line verbatim, so values containing spaces stay intact.
    # It also applies the 50-row limit while reading all of its input, which
    # avoids the SIGPIPE a `head` stage could cause upstream.
    awk -F',' -v c="$idx" 'NR>1 {print $c}' "$file" | sort | uniq -c | sort -rn | \
        awk 'NR <= 50 { cnt = $1; sub(/^[[:space:]]*[0-9]+ ?/, ""); printf "  %-30s %10d\n", $0, cnt }'
}

# Case-insensitively search a CSV file for a term and show the first 20
# matching lines (with line numbers) plus the total match count.
# Arguments: $1 - CSV path
#            $2 - search term (interpreted by grep as a basic regular
#                 expression)
# Returns:   1 (after an error message) on bad arguments; 0 otherwise, even
#            when nothing matches
_data_search() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" term="${2:-}"
    [[ -z "$file" || -z "$term" ]] && { error "Usage: morie data search <file> <term>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    echo "${BOLD}Searching for '$term' in $(basename "$file"):${RESET}"
    # grep exits 1 when nothing matches.  That is a normal outcome here, so it
    # must not trip `set -e` / pipefail before the count is printed.
    # `-e` / `--` stop a term or path that starts with "-" from being parsed
    # as an option; awk (not head) applies the limit so grep never sees
    # SIGPIPE.
    grep -in -e "$term" -- "$file" | awk 'NR <= 20' || true
    local count
    count="$(grep -ic -e "$term" -- "$file")" || true
    echo "${DIM}(${count:-0} matching rows)${RESET}"
}

# Run structural sanity checks on a CSV file and print one row per check:
# encoding, consistent column count, UTF-8 BOM, CRLF line endings, empty rows
# and duplicate header names, followed by a pass/fail summary.
# Arguments: $1 - CSV path
# Returns:   1 (after an error message) when the path is missing or invalid;
#            otherwise the status of the final success/error call
# Notes:     only an inconsistent column count and duplicate header names
#            fail validation; the other findings are warnings.  Fields are
#            split on every comma (no quote handling).
_data_validate() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}"
    [[ -z "$file" ]] && { error "Usage: morie data validate <file>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    echo "${BOLD}CSV Validation: $(basename "$file")${RESET}"
    echo ""
    local ok=true

    # Check encoding.
    local encoding
    encoding="$(file -b --mime-encoding "$file" 2>/dev/null)" || encoding="unknown"
    if [[ "$encoding" == *"utf-8"* ]] || [[ "$encoding" == *"ascii"* ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s %s\n" "Encoding" "$encoding"
    else
        printf "  ${YELLOW} WARN${RESET} %-25s %s (expected UTF-8)\n" "Encoding" "$encoding"
    fi

    # Check consistent column count.  The awk variable must not be called
    # `exp`: that is a built-in function name, and awk rejects it as a
    # variable, which silently disabled this check.
    local expected_cols
    expected_cols="$(head -n 1 "$file" | awk -F',' '{ print NF }')"
    local inconsistent
    inconsistent="$(awk -F',' -v want="$expected_cols" 'NF != want { bad++ } END { print bad + 0 }' "$file")"
    if [[ "$inconsistent" -eq 0 ]]; then
        printf "  ${GREEN}  OK ${RESET} %-25s all rows have %s columns\n" "Column consistency" "$expected_cols"
    else
        printf "  ${RED} FAIL${RESET} %-25s %s rows have inconsistent column count\n" "Column consistency" "$inconsistent"
        ok=false
    fi

    # Check for BOM.  `od` is POSIX; `xxd` is not installed everywhere.
    local bom
    bom="$(head -c 3 "$file" | od -An -tx1 | tr -d ' \n')" || bom=""
    if [[ "$bom" == "efbbbf" ]]; then
        printf "  ${YELLOW} WARN${RESET} %-25s UTF-8 BOM detected\n" "BOM"
    else
        printf "  ${GREEN}  OK ${RESET} %-25s no BOM\n" "BOM"
    fi

    # Check line endings.  A literal CR in the pattern works with any grep;
    # `grep -P` is unavailable on BSD/macOS, where CRLF went undetected.
    # grep exits 1 when the count is 0, hence the fallback.
    local crlf
    crlf="$(grep -c $'\r$' "$file" 2>/dev/null)" || crlf=0
    if [[ "$crlf" -gt 0 ]]; then
        printf "  ${YELLOW} WARN${RESET} %-25s %s lines with CRLF\n" "Line endings" "$crlf"
    else
        printf "  ${GREEN}  OK ${RESET} %-25s Unix (LF)\n" "Line endings"
    fi

    # Check for empty rows.
    local empty_rows
    empty_rows="$(awk 'NR>1 && NF==0 { blank++ } END { print blank + 0 }' "$file")"
    if [[ "$empty_rows" -gt 0 ]]; then
        printf "  ${YELLOW} WARN${RESET} %-25s %s empty rows\n" "Empty rows" "$empty_rows"
    else
        printf "  ${GREEN}  OK ${RESET} %-25s none\n" "Empty rows"
    fi

    # Check for duplicate headers.
    local dup_headers
    dup_headers="$(head -n 1 "$file" | tr -d '\r' | tr ',' '\n' | sort | uniq -d | wc -l | tr -d ' ')"
    if [[ "$dup_headers" -gt 0 ]]; then
        printf "  ${RED} FAIL${RESET} %-25s %s duplicate column names\n" "Duplicate columns" "$dup_headers"
        ok=false
    else
        printf "  ${GREEN}  OK ${RESET} %-25s all unique\n" "Column names"
    fi

    echo ""
    if [[ "$ok" == true ]]; then
        success "CSV validation passed."
    else
        error "CSV validation found issues."
    fi
}

# Compare two CSV files: data-row counts, column counts, and the header
# names that appear in only one of them.
# Arguments: $1 - first CSV path, $2 - second CSV path
# Returns:   1 (after an error message) on missing arguments or files
# Notes:     only shapes and header names are compared, not cell contents;
#            headers are split on every comma (no quote handling).
_data_diff() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local f1="${1:-}" f2="${2:-}"
    [[ -z "$f1" || -z "$f2" ]] && { error "Usage: morie data diff <file1> <file2>"; return 1; }
    [[ ! -f "$f1" ]] && { error "File not found: $f1"; return 1; }
    [[ ! -f "$f2" ]] && { error "File not found: $f2"; return 1; }

    echo "${BOLD}CSV Diff: $(basename "$f1") vs $(basename "$f2")${RESET}"
    echo ""

    local r1 r2 c1 c2
    # NR also counts a last line that has no trailing newline.
    r1="$(awk 'END { print (NR > 0 ? NR - 1 : 0) }' "$f1")"
    r2="$(awk 'END { print (NR > 0 ? NR - 1 : 0) }' "$f2")"
    c1="$(head -n 1 "$f1" | awk -F',' '{print NF}')"
    c2="$(head -n 1 "$f2" | awk -F',' '{print NF}')"

    printf "  %-20s %-20s %-20s\n" "" "$(basename "$f1")" "$(basename "$f2")"
    printf "  %-20s %-20s %-20s\n" "Rows" "$r1" "$r2"
    printf "  %-20s %-20s %-20s\n" "Columns" "$c1" "$c2"

    # Column differences.  sort and comm run in the same (C) locale because
    # comm requires input ordered by its own collation.
    local cols1 cols2
    cols1="$(head -n 1 "$f1" | tr -d '\r' | tr ',' '\n' | LC_ALL=C sort)"
    cols2="$(head -n 1 "$f2" | tr -d '\r' | tr ',' '\n' | LC_ALL=C sort)"

    # Join with ", ".  `tr '\n' ', '` maps newline to a bare comma only (the
    # extra character in the second set is ignored) and leaves one trailing.
    local only1 only2
    only1="$(LC_ALL=C comm -23 <(printf '%s\n' "$cols1") <(printf '%s\n' "$cols2") \
        | awk 'NR > 1 { printf ", " } { printf "%s", $0 }')"
    only2="$(LC_ALL=C comm -13 <(printf '%s\n' "$cols1") <(printf '%s\n' "$cols2") \
        | awk 'NR > 1 { printf ", " } { printf "%s", $0 }')"

    echo ""
    if [[ -n "$only1" ]]; then
        echo "  Only in $(basename "$f1"): $only1"
    fi
    if [[ -n "$only2" ]]; then
        echo "  Only in $(basename "$f2"): $only2"
    fi
    if [[ -z "$only1" && -z "$only2" ]]; then
        echo "  Columns: identical"
    fi
}

# Convert a CSV file to TSV, JSON-lines or a Markdown table.  The result is
# written next to the input, with a trailing ".csv" replaced by the new
# extension.
# Arguments: $1 - CSV path
#            $2 - target format: tsv (default) | jsonl | json-lines |
#                 markdown | md
# Returns:   1 (after an error message) on a missing/invalid path or an
#            unknown format
# Notes:     every comma is treated as a field separator (no quote handling)
#            and every JSON value is emitted as a string.
_data_convert() {
    # `${1:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" fmt="${2:-tsv}"
    [[ -z "$file" ]] && { error "Usage: morie data convert <file> <format>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    local out
    case "$fmt" in
        tsv)
            out="${file%.csv}.tsv"
            tr ',' '\t' < "$file" > "$out"
            success "Converted to TSV: $out"
            ;;
        jsonl|json-lines)
            out="${file%.csv}.jsonl"
            # Keys come from the header line inside awk rather than through
            # `-v`, which would interpret backslash escapes in the names.
            # json_escape walks the string one character at a time, so a
            # backslash is escaped exactly once and before/independently of
            # quotes; unescaped backslashes previously produced invalid JSON.
            awk -F',' '
            function json_escape(s,    res, i, ch) {
                res = ""
                for (i = 1; i <= length(s); i++) {
                    ch = substr(s, i, 1)
                    if (ch == "\\") res = res "\\\\"
                    else if (ch == "\"") res = res "\\\""
                    else if (ch == "\t") res = res "\\t"
                    else if (ch == "\r") res = res "\\r"
                    else res = res ch
                }
                return res
            }
            { sub(/\r$/, "") }    # CRLF input: the CR is a line terminator, not data
            NR == 1 {
                n = NF
                for (i = 1; i <= n; i++) key[i] = json_escape($i)
                next
            }
            {
                line = "{"
                for (i = 1; i <= n; i++) {
                    line = line "\"" key[i] "\":\"" json_escape($i) "\""
                    if (i < n) line = line ","
                }
                print line "}"
            }' "$file" > "$out"
            success "Converted to JSON-lines: $out"
            ;;
        markdown|md)
            out="${file%.csv}.md"
            {
                # Header row, separator row, then the data rows.
                head -n 1 "$file" | sed -e 's/,/ | /g' -e 's/^/| /' -e 's/$/ |/'
                head -n 1 "$file" | sed -e 's/[^,]*/---/g' -e 's/,/ | /g' -e 's/^/| /' -e 's/$/ |/'
                tail -n +2 "$file" | sed -e 's/,/ | /g' -e 's/^/| /' -e 's/$/ |/'
            } > "$out"
            success "Converted to Markdown: $out"
            ;;
        *)
            error "Unknown format: $fmt (supported: tsv, jsonl, markdown)"
            return 1
            ;;
    esac
}

# Inner-join two CSV files on a shared key column and write the result to
# "<file1 without .csv>_merged.csv".
# Arguments: $1 - left CSV, $2 - right CSV, $3 - key column name (must exist
#            in both headers)
# Returns:   1 (after an error message) on bad arguments, missing files or a
#            key column absent from either file
# Notes:     each output row is the left row followed by the full matching
#            right row, so the key column appears twice; when the right file
#            repeats a key, its last row wins.  Fields are split on every
#            comma (no quote handling).
_data_merge() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local f1="${1:-}" f2="${2:-}" key="${3:-}"
    [[ -z "$f1" || -z "$f2" || -z "$key" ]] && { error "Usage: morie data merge <file1> <file2> <key_column>"; return 1; }
    [[ ! -f "$f1" ]] && { error "File not found: $f1"; return 1; }
    [[ ! -f "$f2" ]] && { error "File not found: $f2"; return 1; }

    local out="${f1%.csv}_merged.csv"
    # Find key column indices.  -x -F matches the name literally and in full
    # (a key like "k.y" must not select a column named "kxy"); the fallback
    # keeps a no-match (grep exit 1, propagated by pipefail) from aborting
    # the script before the error message is printed.
    local k1 k2
    k1="$(head -n 1 "$f1" | tr -d '\r' | tr ',' '\n' | grep -nxF -- "$key" | head -n 1 | cut -d: -f1)" || k1=""
    k2="$(head -n 1 "$f2" | tr -d '\r' | tr ',' '\n' | grep -nxF -- "$key" | head -n 1 | cut -d: -f1)" || k2=""
    [[ -z "$k1" ]] && { error "Key column '$key' not found in $f1"; return 1; }
    [[ -z "$k2" ]] && { error "Key column '$key' not found in $f2"; return 1; }

    # Simple inner join via awk: the right file is read first into a lookup
    # table keyed on its key column, then the left file is streamed.
    awk -F',' -v k1="$k1" -v k2="$k2" '
    NR==FNR && FNR==1 { h2=$0; next }
    NR==FNR { lookup[$k2]=$0; next }
    FNR==1 { print $0 "," h2; next }
    $k1 in lookup { print $0 "," lookup[$k1] }
    ' "$f2" "$f1" > "$out"

    local merged_rows
    merged_rows="$(awk 'END { print (NR > 0 ? NR - 1 : 0) }' "$out")"
    success "Merged: $out ($merged_rows rows)"
}

# Split a CSV into one file per distinct value of a column.  Each output is
# named "<file without .csv>_<value>.csv" and starts with the original
# header.
# Arguments: $1 - CSV path, $2 - column name
# Returns:   1 (after an error message) on bad arguments or an unknown column
# Notes:     characters outside [A-Za-z0-9_-] in a value become "_" in the
#            file name, so values differing only in such characters share a
#            file.  Fields are split on every comma (no quote handling).
_data_split() {
    # `${N:-}` keeps the usage message reachable under `set -u`.
    local file="${1:-}" col="${2:-}"
    [[ -z "$file" || -z "$col" ]] && { error "Usage: morie data split <file> <column>"; return 1; }
    [[ ! -f "$file" ]] && { error "File not found: $file"; return 1; }

    local header idx
    header="$(head -n 1 "$file")"
    # -x -F matches the column name literally and in full; the fallback keeps
    # a no-match (grep exit 1, propagated by pipefail) from aborting the
    # script before the error message is printed.
    idx="$(tr -d '\r' <<< "$header" | tr ',' '\n' | grep -nxF -- "$col" | head -n 1 | cut -d: -f1)" || idx=""
    [[ -z "$idx" ]] && { error "Column not found: $col"; return 1; }

    local base="${file%.csv}"
    local split_count
    # awk reports how many files it created.  Counting `${base}_*.csv` with
    # ls afterwards also picked up unrelated files such as an earlier
    # "<base>_merged.csv".  The header is taken from line 1 inside awk rather
    # than via `-v`, which would interpret backslash escapes in it.
    split_count="$(awk -F',' -v c="$idx" -v base="$base" '
    NR == 1 { header = $0; next }
    {
        val = $c; gsub(/[^a-zA-Z0-9_-]/, "_", val)
        outfile = base "_" val ".csv"
        # The first write to a name truncates any stale file from an earlier
        # run; later writes in this run append to the still-open file.
        if (!(val in seen)) { print header > outfile; seen[val] = 1; made++ }
        print > outfile
    }
    END { print made + 0 }' "$file")"
    success "Split into $split_count files by '$col'"
}

# =========================================================================
# morie log — Log management
# =========================================================================

# Entry point for `morie log`.  Subcommands:
#   show|tail [N]  print the last N lines of morie.log (default 50)
#   clear          delete every *.log file in the log directory
#   path           print the log directory
#   size           print the log directory's disk usage
# Anything else (including `help`) prints the usage text.
# The log directory ($ESML_LOG_DIR) is created on every invocation.
cmd_log() {
    local action="${1:-show}"
    if (( $# > 0 )); then
        shift
    fi

    mkdir -p "$ESML_LOG_DIR"

    case "$action" in
        show|tail)
            local target="$ESML_LOG_DIR/morie.log"
            if [[ -f "$target" ]]; then
                local lines="${1:-50}"
                echo "${BOLD}MORIE Log (last $lines entries):${RESET}"
                tail -n "$lines" "$target"
            else
                # Nothing has been logged yet; this is not an error.
                info "No log file found. Run some commands first."
                return 0
            fi
            ;;
        clear)
            rm -f "$ESML_LOG_DIR"/*.log
            success "Logs cleared."
            ;;
        path)
            echo "$ESML_LOG_DIR"
            ;;
        size)
            if [[ ! -d "$ESML_LOG_DIR" ]]; then
                echo "0B"
            else
                du -sh "$ESML_LOG_DIR"
            fi
            ;;
        *)
            cat << LEOF
${BOLD}morie log${RESET} — Log management

${BOLD}COMMANDS${RESET}
    show [N]    Show last N log entries (default: 50)
    clear       Delete all log files
    path        Print log directory path
    size        Show log directory size
LEOF
            ;;
    esac
}

# Append one line to $ESML_LOG_DIR/morie.log: a UTC ISO-8601 timestamp, a
# space, then all arguments joined by spaces.  The log directory is created
# when it does not exist yet.
_log_event() {
    local dest="$ESML_LOG_DIR/morie.log"
    mkdir -p "$ESML_LOG_DIR"
    printf '%s %s\n' "$(date -u +%Y-%m-%dT%H:%M:%SZ)" "$*" >> "$dest"
}

# =========================================================================
# morie backup — Output backup and restore
# =========================================================================

# cmd_backup — create, restore, list, and prune tar.gz backups of outputs.
#
# Usage:
#   cmd_backup [create [dir]]            archive <dir> (default: $ROOT/data/public/outputs)
#   cmd_backup restore <archive> [dest]  extract into <dest> (default: $ROOT/data/public)
#   cmd_backup list                      list archives in $ESML_HOME/backups
#   cmd_backup clean [N]                 keep the N most recently modified archives (default: 5)
#   cmd_backup help                      print usage (also shown for unknown subcommands)
#
# Globals read: ESML_HOME, ROOT, BOLD, DIM, RESET.
# Returns: 0 on success; 1 on a usage error, a missing source directory or
#          archive, or a tar failure.
cmd_backup() {
    local subcmd="${1:-create}"
    shift 2>/dev/null || true

    local backup_dir="$ESML_HOME/backups"
    mkdir -p "$backup_dir"

    # Collect existing archives with a glob instead of parsing `ls`.  An
    # unmatched glob stays literal, hence the existence filter.  This keeps
    # `list` and `clean` working when there are no backups: the script runs
    # with errexit + pipefail, where a failing `ls` would abort the command.
    local -a archives=()
    local f
    for f in "$backup_dir"/esml_backup_*.tar.gz; do
        if [[ -e "$f" ]]; then
            archives+=("$f")
        fi
    done

    case "$subcmd" in
        create)
            local src="${1:-$ROOT/data/public/outputs}"
            local timestamp
            timestamp="$(date +%Y%m%d_%H%M%S)"
            local archive="$backup_dir/esml_backup_${timestamp}.tar.gz"

            if [[ ! -d "$src" ]]; then
                warn "Source directory not found: $src"
                return 1
            fi

            info "Backing up: $src"
            # Archive relative to the parent so the tarball holds only the
            # leaf directory name.  tar's own diagnostics are left visible.
            if ! tar -czf "$archive" -C "$(dirname "$src")" "$(basename "$src")"; then
                # Do not leave a partial archive for `list`/`clean` to count.
                rm -f -- "$archive"
                error "Backup failed: could not archive $src"
                return 1
            fi

            # `wc -c` is portable across GNU and BSD userlands (no stat flag
            # differences); arithmetic expansion tolerates its padding.
            local size
            size="$(wc -c < "$archive")"
            local size_kb=$((size / 1024))
            success "Backup created: $archive (${size_kb}KB)"
            _log_event "BACKUP created: $archive"
            ;;
        restore)
            # ${1:-} rather than $1: with nounset, a missing argument must
            # reach the usage message instead of aborting the shell.
            local archive="${1:-}"
            local dest="${2:-$ROOT/data/public}"

            if [[ -z "$archive" ]]; then
                error "Usage: morie backup restore <archive> [destination]"
                return 1
            fi
            if [[ ! -f "$archive" ]]; then
                error "Archive not found: $archive"
                return 1
            fi

            info "Restoring from: $archive"
            info "Destination: $dest"
            mkdir -p "$dest"
            if ! tar -xzf "$archive" -C "$dest"; then
                error "Restore failed: could not extract $archive"
                return 1
            fi
            success "Restored to: $dest"
            _log_event "BACKUP restored: $archive -> $dest"
            ;;
        list)
            echo "${BOLD}Available backups:${RESET}"
            if (( ${#archives[@]} > 0 )); then
                local meta
                for f in "${archives[@]}"; do
                    # Size and date come from `ls -lh`; the path is printed
                    # from the glob result so directories containing spaces
                    # are shown intact.
                    meta="$(ls -lh -- "$f" 2>/dev/null | awk '{printf "%s  %s %s", $5, $6, $7}')" || meta="?"
                    printf '  %s  %s\n' "$meta" "$f"
                done
            else
                echo "  (none)"
            fi
            echo "${DIM}(${#archives[@]} backups)${RESET}"
            ;;
        clean)
            local keep="${1:-5}"
            if [[ ! "$keep" =~ ^[0-9]+$ ]]; then
                error "Usage: morie backup clean [N]  (N must be a non-negative integer)"
                return 1
            fi
            # Force base 10 so input such as "08" is not read as octal.
            keep=$((10#$keep))
            info "Keeping last $keep backups..."

            local count=0
            if (( ${#archives[@]} > 0 )); then
                # `ls -t` orders newest-first by modification time; every
                # entry after the first $keep is removed.
                while IFS= read -r f; do
                    [[ -n "$f" ]] || continue
                    count=$((count + 1))
                    if (( count > keep )); then
                        rm -f -- "$f"
                        info "Removed: $(basename "$f")"
                    fi
                done < <(ls -t -- "${archives[@]}")
            fi
            success "Cleanup complete."
            ;;
        help|*)
            cat << BEOF
${BOLD}morie backup${RESET} — Backup and restore analysis outputs

${BOLD}COMMANDS${RESET}
    create [dir]           Create a backup of outputs (default: data/public/outputs)
    restore <archive> [dest]  Restore from a backup archive
    list                   List available backups
    clean [N]              Keep only the last N backups (default: 5)
BEOF
            ;;
    esac
}

# =========================================================================
# morie env — Environment management
# =========================================================================

# cmd_env — inspect MORIE-related environment variables.
#
# Usage:
#   cmd_env [show]    print matching variables, masking secret-looking values
#   cmd_env export    print `export NAME=value` lines for a shell rc file
#   cmd_env check     flag settings that may conflict with MORIE
#   cmd_env help      print usage (also shown for unknown subcommands)
#
# "Matching" means exported variables whose names start with ESML_, OLLAMA_,
# GEMINI_, LLM_ or OPENAI_.  The environment is read line by line from
# `env`, so a value containing a newline is not handled.
# Globals read: BOLD, RESET, RED, YELLOW, ESML_PYTHON, VIRTUAL_ENV,
#               CONDA_DEFAULT_ENV.
cmd_env() {
    local subcmd="${1:-show}"
    shift 2>/dev/null || true

    local prefix_re='^(ESML_|OLLAMA_|GEMINI_|LLM_|OPENAI_)'
    local line key value

    case "$subcmd" in
        show)
            echo "${BOLD}MORIE Environment Variables:${RESET}"
            echo ""
            # Process substitution instead of a pipeline: grep exits 1 when
            # nothing matches, which under pipefail + errexit would abort.
            while IFS= read -r line; do
                [[ "$line" == *=* ]] || continue
                # Split on the first '=' by hand; `IFS='=' read key value`
                # drops a single trailing '=' from the value.
                key="${line%%=*}"
                value="${line#*=}"
                # Mask sensitive values.
                if [[ "$key" == *KEY* || "$key" == *SECRET* || "$key" == *TOKEN* || "$key" == *PASSWORD* ]]; then
                    if [[ -z "$value" ]]; then
                        printf "  %-30s (not set)\n" "$key"
                    elif (( ${#value} >= 16 )); then
                        printf "  %-30s %s...%s\n" "$key" "${value:0:4}" "${value: -4}"
                    else
                        # Showing 4+4 characters of a short secret would
                        # reveal most or all of it, so hide it entirely.
                        printf "  %-30s ****\n" "$key"
                    fi
                else
                    printf "  %-30s %s\n" "$key" "$value"
                fi
            done < <(env | grep -E "$prefix_re" | sort)
            ;;
        export)
            echo "# MORIE environment — paste into .bashrc/.zshrc"
            while IFS= read -r line; do
                [[ "$line" == *=* ]] || continue
                key="${line%%=*}"
                value="${line#*=}"
                # %q shell-quotes the value so quotes, '$', backticks and
                # spaces survive being pasted and sourced unchanged.
                printf 'export %s=%q\n' "$key" "$value"
            done < <(env | grep -E "$prefix_re" | sort)
            ;;
        check)
            echo "${BOLD}Environment Health:${RESET}"
            echo ""
            # Check for conflicts.
            if [[ -n "${ESML_PYTHON:-}" ]] && [[ ! -x "${ESML_PYTHON}" ]]; then
                printf "  ${RED} FAIL${RESET} ESML_PYTHON='%s' is not executable\n" "$ESML_PYTHON"
            fi
            if [[ -n "${VIRTUAL_ENV:-}" ]]; then
                printf "  ${YELLOW} WARN${RESET} VIRTUAL_ENV is set: %s\n" "$VIRTUAL_ENV"
                info "  This may conflict with MORIE's own venv."
            fi
            if [[ -n "${CONDA_DEFAULT_ENV:-}" ]]; then
                printf "  ${YELLOW} WARN${RESET} Conda environment active: %s\n" "$CONDA_DEFAULT_ENV"
            fi
            success "Environment check complete."
            ;;
        help|*)
            cat << EEOF
${BOLD}morie env${RESET} — Environment management

${BOLD}COMMANDS${RESET}
    show     Show MORIE-related environment variables (masks secrets)
    export   Print exportable env vars for shell config
    check    Check for conflicts and issues
EEOF
            ;;
    esac
}

# =========================================================================
# morie test — Native test runner
# =========================================================================

# cmd_test — run the project's test suites.
#
# Usage:
#   cmd_test [all [pytest args]]         full pytest run; start/finish are logged
#   cmd_test quick [pytest args]         stop at the first failure
#   cmd_test module <name> [pytest args] tests/test_<name>.py if that file
#                                        exists, otherwise `pytest -k <name>`
#   cmd_test coverage [pytest args]      pytest with coverage for `morie`
#   cmd_test r                           run the R test script via Rscript
#   cmd_test list                        list test files under $ROOT/tests
#   cmd_test help                        print usage (also for unknown subcommands)
#
# Globals read: PYTHON (invoked after _require_python has run), ROOT, BOLD,
#               RESET.
# Returns: the exit status of the underlying test runner; 1 on usage errors
#          or when Rscript is missing.
cmd_test() {
    local subcmd="${1:-all}"
    shift 2>/dev/null || true

    _require_python

    case "$subcmd" in
        all)
            info "Running full test suite..."
            _log_event "TEST started: all"
            # Capture the status with `|| rc=$?`: under errexit a bare
            # failing pytest would end the script before the completion
            # event is logged.
            local rc=0
            "$PYTHON" -m pytest -q --tb=short "$@" || rc=$?
            _log_event "TEST completed: all (exit=$rc)"
            return "$rc"
            ;;
        quick)
            info "Running quick smoke tests..."
            "$PYTHON" -m pytest -q --tb=line -x "$@"
            ;;
        module)
            # ${1:-} so a missing name reaches the usage message under nounset.
            local mod="${1:-}"
            if [[ -z "$mod" ]]; then
                error "Usage: morie test module <name>"
                return 1
            fi
            # Drop the module name so "$@" holds only extra pytest arguments;
            # otherwise the name is passed to pytest again as a path.
            shift
            info "Running tests for module: $mod"
            # Select by file when it exists, by keyword otherwise.  A failing
            # test file is reported as-is rather than silently re-run.
            if [[ -f "tests/test_${mod}.py" ]]; then
                "$PYTHON" -m pytest -q "tests/test_${mod}.py" "$@"
            else
                "$PYTHON" -m pytest -q -k "$mod" "$@"
            fi
            ;;
        coverage)
            info "Running tests with coverage..."
            "$PYTHON" -m pytest --cov=morie --cov-report=term-missing -q "$@"
            ;;
        r)
            if command -v Rscript &>/dev/null; then
                info "Running R tests..."
                Rscript "$ROOT/scripts/test_r_package.R"
            else
                error "Rscript not found."
                return 1
            fi
            ;;
        list)
            info "Available test files:"
            # Glob instead of an `ls | xargs | sed` pipeline, which fails
            # under pipefail when no test files exist.
            local path name
            for path in "$ROOT/tests/test_"*.py; do
                [[ -e "$path" ]] || continue
                name="${path##*/}"
                name="${name%.py}"
                printf '  %s\n' "${name#test_}"
            done
            ;;
        help|*)
            cat << TEOF
${BOLD}morie test${RESET} — Test runner

${BOLD}COMMANDS${RESET}
    all [pytest args]        Run full test suite
    quick                    Quick smoke tests (stop on first failure)
    module <name>            Run tests for a specific module
    coverage                 Run with coverage reporting
    r                        Run R package tests
    list                     List available test files
TEOF
            ;;
    esac
}

# =========================================================================
# morie man — Built-in manual pages
# =========================================================================

# cmd_man — print a built-in manual page to stdout.
#
# Usage: cmd_man [topic]
#   topic defaults to "morie" ("overview" is an alias).  Any topic without a
#   page prints the index of available pages instead.
#
# Every page is a quoted heredoc, so its text is emitted verbatim with no
# variable or command expansion.
cmd_man() {
    local page="${1:-morie}"

    case "$page" in
        # --- Overview --------------------------------------------------
        morie|overview)
            cat << 'MANEOF'
MORIE(1)                     MORIE Manual                     MORIE(1)

NAME
    morie — Multi-domain Open Research and Inferential Estimation

SYNOPSIS
    morie [command] [options]
    morie chat [--agent NAME]
    morie pipeline --all [-y]
    morie data profile <file>
    morie container build|run|verify

DESCRIPTION
    MORIE is a terminal-first scientific computing toolkit for
    epidemiological and statistical modeling. It provides:

    - 45+ Python modules for causal inference, survival analysis,
      missing data handling, multiple testing, and more
    - 987+ functions and classes
    - Interactive LLM-powered chat assistant
    - Full-screen terminal IDE (Textual)
    - Rich pipeline progress tracking
    - Statistical output verification
    - Docker container management
    - Native CSV data toolkit

    The morie executable is a self-contained UNIX command that handles
    system-level operations natively in bash and delegates computation
    to the Python runtime.

ENVIRONMENT
    ESML_HOME       Data and config root (~/.morie)
    ESML_PYTHON     Override Python interpreter
    ESML_DEBUG      Set to 1 for verbose output
    ESML_NO_COLOR   Set to 1 to disable colors
    OLLAMA_BASE_URL Ollama endpoint
    GEMINI_API_KEY  Google AI Studio key
    LLM_API_KEY     Generic LLM API key
    OPENAI_API_KEY  OpenAI API key

FILES
    ~/.config/morie/esmlrc    User configuration
    ~/.morie/logs/            Log files
    ~/.morie/backups/         Output backups

SEE ALSO
    morie help, morie man <topic>

LICENSE
    GPL-3.0-or-later

MANEOF
            ;;
        # --- Command pages ---------------------------------------------
        install)
            cat << 'MANEOF'
MORIE-INSTALL(1)             MORIE Manual             MORIE-INSTALL(1)

NAME
    morie install — Self-bootstrapping installer

SYNOPSIS
    morie install

DESCRIPTION
    Detects the operating system, architecture, and package manager.
    Installs or verifies: Python >= 3.10, pip, virtual environment,
    the morie Python package, R (optional), Ollama (optional), and
    shell completions.

    Works on macOS, Linux, and WSL. Supports Homebrew, apt, dnf,
    pacman, apk, and zypper package managers.

    In a dev checkout (py-package/morie/ exists), installs in
    editable mode. Otherwise installs from PyPI.

MANEOF
            ;;
        chat)
            cat << 'MANEOF'
MORIE-CHAT(1)                MORIE Manual                MORIE-CHAT(1)

NAME
    morie chat — Interactive LLM chat REPL

SYNOPSIS
    morie chat [--agent NAME]

DESCRIPTION
    Launches an interactive terminal chat session with streaming
    LLM responses, slash commands, and conversation history.

    Uses the provider chain: Ollama (local) → Gemini → OpenAI-
    compatible → OpenAI → local fallback.

SLASH COMMANDS
    /run <module>     Run an analysis module
    /list             List available modules
    /doctor           Run diagnostics
    /profile <csv>    Profile a dataset
    /inspect <path>   Inspect output files
    /verify <path>    Verify statistical outputs
    /agent <name>     Switch agent persona
    /agents           List available agents
    /help             Show all commands
    /quit             Exit

OPTIONS
    --agent NAME      Load a specific agent persona

MANEOF
            ;;
        data)
            cat << 'MANEOF'
MORIE-DATA(1)                MORIE Manual                MORIE-DATA(1)

NAME
    morie data — Native CSV data toolkit

SYNOPSIS
    morie data <command> <file> [options]

DESCRIPTION
    Provides CSV analysis and manipulation entirely in bash using
    awk/sed. No Python required. Handles profiling, missing data
    reports, statistics, frequency tables, validation, conversion,
    merging, splitting, and searching.

COMMANDS
    profile <file>         Quick data profile
    head <file> [N]        First N rows
    columns <file>         Column names and indices
    shape <file>           Row x column dimensions
    missing <file>         Missing value report
    sample <file> [N]      Random sample
    stats <file> [col]     Basic statistics
    unique <file> <col>    Unique values
    freq <file> <col>      Frequency table
    search <file> <term>   Search for values
    validate <file>        CSV integrity check
    diff <f1> <f2>         Compare two CSVs
    convert <file> <fmt>   Convert format (tsv, jsonl, md)
    merge <f1> <f2> <key>  Join on key column
    split <file> <col>     Split by column values

MANEOF
            ;;
        container)
            cat << 'MANEOF'
MORIE-CONTAINER(1)           MORIE Manual           MORIE-CONTAINER(1)

NAME
    morie container — Docker container management

SYNOPSIS
    morie container <command> [options]

DESCRIPTION
    Build, run, inspect, and verify MORIE Docker containers.
    Manages the morie + Ollama service stack via docker compose.

COMMANDS
    build [--no-cache]    Build Docker image
    run [CMD]             Run command in container
    shell                 Interactive bash
    inspect [IMAGE]       Image details + health check
    verify [IMAGE]        Run pipeline in container
    up                    Start services (compose)
    down                  Stop services
    logs                  Follow service logs
    list                  List images/containers
    export [DEST]         Export pipeline results
    clean                 Remove old containers/images

ENVIRONMENT
    ESML_DOCKER_IMAGE     Image tag (default: morie:latest)

MANEOF
            ;;
        pipeline)
            cat << 'MANEOF'
MORIE-PIPELINE(1)            MORIE Manual            MORIE-PIPELINE(1)

NAME
    morie pipeline — Run the analysis module pipeline

SYNOPSIS
    morie pipeline --all [-y] [--modules M1 M2 ...]
    morie pipeline --modules power-design logistic-models

DESCRIPTION
    Executes MORIE analysis modules with rich progress tracking.
    21 modules available covering data wrangling, descriptive
    statistics, inference, causal analysis, and reporting.

    When running in an interactive terminal, displays live progress
    bars and a summary table. When piped, outputs plain text.

OPTIONS
    --all              Run all implemented modules
    --modules M1 M2    Run specific modules
    -y, --yes          Skip confirmation prompt
    --no-carbon        Disable CodeCarbon emissions tracking
    --cpads-csv PATH   Path to CPADS CSV input
    --output-dir DIR   Output directory for CSVs

MODULES
    data-wrangling, descriptive-statistics, distribution-tests,
    frequentist-inference, bayesian-inference, power-design,
    logistic-models, model-comparison, regression-models,
    propensity-scores, causal-estimators, treatment-effects,
    dag-specification, meta-synthesis, ebac-core,
    ebac-selection-adjustment-ipw, ebac-integrations,
    ebac-gender-smote-sensitivity, figures, tables, final-report

MANEOF
            ;;
        modules)
            cat << 'MANEOF'
MORIE-MODULES(1)             MORIE Manual             MORIE-MODULES(1)

NAME
    morie list-modules, morie run-module — Module management

SYNOPSIS
    morie list-modules
    morie run-module <name> [--cpads-csv PATH] [--output-dir DIR]

DESCRIPTION
    MORIE organizes analysis into named modules. Each module declares
    its expected output files and is dispatched to the appropriate
    Python or R implementation.

    Module dispatch: tries R-backed execution first via Rscript,
    falls back to Python if R is unavailable or fails.

MANEOF
            ;;
        # --- Method overviews ------------------------------------------
        causal)
            cat << 'MANEOF'
MORIE-CAUSAL(1)              MORIE Manual              MORIE-CAUSAL(1)

NAME
    Causal inference methods in MORIE

DESCRIPTION
    MORIE provides comprehensive causal inference tools across
    multiple Python modules:

    morie.causal        IPW, AIPW, propensity scores, ATE/ATT/ATC
    morie.did           Difference-in-differences
    morie.rdd           Regression discontinuity design
    morie.iv            Instrumental variables, 2SLS
    morie.matching      Propensity score matching, CEM
    morie.effects       Double Machine Learning (DoubleML)
    morie.sensitivity   E-value, Rosenbaum bounds, OVB analysis

    All estimators return structured dataclass results with
    point estimates, standard errors, confidence intervals,
    and diagnostic information.

ESTIMANDS
    ATE     Average Treatment Effect
    ATT     Average Treatment Effect on the Treated
    ATC     Average Treatment Effect on the Controls
    GATE    Group Average Treatment Effect
    CATE    Conditional Average Treatment Effect
    LATE    Local Average Treatment Effect
    ITT     Intent-to-Treat Effect
    RMST    Restricted Mean Survival Time

METHODS
    IPW     Inverse Probability Weighting (Horvitz-Thompson, Hajek)
    AIPW    Augmented IPW (doubly-robust)
    DML     Double/Debiased Machine Learning
    TMLE    Targeted Minimum Loss-Based Estimation
    PSM     Propensity Score Matching
    DiD     Difference-in-Differences
    RDD     Regression Discontinuity Design
    IV      Instrumental Variables / 2SLS
    SC      Synthetic Control

MANEOF
            ;;
        survival)
            cat << 'MANEOF'
MORIE-SURVIVAL(1)            MORIE Manual            MORIE-SURVIVAL(1)

NAME
    Survival analysis methods in MORIE

DESCRIPTION
    The morie.survival module provides:

    NON-PARAMETRIC
        Kaplan-Meier estimator (Greenwood / log-log CI)
        Nelson-Aalen cumulative hazard estimator
        Log-rank test (standard, Peto-Peto, Gehan-Wilcoxon)

    SEMI-PARAMETRIC
        Cox proportional hazards (Breslow/Efron ties)
        Schoenfeld residuals for PH assumption testing
        Cox-Snell, Martingale, Deviance residuals

    PARAMETRIC
        Exponential, Weibull, Log-normal, Log-logistic, Gompertz
        Accelerated Failure Time (AFT) models

    ADVANCED
        Restricted Mean Survival Time (RMST)
        Competing risks (CIF, Fine-Gray)
        Concordance index (C-statistic)

MANEOF
            ;;
        # --- Section 7 method references -------------------------------
        matching)
            cat << 'MANEOF'
MATCHING(7)            MORIE Matching Manual            MATCHING(7)

NAME
    morie matching — Propensity score and covariate matching methods

DESCRIPTION
    MORIE provides 30+ matching methods for causal inference with
    observational data. These methods estimate treatment effects by
    creating comparable treatment and control groups.

METHODS
    Propensity Score Methods:
        estimate_propensity_score()  Logistic regression PS estimation
        trim_propensity_scores()     Remove extreme PS values
        common_support()             Check overlap region
        overlap_diagnostics()        Overlap assessment

    Nearest Neighbor:
        match_nearest_neighbor()     1:1 or 1:k NN matching on PS
        match_variable_ratio()       Variable ratio NN matching

    Distance-Based:
        match_mahalanobis()          Mahalanobis distance matching
        match_optimal_pair()         Optimal pair matching (min total distance)
        match_full()                 Full matching (all units matched)

    Coarsened/Exact:
        match_exact()                Exact matching on covariates
        match_cem()                  Coarsened exact matching

    Advanced:
        match_genetic()              Genetic algorithm matching
        match_cardinality()          Cardinality matching
        entropy_balance()            Entropy balancing weights
        subclassify()                PS subclassification (strata)
        match_longitudinal()         Longitudinal/panel matching
        match_multi_treatment()      Multi-valued treatment matching

    Balance Assessment:
        balance_diagnostics()        Standardized mean differences
        love_plot_data()             Love plot preparation
        balance_table()              Publication-ready balance table
        matching_quality()           Overall matching quality metrics

    Treatment Effects:
        estimate_att_matched()       ATT from matched data
        estimate_ate_matched()       ATE from matched data
        estimate_atc_matched()       ATC from matched data
        abadie_imbens_se()           Abadie-Imbens standard errors
        doubly_robust_matching()     DR estimation with matching
        rosenbaum_bounds()           Sensitivity to hidden bias

CLI USAGE
    morie match-cli nn data.csv treatment age sex income
    morie match-cli compare data.csv treatment x1 x2 x3
    morie match-cli balance data.csv treatment

PYTHON USAGE
    from morie.matching import (
        estimate_propensity_score, match_nearest_neighbor,
        balance_diagnostics, estimate_att_matched
    )
    ps = estimate_propensity_score(df, 'treatment', ['x1', 'x2'])
    result = match_nearest_neighbor(df, 'treatment', ps)
    balance = balance_diagnostics(df, result.matched_data, 'treatment', ['x1', 'x2'])
    att = estimate_att_matched(df, result, 'outcome')

R USAGE
    library(morie)
    # Or use MatchIt:
    m <- MatchIt::matchit(treatment ~ x1 + x2, data=df, method="nearest")
    summary(m)

REFERENCES
    Rosenbaum & Rubin (1983). The central role of the propensity score.
    Ho, Imai, King & Stuart (2007). Matching as nonparametric preprocessing.
    Abadie & Imbens (2006). Large sample properties of matching estimators.
    Hainmueller (2012). Entropy balancing for causal effects.
MANEOF
            ;;
        did)
            cat << 'MANEOF'
DID(7)              MORIE DiD Manual              DID(7)

NAME
    morie did — Difference-in-differences estimators

DESCRIPTION
    MORIE provides 23+ DiD methods for estimating causal effects
    from panel or repeated cross-section data.

METHODS
    Classic:
        did_2x2()                    Standard 2x2 DiD
        did_panel_fe()               Panel fixed effects
        did_repeated_cross_section() Repeated cross-section DiD

    Modern/Staggered:
        staggered_did()              Staggered treatment adoption
        group_time_att()             Group-time ATT (Callaway & Sant'Anna)
        aggregate_gt_att()           Aggregate group-time effects
        bacon_decomposition()        Bacon (2021) decomposition
        did_chaisemartin_dhaultfoeuille()  de Chaisemartin & D'Haultfoeuille

    Event Studies:
        event_study()                Dynamic treatment effect estimation
        test_parallel_trends()       Pre-trend testing
        parallel_trends_data()       Plot data for parallel trends

    Extensions:
        did_doubly_robust()          DR-DiD (Sant'Anna & Zhao 2020)
        did_triple_difference()      Triple difference (DDD)
        did_fuzzy()                  Fuzzy DiD
        did_continuous_treatment()   Continuous treatment DiD
        did_heterogeneous()          Heterogeneous treatment effects
        synthetic_did()              Synthetic control DiD

    Diagnostics:
        placebo_test_time()          Falsification: alternative timing
        placebo_test_outcome()       Falsification: alternative outcome
        placebo_test_group()         Falsification: alternative group
        did_sensitivity_analysis()   Sensitivity to parallel trends
        wild_cluster_bootstrap()     Wild cluster bootstrap inference

CLI USAGE
    morie did-cli 2x2 data.csv outcome treatment post
    morie did-cli event data.csv y unit time treat_time
    morie did-cli staggered data.csv y unit time group
    morie did-cli bacon data.csv outcome treatment post
    morie did-cli parallel data.csv outcome treatment time

REFERENCES
    Callaway & Sant'Anna (2021). DiD with multiple time periods.
    Bacon (2021). Difference-in-differences with variation in treatment timing.
    de Chaisemartin & D'Haultfoeuille (2020). Two-way fixed effects estimators.
    Roth (2022). Pretest with caution: event-study estimates.
MANEOF
            ;;
        rdd)
            cat << 'MANEOF'
RDD(7)              MORIE RDD Manual              RDD(7)

NAME
    morie rdd — Regression discontinuity design estimators

DESCRIPTION
    MORIE provides 24+ RDD methods for estimating causal effects
    at known thresholds/cutoffs.

METHODS
    Core Estimators:
        sharp_rdd()                  Sharp RDD (local polynomial)
        fuzzy_rdd()                  Fuzzy RDD (IV at cutoff)
        rdd_bias_corrected()         Bias-corrected robust RDD

    Bandwidth Selection:
        bandwidth_cct()              Calonico, Cattaneo & Titiunik (2014)
        bandwidth_ik()               Imbens & Kalyanaraman (2012)
        bandwidth_rot()              Rule-of-thumb bandwidth

    Kernels:
        kernel_triangular()          Triangular kernel
        kernel_epanechnikov()        Epanechnikov kernel
        kernel_uniform()             Uniform kernel
        kernel_gaussian()            Gaussian kernel

    Validity Tests:
        mccrary_test()               McCrary (2008) density test
        cattaneo_density_test()      Cattaneo et al. density test
        covariate_balance_rdd()      Covariate balance at cutoff
        placebo_cutoff_test()        Placebo cutoffs

    Extensions:
        donut_rdd()                  Donut hole RDD
        rdd_discrete()               Discrete running variable
        kink_rdd()                   Regression kink design
        rdd_local_randomisation()    Local randomization framework
        geographic_rdd()             Geographic/spatial RDD

    Power & Design:
        rdd_power()                  Power calculation for RDD
        rdd_sample_size()            Required sample size
        bandwidth_sensitivity()      Bandwidth sensitivity analysis

    Visualization:
        rd_plot_data()               RDD plot data preparation

CLI USAGE
    morie rdd-cli sharp data.csv outcome running 0.5
    morie rdd-cli fuzzy data.csv outcome running treatment 0.5
    morie rdd-cli mccrary data.csv running 0.5
    morie rdd-cli bandwidth data.csv outcome running 0.5

REFERENCES
    Imbens & Lemieux (2008). Regression discontinuity designs: a guide.
    Calonico, Cattaneo & Titiunik (2014). Robust RDD inference.
    Lee & Lemieux (2010). RDD in economics.
MANEOF
            ;;
        iv)
            cat << 'MANEOF'
IV(7)              MORIE IV Manual              IV(7)

NAME
    morie iv — Instrumental variables estimators

DESCRIPTION
    MORIE provides 23+ IV methods for estimating causal effects
    when treatment is endogenous.

METHODS
    Core Estimators:
        tsls()                       Two-stage least squares
        liml()                       Limited info maximum likelihood
        gmm_iv()                     Generalized method of moments
        cue_gmm()                    Continuously updated GMM
        wald_estimator()             Wald/ratio estimator
        jive()                       Jackknife IV estimator
        split_sample_iv()            Split sample IV

    Diagnostics:
        first_stage_diagnostics()    F-statistic, partial R²
        cragg_donald_test()          Weak instrument test
        stock_yogo_critical_values() Stock-Yogo critical values
        kleibergen_paap_test()       Rank test for weak ID

    Specification Tests:
        anderson_rubin_test()        Anderson-Rubin test (robust to weak IV)
        anderson_rubin_ci()          AR confidence interval
        conditional_lr_test()        Conditional LR test
        sargan_test()                Sargan overidentification
        hansen_j_test()              Hansen J test
        hausman_test()               Hausman specification test
        durbin_wu_hausman()          DWH endogeneity test

    Extensions:
        control_function()           Control function approach
        iv_probit()                  IV probit
        panel_iv()                   Panel IV estimation
        iv_diagnostics()             Comprehensive IV diagnostics
        iv_residual_analysis()       IV residual analysis

CLI USAGE
    morie iv-cli tsls data.csv outcome endogenous instrument
    morie iv-cli hausman data.csv outcome endogenous instrument
    morie iv-cli first-stage data.csv endogenous instrument
    morie iv-cli sargan data.csv outcome endogenous z1 z2

REFERENCES
    Angrist & Pischke (2009). Mostly Harmless Econometrics.
    Stock, Wright & Yogo (2002). Survey of weak instruments.
    Andrews, Stock & Sun (2019). Weak instruments in IV regression.
MANEOF
            ;;
        # --- Fallback: index of available pages ------------------------
        *)
            cat << 'MANEOF'
Available man pages:

  morie man morie          Overview
  morie man install       Self-bootstrap installer
  morie man chat          Interactive chat REPL
  morie man data          Native CSV data toolkit
  morie man container     Docker management
  morie man pipeline      Analysis pipeline
  morie man modules       Module management
  morie man causal        Causal inference methods
  morie man survival      Survival analysis methods
  morie man matching      Propensity score matching
  morie man did           Difference-in-differences
  morie man rdd           Regression discontinuity
  morie man iv            Instrumental variables
MANEOF
            ;;
    esac
}

# =========================================================================
# morie status — Quick project status overview
# =========================================================================

# Print how many of the given paths exist.  Callers pass an unquoted glob;
# an unmatched glob stays literal and is therefore not counted.
_status_count_existing() {
    local n=0 p
    for p in "$@"; do
        if [[ -e "$p" ]]; then
            n=$((n + 1))
        fi
    done
    printf '%s\n' "$n"
}

# Print the total number of lines across the given regular files (0 when
# none of them exist).
_status_total_lines() {
    local -a found=()
    local p
    for p in "$@"; do
        if [[ -f "$p" ]]; then
            found+=("$p")
        fi
    done
    if (( ${#found[@]} == 0 )); then
        printf '0\n'
        return 0
    fi
    cat -- "${found[@]}" | wc -l | tr -d ' '
}

# cmd_status — print a quick overview of the project.
#
# Reports: the installed morie Python package (version and module count),
# source file and line counts under $ROOT, test/R file counts, git branch
# and working-tree state, and which LLM provider is reachable/configured.
#
# Every probe is best-effort: a missing directory, file, or tool yields a
# "0"/"?"/"not installed" entry instead of aborting, which matters because
# the script runs with errexit and pipefail.
#
# Globals read: ROOT, BOLD, DIM, RESET, RED, GREEN, YELLOW,
#               OLLAMA_BASE_URL, GEMINI_API_KEY, OPENAI_API_KEY.
cmd_status() {
    echo "${BOLD}MORIE Project Status${RESET}"
    echo "${DIM}$(date)${RESET}"
    echo ""

    # Python package.
    local py
    py="$(_resolve_python 2>/dev/null)" || py=""
    if [[ -n "$py" ]] && "$py" -c "import morie" 2>/dev/null; then
        local ver mods
        ver="$("$py" -c "import morie; print(morie.__version__)" 2>/dev/null)" || ver="?"
        mods="$("$py" -c "import morie; print(len(morie.list_modules()))" 2>/dev/null)" || mods="?"
        printf "  %-20s v%s (%s modules)\n" "morie" "$ver" "$mods"
    else
        printf "  %-20s ${RED}not installed${RESET}\n" "morie"
    fi

    # Python modules count.
    local pymod_count pylines
    pymod_count="$(_status_count_existing "$ROOT/py-package/morie/"*.py)"
    pylines="$(_status_total_lines "$ROOT/py-package/morie/"*.py)"
    printf "  %-20s %s modules, %s lines\n" "Python code" "$pymod_count" "$pylines"

    # Shell wrapper.
    local shell_lines
    shell_lines="$(_status_total_lines "$ROOT/morie")"
    printf "  %-20s %s lines\n" "Shell executive" "$shell_lines"

    # Tests.
    local test_count
    test_count="$(_status_count_existing "$ROOT/tests/test_"*.py)"
    printf "  %-20s %s test files\n" "Tests" "$test_count"

    # R package.
    local r_files rd_files
    r_files="$(_status_count_existing "$ROOT/r-package/morie/R/"*.R)"
    rd_files="$(_status_count_existing "$ROOT/r-package/morie/man/"*.Rd)"
    printf "  %-20s %s R files, %s .Rd docs\n" "R package" "$r_files" "$rd_files"

    # Git status.  `-e` rather than `-d`: in a linked worktree or submodule
    # `.git` is a regular file.
    echo ""
    if command -v git &>/dev/null && [[ -e "$ROOT/.git" ]]; then
        local branch commits modified untracked
        branch="$(cd "$ROOT" && git branch --show-current 2>/dev/null)" || branch="?"
        commits="$(cd "$ROOT" && git rev-list --count HEAD 2>/dev/null)" || commits="?"
        modified="$(cd "$ROOT" && git diff --name-only 2>/dev/null | wc -l | tr -d ' ')" || modified="?"
        untracked="$(cd "$ROOT" && git ls-files --others --exclude-standard 2>/dev/null | wc -l | tr -d ' ')" || untracked="?"
        printf "  %-20s %s (%s commits)\n" "Branch" "$branch" "$commits"
        printf "  %-20s %s modified, %s untracked\n" "Working tree" "$modified" "$untracked"
    fi

    # LLM.  Probe the configured Ollama endpoint, falling back to the
    # local default when OLLAMA_BASE_URL is not set.
    echo ""
    local ollama_url="${OLLAMA_BASE_URL:-http://localhost:11434}"
    if curl -s --max-time 1 "${ollama_url%/}/api/tags" &>/dev/null; then
        printf "  %-20s ${GREEN}running${RESET}\n" "Ollama"
    elif [[ -n "${GEMINI_API_KEY:-}" ]]; then
        printf "  %-20s ${GREEN}Gemini configured${RESET}\n" "LLM"
    elif [[ -n "${OPENAI_API_KEY:-}" ]]; then
        printf "  %-20s ${GREEN}OpenAI configured${RESET}\n" "LLM"
    else
        printf "  %-20s ${YELLOW}no provider${RESET}\n" "LLM"
    fi
}

# =========================================================================
# morie r — R package management
# =========================================================================

# cmd_r SUBCMD [ARGS...] — manage the morie R package in $ROOT/r-package/morie.
#
# Subcommands: check, test, document, install, deps, list, console, lint.
# "help", no argument, or an unknown subcommand prints the usage text.
#
# Globals:   ROOT (read); BOLD, DIM, RESET (read, output styling).
# Arguments: $1 - subcommand; remaining arguments are forwarded to
#            `R CMD check` (check) or to R after `--args` (console).
# Returns:   1 when the required R executable is missing; otherwise the
#            status of the underlying R/Rscript invocation.
#
# The script runs under `set -euo pipefail`, so exit codes that must be
# inspected are captured with `|| rc=$?`, and file listings use globs rather
# than `ls | ...` pipelines (which fail when nothing matches).
cmd_r() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    local pkg_dir="$ROOT/r-package/morie"

    case "$subcmd" in
        check)
            if ! command -v R &>/dev/null; then
                error "R not installed. Run: morie install"
                return 1
            fi
            info "Running R CMD check on morie R package..."
            _log_event "R CMD check started"
            # Capture the status via `||` so a failing check does not trip
            # errexit before the completion event is logged.
            local rc=0
            R CMD check "$pkg_dir" --no-manual --no-vignettes "$@" 2>&1 || rc=$?
            _log_event "R CMD check completed (exit=$rc)"
            return "$rc"
            ;;
        test)
            if ! command -v Rscript &>/dev/null; then
                error "Rscript not found."
                return 1
            fi
            info "Running R package tests..."
            Rscript "$ROOT/scripts/test_r_package.R"
            ;;
        document)
            if ! command -v Rscript &>/dev/null; then
                error "Rscript not found."
                return 1
            fi
            info "Generating Roxygen2 documentation..."
            # Same errexit-safe capture as in `check`; otherwise the failure
            # branch below would be unreachable.
            local rc=0
            Rscript -e "setwd('$pkg_dir'); devtools::document()" 2>&1 || rc=$?
            if [[ $rc -eq 0 ]]; then
                local rd_count=0 rd
                for rd in "$pkg_dir/man/"*.Rd; do
                    # An unmatched glob stays literal; skip it.
                    if [[ -e "$rd" ]]; then
                        rd_count=$((rd_count + 1))
                    fi
                done
                success "Generated $rd_count .Rd files"
            else
                error "devtools::document() failed (exit=$rc)"
                warn "Known issue: SIGSEGV on some R installations. Check R version."
            fi
            return "$rc"
            ;;
        install)
            # The installation itself is driven through Rscript, so that is
            # the executable that has to exist.
            if ! command -v Rscript &>/dev/null; then
                error "R not installed."
                return 1
            fi
            info "Installing morie R package from source..."
            Rscript -e "install.packages('$pkg_dir', repos=NULL, type='source')"
            ;;
        deps)
            if ! command -v Rscript &>/dev/null; then
                error "Rscript not found."
                return 1
            fi
            info "Checking R dependencies..."
            Rscript -e "
                required <- c('testthat', 'survey', 'stats', 'utils')
                optional <- c('devtools', 'roxygen2', 'ggplot2', 'dplyr', 'tidyr')
                cat('Required packages:\n')
                for (pkg in required) {
                    installed <- requireNamespace(pkg, quietly=TRUE)
                    cat(sprintf('  %-20s %s\n', pkg, if(installed) 'OK' else 'MISSING'))
                }
                cat('\nOptional packages:\n')
                for (pkg in optional) {
                    installed <- requireNamespace(pkg, quietly=TRUE)
                    cat(sprintf('  %-20s %s\n', pkg, if(installed) 'OK' else 'not installed'))
                }
            "
            ;;
        list)
            info "R source files:"
            local src lines funcs
            for src in "$pkg_dir/R/"*.R; do
                [[ -e "$src" ]] || continue
                lines="$(wc -l < "$src" | tr -d ' ')"
                # grep -c exits 1 when the count is zero; fall back to 0.
                funcs="$(grep -c '^[a-zA-Z_].*<- function' "$src" 2>/dev/null)" || funcs=0
                printf "  %-30s %5s lines  %3s functions\n" "${src##*/}" "$lines" "$funcs"
            done
            echo ""
            info "Generated .Rd files:"
            local rd rd_name rd_count=0
            for rd in "$pkg_dir/man/"*.Rd; do
                [[ -e "$rd" ]] || continue
                rd_name="${rd##*/}"
                printf "  %s\n" "${rd_name%.Rd}"
                rd_count=$((rd_count + 1))
            done
            echo "${DIM}($rd_count .Rd files)${RESET}"
            ;;
        console)
            if ! command -v R &>/dev/null; then
                error "R not installed."
                return 1
            fi
            info "Starting R console with morie loaded..."
            # NOTE(review): `R -e` normally evaluates the expression and then
            # exits, so the interactive session may only be reached through
            # the plain `R --no-save` fallback — confirm the intended UX.
            R --no-save -e "library(morie, lib.loc='$ROOT/r-package')" --args "$@" 2>/dev/null || \
            R --no-save
            ;;
        lint)
            if ! command -v Rscript &>/dev/null; then
                error "Rscript not found."
                return 1
            fi
            info "Linting R source files..."
            Rscript -e "
                if (!requireNamespace('lintr', quietly=TRUE)) {
                    cat('lintr not installed. Install with: install.packages(\"lintr\")\n')
                    quit(status=1)
                }
                files <- list.files('$pkg_dir/R', pattern='\\\\.R$', full.names=TRUE)
                issues <- 0
                for (f in files) {
                    lints <- lintr::lint(f)
                    if (length(lints) > 0) {
                        cat(sprintf('\n%s: %d issues\n', basename(f), length(lints)))
                        print(lints)
                        issues <- issues + length(lints)
                    }
                }
                cat(sprintf('\nTotal: %d issues across %d files\n', issues, length(files)))
            " 2>&1
            ;;
        help|*)
            cat << REOF
${BOLD}morie r${RESET} — R package management

${BOLD}COMMANDS${RESET}
    check           Run R CMD check on the morie R package
    test            Run R package tests (testthat)
    document        Generate Roxygen2 documentation (.Rd files)
    install         Install R package from source
    deps            Check R package dependencies
    list            List R source files and .Rd docs
    console         Start R console with morie loaded
    lint            Lint R source files
REOF
            ;;
    esac
}

# =========================================================================
# morie quarto — Quarto rendering and management
# =========================================================================

# cmd_quarto SUBCMD [ARGS...] — render, preview, list and scaffold Quarto notebooks.
#
# The Quarto project directory is looked up at two locations relative to
# $ROOT; when neither exists, commands that need a project require an
# explicit file or directory argument.
#
# Subcommands:
#   render [file [quarto-args...]]  render one file, or the whole project
#   preview [dir]                   start `quarto preview`
#   list                            list .qmd files
#   create [name]                   write <name>.qmd from a template (cwd)
#   check                           report Quarto / R / knitr availability
#
# Globals:  ROOT (read); BOLD, DIM, RESET (read, output styling).
# Returns:  1 when Quarto is missing, no project/target can be found, or
#           the output file already exists; otherwise the status of the
#           last command run.
cmd_quarto() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    local quarto_dir="$ROOT/../../../dev/quarto/project"
    [[ -d "$quarto_dir" ]] || quarto_dir="$ROOT/../../quarto/project"
    [[ -d "$quarto_dir" ]] || quarto_dir=""

    case "$subcmd" in
        render)
            if ! command -v quarto &>/dev/null; then
                error "Quarto not installed. Install from: https://quarto.org"
                return 1
            fi
            if [[ $# -gt 0 && -n "$1" ]]; then
                local target="$1"
                # Drop the target from "$@" so it is not passed to quarto a
                # second time along with the extra options.
                shift
                info "Rendering: $target"
                quarto render "$target" "$@"
            elif [[ -n "$quarto_dir" ]]; then
                info "Rendering all Quarto notebooks..."
                if [[ -f "$quarto_dir/scripts/preview_site.R" ]] && command -v Rscript &>/dev/null; then
                    (cd "$quarto_dir" && Rscript scripts/preview_site.R)
                else
                    quarto render "$quarto_dir"
                fi
            else
                error "No Quarto project found. Specify a file: morie quarto render <file.qmd>"
                return 1
            fi
            ;;
        preview)
            if ! command -v quarto &>/dev/null; then
                error "Quarto not installed."
                return 1
            fi
            local target="${1:-$quarto_dir}"
            if [[ -z "$target" ]]; then
                error "No Quarto project found."
                return 1
            fi
            info "Starting Quarto preview server..."
            quarto preview "$target"
            ;;
        list)
            if [[ -n "$quarto_dir" ]]; then
                info "Quarto notebooks in project:"
                # One traversal feeds both the listing and the count. Process
                # substitution keeps the counter in this shell and keeps a
                # non-zero find status from tripping pipefail.
                local f count=0
                while IFS= read -r f; do
                    # Quote the prefix so glob characters in the path are
                    # stripped literally.
                    printf "  %s\n" "${f#"$quarto_dir"/}"
                    count=$((count + 1))
                done < <(find "$quarto_dir" -name "*.qmd" -type f 2>/dev/null)
                echo "${DIM}($count .qmd files)${RESET}"
            else
                info "Searching for .qmd files..."
                # head may close the pipe early; under pipefail the resulting
                # SIGPIPE on find must not be treated as a failure.
                find "$ROOT" -name "*.qmd" -type f -not -path "*/.git/*" 2>/dev/null | head -20 || true
            fi
            ;;
        create)
            local name="${1:-analysis}"
            local outfile="${name}.qmd"
            if [[ -f "$outfile" ]]; then
                error "File already exists: $outfile"
                return 1
            fi
            # Unquoted delimiter: ${name} is expanded, backticks are escaped.
            cat > "$outfile" << QEOF
---
title: "${name}"
author: "MORIE Analysis"
date: today
format:
  html:
    toc: true
    code-fold: true
    theme: cosmo
execute:
  warning: false
  message: false
---

## Setup

\`\`\`{r}
#| label: setup
library(morie)
\`\`\`

## Data

\`\`\`{r}
#| label: load-data
# Load your data here
\`\`\`

## Analysis

\`\`\`{r}
#| label: analysis
# Your analysis code here
\`\`\`

## Results

\`\`\`{r}
#| label: results
# Results and tables
\`\`\`
QEOF
            success "Created: $outfile"
            ;;
        check)
            if ! command -v quarto &>/dev/null; then
                echo "Quarto: not installed"
                return 1
            fi
            echo "Quarto: $(quarto --version 2>&1)"
            echo "Path: $(command -v quarto)"
            if command -v Rscript &>/dev/null; then
                echo "R: $(Rscript -e 'cat(R.version.string)' 2>/dev/null)"
                local knitr_ok
                knitr_ok="$(Rscript -e 'cat(requireNamespace("knitr", quietly=TRUE))' 2>/dev/null)"
                echo "knitr: $knitr_ok"
            fi
            ;;
        help|*)
            cat << QEOF
${BOLD}morie quarto${RESET} — Quarto notebook management

${BOLD}COMMANDS${RESET}
    render [file]    Render notebook(s) to HTML/PDF
    preview [dir]    Start live preview server
    list             List .qmd notebooks in project
    create <name>    Create a new notebook from template
    check            Check Quarto installation
QEOF
            ;;
    esac
}

# =========================================================================
# morie ci — CI/CD pipeline generation and management
# =========================================================================

# cmd_ci SUBCMD [ARGS...] — generate and inspect the GitHub Actions pipeline.
#
# Subcommands:
#   generate   write $ROOT/.github/workflows/ci.yml (overwrites an existing file)
#   status     `gh run list --limit 10`
#   view [ID]  `gh run view ID`, or the 5 most recent runs when ID is omitted
#   trigger    `gh workflow run ci.yml`
#
# Globals:  ROOT (read); BOLD, RESET (read, help styling).
# Returns:  1 when the GitHub CLI is required but missing; otherwise the
#           status of the last command run.
#
# NOTE(review): the generated jobs use `working-directory: dev/sphinx/project`,
# which suggests $ROOT may be a sub-directory of the repository, while GitHub
# only reads workflows from the repository root's .github/workflows — confirm
# that $ROOT/.github/workflows is the intended destination.
cmd_ci() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    local ci_dir="$ROOT/.github/workflows"

    case "$subcmd" in
        generate)
            mkdir -p "$ci_dir"
            local target="$ci_dir/ci.yml"
            info "Generating CI/CD pipeline: $target"
            # Quoted delimiter: the ${{ ... }} expressions must reach the
            # file verbatim. `workflow_dispatch` is declared so that
            # `morie ci trigger` (gh workflow run) can start this workflow.
            cat > "$target" << 'CIEOF'
name: MORIE CI/CD
on:
  push:
    branches: [main, dev]
    paths: ['dev/sphinx/project/**']
  pull_request:
    branches: [main, dev]
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test-python:
    name: Python ${{ matrix.python-version }} / ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest]
        python-version: ['3.10', '3.11', '3.12']
    defaults:
      run:
        working-directory: dev/sphinx/project
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: pip
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[test]"
      - name: Run tests
        run: python -m pytest -q --tb=short --junitxml=report.xml
      - name: Upload test results
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: test-results-${{ matrix.os }}-py${{ matrix.python-version }}
          path: dev/sphinx/project/report.xml

  test-r:
    name: R tests
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: dev/sphinx/project
    steps:
      - uses: actions/checkout@v4
      - uses: r-lib/actions/setup-r@v2
        with:
          r-version: 'release'
      - name: Install R packages
        run: |
          Rscript -e "install.packages(c('testthat', 'survey'), repos='https://cloud.r-project.org/', quiet=TRUE)"
          Rscript -e "install.packages('r-package/morie', repos=NULL, type='source')"
      - name: Run R tests
        run: Rscript scripts/test_r_package.R

  docker-build:
    name: Docker build
    runs-on: ubuntu-latest
    if: github.event_name == 'push'
    defaults:
      run:
        working-directory: dev/sphinx/project
    steps:
      - uses: actions/checkout@v4
      - name: Build image
        run: DOCKER_BUILDKIT=1 docker build -t morie:ci .
      - name: Smoke test
        run: docker run --rm morie:ci morie list-modules

  sphinx-docs:
    name: Documentation build
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: dev/sphinx/project
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -e ".[docs]"
      - name: Build Sphinx docs
        run: python -m sphinx -b html docz/source docz/build/html -W --keep-going
      - name: Upload docs
        uses: actions/upload-artifact@v4
        with:
          name: sphinx-docs
          path: dev/sphinx/project/docz/build/html/

  lint:
    name: Lint
    runs-on: ubuntu-latest
    defaults:
      run:
        working-directory: dev/sphinx/project
    steps:
      - uses: actions/checkout@v4
      - name: ShellCheck
        run: shellcheck morie scripts/bootstrap_esml.sh scripts/install.sh || true
      - uses: actions/setup-python@v5
        with:
          python-version: '3.12'
      - name: Install
        run: pip install ruff
      - name: Ruff check
        run: ruff check py-package/morie/ || true
CIEOF
            success "Generated: $target"
            _log_event "CI pipeline generated: $target"
            ;;
        status)
            if ! command -v gh &>/dev/null; then
                error "GitHub CLI (gh) not installed. Install from: https://cli.github.com"
                return 1
            fi
            info "Recent CI runs:"
            gh run list --limit 10 2>&1 || warn "Not in a GitHub repository or not authenticated."
            ;;
        view)
            # The run ID is optional; a bare "$1" would abort under `set -u`.
            local run_id="${1:-}"
            if ! command -v gh &>/dev/null; then
                error "GitHub CLI (gh) not installed."
                return 1
            fi
            if [[ -n "$run_id" ]]; then
                gh run view "$run_id"
            else
                gh run list --limit 5
            fi
            ;;
        trigger)
            if ! command -v gh &>/dev/null; then
                error "GitHub CLI not installed."
                return 1
            fi
            info "Triggering CI workflow..."
            gh workflow run ci.yml 2>&1 || warn "Failed to trigger. Check workflow name and permissions."
            ;;
        help|*)
            cat << CEOF
${BOLD}morie ci${RESET} — CI/CD pipeline management

${BOLD}COMMANDS${RESET}
    generate     Generate GitHub Actions CI/CD workflow
    status       Show recent CI run status (requires gh CLI)
    view [ID]    View a specific CI run
    trigger      Manually trigger CI workflow
CEOF
            ;;
    esac
}

# =========================================================================
# morie release — Release management
# =========================================================================

# cmd_release SUBCMD [ARGS...] — release readiness, versioning and publishing.
#
# Subcommands:
#   check                     version consistency, tests, git state,
#                             CITATION.cff and a Sphinx build
#   bump [major|minor|patch]  rewrite the version in pyproject.toml and
#                             py-package/morie/__init__.py (default: patch)
#   tag                       create annotated git tag v<version>
#   build                     `python -m build` in $ROOT
#   publish                   `twine upload dist/*` after confirmation
#
# Globals:  ROOT, PYTHON (read); GREEN, YELLOW, RED, DIM, BOLD, RESET (read).
# Returns:  non-zero when a check fails, the version cannot be determined
#           or updated, or the underlying tool fails.
cmd_release() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    case "$subcmd" in
        check)
            info "Release readiness check..."
            echo ""
            local ready=true

            # Version consistency.
            _require_python
            local py_ver toml_ver
            py_ver="$("$PYTHON" -c "import morie; print(morie.__version__)" 2>/dev/null)" || py_ver="?"
            toml_ver="$(grep '^version' "$ROOT/pyproject.toml" 2>/dev/null | head -1 | cut -d'"' -f2)" || toml_ver="?"
            [[ -n "$py_ver" ]] || py_ver="?"
            [[ -n "$toml_ver" ]] || toml_ver="?"
            # Two unknown versions ("?" == "?") must not count as consistent.
            if [[ "$py_ver" != "?" && "$py_ver" == "$toml_ver" ]]; then
                printf "  ${GREEN}  OK ${RESET} Version consistency: %s\n" "$py_ver"
            else
                printf "  ${RED} FAIL${RESET} Version mismatch: __init__.py=%s pyproject.toml=%s\n" "$py_ver" "$toml_ver"
                ready=false
            fi

            # Tests passing.
            printf "  ${DIM}...  ${RESET} Running tests...\r"
            if "$PYTHON" -m pytest -q --tb=no "$ROOT/tests/" &>/dev/null; then
                local test_count
                test_count="$("$PYTHON" -m pytest --collect-only -q "$ROOT/tests/" 2>&1 | tail -1 | awk '{print $1}')" || test_count="?"
                printf "  ${GREEN}  OK ${RESET} Tests passing: %s\n" "$test_count"
            else
                printf "  ${RED} FAIL${RESET} Tests failing\n"
                ready=false
            fi

            # Git status clean.
            if command -v git &>/dev/null; then
                if (cd "$ROOT" && git rev-parse --is-inside-work-tree) &>/dev/null; then
                    local dirty
                    dirty="$(cd "$ROOT" && git status --porcelain 2>/dev/null | wc -l | tr -d ' ')" || dirty="?"
                    if [[ "$dirty" == "0" ]]; then
                        printf "  ${GREEN}  OK ${RESET} Working tree clean\n"
                    else
                        printf "  ${YELLOW} WARN${RESET} %s uncommitted changes\n" "$dirty"
                    fi

                    local branch
                    branch="$(cd "$ROOT" && git branch --show-current 2>/dev/null)" || branch="?"
                    printf "  %-28s %s\n" "  Branch:" "$branch"
                else
                    # Outside a work tree the git pipelines would fail and
                    # abort the whole check under pipefail.
                    printf "  ${YELLOW} WARN${RESET} Not a git work tree\n"
                fi
            fi

            # CITATION.cff.
            if [[ -f "$ROOT/CITATION.cff" ]]; then
                printf "  ${GREEN}  OK ${RESET} CITATION.cff present\n"
            else
                printf "  ${YELLOW} WARN${RESET} CITATION.cff missing\n"
            fi

            # Sphinx docs build into a private scratch directory that is
            # removed whether or not the build succeeds.
            printf "  ${DIM}...  ${RESET} Building docs...\r"
            local docs_tmp=""
            if docs_tmp="$(mktemp -d "${TMPDIR:-/tmp}/morie-docs-check.XXXXXX")" \
                && "$PYTHON" -m sphinx -b html "$ROOT/docz/source" "$docs_tmp" -q 2>/dev/null; then
                printf "  ${GREEN}  OK ${RESET} Sphinx docs build\n"
            else
                printf "  ${RED} FAIL${RESET} Sphinx docs build failed\n"
                ready=false
            fi
            if [[ -n "$docs_tmp" ]]; then
                rm -rf -- "$docs_tmp"
            fi

            echo ""
            if [[ "$ready" == true ]]; then
                success "Ready for release."
            else
                error "Not ready. Fix issues above."
                # Signal failure so scripts and CI can gate on this check.
                return 1
            fi
            ;;
        bump)
            local part="${1:-patch}"
            _require_python
            local current
            current="$("$PYTHON" -c "import morie; print(morie.__version__)" 2>/dev/null)" || current=""

            # Accept MAJOR[.MINOR[.PATCH]] with numeric components only;
            # missing components count as 0.
            local version_re='^([0-9]+)(\.([0-9]+))?(\.([0-9]+))?$'
            if [[ ! "$current" =~ $version_re ]]; then
                error "Cannot bump: unknown or unsupported current version '${current}'"
                return 1
            fi
            # 10# forces base 10 so a component such as "08" is not octal.
            local major=$((10#${BASH_REMATCH[1]}))
            local minor=$((10#${BASH_REMATCH[3]:-0}))
            local patch=$((10#${BASH_REMATCH[5]:-0}))
            case "$part" in
                major) major=$((major + 1)); minor=0; patch=0 ;;
                minor) minor=$((minor + 1)); patch=0 ;;
                patch) patch=$((patch + 1)) ;;
                *) error "Usage: morie release bump [major|minor|patch]"; return 1 ;;
            esac
            local new_ver="${major}.${minor}.${patch}"
            info "Bumping version: $current -> $new_ver"

            # Escape the dots so sed/grep match the version literally.
            local current_re="${current//./\\.}"
            local new_re="${new_ver//./\\.}"
            local toml="$ROOT/pyproject.toml"
            local init="$ROOT/py-package/morie/__init__.py"
            local failed=""

            # Update pyproject.toml, then verify the new version is present:
            # sed exits 0 even when nothing matched.
            if ! sed -i.bak "s/^version = \"${current_re}\"/version = \"${new_ver}\"/" "$toml" \
                || ! grep -q "^version = \"${new_re}\"" "$toml"; then
                failed="${failed} pyproject.toml"
            fi
            rm -f "$toml.bak"

            # Update __init__.py.
            if ! sed -i.bak "s/__version__ = \"${current_re}\"/__version__ = \"${new_ver}\"/" "$init" \
                || ! grep -q "__version__ = \"${new_re}\"" "$init"; then
                failed="${failed} __init__.py"
            fi
            rm -f "$init.bak"

            if [[ -n "$failed" ]]; then
                error "Version $current was not found or could not be updated in:${failed}"
                return 1
            fi
            success "Version bumped to $new_ver in pyproject.toml and __init__.py"
            ;;
        tag)
            _require_python
            local ver
            ver="$("$PYTHON" -c "import morie; print(morie.__version__)" 2>/dev/null)" || ver=""
            if [[ -z "$ver" ]]; then
                error "Could not determine the morie version (is the package importable?)."
                return 1
            fi
            local tag="v${ver}"

            if ! command -v git &>/dev/null; then
                error "git not installed."
                return 1
            fi

            info "Creating tag: $tag"
            (cd "$ROOT" && git tag -a "$tag" -m "Release $tag") || { error "Tag creation failed."; return 1; }
            success "Tag created: $tag"
            info "Push with: git push origin $tag"
            ;;
        build)
            _require_python
            info "Building distribution packages..."
            (cd "$ROOT" && "$PYTHON" -m build) 2>&1 || {
                warn "python -m build failed. Install: pip install build"
                return 1
            }
            success "Packages built in dist/"
            ls -lh "$ROOT/dist/" 2>/dev/null
            ;;
        publish)
            _require_python
            info "Publishing to PyPI..."
            warn "This will upload to the real PyPI. Are you sure?"
            if _ask_yn "Continue?" "n"; then
                (cd "$ROOT" && "$PYTHON" -m twine upload dist/*) 2>&1 || {
                    error "Upload failed. Install: pip install twine"
                    return 1
                }
                success "Published to PyPI."
            else
                info "Cancelled."
            fi
            ;;
        help|*)
            cat << RELEOF
${BOLD}morie release${RESET} — Release management

${BOLD}COMMANDS${RESET}
    check                    Pre-release readiness check
    bump [major|minor|patch] Bump version number
    tag                      Create a git tag for current version
    build                    Build distribution packages (sdist + wheel)
    publish                  Upload to PyPI (requires twine)
RELEOF
            ;;
    esac
}

# =========================================================================
# morie deps — Dependency auditing
# =========================================================================

# cmd_deps [SUBCMD] — inspect the Python dependencies of the morie package.
#
# Subcommands: show|list (default), tree, outdated, security, size, license.
# "help" or an unknown subcommand prints the usage text.
#
# Globals:  PYTHON (read); BOLD, RESET (read, help styling).
# Returns:  non-zero when pip cannot list packages; otherwise the status of
#           the last command run. Metadata lookups that fail (for example
#           when morie is not installed) produce a warning instead of
#           aborting the script.
cmd_deps() {
    local subcmd="${1:-show}"
    shift 2>/dev/null || true

    _require_python

    # Python prelude shared by tree/size/license. direct_deps() yields the
    # distribution names of morie's non-extra requirements. The name is the
    # leading identifier of the requirement string, so specifiers such as
    # "~=", extras ("pkg[x]"), parenthesised versions and environment
    # markers do not leak into it.
    local py_direct_deps
    py_direct_deps='
import importlib.metadata as md
import os
import re

def direct_deps(pkg="morie"):
    for req in md.distribution(pkg).requires or []:
        if re.search(r";.*\bextra\s*==", req):
            continue
        match = re.match(r"\s*([A-Za-z0-9][A-Za-z0-9._-]*)", req)
        if match:
            yield match.group(1)
'

    case "$subcmd" in
        show|list)
            info "Installed packages:"
            # Capture first and truncate from a here-string: piping pip into
            # head can fail under pipefail when head closes the pipe early.
            local listing
            listing="$("$PYTHON" -m pip list --format=columns 2>/dev/null)" || {
                warn "Could not list installed packages."
                return 1
            }
            head -50 <<< "$listing"
            ;;
        tree)
            info "Dependency tree for morie:"
            # grep exits 1 when nothing matches (morie not installed); that
            # must not abort the command under errexit/pipefail.
            "$PYTHON" -m pip show morie 2>/dev/null \
                | grep -E "^(Name|Version|Requires|Required-by):" \
                || warn "morie is not installed in this environment."
            echo ""
            info "Direct dependencies:"
            "$PYTHON" -c "${py_direct_deps}"'
for name in direct_deps():
    try:
        print(f"  {name:<25} {md.version(name)}")
    except md.PackageNotFoundError:
        print(f"  {name:<25} (not installed)")
' 2>/dev/null || warn "Could not read morie package metadata."
            ;;
        outdated)
            info "Checking for outdated packages..."
            "$PYTHON" -m pip list --outdated --format=columns 2>/dev/null || \
                warn "Could not check for outdated packages."
            ;;
        security)
            info "Running security audit..."
            if "$PYTHON" -m pip_audit 2>/dev/null; then
                success "No known vulnerabilities found."
            else
                # Distinguish "audit found problems" from "tool missing".
                if "$PYTHON" -c "import pip_audit" 2>/dev/null; then
                    warn "pip-audit found issues."
                else
                    warn "pip-audit not installed. Install: pip install pip-audit"
                    info "Fallback: checking with pip check..."
                    "$PYTHON" -m pip check 2>&1
                fi
            fi
            ;;
        size)
            info "Package sizes:"
            # PackagePath.locate() is the public way to resolve an installed
            # file; files that no longer exist on disk are skipped.
            "$PYTHON" -c "${py_direct_deps}"'
sizes = []
for name in direct_deps():
    try:
        dist = md.distribution(name)
    except md.PackageNotFoundError:
        continue
    total = 0
    for entry in dist.files or []:
        try:
            total += os.stat(entry.locate()).st_size
        except OSError:
            pass
    sizes.append((name, total))
sizes.sort(key=lambda item: -item[1])
for name, size in sizes[:20]:
    print(f"  {name:<25} {size/1024/1024:.1f} MB")
' 2>/dev/null || warn "Could not compute package sizes."
            ;;
        license)
            info "Dependency licenses:"
            # Prefer the trove classifier; fall back to the first line of the
            # free-form License field, truncated to 50 characters.
            "$PYTHON" -c "${py_direct_deps}"'
for name in direct_deps():
    try:
        meta = md.metadata(name)
    except md.PackageNotFoundError:
        continue
    classifiers = [c for c in (meta.get_all("Classifier") or []) if "License" in c]
    if classifiers:
        lic = classifiers[0].split("::")[-1].strip()
    else:
        lic = (meta.get("License") or "Unknown").strip().split("\n")[0][:50]
    print(f"  {name:<25} {lic}")
' 2>/dev/null || warn "Could not read dependency licenses."
            ;;
        help|*)
            cat << DEOF
${BOLD}morie deps${RESET} — Dependency management and auditing

${BOLD}COMMANDS${RESET}
    show         List installed packages
    tree         Show dependency tree for morie
    outdated     Check for outdated packages
    security     Security audit (pip-audit)
    size         Show package sizes
    license      Show dependency licenses (GPL compliance check)
DEOF
            ;;
    esac
}

# =========================================================================
# morie perf — Performance profiling
# =========================================================================

# cmd_perf SUBCMD [ARGS...] — lightweight performance probes for morie.
#
# Subcommands:
#   module <name> [csv]  run one module under time + tracemalloc; csv
#                        defaults to the CPADS survey file path (relative
#                        to the current directory)
#   import               wall-clock import time of a fixed list of modules
#   startup              wall-clock time of `-m morie.runner --help`, 5 runs
#   memory               tracemalloc footprint of `import morie`
#
# Globals:  PYTHON (read); BOLD, RESET (read, help styling).
# Returns:  the status of the Python process that was run; `module` exits
#           non-zero when the profiled module raises.
cmd_perf() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    _require_python

    case "$subcmd" in
        module)
            local mod="${1:?Usage: morie perf module <name>}"
            local csv="${2:-data/files/csv/survey/cpads-2021-2022-pumf2.csv}"
            info "Profiling module: $mod"
            # The module name and CSV path travel as argv, never spliced into
            # the Python source, so quotes or other special characters in
            # them cannot alter the code that runs.
            "$PYTHON" -c '
import sys, time, tracemalloc
from morie.modules import run_module
mod, csv = sys.argv[1], sys.argv[2]
tracemalloc.start()
t0 = time.monotonic()
try:
    outputs = run_module(mod, cpads_csv=csv)
    elapsed = time.monotonic() - t0
    current, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    print(f"  Module:      {mod}")
    print(f"  Time:        {elapsed:.2f}s")
    print(f"  Peak memory: {peak/1024/1024:.1f}MB")
    print(f"  Outputs:     {len(outputs)} tables")
    for name, df in outputs.items():
        print(f"    {name}: {len(df)} rows x {len(df.columns)} cols")
except Exception as e:
    tracemalloc.stop()
    print(f"  Error: {e}")
    sys.exit(1)
' "$mod" "$csv" 2>&1
            ;;
        import)
            info "Measuring import times..."
            "$PYTHON" -c "
import time
modules = [
    'morie', 'morie.causal', 'morie.effects', 'morie.investigation',
    'morie.survey', 'morie.statistics', 'morie.survival', 'morie.did',
    'morie.rdd', 'morie.iv', 'morie.matching', 'morie.missing',
    'morie.viz', 'morie.llm', 'morie.chat',
]
print(f'  {\"Module\":<30} {\"Time (ms)\":>10}')
for mod in modules:
    t0 = time.monotonic()
    try: __import__(mod)
    except: pass
    elapsed = (time.monotonic() - t0) * 1000
    print(f'  {mod:<30} {elapsed:>10.1f}')
" 2>&1
            ;;
        startup)
            info "Measuring CLI startup time..."
            # A single driver process (the configured interpreter, not a
            # hard-coded python3) takes both timestamps around each child
            # run, so the figures exclude the start-up cost of the timing
            # helper itself.
            "$PYTHON" -c '
import subprocess, sys, time
python = sys.argv[1]
runs = 5
total = 0
for i in range(1, runs + 1):
    t0 = time.monotonic()
    subprocess.run([python, "-m", "morie.runner", "--help"],
                   stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    elapsed = int((time.monotonic() - t0) * 1000)
    print(f"  Run {i}: {elapsed}ms", flush=True)
    total += elapsed
print(f"  Average: {total // runs}ms")
' "$PYTHON"
            ;;
        memory)
            info "Memory footprint of morie import..."
            "$PYTHON" -c "
import tracemalloc, sys
tracemalloc.start()
import morie
current, peak = tracemalloc.get_traced_memory()
tracemalloc.stop()
print(f'  Current: {current/1024:.0f}KB')
print(f'  Peak:    {peak/1024:.0f}KB')
print(f'  Modules: {len([m for m in sys.modules if m.startswith(\"morie\")])}')
" 2>&1
            ;;
        help|*)
            cat << PEOF
${BOLD}morie perf${RESET} — Performance profiling

${BOLD}COMMANDS${RESET}
    module <name> [csv]  Profile a single module (time + memory)
    import               Measure import times for all modules
    startup              Measure CLI startup time (5 runs)
    memory               Memory footprint of morie import
PEOF
            ;;
    esac
}

# =========================================================================
# morie net — Network diagnostics for LLM providers
# =========================================================================

# morie net — connectivity and latency checks for LLM providers.
#
# Arguments:
#   $1  Subcommand: check (default), latency, or help.
# Globals read:
#   BOLD GREEN YELLOW DIM RESET                      colour strings
#   GEMINI_API_KEY LLM_API_BASE_URL LLM_API_KEY OPENAI_API_KEY
#   PYTHON  interpreter used as a clock by `latency` (falls back to python3)
# Outputs:
#   A status table on stdout.
# Notes:
#   Every probe is allowed to fail: an unreachable provider is a result to
#   report, not a reason to abort the script under `set -e`.
cmd_net() {
    local subcmd="${1:-check}"
    shift 2>/dev/null || true

    case "$subcmd" in
        check)
            echo "${BOLD}MORIE Network Diagnostics${RESET}"
            echo ""

            # Ollama.  Fetch the tag list once (with a timeout) and reuse the
            # body for the model summary.
            printf "  %-25s " "Ollama (localhost:11434)"
            local tags models
            if tags="$(curl -s --max-time 3 http://localhost:11434/api/tags 2>/dev/null)"; then
                # `|| true`: grep exits 1 when no model is installed, which
                # would otherwise kill the script via pipefail + errexit.
                models="$(printf '%s' "$tags" | grep -o '"name":"[^"]*"' | head -3 | cut -d'"' -f4 | tr '\n' ',' || true)"
                models="${models%,}"
                printf "${GREEN}OK${RESET} (%s)\n" "${models:-no models}"
            else
                printf "${YELLOW}unreachable${RESET}\n"
            fi

            # Gemini.  -f makes curl fail on HTTP 4xx/5xx so that "key valid"
            # is only printed when the API actually accepted the key.
            printf "  %-25s " "Gemini API"
            if [[ -n "${GEMINI_API_KEY:-}" ]]; then
                if curl -s -f --max-time 5 "https://generativelanguage.googleapis.com/v1beta/models?key=${GEMINI_API_KEY}" &>/dev/null; then
                    printf "${GREEN}OK${RESET} (key valid)\n"
                else
                    printf "${YELLOW}key set but rejected or unreachable${RESET}\n"
                fi
            else
                printf "${DIM}not configured${RESET}\n"
            fi

            # OpenAI-compatible.
            printf "  %-25s " "OpenAI-compat API"
            if [[ -n "${LLM_API_BASE_URL:-}" ]]; then
                if curl -s --max-time 5 "${LLM_API_BASE_URL}/models" -H "Authorization: Bearer ${LLM_API_KEY:-}" &>/dev/null; then
                    printf "${GREEN}OK${RESET} (%s)\n" "$LLM_API_BASE_URL"
                else
                    printf "${YELLOW}configured but unreachable${RESET}\n"
                fi
            else
                printf "${DIM}not configured${RESET}\n"
            fi

            # OpenAI.
            printf "  %-25s " "OpenAI API"
            if [[ -n "${OPENAI_API_KEY:-}" ]]; then
                if curl -s --max-time 5 "https://api.openai.com/v1/models" -H "Authorization: Bearer ${OPENAI_API_KEY}" &>/dev/null; then
                    printf "${GREEN}OK${RESET}\n"
                else
                    printf "${YELLOW}key set but unreachable${RESET}\n"
                fi
            else
                printf "${DIM}not configured${RESET}\n"
            fi

            # General connectivity.
            echo ""
            printf "  %-25s " "Internet (pypi.org)"
            if curl -s --max-time 5 https://pypi.org/pypi/morie/json &>/dev/null; then
                printf "${GREEN}OK${RESET}\n"
            else
                printf "${YELLOW}unreachable${RESET}\n"
            fi

            printf "  %-25s " "CRAN (r-project.org)"
            if curl -s --max-time 5 https://cloud.r-project.org/ &>/dev/null; then
                printf "${GREEN}OK${RESET}\n"
            else
                printf "${YELLOW}unreachable${RESET}\n"
            fi
            ;;
        latency)
            info "LLM provider latency test..."
            # Prefer the resolved interpreter as the clock; plain python3 is
            # only the fallback when PYTHON has not been set yet.
            local clock="${PYTHON:-python3}"
            local endpoint t0 t1 ms note
            for endpoint in "http://localhost:11434/api/tags" "https://generativelanguage.googleapis.com/" "https://api.openai.com/"; do
                note=""
                t0="$("$clock" -c 'import time; print(time.monotonic())')"
                # A failed request is still timed, but flagged so the figure
                # is not mistaken for a real round trip.
                curl -s --max-time 5 "$endpoint" &>/dev/null || note=" (unreachable)"
                t1="$("$clock" -c 'import time; print(time.monotonic())')"
                ms="$("$clock" -c "print(int(($t1 - $t0) * 1000))")"
                printf "  %-50s %sms%s\n" "$endpoint" "$ms" "$note"
            done
            ;;
        help|*)
            cat << NEOF
${BOLD}morie net${RESET} — Network diagnostics

${BOLD}COMMANDS${RESET}
    check       Check connectivity to all LLM providers
    latency     Measure latency to provider endpoints
NEOF
            ;;
    esac
}

# =========================================================================
# morie cron — Scheduled task management
# =========================================================================

# morie cron — manage MORIE entries in the current user's crontab.
#
# Each job is stored as a pair of lines: a "# MORIE: <command>" marker
# followed by the actual "<schedule> cd <root> && <command>" entry.
#
# Arguments:
#   $1  Subcommand: add | list | remove | help (default).
#   add:    $2 = cron schedule, $3 = command to run from $ROOT.
#   remove: $2 = grep (basic regex) pattern; matching MORIE pairs are dropped.
# Globals read: ESML_CONFIG_DIR, ROOT, BOLD, RESET
# Returns: 1 on missing arguments, 0 otherwise.
cmd_cron() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true

    local cron_dir="$ESML_CONFIG_DIR/cron"
    mkdir -p "$cron_dir"

    case "$subcmd" in
        add)
            # Default to empty so a missing argument reaches the usage
            # message instead of tripping `set -u`.
            local schedule="${1:-}" command="${2:-}"
            if [[ -z "$schedule" || -z "$command" ]]; then
                error "Usage: morie cron add '<schedule>' '<command>'"
                echo "  Example: morie cron add '0 2 * * *' 'morie pipeline --all -y'"
                return 1
            fi
            # Shell-quote the project root so a path containing spaces still
            # yields a valid `cd` in the cron entry.
            local quoted_root
            printf -v quoted_root '%q' "$ROOT"
            local full_cmd="cd $quoted_root && $command"
            # `crontab -l` fails when the user has no crontab yet; that just
            # means there is nothing to preserve.
            {
                crontab -l 2>/dev/null || true
                printf '%s\n' "# MORIE: $command" "$schedule $full_cmd"
            } | crontab -
            success "Added cron job: $schedule $command"
            ;;
        list)
            info "MORIE cron jobs:"
            crontab -l 2>/dev/null | grep -A1 "# MORIE:" | grep -v "^--$" || echo "  (none)"
            ;;
        remove)
            local pattern="${1:-}"
            if [[ -z "$pattern" ]]; then
                error "Usage: morie cron remove '<pattern>'"
                return 1
            fi
            # Walk the crontab and drop only MORIE marker/job pairs in which
            # either line matches; every other line is kept verbatim so that
            # unrelated jobs are never touched.
            local line marker="" kept=""
            while IFS= read -r line || [[ -n "$line" ]]; do
                if [[ -n "$marker" ]]; then
                    # $line is the job entry that belongs to the pending marker.
                    if ! grep -q -e "$pattern" <<< "$marker"$'\n'"$line"; then
                        kept+="$marker"$'\n'"$line"$'\n'
                    fi
                    marker=""
                elif [[ "$line" == "# MORIE:"* ]]; then
                    marker="$line"
                else
                    kept+="$line"$'\n'
                fi
            done < <(crontab -l 2>/dev/null || true)
            # A marker on the very last line has no job entry after it.
            if [[ -n "$marker" ]] && ! grep -q -e "$pattern" <<< "$marker"; then
                kept+="$marker"$'\n'
            fi
            printf '%s' "$kept" | crontab -
            success "Removed cron jobs matching: $pattern"
            ;;
        help|*)
            cat << CREOF
${BOLD}morie cron${RESET} — Scheduled task management

${BOLD}COMMANDS${RESET}
    add '<schedule>' '<cmd>'  Add a cron job
    list                      List MORIE cron jobs
    remove '<pattern>'        Remove matching cron jobs

${BOLD}EXAMPLES${RESET}
    morie cron add '0 2 * * *' 'morie pipeline --all -y'
    morie cron add '0 6 * * 1' 'morie backup create'
    morie cron list
CREOF
            ;;
    esac
}

# =========================================================================
# morie docs — Documentation build and serve
# =========================================================================

# morie docs — build, serve, open, or clean the Sphinx documentation.
#
# Arguments:
#   $1  Subcommand: build (default) | serve | open | clean | help.
#   build: remaining arguments are passed through to sphinx.
#   serve: $2 = TCP port (default 8000).
# Globals read: ROOT, PYTHON, BOLD, RESET
# Returns: for `build`, the Sphinx exit status; `open` returns 1 when the
#   implicit build fails.
cmd_docs() {
    local subcmd="${1:-build}"
    shift 2>/dev/null || true

    _require_python

    case "$subcmd" in
        build)
            info "Building Sphinx documentation..."
            mkdir -p "$ROOT/docz/source/_static" "$ROOT/docz/source/_templates"
            # Capture the status with `||` so that errexit does not terminate
            # the script before the failure can be reported.
            local rc=0
            "$PYTHON" -m sphinx -b html "$ROOT/docz/source" "$ROOT/docz/build/html" "$@" 2>&1 || rc=$?
            if [[ $rc -eq 0 ]]; then
                success "Docs built: $ROOT/docz/build/html/index.html"
            else
                error "Sphinx build failed (exit=$rc)"
            fi
            return "$rc"
            ;;
        serve)
            info "Starting documentation server..."
            local port="${1:-8000}"
            # Run the server in the background so its PID can be shown, then
            # block on it until it exits.
            "$PYTHON" -m http.server "$port" --directory "$ROOT/docz/build/html" 2>&1 &
            local pid=$!
            success "Documentation server at http://localhost:$port (PID $pid)"
            info "Press Ctrl+C to stop."
            wait "$pid"
            ;;
        open)
            local index="$ROOT/docz/build/html/index.html"
            if [[ ! -f "$index" ]]; then
                warn "Docs not built yet. Building..."
                cmd_docs build || return 1
            fi
            # `open` is tried first, then `xdg-open`; with neither available
            # the file URL is printed instead.
            if command -v open &>/dev/null; then
                open "$index"
                success "Opened in browser: $index"
            elif command -v xdg-open &>/dev/null; then
                xdg-open "$index"
                success "Opened in browser: $index"
            else
                info "Open in browser: file://$index"
            fi
            ;;
        clean)
            rm -rf "$ROOT/docz/build"
            success "Documentation build directory cleaned."
            ;;
        help|*)
            cat << DOEOF
${BOLD}morie docs${RESET} — Documentation management

${BOLD}COMMANDS${RESET}
    build [sphinx args]  Build Sphinx documentation
    serve [port]         Serve docs on localhost (default: 8000)
    open                 Open docs in browser
    clean                Remove build directory
DOEOF
            ;;
    esac
}

# =========================================================================
# morie init — Initialize a new MORIE analysis project
# =========================================================================

# Write stdin to the file named by $1 and announce it, unless a regular file
# of that name already exists (existing files are never overwritten).
_init_seed_file() {
    local target="$1"
    if [[ ! -f "$target" ]]; then
        cat > "$target"
        success "Created: $target"
    fi
}

# morie init — scaffold an analysis project.
#
# Arguments:
#   $1  Project directory (default "."); created and entered when it is
#       anything other than ".".
# Side effects: changes the working directory, creates the data/scripts/
#   outputs/figures tree and seeds analysis.py, .gitignore and .esmlrc.
cmd_init() {
    local project_dir="${1:-.}"

    if [[ "$project_dir" != "." ]]; then
        mkdir -p "$project_dir"
        cd "$project_dir" || exit 1
    fi

    info "Initializing MORIE analysis project in $(pwd)"

    # Directory skeleton.
    mkdir -p data/raw data/processed data/public scripts outputs figures

    # Starter analysis script.
    _init_seed_file "analysis.py" << 'AEOF'
#!/usr/bin/env python3
"""MORIE analysis script template."""

import pandas as pd
import morie

# Load data.
df = morie.load_dataset("data/raw/your_data.csv")

# Profile the dataset.
profile = morie.profile_dataset(df)
print(profile.summary_table())

# Run analysis modules.
# morie.run_module("descriptive-statistics", cpads_csv="data/raw/your_data.csv")
AEOF

    # Ignore rules for raw data and build residue.
    _init_seed_file ".gitignore" << 'GEOF'
# MORIE project
data/raw/*.csv
data/processed/
*.rds
*.RData
.venv/
__pycache__/
*.pyc
.Rhistory
.DS_Store
GEOF

    # Project-local configuration stub.
    _init_seed_file ".esmlrc" << 'RCEOF'
# Project-local MORIE configuration.
# Sourced automatically when morie is run from this directory.
# ESML_CPADS_CSV="data/raw/my_data.csv"
RCEOF

    printf '\n'
    success "MORIE project initialized."
    printf '%s\n' \
        "" \
        "  ${BOLD}Next steps:${RESET}" \
        "    1. Place your data in data/raw/" \
        "    2. Edit analysis.py" \
        "    3. Run: morie chat (for interactive analysis)" \
        "    4. Run: morie pipeline --all -y (for full pipeline)" \
        ""
}

# =========================================================================
# morie clean — Cleanup build artifacts
# =========================================================================

# morie clean — remove build artifacts below the project root.
#
# Arguments:
#   $1  What to clean: all (default) | docs | cache | dist.
#       Anything else prints the usage line.
# Globals read: ROOT (must be non-empty; the function aborts otherwise so
#   that no rm/find ever runs against "/").
cmd_clean() {
    local subcmd="${1:-all}"
    local root="${ROOT:?ROOT must be set before cleaning}"

    case "$subcmd" in
        all)
            info "Cleaning build artifacts..."
            rm -rf -- "$root/docz/build" 2>/dev/null && info "  Removed Sphinx build"
            # The glob must sit outside the quotes or it is never expanded.
            rm -rf -- "$root/dist" "$root/build" "$root"/*.egg-info 2>/dev/null && info "  Removed package build"
            # -prune stops find from descending into a directory it is about
            # to delete, which would otherwise make find exit non-zero.
            find "$root" -type d -name "__pycache__" -prune -exec rm -rf {} + 2>/dev/null && info "  Removed __pycache__"
            # Best effort: stray .pyc files outside __pycache__.
            find "$root" -name "*.pyc" -delete 2>/dev/null || true
            find "$root" -type d -name ".pytest_cache" -prune -exec rm -rf {} + 2>/dev/null && info "  Removed pytest cache"
            success "Cleanup complete."
            ;;
        docs)
            rm -rf -- "$root/docz/build" 2>/dev/null || true
            success "Sphinx build cleaned."
            ;;
        cache)
            find "$root" -type d -name "__pycache__" -prune -exec rm -rf {} + 2>/dev/null || true
            success "Cache cleaned."
            ;;
        dist)
            rm -rf -- "$root/dist" "$root/build" "$root"/*.egg-info 2>/dev/null || true
            success "Dist cleaned."
            ;;
        *)
            echo "Usage: morie clean [all|docs|cache|dist]"
            ;;
    esac
}

# =========================================================================
# morie info — Detailed system and project information
# =========================================================================

# Print how many of the given paths exist.  Intended to be called with a
# glob: an unmatched glob is passed through literally, does not exist, and
# therefore counts as zero.
_info_count_matches() {
    local total=0 path
    for path in "$@"; do
        if [[ -e "$path" ]]; then
            total=$((total + 1))
        fi
    done
    printf '%s\n' "$total"
}

# morie info — print system, interpreter, and project statistics.
#
# Globals read: ROOT, ESML_SCRIPT_VERSION, BOLD, RESET, SHELL, TERM
# Outputs: an aligned key/value report on stdout.
# Notes:
#   Every statistic degrades to 0 (or "?") when the corresponding directory
#   or file is absent, so the report completes under `set -euo pipefail`
#   on partial checkouts too.
cmd_info() {
    echo "${BOLD}MORIE System Information${RESET}"
    echo ""

    # System.
    printf "  %-25s %s\n" "OS" "$(uname -s) $(uname -r) ($(uname -m))"
    printf "  %-25s %s\n" "Hostname" "$(hostname 2>/dev/null || echo unknown)"
    printf "  %-25s %s\n" "User" "$(whoami)"
    printf "  %-25s %s\n" "Shell" "${SHELL:-unknown}"
    printf "  %-25s %s\n" "Terminal" "${TERM:-unknown}"
    if [[ "$(uname -s)" == "Darwin" ]]; then
        printf "  %-25s %s\n" "macOS" "$(sw_vers -productVersion 2>/dev/null || echo ?)"
        printf "  %-25s %s\n" "CPU" "$(sysctl -n machdep.cpu.brand_string 2>/dev/null || echo ?)"
        printf "  %-25s %sGB\n" "RAM" "$(($(sysctl -n hw.memsize 2>/dev/null) / 1024 / 1024 / 1024))"
    elif [[ -f /proc/cpuinfo ]]; then
        printf "  %-25s %s\n" "CPU" "$(grep 'model name' /proc/cpuinfo | head -1 | cut -d: -f2 | xargs)"
        printf "  %-25s %s\n" "RAM" "$(free -h 2>/dev/null | awk '/^Mem:/{print $2}')"
    fi
    echo ""

    # MORIE.
    local wrapper_lines="?"
    if [[ -r "$ROOT/morie" ]]; then
        wrapper_lines="$(wc -l < "$ROOT/morie" | tr -d ' ')"
    fi
    printf "  %-25s %s\n" "MORIE shell version" "$ESML_SCRIPT_VERSION"
    printf "  %-25s %s\n" "MORIE wrapper" "$wrapper_lines lines"
    printf "  %-25s %s\n" "Project root" "$ROOT"
    # Resolve the interpreter once; an empty result means none was found.
    local py
    py="$(_resolve_python 2>/dev/null || true)"
    if [[ -n "$py" ]]; then
        printf "  %-25s %s\n" "Python" "$("$py" --version 2>&1) ($py)"
        printf "  %-25s %s\n" "morie version" "$("$py" -c 'import morie; print(morie.__version__)' 2>/dev/null || echo 'not installed')"
    fi
    echo ""

    # Package stats.  The glob stays as one literal (non-existent) element
    # when nothing matches, which the -e test below detects.
    local -a py_files=("$ROOT/py-package/morie/"*.py)
    local mod_count=0 line_count=0 func_count=0
    if [[ -e "${py_files[0]}" ]]; then
        mod_count="${#py_files[@]}"
        line_count="$(cat -- "${py_files[@]}" | wc -l | tr -d ' ')"
        # Count top-level defs, 4-space-indented defs, and top-level classes.
        # grep exits 1 on zero matches, hence the `|| true`.
        func_count="$( { grep -h -E '^(def |    def |class )' -- "${py_files[@]}" || true; } | wc -l | tr -d ' ')"
    fi
    printf "  %-25s %s modules, %s lines, %s functions\n" "Python package" "$mod_count" "$line_count" "$func_count"

    local r_count rd_count
    r_count="$(_info_count_matches "$ROOT/r-package/morie/R/"*.R)"
    rd_count="$(_info_count_matches "$ROOT/r-package/morie/man/"*.Rd)"
    printf "  %-25s %s R files, %s .Rd docs\n" "R package" "$r_count" "$rd_count"

    local test_count
    test_count="$(_info_count_matches "$ROOT/tests/test_"*.py)"
    printf "  %-25s %s test files\n" "Tests" "$test_count"
}

# =========================================================================
# morie lint — Code linting and style checks
# =========================================================================

# morie lint — run linters over the Python and R packages.
#
# Arguments:
#   $1   Subcommand: py | check | r | all | fix | report | mypy | help (default).
#   $2+  Extra arguments forwarded to the underlying tool.
# Globals read: ROOT, PYTHON
# Returns:
#   The linter's exit status for py/check/mypy/fix, 1 when R lint fails or a
#   required tool is missing, 1 for an unknown subcommand, 0 otherwise.
#   Statuses are captured with `|| rc=$?` so a finding is reported (and, for
#   `fix`, formatting still runs) instead of errexit ending the script.
cmd_lint() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    local rc=0
    case "$subcmd" in
        help|-h|--help)
            cat << 'LINTEOF'
morie lint — Code linting and style checks

COMMANDS
    py                 Lint Python code with ruff/flake8
    r                  Lint R code with lintr
    all                Lint both Python and R
    fix                Auto-fix Python with ruff --fix
    report             Generate lint report to file
    mypy               Run mypy type checking
    check              Check without modifying (alias for py)

EXAMPLES
    morie lint py
    morie lint fix
    morie lint all
    morie lint report
LINTEOF
            ;;
        py|check)
            info "Linting Python code..."
            _require_python
            # Probe availability with the top-level --version flag.
            if "$PYTHON" -m ruff --version >/dev/null 2>&1; then
                info "Running ruff..."
                "$PYTHON" -m ruff check "$ROOT/py-package/morie/" "$@" || rc=$?
            elif "$PYTHON" -m flake8 --version >/dev/null 2>&1; then
                info "Running flake8..."
                "$PYTHON" -m flake8 "$ROOT/py-package/morie/" --max-line-length=120 --count --statistics "$@" || rc=$?
            else
                warn "No linter found. Install ruff: pip install ruff"
                return 1
            fi
            if (( rc == 0 )); then
                success "No issues found."
            else
                warn "Issues found above."
            fi
            return "$rc"
            ;;
        r)
            info "Linting R code..."
            if command -v Rscript >/dev/null 2>&1; then
                if Rscript -e "
                    if (!requireNamespace('lintr', quietly=TRUE)) {
                        cat('lintr not installed. Run: install.packages(\"lintr\")\n')
                        quit(status=1)
                    }
                    results <- lintr::lint_dir('$ROOT/r-package/morie/R/')
                    print(results)
                    if (length(results) == 0) cat('No issues found.\n')
                "; then
                    success "R lint complete."
                else
                    warn "R lint issues found."
                    return 1
                fi
            else
                warn "R not found. Install R to lint R code."
                return 1
            fi
            ;;
        all)
            # Run both linters even if the first one reports problems.
            cmd_lint py "$@" || rc=$?
            echo ""
            cmd_lint r "$@" || rc=$?
            return "$rc"
            ;;
        fix)
            info "Auto-fixing Python with ruff..."
            _require_python
            if "$PYTHON" -m ruff --version >/dev/null 2>&1; then
                # `ruff check --fix` exits non-zero when unfixable findings
                # remain; formatting must still run in that case.
                "$PYTHON" -m ruff check --fix "$ROOT/py-package/morie/" "$@" || rc=$?
                "$PYTHON" -m ruff format "$ROOT/py-package/morie/" "$@" || rc=$?
                if (( rc == 0 )); then
                    success "Auto-fix complete."
                else
                    warn "Auto-fix finished, but some issues need manual attention."
                fi
                return "$rc"
            else
                warn "ruff not found. Install: pip install ruff"
                return 1
            fi
            ;;
        report)
            info "Generating lint report..."
            _require_python
            local report_file
            report_file="$ROOT/lint-report-$(date +%Y%m%d).txt"
            # Findings are the content of the report, so tool failures are
            # deliberately tolerated inside the group.
            {
                echo "MORIE Lint Report — $(date)"
                echo "======================================"
                echo ""
                echo "=== Python (ruff) ==="
                "$PYTHON" -m ruff check "$ROOT/py-package/morie/" --statistics 2>&1 || true
                echo ""
                echo "=== R (lintr) ==="
                if command -v Rscript >/dev/null 2>&1; then
                    Rscript -e "lintr::lint_dir('$ROOT/r-package/morie/R/')" 2>&1 || echo "lintr not available"
                else
                    echo "R not installed"
                fi
            } > "$report_file"
            success "Report saved to $report_file"
            ;;
        mypy)
            info "Running mypy type checks..."
            _require_python
            if "$PYTHON" -m mypy --version >/dev/null 2>&1; then
                "$PYTHON" -m mypy "$ROOT/py-package/morie/" --ignore-missing-imports "$@" || rc=$?
                if (( rc == 0 )); then
                    success "No type errors."
                else
                    warn "Type errors found."
                fi
                return "$rc"
            else
                warn "mypy not found. Install: pip install mypy"
                return 1
            fi
            ;;
        *)
            error "Unknown: morie lint $subcmd"
            echo "  Run 'morie lint help' for usage." >&2
            return 1
            ;;
    esac
}

# =========================================================================
# morie check — Pre-flight validation checks
# =========================================================================

# morie check — pre-flight validation checks.
#
# Arguments:
#   $1   Subcommand: all (default) | git | tests | docs | deps | data | r |
#        imports | help.
#   $2+  Extra arguments forwarded to pytest / sphinx / R CMD check.
# Globals read: ROOT, PYTHON
# Returns:
#   0 when the selected check passes, non-zero when it finds a problem
#   (the underlying tool's status where there is one).  `all` relies on
#   these statuses to count failing checks and returns 1 if any failed.
#   Tool statuses are captured with `|| rc=$?` so that the outcome is
#   reported instead of errexit terminating the script mid-check.
cmd_check() {
    local subcmd="${1:-all}"
    shift 2>/dev/null || true
    local rc=0
    case "$subcmd" in
        help|-h|--help)
            cat << 'CHECKEOF'
morie check — Pre-flight validation checks

COMMANDS
    all                Run all checks
    git                Check clean working tree
    tests              Run pytest, report pass/fail
    docs               Build Sphinx, check warnings
    deps               Check for outdated dependencies
    data               Verify esml_datasets.db exists
    r                  Run R CMD check
    imports            Check all imports resolve

EXAMPLES
    morie check all
    morie check git
    morie check tests
CHECKEOF
            ;;
        git)
            info "Checking git status..."
            if [[ -z "$(git -C "$ROOT" status --porcelain 2>/dev/null)" ]]; then
                success "Working tree is clean."
            else
                warn "Uncommitted changes detected:"
                git -C "$ROOT" status --short || true
                return 1
            fi
            ;;
        tests)
            info "Running Python tests..."
            _require_esml_package
            "$PYTHON" -m pytest "$ROOT/tests/" -q --tb=line "$@" || rc=$?
            if (( rc == 0 )); then
                success "All tests passed."
            else
                error "Some tests failed (exit code $rc)."
            fi
            return "$rc"
            ;;
        docs)
            info "Building Sphinx docs (checking for warnings)..."
            _require_python
            # -W turns Sphinx warnings into errors.
            "$PYTHON" -m sphinx -b html -W "$ROOT/docz/source" "$ROOT/docz/build/html" "$@" 2>&1 || rc=$?
            if (( rc == 0 )); then
                success "Docs build clean (no warnings)."
            else
                warn "Doc build had warnings or errors."
            fi
            return "$rc"
            ;;
        deps)
            info "Checking for outdated dependencies..."
            _require_python
            "$PYTHON" -m pip list --outdated --format=columns 2>/dev/null || rc=$?
            if (( rc == 0 )); then
                success "Dependency check complete."
            else
                warn "pip could not list outdated packages (exit code $rc)."
            fi
            return "$rc"
            ;;
        data)
            info "Checking built-in datasets..."
            local db_py="$ROOT/py-package/morie/data/esml_datasets.db"
            local db_r="$ROOT/r-package/morie/inst/extdata/esml_datasets.db"
            if [[ -f "$db_py" ]]; then
                local size_py
                size_py="$(du -h "$db_py" | cut -f1)"
                success "Python DB: $db_py ($size_py)"
            else
                warn "Python DB not found: $db_py"
                rc=1
            fi
            if [[ -f "$db_r" ]]; then
                local size_r
                size_r="$(du -h "$db_r" | cut -f1)"
                success "R DB: $db_r ($size_r)"
            else
                warn "R DB not found: $db_r"
                rc=1
            fi
            # Check table count
            if command -v sqlite3 >/dev/null 2>&1 && [[ -f "$db_py" ]]; then
                local tcount
                tcount="$(sqlite3 "$db_py" "SELECT count(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo '?')"
                info "Tables in DB: $tcount"
            fi
            return "$rc"
            ;;
        r)
            info "Running R CMD check..."
            if command -v R >/dev/null 2>&1; then
                R CMD check "$ROOT/r-package/morie" --no-manual --no-build-vignettes "$@" 2>&1 || rc=$?
                if (( rc == 0 )); then
                    success "R check complete."
                else
                    error "R CMD check failed (exit code $rc)."
                fi
                return "$rc"
            else
                warn "R not found."
            fi
            ;;
        imports)
            info "Checking Python imports..."
            _require_python
            # The embedded script exits 1 if any direct submodule of the
            # morie package fails to import.
            "$PYTHON" -c "
import importlib, pkgutil, morie, sys
fails = []
for mi in pkgutil.iter_modules(morie.__path__):
    try:
        importlib.import_module(f'morie.{mi.name}')
    except Exception as e:
        fails.append((mi.name, str(e)))
if fails:
    for name, err in fails:
        print(f'  FAIL: morie.{name}: {err}')
    sys.exit(1)
else:
    print(f'  All {len(list(pkgutil.iter_modules(morie.__path__)))} modules import OK.')
" || rc=$?
            if (( rc == 0 )); then
                success "All imports clean."
            else
                error "Some imports failed."
            fi
            return "$rc"
            ;;
        all)
            local failed=0
            echo ""
            info "=== Pre-flight Checks ==="
            echo ""
            cmd_check git || failed=$((failed + 1))
            echo ""
            cmd_check data || failed=$((failed + 1))
            echo ""
            cmd_check imports || failed=$((failed + 1))
            echo ""
            cmd_check tests || failed=$((failed + 1))
            echo ""
            if (( failed == 0 )); then
                success "All pre-flight checks passed!"
            else
                warn "$failed check(s) had issues."
                return 1
            fi
            ;;
        *)
            error "Unknown: morie check $subcmd"
            echo "  Run 'morie check help' for usage." >&2
            return 1
            ;;
    esac
}

# =========================================================================
# morie benchmark — Performance timing and benchmarks
# =========================================================================

# Run "$@" with its output discarded, print the elapsed wall-clock seconds
# (three decimals) on stdout, and return the command's exit status.
# Timing happens inside a single interpreter process, so interpreter
# start-up is not part of the measurement.
_benchmark_time() {
    local clock="${PYTHON:-python3}"
    "$clock" -c '
import subprocess, sys, time
t0 = time.perf_counter()
rc = subprocess.call(sys.argv[1:], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
print(f"{time.perf_counter() - t0:.3f}")
sys.exit(rc if 0 <= rc <= 255 else 1)
' "$@"
}

# Time "$2..." and report it under the label $1.  A failing command is
# reported with a warning (timing included) rather than aborting the run.
_benchmark_report() {
    local label="$1"
    shift
    local elapsed rc=0
    elapsed="$(_benchmark_time "$@")" || rc=$?
    if (( rc == 0 )); then
        success "$label: ${elapsed}s"
    else
        warn "$label: command failed (exit=$rc) after ${elapsed:-?}s"
    fi
    return 0
}

# morie benchmark — simple wall-clock benchmarks.
#
# Arguments:
#   $1  Subcommand: import | startup | pytest | modules | all | help (default).
# Globals read: ROOT, PYTHON (python3 is the fallback clock when unset)
# Returns: 1 for an unknown subcommand, 0 otherwise.
cmd_benchmark() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'BENCHEOF'
morie benchmark — Performance benchmarks

COMMANDS
    import             Time 'import morie'
    startup            Time 'morie --version'
    pytest             Time full pytest run
    modules            Time each analysis module
    all                Run all benchmarks

EXAMPLES
    morie benchmark import
    morie benchmark all
BENCHEOF
            ;;
        import)
            info "Timing 'import morie'..."
            _require_python
            _benchmark_report "import morie" "$PYTHON" -c "import morie"
            ;;
        startup)
            info "Timing 'morie --version'..."
            # Re-invokes this script's `version` subcommand.
            _benchmark_report "morie --version" "$0" version
            ;;
        pytest)
            info "Timing pytest..."
            _require_esml_package
            _benchmark_report "pytest" "$PYTHON" -m pytest "$ROOT/tests/" -q --tb=no
            ;;
        modules)
            info "Timing module imports..."
            _require_python
            # Prints the 20 slowest submodule imports, then the total over
            # all submodules that imported successfully.
            "$PYTHON" -c "
import time, importlib, pkgutil, morie
results = []
for mi in sorted(pkgutil.iter_modules(morie.__path__), key=lambda x: x.name):
    t0 = time.time()
    try:
        importlib.import_module(f'morie.{mi.name}')
        elapsed = time.time() - t0
        results.append((mi.name, elapsed))
    except Exception:
        results.append((mi.name, -1))
results.sort(key=lambda x: -x[1])
for name, t in results[:20]:
    if t < 0:
        print(f'  {name:30s}  FAILED')
    else:
        print(f'  {name:30s}  {t:.4f}s')
total = sum(t for _, t in results if t >= 0)
print(f'  {\"TOTAL\":30s}  {total:.4f}s')
"
            ;;
        all)
            cmd_benchmark import
            cmd_benchmark startup
            cmd_benchmark modules
            cmd_benchmark pytest
            ;;
        *)
            error "Unknown: morie benchmark $subcmd"
            return 1
            ;;
    esac
}

# =========================================================================
# morie db — SQLite database introspection
# =========================================================================

# Escape $1 for use inside a double-quoted SQL identifier by doubling any
# embedded double quote.
_db_quote_ident() {
    local dq='"'
    printf '%s' "${1//$dq/$dq$dq}"
}

# Succeed only when the database file $1 exists and the sqlite3 CLI is
# available; otherwise report the reason via error and return 1.
_db_ready() {
    local path="$1"
    if [ ! -f "$path" ]; then
        error "Database not found: $path"
        return 1
    fi
    if ! command -v sqlite3 >/dev/null 2>&1; then
        error "sqlite3 not found."
        return 1
    fi
}

# morie db <subcommand> [args] — inspect the bundled SQLite database at
# $ROOT/py-package/morie/data/esml_datasets.db.
#   $1    subcommand (see help text); defaults to help
#   $2..  subcommand arguments (table name, SQL text, row count)
# Returns 1 on usage errors, a missing database/sqlite3, or a failed query.
cmd_db() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    local db_path="$ROOT/py-package/morie/data/esml_datasets.db"
    # `${1:-}` (not "$1") everywhere below: the script runs under `set -u`,
    # so a bare "$1" would abort before the usage message could be shown.
    local table ident

    case "$subcmd" in
        help|-h|--help)
            cat << 'DBEOF'
morie db — SQLite database introspection

COMMANDS
    tables             List all tables
    schema <table>     Show CREATE TABLE for a table
    rows <table>       Count rows in a table
    query <sql>        Run a SELECT query
    info               DB file info (size, tables, rows)
    export <table>     Export table to CSV
    compare            Compare builtin DB vs cache DB
    columns <table>    List columns with types
    sample <table> [n] Preview first n rows (default 5)
    stats              Table sizes and row counts

EXAMPLES
    morie db tables
    morie db schema cpads_2021_2022
    morie db query "SELECT count(*) FROM cpads_2021_2022"
    morie db sample cpads_2021_2022 10
    morie db export cpads_2021_2022
DBEOF
            ;;
        tables)
            _db_ready "$db_path" || return 1
            info "Tables in esml_datasets.db:"
            sqlite3 "$db_path" ".tables" || { error "Could not list tables."; return 1; }
            ;;
        schema)
            table="${1:-}"
            [ -n "$table" ] || { error "Usage: morie db schema <table>"; return 1; }
            _db_ready "$db_path" || return 1
            sqlite3 "$db_path" ".schema $table" 2>/dev/null
            ;;
        rows)
            table="${1:-}"
            [ -n "$table" ] || { error "Usage: morie db rows <table>"; return 1; }
            _db_ready "$db_path" || return 1
            ident="$(_db_quote_ident "$table")"
            local count
            count="$(sqlite3 "$db_path" "SELECT count(*) FROM \"$ident\";" 2>/dev/null)" || {
                error "Could not count rows in table: $table"
                return 1
            }
            info "$table: $count rows"
            ;;
        query)
            local sql="$*"
            [ -n "$sql" ] || { error "Usage: morie db query <sql>"; return 1; }
            _db_ready "$db_path" || return 1
            # sqlite3's own diagnostics are left visible so the user can see
            # what was wrong with the statement.
            sqlite3 -header -column "$db_path" "$sql" || { error "Query failed."; return 1; }
            ;;
        info)
            [ -f "$db_path" ] || { error "Database not found: $db_path"; return 1; }
            local fsize tcount tbl rc
            fsize="$(du -h "$db_path" | cut -f1)"
            info "Database: $db_path"
            info "Size: $fsize"
            tcount="$(sqlite3 "$db_path" "SELECT count(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo '?')"
            info "Tables: $tcount"
            info ""
            info "Table row counts:"
            if command -v sqlite3 >/dev/null 2>&1; then
                sqlite3 "$db_path" "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" 2>/dev/null \
                    | while IFS= read -r tbl; do
                        rc="$(sqlite3 "$db_path" "SELECT count(*) FROM \"$(_db_quote_ident "$tbl")\";" 2>/dev/null || echo '?')"
                        printf "  %-40s %s rows\n" "$tbl" "$rc"
                    done
            else
                warn "sqlite3 not found; row counts unavailable."
            fi
            ;;
        columns)
            table="${1:-}"
            [ -n "$table" ] || { error "Usage: morie db columns <table>"; return 1; }
            _db_ready "$db_path" || return 1
            ident="$(_db_quote_ident "$table")"
            local cid name type notnull dflt pk pk_mark
            sqlite3 "$db_path" "PRAGMA table_info(\"$ident\");" 2>/dev/null \
                | while IFS='|' read -r cid name type notnull dflt pk; do
                    # table_info's pk field is 0 for non-key columns and the
                    # 1-based position within the primary key otherwise, so
                    # any positive value marks a key column.
                    pk_mark=""
                    case "$pk" in
                        ''|0|*[!0-9]*) ;;
                        *) pk_mark="PK" ;;
                    esac
                    printf "  %-30s %-15s %s\n" "$name" "$type" "$pk_mark"
                done
            ;;
        sample)
            table="${1:-}"
            local n="${2:-5}"
            [ -n "$table" ] || { error "Usage: morie db sample <table> [n]"; return 1; }
            # n is interpolated into SQL, so only plain digits are accepted.
            case "$n" in
                ''|*[!0-9]*) error "Usage: morie db sample <table> [n]"; return 1 ;;
            esac
            _db_ready "$db_path" || return 1
            ident="$(_db_quote_ident "$table")"
            sqlite3 -header -column "$db_path" "SELECT * FROM \"$ident\" LIMIT $n;" 2>/dev/null
            ;;
        export)
            table="${1:-}"
            [ -n "$table" ] || { error "Usage: morie db export <table>"; return 1; }
            _db_ready "$db_path" || return 1
            ident="$(_db_quote_ident "$table")"
            local outfile="${table}.csv"
            if ! sqlite3 -header -csv "$db_path" "SELECT * FROM \"$ident\";" > "$outfile" 2>/dev/null; then
                # Do not leave a truncated/empty CSV behind on failure.
                rm -f -- "$outfile"
                error "Export failed for table: $table"
                return 1
            fi
            success "Exported to $outfile"
            ;;
        compare)
            local cache_db="$ROOT/data/cache/esml_cache.db"
            info "Built-in DB: $db_path"
            if [ -f "$db_path" ]; then
                info "  Size: $(du -h "$db_path" | cut -f1)"
                info "  Tables: $(sqlite3 "$db_path" "SELECT count(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo '?')"
            else
                warn "  Not found"
            fi
            info ""
            info "Cache DB: $cache_db"
            if [ -f "$cache_db" ]; then
                info "  Size: $(du -h "$cache_db" | cut -f1)"
                info "  Tables: $(sqlite3 "$cache_db" "SELECT count(*) FROM sqlite_master WHERE type='table';" 2>/dev/null || echo '?')"
            else
                info "  Not found (no cache yet)"
            fi
            ;;
        stats)
            _db_ready "$db_path" || return 1
            local tbl rc cc sq="'"
            info "Table statistics:"
            printf "  %-40s %12s %12s\n" "TABLE" "ROWS" "COLUMNS"
            printf "  %-40s %12s %12s\n" "-----" "----" "-------"
            sqlite3 "$db_path" "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" 2>/dev/null \
                | while IFS= read -r tbl; do
                    rc="$(sqlite3 "$db_path" "SELECT count(*) FROM \"$(_db_quote_ident "$tbl")\";" 2>/dev/null || echo '?')"
                    # Single quotes are doubled because the name is passed as
                    # a string literal here, not as an identifier.
                    cc="$(sqlite3 "$db_path" "SELECT count(*) FROM pragma_table_info('${tbl//$sq/$sq$sq}');" 2>/dev/null || echo '?')"
                    printf "  %-40s %12s %12s\n" "$tbl" "$rc" "$cc"
                done
            ;;
        *)
            error "Unknown: morie db $subcmd"
            echo "  Run 'morie db help' for usage." >&2
            ;;
    esac
}

# =========================================================================
# morie tree-view — Project directory tree
# =========================================================================

# morie tree-view [subcommand] [--depth N] [--dirs] — print the project tree.
#   $1    subcommand (code | data | docs | full | default | help) or, when it
#         starts with '-', the first option of the default view
#   $2..  options for the default view: --depth N (or --depth=N), --dirs
# Uses `tree` when installed and falls back to `find` otherwise.
# Returns 1 when --depth is not a positive integer.
cmd_tree_view() {
    local subcmd="${1:-default}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'TREEEOF'
morie tree-view — Project directory tree

COMMANDS
    default            Show project tree (depth 3)
    code               Show code directories only
    data               Show data directories only
    docs               Show documentation directories
    full               Full tree (depth 5)

OPTIONS
    --depth N          Set tree depth (default 3)
    --dirs             Directories only

EXAMPLES
    morie tree-view
    morie tree-view code
    morie tree-view data
    morie tree-view --depth 4
TREEEOF
            ;;
        code)
            info "Code directories:"
            if command -v tree >/dev/null 2>&1; then
                tree -L 2 -d "$ROOT/py-package/" "$ROOT/r-package/" 2>/dev/null || find "$ROOT/py-package" "$ROOT/r-package" -type d -maxdepth 2 2>/dev/null | head -40
            else
                find "$ROOT/py-package" "$ROOT/r-package" -type d -maxdepth 2 2>/dev/null | head -40
            fi
            ;;
        data)
            info "Data directories:"
            if command -v tree >/dev/null 2>&1; then
                tree -L 2 "$ROOT/data/" 2>/dev/null || find "$ROOT/data" -maxdepth 2 2>/dev/null | head -40
            else
                find "$ROOT/data" -maxdepth 2 2>/dev/null | head -40
            fi
            ;;
        docs)
            info "Documentation directories:"
            if command -v tree >/dev/null 2>&1; then
                tree -L 2 -d "$ROOT/docz/" 2>/dev/null || find "$ROOT/docz" -type d -maxdepth 2 2>/dev/null | head -30
            else
                find "$ROOT/docz" -type d -maxdepth 2 2>/dev/null | head -30
            fi
            ;;
        full)
            if command -v tree >/dev/null 2>&1; then
                tree -L 5 -I '.venv|__pycache__|node_modules|.git|*.pyc' "$ROOT" 2>/dev/null | head -100
            else
                find "$ROOT" -maxdepth 5 -not -path '*/.venv/*' -not -path '*/__pycache__/*' -not -path '*/.git/*' 2>/dev/null | head -100
            fi
            ;;
        default|*)
            local depth=3 dirs_only=""
            # `morie tree-view --depth 4` arrives with the option in $subcmd;
            # put it back so the parser below sees it.
            case "$subcmd" in
                -*) set -- "$subcmd" "$@" ;;
            esac
            # A while/shift loop is used because shifting inside
            # `for arg in "$@"` does not advance the iteration.
            while [ $# -gt 0 ]; do
                case "$1" in
                    --depth)
                        if [ $# -ge 2 ]; then
                            depth="$2"
                            shift
                        fi
                        ;;
                    --depth=*) depth="${1#--depth=}" ;;
                    --dirs) dirs_only="-d" ;;
                esac
                shift
            done
            case "$depth" in
                ''|0|*[!0-9]*)
                    error "Invalid --depth value: $depth"
                    return 1
                    ;;
            esac
            if command -v tree >/dev/null 2>&1; then
                tree -L "$depth" ${dirs_only:+"$dirs_only"} -I '.venv|__pycache__|node_modules|.git|*.pyc' "$ROOT" 2>/dev/null | head -80
            elif [ -n "$dirs_only" ]; then
                # Honour --dirs in the find fallback as well.
                find "$ROOT" -maxdepth "$depth" -type d -not -path '*/.venv/*' -not -path '*/__pycache__/*' -not -path '*/.git/*' 2>/dev/null | sort | head -80
            else
                find "$ROOT" -maxdepth "$depth" -not -path '*/.venv/*' -not -path '*/__pycache__/*' -not -path '*/.git/*' 2>/dev/null | sort | head -80
            fi
            ;;
    esac
}

# =========================================================================
# morie hash — File checksums
# =========================================================================

cmd_hash() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'HASHEOF'
morie hash — SHA256 checksums for data integrity

COMMANDS
    db                 Hash esml_datasets.db
    data               Hash all CSV files in data/
    all                Hash everything
    verify <file>      Verify a file's hash
    file <path>        Hash a specific file

EXAMPLES
    morie hash db
    morie hash data
    morie hash file data/files/csv/survey/cpads-2021-2022-pumf2.csv
HASHEOF
            ;;
        db)
            local db_path="$ROOT/py-package/morie/data/esml_datasets.db"
            if [ -f "$db_path" ]; then
                shasum -a 256 "$db_path"
            else
                warn "DB not found: $db_path"
            fi
            ;;
        data)
            info "Hashing CSV files in data/files/..."
            find "$ROOT/data/files" -name "*.csv" -type f 2>/dev/null | sort | while read -r f; do
                shasum -a 256 "$f"
            done
            ;;
        file)
            local file="$1"
            [ -z "$file" ] && { error "Usage: morie hash file <path>"; return 1; }
            [ ! -f "$file" ] && { error "File not found: $file"; return 1; }
            shasum -a 256 "$file"
            ;;
        all)
            cmd_hash db
            echo ""
            cmd_hash data
            ;;
        verify)
            local file="$1"
            [ -z "$file" ] && { error "Usage: morie hash verify <file>"; return 1; }
            [ ! -f "$file" ] && { error "File not found: $file"; return 1; }
            local hash
            hash="$(shasum -a 256 "$file" | cut -d' ' -f1)"
            info "SHA256: $hash"
            info "File: $file"
            ;;
        *)
            error "Unknown: morie hash $subcmd"
            ;;
    esac
}

# =========================================================================
# morie size — Disk usage reports
# =========================================================================

# morie size [report] — disk usage summaries for parts of the project.
#   $1  report name: all | data | db | venv | cache | code | total | help
#       (defaults to all)
# Sizes come from `du`; directories and files that do not exist are skipped
# or reported as absent.
cmd_size() {
    local report="${1:-all}"
    shift 2>/dev/null || true
    case "$report" in
        help|-h|--help)
            cat << 'SIZEEOF'
morie size — Disk usage reports

COMMANDS
    all                Show all sizes
    data               Data directories
    db                 Database files
    venv               Python virtual environment
    cache              Cache directory
    code               Source code
    total              Total project size

EXAMPLES
    morie size
    morie size data
    morie size db
SIZEEOF
            ;;
        all)
            # Every report in turn, separated by a blank line.
            local section started=""
            for section in code data db venv cache total; do
                if [ -n "$started" ]; then
                    echo ""
                fi
                started=1
                cmd_size "$section"
            done
            ;;
        code)
            info "Source code sizes:"
            local label
            for label in py-package/morie/ r-package/morie/ scripts/ tests/; do
                printf "  %-25s %s\n" "$label" "$(du -sh "$ROOT/$label" 2>/dev/null | cut -f1)"
            done
            ;;
        data)
            info "Data directory sizes:"
            local name dir
            for name in files public private datasets cache; do
                dir="$ROOT/data/$name"
                [ -d "$dir" ] || continue
                printf "  %-25s %s\n" "$name/" "$(du -sh "$dir" 2>/dev/null | cut -f1)"
            done
            ;;
        db)
            info "Database files:"
            local db_file
            for db_file in \
                "$ROOT/py-package/morie/data/esml_datasets.db" \
                "$ROOT/r-package/morie/inst/extdata/esml_datasets.db" \
                "$ROOT/data/cache/esml_cache.db"
            do
                if [ -f "$db_file" ]; then
                    # Shown relative to the project root.
                    printf "  %-60s %s\n" "${db_file#$ROOT/}" "$(du -h "$db_file" | cut -f1)"
                fi
            done
            ;;
        venv)
            local venv_dir="$ROOT/.venv"
            if [ ! -d "$venv_dir" ]; then
                info "No .venv found."
            else
                info "Virtual environment: $(du -sh "$venv_dir" 2>/dev/null | cut -f1)"
            fi
            ;;
        cache)
            local cache_dir="$ROOT/data/cache"
            if [ ! -d "$cache_dir" ]; then
                info "No cache directory."
            else
                info "Cache: $(du -sh "$cache_dir" 2>/dev/null | cut -f1)"
            fi
            ;;
        total)
            info "Total project size: $(du -sh "$ROOT" 2>/dev/null | cut -f1)"
            ;;
        *)
            error "Unknown: morie size $report"
            ;;
    esac
}

# =========================================================================
# morie path — Show resolved project paths
# =========================================================================

# morie path [name] — print resolved project paths.
#   $1  which path: all | root | python | r | db | venv | cache | config | help
#       (defaults to all)
# Single-path subcommands print just the path so the output can be used in
# command substitution; `all` prints an aligned table.
cmd_path() {
    local subcmd="${1:-all}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'PATHEOF'
morie path — Show resolved project paths

COMMANDS
    all                Show all paths
    root               Project root
    python             Python interpreter
    r                  R interpreter
    db                 Database file
    venv               Virtual environment
    cache              Cache directory
    config             Config file

EXAMPLES
    morie path
    morie path python
    morie path db
PATHEOF
            ;;
        root)
            echo "$ROOT"
            ;;
        python)
            _resolve_python
            echo "$PYTHON"
            ;;
        r)
            if command -v Rscript >/dev/null 2>&1; then
                command -v Rscript
                Rscript --version 2>&1 | head -1
            else
                echo "R not found"
            fi
            ;;
        db)
            echo "$ROOT/py-package/morie/data/esml_datasets.db"
            ;;
        venv)
            echo "$ROOT/.venv"
            ;;
        cache)
            echo "$ROOT/data/cache"
            ;;
        config)
            # NOTE(review): the script header defines ESML_RC as
            # "$ESML_CONFIG_DIR/esmlrc"; confirm this is still the intended
            # config location.
            echo "${ESML_HOME:-$HOME/.morie}/config"
            ;;
        all)
            printf "  %-20s %s\n" "ROOT" "$ROOT"
            printf "  %-20s %s\n" "ESML_HOME" "${ESML_HOME:-$HOME/.morie}"
            # Best effort: a failed lookup must not abort the listing under
            # `set -e`; the next line falls back to "not found".
            _resolve_python 2>/dev/null || true
            printf "  %-20s %s\n" "PYTHON" "${PYTHON:-not found}"
            printf "  %-20s %s\n" "R" "$(command -v Rscript 2>/dev/null || echo 'not found')"
            printf "  %-20s %s\n" "VENV" "$ROOT/.venv"
            printf "  %-20s %s\n" "DB (Python)" "$ROOT/py-package/morie/data/esml_datasets.db"
            printf "  %-20s %s\n" "DB (R)" "$ROOT/r-package/morie/inst/extdata/esml_datasets.db"
            printf "  %-20s %s\n" "CACHE" "$ROOT/data/cache"
            printf "  %-20s %s\n" "TESTS" "$ROOT/tests"
            printf "  %-20s %s\n" "DOCS" "$ROOT/docz/source"
            printf "  %-20s %s\n" "SCRIPTS" "$ROOT/scripts"
            ;;
        *)
            error "Unknown: morie path $subcmd"
            ;;
    esac
}

# =========================================================================
# morie changelog — Git changelog generation
# =========================================================================

# Run git against the project root ($ROOT) with the given arguments.
# On failure an explanatory message is reported via error and 1 is returned,
# instead of the script aborting silently under `set -e`.
_changelog_git() {
    git -C "$ROOT" "$@" || {
        error "git ${1:-} failed in $ROOT (not a git repository, or bad revision/date?)"
        return 1
    }
}

# morie changelog <subcommand> [args] — changelog views built on `git log`.
#   $1    subcommand: recent [n] | since-tag <tag> | since-date <date> |
#         authors | save [file]   (defaults to help)
# Returns 1 on usage errors or when git fails.
cmd_changelog() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'CLOGEOF'
morie changelog — Git changelog generation

COMMANDS
    recent [n]         Last n commits (default 20)
    since-tag <tag>    Changes since a git tag
    since-date <date>  Changes since a date
    authors            List contributors
    save [file]        Save changelog to file

EXAMPLES
    morie changelog recent
    morie changelog recent 50
    morie changelog since-date 2026-01-01
    morie changelog authors
    morie changelog save CHANGELOG.md
CLOGEOF
            ;;
        recent)
            local n="${1:-20}"
            case "$n" in
                ''|*[!0-9]*) error "Usage: morie changelog recent [n]"; return 1 ;;
            esac
            _changelog_git log --oneline --no-decorate -n "$n"
            ;;
        since-tag)
            # `${1:-}`: under `set -u` a bare "$1" would abort before the
            # usage message could be printed.
            local tag="${1:-}"
            [ -n "$tag" ] || { error "Usage: morie changelog since-tag <tag>"; return 1; }
            _changelog_git log --oneline "$tag..HEAD"
            ;;
        since-date)
            local since="${1:-}"
            [ -n "$since" ] || { error "Usage: morie changelog since-date <YYYY-MM-DD>"; return 1; }
            _changelog_git log --oneline --since="$since"
            ;;
        authors)
            info "Contributors:"
            _changelog_git shortlog -sn --all
            ;;
        save)
            local outfile="${1:-CHANGELOG.md}"
            local log
            # Collect the log first so a git failure cannot leave a
            # header-only file behind.
            log="$(_changelog_git log --oneline --no-decorate)" || return 1
            {
                echo "# Changelog"
                echo ""
                echo "Generated: $(date)"
                echo ""
                printf '%s\n' "$log"
            } > "$outfile"
            success "Saved to $outfile"
            ;;
        *)
            error "Unknown: morie changelog $subcmd"
            ;;
    esac
}

# =========================================================================
# morie graph — Dependency and module graphs
# =========================================================================

# morie graph <subcommand> — module and dependency overviews.
#   $1  subcommand: modules | imports | data | packages | help
#       (defaults to help)
# `modules`, `imports` and `packages` run "$PYTHON", which is expected to be
# set by _require_python.
cmd_graph() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'GRAPHEOF'
morie graph — Module and dependency graphs

COMMANDS
    modules            Show MODULE_SPECS dependencies
    imports            Show Python import graph
    data               Show data pipeline flow
    packages           Show installed packages

EXAMPLES
    morie graph modules
    morie graph imports
GRAPHEOF
            ;;
        modules)
            info "MORIE Analysis Module Graph:"
            _require_python
            # The first attempt prints each spec with its depends_on list; if
            # it fails for any reason the fallback prints names only (and
            # shows its own errors).
            "$PYTHON" -c "
from morie.modules import MODULE_SPECS
for spec in MODULE_SPECS:
    deps = ', '.join(spec.depends_on) if hasattr(spec, 'depends_on') and spec.depends_on else 'none'
    print(f'  {spec.name:40s} → [{deps}]')
" 2>/dev/null || "$PYTHON" -c "
from morie.modules import MODULE_SPECS
for spec in MODULE_SPECS:
    print(f'  {spec.name}')
"
            ;;
        imports)
            info "Python import graph (morie.*):"
            _require_python
            # The program is fed on stdin from a quoted here-doc, so no shell
            # escaping is needed inside it.  For every public top-level .py
            # module it lists the morie modules pulled in via `from` imports,
            # including bare relative ones (`from . import x`).
            "$PYTHON" - << 'PYEOF'
import ast, os, pkgutil, sys
import morie

pkg_dir = os.path.dirname(morie.__file__)
for mi in sorted(pkgutil.iter_modules(morie.__path__), key=lambda x: x.name):
    if mi.name.startswith('_'):
        continue
    fp = os.path.join(pkg_dir, mi.name + '.py')
    if not os.path.isfile(fp):
        continue
    try:
        # Explicit UTF-8: the locale default may not decode the sources.
        with open(fp, encoding='utf-8') as fh:
            tree = ast.parse(fh.read(), filename=fp)
    except (OSError, SyntaxError, ValueError) as exc:
        print(f'  {mi.name:25s} (skipped: {exc.__class__.__name__})', file=sys.stderr)
        continue
    imports = set()
    for node in ast.walk(tree):
        if not isinstance(node, ast.ImportFrom):
            continue
        if node.level > 0:
            if node.module:
                imports.add(node.module)
            else:
                # `from . import a, b` has no module; the names are the targets.
                imports.update(alias.name for alias in node.names)
        elif node.module and node.module.startswith('morie'):
            imports.add(node.module.replace('morie.', ''))
    if imports:
        print(f'  {mi.name:25s} → {" ".join(sorted(imports))}')
PYEOF
            ;;
        data)
            info "Data pipeline flow:"
            echo "  raw CSV/XLSX → scripts/build_esml_db.py → esml_datasets.db"
            echo "  esml_datasets.db → data.py load_dataset() → DataFrame"
            echo "  DataFrame → modules.py run_module() → CSV outputs"
            echo "  CSV outputs → data/public/outputs/"
            ;;
        packages)
            info "Installed Python packages:"
            _require_python
            "$PYTHON" -m pip list --format=columns 2>/dev/null | head -40
            ;;
        *)
            error "Unknown: morie graph $subcmd"
            ;;
    esac
}

# =========================================================================
# morie fmt — Code formatting
# =========================================================================

# morie fmt <subcommand> [formatter args] — format or check the code base.
#   $1    subcommand: py | r | check | diff | help   (defaults to help)
#   $2..  extra arguments forwarded to the Python formatter
# Python formatting prefers ruff and falls back to black (+ isort); every
# Python subcommand warns when neither is installed.
cmd_fmt() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'FMTEOF'
morie fmt — Code formatting

COMMANDS
    py                 Format Python with ruff/black + isort
    r                  Format R with styler
    check              Check formatting without changing files
    diff               Show what would change

EXAMPLES
    morie fmt py
    morie fmt check
    morie fmt r
FMTEOF
            ;;
        py)
            info "Formatting Python code..."
            _require_python
            if "$PYTHON" -m ruff format --version >/dev/null 2>&1; then
                "$PYTHON" -m ruff format "$ROOT/py-package/morie/" "$@"
                # Import sorting is best effort; a failure here is not fatal.
                "$PYTHON" -m ruff check --fix --select I "$ROOT/py-package/morie/" 2>/dev/null || true
                success "Formatted with ruff."
            elif "$PYTHON" -m black --version >/dev/null 2>&1; then
                "$PYTHON" -m black "$ROOT/py-package/morie/" "$@"
                "$PYTHON" -m isort "$ROOT/py-package/morie/" 2>/dev/null || true
                success "Formatted with black + isort."
            else
                warn "No formatter found. Install: pip install ruff"
            fi
            ;;
        r)
            info "Formatting R code..."
            if command -v Rscript >/dev/null 2>&1; then
                # The directory is passed as a trailing argument rather than
                # spliced into the R source, so quotes in $ROOT cannot break
                # the expression.
                if Rscript -e "
                    if (!requireNamespace('styler', quietly=TRUE)) {
                        cat('styler not installed. Run: install.packages(\"styler\")\n')
                        quit(status=1)
                    }
                    styler::style_dir(commandArgs(trailingOnly=TRUE)[1])
                " "$ROOT/r-package/morie/R/"; then
                    success "R code formatted."
                else
                    warn "styler had issues."
                fi
            else
                warn "R not found."
            fi
            ;;
        check)
            info "Checking formatting..."
            _require_python
            if "$PYTHON" -m ruff format --version >/dev/null 2>&1; then
                if "$PYTHON" -m ruff format --check "$ROOT/py-package/morie/" "$@"; then
                    success "Formatting OK."
                else
                    warn "Formatting issues found."
                fi
            elif "$PYTHON" -m black --version >/dev/null 2>&1; then
                if "$PYTHON" -m black --check "$ROOT/py-package/morie/" "$@"; then
                    success "Formatting OK."
                else
                    warn "Formatting issues found."
                fi
            else
                warn "No formatter found. Install: pip install ruff"
            fi
            ;;
        diff)
            info "Format diff..."
            _require_python
            if "$PYTHON" -m ruff format --version >/dev/null 2>&1; then
                "$PYTHON" -m ruff format --diff "$ROOT/py-package/morie/" "$@"
            elif "$PYTHON" -m black --version >/dev/null 2>&1; then
                "$PYTHON" -m black --diff "$ROOT/py-package/morie/" "$@"
            else
                warn "No formatter found. Install: pip install ruff"
            fi
            ;;
        *)
            error "Unknown: morie fmt $subcmd"
            ;;
    esac
}

# =========================================================================
# morie sec — Security audit
# =========================================================================

# morie sec <subcommand> [tool args] — security checks.
#   $1    subcommand: deps | code | secrets | all | help   (defaults to help)
#   $2..  extra arguments forwarded to pip-audit / safety / bandit
# `secrets` greps the Python package for assignment-looking secret names and
# reports up to five hits per pattern.
cmd_sec() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SECEOF'
morie sec — Security auditing

COMMANDS
    deps               Audit dependencies (pip-audit)
    code               Scan code (bandit)
    secrets            Check for exposed secrets
    all                Run all security checks

EXAMPLES
    morie sec deps
    morie sec code
    morie sec secrets
    morie sec all
SECEOF
            ;;
        deps)
            info "Auditing dependencies..."
            _require_python
            if "$PYTHON" -m pip_audit --version >/dev/null 2>&1; then
                "$PYTHON" -m pip_audit "$@"
            elif "$PYTHON" -m safety --version >/dev/null 2>&1; then
                "$PYTHON" -m safety check "$@"
            else
                warn "No audit tool. Install: pip install pip-audit"
                # Use the resolved interpreter on both sides of the pipe
                # rather than whatever `python3` happens to be on PATH.
                "$PYTHON" -m pip list --format=json 2>/dev/null | "$PYTHON" -c "
import json, sys
pkgs = json.load(sys.stdin)
print(f'Installed packages: {len(pkgs)}')
print('Install pip-audit for vulnerability scanning.')
"
            fi
            ;;
        code)
            info "Scanning code..."
            _require_python
            if "$PYTHON" -m bandit --version >/dev/null 2>&1; then
                if "$PYTHON" -m bandit -r "$ROOT/py-package/morie/" -ll "$@"; then
                    success "No issues."
                else
                    warn "Issues found."
                fi
            else
                warn "bandit not found. Install: pip install bandit"
            fi
            ;;
        secrets)
            info "Checking for exposed secrets..."
            local found=0 pattern matches
            for pattern in "API_KEY\s*=" "SECRET\s*=" "PASSWORD\s*=" "TOKEN\s*=" "PRIVATE_KEY"; do
                # grep exits 1 when nothing matches; with `set -e` and
                # pipefail that would abort the script on a clean tree, so
                # the pipeline status is deliberately ignored.
                matches="$(grep -rn --include="*.py" -- "$pattern" "$ROOT/py-package/morie/" 2>/dev/null \
                    | grep -v "\.pyc" \
                    | grep -v "__pycache__" \
                    | grep -v "os.environ" \
                    | grep -v "getenv" \
                    | head -5)" || true
                if [ -n "$matches" ]; then
                    warn "Potential secret pattern '$pattern':"
                    echo "$matches"
                    found=$((found + 1))
                fi
            done
            if [ "$found" -eq 0 ]; then
                success "No exposed secrets found."
            fi
            ;;
        all)
            cmd_sec secrets
            echo ""
            cmd_sec deps
            echo ""
            cmd_sec code
            ;;
        *)
            error "Unknown: morie sec $subcmd"
            ;;
    esac
}

# =========================================================================
# morie sync — Data synchronization
# =========================================================================

# morie sync <subcommand> [args] — rsync-based data synchronisation.
#   $1    subcommand: status | push <dest> | pull <source> | diff <path> | help
#         (defaults to help)
#   $3..  (push/pull) extra options forwarded to rsync
# push copies $ROOT/data/ to <dest>; pull copies <source> into $ROOT/data/;
# diff is an rsync dry run (-n) from $ROOT/data/ to <path>.
# Returns 1 on usage errors.
cmd_sync() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SYNCEOF'
morie sync — Data synchronization

COMMANDS
    status             Show sync status (file counts/sizes)
    push <dest>        Push data to destination (rsync)
    pull <source>      Pull data from source (rsync)
    diff <path>        Compare local vs remote directory

EXAMPLES
    morie sync status
    morie sync push user@server:/data/morie/
    morie sync pull user@server:/data/morie/
SYNCEOF
            ;;
        status)
            info "Local data status:"
            local d fc
            for d in "$ROOT/data/files" "$ROOT/data/public" "$ROOT/data/private" "$ROOT/data/datasets"; do
                if [ -d "$d" ]; then
                    fc="$(find "$d" -type f 2>/dev/null | wc -l | tr -d ' ')"
                    # "$ROOT" is quoted inside the expansion so it is
                    # stripped literally, not used as a glob pattern.
                    printf "  %-50s %s files, %s\n" "${d#"$ROOT"/}" "$fc" "$(du -sh "$d" 2>/dev/null | cut -f1)"
                fi
            done
            ;;
        push)
            local dest="${1:-}"
            if [ -z "$dest" ]; then
                error "Usage: morie sync push <destination>"
                return 1
            fi
            # Drop <dest> from "$@": otherwise it is passed to rsync twice and
            # the first copy is treated as an additional source.
            shift
            info "Syncing data/ to $dest..."
            rsync -avz --progress "$@" "$ROOT/data/" "$dest"
            success "Push complete."
            ;;
        pull)
            local source="${1:-}"
            if [ -z "$source" ]; then
                error "Usage: morie sync pull <source>"
                return 1
            fi
            # Drop <source> from "$@": left in place it would come last on the
            # rsync command line and become the destination.
            shift
            info "Pulling from $source..."
            rsync -avz --progress "$@" "$source" "$ROOT/data/"
            success "Pull complete."
            ;;
        diff)
            local path="${1:-}"
            if [ -z "$path" ]; then
                error "Usage: morie sync diff <remote_path>"
                return 1
            fi
            # rsync's own diagnostics stay visible so connection or path
            # problems are not silent.
            rsync -avzn "$ROOT/data/" "$path"
            ;;
        *)
            error "Unknown: morie sync $subcmd"
            ;;
    esac
}

# =========================================================================
# morie migrate — Version migration helper
# =========================================================================

# morie migrate <action> — version migration helper.
#   $1  action: check | status | db | config | help   (defaults to help)
# check and status query the installed morie package through "$PYTHON";
# db and config only report that nothing is pending.
cmd_migrate() {
    local action="${1:-help}"
    shift 2>/dev/null || true

    case "$action" in
        db)
            info "Database migration..."
            warn "No pending database migrations."
            ;;
        config)
            info "Config migration..."
            warn "No pending config migrations."
            ;;
        status)
            info "Version state:"
            _require_python
            # Package version, interpreter version and, when available, the
            # database location reported by morie.data.esml_db().
            local status_py="
import morie, sys
print(f'  Package version: {morie.__version__}')
print(f'  Python: {sys.version.split()[0]}')
try:
    from morie.data import esml_db
    print(f'  Database: {esml_db()}')
except Exception:
    print('  Database: not configured')
"
            "$PYTHON" -c "$status_py"
            ;;
        check)
            info "Checking migration status..."
            _require_python
            local check_py="
import morie
print(f'Current version: {morie.__version__}')
print('No pending migrations.')
"
            # Interpreter errors are hidden; a failure is reported as a warning.
            if ! "$PYTHON" -c "$check_py" 2>/dev/null; then
                warn "Could not check version."
            fi
            ;;
        help|-h|--help)
            cat << 'MIGEOF'
morie migrate — Version migration helper

COMMANDS
    check              Check if migration is needed
    status             Show current version state
    db                 Migrate database schema
    config             Migrate configuration files

EXAMPLES
    morie migrate check
    morie migrate status
MIGEOF
            ;;
        *)
            error "Unknown: morie migrate $action"
            ;;
    esac
}

# =========================================================================
# morie profile-code — Python profiling
# =========================================================================

# morie profile-code <subcommand> [args] — cProfile helpers.
#   $1    subcommand: run <script> [script args] | import | top [n] |
#         module <name> | help   (defaults to help)
# run profiles a script (first 40 lines of output); import and module
# profile importing morie or morie.<name>.
# Returns 1 on usage errors, a missing script, or an invalid module name.
cmd_profile_code() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'PROFEOF'
morie profile-code — Python profiling

COMMANDS
    run <script>       Profile a Python script
    import             Profile morie import
    top [n]            Show top n slowest functions
    module <name>      Profile a specific module

EXAMPLES
    morie profile-code run scripts/build_esml_db.py
    morie profile-code import
    morie profile-code top 20
PROFEOF
            ;;
        run)
            # `${1:-}`: under `set -u` a bare "$1" would abort before the
            # usage message could be printed.
            local script="${1:-}"
            [ -n "$script" ] || { error "Usage: morie profile-code run <script>"; return 1; }
            [ -f "$script" ] || { error "File not found: $script"; return 1; }
            shift
            info "Profiling $script..."
            _require_python
            # Remaining arguments are handed to the profiled script.
            "$PYTHON" -m cProfile -s cumulative "$script" "$@" 2>&1 | head -40
            ;;
        import)
            info "Profiling morie import..."
            _require_python
            "$PYTHON" -c "
import cProfile, pstats, io
pr = cProfile.Profile()
pr.enable()
import morie
pr.disable()
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
ps.print_stats(20)
print(s.getvalue())
"
            ;;
        top)
            # Placeholder: no profile data is stored between runs, so this
            # only points the user at `run`.
            local n="${1:-20}"
            info "Top $n slowest functions from last profile:"
            warn "Run 'morie profile-code run <script>' first."
            ;;
        module)
            local mod="${1:-}"
            [ -n "$mod" ] || { error "Usage: morie profile-code module <name>"; return 1; }
            # Only dotted identifiers are accepted.
            case "$mod" in
                *[!A-Za-z0-9_.]*|.*|*.|*..*)
                    error "Invalid module name: $mod"
                    return 1
                    ;;
            esac
            info "Profiling morie.$mod import..."
            _require_python
            # The name is passed as argv rather than spliced into the Python
            # source, so it can never be executed as code.
            "$PYTHON" -c "
import cProfile, pstats, io, sys
pr = cProfile.Profile()
pr.enable()
import importlib
importlib.import_module('morie.' + sys.argv[1])
pr.disable()
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
ps.print_stats(15)
print(s.getvalue())
" "$mod" 2>&1
            ;;
        *)
            error "Unknown: morie profile-code $subcmd"
            ;;
    esac
}

# =========================================================================
# morie serve — Local HTTP server
# =========================================================================

# morie serve <subcommand> [port]
#
# Serve a directory over HTTP with Python's http.server (runs until
# interrupted).
#   docs [port]   serve $ROOT/docz/build/html (default port 8080); the docs
#                 are built with Sphinx first when that directory is missing
#   data [port]   serve $ROOT/data (default port 8081)
# Globals read: ROOT, PYTHON.
# Returns 1 when there is nothing to serve.
cmd_serve() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SERVEEOF'
morie serve — Local HTTP server

COMMANDS
    docs [port]        Serve Sphinx HTML docs (default port 8080)
    data [port]        Serve data directory (default port 8081)

EXAMPLES
    morie serve docs
    morie serve docs 9090
    morie serve data
SERVEEOF
            ;;
        docs)
            local port="${1:-8080}"
            local docdir="$ROOT/docz/build/html"
            _require_python
            if [ ! -d "$docdir" ]; then
                warn "Docs not built. Building first..."
                # Sphinx output is discarded, so a failed build has to be
                # reported here; otherwise the command would stop (or try to
                # serve a missing directory) without any explanation.
                if ! "$PYTHON" -m sphinx -b html "$ROOT/docz/source" "$docdir" 2>/dev/null; then
                    error "Docs build failed; nothing to serve."
                    return 1
                fi
            fi
            info "Serving docs at http://localhost:$port"
            info "Press Ctrl+C to stop."
            "$PYTHON" -m http.server "$port" --directory "$docdir"
            ;;
        data)
            local port="${1:-8081}"
            local datadir="$ROOT/data"
            if [ ! -d "$datadir" ]; then
                error "Data directory not found: $datadir"
                return 1
            fi
            info "Serving data at http://localhost:$port"
            info "Press Ctrl+C to stop."
            _require_python
            "$PYTHON" -m http.server "$port" --directory "$datadir"
            ;;
        *)
            error "Unknown: morie serve $subcmd"
            ;;
    esac
}

# =========================================================================
# morie watch — File watcher for auto-rebuild
# =========================================================================

# Succeeds when fswatch is on PATH; otherwise prints the install hint and
# returns 1.
_watch_require_fswatch() {
    if ! command -v fswatch >/dev/null 2>&1; then
        error "fswatch not found. Install: brew install fswatch"
        return 1
    fi
}

# morie watch <subcommand>
#
# Re-run a task every time fswatch reports a change.
#   docs    rebuild the Sphinx HTML docs when $ROOT/docz/source changes
#   tests   re-run pytest when $ROOT/py-package/morie or $ROOT/tests changes
#   lint    re-run `cmd_lint py` when $ROOT/py-package/morie changes
# Globals read: ROOT, PYTHON.
# Returns 1 when fswatch is not installed.
cmd_watch() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'WATCHEOF'
morie watch — File watcher for auto-rebuild

COMMANDS
    docs               Rebuild Sphinx docs on change
    tests              Re-run pytest on change
    lint               Re-lint on change

Requires: fswatch (macOS: brew install fswatch)

EXAMPLES
    morie watch docs
    morie watch tests
WATCHEOF
            ;;
        docs)
            _watch_require_fswatch || return 1
            info "Watching docs for changes... (Ctrl+C to stop)"
            _require_python
            fswatch -o "$ROOT/docz/source" | while read -r _; do
                info "Change detected, rebuilding docs..."
                if "$PYTHON" -m sphinx -b html "$ROOT/docz/source" "$ROOT/docz/build/html" 2>/dev/null; then
                    success "Docs rebuilt."
                else
                    warn "Build failed."
                fi
            done
            ;;
        tests)
            _watch_require_fswatch || return 1
            info "Watching code for changes... (Ctrl+C to stop)"
            _require_python
            fswatch -o "$ROOT/py-package/morie" "$ROOT/tests" | while read -r _; do
                info "Change detected, running tests..."
                # `|| true`: with errexit + pipefail a failing test run would
                # otherwise terminate this loop, i.e. stop watching exactly
                # when the feedback is needed.
                "$PYTHON" -m pytest "$ROOT/tests/" -q --tb=line 2>&1 | tail -3 || true
            done
            ;;
        lint)
            _watch_require_fswatch || return 1
            info "Watching code for changes... (Ctrl+C to stop)"
            _require_python
            fswatch -o "$ROOT/py-package/morie" | while read -r _; do
                info "Change detected, linting..."
                # Same reasoning as for tests: lint findings must not end the
                # watch loop.
                cmd_lint py 2>&1 | tail -5 || true
            done
            ;;
        *)
            error "Unknown: morie watch $subcmd"
            ;;
    esac
}

# =========================================================================
# morie diff-outputs — Compare analysis outputs between runs
# =========================================================================

# morie diff-outputs <subcommand> [args]
#
# Compare the *.csv files of two output directories by file name.
#   compare <dir1> <dir2>   per-file verdict: identical, number of lines of
#                           diff output, or "only in dir1"
#   report  <dir1> <dir2>   counts of identical / different / one-sided files
#   latest                  list the ten newest CSVs in
#                           $ROOT/data/public/outputs
# Globals read: ROOT (latest only).
# Returns 1 when a required directory argument is missing.
cmd_diff_outputs() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'DIFFEOF'
morie diff-outputs — Compare analysis outputs

COMMANDS
    compare <dir1> <dir2>   Diff CSV outputs between directories
    report <dir1> <dir2>    Summary of differences
    latest                  Compare latest two output runs

EXAMPLES
    morie diff-outputs compare /tmp/run1 /tmp/run2
    morie diff-outputs latest
DIFFEOF
            ;;
        compare)
            # ${n:-} keeps the usage message reachable under `set -u`.
            local dir1="${1:-}" dir2="${2:-}"
            if [ -z "$dir1" ] || [ -z "$dir2" ]; then
                error "Usage: morie diff-outputs compare <dir1> <dir2>"
                return 1
            fi
            info "Comparing outputs..."
            local f base d
            for f in "$dir1"/*.csv; do
                # An unmatched glob stays literal; skip it instead of
                # reporting a phantom "*.csv".
                [ -e "$f" ] || continue
                base="$(basename "$f")"
                if [ -f "$dir2/$base" ]; then
                    # diff exits 1 when the files differ; neutralise that so
                    # `pipefail` + errexit do not abort on the first change.
                    d="$( { diff "$f" "$dir2/$base" 2>/dev/null || true; } | wc -l | tr -d ' ')"
                    if [ "$d" -gt 0 ]; then
                        warn "$base: $d line differences"
                    else
                        success "$base: identical"
                    fi
                else
                    warn "$base: only in dir1"
                fi
            done
            ;;
        report)
            local dir1="${1:-}" dir2="${2:-}"
            if [ -z "$dir1" ] || [ -z "$dir2" ]; then
                error "Usage: morie diff-outputs report <dir1> <dir2>"
                return 1
            fi
            local same=0 diff_count=0 only1=0 only2=0
            local f base
            for f in "$dir1"/*.csv; do
                [ -e "$f" ] || continue
                base="$(basename "$f")"
                if [ ! -f "$dir2/$base" ]; then
                    only1=$((only1 + 1))
                elif diff -q "$f" "$dir2/$base" >/dev/null 2>&1; then
                    same=$((same + 1))
                else
                    diff_count=$((diff_count + 1))
                fi
            done
            for f in "$dir2"/*.csv; do
                [ -e "$f" ] || continue
                base="$(basename "$f")"
                if [ ! -f "$dir1/$base" ]; then
                    only2=$((only2 + 1))
                fi
            done
            info "Summary:"
            printf "  %-20s %d\n" "Identical:" "$same"
            printf "  %-20s %d\n" "Different:" "$diff_count"
            printf "  %-20s %d\n" "Only in dir1:" "$only1"
            printf "  %-20s %d\n" "Only in dir2:" "$only2"
            ;;
        latest)
            local outdir="$ROOT/data/public/outputs"
            info "Output directory: $outdir"
            if [ -d "$outdir" ]; then
                ls -lt "$outdir"/*.csv 2>/dev/null | head -10
            else
                warn "No output directory found."
            fi
            ;;
        *)
            error "Unknown: morie diff-outputs $subcmd"
            ;;
    esac
}

# =========================================================================
# morie audit — Full project audit
# =========================================================================

# Print how many files under directory $1 have a name matching glob $2
# (anything inside __pycache__ is ignored). Prints 0 when $1 does not exist:
# find's failure is neutralised so `pipefail` does not abort the audit.
_audit_count_files() {
    { find "$1" -name "$2" -not -path "*/__pycache__/*" 2>/dev/null || true; } | wc -l | tr -d ' '
}

# Print the combined line count of the files _audit_count_files would count.
_audit_count_lines() {
    { find "$1" -name "$2" -not -path "*/__pycache__/*" -exec cat {} + 2>/dev/null || true; } | wc -l | tr -d ' '
}

# morie audit [subcommand]
#
# Project statistics; `all` (the default) runs summary, code, data and docs.
#   code      file/line/function counts for Python, R, tests and this script
#   data      row count per table of the bundled SQLite DB, then `cmd_size data`
#   docs      number of RST/QMD files and of top-level morie modules
#   deps      number of installed distributions and outdated packages
#   summary   package version, module count, file count, script size
# Globals read: ROOT, PYTHON.
cmd_audit() {
    local subcmd="${1:-all}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'AUDITEOF'
morie audit — Full project audit

COMMANDS
    all                Run complete audit
    code               Code statistics (LOC, functions, tests)
    data               Dataset inventory and sizes
    docs               Documentation coverage
    deps               Dependency audit
    summary            Quick summary

EXAMPLES
    morie audit
    morie audit code
    morie audit data
AUDITEOF
            ;;
        code)
            info "Code Statistics:"
            echo ""
            info "Python source (py-package/morie/):"
            local py_src="$ROOT/py-package/morie"
            local py_files py_lines py_funcs
            py_files="$(_audit_count_files "$py_src" "*.py")"
            py_lines="$(_audit_count_lines "$py_src" "*.py")"
            # grep exits 1 when nothing matches; that is a count of 0, not an
            # error.
            py_funcs="$( { grep -r "^def \|^    def " "$py_src/" --include="*.py" 2>/dev/null || true; } | wc -l | tr -d ' ')"
            printf "  %-25s %s\n" "Files:" "$py_files"
            printf "  %-25s %s\n" "Lines:" "$py_lines"
            printf "  %-25s %s\n" "Functions:" "$py_funcs"
            echo ""
            info "R source (r-package/morie/):"
            local r_files r_lines
            r_files="$(_audit_count_files "$ROOT/r-package/morie/R" "*.R")"
            r_lines="$(_audit_count_lines "$ROOT/r-package/morie/R" "*.R")"
            printf "  %-25s %s\n" "Files:" "$r_files"
            printf "  %-25s %s\n" "Lines:" "$r_lines"
            echo ""
            info "Tests:"
            local test_files test_lines
            test_files="$(_audit_count_files "$ROOT/tests" "test_*.py")"
            test_lines="$(_audit_count_lines "$ROOT/tests" "test_*.py")"
            printf "  %-25s %s\n" "Test files:" "$test_files"
            printf "  %-25s %s\n" "Test lines:" "$test_lines"
            echo ""
            info "Shell (morie executable):"
            printf "  %-25s %s\n" "Lines:" "$(wc -l < "$0" 2>/dev/null | tr -d ' ')"
            printf "  %-25s %s\n" "Size:" "$(du -h "$0" 2>/dev/null | cut -f1)"
            ;;
        data)
            info "Dataset Inventory:"
            local db_path="$ROOT/py-package/morie/data/esml_datasets.db"
            if [ -f "$db_path" ] && command -v sqlite3 >/dev/null 2>&1; then
                printf "  %-40s %12s\n" "TABLE" "ROWS"
                local dq='"'
                sqlite3 "$db_path" "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name;" 2>/dev/null | while IFS= read -r tbl; do
                    local rc
                    # Double any embedded quote so the name stays one quoted
                    # SQL identifier.
                    rc="$(sqlite3 "$db_path" "SELECT count(*) FROM \"${tbl//$dq/$dq$dq}\";" 2>/dev/null || echo '?')"
                    printf "  %-40s %12s\n" "$tbl" "$rc"
                done || warn "Could not read the table list."
            else
                warn "Database or sqlite3 not available."
            fi
            echo ""
            info "Data directory sizes:"
            cmd_size data
            ;;
        docs)
            info "Documentation Coverage:"
            local rst_count qmd_count
            rst_count="$(_audit_count_files "$ROOT/docz/source" "*.rst")"
            qmd_count="$(_audit_count_files "$ROOT/../../../quarto/project" "*.qmd")"
            printf "  %-25s %s\n" "RST files:" "$rst_count"
            printf "  %-25s %s\n" "QMD files:" "$qmd_count"
            # Count the public top-level modules of the installed package.
            _require_python
            "$PYTHON" -c "
import pkgutil, morie
mods = [m.name for m in pkgutil.iter_modules(morie.__path__) if not m.name.startswith('_')]
print(f'  Python modules: {len(mods)}')
" 2>/dev/null || warn "Could not import morie to count modules."
            ;;
        deps)
            info "Dependency Audit:"
            _require_python
            # importlib.metadata is in the standard library; pkg_resources is
            # deprecated and only present when setuptools is installed.
            "$PYTHON" -c "
from importlib.metadata import distributions
print(f'  Installed packages: {sum(1 for _ in distributions())}')
" 2>/dev/null || warn "Could not enumerate installed packages."
            # The outdated check is best effort; its failure must not abort
            # the audit under `pipefail`.
            { "$PYTHON" -m pip list --outdated --format=columns 2>/dev/null || true; } | awk 'NR <= 15'
            ;;
        summary)
            info "MORIE Project Summary:"
            _require_python
            "$PYTHON" -c "
import morie, pkgutil
mods = list(pkgutil.iter_modules(morie.__path__))
print(f'  Version: {morie.__version__}')
print(f'  Modules: {len(mods)}')
" 2>/dev/null || warn "Could not import morie."
            local py_files
            py_files="$(_audit_count_files "$ROOT/py-package/morie" "*.py")"
            printf "  %-25s %s\n" "Python files:" "$py_files"
            printf "  %-25s %s\n" "Executable:" "$(du -h "$0" | cut -f1)"
            ;;
        all)
            cmd_audit summary
            echo ""
            cmd_audit code
            echo ""
            cmd_audit data
            echo ""
            cmd_audit docs
            ;;
        *)
            error "Unknown: morie audit $subcmd"
            ;;
    esac
}

# =========================================================================
# morie catalog — Dataset catalog browser
# =========================================================================

# morie catalog [subcommand] [args]
#
# Browse the dataset catalog (morie.data.DATASET_CATALOG) and the bundled
# SQLite database. Default subcommand: list.
#   list                 all catalog entries (falls back to `sqlite3 .tables`)
#   search <term>        entries whose key or metadata contains <term>
#   info <dataset>       metadata of one entry, with near-miss suggestions
#   preview <ds> [n]     first n rows (default 5)
#   sources              entries grouped by their 'source' field
#   columns <dataset>    column listing
# User-supplied values reach Python through sys.argv, never through the
# source text, so quotes in them cannot change the code that runs.
# Globals read: ROOT, PYTHON.
# Returns 1 on a missing or invalid argument.
cmd_catalog() {
    local subcmd="${1:-list}"
    shift 2>/dev/null || true
    local db_path="$ROOT/py-package/morie/data/esml_datasets.db"

    case "$subcmd" in
        help|-h|--help)
            cat << 'CATEOF'
morie catalog — Dataset catalog browser

COMMANDS
    list               List all 32 datasets
    search <term>      Search by name/source
    info <dataset>     Full metadata for a dataset
    preview <ds> [n]   Preview first n rows (default 5)
    sources            List data sources
    columns <dataset>  List columns

EXAMPLES
    morie catalog list
    morie catalog search cpads
    morie catalog info cpads_2021_2022
    morie catalog preview cpads_2021_2022 10
CATEOF
            ;;
        list)
            info "MORIE Dataset Catalog:"
            _require_python
            "$PYTHON" -c "
from morie.data import DATASET_CATALOG
print(f'  {len(DATASET_CATALOG)} datasets available:')
print()
for key, meta in sorted(DATASET_CATALOG.items()):
    source = meta.get('source', '?')
    table = meta.get('table_name', key)
    desc = meta.get('description', '')[:50]
    print(f'  {key:35s} {source:15s} {desc}')
" 2>/dev/null || {
                # Fallback to sqlite3
                if [ -f "$db_path" ] && command -v sqlite3 >/dev/null 2>&1; then
                    sqlite3 "$db_path" ".tables"
                else
                    warn "Cannot list datasets."
                fi
            }
            ;;
        search)
            # ${1:-} keeps the usage message reachable under `set -u`.
            local term="${1:-}"
            if [ -z "$term" ]; then
                error "Usage: morie catalog search <term>"
                return 1
            fi
            info "Searching for '$term'..."
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import DATASET_CATALOG
term = sys.argv[1].lower()
for key, meta in sorted(DATASET_CATALOG.items()):
    if term in key.lower() or term in str(meta).lower():
        print(f'  {key}: {meta.get(\"description\", \"\")[:60]}')
" "$term" 2>/dev/null
            ;;
        info)
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie catalog info <dataset>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import DATASET_CATALOG
ds = sys.argv[1]
if ds in DATASET_CATALOG:
    meta = DATASET_CATALOG[ds]
    for k, v in meta.items():
        print(f'  {k}: {v}')
else:
    print(f'  Dataset \"{ds}\" not found.')
    close = [k for k in DATASET_CATALOG if ds.lower() in k.lower()]
    if close:
        print(f'  Did you mean: {\", \".join(close)}?')
" "$ds" 2>/dev/null
            ;;
        preview)
            local ds="${1:-}"
            local n="${2:-5}"
            if [ -z "$ds" ]; then
                error "Usage: morie catalog preview <dataset> [n]"
                return 1
            fi
            # n is spliced into SQL below, so it must be a plain integer.
            case "${n#-}" in
                ''|*[!0-9]*)
                    error "Row count must be an integer: $n"
                    return 1
                    ;;
            esac
            if [ -f "$db_path" ] && command -v sqlite3 >/dev/null 2>&1; then
                # Double embedded quotes so the name stays a single quoted
                # SQL identifier.
                local dq='"'
                sqlite3 -header -column "$db_path" "SELECT * FROM \"${ds//$dq/$dq$dq}\" LIMIT $n;" 2>/dev/null || error "Table '$ds' not found."
            else
                _require_python
                "$PYTHON" -c "
import sys
from morie.data import load_dataset
df = load_dataset(sys.argv[1])
print(df.head(int(sys.argv[2])).to_string())
" "$ds" "$n" 2>/dev/null
            fi
            ;;
        sources)
            info "Data sources:"
            _require_python
            "$PYTHON" -c "
from morie.data import DATASET_CATALOG
sources = {}
for key, meta in DATASET_CATALOG.items():
    src = meta.get('source', 'unknown')
    sources.setdefault(src, []).append(key)
for src, datasets in sorted(sources.items()):
    print(f'  {src} ({len(datasets)} datasets):')
    for ds in datasets:
        print(f'    - {ds}')
" 2>/dev/null
            ;;
        columns)
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie catalog columns <dataset>"
                return 1
            fi
            if [ -f "$db_path" ] && command -v sqlite3 >/dev/null 2>&1; then
                cmd_db columns "$ds"
            else
                _require_python
                "$PYTHON" -c "
import sys
from morie.data import load_dataset
df = load_dataset(sys.argv[1])
for col in df.columns:
    print(f'  {col}: {df[col].dtype} ({df[col].notna().sum()}/{len(df)} non-null)')
" "$ds" 2>/dev/null
            fi
            ;;
        *)
            error "Unknown: morie catalog $subcmd"
            ;;
    esac
}

# =========================================================================
# morie schema — Schema inspection
# =========================================================================

# morie schema <subcommand> [args]
#
# Inspect and compare column schemas.
#   csv <file>            columns, dtypes, null and unique counts inferred by
#                         pandas from the first 100 rows
#   db <table>            delegates to `cmd_db schema <table>`
#   compare <csv> <tbl>   column-name sets of a CSV vs. a table of the
#                         bundled SQLite database
#   validate <csv> <tbl>  currently the same check as compare
# File paths and table names are passed to Python through sys.argv, so
# quotes in them cannot alter the Python source.
# Globals read: ROOT, PYTHON.
# Returns 1 on a missing argument or missing file.
cmd_schema() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SCHEMAEOF'
morie schema — Schema inspection

COMMANDS
    csv <file>         Infer schema from CSV
    db <table>         Show DB table schema
    compare <f> <t>    Compare CSV vs DB schema
    validate <f> <t>   Validate CSV against DB schema

EXAMPLES
    morie schema csv data/files/csv/survey/cpads-2021-2022-pumf2.csv
    morie schema db cpads_2021_2022
SCHEMAEOF
            ;;
        csv)
            # ${1:-} keeps the usage message reachable under `set -u`.
            local file="${1:-}"
            if [ -z "$file" ]; then
                error "Usage: morie schema csv <file>"
                return 1
            fi
            if [ ! -f "$file" ]; then
                error "File not found: $file"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
import pandas as pd
df = pd.read_csv(sys.argv[1], nrows=100)
print(f'Columns: {len(df.columns)}')
print(f'Sample rows: {len(df)}')
print()
for col in df.columns:
    dtype = df[col].dtype
    nulls = df[col].isna().sum()
    uniq = df[col].nunique()
    print(f'  {col:30s} {str(dtype):10s} nulls={nulls:3d} unique={uniq}')
" "$file"
            ;;
        db)
            local table="${1:-}"
            if [ -z "$table" ]; then
                error "Usage: morie schema db <table>"
                return 1
            fi
            cmd_db schema "$table"
            ;;
        compare)
            local file="${1:-}" table="${2:-}"
            if [ -z "$file" ] || [ -z "$table" ]; then
                error "Usage: morie schema compare <csv> <table>"
                return 1
            fi
            if [ ! -f "$file" ]; then
                error "File not found: $file"
                return 1
            fi
            info "Comparing CSV schema vs DB schema..."
            _require_python
            # argv: 1 = CSV path, 2 = table name, 3 = database path.
            "$PYTHON" -c "
import sys
import pandas as pd, sqlite3
csv_path, table, db_path = sys.argv[1:4]
df = pd.read_csv(csv_path, nrows=5)
csv_cols = set(df.columns)
conn = sqlite3.connect(db_path)
# Quote the table name as an SQL identifier (embedded quotes doubled).
quoted = '\"' + table.replace('\"', '\"\"') + '\"'
cursor = conn.execute('PRAGMA table_info(' + quoted + ')')
db_cols = set(r[1] for r in cursor.fetchall())
conn.close()
only_csv = csv_cols - db_cols
only_db = db_cols - csv_cols
both = csv_cols & db_cols
print(f'In both: {len(both)}')
if only_csv: print(f'Only in CSV: {only_csv}')
if only_db: print(f'Only in DB: {only_db}')
if not only_csv and not only_db: print('Schemas match!')
" "$file" "$table" "$ROOT/py-package/morie/data/esml_datasets.db"
            ;;
        validate)
            local file="${1:-}" table="${2:-}"
            if [ -z "$file" ] || [ -z "$table" ]; then
                error "Usage: morie schema validate <csv> <table>"
                return 1
            fi
            cmd_schema compare "$file" "$table"
            ;;
        *)
            error "Unknown: morie schema $subcmd"
            ;;
    esac
}

# =========================================================================
# morie sample-data — Sample rows from datasets
# =========================================================================

# Succeeds when $1 is a decimal integer (optional leading minus sign).
_sample_data_is_int() {
    case "${1#-}" in
        ''|*[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

# morie sample-data <subcommand> <dataset> [args]
#
# Print rows of a dataset.
#   random <ds> [n]             random sample, n capped at the row count
#                               (default 10)
#   head <ds> [n]               first n rows (default 5); read through sqlite3
#                               when the bundled DB and the CLI are available
#   tail <ds> [n]               last n rows (default 5)
#   stratified <ds> <col> [n]   up to n rows per value of <col> (default 5)
# Dataset, column and count are handed to Python via sys.argv instead of
# being spliced into the source, and the count is validated before it is
# used in SQL.
# Globals read: ROOT, PYTHON.
# Returns 1 on a missing or invalid argument.
cmd_sample_data() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SAMPEOF'
morie sample-data — Sample rows from datasets

COMMANDS
    random <ds> [n]       Random sample (default n=10)
    head <ds> [n]         First n rows
    tail <ds> [n]         Last n rows
    stratified <ds> <col> [n]  Stratified sample

EXAMPLES
    morie sample-data random cpads_2021_2022 20
    morie sample-data head cpads_2021_2022 5
SAMPEOF
            ;;
        random)
            # ${1:-} keeps the usage message reachable under `set -u`.
            local ds="${1:-}" n="${2:-10}"
            if [ -z "$ds" ]; then
                error "Usage: morie sample-data random <dataset> [n]"
                return 1
            fi
            _sample_data_is_int "$n" || { error "Row count must be an integer: $n"; return 1; }
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
df = load_dataset(sys.argv[1])
n = int(sys.argv[2])
print(df.sample(min(n, len(df))).to_string())
" "$ds" "$n"
            ;;
        head)
            local ds="${1:-}" n="${2:-5}"
            if [ -z "$ds" ]; then
                error "Usage: morie sample-data head <dataset> [n]"
                return 1
            fi
            _sample_data_is_int "$n" || { error "Row count must be an integer: $n"; return 1; }
            local db_path="$ROOT/py-package/morie/data/esml_datasets.db"
            if [ -f "$db_path" ] && command -v sqlite3 >/dev/null 2>&1; then
                # Double embedded quotes so the name stays a single quoted
                # SQL identifier.
                local dq='"'
                sqlite3 -header -column "$db_path" "SELECT * FROM \"${ds//$dq/$dq$dq}\" LIMIT $n;" 2>/dev/null
            else
                _require_python
                "$PYTHON" -c "import sys; from morie.data import load_dataset; print(load_dataset(sys.argv[1]).head(int(sys.argv[2])).to_string())" "$ds" "$n"
            fi
            ;;
        tail)
            local ds="${1:-}" n="${2:-5}"
            if [ -z "$ds" ]; then
                error "Usage: morie sample-data tail <dataset> [n]"
                return 1
            fi
            _sample_data_is_int "$n" || { error "Row count must be an integer: $n"; return 1; }
            _require_python
            "$PYTHON" -c "import sys; from morie.data import load_dataset; print(load_dataset(sys.argv[1]).tail(int(sys.argv[2])).to_string())" "$ds" "$n"
            ;;
        stratified)
            local ds="${1:-}" col="${2:-}" n="${3:-5}"
            if [ -z "$ds" ] || [ -z "$col" ]; then
                error "Usage: morie sample-data stratified <dataset> <col> [n]"
                return 1
            fi
            _sample_data_is_int "$n" || { error "Row count must be an integer: $n"; return 1; }
            _require_python
            # argv: 1 = dataset, 2 = grouping column, 3 = rows per group.
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
df = load_dataset(sys.argv[1])
col = sys.argv[2]
n = int(sys.argv[3])
print(df.groupby(col).apply(lambda g: g.sample(min(n, len(g)))).reset_index(drop=True).to_string())
" "$ds" "$col" "$n"
            ;;
        *)
            error "Unknown: morie sample-data $subcmd"
            ;;
    esac
}

# =========================================================================
# morie validate-data — Data validation
# =========================================================================

# morie validate-data <subcommand> <dataset> [col]
#
# Data-quality checks on a dataset loaded with morie.data.load_dataset.
#   completeness <ds>   missing values per column, flagged OK / WARN (<10%)
#                       / HIGH
#   duplicates <ds>     number and percentage of duplicated rows
#   ranges <ds> <col>   min/max/mean/std for int64/float64 columns, otherwise
#                       unique count and the five most frequent values
#   types <ds>          dtype of every column
#   summary <ds>        completeness, duplicates and types in sequence
# Dataset and column names are passed to Python through sys.argv rather
# than spliced into the source.
# Globals read: PYTHON.
# Returns 1 on a missing argument.
cmd_validate_data() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'VALEOF'
morie validate-data — Data validation

COMMANDS
    completeness <ds>      Check missing values per column
    duplicates <ds>        Find duplicate rows
    ranges <ds> <col>      Check value ranges
    types <ds>             Check column data types
    summary <ds>           Full validation summary

EXAMPLES
    morie validate-data completeness cpads_2021_2022
    morie validate-data duplicates cpads_2021_2022
VALEOF
            ;;
        completeness)
            # ${1:-} keeps the usage message reachable under `set -u`.
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie validate-data completeness <dataset>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
ds = sys.argv[1]
df = load_dataset(ds)
total = len(df)
print(f'Dataset: {ds} ({total} rows, {len(df.columns)} columns)')
print()
for col in df.columns:
    missing = df[col].isna().sum()
    pct = 100 * missing / total if total > 0 else 0
    status = 'OK' if pct == 0 else ('WARN' if pct < 10 else 'HIGH')
    print(f'  {col:30s} {missing:6d}/{total} missing ({pct:5.1f}%) [{status}]')
" "$ds"
            ;;
        duplicates)
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie validate-data duplicates <dataset>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
ds = sys.argv[1]
df = load_dataset(ds)
total = len(df)
dups = df.duplicated().sum()
# An empty dataset has no duplicates; avoid dividing by zero.
pct = 100 * dups / total if total > 0 else 0.0
print(f'Dataset: {ds} ({total} rows)')
print(f'Duplicate rows: {dups} ({pct:.1f}%)')
" "$ds"
            ;;
        ranges)
            local ds="${1:-}" col="${2:-}"
            if [ -z "$ds" ] || [ -z "$col" ]; then
                error "Usage: morie validate-data ranges <dataset> <col>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
df = load_dataset(sys.argv[1])
col = sys.argv[2]
s = df[col]
print(f'Column: {col}')
print(f'  Type: {s.dtype}')
print(f'  Count: {s.count()}/{len(s)}')
if s.dtype in ['int64','float64']:
    print(f'  Min: {s.min()}')
    print(f'  Max: {s.max()}')
    print(f'  Mean: {s.mean():.4f}')
    print(f'  Std: {s.std():.4f}')
else:
    print(f'  Unique: {s.nunique()}')
    print(f'  Top: {s.value_counts().head(5).to_dict()}')
" "$ds" "$col"
            ;;
        types)
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie validate-data types <dataset>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
from morie.data import load_dataset
ds = sys.argv[1]
df = load_dataset(ds)
print(f'Dataset: {ds}')
print(df.dtypes.to_string())
" "$ds"
            ;;
        summary)
            local ds="${1:-}"
            if [ -z "$ds" ]; then
                error "Usage: morie validate-data summary <dataset>"
                return 1
            fi
            cmd_validate_data completeness "$ds"
            echo ""
            cmd_validate_data duplicates "$ds"
            echo ""
            cmd_validate_data types "$ds"
            ;;
        *)
            error "Unknown: morie validate-data $subcmd"
            ;;
    esac
}

# =========================================================================
# morie translate — Code translation (Python ↔ R)
# =========================================================================

# morie translate <subcommand> [function]
#
# Look up equivalent Python and R calls.
#   examples      static side-by-side table of common translations
#   py2r <func>   R call for a short Python key (exact match, else every key
#                 containing <func>)
#   r2py <func>   Python import for every R function whose name contains, or
#                 is contained in, <func>
#   cheatsheet    same output as examples
# The function name is passed to Python through sys.argv rather than spliced
# into the source.
# Globals read: PYTHON.
# Returns 1 when the function argument is missing.
cmd_translate() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'TRANSEOF'
morie translate — Code translation between Python and R

COMMANDS
    examples           Show common translations
    py2r <func>        Show R equivalent of Python function
    r2py <func>        Show Python equivalent of R function
    cheatsheet         Print full translation cheatsheet

EXAMPLES
    morie translate examples
    morie translate py2r ttest
    morie translate r2py t.test
TRANSEOF
            ;;
        examples)
            cat << 'EXEOF'
Common MORIE Python ↔ R Translations:

  Python                              R
  ─────────────────────────────────   ─────────────────────────────────
  from morie.data import load_dataset  morie::esml_load_dataset()
  from morie.statistics import *       library(morie)
  one_sample_ttest(x, mu0=0)          t.test(x, mu=0)
  two_sample_ttest(x, y)              t.test(x, y)
  chi2_independence(x, y)             chisq.test(x, y)
  kaplan_meier(time, event)           survfit(Surv(time, event) ~ 1)
  cox_ph(time, event, X)              coxph(Surv(time, event) ~ ., data)
  estimate_ate(y, d, X)               ATE via DoubleML or AIPW package
  sharp_rdd(y, x, c)                  rdrobust(y, x, c=c)
  did_2x2(y, treat, post)             did::att_gt(...)
  tsls(y, d, Z, X)                    ivreg(y ~ d | Z, data)
  mice(df, m=5)                       mice::mice(df, m=5)
  table1(df, group)                   tableone::CreateTableOne(...)
EXEOF
            ;;
        py2r)
            # ${1:-} keeps the usage message reachable under `set -u`.
            local func="${1:-}"
            if [ -z "$func" ]; then
                error "Usage: morie translate py2r <function>"
                return 1
            fi
            _require_python
            "$PYTHON" -c "
import sys
translations = {
    'ttest': 't.test(x, mu=0)',
    'ttest2': 't.test(x, y)',
    'anova': 'aov(y ~ group, data=df)',
    'chi2': 'chisq.test(table(x, y))',
    'kaplan_meier': 'survfit(Surv(time, event) ~ 1, data=df)',
    'cox': 'coxph(Surv(time, event) ~ x1 + x2, data=df)',
    'logrank': 'survdiff(Surv(time, event) ~ group, data=df)',
    'ipw': 'ipw::ipwpoint(treatment ~ x1 + x2, data=df)',
    'match': 'MatchIt::matchit(treat ~ x1 + x2, data=df)',
    'did': 'did::att_gt(yname, tname, idname, gname, data=df)',
    'rdd': 'rdrobust::rdrobust(y, x, c=cutoff)',
    'tsls': 'ivreg::ivreg(y ~ d | z, data=df)',
    'mice': 'mice::mice(df, m=5, method=\"pmm\")',
    'table1': 'tableone::CreateTableOne(vars, strata, data=df)',
}
func = sys.argv[1].lower()
if func in translations:
    print(f'  Python: {func}()')
    print(f'  R:      {translations[func]}')
else:
    close = [k for k in translations if func in k]
    if close:
        for c in close:
            print(f'  {c}: {translations[c]}')
    else:
        print(f'  No translation found for \"{func}\".')
        print(f'  Available: {\", \".join(sorted(translations.keys()))}')
" "$func"
            ;;
        r2py)
            local func="${1:-}"
            if [ -z "$func" ]; then
                error "Usage: morie translate r2py <function>"
                return 1
            fi
            info "R → Python translation for '$func':"
            _require_python
            "$PYTHON" -c "
import sys
translations = {
    't.test': 'from morie.statistics import one_sample_ttest, two_sample_ttest',
    'aov': 'from morie.statistics import one_way_anova',
    'chisq.test': 'from morie.statistics import chi2_independence',
    'survfit': 'from morie.survival import kaplan_meier',
    'coxph': 'from morie.survival import cox_ph',
    'survdiff': 'from morie.survival import logrank_test',
    'matchit': 'from morie.matching import match_nearest_neighbor',
    'rdrobust': 'from morie.rdd import sharp_rdd',
    'ivreg': 'from morie.iv import tsls',
    'mice': 'from morie.missing import mice',
    'lm': 'import statsmodels.api as sm; sm.OLS(y, X).fit()',
    'glm': 'import statsmodels.api as sm; sm.GLM(y, X, family).fit()',
}
func = sys.argv[1].lower().replace('::', '_')
found = False
for k, v in translations.items():
    if func in k.lower() or k.lower() in func:
        print(f'  R: {k}()')
        print(f'  Python: {v}')
        found = True
if not found:
    print(f'  No translation found for \"{func}\".')
" "$func"
            ;;
        cheatsheet)
            cmd_translate examples
            ;;
        *)
            error "Unknown: morie translate $subcmd"
            ;;
    esac
}

# =========================================================================
# morie health — System health monitoring
# =========================================================================

# cmd_health [status|resources|network|python|r|help]
#
# Read-only environment diagnostics for `morie health`. Every subcommand only
# inspects the system and prints a report through the info/success/warn/error
# helpers (defined elsewhere in this file) or plain printf.
#
# Arguments:
#   $1  subcommand (default: status)
# Globals read:
#   ROOT    project root used for the database, venv, git and disk checks
#   PYTHON  interpreter path; expected to be set by _resolve_python /
#           _require_python (both defined elsewhere in this file)
#
# The script runs under `set -euo pipefail`, so every probe that is allowed
# to fail is guarded explicitly; otherwise a failed probe would silently
# abort the whole report.
cmd_health() {
    local subcmd="${1:-status}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'HEALTHEOF'
morie health — System health monitoring

COMMANDS
    status             Overall system health
    resources          CPU, memory, disk
    network            Check API endpoints
    python             Python environment health
    r                  R environment health

EXAMPLES
    morie health
    morie health resources
    morie health network
HEALTHEOF
            ;;
        status)
            info "MORIE Health Status:"
            echo ""
            # Python. A failed lookup must reach the "not found" branch below
            # instead of aborting under `set -e`, and PYTHON may be unset
            # (hence the :- default under `set -u`).
            _resolve_python 2>/dev/null || true
            if [ -n "${PYTHON:-}" ]; then
                local pyver
                pyver="$("$PYTHON" --version 2>&1)"
                success "Python: $pyver"
            else
                error "Python: not found"
            fi
            # R
            if command -v Rscript >/dev/null 2>&1; then
                local rver
                rver="$(Rscript --version 2>&1 | head -1)"
                success "R: $rver"
            else
                warn "R: not found"
            fi
            # Database
            local db_path="$ROOT/py-package/morie/data/esml_datasets.db"
            if [ -f "$db_path" ]; then
                success "Database: $(du -h "$db_path" | cut -f1)"
            else
                warn "Database: not found"
            fi
            # Ollama (local daemon; short timeout so the report stays snappy)
            if curl -s --max-time 2 http://localhost:11434/api/tags >/dev/null 2>&1; then
                success "Ollama: running"
            else
                info "Ollama: not running"
            fi
            # OllamaFreeAPI
            # NOTE(review): _require_python is defined elsewhere; if it exits
            # when no interpreter is available, the report stops here with its
            # message suppressed by 2>/dev/null — confirm that is intended.
            _require_python 2>/dev/null
            if "$PYTHON" -c "import ollamafreeapi" 2>/dev/null; then
                success "OllamaFreeAPI: installed"
            else
                info "OllamaFreeAPI: not installed"
            fi
            # Venv (only checks that the directory exists, not activation)
            if [ -d "$ROOT/.venv" ]; then
                success "Venv: active"
            else
                warn "Venv: missing"
            fi
            # Git (silently skipped when ROOT is not a work tree)
            if git -C "$ROOT" status >/dev/null 2>&1; then
                local branch
                branch="$(git -C "$ROOT" branch --show-current 2>/dev/null)"
                success "Git: branch $branch"
            fi
            ;;
        resources)
            info "System Resources:"
            # Disk
            printf "  %-20s %s\n" "Disk (project):" "$(du -sh "$ROOT" 2>/dev/null | cut -f1)"
            printf "  %-20s %s\n" "Disk (free):" "$(df -h "$ROOT" 2>/dev/null | tail -1 | awk '{print $4}')"
            # Memory: vm_stat is the macOS tool, free the Linux one.
            if command -v vm_stat >/dev/null 2>&1; then
                local pages_free pages_active page_size
                page_size="$(sysctl -n hw.pagesize 2>/dev/null || echo 4096)"
                # awk does the match and strips the trailing '.', so a missing
                # line yields an empty string rather than a pipefail abort.
                pages_free="$(vm_stat 2>/dev/null | awk '/Pages free/ { gsub(/\./, "", $3); print $3 }' || true)"
                pages_active="$(vm_stat 2>/dev/null | awk '/Pages active/ { gsub(/\./, "", $3); print $3 }' || true)"
                if [ -n "$pages_free" ]; then
                    local free_mb=$((pages_free * page_size / 1048576))
                    local active_mb=$((${pages_active:-0} * page_size / 1048576))
                    printf "  %-20s %s MB free, %s MB active\n" "Memory:" "$free_mb" "$active_mb"
                fi
            elif command -v free >/dev/null 2>&1; then
                free -h 2>/dev/null | head -2
            fi
            # CPU. `sysctl` exists on Linux too, but only BSD/macOS expose
            # hw.ncpu, so a failing query must fall through to the next probe
            # instead of aborting under `set -e`.
            local cpus=""
            if command -v sysctl >/dev/null 2>&1; then
                cpus="$(sysctl -n hw.ncpu 2>/dev/null || true)"
            fi
            if [ -z "$cpus" ] && command -v getconf >/dev/null 2>&1; then
                cpus="$(getconf _NPROCESSORS_ONLN 2>/dev/null || true)"
            fi
            if [ -z "$cpus" ] && [ -r /proc/cpuinfo ]; then
                # Anchor the match so only the per-core "processor : N" lines
                # are counted.
                cpus="$(grep -c '^processor' /proc/cpuinfo 2>/dev/null || true)"
            fi
            if [ -n "$cpus" ] && [ "$cpus" != "0" ]; then
                printf "  %-20s %s cores\n" "CPU:" "$cpus"
            fi
            ;;
        network)
            info "Network Health:"
            # Ollama local
            if curl -s --max-time 3 http://localhost:11434/api/tags >/dev/null 2>&1; then
                success "Ollama (localhost:11434): reachable"
            else
                info "Ollama (localhost:11434): not reachable"
            fi
            # PyPI
            if curl -s --max-time 5 https://pypi.org/simple/ >/dev/null 2>&1; then
                success "PyPI: reachable"
            else
                warn "PyPI: not reachable"
            fi
            # CRAN
            if curl -s --max-time 5 https://cran.r-project.org/ >/dev/null 2>&1; then
                success "CRAN: reachable"
            else
                warn "CRAN: not reachable"
            fi
            # GitHub
            if curl -s --max-time 5 https://github.com >/dev/null 2>&1; then
                success "GitHub: reachable"
            else
                warn "GitHub: not reachable"
            fi
            ;;
        python)
            info "Python Environment Health:"
            _require_python
            # Imports each critical package and prints its __version__.
            "$PYTHON" -c "
import sys, importlib
print(f'  Python: {sys.version}')
print(f'  Executable: {sys.executable}')
print(f'  Prefix: {sys.prefix}')
critical = ['numpy', 'pandas', 'scipy', 'sklearn', 'statsmodels', 'matplotlib']
for pkg in critical:
    try:
        m = importlib.import_module(pkg)
        v = getattr(m, '__version__', '?')
        print(f'  {pkg}: {v} ✓')
    except ImportError:
        print(f'  {pkg}: NOT INSTALLED ✗')
"
            ;;
        r)
            info "R Environment Health:"
            if command -v Rscript >/dev/null 2>&1; then
                # requireNamespace() checks availability without attaching.
                Rscript -e "
                    cat(paste('  R:', R.version.string, '\n'))
                    cat(paste('  Home:', R.home(), '\n'))
                    pkgs <- c('DBI', 'RSQLite', 'testthat', 'devtools', 'survival', 'MatchIt')
                    for (p in pkgs) {
                        if (requireNamespace(p, quietly=TRUE)) {
                            v <- as.character(packageVersion(p))
                            cat(paste0('  ', p, ': ', v, ' OK\n'))
                        } else {
                            cat(paste0('  ', p, ': NOT INSTALLED\n'))
                        }
                    }
                " 2>/dev/null
            else
                warn "R not found."
            fi
            ;;
        *)
            error "Unknown: morie health $subcmd"
            ;;
    esac
}

# =========================================================================
# morie stat — Run statistical tests from CLI
# =========================================================================

# cmd_stat <list|count|search|categories|run|info|help> [args...]
#
# Front end for the morie.stat_commands registry (`morie stat`). Each arm
# runs a short inline Python program through "$PYTHON".
#
# Arguments:
#   $1    subcommand (default: help)
#   $2..  subcommand arguments (search term, command name, handler args)
# Globals read:
#   PYTHON  interpreter path, expected to be usable after
#           _require_esml_package (defined elsewhere in this file)
# Returns:
#   1 when a required argument is missing; otherwise the status of the last
#   command executed.
#
# User-supplied values are handed to Python through sys.argv and never spliced
# into the program text, so quotes or backslashes in an argument cannot break
# or alter the Python source. Positional parameters are read with ${N:-}
# because the script runs under `set -u`; a bare "$1" would abort with
# "unbound variable" before the usage message could be printed.
cmd_stat() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'STATEOF'
morie stat — Run statistical tests from the command line

COMMANDS
    list               List all available statistical commands (1200+)
    count              Count registered commands
    search <term>      Search commands by name
    categories         List command categories
    run <cmd> [args]   Run a stat command with arguments
    info <cmd>         Show help for a specific command

EXAMPLES
    morie stat list
    morie stat count
    morie stat search survival
    morie stat categories
    morie stat run welch_ttest data.csv col group
    morie stat info kaplan_meier
STATEOF
            ;;
        list)
            info "All registered statistical commands:"
            _require_esml_package
            # The alias suffix is built by concatenation: reusing the same
            # quote character inside an f-string needs Python 3.12+.
            "$PYTHON" -c "
from morie.stat_commands import commands_by_category
for cat, cmds in sorted(commands_by_category().items()):
    print(f'\n  {cat} ({len(cmds)} commands):')
    for c in cmds:
        aliases = ' (' + ', '.join(c.aliases) + ')' if c.aliases else ''
        print(f'    {c.name}{aliases} — {c.description[:60]}')
"
            ;;
        count)
            _require_esml_package
            "$PYTHON" -c "
from morie.stat_commands import COMMAND_REGISTRY, ALIAS_MAP, all_command_names
print(f'  Registry commands: {len(COMMAND_REGISTRY)}')
print(f'  Aliases: {len(ALIAS_MAP)}')
print(f'  Total names: {len(all_command_names())}')
"
            ;;
        search)
            local term="${1:-}"
            if [ -z "$term" ]; then
                error "Usage: morie stat search <term>"
                return 1
            fi
            _require_esml_package
            # Case-insensitive substring match on name, description, category.
            "$PYTHON" -c "
import sys
from morie.stat_commands import COMMAND_REGISTRY
term = sys.argv[1].lower()
matches = [(n, c) for n, c in COMMAND_REGISTRY.items() if term in n.lower() or term in c.description.lower() or term in c.category.lower()]
print(f'  Found {len(matches)} matches for \"{term}\":')
for name, cmd in sorted(matches, key=lambda x: x[0]):
    print(f'    {name:35s} [{cmd.category}] {cmd.description[:50]}')
" "$term"
            ;;
        categories)
            _require_esml_package
            "$PYTHON" -c "
from morie.stat_commands import CATEGORIES
for cat, cmds in sorted(CATEGORIES.items()):
    print(f'  {cat:25s} {len(cmds):4d} commands')
total = sum(len(c) for c in CATEGORIES.values())
print(f'  {\"TOTAL\":25s} {total:4d}')
"
            ;;
        run)
            local cmd_name="${1:-}"
            if [ -z "$cmd_name" ]; then
                error "Usage: morie stat run <command> [args...]"
                return 1
            fi
            shift
            _require_esml_package
            "$PYTHON" -c "
import sys
from morie.stat_commands import resolve
name = sys.argv[1]
cmd = resolve(name)
if cmd is None:
    print(f'Command not found: {name}', file=sys.stderr)
    sys.exit(1)
# Handler arguments are re-split on whitespace, which keeps the historical
# behaviour of this command (one quoted argument may carry several tokens).
args = ' '.join(sys.argv[2:]).split()
try:
    result = cmd.handler_repl(*args)
    print(result)
except Exception as e:
    print(f'Error: {e}', file=sys.stderr)
    print(f'Usage: {cmd.usage}', file=sys.stderr)
" "$cmd_name" "$@"
            ;;
        info)
            local cmd_name="${1:-}"
            if [ -z "$cmd_name" ]; then
                error "Usage: morie stat info <command>"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
from morie.stat_commands import resolve
name = sys.argv[1]
cmd = resolve(name)
if cmd is None:
    print(f'Command not found: {name}')
else:
    print(f'  Name: {cmd.name}')
    print(f'  Category: {cmd.category}')
    print(f'  Usage: {cmd.usage}')
    print(f'  Description: {cmd.description}')
    print(f'  Module: {cmd.module}')
    if cmd.aliases:
        print(f'  Aliases: {\", \".join(cmd.aliases)}')
    if cmd.is_compound:
        print(f'  Type: compound (multi-step)')
    if cmd.is_r_bridge:
        print(f'  Type: R bridge')
    doc = cmd.handler_repl.__doc__
    if doc:
        print(f'  Doc: {doc[:200]}')
" "$cmd_name"
            ;;
        *)
            error "Unknown: morie stat $subcmd"
            echo "  Run 'morie stat help' for usage." >&2
            ;;
    esac
}

# =========================================================================
# morie causal — Causal inference from CLI
# =========================================================================

# cmd_causal <subcommand> [args...]
#
# Causal-inference front end (`morie causal`). Each arm loads a CSV with
# pandas and calls one estimator from the morie Python package, printing its
# result.
#
# Arguments:
#   $1    subcommand (default: help)
#   $2..  subcommand arguments, see the help text below
# Globals read:
#   PYTHON  interpreter path, expected to be usable after
#           _require_esml_package (defined elsewhere in this file)
# Returns:
#   1 when a required argument is missing (or, for `ate`, the CSV does not
#   exist); otherwise the status of the last command executed.
#
# Conventions used by every arm:
#   * Positional parameters are read with ${N:-}; under `set -u` a bare "$1"
#     would abort with "unbound variable" before the usage message is shown.
#   * CSV path and column names are passed to Python via sys.argv rather than
#     interpolated into the program text, so quotes in a value cannot break or
#     alter the Python source.
#   * Covariates are re-split on whitespace (historical behaviour: a single
#     quoted "x1 x2" argument still means two columns). When none are given,
#     every column other than the named ones is used.
cmd_causal() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'CAUSALEOF'
morie causal — Causal inference from the command line

COMMANDS
    ate <csv> <y> <d> [covs]        Average treatment effect (IPW-OLS)
    att <csv> <y> <d> [covs]        ATT via IPW
    aipw <csv> <y> <d> [covs]       Augmented IPW
    dml <csv> <y> <d> [covs]        Double ML (PLR)
    irm <csv> <y> <d> [covs]        Interactive regression model
    ipw <csv> <d> [covs]            IPW weights
    propensity <csv> <d> [covs]     Propensity scores
    match <csv> <d> [covs]          PS nearest neighbor matching
    evalue <rr> [ci_lower]          E-value for unmeasured confounding
    sensitivity <rr> [ci]           Rosenbaum sensitivity bounds
    cate <csv> <y> <d> [covs]       Conditional ATE
    gate <csv> <y> <d> <g> [covs]   Group ATE
    late <csv> <y> <d> <z> [covs]   Local ATE (IV)
    gcomp <csv> <y> <d> [covs]      G-computation
    plr <csv> <y> <d> [covs]        Partialled linear regression
    pliv <csv> <y> <d> <z> [covs]   Partialled linear IV

EXAMPLES
    morie causal ate data.csv outcome treatment age sex
    morie causal dml data.csv y d x1 x2 x3
    morie causal match data.csv treatment x1 x2
    morie causal evalue 2.5 1.8
CAUSALEOF
            ;;
        ate)
            local csv="${1:-}" y="${2:-}" d="${3:-}"
            if [ -z "$csv" ] || [ -z "$y" ] || [ -z "$d" ]; then
                error "Usage: morie causal ate <csv> <outcome> <treatment> [covariates...]"
                return 1
            fi
            if [ ! -f "$csv" ]; then
                error "File not found: $csv"
                return 1
            fi
            shift 3
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.effects import estimate_ate
csv, y, d = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (y, d)]
result = estimate_ate(df, y, d, covs)
print('Average Treatment Effect (IPW-OLS)')
print(f'  ATE: {result.ate:.4f}')
print(f'  SE:  {result.se:.4f}')
print(f'  p:   {result.p_value:.6f}')
print(f'  95% CI: [{result.ci_lower:.4f}, {result.ci_upper:.4f}]')
" "$csv" "$y" "$d" "$@"
            ;;
        att)
            local csv="${1:-}" y="${2:-}" d="${3:-}"
            if [ -z "$csv" ] || [ -z "$y" ] || [ -z "$d" ]; then
                error "Usage: morie causal att <csv> <outcome> <treatment> [covariates...]"
                return 1
            fi
            shift 3
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import estimate_att
csv, y, d = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (y, d)]
result = estimate_att(df, y, d, covs)
print('Average Treatment Effect on the Treated (ATT)')
print(result)
" "$csv" "$y" "$d" "$@"
            ;;
        aipw)
            local csv="${1:-}" y="${2:-}" d="${3:-}"
            if [ -z "$csv" ] || [ -z "$y" ] || [ -z "$d" ]; then
                error "Usage: morie causal aipw <csv> <outcome> <treatment> [covariates...]"
                return 1
            fi
            shift 3
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import estimate_aipw
csv, y, d = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (y, d)]
result = estimate_aipw(df, y, d, covs)
print('Augmented Inverse Probability Weighting (AIPW)')
print(result)
" "$csv" "$y" "$d" "$@"
            ;;
        dml)
            local csv="${1:-}" y="${2:-}" d="${3:-}"
            if [ -z "$csv" ] || [ -z "$y" ] || [ -z "$d" ]; then
                error "Usage: morie causal dml <csv> <outcome> <treatment> [covariates...]"
                return 1
            fi
            shift 3
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import estimate_double_ml
csv, y, d = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (y, d)]
result = estimate_double_ml(df, y, d, covs)
print('Double Machine Learning (DML-PLR)')
print(result)
" "$csv" "$y" "$d" "$@"
            ;;
        irm)
            local csv="${1:-}" y="${2:-}" d="${3:-}"
            if [ -z "$csv" ] || [ -z "$y" ] || [ -z "$d" ]; then
                error "Usage: morie causal irm <csv> <outcome> <treatment> [covariates...]"
                return 1
            fi
            shift 3
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import estimate_irm
csv, y, d = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (y, d)]
result = estimate_irm(df, y, d, covs)
print('Interactive Regression Model (IRM)')
print(result)
" "$csv" "$y" "$d" "$@"
            ;;
        ipw)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie causal ipw <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import calculate_ipw_weights, effective_sample_size
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
weights = calculate_ipw_weights(df, d, covs)
ess = effective_sample_size(weights)
print('Inverse Probability Weights')
print(f'  N: {len(weights)}')
print(f'  Mean weight: {weights.mean():.4f}')
print(f'  Min weight: {weights.min():.4f}')
print(f'  Max weight: {weights.max():.4f}')
print(f'  Effective sample size: {ess:.1f}')
" "$csv" "$d" "$@"
            ;;
        propensity)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie causal propensity <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.causal import compute_propensity_scores
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
ps = compute_propensity_scores(df, d, covs)
print('Propensity Scores')
print(f'  N: {len(ps)}')
print(f'  Mean: {ps.mean():.4f}')
print(f'  Min: {ps.min():.4f}')
print(f'  Max: {ps.max():.4f}')
print(f'  Std: {ps.std():.4f}')
" "$csv" "$d" "$@"
            ;;
        match)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie causal match <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import estimate_propensity_score, match_nearest_neighbor, balance_diagnostics, estimate_att_matched
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
print('Propensity Score Nearest Neighbor Matching')
print('1. Estimating propensity scores...')
ps = estimate_propensity_score(df, d, covs)
print(f'   PS range: [{ps.min():.4f}, {ps.max():.4f}]')
print('2. Matching...')
result = match_nearest_neighbor(df, d, ps)
print(f'   Matched pairs: {result.n_matched}')
print('3. Balance diagnostics...')
bal = balance_diagnostics(df, result.matched_data, d, covs)
print(bal)
" "$csv" "$d" "$@"
            ;;
        evalue)
            local rr="${1:-}" ci="${2:-}"
            if [ -z "$rr" ]; then
                error "Usage: morie causal evalue <rr> [ci_lower]"
                return 1
            fi
            _require_esml_package
            # The CI bound is forwarded only when non-empty, so e_value_rr is
            # called with one or two arguments exactly as before.
            "$PYTHON" -c "
import sys
from morie.sensitivity import e_value_rr
rr = float(sys.argv[1])
ci_args = [float(a) for a in sys.argv[2:] if a]
result = e_value_rr(rr, *ci_args)
print('E-value for Unmeasured Confounding')
print(result)
" "$rr" "$ci"
            ;;
        sensitivity)
            # The optional second argument is accepted but not used yet, and
            # rosenbaum_bounds is imported without being called: the report
            # currently consists of the E-value only.
            local rr="${1:-}"
            if [ -z "$rr" ]; then
                error "Usage: morie causal sensitivity <rr> [ci_lower]"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
from morie.sensitivity import rosenbaum_bounds, e_value_rr
rr = float(sys.argv[1])
print('Sensitivity Analysis')
print()
ev = e_value_rr(rr)
print('E-value:')
print(ev)
" "$rr"
            ;;
        cate|gate|late|gcomp|plr|pliv)
            local csv="${1:-}"
            if [ -z "$csv" ]; then
                error "Usage: morie causal $subcmd <csv> <args...>"
                return 1
            fi
            shift
            _require_esml_package
            # func_map is chosen from this fixed table, so it is safe to
            # splice into the Python source.
            local func_map
            case "$subcmd" in
                cate) func_map="from morie.causal import estimate_cate as fn" ;;
                gate) func_map="from morie.causal import estimate_gate as fn" ;;
                late) func_map="from morie.causal import estimate_late as fn" ;;
                gcomp) func_map="from morie.effects import estimate_ate_gcomputation as fn" ;;
                plr) func_map="from morie.effects import estimate_plr as fn" ;;
                pliv) func_map="from morie.effects import estimate_pliv as fn" ;;
            esac
            # NOTE(review): SECURITY — the remaining arguments ($*) are pasted
            # verbatim into the fn(df, ...) call, i.e. they are evaluated as
            # Python source. Bare column names such as `y d` are therefore not
            # valid input; callers must supply a Python argument list (for
            # example "'y', 'd'"). Left unchanged because the estimator
            # signatures are not visible here; never feed untrusted input.
            "$PYTHON" -c "
import sys
import pandas as pd
$func_map
df = pd.read_csv(sys.argv[1])
print(fn(df, $*))
" "$csv" 2>&1 || error "Failed. Check arguments."
            ;;
        *)
            error "Unknown: morie causal $subcmd"
            echo "  Run 'morie causal help' for usage." >&2
            ;;
    esac
}

# =========================================================================
# morie survival — Survival analysis from CLI
# =========================================================================

# cmd_survival <subcommand> <csv> <time> <event> [extra]
#
# Survival-analysis front end (`morie survival`). Each arm reads a CSV with
# pandas, pulls the time and event columns as arrays and calls one function
# from morie.survival, printing its result.
#
# Arguments:
#   $1    subcommand (default: help)
#   $2..  CSV path, time column, event column and per-command extras
# Globals read:
#   PYTHON  interpreter path, expected to be usable after
#           _require_esml_package (defined elsewhere in this file)
# Returns:
#   1 when a required argument is missing; otherwise the status of the last
#   command executed.
#
# Positional parameters are read with ${N:-} because the script runs under
# `set -u` (a bare "$1" would abort before the usage message is shown), and
# all user values reach Python through sys.argv instead of being interpolated
# into the program text, so quotes in a path or column name are harmless.
cmd_survival() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'SURVEOF'
morie survival — Survival analysis from the command line

COMMANDS
    km <csv> <time> <event>           Kaplan-Meier survival curve
    cox <csv> <time> <event> [covs]   Cox proportional hazards
    logrank <csv> <time> <event> <g>  Log-rank test
    nelson-aalen <csv> <time> <event> Nelson-Aalen estimator
    aft <csv> <time> <event> [dist]   Accelerated failure time model
    rmst <csv> <time> <event> [tau]   Restricted mean survival time
    ph-test <csv> <time> <event>      Test PH assumption
    concordance <csv> <time> <event>  Concordance index

EXAMPLES
    morie survival km data.csv time event
    morie survival cox data.csv time event age sex
    morie survival logrank data.csv time event treatment
    morie survival aft data.csv time event weibull
SURVEOF
            ;;
        km)
            local csv="${1:-}" time="${2:-}" event="${3:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival km <csv> <time_col> <event_col>"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import kaplan_meier
csv, time_col, event_col = sys.argv[1:4]
df = pd.read_csv(csv)
result = kaplan_meier(df[time_col].values, df[event_col].values)
print('Kaplan-Meier Survival Curve')
print(f'  N: {len(df)}')
print(f'  Events: {int(df[event_col].sum())}')
print(f'  Median survival: {result.median_survival:.2f}' if hasattr(result, 'median_survival') and result.median_survival else '  Median survival: not reached')
print(result)
" "$csv" "$time" "$event"
            ;;
        cox)
            local csv="${1:-}" time="${2:-}" event="${3:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival cox <csv> <time> <event> [covariates...]"
                return 1
            fi
            shift 3
            _require_esml_package
            # Covariates are re-split on whitespace (historical behaviour);
            # with none given, every column except time/event is used.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import cox_ph
csv, time_col, event_col = sys.argv[1:4]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[4:]).split() or [c for c in df.columns if c not in (time_col, event_col)]
X = df[covs].values if covs else None
result = cox_ph(df[time_col].values, df[event_col].values, X)
print('Cox Proportional Hazards Model')
print(result)
" "$csv" "$time" "$event" "$@"
            ;;
        logrank)
            local csv="${1:-}" time="${2:-}" event="${3:-}" group="${4:-}"
            # The group column is the last required argument, so its presence
            # implies the first three were supplied.
            if [ -z "$group" ]; then
                error "Usage: morie survival logrank <csv> <time> <event> <group>"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import logrank_test
csv, time_col, event_col, group_col = sys.argv[1:5]
df = pd.read_csv(csv)
result = logrank_test(df[time_col].values, df[event_col].values, df[group_col].values)
print('Log-rank Test')
print(result)
" "$csv" "$time" "$event" "$group"
            ;;
        nelson-aalen|na)
            local csv="${1:-}" time="${2:-}" event="${3:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival nelson-aalen <csv> <time> <event>"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import nelson_aalen
csv, time_col, event_col = sys.argv[1:4]
df = pd.read_csv(csv)
result = nelson_aalen(df[time_col].values, df[event_col].values)
print('Nelson-Aalen Cumulative Hazard')
print(result)
" "$csv" "$time" "$event"
            ;;
        aft)
            local csv="${1:-}" time="${2:-}" event="${3:-}" dist="${4:-weibull}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival aft <csv> <time> <event> [dist]"
                return 1
            fi
            _require_esml_package
            # An unrecognised distribution name falls back to the Weibull
            # model, although the heading still echoes the name as typed.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import aft_weibull, aft_lognormal, aft_loglogistic
csv, time_col, event_col, dist = sys.argv[1:5]
df = pd.read_csv(csv)
t, e = df[time_col].values, df[event_col].values
if dist == 'weibull': result = aft_weibull(t, e)
elif dist == 'lognormal': result = aft_lognormal(t, e)
elif dist == 'loglogistic': result = aft_loglogistic(t, e)
else: result = aft_weibull(t, e)
print(f'AFT Model ({dist})')
print(result)
" "$csv" "$time" "$event" "$dist"
            ;;
        rmst)
            local csv="${1:-}" time="${2:-}" event="${3:-}" tau="${4:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival rmst <csv> <time> <event> [tau]"
                return 1
            fi
            _require_esml_package
            # tau is always forwarded; an empty string means no horizon given.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import restricted_mean_survival_time
csv, time_col, event_col, tau_arg = sys.argv[1:5]
df = pd.read_csv(csv)
tau = float(tau_arg) if tau_arg else None
result = restricted_mean_survival_time(df[time_col].values, df[event_col].values, tau=tau)
print('Restricted Mean Survival Time')
print(result)
" "$csv" "$time" "$event" "$tau"
            ;;
        ph-test)
            local csv="${1:-}" time="${2:-}" event="${3:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival ph-test <csv> <time> <event>"
                return 1
            fi
            _require_esml_package
            # Fits a Cox model on every remaining column, then tests it.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import test_ph_assumption, cox_ph
csv, time_col, event_col = sys.argv[1:4]
df = pd.read_csv(csv)
covs = [c for c in df.columns if c not in (time_col, event_col)]
X = df[covs].values if covs else None
cox_result = cox_ph(df[time_col].values, df[event_col].values, X)
ph_result = test_ph_assumption(cox_result)
print('Proportional Hazards Assumption Test')
print(ph_result)
" "$csv" "$time" "$event"
            ;;
        concordance)
            local csv="${1:-}" time="${2:-}" event="${3:-}"
            if [ -z "$csv" ] || [ -z "$time" ] || [ -z "$event" ]; then
                error "Usage: morie survival concordance <csv> <time> <event>"
                return 1
            fi
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.survival import concordance_index
csv, time_col, event_col = sys.argv[1:4]
df = pd.read_csv(csv)
c = concordance_index(df[time_col].values, df[event_col].values)
print(f'Concordance Index: {c:.4f}')
" "$csv" "$time" "$event"
            ;;
        *)
            error "Unknown: morie survival $subcmd"
            echo "  Run 'morie survival help' for usage." >&2
            ;;
    esac
}

# =========================================================================
# morie match-cli — Matching analysis from CLI
# =========================================================================

# cmd_match_cli <subcommand> <csv> <treatment> [covariates...]
#
# Matching front end (`morie match-cli`). Each arm reads a CSV with pandas and
# calls functions from morie.matching, printing the result.
#
# Arguments:
#   $1    subcommand (default: help)
#   $2    CSV path
#   $3    treatment column
#   $4..  covariate columns; when omitted, every column other than the
#         treatment is used
# Globals read:
#   PYTHON  interpreter path, expected to be usable after
#           _require_esml_package (defined elsewhere in this file)
# Returns:
#   1 when a required argument is missing; otherwise the status of the last
#   command executed.
#
# Positional parameters are read with ${N:-} because the script runs under
# `set -u` (a bare "$1" would abort before the usage message is shown). User
# values reach Python through sys.argv rather than being interpolated into the
# program text. Covariates are re-split on whitespace, preserving the
# historical behaviour that one quoted "x1 x2" argument means two columns.
cmd_match_cli() {
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'MATCHEOF'
morie match-cli — Propensity score matching from CLI

COMMANDS
    nn <csv> <d> [covs]              Nearest neighbor matching
    exact <csv> <d> [covs]           Exact matching
    mahalanobis <csv> <d> [covs]     Mahalanobis distance matching
    cem <csv> <d> [covs]             Coarsened exact matching
    full <csv> <d> [covs]            Full matching
    optimal <csv> <d> [covs]         Optimal pair matching
    subclass <csv> <d> [covs]        PS subclassification
    entropy <csv> <d> [covs]         Entropy balancing
    balance <csv> <d> [covs]         Balance diagnostics only
    compare <csv> <d> [covs]         Compare matching methods

EXAMPLES
    morie match-cli nn data.csv treatment age sex income
    morie match-cli balance data.csv treatment
    morie match-cli compare data.csv treatment x1 x2 x3
MATCHEOF
            ;;
        nn|nearest)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli nn <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import estimate_propensity_score, match_nearest_neighbor, balance_diagnostics
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
ps = estimate_propensity_score(df, d, covs)
result = match_nearest_neighbor(df, d, ps)
print('Nearest Neighbor PS Matching')
print(f'  Treated: {int(df[d].sum())}')
print(f'  Control: {int((1 - df[d]).sum())}')
print(f'  Matched: {result.n_matched}')
bal = balance_diagnostics(df, result.matched_data, d, covs)
print()
print('Balance:')
print(bal)
" "$csv" "$d" "$@"
            ;;
        exact)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli exact <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import match_exact
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
result = match_exact(df, d, covs)
print('Exact Matching')
print(result)
" "$csv" "$d" "$@"
            ;;
        mahalanobis|maha)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli mahalanobis <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import match_mahalanobis
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
result = match_mahalanobis(df, d, covs)
print('Mahalanobis Distance Matching')
print(result)
" "$csv" "$d" "$@"
            ;;
        cem|subclass|entropy|full|optimal)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli $subcmd <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            # func_map comes from this fixed table (never from user input), so
            # splicing it into the Python source is safe.
            local func_map
            case "$subcmd" in
                cem) func_map="match_cem" ;;
                subclass) func_map="subclassify" ;;
                entropy) func_map="entropy_balance" ;;
                full) func_map="match_full" ;;
                optimal) func_map="match_optimal_pair" ;;
            esac
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import estimate_propensity_score, $func_map
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
ps = estimate_propensity_score(df, d, covs)
result = $func_map(df, d, ps)
print(result)
" "$csv" "$d" "$@"
            ;;
        balance)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli balance <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import balance_table
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
result = balance_table(df, d, covs)
print('Balance Table (pre-matching)')
print(result)
" "$csv" "$d" "$@"
            ;;
        compare)
            local csv="${1:-}" d="${2:-}"
            if [ -z "$csv" ] || [ -z "$d" ]; then
                error "Usage: morie match-cli compare <csv> <treatment> [covariates...]"
                return 1
            fi
            shift 2
            _require_esml_package
            # Only NN and Mahalanobis are compared; a method that raises is
            # reported as failed and does not stop the comparison.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.matching import estimate_propensity_score, match_nearest_neighbor, match_exact, match_mahalanobis, balance_diagnostics
csv, d = sys.argv[1:3]
df = pd.read_csv(csv)
covs = ' '.join(sys.argv[3:]).split() or [c for c in df.columns if c != d]
ps = estimate_propensity_score(df, d, covs)
print('Comparing Matching Methods:')
for method_name, method_fn in [('NN', match_nearest_neighbor), ('Mahalanobis', match_mahalanobis)]:
    try:
        result = method_fn(df, d, ps)
        print(f'  {method_name}: {result.n_matched} matched')
    except Exception as e:
        print(f'  {method_name}: failed ({e})')
" "$csv" "$d" "$@"
            ;;
        *)
            error "Unknown: morie match-cli $subcmd"
            ;;
    esac
}

# =========================================================================
# morie did-cli — Difference-in-differences from CLI
# =========================================================================

cmd_did_cli() {
    # Dispatch `morie did-cli <subcmd> ...` to an estimator in morie.did.
    #
    # Arguments:
    #   $1     subcommand (defaults to "help")
    #   $2...  CSV path followed by the column names that subcommand expects
    # Returns:
    #   1 on a usage error.
    #
    # Positional parameters are read as ${N:-} because this script runs under
    # `set -u`: a missing argument has to reach the usage message instead of
    # aborting with "unbound variable".  User-supplied values are handed to
    # Python through sys.argv rather than spliced into the program text, so
    # quotes or other special characters in a path or column name cannot
    # change the code that runs.
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'DIDEOF'
morie did-cli — Difference-in-differences from the command line

COMMANDS
    2x2 <csv> <y> <treat> <post>         Classic 2x2 DiD
    event <csv> <y> <unit> <time> <trt>   Event study
    staggered <csv> <y> <unit> <time> <g> Staggered adoption DiD
    bacon <csv> <y> <treat> <post>        Bacon decomposition
    parallel <csv> <y> <treat> <time>     Test parallel trends
    dr <csv> <y> <treat> <post> [covs]    Doubly robust DiD
    triple <csv> <y> <d1> <d2> <post>     Triple difference (DDD)
    fuzzy <csv> <y> <treat> <post>        Fuzzy DiD
    synthetic <csv> <y> <unit> <time>     Synthetic DiD

EXAMPLES
    morie did-cli 2x2 data.csv outcome treatment post
    morie did-cli event data.csv y unit time treat_time
    morie did-cli parallel data.csv y treatment time
DIDEOF
            ;;
        2x2)
            local csv="${1:-}" y="${2:-}" treat="${3:-}" post="${4:-}"
            [ -z "$post" ] && { error "Usage: morie did-cli 2x2 <csv> <outcome> <treatment> <post>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import did_2x2
csv, y, treat, post = sys.argv[1:5]
df = pd.read_csv(csv)
result = did_2x2(df, y, treat, post)
print('Difference-in-Differences (2x2)')
print(result)
" "$csv" "$y" "$treat" "$post"
            ;;
        event)
            local csv="${1:-}" y="${2:-}" unit="${3:-}" time="${4:-}" trt="${5:-}"
            [ -z "$trt" ] && { error "Usage: morie did-cli event <csv> <outcome> <unit> <time> <treatment_time>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import event_study
csv, y, unit, time, trt = sys.argv[1:6]
df = pd.read_csv(csv)
result = event_study(df, y, unit, time, trt)
print('Event Study (Dynamic DiD)')
print(result)
" "$csv" "$y" "$unit" "$time" "$trt"
            ;;
        staggered)
            local csv="${1:-}" y="${2:-}" unit="${3:-}" time="${4:-}" group="${5:-}"
            [ -z "$group" ] && { error "Usage: morie did-cli staggered <csv> <outcome> <unit> <time> <group>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import staggered_did
csv, y, unit, time, group = sys.argv[1:6]
df = pd.read_csv(csv)
result = staggered_did(df, y, unit, time, group)
print('Staggered DiD')
print(result)
" "$csv" "$y" "$unit" "$time" "$group"
            ;;
        bacon)
            local csv="${1:-}" y="${2:-}" treat="${3:-}" post="${4:-}"
            [ -z "$post" ] && { error "Usage: morie did-cli bacon <csv> <outcome> <treatment> <post>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import bacon_decomposition
csv, y, treat, post = sys.argv[1:5]
df = pd.read_csv(csv)
result = bacon_decomposition(df, y, treat, post)
print('Bacon Decomposition')
print(result)
" "$csv" "$y" "$treat" "$post"
            ;;
        parallel)
            local csv="${1:-}" y="${2:-}" treat="${3:-}" time="${4:-}"
            [ -z "$time" ] && { error "Usage: morie did-cli parallel <csv> <outcome> <treatment> <time>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import test_parallel_trends
csv, y, treat, time = sys.argv[1:5]
df = pd.read_csv(csv)
result = test_parallel_trends(df, y, treat, time)
print('Parallel Trends Test')
print(result)
" "$csv" "$y" "$treat" "$time"
            ;;
        dr|triple|fuzzy|synthetic)
            local csv="${1:-}"
            [ -z "$csv" ] && { error "Usage: morie did-cli $subcmd <csv> [columns...]"; return 1; }
            _require_esml_package
            # func_map only ever holds one of the fixed names below, so it is
            # safe to interpolate into the Python source.
            local func_map
            case "$subcmd" in
                dr) func_map="did_doubly_robust" ;;
                triple) func_map="did_triple_difference" ;;
                fuzzy) func_map="did_fuzzy" ;;
                synthetic) func_map="synthetic_did" ;;
            esac
            # NOTE(review): only the DataFrame is passed to the estimator; the
            # column arguments listed in the help text are not forwarded.
            # Confirm against the morie.did signatures before extending this.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.did import $func_map
df = pd.read_csv(sys.argv[1])
print($func_map(df))
" "$csv" 2>&1 || error "Check arguments."
            ;;
        *)
            error "Unknown: morie did-cli $subcmd"
            ;;
    esac
}

# =========================================================================
# morie rdd-cli — Regression discontinuity from CLI
# =========================================================================

cmd_rdd_cli() {
    # Dispatch `morie rdd-cli <subcmd> ...` to an estimator in morie.rdd.
    #
    # Arguments:
    #   $1     subcommand (defaults to "help")
    #   $2...  CSV path, column names and the numeric cutoff for that subcommand
    # Returns:
    #   1 on a usage error.
    #
    # Positional parameters are read as ${N:-} because this script runs under
    # `set -u`: a missing argument has to reach the usage message instead of
    # aborting with "unbound variable".  User-supplied values are handed to
    # Python through sys.argv rather than spliced into the program text, so
    # quotes in a path or column name cannot change the code that runs.
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'RDDEOF'
morie rdd-cli — Regression discontinuity from the command line

COMMANDS
    sharp <csv> <y> <x> <c>         Sharp RDD
    fuzzy <csv> <y> <x> <d> <c>     Fuzzy RDD
    mccrary <csv> <x> <c>           McCrary density test
    bandwidth <csv> <y> <x> <c>     Optimal bandwidth (CCT)
    donut <csv> <y> <x> <c> [h]     Donut RDD
    placebo <csv> <y> <x> <c>       Placebo cutoff test
    kink <csv> <y> <x> <c>          Kink RDD
    plot <csv> <y> <x> <c>          RDD plot data

EXAMPLES
    morie rdd-cli sharp data.csv outcome running 0.5
    morie rdd-cli fuzzy data.csv outcome running treatment 0.5
    morie rdd-cli mccrary data.csv running 0.5
RDDEOF
            ;;
        sharp)
            local csv="${1:-}" y="${2:-}" x="${3:-}" c="${4:-}"
            [ -z "$c" ] && { error "Usage: morie rdd-cli sharp <csv> <outcome> <running> <cutoff>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.rdd import sharp_rdd
csv, y, x, c = sys.argv[1:5]
df = pd.read_csv(csv)
result = sharp_rdd(df[y].values, df[x].values, float(c))
print('Sharp Regression Discontinuity')
print(result)
" "$csv" "$y" "$x" "$c"
            ;;
        fuzzy)
            local csv="${1:-}" y="${2:-}" x="${3:-}" d="${4:-}" c="${5:-}"
            [ -z "$c" ] && { error "Usage: morie rdd-cli fuzzy <csv> <outcome> <running> <treatment> <cutoff>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.rdd import fuzzy_rdd
csv, y, x, d, c = sys.argv[1:6]
df = pd.read_csv(csv)
result = fuzzy_rdd(df[y].values, df[x].values, df[d].values, float(c))
print('Fuzzy Regression Discontinuity')
print(result)
" "$csv" "$y" "$x" "$d" "$c"
            ;;
        mccrary)
            local csv="${1:-}" x="${2:-}" c="${3:-}"
            [ -z "$c" ] && { error "Usage: morie rdd-cli mccrary <csv> <running> <cutoff>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.rdd import mccrary_test
csv, x, c = sys.argv[1:4]
df = pd.read_csv(csv)
result = mccrary_test(df[x].values, float(c))
print('McCrary Density Test')
print(result)
" "$csv" "$x" "$c"
            ;;
        bandwidth)
            local csv="${1:-}" y="${2:-}" x="${3:-}" c="${4:-}"
            [ -z "$c" ] && { error "Usage: morie rdd-cli bandwidth <csv> <outcome> <running> <cutoff>"; return 1; }
            _require_esml_package
            # Each selector is tried independently; one failing does not stop
            # the others from being reported.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.rdd import bandwidth_cct, bandwidth_ik, bandwidth_rot
csv, y_col, x_col, cutoff = sys.argv[1:5]
df = pd.read_csv(csv)
y, x, c = df[y_col].values, df[x_col].values, float(cutoff)
print('Bandwidth Selection')
for name, fn in [('CCT', bandwidth_cct), ('IK', bandwidth_ik), ('ROT', bandwidth_rot)]:
    try:
        result = fn(y, x, c)
        print(f'  {name}: {result}')
    except Exception as e:
        print(f'  {name}: failed ({e})')
" "$csv" "$y" "$x" "$c"
            ;;
        donut|placebo|kink|plot)
            _require_esml_package
            # func_map only ever holds one of the fixed names below, so it is
            # safe to interpolate into the Python source.
            local func_map
            case "$subcmd" in
                donut) func_map="donut_rdd" ;;
                placebo) func_map="placebo_cutoff_test" ;;
                kink) func_map="kink_rdd" ;;
                plot) func_map="rd_plot_data" ;;
            esac
            local csv="${1:-}" y="${2:-}" x="${3:-}" c="${4:-}"
            [ -z "$c" ] && { error "Usage: morie rdd-cli $subcmd <csv> <outcome> <running> <cutoff>"; return 1; }
            # NOTE(review): the optional [h] shown for `donut` in the help text
            # is not forwarded to the estimator.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.rdd import $func_map
csv, y, x, c = sys.argv[1:5]
df = pd.read_csv(csv)
result = $func_map(df[y].values, df[x].values, float(c))
print(result)
" "$csv" "$y" "$x" "$c"
            ;;
        *)
            error "Unknown: morie rdd-cli $subcmd"
            ;;
    esac
}

# =========================================================================
# morie iv-cli — Instrumental variables from CLI
# =========================================================================

cmd_iv_cli() {
    # Dispatch `morie iv-cli <subcmd> ...` to an estimator or test in morie.iv.
    #
    # Arguments:
    #   $1     subcommand (defaults to "help")
    #   $2...  CSV path followed by the column names that subcommand expects;
    #          tsls/liml/first-stage also accept trailing control columns
    # Returns:
    #   1 on a usage error.
    #
    # Positional parameters are read as ${N:-} because this script runs under
    # `set -u`: a missing argument has to reach the usage message instead of
    # aborting with "unbound variable".  User-supplied values are handed to
    # Python through sys.argv rather than spliced into the program text, so
    # quotes in a path or column name cannot change the code that runs.
    # Control columns are joined into one whitespace-separated argument and
    # split again in Python.
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'IVEOF'
morie iv-cli — Instrumental variables from the command line

COMMANDS
    tsls <csv> <y> <d> <z> [x]      Two-stage least squares
    liml <csv> <y> <d> <z> [x]      Limited info maximum likelihood
    gmm <csv> <y> <d> <z> [x]       GMM-IV estimation
    wald <csv> <y> <d> <z>           Wald IV estimator
    first-stage <csv> <d> <z> [x]    First-stage diagnostics
    hausman <csv> <y> <d> <z> [x]    Hausman specification test
    sargan <csv> <y> <d> <z1> <z2>   Sargan overidentification test
    ar-test <csv> <y> <d> <z>        Anderson-Rubin test
    ar-ci <csv> <y> <d> <z>          Anderson-Rubin confidence interval

EXAMPLES
    morie iv-cli tsls data.csv outcome endogenous instrument controls
    morie iv-cli first-stage data.csv endogenous instrument
    morie iv-cli hausman data.csv outcome endogenous instrument
IVEOF
            ;;
        tsls|2sls)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli tsls <csv> <outcome> <endogenous> <instrument> [controls...]"; return 1; }
            # A non-empty $4 guarantees at least four arguments, so this shift
            # cannot fail; whatever remains is the control list.
            shift 4
            local x="$*"
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import tsls
csv, y_col, d_col, z_col, controls = sys.argv[1:6]
df = pd.read_csv(csv)
y = df[y_col].values
D = df[[d_col]].values
Z = df[[z_col]].values
x_cols = controls.split()
X = df[x_cols].values if x_cols else None
result = tsls(y, D, Z, X)
print('Two-Stage Least Squares (2SLS)')
print(result)
" "$csv" "$y" "$d" "$z" "$x"
            ;;
        liml)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli liml <csv> <outcome> <endogenous> <instrument> [controls...]"; return 1; }
            shift 4
            local x="$*"
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import liml
csv, y_col, d_col, z_col, controls = sys.argv[1:6]
df = pd.read_csv(csv)
y = df[y_col].values
D = df[[d_col]].values
Z = df[[z_col]].values
x_cols = controls.split()
X = df[x_cols].values if x_cols else None
result = liml(y, D, Z, X)
print('Limited Information Maximum Likelihood (LIML)')
print(result)
" "$csv" "$y" "$d" "$z" "$x"
            ;;
        gmm)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli gmm <csv> <outcome> <endogenous> <instrument> [controls...]"; return 1; }
            _require_esml_package
            # NOTE(review): trailing control columns are accepted on the
            # command line but are not passed to gmm_iv.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import gmm_iv
csv, y, d, z = sys.argv[1:5]
df = pd.read_csv(csv)
result = gmm_iv(df[y].values, df[[d]].values, df[[z]].values)
print('GMM Instrumental Variables')
print(result)
" "$csv" "$y" "$d" "$z"
            ;;
        wald)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli wald <csv> <outcome> <endogenous> <instrument>"; return 1; }
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import wald_estimator
csv, y, d, z = sys.argv[1:5]
df = pd.read_csv(csv)
result = wald_estimator(df[y].values, df[d].values, df[z].values)
print('Wald IV Estimator')
print(result)
" "$csv" "$y" "$d" "$z"
            ;;
        first-stage)
            local csv="${1:-}" d="${2:-}" z="${3:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli first-stage <csv> <endogenous> <instrument> [controls...]"; return 1; }
            # A non-empty $3 guarantees at least three arguments.
            shift 3
            local x="$*"
            _require_esml_package
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import first_stage_diagnostics
csv, d_col, z_col, controls = sys.argv[1:5]
df = pd.read_csv(csv)
D = df[[d_col]].values
Z = df[[z_col]].values
x_cols = controls.split()
X = df[x_cols].values if x_cols else None
result = first_stage_diagnostics(D, Z, X)
print('First-Stage Diagnostics')
print(result)
" "$csv" "$d" "$z" "$x"
            ;;
        hausman)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli hausman <csv> <outcome> <endogenous> <instrument> [controls...]"; return 1; }
            _require_esml_package
            # NOTE(review): trailing control columns are accepted on the
            # command line but are not passed to hausman_test.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import hausman_test
csv, y, d, z = sys.argv[1:5]
df = pd.read_csv(csv)
result = hausman_test(df[y].values, df[[d]].values, df[[z]].values)
print('Hausman Specification Test')
print(result)
" "$csv" "$y" "$d" "$z"
            ;;
        sargan)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z1="${4:-}" z2="${5:-}"
            [ -z "$z2" ] && { error "Usage: morie iv-cli sargan <csv> <outcome> <endogenous> <instrument1> <instrument2>"; return 1; }
            _require_esml_package
            # The test is run on a 2SLS fit that uses both instruments.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import sargan_test, tsls
csv, y, d, z1, z2 = sys.argv[1:6]
df = pd.read_csv(csv)
Z = df[[z1, z2]].values
iv_result = tsls(df[y].values, df[[d]].values, Z)
result = sargan_test(iv_result)
print('Sargan Overidentification Test')
print(result)
" "$csv" "$y" "$d" "$z1" "$z2"
            ;;
        ar-test|ar-ci)
            local csv="${1:-}" y="${2:-}" d="${3:-}" z="${4:-}"
            [ -z "$z" ] && { error "Usage: morie iv-cli $subcmd <csv> <outcome> <endogenous> <instrument>"; return 1; }
            _require_esml_package
            # func only ever holds one of the fixed names below, so it is safe
            # to interpolate into the Python source.
            local func
            case "$subcmd" in
                ar-test) func="anderson_rubin_test" ;;
                ar-ci) func="anderson_rubin_ci" ;;
            esac
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.iv import $func
csv, y, d, z = sys.argv[1:5]
df = pd.read_csv(csv)
result = $func(df[y].values, df[[d]].values, df[[z]].values)
print(result)
" "$csv" "$y" "$d" "$z"
            ;;
        *)
            error "Unknown: morie iv-cli $subcmd"
            ;;
    esac
}

# =========================================================================
# morie effect-cli — Effect sizes from CLI
# =========================================================================

cmd_effect_cli() {
    # Dispatch `morie effect-cli <subcmd> ...` to a function in
    # morie.effect_sizes.
    #
    # Arguments:
    #   $1     subcommand (defaults to "help")
    #   $2...  CSV path and column names, or <from> <to> <value> for `convert`
    # Returns:
    #   1 on a usage error.
    #
    # Positional parameters are read as ${N:-} because this script runs under
    # `set -u`: a missing argument has to reach the usage message instead of
    # aborting with "unbound variable".  User-supplied values are handed to
    # Python through sys.argv rather than spliced into the program text, so
    # quotes in a path or column name cannot change the code that runs.
    local subcmd="${1:-help}"
    shift 2>/dev/null || true
    case "$subcmd" in
        help|-h|--help)
            cat << 'EFFECTEOF'
morie effect-cli — Effect sizes from the command line

COMMANDS
    cohens-d <csv> <col> <group>      Cohen's d
    hedges-g <csv> <col> <group>      Hedges' g
    glass-delta <csv> <col> <group>   Glass' delta
    odds-ratio <csv> <col1> <col2>    Odds ratio from 2x2
    risk-ratio <csv> <col1> <col2>    Risk ratio
    nnt <csv> <col1> <col2>           Number needed to treat
    cramers-v <csv> <col1> <col2>     Cramér's V
    eta-sq <csv> <col> <group>        Eta-squared
    meta-fe <csv> <yi> <vi>           Fixed-effects meta-analysis
    meta-re <csv> <yi> <vi>           Random-effects meta-analysis
    convert <from> <to> <value>       Convert between effect sizes

EXAMPLES
    morie effect-cli cohens-d data.csv score treatment
    morie effect-cli nnt data.csv outcome exposure
    morie effect-cli convert d r 0.5
EFFECTEOF
            ;;
        cohens-d|hedges-g|glass-delta|cramers-v|eta-sq)
            local csv="${1:-}" col="${2:-}" group="${3:-}"
            [ -z "$group" ] && { error "Usage: morie effect-cli $subcmd <csv> <col> <group>"; return 1; }
            _require_esml_package
            # func_map and subcmd only ever hold the fixed names matched above,
            # so they are safe to interpolate into the Python source.
            local func_map
            case "$subcmd" in
                cohens-d) func_map="cohens_d" ;;
                hedges-g) func_map="hedges_g" ;;
                glass-delta) func_map="glass_delta" ;;
                cramers-v) func_map="cramers_v" ;;
                eta-sq) func_map="eta_squared" ;;
            esac
            # NOTE(review): the help text lists cramers-v as <col1> <col2>, but
            # this arm treats every subcommand as <col> split by a two-level
            # <group>; confirm that matches cramers_v / eta_squared.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.effect_sizes import $func_map
csv, col, group = sys.argv[1:4]
df = pd.read_csv(csv)
groups = df[group].dropna().unique()
if len(groups) == 2:
    x = df.loc[df[group]==groups[0], col].dropna().values
    y = df.loc[df[group]==groups[1], col].dropna().values
    result = $func_map(x, y)
    print(f'$subcmd: {result}')
else:
    print(f'Expected 2 groups, found {len(groups)}')
" "$csv" "$col" "$group"
            ;;
        odds-ratio|risk-ratio|nnt)
            local csv="${1:-}" col1="${2:-}" col2="${3:-}"
            [ -z "$col2" ] && { error "Usage: morie effect-cli $subcmd <csv> <col1> <col2>"; return 1; }
            _require_esml_package
            local func_map
            case "$subcmd" in
                odds-ratio) func_map="odds_ratio" ;;
                risk-ratio) func_map="risk_ratio" ;;
                nnt) func_map="number_needed_to_treat" ;;
            esac
            # The four cells are read positionally from the cross-tabulation,
            # so anything other than a 2x2 table is rejected up front instead
            # of raising IndexError or silently using only the top-left cells.
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.effect_sizes import $func_map
csv, col1, col2 = sys.argv[1:4]
df = pd.read_csv(csv)
ct = pd.crosstab(df[col1], df[col2])
if ct.shape != (2, 2):
    sys.exit(f'Expected a 2x2 table, found {ct.shape[0]}x{ct.shape[1]}')
a, b, c, d = ct.iloc[0,0], ct.iloc[0,1], ct.iloc[1,0], ct.iloc[1,1]
result = $func_map(a, b, c, d)
print(f'$subcmd: {result}')
" "$csv" "$col1" "$col2"
            ;;
        meta-fe|meta-re)
            local csv="${1:-}" yi="${2:-}" vi="${3:-}"
            [ -z "$vi" ] && { error "Usage: morie effect-cli $subcmd <csv> <effect_col> <variance_col>"; return 1; }
            _require_esml_package
            local func_map
            case "$subcmd" in
                meta-fe) func_map="fixed_effects_meta" ;;
                meta-re) func_map="random_effects_meta" ;;
            esac
            "$PYTHON" -c "
import sys
import pandas as pd
from morie.effect_sizes import $func_map
csv, yi, vi = sys.argv[1:4]
df = pd.read_csv(csv)
result = $func_map(df[yi].values, df[vi].values)
print(result)
" "$csv" "$yi" "$vi"
            ;;
        convert)
            local from_type="${1:-}" to_type="${2:-}" value="${3:-}"
            [ -z "$value" ] && { error "Usage: morie effect-cli convert <from> <to> <value>"; return 1; }
            _require_esml_package
            # Unsupported (from, to) pairs print the list of available
            # conversions rather than failing.
            "$PYTHON" -c "
import sys
from morie.effect_sizes import d_to_r, r_to_d, d_to_or, or_to_d, d_to_nnt, r_to_or, or_to_r
from_type, to_type, value = sys.argv[1:4]
conversions = {
    ('d','r'): d_to_r, ('r','d'): r_to_d,
    ('d','or'): d_to_or, ('or','d'): or_to_d,
    ('d','nnt'): d_to_nnt,
    ('r','or'): r_to_or, ('or','r'): or_to_r,
}
key = (from_type, to_type)
if key in conversions:
    result = conversions[key](float(value))
    print(f'  {from_type} = {value} → {to_type} = {result:.4f}')
else:
    print(f'  Conversion {from_type} → {to_type} not available.')
    print('  Available: ' + ', '.join(f'{a}→{b}' for a, b in conversions))
" "$from_type" "$to_type" "$value"
            ;;
        *)
            error "Unknown: morie effect-cli $subcmd"
            ;;
    esac
}

# =========================================================================
# morie repl — Python REPL with morie pre-loaded
# =========================================================================

cmd_repl() {
    # Start an interactive Python session (`-i`) after running a short
    # bootstrap that imports morie, pandas and numpy and prints a banner.
    # Any arguments given to this function are passed on to Python after the
    # bootstrap program.
    local bootstrap
    bootstrap="
import morie
import pandas as pd
import numpy as np
from morie import *
print(f'morie {morie.__version__} loaded ({len(morie.list_modules())} modules)')
print('Type help(morie) for package help, or dir(morie) to see all exports.')
print()
"
    _require_esml_package
    info "Starting Python REPL with morie loaded..."
    "$PYTHON" -i -c "$bootstrap" "$@"
}

# =========================================================================
# morie run — Quick-run a Python or R script
# =========================================================================

cmd_run() {
    # Run a script with the interpreter chosen from its file extension.
    #
    # Arguments:
    #   $1     path to the script (.py, .R/.r, .qmd, .sh/.bash)
    #   $2...  passed through unchanged to the script / renderer
    # Returns:
    #   1 when no script is given, the file does not exist, the required
    #   tool (Rscript, quarto) is missing, or the extension is unsupported.
    #
    # $1 is read as ${1:-} because this script runs under `set -u`; a bare
    # "$1" would abort with "unbound variable" before the usage message.
    local script="${1:-}"
    if [[ -z "$script" ]]; then
        error "Usage: morie run <script.py|script.R>"
        return 1
    fi
    if [[ ! -f "$script" ]]; then
        error "File not found: $script"
        return 1
    fi
    shift

    # Everything after the last dot; a name without a dot yields the whole
    # name and falls through to the "Unknown file type" arm.
    local ext="${script##*.}"
    case "$ext" in
        py)
            _require_python
            info "Running Python script: $script"
            _log_event "RUN python: $script"
            "$PYTHON" "$script" "$@"
            ;;
        R|r)
            if ! command -v Rscript &>/dev/null; then
                error "Rscript not found."
                return 1
            fi
            info "Running R script: $script"
            _log_event "RUN R: $script"
            Rscript "$script" "$@"
            ;;
        qmd)
            if ! command -v quarto &>/dev/null; then
                error "Quarto not installed."
                return 1
            fi
            info "Rendering Quarto notebook: $script"
            quarto render "$script" "$@"
            ;;
        sh|bash)
            info "Running shell script: $script"
            bash "$script" "$@"
            ;;
        *)
            error "Unknown file type: .$ext (supported: .py, .R, .qmd, .sh)"
            return 1
            ;;
    esac
}

# =========================================================================
# morie version — Detailed version information
# =========================================================================

cmd_version_full() {
    # Print a table of component versions: the shell executive, then the
    # morie Python package and interpreter when one resolves, then R, Quarto,
    # Docker and Ollama for each of those found on PATH.
    #
    # `py` is declared local so that resolving the interpreter here does not
    # leave a global `py` variable behind for the rest of the script.
    local py
    echo "${BOLD}MORIE Version Details${RESET}"
    echo ""
    printf "  %-25s %s\n" "Shell executive" "$ESML_SCRIPT_VERSION"
    if py="$(_resolve_python 2>/dev/null)"; then
        printf "  %-25s %s\n" "Python package" "$("$py" -c 'import morie; print(morie.__version__)' 2>/dev/null || echo 'not installed')"
        printf "  %-25s %s\n" "Python" "$("$py" --version 2>&1)"
    fi
    if command -v R &>/dev/null; then
        # Third field of the first line of `R --version`.
        printf "  %-25s %s\n" "R" "$(R --version 2>&1 | head -1 | awk '{print $3}')"
    fi
    if command -v quarto &>/dev/null; then
        printf "  %-25s %s\n" "Quarto" "$(quarto --version 2>&1)"
    fi
    if command -v docker &>/dev/null; then
        printf "  %-25s %s\n" "Docker" "$(docker --version 2>&1 | sed 's/Docker version //')"
    fi
    if command -v ollama &>/dev/null; then
        printf "  %-25s %s\n" "Ollama" "$(ollama --version 2>&1 || echo installed)"
    fi
}

# =========================================================================
# VERSION (fast path)
# =========================================================================
# Answer --version/-V and --help/-h immediately, before command dispatch.
if [[ "${1:-}" == "--version" || "${1:-}" == "-V" ]]; then
    # "dev" is reported when no interpreter resolves or when the version
    # query exits non-zero.
    ver="dev"
    if py="$(_resolve_python 2>/dev/null)"; then
        ver="$("$py" -c "import morie; print(morie.__version__)" 2>/dev/null)" || ver="dev"
    fi
    echo "morie $ver (shell: $ESML_SCRIPT_VERSION)"
    exit 0
elif [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
    cmd_help
    exit 0
fi

# =========================================================================
# COMMAND DISPATCH
# =========================================================================
# The first argument selects the command; the rest are forwarded untouched.
# With no arguments the shift fails harmlessly, hence the `|| true`.
CMD="${1:-}"
shift 2>/dev/null || true

case "$CMD" in
    # ── Native commands (no Python) ──────────────────────────────────────
    install)
        cmd_install "$@"
        ;;
    update)
        cmd_update "$@"
        ;;
    config)
        cmd_config "$@"
        ;;
    ollama)
        cmd_ollama "$@"
        ;;
    container)
        cmd_container "$@"
        ;;
    completions)
        cmd_completions "$@"
        ;;
    data)
        cmd_data "$@"
        ;;
    log)
        cmd_log "$@"
        ;;
    backup)
        cmd_backup "$@"
        ;;
    env)
        cmd_env "$@"
        ;;
    test)
        cmd_test "$@"
        ;;
    man)
        cmd_man "$@"
        ;;
    status)
        cmd_status "$@"
        ;;
    doctor-native)
        cmd_doctor_native "$@"
        ;;
    help|"")
        if [[ -z "$CMD" ]]; then
            # No command: if interactive → delegate to Python for TUI/chat,
            # if piped → show help.
            if [[ -t 0 ]] && [[ -t 1 ]]; then
                _require_esml_package
                exec "$PYTHON" -m morie.runner
            else
                cmd_help
                exit 0
            fi
        else
            cmd_help
        fi
        ;;

    # ── Developer commands ─────────────────────────────────────────────
    lint)           cmd_lint "$@" ;;
    check)          cmd_check "$@" ;;
    benchmark)      cmd_benchmark "$@" ;;
    db)             cmd_db "$@" ;;
    tree-view)      cmd_tree_view "$@" ;;
    hash)           cmd_hash "$@" ;;
    size)           cmd_size "$@" ;;
    path)           cmd_path "$@" ;;
    changelog)      cmd_changelog "$@" ;;
    graph)          cmd_graph "$@" ;;
    fmt)            cmd_fmt "$@" ;;
    sec)            cmd_sec "$@" ;;
    sync)           cmd_sync "$@" ;;
    migrate)        cmd_migrate "$@" ;;
    profile-code)   cmd_profile_code "$@" ;;
    serve)          cmd_serve "$@" ;;
    watch)          cmd_watch "$@" ;;
    diff-outputs)   cmd_diff_outputs "$@" ;;
    audit)          cmd_audit "$@" ;;
    catalog)        cmd_catalog "$@" ;;
    schema)         cmd_schema "$@" ;;
    sample-data)    cmd_sample_data "$@" ;;
    validate-data)  cmd_validate_data "$@" ;;
    translate)      cmd_translate "$@" ;;
    health)         cmd_health "$@" ;;
    stat)           cmd_stat "$@" ;;
    causal)         cmd_causal "$@" ;;
    survival)       cmd_survival "$@" ;;
    match-cli)      cmd_match_cli "$@" ;;
    did-cli)        cmd_did_cli "$@" ;;
    rdd-cli)        cmd_rdd_cli "$@" ;;
    iv-cli)         cmd_iv_cli "$@" ;;
    effect-cli)     cmd_effect_cli "$@" ;;

    # ── Native + Python hybrid commands ─────────────────────────────────
    r)
        cmd_r "$@"
        ;;
    quarto)
        cmd_quarto "$@"
        ;;
    ci)
        cmd_ci "$@"
        ;;
    release)
        cmd_release "$@"
        ;;
    deps)
        cmd_deps "$@"
        ;;
    perf)
        cmd_perf "$@"
        ;;
    net)
        cmd_net "$@"
        ;;
    cron)
        cmd_cron "$@"
        ;;
    docs)
        cmd_docs "$@"
        ;;
    init)
        cmd_init "$@"
        ;;
    version)
        cmd_version_full "$@"
        ;;
    clean)
        cmd_clean "$@"
        ;;
    info)
        cmd_info "$@"
        ;;
    repl)
        cmd_repl "$@"
        ;;
    run)
        cmd_run "$@"
        ;;

    # ── Python-delegated commands ────────────────────────────────────────
    # `repl` is deliberately absent from this list: the `repl)` arm above
    # always matches first, so listing it here was dead code.
    doctor|chat|tui|pipeline|list-modules|run-module|run-modules|\
    ask|assistant|agent|profile-dataset|sample|inspect|verify|parity-review|selftest|\
    exec|edit|percysuits|convert-checkpoint|crypto|list-datasets|download-bootstrap|\
    verify-earth-engine|verify-pollution)
        _require_esml_package
        exec "$PYTHON" -m morie.runner "$CMD" "$@"
        ;;

    # ── Unknown command ──────────────────────────────────────────────────
    *)
        error "Unknown command: $CMD"
        echo "" >&2
        echo "  Run 'morie help' for usage." >&2
        exit 1
        ;;
esac
