#!/bin/bash
#
# Docker Health Check Script
#
# Checks the health of critical Docker containers and exits with non-zero
# code if any critical service is unhealthy.
#
# Exit Codes:
#   0 - All critical services healthy
#   1 - One or more critical services unhealthy
#   2 - Docker daemon unreachable
#
# Usage: ./scripts/observability/check-docker-health
#

set -euo pipefail

# Overall result of the run; stays 0 unless a critical service check fails.
exit_code=0

# Containers that must be running and healthy for the system to function.
# Note: PostgreSQL and Kafka/Redpanda run on remote server (192.168.86.200), not locally
critical_services=("archon-intelligence" "archon-qdrant" "archon-bridge")

# Optional containers: problems here are reported but do not fail the run.
non_critical_services=("archon-search" "archon-memgraph")

# ANSI color sequences, stored as literal backslash escapes and expanded
# at print time (echo -e / printf %b).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no color

# Tallies reported in the final summary.
critical_healthy=0
critical_unhealthy=0
non_critical_healthy=0
non_critical_unhealthy=0

printf '%s\n' "=== Docker Health Check ==="
printf 'Timestamp: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')"
printf '\n'

# Without a reachable daemon none of the per-container checks can run,
# so report it and stop with the dedicated exit code 2.
if ! docker info >/dev/null 2>&1; then
    printf '%b\n' "${RED}❌ CRITICAL: Docker daemon is unreachable${NC}"
    printf '\n'
    printf '%s\n' "=== SUMMARY ==="
    printf '%b\n' "${RED}❌ Cannot connect to Docker daemon${NC}"
    printf '%s\n' "   Please ensure Docker is running and you have permission to access it."
    exit 2
fi

echo "=== Critical Services ==="
echo ""

#######################################
# Check one critical container and record the outcome.
# A container counts as healthy when it is running and its healthcheck
# reports "healthy" or "starting", or when it has no healthcheck at all.
# Globals:
#   RED, GREEN, YELLOW, NC (read)
#   critical_healthy, critical_unhealthy, exit_code (written)
# Arguments:
#   $1 - container name
# Outputs:
#   One status line on stdout.
# Returns:
#   0 always; the result is recorded in the globals above.
#######################################
check_critical_service() {
    local service=$1
    local names status health

    # Capture the listing before matching. Piping straight into `grep -q`
    # under `set -o pipefail` can report a failure when grep exits early and
    # docker receives SIGPIPE, which would look like "not found".
    names=$(docker ps -a --filter "name=^${service}$" --format "{{.Names}}") || names=""
    if ! grep -Fxq -- "$service" <<<"$names"; then
        echo -e "  ${RED}❌ CRITICAL: ${service} (not found)${NC}"
        # Counters use assignment arithmetic: `((var++))` returns status 1
        # when var is 0, which aborts the script under `set -e`.
        critical_unhealthy=$((critical_unhealthy + 1))
        exit_code=1
        return 0
    fi

    # Container state (running, exited, ...); "unknown" if inspect fails.
    status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "unknown")
    if [[ "$status" != "running" ]]; then
        echo -e "  ${RED}❌ CRITICAL: ${service} (status: ${status})${NC}"
        critical_unhealthy=$((critical_unhealthy + 1))
        exit_code=1
        return 0
    fi

    # Healthcheck state, or "no-healthcheck" when the container defines none.
    health=$(docker inspect --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}no-healthcheck{{end}}' "$service" 2>/dev/null || echo "unknown")
    case "$health" in
        healthy)
            echo -e "  ${GREEN}✅ ${service} (healthy)${NC}"
            critical_healthy=$((critical_healthy + 1))
            ;;
        no-healthcheck)
            echo -e "  ${GREEN}✅ ${service} (running, no healthcheck)${NC}"
            critical_healthy=$((critical_healthy + 1))
            ;;
        starting)
            # Still starting up: counted as healthy rather than failed.
            echo -e "  ${YELLOW}⏳ ${service} (starting...)${NC}"
            critical_healthy=$((critical_healthy + 1))
            ;;
        *)
            echo -e "  ${RED}❌ CRITICAL: ${service} (health: ${health})${NC}"
            critical_unhealthy=$((critical_unhealthy + 1))
            exit_code=1
            ;;
    esac
    return 0
}

# Check critical services
for service in "${critical_services[@]}"; do
    check_critical_service "$service"
done

echo ""
echo "=== Non-Critical Services ==="
echo ""

#######################################
# Check one non-critical container and record the outcome.
# Problems are reported as warnings and never change the exit code.
# A container counts as healthy when it is running and its healthcheck
# reports "healthy" or "starting", or when it has no healthcheck at all.
# Globals:
#   GREEN, YELLOW, NC (read)
#   non_critical_healthy, non_critical_unhealthy (written)
# Arguments:
#   $1 - container name
# Outputs:
#   One status line on stdout.
# Returns:
#   0 always; the result is recorded in the globals above.
#######################################
check_non_critical_service() {
    local service=$1
    local names status health

    # Capture the listing before matching. Piping straight into `grep -q`
    # under `set -o pipefail` can report a failure when grep exits early and
    # docker receives SIGPIPE, which would look like "not found".
    names=$(docker ps -a --filter "name=^${service}$" --format "{{.Names}}") || names=""
    if ! grep -Fxq -- "$service" <<<"$names"; then
        echo -e "  ${YELLOW}⚠️  ${service} (not found)${NC}"
        # Counters use assignment arithmetic: `((var++))` returns status 1
        # when var is 0, which aborts the script under `set -e`.
        non_critical_unhealthy=$((non_critical_unhealthy + 1))
        return 0
    fi

    # Container state (running, exited, ...); "unknown" if inspect fails.
    status=$(docker inspect --format='{{.State.Status}}' "$service" 2>/dev/null || echo "unknown")
    if [[ "$status" != "running" ]]; then
        echo -e "  ${YELLOW}⚠️  ${service} (status: ${status})${NC}"
        non_critical_unhealthy=$((non_critical_unhealthy + 1))
        return 0
    fi

    # Healthcheck state, or "no-healthcheck" when the container defines none.
    health=$(docker inspect --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}no-healthcheck{{end}}' "$service" 2>/dev/null || echo "unknown")
    case "$health" in
        healthy)
            echo -e "  ${GREEN}✅ ${service} (healthy)${NC}"
            non_critical_healthy=$((non_critical_healthy + 1))
            ;;
        no-healthcheck)
            echo -e "  ${GREEN}✅ ${service} (running, no healthcheck)${NC}"
            non_critical_healthy=$((non_critical_healthy + 1))
            ;;
        starting)
            # Still starting up: counted as healthy rather than failed.
            echo -e "  ${YELLOW}⏳ ${service} (starting...)${NC}"
            non_critical_healthy=$((non_critical_healthy + 1))
            ;;
        *)
            echo -e "  ${YELLOW}⚠️  ${service} (health: ${health})${NC}"
            non_critical_unhealthy=$((non_critical_unhealthy + 1))
            ;;
    esac
    return 0
}

# Check non-critical services
for service in "${non_critical_services[@]}"; do
    check_non_critical_service "$service"
done

printf '\n'
printf '%s\n' "=== SUMMARY ==="
printf '\n'

# Critical tally: healthy count out of the number of configured services.
total_critical=${#critical_services[@]}
printf '%b\n' "${BLUE}Critical Services:${NC} ${critical_healthy}/${total_critical} healthy"
if (( critical_unhealthy > 0 )); then
    printf '%b\n' "  ${RED}❌ ${critical_unhealthy} critical service(s) unhealthy${NC}"
fi

# Non-critical tally, reported the same way.
total_non_critical=${#non_critical_services[@]}
printf '%b\n' "${BLUE}Non-Critical Services:${NC} ${non_critical_healthy}/${total_non_critical} healthy"
if (( non_critical_unhealthy > 0 )); then
    printf '%b\n' "  ${YELLOW}⚠️  ${non_critical_unhealthy} non-critical service(s) unhealthy${NC}"
fi

printf '\n'

# Verdict: only critical failures make the run unsuccessful; non-critical
# problems are mentioned as a warning alongside a passing result.
if (( exit_code != 0 )); then
    printf '%b\n' "${RED}❌ One or more critical services unhealthy${NC}"
    printf '%b\n' "${RED}   System is NOT ready for use${NC}"
else
    printf '%b\n' "${GREEN}✅ All critical services healthy${NC}"
    if (( non_critical_unhealthy > 0 )); then
        printf '%b\n' "${YELLOW}⚠️  Some non-critical services have issues (safe to proceed)${NC}"
    fi
fi

printf '\n'
printf '%s\n' "=== End Docker Health Check ==="

exit "$exit_code"
