#!/bin/bash
# Pre-commit hook - Secrets Scanner
# Prevents committing API keys, tokens, and other sensitive data
# This hook scans staged files for common secret patterns

set -o errexit

# ANSI colour escape sequences. They are stored with a literal backslash and
# only turned into real escape characters when printed.
NC='\033[0m' # reset / no colour
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
GREEN='\033[0;32m'
RED='\033[0;31m'

# Announce the scan, followed by one blank line.
printf '%b\n\n' "${BLUE}🔍 Scanning for secrets and sensitive data...${NC}"

# Common secret patterns to detect.
# Keys are the human-readable names reported to the user; values are POSIX
# extended regular expressions that the scan applies with `grep -E -i`.
#
# Conventions used below:
#   * A literal hyphen inside a bracket expression is always placed last,
#     because a hyphen anywhere else may be read as a range operator.
#   * No backslash escapes are used inside bracket expressions: grep treats a
#     backslash there as a literal character, not as an escape.
#   * No pattern starts with "-", so a pattern can never be mistaken for a
#     grep option regardless of how grep is invoked.
declare -A PATTERNS=(
    ["Anthropic API Key"]='sk-ant-api[0-9]{2}-[a-zA-Z0-9_-]{93,}'
    ["OpenAI API Key"]='sk-[a-zA-Z0-9]{32,}'
    ["OpenRouter API Key"]='sk-or-v1-[a-zA-Z0-9]{64,}'
    ["Google API Key"]='AIza[0-9A-Za-z_-]{35}'
    ["AWS Access Key"]='AKIA[0-9A-Z]{16}'
    ["AWS Secret Key"]='aws_secret_access_key.*[0-9a-zA-Z/+=]{40}'
    ["GitHub Token"]='ghp_[a-zA-Z0-9]{36}'
    ["GitHub OAuth"]='gho_[a-zA-Z0-9]{36}'
    ["Generic API Token"]='[a-f0-9]{32,64}'
    ["Bearer Token"]='Bearer [a-zA-Z0-9._~+/=-]{20,}'
    ["Basic Auth"]='Basic [A-Za-z0-9+/=]{20,}'
    # "[-]{5}" instead of a bare "-----" keeps the pattern from starting with "-".
    ["Private Key Header"]='[-]{5}BEGIN (RSA |EC |OPENSSH |DSA |ENCRYPTED )?PRIVATE KEY[-]{5}'
    # ANSI-C quoting ($'...') makes bash turn \x27 into a real single quote
    # before grep sees the pattern.
    ["Hardcoded Password"]=$'(password|passwd|pwd)[[:space:]]*[:=][[:space:]]*["\x27][^\x27"]{8,}["\x27]'
    ["Database Connection"]='(mongodb(\+srv)?|mysql|postgres(ql)?)://[^:]+:[^@]+@'
    ["JWT Token"]='eyJ[a-zA-Z0-9_-]+\.eyJ[a-zA-Z0-9_-]+\.[a-zA-Z0-9_-]+'
)

# Allow-list of file and directory name fragments that the hook always permits.
WHITELIST_FILES=()
# Template / example configuration files
WHITELIST_FILES+=("secrets_template.json")
WHITELIST_FILES+=("example.env" ".env.example")
# Test data locations
WHITELIST_FILES+=("test_fixtures" "mock_data")

# Scan state: secrets_found becomes 1 as soon as anything suspicious is seen;
# findings collects one human-readable entry per hit for the final report.
secrets_found=0
findings=()

# Newline-separated list of staged paths that were Added, Copied, Modified or
# Renamed. Renames (R) are included on purpose: git reports a renamed file
# under its new name with status R, so filtering on ACM alone would hide it and
# could end the scan early when a commit consists only of renames.
staged_files=$(git diff --cached --name-only --diff-filter=ACMR)

# Nothing staged that could introduce content: report success and stop.
if [ -z "$staged_files" ]; then
    echo -e "${GREEN}✓ No files to scan${NC}"
    exit 0
fi

# Check each pattern against the lines ADDED by the staged changes.
#
# Only added lines of non-whitelisted files are scanned. Removed lines, context
# lines and diff headers are ignored, so deleting a secret never blocks a
# commit, and a match inside a whitelisted file cannot hide the same secret in
# another file.

# Return 0 when path $1 contains any WHITELIST_FILES entry as a literal
# substring, 1 otherwise.
is_whitelisted_path() {
    local path=$1 entry
    for entry in "${WHITELIST_FILES[@]}"; do
        if [[ "$path" == *"$entry"* ]]; then
            return 0
        fi
    done
    return 1
}

# Read a unified diff on stdin and print the text of every added line with the
# leading "+" removed. Lines are only considered after a "@@" hunk header, which
# skips the "+++ b/path" file header of each file.
extract_added_lines() {
    awk '
        /^diff --git / { in_hunk = 0; next }
        /^@@ /         { in_hunk = 1; next }
        in_hunk && /^\+/ { print substr($0, 2) }
    '
}

# Return 0 when text $2 contains a case-insensitive ERE match for pattern $1,
# 1 when it does not, and grep's error status (>1) when the pattern is invalid.
# -e stops a pattern that begins with "-" from being parsed as grep options.
text_matches_pattern() {
    grep -E -i -q -e "$1" <<< "$2"
}

# Staged paths to scan, read NUL-delimited so unusual file names survive intact.
scan_paths=()
while IFS= read -r -d '' staged_path; do
    if ! is_whitelisted_path "$staged_path"; then
        scan_paths+=("$staged_path")
    fi
done < <(git diff --cached --name-only --diff-filter=ACMR -z)

# Fetch the added lines once and reuse them for every pattern. The guard
# matters: an empty pathspec would make git diff every staged file, including
# the whitelisted ones.
added_lines=""
if [ "${#scan_paths[@]}" -gt 0 ]; then
    added_lines=$(git diff --cached --no-color --no-ext-diff -U0 -- "${scan_paths[@]}" \
        | extract_added_lines)
fi

if [ -n "$added_lines" ]; then
    for name in "${!PATTERNS[@]}"; do
        pattern="${PATTERNS[$name]}"

        match_status=0
        text_matches_pattern "$pattern" "$added_lines" || match_status=$?

        if [ "$match_status" -eq 0 ]; then
            secrets_found=1
            findings+=("$name")
        elif [ "$match_status" -gt 1 ]; then
            # Surface a broken pattern instead of silently treating it as clean.
            echo "warning: could not evaluate secret pattern '$name'" >&2
        fi
    done
fi

# Also check for common secret filenames being added.
# Each entry is matched as a literal substring of the staged path, so "." is
# not a wildcard and ".env" does not hit paths such as "src/environment.py".
secret_filenames=(
    "secrets.json"
    ".env"
    "credentials.json"
    "service-account.json"
    "id_rsa"
    "id_dsa"
    "id_ecdsa"
    "id_ed25519"
    ".aws/credentials"
    ".ssh/id_"
)

# Decide whether staged path $1 looks like a secret file.
# Prints the first secret_filenames entry contained in the path and returns 0.
# Returns 1 and prints nothing when the path contains a WHITELIST_FILES entry
# or matches no secret_filenames entry. The whitelist is tested against the
# real staged path, so e.g. ".env.example" is allowed although it contains ".env".
sensitive_filename_match() {
    local path=$1 entry
    for entry in "${WHITELIST_FILES[@]}"; do
        if [[ "$path" == *"$entry"* ]]; then
            return 1
        fi
    done
    for entry in "${secret_filenames[@]}"; do
        if [[ "$path" == *"$entry"* ]]; then
            printf '%s\n' "$entry"
            return 0
        fi
    done
    return 1
}

# One finding per offending staged file, naming the file and the matched rule.
while IFS= read -r staged_path; do
    [ -n "$staged_path" ] || continue
    if matched_name=$(sensitive_filename_match "$staged_path"); then
        secrets_found=1
        findings+=("Sensitive filename: $staged_path (matches $matched_name)")
    fi
done <<< "$staged_files"

# Final report: when anything was flagged, print the findings with remediation
# advice and abort the commit (exit 1); otherwise confirm success (exit 0).
if [ "$secrets_found" -eq 1 ]; then
    divider="${RED}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

    printf '%b\n' "$divider"
    printf '%b\n' "${RED}❌ SECURITY WARNING: Possible secrets detected!${NC}"
    printf '%b\n' "$divider"
    echo

    # One bullet per recorded finding
    printf '%b\n' "${YELLOW}Found potential secrets:${NC}"
    for item in "${findings[@]}"; do
        printf '%b\n' "  ${RED}•${NC} $item"
    done
    echo

    printf '%b\n' "${YELLOW}Recommended actions:${NC}"
    printf '%s\n' \
        "  1. Remove the secret from your code" \
        "  2. Store in secrets.json (which is gitignored)" \
        "  3. Use environment variables for sensitive data" \
        "  4. Check AgentUsage/secrets_management.md for guidance"
    echo

    printf '%b\n' "${YELLOW}If this is a false positive:${NC}"
    printf '%s\n' \
        "  • Add file to WHITELIST_FILES in this hook" \
        "  • Use git commit --no-verify (NOT recommended)"
    echo

    printf '%b\n' "$divider"
    exit 1
fi

printf '%b\n\n' "${GREEN}✓ No secrets detected in staged files${NC}"
exit 0
