#!/usr/bin/env python
"""
Pre-Commit Hook: Multi-Layer Security Validation
Prevents commits that contain:
- GitHub Actions workflows (.github/workflows/*)
- Obfuscated/compiled files (.pyo, .pyd, .so, .dll, .dylib)
- Hardcoded secrets (API keys, tokens, passwords)
- Large files (over MAX_FILE_SIZE_MB)
- Debugging code (breakpoint(), pdb, print("DEBUG...))

Note: Git Hooks on Windows use Git's bundled Python or system Python.
Configure with: git config core.hooksPath .githooks
Python: Requires Python 3.9+ (built-in generic annotations such as list[str]) in PATH or use 'py' launcher
"""
import re
import subprocess
import sys
from pathlib import Path

# Configuration
# Size limit in megabytes; check_file_size rejects files larger than this.
MAX_FILE_SIZE_MB = 5
# File suffixes (compiled / native binaries) that must not be committed.
BLOCKED_EXTENSIONS = [".pyo", ".pyd", ".so", ".dll", ".dylib"]
# Path prefixes (forward-slash form) under which no file may be committed.
BLOCKED_PATHS = [
    ".github/workflows/",  # GitHub Actions workflows
]
# Regular expressions for text that looks like a credential.
# check_secrets applies them with re.IGNORECASE.
SECRET_PATTERNS = [
    r"sk-[a-zA-Z0-9]{32,}",  # OpenAI API keys
    r"ghp_[a-zA-Z0-9]{36}",  # GitHub personal access tokens
    r"ghs_[a-zA-Z0-9]{36}",  # GitHub App server-to-server (installation) tokens
    r"AKIA[0-9A-Z]{16}",  # AWS access key IDs
    r"api[_-]?key['\"]?\s*[:=]\s*['\"]?[a-zA-Z0-9]{32,}",  # Generic API keys
    r"password['\"]?\s*[:=]\s*(['\"])[^'\"]{8,}\1",  # Passwords (quoted literals of 8+ chars only)
]
# Regular expressions for leftover debugging statements in Python files.
DEBUG_PATTERNS = [
    r"breakpoint\(\)",  # built-in breakpoint() call
    r"import\s+pdb",  # importing the debugger
    r"pdb\.set_trace\(\)",  # explicit pdb breakpoint
    r"print\(['\"]DEBUG",  # print() whose string literal starts with DEBUG
]


class PreCommitError(Exception):
    """Exception type for pre-commit failures."""


def get_staged_files() -> list[str]:
    """Return the paths of all files staged for the next commit.

    Runs ``git diff --cached`` restricted to added, copied, modified and
    renamed entries. Deleted files are excluded because there is nothing
    left to inspect. Renames are included so that moving a file into a
    blocked directory or onto a blocked extension is still validated.

    The ``-z`` flag makes git emit NUL-terminated, unquoted paths. Without
    it git C-quotes names that contain non-ASCII or special characters
    (``core.quotePath``), producing strings that do not exist on disk, so
    the content checks would silently skip those files.

    Returns:
        The staged paths as reported by git, in git's output order.

    Raises:
        subprocess.CalledProcessError: If the git command exits non-zero.
    """
    result = subprocess.run(
        ["git", "diff", "--cached", "--name-only", "--diff-filter=ACMR", "-z"],
        capture_output=True,
        text=True,
        check=True,
        encoding="utf-8",
        errors="replace",
    )
    # Output is NUL-terminated, so the final split element is empty; drop it.
    return [path for path in result.stdout.split("\0") if path]


def check_file_extension(file_path: str) -> tuple[bool, str]:
    """Check whether the file has a blocked extension.

    The comparison is case-insensitive so that names such as ``module.PYD``
    or ``lib.DLL`` cannot slip past the block list.

    Args:
        file_path: Path of the staged file.

    Returns:
        ``(True, "")`` when the extension is allowed, otherwise
        ``(False, message)`` where the message names the offending suffix
        as it appears in the file name.
    """
    suffix = Path(file_path).suffix
    blocked = {ext.lower() for ext in BLOCKED_EXTENSIONS}
    if suffix.lower() in blocked:
        return False, f"Blocked extension {suffix}"
    return True, ""


def check_blocked_path(file_path: str) -> tuple[bool, str]:
    """Report whether the file lies under one of the blocked path prefixes.

    Backslashes are converted to forward slashes before the prefix
    comparison against ``BLOCKED_PATHS``.

    Returns:
        ``(True, "")`` when the path is allowed, otherwise ``(False, message)``.
    """
    posix_style = file_path.replace("\\", "/")
    if any(posix_style.startswith(prefix) for prefix in BLOCKED_PATHS):
        return False, "GitHub Actions workflows are not allowed in this repository"
    return True, ""


def check_file_size(file_path: str) -> tuple[bool, str]:
    """Report whether the file on disk is within ``MAX_FILE_SIZE_MB``.

    The size is taken from the working-tree file. A path that does not
    exist on disk is skipped and treated as passing.

    Returns:
        ``(True, "")`` when the file is small enough or missing, otherwise
        ``(False, message)`` stating the actual and maximum size.
    """
    try:
        size_bytes = Path(file_path).stat().st_size
    except FileNotFoundError:
        # Nothing on disk to measure; skip the check.
        return True, ""

    size_mb = size_bytes / (1024 * 1024)
    if size_mb > MAX_FILE_SIZE_MB:
        return False, f"File too large: {size_mb:.2f} MB (max {MAX_FILE_SIZE_MB} MB)"
    return True, ""


def check_secrets(file_path: str) -> tuple[bool, list[str]]:
    """Scan a staged file for text that looks like a hardcoded secret.

    The content is read from the git index (``git show :<path>``), the same
    source check_debug_code uses, because that is what will be committed.
    The working-tree copy may differ: a secret can be staged and then edited
    out of the file on disk. If the index cannot be read (git is unavailable
    or the command fails) the working-tree file is scanned instead.

    Args:
        file_path: Path of the staged file as reported by git.

    Returns:
        ``(True, [])`` when nothing suspicious is found or the file cannot
        be read; otherwise ``(False, messages)`` with one message per match.
        Messages carry only the line number, never the matched text, so the
        secret is not echoed to the terminal.
    """
    content = None
    try:
        staged = subprocess.run(
            ["git", "show", f":{file_path}"],
            capture_output=True,
            text=True,
            check=False,
            encoding="utf-8",
            errors="ignore",  # binary blobs: drop undecodable bytes
        )
        if staged.returncode == 0:
            content = staged.stdout
    except OSError:
        pass  # git could not be launched; fall back to the working tree

    if content is None:
        try:
            with open(file_path, encoding="utf-8", errors="ignore") as f:
                content = f.read()
        except OSError:
            # Best effort: unreadable paths (missing, directory, permissions)
            # are skipped rather than failing the whole hook.
            return True, []

    violations = []
    for pattern in SECRET_PATTERNS:
        for match in re.finditer(pattern, content, re.IGNORECASE):
            line_num = content.count("\n", 0, match.start()) + 1
            violations.append(f"Line {line_num}: Possible secret detected")

    return not violations, violations


def check_debug_code(file_path: str) -> tuple[bool, list[str]]:
    """Scan a staged Python file for leftover debugging statements.

    Only paths ending in ``.py`` are inspected. The content is read from the
    git index (``git show :<path>``) so that the staged version is checked.
    If that fails, the working-tree file is read instead. Undecodable bytes
    are replaced rather than raising, so a file that is not valid UTF-8 is
    still scanned instead of being silently skipped.

    The patterns are plain regular expressions, so occurrences inside
    comments or string literals are reported as well.

    Args:
        file_path: Path of the staged file as reported by git.

    Returns:
        ``(True, [])`` when the file is not a Python file, cannot be read,
        or contains no match; otherwise ``(False, messages)`` with one
        message per match giving the line number and the matched text.
    """
    if not file_path.endswith(".py"):
        return True, []

    content = None
    try:
        staged = subprocess.run(
            ["git", "show", f":{file_path}"],
            capture_output=True,
            text=True,
            check=False,
            encoding="utf-8",
            errors="replace",
        )
        if staged.returncode == 0:
            content = staged.stdout
    except OSError:
        pass  # git could not be launched; fall back to the working tree

    if content is None:
        try:
            with open(file_path, encoding="utf-8", errors="replace") as f:
                content = f.read()
        except OSError:
            # Best effort: an unreadable file is skipped, not treated as a failure.
            return True, []

    violations = []
    for pattern in DEBUG_PATTERNS:
        for match in re.finditer(pattern, content):
            line_num = content.count("\n", 0, match.start()) + 1
            violations.append(f"Line {line_num}: Debug code found: {match.group()}")

    return not violations, violations


def main() -> int:
    """Run every check against each staged file and print a report.

    Each staged file goes through the blocked-path, extension, size, secret
    and debug-code checks. All violations are collected and printed grouped
    by file, followed by remediation tips.

    Returns:
        0 when there are no staged files or no violations, 1 otherwise.
    """
    # Consoles using a legacy code page (e.g. cp1252 on Windows) cannot
    # encode the emoji printed below; print() would raise UnicodeEncodeError
    # and fail the hook for reasons unrelated to the commit. Keep the
    # stream's encoding but substitute unencodable characters instead.
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if reconfigure is not None:
        reconfigure(errors="replace")

    print("🔒 Running Pre-Commit Security Checks...\n")

    staged_files = get_staged_files()
    if not staged_files:
        print("✅ No staged files to check")
        return 0

    all_violations = []

    for file_path in staged_files:
        violations = []

        # Check 1: Blocked paths
        ok, msg = check_blocked_path(file_path)
        if not ok:
            violations.append(f"  🚫 {msg}")

        # Check 2: File extension
        ok, msg = check_file_extension(file_path)
        if not ok:
            violations.append(f"  ❌ {msg}")

        # Check 3: File size
        ok, msg = check_file_size(file_path)
        if not ok:
            violations.append(f"  ⚠️  {msg}")

        # Check 4: Secrets
        ok, msgs = check_secrets(file_path)
        if not ok:
            violations.extend(f"  🔐 {msg}" for msg in msgs)

        # Check 5: Debug code
        ok, msgs = check_debug_code(file_path)
        if not ok:
            violations.extend(f"  🐛 {msg}" for msg in msgs)

        if violations:
            all_violations.append(f"\n📄 {file_path}:")
            all_violations.extend(violations)

    if all_violations:
        print("🔒 PRE-COMMIT SECURITY CHECK FAILED\n")
        print("\n".join(all_violations))
        print("\n💡 Tips:")
        print("  - Remove .pyo/.pyd files: git restore --staged <file>")
        print("  - Remove secrets: Use environment variables or SecretStorage")
        print("  - Remove debug code: Search for breakpoint(), pdb, print(DEBUG)")
        print("  - Reduce file size: Compress or exclude from repo")
        return 1

    print("✅ All Pre-Commit Security Checks Passed")
    return 0


if __name__ == "__main__":
    # Any unexpected error blocks the commit (exit status 1) after being
    # reported; otherwise main()'s return value becomes the exit status.
    try:
        exit_code = main()
    except Exception as e:
        print(f"❌ Pre-Commit Hook Error: {e}")
        exit_code = 1
    sys.exit(exit_code)
