#!/bin/bash
# Strict mode:
#   -e           exit when a command fails outside of a condition
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if any of its stages fails, not just the last
set -euo pipefail

# ============================================================================
# PR Comment Analyzer
# ============================================================================
# Pre-processes raw PR data from fetch-pr-data to produce categorized,
# structured analysis that agents can consume directly without jq parsing.
#
# Features:
#   - Pre-categorizes by severity (CRITICAL/MAJOR/MINOR/NITPICK)
#   - Extracts file:line references
#   - Tracks status (addressed vs unaddressed)
#   - Parses structured bot review sections
#   - Assigns unique IDs for tracking
#
# Usage:
#   fetch-pr-data 36 | analyze-pr-comments
#   analyze-pr-comments pr_data.json
#   fetch-pr-data 36 | analyze-pr-comments > categorized.json
# ============================================================================

# Colors for stderr output.
# The values are single-quoted, so each variable holds the literal text
# "\033[...m" rather than a real ESC byte; whatever prints them has to expand
# the backslash escape for the terminal to see a color code.
RED='\033[0;31m'      # prefix color used by print_error
GREEN='\033[0;32m'    # prefix color used by print_success
YELLOW='\033[1;33m'   # not referenced anywhere else in the visible file
BLUE='\033[0;34m'     # prefix color used by print_status
NC='\033[0m'          # "no color": resets attributes after a colored prefix

# Print a progress message on stderr, prefixed with a blue "[ANALYZE]" tag.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr; nothing on stdout
print_status() {
    # %b expands the escape sequences stored in the color variables; %s prints
    # the message literally, so backslashes inside it (paths, regex excerpts)
    # are not reinterpreted as escapes the way `echo -e` would.
    printf '%b[ANALYZE]%b %s\n' "${BLUE}" "${NC}" "$1" >&2
}

# Print a success message on stderr, prefixed with a green check mark.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr; nothing on stdout
print_success() {
    # Only the color variables go through %b (escape expansion); the message
    # uses %s so that backslashes in it survive unchanged.
    printf '%b✓%b %s\n' "${GREEN}" "${NC}" "$1" >&2
}

# Print an error message on stderr, prefixed with a red cross.
# Globals:   RED, NC (read)
# Arguments: $1 - message text, printed verbatim
# Outputs:   one line on stderr; nothing on stdout
print_error() {
    # The message frequently embeds user-supplied text (e.g. a file name), so
    # it is printed with %s; only the color variables are escape-expanded.
    printf '%b✗%b %s\n' "${RED}" "${NC}" "$1" >&2
}

# Print the help text on stdout and terminate the script.
# Arguments: $1 - optional exit status (default 0). Asking for help is not an
#                 error, and exit status 1 is documented below as "invalid
#                 input", so the default is success.
# Outputs:   help text on stdout
# Returns:   never returns; exits with the chosen status
usage() {
    cat << 'EOF'
Usage: analyze-pr-comments [FILE]

Pre-processes raw PR data from fetch-pr-data into categorized analysis.

Input:
  FILE    Optional JSON file from fetch-pr-data (default: stdin)

Options:
  -h, --help    Show this help and exit

Output:
  Structured JSON with pre-categorized issues and metadata

Examples:
  # From stdin
  fetch-pr-data 36 | analyze-pr-comments

  # From file
  analyze-pr-comments pr_data.json

  # Pipeline
  fetch-pr-data 36 | analyze-pr-comments > analysis.json

Output Structure:
  {
    "pr_number": 36,
    "repository": "...",
    "analysis_timestamp": "2025-11-17T...",
    "last_commit": {
      "sha": "abc123...",
      "timestamp": "2025-11-17T..."
    },
    "categorized_issues": {
      "critical": [...],
      "major": [...],
      "minor": [...],
      "nitpicks": [...]
    },
    "summary": {
      "total_critical": 4,
      "total_major": 16,
      ...
    },
    "structured_bot_reviews": [...]
  }

Exit codes:
  0 - Success
  1 - Invalid input or missing dependencies
EOF
    exit "${1:-0}"
}

# Verify that the external commands this script invokes are available.
# Arguments: none
# Outputs:   when something is missing, a list of the missing tools plus an
#            install hint on stderr
# Returns:   0 when both `jq` and `gh` are found; otherwise exits the script
#            with status 1
check_dependencies() {
    local missing_deps=()
    local dep   # keep the loop variable out of the global namespace

    if ! command -v jq > /dev/null 2>&1; then
        missing_deps+=("jq")
    fi

    if ! command -v gh > /dev/null 2>&1; then
        missing_deps+=("gh (GitHub CLI)")
    fi

    if [[ ${#missing_deps[@]} -eq 0 ]]; then
        return 0
    fi

    print_error "Missing required dependencies:"
    for dep in "${missing_deps[@]}"; do
        printf '  - %s\n' "$dep" >&2
    done
    {
        echo ""
        echo "Install with:"
        echo "  brew install gh jq"
    } >&2
    exit 1
}

# Look up the most recent commit of a pull request in the repository that
# `gh repo view` resolves for the current directory.
# Arguments: $1 - pull request number
# Outputs:   a compact JSON object {"sha": ..., "timestamp": ...} on stdout,
#            where timestamp is the commit's committer date. When the
#            repository or the commits cannot be determined, the placeholder
#            {"sha": "unknown", "timestamp": 0} is printed instead.
# Returns:   0 in all cases (failures are reported through the placeholder)
get_last_commit_info() {
    local pr_number="$1"
    local fallback='{"sha": "unknown", "timestamp": 0}'
    local repo_name
    local commit_info

    repo_name=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null) || repo_name=""

    if [[ -z "$repo_name" ]]; then
        print_error "Failed to get repository name"
        echo "$fallback"
        return 0
    fi

    # The commit list is paginated. Without --paginate only the first page is
    # returned and its last element is not the PR's last commit once the PR
    # has more commits than fit on one page. With --paginate the --jq filter
    # runs once per page, so every commit is emitted and the final one is
    # selected afterwards with jq.
    commit_info=$(
        gh api --paginate "repos/$repo_name/pulls/$pr_number/commits?per_page=100" \
            --jq '.[] | {sha: .sha, timestamp: .commit.committer.date}' 2>/dev/null \
            | jq -cs '.[-1] // empty' 2>/dev/null
    ) || commit_info=""

    # Covers both an API failure and a PR that reports no commits.
    if [[ -z "$commit_info" ]]; then
        commit_info="$fallback"
    fi

    echo "$commit_info"
}

# Convert an ISO-8601 timestamp (e.g. 2025-01-02T03:04:05Z) to Unix epoch
# seconds.
# Arguments: $1 - timestamp string
# Outputs:   epoch seconds on stdout, or 0 when the input is not a timestamp
#            or cannot be converted
# Returns:   0
iso_to_epoch() {
    local iso_time="$1"
    local iso_pattern='^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}'

    # Reject placeholders up front: GNU `date -d` happily accepts "" or "0"
    # and resolves them to midnight today instead of failing, which would
    # defeat callers that treat 0 as "unknown".
    if [[ ! "$iso_time" =~ $iso_pattern ]]; then
        echo 0
        return 0
    fi

    # Handle different date command syntax (macOS vs Linux)
    if date --version > /dev/null 2>&1; then
        # GNU date (Linux): understands the trailing Z / UTC offsets itself.
        date -d "$iso_time" +%s 2>/dev/null || echo 0
    else
        # BSD date (macOS): the "Z" in the format is only a literal, so -u is
        # required to interpret the fields as UTC rather than local time.
        date -u -j -f "%Y-%m-%dT%H:%M:%SZ" "$iso_time" "+%s" 2>/dev/null || echo 0
    fi
}

# Classify a comment body into a severity level using keyword heuristics.
# Arguments: $1 - comment body (any case; matching is case-insensitive)
# Outputs:   one of CRITICAL, MAJOR, MINOR, NITPICK on stdout
# Returns:   0
#
# Rules are evaluated in order and the first match wins:
#   1-3  structured review markers (section titles / colored-circle emoji)
#   4-6  free-text keywords
# Anything that matches no rule is MINOR.
classify_priority() {
    local body="$1"
    local body_lower
    local i

    body_lower=$(printf '%s' "$body" | tr '[:upper:]' '[:lower:]')

    local -a levels=(
        "CRITICAL"
        "MAJOR"
        "MINOR"
        "CRITICAL"
        "MAJOR"
        "NITPICK"
    )
    local -a patterns=(
        "(must fix before merge|blocking issue|🔴)"
        "(should fix|major issue|🟠)"
        "(nice to have|suggestion|🟡)"
        "(critical|security|vulnerability|data loss|crash|breaking change|test failure|fails)"
        "(major|bug|error|incorrect|performance|architecture|inconsistent|missing test)"
        # "nit"/"nits" must be a standalone word, otherwise "unit",
        # "initialize" or "definition" would be classified as nitpicks.
        # "suggestion" and "nice to have" are not repeated here because rule 3
        # already claims every body containing them.
        "((^|[^[:alnum:]_])nits?([^[:alnum:]_]|$)|nitpick|consider|optional|style|formatting)"
    )

    for i in "${!levels[@]}"; do
        # A here-string is used instead of `echo ... | grep -q`: with
        # `set -o pipefail`, grep -q exiting at the first match can kill the
        # writer with SIGPIPE on large bodies, which makes the pipeline (and
        # therefore the match) report failure.
        if grep -qE "${patterns[i]}" <<< "$body_lower"; then
            echo "${levels[i]}"
            return 0
        fi
    done

    # Default to MINOR
    echo "MINOR"
}

# Find the first "path/to/file.ext:LINE" reference mentioned in a comment.
# Recognized spellings: [file.py:123], file.py:123 and `file.py:123`, for the
# extensions py, js, ts, yaml, yml, json, sh and md.
# Arguments: $1 - comment body
# Outputs:   the first reference with any square brackets removed, or an
#            empty line when the body contains none
extract_file_references() {
    local text="$1"
    local ref_pattern='\[?[a-zA-Z0-9_/.+-]+\.(py|js|ts|yaml|yml|json|sh|md):[0-9]+\]?'
    local first_ref

    # grep exits non-zero when nothing matches; that simply means "no
    # reference", so the status is ignored and the empty result is used.
    first_ref=$(grep -oE "$ref_pattern" <<< "$text" | head -n 1) || true

    # Drop the optional surrounding brackets.
    first_ref=${first_ref//\[/}
    first_ref=${first_ref//\]/}

    printf '%s\n' "$first_ref"
}

# Decide whether a comment may already have been addressed, by comparing its
# creation time with the time of the PR's last commit.
# Arguments: $1 - comment creation timestamp (ISO-8601)
#            $2 - last commit timestamp (ISO-8601), or a placeholder such as
#                 0 / "null" / "unknown" / "" when it could not be determined
# Outputs:   "potentially_addressed" when the comment predates the last
#            commit, otherwise "unaddressed"
# Returns:   0
get_issue_status() {
    local comment_created_at="$1"
    local last_commit_timestamp="$2"
    local comment_epoch
    local commit_epoch

    # Placeholders are rejected here rather than by relying on the converter:
    # GNU `date -d` resolves inputs like "0" or "" to midnight today, which
    # would look like a real, very recent commit or comment.
    case "$last_commit_timestamp" in
        "" | 0 | null | unknown)
            echo "unaddressed"
            return 0
            ;;
    esac
    case "$comment_created_at" in
        "" | null)
            echo "unaddressed"
            return 0
            ;;
    esac

    # Convert to epoch times
    comment_epoch=$(iso_to_epoch "$comment_created_at")
    commit_epoch=$(iso_to_epoch "$last_commit_timestamp")

    # Anything that is not a plain non-negative integer counts as unknown.
    [[ "$comment_epoch" =~ ^[0-9]+$ ]] || comment_epoch=0
    [[ "$commit_epoch" =~ ^[0-9]+$ ]] || commit_epoch=0

    # If either time is unknown, stay conservative: an unparseable comment
    # time must not be mistaken for "older than the last commit".
    if [[ $commit_epoch -eq 0 ]] || [[ $comment_epoch -eq 0 ]]; then
        echo "unaddressed"
        return 0
    fi

    # If comment was created before last commit, potentially addressed
    if [[ $comment_epoch -lt $commit_epoch ]]; then
        echo "potentially_addressed"
    else
        echo "unaddressed"
    fi
}

# Print the lines of one titled section of a review body: everything from the
# first line containing the heading text up to the next line starting with
# "##", with the "##" lines themselves removed. Prints an empty line when the
# section is absent or has no content.
# Arguments: $1 - heading text to look for, $2 - review body
_extract_bot_section() {
    local heading="$1"
    local text="$2"

    echo "$text" | sed -n "/${heading}/,/^##/p" | grep -v '^##' || echo ""
}

# Split a structured bot review into its three known sections.
# Arguments: $1 - review body
# Outputs:   a JSON object with the keys must_fix, should_fix and
#            nice_to_have; each value is the section text, or null when the
#            section was not found or is empty
parse_bot_sections() {
    local body="$1"
    local blocking
    local recommended
    local optional

    blocking=$(_extract_bot_section 'Must Fix Before Merge' "$body")
    recommended=$(_extract_bot_section 'Should Fix' "$body")
    optional=$(_extract_bot_section 'Nice to Have' "$body")

    jq -n \
        --arg must_fix "$blocking" \
        --arg should_fix "$recommended" \
        --arg nice_to_have "$optional" \
        'def blank_to_null: if . == "" then null else . end;
        {
            must_fix: ($must_fix | blank_to_null),
            should_fix: ($should_fix | blank_to_null),
            nice_to_have: ($nice_to_have | blank_to_null)
        }'
}

# Turn one raw comment object into a structured issue object.
# Arguments: $1 - comment as a JSON object; the fields read are author, body,
#                 created_at (or submitted_at), path and line
#            $2 - name of the input collection the comment came from
#            $3 - prefix for the generated issue id
#            $4 - running counter used as the id suffix
#            $5 - timestamp of the PR's last commit (passed to get_issue_status)
# Outputs:   JSON object with id, source, author, severity, title,
#            description, file, line, status, created_at, structured_sections
process_comment() {
    local comment="$1"
    local comment_type="$2"
    local issue_id_prefix="$3"
    local issue_counter="$4"
    local last_commit_timestamp="$5"

    local author body created_at path line
    author=$(jq -r '.author' <<< "$comment")
    # A null body becomes "" instead of the literal text "null".
    body=$(jq -r '.body // ""' <<< "$comment")
    created_at=$(jq -r '.created_at // .submitted_at // ""' <<< "$comment")
    path=$(jq -r '.path // null' <<< "$comment")
    line=$(jq -r '.line // null' <<< "$comment")

    # Classify priority
    local severity
    severity=$(classify_priority "$body")

    # Fall back to a file:line reference mentioned in the text when the
    # comment itself is not attached to a file.
    local file_ref=""
    if [[ "$path" == "null" ]]; then
        file_ref=$(extract_file_references "$body")
        if [[ -n "$file_ref" ]]; then
            path=${file_ref%%:*}
            line=${file_ref##*:}
        fi
    fi

    # The line is converted with tonumber below; anything that is not a plain
    # integer is reported as "no line" rather than aborting the jq call.
    if [[ ! "$line" =~ ^[0-9]+$ ]]; then
        line="null"
    fi

    # Determine status
    local status
    status=$(get_issue_status "$created_at" "$last_commit_timestamp")

    # Parse structured sections for bot comments
    local structured_sections="{}"
    if [[ "$author" == *"bot"* ]] || [[ "$author" == "claude-code"* ]]; then
        structured_sections=$(parse_bot_sections "$body")
        # --argjson needs valid JSON even if section parsing produced nothing.
        [[ -n "$structured_sections" ]] || structured_sections="{}"
    fi

    # Generate unique ID
    local issue_id="${issue_id_prefix}_${issue_counter}"

    # Build issue object. The title (first line, at most 80 characters) is
    # derived inside jq because jq slices strings by code point; a byte-based
    # `cut -c` can cut a multi-byte character (emoji, accents) in half. A
    # trailing carriage return from CRLF line endings is dropped as well.
    jq -n \
        --arg id "$issue_id" \
        --arg source "$comment_type" \
        --arg author "$author" \
        --arg severity "$severity" \
        --arg description "$body" \
        --arg file "$path" \
        --arg line "$line" \
        --arg status "$status" \
        --arg created_at "$created_at" \
        --argjson structured "$structured_sections" \
        '{
            id: $id,
            source: $source,
            author: $author,
            severity: $severity,
            title: ($description | split("\n") | (.[0] // "") | rtrimstr("\r") | .[0:80]),
            description: $description,
            file: (if $file == "null" then null else $file end),
            line: (if $line == "null" then null else ($line | tonumber) end),
            status: $status,
            created_at: $created_at,
            structured_sections: $structured
        }'
}

# Main analysis function: categorize every comment of a PR and print the
# final report.
# Arguments: $1 - JSON document with pr_number, repository and the comment
#                 collections reviews, inline_comments, pr_comments and
#                 issue_comments (missing collections are treated as empty)
# Outputs:   the analysis JSON on stdout; progress messages on stderr
# Returns:   0 on success, 1 when pr_number is missing/non-numeric or the
#            report cannot be assembled
analyze_pr_data() {
    local pr_data="$1"

    # Extract PR metadata
    local pr_number repository
    pr_number=$(jq -r '.pr_number' <<< "$pr_data" 2>/dev/null) || pr_number=""
    repository=$(jq -r '.repository' <<< "$pr_data" 2>/dev/null) || repository=""

    # The report converts pr_number with tonumber; fail early and clearly
    # instead of at the very end of the run.
    if [[ ! "$pr_number" =~ ^[0-9]+$ ]]; then
        print_error "Invalid input: missing or non-numeric pr_number"
        return 1
    fi

    print_status "Analyzing PR #${pr_number}..."

    # Get last commit info
    print_status "Fetching last commit information..."
    local commit_info last_commit_sha last_commit_timestamp
    commit_info=$(get_last_commit_info "$pr_number") || commit_info=""
    last_commit_sha=$(jq -r '.sha' <<< "$commit_info" 2>/dev/null) || last_commit_sha=""
    last_commit_timestamp=$(jq -r '.timestamp' <<< "$commit_info" 2>/dev/null) || last_commit_timestamp=""

    print_success "Last commit: ${last_commit_sha:0:8}"

    # All issues are collected in processing order; grouping by severity,
    # counting and bot-review extraction happen in the single jq call below.
    local issues=()
    local issue_counter=0
    local comment_type comments comment issue

    # Process all comment types
    print_status "Processing comments from 4 endpoints..."

    for comment_type in "reviews" "inline_comments" "pr_comments" "issue_comments"; do
        # `[]?` yields nothing (instead of an error) for a missing collection.
        comments=$(jq -c --arg key "$comment_type" '.[$key][]?' <<< "$pr_data" 2>/dev/null) || comments=""

        while IFS= read -r comment; do
            if [[ -z "$comment" ]]; then
                continue
            fi

            # Not `((issue_counter++))`: that expression evaluates to 0 on the
            # first comment, which is exit status 1 and aborts the whole
            # script under `set -e`.
            issue_counter=$((issue_counter + 1))

            # Process comment into structured issue
            issue=$(process_comment "$comment" "$comment_type" "issue" "$issue_counter" "$last_commit_timestamp") || issue=""
            if [[ -z "$issue" ]]; then
                print_error "Skipping comment #${issue_counter}: could not be processed"
                continue
            fi

            issues+=("$issue")
        done <<< "$comments"
    done

    print_success "Processed $issue_counter total comments"

    # Build final JSON output
    print_status "Building structured output..."

    # The issues are slurped into one array (`jq -s`); an empty list still
    # prints a single newline, which jq reads as an empty array.
    if ! printf '%s\n' "${issues[@]+"${issues[@]}"}" | jq -s \
        --arg pr_number "$pr_number" \
        --arg repository "$repository" \
        --arg analysis_timestamp "$(date -u +"%Y-%m-%dT%H:%M:%SZ")" \
        --arg last_commit_sha "$last_commit_sha" \
        --arg last_commit_timestamp "$last_commit_timestamp" \
        '
        def with_severity($level): map(select(.severity == $level));
        def unaddressed_count: map(select(.status == "unaddressed")) | length;
        # Same author test the per-comment processing uses for bot detection.
        def from_bot: (.author | tostring) | (contains("bot") or startswith("claude-code"));
        # A bot review is only worth listing when at least one section has text.
        def has_sections:
            (.structured_sections | type) == "object"
            and ([.structured_sections[] | select(. != null)] | length) > 0;

        . as $issues
        | ($issues | with_severity("CRITICAL")) as $critical
        | ($issues | with_severity("MAJOR")) as $major
        | ($issues | with_severity("MINOR")) as $minor
        | ($issues | with_severity("NITPICK")) as $nitpicks
        | {
            pr_number: ($pr_number | tonumber),
            repository: $repository,
            analysis_timestamp: $analysis_timestamp,
            last_commit: {
                sha: $last_commit_sha,
                timestamp: $last_commit_timestamp
            },
            categorized_issues: {
                critical: $critical,
                major: $major,
                minor: $minor,
                nitpicks: $nitpicks
            },
            summary: {
                total_critical: ($critical | length),
                total_major: ($major | length),
                total_minor: ($minor | length),
                total_nitpicks: ($nitpicks | length),
                total_all: (($critical | length) + ($major | length) + ($minor | length) + ($nitpicks | length)),
                total_actionable: (($critical | length) + ($major | length) + ($minor | length)),
                unaddressed_critical: ($critical | unaddressed_count),
                unaddressed_major: ($major | unaddressed_count),
                unaddressed_minor: ($minor | unaddressed_count)
            },
            structured_bot_reviews: [
                $issues[]
                | select(from_bot)
                | select(has_sections)
                | {author: .author, sections: .structured_sections}
            ]
        }'; then
        print_error "Failed to assemble analysis output"
        return 1
    fi

    print_success "Analysis complete"
}

# Main
main() {
    # Check for help flag
    if [[ "${1:-}" == "-h" ]] || [[ "${1:-}" == "--help" ]]; then
        usage
    fi

    check_dependencies

    # Read input (from file or stdin)
    local pr_data
    if [[ $# -gt 0 ]]; then
        # From file
        if [[ ! -f "$1" ]]; then
            print_error "File not found: $1"
            exit 1
        fi
        pr_data=$(cat "$1")
    else
        # From stdin
        pr_data=$(cat)
    fi

    # Validate JSON
    if ! echo "$pr_data" | jq empty 2>/dev/null; then
        print_error "Invalid JSON input"
        exit 1
    fi

    # Analyze and output
    analyze_pr_data "$pr_data"
}

# Script entry point: hand every command-line argument to main unchanged.
main "$@"
