#!/usr/bin/env python3
"""
Standalone Language Detector Script

This script detects programming languages, versions, and features in code files.
It's designed to work independently of the Typer CLI framework to avoid integration issues.
"""

import asyncio
import json
import sys
import os
from pathlib import Path
import traceback

from rich.console import Console

# Create console for rich output
console = Console()

def display_table_results(result):
    """Display language detection results as rich tables.

    Prints a language-distribution table followed by a per-file analysis
    table to the module-level console.

    Args:
        result: Mapping that provides "file_count", "language_distribution"
            (language -> file count) and "file_analyses" (file path ->
            analysis mapping). Each analysis may carry "language",
            "confidence", "version" (a mapping with "detected") and
            "frameworks" (a list of mappings with "name").
    """
    from rich.markup import escape
    from rich.table import Table

    file_count = result["file_count"]

    console.print("[bold blue]Language Detection Results[/bold blue]")
    console.print(f"Analyzed {file_count} files")

    # Display language distribution
    console.print("\n[bold]Project Language Distribution:[/bold]")
    dist_table = Table(show_header=True)
    dist_table.add_column("Language")
    dist_table.add_column("Files")
    dist_table.add_column("Percentage")

    for lang, count in result["language_distribution"].items():
        # Guard against an empty result set instead of dividing by zero.
        percentage = (count / file_count) * 100 if file_count else 0.0
        dist_table.add_row(
            escape(str(lang)),
            str(count),
            f"{percentage:.1f}%",
        )

    console.print(dist_table)

    # Display file analysis results
    console.print("\n[bold]File Analysis:[/bold]")
    file_table = Table(show_header=True)
    file_table.add_column("File")
    file_table.add_column("Language")
    file_table.add_column("Confidence")
    file_table.add_column("Version")
    file_table.add_column("Frameworks")

    for file_path, analysis in result["file_analyses"].items():
        # "or" fallbacks tolerate keys that are present but set to None.
        frameworks = ", ".join(
            str(framework["name"])
            for framework in (analysis.get("frameworks") or [])
        )
        version = (analysis.get("version") or {}).get("detected") or "unknown"
        confidence = analysis.get("confidence", 0)
        if isinstance(confidence, float):
            confidence_str = f"{confidence * 100:.1f}%"
        else:
            confidence_str = str(confidence)

        # Table cells are parsed as console markup; escape external text so
        # bracketed path segments (e.g. "pages/[id].tsx") are shown verbatim.
        file_table.add_row(
            escape(str(file_path)),
            escape(str(analysis.get("language") or "unknown")),
            confidence_str,
            escape(str(version)),
            escape(frameworks),
        )

    console.print(file_table)


def display_markdown_results(result):
    """Display language detection results as a Markdown document.

    Emits a heading, a language-distribution table and a per-file analysis
    table to the module-level console.

    Args:
        result: Mapping that provides "file_count", "language_distribution"
            (language -> file count) and "file_analyses" (file path ->
            analysis mapping). Each analysis may carry "language",
            "confidence", "version" (a mapping with "detected") and
            "frameworks" (a list of mappings with "name").
    """

    def cell(value):
        # A literal pipe would otherwise terminate the Markdown table cell.
        return str(value).replace("|", "\\|")

    file_count = result["file_count"]

    lines = [
        "# Language Detection Results",
        "",
        f"Analyzed {file_count} files",
        "",
        "## Project Language Distribution",
        "",
        "| Language | Files | Percentage |",
        "|----------|-------|------------|",
    ]

    for lang, count in result["language_distribution"].items():
        # Guard against an empty result set instead of dividing by zero.
        percentage = (count / file_count) * 100 if file_count else 0.0
        lines.append(f"| {cell(lang)} | {count} | {percentage:.1f}% |")

    lines += [
        "",
        "## File Analysis",
        "",
        "| File | Language | Confidence | Version | Frameworks |",
        "|------|----------|------------|---------|------------|",
    ]

    for file_path, analysis in result["file_analyses"].items():
        # "or" fallbacks tolerate keys that are present but set to None.
        frameworks = ", ".join(
            str(framework["name"])
            for framework in (analysis.get("frameworks") or [])
        )
        version = (analysis.get("version") or {}).get("detected") or "unknown"
        language = analysis.get("language") or "unknown"
        confidence = analysis.get("confidence", 0)
        if isinstance(confidence, float):
            confidence_str = f"{confidence * 100:.1f}%"
        else:
            confidence_str = str(confidence)

        lines.append(
            f"| {cell(file_path)} | {cell(language)} | {confidence_str} "
            f"| {cell(version)} | {cell(frameworks)} |"
        )

    # markup=False keeps bracketed text (e.g. "[id].tsx") intact, and
    # soft_wrap=True stops the console from hard-wrapping long table rows,
    # which would break the Markdown table when output is redirected.
    console.print("\n".join(lines) + "\n", markup=False, soft_wrap=True)


def detect_language(path, output_format="table", no_llm=False):
    """
    Detect programming languages, versions, and features in code files.

    Collects the readable text files under ``path`` (or ``path`` itself when
    it is a single file), runs the "language_detector" agent on them and
    prints the result in the requested format.

    Args:
        path: Path to file or directory to analyze
        output_format: Output format (table, json, markdown); any value other
            than "json" or "markdown" is rendered as a table
        no_llm: Disable LLM-based analysis, use only heuristic detection

    Returns:
        0 on success, 1 when nothing could be analyzed or an error occurred.
    """
    from rich.markup import escape

    try:
        # Process file paths
        file_paths = []
        file_contents = []

        file_path = Path(path)
        if file_path.is_dir():
            # Process all files in directory
            console.print(f"[bold]Scanning directory:[/bold] {escape(str(file_path))}")
            file_count = 0
            skipped_count = 0

            for path_item in file_path.glob("**/*"):
                # Skip hidden files AND anything inside a hidden directory
                # (e.g. ".git/"), judged relative to the scanned root so a
                # hidden ancestor of the root itself does not exclude everything.
                relative_parts = path_item.relative_to(file_path).parts
                if any(part.startswith(".") for part in relative_parts):
                    continue
                if not path_item.is_file():
                    continue

                try:
                    # Skip very large files
                    size = path_item.stat().st_size
                    if size > 1_000_000:  # 1MB
                        console.print(
                            f"[yellow]Skipping large file:[/yellow] "
                            f"{escape(str(path_item))} ({size / 1_000_000:.1f} MB)"
                        )
                        skipped_count += 1
                        continue

                    # Skip binary files
                    if is_binary_file(path_item):
                        skipped_count += 1
                        continue

                    # Decode explicitly as UTF-8 so results do not depend on
                    # the platform's locale encoding; undecodable bytes are
                    # replaced rather than aborting the scan.
                    content = path_item.read_text(encoding="utf-8", errors="replace")
                    file_paths.append(str(path_item))
                    file_contents.append(content)
                    file_count += 1

                    # Show progress for large directories
                    if file_count % 50 == 0:
                        console.print(f"[bold]Processed[/bold] {file_count} files...")

                except Exception as e:
                    console.print(
                        f"[bold yellow]Warning: Could not read "
                        f"{escape(str(path_item))}: {escape(str(e))}[/bold yellow]"
                    )
                    skipped_count += 1

            if skipped_count > 0:
                console.print(f"[yellow]Skipped {skipped_count} files (binary or too large)[/yellow]")

        else:
            # Process single file
            try:
                if not file_path.exists():
                    console.print(f"[bold red]Error: File not found: {escape(str(file_path))}[/bold red]")
                    return 1

                # Check if file is too large
                size = file_path.stat().st_size
                if size > 5_000_000:  # 5MB
                    console.print(
                        f"[bold red]Error: File is too large: "
                        f"{escape(str(file_path))} ({size / 1_000_000:.1f} MB)[/bold red]"
                    )
                    console.print("Please use a smaller file or analyze specific directories.")
                    return 1

                # Check if file is binary
                if is_binary_file(file_path):
                    console.print(
                        f"[bold red]Error: Cannot analyze binary file: "
                        f"{escape(str(file_path))}[/bold red]"
                    )
                    return 1

                content = file_path.read_text(encoding="utf-8", errors="replace")
                file_paths.append(str(file_path))
                file_contents.append(content)
            except Exception as e:
                console.print(
                    f"[bold red]Error: Could not read "
                    f"{escape(str(file_path))}: {escape(str(e))}[/bold red]"
                )
                return 1

        if not file_paths:
            console.print("[bold yellow]No valid files found to analyze.[/bold yellow]")
            return 1

        # Create and run the agent
        console.print("[bold]Creating language detector agent...[/bold]")
        # Imported lazily so a missing optional dependency is reported by the
        # ImportError handler below instead of failing at module import time.
        from vaahai.core.agents.factory import AgentFactory
        agent = AgentFactory.create_agent("language_detector", {})

        console.print(f"[bold]Running language detection on {len(file_paths)} files...[/bold]")
        console.print(f"[bold]LLM-based analysis:[/bold] {'Disabled' if no_llm else 'Enabled'}")

        result = asyncio.run(agent.run(file_paths, file_contents, use_llm=not no_llm))

        # Display results based on output format
        if output_format == "json":
            # markup=False: JSON strings containing "[tag]"-like text must not
            # be parsed as console markup. soft_wrap=True: never insert hard
            # line breaks inside long string values, which would make the
            # emitted JSON invalid.
            console.print(json.dumps(result, indent=2), markup=False, soft_wrap=True)
        elif output_format == "markdown":
            display_markdown_results(result)
        else:  # Default to table
            display_table_results(result)

        return 0
    except ImportError as e:
        console.print(f"[bold red]Error: Missing dependency:[/bold red] {escape(str(e))}")
        console.print("Make sure you have all required dependencies installed:")
        # markup=False so "[all]" is printed literally instead of being
        # consumed as a style tag.
        console.print("  pip install vaahai[all]", markup=False)
        return 1
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {escape(str(e))}")
        if os.environ.get("VAAHAI_DEBUG"):
            console.print("\n[bold red]Debug traceback:[/bold red]")
            console.print(traceback.format_exc(), markup=False)
        return 1


def is_binary_file(file_path):
    """Return True when the start of the file contains a NUL byte.

    Only the first 1024 bytes are inspected. Any failure to open or read
    the file is treated as "not binary" and yields False.
    """
    try:
        with open(file_path, "rb") as handle:
            head = handle.read(1024)
    except Exception:
        # Best effort: an unreadable file is reported as non-binary.
        return False
    # A NUL byte in the leading chunk is the heuristic for binary content.
    return b"\x00" in head


def print_help():
    """Print the usage, arguments, options and examples to the console."""
    help_lines = (
        "[bold blue]Vaahai Language Detector[/bold blue]",
        "\nDetect programming languages, versions, and features in code files.",
        "\n[bold]Usage:[/bold]",
        "  vaahai-detect-language PATH [OPTIONS]",
        "\n[bold]Arguments:[/bold]",
        "  PATH                  Path to file or directory to analyze",
        "\n[bold]Options:[/bold]",
        "  --format, -f FORMAT   Output format: table, json, or markdown [default: table]",
        "  --no-llm              Disable LLM-based analysis, use only heuristic detection",
        "  --help, -h            Show this help message and exit",
        "  --debug               Enable debug mode (shows detailed error traceback)",
        "\n[bold]Examples:[/bold]",
        "  vaahai-detect-language app.py",
        "  vaahai-detect-language src/ --format json",
        "  vaahai-detect-language file.js --no-llm",
    )
    # One print call per line, in order, exactly as the text is laid out above.
    for help_line in help_lines:
        console.print(help_line)


def main():
    """Main entry point for the standalone detect-language command.

    Parses ``sys.argv`` by hand (no CLI framework), then delegates to
    ``detect_language``.

    Returns:
        Process exit code: 0 on success or when help was shown, 1 on a
        usage error or when detection fails.
    """
    # Parse command line arguments
    args = sys.argv[1:]

    # Check for debug mode; drop every occurrence so a repeated flag is not
    # later reported as an unknown option.
    if "--debug" in args:
        os.environ["VAAHAI_DEBUG"] = "1"
        args = [arg for arg in args if arg != "--debug"]

    if not args or "--help" in args or "-h" in args:
        print_help()
        return 0

    valid_formats = ("table", "json", "markdown")

    # Extract path (first non-option argument)
    path = None
    output_format = "table"
    no_llm = False

    i = 0
    while i < len(args):
        arg = args[i]
        if arg in ("--format", "-f"):
            if i + 1 >= len(args):
                console.print("[bold red]Error: Missing value for --format option[/bold red]")
                return 1
            output_format = args[i + 1]
            i += 2
        elif arg == "--no-llm":
            no_llm = True
            i += 1
        elif arg.startswith("-"):
            console.print(f"[bold red]Error: Unknown option {arg}[/bold red]")
            return 1
        elif path is None:
            path = arg
            i += 1
        else:
            console.print("[bold red]Error: Multiple paths specified[/bold red]")
            return 1

    # Reject unsupported formats up front; otherwise a typo (or an option
    # swallowed as the format value, e.g. "--format --no-llm") would silently
    # fall back to table output.
    if output_format not in valid_formats:
        console.print(
            "[bold red]Error: Invalid format[/bold red] "
            + repr(output_format)
            + " (choose from: "
            + ", ".join(valid_formats)
            + ")",
            markup=False,
        )
        return 1

    if path is None:
        console.print("[bold red]Error: No path specified[/bold red]")
        return 1

    return detect_language(path, output_format, no_llm)


if __name__ == "__main__":
    try:
        # Propagate main()'s return code to the shell; previously it was
        # discarded and the process exited 0 even when detection failed.
        # SystemExit is not an Exception subclass, so it passes through the
        # handlers below untouched.
        sys.exit(main())
    except KeyboardInterrupt:
        console.print("\n[bold yellow]Operation cancelled by user[/bold yellow]")
        sys.exit(1)
    except Exception as e:
        console.print(f"[bold red]Error:[/bold red] {str(e)}")
        # main() works on a copy of sys.argv, so the flag is still visible here.
        if "--debug" in sys.argv:
            console.print("[bold red]Traceback:[/bold red]")
            console.print(traceback.format_exc())
        else:
            console.print("[yellow]Run with --debug for more information[/yellow]")
        sys.exit(1)
