"""Test cases for updated registry client with new API response structure.

Tests based on actual API response samples provided by API team (Feb 2026).
"""

import pytest
from unittest.mock import Mock, patch
from mca_sdk.registry.client import RegistryClient
from mca_sdk.registry.models import ModelConfig
from mca_sdk.utils.exceptions import RegistryError


class TestRegistryClientUpdatedAPI:
    """Test registry client parsing of the new API response structure.

    Each test feeds a canned API payload through
    ``RegistryClient.fetch_model_config`` with the client's session ``get``
    patched out, then checks the attributes of the returned config object.
    """

    # Connection settings shared by every test in this class.
    _REGISTRY_URL = "http://localhost:8080"
    _REGISTRY_TOKEN = "test-token"

    @classmethod
    def _fetch_config(cls, api_response):
        """Fetch a model config while the session returns ``api_response``.

        Builds a ``RegistryClient``, patches ``client._session.get`` so it
        returns a mock response with status code 200 whose ``json()`` yields
        ``api_response``, and calls ``fetch_model_config`` with the payload's
        own ``model_id``.

        Args:
            api_response: Dict standing in for the registry's JSON body. Must
                contain a ``"model_id"`` key, which is used as the ID to fetch.

        Returns:
            Whatever ``client.fetch_model_config`` returns for that payload.
        """
        client = RegistryClient(url=cls._REGISTRY_URL, token=cls._REGISTRY_TOKEN)

        mock_response = Mock()
        mock_response.status_code = 200
        mock_response.json.return_value = api_response

        # The leading underscore keeps pytest from collecting this helper as a
        # test; the patch is active only for the duration of the fetch.
        with patch.object(client._session, "get", return_value=mock_response):
            return client.fetch_model_config(api_response["model_id"])

    def test_parse_internal_regression_model(self):
        """Test parsing Sample 1: Internal regression model (equipment failure)."""
        # Sample response from API team
        api_response = {
            "model_id": "550e8400-e29b-41d4-a716-446655440000",
            "service_name": "equipment-failure-predictor",
            "team_name": "ops-engineering",
            "model_category": "internal",
            "model_type": "regression",
            "model_version": "4.2.0",
            "registry_id": "equipment-pred-v4",
            "deployment_id": "aws-lambda-prod-xyz",
            "thresholds": {
                "MAE": 12.5,
                "RMSE": 18.3,
                "r_squared": 0.82,
            },
            "metadata_fields": {},  # Empty dict (no sensitive data)
            "config": None,  # Can be null
            "created_at": "2023-12-05T14:30:00.000000Z",
            "updated_at": "2024-02-10T08:22:15.456789Z",
        }

        config = self._fetch_config(api_response)

        # Verify core fields
        assert config.model_id == "550e8400-e29b-41d4-a716-446655440000"
        assert config.service_name == "equipment-failure-predictor"
        assert config.team_name == "ops-engineering"
        assert config.model_category == "internal"
        assert config.model_type == "regression"

        # Verify model_version at top level
        assert config.model_version == "4.2.0"

        # Verify optional identifiers
        assert config.registry_id == "equipment-pred-v4"
        assert config.deployment_id == "aws-lambda-prod-xyz"

        # Verify thresholds (top-level, not nested)
        assert config.thresholds == {"MAE": 12.5, "RMSE": 18.3, "r_squared": 0.82}

        # Verify metadata_fields (empty dict)
        assert config.metadata_fields == {}

        # Verify extra_resource (empty when config is null)
        assert config.extra_resource == {}

        # Verify timestamps
        assert config.created_at == "2023-12-05T14:30:00.000000Z"
        assert config.updated_at == "2024-02-10T08:22:15.456789Z"

    def test_parse_vendor_generative_model(self):
        """Test parsing Sample 2: Vendor generative model (clinical notes summarizer)."""
        api_response = {
            "model_id": "9876fedc-ba98-4321-0fed-cba987654321",
            "service_name": "clinical-notes-summarizer",
            "team_name": "ai-platform",
            "model_category": "vendor",
            "model_type": "generative",
            "model_version": "3.2.1",
            "registry_id": None,  # Null for vendor models
            "deployment_id": None,  # Null for vendor models
            "thresholds": {
                "hallucination_rate": 0.03,
                "safety_score": 0.95,
                "rag_retrieval_precision": 0.9,
                "rag_retrieval_recall": 0.85,
                "pii_leakage_detection": 0.01,
                "rag_knowledge_base_freshness": 7.0,
            },
            "metadata_fields": {  # Nested structure with criticality
                "user_ecd": {"criticality": "yes"},
                "business_notes": {"criticality": "yes"},
            },
            "config": {  # JSONB object
                "extra_resource": {  # Nested in config
                    "deployment_env": "stage",
                    "vendor_name": "openai",
                    "model_name": "gpt-4",
                    "api_version": "2024-02-01",
                    "max_tokens": "2000",  # String, not int
                    "temperature": "0.3",  # String, not float
                },
            },
            "created_at": "2024-02-01T12:00:00.000000Z",
            "updated_at": "2024-02-23T10:30:45.123456Z",
        }

        config = self._fetch_config(api_response)

        # Verify core fields
        assert config.model_category == "vendor"
        assert config.model_type == "generative"
        assert config.model_version == "3.2.1"

        # Verify null identifiers for vendor models
        assert config.registry_id is None
        assert config.deployment_id is None

        # Verify GenAI-specific thresholds
        assert config.thresholds["hallucination_rate"] == 0.03
        assert config.thresholds["safety_score"] == 0.95

        # Verify nested metadata_fields structure
        assert "user_ecd" in config.metadata_fields
        assert "business_notes" in config.metadata_fields
        assert config.metadata_fields["user_ecd"]["criticality"] == "yes"

        # Verify extra_resource (nested in config)
        assert config.extra_resource["deployment_env"] == "stage"
        assert config.extra_resource["vendor_name"] == "openai"
        assert config.extra_resource["model_name"] == "gpt-4"
        # Values are strings (not typed)
        assert config.extra_resource["max_tokens"] == "2000"
        assert config.extra_resource["temperature"] == "0.3"

    def test_parse_internal_timeseries_model(self):
        """Test parsing Sample 3: Internal time-series model (revenue forecaster)."""
        api_response = {
            "model_id": "f7e8d9c0-b1a2-4567-8901-234567890def",
            "service_name": "monthly-revenue-forecaster",
            "team_name": "finance-ml",
            "model_category": "internal",
            "model_type": "time-series",
            "model_version": "1.5.2",
            "registry_id": "revenue-forecast-v1",
            "deployment_id": "sagemaker-endpoint-12345",
            "thresholds": {
                "MAE": 5000.0,
                "RMSE": 7500.0,
                "MAPE": 0.08,
                "missing_data_rate": 0.02,
                "outlier_rate": 0.05,
                "feature_drift": 0.12,
            },
            "metadata_fields": {},
            "config": {
                "association_id_column": "forecast_run_id",  # Time-series specific
                "extra_resource": {
                    "deployment_env": "prod",
                    "region": "us-west-2",
                    "forecast_horizon": "30",
                    "retraining_frequency": "monthly",
                },
            },
            "created_at": "2024-01-10T08:15:22.456789Z",
            "updated_at": "2024-02-18T16:45:12.345678Z",
        }

        config = self._fetch_config(api_response)

        # Verify time-series specific fields
        assert config.model_type == "time-series"
        assert config.association_id_column == "forecast_run_id"

        # Verify time-series thresholds
        assert config.thresholds["MAE"] == 5000.0
        assert config.thresholds["MAPE"] == 0.08
        assert config.thresholds["feature_drift"] == 0.12

        # Verify extra_resource
        assert config.extra_resource["deployment_env"] == "prod"
        assert config.extra_resource["region"] == "us-west-2"
        assert config.extra_resource["forecast_horizon"] == "30"

    def test_handle_missing_model_version(self):
        """Test handling when model_version is missing (should use 'unknown')."""
        api_response = {
            "model_id": "test-001",
            "service_name": "test-service",
            "team_name": "test-team",
            "model_category": "internal",
            "model_type": "regression",
            # model_version missing
            "thresholds": {},
            "metadata_fields": {},
            "config": None,
        }

        config = self._fetch_config(api_response)

        # Should default to "unknown"
        assert config.model_version == "unknown"

    def test_handle_invalid_thresholds_type(self):
        """Test handling when thresholds is not a dict."""
        api_response = {
            "model_id": "test-001",
            "service_name": "test-service",
            "team_name": "test-team",
            "model_category": "internal",
            "model_type": "regression",
            "model_version": "1.0.0",
            "thresholds": "invalid",  # Should be dict
            "metadata_fields": {},
            "config": None,
        }

        config = self._fetch_config(api_response)

        # Should default to empty dict
        assert config.thresholds == {}

    def test_handle_invalid_metadata_fields_type(self):
        """Test handling when metadata_fields is not a dict."""
        api_response = {
            "model_id": "test-001",
            "service_name": "test-service",
            "team_name": "test-team",
            "model_category": "internal",
            "model_type": "regression",
            "model_version": "1.0.0",
            "thresholds": {},
            "metadata_fields": ["invalid"],  # Should be dict
            "config": None,
        }

        config = self._fetch_config(api_response)

        # Should default to empty dict
        assert config.metadata_fields == {}

    def test_handle_invalid_extra_resource_type(self):
        """Test handling when extra_resource is not a dict."""
        api_response = {
            "model_id": "test-001",
            "service_name": "test-service",
            "team_name": "test-team",
            "model_category": "internal",
            "model_type": "regression",
            "model_version": "1.0.0",
            "thresholds": {},
            "metadata_fields": {},
            "config": {
                "extra_resource": "invalid",  # Should be dict
            },
        }

        config = self._fetch_config(api_response)

        # Should default to empty dict
        assert config.extra_resource == {}


class TestPHIFieldExtraction:
    """Exercise the helpers that read the nested ``metadata_fields`` mapping."""

    def test_extract_phi_field_names_empty(self):
        """An empty ``metadata_fields`` mapping yields an empty list of names."""
        from mca_sdk.utils.phi_utils import extract_phi_field_names

        assert extract_phi_field_names({}) == []

    def test_extract_phi_field_names_nested(self):
        """Every top-level key of the nested mapping is reported as a field name."""
        from mca_sdk.utils.phi_utils import extract_phi_field_names

        nested_fields = {
            "user_ecd": {"criticality": "yes"},
            "business_notes": {"criticality": "yes"},
            "mrn": {"criticality": "yes"},
        }
        expected_names = {"user_ecd", "business_notes", "mrn"}

        # Compare as sets: the order of the returned names is not checked.
        extracted_names = extract_phi_field_names(nested_fields)
        assert set(extracted_names) == expected_names

    def test_get_phi_field_criticality(self):
        """Criticality is looked up per field; an absent field gives "unknown"."""
        from mca_sdk.utils.phi_utils import get_phi_field_criticality

        criticality_map = {
            "user_id": {"criticality": "yes"},
            "notes": {"criticality": "no"},
        }

        # Field name -> criticality the helper is expected to report.
        expected_by_field = (
            ("user_id", "yes"),
            ("notes", "no"),
            ("nonexistent", "unknown"),
        )
        for field_name, expected in expected_by_field:
            assert get_phi_field_criticality(criticality_map, field_name) == expected
