geronimo.config.schema
Configuration schema definitions for Geronimo.
This module defines Pydantic models for the geronimo.yaml configuration file.
"""Configuration schema definitions for Geronimo.

This module defines Pydantic models for the geronimo.yaml configuration file.
"""

from enum import Enum
from typing import Optional

from pydantic import BaseModel, ConfigDict, Field


class ModelType(str, Enum):
    """Type of ML model deployment.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    REALTIME = "realtime"
    BATCH = "batch"


class MLFramework(str, Enum):
    """Supported ML frameworks.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SKLEARN = "sklearn"
    PYTORCH = "pytorch"
    TENSORFLOW = "tensorflow"
    XGBOOST = "xgboost"
    CUSTOM = "custom"


class AlertType(str, Enum):
    """Types of alert destinations.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SLACK = "slack"
    EMAIL = "email"
    ADO_WORKITEM = "ado_workitem"


# ============================================================================
# Sub-configurations
# ============================================================================


class ProjectConfig(BaseModel):
    """Project metadata configuration.

    Only ``name`` is required; ``version`` and ``description`` have defaults.
    """

    # NOTE(review): the description asks for lowercase/hyphens, but no pattern
    # constraint is declared on the field.
    name: str = Field(..., description="Project name (lowercase, hyphens allowed)")
    version: str = Field(default="1.0.0", description="Semantic version")
    description: Optional[str] = Field(
        default=None, description="Brief project description"
    )


class ModelConfig(BaseModel):
    """ML model configuration.

    Every field has a default, so the model can be built with no arguments.
    """

    type: ModelType = Field(
        default=ModelType.REALTIME, description="Deployment type (realtime or batch)"
    )
    framework: MLFramework = Field(
        default=MLFramework.SKLEARN, description="ML framework used"
    )
    artifact_path: Optional[str] = Field(
        default="models/model.joblib", description="Path to model artifact"
    )
    # NOTE(review): described as realtime-only, but nothing here ties this flag
    # to ``type``.
    mcp_enabled: bool = Field(
        default=True,
        description="Enable MCP server for AI agent integration (realtime only)",
    )


class RuntimeConfig(BaseModel):
    """Python runtime configuration.

    Every field has a default; ``dependencies`` starts as a fresh empty list.
    """

    python_version: str = Field(default="3.11", description="Python version")
    dependencies: list[str] = Field(
        default_factory=list, description="Additional pip dependencies"
    )


class ScalingConfig(BaseModel):
    """Auto-scaling configuration.

    Each field is range-checked on its own via ``ge``/``le`` bounds.
    """

    # NOTE(review): no cross-field check enforces min_instances <= max_instances.
    min_instances: int = Field(
        default=1, ge=0, description="Minimum number of instances"
    )
    max_instances: int = Field(
        default=4, ge=1, description="Maximum number of instances"
    )
    target_cpu_percent: int = Field(
        default=70, ge=1, le=100, description="Target CPU utilization for scaling"
    )


class InfrastructureConfig(BaseModel):
    """Infrastructure resource configuration.

    Every field has a default; the networking fields default to ``None``.
    """

    # NOTE(review): the description lists specific CPU sizes, but the field
    # accepts any int.
    cpu: int = Field(
        default=512,
        description="CPU units (256, 512, 1024, 2048, 4096)",
    )
    memory: int = Field(
        default=1024,
        description="Memory in MB",
    )
    scaling: ScalingConfig = Field(default_factory=ScalingConfig)
    # NOTE(review): ${ENV_VAR} interpolation is not performed by this model;
    # presumably it happens wherever the YAML is loaded.
    vpc_id: Optional[str] = Field(
        default=None, description="VPC ID (supports ${ENV_VAR} interpolation)"
    )
    subnets: Optional[list[str]] = Field(
        default=None, description="Subnet IDs for deployment"
    )
    security_groups: Optional[list[str]] = Field(
        default=None, description="Security group IDs"
    )


class AlertConditions(BaseModel):
    """Conditions that trigger alerts.

    Rate-style thresholds are bounded to [0, 1]; latency must be at least 1 ms.
    """

    error_rate_threshold: float = Field(
        default=0.01, ge=0, le=1, description="Error rate threshold (0-1)"
    )
    latency_p99_threshold_ms: int = Field(
        default=500, ge=1, description="P99 latency threshold in milliseconds"
    )
    data_drift_threshold: float = Field(
        default=0.1, ge=0, le=1, description="Data drift detection threshold"
    )


class AlertConfig(BaseModel):
    """Alert destination configuration.

    Only ``type`` is required; ``conditions`` falls back to the
    ``AlertConditions`` defaults.
    """

    type: AlertType = Field(..., description="Alert destination type")
    channel: Optional[str] = Field(
        default=None, description="Slack channel or email address"
    )
    webhook_url: Optional[str] = Field(
        default=None, description="Webhook URL for Slack"
    )
    conditions: AlertConditions = Field(default_factory=AlertConditions)


class SourceSystem(str, Enum):
    """Database source systems for query-based data capture.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SNOWFLAKE = "snowflake"
    POSTGRES = "postgres"
    SQLSERVER = "sqlserver"


class DriftDetectionConfig(BaseModel):
    """Drift detection configuration.

    Disabled by default; every field has a default value.
    """

    enabled: bool = Field(default=False, description="Enable drift detection")
    s3_bucket: str = Field(
        default="model-monitoring",
        description="S3 bucket for storing snapshots and reports",
    )
    sampling_rate: float = Field(
        default=0.05,
        ge=0.001,
        le=1.0,
        description="Fraction of requests to sample (0.001-1.0)",
    )
    window_days: int = Field(
        default=7,
        ge=1,
        le=90,
        description="Rolling window size in days for recent data",
    )
    drift_threshold: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Threshold for feature drift detection",
    )
    dataset_drift_threshold: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Fraction of drifted features to trigger dataset drift",
    )
    retention_days: int = Field(
        default=90,
        ge=1,
        description="Days to retain historical snapshots",
    )
    auto_capture_on_deploy: bool = Field(
        default=True,
        description="Automatically capture reference on deployment",
    )


class MonitoringConfig(BaseModel):
    """Monitoring and observability configuration.

    Defaults to five built-in metric names, no alerts, the dashboard enabled,
    and the ``DriftDetectionConfig`` defaults.
    """

    # A factory is used so each instance gets its own list of metric names.
    metrics: list[str] = Field(
        default_factory=lambda: [
            "latency_p50",
            "latency_p99",
            "error_rate",
            "request_count",
            "prediction_distribution",
        ],
        description="Metrics to collect",
    )
    alerts: list[AlertConfig] = Field(
        default_factory=list, description="Alert configurations"
    )
    dashboard_enabled: bool = Field(
        default=True, description="Generate CloudWatch dashboard"
    )
    drift_detection: DriftDetectionConfig = Field(
        default_factory=DriftDetectionConfig, description="Drift detection settings"
    )


class EnvironmentConfig(BaseModel):
    """Deployment environment configuration.

    Only ``name`` is required; both deployment flags default to ``False``.
    """

    name: str = Field(..., description="Environment name (dev, staging, prod)")
    auto_deploy: bool = Field(
        default=False, description="Auto-deploy on pipeline success"
    )
    approval_required: bool = Field(
        default=False, description="Require manual approval for deployment"
    )
    variables: dict[str, str] = Field(
        default_factory=dict, description="Environment-specific variables"
    )


class DeploymentConfig(BaseModel):
    """Deployment pipeline configuration.

    When ``environments`` is omitted, two are created: ``dev`` with
    auto-deploy on, and ``prod`` with manual approval required.
    """

    environments: list[EnvironmentConfig] = Field(
        default_factory=lambda: [
            EnvironmentConfig(name="dev", auto_deploy=True),
            EnvironmentConfig(name="prod", approval_required=True),
        ],
        description="Deployment environments",
    )
    container_registry: Optional[str] = Field(
        default=None, description="ECR registry URL (auto-generated if not provided)"
    )


class BatchBackend(str, Enum):
    """Batch job deployment backends.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    STEP_FUNCTIONS = "step-functions"
    AIRFLOW = "airflow"


class BatchJobConfig(BaseModel):
    """Individual batch job configuration.

    ``name`` and ``flow_file`` are required; the rest have defaults.
    """

    name: str = Field(..., description="Job name")
    flow_file: str = Field(..., description="Path to Metaflow flow file")
    schedule: Optional[str] = Field(
        default=None, description="Cron schedule expression"
    )
    cpu: int = Field(default=4, description="CPU cores for batch compute")
    memory: int = Field(default=8192, description="Memory in MB")


class StepFunctionsConfig(BaseModel):
    """AWS Step Functions backend configuration.

    Every field has a default; ``batch_queue`` defaults to ``None``.
    """

    s3_root: str = Field(
        default="s3://metaflow-data",
        description="S3 root for Metaflow artifacts",
    )
    batch_queue: Optional[str] = Field(
        default=None, description="AWS Batch queue name"
    )


class AirflowConfig(BaseModel):
    """Astronomer Airflow backend configuration.

    Every field has a default value.
    """

    connection_id: str = Field(
        default="astronomer_default",
        description="Airflow connection ID",
    )
    namespace: str = Field(
        default="default",
        description="Kubernetes namespace for pod operators",
    )


class BatchConfig(BaseModel):
    """Batch job configuration.

    Disabled by default. Both backend sub-configurations are always present
    with their defaults, whichever ``backend`` is selected.
    """

    enabled: bool = Field(default=False, description="Enable batch job generation")
    backend: BatchBackend = Field(
        default=BatchBackend.STEP_FUNCTIONS,
        description="Deployment backend (step-functions or airflow)",
    )
    step_functions: StepFunctionsConfig = Field(default_factory=StepFunctionsConfig)
    airflow: AirflowConfig = Field(default_factory=AirflowConfig)
    jobs: list[BatchJobConfig] = Field(
        default_factory=list, description="Batch job definitions"
    )


# ============================================================================
# Main Configuration
# ============================================================================


class GeronimoConfig(BaseModel):
    """Root configuration for a Geronimo project.

    This is the Pydantic model for geronimo.yaml files.

    Only ``project`` is required; every other section falls back to its own
    defaults.
    """

    # Reject unknown top-level keys instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")

    project: ProjectConfig = Field(..., description="Project metadata")
    model: ModelConfig = Field(default_factory=ModelConfig)
    runtime: RuntimeConfig = Field(default_factory=RuntimeConfig)
    infrastructure: InfrastructureConfig = Field(
        default_factory=InfrastructureConfig
    )
    monitoring: MonitoringConfig = Field(default_factory=MonitoringConfig)
    deployment: DeploymentConfig = Field(default_factory=DeploymentConfig)
    batch: BatchConfig = Field(default_factory=BatchConfig)
class ModelType(str, Enum):
    """Type of ML model deployment.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    REALTIME = "realtime"
    BATCH = "batch"
Type of ML model deployment.
class MLFramework(str, Enum):
    """Supported ML frameworks.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SKLEARN = "sklearn"
    PYTORCH = "pytorch"
    TENSORFLOW = "tensorflow"
    XGBOOST = "xgboost"
    CUSTOM = "custom"
Supported ML frameworks.
class AlertType(str, Enum):
    """Types of alert destinations.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SLACK = "slack"
    EMAIL = "email"
    ADO_WORKITEM = "ado_workitem"
Types of alert destinations.
class ProjectConfig(BaseModel):
    """Project metadata configuration.

    Only ``name`` is required; ``version`` and ``description`` have defaults.
    """

    # NOTE(review): the description asks for lowercase/hyphens, but no pattern
    # constraint is declared on the field.
    name: str = Field(..., description="Project name (lowercase, hyphens allowed)")
    version: str = Field(default="1.0.0", description="Semantic version")
    description: Optional[str] = Field(
        default=None, description="Brief project description"
    )
Project metadata configuration.
class ModelConfig(BaseModel):
    """ML model configuration.

    Every field has a default, so the model can be built with no arguments.
    """

    type: ModelType = Field(
        default=ModelType.REALTIME, description="Deployment type (realtime or batch)"
    )
    framework: MLFramework = Field(
        default=MLFramework.SKLEARN, description="ML framework used"
    )
    artifact_path: Optional[str] = Field(
        default="models/model.joblib", description="Path to model artifact"
    )
    # NOTE(review): described as realtime-only, but nothing here ties this flag
    # to ``type``.
    mcp_enabled: bool = Field(
        default=True,
        description="Enable MCP server for AI agent integration (realtime only)",
    )
ML model configuration.
class RuntimeConfig(BaseModel):
    """Python runtime configuration.

    Every field has a default; ``dependencies`` starts as a fresh empty list.
    """

    python_version: str = Field(default="3.11", description="Python version")
    dependencies: list[str] = Field(
        default_factory=list, description="Additional pip dependencies"
    )
Python runtime configuration.
class ScalingConfig(BaseModel):
    """Auto-scaling configuration.

    Each field is range-checked on its own via ``ge``/``le`` bounds.
    """

    # NOTE(review): no cross-field check enforces min_instances <= max_instances.
    min_instances: int = Field(
        default=1, ge=0, description="Minimum number of instances"
    )
    max_instances: int = Field(
        default=4, ge=1, description="Maximum number of instances"
    )
    target_cpu_percent: int = Field(
        default=70, ge=1, le=100, description="Target CPU utilization for scaling"
    )
Auto-scaling configuration.
class InfrastructureConfig(BaseModel):
    """Infrastructure resource configuration.

    Every field has a default; the networking fields default to ``None``.
    """

    # NOTE(review): the description lists specific CPU sizes, but the field
    # accepts any int.
    cpu: int = Field(
        default=512,
        description="CPU units (256, 512, 1024, 2048, 4096)",
    )
    memory: int = Field(
        default=1024,
        description="Memory in MB",
    )
    scaling: ScalingConfig = Field(default_factory=ScalingConfig)
    # NOTE(review): ${ENV_VAR} interpolation is not performed by this model;
    # presumably it happens wherever the YAML is loaded.
    vpc_id: Optional[str] = Field(
        default=None, description="VPC ID (supports ${ENV_VAR} interpolation)"
    )
    subnets: Optional[list[str]] = Field(
        default=None, description="Subnet IDs for deployment"
    )
    security_groups: Optional[list[str]] = Field(
        default=None, description="Security group IDs"
    )
Infrastructure resource configuration.
class AlertConditions(BaseModel):
    """Conditions that trigger alerts.

    Rate-style thresholds are bounded to [0, 1]; latency must be at least 1 ms.
    """

    error_rate_threshold: float = Field(
        default=0.01, ge=0, le=1, description="Error rate threshold (0-1)"
    )
    latency_p99_threshold_ms: int = Field(
        default=500, ge=1, description="P99 latency threshold in milliseconds"
    )
    data_drift_threshold: float = Field(
        default=0.1, ge=0, le=1, description="Data drift detection threshold"
    )
Conditions that trigger alerts.
class AlertConfig(BaseModel):
    """Alert destination configuration.

    Only ``type`` is required; ``conditions`` falls back to the
    ``AlertConditions`` defaults.
    """

    type: AlertType = Field(..., description="Alert destination type")
    channel: Optional[str] = Field(
        default=None, description="Slack channel or email address"
    )
    webhook_url: Optional[str] = Field(
        default=None, description="Webhook URL for Slack"
    )
    conditions: AlertConditions = Field(default_factory=AlertConditions)
Alert destination configuration.
class SourceSystem(str, Enum):
    """Database source systems for query-based data capture.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    SNOWFLAKE = "snowflake"
    POSTGRES = "postgres"
    SQLSERVER = "sqlserver"
Database source systems for query-based data capture.
class DriftDetectionConfig(BaseModel):
    """Drift detection configuration.

    Disabled by default; every field has a default value.
    """

    enabled: bool = Field(default=False, description="Enable drift detection")
    s3_bucket: str = Field(
        default="model-monitoring",
        description="S3 bucket for storing snapshots and reports",
    )
    sampling_rate: float = Field(
        default=0.05,
        ge=0.001,
        le=1.0,
        description="Fraction of requests to sample (0.001-1.0)",
    )
    window_days: int = Field(
        default=7,
        ge=1,
        le=90,
        description="Rolling window size in days for recent data",
    )
    drift_threshold: float = Field(
        default=0.1,
        ge=0.0,
        le=1.0,
        description="Threshold for feature drift detection",
    )
    dataset_drift_threshold: float = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="Fraction of drifted features to trigger dataset drift",
    )
    retention_days: int = Field(
        default=90,
        ge=1,
        description="Days to retain historical snapshots",
    )
    auto_capture_on_deploy: bool = Field(
        default=True,
        description="Automatically capture reference on deployment",
    )
Drift detection configuration.
class MonitoringConfig(BaseModel):
    """Monitoring and observability configuration.

    Defaults to five built-in metric names, no alerts, the dashboard enabled,
    and the ``DriftDetectionConfig`` defaults.
    """

    # A factory is used so each instance gets its own list of metric names.
    metrics: list[str] = Field(
        default_factory=lambda: [
            "latency_p50",
            "latency_p99",
            "error_rate",
            "request_count",
            "prediction_distribution",
        ],
        description="Metrics to collect",
    )
    alerts: list[AlertConfig] = Field(
        default_factory=list, description="Alert configurations"
    )
    dashboard_enabled: bool = Field(
        default=True, description="Generate CloudWatch dashboard"
    )
    drift_detection: DriftDetectionConfig = Field(
        default_factory=DriftDetectionConfig, description="Drift detection settings"
    )
Monitoring and observability configuration.
class EnvironmentConfig(BaseModel):
    """Deployment environment configuration.

    Only ``name`` is required; both deployment flags default to ``False``.
    """

    name: str = Field(..., description="Environment name (dev, staging, prod)")
    auto_deploy: bool = Field(
        default=False, description="Auto-deploy on pipeline success"
    )
    approval_required: bool = Field(
        default=False, description="Require manual approval for deployment"
    )
    variables: dict[str, str] = Field(
        default_factory=dict, description="Environment-specific variables"
    )
Deployment environment configuration.
class DeploymentConfig(BaseModel):
    """Deployment pipeline configuration.

    When ``environments`` is omitted, two are created: ``dev`` with
    auto-deploy on, and ``prod`` with manual approval required.
    """

    environments: list[EnvironmentConfig] = Field(
        default_factory=lambda: [
            EnvironmentConfig(name="dev", auto_deploy=True),
            EnvironmentConfig(name="prod", approval_required=True),
        ],
        description="Deployment environments",
    )
    container_registry: Optional[str] = Field(
        default=None, description="ECR registry URL (auto-generated if not provided)"
    )
Deployment pipeline configuration.
class BatchBackend(str, Enum):
    """Batch job deployment backends.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    STEP_FUNCTIONS = "step-functions"
    AIRFLOW = "airflow"
Batch job deployment backends.
class BatchJobConfig(BaseModel):
    """Individual batch job configuration.

    ``name`` and ``flow_file`` are required; the rest have defaults.
    """

    name: str = Field(..., description="Job name")
    flow_file: str = Field(..., description="Path to Metaflow flow file")
    schedule: Optional[str] = Field(
        default=None, description="Cron schedule expression"
    )
    cpu: int = Field(default=4, description="CPU cores for batch compute")
    memory: int = Field(default=8192, description="Memory in MB")
Individual batch job configuration.
class StepFunctionsConfig(BaseModel):
    """AWS Step Functions backend configuration.

    Every field has a default; ``batch_queue`` defaults to ``None``.
    """

    s3_root: str = Field(
        default="s3://metaflow-data",
        description="S3 root for Metaflow artifacts",
    )
    batch_queue: Optional[str] = Field(
        default=None, description="AWS Batch queue name"
    )
AWS Step Functions backend configuration.
class AirflowConfig(BaseModel):
    """Astronomer Airflow backend configuration.

    Every field has a default value.
    """

    connection_id: str = Field(
        default="astronomer_default",
        description="Airflow connection ID",
    )
    namespace: str = Field(
        default="default",
        description="Kubernetes namespace for pod operators",
    )
Astronomer Airflow backend configuration.
class BatchConfig(BaseModel):
    """Batch job configuration.

    Disabled by default. Both backend sub-configurations are always present
    with their defaults, whichever ``backend`` is selected.
    """

    enabled: bool = Field(default=False, description="Enable batch job generation")
    backend: BatchBackend = Field(
        default=BatchBackend.STEP_FUNCTIONS,
        description="Deployment backend (step-functions or airflow)",
    )
    step_functions: StepFunctionsConfig = Field(default_factory=StepFunctionsConfig)
    airflow: AirflowConfig = Field(default_factory=AirflowConfig)
    jobs: list[BatchJobConfig] = Field(
        default_factory=list, description="Batch job definitions"
    )
Batch job configuration.
class GeronimoConfig(BaseModel):
    """Root configuration for a Geronimo project.

    This is the Pydantic model for geronimo.yaml files.

    Only ``project`` is required; every other section falls back to its own
    defaults.
    """

    # Reject unknown top-level keys instead of silently ignoring them.
    model_config = ConfigDict(extra="forbid")

    project: ProjectConfig = Field(..., description="Project metadata")
    model: ModelConfig = Field(default_factory=ModelConfig)
    runtime: RuntimeConfig = Field(default_factory=RuntimeConfig)
    infrastructure: InfrastructureConfig = Field(
        default_factory=InfrastructureConfig
    )
    monitoring: MonitoringConfig = Field(default_factory=MonitoringConfig)
    deployment: DeploymentConfig = Field(default_factory=DeploymentConfig)
    batch: BatchConfig = Field(default_factory=BatchConfig)
Root configuration for a Geronimo project.
This is the Pydantic model for geronimo.yaml files.