geronimo.generators

Geronimo Code Generators.

This module powers the `geronimo init` scaffolding and other code generation tasks. It uses Jinja2 templates to produce production-ready code for:

  • New Geronimo projects (folder structure, config files)
  • Dockerfiles for training and serving containers
  • Terraform configurations for AWS infrastructure
  • CI/CD pipeline definitions (GitHub Actions)

The goal is to provide a "batteries-included" starting point that follows MLOps best practices.

 1"""Geronimo Code Generators.
 2
 3This module powers the `geronimo init` scaffolding and other code generation tasks.
 4It uses Jinja2 templates to produce production-ready code for:
 5- New Geronimo projects (folder structure, config files)
 6- Dockerfiles for training and serving containers
 7- Terraform configurations for AWS infrastructure
 8- CI/CD pipeline definitions (GitHub Actions)
 9
10The goal is to provide a "batteries-included" starting point that follows MLOps best practices.
11"""
12
13from geronimo.generators.base import BaseGenerator
14from geronimo.generators.docker import DockerGenerator
15from geronimo.generators.pipeline import PipelineGenerator
16from geronimo.generators.project import ProjectGenerator
17from geronimo.generators.terraform import TerraformGenerator
18
19__all__ = [
20    "BaseGenerator",
21    "ProjectGenerator",
22    "TerraformGenerator",
23    "DockerGenerator",
24    "PipelineGenerator",
25]
26
27__docformat__ = "google"
class BaseGenerator(abc.ABC):
class BaseGenerator(ABC):
    """Abstract base class for all Geronimo generators.

    Provides the shared Jinja2 rendering infrastructure: an environment that
    loads templates from the ``templates`` directory of the
    ``geronimo.generators`` package, a helper to render one of them, and a
    helper to write generated content to disk.
    """

    # Subclasses should override this to specify their template subdirectory
    TEMPLATE_DIR: str = ""

    _env: Environment
    """The Jinja2 environment for template rendering."""

    def __init__(self) -> None:
        """Initialize the generator with Jinja2 environment."""
        self._env = Environment(
            loader=PackageLoader("geronimo.generators", "templates"),
            autoescape=select_autoescape(["html", "xml"]),
            trim_blocks=True,
            lstrip_blocks=True,
            keep_trailing_newline=True,
        )

    def render_template(self, template_name: str, context: dict[str, Any]) -> str:
        """Render a template with the given context.

        Args:
            template_name: Name of the template file. It is looked up under
                ``TEMPLATE_DIR`` when that is set, otherwise directly under
                ``templates/``.
            context: Dictionary of variables to pass to the template.

        Returns:
            Rendered template as a string.
        """
        template_path = (
            f"{self.TEMPLATE_DIR}/{template_name}"
            if self.TEMPLATE_DIR
            else template_name
        )
        template = self._env.get_template(template_path)
        return template.render(**context)

    def write_file(self, path: Path | str, content: str) -> None:
        """Write content to a file as UTF-8, creating directories as needed.

        Args:
            path: Target file path.
            content: Content to write.
        """
        target = Path(path)
        target.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding: the locale default (for example cp1252) cannot
        # represent every character that generated files may contain.
        target.write_text(content, encoding="utf-8")

    @abstractmethod
    def generate(self) -> Any:
        """Generate the output artifacts.

        Subclasses must implement this method.
        """
        pass

Abstract base class for all Geronimo generators.

Provides Jinja2 template rendering infrastructure.

BaseGenerator()
26    def __init__(self) -> None:
27        """Initialize the generator with Jinja2 environment."""
28        self._env = Environment(
29            loader=PackageLoader("geronimo.generators", "templates"),
30            autoescape=select_autoescape(["html", "xml"]),
31            trim_blocks=True,
32            lstrip_blocks=True,
33            keep_trailing_newline=True,
34        )

Initialize the generator with Jinja2 environment.

TEMPLATE_DIR: str = ''
def render_template(self, template_name: str, context: dict[str, typing.Any]) -> str:
36    def render_template(self, template_name: str, context: dict[str, Any]) -> str:
37        """Render a template with the given context.
38
39        Args:
40            template_name: Name of the template file (relative to templates/).
41            context: Dictionary of variables to pass to the template.
42
43        Returns:
44            Rendered template as a string.
45        """
46        template_path = (
47            f"{self.TEMPLATE_DIR}/{template_name}"
48            if self.TEMPLATE_DIR
49            else template_name
50        )
51        template = self._env.get_template(template_path)
52        return template.render(**context)

Render a template with the given context.

Arguments:
  • template_name: Name of the template file (relative to templates/).
  • context: Dictionary of variables to pass to the template.
Returns:

Rendered template as a string.

def write_file(self, path: pathlib.Path | str, content: str) -> None:
54    def write_file(self, path: Path | str, content: str) -> None:
55        """Write content to a file, creating directories as needed.
56
57        Args:
58            path: Target file path.
59            content: Content to write.
60        """
61        path = Path(path)
62        path.parent.mkdir(parents=True, exist_ok=True)
63        path.write_text(content)

Write content to a file, creating directories as needed.

Arguments:
  • path: Target file path.
  • content: Content to write.
@abstractmethod
def generate(self) -> Any:
65    @abstractmethod
66    def generate(self) -> Any:
67        """Generate the output artifacts.
68
69        Subclasses must implement this method.
70        """
71        pass

Generate the output artifacts.

Subclasses must implement this method.

class ProjectGenerator(geronimo.generators.BaseGenerator):
  46class ProjectGenerator(BaseGenerator):
  47    """Generates a complete FastAPI ML project structure."""
  48
  49    TEMPLATE_DIR = "project"
  50
  51    project_name: str
  52    """The project name (kebab-case)."""
  53
  54    framework: MLFramework
  55    """The selected ML framework."""
  56
  57    output_dir: Path
  58    """The output directory path."""
  59
  60    project_dir: Path
  61    """The full path to the project directory."""
  62
  63    template: str
  64    """The selected project template (realtime/batch/both)."""
  65
  66    engine: TemplateEngine
  67    """The template rendering engine."""
  68
  69    def __init__(
  70        self,
  71        project_name: str,
  72        framework: str = "sklearn",
  73        output_dir: str = ".",
  74        template: str = "realtime",
  75    ) -> None:
  76        """Initialize the project generator.
  77
  78        Args:
  79            project_name: Name of the project.
  80            framework: ML framework to use.
  81            output_dir: Directory to create the project in.
  82            template: Project template (realtime, batch, or both).
  83        """
  84        super().__init__()
  85        self.project_name = project_name.lower().replace(" ", "-")
  86        self.framework = MLFramework(framework.lower())
  87        self.output_dir = Path(output_dir)
  88        self.project_dir = self.output_dir / self.project_name
  89        self.template = template
  90        self.engine = TemplateEngine()
  91
  92    def _get_framework_dependencies(self) -> list[str]:
  93        """Get framework-specific dependencies."""
  94        deps = {
  95            MLFramework.SKLEARN: ["scikit-learn>=1.3.0", "joblib>=1.3.0"],
  96            MLFramework.PYTORCH: ["torch>=2.0.0"],
  97            MLFramework.TENSORFLOW: ["tensorflow>=2.13.0"],
  98            MLFramework.XGBOOST: ["xgboost>=2.0.0"],
  99            MLFramework.CUSTOM: [],
 100        }
 101        return deps.get(self.framework, [])
 102
 103    def _get_template_dependencies(self) -> list[str]:
 104        """Get template-specific dependencies."""
 105        # Core deps for all templates
 106        core = CORE_DEPS
 107
 108        # Template-specific deps
 109        if self.template == "realtime":
 110            template_deps = REALTIME_DEPS
 111        elif self.template == "batch":
 112            template_deps = BATCH_DEPS
 113        else:  # both
 114            template_deps = REALTIME_DEPS + BATCH_DEPS
 115
 116        # Framework-specific deps
 117        framework_deps = self._get_framework_dependencies()
 118
 119        return core + template_deps + framework_deps
 120
 121    def _to_pascal_case(self, name: str) -> str:
 122        """Convert kebab-case or snake_case name to PascalCase.
 123        
 124        Args:
 125            name: Name in kebab-case (my-project) or snake_case (my_project)
 126            
 127        Returns:
 128            PascalCase version (MyProject)
 129            
 130        Examples:
 131            >>> self._to_pascal_case("my-project")
 132            'MyProject'
 133            >>> self._to_pascal_case("test_batch")
 134            'TestBatch'
 135        """
 136        return ''.join(
 137            word.title() for word in name.replace("-", "_").split("_")
 138        )
 139
 140    def _create_config(self) -> GeronimoConfig:
 141        """Create the default configuration for this project."""
 142        # Determine model type based on template
 143        model_type = ModelType.BATCH if self.template == "batch" else ModelType.REALTIME
 144        
 145        # Base dependencies
 146        base_deps = [
 147            "pydantic>=2.5.0",
 148            "numpy>=1.24.0",
 149            "pandas>=2.0.0",
 150            *self._get_framework_dependencies(),
 151        ]
 152        
 153        # Template-specific dependencies
 154        if self.template == "batch":
 155            runtime_deps = base_deps + ["metaflow>=2.10.0", "pyarrow>=14.0.0"]
 156        else:
 157            runtime_deps = base_deps + [
 158                "fastapi>=0.109.0",
 159                "uvicorn[standard]>=0.27.0",
 160            ]
 161        
 162        return GeronimoConfig(
 163            project=ProjectConfig(
 164                name=self.project_name,
 165                version="1.0.0",
 166                description=f"ML model serving API for {self.project_name}" if self.template != "batch" else f"ML batch pipeline for {self.project_name}",
 167            ),
 168            model=ModelConfig(
 169                type=model_type,
 170                framework=self.framework,
 171                artifact_path="models/model.joblib",
 172            ),
 173            runtime=RuntimeConfig(
 174                python_version="3.11",
 175                dependencies=runtime_deps,
 176            ),
 177            infrastructure=InfrastructureConfig(
 178                cpu=512,
 179                memory=1024,
 180                scaling=ScalingConfig(
 181                    min_instances=1,
 182                    max_instances=4,
 183                ),
 184            ),
 185            monitoring=MonitoringConfig(
 186                metrics=[
 187                    "latency_p50",
 188                    "latency_p99",
 189                    "error_rate",
 190                    "request_count",
 191                ],
 192                dashboard_enabled=True,
 193            ),
 194            deployment=DeploymentConfig(
 195                environments=[
 196                    EnvironmentConfig(name="dev", auto_deploy=True),
 197                    EnvironmentConfig(name="prod", approval_required=True),
 198                ],
 199            ),
 200        )
 201
 202    def generate(self) -> Path:
 203        """Generate the complete project structure.
 204
 205        Returns:
 206            Path to the created project directory.
 207        """
 208        # Create project directory
 209        self.project_dir.mkdir(parents=True, exist_ok=True)
 210
 211        # Generate configuration
 212        config = self._create_config()
 213        save_config(config, self.project_dir / "geronimo.yaml")
 214
 215        # Generate source code
 216        self._generate_source_code()
 217
 218        # Generate monitoring code (only for realtime/both)
 219        if self.template in ("realtime", "both"):
 220            self._generate_monitoring()
 221
 222        # Generate project files
 223        self._generate_project_files()
 224
 225        # Create models directory
 226        (self.project_dir / "models").mkdir(exist_ok=True)
 227        (self.project_dir / "models" / ".gitkeep").touch()
 228
 229        return self.project_dir
 230
 231    def _generate_source_code(self) -> None:
 232        """Generate SDK-first application structure.
 233        
 234        SDK files (user edits):
 235            - sdk/model.py - Model train/predict
 236            - sdk/features.py - FeatureSet definition
 237            - sdk/data_sources.py - DataSource config
 238            - sdk/endpoint.py - [realtime] preprocess/postprocess
 239            - sdk/pipeline.py - [batch] run() logic
 240        
 241        Wrappers (thin, rarely edited):
 242            - app.py - [realtime] FastAPI imports SDK
 243            - flow.py - [batch] Metaflow imports SDK
 244        """
 245        src = self.project_dir / "src"
 246        context = {
 247            "project_name": self.project_name,
 248            "project_name_snake": self.project_name.replace("-", "_"),
 249            "framework": self.framework.value,
 250        }
 251
 252        # Main package
 253        pkg_dir = src / context["project_name_snake"]
 254        pkg_dir.mkdir(parents=True, exist_ok=True)
 255        self.write_file(pkg_dir / "__init__.py", f'"""ML package for {self.project_name}."""\n')
 256
 257        # ==============================
 258        # SDK Core (always generated) - use templates
 259        # ==============================
 260        sdk_dir = pkg_dir / "sdk"
 261        sdk_dir.mkdir(exist_ok=True)
 262        self.write_file(sdk_dir / "__init__.py", '"""Geronimo SDK - define your model lifecycle here."""\n')
 263        
 264        # Use template engine for SDK files
 265        self.engine.render_to_file("sdk/model.py.jinja2", context, sdk_dir / "model.py")
 266        self.engine.render_to_file("sdk/features.py.jinja2", context, sdk_dir / "features.py")
 267        self.engine.render_to_file("sdk/data_sources.py.jinja2", context, sdk_dir / "data_sources.py")
 268
 269        # ==============================
 270        # Template-specific SDK files
 271        # ==============================
 272        if self.template in ("realtime", "both"):
 273            self.engine.render_to_file("sdk/endpoint.py.jinja2", context, sdk_dir / "endpoint.py")
 274            self.engine.render_to_file("sdk/monitoring_config.py.jinja2", context, sdk_dir / "monitoring_config.py")
 275            self.engine.render_to_file("project/app.py.jinja2", context, pkg_dir / "app.py")
 276            # Generate MCP agent package for AI integration
 277            self._generate_agent_package(context)
 278        
 279        if self.template in ("batch", "both"):
 280            self.engine.render_to_file("sdk/pipeline.py.jinja2", context, sdk_dir / "pipeline.py")
 281            self.engine.render_to_file("sdk/batch_monitoring_config.py.jinja2", context, sdk_dir / "monitoring_config.py")
 282            self.engine.render_to_file("project/flow.py.jinja2", context, pkg_dir / "flow.py")
 283            
 284            # Batch directory structure
 285            batch_dir = self.project_dir / "batch"
 286            batch_dir.mkdir(exist_ok=True)
 287            (batch_dir / "data").mkdir(exist_ok=True)
 288            (batch_dir / "output").mkdir(exist_ok=True)
 289
 290        # ==============================
 291        # Tests
 292        # ==============================
 293        tests_dir = self.project_dir / "tests"
 294        tests_dir.mkdir(exist_ok=True)
 295        self.write_file(tests_dir / "__init__.py", '"""Tests package."""\n')
 296        
 297        self.engine.render_to_file("project/test_sdk.py.jinja2", context, tests_dir / "test_sdk.py")
 298
 299    def _generate_sdk_model(self, context: dict) -> str:
 300        """Generate SDK model.py file."""
 301        project_name_pascal = self._to_pascal_case(context["project_name"])
 302        return f'''"""Model definition for {context["project_name"]}.
 303
 304This is the central file for your ML model. Implement:
 305- train(): Load data, fit features, train estimator
 306- predict(): Transform input and generate predictions
 307- save(): Persist estimator and features to ArtifactStore
 308- load(): Restore estimator and features from ArtifactStore
 309"""
 310
 311from typing import Any, Optional
 312import numpy as np
 313import pandas as pd
 314
 315from geronimo.models import Model, HyperParams
 316from geronimo.artifacts import ArtifactStore
 317from .features import {project_name_pascal}Features
 318from .data_sources import training_sources
 319
 320
 321class {project_name_pascal}Model(Model):
 322    """ML model for {context["project_name"]}.
 323    
 324    Uses declarative features for transformation and ArtifactStore for persistence.
 325    """
 326
 327    name = "{context["project_name"]}"
 328    version = "1.0.0"
 329    
 330    def __init__(self):
 331        super().__init__()
 332        self.estimator: Optional[Any] = None
 333        self.features: Optional[{project_name_pascal}Features] = None
 334        self._is_fitted = False
 335
 336    def train(self) -> dict:
 337        """Train the model.
 338        
 339        Loads training data sources, joins them, fits features, and trains estimator.
 340
 341        Returns:
 342            Training metrics dict
 343        """
 344        if not training_sources:
 345            raise ValueError("No training_* DataSources defined in data_sources.py")
 346        
 347        # Load and join training data sources
 348        df = training_sources[0].load()
 349        for source in training_sources[1:]:
 350            source_df = source.load()
 351            if source.join_spec:
 352                df = df.merge(
 353                    source_df,
 354                    left_on=source.join_spec.left_on,
 355                    right_on=source.join_spec.right_on,
 356                    how=source.join_spec.how,
 357                )
 358        
 359        # TODO: Configure your target column if supervised learning
 360        # y = df["target"].values
 361        
 362        # Initialize and fit features
 363        self.features = {project_name_pascal}Features()
 364        # X = self.features.fit_transform(df)
 365        
 366        # TODO: Initialize and train your estimator
 367        # from sklearn.ensemble import RandomForestClassifier
 368        # params = HyperParams(n_estimators=100, max_depth=5, random_state=42)
 369        # self.estimator = RandomForestClassifier(**params.to_dict())
 370        # self.estimator.fit(X, y)
 371        
 372        self._is_fitted = True
 373        
 374        # TODO: Return training metrics
 375        return {{
 376            "n_samples": len(df),
 377            # "accuracy": self.estimator.score(X, y),
 378        }}
 379
 380    def predict(self, X) -> np.ndarray:
 381        """Predict using the trained model.
 382        
 383        Args:
 384            X: Feature array or DataFrame
 385            
 386        Returns:
 387            Predictions array
 388        """
 389        if not self._is_fitted:
 390            raise RuntimeError("Model not trained. Call train() or load() first.")
 391        
 392        if isinstance(X, np.ndarray):
 393            df = pd.DataFrame(X, columns=self.features.feature_names)
 394        else:
 395            df = X
 396        
 397        # Transform using fitted features
 398        X_transformed = self.features.transform(df)
 399        return self.estimator.predict(X_transformed)
 400    
 401    def save(self, store: ArtifactStore) -> list[str]:
 402        """Save trained model and features to ArtifactStore.
 403        
 404        Args:
 405            store: ArtifactStore instance
 406            
 407        Returns:
 408            List of saved artifact paths
 409        """
 410        if not self._is_fitted:
 411            raise RuntimeError("Model not trained. Nothing to save.")
 412        
 413        paths = []
 414        
 415        # Save the trained estimator
 416        path = store.save(
 417            "estimator", 
 418            self.estimator, 
 419            artifact_type=type(self.estimator).__name__,
 420            tags={{"model": self.name, "version": self.version}}
 421        )
 422        paths.append(path)
 423        
 424        # Save the fitted features (includes transformers/scalers)
 425        path = store.save(
 426            "features",
 427            self.features,
 428            artifact_type="{project_name_pascal}Features",
 429            tags={{"model": self.name, "version": self.version}}
 430        )
 431        paths.append(path)
 432        
 433        return paths
 434    
 435    def load(self, store: ArtifactStore) -> None:
 436        """Load trained model and features from ArtifactStore.
 437        
 438        Args:
 439            store: ArtifactStore instance
 440        """
 441        self.estimator = store.get("estimator")
 442        self.features = store.get("features")
 443        self._is_fitted = True
 444    
 445    @property
 446    def is_fitted(self) -> bool:
 447        """Check if model is trained and ready for predictions."""
 448        return self._is_fitted
 449'''
 450
 451    def _generate_sdk_features(self, context: dict) -> str:
 452        """Generate SDK features.py file."""
 453        project_name_pascal = self._to_pascal_case(context["project_name"])
 454        return f'''"""Feature definitions for {context["project_name"]}.
 455
 456DEVELOPMENT WORKFLOW:
 4571. Review training and production data sources for column consistency
 4582. Perform exploratory data analysis (EDA) to identify:
 459   - Missing values → consider imputation strategies
 460   - Outliers → consider clipping or winsorization  
 461   - Skewed distributions → consider log/power transforms
 462   - Categorical cardinality → consider encoding strategies
 4633. Define Features with appropriate transformers
 464
 465Each Feature describes a column with its type, transformer, and encoder.
 466The FeatureSet handles fit_transform (training) and transform (inference).
 467"""
 468
 469from typing import Optional
 470from geronimo.features import FeatureSet, Feature
 471# from sklearn.preprocessing import StandardScaler, OneHotEncoder
 472
 473
 474class {project_name_pascal}Features(FeatureSet):
 475    """Feature engineering for {context["project_name"]}.
 476    
 477    Define your features here. Each Feature describes a column transformation.
 478    
 479    Example:
 480        age = Feature(dtype='numeric', transformer=StandardScaler())
 481        income = Feature(dtype='numeric', transformer=StandardScaler())
 482        category = Feature(dtype='categorical', encoder=OneHotEncoder())
 483        
 484        # Derived feature from multiple columns
 485        age_income_ratio = Feature(
 486            dtype='numeric',
 487            derived_feature_fn=lambda df: df['age'] / df['income']
 488        )
 489    """
 490    
 491    # TODO: Define your features based on EDA results
 492    # feature_1 = Feature(dtype='numeric')
 493    # feature_2 = Feature(dtype='categorical')
 494    pass
 495'''
 496
 497    def _generate_sdk_data_sources(self, context: dict) -> str:
 498        """Generate SDK data_sources.py file."""
 499        return f'''"""Data source definitions for {context["project_name"]}.
 500
 501NAMING CONVENTIONS:
 502- training_* : DataSources used for model training (e.g., training_customers, training_transactions)
 503- production_* : DataSources used for production inference/batch scoring
 504
 505JOIN BEHAVIOR:
 506- The FIRST DataSource in each group is the primary source
 507- Subsequent DataSources are joined to the primary using their join_spec
 508- All DataSources in a group should share a common primary key
 509
 510This module is imported by model.py and pipeline.py to load training/scoring data.
 511"""
 512
 513import sys
 514from geronimo.data_sources import DataSource, JoinSpec, Query, collect_data_sources
 515
 516
 517# =============================================================================
 518# Training Data Sources
 519# =============================================================================
 520
 521# Primary training source (first in the list)
 522training_data = DataSource(
 523    name="training_primary",
 524    source="file",
 525    path="data/train.csv",  # TODO: Update with your path
 526)
 527
 528# Example: Secondary training source to join
 529# training_features = DataSource(
 530#     name="training_features",
 531#     source="file",
 532#     path="data/features.csv",
 533#     join_spec=JoinSpec(
 534#         left_on="id",       # Column in primary source
 535#         right_on="id",      # Column in this source
 536#         how="left",         # left, right, inner, outer
 537#     ),
 538# )
 539
 540
 541# =============================================================================
 542# Production Data Sources
 543# =============================================================================
 544
 545production_data = DataSource(
 546    name="production_primary",
 547    source="file",
 548    path="batch/data/input.csv",  # TODO: Update with your path
 549)
 550
 551
 552# =============================================================================
 553# Auto-collected DataSource Lists
 554# =============================================================================
 555# These are dynamically populated from all training_* and production_* variables above
 556
 557training_sources = collect_data_sources(sys.modules[__name__], "training_")
 558production_sources = collect_data_sources(sys.modules[__name__], "production_")
 559'''
 560
 561    def _generate_sdk_endpoint(self, context: dict) -> str:
 562        """Generate SDK endpoint.py file for realtime serving."""
 563        return f'''"""Endpoint definition - handle incoming prediction requests."""
 564
 565from geronimo.serving import Endpoint
 566from .model import ProjectModel
 567
 568
 569class PredictEndpoint(Endpoint):
 570    """Prediction endpoint for real-time serving.
 571    
 572    This is a working demo endpoint. Replace the preprocess/postprocess
 573    methods with your actual implementation once you have a trained model.
 574    
 575    To train a model:
 576        uv run python -m {context["project_name_snake"]}.train
 577    """
 578
 579    model_class = ProjectModel
 580
 581    def preprocess(self, request: dict):
 582        """Transform incoming request to model input.
 583        
 584        Args:
 585            request: JSON request body with "features" key
 586            
 587        Returns:
 588            Feature matrix ready for model.predict()
 589        """
 590        # Demo mode: just return the features dict
 591        # TODO: Replace with actual preprocessing once model is trained
 592        # import pandas as pd
 593        # df = pd.DataFrame([request["features"]])
 594        # return self.model.features.transform(df)
 595        return request.get("features", request)
 596
 597    def postprocess(self, prediction):
 598        """Format model output for response.
 599        
 600        Args:
 601            prediction: Raw model output
 602            
 603        Returns:
 604            JSON-serializable response
 605        """
 606        # Demo mode: echo the input back
 607        # TODO: Replace with actual postprocessing once model is trained
 608        # return {{"prediction": int(prediction[0]), "confidence": 0.95}}
 609        return {{"result": prediction, "status": "demo_mode"}}
 610    
 611    def initialize(self, project=None, version=None):
 612        """Initialize endpoint.
 613        
 614        Demo mode: Skip model loading if artifacts don't exist.
 615        """
 616        try:
 617            super().initialize(project=project, version=version)
 618        except Exception:
 619            # Demo mode: continue without trained model
 620            self.model = None
 621            self._is_initialized = True
 622    
 623    def handle(self, request: dict) -> dict:
 624        """Handle prediction request.
 625        
 626        Demo mode: If no model loaded, echo the request back.
 627        """
 628        if self.model is None:
 629            # Demo mode
 630            features = self.preprocess(request)
 631            return self.postprocess(features)
 632        
 633        # Normal mode with trained model
 634        return super().handle(request)
 635'''
 636
 637    def _generate_sdk_pipeline(self, context: dict) -> str:
 638        """Generate SDK pipeline.py file for batch processing."""
 639        project_name_pascal = self._to_pascal_case(context["project_name"])
 640        return f'''"""Pipeline definition - implement your batch processing logic."""
 641
 642from geronimo.batch import BatchPipeline, Schedule
 643from .model import ProjectModel
 644from .data_sources import scoring_data
 645
 646
 647class {project_name_pascal}ScoringPipeline(BatchPipeline):
 648    """Batch scoring pipeline.
 649    
 650    This is a working demo pipeline. Replace the run() method with your
 651    actual implementation once you have a trained model.
 652    
 653    To train a model:
 654        uv run python -m {context["project_name_snake"]}.train
 655    """
 656
 657    name = "{context["project_name"]}-scoring"
 658    model_class = ProjectModel
 659    schedule = Schedule.daily(hour=6, minute=0)
 660    data_source = scoring_data
 661
 662    def initialize(self):
 663        """Initialize pipeline.
 664        
 665        Demo mode: Skip model loading if no artifacts exist.
 666        """
 667        try:
 668            super().initialize()
 669        except Exception:
 670            # Demo mode: continue without trained model
 671            self.model = None
 672            self._is_initialized = True
 673            print("Running in DEMO MODE (no trained model)")
 674
 675    def execute(self):
 676        """Execute the pipeline.
 677        
 678        Demo mode: Return sample results if no model loaded.
 679        """
 680        if self.model is None:
 681            # Demo mode
 682            return self.run()
 683        return super().execute()
 684
 685    def run(self):
 686        """Execute batch processing.
 687        
 688        Demo mode implementation - replace with your actual logic.
 689        
 690        Returns:
 691            Dict with execution results
 692        """
 693        # Demo mode: return sample results
 694        # TODO: Replace with actual batch logic once model is trained
 695        #
 696        # Example implementation:
 697        # data = self.data_source.load()
 698        # X = self.model.features.transform(data)
 699        # predictions = self.model.predict(X)
 700        # results = data.assign(prediction=predictions)
 701        # output_path = self.save_results(results)
 702        # return {{"samples_scored": len(results), "output_path": output_path}}
 703        
 704        return {{
 705            "status": "demo_mode",
 706            "message": "Pipeline executed successfully in demo mode",
 707            "samples_scored": 0,
 708        }}
 709'''
 710
 711    def _generate_api_code(self, context: dict, pkg_dir: Path) -> None:
 712        """Generate FastAPI structure for realtime serving."""
 713        # API module
 714        api_dir = pkg_dir / "api"
 715        api_dir.mkdir(exist_ok=True)
 716        self.write_file(api_dir / "__init__.py", '"""API package."""\n')
 717
 718        # Generate main.py
 719        main_content = self._generate_main_py(context)
 720        self.write_file(api_dir / "main.py", main_content)
 721        # Generate deps.py
 722        deps_content = self._generate_deps(context)
 723        self.write_file(api_dir / "deps.py", deps_content)
 724
 725        # Generate agent package
 726        self._generate_agent_package(context)
 727
 728        # Routes
 729        routes_dir = api_dir / "routes"
 730        routes_dir.mkdir(exist_ok=True)
 731        self.write_file(routes_dir / "__init__.py", '"""Routes package."""\n')
 732
 733        # Health route
 734        health_content = self._generate_health_route()
 735        self.write_file(routes_dir / "health.py", health_content)
 736
 737        # Predict route
 738        predict_content = self._generate_predict_route(context)
 739        self.write_file(routes_dir / "predict.py", predict_content)
 740
 741        # Models (schemas)
 742        models_dir = api_dir / "models"
 743        models_dir.mkdir(exist_ok=True)
 744        self.write_file(models_dir / "__init__.py", '"""Pydantic models."""\n')
 745
 746        schemas_content = self._generate_schemas(context)
 747        self.write_file(models_dir / "schemas.py", schemas_content)
 748
 749    def _generate_batch_code(self, context: dict, pkg_dir: Path) -> None:
 750        """Generate Metaflow batch pipeline structure."""
 751        # Flows directory at project root
 752        flows_dir = self.project_dir / "batch" / "flows"
 753        flows_dir.mkdir(parents=True, exist_ok=True)
 754        
 755        # Data and output directories
 756        (self.project_dir / "batch" / "data").mkdir(exist_ok=True)
 757        (self.project_dir / "batch" / "output").mkdir(exist_ok=True)
 758        
 759        # Generate Metaflow flow
 760        flow_content = self._generate_metaflow_flow(context)
 761        self.write_file(flows_dir / "scoring_flow.py", flow_content)
 762        
 763        # Generate pipeline class in package
 764        pipeline_content = self._generate_pipeline_class(context)
 765        self.write_file(pkg_dir / "pipeline.py", pipeline_content)
 766
 767    def _generate_metaflow_flow(self, context: dict) -> str:
 768        """Generate Metaflow flow file."""
 769        project_name_pascal = self._to_pascal_case(context["project_name"])
 770        return f'''"""Metaflow flow for {context["project_name"]} batch scoring.
 771
 772Run locally:
 773    python batch/flows/scoring_flow.py run
 774
 775Deploy to Step Functions:
 776    python batch/flows/scoring_flow.py step-functions create
 777"""
 778
 779from metaflow import FlowSpec, step, Parameter, schedule
 780
 781
 782@schedule(daily=True)
 783class {project_name_pascal}ScoringFlow(FlowSpec):
 784    """Daily batch scoring flow."""
 785
 786    input_path = Parameter(
 787        "input_path",
 788        help="Path to input data",
 789        default="batch/data/input.csv",
 790    )
 791    output_path = Parameter(
 792        "output_path",
 793        help="Path for output predictions",
 794        default="batch/output/predictions.parquet",
 795    )
 796
 797    @step
 798    def start(self):
 799        """Initialize the flow."""
 800        print(f"Starting batch scoring for {context["project_name"]}")
 801        self.next(self.load_data)
 802
 803    @step
 804    def load_data(self):
 805        """Load data to score."""
 806        import pandas as pd
 807        from pathlib import Path
 808
 809        path = Path(self.input_path)
 810        if path.exists():
 811            self.data = pd.read_csv(path)
 812        else:
 813            # Generate sample data
 814            import numpy as np
 815            self.data = pd.DataFrame({{
 816                "feature_1": np.random.randn(100),
 817                "feature_2": np.random.randn(100),
 818            }})
 819        print(f"Loaded {{len(self.data)}} samples")
 820        self.next(self.predict)
 821
 822    @step
 823    def predict(self):
 824        """Generate predictions."""
 825        import pandas as pd
 826        from {context["project_name_snake"]}.ml.predictor import ModelPredictor
 827
 828        predictor = ModelPredictor()
 829        predictor.load()
 830        
 831        predictions = predictor.predict(self.data)
 832        
 833        self.results = self.data.copy()
 834        self.results["prediction"] = predictions
 835        self.results["scored_at"] = pd.Timestamp.now().isoformat()
 836        
 837        print(f"Generated {{len(self.results)}} predictions")
 838        self.next(self.save_results)
 839
 840    @step
 841    def save_results(self):
 842        """Save predictions to storage."""
 843        from pathlib import Path
 844
 845        path = Path(self.output_path)
 846        path.parent.mkdir(parents=True, exist_ok=True)
 847        self.results.to_parquet(path, index=False)
 848        print(f"Saved results to {{path}}")
 849        self.next(self.end)
 850
 851    @step
 852    def end(self):
 853        """Flow complete."""
 854        print(f"Scored {{len(self.results)}} samples")
 855
 856
 857if __name__ == "__main__":
 858    {project_name_pascal}ScoringFlow()
 859'''
 860
 861    def _generate_pipeline_class(self, context: dict) -> str:
 862        """Generate BatchPipeline class."""
 863        project_name_pascal = self._to_pascal_case(context["project_name"])
 864        return f'''"""Batch pipeline using Geronimo BatchPipeline."""
 865
 866from geronimo.batch import BatchPipeline, Schedule
 867
 868
 869class {project_name_pascal}ScoringPipeline(BatchPipeline):
 870    """Daily batch scoring pipeline.
 871    
 872    Example:
 873        pipeline = {project_name_pascal}ScoringPipeline()
 874        pipeline.initialize()
 875        result = pipeline.execute()
 876    """
 877    
 878    name = "{context["project_name"]}-scoring"
 879    schedule = Schedule.daily(hour=6, minute=0)
 880    
 881    def run(self):
 882        """Main pipeline logic."""
 883        import pandas as pd
 884        from pathlib import Path
 885        from .ml.predictor import ModelPredictor
 886        
 887        # Load predictor
 888        predictor = ModelPredictor()
 889        predictor.load()
 890        
 891        # Load data
 892        data_path = Path("batch/data/input.csv")
 893        if data_path.exists():
 894            data = pd.read_csv(data_path)
 895        else:
 896            # Sample data
 897            import numpy as np
 898            data = pd.DataFrame({{
 899                "feature_1": np.random.randn(100),
 900                "feature_2": np.random.randn(100),
 901            }})
 902        
 903        # Predict
 904        predictions = predictor.predict(data)
 905        
 906        # Build results
 907        results = data.copy()
 908        results["prediction"] = predictions
 909        results["scored_at"] = pd.Timestamp.now().isoformat()
 910        
 911        # Save
 912        output_path = self.save_results(results)
 913        
 914        return {{
 915            "samples_scored": len(results),
 916            "output_path": output_path,
 917        }}
 918
 919
 920if __name__ == "__main__":
 921    pipeline = {project_name_pascal}ScoringPipeline()
 922    pipeline.initialize()
 923    print(pipeline.execute())
 924'''
 925
 926    def _generate_test_batch(self, context: dict) -> str:
 927        """Generate batch pipeline tests."""
 928        project_name_pascal = self._to_pascal_case(context["project_name"])
 929        return f'''"""Tests for batch pipeline."""
 930
 931import pytest
 932
 933
 934class Test{project_name_pascal}ScoringPipeline:
 935    """Tests for {project_name_pascal}ScoringPipeline."""
 936
 937    def test_pipeline_exists(self):
 938        """Test pipeline can be imported."""
 939        from {context["project_name_snake"]}.pipeline import {project_name_pascal}ScoringPipeline
 940        
 941        pipeline = {project_name_pascal}ScoringPipeline()
 942        assert pipeline.name == "{context["project_name"]}-scoring"
 943
 944    def test_pipeline_schedule(self):
 945        """Test pipeline has schedule."""
 946        from {context["project_name_snake"]}.pipeline import {project_name_pascal}ScoringPipeline
 947        
 948        pipeline = {project_name_pascal}ScoringPipeline()
 949        assert pipeline.schedule is not None
 950        assert "6" in pipeline.schedule.cron_expression
 951'''
 952
 953
 954
 955    def _generate_monitoring(self) -> None:
 956        """Generate monitoring package."""
 957        src = self.project_dir / "src"
 958        pkg_dir = src / self.project_name.replace("-", "_")
 959        monitor_dir = pkg_dir / "monitoring"
 960        monitor_dir.mkdir(exist_ok=True)
 961        
 962        # Create __init__.py for new package
 963        self.write_file(
 964            monitor_dir / "__init__.py", 
 965            '"""Monitoring package."""\n\n'
 966            'from .metrics import MetricsCollector, MetricType\n'
 967            'from .alerts import AlertManager, SlackAlert\n'
 968            'from .middleware import MonitoringMiddleware\n'
 969            'from .drift import DriftDetector\n'
 970            '\n'
 971            '__all__ = [\n'
 972            '    "MetricsCollector",\n'
 973            '    "MetricType",\n'
 974            '    "AlertManager",\n'
 975            '    "SlackAlert",\n'
 976            '    "MonitoringMiddleware",\n'
 977            '    "DriftDetector",\n'
 978            ']\n'
 979        )
 980
 981        # Read templates from installed package
 982        package_root = Path(geronimo.__file__).parent
 983        template_dir = package_root / "generators" / "templates" / "monitoring"
 984        
 985        files = {
 986            "metrics.py": "metrics.py",
 987            "alerts.py": "alerts.py",
 988            "middleware.py": "middleware.py",
 989            "drift.py": "drift.py",
 990        }
 991
 992        for dest_name, src_name in files.items():
 993            template_path = template_dir / src_name
 994            if not template_path.exists():
 995                # Fallback implementation or error
 996                # For basic functionality in development mode where files might not be moved yet?
 997                # No, we assume it exists.
 998                continue
 999                
1000            source = template_path.read_text()
1001            
1002            # Fix imports using simple string replacement
1003            # The original files had "from geronimo.monitoring..."
1004            # We need to change that to "from ." or "from <pkg>.monitoring"
1005            
1006            # Replace absolute imports with relative imports which is cleaner for internal package
1007            source = source.replace("from geronimo.monitoring.metrics", "from .metrics")
1008            source = source.replace("from geronimo.monitoring.alerts", "from .alerts")
1009            source = source.replace("from geronimo.monitoring.middleware", "from .middleware")
1010            source = source.replace("from geronimo.monitoring.drift", "from .drift")
1011            
1012            self.write_file(monitor_dir / dest_name, source)
1013
1014    def _generate_main_py(self, context: dict) -> str:
1015        """Generate the FastAPI main application."""
1016        return f'''"""FastAPI application for {context["project_name"]} ML serving."""
1017
1018import logging
1019import os
1020from contextlib import asynccontextmanager
1021from typing import AsyncGenerator
1022
1023from fastapi import FastAPI
1024from fastapi.middleware.cors import CORSMiddleware
1025
1026from {context["project_name_snake"]}.api.routes import health, predict
1027from {context["project_name_snake"]}.ml.predictor import ModelPredictor
1028from {context["project_name_snake"]}.monitoring.middleware import MonitoringMiddleware
1029from {context["project_name_snake"]}.monitoring.metrics import MetricsCollector
1030from {context["project_name_snake"]}.api import deps
1031from {context["project_name_snake"]}.agent.server import mcp
1032
1033# Configure logging
1034logging.basicConfig(
1035    level=logging.INFO,
1036    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
1037)
1038logger = logging.getLogger(__name__)
1039
1040# Initialize metrics collector
1041metrics = MetricsCollector(project_name="{context["project_name"]}")
1042
1043
1044@asynccontextmanager
1045async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
1046    """Application lifespan handler for model loading."""
1047    logger.info("Loading model...")
1048    deps.predictor = ModelPredictor()
1049    deps.predictor.load()
1050    logger.info("Model loaded successfully")
1051
1052    yield
1053
1054    logger.info("Shutting down...")
1055
1056
1057app = FastAPI(
1058    title="{context["project_name"]}",
1059    description="ML model serving API",
1060    version="1.0.0",
1061    lifespan=lifespan,
1062)
1063
1064# CORS middleware
1065app.add_middleware(
1066    CORSMiddleware,
1067    allow_origins=["*"],
1068    allow_credentials=True,
1069    allow_methods=["*"],
1070    allow_headers=["*"],
1071)
1072
1073# Monitoring middleware
1074app.add_middleware(MonitoringMiddleware, collector=metrics)
1075
1076# Mount MCP Agent (Streamable HTTP)
1077if os.getenv("ENABLE_MCP_AGENT", "true").lower() == "true":
1078    app.mount("/mcp", mcp.streamable_http_app())
1079
1080# Include routers
1081app.include_router(health.router, tags=["Health"])
1082app.include_router(predict.router, prefix="/v1", tags=["Predictions"])
1083'''
1084
1085    def _generate_deps(self, context: dict) -> str:
1086        """Generate dependencies module."""
1087        return f'''"""API dependencies."""
1088
1089from typing import Optional
1090from {context["project_name_snake"]}.ml.predictor import ModelPredictor
1091
1092# Global model instance
1093predictor: Optional[ModelPredictor] = None
1094
1095
1096def get_predictor() -> ModelPredictor:
1097    """Get the loaded model predictor."""
1098    if predictor is None:
1099        raise RuntimeError("Model not loaded")
1100    return predictor
1101'''
1102
1103    def _generate_health_route(self) -> str:
1104        """Generate the health check route."""
1105        return '''"""Health check endpoints."""
1106
1107from fastapi import APIRouter
1108
1109router = APIRouter()
1110
1111
1112@router.get("/health")
1113async def health_check() -> dict[str, str]:
1114    """Basic health check endpoint."""
1115    return {"status": "healthy"}
1116
1117
1118@router.get("/ready")
1119async def readiness_check() -> dict[str, str]:
1120    """Readiness check for load balancer."""
1121    # Add model readiness check here if needed
1122    return {"status": "ready"}
1123'''
1124
1125    def _generate_predict_route(self, context: dict) -> str:
1126        """Generate the prediction route using SDK Endpoint."""
1127        return f'''"""Prediction endpoints using Geronimo SDK Endpoint."""
1128
1129import time
1130import logging
1131from typing import Any
1132
1133from fastapi import APIRouter, HTTPException
1134
1135from {context["project_name_snake"]}.sdk.endpoint import PredictEndpoint
1136
1137logger = logging.getLogger(__name__)
1138
1139router = APIRouter()
1140
1141# Initialize SDK endpoint
1142_endpoint = None
1143
1144
1145def get_endpoint() -> PredictEndpoint:
1146    """Get or initialize the SDK endpoint."""
1147    global _endpoint
1148    if _endpoint is None:
1149        _endpoint = PredictEndpoint()
1150        _endpoint.initialize()
1151        logger.info("SDK Endpoint initialized")
1152    return _endpoint
1153
1154
1155@router.post("/predict")
1156async def predict(request: dict[str, Any]) -> dict[str, Any]:
1157    """Generate predictions using the SDK Endpoint.
1158
1159    Args:
1160        request: Input features for prediction.
1161
1162    Returns:
1163        Model predictions with metadata.
1164    """
1165    start_time = time.perf_counter()
1166
1167    try:
1168        endpoint = get_endpoint()
1169        result = endpoint.handle(request)
1170
1171        latency_ms = (time.perf_counter() - start_time) * 1000
1172        logger.info(f"Prediction completed in {{latency_ms:.2f}}ms")
1173
1174        return {{
1175            **result,
1176            "latency_ms": latency_ms,
1177        }}
1178
1179    except NotImplementedError as e:
1180        raise HTTPException(
1181            status_code=501, 
1182            detail=f"Endpoint not implemented: {{e}}"
1183        )
1184    except Exception as e:
1185        logger.error(f"Prediction failed: {{e}}")
1186        raise HTTPException(status_code=500, detail=str(e))
1187'''
1188
1189    def _generate_schemas(self, context: dict) -> str:
1190        """Generate Pydantic schemas for request/response."""
1191        return '''"""Pydantic models for API request/response schemas."""
1192
1193from pydantic import BaseModel, Field
1194
1195
1196class PredictionRequest(BaseModel):
1197    """Request schema for predictions."""
1198
1199    features: dict[str, float | int | str | list] = Field(
1200        ...,
1201        description="Input features as key-value pairs",
1202        examples=[{"feature_1": 1.5, "feature_2": "category_a", "feature_3": [1, 2, 3]}],
1203    )
1204
1205
1206class PredictionResponse(BaseModel):
1207    """Response schema for predictions."""
1208
1209    prediction: float | int | str | list = Field(
1210        ...,
1211        description="Model prediction result",
1212    )
1213    model_version: str = Field(
1214        ...,
1215        description="Version of the model used for prediction",
1216    )
1217    latency_ms: float = Field(
1218        ...,
1219        description="Prediction latency in milliseconds",
1220    )
1221
1222
1223class ErrorResponse(BaseModel):
1224    """Error response schema."""
1225
1226    detail: str = Field(..., description="Error message")
1227'''
1228
1229    def _generate_predictor(self, context: dict) -> str:
1230        """Generate the model predictor class."""
1231        load_code = self._get_framework_load_code(context["framework"])
1232
1233        return f'''"""Model predictor for ML inference.
1234
1235Handles model loading, caching, and prediction logic.
1236"""
1237
1238import logging
1239from pathlib import Path
1240from typing import Any
1241
1242{self._get_framework_imports(context["framework"])}
1243
1244logger = logging.getLogger(__name__)
1245
1246# Default model path (relative to project root)
1247DEFAULT_MODEL_PATH = Path("models/model.joblib")
1248
1249
1250class ModelPredictor:
1251    """Handles model loading and predictions.
1252
1253    Implements lazy loading and caching for efficient inference.
1254    """
1255
1256    def __init__(self, model_path: Path | str | None = None) -> None:
1257        """Initialize the predictor.
1258
1259        Args:
1260            model_path: Path to the model artifact. Uses default if not provided.
1261        """
1262        self.model_path = Path(model_path) if model_path else DEFAULT_MODEL_PATH
1263        self._model: Any = None
1264        self._version: str = "1.0.0"
1265
1266    @property
1267    def version(self) -> str:
1268        """Get the model version."""
1269        return self._version
1270
1271    @property
1272    def is_loaded(self) -> bool:
1273        """Check if the model is loaded."""
1274        return self._model is not None
1275
1276    def load(self) -> None:
1277        """Load the model from disk.
1278
1279        Raises:
1280            FileNotFoundError: If model file doesn't exist.
1281            RuntimeError: If model loading fails.
1282        """
1283        if not self.model_path.exists():
1284            logger.warning(
1285                f"Model file not found at {{self.model_path}}. "
1286                "Using placeholder for development."
1287            )
1288            self._model = self._create_placeholder_model()
1289            return
1290
1291        try:
1292            logger.info(f"Loading model from {{self.model_path}}")
1293            {load_code}
1294            logger.info("Model loaded successfully")
1295        except Exception as e:
1296            raise RuntimeError(f"Failed to load model: {{e}}")
1297
1298    def _create_placeholder_model(self) -> Any:
1299        """Create a placeholder model for development/testing."""
1300        # Returns a simple function that echoes input
1301        return lambda x: 0.5
1302
1303    def predict(self, features: dict[str, Any]) -> Any:
1304        """Generate predictions for input features.
1305
1306        Args:
1307            features: Dictionary of feature name to value.
1308
1309        Returns:
1310            Model prediction (type depends on model).
1311
1312        Raises:
1313            RuntimeError: If model is not loaded.
1314        """
1315        if not self.is_loaded:
1316            raise RuntimeError("Model not loaded. Call load() first.")
1317
1318        # Convert features to model input format
1319        # This should be customized based on your model's requirements
1320        try:
1321            if callable(self._model):
1322                # Placeholder model
1323                return self._model(features)
1324
1325            # For sklearn-style models with predict method
1326            import pandas as pd
1327            import numpy as np
1328
1329            # Convert dict to DataFrame for sklearn compatibility
1330            df = pd.DataFrame([features])
1331
1332            # Get prediction
1333            prediction = self._model.predict(df)
1334
1335            # Return single value if single prediction
1336            if isinstance(prediction, np.ndarray) and len(prediction) == 1:
1337                return float(prediction[0])
1338
1339            return prediction.tolist()
1340
1341        except Exception as e:
1342            logger.error(f"Prediction failed: {{e}}")
1343            raise
1344'''
1345
1346    def _get_framework_imports(self, framework: str) -> str:
1347        """Get framework-specific imports."""
1348        imports = {
1349            "sklearn": "import joblib",
1350            "pytorch": "import torch",
1351            "tensorflow": "import tensorflow as tf",
1352            "xgboost": "import xgboost as xgb\nimport joblib",
1353            "custom": "",
1354        }
1355        return imports.get(framework, "")
1356
1357    def _get_framework_load_code(self, framework: str) -> str:
1358        """Get framework-specific model loading code."""
1359        load_code = {
1360            "sklearn": "self._model = joblib.load(self.model_path)",
1361            "pytorch": "self._model = torch.load(self.model_path)\n            self._model.eval()",
1362            "tensorflow": "self._model = tf.keras.models.load_model(self.model_path)",
1363            "xgboost": "self._model = joblib.load(self.model_path)",
1364            "custom": "# Implement custom model loading",
1365        }
1366        return load_code.get(framework, "# Unknown framework")
1367
1368    def _generate_test_api(self, context: dict) -> str:
1369        """Generate API tests."""
1370        return f'''"""Tests for the ML serving API."""
1371
1372import pytest
1373from fastapi.testclient import TestClient
1374
1375from {context["project_name_snake"]}.api.main import app
1376
1377
1378@pytest.fixture
1379def client():
1380    """Create test client."""
1381    with TestClient(app) as c:
1382        yield c
1383
1384
1385def test_health_check(client: TestClient):
1386    """Test health endpoint."""
1387    response = client.get("/health")
1388    assert response.status_code == 200
1389    assert response.json()["status"] == "healthy"
1390
1391
1392def test_readiness_check(client: TestClient):
1393    """Test readiness endpoint."""
1394    response = client.get("/ready")
1395    assert response.status_code == 200
1396    assert response.json()["status"] == "ready"
1397
1398
1399def test_predict(client: TestClient):
1400    """Test prediction endpoint."""
1401    response = client.post(
1402        "/v1/predict",
1403        json={{"features": {{"feature_1": 1.0, "feature_2": 2.0}}}},
1404    )
1405    assert response.status_code == 200
1406    data = response.json()
1407    assert "prediction" in data
1408    assert "model_version" in data
1409    assert "latency_ms" in data
1410'''
1411
1412    def _generate_agent_package(self, context: dict) -> None:
1413        """Generate MCP agent file for AI agent integration.
1414        
1415        Creates agent.py at the package level (alongside app.py).
1416        """
1417        src = self.project_dir / "src"
1418        pkg_dir = src / context["project_name_snake"]
1419
1420        # Generate agent.py at package level (like app.py)
1421        self.engine.render_to_file(
1422            "sdk/agent_server.py.jinja2", 
1423            context, 
1424            pkg_dir / "agent.py"
1425        )
1426
1427    def _generate_project_files(self) -> None:
1428        """Generate project-level configuration files."""
1429        context = {
1430            "project_name": self.project_name,
1431            "project_name_snake": self.project_name.replace("-", "_"),
1432        }
1433
1434        # Template-specific dependencies
1435        deps = self._get_template_dependencies()
1436        deps_str = ",\n    ".join(f'"{d}"' for d in deps)
1437
1438        # pyproject.toml
1439        pyproject = f'''[project]
1440name = "{context["project_name"]}"
1441version = "1.0.0"
1442description = "ML model serving API"
1443readme = "README.md"
1444requires-python = ">=3.11"
1445dependencies = [
1446    {deps_str},
1447]
1448
1449[project.optional-dependencies]
1450dev = [
1451    "pytest>=7.4.0",
1452    "httpx>=0.25.0",
1453    "pytest-cov>=4.1.0",
1454]
1455
1456[build-system]
1457requires = ["hatchling"]
1458build-backend = "hatchling.build"
1459'''
1460        self.write_file(self.project_dir / "pyproject.toml", pyproject)
1461
1462        # Generate training script
1463        self._generate_training_script(context)
1464
1465        # README.md
1466        readme = f'''# {context["project_name"]}
1467
1468ML model serving API generated by Geronimo.
1469
1470## Quick Start
1471
1472```bash
1473# Install dependencies
1474uv sync
1475
1476# Run the API locally
1477uv run uvicorn {context["project_name_snake"]}.api.main:app --reload
1478
1479# Run tests
1480uv run pytest
1481```
1482
1483## Project Structure
1484
1485```
1486{context["project_name"]}/
1487├── geronimo.yaml          # Deployment configuration
1488├── pyproject.toml         # Python project config
1489├── Dockerfile             # Container definition
1490├── azure-pipelines.yaml   # CI/CD pipeline
1491├── infrastructure/        # Terraform files
1492├── src/
1493│   └── {context["project_name_snake"]}/
1494│       ├── api/          # FastAPI application
1495│       │   ├── main.py
1496│       │   ├── routes/
1497│       │   └── models/
1498│       └── ml/           # Model loading & inference
1499│           └── predictor.py
1500├── models/               # Model artifacts
1501└── tests/
1502```
1503
1504## Deployment
1505
1506```bash
1507# Generate all deployment artifacts
1508geronimo generate all
1509
1510# Deploy infrastructure
1511cd infrastructure && terraform apply
1512```
1513'''
1514        self.write_file(self.project_dir / "README.md", readme)
1515
1516        # .gitignore
1517        gitignore = '''# Python
1518__pycache__/
1519*.py[cod]
1520*$py.class
1521.venv/
1522venv/
1523.env
1524
1525# IDE
1526.idea/
1527.vscode/
1528*.swp
1529
1530# Testing
1531.coverage
1532htmlcov/
1533.pytest_cache/
1534
1535# Build
1536dist/
1537build/
1538*.egg-info/
1539
1540# Terraform
1541.terraform/
1542*.tfstate
1543*.tfstate.*
1544.terraform.lock.hcl
1545
1546# Models (large files)
1547models/*.joblib
1548models/*.pkl
1549models/*.pt
1550models/*.h5
1551!models/.gitkeep
1552'''
1553        self.write_file(self.project_dir / ".gitignore", gitignore)
1554
1555    def _generate_training_script(self, context: dict) -> None:
1556        """Generate training script template."""
1557        pkg_dir = self.project_dir / "src" / context["project_name_snake"]
1558
1559        train_script = f'''"""Training script for {context["project_name"]}.
1560
1561This script demonstrates the full training workflow:
15621. Load data from SDK data_sources
15632. Initialize model with SDK features
15643. Fit transformers and train model
15654. Save artifacts to ArtifactStore
1566
1567Usage:
1568    uv run python -m {context["project_name_snake"]}.train
1569"""
1570
1571from pathlib import Path
1572import pandas as pd
1573
1574from geronimo.artifacts import ArtifactStore
1575from geronimo.models import HyperParams
1576
1577# Import from your SDK
1578from {context["project_name_snake"]}.sdk.model import ProjectModel
1579from {context["project_name_snake"]}.sdk.data_sources import training_data
1580
1581
1582def main():
1583    """Train and save the model."""
1584    print("=" * 50)
1585    print("Model Training")
1586    print("=" * 50)
1587
1588    # =========================================================================
1589    # 1. Load data from configured data source
1590    # =========================================================================
1591    print("\\n1. Loading data...")
1592    
1593    # Option A: Load from SDK data_sources (recommended)
1594    # This uses the DataSource defined in sdk/data_sources.py
1595    # df = training_data.load()
1596    
1597    # Option B: Direct file load (for development/testing)
1598    # df = pd.read_csv("data/train.csv")
1599    
1600    # TODO: Uncomment one of the options above
1601    raise NotImplementedError(
1602        "Configure your data source in sdk/data_sources.py, then uncomment:\\n"
1603        "  df = training_data.load()"
1604    )
1605
1606    # =========================================================================
1607    # 2. Prepare features and target
1608    # =========================================================================
1609    print("\\n2. Preparing data...")
1610    
1611    # TODO: Update with your target column name
1612    # y = df.pop("target")
1613    raise NotImplementedError("Set your target column: y = df.pop('your_target_column')")
1614
1615    # =========================================================================
1616    # 3. Initialize and train model
1617    # =========================================================================
1618    print("\\n3. Training model...")
1619    model = ProjectModel()
1620    
1621    # Fit feature transformers (from sdk/features.py)
1622    print("   Fitting feature transformers...")
1623    model.features.fit(df)
1624    X = model.features.transform(df)
1625    
1626    # Train with hyperparameters
1627    # TODO: Customize your hyperparameters
1628    params = HyperParams(
1629        n_estimators=100,
1630        max_depth=5,
1631    )
1632    model.train(X, y, params)
1633
1634    # =========================================================================
1635    # 4. Save model artifacts
1636    # =========================================================================
1637    print("\\n4. Saving artifacts...")
1638    
1639    # ArtifactStore uses your global config from ~/.geronimo/config.yaml
1640    # Run `geronimo config show` to see current settings
1641    # Run `geronimo config init` to change backend (local, s3, or gdc)
1642    store = ArtifactStore(
1643        project="{context["project_name"]}",
1644        version="1.0.0",
1645        # backend defaults to your global config (~/.geronimo/config.yaml)
1646        # Override here if needed: backend="local", backend="s3", backend="gdc"
1647    )
1648    model.save(store)
1649    print(f"   Saved artifacts (backend: {{store.backend}})")
1650
1651    print("\\n" + "=" * 50)
1652    print("Training complete!")
1653    print("=" * 50)
1654
1655
1656if __name__ == "__main__":
1657    main()
1658'''
1659        self.write_file(pkg_dir / "train.py", train_script)

Generates a complete FastAPI ML project structure.

ProjectGenerator( project_name: str, framework: str = 'sklearn', output_dir: str = '.', template: str = 'realtime')
69    def __init__(
70        self,
71        project_name: str,
72        framework: str = "sklearn",
73        output_dir: str = ".",
74        template: str = "realtime",
75    ) -> None:
76        """Initialize the project generator.
77
78        Args:
79            project_name: Name of the project.
80            framework: ML framework to use.
81            output_dir: Directory to create the project in.
82            template: Project template (realtime, batch, or both).
83        """
84        super().__init__()
85        self.project_name = project_name.lower().replace(" ", "-")
86        self.framework = MLFramework(framework.lower())
87        self.output_dir = Path(output_dir)
88        self.project_dir = self.output_dir / self.project_name
89        self.template = template
90        self.engine = TemplateEngine()

Initialize the project generator.

Arguments:
  • project_name: Name of the project.
  • framework: ML framework to use.
  • output_dir: Directory to create the project in.
  • template: Project template (realtime, batch, or both).
TEMPLATE_DIR = 'project'
project_name: str

The project name (kebab-case).

framework: MLFramework

The selected ML framework.

output_dir: pathlib.Path

The output directory path.

project_dir: pathlib.Path

The full path to the project directory.

template: str

The selected project template (realtime/batch/both).

engine: geronimo.generators.template_engine.TemplateEngine

The template rendering engine.

def generate(self) -> pathlib.Path:
202    def generate(self) -> Path:
203        """Generate the complete project structure.
204
205        Returns:
206            Path to the created project directory.
207        """
208        # Create project directory
209        self.project_dir.mkdir(parents=True, exist_ok=True)
210
211        # Generate configuration
212        config = self._create_config()
213        save_config(config, self.project_dir / "geronimo.yaml")
214
215        # Generate source code
216        self._generate_source_code()
217
218        # Generate monitoring code (only for realtime/both)
219        if self.template in ("realtime", "both"):
220            self._generate_monitoring()
221
222        # Generate project files
223        self._generate_project_files()
224
225        # Create models directory
226        (self.project_dir / "models").mkdir(exist_ok=True)
227        (self.project_dir / "models" / ".gitkeep").touch()
228
229        return self.project_dir

Generate the complete project structure.

Returns:

Path to the created project directory.

class TerraformGenerator(geronimo.generators.BaseGenerator):
class TerraformGenerator(BaseGenerator):
    """Renders the Terraform files describing an AWS ECS deployment."""

    TEMPLATE_DIR = "terraform"

    def __init__(
        self,
        config: GeronimoConfig,
        output_dir: str = "infrastructure",
    ) -> None:
        """Set up the Terraform generator.

        Args:
            config: Geronimo configuration the template values are read from.
            output_dir: Directory the Terraform files are written to.
        """
        super().__init__()
        self.config = config
        self.output_dir = Path(output_dir)
        self.engine = TemplateEngine()

        # The context is computed once here and reused for every template.
        self.context = self._build_context()

    def _build_context(self) -> dict:
        """Collect the template variables from the configuration."""
        project = self.config.project
        infra = self.config.infrastructure
        scaling = infra.scaling
        return dict(
            project_name=project.name,
            cpu=infra.cpu,
            memory=infra.memory,
            min_instances=scaling.min_instances,
            max_instances=scaling.max_instances,
            target_cpu=scaling.target_cpu_percent,
        )

    def generate(self) -> list[str]:
        """Render every Terraform template into the output directory.

        Returns:
            Paths of the generated files, as strings, in render order.
        """
        target_dir = self.output_dir
        target_dir.mkdir(parents=True, exist_ok=True)

        # Each output file is rendered from "terraform/<output name>.jinja2".
        output_names = (
            "main.tf",
            "variables.tf",
            "ecr.tf",
            "ecs.tf",
            "alb.tf",
            "cloudwatch.tf",
            "iam.tf",
            "outputs.tf",
        )

        written: list[str] = []
        for output_name in output_names:
            destination = target_dir / output_name
            self.engine.render_to_file(
                f"terraform/{output_name}.jinja2",
                self.context,
                destination,
            )
            written.append(str(destination))

        return written

Generates Terraform infrastructure files for AWS ECS deployments.

TerraformGenerator( config: geronimo.config.schema.GeronimoConfig, output_dir: str = 'infrastructure')
19    def __init__(
20        self,
21        config: GeronimoConfig,
22        output_dir: str = "infrastructure",
23    ) -> None:
24        """Initialize the Terraform generator.
25
26        Args:
27            config: Geronimo configuration.
28            output_dir: Directory to write Terraform files.
29        """
30        super().__init__()
31        self.config = config
32        self.output_dir = Path(output_dir)
33        self.engine = TemplateEngine()
34        
35        # Build context from config
36        self.context = self._build_context()

Initialize the Terraform generator.

Arguments:
  • config: Geronimo configuration.
  • output_dir: Directory to write Terraform files.
TEMPLATE_DIR = 'terraform'
config
output_dir
engine
context
def generate(self) -> list[str]:
49    def generate(self) -> list[str]:
50        """Generate all Terraform files.
51
52        Returns:
53            List of generated file paths.
54        """
55        self.output_dir.mkdir(parents=True, exist_ok=True)
56
57        files = []
58
59        # Generate each Terraform file from templates
60        terraform_files = [
61            ("main.tf.jinja2", "main.tf"),
62            ("variables.tf.jinja2", "variables.tf"),
63            ("ecr.tf.jinja2", "ecr.tf"),
64            ("ecs.tf.jinja2", "ecs.tf"),
65            ("alb.tf.jinja2", "alb.tf"),
66            ("cloudwatch.tf.jinja2", "cloudwatch.tf"),
67            ("iam.tf.jinja2", "iam.tf"),
68            ("outputs.tf.jinja2", "outputs.tf"),
69        ]
70
71        for template_name, output_name in terraform_files:
72            output_path = self.output_dir / output_name
73            self.engine.render_to_file(
74                f"terraform/{template_name}",
75                self.context,
76                output_path,
77            )
78            files.append(str(output_path))
79
80        return files

Generate all Terraform files.

Returns:

List of generated file paths.

class DockerGenerator(geronimo.generators.BaseGenerator):
class DockerGenerator(BaseGenerator):
    """Renders a Dockerfile and matching .dockerignore for an ML project."""

    TEMPLATE_DIR = "docker"

    def __init__(
        self,
        config: GeronimoConfig,
        output_path: str = "Dockerfile",
    ) -> None:
        """Set up the Docker generator.

        Args:
            config: Geronimo configuration the template values are read from.
            output_path: Location the Dockerfile is written to.
        """
        super().__init__()
        self.config = config
        self.output_path = Path(output_path)
        self.engine = TemplateEngine()

    def _get_base_image(self) -> str:
        """Return the Docker base image for the configured framework."""
        # Every listed framework maps to the same slim Python image, which is
        # also the fallback for a framework that is not in the table.
        slim_image = f"python:{self.config.runtime.python_version}-slim"
        known_frameworks = (
            MLFramework.PYTORCH,
            MLFramework.TENSORFLOW,
            MLFramework.SKLEARN,
            MLFramework.XGBOOST,
            MLFramework.CUSTOM,
        )
        image_by_framework = dict.fromkeys(known_frameworks, slim_image)
        return image_by_framework.get(self.config.model.framework, slim_image)

    def generate(self) -> str:
        """Render the Dockerfile and its .dockerignore.

        Returns:
            Path to the generated Dockerfile, as a string.
        """
        project_name = self.config.project.name
        dockerfile_context = {
            "project_name": project_name,
            "project_name_snake": project_name.replace("-", "_"),
            "base_image": self._get_base_image(),
        }
        self.engine.render_to_file(
            "docker/Dockerfile.jinja2",
            dockerfile_context,
            self.output_path,
        )

        # The ignore file is written next to the Dockerfile with an empty context.
        ignore_path = self.output_path.parent / ".dockerignore"
        self.engine.render_to_file(
            "docker/dockerignore.jinja2",
            {},
            ignore_path,
        )

        return str(self.output_path)

Generates optimized Dockerfiles for ML deployments.

DockerGenerator( config: geronimo.config.schema.GeronimoConfig, output_path: str = 'Dockerfile')
19    def __init__(
20        self,
21        config: GeronimoConfig,
22        output_path: str = "Dockerfile",
23    ) -> None:
24        """Initialize the Docker generator.
25
26        Args:
27            config: Geronimo configuration.
28            output_path: Path to write the Dockerfile.
29        """
30        super().__init__()
31        self.config = config
32        self.output_path = Path(output_path)
33        self.engine = TemplateEngine()

Initialize the Docker generator.

Arguments:
  • config: Geronimo configuration.
  • output_path: Path to write the Dockerfile.
TEMPLATE_DIR = 'docker'
config
output_path
engine
def generate(self) -> str:
50    def generate(self) -> str:
51        """Generate the Dockerfile.
52
53        Returns:
54            Path to the generated Dockerfile.
55        """
56        # Render Dockerfile
57        context = {
58            "project_name": self.config.project.name,
59            "project_name_snake": self.config.project.name.replace("-", "_"),
60            "base_image": self._get_base_image(),
61        }
62        
63        self.engine.render_to_file(
64            "docker/Dockerfile.jinja2",
65            context,
66            self.output_path
67        )
68
69        # Render .dockerignore
70        self.engine.render_to_file(
71            "docker/dockerignore.jinja2",
72            {},
73            self.output_path.parent / ".dockerignore"
74        )
75
76        return str(self.output_path)

Generate the Dockerfile.

Returns:

Path to the generated Dockerfile.

class PipelineGenerator(geronimo.generators.BaseGenerator):
class PipelineGenerator(BaseGenerator):
    """Renders the Azure DevOps pipeline definition for a project."""

    TEMPLATE_DIR = "pipeline"

    def __init__(
        self,
        config: GeronimoConfig,
        output_path: str = "azure-pipelines.yaml",
    ) -> None:
        """Set up the pipeline generator.

        Args:
            config: Geronimo configuration the template values are read from.
            output_path: Location the pipeline file is written to.
        """
        super().__init__()
        self.config = config
        self.output_path = Path(output_path)
        self.engine = TemplateEngine()

    def generate(self) -> str:
        """Render the Azure DevOps pipeline file.

        Returns:
            Path to the generated pipeline file, as a string.
        """
        cfg = self.config
        template_vars = dict(
            project_name=cfg.project.name,
            python_version=cfg.runtime.python_version,
            environments=cfg.deployment.environments,
        )

        destination = self.output_path
        self.engine.render_to_file(
            "pipeline/azure-pipelines.yaml.jinja2",
            template_vars,
            destination,
        )

        return str(destination)

Generates Azure DevOps pipeline configuration.

PipelineGenerator( config: geronimo.config.schema.GeronimoConfig, output_path: str = 'azure-pipelines.yaml')
19    def __init__(
20        self,
21        config: GeronimoConfig,
22        output_path: str = "azure-pipelines.yaml",
23    ) -> None:
24        """Initialize the pipeline generator.
25
26        Args:
27            config: Geronimo configuration.
28            output_path: Path to write the pipeline file.
29        """
30        super().__init__()
31        self.config = config
32        self.output_path = Path(output_path)
33        self.engine = TemplateEngine()

Initialize the pipeline generator.

Arguments:
  • config: Geronimo configuration.
  • output_path: Path to write the pipeline file.
TEMPLATE_DIR = 'pipeline'
config
output_path
engine
def generate(self) -> str:
35    def generate(self) -> str:
36        """Generate the Azure DevOps pipeline.
37
38        Returns:
39            Path to the generated pipeline file.
40        """
41        # Build context
42        context = {
43            "project_name": self.config.project.name,
44            "python_version": self.config.runtime.python_version,
45            "environments": self.config.deployment.environments,
46        }
47
48        # Render pipeline template
49        self.engine.render_to_file(
50            "pipeline/azure-pipelines.yaml.jinja2",
51            context,
52            self.output_path
53        )
54        
55        return str(self.output_path)

Generate the Azure DevOps pipeline.

Returns:

Path to the generated pipeline file.