geronimo.serving

Geronimo Serving Module.

The serving module provides the infrastructure for exposing trained models as scalable API endpoints. It handles request parsing, feature transformation, and model inference.

Key features:

  • FastAPI integration for high-performance serving.
  • Automatic generation of OpenAPI (Swagger) documentation.
  • Support for batch and real-time inference requests.
  • Health checks and monitoring endpoints.

Endpoints are automatically Dockerized and deployed to the target infrastructure.

 1"""Geronimo Serving Module.
 2
 3The serving module provides the infrastructure for exposing trained models as
 4scalable API endpoints. It handles request parsing, feature transformation,
 5and model inference.
 6
 7Key features:
 8- Fast API integration for high-performance serving.
 9- Automatic generation of OpenAPI (Swagger) documentation.
10- Support for batch and real-time inference requests.
11- Health checks and monitoring endpoints.
12
13Endpoints are automatically Dockerized and deployed to the target infrastructure.
14"""
15
16from geronimo.serving.endpoint import Endpoint
17
18__all__ = ["Endpoint"]
19
20__docformat__ = "google"
class Endpoint(abc.ABC):
 11class Endpoint(ABC):
 12    """Base class for real-time prediction endpoints.
 13
 14    Provides a standardized interface for pre/post processing
 15    with automatic model and feature loading.
 16
 17    Example:
 18        ```python
 19        from geronimo.serving import Endpoint
 20        from myproject.models import CreditRiskModel
 21
 22        class PredictEndpoint(Endpoint):
 23            model_class = CreditRiskModel
 24
 25            def preprocess(self, request: dict) -> dict:
 26                # Transform request data
 27                return self.model.features.transform(request["data"])
 28
 29            def postprocess(self, prediction) -> dict:
 30                return {
 31                    "score": float(prediction[0]),
 32                    "class": "approved" if prediction[0] > 0.5 else "denied",
 33                }
 34
 35        # Create FastAPI route
 36        endpoint = PredictEndpoint()
 37        endpoint.initialize()  # Loads model artifacts
 38
 39        @app.post("/predict")
 40        def predict(request: dict):
 41            return endpoint.handle(request)
 42        ```
 43    """
 44
 45    # Override in subclass
 46    model_class: type["Model"] = None
 47    artifact_project: Optional[str] = None
 48    artifact_version: Optional[str] = None
 49
 50    model: Optional["Model"]
 51    """The loaded model instance."""
 52
 53    _is_initialized: bool
 54    """Internal flag tracking initialization status."""
 55
 56    def __init__(self):
 57        """Initialize endpoint."""
 58        self.model: Optional["Model"] = None
 59        self._is_initialized: bool = False
 60
 61    def initialize(
 62        self,
 63        project: Optional[str] = None,
 64        version: Optional[str] = None,
 65    ) -> None:
 66        """Initialize endpoint by loading model artifacts.
 67
 68        Args:
 69            project: Artifact project name.
 70            version: Artifact version.
 71        """
 72        from geronimo.artifacts import ArtifactStore
 73
 74        project = project or self.artifact_project or self.model_class.name
 75        version = version or self.artifact_version or self.model_class.version
 76
 77        store = ArtifactStore.load(project=project, version=version)
 78
 79        self.model = self.model_class()
 80        self.model.load(store)
 81        self._is_initialized = True
 82
 83    def handle(self, request: dict) -> dict:
 84        """Handle prediction request.
 85
 86        Args:
 87            request: Input request data.
 88
 89        Returns:
 90            Response dictionary.
 91        """
 92        if not self._is_initialized:
 93            raise RuntimeError("Endpoint not initialized. Call initialize() first.")
 94
 95        # Preprocess
 96        features = self.preprocess(request)
 97
 98        # Predict
 99        prediction = self.model.predict(features)
100
101        # Postprocess
102        return self.postprocess(prediction)
103
104    @abstractmethod
105    def preprocess(self, request: dict) -> Any:
106        """Preprocess request data.
107
108        Args:
109            request: Raw request data.
110
111        Returns:
112            Preprocessed features for model.
113        """
114        pass
115
116    @abstractmethod
117    def postprocess(self, prediction: Any) -> dict:
118        """Postprocess model prediction.
119
120        Args:
121            prediction: Raw model output.
122
123        Returns:
124            Response dictionary.
125        """
126        pass
127
128    @property
129    def is_initialized(self) -> bool:
130        """Check if endpoint is initialized."""
131        return self._is_initialized
132
133    def __repr__(self) -> str:
134        status = "initialized" if self._is_initialized else "not initialized"
135        model_name = self.model_class.__name__ if self.model_class else "None"
136        return f"{self.__class__.__name__}(model={model_name}, {status})"

Base class for real-time prediction endpoints.

Provides a standardized interface for pre/post processing with automatic model and feature loading.

Example:
from geronimo.serving import Endpoint
from myproject.models import CreditRiskModel

class PredictEndpoint(Endpoint):
    model_class = CreditRiskModel

    def preprocess(self, request: dict) -> dict:
        # Transform request data
        return self.model.features.transform(request["data"])

    def postprocess(self, prediction) -> dict:
        return {
            "score": float(prediction[0]),
            "class": "approved" if prediction[0] > 0.5 else "denied",
        }

# Create FastAPI route
endpoint = PredictEndpoint()
endpoint.initialize()  # Loads model artifacts

@app.post("/predict")
def predict(request: dict):
    return endpoint.handle(request)
Endpoint()
56    def __init__(self):
57        """Initialize endpoint."""
58        self.model: Optional["Model"] = None
59        self._is_initialized: bool = False

Initialize endpoint.

model_class: type[geronimo.models.Model] = None
artifact_project: Optional[str] = None
artifact_version: Optional[str] = None
model: Optional[geronimo.models.Model]

The loaded model instance.

def initialize( self, project: Optional[str] = None, version: Optional[str] = None) -> None:
61    def initialize(
62        self,
63        project: Optional[str] = None,
64        version: Optional[str] = None,
65    ) -> None:
66        """Initialize endpoint by loading model artifacts.
67
68        Args:
69            project: Artifact project name.
70            version: Artifact version.
71        """
72        from geronimo.artifacts import ArtifactStore
73
74        project = project or self.artifact_project or self.model_class.name
75        version = version or self.artifact_version or self.model_class.version
76
77        store = ArtifactStore.load(project=project, version=version)
78
79        self.model = self.model_class()
80        self.model.load(store)
81        self._is_initialized = True

Initialize endpoint by loading model artifacts.

Arguments:
  • project: Artifact project name.
  • version: Artifact version.
def handle(self, request: dict) -> dict:
 83    def handle(self, request: dict) -> dict:
 84        """Handle prediction request.
 85
 86        Args:
 87            request: Input request data.
 88
 89        Returns:
 90            Response dictionary.
 91        """
 92        if not self._is_initialized:
 93            raise RuntimeError("Endpoint not initialized. Call initialize() first.")
 94
 95        # Preprocess
 96        features = self.preprocess(request)
 97
 98        # Predict
 99        prediction = self.model.predict(features)
100
101        # Postprocess
102        return self.postprocess(prediction)

Handle prediction request.

Arguments:
  • request: Input request data.
Returns:

Response dictionary.

@abstractmethod
def preprocess(self, request: dict) -> Any:
104    @abstractmethod
105    def preprocess(self, request: dict) -> Any:
106        """Preprocess request data.
107
108        Args:
109            request: Raw request data.
110
111        Returns:
112            Preprocessed features for model.
113        """
114        pass

Preprocess request data.

Arguments:
  • request: Raw request data.
Returns:

Preprocessed features for model.

@abstractmethod
def postprocess(self, prediction: Any) -> dict:
116    @abstractmethod
117    def postprocess(self, prediction: Any) -> dict:
118        """Postprocess model prediction.
119
120        Args:
121            prediction: Raw model output.
122
123        Returns:
124            Response dictionary.
125        """
126        pass

Postprocess model prediction.

Arguments:
  • prediction: Raw model output.
Returns:

Response dictionary.

is_initialized: bool
128    @property
129    def is_initialized(self) -> bool:
130        """Check if endpoint is initialized."""
131        return self._is_initialized

Check if endpoint is initialized.