geronimo.serving
Geronimo Serving Module.
The serving module provides the infrastructure for exposing trained models as scalable API endpoints. It handles request parsing, feature transformation, and model inference.
Key features:
- FastAPI integration for high-performance serving.
- Automatic generation of OpenAPI (Swagger) documentation.
- Support for batch and real-time inference requests.
- Health checks and monitoring endpoints.
Endpoints are automatically Dockerized and deployed to the target infrastructure.
"""Geronimo Serving Module.

The serving module provides the infrastructure for exposing trained models as
scalable API endpoints. It handles request parsing, feature transformation,
and model inference.

Key features:
- FastAPI integration for high-performance serving.
- Automatic generation of OpenAPI (Swagger) documentation.
- Support for batch and real-time inference requests.
- Health checks and monitoring endpoints.

Endpoints are automatically Dockerized and deployed to the target infrastructure.
"""

# Re-export the endpoint base class so callers can write
# `from geronimo.serving import Endpoint`.
from geronimo.serving.endpoint import Endpoint

# Names exported by `from geronimo.serving import *`.
__all__ = ["Endpoint"]

# Docstring style marker; presumably read by the documentation generator
# to parse Google-style sections (confirm against the docs tooling in use).
__docformat__ = "google"
class
Endpoint(abc.ABC):
class Endpoint(ABC):
    """Base class for real-time prediction endpoints.

    Provides a standardized interface for pre/post processing
    with automatic model and feature loading.

    Subclasses set ``model_class`` and implement ``preprocess`` and
    ``postprocess``. ``handle`` then runs, for each request,
    ``preprocess`` -> ``model.predict`` -> ``postprocess``.

    Example:
        ```python
        from geronimo.serving import Endpoint
        from myproject.models import CreditRiskModel

        class PredictEndpoint(Endpoint):
            model_class = CreditRiskModel

            def preprocess(self, request: dict) -> dict:
                # Transform request data
                return self.model.features.transform(request["data"])

            def postprocess(self, prediction) -> dict:
                return {
                    "score": float(prediction[0]),
                    "class": "approved" if prediction[0] > 0.5 else "denied",
                }

        # Create FastAPI route
        endpoint = PredictEndpoint()
        endpoint.initialize()  # Loads model artifacts

        @app.post("/predict")
        def predict(request: dict):
            return endpoint.handle(request)
        ```
    """

    # Override in subclass. ``model_class`` stays None until a subclass sets
    # it, which is why the annotation is Optional.
    model_class: Optional[type["Model"]] = None
    artifact_project: Optional[str] = None
    artifact_version: Optional[str] = None

    model: Optional["Model"]
    """The loaded model instance."""

    _is_initialized: bool
    """Internal flag tracking initialization status."""

    def __init__(self):
        """Initialize endpoint with no model loaded."""
        self.model: Optional["Model"] = None
        self._is_initialized: bool = False

    def initialize(
        self,
        project: Optional[str] = None,
        version: Optional[str] = None,
    ) -> None:
        """Initialize endpoint by loading model artifacts.

        The project and version are each resolved in order from: the explicit
        argument, the class-level ``artifact_project`` / ``artifact_version``,
        then ``model_class.name`` / ``model_class.version``.

        Args:
            project: Artifact project name.
            version: Artifact version.

        Raises:
            RuntimeError: If ``model_class`` has not been set on the subclass.
        """
        # Fail with a clear message instead of an AttributeError/TypeError on
        # None further down. Checked before the import so a misconfigured
        # subclass is reported without touching the artifact layer.
        if self.model_class is None:
            raise RuntimeError(
                f"{self.__class__.__name__}.model_class is not set. "
                "Set model_class on the subclass before calling initialize()."
            )

        # Function-scope import, kept as in the original implementation.
        from geronimo.artifacts import ArtifactStore

        project = project or self.artifact_project or self.model_class.name
        version = version or self.artifact_version or self.model_class.version

        store = ArtifactStore.load(project=project, version=version)

        # Publish the model only after load() succeeds, so a failed load does
        # not leave a half-loaded model on the endpoint.
        model = self.model_class()
        model.load(store)
        self.model = model
        self._is_initialized = True

    def handle(self, request: dict) -> dict:
        """Handle prediction request.

        Args:
            request: Input request data.

        Returns:
            Response dictionary.

        Raises:
            RuntimeError: If the endpoint has not been initialized.
        """
        if not self._is_initialized:
            raise RuntimeError("Endpoint not initialized. Call initialize() first.")

        # Preprocess
        features = self.preprocess(request)

        # Predict
        prediction = self.model.predict(features)

        # Postprocess
        return self.postprocess(prediction)

    @abstractmethod
    def preprocess(self, request: dict) -> Any:
        """Preprocess request data.

        Args:
            request: Raw request data.

        Returns:
            Preprocessed features for model.
        """
        pass

    @abstractmethod
    def postprocess(self, prediction: Any) -> dict:
        """Postprocess model prediction.

        Args:
            prediction: Raw model output.

        Returns:
            Response dictionary.
        """
        pass

    @property
    def is_initialized(self) -> bool:
        """Check if endpoint is initialized."""
        return self._is_initialized

    def __repr__(self) -> str:
        """Return ``ClassName(model=<model class name>, <status>)``."""
        status = "initialized" if self._is_initialized else "not initialized"
        model_name = self.model_class.__name__ if self.model_class else "None"
        return f"{self.__class__.__name__}(model={model_name}, {status})"
Base class for real-time prediction endpoints.
Provides a standardized interface for pre/post processing with automatic model and feature loading.
Example:
# Minimal Endpoint subclass wired into a web route (illustrative example).
from geronimo.serving import Endpoint
from myproject.models import CreditRiskModel

class PredictEndpoint(Endpoint):
    # Model class the endpoint instantiates and loads in initialize().
    model_class = CreditRiskModel

    def preprocess(self, request: dict) -> dict:
        # Transform request data
        return self.model.features.transform(request["data"])

    def postprocess(self, prediction) -> dict:
        # Illustrative mapping: treats prediction[0] as a score and uses 0.5
        # as the approval threshold.
        return {
            "score": float(prediction[0]),
            "class": "approved" if prediction[0] > 0.5 else "denied",
        }

# Create FastAPI route
endpoint = PredictEndpoint()
endpoint.initialize()  # Loads model artifacts

# NOTE: `app` is not defined in this snippet; presumably an existing FastAPI
# application object created elsewhere.
@app.post("/predict")
def predict(request: dict):
    # handle() raises RuntimeError if initialize() has not been called.
    return endpoint.handle(request)
Endpoint()
56 def __init__(self): 57 """Initialize endpoint.""" 58 self.model: Optional["Model"] = None 59 self._is_initialized: bool = False
Initialize endpoint.
def
initialize( self, project: Optional[str] = None, version: Optional[str] = None) -> None:
def initialize(
    self,
    project: Optional[str] = None,
    version: Optional[str] = None,
) -> None:
    """Initialize endpoint by loading model artifacts.

    The project and version are each resolved in order from: the explicit
    argument, the class-level ``artifact_project`` / ``artifact_version``,
    then ``model_class.name`` / ``model_class.version``.

    Args:
        project: Artifact project name.
        version: Artifact version.

    Raises:
        RuntimeError: If ``model_class`` has not been set on the subclass.
    """
    # Fail with a clear message instead of an AttributeError/TypeError on
    # None further down. Checked before the import so a misconfigured
    # subclass is reported without touching the artifact layer.
    if self.model_class is None:
        raise RuntimeError(
            f"{self.__class__.__name__}.model_class is not set. "
            "Set model_class on the subclass before calling initialize()."
        )

    # Function-scope import, kept as in the original implementation.
    from geronimo.artifacts import ArtifactStore

    project = project or self.artifact_project or self.model_class.name
    version = version or self.artifact_version or self.model_class.version

    store = ArtifactStore.load(project=project, version=version)

    # Publish the model only after load() succeeds, so a failed load does
    # not leave a half-loaded model on the endpoint.
    model = self.model_class()
    model.load(store)
    self.model = model
    self._is_initialized = True
Initialize endpoint by loading model artifacts.
Arguments:
- project: Artifact project name.
- version: Artifact version.
def
handle(self, request: dict) -> dict:
def handle(self, request: dict) -> dict:
    """Run one request through the prediction pipeline.

    The request goes through ``preprocess``, the result is passed to
    ``self.model.predict``, and the model output goes through
    ``postprocess``.

    Args:
        request: Input request data.

    Returns:
        Response dictionary.

    Raises:
        RuntimeError: If the endpoint has not been initialized.
    """
    if self._is_initialized:
        model_input = self.preprocess(request)
        raw_output = self.model.predict(model_input)
        return self.postprocess(raw_output)

    raise RuntimeError("Endpoint not initialized. Call initialize() first.")
Handle prediction request.
Arguments:
- request: Input request data.
Returns:
Response dictionary.
@abstractmethod
def
preprocess(self, request: dict) -> Any:
@abstractmethod
def preprocess(self, request: dict) -> Any:
    """Turn a raw request into the features handed to the model.

    Abstract hook: concrete endpoints must override it. The base
    implementation does nothing and returns None.

    Args:
        request: Raw request data.

    Returns:
        Preprocessed features for model.
    """
    return None
Preprocess request data.
Arguments:
- request: Raw request data.
Returns:
Preprocessed features for model.
@abstractmethod
def
postprocess(self, prediction: Any) -> dict:
@abstractmethod
def postprocess(self, prediction: Any) -> dict:
    """Convert the model's raw output into the response payload.

    Abstract hook: concrete endpoints must override it. The base
    implementation does nothing and returns None.

    Args:
        prediction: Raw model output.

    Returns:
        Response dictionary.
    """
    return None
Postprocess model prediction.
Arguments:
- prediction: Raw model output.
Returns:
Response dictionary.