geronimo.features
Geronimo Features Module.
The features module provides the abstractions for defining feature transformations and pipelines. It is inspired by scikit-learn's fit/transform paradigm, but is optimized for production systems, where consistency between training and serving is critical.
Key components:
- FeatureSet: A logical grouping of features (e.g., user features, item features).
- Feature: A specific transformation logic (e.g., OneHotEncoding, Normalization).
This module ensures that the exact same feature engineering logic is applied during batch training and real-time inference, preventing training-serving skew.
1"""Geronimo Features Module. 2 3The features module provides the abstractions for defining feature transformations 4and pipelines. It is inspired by scikit-learn's fit/transform paradigm but 5optimized for production systems where consistency between training and serving is critical. 6 7Key components: 8- FeatureSet: A logical grouping of features (e.g., user features, item features). 9- Feature: A specific transformation logic (e.g., OneHotEncoding, Normalization). 10 11This module ensures that the exact same feature engineering logic is applied during 12batch training and real-time inference preventing training-serving skew. 13""" 14 15from geronimo.features.base import FeatureSet 16from geronimo.features.feature import Feature 17 18__all__ = ["FeatureSet", "Feature"] 19 20__docformat__ = "google"
class FeatureSet:
    """Base class for feature engineering pipelines.

    Provides fit/transform semantics for training vs production,
    with integrated artifact storage for encoders and transformers.

    Subclasses declare ``Feature`` objects as class attributes. ``__init__``
    collects them, ``fit`` trains their transformers/encoders, ``transform``
    assembles the output DataFrame, and ``save``/``load`` move the fitted
    objects to and from an artifact store.

    Example:
        ```python
        from geronimo.features import FeatureSet, Feature
        from sklearn.preprocessing import StandardScaler, OneHotEncoder

        class CustomerFeatures(FeatureSet):
            data_source = DataSource(
                name="customers",
                source="snowflake",
                query=Query.from_file("queries/customers.sql"),
            )

            age = Feature(dtype="numeric", transformer=StandardScaler())
            income = Feature(dtype="numeric", transformer=StandardScaler())
            segment = Feature(dtype="categorical", encoder=OneHotEncoder(sparse_output=False))

        # Training: fit and transform
        features = CustomerFeatures()
        X = features.fit_transform(training_df)

        # Production: transform only (uses fitted encoders)
        X = features.transform(production_df)
        ```
    """

    # Override in subclass
    data_source: Optional["DataSource"] = None

    def __init__(self):
        """Initialize feature set by collecting the class-level Feature attributes."""
        self._features: dict[str, Feature] = {}
        self._is_fitted: bool = False

        # dir() returns names sorted alphabetically, so features (and therefore
        # output columns) are processed in alphabetical order rather than in
        # declaration order.
        # NOTE(review): the collected Feature objects live on the class, so all
        # instances of a subclass share the same transformer/encoder objects;
        # fitting or loading one instance is visible through the others.
        for name in dir(self.__class__):
            attr = getattr(self.__class__, name, None)
            if isinstance(attr, Feature):
                self._features[name] = attr

    @property
    def feature_names(self) -> list[str]:
        """Get list of feature names (excluding dropped)."""
        return [f.name for f in self._features.values() if not f.drop]

    @property
    def numeric_features(self) -> list[Feature]:
        """Get numeric features (excluding dropped)."""
        return [f for f in self._features.values() if f.dtype == "numeric" and not f.drop]

    @property
    def categorical_features(self) -> list[Feature]:
        """Get categorical features (excluding dropped)."""
        return [
            f for f in self._features.values() if f.dtype == "categorical" and not f.drop
        ]

    def fit(self, df: pd.DataFrame) -> "FeatureSet":
        """Fit all transformers and encoders of the non-dropped features.

        Args:
            df: Training DataFrame.

        Returns:
            Self for chaining.
        """
        for feature in self._features.values():
            if feature.drop:
                continue
            self._process_feature(feature, df, mode="fit")

        self._is_fitted = True
        return self

    def transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Transform DataFrame using fitted transformers.

        Args:
            df: Input DataFrame.

        Returns:
            Transformed DataFrame sharing ``df``'s index. Features whose source
            column is absent from ``df`` contribute no column.

        Raises:
            ValueError: If not fitted.
        """
        if not self._is_fitted:
            raise ValueError("FeatureSet not fitted. Call fit() first.")

        result = pd.DataFrame(index=df.index)

        for feature in self._features.values():
            if feature.drop:
                continue

            transformed = self._process_feature(feature, df, mode="transform")
            if transformed is not None:
                if isinstance(transformed, dict):
                    # Multi-column output from encoders
                    for col_name, values in transformed.items():
                        result[col_name] = values
                else:
                    result[feature.name] = transformed

        return result

    @staticmethod
    def _raw_values(values):
        """Return the array behind a Series/Index, or coerce anything else to ndarray."""
        import numpy as np

        if isinstance(values, (pd.Series, pd.Index)):
            return values.values
        # A derive function may return a plain ndarray or list, which has no
        # ``.values`` attribute.
        return np.asarray(values)

    @staticmethod
    def _dense(values):
        """Return transformer/encoder output as a dense ndarray."""
        import numpy as np

        # Sparse matrices cannot be column-sliced into DataFrame columns
        # directly, so densify them first.
        if hasattr(values, "toarray"):
            values = values.toarray()
        return np.asarray(values)

    def _process_feature(
        self,
        feature: Feature,
        df: pd.DataFrame,
        mode: str,
    ) -> Optional[object]:
        """Process a single feature for fit or transform.

        Unified processing logic shared by fit() and transform(). The input is
        either the output of the feature's derive function or its source
        column; a transformer (if any) takes precedence over an encoder.

        Args:
            feature: Feature descriptor to process.
            df: Input DataFrame.
            mode: "fit" to fit in place; any other value is treated as
                "transform".

        Returns:
            For mode="fit": None (modifies transformers/encoders in place).
            For mode="transform": transformed values (1-D array, or a dict of
            column name -> array for multi-column encoder output). None when a
            non-derived feature's source column is missing from ``df``.
        """
        fitting = mode == "fit"

        if feature.has_derived_fn:
            # Derived features: the derive function's output feeds the model.
            raw = self._raw_values(feature.apply(df))
            model_input = raw.reshape(-1, 1)
            out_name = feature.name
        else:
            out_name = feature.source_column
            if out_name not in df.columns:
                # Best effort: a missing source column is skipped silently.
                return None
            raw = None
            # Single-column DataFrame keeps the 2-D shape estimators expect.
            model_input = df[[out_name]]

        if feature.has_transformer:
            if fitting:
                feature.transformer.fit(model_input)
                return None
            return self._dense(feature.transformer.transform(model_input)).ravel()

        if feature.has_encoder:
            encoder = feature.encoder
            if fitting:
                encoder.fit(model_input)
                return None
            encoded = self._dense(encoder.transform(model_input))
            # Handle multi-column output from encoders
            if hasattr(encoder, "get_feature_names_out"):
                if encoded.ndim == 1:
                    encoded = encoded.reshape(-1, 1)
                enc_names = encoder.get_feature_names_out([out_name])
                return {enc_name: encoded[:, i] for i, enc_name in enumerate(enc_names)}
            return encoded.ravel()

        if fitting:
            return None
        # Pass-through: no transformer or encoder configured.
        return raw if feature.has_derived_fn else df[out_name].values

    def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fit and transform in one step.

        Args:
            df: Training DataFrame.

        Returns:
            Transformed DataFrame.
        """
        return self.fit(df).transform(df)

    def save(self, store: "ArtifactStore") -> None:
        """Save transformers and encoders to artifact store.

        Each object is stored under ``transformer_<name>`` or
        ``encoder_<name>``, where ``<name>`` is the attribute name of the
        feature. Dropped features are included.

        Args:
            store: ArtifactStore instance.
        """
        for name, feature in self._features.items():
            if feature.has_transformer:
                store.save(
                    f"transformer_{name}",
                    feature.transformer,
                    artifact_type="transformer",
                )
            if feature.has_encoder:
                store.save(
                    f"encoder_{name}",
                    feature.encoder,
                    artifact_type="encoder",
                )

    def load(self, store: "ArtifactStore") -> None:
        """Load transformers and encoders from artifact store.

        Replaces the transformer/encoder of every feature that declares one
        and marks the feature set as fitted.

        Args:
            store: ArtifactStore instance.
        """
        for name, feature in self._features.items():
            if feature.has_transformer:
                feature.transformer = store.get(f"transformer_{name}")
            if feature.has_encoder:
                feature.encoder = store.get(f"encoder_{name}")

        self._is_fitted = True

    @property
    def is_fitted(self) -> bool:
        """Check if feature set has been fitted."""
        return self._is_fitted

    def __repr__(self) -> str:
        status = "fitted" if self._is_fitted else "not fitted"
        return f"{self.__class__.__name__}({len(self._features)} features, {status})"
Base class for feature engineering pipelines.
Provides fit/transform semantics for training vs production, with integrated artifact storage for encoders and transformers.
Example:
from geronimo.features import FeatureSet, Feature
from sklearn.preprocessing import StandardScaler, OneHotEncoder

class CustomerFeatures(FeatureSet):
    data_source = DataSource(
        name="customers",
        source="snowflake",
        query=Query.from_file("queries/customers.sql"),
    )

    age = Feature(dtype="numeric", transformer=StandardScaler())
    income = Feature(dtype="numeric", transformer=StandardScaler())
    segment = Feature(dtype="categorical", encoder=OneHotEncoder(sparse_output=False))

# Training: fit and transform
features = CustomerFeatures()
X = features.fit_transform(training_df)

# Production: transform only (uses fitted encoders)
X = features.transform(production_df)
def __init__(self):
    """Create an unfitted feature set from the Feature attributes of the class."""
    self._features: dict[str, Feature] = {}
    self._is_fitted: bool = False

    # Scan every attribute name reported for the class and keep the ones
    # whose value is a Feature, keyed by attribute name.
    owner = self.__class__
    candidates = ((attr_name, getattr(owner, attr_name, None)) for attr_name in dir(owner))
    for attr_name, candidate in candidates:
        if not isinstance(candidate, Feature):
            continue
        self._features[attr_name] = candidate
Initialize feature set.
@property
def feature_names(self) -> list[str]:
    """Names of all collected features that are not marked as dropped."""
    kept = []
    for feature in self._features.values():
        if feature.drop:
            continue
        kept.append(feature.name)
    return kept
Get list of feature names (excluding dropped).
@property
def numeric_features(self) -> list[Feature]:
    """Collected features with dtype "numeric" that are not dropped."""
    selected = []
    for feature in self._features.values():
        if feature.dtype != "numeric" or feature.drop:
            continue
        selected.append(feature)
    return selected
Get numeric features.
@property
def categorical_features(self) -> list[Feature]:
    """Collected features with dtype "categorical" that are not dropped."""
    selected = []
    for feature in self._features.values():
        if feature.dtype != "categorical" or feature.drop:
            continue
        selected.append(feature)
    return selected
Get categorical features.
def fit(self, df: pd.DataFrame) -> "FeatureSet":
    """Fit the transformer or encoder of every non-dropped feature.

    Args:
        df: Training DataFrame.

    Returns:
        This feature set, now marked as fitted, so calls can be chained.
    """
    active = (feature for feature in self._features.values() if not feature.drop)
    for feature in active:
        self._process_feature(feature, df, mode="fit")

    self._is_fitted = True
    return self
Fit all transformers and encoders.
Arguments:
- df: Training DataFrame.
Returns:
Self for chaining.
def transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """Build the feature DataFrame for ``df`` using the fitted transformers.

    Args:
        df: Input DataFrame.

    Returns:
        A DataFrame on ``df``'s index with one column per single-output
        feature and one column per entry of a multi-column (dict) result.
        Features that are dropped or yield None contribute nothing.

    Raises:
        ValueError: If not fitted.
    """
    if not self._is_fitted:
        raise ValueError("FeatureSet not fitted. Call fit() first.")

    result = pd.DataFrame(index=df.index)

    for feature in self._features.values():
        if feature.drop:
            continue

        produced = self._process_feature(feature, df, mode="transform")
        if produced is None:
            continue

        # Normalise single-column output to the same mapping shape that
        # multi-column encoders return.
        columns = produced if isinstance(produced, dict) else {feature.name: produced}
        for col_name, values in columns.items():
            result[col_name] = values

    return result
Transform DataFrame using fitted transformers.
Arguments:
- df: Input DataFrame.
Returns:
Transformed DataFrame.
Raises:
- ValueError: If not fitted.
def fit_transform(self, df: pd.DataFrame) -> pd.DataFrame:
    """Fit on ``df`` and immediately transform the same DataFrame.

    Args:
        df: Training DataFrame.

    Returns:
        Transformed DataFrame.
    """
    fitted = self.fit(df)
    return fitted.transform(df)
Fit and transform in one step.
Arguments:
- df: Training DataFrame.
Returns:
Transformed DataFrame.
def save(self, store: "ArtifactStore") -> None:
    """Write each feature's transformer and encoder to the artifact store.

    Objects are stored under ``transformer_<name>`` / ``encoder_<name>``,
    where ``<name>`` is the feature's key in the feature set.

    Args:
        store: ArtifactStore instance.
    """
    for name, feature in self._features.items():
        artifacts = (
            (feature.has_transformer, f"transformer_{name}", feature.transformer, "transformer"),
            (feature.has_encoder, f"encoder_{name}", feature.encoder, "encoder"),
        )
        for present, key, artifact, kind in artifacts:
            if present:
                store.save(key, artifact, artifact_type=kind)
Save fitted transformers and encoders to artifact store.
Arguments:
- store: ArtifactStore instance.
def load(self, store: "ArtifactStore") -> None:
    """Replace each feature's transformer and encoder with the stored copy.

    Looks up ``transformer_<name>`` / ``encoder_<name>`` for every feature
    that declares the corresponding object, then marks the set as fitted.

    Args:
        store: ArtifactStore instance.
    """
    for key in self._features:
        target = self._features[key]
        if target.has_transformer:
            target.transformer = store.get(f"transformer_{key}")
        if target.has_encoder:
            target.encoder = store.get(f"encoder_{key}")

    self._is_fitted = True
Load fitted transformers and encoders from artifact store.
Arguments:
- store: ArtifactStore instance.
class Feature:
    """Descriptor-style declaration of a single feature inside a FeatureSet.

    A Feature records the column(s) it reads, its data type, and the
    optional objects used to compute or transform its values.

    Order of Operations
    -------------------
    The documented processing order is:

    1. **derived_feature_fn** (if provided):
       - Called first with the full DataFrame
       - Computes derived values from source_columns
       - Output becomes input for subsequent steps

    2. **transformer** (if provided):
       - Applied after derived_feature_fn (or to source column if no derive fn)
       - Must implement sklearn fit/transform interface
       - Typically for numeric normalization (StandardScaler, MinMaxScaler)

    3. **encoder** (if provided):
       - Applied to categorical values
       - Must implement sklearn fit/transform interface
       - Typically for categorical encoding (OneHotEncoder, LabelEncoder)

    Note: transformer and encoder are mutually exclusive - use one or the other.
    This class does not enforce that rule itself.

    Example:
        ```python
        from geronimo.features import FeatureSet, Feature
        from sklearn.preprocessing import StandardScaler, OneHotEncoder

        class CustomerFeatures(FeatureSet):
            # Simple numeric feature with transformer
            age = Feature(dtype="numeric", transformer=StandardScaler())

            # Categorical feature with encoder
            segment = Feature(dtype="categorical", encoder=OneHotEncoder())

            # Derived feature: single input → custom logic
            age_bucket = Feature(
                dtype="derived",
                source_columns=["age"],
                derived_feature_fn=lambda df: (df["age"] // 10) * 10,
            )

            # Derived feature: multiple inputs → single output
            bmi = Feature(
                dtype="derived",
                source_columns=["weight_kg", "height_m"],
                derived_feature_fn=lambda df: df["weight_kg"] / (df["height_m"] ** 2),
            )

            # Derived + transformed: compute then normalize
            bmi_normalized = Feature(
                dtype="derived",
                source_columns=["weight_kg", "height_m"],
                derived_feature_fn=lambda df: df["weight_kg"] / (df["height_m"] ** 2),
                transformer=StandardScaler(),  # Applied after derive
            )

            # Drop from final output
            name = Feature(dtype="text", drop=True)
        ```
    """

    dtype: Literal["numeric", "categorical", "text", "derived"]
    """Feature data type."""

    transformer: Optional[Any]
    """Sklearn-compatible transformer for numeric features."""

    encoder: Optional[Any]
    """Sklearn-compatible encoder for categorical features."""

    source_column: Optional[str]
    """Single input column name."""

    source_columns: Optional[list[str]]
    """List of input column names for derived features."""

    derived_feature_fn: Optional[Callable]
    """Custom function for feature engineering."""

    drop: bool
    """If True, exclude feature from final output."""

    description: Optional[str]
    """Optional human-readable feature description."""

    def __init__(
        self,
        dtype: Literal["numeric", "categorical", "text", "derived"] = "numeric",
        transformer: Optional[Any] = None,
        encoder: Optional[Any] = None,
        source_column: Optional[str] = None,
        source_columns: Optional[list[str]] = None,
        derived_feature_fn: Optional[Callable] = None,
        drop: bool = False,
        description: Optional[str] = None,
    ):
        """Store the feature configuration; the name is assigned later.

        Args:
            dtype: Feature data type.
                - "numeric": Numeric values (int, float)
                - "categorical": Categorical/discrete values
                - "text": Text data (typically dropped or embedded)
                - "derived": Computed from other columns via derived_feature_fn

            transformer: Sklearn-compatible transformer for numeric features.
                Applied AFTER derived_feature_fn if both are provided.
                Must implement fit() and transform() methods.
                Example: StandardScaler(), MinMaxScaler()

            encoder: Sklearn-compatible encoder for categorical features.
                Must implement fit() and transform() methods.
                Example: OneHotEncoder(), LabelEncoder()

            source_column: Single input column name (if different from attribute name).
                Used when feature maps 1:1 from a differently-named source column.

            source_columns: List of input column names for derived features.
                Required when derived_feature_fn needs multiple input columns.

            derived_feature_fn: Custom function for feature engineering.
                Receives full DataFrame, returns Series or array.
                Called BEFORE transformer (if both provided).
                Example: lambda df: df["weight"] / (df["height"] ** 2)

            drop: If True, exclude feature from final output.
                Useful for passthrough columns needed only for derived features.

            description: Optional human-readable feature description.
        """
        # Filled in by __set_name__ when the feature is bound to a class attribute.
        self._name: Optional[str] = None

        self.dtype = dtype
        self.drop = drop
        self.description = description

        self.source_column = source_column
        self.source_columns = source_columns
        self.derived_feature_fn = derived_feature_fn

        self.transformer = transformer
        self.encoder = encoder

    def __set_name__(self, owner, name: str) -> None:
        """Record the attribute name this feature is bound to on its owner class."""
        self._name = name
        no_explicit_source = self.source_column is None and self.source_columns is None
        if no_explicit_source:
            # Default: read from the column named like the attribute.
            self.source_column = name

    @property
    def name(self) -> str:
        """Attribute name of the feature, or "unnamed" if it was never bound."""
        if self._name:
            return self._name
        return "unnamed"

    @property
    def input_columns(self) -> list[str]:
        """Columns this feature reads: source_columns, else the single source column."""
        columns = self.source_columns
        if not columns:
            columns = [self.source_column or self.name]
        return columns

    @property
    def has_transformer(self) -> bool:
        """True when a transformer was supplied."""
        return not (self.transformer is None)

    @property
    def has_encoder(self) -> bool:
        """True when an encoder was supplied."""
        return not (self.encoder is None)

    @property
    def has_derived_fn(self) -> bool:
        """True when a derived feature function was supplied."""
        return not (self.derived_feature_fn is None)

    @property
    def is_derived(self) -> bool:
        """True when the feature has a derive function or dtype "derived"."""
        if self.derived_feature_fn is not None:
            return True
        return self.dtype == "derived"

    def apply(self, df) -> Any:
        """Compute the raw feature values from ``df``.

        Args:
            df: Input DataFrame with source columns.

        Returns:
            The derive function's result when one is set; otherwise the
            ``source_column`` of ``df``, falling back to the column named
            after the feature.
        """
        derive = self.derived_feature_fn
        if derive is not None:
            return derive(df)
        column = self.source_column if self.source_column else self.name
        return df[column]

    def __repr__(self) -> str:
        flags = (
            (self.has_derived_fn, "derived_feature_fn"),
            (bool(self.source_columns), f"inputs={self.source_columns}"),
            (self.has_transformer, "transformer"),
            (self.has_encoder, "encoder"),
        )
        extras = [label for present, label in flags if present]
        extra_str = f", {', '.join(extras)}" if extras else ""
        return f"Feature({self.name}, dtype={self.dtype}{extra_str})"
Feature descriptor for defining individual features.
Used within FeatureSet classes to define feature columns with their types and transformations.
Order of Operations
When processing features, the following order is applied:
derived_feature_fn (if provided):
- Called first with the full DataFrame
- Computes derived values from source_columns
- Output becomes input for subsequent steps
transformer (if provided):
- Applied after derived_feature_fn (or to source column if no derive fn)
- Must implement sklearn fit/transform interface
- Typically for numeric normalization (StandardScaler, MinMaxScaler)
encoder (if provided):
- Applied to categorical values
- Must implement sklearn fit/transform interface
- Typically for categorical encoding (OneHotEncoder, LabelEncoder)
Note: transformer and encoder are mutually exclusive - use one or the other.
Example:
from geronimo.features import FeatureSet, Feature
from sklearn.preprocessing import StandardScaler, OneHotEncoder

class CustomerFeatures(FeatureSet):
    # Simple numeric feature with transformer
    age = Feature(dtype="numeric", transformer=StandardScaler())

    # Categorical feature with encoder
    segment = Feature(dtype="categorical", encoder=OneHotEncoder())

    # Derived feature: single input → custom logic
    age_bucket = Feature(
        dtype="derived",
        source_columns=["age"],
        derived_feature_fn=lambda df: (df["age"] // 10) * 10,
    )

    # Derived feature: multiple inputs → single output
    bmi = Feature(
        dtype="derived",
        source_columns=["weight_kg", "height_m"],
        derived_feature_fn=lambda df: df["weight_kg"] / (df["height_m"] ** 2),
    )

    # Derived + transformed: compute then normalize
    bmi_normalized = Feature(
        dtype="derived",
        source_columns=["weight_kg", "height_m"],
        derived_feature_fn=lambda df: df["weight_kg"] / (df["height_m"] ** 2),
        transformer=StandardScaler(),  # Applied after derive
    )

    # Drop from final output
    name = Feature(dtype="text", drop=True)
def __init__(
    self,
    dtype: Literal["numeric", "categorical", "text", "derived"] = "numeric",
    transformer: Optional[Any] = None,
    encoder: Optional[Any] = None,
    source_column: Optional[str] = None,
    source_columns: Optional[list[str]] = None,
    derived_feature_fn: Optional[Callable] = None,
    drop: bool = False,
    description: Optional[str] = None,
):
    """Store the feature configuration; the name starts out unset.

    Args:
        dtype: Feature data type.
            - "numeric": Numeric values (int, float)
            - "categorical": Categorical/discrete values
            - "text": Text data (typically dropped or embedded)
            - "derived": Computed from other columns via derived_feature_fn

        transformer: Sklearn-compatible transformer for numeric features.
            Applied AFTER derived_feature_fn if both are provided.
            Must implement fit() and transform() methods.
            Example: StandardScaler(), MinMaxScaler()

        encoder: Sklearn-compatible encoder for categorical features.
            Must implement fit() and transform() methods.
            Example: OneHotEncoder(), LabelEncoder()

        source_column: Single input column name (if different from attribute name).
            Used when feature maps 1:1 from a differently-named source column.

        source_columns: List of input column names for derived features.
            Required when derived_feature_fn needs multiple input columns.

        derived_feature_fn: Custom function for feature engineering.
            Receives full DataFrame, returns Series or array.
            Called BEFORE transformer (if both provided).
            Example: lambda df: df["weight"] / (df["height"] ** 2)

        drop: If True, exclude feature from final output.
            Useful for passthrough columns needed only for derived features.

        description: Optional human-readable feature description.
    """
    # No name until the feature is bound to a class attribute.
    self._name: Optional[str] = None

    self.dtype = dtype
    self.drop = drop
    self.description = description

    self.source_column = source_column
    self.source_columns = source_columns
    self.derived_feature_fn = derived_feature_fn

    self.transformer = transformer
    self.encoder = encoder
Initialize feature.
Arguments:
- dtype: Feature data type.
- "numeric": Numeric values (int, float)
- "categorical": Categorical/discrete values
- "text": Text data (typically dropped or embedded)
- "derived": Computed from other columns via derived_feature_fn
- transformer: Sklearn-compatible transformer for numeric features. Applied AFTER derived_feature_fn if both are provided. Must implement fit() and transform() methods. Example: StandardScaler(), MinMaxScaler()
- encoder: Sklearn-compatible encoder for categorical features. Must implement fit() and transform() methods. Example: OneHotEncoder(), LabelEncoder()
- source_column: Single input column name (if different from attribute name). Used when feature maps 1:1 from a differently-named source column.
- source_columns: List of input column names for derived features. Required when derived_feature_fn needs multiple input columns.
- derived_feature_fn: Custom function for feature engineering. Receives full DataFrame, returns Series or array. Called BEFORE transformer (if both provided). Example: lambda df: df["weight"] / (df["height"] ** 2)
- drop: If True, exclude feature from final output. Useful for passthrough columns needed only for derived features.
- description: Optional human-readable feature description.
@property
def name(self) -> str:
    """Attribute name of the feature, or "unnamed" when none has been set."""
    if self._name:
        return self._name
    return "unnamed"
Get feature name.
@property
def input_columns(self) -> list[str]:
    """Columns the feature reads: source_columns, else the single source column."""
    columns = self.source_columns
    if not columns:
        # Fall back to the single source column, or the feature's own name.
        columns = [self.source_column or self.name]
    return columns
Get list of input column names.
@property
def has_transformer(self) -> bool:
    """True when a transformer object is set on the feature."""
    return not (self.transformer is None)
Check if feature has a transformer.
@property
def has_encoder(self) -> bool:
    """True when an encoder object is set on the feature."""
    return not (self.encoder is None)
Check if feature has an encoder.
@property
def has_derived_fn(self) -> bool:
    """True when a derived feature function is set on the feature."""
    return not (self.derived_feature_fn is None)
Check if feature has a derived feature function.
@property
def is_derived(self) -> bool:
    """True when the feature has a derive function or its dtype is "derived"."""
    if self.derived_feature_fn is not None:
        return True
    return self.dtype == "derived"
Check if feature is derived from custom function.
def apply(self, df) -> Any:
    """Compute the raw feature values from ``df``.

    Args:
        df: Input DataFrame with source columns.

    Returns:
        The derive function's result when one is set; otherwise the
        ``source_column`` of ``df``, falling back to the column named after
        the feature.
    """
    derive = self.derived_feature_fn
    if derive is not None:
        return derive(df)
    column = self.source_column if self.source_column else self.name
    return df[column]
Apply derived feature function to DataFrame.
Arguments:
- df: Input DataFrame with source columns.
Returns:
Transformed feature values (Series or array).