import numpy as np
import pandas as pd
import torch
from scipy.stats import yeojohnson, yeojohnson_normmax
from torch import nn
from ise.utils.functions import to_tensor
class StandardScaler(nn.Module):
    """
    Scale input data using its mean and standard deviation along dim 0.

    Attributes:
        mean_ (torch.Tensor | None): Mean of the fitted data along dim 0.
            ``None`` until ``fit`` or ``load`` populates it.
        scale_ (torch.Tensor | None): Population standard deviation
            (``unbiased=False``) of the fitted data along dim 0, with exact
            zeros replaced by ``eps``. ``None`` until fitted or loaded.
        eps (float): Replacement value for zero standard deviations.
        device (torch.device): CUDA when available, otherwise CPU, unless a
            different device is requested through ``load``.

    Methods:
        fit(X): Computes the mean and standard deviation of the input data.
        save(path): Saves the mean and standard deviation to a file.
        load(path, device=None): Builds a scaler from a saved file.
    """

    def __init__(
        self,
    ):
        super().__init__()
        self.mean_ = None
        self.scale_ = None
        # Defined up front so the attribute exists on loaded scalers too,
        # not only after ``fit`` has been called.
        self.eps = 1e-8
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def fit(self, X):
        """
        Computes the mean and standard deviation of the input data.

        Args:
            X: The input data to be scaled. It is converted with
                ``to_tensor`` and moved to ``self.device`` first.
        """
        X = to_tensor(X).to(self.device)
        self.mean_ = torch.mean(X, dim=0)
        std = torch.std(X, dim=0, unbiased=False)
        # Constant inputs have zero spread; substitute eps so that a later
        # division by scale_ cannot divide by zero.
        self.scale_ = torch.where(std == 0, torch.full_like(std, self.eps), std)

    def save(self, path):
        """
        Saves the mean and standard deviation to a file.

        The values are written as-is, so saving an unfitted scaler stores
        ``None`` for both entries.

        Args:
            path (str): The path to save the file.
        """
        torch.save(
            {
                "mean_": self.mean_,
                "scale_": self.scale_,
            },
            path,
        )

    @staticmethod
    def load(path, device=None):
        """
        Loads the mean and standard deviation from a file.

        Args:
            path (str): The path to load the file from.
            device (torch.device | str | None): Device the loaded tensors are
                mapped to. Defaults to CUDA when available, otherwise CPU.

        Returns:
            StandardScaler: A scaler holding the loaded mean and standard
            deviation, with ``device`` set to the device they were mapped to.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            device = torch.device(device)
        # map_location lets a checkpoint written on a GPU be restored on a
        # CPU-only host and keeps the tensors on the scaler's own device.
        checkpoint = torch.load(path, map_location=device)
        scaler = StandardScaler()
        scaler.device = device
        scaler.to(device)
        scaler.mean_ = checkpoint["mean_"]
        scaler.scale_ = checkpoint["scale_"]
        return scaler
class RobustScaler(nn.Module):
    """
    Collect median and interquartile-range statistics of input data.

    Attributes:
        median_ (torch.Tensor | None): Median of the fitted data along dim 0,
            as returned by ``torch.median``. ``None`` until fitted or loaded.
        iqr_ (torch.Tensor | None): Difference between the 0.75 and 0.25
            quantiles along dim 0. It is stored unmodified, so it can be zero
            for constant inputs. ``None`` until fitted or loaded.
        device (torch.device): CUDA when available, otherwise CPU, unless a
            different device is requested through ``load``.

    Methods:
        fit(X): Computes the median and interquartile range of the input.
        save(path): Saves the median and interquartile range to a file.
        load(path, device=None): Builds a scaler from a saved file.
    """

    def __init__(self):
        super().__init__()
        self.median_ = None
        self.iqr_ = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def fit(self, X):
        """
        Computes the median and interquartile range of the input data.

        Args:
            X: The input data. It is converted with ``to_tensor`` and moved to
                ``self.device`` first.
        """
        X = to_tensor(X).to(self.device)
        # NOTE: per the PyTorch docs, torch.median returns the lower of the
        # two middle values when the reduced dimension has an even length.
        self.median_ = torch.median(X, dim=0).values
        upper_quartile = torch.quantile(X, 0.75, dim=0)
        lower_quartile = torch.quantile(X, 0.25, dim=0)
        self.iqr_ = upper_quartile - lower_quartile

    def save(self, path):
        """
        Saves the median and interquartile range to a file.

        The values are written as-is, so saving an unfitted scaler stores
        ``None`` for both entries.

        Args:
            path (str): The path to save the file.
        """
        torch.save(
            {
                "median_": self.median_,
                "iqr_": self.iqr_,
            },
            path,
        )

    @staticmethod
    def load(path, device=None):
        """
        Loads the median and interquartile range from a file.

        Args:
            path (str): The path to load the file from.
            device (torch.device | str | None): Device the loaded tensors are
                mapped to. Defaults to CUDA when available, otherwise CPU.

        Returns:
            RobustScaler: A scaler holding the loaded statistics, with
            ``device`` set to the device they were mapped to.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            device = torch.device(device)
        # map_location lets a checkpoint written on a GPU be restored on a
        # CPU-only host and keeps the tensors on the scaler's own device.
        checkpoint = torch.load(path, map_location=device)
        scaler = RobustScaler()
        scaler.device = device
        scaler.to(device)
        scaler.median_ = checkpoint["median_"]
        scaler.iqr_ = checkpoint["iqr_"]
        return scaler
class LogScaler(nn.Module):
    """
    Record the offset needed to shift input data before a log transform.

    Attributes:
        epsilon (float): Small constant subtracted from the data minimum
            when computing the offset.
        min_value (int | torch.Tensor | None): ``0`` when the data minimum
            minus ``epsilon`` is non-negative, otherwise that (negative)
            value as a 0-dim tensor. ``None`` until fitted or loaded.
        device (torch.device): CUDA when available, otherwise CPU, unless a
            different device is requested through ``load``.

    Methods:
        fit(X): Computes ``min_value`` from the input data.
        save(path): Saves ``epsilon`` and ``min_value`` to a file.
        load(path, device=None): Builds a scaler from a saved file.
    """

    def __init__(self, epsilon=1e-8):
        super().__init__()
        self.epsilon = epsilon
        self.min_value = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def fit(self, X):
        """
        Computes the offset from the global minimum of the input data.

        Args:
            X: The input data. It is converted with ``to_tensor`` and moved to
                ``self.device`` first.
        """
        X = to_tensor(X).to(self.device)
        # Global minimum over every element, lowered by epsilon.
        dataset_min = torch.min(X) - self.epsilon
        # No shift is recorded when the lowered minimum is already >= 0.
        self.min_value = 0 if dataset_min >= 0 else dataset_min

    def save(self, path):
        """
        Saves ``epsilon`` and ``min_value`` to a file.

        Args:
            path (str): The path to save the file.
        """
        torch.save(
            {
                "epsilon": self.epsilon,
                "min_value": self.min_value,
            },
            path,
        )

    @staticmethod
    def load(path, device=None):
        """
        Loads ``epsilon`` and ``min_value`` from a file.

        Args:
            path (str): The path to load the file from.
            device (torch.device | str | None): Device any loaded tensor is
                mapped to. Defaults to CUDA when available, otherwise CPU.

        Returns:
            LogScaler: A scaler holding the loaded values, with ``device``
            set to the device they were mapped to.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            device = torch.device(device)
        # min_value may be a tensor saved on a GPU; map_location lets it be
        # restored on a CPU-only host and keeps it on the scaler's device.
        checkpoint = torch.load(path, map_location=device)
        scaler = LogScaler(epsilon=checkpoint["epsilon"])
        scaler.device = device
        scaler.to(device)
        scaler.min_value = checkpoint["min_value"]
        return scaler
class YeoJohnsonScaler(nn.Module):
    """
    Estimate the Yeo-Johnson power-transform parameter of input data.

    Attributes:
        lambdas_ (torch.Tensor | None): The lambda estimated by
            ``scipy.stats.yeojohnson``, stored as a float32 tensor on
            ``device``. ``None`` until fitted or loaded.
        device (torch.device): CUDA when available, otherwise CPU, unless a
            different device is requested through ``load``.

    Methods:
        fit(X): Estimates lambda from the input data.
        save(path): Saves lambda to a file.
        load(path, device=None): Builds a scaler from a saved file.
    """

    def __init__(self):
        super().__init__()
        self.lambdas_ = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

    def fit(self, X):
        """
        Estimates the Yeo-Johnson lambda from the input data.

        Args:
            X (torch.Tensor | array-like): The input data. The SciPy docs
                state that ``yeojohnson`` expects 1-dimensional input.

        Raises:
            ValueError: If ``X`` contains no elements.
        """
        if isinstance(X, torch.Tensor):
            # detach() is required because numpy() refuses tensors that are
            # part of an autograd graph.
            X_np = X.detach().cpu().numpy()
        else:
            X_np = np.array(X)
        if X_np.size == 0:
            # scipy returns the bare empty array here, which would otherwise
            # surface as an opaque tuple-unpacking ValueError.
            raise ValueError("Cannot fit YeoJohnsonScaler on empty input.")
        # With no lambda supplied, scipy returns (transformed data, fitted
        # lambda); only the lambda is kept.
        _, fitted_lambda = yeojohnson(X_np)
        self.lambdas_ = torch.tensor(fitted_lambda, dtype=torch.float32).to(self.device)

    def save(self, path):
        """
        Saves the fitted lambda to a file.

        Args:
            path (str): The path to save the file.
        """
        torch.save(
            {
                "lambdas_": self.lambdas_,
            },
            path,
        )

    @staticmethod
    def load(path, device=None):
        """
        Loads the fitted lambda from a file.

        Args:
            path (str): The path to load the file from.
            device (torch.device | str | None): Device the loaded tensor is
                mapped to. Defaults to CUDA when available, otherwise CPU.

        Returns:
            YeoJohnsonScaler: A scaler holding the loaded lambda, with
            ``device`` set to the device it was mapped to.
        """
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            device = torch.device(device)
        checkpoint = torch.load(path, map_location=device)
        scaler = YeoJohnsonScaler()
        # Record the requested device so a later fit() places lambdas_ on
        # the same device as the loaded one.
        scaler.device = device
        scaler.to(device)
        scaler.lambdas_ = checkpoint["lambdas_"]
        return scaler