Source code for Fireworks.toolbox.preprocessing

from functools import lru_cache
import math
import numpy as np
from Fireworks.toolbox import pipes as pl
from Fireworks import Model
from collections import defaultdict

def train_test_split(pipe, test=.2):
    """
    Splits input pipe into a training pipe and a test pipe.

    ``floor(len(pipe) * test)`` indices are drawn at random (without
    replacement) for the test split; every remaining index goes to the
    training split. Both splits are returned as ``pl.IndexMapperPipe`` objects
    that wrap the original pipe, each re-indexed from 0.

    Args:
        pipe: Source pipe. Must define ``__getitem__`` and support ``len()``.
        test: Fraction of the elements to place in the test split.

    Returns:
        A ``(train_pipe, test_pipe)`` tuple.

    Raises:
        ValueError: If ``pipe`` does not define ``__getitem__``.
    """
    if not hasattr(pipe, '__getitem__'):
        raise ValueError("Input pipe must be indexable via __getitem__")

    l = len(pipe)
    num_test = math.floor(l * test)
    indices = list(range(l))
    test_indices = sorted(np.random.choice(indices, num_test, replace=False))

    # Membership is checked against a set so that building the training split
    # is linear in len(pipe) rather than quadratic (list lookup per index).
    held_out = set(test_indices)
    train_indices = [i for i in indices if i not in held_out]

    test_pipe = pl.IndexMapperPipe(
        inputs={'data': pipe},
        input_indices=range(0, len(test_indices)),
        output_indices=test_indices,
    )
    train_pipe = pl.IndexMapperPipe(
        inputs={'data': pipe},
        input_indices=range(0, len(train_indices)),
        output_indices=train_indices,
    )

    return train_pipe, test_pipe
def oversample():
    """Placeholder for an oversampling routine; currently does nothing and returns None."""
    return None
def apply_noise():
    """Placeholder for a noise-injection routine; currently does nothing and returns None."""
    return None
class Normalizer(Model):
    """
    Normalizes Data by Mean and Variance.

    ``fit`` makes a single pass over a dataset, accumulating a per-key sum and
    sum of squares, and derives a per-key mean and (population) variance from
    them. ``forward`` then shifts each known key of a batch by its mean and
    divides by its variance.

    The one-pass ``E[x^2] - E[x]^2`` estimate used here is simple but can lose
    precision when the variance is small relative to the mean.
    """

    required_components = ['mean', 'variance']

    def init_default_components(self):
        """Initializes the per-key mean and variance tables as empty dicts."""
        self.mean = {}
        self.variance = {}

    def forward(self, batch):
        """
        Uses computed means and variances in order to transform the given batch.

        Only keys that have a stored mean and are present in ``batch`` are
        touched. The batch is modified in place and also returned.
        """
        keys = self.mean.keys()
        for key in keys:
            if key in batch:
                # NOTE(review): this divides by the variance, not the standard
                # deviation, so the output is not a conventional z-score.
                # Left unchanged to preserve existing outputs -- confirm intent.
                batch[key] = (batch[key] - self.mean[key]) / self.variance[key]

        return batch

    def fit(self, dataset=None, continuamos=False):
        """
        Computes per-key means and variances from the batches in ``dataset``.

        Args:
            dataset: Iterable of batches. Each batch must support ``len()``,
                iteration over its keys, and ``batch[key]`` lookups whose
                values can be squared and summed. Defaults to ``self.input``.
            continuamos: If False (default), the running statistics are reset
                before fitting. If True, the new batches are added to the
                existing running statistics; these are only created by
                ``reset``, so a previous fit/reset must already have happened.
        """
        if dataset is None:
            dataset = self.input
        if not continuamos:
            self.reset()

        for batch in dataset:
            self.count += len(batch)
            for key in batch:
                # Accumulate per key; the accumulators are dicts keyed by column.
                self.rolling_sum[key] += sum(batch[key])
                self.rolling_squares[key] += sum(batch[key] ** 2)

        for key in self.rolling_sum:
            self.mean[key] = self.rolling_sum[key] / self.count
            # Population variance: sum((x - m)^2) / n == sum(x^2) / n - m^2.
            self.variance[key] = self.rolling_squares[key] / self.count - self.mean[key] ** 2

    def reset(self):
        """Clears the running count, sums and sums of squares used by ``fit``."""
        self.count = 0
        self.rolling_sum = defaultdict(int)
        self.rolling_squares = defaultdict(int)
        try:
            self.recursive_call('reset')()
        except Exception:
            # Best-effort: a failure of the recursive reset is deliberately
            # ignored. Catching Exception (rather than everything) lets
            # KeyboardInterrupt / SystemExit propagate.
            pass