import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout
from tensorflow.keras.layers import Dense, Lambda, dot, Activation, concatenate, Input
from tensorflow.keras import Model
import numpy as np
from matplotlib.widgets import SpanSelector
from tensorflow.keras.models import save_model, load_model
from tensorflow.keras.callbacks import EarlyStopping, Callback
import sys
from sklearn.preprocessing import PowerTransformer
import matplotlib.pyplot as plt
from sklearn.base import BaseEstimator, TransformerMixin
import pickle
from statsmodels.tsa.seasonal import STL
import threading
import queue
import xgboost as xgb
from xgboost import plot_importance, plot_tree
import pandas as pd
from datetime import datetime
def decoupe_dataframe(df, look_back):
    '''
    Slice a 1-D series into supervised-learning samples.

    Parameters
    ----------
    df : array-like
        input series (one decomposed component, or the raw signal).
    look_back : int
        number of past points used as model input for each sample.

    Returns
    -------
    (dataX, dataY) : tuple of np.ndarray
        dataX has shape (n_samples, look_back); dataY is the flat array of
        the value that immediately follows each window.
    '''
    dataX, dataY = [], []
    # note: the final possible window (i == len(df) - look_back - 1) is left
    # out, preserving the historical behaviour of this helper
    for i in range(len(df) - look_back - 1):
        dataX.append(df[i:(i + look_back)])
        # append instead of list concatenation: O(n) overall instead of O(n^2)
        dataY.append(df[i + look_back])
    return (np.asarray(dataX), np.asarray(dataY).flatten())
def progressbar(it, prefix="", size=60, file=sys.stdout):
    '''
    Wrap an iterable and draw a textual progress bar on *file* while
    yielding its items.

    Parameters
    ----------
    it : sized iterable
        the items to iterate over (len() must be defined).
    prefix : str
        text printed before the bar.
    size : int
        width of the bar in characters.
    file : file-like
        destination stream; defaults to stdout.
    '''
    total = count = len(it)

    def draw(done):
        # redraw the bar in place using a carriage return
        filled = int(size * done / total)
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#" * filled, "." * (size - filled), done, total))
        file.flush()

    draw(0)
    for index, element in enumerate(it):
        yield element
        draw(index + 1)
    file.write("\n")
    file.flush()
class EarlyStoppingByUnderVal(Callback):
    '''
    Stop training early once the monitored quantity drops below a threshold.

    Drop-in replacement for the usual Keras callbacks.

    Parameters
    ----------
    monitor : str
        key looked up in the epoch logs. The default is 'val_loss'.
    value : float
        threshold under which training is stopped.
    verbose : int
        if > 0, print a message when stopping.
    '''

    def __init__(self, monitor='val_loss', value=0.00001, verbose=0):
        # bug fix: the original called super(Callback, self).__init__(),
        # which skips Callback's own initializer entirely
        super(EarlyStoppingByUnderVal, self).__init__()
        self.monitor = monitor
        self.value = value
        self.verbose = verbose

    def on_epoch_end(self, epoch, logs=None):
        # avoid the mutable default argument of the original (logs={})
        logs = logs or {}
        current = logs.get(self.monitor)
        if current is None:
            # monitored metric missing from logs: warn and skip this epoch
            # (the original fell through and crashed on `None < self.value`)
            print("error")
            return
        if current < self.value:
            if self.verbose > 0:
                print("Epoch %05d: early stopping THR" % epoch)
            self.model.stop_training = True
def attention_block(hidden_states):
    '''
    Add a Luong-style attention mechanism on top of a recurrent layer's
    full-sequence output.

    Parameters
    ----------
    hidden_states : 3D tensor
        recurrent output of shape (batch_size, time_steps, hidden_size).

    Returns
    -------
    attention_vector : 2D tensor
        attention-weighted representation of shape (batch_size, 128).
    '''
    hidden_size = int(hidden_states.shape[2])
    # score_first_part: W . h_s for every timestep (batch, time_steps, hidden)
    score_first_part = Dense(hidden_size, use_bias=False, name='attention_score_vec')(hidden_states)
    # h_t: the last hidden state, shape (batch_size, hidden_size)
    h_t = Lambda(lambda x: x[:, -1, :], output_shape=(hidden_size,), name='last_hidden_state')(hidden_states)
    # score(h_t, h_s) = h_t . W . h_s, shape (batch_size, time_steps)
    score = dot([score_first_part, h_t], [2, 1], name='attention_score')
    # softmax over timesteps -> attention weights
    attention_weights = Activation('softmax', name='attention_weight')(score)
    # weighted sum of hidden states, shape (batch_size, hidden_size)
    context_vector = dot([hidden_states, attention_weights], [1, 1], name='context_vector')
    pre_activation = concatenate([context_vector, h_t], name='attention_output')
    # NOTE(review): output width is hard-coded to 128 regardless of hidden_size
    attention_vector = Dense(128, use_bias=False, activation='tanh', name='attention_vector')(pre_activation)
    return attention_vector
class Thread_train_model(threading.Thread):
    '''
    Worker thread that trains a single Keras model (trend, seasonal or
    residual component) and pushes the trained model into a shared queue.
    '''

    def __init__(self, model, q, x_train, y_train, nb_epochs, nb_batch, name_ts, name='', verbose=0):
        # model: compiled Keras model to train
        # q: queue.Queue shared with the caller; the trained model is put() here
        # name_ts: component trained by this thread ("trend", "seasonal" or "residual")
        # name: human-readable thread name used in log messages
        threading.Thread.__init__(self)
        self.name = name
        self.model = model
        self.x_train = x_train
        self.y_train = y_train
        self.nb_epochs = nb_epochs
        self.nb_batch = nb_batch
        self.name_ts = name_ts
        self.verbose = verbose
        self._stopevent = threading.Event()
        self.q = q
        print("The new thread with the name : " + self.name + " start running")

    def run(self):
        # Train the model, hand it back through the queue, then flag completion.
        model = self.train_model(self.model, self.x_train, self.y_train, self.nb_epochs, self.nb_batch, self.name_ts)
        self.q.put(model)
        self.stop()
        print("le thread " + self.name + " ended ")
        return 0

    def stop(self):
        # Cooperative stop flag; run() sets it itself once training is done.
        self._stopevent.set()

    def train_model(self, model, x_train, y_train, nb_epochs, nb_batch, name):
        '''
        Train the model with a schedule that depends on the component *name*
        ("trend" and "residual" get longer schedules).

        Parameters
        ----------
        model : Sequential object
            compiled model to train.
        x_train : array
            training data inputs.
        y_train : array
            training data ouputs.
        nb_epochs : int
            nb of training repetitions (overridden below from self.nb_epochs
            for every branch).
        nb_batch : int
            size of batch of element which gonna enter in the model before
            doing a back propagation.
        name : str
            component name; distinguishes the trend signal from others
            (more complicated to modelise).

        Returns
        -------
        model : Sequential object
            trained model.
        '''
        if name == "trend":
            # the trend is harder to model: train 4x longer
            nb_epochs = self.nb_epochs * 4
            try:
                # LSTM expects (samples, timesteps=1, features=look_back)
                x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            except Exception as e:
                if e:
                    print(
                        "************Not enought data in the input compared to the look back size. Data has size :" + str(np.shape(x_train)) + "*****")
            es = EarlyStopping(monitor='loss', mode='min', min_delta=1, patience=100)
            hist = model.fit(x_train, y_train, epochs=nb_epochs, batch_size=self.nb_batch, verbose=self.verbose,
                             callbacks=[es])
            i = 0
            # retry with short runs (max 5) while the final loss is still high
            while hist.history["loss"][-1] > 10 and i < 5:
                i = i + 1
                epochs = 50
                hist = model.fit(x_train, y_train, epochs=epochs, batch_size=100, verbose=self.verbose, callbacks=[es])
            print("model_trained")
        elif name == "residual":
            # the residual is the noisiest component: train 6x longer
            nb_epochs = self.nb_epochs * 6
            try:
                x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            except Exception as e:
                if e:
                    print(
                        "************Not enought data in the input compared to the look back size. Data has size :" + str(np.shape(x_train)) + "*****")
            es = EarlyStopping(monitor='loss', mode='min', min_delta=0.1, patience=100)
            hist = model.fit(x_train, y_train, epochs=nb_epochs, batch_size=self.nb_batch, verbose=self.verbose,
                             callbacks=[es])
            i = 0
        else:
            # seasonal (or any other) component: train 2x longer
            nb_epochs = self.nb_epochs * 2
            # NOTE(review): this early-stopping callback is created but never
            # passed to model.fit below — presumably an oversight; confirm.
            es = EarlyStoppingByUnderVal(monitor="loss", value=0.1, verbose=self.verbose)
            try:
                x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            except Exception as e:
                if e:
                    print(
                        "***Not enought data in the input compared to the look back size. Data has size :" + str(np.shape(x_train)) + "*****")
            model.fit(x_train, y_train, epochs=nb_epochs, batch_size=self.nb_batch, verbose=self.verbose)
            print("model_trained")
        return model
class GTSPredictor():
    '''
    Time-series forecaster based on STL decomposition and one LSTM model
    per component (trend, seasonal, residual).
    '''

    def __init__(self, verbose=0):
        '''Create the predictor; verbose=1 prints progress information.'''
        if verbose == 1:
            print("Model is being created")
    def fit_with_UI(self, df, verbose=0, nb_features=1, nb_epochs=300, nb_batch=200, nb_layers=50,
                    attention=True, loss="mape", metric="mse", optimizer="Adamax", directory=r"."):
        '''
        Open a matplotlib window for the user to select the seasonal pattern,
        then run the preprocessing steps and train the models via fit().

        Parameters
        ----------
        df : dataframe
            with a time columns (string) and y the columns with the values to forecast.
        verbose : 0 or 1
            If 0 no information printed during the creation and training of models.
        nb_features : int, optional
            output size. The default is 1.
        nb_epochs : int, optional
            The default is 300.
        nb_batch : int, optional
            The default is 200.
        nb_layers : int, optional
            The default is 50.
        attention : Bool, optional
            Either use or not the attention layer. The default is True.
        loss : str, optional
            loss for evaluation error between input and output. The default is "mape".
        metric : str, optional
            metrics for evaluation the training predictions. The default is "mse".
        optimizer : str, optional
            Keras optimizers. The default is "Adamax".
        directory : str, optional
            directory path which need to end by "/". The default is r".".

        Returns
        -------
        None.
        '''
        np.random.seed(19680801)
        fig, (ax1, ax2) = plt.subplots(2, figsize=(8, 6))
        ax1.set(facecolor='#FFFFCC')
        print(len(df["y"]))
        x = np.arange(0, len(df["y"]), 1)
        y = np.array(df["y"])
        # top axis: full series with the span selector; bottom axis: zoomed view
        ax1.plot(x, y, '-')
        ax1.set_title('Press left mouse button and drag to select the pattern')
        ax2.set(facecolor='#FFFFCC')
        line2, = ax2.plot(x, y, '-')

        def onselect(xmin, xmax):
            # callback fired by the SpanSelector: zoom ax2 on the selection
            # and persist the selected pattern length to a temp file so it
            # survives the blocking plt.show() call
            indmin, indmax = np.searchsorted(x, (xmin, xmax))
            indmax = min(len(x) - 1, indmax)
            thisx = x[indmin:indmax]
            thisy = y[indmin:indmax]
            print(thisx.min(), thisx.max())
            line2.set_data(thisx, thisy)
            ax2.set_xlim(thisx[0], thisx[-1])
            ax2.set_ylim(thisy.min(), thisy.max())
            fig.canvas.draw()
            pattern_size = int(abs(thisx.min() - thisx.max())) + 1
            with open('pattern_size.txt', 'w') as f:
                f.write('%d' % pattern_size)

        span = SpanSelector(ax1, onselect, 'horizontal', useblit=True,
                            rectprops=dict(alpha=0.5, facecolor='red'))
        plt.show(block=True)
        plt.close()
        # read back the seasonal period chosen in the UI, then clean up
        f = open('pattern_size.txt', 'r')
        freq_period = int(f.readline())
        f.close()
        os.remove('pattern_size.txt')
        print(freq_period)
        # look_back is derived as ~1.3x the selected seasonal period
        self.fit(df, int(freq_period * 1.3) + 1, freq_period, verbose=verbose, nb_features=nb_features,
                 nb_epochs=nb_epochs, nb_batch=nb_batch, nb_layers=nb_layers,
                 attention=attention, loss=loss, metric=metric, optimizer=optimizer, directory=directory)
    def fit_whitout(self, df, look_back, freq_period, verbose=0, nb_features=1, nb_epochs=200, nb_batch=100, nb_layers=50, attention=True, loss="mape", metric="mse", optimizer="Adamax", directory=r"."):
        '''
        Take the data, run the preprocessing steps and train the three models
        sequentially (no multi threading, unlike fit()).

        Parameters
        ----------
        df : dataframe
            with a time columns (string) and y the columns with the values to forecast.
        look_back : int
            size of inputs.
        freq_period : int
            size in point of the seasonal pattern (ex 24 if daily seasonality
            for a signal sampled at 1h frequency).
        verbose : 0 or 1
            If 0 no information printed during the creation and training of models.
        nb_features : int, optional
            output size. The default is 1.
        nb_epochs : int, optional
            The default is 200.
        nb_batch : int, optional
            The default is 100.
        nb_layers : int, optional
            The default is 50.
        attention : Bool, optional
            Either use or not the attention layer. The default is True.
        loss : str, optional
            loss for evaluation error between input and output. The default is "mape".
        metric : str, optional
            metrics for evaluation the training predictions. The default is "mse".
        optimizer : str, optional
            Keras optimizers. The default is "Adamax".
        directory : str, optional
            directory path which need to end by "/". The default is r".".

        Returns
        -------
        None
        '''
        # keep the hyper-parameters on the instance: the train/predict
        # helpers read them back from self
        self.loss = loss
        self.metric = metric
        self.optimizer = optimizer
        self.nb_features = nb_features
        self.nb_epochs = nb_epochs
        self.nb_batch = nb_batch
        self.df = df
        self.nb_layers = nb_layers
        self.freq_period = freq_period
        self.look_back = look_back
        self.verbose = verbose
        self.directory = directory
        # STL decomposition + windowing; first=1 forces fitting a new scaler
        trend_x, trend_y, seasonal_x, seasonal_y, residual_x, residual_y = self.prepare_data(df, look_back,
                                                                                             self.freq_period, first=1)
        # one model per decomposed component
        if attention:
            model_trend = self.make_models_with(True, self.df)
            model_seasonal = self.make_models_with(False, self.df)
            model_residual = self.make_models_with(False, self.df)
        else:
            model_trend = self.make_models(True, self.df)
            model_seasonal = self.make_models(False, self.df)
            model_residual = self.make_models(False, self.df)
        # sequential training, one component at a time (train_model also saves)
        self.model_trend = self.train_model(model_trend, trend_x, trend_y, "trend")
        self.model_seasonal = self.train_model(model_seasonal, seasonal_x, seasonal_y, "seasonal")
        self.model_residual = self.train_model(model_residual, residual_x, residual_y, "residual")
        print("Modele fitted")
    def fit(self, df, look_back, freq_period, verbose=0, nb_features=1, nb_epochs=200, nb_batch=100, nb_layers=50,
            attention=True, loss="mape", metric="mse", optimizer="Adamax", directory=r"."):
        '''
        Take the data, run the preprocessing steps and train the three models
        in parallel threads (one per component).

        Parameters
        ----------
        df : dataframe
            with a time columns (string) and y the columns with the values to forecast.
        look_back : int
            size of inputs.
        freq_period : int
            size in point of the seasonal pattern (ex 24 if daily seasonality
            for a signal sampled at 1h frequency).
        verbose : 0 or 1
            If 0 no information printed during the creation and training of models.
        nb_features : int, optional
            output size. The default is 1.
        nb_epochs : int, optional
            The default is 200.
        nb_batch : int, optional
            The default is 100.
        nb_layers : int, optional
            The default is 50.
        attention : Bool, optional
            Either use or not the attention layer. The default is True.
        loss : str, optional
            loss for evaluation error between input and output. The default is "mape".
        metric : str, optional
            metrics for evaluation the training predictions. The default is "mse".
        optimizer : str, optional
            Keras optimizers. The default is "Adamax".
        directory : str, optional
            directory path which need to end by "/". The default is r".".

        Returns
        -------
        None.
        '''
        # keep the hyper-parameters on the instance: the train/predict
        # helpers read them back from self
        self.loss = loss
        self.metric = metric
        self.optimizer = optimizer
        self.nb_features = nb_features
        self.nb_epochs = nb_epochs
        self.nb_batch = nb_batch
        self.df = df
        self.nb_layers = nb_layers
        self.freq_period = freq_period
        self.look_back = look_back
        self.verbose = verbose
        self.directory = directory
        # STL decomposition + windowing; first=1 forces fitting a new scaler
        trend_x, trend_y, seasonal_x, seasonal_y, residual_x, residual_y = self.prepare_data(df, look_back,
                                                                                             self.freq_period, first=1)
        # one model per decomposed component
        if attention:
            model_trend = self.make_models_with(True, self.df)
            model_seasonal = self.make_models_with(False, self.df)
            model_residual = self.make_models_with(False, self.df)
        else:
            model_trend = self.make_models(True, self.df)
            model_seasonal = self.make_models(False, self.df)
            model_residual = self.make_models(False, self.df)
        # train the three models concurrently; each thread puts its trained
        # model into the shared queue when done
        que = queue.Queue()
        threads_list = list()
        thread = Thread_train_model(model_trend, que, trend_x, trend_y, nb_epochs, nb_batch, "trend", "Trend Thread", self.verbose)
        thread.start()
        threads_list.append(thread)
        thread_1 = Thread_train_model(model_seasonal, que, seasonal_x, seasonal_y, nb_epochs, nb_batch, "seasonal", "Seasonal Thread", self.verbose)
        thread_1.start()
        threads_list.append(thread_1)
        thread_2 = Thread_train_model(model_residual, que, residual_x, residual_y, nb_epochs, nb_batch, "residual", "Residual Thread", self.verbose)
        thread_2.start()
        threads_list.append(thread_2)
        for t in threads_list:
            t.join()
        # NOTE(review): queue entries arrive in thread *completion* order, not
        # submission order, so the three get() calls below may associate a
        # trained model with the wrong component — verify before relying on it.
        self.model_trend = que.get(block=False)
        self.model_save(self.model_trend, "trend")
        self.model_seasonal = que.get(block=False)
        self.model_save(self.model_seasonal, "seasonal")
        self.model_residual = que.get(block=False)
        self.model_save(self.model_residual, "residual")
        print("Models fitted")
[docs] def predict(self, steps=1, frame=True):
'''
Parameters
----------
steps : int
number of points you want to forecast
frame : Bool
True if you want the lower and upper bounders.default is True.
Returns
-------
prediction : array
Array with the predicted values
lower : array
Array containing the lower values. Only if frame == True.
frame : Bool
Array containing the lower values. Only if frame == True.
'''
prediction, lower, upper = self.make_prediction(steps)
if frame:
return prediction[0], lower[0], upper[0]
else:
return prediction[0]
    def make_models_with(self, trend, df):
        '''
        Create the LSTM model with the attention block, using the
        hyper-parameters stored on self (nb_layers, loss, metric,
        nb_features, optimizer, look_back).

        Parameters
        ----------
        trend : bool
            distinguish trend signal from others (not used by this
            attention variant).
        df : dataframe
            training dataframe (not used by this attention variant).

        Returns
        -------
        model : Model object
            compiled Keras model.
        '''
        # functional API: input -> 2 stacked LSTMs -> attention -> dense head
        i = Input(shape=(self.nb_features, self.look_back))
        x = LSTM(self.nb_layers, return_sequences=True, activation='relu',
                 input_shape=(self.nb_features, self.look_back))(i)
        x = Activation("relu")(x)
        x = Dropout(0.2)(x)
        x = LSTM(self.nb_layers, return_sequences=True)(x)
        x = Dropout(0.2)(x)
        x = attention_block(x)
        x = Dense(int(self.nb_layers / 2), activation='relu')(x)
        output = Dense(1)(x)
        model = Model(inputs=[i], outputs=[output])
        model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[self.metric])
        if self.verbose == 1:
            print(model.summary())
        return model
    def make_models(self, trend, df):
        '''
        Create the plain (no attention) stacked-LSTM model, using the
        hyper-parameters stored on self.

        Parameters
        ----------
        trend : bool
            distinguish trend signal from others (not used here).
        df : dataframe
            used to inspect the value range of the "y" column.

        Returns
        -------
        model : Sequential object
            compiled Keras model.
        '''
        model = Sequential()
        model.add(LSTM(self.nb_layers, return_sequences=True, activation='relu',
                       input_shape=(self.nb_features, self.look_back)))
        model.add(Dropout(0.2))
        model.add(LSTM(self.nb_layers))
        model.add(Dropout(0.2))
        # NOTE(review): a softmax activation on a wide-range regression signal
        # is unusual — confirm this is intended for large-amplitude series.
        if (df["y"].max() - df["y"].min()) > 100:
            model.add(Activation('softmax'))
        model.add(Dense(int(self.nb_layers / 2), activation='relu'))
        model.add(Dense(1))
        model.compile(loss=self.loss, optimizer=self.optimizer, metrics=[self.metric])
        if self.verbose == 1:
            print(model.summary())
        return model
[docs] def prediction_eval(self, prediction, real_data):
'''
This function compute and print four differents metrics (mse ,mae ,r2 and median) to evaluate accuracy of the model
prediction and real_data need to have the same size
Parameters
----------
prediction : array
predicted values.
real_data : array
real data.
Returns
-------
None.
'''
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import median_absolute_error as medae
from sklearn.metrics import r2_score as r2
print("mean_absolute_error : ", mae(real_data, prediction))
print("mean_squared_error : ", mse(real_data, prediction))
print("median_absolute_error : ", medae(real_data, prediction))
print("r2_score : ", r2(real_data, prediction))
    def prepare_data(self, df, look_back, freq_period, first=0):
        '''
        Decompose the signal in three sub signals (trend, seasonal and
        residual) in order to work separately on each signal, after scaling.

        Parameters
        ----------
        df : DataFrame
            dataframe contening historical data.
        look_back : int
            length entry of the model.
        freq_period : int
            seasonal period of the signal in points.
        first : int
            1 on the initial fit (fit a new scaler and persist it), 0 when
            reusing a previously saved scaler.

        Returns
        -------
        trend_x : array
            values of the trend of the signal, matrix of dimention X array of
            dimension (1, length entry of model), X = length(dataframe)/look_back.
        trend_y : array
            values to be predicted during the training.
        seasonal_x : array
            same as trend_x but with the seasonal part of the signal.
        seasonal_y : array
            same as trend_y but with the seasonal part of the signal.
        residual_x : array
            same as trend_x but with the residual part of the signal.
        residual_y : array
            same as trend_y but with the residual part of the signal.
        '''
        df = df.dropna()
        lendf = len(df)
        scalerfile = self.directory + '/scaler_pred.sav'
        # fit (and persist) a scaler on first use, otherwise reload it so that
        # predictions use the exact transformation seen at training time
        if not os.path.isfile(scalerfile) or os.path.isfile(scalerfile) and first == 1:
            if (df["y"].max() - df["y"].min()) > 100:
                if self.verbose == 1:
                    print("PowerTransformation scaler used")
                scaler = PowerTransformer()
            else:
                if self.verbose == 1:
                    print("Identity scaler used")
                # NOTE(review): IdentityTransformer is not defined in this
                # module — presumably defined/imported elsewhere in the
                # package; confirm.
                scaler = IdentityTransformer()
            self.scaler2 = scaler.fit(np.reshape(np.array(df["y"]), (-1, 1)))
            Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
            pickle.dump(self.scaler2, open(scalerfile, 'wb'))
        elif os.path.isfile(scalerfile) and first == 0:
            self.scaler2 = pickle.load(open(scalerfile, "rb"))
            Y = self.scaler2.transform(np.reshape(np.array(df["y"]), (-1, 1)))
        # STL expects an odd period
        if freq_period % 2 == 0:
            freq_period = freq_period + 1
        decomposition = STL(Y, period=freq_period + 1)
        decomposition = decomposition.fit()
        plt.show()
        df.loc[:, 'trend'] = decomposition.trend
        df.loc[:, 'seasonal'] = decomposition.seasonal
        df.loc[:, 'residual'] = decomposition.resid
        df["trend"] = df['trend']
        df["seasonal"] = df['seasonal']
        df["residual"] = df['residual']
        df_a = df
        df = df.dropna()
        # shift: number of rows lost to NaNs introduced by the decomposition
        self.shift = lendf - len(df["trend"])
        df = df.reset_index(drop=True)
        df["trend"] = df["trend"].fillna(method="bfill")
        # keep the raw components on the instance for make_prediction()
        self.trend = np.asarray(df.loc[:, 'trend'])
        self.seasonal = np.asarray(df.loc[:, 'seasonal'])
        self.residual = np.asarray(df.loc[:, 'residual'])
        # windowed supervised samples for each component
        trend_x, trend_y = decoupe_dataframe(df["trend"], look_back)
        seasonal_x, seasonal_y = decoupe_dataframe(df["seasonal"], look_back)
        residual_x, residual_y = decoupe_dataframe(df["residual"], look_back)
        if self.verbose == 1:
            print("prepared")
        return trend_x, trend_y, seasonal_x, seasonal_y, residual_x, residual_y
    def train_model(self, model, x_train, y_train, name):
        '''
        Train the model and save it in the right file.

        Parameters
        ----------
        model : Sequential object
            model.
        x_train : array
            training data inputs.
        y_train : array
            training data ouputs.
        name : str
            component name ("trend", "seasonal" or "residual"); distinguishes
            the trend signal from others (more complicated to modelise) and
            selects the training schedule.

        Returns
        -------
        model : Sequential object
            trained model.
        '''
        if name == "trend":
            # the trend is harder to model: train 3x longer
            nb_epochs = self.nb_epochs * 3
            try:
                # LSTM expects (samples, timesteps=1, features=look_back)
                x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            except Exception as e:
                if e:
                    print(
                        "************Not enought data in the input compared to the look back size. Put more data as input******")
            es = EarlyStopping(monitor='loss', mode='min', min_delta=1, patience=100)
            hist = model.fit(x_train, y_train, epochs=nb_epochs, batch_size=self.nb_batch, verbose=self.verbose,
                             callbacks=[es])
            i = 0
            # retry with short runs (max 5) while the final loss is still high
            while hist.history["loss"][-1] > 10 and i < 5:
                i = i + 1
                epochs = 50
                hist = model.fit(x_train, y_train, epochs=epochs, batch_size=100, verbose=self.verbose, callbacks=[es])
            print("model_trained")
            self.model_save(model, name)
        elif name == "residual":
            # the residual is the noisiest component: train 6x longer
            nb_epochs = self.nb_epochs * 6
            x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            es = EarlyStopping(monitor='loss', mode='min', min_delta=0.1, patience=100)
            hist = model.fit(x_train, y_train, epochs=nb_epochs, batch_size=self.nb_batch, verbose=self.verbose,
                             callbacks=[es])
            i = 0
            self.model_save(model, name)
        else:
            # seasonal component
            # NOTE(review): this early-stopping callback is created but never
            # passed to model.fit below — presumably an oversight; confirm.
            es = EarlyStoppingByUnderVal(monitor="loss", value=0.1, verbose=self.verbose)
            x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
            model.fit(x_train, y_train, epochs=self.nb_epochs, batch_size=self.nb_batch, verbose=self.verbose)
            print("model_trained")
            self.model_save(model, name)
        return model
[docs] def model_save(self, model, name):
path = self.directory + "/" + name + ".h5"
if name =="trend" :
with open(self.directory+'/look_back.txt', 'w') as f:
f.write('%d' % self.look_back)
with open(self.directory+'/freq_period.txt', 'w') as f:
f.write('%d' % self.freq_period)
save_model(model, path)
    def make_prediction(self, len_prediction):
        '''
        Make the prediction recursively:
        reshape data to fit with the model,
        make one prediction,
        crush outdated data with the prediction,
        repeat len_prediction times.

        Parameters
        ----------
        len_prediction : int
            number of value we want to predict.

        Returns
        -------
        prediction : array
            values predicted, shape (1, len_prediction).
        lower : array
            lower 95% CI bound values.
        upper : array
            upper 95% CI bound values.
        '''
        # seed each rolling window with the last look_back known values
        data_residual = self.residual[-1 * self.look_back:]
        data_trend = self.trend[-1 * self.look_back:]
        data_seasonal = self.seasonal[-1 * self.look_back:]
        prediction = np.zeros((1, len_prediction))
        for i in progressbar(range(len_prediction), "Computing: ", 40):
            # one-step forecast per component, then slide its window forward
            dataset = np.reshape(data_residual, (1, 1, self.look_back))
            prediction_residual = (self.model_residual.predict(dataset))
            data_residual = np.append(data_residual[1:], [prediction_residual]).reshape(-1, 1)
            dataset = np.reshape(data_trend, (1, 1, self.look_back))
            prediction_trend = (self.model_trend.predict(dataset))
            data_trend = np.append(data_trend[1:], [prediction_trend]).reshape(-1, 1)
            dataset = np.reshape(data_seasonal, (1, 1, self.look_back))
            prediction_seasonal = (self.model_seasonal.predict(dataset))
            data_seasonal = np.append(data_seasonal[1:], [prediction_seasonal]).reshape(-1, 1)
            # recompose the signal: residual + trend + seasonal
            prediction[0, i] = prediction_residual + prediction_trend + prediction_seasonal
        # undo the scaling applied in prepare_data, then add the 95% CI frame
        prediction = self.scaler2.inverse_transform(np.reshape(prediction, (-1, 1)))
        lower, upper = self.frame_prediction(np.reshape(prediction, (1, -1)))
        return np.reshape(prediction, (1, -1)), lower, upper
[docs] def frame_prediction(self, prediction):
'''
This function compute the 95% confident interval by calculating the standard deviation and the mean of the residual(Gaussian like distribution)
and return yestimated +- 1.96*CI +mean (1.96 to have 95%)
Parameters
----------
prediction : array
array contening the perdicted vector (1,N) size.
Returns
-------
lower : array
array contening the lowers boundry values (1,N) size.
upper : array
array contening the upper boundry values (1,N) size.
'''
mae = -1 * np.mean(self.scaler2.inverse_transform(np.reshape(self.residual, (-1, 1))))
std_deviation = np.std(self.scaler2.inverse_transform(np.reshape(self.residual, (-1, 1))))
sc = 1.96 # 1.96 for a 95% accuracy
margin_error = mae + sc * std_deviation
lower = prediction - margin_error
upper = prediction + margin_error
return lower, upper
    def fit_predict_without_dec(self, df, look_back, freq_period, len_prediction, verbose, nb_features=1, nb_epochs=50,
                                nb_batch=100, nb_layers=50, attention=True, loss="mape", metric="mse",
                                optimizer="Adamax"):
        '''
        Train a single stacked-LSTM on the raw series (no STL decomposition)
        and forecast len_prediction points recursively.

        Returns the prediction three times to mimic the
        (prediction, lower, upper) shape of the decomposed pipeline.
        '''
        df = df.reset_index()
        # functional API: input -> 2 stacked LSTMs -> dense head (attention
        # block intentionally commented out below)
        i = Input(shape=(nb_features, look_back))
        x = LSTM(nb_layers, return_sequences=True, activation='relu', input_shape=(nb_features, look_back))(i)
        x = Activation("relu")(x)
        x = Dropout(0.2)(x)
        x = LSTM(nb_layers, return_sequences=True)(x)
        x = Dropout(0.2)(x)
        # x=attention_block(x)
        x = Dense(int(nb_layers / 2), activation='relu')(x)
        output = Dense(1)(x)
        model = Model(inputs=[i], outputs=[output])
        model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
        x_train, y_train = decoupe_dataframe(df["y"], look_back)
        nb_epochs = nb_epochs * 7 * 8
        try:
            # LSTM expects (samples, timesteps=1, features=look_back)
            x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
        except Exception as e:
            # NOTE(review): `e == IndexError` compares an exception instance
            # to a class and is never true, so this message is unreachable —
            # confirm the intent (probably isinstance(e, IndexError)).
            if e == IndexError:
                print("Not enought data in the input compared to the look back size. Put more data as input")
        hist = model.fit(x_train, y_train, epochs=nb_epochs, batch_size=nb_batch, verbose=verbose)
        # recursive forecast: feed each prediction back into the input window
        data_residual = np.array(df["y"][-1 * look_back:])
        prediction = np.zeros((1, len_prediction))
        for i in progressbar(range(len_prediction), "Computing: ", 40):
            dataset = np.reshape(data_residual, (1, 1, look_back))
            prediction_residual = (model.predict(dataset))
            data_residual = np.append(data_residual[1:], [prediction_residual]).reshape(-1, 1)
            prediction[0, i] = prediction_residual
        return prediction, prediction, prediction
[docs] def retrain(self, df, nb_features=1, nb_epochs=10, nb_batch=10):
'''
This function retrain a GTSPredictor object that contained already trained models in order to do incremental learning.
df : dataframe
Values that are going to be used for the training.
Returns
-------
self : GTSPredictor object
trained object
'''
self.df = df
self.nb_epochs = nb_epochs
self.nb_batch=nb_batch
self.model_trend = self.train_model(self.model_trend, self.trend_x, self.trend_y, "trend")
self.model_seasonal = self.train_model(self.model_seasonal, self.seasonal_x, self.seasonal_y, "seasonal")
self.model_residual = self.train_model(self.model_residual, self.residual_x, self.residual_y, "residual")
self.model_save(self.model_trend, "trend")
self.model_save(self.model_seasonal, "seasonal")
self.model_save(self.model_residual, "residual")
return self
[docs] def load_models(self, directory="."):
model_trend = load_model(r"" + directory + "/" + "trend" + ".h5")
model_seasonal = load_model(r"" + directory + "/" + "seasonal" + ".h5")
model_residual = load_model(r"" + directory + "/" + "residual" + ".h5")
print("loaded")
self.model_trend = model_trend
self.model_seasonal = model_seasonal
self.model_residual = model_residual
return model_trend, model_seasonal, model_residual
[docs] def reuse(self,df,directory=".",verbose=0) :
'''
This function recreate a new GTSPredictor object with already trained models in order to be then reused.
df : dataframe
Values that are going to be used for the training.
directory : dataframe
Directory containing the models to be load and the informations
verbose : int
0 for no informations, 1 otherweise.
Returns
-------
self : GTSPredictor object
trained object
'''
self.directory=r""+directory
self.model_trend = load_model(r"" + directory + "/trend.h5")
self.model_seasonal = load_model(r"" + directory + "/seasonal.h5")
self.model_residual = load_model(r"" + directory + "/residual.h5")
self.verbose=verbose
print("loaded")
f = open(directory+'/look_back.txt', 'r')
self.look_back = int(f.readline())
f.close()
f = open(directory+'/freq_period.txt', 'r')
self.freq_period = int(f.readline())
f.close()
self.trend_x, self.trend_y, self.seasonal_x, self.seasonal_y, self.residual_x, self.residual_y = self.prepare_data(df, self.look_back,
self.freq_period,first=0)
return self
[docs] def plot_prediction(self,df,prediction,lower=0,upper=0):
'''
This function plots the predicted values at the end of the real values and plots the 95% CI.
df : dataframe
Values that are going to be used for the training.
prediction : array
predicted values
lower : array
lower bounder values
upper : array
upper bounder values.
Returns
-------
self : GTSPredictor object
trained object
'''
x=np.arange(0,len(df),1)
y=df["y"]
x_2=np.arange(len(df),len(df)+len(prediction),1)
y_2=prediction
plt.plot(x,y,label="real data",color="blue")
plt.plot(x_2,y_2,label="predicted values",color="green")
if len(lower) > 2 :
x_2 = np.arange(len(df), len(df) + len(lower), 1)
plt.fill_between(x_2,lower[:],upper[:],color="red",alpha=0.3,label="95% CI")
plt.legend()
plt.show()
    def fit_predict_XGBoost(self, df, freq, date_format, steps=1, early_stopping_rounds=300, test_size=0.01, nb_estimators=1000):
        '''
        Use an XGBoost model in order to forecast values from calendar
        features.

        Parameters
        ----------
        df : dataframe
            values that are going to be used for the training, with a "time"
            column of strings and a "y" column of values.
        freq : str (python datetime format)
            frequency of the data, ex "m" for months, "d" for days ...
        date_format : str
            format of time columns (ex if date columns is 1996-05-04,
            date_format="%Y-%m-%d").
        steps : int
            number of points to forecast.
        early_stopping_rounds : int, optional
            XGBoost early-stopping patience. The default is 300.
        test_size : float, optional
            fraction held out for evaluation. The default is 0.01.
        nb_estimators : int, optional
            number of boosting rounds. The default is 1000.

        Returns
        -------
        prediction : array
            array of predicted values.
        '''
        df = df.dropna()
        TIME = [None] * len(df['time'])
        # convert the timestamp strings into datetime objects the model can use
        for i in range(len(df['time'])):
            dobj_a = datetime.strptime(df['time'][i], date_format)
            TIME[i] = dobj_a
        df["time"] = TIME
        df = df.reset_index(drop=True)
        df = df.set_index("time")
        df.index = pd.to_datetime(df.index)
        # derive calendar features (hour, day of week, month, ...) from the index
        X, Y = self.create_features(df=df, label='y')
        reg = xgb.XGBRegressor(n_estimators=nb_estimators)
        # NOTE(review): with test_size=0.01 the "test" slice below is almost
        # the whole dataset (X[int(test_size*len(X)):]) — likely meant
        # X[int((1-test_size)*len(X)):]; confirm before trusting eval metrics.
        x_train = X[: int((1 - test_size) * len(X))]
        y_train = Y[: int((1 - test_size) * len(Y))]
        x_test = X[int((test_size) * len(X)):]
        y_test = Y[int((test_size) * len(Y)):]
        reg.fit(x_train, y_train,
                eval_set=[(x_train, y_train), (x_test, y_test)],
                early_stopping_rounds=early_stopping_rounds,
                verbose=True)
        # build the future timestamps to forecast, then featurize them the same way
        dates = pd.date_range(df.index[len(df) - 1], freq=freq, periods=steps)
        df2 = pd.DataFrame({"time": dates})
        df2 = df2.set_index("time")
        df2.index = pd.to_datetime(df2.index)
        X2 = self.create_features(df=df2)
        prediction = reg.predict(X2)
        return prediction
[docs] def create_features(self,df, label=None):
df['date'] = df.index
df['hour'] = df['date'].dt.hour
df['dayofweek'] = df['date'].dt.dayofweek
df['quarter'] = df['date'].dt.quarter
df['month'] = df['date'].dt.month
df['year'] = df['date'].dt.year
df['dayofyear'] = df['date'].dt.dayofyear
df['dayofmonth'] = df['date'].dt.day
df['weekofyear'] = df['date'].dt.weekofyear
X = df[['hour','dayofweek','quarter','month','year',
'dayofyear','dayofmonth','weekofyear']]
if label:
y = df[label]
return X, y
return X