10 Hyperparameter Tuning: sklearn
MAX_TIME = 5 # Time in minutes. The counter starts after the initial design is evaluated, so the total runtime can be larger.
INIT_SIZE = 20 # Initial number of designs to evaluate before the surrogate is built.
import pickle
import socket
from datetime import datetime
from dateutil.tz import tzlocal
start_time = datetime.now(tzlocal())
HOSTNAME = socket.gethostname().split(".")[0]
experiment_name = '10-sklearn' + "_" + HOSTNAME + "_" + str(MAX_TIME) + "min_" + str(INIT_SIZE) + "init_" + str(start_time).split(".", 1)[0].replace(' ', '_')
experiment_name = experiment_name.replace(':', '-')
experiment_name
This notebook exemplifies hyperparameter tuning with SPOT (spotPython). The hyperparameter tuning software SPOT was originally developed in R (the statistical programming language); see the open-access book “Hyperparameter Tuning for Machine and Deep Learning with R - A Practical Guide”, available here: https://link.springer.com/book/10.1007/978-981-19-5170-1.
!pip list | grep "spot[RiverPython]"
# import sys
# !{sys.executable} -m pip install --upgrade build
# !{sys.executable} -m pip install --upgrade --force-reinstall spotPython
from tabulate import tabulate
import warnings
import numpy as np
from math import inf
import pandas as pd
from scipy.optimize import differential_evolution
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, StandardScaler, OrdinalEncoder
from sklearn.linear_model import RidgeCV, LogisticRegression, ElasticNet
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import (
    HistGradientBoostingRegressor,
    RandomForestClassifier,
    GradientBoostingClassifier,
    GradientBoostingRegressor,
)
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.datasets import fetch_openml, make_regression, make_classification, make_moons, make_circles
from sklearn.metrics import mean_absolute_error, accuracy_score, roc_curve, roc_auc_score, log_loss, mean_squared_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
warnings.filterwarnings("ignore")
from spotPython.spot import spot
from spotPython.hyperparameters.values import (
    add_core_model_to_fun_control,
    assign_values,
    convert_keys,
    get_bound_values,
    get_default_hyperparameters_for_core_model,
    get_default_values,
    get_dict_with_levels_and_types,
    get_values_from_dict,
    get_var_name,
    get_var_type,
    iterate_dict_values,
    modify_hyper_parameter_levels,
    modify_hyper_parameter_bounds,
    replace_levels_with_positions,
    return_conf_list_from_var_dict,
    get_one_core_model_from_X,
    transform_hyper_parameter_values,
    get_one_sklearn_model_from_X,
)
from spotPython.utils.convert import class_for_name
from spotPython.utils.eda import (
get_stars,
gen_design_table)
from spotPython.utils.transform import transform_hyper_parameter_values
from spotPython.utils.convert import get_Xy_from_df
from spotPython.plot.validation import plot_cv_predictions, plot_roc, plot_confusion_matrix
from spotPython.utils.init import fun_control_init
from spotPython.data.sklearn_hyper_dict import SklearnHyperDict
from spotPython.fun.hypersklearn import HyperSklearn
from spotPython.utils.metrics import mapk, apk
10.1 Step 1: Initialization of the Empty fun_control Dictionary
fun_control = fun_control_init(task="classification",
    tensorboard_path="runs/10_spot_hpt_sklearn_classification")
10.2 Step 2: Load Data (Classification)
Randomly generate classification data.
n_features = 2
n_samples = 250
target_column = "y"
ds = make_moons(n_samples, noise=0.5, random_state=0)
X, y = ds
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=42
)
train = pd.DataFrame(np.hstack((X_train, y_train.reshape(-1, 1))))
test = pd.DataFrame(np.hstack((X_test, y_test.reshape(-1, 1))))
train.columns = [f"x{i}" for i in range(1, n_features+1)] + [target_column]
test.columns = [f"x{i}" for i in range(1, n_features+1)] + [target_column]
train.head()
from matplotlib.colors import ListedColormap

x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

# just plot the dataset first
cm = plt.cm.RdBu
cm_bright = ListedColormap(["#FF0000", "#0000FF"])
ax = plt.subplot(1, 1, 1)
ax.set_title("Input data")
# Plot the training points
ax.scatter(X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k")
# Plot the testing points
ax.scatter(
    X_test[:, 0], X_test[:, 1], c=y_test, cmap=cm_bright, alpha=0.6, edgecolors="k"
)
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
ax.set_xticks(())
ax.set_yticks(())
plt.tight_layout()
plt.show()
n_samples = len(train)
# add the dataset to the fun_control
fun_control.update({"data": None,  # dataset,
    "train": train,
    "test": test,
    "n_samples": n_samples,
    "target_column": target_column})
10.3 Step 3: Specification of the Preprocessing Model
Data preprocessing can be very simple, e.g., you can ignore it. Then you would choose the prep_model
“None”:
prep_model = None
fun_control.update({"prep_model": prep_model})
A default approach for numerical data is the StandardScaler
(mean 0, variance 1). This can be selected as follows:
prep_model = StandardScaler()
fun_control.update({"prep_model": prep_model})
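As a quick sanity check, here is a minimal sketch (independent of fun_control) showing what the StandardScaler does to a numeric column:
import numpy as np
from sklearn.preprocessing import StandardScaler

# Toy column: after scaling, the column has mean ~0 and standard deviation ~1.
demo = StandardScaler().fit_transform(np.array([[1.0], [2.0], [3.0]]))
print(demo.mean(), demo.std())  # -> 0.0 1.0 (up to floating point)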
Even more complicated pre-processing steps are possible, e.g., the following pipeline:
# categorical_columns = []
# one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# prep_model = ColumnTransformer(
# transformers=[
# ("categorical", one_hot_encoder, categorical_columns),
# ],
# remainder=StandardScaler(),
# )
10.4 Step 4: Select algorithm and core_model_hyper_dict
The selection of the algorithm (ML model) that should be tuned is done by specifying its name from the sklearn
implementation. For example, the SVC
support vector machine classifier is selected as follows:
# core_model = RidgeCV
# core_model = GradientBoostingRegressor
# core_model = ElasticNet
# core_model = RandomForestClassifier
core_model = SVC
# core_model = LogisticRegression
# core_model = KNeighborsClassifier
# core_model = GradientBoostingClassifier
fun_control = add_core_model_to_fun_control(core_model=core_model,
    fun_control=fun_control,
    hyper_dict=SklearnHyperDict,
    filename=None)
Now fun_control
has the information from the JSON file:
"SVC":
{
"C": {
"type": "float",
"default": 1.0,
"transform": "None",
"lower": 0.1,
"upper": 10.0},
"kernel": {
"levels": ["linear", "poly", "rbf", "sigmoid"],
"type": "factor",
"default": "rbf",
"transform": "None",
"core_model_parameter_type": "str",
"lower": 0,
"upper": 3},
"degree": {
"type": "int",
"default": 3,
"transform": "None",
"lower": 3,
"upper": 3},
"gamma": {
"levels": ["scale", "auto"],
"type": "factor",
"default": "scale",
"transform": "None",
"core_model_parameter_type": "str",
"lower": 0,
"upper": 1},
"coef0": {
"type": "float",
"default": 0.0,
"transform": "None",
"lower": 0.0,
"upper": 0.0},
"shrinking": {
"levels": [0, 1],
"type": "factor",
"default": 0,
"transform": "None",
"core_model_parameter_type": "bool",
"lower": 0,
"upper": 1},
"probability": {
"levels": [0, 1],
"type": "factor",
"default": 0,
"transform": "None",
"core_model_parameter_type": "bool",
"lower": 0,
"upper": 1},
"tol": {
"type": "float",
"default": 1e-3,
"transform": "None",
"lower": 1e-4,
"upper": 1e-2},
"cache_size": {
"type": "float",
"default": 200,
"transform": "None",
"lower": 100,
"upper": 400},
"break_ties": {
"levels": [0, 1],
"type": "factor",
"default": 0,
"transform": "None",
"core_model_parameter_type": "bool",
"lower": 0,
"upper": 1}
}
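To verify what was loaded, the dictionary can be inspected directly (a minimal sketch; fun_control["core_model_hyper_dict"] is the key used throughout this chapter):
import pprint
# Pretty-print the hyperparameter dictionary attached by add_core_model_to_fun_control
pprint.pprint(fun_control["core_model_hyper_dict"])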
10.5 Step 5: Modify hyper_dict Hyperparameters for the Selected Algorithm aka core_model
10.5.1 Modify hyperparameter of type factor
Factors can be modified with the modify_hyper_parameter_levels
function. For example, to exclude the sigmoid
kernel from the tuning, the kernel
hyperparameter of the SVC
model can be modified as follows:
fun_control = modify_hyper_parameter_levels(fun_control, "kernel", ["linear", "poly", "rbf"])
fun_control["core_model_hyper_dict"]["kernel"]
10.5.2 Modify hyperparameter of type numeric and integer (boolean)
Numeric and boolean values can be modified using the modify_hyper_parameter_bounds
method. For example, to change the tol
hyperparameter of the SVC
model to the interval [1e-3, 1e-2], the following code can be used:
fun_control = modify_hyper_parameter_bounds(fun_control, "tol", bounds=[1e-3, 1e-2])
# fun_control = modify_hyper_parameter_bounds(fun_control, "min_samples_split", bounds=[3, 20])
# fun_control = modify_hyper_parameter_bounds(fun_control, "merit_preprune", bounds=[0, 0])
fun_control["core_model_hyper_dict"]["tol"]
10.6 Step 6: Selection of the Objective (Loss) Function
There are two metrics:
1. `metric_river` is used for the river based evaluation via `eval_oml_iter_progressive`.
2. `metric_sklearn` is used for the sklearn based evaluation.
fun = HyperSklearn(seed=123, log_level=50).fun_sklearn
# metric_sklearn = roc_auc_score
# weights = -1.0
metric_sklearn = log_loss
weights = 1.0
# k = None
# custom_metric = mapk
fun_control.update({"horizon": None,
    "oml_grace_period": None,
    "weights": weights,
    "step": None,
    "log_level": 50,
    "weight_coeff": None,
    "metric_river": None,
    "metric_sklearn": metric_sklearn,
    # "metric_params": {"k": k},
})
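The weights entry controls the sign of the objective: the tuner minimizes, so a loss such as log_loss is combined with weights = 1.0, while a score such as roc_auc_score (larger is better) would be combined with weights = -1.0, as the commented lines above suggest. A small illustration of the two metric types (a sketch with toy data, not part of the tuning run):
from sklearn.metrics import log_loss, roc_auc_score
y_true = [0, 1, 1, 0]
y_prob = [0.1, 0.9, 0.8, 0.3]
print(log_loss(y_true, y_prob))       # loss: smaller is better -> weights = 1.0
print(roc_auc_score(y_true, y_prob))  # score: larger is better -> weights = -1.0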
10.6.1 Predict Classes or Class Probabilities
If the key "predict_proba" is set to True, the class probabilities are predicted. False is the default, i.e., the classes are predicted.
fun_control.update({"predict_proba": False,
})
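For a standalone illustration of the difference (a sketch on the moons data from above; note that sklearn's SVC only exposes predict_proba when constructed with probability=True):
clf = SVC(probability=True).fit(X_train, y_train)
print(clf.predict(X_test[:3]))        # hard class labels
print(clf.predict_proba(X_test[:3]))  # class probabilities, one column per class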
10.7 Step 7: Calling the SPOT Function
10.7.1 Prepare the SPOT Parameters
- Get types and variable names as well as lower and upper bounds for the hyperparameters.
var_type = get_var_type(fun_control)
var_name = get_var_name(fun_control)
fun_control.update({"var_type": var_type,
    "var_name": var_name})
lower = get_bound_values(fun_control, "lower")
upper = get_bound_values(fun_control, "upper")
print(gen_design_table(fun_control))
10.7.2 Run the Spot Optimizer
- Run SPOT for approx. x mins (max_time).
- Note: the run takes longer, because the evaluation time of the initial design (here: init_size, 20 points) is not considered.
from spotPython.hyperparameters.values import get_default_hyperparameters_as_array
hyper_dict = SklearnHyperDict().load()
X_start = get_default_hyperparameters_as_array(fun_control, hyper_dict)
X_start
spot_tuner = spot.Spot(fun=fun,
    lower=lower,
    upper=upper,
    fun_evals=inf,
    fun_repeats=1,
    max_time=MAX_TIME,
    noise=False,
    tolerance_x=np.sqrt(np.spacing(1)),
    var_type=var_type,
    var_name=var_name,
    infill_criterion="y",
    n_points=1,
    seed=123,
    log_level=50,
    show_models=False,
    show_progress=True,
    fun_control=fun_control,
    design_control={"init_size": INIT_SIZE,
        "repeats": 1},
    surrogate_control={"noise": True,
        "cod_type": "norm",
        "min_theta": -4,
        "max_theta": 3,
        "n_theta": len(var_name),
        "model_optimizer": differential_evolution,
        "model_fun_evals": 10_000,
        "log_level": 50
    })
spot_tuner.run(X_start=X_start)
10.7.3 Results
SAVE = False
LOAD = False

if SAVE:
    result_file_name = "res_" + experiment_name + ".pkl"
    with open(result_file_name, 'wb') as f:
        pickle.dump(spot_tuner, f)

if LOAD:
    result_file_name = "res_ch10-friedman-hpt-0_maans03_60min_20init_1K_2023-04-14_10-11-19.pkl"
    with open(result_file_name, 'rb') as f:
        spot_tuner = pickle.load(f)
- Show the Progress of the hyperparameter tuning:
spot_tuner.plot_progress(log_y=False, filename="../Figures.d/" + experiment_name + "_progress.pdf")
Print the results.
print(gen_design_table(fun_control=fun_control, spot=spot_tuner))
10.8 Show variable importance
spot_tuner.plot_importance(threshold=0.025, filename="../Figures.d/" + experiment_name + "_importance.pdf")
10.9 Get Default Hyperparameters
values_default = get_default_values(fun_control)
values_default = transform_hyper_parameter_values(fun_control=fun_control, hyper_parameter_values=values_default)
values_default
model_default = make_pipeline(fun_control["prep_model"], fun_control["core_model"](**values_default))
model_default
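Before comparing models, the default pipeline can be fitted and scored on the held-out test set (a minimal sketch using the column layout created in Step 2):
X_tr = train[[f"x{i}" for i in range(1, n_features + 1)]].values
y_tr = train[target_column].values
X_te = test[[f"x{i}" for i in range(1, n_features + 1)]].values
y_te = test[target_column].values
model_default.fit(X_tr, y_tr)
print(model_default.score(X_te, y_te))  # mean accuracy of the default SVC pipeline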
10.10 Get SPOT Results
X = spot_tuner.to_all_dim(spot_tuner.min_X.reshape(1, -1))
print(X)
v_dict = assign_values(X, fun_control["var_name"])
return_conf_list_from_var_dict(var_dict=v_dict, fun_control=fun_control)
model_spot = get_one_sklearn_model_from_X(X, fun_control)
model_spot
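Analogously, the tuned model can be scored on the same split (a sketch; it reuses X_tr/X_te from the snippet above and assumes get_one_sklearn_model_from_X returns an unfitted estimator, which is worth checking against your spotPython version):
model_spot.fit(X_tr, y_tr)
print(model_spot.score(X_te, y_te))  # compare with the default pipeline's score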
10.11 Plot: Compare Predictions
=["Default", "Spot"]) plot_roc([model_default, model_spot], fun_control, model_names
= "Default") plot_confusion_matrix(model_default, fun_control, title
="SPOT") plot_confusion_matrix(model_spot, fun_control, title
min(spot_tuner.y), max(spot_tuner.y)
10.12 Detailed Hyperparameter Plots
= "./figures/" + experiment_name
filename =filename) spot_tuner.plot_important_hyperparameter_contour(filename
10.13 Parallel Coordinates Plot
spot_tuner.parallel_plot()
10.14 Plot all Combinations of Hyperparameters
- Warning: this may take a while.
PLOT_ALL = False
if PLOT_ALL:
    n = spot_tuner.k
    # Bounds for the contour color scale (cf. the min/max of spot_tuner.y shown above)
    min_z = min(spot_tuner.y)
    max_z = max(spot_tuner.y)
    for i in range(n-1):
        for j in range(i+1, n):
            spot_tuner.plot_contour(i=i, j=j, min_z=min_z, max_z=max_z)