12 Hyperparameter Tuning: PyTorch with CIFAR10 Data
MAX_TIME = 1
INIT_SIZE = 5
DEVICE = "cpu" # "cuda:0"
import pickle
import socket
from datetime import datetime
from dateutil.tz import tzlocal

start_time = datetime.now(tzlocal())
HOSTNAME = socket.gethostname().split(".")[0]
experiment_name = '12-torch' + "_" + HOSTNAME + "_" + str(MAX_TIME) + "min_" + str(INIT_SIZE) + "init_" + str(start_time).split(".", 1)[0].replace(' ', '_')
experiment_name = experiment_name.replace(':', '-')
experiment_name
This notebook exemplifies hyperparameter tuning with SPOT (spotPython). The hyperparameter software SPOT was developed in R (statistical programming language), see Open Access book “Hyperparameter Tuning for Machine and Deep Learning with R - A Practical Guide”, available here: https://link.springer.com/book/10.1007/978-981-19-5170-1.
!pip list | grep "spot[RiverPython]"
# import sys
# !{sys.executable} -m pip install --upgrade build
# !{sys.executable} -m pip install --upgrade --force-reinstall spotPython
from tabulate import tabulate
import copy
import warnings
import numbers
import json
import calendar
import math
import datetime as dt
import numpy as np
from math import inf
import pandas as pd
from scipy.optimize import differential_evolution
import matplotlib.pyplot as plt
from functools import partial
import torch
from torch import nn
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import random_split
from torchvision import datasets
import torchvision
import torchvision.transforms as transforms
from torchvision.transforms import ToTensor
from spotPython.spot import spot
from spotPython.hyperparameters.values import (
add_core_model_to_fun_control,
assign_values,
convert_keys,
get_bound_values,
get_default_hyperparameters_for_core_model,
get_default_values,
get_dict_with_levels_and_types,
get_values_from_dict,
get_var_name,
get_var_type,
iterate_dict_values,
modify_hyper_parameter_levels,
modify_hyper_parameter_bounds,
replace_levels_with_positions,
return_conf_list_from_var_dict,
get_one_core_model_from_X,
transform_hyper_parameter_values,
)
from spotPython.torch.traintest import evaluate_cv, evaluate_hold_out
from spotPython.utils.convert import class_for_name
from spotPython.utils.eda import (
get_stars,
    gen_design_table)
from spotPython.utils.transform import transform_hyper_parameter_values
from spotPython.utils.convert import get_Xy_from_df
from spotPython.utils.init import fun_control_init
from spotPython.plot.validation import plot_cv_predictions, plot_roc, plot_confusion_matrix
from spotPython.data.torch_hyper_dict import TorchHyperDict
from spotPython.fun.hypertorch import HyperTorch
"ignore")
warnings.filterwarnings(
from spotPython.torch.netcifar10 import Net_CIFAR10
print(torch.__version__)
# Check that MPS is available
if not torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("MPS not available because the current PyTorch install was not "
              "built with MPS enabled.")
    else:
        print("MPS not available because the current MacOS version is not 12.3+ "
              "and/or you do not have an MPS-enabled device on this machine.")
else:
    mps_device = torch.device("mps")
    print("MPS device: ", mps_device)
12.1 0. Initialization of the Empty fun_control Dictionary
fun_control = fun_control_init(task="classification")
12.2 1. Load CIFAR10 Data
def load_data(data_dir="./data"):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    trainset = torchvision.datasets.CIFAR10(
        root=data_dir, train=True, download=True, transform=transform)
    testset = torchvision.datasets.CIFAR10(
        root=data_dir, train=False, download=True, transform=transform)
    return trainset, testset

train, test = load_data()
train.data.shape, test.data.shape
n_samples = len(train)
# add the dataset to the fun_control
fun_control.update({"data": None, # dataset,
                    "train": train,
                    "test": test,
                    "n_samples": n_samples,
                    "target_column": None})
12.3 2. Specification of the Preprocessing Model
# categorical_columns = []
# one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# prep_model = ColumnTransformer(
# transformers=[
# ("categorical", one_hot_encoder, categorical_columns),
# ],
# remainder=StandardScaler(),
# )
prep_model = None
fun_control.update({"prep_model": prep_model})
12.4 3. Select algorithm and core_model_hyper_dict
Our implementation is based on the section “Configurable neural network” from the PyTorch tutorial Hyperparameter Tuning with Ray Tune. spotPython implements a class which is similar to the class described in that tutorial. The class is called Net_CIFAR10 and is implemented in the file netcifar10.py. It is imported here.
Note: In addition to the class Net from the PyTorch tutorial, the class Net_CIFAR10 has additional attributes, namely:
- learning rate (lr),
- batch size (batch_size),
- epochs (epochs), and
- k_folds (k_folds).
Further attributes can be easily added to the class, e.g., optimizer or loss_function. A sketch of such a configurable network is shown below.
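The following is a minimal sketch of what such a configurable class might look like. It mirrors the Net class from the Ray Tune tutorial and merely stores the additional attributes; the actual implementation in netcifar10.py may differ in its details.
import torch
import torch.nn as nn
import torch.nn.functional as F

class NetSketch(nn.Module):  # hypothetical stand-in for Net_CIFAR10
    def __init__(self, l1, l2, lr, batch_size, epochs, k_folds):
        super().__init__()
        # tuning-related attributes (not used by forward())
        self.lr, self.batch_size = lr, batch_size
        self.epochs, self.k_folds = epochs, k_folds
        # configurable architecture: l1 and l2 set the widths of the
        # fully connected layers, as in the Ray Tune tutorial's Net
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, l1)
        self.fc2 = nn.Linear(l1, l2)
        self.fc3 = nn.Linear(l2, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # 3x32x32 -> 6x14x14
        x = self.pool(F.relu(self.conv2(x)))   # -> 16x5x5
        x = torch.flatten(x, 1)                # -> 400 features
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)                     # 10 class logits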
core_model = Net_CIFAR10
fun_control = add_core_model_to_fun_control(core_model=core_model,
                                            fun_control=fun_control,
                                            hyper_dict=TorchHyperDict,
                                            filename=None)
12.4.1 The hyper_dict Hyperparameters for the Selected Algorithm
spotPython uses simple JSON files for the specification of the hyperparameters. The JSON file for the core_model is called torch_hyper_dict.json. The corresponding entries for the Net_CIFAR10 class are shown below.
{"Net_CIFAR10":
{
"l1": {
"type": "int",
"default": 5,
"transform": "transform_power_2_int",
"lower": 2,
"upper": 9},
"l2": {
"type": "int",
"default": 5,
"transform": "transform_power_2_int",
"lower": 2,
"upper": 9},
"lr": {
"type": "float",
"default": 1e-03,
"transform": "None",
"lower": 1e-05,
"upper": 1e-02},
"batch_size": {
"type": "int",
"default": 4,
"transform": "transform_power_2_int",
"lower": 1,
"upper": 4},
"epochs": {
"type": "int",
"default": 3,
"transform": "transform_power_2_int",
"lower": 1,
"upper": 4},
"k_folds": {
"type": "int",
"default": 2,
"transform": "None",
"lower": 2,
"upper": 3}
}
}
Each entry in the JSON file represents one hyperparameter with the following structure: type, default, transform, lower, and upper.
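The transform entry maps the tuned value before it is passed to the model. For example, transform_power_2_int turns the tuned integer into a power of two, so the default l1 = 5 corresponds to 2^5 = 32 units. A hypothetical re-implementation for illustration (spotPython ships its own version):
def transform_power_2_int(x: int) -> int:
    # map the tuned integer x to 2**x
    return int(2 ** x)

transform_power_2_int(5)  # default l1 = 5 -> 32 neurons
# the bounds [2, 9] therefore span layer widths from 4 to 512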
12.4.2 Categorical Hyperparameters
In contrast to Ray Tune, spotPython can handle numerical, boolean, and categorical hyperparameters. Since Ray Tune does not tune categorical hyperparameters, they are not used here. However, they can be specified in the JSON file in a similar way as the numerical hyperparameters, as shown below:
"factor_hyperparameter": {
"levels": ["A", "B", "C"],
"type": "factor",
"default": "B",
"transform": "None",
"core_model_parameter_type": "str",
"lower": 0,
"upper": 2},
12.5 4. Modify hyper_dict Hyperparameters for the Selected Algorithm aka core_model
After specifying the model, the corresponding hyperparameters, their types, and bounds are loaded from the JSON file torch_hyper_dict.json. After loading, the user can modify the hyperparameters, e.g., the bounds. spotPython provides a clever rule for de-activating hyperparameters: if the lower and the upper bound are set to identical values, the hyperparameter is de-activated. This is useful for hyperparameter tuning, because it allows the user to specify a hyperparameter in the JSON file, but to de-activate it in the fun_control dictionary. This is done in the next step.
12.5.1 Modify hyperparameter of type numeric and integer (boolean)
Since the hyperparameter k_folds is not used in the PyTorch tutorial, it is de-activated here by setting the lower and the upper bound to the same value.
# fun_control = modify_hyper_parameter_bounds(fun_control, "delta", bounds=[1e-10, 1e-6])
# fun_control = modify_hyper_parameter_bounds(fun_control, "min_samples_split", bounds=[3, 20])
#fun_control = modify_hyper_parameter_bounds(fun_control, "merit_preprune", bounds=[0, 0])
# fun_control["core_model_hyper_dict"]
fun_control = modify_hyper_parameter_bounds(fun_control, "k_folds", bounds=[2, 2])
12.5.2 Modify hyperparameter of type factor
In a similar manner as for the numerical hyperparameters, the categorical hyperparameters can be modified. For example, the hyperparameter leaf_model can be de-activated by choosing only one value, "LinearRegression", as shown in the commented example below.
# fun_control = modify_hyper_parameter_levels(fun_control, "leaf_model", ["LinearRegression"])
# fun_control["core_model_hyper_dict"]
12.6 5. Selection of the Objective (Loss) Function
from torch.nn import CrossEntropyLoss
loss_function = CrossEntropyLoss()
fun_control.update({"loss_function": loss_function})
In addition to the loss functions, spotPython provides access to a large number of metrics.
- The key "metric_sklearn" is used for metrics that follow the scikit-learn conventions.
- The key "river_metric" is used for the river based evaluation (Montiel et al. 2021) via eval_oml_iter_progressive, and
- the key "metric_torch" is used for the metrics from TorchMetrics.
TorchMetrics is a collection of more than 90 PyTorch metrics.
Because the PyTorch tutorial uses accuracy as its metric, we use the same metric here. Currently, accuracy is computed directly in the tutorial’s example code. We will use TorchMetrics instead, because it offers more flexibility, e.g., it can be used for regression and classification. Furthermore, TorchMetrics offers the following advantages:
- A standardized interface to increase reproducibility
- Reduces boilerplate
- Distributed-training compatible
- Rigorously tested
- Automatic accumulation over batches
- Automatic synchronization between multiple devices
Therefore, we set
import torchmetrics
metric_torch = torchmetrics.Accuracy(task="multiclass", num_classes=10)
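The snippet below illustrates the automatic accumulation over batches mentioned above: update() collects statistics per batch, and compute() aggregates them. The tensors are dummy data for illustration only.
import torch

acc = torchmetrics.Accuracy(task="multiclass", num_classes=10)
for _ in range(3):                       # three dummy batches
    preds = torch.randn(8, 10)           # logits for 8 samples, 10 classes
    target = torch.randint(0, 10, (8,))  # random ground-truth labels
    acc.update(preds, target)            # accumulate batch statistics
acc.compute()                            # accuracy over all 24 samples
acc.reset()                              # clear state, e.g., per epoch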
Important:
- spotPython performs minimization by default.
- If accuracy should be maximized, then the objective function has to be multiplied by -1. Therefore, weights is set to -1 in this case.
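Schematically, the value returned to the tuner can be thought of as weights times the metric value, so a negative weight turns maximization into minimization (an illustration of the sign convention only, not spotPython's actual internals):
weights = -1
accuracy = 0.9
objective = weights * accuracy  # -0.9: minimizing this maximizes accuracy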
loss_function = CrossEntropyLoss()
weights = 1.0
metric_torch = torchmetrics.Accuracy(task="multiclass", num_classes=10)
shuffle = True
eval = "train_hold_out"
device = DEVICE
show_batch_interval = 100_000
path = "torch_model.pt"

fun_control.update({"data_dir": None,
                    "checkpoint_dir": None,
                    "horizon": None,
                    "oml_grace_period": None,
                    "weights": weights,
                    "step": None,
                    "log_level": 50,
                    "weight_coeff": None,
                    "metric_torch": metric_torch,
                    "metric_river": None,
                    "metric_sklearn": None,
                    "loss_function": loss_function,
                    "shuffle": shuffle,
                    "eval": eval,
                    "device": device,
                    "show_batch_interval": show_batch_interval,
                    "path": path,
                    })
12.7 6. Calling the SPOT Function
12.7.1 Prepare the SPOT Parameters
- Get types and variable names as well as lower and upper bounds for the hyperparameters.
var_type = get_var_type(fun_control)
var_name = get_var_name(fun_control)
fun_control.update({"var_type": var_type,
                    "var_name": var_name})
lower = get_bound_values(fun_control, "lower")
upper = get_bound_values(fun_control, "upper")
print(gen_design_table(fun_control))
The objective function fun_torch is selected next. It implements an interface from PyTorch’s training, validation, and testing methods to spotPython.
from spotPython.fun.hypertorch import HyperTorch
fun = HyperTorch().fun_torch
12.7.2 Run the Spot Optimizer
- Run SPOT for approximately max_time minutes (here: MAX_TIME).
- Note: the run takes longer, because the evaluation time of the initial design (here: init_size = INIT_SIZE points) is not considered.
from spotPython.hyperparameters.values import get_default_hyperparameters_as_array
hyper_dict = TorchHyperDict().load()
X_start = get_default_hyperparameters_as_array(fun_control, hyper_dict)
X_start
spot_tuner = spot.Spot(fun=fun,
                       lower=lower,
                       upper=upper,
                       fun_evals=inf,
                       fun_repeats=1,
                       max_time=MAX_TIME,
                       noise=False,
                       tolerance_x=np.sqrt(np.spacing(1)),
                       var_type=var_type,
                       var_name=var_name,
                       infill_criterion="y",
                       n_points=1,
                       seed=123,
                       log_level=50,
                       show_models=False,
                       show_progress=True,
                       fun_control=fun_control,
                       design_control={"init_size": INIT_SIZE,
                                       "repeats": 1},
                       surrogate_control={"noise": True,
                                          "cod_type": "norm",
                                          "min_theta": -4,
                                          "max_theta": 3,
                                          "n_theta": len(var_name),
                                          "model_optimizer": differential_evolution,
                                          "model_fun_evals": 10_000,
                                          "log_level": 50
                                          })
spot_tuner.run(X_start=X_start)
12.7.3 Results
SAVE = False
LOAD = False

if SAVE:
    result_file_name = "res_" + experiment_name + ".pkl"
    with open(result_file_name, 'wb') as f:
        pickle.dump(spot_tuner, f)

if LOAD:
    result_file_name = "res_ch10-friedman-hpt-0_maans03_60min_20init_1K_2023-04-14_10-11-19.pkl"
    with open(result_file_name, 'rb') as f:
        spot_tuner = pickle.load(f)
- Show the Progress of the hyperparameter tuning:
spot_tuner.y
spot_tuner.plot_progress(log_y=False, filename="../Figures.d/" + experiment_name + "_progress.pdf")
- Print the Results
print(gen_design_table(fun_control=fun_control, spot=spot_tuner))
12.8 Show variable importance
spot_tuner.plot_importance(threshold=0.025, filename="../Figures.d/" + experiment_name + "_importance.pdf")
12.9 Get Default Hyperparameters
values_default = get_default_values(fun_control)
values_default = transform_hyper_parameter_values(fun_control=fun_control, hyper_parameter_values=values_default)
values_default
model_default = fun_control["core_model"](**values_default)
model_default
12.10 Get SPOT Results
The architecture of the spotPython model can be obtained by the following code:
from spotPython.hyperparameters.values import get_one_core_model_from_X
X = spot_tuner.to_all_dim(spot_tuner.min_X.reshape(1,-1))
model_spot = get_one_core_model_from_X(X, fun_control)
model_spot
12.11 Evaluation of the Tuned Architecture
The method train_tuned takes a model architecture without trained weights and trains this model with the train data. The train data is split into train and validation data. The validation data is used for early stopping. The trained model weights are saved as a dictionary.
from spotPython.torch.traintest import (
    train_tuned,
    test_tuned,
)
train_tuned(net=model_default, train_dataset=train, shuffle=True,
            loss_function=fun_control["loss_function"],
            metric=fun_control["metric_torch"],
            device=DEVICE, show_batch_interval=1_000_000,
            path=None,
            task=fun_control["task"],)

test_tuned(net=model_default, test_dataset=test,
           loss_function=fun_control["loss_function"],
           metric=fun_control["metric_torch"],
           shuffle=False,
           device=DEVICE,
           task=fun_control["task"],)
The following code trains the model model_spot. If path is set to a filename, e.g., path = "model_spot_trained.pt", the weights of the trained model will be saved to this file by train_tuned. If path is set in the subsequent call to test_tuned, the weights of the trained model will be loaded from this file.
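Under the hood this presumably relies on PyTorch's standard state_dict mechanism; a minimal sketch, using the example filename from the text:
path = "model_spot_trained.pt"                # example filename from the text
torch.save(model_spot.state_dict(), path)     # save the weight dictionary
model_spot.load_state_dict(torch.load(path))  # restore it before testing
model_spot.eval()                             # switch to evaluation mode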
train_tuned(net=model_spot, train_dataset=train,
            loss_function=fun_control["loss_function"],
            metric=fun_control["metric_torch"],
            shuffle=True,
            device=DEVICE,
            path=None,
            task=fun_control["task"],)

test_tuned(net=model_spot, test_dataset=test,
           shuffle=False,
           loss_function=fun_control["loss_function"],
           metric=fun_control["metric_torch"],
           device=DEVICE,
           task=fun_control["task"],)
12.12 Detailed Hyperparameter Plots
= "./figures/" + experiment_name
filename =filename) spot_tuner.plot_important_hyperparameter_contour(filename
12.13 Parallel Coordinates Plot
spot_tuner.parallel_plot()
12.14 Plot all Combinations of Hyperparameters
- Warning: this may take a while.
PLOT_ALL = False
if PLOT_ALL:
    n = spot_tuner.k
    min_z = min(spot_tuner.y)  # assumption: use the observed objective range
    max_z = max(spot_tuner.y)  # as a common color scale across all plots
    for i in range(n-1):
        for j in range(i+1, n):
            spot_tuner.plot_contour(i=i, j=j, min_z=min_z, max_z=max_z)