This document refers to the following software versions:

python: 3.10.10
spotPython: 0.3.4
spotRiver: 0.0.93

!pip list | grep "spot[RiverPython]"

spotPython 0.3.4
spotRiver 0.0.93
Note: you may need to restart the kernel to use updated packages.
spotPython can be installed via pip. Alternatively, the source code can be downloaded from gitHub: https://github.com/sequential-parameter-optimization/spotPython.

!pip install spotPython

The commented lines below can be used to build and install spotPython directly from gitHub:

# import sys
# !{sys.executable} -m pip install --upgrade build
# !{sys.executable} -m pip install --upgrade --force-reinstall spotPython
Before we consider the detailed experimental setup, we select the parameters that affect run time and the initial design size.
MAX_TIME = 1
INIT_SIZE = 5
ORIGINAL = False
import os
import copy
import socket
from datetime import datetime
from dateutil.tz import tzlocal
start_time = datetime.now(tzlocal())
HOSTNAME = socket.gethostname().split(".")[0]
experiment_name = '18-svc-sklearn' + "_" + HOSTNAME + "_" + str(MAX_TIME) + "min_" + str(INIT_SIZE) + "init_" + str(start_time).split(".", 1)[0].replace(' ', '_')
experiment_name = experiment_name.replace(':', '-')
print(experiment_name)
if not os.path.exists('./figures'):
    os.makedirs('./figures')

18-svc-sklearn_p040025_1min_5init_2023-07-06_02-13-55
import warnings
"ignore") warnings.filterwarnings(
The fun_control Dictionary

Set tensorboard_path to None if you are working under Windows.

from spotPython.utils.init import fun_control_init
fun_control = fun_control_init(task="classification",
    tensorboard_path="runs/16_spot_hpt_sklearn_classification")
import pandas as pd
if ORIGINAL == True:
    train_df = pd.read_csv('./data/VBDP/trainn.csv')
    test_df = pd.read_csv('./data/VBDP/testt.csv')
else:
    train_df = pd.read_csv('./data/VBDP/train.csv')
    # remove the id column
    train_df = train_df.drop(columns=['id'])
from sklearn.preprocessing import OrdinalEncoder
n_samples = train_df.shape[0]
n_features = train_df.shape[1] - 1
target_column = "prognosis"
# Encode our prognosis labels as integers for easier decoding later
enc = OrdinalEncoder()
train_df[target_column] = enc.fit_transform(train_df[[target_column]])
train_df.columns = [f"x{i}" for i in range(1, n_features+1)] + [target_column]
print(train_df.shape)
train_df.head()
(707, 65)
x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | ... | x56 | x57 | x58 | x59 | x60 | x61 | x62 | x63 | x64 | prognosis | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 |
1 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 7.0 |
2 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | ... | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 3.0 |
3 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 10.0 |
4 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 6.0 |
5 rows × 65 columns
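Because the target was encoded with an OrdinalEncoder, the integer codes can be mapped back to the original prognosis names whenever needed. A minimal sketch, reusing the fitted encoder enc from above:

# map encoded prognosis values back to the original class names
# enc.categories_[0] holds the label that corresponds to each integer code
print(enc.categories_[0][:3])                                # first three class names
decoded = enc.inverse_transform(train_df[[target_column]])   # shape (n_samples, 1)
print(decoded[:3].ravel())                                   # first three rows, decoded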
The full data set train_df has 64 features. The target column is labeled as prognosis.

We split out a hold-out test set (25% of the data) so that we can calculate an example MAP@K score later.
import numpy as np
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(train_df.drop(target_column, axis=1), train_df[target_column],
    random_state=42,
    test_size=0.25,
    stratify=train_df[target_column])
train = pd.DataFrame(np.hstack((X_train, np.array(y_train).reshape(-1, 1))))
test = pd.DataFrame(np.hstack((X_test, np.array(y_test).reshape(-1, 1))))
train.columns = [f"x{i}" for i in range(1, n_features+1)] + [target_column]
test.columns = [f"x{i}" for i in range(1, n_features+1)] + [target_column]
print(train.shape)
print(test.shape)
train.head()
(530, 65)
(177, 65)
x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | ... | x56 | x57 | x58 | x59 | x60 | x61 | x62 | x63 | x64 | prognosis | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 |
1 | 0.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 |
2 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 |
3 | 1.0 | 1.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 6.0 |
4 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 5.0 |
5 rows × 65 columns
# add the dataset to the fun_control
"data": train_df, # full dataset,
fun_control.update({"train": train,
"test": test,
"n_samples": n_samples,
"target_column": target_column})
Data preprocessing can be very simple, e.g., you can ignore it. Then you would choose the prep_model "None":

prep_model = None
fun_control.update({"prep_model": prep_model})
A default approach for numerical data is the StandardScaler (mean 0, variance 1). This can be selected as follows:

# from sklearn.preprocessing import StandardScaler
# prep_model = StandardScaler()
# fun_control.update({"prep_model": prep_model})
Even more complicated pre-processing steps are possible, e.g., the following pipeline:

# from sklearn.compose import ColumnTransformer
# from sklearn.preprocessing import OneHotEncoder, StandardScaler
# categorical_columns = []
# one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)
# prep_model = ColumnTransformer(
# transformers=[
# ("categorical", one_hot_encoder, categorical_columns),
# ],
# remainder=StandardScaler(),
# )
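To make the effect of such a pipeline concrete, here is a small, self-contained illustration. The toy DataFrame and its column names are made up for this example, and sparse_output requires scikit-learn >= 1.2:

import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler

toy = pd.DataFrame({"color": ["red", "blue", "red"], "x1": [1.0, 2.0, 3.0]})
prep_demo = ColumnTransformer(
    transformers=[("categorical",
                   OneHotEncoder(handle_unknown="ignore", sparse_output=False),
                   ["color"])],
    remainder=StandardScaler(),  # all remaining (numeric) columns are standardized
)
# one-hot columns for "color" followed by the standardized x1 column
print(prep_demo.fit_transform(toy))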
Select algorithm (core_model) and core_model_hyper_dict

The selection of the algorithm (ML model) that should be tuned is done by specifying its name from the sklearn implementation. For example, the SVC support vector machine classifier is selected as follows:
add_core_model_to_fun_control(SVC, fun_control, SklearnHyperDict)
Other core_models are, e.g., RandomForestClassifier, LogisticRegression, KNeighborsClassifier, GradientBoostingClassifier, and HistGradientBoostingClassifier (see the commented alternatives in the code below). We will use the SVC classifier in this example.
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.linear_model import ElasticNet
from spotPython.hyperparameters.values import add_core_model_to_fun_control
from spotPython.data.sklearn_hyper_dict import SklearnHyperDict
from spotPython.fun.hypersklearn import HyperSklearn
# core_model = RidgeCV
# core_model = GradientBoostingRegressor
# core_model = ElasticNet
# core_model = RandomForestClassifier
core_model = SVC
# core_model = LogisticRegression
# core_model = KNeighborsClassifier
# core_model = GradientBoostingClassifier
# core_model = HistGradientBoostingClassifier
add_core_model_to_fun_control(core_model=core_model,
    fun_control=fun_control,
    hyper_dict=SklearnHyperDict,
    filename=None)
Now fun_control has the information from the JSON file. The available hyperparameters are:
print(*fun_control["core_model_hyper_dict"].keys(), sep="\n")
C
kernel
degree
gamma
coef0
shrinking
probability
tol
cache_size
break_ties
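If you want to see the full specification of a single hyperparameter (its type, default value, and bounds as read from the JSON file), you can print the corresponding dictionary entry; the exact fields depend on the spotPython version:

# inspect the stored specification of the C hyperparameter
print(fun_control["core_model_hyper_dict"]["C"])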
Modify hyper_dict Hyperparameters for the Selected Algorithm aka core_model
Numeric and boolean values can be modified using the modify_hyper_parameter_bounds method. For example, to change the tol hyperparameter of the SVC model to the interval [1e-3, 1e-2], the following code can be used:

modify_hyper_parameter_bounds(fun_control, "tol", bounds=[1e-3, 1e-2])

from spotPython.hyperparameters.values import modify_hyper_parameter_bounds
modify_hyper_parameter_bounds(fun_control, "probability", bounds=[1, 1])
spotPython provides functions for modifying the hyperparameters, their bounds and factors, as well as for activating and de-activating hyperparameters without re-compilation of the Python source code. These functions were described in Section 12.6.
Factors can be modified with the modify_hyper_parameter_levels function. For example, to exclude the sigmoid kernel from the tuning, the kernel hyperparameter of the SVC model can be modified as follows:

modify_hyper_parameter_levels(fun_control, "kernel", ["linear", "rbf"])

The new setting can be controlled via:

fun_control["core_model_hyper_dict"]["kernel"]

from spotPython.hyperparameters.values import modify_hyper_parameter_levels
modify_hyper_parameter_levels(fun_control, "kernel", ["rbf"])
Optimizers are described in Section 12.6.1.
A machine learning model is fitted by optimizing a loss function, e.g., the cross_entropy function, and evaluated with respect to a metric, for example, the accuracy function. The loss function, that is usually used in deep learning for optimizing the weights of the net, is stored in the fun_control dictionary as "loss_function".

There are two different types of metrics in spotPython:

* "metric_river" is used for the river based evaluation via eval_oml_iter_progressive.
* "metric_sklearn" is used for the sklearn based evaluation.

We will consider multi-class classification metrics, e.g., mapk_score and top_k_accuracy_score.
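To illustrate what MAP@K measures for probabilistic multi-class predictions, here is a small, self-contained sketch. The helper map_at_k below is written for this illustration only and is not spotPython's mapk_score implementation (whose tie handling may differ):

import numpy as np

def map_at_k(y_true, y_proba, k=3):
    # Mean Average Precision at k for single-label problems: a sample scores
    # 1/(rank+1) if its true class appears among the top-k predicted classes
    # (rank is the 0-based position of the true class), and 0 otherwise.
    y_true = np.asarray(y_true, dtype=int)
    top_k = np.argsort(-np.asarray(y_proba), axis=1)[:, :k]
    scores = []
    for label, candidates in zip(y_true, top_k):
        hits = np.where(candidates == label)[0]
        scores.append(1.0 / (hits[0] + 1) if hits.size > 0 else 0.0)
    return float(np.mean(scores))

# toy example: 3 samples, 4 classes
y_true = [0, 2, 1]
y_proba = np.array([[0.7, 0.1, 0.1, 0.1],   # true class ranked 1st -> 1
                    [0.4, 0.3, 0.2, 0.1],   # true class ranked 3rd -> 1/3
                    [0.5, 0.1, 0.2, 0.2]])  # true class not in top 3 -> 0
print(map_at_k(y_true, y_proba, k=3))       # (1 + 1/3 + 0) / 3 = 0.444...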
In this multi-class classification example the machine learning algorithm should return the probabilities of the specific classes ("predict_proba") instead of the predicted values. We set "predict_proba" to True in the fun_control dictionary.

To select the MAPK metric, the following two entries can be added to the fun_control dictionary:

"metric_sklearn": mapk_score
"metric_params": {"k": 3}
Alternatively, other metrics for multi-class classification can be used, e.g.:

* top_k_accuracy_score or
* roc_auc_score

The metric roc_auc_score requires the parameter "multi_class", e.g., "multi_class": "ovr". This is set in the fun_control dictionary.

spotPython performs a minimization, therefore, metrics that should be maximized have to be multiplied by -1. This is done by setting "weights" to -1.
from spotPython.utils.metrics import mapk_score
fun_control.update({"weights": -1,
"metric_sklearn": mapk_score,
"predict_proba": True,
"metric_params": {"k": 3},
})
"eval_holdout"
.
fun_control.update({"eval": "train_hold_out",
})
Instead of using the OOB-score, the classical cross validation can be used. The number of folds is set by the key "k_folds". For example, to use 5-fold cross validation, the key "k_folds" is set to 5. Uncomment the following lines to use cross validation:
# fun_control.update({
# "eval": "train_cv",
# "k_folds": 10,
# })
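Conceptually, such a k-fold evaluation repeatedly refits the model on k-1 folds and scores it on the remaining fold. The following sketch spells this out by hand for the MAP@3 metric; it is for illustration only and is not spotPython's internal implementation. It assumes X and y are NumPy arrays and reuses the mapk_score function imported above:

from sklearn.model_selection import StratifiedKFold
import numpy as np

def cv_mapk(model, X, y, k_folds=10, k=3):
    # stratified k-fold cross validation with one MAP@k score per fold
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)
    fold_scores = []
    for train_idx, val_idx in skf.split(X, y):
        model.fit(X[train_idx], y[train_idx])
        proba = model.predict_proba(X[val_idx])
        fold_scores.append(mapk_score(y_true=y[val_idx], y_pred=proba, k=k))
    return np.mean(fold_scores)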
# extract the variable types, names, and bounds
from spotPython.hyperparameters.values import (get_bound_values,
    get_var_name,
    get_var_type,)
var_type = get_var_type(fun_control)
var_name = get_var_name(fun_control)
lower = get_bound_values(fun_control, "lower")
upper = get_bound_values(fun_control, "upper")
from spotPython.utils.eda import gen_design_table
print(gen_design_table(fun_control))
| name | type | default | lower | upper | transform |
|-------------|--------|-----------|----------|---------|-------------|
| C | float | 1.0 | 0.1 | 10 | None |
| kernel | factor | rbf | 0 | 0 | None |
| degree | int | 3 | 3 | 3 | None |
| gamma | factor | scale | 0 | 1 | None |
| coef0 | float | 0.0 | 0 | 0 | None |
| shrinking | factor | 0 | 0 | 1 | None |
| probability | factor | 0 | 1 | 1 | None |
| tol | float | 0.001 | 0.0001 | 0.01 | None |
| cache_size | float | 200.0 | 100 | 400 | None |
| break_ties | factor | 0 | 0 | 1 | None |
The objective function is selected next. It implements an interface from sklearn’s training, validation, and testing methods to spotPython.

from spotPython.fun.hypersklearn import HyperSklearn
fun = HyperSklearn().fun_sklearn
The Spot Optimizer

The run time of the optimizer is limited to MAX_TIME minutes (max_time). The time needed to evaluate the initial design (init_size) is not considered. The tuning run starts from the default hyperparameter configuration, which is obtained as an array:

from spotPython.hyperparameters.values import get_default_hyperparameters_as_array
X_start = get_default_hyperparameters_as_array(fun_control)
X_start
array([[1.e+00, 0.e+00, 3.e+00, 0.e+00, 0.e+00, 0.e+00, 0.e+00, 1.e-03,
2.e+02, 0.e+00]])
import numpy as np
from spotPython.spot import spot
from math import inf
spot_tuner = spot.Spot(fun=fun,
    lower=lower,
    upper=upper,
    fun_evals=inf,
    fun_repeats=1,
    max_time=MAX_TIME,
    noise=False,
    tolerance_x=np.sqrt(np.spacing(1)),
    var_type=var_type,
    var_name=var_name,
    infill_criterion="y",
    n_points=1,
    seed=123,
    log_level=50,
    show_models=False,
    show_progress=True,
    fun_control=fun_control,
    design_control={"init_size": INIT_SIZE,
                    "repeats": 1},
    surrogate_control={"noise": True,
                       "cod_type": "norm",
                       "min_theta": -4,
                       "max_theta": 3,
                       "n_theta": len(var_name),
                       "model_fun_evals": 10_000,
                       "log_level": 50
                       })
spot_tuner.run(X_start=X_start)
spotPython tuning: -0.38345864661654133 [----------] 0.30%
spotPython tuning: -0.38345864661654133 [----------] 0.61%
spotPython tuning: -0.38345864661654133 [----------] 0.98%
spotPython tuning: -0.38345864661654133 [----------] 1.33%
spotPython tuning: -0.38345864661654133 [----------] 1.56%
spotPython tuning: -0.38345864661654133 [----------] 1.84%
spotPython tuning: -0.38345864661654133 [----------] 2.08%
spotPython tuning: -0.38345864661654133 [----------] 2.37%
spotPython tuning: -0.38345864661654133 [----------] 2.64%
spotPython tuning: -0.39473684210526316 [----------] 2.91%
spotPython tuning: -0.39473684210526316 [----------] 3.14%
spotPython tuning: -0.39473684210526316 [----------] 3.39%
spotPython tuning: -0.39473684210526316 [----------] 4.00%
spotPython tuning: -0.39473684210526316 [----------] 4.66%
spotPython tuning: -0.39473684210526316 [#---------] 5.34%
spotPython tuning: -0.39473684210526316 [#---------] 6.26%
spotPython tuning: -0.39473684210526316 [#---------] 7.59%
spotPython tuning: -0.39473684210526316 [#---------] 8.81%
spotPython tuning: -0.39473684210526316 [#---------] 9.54%
spotPython tuning: -0.39473684210526316 [#---------] 10.39%
spotPython tuning: -0.39473684210526316 [#---------] 11.20%
spotPython tuning: -0.39473684210526316 [#---------] 12.04%
spotPython tuning: -0.39473684210526316 [#---------] 12.89%
spotPython tuning: -0.39473684210526316 [#---------] 13.80%
spotPython tuning: -0.39473684210526316 [#---------] 14.90%
spotPython tuning: -0.39473684210526316 [##--------] 15.75%
spotPython tuning: -0.39473684210526316 [##--------] 16.42%
spotPython tuning: -0.39473684210526316 [##--------] 17.28%
spotPython tuning: -0.39473684210526316 [##--------] 18.47%
spotPython tuning: -0.39473684210526316 [##--------] 19.44%
spotPython tuning: -0.39473684210526316 [##--------] 20.52%
spotPython tuning: -0.39473684210526316 [##--------] 22.02%
spotPython tuning: -0.39473684210526316 [##--------] 23.52%
spotPython tuning: -0.39473684210526316 [##--------] 24.81%
spotPython tuning: -0.39473684210526316 [###-------] 26.10%
spotPython tuning: -0.39473684210526316 [###-------] 27.87%
spotPython tuning: -0.39473684210526316 [###-------] 29.17%
spotPython tuning: -0.39473684210526316 [###-------] 30.51%
spotPython tuning: -0.39473684210526316 [###-------] 31.96%
spotPython tuning: -0.39473684210526316 [###-------] 33.36%
spotPython tuning: -0.39473684210526316 [###-------] 34.69%
spotPython tuning: -0.39473684210526316 [####------] 35.99%
spotPython tuning: -0.39473684210526316 [####------] 37.16%
spotPython tuning: -0.39473684210526316 [####------] 39.15%
spotPython tuning: -0.39473684210526316 [####------] 40.66%
spotPython tuning: -0.39473684210526316 [####------] 42.02%
spotPython tuning: -0.39473684210526316 [####------] 43.36%
spotPython tuning: -0.39473684210526316 [#####-----] 45.00%
spotPython tuning: -0.39473684210526316 [#####-----] 46.30%
spotPython tuning: -0.39473684210526316 [#####-----] 47.85%
spotPython tuning: -0.39473684210526316 [#####-----] 49.33%
spotPython tuning: -0.39473684210526316 [#####-----] 51.05%
spotPython tuning: -0.39473684210526316 [#####-----] 52.60%
spotPython tuning: -0.39473684210526316 [#####-----] 54.10%
spotPython tuning: -0.39473684210526316 [######----] 55.53%
spotPython tuning: -0.39473684210526316 [######----] 56.97%
spotPython tuning: -0.39473684210526316 [######----] 58.36%
spotPython tuning: -0.39473684210526316 [######----] 59.84%
spotPython tuning: -0.39473684210526316 [######----] 64.75%
spotPython tuning: -0.39473684210526316 [#######---] 72.77%
spotPython tuning: -0.39473684210526316 [########--] 77.94%
spotPython tuning: -0.39473684210526316 [########--] 83.48%
spotPython tuning: -0.39473684210526316 [#########-] 89.59%
spotPython tuning: -0.39473684210526316 [#########-] 94.81%
spotPython tuning: -0.39473684210526316 [##########] 100.00% Done...
<spotPython.spot.spot.Spot at 0x2ab363970>
The textual output shown in the console (or code cell) can be visualized with Tensorboard as described in Section 12.9, see also the description in the documentation: Tensorboard.
After the hyperparameter tuning run is finished, the progress of the hyperparameter tuning can be visualized. The following code generates the progress plot and saves it to a file.
spot_tuner.plot_progress(log_y=False,
    filename="./figures/" + experiment_name + "_progress.png")
print(gen_design_table(fun_control=fun_control,
    spot=spot_tuner))
| name | type | default | lower | upper | tuned | transform | importance | stars |
|-------------|--------|-----------|---------|---------|----------------------|-------------|--------------|---------|
| C | float | 1.0 | 0.1 | 10.0 | 4.211117448021866 | None | 100.00 | *** |
| kernel | factor | rbf | 0.0 | 0.0 | 0.0 | None | 0.00 | |
| degree | int | 3 | 3.0 | 3.0 | 3.0 | None | 0.00 | |
| gamma | factor | scale | 0.0 | 1.0 | 1.0 | None | 100.00 | *** |
| coef0 | float | 0.0 | 0.0 | 0.0 | 0.0 | None | 0.00 | |
| shrinking | factor | 0 | 0.0 | 1.0 | 1.0 | None | 0.00 | |
| probability | factor | 0 | 1.0 | 1.0 | 1.0 | None | 0.00 | |
| tol | float | 0.001 | 0.0001 | 0.01 | 0.004278044656534419 | None | 0.00 | |
| cache_size | float | 200.0 | 100.0 | 400.0 | 319.49898598118955 | None | 0.93 | . |
| break_ties | factor | 0 | 0.0 | 1.0 | 1.0 | None | 0.00 | |
spot_tuner.plot_importance(threshold=0.025, filename="./figures/" + experiment_name + "_importance.png")
from spotPython.hyperparameters.values import get_default_values, transform_hyper_parameter_values
values_default = get_default_values(fun_control)
values_default = transform_hyper_parameter_values(fun_control=fun_control, hyper_parameter_values=values_default)
values_default
{'C': 1.0,
'kernel': 'rbf',
'degree': 3,
'gamma': 'scale',
'coef0': 0.0,
'shrinking': 0,
'probability': 0,
'tol': 0.001,
'cache_size': 200.0,
'break_ties': 0}
from sklearn.pipeline import make_pipeline
= make_pipeline(fun_control["prep_model"], fun_control["core_model"](**values_default))
model_default model_default
Pipeline(steps=[('nonetype', None), ('svc', SVC(break_ties=0, cache_size=200.0, probability=0, shrinking=0))])
"probability": 1}) values_default.update({
X = spot_tuner.to_all_dim(spot_tuner.min_X.reshape(1,-1))
print(X)
[[4.21111745e+00 0.00000000e+00 3.00000000e+00 1.00000000e+00
0.00000000e+00 1.00000000e+00 1.00000000e+00 4.27804466e-03
3.19498986e+02 1.00000000e+00]]
from spotPython.hyperparameters.values import assign_values, return_conf_list_from_var_dict
= assign_values(X, fun_control["var_name"])
v_dict =v_dict, fun_control=fun_control) return_conf_list_from_var_dict(var_dict
[{'C': 4.211117448021866,
'kernel': 'rbf',
'degree': 3,
'gamma': 'auto',
'coef0': 0.0,
'shrinking': 1,
'probability': 1,
'tol': 0.004278044656534419,
'cache_size': 319.49898598118955,
'break_ties': 1}]
from spotPython.hyperparameters.values import get_one_sklearn_model_from_X
model_spot = get_one_sklearn_model_from_X(X, fun_control)
model_spot
SVC(C=4.211117448021866, break_ties=1, cache_size=319.49898598118955, gamma='auto', probability=1, shrinking=1, tol=0.004278044656534419)
from spotPython.utils.convert import get_Xy_from_df
= get_Xy_from_df(fun_control["train"], fun_control["target_column"])
X_train, y_train = get_Xy_from_df(fun_control["test"], fun_control["target_column"])
X_test, y_test X_test.shape, y_test.shape
((177, 64), (177,))
model_spot.fit(X_train, y_train)
y_pred = model_spot.predict_proba(X_test)
res = mapk_score(y_true=y_test, y_pred=y_pred, k=3)
res
0.38229755178907715
def repeated_eval(n, model):
    res_values = []
    for i in range(n):
        model.fit(X_train, y_train)
        y_pred = model.predict_proba(X_test)
        res = mapk_score(y_true=y_test, y_pred=y_pred, k=3)
        res_values.append(res)
    mean_res = np.mean(res_values)
    print(f"mean_res: {mean_res}")
    std_res = np.std(res_values)
    print(f"std_res: {std_res}")
    min_res = np.min(res_values)
    print(f"min_res: {min_res}")
    max_res = np.max(res_values)
    print(f"max_res: {max_res}")
    median_res = np.median(res_values)
    print(f"median_res: {median_res}")
    return mean_res, std_res, min_res, max_res, median_res

_ = repeated_eval(30, model_spot)
mean_res: 0.376961707470182
std_res: 0.0036978472002569076
min_res: 0.3700564971751412
max_res: 0.3860640301318267
median_res: 0.37664783427495285
"svc"].probability = True
model_default["svc"] model_default.fit(X_train, y_train)[
SVC(break_ties=0, cache_size=200.0, probability=True, shrinking=0)
y_pred = model_default.predict_proba(X_test)
mapk_score(y_true=y_test, y_pred=y_pred, k=3)
0.3794726930320151
Since one single evaluation is not meaningful, we perform, similar to the evaluation of the SPOT results, \(n=30\) runs of the default setting and calculate the mean and standard deviation of the performance metric.
_ = repeated_eval(30, model_default)
mean_res: 0.384902699309479
std_res: 0.004668276878397637
min_res: 0.37476459510357824
max_res: 0.396421845574388
median_res: 0.3860640301318267
from spotPython.plot.validation import plot_confusion_matrix
= "Default") plot_confusion_matrix(model_default, fun_control, title
="SPOT") plot_confusion_matrix(model_spot, fun_control, title
min(spot_tuner.y), max(spot_tuner.y)
(-0.39473684210526316, -0.3370927318295739)
from spotPython.sklearn.traintest import evaluate_cv
fun_control.update({"eval": "train_cv",
"k_folds": 10,
})=model_spot, fun_control=fun_control, verbose=0) evaluate_cv(model
(0.3465408805031446, None)
fun_control.update({"eval": "test_cv",
"k_folds": 10,
})=model_spot, fun_control=fun_control, verbose=0) evaluate_cv(model
(0.3538671023965142, None)
fun_control.update({"eval": "data_cv",
"k_folds": 10,
})=model_spot, fun_control=fun_control, verbose=0) evaluate_cv(model
(0.3643393695506371, None)
= "./figures/" + experiment_name
filename =filename) spot_tuner.plot_important_hyperparameter_contour(filename
C: 100.0
gamma: 100.0
cache_size: 0.9342569567506037
spot_tuner.parallel_plot()
PLOT_ALL = False
if PLOT_ALL:
    n = spot_tuner.k
    # min_z and max_z were not defined in this cell; take the range of the
    # observed objective values (cf. min(spot_tuner.y), max(spot_tuner.y) above)
    min_z = min(spot_tuner.y)
    max_z = max(spot_tuner.y)
    for i in range(n-1):
        for j in range(i+1, n):
            spot_tuner.plot_contour(i=i, j=j, min_z=min_z, max_z=max_z)