import random
import types
import warnings
from lcc.entities.exceptions import InvalidOption
from lcc.entities.exceptions import QueryInputError
from lcc.stars_processing.stars_filter import StarsFilter
from lcc.stars_processing.tools.stats_manager import StatsManager
from lcc.utils.helpers import progressbar
import numpy as np
class ParamsEstimator(object):
    """
    Search for the best combination of descriptor/decider parameters.

    Every combination in `tuned_params` is used to build a `StarsFilter`,
    which is trained on the train part of the samples and evaluated on
    the test part. The combination with the best score is then selected.

    Attributes
    ----------
    searched_train, searched_test : list
        Train and test parts of the searched stars

    others_train, others_test : list
        Train and test parts of the contamination stars

    descriptors : list, iterable
        Unconstructed descriptor objects

    deciders : list, iterable
        Unconstructed decider objects

    tuned_params : list of dicts
        List of parameters to tune

    static_params : dict
        Constant values for descriptors and deciders

    stats_list : list or None
        Statistical values of all combinations (set by
        `evaluateCombinations`)

    filters : list or None
        Filters of all combinations (set by `evaluateCombinations`)

    stats : list or None
        Rows prepared for saving (set by `saveOutput`)

    best_id : int or None
        Index of the best combination (set by `fit`)
    """

    def __init__(self, searched, others, descriptors, deciders, tuned_params,
                 split_ratio=0.5, static_params=None, **kwargs):
        """
        Parameters
        ----------
        searched : list
            Searched stars

        others : list
            Contamination stars

        descriptors : list, iterable
            Unconstructed descriptors object

        deciders : list, iterable
            Unconstructed decider instances

        tuned_params : list of dicts
            List of parameters to tune

            EXAMPLE
            [{'AbbeValue' : {'bins' : 10, ..}, 'NeuronDecider' : {'hidden_layers': 2, ..}, .. ]

        split_ratio : float
            Fraction of each sample which is used for training. The rest
            is used for testing

        static_params : dict, optional
            Constant values for descriptors and deciders. Format is the
            same as one item of tuned_params

        **kwargs
            Accepted for backward compatibility and ignored
        """
        # Shuffle private copies so the lists of the caller stay untouched.
        searched = list(searched)
        others = list(others)
        random.shuffle(searched)
        random.shuffle(others)

        n_searched = int(len(searched) * split_ratio)
        n_others = int(len(others) * split_ratio)

        self.searched_train = searched[:n_searched]
        self.searched_test = searched[n_searched:]
        self.others_train = others[:n_others]
        self.others_test = others[n_others:]

        self.descriptors = descriptors
        self.deciders = deciders
        self.tuned_params = tuned_params
        # A fresh dict per instance - a `{}` default would be shared.
        self.static_params = {} if static_params is None else static_params

        self.stats_list = None
        self.filters = None
        self.stats = None
        self.best_id = None

    def evaluateCombinations(self):
        """
        Evaluate all combinations of the filter parameters

        The results are also stored in `self.stats_list` and
        `self.filters`.

        Returns
        -------
        list
            Statistical values of all combinations

        list
            Filters created from particular combinations

        list
            Input parameters of all combinations
        """
        filters = []
        stats_list = []
        for tun_param in progressbar(self.tuned_params,
                                     "Evaluating the combinations: "):
            stars_filter, stats = self.evaluate(tun_param)
            stats_list.append(stats)
            filters.append(stars_filter)

        self.stats_list = stats_list
        self.filters = filters
        return stats_list, filters, self.tuned_params

    def fit(self, score_func=None, opt="max", save_params=None):
        """
        Find the best combination of the filter parameters

        Parameters
        ----------
        score_func : function
            Function which takes statistical values (as keyword
            arguments) and returns a score. If it is not given,
            the "precision" value is used (0 if it is missing)

        opt : str
            Option for evaluating scores

                "max" - Returns the highest score

                "min" - Returns the lowest score

        save_params : dict, optional
            Parameters for saving outputs. For each output there are some
            mandatory keys:

                ROC plot:
                    "roc_plot_path"
                    "roc_plot_name"
                    "roc_plot_title" - optional

                ROC data file:
                    "roc_data_path"
                    "roc_data_name"
                    "roc_data_delim" - optional

                Statistical params of all combinations:
                    "stats_path"
                    "stats_name"
                    "stats_delim" - optional

        Returns
        -------
        object
            Filter created from the best parameters

        dict
            Statistical values of the best combination

        dict
            Input parameters of the best combination

        Raises
        ------
        InvalidOption
            If `opt` is neither "max" nor "min"
        """
        # Validate the option before the (potentially long) evaluation.
        if opt == "max":
            pick_best = np.argmax
        elif opt == "min":
            pick_best = np.argmin
        else:
            raise InvalidOption("Available options are: 'max' or 'min'.")

        if save_params is None:
            save_params = {}

        stats_list, filters, tuned_params = self.evaluateCombinations()

        # Saving is best effort - a failure must not discard the results.
        try:
            self.saveOutput(save_params)
        except Exception as e:
            warnings.warn("\nError during saving outputs...:\n\t%s" % e)

        scores = []
        for stat in stats_list:
            if score_func:
                scores.append(score_func(**stat))
            else:
                scores.append(stat.get("precision", 0))

        best_id = int(pick_best(scores))
        self.best_id = best_id
        return filters[best_id], stats_list[best_id], tuned_params[best_id]

    def evaluate(self, combination):
        """
        Build, train and evaluate a filter for one parameter combination

        Parameters
        ----------
        combination : dict
            Dictionary of dictionaries - one per a descriptor or decider,
            keyed by the class name. Values from `static_params` take
            precedence over the values of the combination.

            EXAMPLE
            {'AbbeValue': {'bin':10, .. }, .. }

        Returns
        -------
        tuple
            Stars filter, statistical values

        Raises
        ------
        QueryInputError
            If a descriptor or a decider raises TypeError while it is
            being constructed from the given parameters
        """
        descriptors = self._construct(self.descriptors, combination)
        deciders = self._construct(self.deciders, combination)

        stars_filter = StarsFilter(descriptors, deciders)
        stars_filter.learn(self.searched_train, self.others_train)

        stat = stars_filter.getStatistic(self.searched_test, self.others_test)
        return stars_filter, stat

    def _construct(self, constructors, combination):
        """Instantiate every class in `constructors` with its parameters"""
        constructed = []
        for constructor in constructors:
            name = constructor.__name__
            # Bound before `try` so the error message can always use it.
            params = {}
            try:
                params.update(combination.get(name, {}))
                params.update(self.static_params.get(name, {}))
                constructed.append(constructor(**params))
            except TypeError:
                raise QueryInputError("Not enough parameters to construct constructor {0}\nGot: {1}".format(
                    name, params))
        return constructed

    def saveOutput(self, save_params):
        """
        Save the outputs which are requested in `save_params`

        The rows prepared for saving are stored in `self.stats` even if
        no output is requested.

        Parameters
        ----------
        save_params : dict
            Parameters for saving outputs. For each output there are some
            mandatory keys:

                ROC plot:
                    "roc_plot_path"
                    "roc_plot_name"
                    "roc_plot_title" - optional

                ROC data file:
                    "roc_data_path"
                    "roc_data_name"
                    "roc_data_delim" - optional (falls back to
                                       "stats_delim")

                Statistical params of all combinations:
                    "stats_path"
                    "stats_name"
                    "stats_delim" - optional
        """
        to_save = self._prepareStatus(self.stats_list, self.tuned_params)
        self.stats = to_save

        man = StatsManager(to_save)
        stats_delim = save_params.get("stats_delim", "\t")

        if "roc_plot_path" in save_params and "roc_plot_name" in save_params:
            man.plotROC(save=True,
                        title=save_params.get("roc_plot_title", "ROC"),
                        path=save_params.get("roc_plot_path"),
                        file_name=save_params.get("roc_plot_name"))

        if "roc_data_path" in save_params and "roc_data_name" in save_params:
            # "stats_delim" is kept as a fallback, because it used to be
            # the only key which was read for the ROC data file.
            man.saveROCfile(path=save_params.get("roc_data_path"),
                            file_name=save_params.get("roc_data_name"),
                            delim=save_params.get("roc_data_delim",
                                                  stats_delim))

        if "stats_path" in save_params and "stats_name" in save_params:
            man.saveStats(path=save_params.get("stats_path"),
                          file_name=save_params.get("stats_name"),
                          delim=stats_delim,
                          overwrite=True)

    def _prepareStatus(self, stats_list, tuned_params):
        """Merge statistical values with flattened parameters, row by row"""
        result = []
        for st, tun in zip(stats_list, tuned_params):
            row = st.copy()
            row.update(self._mergeTwoDict(st, tun))
            result.append(row)
        return result

    def _mergeTwoDict(self, stat, tun):
        """
        Flatten one combination into a list of ("Name:param", value) pairs

        Iterable values (except strings) and old-style class instances
        are left out. `stat` is not used; it is kept in the signature
        for backward compatibility.
        """
        unpacked_tun = []
        for prefix, inner_dict in tun.items():
            for key, value in inner_dict.items():
                if self._isRecordable(value):
                    unpacked_tun.append((":".join([prefix, key]), value))
        return unpacked_tun

    @staticmethod
    def _isRecordable(value):
        """Decide whether a parameter value can be stored as one cell"""
        # Strings expose `__iter__` on Python 3 only; treat them as
        # scalars on both versions.
        if isinstance(value, (str, bytes, type(u""))):
            return True
        if hasattr(value, "__iter__"):
            return False
        # `types.InstanceType` (old-style instances) exists on Python 2 only.
        old_style = getattr(types, "InstanceType", None)
        return old_style is None or not isinstance(value, old_style)