Source code for lcc.stars_processing.stars_filter

from __future__ import division

import warnings

from lcc.entities.exceptions import QueryInputError
from lcc.utils.commons import check_attribute
from lcc.utils.helpers import getMeanDict
import numpy as np
import pandas as pd


class StarsFilter(object):
    """
    Filter stars by combining descriptors and deciders.

    Descriptors turn every star into a point in a parameter space and
    deciders estimate, for such a point, the probability that the star
    belongs to the searched group.

    Attributes
    ----------
    descriptors : list
        Descriptor objects

    deciders : list
        Decider objects

    learned : bool
        It is True after executing the learning

    searched_coords : list
        Parameters space coordinates (got from descriptors) of searched
        objects, stored exactly as they were passed to `learnOnCoords`

    others_coords : list
        Parameters space coordinates (got from descriptors) of
        contamination objects, stored exactly as they were passed to
        `learnOnCoords`
    """

    # Translation of the public `pass_method` names of `filterStars` to the
    # aggregation methods understood by `evaluateCoordinates`.
    _PASS_METHODS = {"all": "lowest", "mean": "mean", "one": "highest"}

    def __init__(self, descriptors, deciders):
        """
        Parameters
        ----------
        descriptors : list
            Descriptor objects

        deciders : list, tuple or a single decider
            Decider objects. A single decider is wrapped into a list.
        """
        self.descriptors = descriptors

        if not isinstance(deciders, (list, tuple)):
            deciders = [deciders]
        self.deciders = deciders

        if not deciders:
            warnings.warn("There are no deciders!")
        if not descriptors:
            warnings.warn("There are no descriptors!")

        self.learned = False
        self.searched_coords = []
        self.others_coords = []

    def __str__(self, *args, **kwargs):
        descriptor_names = ", ".join(
            [desc.__class__.__name__ for desc in self.descriptors])
        decider_names = ", ".join(
            [dec.__class__.__name__ for dec in self.deciders])

        txt = "Descriptors: " + descriptor_names
        txt += "\nDeciders: " + decider_names
        if self.learned:
            txt += "\nStar filter is learned\n"
            txt += "It was trained on the sample of %i searched and %i contamination objects" % (
                len(self.searched_coords), len(self.others_coords))
        else:
            txt += "\nStar filter is not learned"
        return txt

    # NOTE(review): `check_attribute` is imported from lcc.utils.commons;
    # presumably it raises unless `self.learned` is True - confirm there.
    @check_attribute("learned", True, "raise")
    def filterStars(self, stars, pass_method="all"):
        """
        Apply all deciders

        Parameters
        ----------
        stars : list, iterable
            Star objects to be filtered

        pass_method : str
            Inspected star pass if it fulfill the selected condition.

            Methods for filtering:
                all - all probabilities have to be greater than or equal
                      to the threshold

                mean - mean probability has to be greater than or equal
                       to the threshold

                one - at least one has to be greater than or equal to
                      the threshold

        Returns
        -------
        list of `Star`s
            Stars which passed thru filtering

        Raises
        ------
        QueryInputError
            If `pass_method` is not one of the methods listed above
        """
        # Reject a wrong method before any coordinates are computed.
        try:
            meth = self._PASS_METHODS[pass_method]
        except (KeyError, TypeError):
            raise QueryInputError("Invalid filtering method")

        # Materialize once so that one-shot iterables can be indexed below.
        stars = list(stars)
        stars_coords, positions = self._coordinatesWithPositions(stars)

        # The common threshold is the mean of thresholds of all deciders.
        treshold = np.mean([dec.treshold for dec in self.deciders])
        probabilities = self.evaluateCoordinates(stars_coords, meth)

        # Stars without complete coordinates have no probability, so the
        # i-th probability belongs to the star at `positions[i]`, which is
        # not necessarily `stars[i]`.
        return [stars[position]
                for position, probab in zip(positions, probabilities)
                if probab >= treshold]

    def learnOnCoords(self, searched_coords, others_coords):
        """
        Train deciders on given sample of coordinates

        Parameters
        ----------
        searched_coords : pandas.DataFrame, list, tuple
            Sample of searched coordinates

        others_coords : pandas.DataFrame, list, tuple
            Contamination sample of coordinates

        Returns
        -------
        None
        """
        # Every decider is given plain arrays. The samples are converted
        # independently, so a DataFrame may be combined with a list.
        searched_values = self._coordsAsArray(searched_coords)
        others_values = self._coordsAsArray(others_coords)

        for decider in self.deciders:
            decider.learn(searched_values, others_values)

        self.learned = True
        # The samples are kept in the form in which they were received.
        self.searched_coords = searched_coords
        self.others_coords = others_coords

    def learn(self, searched, others):
        """
        Train deciders on given sample of `Star` objects

        Parameters
        ----------
        searched : list, tuple
            Sample of searched group of stars

        others : list, tuple
            Contamination sample of stars

        Returns
        -------
        None
        """
        self.learnOnCoords(self.getSpaceCoordinates(searched),
                           self.getSpaceCoordinates(others))

    def getSpaceCoordinates(self, stars):
        """
        Get params space coordinates according to descriptors

        Stars for which some descriptor gives no coordinates are skipped
        with a warning and rows containing missing values are dropped, so
        the result can have fewer rows than there are stars.

        Parameters
        ----------
        stars : list, tuple
            List of `Star` objects

        Returns
        -------
        pandas.DataFrame
            Coordinates of the stars as pandas DataFrame. Columns are the
            labels of descriptors and the index consists of star names.
        """
        return self._coordinatesWithPositions(stars)[0]

    def evaluateStars(self, stars, meth="mean"):
        """
        Get probabilities of membership of inspected stars

        Parameters
        ----------
        stars : list
            Star objects

        meth : str
            Method for filtering:
                mean - mean probability

                highest - highest probability

                lowest - lowest probability

        Returns
        -------
        list
            Probabilities of membership according to the selected method.
            There is one value per star with complete coordinates (see
            `getSpaceCoordinates`).
        """
        return self.evaluateCoordinates(self.getSpaceCoordinates(stars), meth)

    @check_attribute("learned", True, "raise")
    def evaluateCoordinates(self, stars_coords, meth="mean"):
        """
        Get probability of membership calculated from all deciders

        Parameters
        ----------
        stars_coords : list, iterable
            List of coordinates (lists)

        meth : str
            Method for filtering:
                mean - mean probability

                highest - highest probability

                lowest - lowest probability

        Returns
        -------
        list
            Probabilities of membership according to the selected method,
            rounded to two decimal places

        Raises
        ------
        QueryInputError
            If `meth` is not one of the methods listed above
        """
        if meth == "mean":
            reduce_probabs = np.mean
        elif meth == "highest":
            reduce_probabs = np.max
        elif meth == "lowest":
            reduce_probabs = np.min
        else:
            # Checked first, so no decider is evaluated for a wrong method.
            raise QueryInputError(
                "Invalid method for calculating membership probability")

        decisions = [decider.evaluate(stars_coords)
                     for decider in self.deciders]

        # After the transposition every row holds the probabilities which
        # all deciders assigned to one object.
        return [round(reduce_probabs(probabs), 2)
                for probabs in np.array(decisions).T]

    @check_attribute("learned", True, "raise")
    def getStatistic(self, s_stars, c_stars, treshold=None):
        """
        Get statistic values of the filter combined from all deciders

        Statistic of every decider is calculated on coordinates of the
        given stars and the results are combined by `getMeanDict`.

        Parameters
        ----------
        s_stars : list of `Star` objects
            Searched stars

        c_stars : list of `Star` objects
            Contamination stars

        treshold : float
            Threshold value for filtering (number from 0 to 1)

        Returns
        -------
        statistic information : dict
            The keys come from `getStatistic` of the deciders, which is not
            part of this module. They are expected to be:

            precision (float)
                True positive / (true positive + false positive)

            true_positive_rate (float)
                Proportion of positives that are correctly identified as such

            true_negative_rate (float)
                Proportion of negatives that are correctly identified as such

            false_positive_rate (float)
                Proportion of negatives that are incorrectly identified
                as positives

            false_negative_rate (float)
                Proportion of positives that are incorrectly identified
                as negatives
        """
        searched_stars_coords = self.getSpaceCoordinates(s_stars).values
        contamination_stars_coords = self.getSpaceCoordinates(c_stars).values

        return getMeanDict([decider.getStatistic(searched_stars_coords,
                                                 contamination_stars_coords,
                                                 treshold)
                            for decider in self.deciders])

    @staticmethod
    def _coordsAsArray(coords):
        """Return the sample of coordinates as a numpy array."""
        if isinstance(coords, pd.DataFrame):
            return coords.values
        return np.asarray(list(coords))

    def _descriptorLabels(self):
        """Return a flat list of column labels of all descriptors."""
        labels = []
        for desc in self.descriptors:
            label = desc.LABEL
            # Strings are iterable in Python 3, but a string is one label
            # and must not be split into characters.
            if hasattr(label, "__iter__") and not isinstance(label, str):
                labels.extend(label)
            else:
                labels.append(label)
        return labels

    def _coordinatesWithPositions(self, stars):
        """
        Return coordinates of the stars and positions of the kept stars.

        The second item of the returned tuple contains, for every row of
        the returned DataFrame, the position of the corresponding star in
        `stars`.
        """
        rows = []
        names = []
        positions = []
        for position, star in enumerate(stars):
            coords = self._getSpaceCoordinates(star)
            if coords:
                rows.append(coords)
                names.append(star.name)
                positions.append(position)
            else:
                warnings.warn("Not all space coordinates have been obtained")

        df_coords = pd.DataFrame(
            rows, columns=self._descriptorLabels(), index=names)

        # Remember which rows survive `dropna` before they are dropped.
        complete = df_coords.notnull().all(axis=1).values
        df_coords.dropna(inplace=True)

        kept_positions = [position
                          for position, is_complete in zip(positions, complete)
                          if is_complete]
        return df_coords, kept_positions

    def _getSpaceCoordinates(self, star):
        """
        Return a flat list of coordinates of one star from all descriptors.

        False is returned as soon as some descriptor gives no coordinates.
        """
        space_coordinate = []
        for descriptor in self.descriptors:
            _coo = descriptor.getSpaceCoords([star])
            if not _coo:
                return False

            coo = _coo[0]
            if hasattr(coo, "__iter__"):
                space_coordinate += coo
            else:
                space_coordinate.append(coo)
        return space_coordinate