Source code for discrimintools.get_candisc

# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd

def get_candisc_ind(self):
    """
    Return the individuals results of a fitted CANDISC model
    --------------------------------------------------------

    Parameters
    ----------
    self : an object of class CANDISC (its ``model_`` attribute must equal
        "candisc")

    Returns
    -------
    the ``ind_`` attribute of the model, returned as is : a dictionary of
    dataframes holding the results for the active individuals, including:
        - coord : coordinates for the individuals

    Raises
    ------
    TypeError : if ``self.model_`` is not "candisc"

    Author(s)
    ---------
    Duvérier DJIFACK ZEBAZE duverierdjifack@gmail.com
    """
    is_candisc = self.model_ == "candisc"
    if is_candisc:
        return self.ind_
    raise TypeError("'self' must be an object of class CANDISC.")
def get_candisc_var(self, choice="correlation"):
    """
    Return the variables results of a fitted CANDISC model
    ------------------------------------------------------

    Parameters
    ----------
    self : an object of class CANDISC (its ``model_`` attribute must equal
        "candisc")

    choice : which result to return. Allowed values are :
        - "correlation" : returns ``self.corr_``
        - "covariance" : returns ``self.cov_``

    Returns
    -------
    the selected attribute, returned as is (a dictionary of dataframes with
    the results for the variables)

    Raises
    ------
    TypeError : if ``self.model_`` is not "candisc"
    ValueError : if ``choice`` is not one of the allowed values

    Author(s)
    ---------
    Duvérier DJIFACK ZEBAZE duverierdjifack@gmail.com
    """
    if self.model_ != "candisc":
        raise TypeError("'self' must be an object of class CANDISC")

    # Map each allowed choice to the attribute that stores it; the attribute
    # is only read once the choice has been validated.
    attribute_by_choice = (("correlation", "corr_"), ("covariance", "cov_"))
    for allowed, attribute in attribute_by_choice:
        if choice == allowed:
            return getattr(self, attribute)

    raise ValueError("'choice' should be one of 'correlation', 'covariance'")
def get_candisc_coef(self, choice="absolute"):
    """
    Extract coefficients - CANDISC
    ------------------------------

    Parameters
    ----------
    self : an object of class CANDISC (its ``model_`` attribute must equal
        "candisc")

    choice : the element to subset from the output. Allowed values are :
        - "absolute" (canonical coefficients) : ``self.coef_`` with
          ``self.intercept_`` appended as one extra row
        - "score" (class coefficients) : ``self.score_coef_`` with
          ``self.score_intercept_`` appended below it

    Returns
    -------
    a pandas dataframe containing the coefficients followed by the intercept

    Raises
    ------
    TypeError : if ``self.model_`` is not "candisc"
    ValueError : if ``choice`` is not one of the allowed values

    Author(s)
    ---------
    Duvérier DJIFACK ZEBAZE duverierdjifack@gmail.com
    """
    if self.model_ != "candisc":
        raise TypeError("'self' must be an object of class CANDISC")

    # Reject unknown choices explicitly (same contract as get_candisc_var)
    # instead of silently falling through and returning None.
    if choice not in ["absolute", "score"]:
        raise ValueError("'choice' should be one of 'absolute', 'score'")

    if choice == "absolute":
        # intercept_ is turned into a one-row frame so it stacks under coef_.
        return pd.concat((self.coef_, self.intercept_.to_frame().T), axis=0)
    return pd.concat((self.score_coef_, self.score_intercept_), axis=0)
def get_candisc(self, choice="ind"):
    """
    Return one family of results from a fitted CANDISC model
    --------------------------------------------------------

    Parameters
    ----------
    self : an object of class CANDISC (its ``model_`` attribute must equal
        "candisc")

    choice : the result to extract. Allowed values are :
        - "ind" : forwarded to ``get_candisc_ind``
        - "correlation", "covariance" : forwarded to ``get_candisc_var``
        - "absolute", "score" : forwarded to ``get_candisc_coef``

    Returns
    -------
    a dictionary or a pandas dataframe, i.e. whatever the selected extractor
    returns

    Raises
    ------
    TypeError : if ``self.model_`` is not "candisc"
    ValueError : if ``choice`` is not one of the allowed values

    Author(s)
    ---------
    Duvérier DJIFACK ZEBAZE duverierdjifack@gmail.com
    """
    if self.model_ != "candisc":
        raise TypeError("'self' must be an object of class CANDISC")

    # Guard-clause dispatch: each recognised choice returns immediately, so
    # reaching the end of the function means the choice was not recognised.
    if choice == "ind":
        return get_candisc_ind(self)
    if choice in ("correlation", "covariance"):
        return get_candisc_var(self, choice=choice)
    if choice in ("absolute", "score"):
        return get_candisc_coef(self, choice=choice)

    raise ValueError("'choice' should be one of 'ind', 'correlation', 'covariance', 'absolute', 'score'")
def summaryCANDISC(self,digits=3,nb_element=10,ncp=3,to_markdown=False,tablefmt = "pipe",**kwargs):
    """
    Printing summaries of Canonical Discriminant Analysis model
    -----------------------------------------------------------

    Parameters
    ----------
    self : an object of class CANDISC

    digits : int, default=3. Number of decimal printed

    nb_element : int, default = 10. Maximum number of rows printed in the
        individuals and variables tables. It is capped at the number of rows
        of ``self.call_["X"]``.

    ncp : int, default = 3. Number of components shown in the variables
        table. It is capped at ``self.call_["n_components"]``.

    to_markdown : Print DataFrame in Markdown-friendly format.

    tablefmt : Table format. For more about tablefmt, see : https://pypi.org/project/tabulate/

    **kwargs : These parameters will be passed to tabulate.

    Returns
    -------
    None. Every table is written to standard output with ``print``.

    Raises
    ------
    ValueError : if ``self.model_`` is not "candisc"

    Notes
    -----
    The function only reads the fitted model: tables that need relabelling
    are copied or renamed first, so the stored results are left untouched.

    Author(s)
    ---------
    Duvérier DJIFACK ZEBAZE duverierdjifack@gmail.com
    """
    if self.model_ != "candisc":
        raise ValueError("'self' must be an object of class CANDISC")

    def _show(table):
        # Single place deciding between Markdown and plain rendering.
        if to_markdown:
            print(table.to_markdown(tablefmt=tablefmt,**kwargs))
        else:
            print(table)

    # Define number of components and number of rows to display
    ncp = min(ncp,self.call_["n_components"])
    nb_element = min(nb_element,self.call_["X"].shape[0])

    ind = get_candisc(self,choice="ind")
    vcorr = get_candisc(self,choice="correlation")
    coef = get_candisc_coef(self,choice="absolute").round(decimals=digits)
    score_coef = get_candisc_coef(self,choice="score").round(decimals=digits)

    # Canonical Discriminant Analysis Results
    print("                     Canonical Discriminant Analysis - Results                     \n")

    print("\nSummary Information\n")
    _show(self.summary_information_)

    print("\nClass Level information\n")
    _show(self.statistics_["information"])

    # Add eigenvalues informations
    print("\nImportance of components")
    eig = self.eig_.T.round(decimals=digits)
    eig.index = ["Variance","Difference","% of var.","Cumulative % of var."]
    _show(eig)

    print("\nTest of H0: The canonical correlations in the current row and all that follow are zero\n")
    _show(self.statistics_["likelihood_test"].round(decimals=digits))

    print("\nGroup means:\n")
    # Work on a copy: clearing the index name directly on the stored frame
    # would modify the fitted model as a side effect of printing it.
    gmean = self.classes_["mean"].copy()
    gmean.index.name = None
    _show(gmean.T.round(decimals=digits))

    print("\nCoefficients of canonical discriminants:\n")
    _show(coef)

    print("\nClassification functions coefficients:\n")
    _show(score_coef)

    # Add individuals informations
    if self.ind_["coord"].shape[0]>nb_element:
        print(f"\nIndividuals (the {nb_element} first)\n")
    else:
        print("\nIndividuals\n")
    _show(ind["coord"].iloc[:nb_element,:].round(decimals=digits))

    # Add variables informations
    if self.corr_["total"].shape[0]>nb_element:
        print(f"\nCorrelations between Canonical and Original Variables (the {nb_element} first)\n")
    else:
        print("\nCorrelations between Canonical and Original Variables\n")

    # Gather the total / between / within correlations of each component and
    # concatenate them once. ``rename`` returns a new Series, so the stored
    # correlation tables keep their original column names.
    corr_columns = []
    for i in range(ncp):
        for kind in ("total","between","within"):
            corr_columns.append(vcorr[kind].iloc[:,i].rename(kind+"."+str(i+1)))
    if corr_columns:
        var_infos = pd.concat(corr_columns,axis=1)
    else:
        # No component requested: keep the empty table the header refers to.
        var_infos = pd.DataFrame().astype("float")
    # Truncate to nb_element rows so the table matches the header above.
    var_infos = var_infos.iloc[:nb_element,:].round(decimals=digits)
    _show(var_infos)

    print("\nClass Means on Canonical Variables\n")
    _show(self.classes_["coord"].round(decimals=digits))