# -*- coding: utf-8 -*-
import numpy as np
def revaluate_cat_variable(X):
    """
    Revaluate Categorical variable
    ------------------------------

    Make the levels of the categorical columns of ``X`` distinct across
    columns. Every categorical column that shares at least one level with
    another categorical column has each of its levels renamed to
    ``"<column name>_<level>"``. All other columns are returned unchanged.

    A column is treated as categorical when its dtype is ``object``,
    ``category`` or a pandas string dtype (``string`` / ``str``).
    Missing values are ignored when levels are compared and stay missing
    in the result.

    Parameters
    ----------
    X : pandas DataFrame of shape (n_rows, n_columns)

    Return
    ------
    Y : pandas DataFrame of shape (n_rows, n_columns)
        A copy of ``X`` with the overlapping levels renamed. ``X`` itself
        is never modified.
    """
    categorical_dtypes = ("object", "category", "string", "str")

    Y = X.copy()
    n_columns = X.shape[1]
    # Levels can only collide when there are at least two columns.
    if n_columns < 2:
        return Y

    # Observed (non-missing) levels of every categorical column, keyed by
    # position so that duplicated column labels are handled correctly.
    # Series.unique() is hash based: unlike np.unique it does not sort, so
    # columns mixing strings with None/NaN do not raise.
    levels_by_position = {}
    for position in range(n_columns):
        column = X.iloc[:, position]
        if column.dtype.name in categorical_dtypes:
            levels_by_position[position] = set(column.dropna().unique())

    # Count in how many categorical columns each level occurs; a level seen
    # in more than one column is ambiguous.
    occurrences = {}
    for levels in levels_by_position.values():
        for level in levels:
            occurrences[level] = occurrences.get(level, 0) + 1
    shared_levels = {level for level, count in occurrences.items() if count > 1}

    for position, levels in levels_by_position.items():
        if levels.isdisjoint(shared_levels):
            continue
        name = X.columns[position]
        relabel = {level: f"{name}_{level}" for level in levels}
        # isetitem replaces the whole column instead of writing in place,
        # so the new labels never have to fit the old (categorical) dtype.
        Y.isetitem(position, X.iloc[:, position].map(relabel))
    return Y