Module FamaFrench
Expand source code
import numpy as np
import pandas as pd
import QueryWRDS
import datetime
import pathlib
from shutil import get_terminal_size
import py_functions
import itertools
from tqdm import tqdm
import functools
from pandas.tseries.offsets import *
from cprint import *
# Console display configuration for pandas: never truncate rows or columns,
# use the full terminal width, and print floats with five decimals.
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.width = get_terminal_size().columns
pd.options.display.float_format = lambda value: '%.5f' % value
# Silence the chained-assignment warning.
pd.set_option('mode.chained_assignment', None)
class FamaFrench:
    """Build Fama-French style factors and characteristic-sorted portfolios.

    Data is read through the ``QueryWRDS`` handle stored on ``self.DB`` unless
    the caller passes a dataframe explicitly.

    TODO: (1) Error checking
          (2) Industry sorts
    """

    # Factors built as "long bucket minus short bucket" from a 2x3 sort on size
    # ('me') and one further characteristic:
    # (factor name, characteristic column, long bucket, short bucket)
    _SPREAD_FACTORS = (
        ('HML', 'ffbm', 3, 1),
        ('RMW', 'op', 3, 1),
        ('CMA', 'inv', 1, 3),
        ('MOM', 'pr2_12', 3, 1),
        ('ST_REV', 'pr1_1', 1, 3),
        ('LT_REV', 'pr13_60', 1, 3),
    )

    def __init__(self, wrds_username: str, db_path: pathlib.Path):
        """Open the WRDS query handle used by every data-fetching method.

        Args:
            wrds_username: user name handed to ``QueryWRDS.QueryWRDS``
            db_path: passed to ``QueryWRDS.QueryWRDS`` as ``local_db_path``
        """
        self.DB = QueryWRDS.QueryWRDS(wrds_username, local_db_path=db_path)

    # Helper Function
    def _safe_append(self, obj, chk, ins):
        """Append ``ins`` to the list at ``obj[chk]``, creating the list if the key is absent.

        Returns:
            ``obj`` itself (it is modified in place).
        """
        if chk not in obj:
            obj[chk] = [ins]
        else:
            obj[chk].append(ins)
        return obj

    # Helper Function
    def _portfolio_return(self, df, ret_type, weight_type, name):
        """Return the per-date return of all assets in ``df`` as one portfolio.

        Args:
            df: dataframe with a 'date' column, the ``ret_type`` column and,
                for value weighting, an 'me' column
            ret_type: name of the return column to aggregate
            weight_type: 'vw' weights by 'me' through ``py_functions.wavg``;
                any other value gives the equal-weighted mean
            name: name of the resulting return column

        Returns:
            A dataframe with columns 'date' and ``name``.
        """
        if weight_type == 'vw':
            port_s = df.groupby('date').apply(py_functions.wavg, ret_type, 'me')
        else:
            port_s = df.groupby('date')[ret_type].mean()
        port_s.name = name
        return port_s.to_frame().reset_index()

    @staticmethod
    def _size_spread(sorts_df, char):
        """Small-minus-big spread of a 2x3 sort: mean of the three small-size portfolios minus the mean of the three big ones."""
        small = sorts_df[[f'me1_{char}{bucket}' for bucket in (1, 2, 3)]].mean(axis=1)
        big = sorts_df[[f'me2_{char}{bucket}' for bucket in (1, 2, 3)]].mean(axis=1)
        return small - big

    @staticmethod
    def _char_spread(sorts_df, char, long_bucket, short_bucket):
        """Characteristic spread of a 2x3 sort: size-averaged long bucket minus size-averaged short bucket."""
        long_leg = (1 / 2) * (sorts_df[f'me1_{char}{long_bucket}'] + sorts_df[f'me2_{char}{long_bucket}'])
        short_leg = (1 / 2) * (sorts_df[f'me1_{char}{short_bucket}'] + sorts_df[f'me2_{char}{short_bucket}'])
        return long_leg - short_leg

    # Valid Factors: MKT, RF, MKT_RF, SMB3, SMB5, HML, RMW, CMA, MOM, ST_REV, LT_REV
    # TODO: Check for none dates
    def FF_factors(self, factors: list[str],
                   dfin = None,
                   start_date: datetime.datetime = None,
                   end_date: datetime.datetime = None,
                   weight_type: str = 'vw',
                   ret_type: str = 'adjret',
                   drop_na: bool = True
                   ) -> pd.DataFrame:
        """Creates standard Fama-French factors

        Creates the Fama-French factors using the original accounting practices from
        Eugene Fama's and Kenneth French's original 1992 paper.
        The Cross-Section of Expected Stock Returns https://doi.org/10.1111/j.1540-6261.1992.tb04398.x

        Constructable factors include: 'MKT' market return, 'RF' risk free rate, 'MKT_RF' equity premium,
        'SMB3' 3 factor small minus big, 'SMB5' 5 factor small minus big, 'HML' high minus low,
        'RMW' robust minus weak, 'CMA' conservative minus aggressive, 'MOM' momentum,
        'ST_REV' short term reversal, 'LT_REV' long term reversal. See
        https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html
        for construction notes. Names not in this list are ignored.

        Args:
            factors: list of factors
            dfin: dataframe with assets to use (optional); when given, the date
                range is taken from its 'date' column instead of the arguments
            start_date: start date for factors (optional)
            end_date: end date for factors (optional)
            weight_type: weights used to calculate returns; 'vw' for value
                weights, anything else for equal weights. Applied to the market
                return and to the sorted portfolios behind every other factor.
            ret_type: return column used for the market return. The sorted
                portfolios use 'adjretx' when this is 'adjretx' and 'adjret'
                otherwise.
            drop_na: if true drop rows in which every factor is NaN

        Returns:
            A dataframe indexed by date (sorted ascending) with one column per
            requested factor, in the fixed order MKT, RF, MKT_RF, SMB3, SMB5,
            HML, RMW, CMA, MOM, ST_REV, LT_REV.

        Example:
            Create the original 3 factor Fama-French model between 'date1' and 'date2'

            df = FamaFrench.FF_factors(
                factors = ['MKT_RF', 'SMB3', 'HML'],
                start_date = date1,
                end_date = date2
            )

        TODO:
            Error checking
        """
        # query DB if no dataframe supplied
        if dfin is None:
            ccm_df = self.DB.query_CCM(start_date, end_date)
        else:
            ccm_df = dfin

        # create resulting dataframe: one row per date present in the data
        res = pd.DataFrame()
        res['date'] = ccm_df.date.unique()
        res = res.sort_values(by=['date'])

        # extract start and end date from dataframe
        if dfin is not None:
            start_date = np.min(res.date)
            end_date = np.max(res.date)

        want_mkt = 'MKT' in factors
        want_rf = 'RF' in factors
        want_premium = 'MKT_RF' in factors

        # market return of supplied assets (also needed for the premium)
        if want_mkt or want_premium:
            mkt_df = self._portfolio_return(ccm_df, ret_type, weight_type, 'MKT')
            res = res.merge(mkt_df, how='left', on=['date'])

        # risk free rate (also needed for the premium)
        if want_rf or want_premium:
            rf_df = self.DB.query_riskfree(start_date, end_date, 'M')
            rf_df = rf_df.rename(columns={'rf': 'RF'})  # just for naming consistency
            res = res.merge(rf_df, on=['date'], how='left')

        # market premium; only drop the inputs the caller did not ask for
        if want_premium:
            res['MKT_RF'] = res.MKT - res.RF
            unrequested = [col for col, wanted in (('MKT', want_mkt), ('RF', want_rf)) if not wanted]
            if unrequested:
                res = res.drop(columns=unrequested)

        # Several factors share the same 2x3 sort (e.g. SMB3, SMB5 and HML all
        # use size x book-to-market), so each sort is computed once per call.
        sorts_cache = {}

        def sorts_on(char):
            if char not in sorts_cache:
                sorts_cache[char] = self.sort_portfolios(
                    stocks=ccm_df,
                    char_bkpts={'me': [0.5], char: [0.3, 0.7]},
                    sorting_funcs={'me': self.sort_50, char: self.sort_3070},
                    rebalance_freq='A',
                    weight_type=weight_type,
                    ex_dividend=(ret_type == 'adjretx'),
                    drop_na=False,
                )
            return sorts_cache[char]

        def attach(frame, sorts_df, values, name):
            factor_df = pd.DataFrame({'date': sorts_df['date'], name: values})
            return frame.merge(factor_df, how='left', on=['date'])

        # SMB factor from the 3-factor Fama-French model
        if 'SMB3' in factors:
            sorts_df = sorts_on('ffbm')
            res = attach(res, sorts_df, self._size_spread(sorts_df, 'ffbm'), 'SMB3')

        # SMB factor from the 5-factor Fama-French model: average of the size
        # spreads from the BM, OP and INV sorts. Each spread is paired with its
        # own dates before merging so the three components align by date.
        if 'SMB5' in factors:
            components = (('SMB_BM', 'ffbm'), ('SMB_OP', 'op'), ('SMB_INV', 'inv'))
            smb_df = None
            for label, char in components:
                sorts_df = sorts_on(char)
                part = pd.DataFrame({'date': sorts_df['date'], label: self._size_spread(sorts_df, char)})
                smb_df = part if smb_df is None else smb_df.merge(part, how='left', on=['date'])
            smb_df['SMB5'] = smb_df[[label for label, _ in components]].mean(axis=1)
            res = res.merge(smb_df[['date', 'SMB5']], how='left', on=['date'])

        # NOTE(review): the prior-return sorts (MOM, ST_REV, LT_REV) are
        # rebalanced annually here, exactly like the accounting-based sorts;
        # confirm this is intended, since the data library referenced in the
        # docstring appears to form those portfolios monthly.
        for name, char, long_bucket, short_bucket in self._SPREAD_FACTORS:
            if name in factors:
                sorts_df = sorts_on(char)
                spread = self._char_spread(sorts_df, char, long_bucket, short_bucket)
                res = attach(res, sorts_df, spread, name)

        res = res.set_index('date').sort_index()
        if drop_na:
            res = res.dropna(how='all')
        return res

    def FF_3factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin = None):
        """Shortcut for ``FF_factors`` with factors MKT_RF, SMB3 and HML.

        All arguments are forwarded unchanged (``weigth_type`` is passed on as
        ``weight_type``).
        """
        return self.FF_factors(factors=['MKT_RF', 'SMB3', 'HML'],
                               dfin=dfin,
                               start_date=start_date, end_date=end_date,
                               weight_type=weigth_type, ret_type=ret_type,
                               drop_na=drop_na)

    def FF_5factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin = None):
        """Shortcut for ``FF_factors`` with factors MKT_RF, SMB5, HML, CMA and RMW.

        All arguments are forwarded unchanged (``weigth_type`` is passed on as
        ``weight_type``).
        """
        return self.FF_factors(factors=['MKT_RF', 'SMB5', 'HML', 'CMA', 'RMW'],
                               dfin=dfin,
                               start_date=start_date, end_date=end_date,
                               weight_type=weigth_type, ret_type=ret_type,
                               drop_na=drop_na)

    def breakpoint_ts(self, df_in, vars, qtiles = None):
        """Compute per-date quantile breakpoints for one or more columns.

        Args:
            df_in: dataframe with a 'date' column and the columns to break
            vars: either a dict mapping column name -> list of quantiles in
                [0, 1], or an iterable of column names that all share ``qtiles``
            qtiles: used only when ``vars`` is not a dict. ``5`` selects
                quintile breakpoints, ``10`` decile breakpoints, ``None`` or any
                other int the default 5%-step grid; a list/tuple is used as is.

        Returns:
            A dataframe with a 'date' column and one column per breakpoint,
            named '<column>_<percent>%' (e.g. 'me_50%').

        Raises:
            TypeError: if ``qtiles`` is neither None, an int nor a list/tuple.
        """
        DEFAULT_QTILES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
        DECILES_QTILES = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        QUINTIL_QTILES = [0.2, 0.4, 0.6, 0.8]

        if isinstance(vars, dict):
            bkpts = vars
        elif qtiles is None or isinstance(qtiles, int):
            preset = {5: QUINTIL_QTILES, 10: DECILES_QTILES}.get(qtiles, DEFAULT_QTILES)
            bkpts = {var: preset for var in vars}
        elif isinstance(qtiles, (list, tuple)):
            bkpts = {var: list(qtiles) for var in vars}
        else:
            raise TypeError("No valid vars or qtile combination given.")

        frames = []
        for var, var_qtiles in bkpts.items():
            var_qtiles = list(var_qtiles)
            stats = df_in.groupby('date')[var].describe(percentiles=var_qtiles)
            # Build the labels describe() uses. Rounding (rather than int()
            # truncation) is required because e.g. 100 * 0.29 is
            # 28.999999999999996 in floating point.
            labels = [f'{round(100 * q, 10):g}%' for q in var_qtiles]
            frames.append(stats[labels].add_prefix(f'{var}_'))

        fin = functools.reduce(lambda x, y: pd.merge(x, y, on='date'), frames)
        return fin.reset_index()

    # sorting functions
    def _rank_by_breakpoints(self, row, var, pcts):
        """Place ``row[var]`` into a bucket delimited by ascending percentile breakpoints.

        Bucket k (1-based) holds values in [breakpoint k-1, breakpoint k); the
        last bucket holds values at or above the final breakpoint. Breakpoints
        are read from ``row['<var>_<pct>%']``.

        Returns:
            '<var><bucket>' or '--fail' when no comparison succeeds (e.g. the
            value is NaN).
        """
        value = row[var]
        lower = None
        for bucket, pct in enumerate(pcts, start=1):
            upper = row[f'{var}_{pct}%']
            if (lower is None or value >= lower) and value < upper:
                return f'{var}{bucket}'
            lower = upper
        if value >= lower:
            return f'{var}{len(pcts) + 1}'
        return '--fail'

    def sort_50(self, row, var):
        """Two buckets split at the 50% breakpoint; '--fail' if not comparable."""
        return self._rank_by_breakpoints(row, var, (50,))

    def sort_050(self, row, var):
        """Three buckets: negative values, [0, 50% breakpoint), and at/above the 50% breakpoint.

        Returns '--fail' when the value cannot be compared (e.g. NaN).
        """
        value = row[var]
        if value < 0:
            return f'{var}1'
        median = row[f'{var}_50%']
        if value >= 0 and value < median:
            return f'{var}2'
        if value >= median:
            return f'{var}3'
        return '--fail'

    def sort_3070(self, row, var):
        """Three buckets split at the 30% and 70% breakpoints; '--fail' if not comparable."""
        return self._rank_by_breakpoints(row, var, (30, 70))

    def sort_03070(self, row, var):
        """Four buckets: non-positive values, then (0, 30%), [30%, 70%) and at/above 70%.

        Returns '--fail' when the value cannot be compared (e.g. NaN).
        """
        value = row[var]
        if value <= 0:
            return f'{var}1'
        low = row[f'{var}_30%']
        if value >= 0 and value < low:
            return f'{var}2'
        high = row[f'{var}_70%']
        if value >= low and value < high:
            return f'{var}3'
        if value >= high:
            return f'{var}4'
        return '--fail'

    def sort_quintile(self, row, var):
        """Five buckets split at the 20/40/60/80% breakpoints; '--fail' if not comparable."""
        return self._rank_by_breakpoints(row, var, (20, 40, 60, 80))

    def sort_deciles(self, row, var):
        """Ten buckets split at the 10%..90% breakpoints; '--fail' if not comparable."""
        return self._rank_by_breakpoints(row, var, (10, 20, 30, 40, 50, 60, 70, 80, 90))

    def sort_portfolios(self, stocks, char_bkpts, sorting_funcs, rebalance_freq, weight_type = 'vw', sort_month = 7, ex_dividend = False, drop_na = True, breakpoint_exchanges = ['1'], **kwargs):
        """Sort stocks into portfolios on characteristics and return portfolio returns.

        Args:
            stocks: dataframe with columns 'date', 'me', 'wt', 'exchcd',
                'permno', the return column ('adjret' or 'adjretx'), every
                characteristic being sorted on and, for annual rebalancing,
                'month' and 'ffyear'
            char_bkpts: dict characteristic -> list of quantiles, passed to
                ``breakpoint_ts``
            sorting_funcs: dict characteristic -> function ``f(row, char)``
                returning the bucket label (or '--fail'); dict order fixes the
                order of the labels in the portfolio name
            rebalance_freq: 'A' sorts once a year using rows whose 'month'
                equals ``sort_month`` and carries the assignment to every row
                with the same 'permno' and 'ffyear'; any other value sorts on
                every date
            weight_type: 'vw' weights returns by 'wt' through
                ``py_functions.wavg``; any other value gives equal weights
            sort_month: month used for annual sorts
            ex_dividend: use 'adjretx' instead of 'adjret'
            drop_na: drop dates on which any portfolio column is NaN
            breakpoint_exchanges: 'exchcd' values whose stocks define the
                breakpoints
            **kwargs: accepted but unused

        Returns:
            A dataframe with a 'date' column, one return column per portfolio
            (named by joining the bucket labels with '_') and a matching
            '<portfolio>_num_firms' count column.
        """
        # Keep strictly positive size and weight; this also removes NaNs
        # because comparisons with NaN are False.
        stocks = stocks[(stocks.me > 0) & (stocks.wt > 0)].copy()
        stocks['date'] = pd.to_datetime(stocks['date'])

        annual = rebalance_freq == 'A'
        rebalance_df = stocks[stocks.month == sort_month] if annual else stocks

        # calculate breakpoints from the breakpoint exchanges only
        breakpoint_stocks_df = rebalance_df[rebalance_df.exchcd.isin(breakpoint_exchanges)]
        breakpoints_df = self.breakpoint_ts(breakpoint_stocks_df, vars=char_bkpts)

        # merge breakpoints to the rebalance df
        rebalance_df = breakpoints_df.merge(rebalance_df, how='inner', on=['date'])

        ret_typ = 'adjretx' if ex_dividend else 'adjret'

        rank_cols = []
        for char, func in sorting_funcs.items():
            rank_col = f'{char}_rank'
            rebalance_df[rank_col] = rebalance_df.apply(func, args=(char, ), axis=1)
            rank_cols.append(rank_col)

        for rank_col in rank_cols:
            failed = rebalance_df[rank_col] == '--fail'
            if failed.any():
                cprint.warn(f'There are stocks that could not be sorted in {rank_col}. They will be removed before constructing portfolios.')
                rebalance_df = rebalance_df[~failed]

        rebalance_df = rebalance_df.assign(port_name=rebalance_df[rank_cols].agg('_'.join, axis=1))

        if annual:
            # carry the sort-month assignment to all rows of the same firm-year
            fin = stocks.merge(rebalance_df[['permno', 'ffyear', 'port_name']], how='left', on=['permno', 'ffyear'])
        else:
            fin = rebalance_df
        fin = fin.dropna(subset=['port_name'])

        grouped = fin.groupby(['date', 'port_name'])
        if weight_type == 'vw':
            rets = grouped.apply(py_functions.wavg, ret_typ, 'wt').to_frame().reset_index().rename(columns={0: ret_typ})
        else:
            rets = grouped[ret_typ].mean().reset_index()
        firm = grouped['permno'].count().reset_index().rename(columns={'permno': 'num_firms'})

        # wide format: one column per portfolio
        rets = rets.pivot(index='date', columns='port_name', values=ret_typ)
        firm = firm.pivot(index='date', columns='port_name', values='num_firms')
        firm = firm.add_suffix('_num_firms')

        res = rets.merge(firm, how='inner', on=['date'])
        res = res.reset_index()
        if drop_na:
            res = res.dropna()
        return res
Classes
class FamaFrench (wrds_username: str, db_path: pathlib.Path)
-
Expand source code
class FamaFrench: # TODO (1) Error Checking # (2) Industry Sorts def __init__(self, wrds_username: str, db_path: pathlib.Path): self.DB = QueryWRDS.QueryWRDS(wrds_username, local_db_path = db_path) # Helper Function def _safe_append(self, obj, chk, ins): if(not chk in obj): obj[chk] = [ins] else: obj[chk].append(ins) return(obj) # Helper Function def _portfolio_return(self, df, ret_type, weight_type, name): if(weight_type == 'vw'): mkt_s = df.groupby('date').apply(py_functions.wavg, ret_type, 'me') else: mkt_s = df.groupby('date').mean(numeric_only = True)[ret_type] mkt_s.name = name mkt_s = mkt_s.to_frame().reset_index() return(mkt_s) ''' Valid Factors: MKT, RF, MKT_RF, SMB3, SMB5, HML, RMW, CMA, MOM, ST_REV, LT_REV ''' # TODO: Check for none dates def FF_factors(self, factors: list[str], dfin = None, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weight_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True ) -> pd.DataFrame: """Creates standard Fama-French factors Creates the Fama-French factors using the original accounting practices from Eugene Fama's and Kenneth French's original 1992 paper. The Cross-Section of Expected Stock Returns https://doi.org/10.1111/j.1540-6261.1992.tb04398.x Constructable factors include: 'MKT' market return, 'RF' risk free rate, 'MKT_RF' equity premium, 'SMB3' 3 factor small minus big, 'SMB5' 5 factor small minus big, 'HML' high minus low, 'RMW' robust minus weak, 'CMA' conservative minus aggresive, 'MOM' momentum, 'ST_REV' short term reversal, 'LT_Rev' long term reversal. See https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html for constrution notes. 
Args: factors: list of factors dfin: datadrame with assets to use (optional) start_date: start date for factors (optional) end_date: end date for factors (optional) weight_type: weights used to calculate returns ret_type: return type with or without dividends drop_na: if true drop rows that have NaN values Returns: A dataframe with the specifed factros and a date column. Dataframe sorted by date. Example: Create the original 3 factor Fama-French model between 'date1' and 'date2' df = FamaFrench.FF_factors( factors = ['MKT_RF', 'SMB3', 'HML'], start_date = date1, end_date = date2 ) TODO: Error checking """ # query DB if no dataframe supplied if(dfin is None): ccm_df = self.DB.query_CCM(start_date, end_date) else: ccm_df = dfin # create resulting dataframe res = pd.DataFrame() date_s = ccm_df.date.unique() res['date'] = date_s res = res.sort_values(by = ['date']) # extract state and end date from dataframe if(not dfin is None): start_date = np.min(res.date) end_date = np.max(res.date) # calculate the market return of supplied assets if('MKT' in factors): mkt_df = self._portfolio_return(ccm_df, ret_type, weight_type, 'MKT') res = res.merge(mkt_df, how = 'left', on = ['date']) # add the risk free rate if('RF' in factors): rf_df = self.DB.query_riskfree(start_date, end_date, 'M') rf_df = rf_df.rename(columns = {'rf': 'RF'}) # just for naming consistency res = res.merge(rf_df, on = ['date'], how = 'left') # add the market premium if('MKT_RF' in factors): if('MKT' in factors and 'RF' in factors): res['MKT_RF'] = res.MKT - res.RF elif('MKT' in factors and not 'RF' in factors): rf_df = self.DB.query_riskfree(start_date, end_date, 'M') rf_df = rf_df.rename(columns = {'rf': 'RF'}) # just for naming consistency res = res.merge(rf_df, on = ['date'], how = 'left') res['MKT_RF'] = res.MKT - res.RF res = res.drop(columns = ['MKT', 'RF']) elif(not 'MKT' in factors and 'RF' in factors): mkt_df = self._portfolio_return(ccm_df, ret_type, weight_type, 'MKT') res = res.merge(mkt_df, how 
= 'left', on = ['date']) res['MKT_RF'] = res.MKT - res.RF res = res.drop(columns = ['MKT', 'RF']) else: mkt_df = self._portfolio_return(ccm_df, ret_type, weight_type, 'MKT') res = res.merge(mkt_df, how = 'left', on = ['date']) rf_df = self.DB.query_riskfree(start_date, end_date, 'M') rf_df = rf_df.rename(columns = {'rf': 'RF'}) # just for naming consistency res = res.merge(rf_df, on = ['date'], how = 'left') res['MKT_RF'] = res.MKT - res.RF res = res.drop(columns = ['MKT', 'RF']) # SMB factor from the 3-factor Fama-French model if('SMB3' in factors): # portfolio sorts on ME and BM sorts_df = self.sort_portfolios( stocks = ccm_df, char_bkpts = {'me': [0.5], 'ffbm': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'ffbm': self.sort_3070}, drop_na = False, rebalance_freq = 'A' ) sorts_df['SMB3'] = sorts_df[['me1_ffbm1', 'me1_ffbm2', 'me1_ffbm3']].mean(axis = 1) - sorts_df[['me2_ffbm1', 'me2_ffbm2', 'me2_ffbm3']].mean(axis = 1) res = res.merge(sorts_df[['date', 'SMB3']], how = 'left', on = ['date']) # SMB factor from the 5-factor Fama-French model if('SMB5' in factors): # sorts on BM sortsBM_df = self.sort_portfolios( stocks = ccm_df, char_bkpts = {'me': [0.5], 'ffbm': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'ffbm': self.sort_3070}, drop_na = False, rebalance_freq = 'A' ) # sorts on OP sortsOP_df = self.sort_portfolios( stocks = ccm_df, char_bkpts = {'me': [0.5], 'op': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'op': self.sort_3070}, drop_na = False, rebalance_freq = 'A' ) # sorts on INV sortsINV_df = self.sort_portfolios( stocks = ccm_df, char_bkpts = {'me': [0.5], 'inv': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'inv': self.sort_3070}, drop_na = False, rebalance_freq = 'A' ) # combine sorts into one dataframe sortsBM_df = sortsBM_df.merge(sortsOP_df, how = 'left', on = ['date']) sortsBM_df = sortsBM_df.merge(sortsINV_df, how = 'left', on = ['date']) # housekeeping sortsBM_df = sortsBM_df.set_index('date') sortsBM_df = sortsBM_df.dropna(how = 
'all') # create factors sortsBM_df['SMB_BM'] = sortsBM_df[['me1_ffbm1', 'me1_ffbm2', 'me1_ffbm3']].mean(axis = 1) - sortsBM_df[['me2_ffbm1', 'me2_ffbm2', 'me2_ffbm3']].mean(axis = 1) sortsBM_df['SMB_OP'] = sortsOP_df[['me1_op1', 'me1_op2', 'me1_op3']].mean(axis = 1) - sortsOP_df[['me2_op1', 'me2_op2', 'me2_op3']].mean(axis = 1) sortsBM_df['SMB_INV'] = sortsINV_df[['me1_inv1', 'me1_inv2', 'me1_inv3']].mean(axis = 1) - sortsINV_df[['me2_inv1', 'me2_inv2', 'me2_inv3']].mean(axis = 1) # average factors sortsBM_df['SMB5'] = sortsBM_df[['SMB_BM', 'SMB_OP', 'SMB_INV']].mean(axis = 1) # add to result dataframe sortsBM_df = sortsBM_df.reset_index() res = res.merge(sortsBM_df[['date', 'SMB5']], how = 'left', on = ['date']) if('HML' in factors): sortsBM_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'ffbm': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'ffbm': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsBM_df['HML'] = (1/2) * (sortsBM_df.me1_ffbm3 + sortsBM_df.me2_ffbm3) - (1/2) * (sortsBM_df.me1_ffbm1 + sortsBM_df.me2_ffbm1) res = res.merge(sortsBM_df[['date', 'HML']], how = 'left', on = ['date']) if('RMW' in factors): sortsOP_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'op': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'op': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsOP_df['RMW'] = (1/2) * (sortsOP_df.me1_op3 + sortsOP_df.me2_op3) - (1/2) * (sortsOP_df.me1_op1 + sortsOP_df.me2_op1) res = res.merge(sortsOP_df[['date', 'RMW']], how = 'left', on = ['date']) if('CMA' in factors): sortsINV_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'inv': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'inv': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsINV_df['CMA'] = (1/2) * (sortsINV_df.me1_inv1 + sortsINV_df.me2_inv1) - (1/2) * (sortsINV_df.me1_inv3 + sortsINV_df.me2_inv3) res = res.merge(sortsINV_df[['date', 'CMA']], how = 'left', on = ['date']) if('MOM' 
in factors): sortsPR2_12_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'pr2_12': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'pr2_12': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsPR2_12_df['MOM'] = (1/2) * (sortsPR2_12_df.me1_pr2_123 + sortsPR2_12_df.me2_pr2_123) - (1/2) * (sortsPR2_12_df.me1_pr2_121 + sortsPR2_12_df.me2_pr2_121) res = res.merge(sortsPR2_12_df[['date', 'MOM']], how = 'left', on = ['date']) if('ST_REV' in factors): sortsPR1_1_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'pr1_1': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'pr1_1': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsPR1_1_df['ST_REV'] = (1/2) * (sortsPR1_1_df.me1_pr1_11 + sortsPR1_1_df.me2_pr1_11) - (1/2) * (sortsPR1_1_df.me1_pr1_13 + sortsPR1_1_df.me2_pr1_13) res = res.merge(sortsPR1_1_df[['date', 'ST_REV']], how = 'left', on = ['date']) if('LT_REV' in factors): sortsPR13_60_df = self.sort_portfolios(stocks = ccm_df, char_bkpts = {'me': [0.5], 'pr13_60': [0.3, 0.7]}, sorting_funcs = {'me': self.sort_50, 'pr13_60': self.sort_3070}, drop_na = False, rebalance_freq = 'A') sortsPR13_60_df['LT_REV'] = (1/2) * (sortsPR13_60_df.me1_pr13_601 + sortsPR13_60_df.me2_pr13_601) - (1/2) * (sortsPR13_60_df.me1_pr13_603 + sortsPR13_60_df.me2_pr13_603) res = res.merge(sortsPR13_60_df[['date', 'LT_REV']], how = 'left', on = ['date']) res = res.set_index('date').sort_index() if(drop_na): res = res.dropna(how = 'all') return(res) def FF_3factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin = None): return(self.FF_factors(factors = ['MKT_RF', 'SMB3', 'HML'], dfin = dfin, start_date = start_date, end_date = end_date, weight_type = weigth_type, ret_type = ret_type, drop_na = drop_na)) def FF_5factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', 
ret_type: str = 'adjret', drop_na: bool = True, dfin = None): return(self.FF_factors(factors = ['MKT_RF', 'SMB5', 'HML', 'CMA', 'RMW'], dfin = dfin, start_date = start_date, end_date = end_date, weight_type = weigth_type, ret_type = ret_type, drop_na = drop_na)) def breakpoint_ts(self, df_in, vars, qtiles = None): DEFAULT_QTILES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1] DECILES_QTILES = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] QUINTIL_QTILES = [0.2, 0.4, 0.6, 0.8] dict_in = {} if(type(vars) is dict): dict_in = vars else: if(type(qtiles) is int or qtiles is None): for var in vars: if(qtiles == 5): dict_in[var] = QUINTIL_QTILES elif(qtiles == 10): dict_in[var] = DECILES_QTILES else: dict_in[var] = DEFAULT_QTILES elif(type(qtiles) is list): for var in vars: dict_in[var] = qtiles else: raise TypeError("No valid vars or qtile combination given.") res = [] for var, qtiles in dict_in.items(): temp = df_in.groupby('date')[var].describe(percentiles = qtiles) ptiles = [f'{int(100 * q)}%' for q in qtiles] temp = temp[ptiles] temp = temp.add_prefix(f'{var}_') res.append(temp) fin = functools.reduce(lambda x, y: pd.merge(x, y, on = 'date'), res) fin = fin.reset_index() return(fin) # sorting functions def sort_50(self, row, var): if(row[var] < row[f'{var}_50%']): res = f'{var}1' elif(row[var] >= row[f'{var}_50%']): res = f'{var}2' else: res = '--fail' return(res) def sort_050(self, row, var): if(row[var] < 0): res = f'{var}1' if(row[var] >= 0 and row[var] < row[f'{var}_50%']): res = f'{var}2' elif(row[var] >= row[f'{var}_50%']): res = f'{var}3' else: res = '--fail' return(res) def sort_3070(self, row, var): if(row[var] < row[f'{var}_30%']): res = f'{var}1' elif(row[var] >= row[f'{var}_30%'] and row[var] < row[f'{var}_70%']): res = f'{var}2' elif(row[var] >= row[f'{var}_70%']): res = f'{var}3' else: res = '--fail' return(res) def sort_03070(self, row, var): if(row[var] <= 0): res = f'{var}1' elif(row[var] 
>= 0 and row[var] < row[f'{var}_30%']): res = f'{var}2' elif(row[var] >= row[f'{var}_30%'] and row[var] < row[f'{var}_70%']): res = f'{var}3' elif(row[var] >= row[f'{var}_70%']): res = f'{var}4' else: res = '--fail' return(res) def sort_quintile(self, row, var): if(row[var] < row[f'{var}_20%']): res = f'{var}1' elif(row[var] >= row[f'{var}_20%'] and row[var] < row[f'{var}_40%']): res = f'{var}2' elif(row[var] >= row[f'{var}_40%'] and row[var] < row[f'{var}_60%']): res = f'{var}3' elif(row[var] >= row[f'{var}_60%'] and row[var] < row[f'{var}_80%']): res = f'{var}4' elif(row[var] >= row[f'{var}_80%']): res = f'{var}5' else: res = '--fail' return(res) def sort_deciles(self, row, var): if(row[var] < row[f'{var}_10%']): res = f'{var}1' elif(row[var] >= row[f'{var}_10%'] and row[var] < row[f'{var}_20%']): res = f'{var}2' elif(row[var] >= row[f'{var}_20%'] and row[var] < row[f'{var}_30%']): res = f'{var}3' elif(row[var] >= row[f'{var}_30%'] and row[var] < row[f'{var}_40%']): res = f'{var}4' elif(row[var] >= row[f'{var}_40%'] and row[var] < row[f'{var}_50%']): res = f'{var}5' elif(row[var] >= row[f'{var}_50%'] and row[var] < row[f'{var}_60%']): res = f'{var}6' elif(row[var] >= row[f'{var}_60%'] and row[var] < row[f'{var}_70%']): res = f'{var}7' elif(row[var] >= row[f'{var}_70%'] and row[var] < row[f'{var}_80%']): res = f'{var}8' elif(row[var] >= row[f'{var}_80%'] and row[var] < row[f'{var}_90%']): res = f'{var}9' elif(row[var] >= row[f'{var}_90%']): res = f'{var}10' else: res = '--fail' return(res) def sort_portfolios(self, stocks, char_bkpts, sorting_funcs, rebalance_freq, weight_type = 'vw', sort_month = 7, ex_dividend = False, drop_na = True, breakpoint_exchanges = ['1'], **kwargs): # removes nans stocks = stocks[(stocks.me > 0) & (stocks.wt > 0)] stocks.date = pd.to_datetime(stocks.date) if(rebalance_freq == 'A'): rebalance_df = stocks[stocks.month == sort_month] else: rebalance_df = stocks breakpoint_stocks_df = 
rebalance_df[rebalance_df.exchcd.isin(breakpoint_exchanges)] # calculate breakpoints breakpoints_df = self.breakpoint_ts(breakpoint_stocks_df, vars = char_bkpts) # merge breakpoints to the rebalance df rebalance_df = breakpoints_df.merge(rebalance_df, how = 'inner', on = ['date']) ret_typ = 'adjretx' if(ex_dividend) else 'adjret' rank_cols = [] for char, func in sorting_funcs.items(): rank_cols.append(f'{char}_rank') rebalance_df[f'{char}_rank'] = rebalance_df.apply(func, args = (char, ), axis = 1) for rank_col in rank_cols: if('--fail' in rebalance_df[rank_col].unique()): cprint.warn(f'There are stocks that could not be sorted in {rank_col}. They will be removed before constructing portfolios.') rebalance_df = rebalance_df[rebalance_df[rank_col] != '--fail'] rebalance_df['port_name'] = rebalance_df[rank_cols].agg('_'.join, axis = 1) if(rebalance_freq == 'A'): fin = stocks.merge(rebalance_df[['permno', 'ffyear', 'port_name']], how = 'left', on = ['permno', 'ffyear']) else: fin = rebalance_df fin = fin.dropna(subset = ['port_name']) rets = None if(weight_type == 'vw'): rets = fin.groupby(['date', 'port_name']).apply(py_functions.wavg, ret_typ, 'wt').to_frame().reset_index().rename(columns = {0: ret_typ}) else: rets = fin.groupby(['date', 'port_name']).mean(numeric_only = True)[ret_typ].to_frame().reset_index().rename(columns = {0: ret_typ}) firm = fin.groupby(['date', 'port_name'])['permno'].count().reset_index().rename(columns = {'permno': 'num_firms'}) rets = rets.pivot(index = 'date', columns = 'port_name', values = ret_typ) firm = firm.pivot(index = 'date', columns = 'port_name', values = 'num_firms') firm = firm.add_suffix('_num_firms') res = rets.merge(firm, how = 'inner', on = ['date']) res = res.reset_index() if(drop_na): res = res.dropna() return(res)
Methods
def FF_3factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin=None)
-
Expand source code
def FF_3factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin = None): return(self.FF_factors(factors = ['MKT_RF', 'SMB3', 'HML'], dfin = dfin, start_date = start_date, end_date = end_date, weight_type = weigth_type, ret_type = ret_type, drop_na = drop_na))
def FF_5factor(self, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weigth_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True, dfin=None)
-
Expand source code
def FF_5factor(self,
               start_date: datetime.datetime = None,
               end_date: datetime.datetime = None,
               weigth_type: str = 'vw',
               ret_type: str = 'adjret',
               drop_na: bool = True,
               dfin = None):
    """Build the five-factor set 'MKT_RF', 'SMB5', 'HML', 'CMA' and 'RMW'.

    Convenience wrapper: every argument is forwarded unchanged to
    ``FF_factors`` together with the fixed factor list.

    Args:
        start_date: forwarded as ``start_date``
        end_date: forwarded as ``end_date``
        weigth_type: forwarded as ``weight_type`` (the spelling of this
            parameter is part of the public signature)
        ret_type: forwarded as ``ret_type``
        drop_na: forwarded as ``drop_na``
        dfin: forwarded as ``dfin``

    Returns:
        Whatever ``FF_factors`` returns for these five factors.
    """
    five_factor_names = ['MKT_RF', 'SMB5', 'HML', 'CMA', 'RMW']
    forwarded = {
        'dfin': dfin,
        'start_date': start_date,
        'end_date': end_date,
        'weight_type': weigth_type,
        'ret_type': ret_type,
        'drop_na': drop_na,
    }
    return self.FF_factors(factors = five_factor_names, **forwarded)
def FF_factors(self, factors: list[str], dfin=None, start_date: datetime.datetime = None, end_date: datetime.datetime = None, weight_type: str = 'vw', ret_type: str = 'adjret', drop_na: bool = True) ‑> pandas.core.frame.DataFrame
-
Creates standard Fama-French factors
Creates the Fama-French factors using the original accounting practices from Eugene Fama's and Kenneth French's original 1992 paper. The Cross-Section of Expected Stock Returns https://doi.org/10.1111/j.1540-6261.1992.tb04398.x
Constructable factors include: 'MKT' market return, 'RF' risk free rate, 'MKT_RF' equity premium, 'SMB3' 3 factor small minus big, 'SMB5' 5 factor small minus big, 'HML' high minus low, 'RMW' robust minus weak, 'CMA' conservative minus aggressive, 'MOM' momentum, 'ST_REV' short term reversal, 'LT_REV' long term reversal. See https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html for construction notes.
Args
factors
- list of factors
dfin
- dataframe with assets to use (optional)
start_date
- start date for factors (optional)
end_date
- end date for factors (optional)
weight_type
- weights used to calculate returns
ret_type
- return type with or without dividends
drop_na
- if true drop rows that have NaN values
Returns: A dataframe with the specified factors, indexed and sorted by date.
Example
Create the original 3 factor Fama-French model between 'date1' and 'date2'
df = FamaFrench.FF_factors( factors = ['MKT_RF', 'SMB3', 'HML'], start_date = date1, end_date = date2 )
Todo
Error checking
Expand source code
def FF_factors(self, factors: list[str],
               dfin = None,
               start_date: datetime.datetime = None,
               end_date: datetime.datetime = None,
               weight_type: str = 'vw',
               ret_type: str = 'adjret',
               drop_na: bool = True
               ) -> pd.DataFrame:
    """Creates standard Fama-French factors

    Creates the Fama-French factors using the original accounting practices from
    Eugene Fama's and Kenneth French's original 1992 paper.
    The Cross-Section of Expected Stock Returns https://doi.org/10.1111/j.1540-6261.1992.tb04398.x

    Constructable factors include: 'MKT' market return, 'RF' risk free rate, 'MKT_RF' equity premium,
    'SMB3' 3 factor small minus big, 'SMB5' 5 factor small minus big, 'HML' high minus low,
    'RMW' robust minus weak, 'CMA' conservative minus aggressive, 'MOM' momentum,
    'ST_REV' short term reversal, 'LT_REV' long term reversal.
    See https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/data_library.html for construction notes.

    Names in `factors` that are not listed above are ignored. Output columns follow the fixed
    order MKT, RF, MKT_RF, SMB3, SMB5, HML, RMW, CMA, MOM, ST_REV, LT_REV, not the order of `factors`.

    Args:
        factors: list of factors
        dfin: dataframe with assets to use (optional); when given, start_date and end_date
            are replaced by the minimum and maximum of its `date` column
        start_date: start date for factors (optional)
        end_date: end date for factors (optional)
        weight_type: weights used to calculate the market return ('vw' for value weighted,
            anything else for an equal weighted mean)
        ret_type: return column used for the market return
        drop_na: if true drop rows in which every factor is NaN

    Returns:
        A dataframe with the requested factor columns, indexed and sorted by date.

    Example:
        Create the original 3 factor Fama-French model between 'date1' and 'date2'

        df = FamaFrench.FF_factors(
            factors = ['MKT_RF', 'SMB3', 'HML'],
            start_date = date1,
            end_date = date2
        )

    TODO: Error checking
    """
    # query DB if no dataframe supplied
    if dfin is None:
        ccm_df = self.DB.query_CCM(start_date, end_date)
    else:
        ccm_df = dfin

    # create resulting dataframe: one row per distinct date
    res = pd.DataFrame()
    res['date'] = ccm_df.date.unique()
    res = res.sort_values(by = ['date'])

    # extract start and end date from the supplied dataframe
    if dfin is not None:
        start_date = np.min(res.date)
        end_date = np.max(res.date)

    sort_cache = {}

    def size_by(char):
        # Every sorted factor uses the same size x characteristic sort, so each
        # distinct sort is run once and the result is shared. Cached frames are
        # never modified below.
        if char not in sort_cache:
            sort_cache[char] = self.sort_portfolios(
                stocks = ccm_df,
                char_bkpts = {'me': [0.5], char: [0.3, 0.7]},
                sorting_funcs = {'me': self.sort_50, char: self.sort_3070},
                drop_na = False,
                rebalance_freq = 'A'
            )
        return sort_cache[char]

    def small_minus_big(sorts, char):
        # mean of the three small portfolios minus mean of the three big ones
        small = sorts[[f'me1_{char}{i}' for i in (1, 2, 3)]].mean(axis = 1)
        big = sorts[[f'me2_{char}{i}' for i in (1, 2, 3)]].mean(axis = 1)
        return small - big

    def long_short(sorts, char, long_leg, short_leg):
        # average of the two long portfolios minus average of the two short ones
        longs = sorts[f'me1_{char}{long_leg}'] + sorts[f'me2_{char}{long_leg}']
        shorts = sorts[f'me1_{char}{short_leg}'] + sorts[f'me2_{char}{short_leg}']
        return (1/2) * longs - (1/2) * shorts

    def attach(frame, sorts, name, values):
        # left-merge a factor computed on `sorts` onto the result by date
        factor_df = pd.DataFrame({'date': sorts['date'], name: values})
        return frame.merge(factor_df, how = 'left', on = ['date'])

    # market return, risk free rate and market premium; MKT and RF are also
    # needed as inputs whenever MKT_RF is requested
    wants_premium = 'MKT_RF' in factors
    if 'MKT' in factors or wants_premium:
        mkt_df = self._portfolio_return(ccm_df, ret_type, weight_type, 'MKT')
        res = res.merge(mkt_df, how = 'left', on = ['date'])
    if 'RF' in factors or wants_premium:
        rf_df = self.DB.query_riskfree(start_date, end_date, 'M')
        rf_df = rf_df.rename(columns = {'rf': 'RF'}) # just for naming consistency
        res = res.merge(rf_df, on = ['date'], how = 'left')
    if wants_premium:
        res['MKT_RF'] = res.MKT - res.RF
        # drop only the inputs the caller did not ask for
        helper_cols = [col for col in ('MKT', 'RF') if col not in factors]
        if helper_cols:
            res = res.drop(columns = helper_cols)

    # SMB factor from the 3-factor Fama-French model
    if 'SMB3' in factors:
        sorts_df = size_by('ffbm')
        res = attach(res, sorts_df, 'SMB3', small_minus_big(sorts_df, 'ffbm'))

    # SMB factor from the 5-factor Fama-French model
    if 'SMB5' in factors:
        smb5_chars = ('ffbm', 'op', 'inv')
        combined = size_by('ffbm')
        for char in smb5_chars[1:]:
            combined = combined.merge(size_by(char), how = 'left', on = ['date'])
        combined = combined.set_index('date')
        combined = combined.dropna(how = 'all')
        # All three spreads are taken from the merged, date-indexed frame so they
        # line up row by row; mean(axis = 1) averages whichever are available.
        spreads = pd.concat([small_minus_big(combined, char) for char in smb5_chars], axis = 1)
        smb5_df = spreads.mean(axis = 1).rename('SMB5').reset_index()
        res = res.merge(smb5_df, how = 'left', on = ['date'])

    # (factor, sorting characteristic, long bucket, short bucket)
    # NOTE(review): the prior-return factors (MOM, ST_REV, LT_REV) reuse the annual
    # rebalance of the accounting sorts - confirm this is intended.
    long_short_specs = (
        ('HML', 'ffbm', 3, 1),
        ('RMW', 'op', 3, 1),
        ('CMA', 'inv', 1, 3),
        ('MOM', 'pr2_12', 3, 1),
        ('ST_REV', 'pr1_1', 1, 3),
        ('LT_REV', 'pr13_60', 1, 3),
    )
    for name, char, long_leg, short_leg in long_short_specs:
        if name in factors:
            sorts_df = size_by(char)
            res = attach(res, sorts_df, name, long_short(sorts_df, char, long_leg, short_leg))

    res = res.set_index('date').sort_index()
    if drop_na:
        res = res.dropna(how = 'all')
    return res
def breakpoint_ts(self, df_in, vars, qtiles=None)
-
Expand source code
def breakpoint_ts(self, df_in, vars, qtiles = None):
    """Compute per-date quantile breakpoints for one or more columns.

    Args:
        df_in: dataframe with a `date` column and every column named in `vars`
        vars: either a dict mapping column name -> list of quantiles in [0, 1],
            or an iterable of column names that all share `qtiles`
        qtiles: used only when `vars` is not a dict. 5 selects quintile
            breakpoints, 10 selects decile breakpoints, None or any other int
            selects every 5% from 5% to 100%, and a list is used as given.

    Returns:
        A dataframe with one row per date, a `date` column, and one column per
        (variable, quantile) named '{var}_{pct}%' (e.g. 'me_50%').

    Raises:
        TypeError: if `vars` is not a dict and `qtiles` is neither None, an
            int nor a list.
    """
    DEFAULT_QTILES = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1]
    DECILES_QTILES = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    QUINTIL_QTILES = [0.2, 0.4, 0.6, 0.8]

    if isinstance(vars, dict):
        spec = vars
    elif qtiles is None or isinstance(qtiles, int):
        preset = {5: QUINTIL_QTILES, 10: DECILES_QTILES}.get(qtiles, DEFAULT_QTILES)
        spec = {var: preset for var in vars}
    elif isinstance(qtiles, list):
        spec = {var: qtiles for var in vars}
    else:
        raise TypeError("No valid vars or qtile combination given.")

    per_var = []
    for var, cuts in spec.items():
        grouped = df_in.groupby('date')[var]
        # Labels use '%g' formatting rather than int(100 * q): truncation turns
        # e.g. 0.29 (28.999999999999996 after scaling) into '28%'.
        columns = {f'{var}_{100 * q:g}%': grouped.quantile(q) for q in cuts}
        per_var.append(pd.DataFrame(columns).rename_axis('date'))

    # join the per-variable tables on their shared date index
    fin = functools.reduce(lambda left, right: pd.merge(left, right, on = 'date'), per_var)
    fin = fin.reset_index()
    return fin
def sort_03070(self, row, var)
-
Expand source code
def sort_03070(self, row, var):
    """Assign a row to one of four buckets: non-positive, then 30%/70% splits.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entries '{var}_30%' and '{var}_70%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' for values <= 0, '{var}2' below the 30% breakpoint, '{var}3'
        from the 30% up to (not including) the 70% breakpoint, '{var}4' at or
        above the 70% breakpoint, and '--fail' when no comparison holds
        (for example when the value is NaN).
    """
    value = row[var]
    if value <= 0:
        return f'{var}1'
    if value >= 0 and value < row[f'{var}_30%']:
        return f'{var}2'
    if value >= row[f'{var}_30%'] and value < row[f'{var}_70%']:
        return f'{var}3'
    if value >= row[f'{var}_70%']:
        return f'{var}4'
    return '--fail'
def sort_050(self, row, var)
-
Expand source code
def sort_050(self, row, var):
    """Assign a row to one of three buckets: negative, then a 50% split.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entry '{var}_50%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' for negative values, '{var}2' for values from 0 up to (not
        including) the 50% breakpoint, '{var}3' at or above the breakpoint, and
        '--fail' when no comparison holds (for example when the value is NaN).
    """
    value = row[var]
    # The negative bucket must end the decision here. Previously the next test
    # was a separate `if`, so its else-branch overwrote '{var}1' with '--fail'.
    if value < 0:
        return f'{var}1'
    if value >= 0 and value < row[f'{var}_50%']:
        return f'{var}2'
    if value >= row[f'{var}_50%']:
        return f'{var}3'
    return '--fail'
def sort_3070(self, row, var)
-
Expand source code
def sort_3070(self, row, var):
    """Assign a row to one of three buckets split at the 30% and 70% breakpoints.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entries '{var}_30%' and '{var}_70%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' below the 30% breakpoint, '{var}2' from the 30% up to (not
        including) the 70% breakpoint, '{var}3' at or above the 70% breakpoint,
        and '--fail' when no comparison holds (for example when the value is NaN).
    """
    value = row[var]
    if value < row[f'{var}_30%']:
        return f'{var}1'
    if value >= row[f'{var}_30%'] and value < row[f'{var}_70%']:
        return f'{var}2'
    if value >= row[f'{var}_70%']:
        return f'{var}3'
    return '--fail'
def sort_50(self, row, var)
-
Expand source code
def sort_50(self, row, var):
    """Assign a row to the lower or upper half relative to the 50% breakpoint.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entry '{var}_50%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' below the breakpoint, '{var}2' at or above it, and '--fail'
        when neither comparison holds (for example when the value is NaN).
    """
    value = row[var]
    if value < row[f'{var}_50%']:
        return f'{var}1'
    if value >= row[f'{var}_50%']:
        return f'{var}2'
    return '--fail'
def sort_deciles(self, row, var)
-
Expand source code
def sort_deciles(self, row, var):
    """Assign a row to one of ten buckets split at the 10%, 20%, ..., 90% breakpoints.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entries '{var}_10%' through '{var}_90%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' below the 10% breakpoint, '{var}k' for values from the
        (k-1)*10% breakpoint up to (not including) the k*10% breakpoint,
        '{var}10' at or above the 90% breakpoint, and '--fail' when no
        comparison holds (for example when the value is NaN).
    """
    value = row[var]
    cut_points = range(10, 100, 10)

    # lowest bucket has no lower bound
    if value < row[f'{var}_10%']:
        return f'{var}1'

    # interior buckets: [lower breakpoint, upper breakpoint)
    for bucket, (lower, upper) in enumerate(zip(cut_points, cut_points[1:]), start = 2):
        if value >= row[f'{var}_{lower}%'] and value < row[f'{var}_{upper}%']:
            return f'{var}{bucket}'

    # highest bucket has no upper bound
    if value >= row[f'{var}_90%']:
        return f'{var}10'
    return '--fail'
def sort_portfolios(self, stocks, char_bkpts, sorting_funcs, rebalance_freq, weight_type='vw', sort_month=7, ex_dividend=False, drop_na=True, breakpoint_exchanges=['1'], **kwargs)
-
Expand source code
def sort_portfolios(self, stocks, char_bkpts, sorting_funcs, rebalance_freq, weight_type = 'vw', sort_month = 7, ex_dividend = False, drop_na = True, breakpoint_exchanges = ['1'], **kwargs):
    """Sort stocks into portfolios and compute per-date portfolio returns.

    Args:
        stocks: dataframe with columns `me`, `wt`, `date`, `exchcd`, `permno`,
            the return column ('adjret' or 'adjretx') and every characteristic
            being sorted on; `month` and `ffyear` are also read when
            rebalance_freq is 'A'
        char_bkpts: passed to `breakpoint_ts` as `vars`
        sorting_funcs: dict mapping characteristic name -> function called as
            func(row, char) that returns the bucket label for the row
        rebalance_freq: 'A' sorts once a year using rows whose `month` equals
            sort_month and carries the assignment to all rows with the same
            (`permno`, `ffyear`); any other value sorts every row on its own date
        weight_type: 'vw' weights returns by `wt` via py_functions.wavg;
            anything else takes the plain mean
        sort_month: month used for the yearly sort when rebalance_freq is 'A'
        ex_dividend: use 'adjretx' instead of 'adjret' as the return column
        drop_na: drop result rows that contain any NaN
        breakpoint_exchanges: `exchcd` values whose stocks are used to compute
            the breakpoints
        **kwargs: accepted and ignored

    Returns:
        A dataframe with a `date` column, one return column per portfolio
        (named by joining the bucket labels with '_') and one
        '{portfolio}_num_firms' column per portfolio.
    """
    annual = (rebalance_freq == 'A')
    ret_typ = 'adjretx' if(ex_dividend) else 'adjret'
    port_keys = ['date', 'port_name']

    # removes nans (they fail the > 0 comparisons) and non-positive me / wt
    universe = stocks[(stocks.me > 0) & (stocks.wt > 0)]
    universe['date'] = pd.to_datetime(universe['date'])

    # rows on which the sort is performed
    formation_df = universe[universe.month == sort_month] if annual else universe

    # breakpoints come only from the selected exchanges
    bkpt_universe = formation_df[formation_df.exchcd.isin(breakpoint_exchanges)]
    cutoffs_df = self.breakpoint_ts(bkpt_universe, vars = char_bkpts)

    # attach each date's breakpoints to every stock sorted on that date
    formation_df = cutoffs_df.merge(formation_df, how = 'inner', on = ['date'])

    # label every row once per characteristic
    rank_cols = []
    for char, func in sorting_funcs.items():
        rank_name = f'{char}_rank'
        rank_cols.append(rank_name)
        formation_df[rank_name] = formation_df.apply(func, args = (char, ), axis = 1)

    # discard rows a sorting function could not place
    for rank_col in rank_cols:
        if '--fail' in formation_df[rank_col].unique():
            cprint.warn(f'There are stocks that could not be sorted in {rank_col}. They will be removed before constructing portfolios.')
            formation_df = formation_df[formation_df[rank_col] != '--fail']

    formation_df['port_name'] = formation_df[rank_cols].agg('_'.join, axis = 1)

    if annual:
        # carry the yearly assignment to every row of the same permno / ffyear
        assignment = formation_df[['permno', 'ffyear', 'port_name']]
        held_df = universe.merge(assignment, how = 'left', on = ['permno', 'ffyear'])
    else:
        held_df = formation_df
    held_df = held_df.dropna(subset = ['port_name'])

    # portfolio returns in long form
    if weight_type == 'vw':
        ret_s = held_df.groupby(port_keys).apply(py_functions.wavg, ret_typ, 'wt')
    else:
        ret_s = held_df.groupby(port_keys).mean(numeric_only = True)[ret_typ]
    rets = ret_s.to_frame().reset_index().rename(columns = {0: ret_typ})

    # number of firms per portfolio in long form
    firm = held_df.groupby(port_keys)['permno'].count().reset_index()
    firm = firm.rename(columns = {'permno': 'num_firms'})

    # reshape to one column per portfolio and combine
    rets_wide = rets.pivot(index = 'date', columns = 'port_name', values = ret_typ)
    firm_wide = firm.pivot(index = 'date', columns = 'port_name', values = 'num_firms')
    firm_wide = firm_wide.add_suffix('_num_firms')

    res = rets_wide.merge(firm_wide, how = 'inner', on = ['date']).reset_index()
    if drop_na:
        res = res.dropna()
    return res
def sort_quintile(self, row, var)
-
Expand source code
def sort_quintile(self, row, var):
    """Assign a row to one of five buckets split at the 20%, 40%, 60% and 80% breakpoints.

    Args:
        row: mapping (e.g. a dataframe row) holding `var` and the breakpoint
            entries '{var}_20%', '{var}_40%', '{var}_60%' and '{var}_80%'
        var: name of the characteristic being sorted

    Returns:
        '{var}1' below the 20% breakpoint, '{var}k' for values from the
        (k-1)*20% breakpoint up to (not including) the k*20% breakpoint,
        '{var}5' at or above the 80% breakpoint, and '--fail' when no
        comparison holds (for example when the value is NaN).
    """
    value = row[var]
    cut_points = range(20, 100, 20)

    # lowest bucket has no lower bound
    if value < row[f'{var}_20%']:
        return f'{var}1'

    # interior buckets: [lower breakpoint, upper breakpoint)
    for bucket, (lower, upper) in enumerate(zip(cut_points, cut_points[1:]), start = 2):
        if value >= row[f'{var}_{lower}%'] and value < row[f'{var}_{upper}%']:
            return f'{var}{bucket}'

    # highest bucket has no upper bound
    if value >= row[f'{var}_80%']:
        return f'{var}5'
    return '--fail'