Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1from statsmodels.compat.python import iterkeys 

2import statsmodels.tools.data as data_util 

3from patsy import dmatrices, NAAction 

4import numpy as np 

5 

# Registry for alternative formula frameworks. Users who want a formula
# object other than the default to be recognised can add their handler
# here, keyed by the formula object's class.
# (Open question from the original author: how to do this interactively?)

# The registry is a single mutable object, so entries added to it after
# import are seen by the code below that consults it.
formula_handler = dict()

11 

12 

class NAAction(NAAction):
    """
    Subclass of the imported ``NAAction`` that remembers which rows it drops.

    ``_handle_NA_drop`` is overridden so that the combined missing-value
    mask is stored on the instance as ``missing_mask``; this lets missing
    values in 'extra' arrays be handled later.
    """

    def _handle_NA_drop(self, values, is_NAs, origins):
        """
        Drop every row that is flagged as missing in any of ``is_NAs``.

        The combined mask (True where a row is dropped) is saved as
        ``self.missing_mask``. ``origins`` is accepted but not used here.
        Returns a list with each entry of ``values`` restricted to the
        rows that are kept.
        """
        # NOTE(review): is_NAs[0] is indexed unconditionally, so this
        # presumably is only called with at least one mask -- confirm.
        n_rows = is_NAs[0].shape[0]
        drop_rows = np.zeros(n_rows, dtype=bool)
        for na_flags in is_NAs:
            # accumulate in place: a row is dropped if any mask flags it
            drop_rows |= na_flags
        self.missing_mask = drop_rows
        keep_rows = ~drop_rows
        # the trailing "..." makes the same index work for 1-d and 2-d input
        return [arr[keep_rows, ...] for arr in values]

23 

24 

def handle_formula_data(Y, X, formula, depth=0, missing='drop'):
    """
    Build the design matrices for a formula and report dropped rows.

    Parameters
    ----------
    Y : array_like
        Either endog (the LHS) of a model specification or all of the data.
        Y must define __getitem__ for now.
    X : array_like
        Either exog or None. If all the data for the formula is provided in
        Y then you must explicitly set X to None.
    formula : str or patsy.model_desc
        You can pass a handler by importing formula_handler and adding a
        key-value pair where the key is the formula object class and
        the value is a function that returns endog, exog, formula object.
    depth : int
        Forwarded unchanged as the third positional argument of
        ``patsy.dmatrices`` (presumably its ``eval_env`` frame depth --
        confirm against the patsy documentation).
    missing : str
        Forwarded as ``on_NA`` to the ``NAAction`` that is handed to
        ``dmatrices``. Default is 'drop'.

    Returns
    -------
    result : tuple
        The value returned by ``dmatrices(..., return_type='dataframe')``.
    missing_mask : array_like or None
        The ``missing_mask`` attribute recorded on the NA action, or None
        when that attribute is absent or contains no truthy entry.
    design_info : object or None
        ``result[1].design_info`` when ``result`` has more than one
        element, otherwise None.

    Notes
    -----
    If ``formula`` is an instance of a class registered in
    ``formula_handler``, the function returns
    ``formula_handler[type(formula)]`` immediately and none of the above
    is computed.
    """
    # half-hearted attempt to handle other formula objects
    # NOTE(review): the registered value is returned as-is, it is NOT
    # called, and the lookup uses the exact type although the check above
    # it uses isinstance (a subclass instance would raise KeyError).
    # Left unchanged because callers may rely on it -- confirm intent.
    if isinstance(formula, tuple(iterkeys(formula_handler))):
        return formula_handler[type(formula)]

    na_action = NAAction(on_NA=missing)

    # The original branched on data_util._is_using_pandas(...) but both
    # branches issued the identical dmatrices call, so only the choice of
    # data container matters. The call stays inline in this function so
    # the meaning of `depth` relative to this frame is unchanged.
    data = Y if X is None else (Y, X)
    result = dmatrices(formula, data, depth, return_type='dataframe',
                       NA_action=na_action)

    # if missing == 'raise' there's no missing_mask attribute
    missing_mask = getattr(na_action, 'missing_mask', None)
    if not np.any(missing_mask):
        missing_mask = None
    if len(result) > 1:  # have RHS design
        design_info = result[1].design_info  # detach it from DataFrame
    else:
        design_info = None
    # NOTE: is there ever a case where we'd need LHS design_info?
    return result, missing_mask, design_info

80 

81 

def _remove_intercept_patsy(terms):
    """
    Strip patsy's INTERCEPT term from ``terms`` if it is present.

    ``terms`` is modified in place (via its ``remove`` method) and the
    same object is returned.
    """
    from patsy.desc import INTERCEPT

    has_intercept = INTERCEPT in terms
    if not has_intercept:
        # nothing to strip; hand the container back untouched
        return terms
    terms.remove(INTERCEPT)
    return terms

90 

91 

def _has_intercept(design_info):
    """
    Report whether patsy's INTERCEPT term is among ``design_info.terms``.
    """
    from patsy.desc import INTERCEPT

    model_terms = design_info.terms
    return INTERCEPT in model_terms

95 

96 

def _intercept_idx(design_info):
    """
    Return an array marking where the intercept sits in the design.

    One entry is produced per element of ``design_info.terms``; the entry
    is the result of comparing patsy's INTERCEPT with that term, so it is
    True exactly for the intercept term.
    """
    from patsy.desc import INTERCEPT
    from numpy import array

    flags = []
    for term in design_info.terms:
        # keep INTERCEPT on the left so the same __eq__ is dispatched
        flags.append(INTERCEPT == term)
    return array(flags)

104 

105 

def make_hypotheses_matrices(model_results, test_formula):
    """
    Turn a hypothesis formula into a patsy linear constraint.

    ``test_formula`` is handed to ``patsy.constraint.linear_constraint``
    together with ``model_results.model.exog_names`` as the variable
    names; whatever that call returns is returned unchanged.
    """
    from patsy.constraint import linear_constraint

    return linear_constraint(test_formula, model_results.model.exog_names)

112 return LC