# -*- coding: utf-8 -*-
"""
Created on Fri Dec 19 11:29:18 2014

Author: Josef Perktold
License: BSD-3

"""

import numpy as np
from scipy import stats


# this is similar to ContrastResults after t_test, copied and adjusted
class PredictionResults(object):
    """
    Results class for predictions.

    Parameters
    ----------
    predicted_mean : ndarray
        The array containing the prediction means.
    var_pred_mean : ndarray
        The array of the variance of the prediction means.
    var_resid : ndarray
        The array of residual variances.
    df : int
        The degrees of freedom used if dist is 't'.
    dist : {'norm', 't', object}
        Either a string for the normal or t distribution or another object
        that exposes a `ppf` method.
    row_labels : list[str]
        Row labels used in summary frame.
    """

    def __init__(self, predicted_mean, var_pred_mean, var_resid,
                 df=None, dist=None, row_labels=None):
        self.predicted_mean = predicted_mean
        self.var_pred_mean = var_pred_mean
        self.df = df
        self.var_resid = var_resid
        self.row_labels = row_labels

        if dist is None or dist == 'norm':
            self.dist = stats.norm
            self.dist_args = ()
        elif dist == 't':
            self.dist = stats.t
            self.dist_args = (self.df,)
        else:
            self.dist = dist
            self.dist_args = ()

    @property
    def se_obs(self):
        return np.sqrt(self.var_pred_mean + self.var_resid)

    @property
    def se_mean(self):
        return np.sqrt(self.var_pred_mean)

    def conf_int(self, obs=False, alpha=0.05):
        """
        Returns the confidence interval of the predicted mean or, if
        `obs` is True, the prediction interval for a new observation.

        This is currently only available for t and z tests.

        Parameters
        ----------
        obs : bool, optional
            If True, the interval is based on `se_obs`, which adds the
            residual variance, i.e. it is a prediction interval for a new
            observation. Default is False (interval for the mean).
        alpha : float, optional
            The significance level for the confidence interval.
            i.e., the default `alpha` = .05 returns a 95% confidence
            interval.

        Returns
        -------
        ci : ndarray, (n, 2)
            The array has the lower and the upper limit of the confidence
            interval in the columns.
        """

        se = self.se_obs if obs else self.se_mean

        q = self.dist.ppf(1 - alpha / 2., *self.dist_args)
        lower = self.predicted_mean - q * se
        upper = self.predicted_mean + q * se
        return np.column_stack((lower, upper))

    def summary_frame(self, what='all', alpha=0.05):
        # TODO: finish and cleanup
        import pandas as pd
        from collections import OrderedDict
        ci_obs = self.conf_int(alpha=alpha, obs=True)  # need to split
        ci_mean = self.conf_int(alpha=alpha, obs=False)
        to_include = OrderedDict()
        to_include['mean'] = self.predicted_mean
        to_include['mean_se'] = self.se_mean
        to_include['mean_ci_lower'] = ci_mean[:, 0]
        to_include['mean_ci_upper'] = ci_mean[:, 1]
        to_include['obs_ci_lower'] = ci_obs[:, 0]
        to_include['obs_ci_upper'] = ci_obs[:, 1]

        self.table = to_include
        # OrderedDict does not work to preserve sequence
        # pandas dict does not handle 2d_array
        # data = np.column_stack(list(to_include.values()))
        # names = ....
        res = pd.DataFrame(to_include, index=self.row_labels,
                           columns=to_include.keys())
        return res


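# Usage sketch for PredictionResults (illustrative only; `mean`, `var_mean`,
# `var_resid` and `df_resid` below are placeholder names, not part of this
# module):
#
#     pr = PredictionResults(mean, var_mean, var_resid, df=df_resid, dist='t')
#     pr.conf_int(alpha=0.05)            # CI for the mean: mean -/+ q * se_mean
#     pr.conf_int(obs=True, alpha=0.05)  # prediction interval, uses se_obs
#
# with q = dist.ppf(1 - alpha / 2, *dist_args), i.e. the t or normal quantile.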

def get_prediction(self, exog=None, transform=True, weights=None,
                   row_labels=None, pred_kwds=None):
    """
    Compute prediction results.

    Parameters
    ----------
    exog : array_like, optional
        The values for which you want to predict.
    transform : bool, optional
        If the model was fit via a formula, do you want to pass
        exog through the formula. Default is True. E.g., if you fit
        a model y ~ log(x1) + log(x2), and transform is True, then
        you can pass a data structure that contains x1 and x2 in
        their original form. Otherwise, you'd need to log the data
        first.
    weights : array_like, optional
        Weights interpreted as in WLS, used for the variance of the
        predicted residual.
    row_labels : list
        A list of row labels to use. If not provided, they are read from
        `exog` if available.
    pred_kwds : dict, optional
        Additional keyword arguments passed to the model's `predict`
        method; some models take extra arguments, see the predict method
        of the model for the details.

    Returns
    -------
    linear_model.PredictionResults
        The prediction results instance contains prediction and prediction
        variance and can on demand calculate confidence intervals and summary
        tables for the prediction of the mean and of new observations.
    """

    # prepare exog and row_labels, based on base Results.predict
    if transform and hasattr(self.model, 'formula') and exog is not None:
        from patsy import dmatrix
        exog = dmatrix(self.model.data.design_info, exog)

    if exog is not None:
        if row_labels is None:
            row_labels = getattr(exog, 'index', None)
            if callable(row_labels):
                row_labels = None

        exog = np.asarray(exog)
        if exog.ndim == 1 and (self.model.exog.ndim == 1 or
                               self.model.exog.shape[1] == 1):
            exog = exog[:, None]
        exog = np.atleast_2d(exog)  # needed in count model shape[1]
    else:
        exog = self.model.exog
        if weights is None:
            weights = getattr(self.model, 'weights', None)

        if row_labels is None:
            row_labels = getattr(self.model.data, 'row_labels', None)

    # need to handle other arrays, TODO: is delegating to model possible ?
    if weights is not None:
        weights = np.asarray(weights)
        if (weights.size > 1 and
                (weights.ndim != 1 or weights.shape[0] == exog.shape[1])):
            raise ValueError('weights has wrong shape')

    if pred_kwds is None:
        pred_kwds = {}
    predicted_mean = self.model.predict(self.params, exog, **pred_kwds)

    covb = self.cov_params()
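    # The next assignment computes the variance of the predicted mean as the
    # row-wise quadratic form x_i' Cov(params) x_i, i.e. the diagonal of
    # exog @ covb @ exog.T, without forming the full n x n matrix.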

    var_pred_mean = (exog * np.dot(covb, exog.T).T).sum(1)
    var_resid = self.scale  # self.mse_resid / weights

    # TODO: check that we have correct scale, Refactor scale #???
    # special case for now:
    if self.cov_type == 'fixed scale':
        var_resid = self.cov_kwds['scale']

    if weights is not None:
        var_resid /= weights

    dist = ['norm', 't'][self.use_t]
    return PredictionResults(predicted_mean, var_pred_mean, var_resid,
                             df=self.df_resid, dist=dist,
                             row_labels=row_labels)
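

if __name__ == "__main__":
    # Minimal usage sketch, assuming statsmodels is installed; note that
    # `get_prediction` takes a fitted results instance as its first
    # argument (`self`). The data below are made up purely for illustration.
    import statsmodels.api as sm

    np.random.seed(12345)
    x = np.random.randn(50)
    y = 1.0 + 2.0 * x + 0.5 * np.random.randn(50)
    res_ols = sm.OLS(y, sm.add_constant(x)).fit()

    pred = get_prediction(res_ols)
    # mean, se_mean and the confidence / prediction interval columns
    print(pred.summary_frame(alpha=0.05).head())
    # 90% prediction intervals for new observations
    print(pred.conf_int(obs=True, alpha=0.10)[:5])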