Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/regression/_prediction.py : 13%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3Created on Fri Dec 19 11:29:18 2014
5Author: Josef Perktold
6License: BSD-3
8"""
10import numpy as np
11from scipy import stats
14# this is similar to ContrastResults after t_test, copied and adjusted
15class PredictionResults(object):
16 """
17 Results class for predictions.
19 Parameters
20 ----------
21 predicted_mean : ndarray
22 The array containing the prediction means.
23 var_pred_mean : ndarray
24 The array of the variance of the prediction means.
25 var_resid : ndarray
26 The array of residual variances.
27 df : int
28 The degree of freedom used if dist is 't'.
29 dist : {'norm', 't', object}
30 Either a string for the normal or t distribution or another object
31 that exposes a `ppf` method.
32 row_labels : list[str]
33 Row labels used in summary frame.
34 """
36 def __init__(self, predicted_mean, var_pred_mean, var_resid,
37 df=None, dist=None, row_labels=None):
38 self.predicted_mean = predicted_mean
39 self.var_pred_mean = var_pred_mean
40 self.df = df
41 self.var_resid = var_resid
42 self.row_labels = row_labels
44 if dist is None or dist == 'norm':
45 self.dist = stats.norm
46 self.dist_args = ()
47 elif dist == 't':
48 self.dist = stats.t
49 self.dist_args = (self.df,)
50 else:
51 self.dist = dist
52 self.dist_args = ()
54 @property
55 def se_obs(self):
56 return np.sqrt(self.var_pred_mean + self.var_resid)
58 @property
59 def se_mean(self):
60 return np.sqrt(self.var_pred_mean)
62 def conf_int(self, obs=False, alpha=0.05):
63 """
64 Returns the confidence interval of the value, `effect` of the
65 constraint.
67 This is currently only available for t and z tests.
69 Parameters
70 ----------
71 alpha : float, optional
72 The significance level for the confidence interval.
73 ie., The default `alpha` = .05 returns a 95% confidence interval.
75 Returns
76 -------
77 ci : ndarray, (k_constraints, 2)
78 The array has the lower and the upper limit of the confidence
79 interval in the columns.
80 """
82 se = self.se_obs if obs else self.se_mean
84 q = self.dist.ppf(1 - alpha / 2., *self.dist_args)
85 lower = self.predicted_mean - q * se
86 upper = self.predicted_mean + q * se
87 return np.column_stack((lower, upper))
89 def summary_frame(self, what='all', alpha=0.05):
90 # TODO: finish and cleanup
91 import pandas as pd
92 from collections import OrderedDict
93 ci_obs = self.conf_int(alpha=alpha, obs=True) # need to split
94 ci_mean = self.conf_int(alpha=alpha, obs=False)
95 to_include = OrderedDict()
96 to_include['mean'] = self.predicted_mean
97 to_include['mean_se'] = self.se_mean
98 to_include['mean_ci_lower'] = ci_mean[:, 0]
99 to_include['mean_ci_upper'] = ci_mean[:, 1]
100 to_include['obs_ci_lower'] = ci_obs[:, 0]
101 to_include['obs_ci_upper'] = ci_obs[:, 1]
103 self.table = to_include
104 # OrderedDict does not work to preserve sequence
105 # pandas dict does not handle 2d_array
106 # data = np.column_stack(list(to_include.values()))
107 # names = ....
108 res = pd.DataFrame(to_include, index=self.row_labels,
109 columns=to_include.keys())
110 return res
113def get_prediction(self, exog=None, transform=True, weights=None,
114 row_labels=None, pred_kwds=None):
115 """
116 Compute prediction results.
118 Parameters
119 ----------
120 exog : array_like, optional
121 The values for which you want to predict.
122 transform : bool, optional
123 If the model was fit via a formula, do you want to pass
124 exog through the formula. Default is True. E.g., if you fit
125 a model y ~ log(x1) + log(x2), and transform is True, then
126 you can pass a data structure that contains x1 and x2 in
127 their original form. Otherwise, you'd need to log the data
128 first.
129 weights : array_like, optional
130 Weights interpreted as in WLS, used for the variance of the predicted
131 residual.
132 row_labels : list
133 A list of row labels to use. If not provided, read `exog` is
134 available.
135 **kwargs
136 Some models can take additional keyword arguments, see the predict
137 method of the model for the details.
139 Returns
140 -------
141 linear_model.PredictionResults
142 The prediction results instance contains prediction and prediction
143 variance and can on demand calculate confidence intervals and summary
144 tables for the prediction of the mean and of new observations.
145 """
147 # prepare exog and row_labels, based on base Results.predict
148 if transform and hasattr(self.model, 'formula') and exog is not None:
149 from patsy import dmatrix
150 exog = dmatrix(self.model.data.design_info, exog)
152 if exog is not None:
153 if row_labels is None:
154 row_labels = getattr(exog, 'index', None)
155 if callable(row_labels):
156 row_labels = None
158 exog = np.asarray(exog)
159 if exog.ndim == 1 and (self.model.exog.ndim == 1 or
160 self.model.exog.shape[1] == 1):
161 exog = exog[:, None]
162 exog = np.atleast_2d(exog) # needed in count model shape[1]
163 else:
164 exog = self.model.exog
165 if weights is None:
166 weights = getattr(self.model, 'weights', None)
168 if row_labels is None:
169 row_labels = getattr(self.model.data, 'row_labels', None)
171 # need to handle other arrays, TODO: is delegating to model possible ?
172 if weights is not None:
173 weights = np.asarray(weights)
174 if (weights.size > 1 and
175 (weights.ndim != 1 or weights.shape[0] == exog.shape[1])):
176 raise ValueError('weights has wrong shape')
178 if pred_kwds is None:
179 pred_kwds = {}
180 predicted_mean = self.model.predict(self.params, exog, **pred_kwds)
182 covb = self.cov_params()
183 var_pred_mean = (exog * np.dot(covb, exog.T).T).sum(1)
184 var_resid = self.scale # self.mse_resid / weights
186 # TODO: check that we have correct scale, Refactor scale #???
187 # special case for now:
188 if self.cov_type == 'fixed scale':
189 var_resid = self.cov_kwds['scale']
191 if weights is not None:
192 var_resid /= weights
194 dist = ['norm', 't'][self.use_t]
195 return PredictionResults(predicted_mean, var_pred_mean, var_resid,
196 df=self.df_resid, dist=dist,
197 row_labels=row_labels)