1""" 

2Empirical likelihood inference on descriptive statistics 

3 

4This module conducts hypothesis tests and constructs confidence 

5intervals for the mean, variance, skewness, kurtosis and correlation. 

6 

7If matplotlib is installed, this module can also generate multivariate 

8confidence region plots as well as mean-variance contour plots. 

9 

10See _OptFuncts docstring for technical details and optimization variable 

11definitions. 

12 

13General References: 

14------------------ 

15Owen, A. (2001). "Empirical Likelihood." Chapman and Hall 

16 

17""" 

18import numpy as np 

19from scipy import optimize 

20from scipy.stats import chi2, skew, kurtosis 

21from statsmodels.base.optimizer import _fit_newton 

22import itertools 

23from statsmodels.graphics import utils 


def DescStat(endog):
    """
    Returns an instance to conduct inference on descriptive statistics
    via empirical likelihood. See DescStatUV and DescStatMV for more
    information.

    Parameters
    ----------
    endog : ndarray
        Array of data

    Returns
    -------
    descstat : DescStatUV or DescStatMV instance
        If endog is one-dimensional or has a single column, a univariate
        instance, DescStatUV, is returned. If endog has more than one
        column, a multivariate instance, DescStatMV, is returned.
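
    Examples
    --------
    A minimal sketch on simulated data (illustrative draws, output
    omitted):

    >>> import numpy as np
    >>> import statsmodels.api as sm
    >>> uv = sm.emplike.DescStat(np.random.standard_normal(50))
    >>> mv = sm.emplike.DescStat(np.random.standard_normal((50, 2)))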

40 """ 

41 if endog.ndim == 1: 

42 endog = endog.reshape(len(endog), 1) 

43 if endog.shape[1] == 1: 

44 return DescStatUV(endog) 

45 if endog.shape[1] > 1: 

46 return DescStatMV(endog) 


class _OptFuncts(object):
    """
    A class that holds functions that are optimized or solved.

    The general setup of the class is simple. Any method that starts
    with _opt_ creates a vector of estimating equations named est_vect
    such that np.dot(p, est_vect) = 0, where p is the 1 x n array of
    observation weights and est_vect is n x k. _modif_newton is then
    called to determine the optimal p by solving for the Lagrange
    multiplier (eta) in the profile likelihood maximization problem.
    In the presence of nuisance parameters, the _opt_ functions are
    optimized over to profile out the nuisance parameters.

    Any method starting with _ci_limits calculates the log likelihood
    ratio for a specific value of a parameter and then subtracts a
    pre-specified critical value, so that the root of llr - crit = 0
    can be solved for.
    """

    def __init__(self, endog):
        pass

    def _log_star(self, eta, est_vect, weights, nobs):
        """
        Transforms the log of observation probabilities in terms of the
        Lagrange multiplier to the log 'star' of the probabilities.

        Parameters
        ----------
        eta : float
            Lagrange multiplier

        est_vect : ndarray (n,k)
            Estimating equations vector

        weights : nx1 array
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        data_star : ndarray
            The weighted logstar of the estimating equations

        Notes
        -----
        This function is only a placeholder for _fit_newton.
        The function value is not used in optimization and the optimal value
        is disregarded when computing the log likelihood ratio.
        """
        data_star = np.log(weights) + (np.sum(weights) +
                                       np.dot(est_vect, eta))
        idx = data_star < 1. / nobs
        not_idx = ~idx
        nx = nobs * data_star[idx]
        data_star[idx] = np.log(1. / nobs) - 1.5 + nx * (2. - nx / 2)
        data_star[not_idx] = np.log(data_star[not_idx])
        return data_star
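
    # Note on _log_star: following Owen's log* construction, write
    # z = sum(weights) + est_vect @ eta. Then log*(z) equals log(z) for
    # z >= 1/nobs, while below that threshold it is the quadratic
    # log(1/nobs) - 1.5 + 2*(nobs*z) - (nobs*z)**2 / 2, which matches
    # log(z) in value, first and second derivative at z = 1/nobs and so
    # keeps the objective finite for nonpositive z.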

    def _hess(self, eta, est_vect, weights, nobs):
        """
        Calculates the Hessian of a weighted empirical likelihood
        problem.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        hess : m x m array
            Weighted Hessian used in _modif_newton
        """
        data_star_doub_prime = np.sum(weights) + np.dot(est_vect, eta)
        idx = data_star_doub_prime < 1. / nobs
        not_idx = ~idx
        data_star_doub_prime[idx] = -nobs ** 2
        data_star_doub_prime[not_idx] = -(data_star_doub_prime[not_idx]) ** -2
        wtd_dsdp = weights * data_star_doub_prime
        return np.dot(est_vect.T, wtd_dsdp[:, None] * est_vect)

    def _grad(self, eta, est_vect, weights, nobs):
        """
        Calculates the gradient of a weighted empirical likelihood
        problem.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray, (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        nobs : int
            Number of observations

        Returns
        -------
        gradient : ndarray (m,1)
            The gradient used in _modif_newton
        """
        data_star_prime = np.sum(weights) + np.dot(est_vect, eta)
        idx = data_star_prime < 1. / nobs
        not_idx = ~idx
        data_star_prime[idx] = nobs * (2 - nobs * data_star_prime[idx])
        data_star_prime[not_idx] = 1. / data_star_prime[not_idx]
        return np.dot(weights * data_star_prime, est_vect)

    def _modif_newton(self, eta, est_vect, weights):
        """
        Modified Newton's method for maximizing the log 'star' equation.
        This function calls _fit_newton to find the optimal values of eta.

        Parameters
        ----------
        eta : ndarray, (1,m)
            Lagrange multiplier in the profile likelihood maximization

        est_vect : ndarray, (n,k)
            Estimating equations vector

        weights : 1darray
            Observation weights

        Returns
        -------
        params : 1xm array
            Lagrange multiplier that maximizes the log-likelihood
        """
        nobs = len(est_vect)
        f = lambda x0: - np.sum(self._log_star(x0, est_vect, weights, nobs))
        grad = lambda x0: - self._grad(x0, est_vect, weights, nobs)
        hess = lambda x0: - self._hess(x0, est_vect, weights, nobs)
        kwds = {'tol': 1e-8}
        eta = eta.squeeze()
        res = _fit_newton(f, grad, eta, (), kwds, hess=hess, maxiter=50,
                          disp=0)
        return res[0]

    def _find_eta(self, eta):
        """
        Finding the root of sum((x_i - mu0) / (1 + eta * (x_i - mu0)))
        solves for eta when computing the ELR for the univariate mean.

        Parameters
        ----------
        eta : float
            Lagrange multiplier in the empirical likelihood maximization

        Returns
        -------
        eta_fun : float
            Value of the estimating equation at eta; its root gives the
            optimal Lagrange multiplier
        """
        return np.sum((self.endog - self.mu0) /
                      (1. + eta * (self.endog - self.mu0)))

    def _ci_limits_mu(self, mu):
        """
        Calculates the difference between the log likelihood ratio at mu
        and a pre-specified critical value.

        Parameters
        ----------
        mu : float
            Hypothesized value of the mean.

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at mu and a
            pre-specified value.
        """
        return self.test_mean(mu)[0] - self.r0

    def _find_gamma(self, gamma):
        """
        Finds gamma that satisfies
        -2 * sum(log(nobs * weights(gamma))) - r0 = 0.

        Used for confidence intervals for the mean.

        Parameters
        ----------
        gamma : float
            Lagrange multiplier when computing confidence interval

        Returns
        -------
        diff : float
            The difference between the log-likelihood when the Lagrange
            multiplier is gamma and a pre-specified value
        """
        denom = np.sum((self.endog - gamma) ** -1)
        new_weights = (self.endog - gamma) ** -1 / denom
        return -2 * np.sum(np.log(self.nobs * new_weights)) - self.r0

    def _opt_var(self, nuisance_mu, pval=False):
        """
        This is the function to be optimized over a nuisance mean parameter
        to determine the likelihood ratio for the variance.

        Parameters
        ----------
        nuisance_mu : float
            Value of a nuisance mean parameter

        pval : bool
            If True, returns the p-value instead of the likelihood ratio.
            Used for contour plotting. Default is False

        Returns
        -------
        llr : float
            Log likelihood of a pre-specified variance holding the nuisance
            parameter constant
        """
        endog = self.endog
        nobs = self.nobs
        sig_data = ((endog - nuisance_mu) ** 2 - self.sig2_0)
        mu_data = (endog - nuisance_mu)
        est_vect = np.column_stack((mu_data, sig_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                                1. / nobs]), est_vect,
                                      np.ones(nobs) * (1. / nobs))

        denom = 1 + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        if pval:  # Used for contour plotting
            return chi2.sf(-2 * llr, 1)
        return -2 * llr

    def _ci_limits_var(self, var):
        """
        Used to determine the confidence intervals for the variance.
        It calls test_var and, when called by an optimizer, finds the
        value of sig2_0 where the llr equals chi2.ppf(1 - significance
        level).

        Parameters
        ----------
        var : float
            Hypothesized value of the variance

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at var and a
            pre-specified value.
        """
        return self.test_var(var)[0] - self.r0

    def _opt_skew(self, nuis_params):
        """
        Called by test_skew. This function is optimized over the
        nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified skewness holding
            the nuisance parameters constant.
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        skew_data = ((((endog - nuis_params[0]) ** 3) /
                      (nuis_params[1] ** 1.5))) - self.skew0
        est_vect = np.column_stack((mu_data, sig_data, skew_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                                1. / nobs,
                                                1. / nobs]), est_vect,
                                      np.ones(nobs) * (1. / nobs))
        denom = 1. + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _opt_kurt(self, nuis_params):
        """
        Called by test_kurt. This function is optimized over the
        nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified kurtosis holding
            the nuisance parameters constant
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        kurt_data = (((((endog - nuis_params[0]) ** 4) /
                       (nuis_params[1] ** 2))) - 3) - self.kurt0
        est_vect = np.column_stack((mu_data, sig_data, kurt_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                                1. / nobs,
                                                1. / nobs]), est_vect,
                                      np.ones(nobs) * (1. / nobs))
        denom = 1 + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _opt_skew_kurt(self, nuis_params):
        """
        Called by test_joint_skew_kurt. This function is optimized over
        the nuisance parameters mu and sigma.

        Parameters
        ----------
        nuis_params : 1darray
            An array with a nuisance mean and variance parameter

        Returns
        -------
        llr : float
            The log likelihood ratio of a pre-specified skewness and
            kurtosis holding the nuisance parameters constant.
        """
        endog = self.endog
        nobs = self.nobs
        mu_data = endog - nuis_params[0]
        sig_data = ((endog - nuis_params[0]) ** 2) - nuis_params[1]
        skew_data = ((((endog - nuis_params[0]) ** 3) /
                      (nuis_params[1] ** 1.5))) - self.skew0
        kurt_data = (((((endog - nuis_params[0]) ** 4) /
                       (nuis_params[1] ** 2))) - 3) - self.kurt0
        est_vect = np.column_stack((mu_data, sig_data, skew_data, kurt_data))
        eta_star = self._modif_newton(np.array([1. / nobs,
                                                1. / nobs,
                                                1. / nobs,
                                                1. / nobs]), est_vect,
                                      np.ones(nobs) * (1. / nobs))
        denom = 1. + np.dot(eta_star, est_vect.T)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _ci_limits_skew(self, skew):
        """
        Parameters
        ----------
        skew : float
            Hypothesized value of skewness

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at skew and a
            pre-specified value.
        """
        return self.test_skew(skew)[0] - self.r0

    def _ci_limits_kurt(self, kurt):
        """
        Parameters
        ----------
        kurt : float
            Hypothesized value of kurtosis

        Returns
        -------
        diff : float
            The difference between the log likelihood ratio at kurt and a
            pre-specified value.
        """
        return self.test_kurt(kurt)[0] - self.r0

    def _opt_correl(self, nuis_params, corr0, endog, nobs, x0, weights0):
        """
        Parameters
        ----------
        nuis_params : 1darray
            Array containing two nuisance means and two nuisance variances

        corr0 : float
            Hypothesized value of the correlation coefficient

        endog, nobs, x0, weights0
            Data, number of observations, starting value for eta and
            starting weights, passed through from test_corr

        Returns
        -------
        llr : float
            The log-likelihood of the correlation coefficient holding
            nuisance parameters constant
        """
        mu1_data, mu2_data = (endog - nuis_params[::2]).T
        sig1_data = mu1_data ** 2 - nuis_params[1]
        sig2_data = mu2_data ** 2 - nuis_params[3]
        correl_data = ((mu1_data * mu2_data) - corr0 *
                       (nuis_params[1] * nuis_params[3]) ** .5)
        est_vect = np.column_stack((mu1_data, sig1_data,
                                    mu2_data, sig2_data, correl_data))
        eta_star = self._modif_newton(x0, est_vect, weights0)
        denom = 1. + np.dot(est_vect, eta_star)
        self.new_weights = 1. / nobs * 1. / denom
        llr = np.sum(np.log(nobs * self.new_weights))
        return -2 * llr

    def _ci_limits_corr(self, corr):
        return self.test_corr(corr)[0] - self.r0


class DescStatUV(_OptFuncts):
    """
    A class to compute confidence intervals and hypothesis tests
    involving the mean, variance, kurtosis and skewness of a
    univariate random variable.

    Parameters
    ----------
    endog : 1darray
        Data to be analyzed

    Attributes
    ----------
    endog : 1darray
        Data to be analyzed

    nobs : int
        Number of observations
    """

    def __init__(self, endog):
        self.endog = np.squeeze(endog)
        self.nobs = endog.shape[0]

    def test_mean(self, mu0, return_weights=False):
        """
        Returns -2 x log-likelihood ratio, the p-value and the weights
        for a hypothesis test of the mean.

        Parameters
        ----------
        mu0 : float
            Mean value to be tested

        return_weights : bool
            If return_weights is True, the function returns
            the weights of the observations under the null hypothesis.
            Default is False

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of mu0
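
        Examples
        --------
        A minimal sketch on simulated data; the draws are random, so the
        numerical output is omitted:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> llr, pval = el_analysis.test_mean(0)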

508 """ 

509 self.mu0 = mu0 

510 endog = self.endog 

511 nobs = self.nobs 

512 eta_min = (1. - (1. / nobs)) / (self.mu0 - max(endog)) 

513 eta_max = (1. - (1. / nobs)) / (self.mu0 - min(endog)) 

514 eta_star = optimize.brentq(self._find_eta, eta_min, eta_max) 

515 new_weights = (1. / nobs) * 1. / (1. + eta_star * (endog - self.mu0)) 

516 llr = -2 * np.sum(np.log(nobs * new_weights)) 

517 if return_weights: 

518 return llr, chi2.sf(llr, 1), new_weights 

519 else: 

520 return llr, chi2.sf(llr, 1) 

    def ci_mean(self, sig=.05, method='gamma', epsilon=10 ** -8,
                gamma_low=-10 ** 10, gamma_high=10 ** 10):
        """
        Returns the confidence interval for the mean.

        Parameters
        ----------
        sig : float
            Significance level. Default is .05

        method : str
            Root finding method. Can be 'nested-brent' or
            'gamma'. Default is 'gamma'

            'gamma' tries to solve for the gamma parameter in the
            Lagrangian (see Owen pg 22) and then determines the weights.

            'nested-brent' uses Brent's method to find the confidence
            intervals but must maximize the likelihood ratio on every
            iteration.

            'gamma' is generally much faster. If the optimization does
            not converge, try expanding the gamma_high and gamma_low
            variables.

        epsilon : float
            When using 'nested-brent', the amount to decrease (increase)
            from the maximum (minimum) of the data when
            starting the search. This is to protect against the
            likelihood ratio being zero at the maximum (minimum)
            value of the data. If the data is very small in absolute value
            (<10 ``**`` -6), consider shrinking epsilon.

            When using 'gamma', the amount to decrease (increase) the
            minimum (maximum) by to start the search for gamma.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider lowering epsilon.

        gamma_low : float
            Lower bound for gamma when finding the lower limit.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider lowering gamma_low.

        gamma_high : float
            Upper bound for gamma when finding the upper limit.
            If the function returns the error 'f(a) and f(b) must have
            different signs', consider raising gamma_high.

        Returns
        -------
        Interval : tuple
            Confidence interval for the mean
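
        Examples
        --------
        An illustrative sketch on simulated data (random draws, output
        omitted):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> ci_low, ci_high = el_analysis.ci_mean()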

574 """ 

575 endog = self.endog 

576 sig = 1 - sig 

577 if method == 'nested-brent': 

578 self.r0 = chi2.ppf(sig, 1) 

579 middle = np.mean(endog) 

580 epsilon_u = (max(endog) - np.mean(endog)) * epsilon 

581 epsilon_l = (np.mean(endog) - min(endog)) * epsilon 

582 ulim = optimize.brentq(self._ci_limits_mu, middle, 

583 max(endog) - epsilon_u) 

584 llim = optimize.brentq(self._ci_limits_mu, middle, 

585 min(endog) + epsilon_l) 

586 return llim, ulim 

587 

588 if method == 'gamma': 

589 self.r0 = chi2.ppf(sig, 1) 

590 gamma_star_l = optimize.brentq(self._find_gamma, gamma_low, 

591 min(endog) - epsilon) 

592 gamma_star_u = optimize.brentq(self._find_gamma, \ 

593 max(endog) + epsilon, gamma_high) 

594 weights_low = ((endog - gamma_star_l) ** -1) / \ 

595 np.sum((endog - gamma_star_l) ** -1) 

596 weights_high = ((endog - gamma_star_u) ** -1) / \ 

597 np.sum((endog - gamma_star_u) ** -1) 

598 mu_low = np.sum(weights_low * endog) 

599 mu_high = np.sum(weights_high * endog) 

600 return mu_low, mu_high 

    def test_var(self, sig2_0, return_weights=False):
        """
        Returns -2 x log-likelihood ratio and the p-value for the
        hypothesized variance.

        Parameters
        ----------
        sig2_0 : float
            Hypothesized variance to be tested

        return_weights : bool
            If True, returns the weights that maximize the
            likelihood of observing sig2_0. Default is False

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and the p-value of sig2_0

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(1000) * 100
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> hyp_test = el_analysis.test_var(9500)
        """
        self.sig2_0 = sig2_0
        mu_max = max(self.endog)
        mu_min = min(self.endog)
        llr = optimize.fminbound(self._opt_var, mu_min, mu_max,
                                 full_output=1)[1]
        p_val = chi2.sf(llr, 1)
        if return_weights:
            return llr, p_val, self.new_weights.T
        else:
            return llr, p_val

    def ci_var(self, lower_bound=None, upper_bound=None, sig=.05):
        """
        Returns the confidence interval for the variance.

        Parameters
        ----------
        lower_bound : float
            The minimum value the lower confidence interval can
            take. The p-value from test_var(lower_bound) must be lower
            than 1 - significance level. Default is the .99 confidence
            limit assuming normality

        upper_bound : float
            The maximum value the upper confidence interval
            can take. The p-value from test_var(upper_bound) must be lower
            than 1 - significance level. Default is the .99 confidence
            limit assuming normality

        sig : float
            The significance level. Default is .05

        Returns
        -------
        Interval : tuple
            Confidence interval for the variance

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> el_analysis.ci_var()
        (0.7539322567470305, 1.229998852496268)
        >>> el_analysis.ci_var(.5, 2)
        (0.7539322567469926, 1.2299988524962664)

        Notes
        -----
        If the function returns the error 'f(a) and f(b) must have
        different signs', consider lowering lower_bound and raising
        upper_bound.
        """
        endog = self.endog
        if upper_bound is None:
            upper_bound = ((self.nobs - 1) * endog.var()) / \
                (chi2.ppf(.0001, self.nobs - 1))
        if lower_bound is None:
            lower_bound = ((self.nobs - 1) * endog.var()) / \
                (chi2.ppf(.9999, self.nobs - 1))
        self.r0 = chi2.ppf(1 - sig, 1)
        llim = optimize.brentq(self._ci_limits_var, lower_bound, endog.var())
        ulim = optimize.brentq(self._ci_limits_var, endog.var(), upper_bound)
        return llim, ulim

    def plot_contour(self, mu_low, mu_high, var_low, var_high, mu_step,
                     var_step,
                     levs=[.2, .1, .05, .01, .001]):
        """
        Returns a plot of the confidence region for a univariate
        mean and variance.

        Parameters
        ----------
        mu_low : float
            Lowest value of the mean to plot

        mu_high : float
            Highest value of the mean to plot

        var_low : float
            Lowest value of the variance to plot

        var_high : float
            Highest value of the variance to plot

        mu_step : float
            Increments to evaluate the mean

        var_step : float
            Increments to evaluate the variance

        levs : list
            The significance levels at which the contour lines will be
            drawn. Default is [.2, .1, .05, .01, .001]

        Returns
        -------
        Figure
            The contour plot
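
        Examples
        --------
        An illustrative sketch; the bounds and step sizes below are
        arbitrary choices for simulated standard-normal data:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> fig = el_analysis.plot_contour(-.5, .5, .5, 2, .05, .05)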

730 """ 

731 fig, ax = utils.create_mpl_ax() 

732 ax.set_ylabel('Variance') 

733 ax.set_xlabel('Mean') 

734 mu_vect = list(np.arange(mu_low, mu_high, mu_step)) 

735 var_vect = list(np.arange(var_low, var_high, var_step)) 

736 z = [] 

737 for sig0 in var_vect: 

738 self.sig2_0 = sig0 

739 for mu0 in mu_vect: 

740 z.append(self._opt_var(mu0, pval=True)) 

741 z = np.asarray(z).reshape(len(var_vect), len(mu_vect)) 

742 ax.contour(mu_vect, var_vect, z, levels=levs) 

743 return fig 

    def test_skew(self, skew0, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value for the hypothesized
        skewness.

        Parameters
        ----------
        skew0 : float
            Skewness value to be tested

        return_weights : bool
            If True, the function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of skew0
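
        Examples
        --------
        A sketch on simulated data (random draws, output omitted); 0 is
        the skewness of the generating normal distribution:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> llr, pval = el_analysis.test_skew(0)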

763 """ 

764 self.skew0 = skew0 

765 start_nuisance = np.array([self.endog.mean(), 

766 self.endog.var()]) 

767 

768 llr = optimize.fmin_powell(self._opt_skew, start_nuisance, 

769 full_output=1, disp=0)[1] 

770 p_val = chi2.sf(llr, 1) 

771 if return_weights: 

772 return llr, p_val, self.new_weights.T 

773 return llr, p_val 

    def test_kurt(self, kurt0, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value for the hypothesized
        kurtosis.

        Parameters
        ----------
        kurt0 : float
            Kurtosis value to be tested

        return_weights : bool
            If True, the function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of kurt0
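
        Examples
        --------
        A sketch on simulated data (random draws, output omitted); 0 is
        the excess kurtosis of the generating normal distribution:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> llr, pval = el_analysis.test_kurt(0)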

793 """ 

794 self.kurt0 = kurt0 

795 start_nuisance = np.array([self.endog.mean(), 

796 self.endog.var()]) 

797 

798 llr = optimize.fmin_powell(self._opt_kurt, start_nuisance, 

799 full_output=1, disp=0)[1] 

800 p_val = chi2.sf(llr, 1) 

801 if return_weights: 

802 return llr, p_val, self.new_weights.T 

803 return llr, p_val 

    def test_joint_skew_kurt(self, skew0, kurt0, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value for the joint
        hypothesis test for skewness and kurtosis.

        Parameters
        ----------
        skew0 : float
            Skewness value to be tested

        kurt0 : float
            Kurtosis value to be tested

        return_weights : bool
            If True, the function also returns the weights that
            maximize the likelihood ratio. Default is False.

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of the joint hypothesis
            test.
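
        Examples
        --------
        A sketch on simulated data (random draws, output omitted),
        jointly testing the normal values of 0 skewness and 0 excess
        kurtosis:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> llr, pval = el_analysis.test_joint_skew_kurt(0, 0)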

825 """ 

826 self.skew0 = skew0 

827 self.kurt0 = kurt0 

828 start_nuisance = np.array([self.endog.mean(), 

829 self.endog.var()]) 

830 

831 llr = optimize.fmin_powell(self._opt_skew_kurt, start_nuisance, 

832 full_output=1, disp=0)[1] 

833 p_val = chi2.sf(llr, 2) 

834 if return_weights: 

835 return llr, p_val, self.new_weights.T 

836 return llr, p_val 

    def ci_skew(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence interval for skewness.

        Parameters
        ----------
        sig : float
            The significance level. Default is .05

        upper_bound : float
            Maximum value of skewness the upper limit can be.
            Default is the .99 confidence limit assuming normality.

        lower_bound : float
            Minimum value of skewness the lower limit can be.
            Default is the .99 confidence limit assuming normality.

        Returns
        -------
        Interval : tuple
            Confidence interval for the skewness

        Notes
        -----
        If the function returns the error 'f(a) and f(b) must have
        different signs', consider expanding the lower and upper bounds.
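
        Examples
        --------
        A sketch on simulated data (random draws, output omitted):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> ci_low, ci_high = el_analysis.ci_skew()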

864 """ 

865 nobs = self.nobs 

866 endog = self.endog 

867 if upper_bound is None: 

868 upper_bound = skew(endog) + \ 

869 2.5 * ((6. * nobs * (nobs - 1.)) / \ 

870 ((nobs - 2.) * (nobs + 1.) * \ 

871 (nobs + 3.))) ** .5 

872 if lower_bound is None: 

873 lower_bound = skew(endog) - \ 

874 2.5 * ((6. * nobs * (nobs - 1.)) / \ 

875 ((nobs - 2.) * (nobs + 1.) * \ 

876 (nobs + 3.))) ** .5 

877 self.r0 = chi2.ppf(1 - sig, 1) 

878 llim = optimize.brentq(self._ci_limits_skew, lower_bound, skew(endog)) 

879 ulim = optimize.brentq(self._ci_limits_skew, skew(endog), upper_bound) 

880 return llim, ulim 

    def ci_kurt(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence interval for kurtosis.

        Parameters
        ----------
        sig : float
            The significance level. Default is .05

        upper_bound : float
            Maximum value of kurtosis the upper limit can be.
            Default is the .99 confidence limit assuming normality.

        lower_bound : float
            Minimum value of kurtosis the lower limit can be.
            Default is the .99 confidence limit assuming normality.

        Returns
        -------
        Interval : tuple
            Lower and upper confidence limit

        Notes
        -----
        For small nobs, upper_bound and lower_bound may have to be
        provided by the user. Consider using test_kurt to find
        values close to the desired significance level.

        If the function returns the error 'f(a) and f(b) must have
        different signs', consider expanding the bounds.
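
        Examples
        --------
        A sketch on simulated data (random draws, output omitted):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> random_numbers = np.random.standard_normal(100)
        >>> el_analysis = sm.emplike.DescStat(random_numbers)
        >>> ci_low, ci_high = el_analysis.ci_kurt()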

913 """ 

914 endog = self.endog 

915 nobs = self.nobs 

916 if upper_bound is None: 

917 upper_bound = kurtosis(endog) + \ 

918 (2.5 * (2. * ((6. * nobs * (nobs - 1.)) / \ 

919 ((nobs - 2.) * (nobs + 1.) * \ 

920 (nobs + 3.))) ** .5) * \ 

921 (((nobs ** 2.) - 1.) / ((nobs - 3.) *\ 

922 (nobs + 5.))) ** .5) 

923 if lower_bound is None: 

924 lower_bound = kurtosis(endog) - \ 

925 (2.5 * (2. * ((6. * nobs * (nobs - 1.)) / \ 

926 ((nobs - 2.) * (nobs + 1.) * \ 

927 (nobs + 3.))) ** .5) * \ 

928 (((nobs ** 2.) - 1.) / ((nobs - 3.) *\ 

929 (nobs + 5.))) ** .5) 

930 self.r0 = chi2.ppf(1 - sig, 1) 

931 llim = optimize.brentq(self._ci_limits_kurt, lower_bound, \ 

932 kurtosis(endog)) 

933 ulim = optimize.brentq(self._ci_limits_kurt, kurtosis(endog), \ 

934 upper_bound) 

935 return llim, ulim 


class DescStatMV(_OptFuncts):
    """
    A class for conducting inference on multivariate means and
    correlation.

    Parameters
    ----------
    endog : ndarray
        Data to be analyzed

    Attributes
    ----------
    endog : ndarray
        Data to be analyzed

    nobs : int
        Number of observations
    """

    def __init__(self, endog):
        self.endog = endog
        self.nobs = endog.shape[0]

    def mv_test_mean(self, mu_array, return_weights=False):
        """
        Returns -2 x log-likelihood and the p-value
        for a multivariate hypothesis test of the mean.

        Parameters
        ----------
        mu_array : 1d array
            Hypothesized values for the mean. Must have the same number
            of elements as columns in endog

        return_weights : bool
            If True, returns the weights that maximize the
            likelihood of mu_array. Default is False.

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value for mu_array
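
        Examples
        --------
        A sketch on simulated bivariate data (random draws, output
        omitted), testing a mean of zero for both variables:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((20, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> llr, pval = el_analysis.mv_test_mean(np.array([0., 0.]))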

979 """ 

980 endog = self.endog 

981 nobs = self.nobs 

982 if len(mu_array) != endog.shape[1]: 

983 raise ValueError('mu_array must have the same number of ' 

984 'elements as the columns of the data.') 

985 mu_array = mu_array.reshape(1, endog.shape[1]) 

986 means = np.ones((endog.shape[0], endog.shape[1])) 

987 means = mu_array * means 

988 est_vect = endog - means 

989 start_vals = 1. / nobs * np.ones(endog.shape[1]) 

990 eta_star = self._modif_newton(start_vals, est_vect, 

991 np.ones(nobs) * (1. / nobs)) 

992 denom = 1 + np.dot(eta_star, est_vect.T) 

993 self.new_weights = 1 / nobs * 1 / denom 

994 llr = -2 * np.sum(np.log(nobs * self.new_weights)) 

995 p_val = chi2.sf(llr, mu_array.shape[1]) 

996 if return_weights: 

997 return llr, p_val, self.new_weights.T 

998 else: 

999 return llr, p_val 

    def mv_mean_contour(self, mu1_low, mu1_upp, mu2_low, mu2_upp, step1,
                        step2, levs=(.001, .01, .05, .1, .2),
                        var1_name=None, var2_name=None, plot_dta=False):
        """
        Creates a confidence region plot for the mean of bivariate data.

        Parameters
        ----------
        mu1_low : float
            Minimum value of the mean for variable 1
        mu1_upp : float
            Maximum value of the mean for variable 1
        mu2_low : float
            Minimum value of the mean for variable 2
        mu2_upp : float
            Maximum value of the mean for variable 2
        step1 : float
            Increment of evaluations for variable 1
        step2 : float
            Increment of evaluations for variable 2
        levs : list
            Levels to be drawn on the contour plot.
            Default is (.001, .01, .05, .1, .2)
        var1_name : str
            Name of variable 1 to be plotted on the x-axis
        var2_name : str
            Name of variable 2 to be plotted on the y-axis
        plot_dta : bool
            If True, makes a scatter plot of the data on
            top of the contour plot. Default is False.

        Notes
        -----
        The smaller the step size, the more accurate the intervals
        will be.

        If the function returns optimization failed, consider narrowing
        the boundaries of the plot.

        Examples
        --------
        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((20, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> contourp = el_analysis.mv_mean_contour(-2, 2, -2, 2, .1, .1)
        >>> contourp.show()
        """
        if self.endog.shape[1] != 2:
            raise ValueError('Data must contain exactly two variables')
        fig, ax = utils.create_mpl_ax()
        if var2_name is None:
            ax.set_ylabel('Variable 2')
        else:
            ax.set_ylabel(var2_name)
        if var1_name is None:
            ax.set_xlabel('Variable 1')
        else:
            ax.set_xlabel(var1_name)
        x = np.arange(mu1_low, mu1_upp, step1)
        y = np.arange(mu2_low, mu2_upp, step2)
        pairs = itertools.product(x, y)
        z = []
        for i in pairs:
            z.append(self.mv_test_mean(np.asarray(i))[0])
        X, Y = np.meshgrid(x, y)
        z = np.asarray(z)
        z = z.reshape(X.shape[1], Y.shape[0])
        ax.contour(x, y, z.T, levels=levs)
        if plot_dta:
            ax.plot(self.endog[:, 0], self.endog[:, 1], 'bo')
        return fig

    def test_corr(self, corr0, return_weights=False):
        """
        Returns -2 x log-likelihood ratio and the p-value for the
        correlation coefficient between 2 variables.

        Parameters
        ----------
        corr0 : float
            Hypothesized value to be tested

        return_weights : bool
            If True, returns the weights that maximize
            the log-likelihood at the hypothesized value

        Returns
        -------
        test_results : tuple
            The log-likelihood ratio and p-value of corr0
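
        Examples
        --------
        A sketch on simulated bivariate data (random draws, output
        omitted), testing zero correlation:

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((50, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> llr, pval = el_analysis.test_corr(0)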

1086 """ 

1087 nobs = self.nobs 

1088 endog = self.endog 

1089 if endog.shape[1] != 2: 

1090 raise NotImplementedError('Correlation matrix not yet implemented') 

1091 nuis0 = np.array([endog[:, 0].mean(), 

1092 endog[:, 0].var(), 

1093 endog[:, 1].mean(), 

1094 endog[:, 1].var()]) 

1095 

1096 x0 = np.zeros(5) 

1097 weights0 = np.array([1. / nobs] * int(nobs)) 

1098 args = (corr0, endog, nobs, x0, weights0) 

1099 llr = optimize.fmin(self._opt_correl, nuis0, args=args, 

1100 full_output=1, disp=0)[1] 

1101 p_val = chi2.sf(llr, 1) 

1102 if return_weights: 

1103 return llr, p_val, self.new_weights.T 

1104 return llr, p_val 

    def ci_corr(self, sig=.05, upper_bound=None, lower_bound=None):
        """
        Returns the confidence intervals for the correlation coefficient.

        Parameters
        ----------
        sig : float
            The significance level. Default is .05

        upper_bound : float
            Maximum value the upper confidence limit can be.
            Default is the 99% confidence limit assuming normality.

        lower_bound : float
            Minimum value the lower confidence limit can be.
            Default is the 99% confidence limit assuming normality.

        Returns
        -------
        interval : tuple
            Confidence interval for the correlation
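
        Examples
        --------
        A sketch on simulated bivariate data (random draws, output
        omitted):

        >>> import numpy as np
        >>> import statsmodels.api as sm
        >>> two_rvs = np.random.standard_normal((50, 2))
        >>> el_analysis = sm.emplike.DescStat(two_rvs)
        >>> ci_low, ci_high = el_analysis.ci_corr()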

1127 """ 

1128 endog = self.endog 

1129 nobs = self.nobs 

1130 self.r0 = chi2.ppf(1 - sig, 1) 

1131 point_est = np.corrcoef(endog[:, 0], endog[:, 1])[0, 1] 

1132 if upper_bound is None: 

1133 upper_bound = min(.999, point_est + \ 

1134 2.5 * ((1. - point_est ** 2.) / \ 

1135 (nobs - 2.)) ** .5) 

1136 

1137 if lower_bound is None: 

1138 lower_bound = max(- .999, point_est - \ 

1139 2.5 * (np.sqrt((1. - point_est ** 2.) / \ 

1140 (nobs - 2.)))) 

1141 

1142 llim = optimize.brenth(self._ci_limits_corr, lower_bound, point_est) 

1143 ulim = optimize.brenth(self._ci_limits_corr, point_est, upper_bound) 

1144 return llim, ulim