#
# Author: Travis Oliphant  2002-2011 with contributions from
#         SciPy Developers 2004-2011
#
from functools import partial
from scipy import special
from scipy.special import entr, logsumexp, betaln, gammaln as gamln
from scipy._lib._util import _lazywhere, rng_integers

from numpy import floor, ceil, log, exp, sqrt, log1p, expm1, tanh, cosh, sinh

import numpy as np

from ._distn_infrastructure import (
    rv_discrete, _ncx2_pdf, _ncx2_cdf, get_distribution_names)


class binom_gen(rv_discrete):
    r"""A binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `binom` is:

    .. math::

       f(k) = \binom{n}{k} p^k (1-p)^{n-k}

    for ``k`` in ``{0, 1,..., n}``.

    `binom` takes ``n`` and ``p`` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n >= 0) & (p >= 0) & (p <= 1)

    def _get_support(self, n, p):
        return self.a, n

    def _logpmf(self, x, n, p):
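        # log(C(n, k)) is computed with gammaln so the coefficient stays
        # finite for large n; xlogy/xlog1py return 0 rather than nan for
        # the p = 0 and p = 1 edge cases.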
        k = floor(x)
        combiln = (gamln(n+1) - (gamln(k+1) + gamln(n-k+1)))
        return combiln + special.xlogy(k, p) + special.xlog1py(n-k, -p)

    def _pmf(self, x, n, p):
        # binom.pmf(k) = choose(n, k) * p**k * (1-p)**(n-k)
        return exp(self._logpmf(x, n, p))

    def _cdf(self, x, n, p):
        k = floor(x)
        vals = special.bdtr(k, n, p)
        return vals

    def _sf(self, x, n, p):
        k = floor(x)
        return special.bdtrc(k, n, p)

    def _ppf(self, q, n, p):
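        # Invert the CDF with the Cephes routine, then step back to the
        # previous integer when it already attains the requested quantile;
        # this guards against ceil() overshooting by one through round-off.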
        vals = ceil(special.bdtrik(q, n, p))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.bdtr(vals1, n, p)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, n, p, moments='mv'):
        q = 1.0 - p
        mu = n * p
        var = n * p * q
        g1, g2 = None, None
        if 's' in moments:
            g1 = (q - p) / sqrt(var)
        if 'k' in moments:
            g2 = (1.0 - 6*p*q) / var
        return mu, var, g1, g2

    def _entropy(self, n, p):
        k = np.r_[0:n + 1]
        vals = self._pmf(k, n, p)
        return np.sum(entr(vals), axis=0)


binom = binom_gen(name='binom')


class bernoulli_gen(binom_gen):
    r"""A Bernoulli discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `bernoulli` is:

    .. math::

       f(k) = \begin{cases}1-p  &\text{if } k = 0\\
                           p    &\text{if } k = 1\end{cases}

    for :math:`k` in :math:`\{0, 1\}`.

    `bernoulli` takes :math:`p` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, p, size=None, random_state=None):
        return binom_gen._rvs(self, 1, p, size=size, random_state=random_state)

    def _argcheck(self, p):
        return (p >= 0) & (p <= 1)

    def _get_support(self, p):
        # Overrides binom_gen._get_support!
        return self.a, self.b

    def _logpmf(self, x, p):
        return binom._logpmf(x, 1, p)

    def _pmf(self, x, p):
        # bernoulli.pmf(k) = 1-p  if k = 0
        #                  = p    if k = 1
        return binom._pmf(x, 1, p)

    def _cdf(self, x, p):
        return binom._cdf(x, 1, p)

    def _sf(self, x, p):
        return binom._sf(x, 1, p)

    def _ppf(self, q, p):
        return binom._ppf(q, 1, p)

    def _stats(self, p):
        return binom._stats(1, p)

    def _entropy(self, p):
        return entr(p) + entr(1-p)


bernoulli = bernoulli_gen(b=1, name='bernoulli')


class betabinom_gen(rv_discrete):
    r"""A beta-binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The beta-binomial distribution is a binomial distribution with a
    probability of success `p` that follows a beta distribution.

    The probability mass function for `betabinom` is:

    .. math::

       f(k) = \binom{n}{k} \frac{B(k + a, n - k + b)}{B(a, b)}

    for ``k`` in ``{0, 1,..., n}``, :math:`n \geq 0`, :math:`a > 0`,
    :math:`b > 0`, where :math:`B(a, b)` is the beta function.

    `betabinom` takes :math:`n`, :math:`a`, and :math:`b` as shape parameters.

    References
    ----------
    .. [1] https://en.wikipedia.org/wiki/Beta-binomial_distribution

    %(after_notes)s

    .. versionadded:: 1.4.0

    See Also
    --------
    beta, binom

    %(example)s

    """

    def _rvs(self, n, a, b, size=None, random_state=None):
        p = random_state.beta(a, b, size)
        return random_state.binomial(n, p, size)

    def _get_support(self, n, a, b):
        return 0, n

    def _argcheck(self, n, a, b):
        return (n >= 0) & (a > 0) & (b > 0)

    def _logpmf(self, x, n, a, b):
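        # log(C(n, k)) is expressed through the beta function:
        # C(n, k) = 1 / ((n + 1) * B(n - k + 1, k + 1)), which keeps the
        # whole computation in log space for large arguments.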
        k = floor(x)
        combiln = -log(n + 1) - betaln(n - k + 1, k + 1)
        return combiln + betaln(k + a, n - k + b) - betaln(a, b)

    def _pmf(self, x, n, a, b):
        return exp(self._logpmf(x, n, a, b))

    def _stats(self, n, a, b, moments='mv'):
        e_p = a / (a + b)
        e_q = 1 - e_p
        mu = n * e_p
        var = n * (a + b + n) * e_p * e_q / (a + b + 1)
        g1, g2 = None, None
        if 's' in moments:
            g1 = 1.0 / sqrt(var)
            g1 *= (a + b + 2 * n) * (b - a)
            g1 /= (a + b + 2) * (a + b)
        if 'k' in moments:
            g2 = a + b
            g2 *= (a + b - 1 + 6 * n)
            g2 += 3 * a * b * (n - 2)
            g2 += 6 * n ** 2
            g2 -= 3 * e_p * b * n * (6 - n)
            g2 -= 18 * e_p * e_q * n ** 2
            g2 *= (a + b) ** 2 * (1 + a + b)
            g2 /= (n * a * b * (a + b + 2) * (a + b + 3) * (a + b + n))
            g2 -= 3
        return mu, var, g1, g2


betabinom = betabinom_gen(name='betabinom')


class nbinom_gen(rv_discrete):
    r"""A negative binomial discrete random variable.

    %(before_notes)s

    Notes
    -----
    The negative binomial distribution describes a sequence of i.i.d.
    Bernoulli trials, repeated until a predefined, non-random number of
    successes occurs.

    The probability mass function of the number of failures for `nbinom` is:

    .. math::

       f(k) = \binom{k+n-1}{n-1} p^n (1-p)^k

    for :math:`k \ge 0`.

    `nbinom` takes :math:`n` and :math:`p` as shape parameters, where
    :math:`n` is the number of successes and :math:`p` is the probability
    of a single success.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, n, p, size=None, random_state=None):
        return random_state.negative_binomial(n, p, size)

    def _argcheck(self, n, p):
        return (n > 0) & (p >= 0) & (p <= 1)

    def _pmf(self, x, n, p):
        # nbinom.pmf(k) = choose(k+n-1, n-1) * p**n * (1-p)**k
        return exp(self._logpmf(x, n, p))

    def _logpmf(self, x, n, p):
        coeff = gamln(n+x) - gamln(x+1) - gamln(n)
        return coeff + n*log(p) + special.xlog1py(x, -p)

    def _cdf(self, x, n, p):
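        # The CDF equals the regularized incomplete beta function,
        # P(X <= k) = I_p(n, k + 1), which remains valid for real n.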
        k = floor(x)
        return special.betainc(n, k+1, p)

    def _sf_skip(self, x, n, p):
        # skip because special.nbdtrc doesn't work for 0<n<1
        k = floor(x)
        return special.nbdtrc(k, n, p)

    def _ppf(self, q, n, p):
        vals = ceil(special.nbdtrik(q, n, p))
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, n, p)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, n, p):
        Q = 1.0 / p
        P = Q - 1.0
        mu = n*P
        var = n*P*Q
        g1 = (Q+P)/sqrt(n*P*Q)
        g2 = (1.0 + 6*P*Q) / (n*P*Q)
        return mu, var, g1, g2


nbinom = nbinom_gen(name='nbinom')


class geom_gen(rv_discrete):
    r"""A geometric discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `geom` is:

    .. math::

        f(k) = (1-p)^{k-1} p

    for :math:`k \ge 1`.

    `geom` takes :math:`p` as shape parameter.

    %(after_notes)s

    See Also
    --------
    planck

    %(example)s

    """
    def _rvs(self, p, size=None, random_state=None):
        return random_state.geometric(p, size=size)

    def _argcheck(self, p):
        return (p <= 1) & (p >= 0)

    def _pmf(self, k, p):
        return np.power(1-p, k-1) * p

    def _logpmf(self, k, p):
        return special.xlog1py(k - 1, -p) + log(p)

    def _cdf(self, x, p):
        k = floor(x)
        return -expm1(log1p(-p)*k)

    def _sf(self, x, p):
        return np.exp(self._logsf(x, p))

    def _logsf(self, x, p):
        k = floor(x)
        return k*log1p(-p)

    def _ppf(self, q, p):
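        # Closed-form inversion of the CDF, k = ceil(log(1-q)/log(1-p)),
        # computed with log1p for accuracy near q = 0 and p = 0; the final
        # where() steps back one integer when round-off made ceil overshoot.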
        vals = ceil(log1p(-q) / log1p(-p))
        temp = self._cdf(vals-1, p)
        return np.where((temp >= q) & (vals > 0), vals-1, vals)

    def _stats(self, p):
        mu = 1.0/p
        qr = 1.0-p
        var = qr / p / p
        g1 = (2.0-p) / sqrt(qr)
        g2 = np.polyval([1, -6, 6], p)/(1.0-p)
        return mu, var, g1, g2


geom = geom_gen(a=1, name='geom', longname="A geometric")


class hypergeom_gen(rv_discrete):
    r"""A hypergeometric discrete random variable.

    The hypergeometric distribution models drawing objects from a bin.
    `M` is the total number of objects, `n` is the total number of Type I
    objects.  The random variate represents the number of Type I objects in
    `N` drawn without replacement from the total population.

    %(before_notes)s

    Notes
    -----
    The symbols used to denote the shape parameters (`M`, `n`, and `N`) are
    not universally accepted.  See the Examples for a clarification of the
    definitions used here.

    The probability mass function is defined as,

    .. math:: p(k, M, n, N) = \frac{\binom{n}{k} \binom{M - n}{N - k}}
                                   {\binom{M}{N}}

    for :math:`k \in [\max(0, N - M + n), \min(n, N)]`, where the binomial
    coefficients are defined as,

    .. math:: \binom{n}{k} \equiv \frac{n!}{k! (n - k)!}.

    %(after_notes)s

    Examples
    --------
    >>> from scipy.stats import hypergeom
    >>> import matplotlib.pyplot as plt

    Suppose we have a collection of 20 animals, of which 7 are dogs.  Then if
    we want to know the probability of finding a given number of dogs if we
    choose at random 12 of the 20 animals, we can initialize a frozen
    distribution and plot the probability mass function:

    >>> [M, n, N] = [20, 7, 12]
    >>> rv = hypergeom(M, n, N)
    >>> x = np.arange(0, n+1)
    >>> pmf_dogs = rv.pmf(x)

    >>> fig = plt.figure()
    >>> ax = fig.add_subplot(111)
    >>> ax.plot(x, pmf_dogs, 'bo')
    >>> ax.vlines(x, 0, pmf_dogs, lw=2)
    >>> ax.set_xlabel('# of dogs in our group of chosen animals')
    >>> ax.set_ylabel('hypergeom PMF')
    >>> plt.show()

    Instead of using a frozen distribution, we can also use the `hypergeom`
    methods directly.  For example, to obtain the cumulative distribution
    function, use:

    >>> prb = hypergeom.cdf(x, M, n, N)

    And to generate random numbers:

    >>> R = hypergeom.rvs(M, n, N, size=10)

    """
    def _rvs(self, M, n, N, size=None, random_state=None):
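        # NumPy parameterizes the hypergeometric sampler by
        # (ngood, nbad, nsample), which maps to (n, M - n, N) here.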
        return random_state.hypergeometric(n, M-n, N, size=size)

    def _get_support(self, M, n, N):
        return np.maximum(N-(M-n), 0), np.minimum(n, N)

    def _argcheck(self, M, n, N):
        cond = (M > 0) & (n >= 0) & (N >= 0)
        cond &= (n <= M) & (N <= M)
        return cond

    def _logpmf(self, k, M, n, N):
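        # Each log binomial coefficient is written with betaln:
        # log(C(n, k)) = betaln(n + 1, 1) - betaln(k + 1, n - k + 1),
        # so the whole PMF is evaluated in log space.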
        tot, good = M, n
        bad = tot - good
        result = (betaln(good+1, 1) + betaln(bad+1, 1) + betaln(tot-N+1, N+1) -
                  betaln(k+1, good-k+1) - betaln(N-k+1, bad-N+k+1) -
                  betaln(tot+1, 1))
        return result

    def _pmf(self, k, M, n, N):
        # same as the following but numerically more precise
        # return comb(good, k) * comb(bad, N-k) / comb(tot, N)
        return exp(self._logpmf(k, M, n, N))

    def _stats(self, M, n, N):
        # tot, good, sample_size = M, n, N
        # "wikipedia".replace('N', 'M').replace('n', 'N').replace('K', 'n')
        M, n, N = 1.*M, 1.*n, 1.*N
        m = M - n
        p = n/M
        mu = N*p

        var = m*n*N*(M - N)*1.0/(M*M*(M-1))
        g1 = (m - n)*(M-2*N) / (M-2.0) * sqrt((M-1.0) / (m*n*N*(M-N)))

        g2 = M*(M+1) - 6.*N*(M-N) - 6.*n*m
        g2 *= (M-1)*M*M
        g2 += 6.*n*N*(M-N)*m*(5.*M-6)
        g2 /= n * N * (M-N) * m * (M-2.) * (M-3.)
        return mu, var, g1, g2

    def _entropy(self, M, n, N):
        k = np.r_[N - (M - n):min(n, N) + 1]
        vals = self.pmf(k, M, n, N)
        return np.sum(entr(vals), axis=0)

    def _sf(self, k, M, n, N):
        # This for loop is needed because `k` can be an array.  If that's
        # the case, the sf() method makes M, n and N arrays of the same
        # shape.  We therefore unpack all input args, so we can do the
        # manual integration.
        res = []
        for quant, tot, good, draw in zip(k, M, n, N):
            # Manual integration over probability mass function.  More
            # accurate than integrate.quad.
            k2 = np.arange(quant + 1, draw + 1)
            res.append(np.sum(self._pmf(k2, tot, good, draw)))
        return np.asarray(res)

    def _logsf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(k, M, n, N):
            if (quant + 0.5) * (tot + 0.5) < (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-cdf)
                res.append(log1p(-exp(self.logcdf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(quant + 1, draw + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)

    def _logcdf(self, k, M, n, N):
        res = []
        for quant, tot, good, draw in zip(k, M, n, N):
            if (quant + 0.5) * (tot + 0.5) > (good - 0.5) * (draw - 0.5):
                # Fewer terms to sum if we calculate log(1-sf)
                res.append(log1p(-exp(self.logsf(quant, tot, good, draw))))
            else:
                # Integration over probability mass function using logsumexp
                k2 = np.arange(0, quant + 1)
                res.append(logsumexp(self._logpmf(k2, tot, good, draw)))
        return np.asarray(res)


hypergeom = hypergeom_gen(name='hypergeom')


# FIXME: Fails _cdfvec
class logser_gen(rv_discrete):
    r"""A Logarithmic (Log-Series, Series) discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `logser` is:

    .. math::

        f(k) = - \frac{p^k}{k \log(1-p)}

    for :math:`k \ge 1`.

    `logser` takes :math:`p` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, p, size=None, random_state=None):
        # looks wrong for p>0.5, too few k=1
        # trying to use generic is worse, no k=1 at all
        return random_state.logseries(p, size=size)

    def _argcheck(self, p):
        return (p > 0) & (p < 1)

    def _pmf(self, k, p):
        # logser.pmf(k) = - p**k / (k*log(1-p))
        return -np.power(p, k) * 1.0 / k / special.log1p(-p)

    def _stats(self, p):
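        # mu2p, mu3p and mu4p below are raw (non-central) moments; the
        # central moments and the standardized g1, g2 follow from the
        # usual moment conversions.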
        r = special.log1p(-p)
        mu = p / (p - 1.0) / r
        mu2p = -p / r / (p - 1.0)**2
        var = mu2p - mu*mu
        mu3p = -p / r * (1.0+p) / (1.0 - p)**3
        mu3 = mu3p - 3*mu*mu2p + 2*mu**3
        g1 = mu3 / np.power(var, 1.5)

        mu4p = -p / r * (
            1.0 / (p-1)**2 - 6*p / (p - 1)**3 + 6*p*p / (p-1)**4)
        mu4 = mu4p - 4*mu3p*mu + 6*mu2p*mu*mu - 3*mu**4
        g2 = mu4 / var**2 - 3.0
        return mu, var, g1, g2


logser = logser_gen(a=1, name='logser', longname='A logarithmic')


class poisson_gen(rv_discrete):
    r"""A Poisson discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `poisson` is:

    .. math::

        f(k) = \exp(-\mu) \frac{\mu^k}{k!}

    for :math:`k \ge 0`.

    `poisson` takes :math:`\mu` as shape parameter.

    %(after_notes)s

    %(example)s

    """

    # Override rv_discrete._argcheck to allow mu=0.
    def _argcheck(self, mu):
        return mu >= 0

    def _rvs(self, mu, size=None, random_state=None):
        return random_state.poisson(mu, size)

    def _logpmf(self, k, mu):
        Pk = special.xlogy(k, mu) - gamln(k + 1) - mu
        return Pk

    def _pmf(self, k, mu):
        # poisson.pmf(k) = exp(-mu) * mu**k / k!
        return exp(self._logpmf(k, mu))

    def _cdf(self, x, mu):
        k = floor(x)
        return special.pdtr(k, mu)

    def _sf(self, x, mu):
        k = floor(x)
        return special.pdtrc(k, mu)

    def _ppf(self, q, mu):
        vals = ceil(special.pdtrik(q, mu))
        vals1 = np.maximum(vals - 1, 0)
        temp = special.pdtr(vals1, mu)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, mu):
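        # Skewness and excess kurtosis are 1/sqrt(mu) and 1/mu; _lazywhere
        # avoids dividing by zero at mu = 0 and returns inf there instead.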
        var = mu
        tmp = np.asarray(mu)
        mu_nonzero = tmp > 0
        g1 = _lazywhere(mu_nonzero, (tmp,), lambda x: sqrt(1.0/x), np.inf)
        g2 = _lazywhere(mu_nonzero, (tmp,), lambda x: 1.0/x, np.inf)
        return mu, var, g1, g2


poisson = poisson_gen(name="poisson", longname='A Poisson')


class planck_gen(rv_discrete):
    r"""A Planck discrete exponential random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `planck` is:

    .. math::

        f(k) = (1-\exp(-\lambda)) \exp(-\lambda k)

    for :math:`k \ge 0` and :math:`\lambda > 0`.

    `planck` takes :math:`\lambda` as shape parameter.  The Planck
    distribution can be written as a geometric distribution (`geom`) with
    :math:`p = 1 - \exp(-\lambda)` shifted by ``loc = -1``.

    %(after_notes)s

    See Also
    --------
    geom

    %(example)s

    """
    def _argcheck(self, lambda_):
        return lambda_ > 0

    def _pmf(self, k, lambda_):
        return -expm1(-lambda_)*exp(-lambda_*k)

    def _cdf(self, x, lambda_):
        k = floor(x)
        return -expm1(-lambda_*(k+1))

    def _sf(self, x, lambda_):
        return exp(self._logsf(x, lambda_))

    def _logsf(self, x, lambda_):
        k = floor(x)
        return -lambda_*(k+1)

    def _ppf(self, q, lambda_):
        vals = ceil(-1.0/lambda_ * log1p(-q)-1)
        vals1 = (vals-1).clip(*(self._get_support(lambda_)))
        temp = self._cdf(vals1, lambda_)
        return np.where(temp >= q, vals1, vals)

    def _rvs(self, lambda_, size=None, random_state=None):
        # use relation to geometric distribution for sampling
        p = -expm1(-lambda_)
        return random_state.geometric(p, size=size) - 1.0

    def _stats(self, lambda_):
        mu = 1/expm1(lambda_)
        var = exp(-lambda_)/(expm1(-lambda_))**2
        g1 = 2*cosh(lambda_/2.0)
        g2 = 4+2*cosh(lambda_)
        return mu, var, g1, g2

    def _entropy(self, lambda_):
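        # Entropy of the underlying geometric distribution with
        # p = 1 - exp(-lambda), written with C = -expm1(-lambda) so that
        # small lambda_ does not lose precision.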
        C = -expm1(-lambda_)
        return lambda_*exp(-lambda_)/C - log(C)


planck = planck_gen(a=0, name='planck', longname='A discrete exponential ')


class boltzmann_gen(rv_discrete):
    r"""A Boltzmann (Truncated Discrete Exponential) random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `boltzmann` is:

    .. math::

        f(k) = (1-\exp(-\lambda)) \exp(-\lambda k) / (1-\exp(-\lambda N))

    for :math:`k = 0,..., N-1`.

    `boltzmann` takes :math:`\lambda > 0` and :math:`N > 0` as shape
    parameters.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, lambda_, N):
        return (lambda_ > 0) & (N > 0)

    def _get_support(self, lambda_, N):
        return self.a, N - 1

    def _pmf(self, k, lambda_, N):
        # boltzmann.pmf(k) =
        #     (1-exp(-lambda_))*exp(-lambda_*k)/(1-exp(-lambda_*N))
        fact = (1-exp(-lambda_))/(1-exp(-lambda_*N))
        return fact*exp(-lambda_*k)

    def _cdf(self, x, lambda_, N):
        k = floor(x)
        return (1-exp(-lambda_*(k+1)))/(1-exp(-lambda_*N))

    def _ppf(self, q, lambda_, N):
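        # Rescale q onto the untruncated exponential scale, invert the
        # planck-style CDF, then let the final where() undo any ceil()
        # round-off overshoot.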
        qnew = q*(1-exp(-lambda_*N))
        vals = ceil(-1.0/lambda_ * log(1-qnew)-1)
        vals1 = (vals-1).clip(0.0, np.inf)
        temp = self._cdf(vals1, lambda_, N)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, lambda_, N):
        z = exp(-lambda_)
        zN = exp(-lambda_*N)
        mu = z/(1.0-z)-N*zN/(1-zN)
        var = z/(1.0-z)**2 - N*N*zN/(1-zN)**2
        trm = (1-zN)/(1-z)
        trm2 = (z*trm**2 - N*N*zN)
        g1 = z*(1+z)*trm**3 - N**3*zN*(1+zN)
        g1 = g1 / trm2**(1.5)
        g2 = z*(1+4*z+z*z)*trm**4 - N**4 * zN*(1+4*zN+zN*zN)
        g2 = g2 / trm2 / trm2
        return mu, var, g1, g2


boltzmann = boltzmann_gen(name='boltzmann', a=0,
                          longname='A truncated discrete exponential ')


class randint_gen(rv_discrete):
    r"""A uniform discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `randint` is:

    .. math::

        f(k) = \frac{1}{high - low}

    for ``k = low, ..., high - 1``.

    `randint` takes ``low`` and ``high`` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _argcheck(self, low, high):
        return (high > low)

    def _get_support(self, low, high):
        return low, high-1

    def _pmf(self, k, low, high):
        # randint.pmf(k) = 1./(high - low)
        p = np.ones_like(k) / (high - low)
        return np.where((k >= low) & (k < high), p, 0.)

    def _cdf(self, x, low, high):
        k = floor(x)
        return (k - low + 1.) / (high - low)

    def _ppf(self, q, low, high):
        vals = ceil(q * (high - low) + low) - 1
        vals1 = (vals - 1).clip(low, high)
        temp = self._cdf(vals1, low, high)
        return np.where(temp >= q, vals1, vals)

    def _stats(self, low, high):
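        # Moments of the discrete uniform on {low, ..., high - 1}:
        # mu = (low + high - 1)/2 and var = ((high - low)**2 - 1)/12.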
        m2, m1 = np.asarray(high), np.asarray(low)
        mu = (m2 + m1 - 1.0) / 2
        d = m2 - m1
        var = (d*d - 1) / 12.0
        g1 = 0.0
        g2 = -6.0/5.0 * (d*d + 1.0) / (d*d - 1.0)
        return mu, var, g1, g2

    def _rvs(self, low, high, size=None, random_state=None):
        """An array of *size* random integers >= ``low`` and < ``high``."""
        if np.asarray(low).size == 1 and np.asarray(high).size == 1:
            # no need to vectorize in that case
            return rng_integers(random_state, low, high, size=size)

        if size is not None:
            # NumPy's RandomState.randint() doesn't broadcast its arguments.
            # Use `broadcast_to()` to extend the shapes of low and high
            # up to size.  Then we can use the numpy.vectorize'd
            # randint without needing to pass it a `size` argument.
            low = np.broadcast_to(low, size)
            high = np.broadcast_to(high, size)
        randint = np.vectorize(partial(rng_integers, random_state),
                               otypes=[np.int_])
        return randint(low, high)

    def _entropy(self, low, high):
        return log(high - low)


randint = randint_gen(name='randint', longname='A discrete uniform '
                      '(random integer)')


# FIXME: problems sampling.
class zipf_gen(rv_discrete):
    r"""A Zipf discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `zipf` is:

    .. math::

        f(k, a) = \frac{1}{\zeta(a) k^a}

    for :math:`k \ge 1`.

    `zipf` takes :math:`a` as shape parameter.  :math:`\zeta` is the
    Riemann zeta function (`scipy.special.zeta`).

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, a, size=None, random_state=None):
        return random_state.zipf(a, size=size)

    def _argcheck(self, a):
        return a > 1

    def _pmf(self, k, a):
        # zipf.pmf(k, a) = 1/(zeta(a) * k**a)
        Pk = 1.0 / special.zeta(a, 1) / k**a
        return Pk

    def _munp(self, n, a):
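        # The n-th raw moment is zeta(a - n)/zeta(a), finite only for
        # a > n + 1; _lazywhere substitutes inf where the moment diverges.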
        return _lazywhere(
            a > n + 1, (a, n),
            lambda a, n: special.zeta(a - n, 1) / special.zeta(a, 1),
            np.inf)


zipf = zipf_gen(a=1, name='zipf', longname='A Zipf')


class dlaplace_gen(rv_discrete):
    r"""A Laplacian discrete random variable.

    %(before_notes)s

    Notes
    -----
    The probability mass function for `dlaplace` is:

    .. math::

        f(k) = \tanh(a/2) \exp(-a |k|)

    for integers :math:`k` and :math:`a > 0`.

    `dlaplace` takes :math:`a` as shape parameter.

    %(after_notes)s

    %(example)s

    """
    def _pmf(self, k, a):
        # dlaplace.pmf(k) = tanh(a/2) * exp(-a*abs(k))
        return tanh(a/2.0) * exp(-a * abs(k))

    def _cdf(self, x, a):
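        # Closed form obtained by summing the geometric tails on either
        # side of the origin: F(k) = 1 - exp(-a*k)/(exp(a) + 1) for k >= 0
        # and F(k) = exp(a*(k+1))/(exp(a) + 1) for k < 0.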
        k = floor(x)
        f = lambda k, a: 1.0 - exp(-a * k) / (exp(a) + 1)
        f2 = lambda k, a: exp(a * (k+1)) / (exp(a) + 1)
        return _lazywhere(k >= 0, (k, a), f=f, f2=f2)

    def _ppf(self, q, a):
        const = 1 + exp(a)
        vals = ceil(np.where(q < 1.0 / (1 + exp(-a)),
                             log(q*const) / a - 1,
                             -log((1-q) * const) / a))
        vals1 = vals - 1
        return np.where(self._cdf(vals1, a) >= q, vals1, vals)

    def _stats(self, a):
        ea = exp(a)
        mu2 = 2.*ea/(ea-1.)**2
        mu4 = 2.*ea*(ea**2+10.*ea+1.) / (ea-1.)**4
        return 0., mu2, 0., mu4/mu2**2 - 3.

    def _entropy(self, a):
        return a / sinh(a) - log(tanh(a/2.0))

    def _rvs(self, a, size=None, random_state=None):
        # The discrete Laplace is equivalent to the two-sided geometric
        # distribution with PMF:
        #   f(k) = (1 - alpha)/(1 + alpha) * alpha^abs(k)
        # Reference:
        #   https://www.sciencedirect.com/science/
        #   article/abs/pii/S0378375804003519
        # Furthermore, the two-sided geometric distribution is
        # equivalent to the difference between two iid geometric
        # distributions.
        # Reference (page 179):
        #   https://pdfs.semanticscholar.org/61b3/
        #   b99f466815808fd0d03f5d2791eea8b541a1.pdf
        # Thus, we can leverage the following:
        #   1) alpha = e^-a
        #   2) probability_of_success = 1 - alpha (Bernoulli trial)
        probOfSuccess = -np.expm1(-np.asarray(a))
        x = random_state.geometric(probOfSuccess, size=size)
        y = random_state.geometric(probOfSuccess, size=size)
        return x - y


dlaplace = dlaplace_gen(a=-np.inf,
                        name='dlaplace', longname='A discrete Laplacian')


class skellam_gen(rv_discrete):
    r"""A Skellam discrete random variable.

    %(before_notes)s

    Notes
    -----
    Probability distribution of the difference of two correlated or
    uncorrelated Poisson random variables.

    Let :math:`k_1` and :math:`k_2` be two Poisson-distributed r.v. with
    expected values :math:`\lambda_1` and :math:`\lambda_2`.  Then,
    :math:`k_1 - k_2` follows a Skellam distribution with parameters
    :math:`\mu_1 = \lambda_1 - \rho \sqrt{\lambda_1 \lambda_2}` and
    :math:`\mu_2 = \lambda_2 - \rho \sqrt{\lambda_1 \lambda_2}`, where
    :math:`\rho` is the correlation coefficient between :math:`k_1` and
    :math:`k_2`.  If the two Poisson-distributed r.v. are independent then
    :math:`\rho = 0`.

    Parameters :math:`\mu_1` and :math:`\mu_2` must be strictly positive.

    For details see: https://en.wikipedia.org/wiki/Skellam_distribution

    `skellam` takes :math:`\mu_1` and :math:`\mu_2` as shape parameters.

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, mu1, mu2, size=None, random_state=None):
        n = size
        return (random_state.poisson(mu1, n) -
                random_state.poisson(mu2, n))

    def _pmf(self, x, mu1, mu2):
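        # The Skellam PMF can be expressed through the noncentral
        # chi-square density (both reduce to a modified Bessel function),
        # which is what _ncx2_pdf evaluates here.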
        px = np.where(x < 0,
                      _ncx2_pdf(2*mu2, 2*(1-x), 2*mu1)*2,
                      _ncx2_pdf(2*mu1, 2*(1+x), 2*mu2)*2)
        # ncx2.pdf() returns NaNs for extremely low probabilities
        return px

    def _cdf(self, x, mu1, mu2):
        x = floor(x)
        px = np.where(x < 0,
                      _ncx2_cdf(2*mu2, -2*x, 2*mu1),
                      1 - _ncx2_cdf(2*mu1, 2*(x+1), 2*mu2))
        return px

    def _stats(self, mu1, mu2):
        mean = mu1 - mu2
        var = mu1 + mu2
        g1 = mean / sqrt((var)**3)
        g2 = 1 / var
        return mean, var, g1, g2


skellam = skellam_gen(a=-np.inf, name="skellam", longname='A Skellam')


class yulesimon_gen(rv_discrete):
    r"""A Yule-Simon discrete random variable.

    %(before_notes)s

    Notes
    -----

    The probability mass function for `yulesimon` is:

    .. math::

        f(k) = \alpha B(k, \alpha + 1)

    for :math:`k = 1, 2, 3, ...`, where :math:`\alpha > 0`.
    Here :math:`B` refers to the `scipy.special.beta` function.

    The sampling of random variates is based on pg 553, Section 6.3 of [1]_.
    Our notation maps to the referenced logic via :math:`\alpha = a - 1`.

    For details see the Wikipedia entry [2]_.

    References
    ----------
    .. [1] Devroye, Luc. "Non-uniform Random Variate Generation",
           (1986) Springer, New York.

    .. [2] https://en.wikipedia.org/wiki/Yule-Simon_distribution

    %(after_notes)s

    %(example)s

    """
    def _rvs(self, alpha, size=None, random_state=None):
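        # Inversion sampling per the Devroye reference in the docstring: a
        # geometric variate whose success probability exp(-E2/alpha) is
        # itself randomized by a standard exponential deviate.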
        E1 = random_state.standard_exponential(size)
        E2 = random_state.standard_exponential(size)
        ans = ceil(-E1 / log1p(-exp(-E2 / alpha)))
        return ans

    def _pmf(self, x, alpha):
        return alpha * special.beta(x, alpha + 1)

    def _argcheck(self, alpha):
        return (alpha > 0)

    def _logpmf(self, x, alpha):
        return log(alpha) + special.betaln(x, alpha + 1)

    def _cdf(self, x, alpha):
        return 1 - x * special.beta(x, alpha + 1)

    def _sf(self, x, alpha):
        return x * special.beta(x, alpha + 1)

    def _logsf(self, x, alpha):
        return log(x) + special.betaln(x, alpha + 1)

    def _stats(self, alpha):
        mu = np.where(alpha <= 1, np.inf, alpha / (alpha - 1))
        mu2 = np.where(alpha > 2,
                       alpha**2 / ((alpha - 2.0) * (alpha - 1)**2),
                       np.inf)
        mu2 = np.where(alpha <= 1, np.nan, mu2)
        g1 = np.where(alpha > 3,
                      sqrt(alpha - 2) * (alpha + 1)**2 / (alpha * (alpha - 3)),
                      np.inf)
        g1 = np.where(alpha <= 2, np.nan, g1)
        g2 = np.where(alpha > 4,
                      (alpha + 3) + (alpha**3 - 49 * alpha - 22) / (alpha *
                      (alpha - 4) * (alpha - 3)), np.inf)
        g2 = np.where(alpha <= 2, np.nan, g2)
        return mu, mu2, g1, g2


yulesimon = yulesimon_gen(name='yulesimon', a=1)


# Collect names of classes and objects in this module.
pairs = list(globals().items())
_distn_names, _distn_gen_names = get_distribution_names(pairs, rv_discrete)

__all__ = _distn_names + _distn_gen_names