Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/statsmodels/duration/survfunc.py : 6%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
2import pandas as pd
3from scipy.stats.distributions import chi2, norm
4from statsmodels.graphics import utils
7def _calc_survfunc_right(time, status, weights=None, entry=None, compress=True,
8 retall=True):
9 """
10 Calculate the survival function and its standard error for a single
11 group.
12 """
14 # Convert the unique times to ranks (0, 1, 2, ...)
15 if entry is None:
16 utime, rtime = np.unique(time, return_inverse=True)
17 else:
18 tx = np.concatenate((time, entry))
19 utime, rtime = np.unique(tx, return_inverse=True)
20 rtime = rtime[0:len(time)]
22 # Number of deaths at each unique time.
23 ml = len(utime)
24 if weights is None:
25 d = np.bincount(rtime, weights=status, minlength=ml)
26 else:
27 d = np.bincount(rtime, weights=status*weights, minlength=ml)
29 # Size of risk set just prior to each event time.
30 if weights is None:
31 n = np.bincount(rtime, minlength=ml)
32 else:
33 n = np.bincount(rtime, weights=weights, minlength=ml)
34 if entry is not None:
35 n = np.cumsum(n) - n
36 rentry = np.searchsorted(utime, entry, side='left')
37 if weights is None:
38 n0 = np.bincount(rentry, minlength=ml)
39 else:
40 n0 = np.bincount(rentry, weights=weights, minlength=ml)
41 n0 = np.cumsum(n0) - n0
42 n = n0 - n
43 else:
44 n = np.cumsum(n[::-1])[::-1]
46 # Only retain times where an event occurred.
47 if compress:
48 ii = np.flatnonzero(d > 0)
49 d = d[ii]
50 n = n[ii]
51 utime = utime[ii]
53 # The survival function probabilities.
54 sp = 1 - d / n.astype(np.float64)
55 ii = sp < 1e-16
56 sp[ii] = 1e-16
57 sp = np.log(sp)
58 sp = np.cumsum(sp)
59 sp = np.exp(sp)
60 sp[ii] = 0
62 if not retall:
63 return sp, utime, rtime, n, d
65 # Standard errors
66 if weights is None:
67 # Greenwood's formula
68 denom = n * (n - d)
69 denom = np.clip(denom, 1e-12, np.inf)
70 se = d / denom.astype(np.float64)
71 se[(n == d) | (n == 0)] = np.nan
72 se = np.cumsum(se)
73 se = np.sqrt(se)
74 locs = np.isfinite(se) | (sp != 0)
75 se[locs] *= sp[locs]
76 se[~locs] = np.nan
77 else:
78 # Tsiatis' (1981) formula
79 se = d / (n * n).astype(np.float64)
80 se = np.cumsum(se)
81 se = np.sqrt(se)
83 return sp, se, utime, rtime, n, d
86def _calc_incidence_right(time, status, weights=None):
87 """
88 Calculate the cumulative incidence function and its standard error.
89 """
91 # Calculate the all-cause survival function.
92 status0 = (status >= 1).astype(np.float64)
93 sp, utime, rtime, n, d = _calc_survfunc_right(time, status0, weights,
94 compress=False, retall=False)
96 ngrp = int(status.max())
98 # Number of cause-specific deaths at each unique time.
99 d = []
100 for k in range(ngrp):
101 status0 = (status == k + 1).astype(np.float64)
102 if weights is None:
103 d0 = np.bincount(rtime, weights=status0, minlength=len(utime))
104 else:
105 d0 = np.bincount(rtime, weights=status0*weights,
106 minlength=len(utime))
107 d.append(d0)
109 # The cumulative incidence function probabilities.
110 ip = []
111 sp0 = np.r_[1, sp[:-1]] / n
112 for k in range(ngrp):
113 ip0 = np.cumsum(sp0 * d[k])
114 ip.append(ip0)
116 # The standard error of the cumulative incidence function.
117 if weights is not None:
118 return ip, None, utime
119 se = []
120 da = sum(d)
121 for k in range(ngrp):
123 ra = da / (n * (n - da))
124 v = ip[k]**2 * np.cumsum(ra)
125 v -= 2 * ip[k] * np.cumsum(ip[k] * ra)
126 v += np.cumsum(ip[k]**2 * ra)
128 ra = (n - d[k]) * d[k] / n
129 v += np.cumsum(sp0**2 * ra)
131 ra = sp0 * d[k] / n
132 v -= 2 * ip[k] * np.cumsum(ra)
133 v += 2 * np.cumsum(ip[k] * ra)
135 se.append(np.sqrt(v))
137 return ip, se, utime
140def _checkargs(time, status, entry, freq_weights, exog):
142 if len(time) != len(status):
143 raise ValueError("time and status must have the same length")
145 if entry is not None and (len(entry) != len(time)):
146 msg = "entry times and event times must have the same length"
147 raise ValueError(msg)
149 if entry is not None and np.any(entry >= time):
150 msg = "Entry times must not occur on or after event times"
151 raise ValueError(msg)
153 if freq_weights is not None and (len(freq_weights) != len(time)):
154 raise ValueError("weights, time and status must have the same length")
156 if exog is not None and (exog.shape[0] != len(time)):
157 raise ValueError("the rows of exog should align with time")
160class CumIncidenceRight(object):
161 """
162 Estimation and inference for a cumulative incidence function.
164 If J = 1, 2, ... indicates the event type, the cumulative
165 incidence function for cause j is:
167 I(t, j) = P(T <= t and J=j)
169 Only right censoring is supported. If frequency weights are provided,
170 the point estimate is returned without a standard error.
172 Parameters
173 ----------
174 time : array_like
175 An array of times (censoring times or event times)
176 status : array_like
177 If status >= 1 indicates which event occurred at time t. If
178 status = 0, the subject was censored at time t.
179 title : str
180 Optional title used for plots and summary output.
181 freq_weights : array_like
182 Optional frequency weights
183 exog : array_like
184 Optional, if present used to account for violation of
185 independent censoring.
186 bw_factor : float
187 Band-width multiplier for kernel-based estimation. Only
188 used if exog is provided.
189 dimred : bool
190 If True, proportional hazards regression models are used to
191 reduce exog to two columns by predicting overall events and
192 censoring in two separate models. If False, exog is used
193 directly for calculating kernel weights without dimension
194 reduction.
196 Attributes
197 ----------
198 times : array_like
199 The distinct times at which the incidence rates are estimated
200 cinc : list of arrays
201 cinc[k-1] contains the estimated cumulative incidence rates
202 for outcome k=1,2,...
203 cinc_se : list of arrays
204 The standard errors for the values in `cinc`. Not available when
205 exog and/or frequency weights are provided.
207 Notes
208 -----
209 When exog is provided, a local estimate of the cumulative incidence
210 rate around each point is provided, and these are averaged to
211 produce an estimate of the marginal cumulative incidence
212 functions. The procedure is analogous to that described in Zeng
213 (2004) for estimation of the marginal survival function. The
214 approach removes bias resulting from dependent censoring when the
215 censoring becomes independent conditioned on the columns of exog.
217 References
218 ----------
219 The Stata stcompet procedure:
220 http://www.stata-journal.com/sjpdf.html?articlenum=st0059
222 Dinse, G. E. and M. G. Larson. 1986. A note on semi-Markov models
223 for partially censored data. Biometrika 73: 379-386.
225 Marubini, E. and M. G. Valsecchi. 1995. Analysing Survival Data
226 from Clinical Trials and Observational Studies. Chichester, UK:
227 John Wiley & Sons.
229 D. Zeng (2004). Estimating marginal survival function by
230 adjusting for dependent censoring using many covariates. Annals
231 of Statistics 32:4.
232 https://arxiv.org/pdf/math/0409180.pdf
233 """
235 def __init__(self, time, status, title=None, freq_weights=None,
236 exog=None, bw_factor=1., dimred=True):
238 _checkargs(time, status, None, freq_weights, None)
239 time = self.time = np.asarray(time)
240 status = self.status = np.asarray(status)
241 if freq_weights is not None:
242 freq_weights = self.freq_weights = np.asarray(freq_weights)
244 if exog is not None:
245 from ._kernel_estimates import _kernel_cumincidence
246 exog = self.exog = np.asarray(exog)
247 nobs = exog.shape[0]
248 kw = nobs**(-1/3.0) * bw_factor
249 kfunc = lambda x: np.exp(-x**2 / kw**2).sum(1)
250 x = _kernel_cumincidence(time, status, exog, kfunc, freq_weights,
251 dimred)
252 self.times = x[0]
253 self.cinc = x[1]
254 return
256 x = _calc_incidence_right(time, status, freq_weights)
257 self.cinc = x[0]
258 self.cinc_se = x[1]
259 self.times = x[2]
260 self.title = "" if not title else title
263class SurvfuncRight(object):
264 """
265 Estimation and inference for a survival function.
267 The survival function S(t) = P(T > t) is the probability that an
268 event time T is greater than t.
270 This class currently only supports right censoring.
272 Parameters
273 ----------
274 time : array_like
275 An array of times (censoring times or event times)
276 status : array_like
277 Status at the event time, status==1 is the 'event'
278 (e.g. death, failure), meaning that the event
279 occurs at the given value in `time`; status==0
280 indicates that censoring has occurred, meaning that
281 the event occurs after the given value in `time`.
282 entry : array_like, optional An array of entry times for handling
283 left truncation (the subject is not in the risk set on or
284 before the entry time)
285 title : str
286 Optional title used for plots and summary output.
287 freq_weights : array_like
288 Optional frequency weights
289 exog : array_like
290 Optional, if present used to account for violation of
291 independent censoring.
292 bw_factor : float
293 Band-width multiplier for kernel-based estimation. Only used
294 if exog is provided.
296 Attributes
297 ----------
298 surv_prob : array_like
299 The estimated value of the survivor function at each time
300 point in `surv_times`.
301 surv_prob_se : array_like
302 The standard errors for the values in `surv_prob`. Not available
303 if exog is provided.
304 surv_times : array_like
305 The points where the survival function changes.
306 n_risk : array_like
307 The number of subjects at risk just before each time value in
308 `surv_times`. Not available if exog is provided.
309 n_events : array_like
310 The number of events (e.g. deaths) that occur at each point
311 in `surv_times`. Not available if exog is provided.
313 Notes
314 -----
315 If exog is None, the standard Kaplan-Meier estimator is used. If
316 exog is not None, a local estimate of the marginal survival
317 function around each point is constructed, and these are then
318 averaged. This procedure gives an estimate of the marginal
319 survival function that accounts for dependent censoring as long as
320 the censoring becomes independent when conditioning on the
321 covariates in exog. See Zeng et al. (2004) for details.
323 References
324 ----------
325 D. Zeng (2004). Estimating marginal survival function by
326 adjusting for dependent censoring using many covariates. Annals
327 of Statistics 32:4.
328 https://arxiv.org/pdf/math/0409180.pdf
329 """
331 def __init__(self, time, status, entry=None, title=None,
332 freq_weights=None, exog=None, bw_factor=1.):
334 _checkargs(time, status, entry, freq_weights, exog)
335 time = self.time = np.asarray(time)
336 status = self.status = np.asarray(status)
337 if freq_weights is not None:
338 freq_weights = self.freq_weights = np.asarray(freq_weights)
340 if entry is not None:
341 entry = self.entry = np.asarray(entry)
343 if exog is not None:
344 if entry is not None:
345 raise ValueError("exog and entry cannot both be present")
346 from ._kernel_estimates import _kernel_survfunc
347 exog = self.exog = np.asarray(exog)
348 nobs = exog.shape[0]
349 kw = nobs**(-1/3.0) * bw_factor
350 kfunc = lambda x: np.exp(-x**2 / kw**2).sum(1)
351 x = _kernel_survfunc(time, status, exog, kfunc, freq_weights)
352 self.surv_prob = x[0]
353 self.surv_times = x[1]
354 return
356 x = _calc_survfunc_right(time, status, weights=freq_weights,
357 entry=entry)
359 self.surv_prob = x[0]
360 self.surv_prob_se = x[1]
361 self.surv_times = x[2]
362 self.n_risk = x[4]
363 self.n_events = x[5]
364 self.title = "" if not title else title
366 def plot(self, ax=None):
367 """
368 Plot the survival function.
370 Examples
371 --------
372 Change the line color:
374 >>> import statsmodels.api as sm
375 >>> data = sm.datasets.get_rdataset("flchain", "survival").data
376 >>> df = data.loc[data.sex == "F", :]
377 >>> sf = sm.SurvfuncRight(df["futime"], df["death"])
378 >>> fig = sf.plot()
379 >>> ax = fig.get_axes()[0]
380 >>> li = ax.get_lines()
381 >>> li[0].set_color('purple')
382 >>> li[1].set_color('purple')
384 Do not show the censoring points:
386 >>> fig = sf.plot()
387 >>> ax = fig.get_axes()[0]
388 >>> li = ax.get_lines()
389 >>> li[1].set_visible(False)
390 """
392 return plot_survfunc(self, ax)
394 def quantile(self, p):
395 """
396 Estimated quantile of a survival distribution.
398 Parameters
399 ----------
400 p : float
401 The probability point at which the quantile
402 is determined.
404 Returns the estimated quantile.
405 """
407 # SAS uses a strict inequality here.
408 ii = np.flatnonzero(self.surv_prob < 1 - p)
410 if len(ii) == 0:
411 return np.nan
413 return self.surv_times[ii[0]]
415 def quantile_ci(self, p, alpha=0.05, method='cloglog'):
416 """
417 Returns a confidence interval for a survival quantile.
419 Parameters
420 ----------
421 p : float
422 The probability point for which a confidence interval is
423 determined.
424 alpha : float
425 The confidence interval has nominal coverage probability
426 1 - `alpha`.
427 method : str
428 Function to use for g-transformation, must be ...
430 Returns
431 -------
432 lb : float
433 The lower confidence limit.
434 ub : float
435 The upper confidence limit.
437 Notes
438 -----
439 The confidence interval is obtained by inverting Z-tests. The
440 limits of the confidence interval will always be observed
441 event times.
443 References
444 ----------
445 The method is based on the approach used in SAS, documented here:
447 http://support.sas.com/documentation/cdl/en/statug/68162/HTML/default/viewer.htm#statug_lifetest_details03.htm
448 """
450 tr = norm.ppf(1 - alpha / 2)
452 method = method.lower()
453 if method == "cloglog":
454 g = lambda x: np.log(-np.log(x))
455 gprime = lambda x: -1 / (x * np.log(x))
456 elif method == "linear":
457 g = lambda x: x
458 gprime = lambda x: 1
459 elif method == "log":
460 g = lambda x: np.log(x)
461 gprime = lambda x: 1 / x
462 elif method == "logit":
463 g = lambda x: np.log(x / (1 - x))
464 gprime = lambda x: 1 / (x * (1 - x))
465 elif method == "asinsqrt":
466 g = lambda x: np.arcsin(np.sqrt(x))
467 gprime = lambda x: 1 / (2 * np.sqrt(x) * np.sqrt(1 - x))
468 else:
469 raise ValueError("unknown method")
471 r = g(self.surv_prob) - g(1 - p)
472 r /= (gprime(self.surv_prob) * self.surv_prob_se)
474 ii = np.flatnonzero(np.abs(r) <= tr)
475 if len(ii) == 0:
476 return np.nan, np.nan
478 lb = self.surv_times[ii[0]]
480 if ii[-1] == len(self.surv_times) - 1:
481 ub = np.inf
482 else:
483 ub = self.surv_times[ii[-1] + 1]
485 return lb, ub
487 def summary(self):
488 """
489 Return a summary of the estimated survival function.
491 The summary is a dataframe containing the unique event times,
492 estimated survival function values, and related quantities.
493 """
495 df = pd.DataFrame(index=self.surv_times)
496 df.index.name = "Time"
497 df["Surv prob"] = self.surv_prob
498 df["Surv prob SE"] = self.surv_prob_se
499 df["num at risk"] = self.n_risk
500 df["num events"] = self.n_events
502 return df
504 def simultaneous_cb(self, alpha=0.05, method="hw", transform="log"):
505 """
506 Returns a simultaneous confidence band for the survival function.
508 Parameters
509 ----------
510 alpha : float
511 `1 - alpha` is the desired simultaneous coverage
512 probability for the confidence region. Currently alpha
513 must be set to 0.05, giving 95% simultaneous intervals.
514 method : str
515 The method used to produce the simultaneous confidence
516 band. Only the Hall-Wellner (hw) method is currently
517 implemented.
518 transform : str
519 The used to produce the interval (note that the returned
520 interval is on the survival probability scale regardless
521 of which transform is used). Only `log` and `arcsin` are
522 implemented.
524 Returns
525 -------
526 lcb : array_like
527 The lower confidence limits corresponding to the points
528 in `surv_times`.
529 ucb : array_like
530 The upper confidence limits corresponding to the points
531 in `surv_times`.
532 """
534 method = method.lower()
535 if method != "hw":
536 msg = "only the Hall-Wellner (hw) method is implemented"
537 raise ValueError(msg)
539 if alpha != 0.05:
540 raise ValueError("alpha must be set to 0.05")
542 transform = transform.lower()
543 s2 = self.surv_prob_se**2 / self.surv_prob**2
544 nn = self.n_risk
545 if transform == "log":
546 denom = np.sqrt(nn) * np.log(self.surv_prob)
547 theta = 1.3581 * (1 + nn * s2) / denom
548 theta = np.exp(theta)
549 lcb = self.surv_prob**(1/theta)
550 ucb = self.surv_prob**theta
551 elif transform == "arcsin":
552 k = 1.3581
553 k *= (1 + nn * s2) / (2 * np.sqrt(nn))
554 k *= np.sqrt(self.surv_prob / (1 - self.surv_prob))
555 f = np.arcsin(np.sqrt(self.surv_prob))
556 v = np.clip(f - k, 0, np.inf)
557 lcb = np.sin(v)**2
558 v = np.clip(f + k, -np.inf, np.pi/2)
559 ucb = np.sin(v)**2
560 else:
561 raise ValueError("Unknown transform")
563 return lcb, ucb
566def survdiff(time, status, group, weight_type=None, strata=None,
567 entry=None, **kwargs):
568 """
569 Test for the equality of two survival distributions.
571 Parameters
572 ----------
573 time : array_like
574 The event or censoring times.
575 status : array_like
576 The censoring status variable, status=1 indicates that the
577 event occurred, status=0 indicates that the observation was
578 censored.
579 group : array_like
580 Indicators of the two groups
581 weight_type : str
582 The following weight types are implemented:
583 None (default) : logrank test
584 fh : Fleming-Harrington, weights by S^(fh_p),
585 requires exponent fh_p to be provided as keyword
586 argument; the weights are derived from S defined at
587 the previous event time, and the first weight is
588 always 1.
589 gb : Gehan-Breslow, weights by the number at risk
590 tw : Tarone-Ware, weights by the square root of the number
591 at risk
592 strata : array_like
593 Optional stratum indicators for a stratified test
594 entry : array_like
595 Entry times to handle left truncation. The subject is not in
596 the risk set on or before the entry time.
598 Returns
599 -------
600 chisq : The chi-square (1 degree of freedom) distributed test
601 statistic value
602 pvalue : The p-value for the chi^2 test
603 """
605 # TODO: extend to handle more than two groups
607 time = np.asarray(time)
608 status = np.asarray(status)
609 group = np.asarray(group)
611 gr = np.unique(group)
612 if len(gr) != 2:
613 raise ValueError("logrank only supports two groups")
615 if strata is None:
616 obs, var = _survdiff(time, status, group, weight_type, gr,
617 entry, **kwargs)
618 else:
619 strata = np.asarray(strata)
620 stu = np.unique(strata)
621 obs, var = 0., 0.
622 for st in stu:
623 # could be more efficient?
624 ii = (strata == st)
625 obs1, var1 = _survdiff(time[ii], status[ii], group[ii],
626 weight_type, gr, entry, **kwargs)
627 obs += obs1
628 var += var1
630 zstat = obs / np.sqrt(var)
632 # The chi^2 test statistic and p-value.
633 chisq = zstat**2
634 pvalue = 1 - chi2.cdf(chisq, 1)
636 return chisq, pvalue
639def _survdiff(time, status, group, weight_type, gr, entry=None,
640 **kwargs):
641 # logrank test for one stratum
643 # Get the unique times.
644 if entry is None:
645 utimes, rtimes = np.unique(time, return_inverse=True)
646 else:
647 utimes, rtimes = np.unique(np.concatenate((time, entry)),
648 return_inverse=True)
649 rtimes = rtimes[0:len(time)]
651 # Split entry times by group if present (should use pandas groupby)
652 tse = [(gr[0], None), (gr[1], None)]
653 if entry is not None:
654 for k in 0, 1:
655 ii = (group == gr[k])
656 entry1 = entry[ii]
657 tse[k] = (gr[k], entry1)
659 # Event count and risk set size at each time point, per group and overall.
660 # TODO: should use Pandas groupby
661 nrisk, obsv = [], []
662 ml = len(utimes)
663 for g, entry0 in tse:
665 mk = (group == g)
666 n = np.bincount(rtimes, weights=mk, minlength=ml)
668 ob = np.bincount(rtimes, weights=status*mk, minlength=ml)
669 obsv.append(ob)
671 if entry is not None:
672 n = np.cumsum(n) - n
673 rentry = np.searchsorted(utimes, entry0, side='left')
674 n0 = np.bincount(rentry, minlength=ml)
675 n0 = np.cumsum(n0) - n0
676 nr = n0 - n
677 else:
678 nr = np.cumsum(n[::-1])[::-1]
680 nrisk.append(nr)
682 obs = sum(obsv)
683 nrisk_tot = sum(nrisk)
685 # The variance of event counts in the first group.
686 r = nrisk[0] / np.clip(nrisk_tot, 1e-10, np.inf)
687 denom = nrisk_tot - 1
688 denom = np.clip(denom, 1e-10, np.inf)
689 var = obs * r * (1 - r) * (nrisk_tot - obs) / denom
691 # The expected number of events in the first group.
692 exp1 = obs * r
694 weights = None
695 if weight_type is not None:
696 weight_type = weight_type.lower()
697 if weight_type == "gb":
698 weights = nrisk_tot
699 elif weight_type == "tw":
700 weights = np.sqrt(nrisk_tot)
701 elif weight_type == "fh":
702 if "fh_p" not in kwargs:
703 msg = "weight_type type 'fh' requires specification of fh_p"
704 raise ValueError(msg)
705 fh_p = kwargs["fh_p"]
706 # Calculate the survivor function directly to avoid the
707 # overhead of creating a SurvfuncRight object
708 sp = 1 - obs / nrisk_tot.astype(np.float64)
709 sp = np.log(sp)
710 sp = np.cumsum(sp)
711 sp = np.exp(sp)
712 weights = sp**fh_p
713 weights = np.roll(weights, 1)
714 weights[0] = 1
715 else:
716 raise ValueError("weight_type not implemented")
718 # The Z-scale test statistic (compare to normal reference
719 # distribution).
720 ix = np.flatnonzero(nrisk_tot > 1)
721 if weights is None:
722 obs = np.sum(obsv[0][ix] - exp1[ix])
723 var = np.sum(var[ix])
724 else:
725 obs = np.dot(weights[ix], obsv[0][ix] - exp1[ix])
726 var = np.dot(weights[ix]**2, var[ix])
728 return obs, var
731def plot_survfunc(survfuncs, ax=None):
732 """
733 Plot one or more survivor functions.
735 Parameters
736 ----------
737 survfuncs : object or array_like
738 A single SurvfuncRight object, or a list or SurvfuncRight
739 objects that are plotted together.
741 Returns
742 -------
743 A figure instance on which the plot was drawn.
745 Examples
746 --------
747 Add a legend:
749 >>> import statsmodels.api as sm
750 >>> from statsmodels.duration.survfunc import plot_survfunc
751 >>> data = sm.datasets.get_rdataset("flchain", "survival").data
752 >>> df = data.loc[data.sex == "F", :]
753 >>> sf0 = sm.SurvfuncRight(df["futime"], df["death"])
754 >>> sf1 = sm.SurvfuncRight(3.0 * df["futime"], df["death"])
755 >>> fig = plot_survfunc([sf0, sf1])
756 >>> ax = fig.get_axes()[0]
757 >>> ax.set_position([0.1, 0.1, 0.64, 0.8])
758 >>> ha, lb = ax.get_legend_handles_labels()
759 >>> leg = fig.legend((ha[0], ha[1]), (lb[0], lb[1]), 'center right')
761 Change the line colors:
763 >>> fig = plot_survfunc([sf0, sf1])
764 >>> ax = fig.get_axes()[0]
765 >>> ax.set_position([0.1, 0.1, 0.64, 0.8])
766 >>> ha, lb = ax.get_legend_handles_labels()
767 >>> ha[0].set_color('purple')
768 >>> ha[1].set_color('orange')
769 """
771 fig, ax = utils.create_mpl_ax(ax)
773 # If we have only a single survival function to plot, put it into
774 # a list.
775 try:
776 assert(type(survfuncs[0]) is SurvfuncRight)
777 except:
778 survfuncs = [survfuncs]
780 for gx, sf in enumerate(survfuncs):
782 # The estimated survival function does not include a point at
783 # time 0, include it here for plotting.
784 surv_times = np.concatenate(([0], sf.surv_times))
785 surv_prob = np.concatenate(([1], sf.surv_prob))
787 # If the final times are censoring times they are not included
788 # in the survival function so we add them here
789 mxt = max(sf.time)
790 if mxt > surv_times[-1]:
791 surv_times = np.concatenate((surv_times, [mxt]))
792 surv_prob = np.concatenate((surv_prob, [surv_prob[-1]]))
794 label = getattr(sf, "title", "Group %d" % (gx + 1))
796 li, = ax.step(surv_times, surv_prob, '-', label=label, lw=2,
797 where='post')
799 # Plot the censored points.
800 ii = np.flatnonzero(np.logical_not(sf.status))
801 ti = np.unique(sf.time[ii])
802 jj = np.searchsorted(surv_times, ti) - 1
803 sp = surv_prob[jj]
804 ax.plot(ti, sp, '+', ms=12, color=li.get_color(),
805 label=label + " points")
807 ax.set_ylim(0, 1.01)
809 return fig