Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Correlation plot functions.""" 

2 

3 

4import numpy as np 

5 

6from statsmodels.graphics import utils 

7from statsmodels.tsa.stattools import acf, pacf 

8 

9 

def _prepare_data_corr_plot(x, lags, zero):
    """
    Normalize the ``lags`` argument used by the correlation plots.

    Parameters
    ----------
    x : array_like
        Time-series values. Only ``x.shape[0]`` is read, and only when
        ``lags`` is None.
    lags : {None, int, array_like}
        Requested lags. None selects the default
        ``min(ceil(10 * log10(nobs)), nobs - 1)`` as the largest lag, a
        scalar is taken as the largest lag, and an array_like is used as
        the explicit set of lags after conversion to integers.
    zero : bool
        Whether lag 0 is included when the lags are generated from None or
        from a scalar.

    Returns
    -------
    lags : ndarray
        Integer array of the lags to plot.
    nlags : int
        The largest lag in ``lags``.
    irregular : bool
        True when ``lags`` is not the contiguous range starting at 0, i.e.
        when lag 0 was excluded or an explicit array of lags was supplied.
    """
    zero = bool(zero)
    # Dropping lag 0 means the lag values no longer equal their positions.
    irregular = not zero
    # First generated lag: 0 when the zero lag is kept, 1 otherwise.
    first_lag = int(not zero)
    if lags is None:
        # GH 4663 - use a sensible default value
        nobs = x.shape[0]
        lim = min(int(np.ceil(10 * np.log10(nobs))), nobs - 1)
        lags = np.arange(first_lag, lim + 1)
    elif np.isscalar(lags):
        lags = np.arange(first_lag, int(lags) + 1)  # +1 for zero lag
    else:
        irregular = True
        # The ``np.int`` alias was removed from NumPy; the builtin ``int``
        # selects the same default integer dtype.
        lags = np.asanyarray(lags).astype(int)
    nlags = lags.max(0)

    return lags, nlags, irregular

26 

27 

def _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs):
    """
    Draw correlation values, and optionally a confidence band, on ``ax``.

    Parameters
    ----------
    ax : AxesSubplot
        Axes that receive the ``vlines``, ``axhline``, ``plot``,
        ``set_title`` and ``fill_between`` calls.
    title : str
        Title set on ``ax``.
    acf_x : ndarray
        Correlation values. Indexed with ``lags`` when ``irregular`` is
        True, otherwise used as given.
    confint : {ndarray, None}
        Two-column array of lower and upper bounds, one row per
        correlation. If None, no band is drawn.
    lags : ndarray
        Integer lags used as the horizontal positions.
    irregular : bool
        If True, ``acf_x`` and ``confint`` are subset with ``lags`` before
        plotting.
    use_vlines : bool
        If True, vertical lines from 0 to each correlation and a horizontal
        line are drawn in addition to the markers.
    vlines_kwargs : dict
        Keyword arguments passed to ``ax.vlines``.
    **kwargs
        Keyword arguments passed to both ``ax.axhline`` and ``ax.plot``.
    """
    if irregular:
        acf_x = acf_x[lags]
        if confint is not None:
            confint = confint[lags]

    if use_vlines:
        ax.vlines(lags, [0], acf_x, **vlines_kwargs)
        # axhline receives the caller's kwargs before the marker defaults
        # below are added, so those defaults only affect ``plot``.
        ax.axhline(**kwargs)

    kwargs.setdefault('marker', 'o')
    kwargs.setdefault('markersize', 5)
    if 'ls' not in kwargs:
        # gh-2369
        kwargs.setdefault('linestyle', 'None')
    ax.margins(.05)
    ax.plot(lags, acf_x, **kwargs)
    ax.set_title(title)

    if confint is not None:
        # The band is not drawn at lag 0.
        if lags[0] == 0:
            lags = lags[1:]
            confint = confint[1:]
            acf_x = acf_x[1:]
        if lags.size == 0:
            # Only lag 0 was requested, so there is nothing to shade.
            return
        # The ``np.float`` alias was removed from NumPy; the builtin
        # ``float`` is the same dtype. ``astype`` copies, so the caller's
        # lag array is not modified by the shifts below.
        lags = lags.astype(float)
        # Widen the band by half a lag on both ends.
        lags[0] -= 0.5
        lags[-1] += 0.5
        # Subtracting the estimate centres the band on zero.
        ax.fill_between(lags, confint[:, 0] - acf_x,
                        confint[:, 1] - acf_x, alpha=.25)

58 

59 

def plot_acf(x, ax=None, lags=None, *, alpha=.05, use_vlines=True,
             unbiased=False, fft=False, missing='none',
             title='Autocorrelation', zero=True, vlines_kwargs=None, **kwargs):
    """
    Plot the autocorrelation function.

    Lags are drawn on the horizontal axis and the autocorrelations on the
    vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values.
    ax : AxesSubplot, optional
        Subplot to draw into. If not given, a new figure is created.
    lags : {int, array_like}, optional
        Lags shown on the horizontal axis. An int is the largest lag and
        selects every lag from 0 (or 1 when ``zero`` is False) up to and
        including that value. An array_like is used as the explicit set of
        lags. If not provided, the largest lag is
        ``min(ceil(10 * log10(nobs)), nobs - 1)``.
    alpha : scalar, optional
        If a number is given, confidence intervals for that level are
        requested from ``acf`` and drawn. For instance, alpha=.05 gives
        95 % confidence intervals, where the standard deviation is computed
        according to Bartlett's formula. If None, no confidence intervals
        are plotted.
    use_vlines : bool, optional
        If True, vertical lines and markers are plotted. If False, only
        markers are plotted. The default marker is 'o'; it can be
        overridden with a ``marker`` kwarg.
    unbiased : bool
        If True, the denominators for the autocovariance are n-k, otherwise
        n.
    fft : bool, optional
        If True, computes the ACF via FFT.
    missing : str, optional
        One of ['none', 'raise', 'conservative', 'drop'], specifying how
        NaNs are to be treated.
    title : str, optional
        Title to place on the plot. Default is 'Autocorrelation'.
    zero : bool, optional
        Whether to include the 0-lag autocorrelation. Default is True.
    vlines_kwargs : dict, optional
        Keyword arguments that are passed to ``vlines``.
    **kwargs : kwargs, optional
        Keyword arguments that are passed directly to the Matplotlib
        ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``.

    kwargs is used to pass matplotlib optional arguments to both the line
    tracing the autocorrelations and the horizontal line at 0. These
    options must be valid for a Line2D object.

    vlines_kwargs is used to pass additional optional arguments to the
    vertical lines connecting each autocorrelation to the axis. These
    options must be valid for a LineCollection object.

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.sunspots.load_pandas().data
    >>> dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    >>> del dta["YEAR"]
    >>> sm.graphics.tsa.plot_acf(dta.values.squeeze(), lags=40)
    >>> plt.show()

    .. plot:: plots/graphics_tsa_plot_acf.py
    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)
    if vlines_kwargs is None:
        vlines_kwargs = {}

    # acf returns the estimates alone when alpha is None, and an
    # (estimates, confidence intervals) pair otherwise.
    estimate = acf(x, nlags=nlags, alpha=alpha, fft=fft, unbiased=unbiased,
                   missing=missing)
    if alpha is None:
        acf_x, confint = estimate, None
    else:
        acf_x, confint = estimate

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs)

    return fig

161 

162 

def plot_pacf(x, ax=None, lags=None, alpha=.05, method='ywunbiased',
              use_vlines=True, title='Partial Autocorrelation', zero=True,
              vlines_kwargs=None, **kwargs):
    """
    Plot the partial autocorrelation function.

    Parameters
    ----------
    x : array_like
        Array of time-series values.
    ax : AxesSubplot, optional
        Subplot to draw into. If not given, a new figure is created.
    lags : {int, array_like}, optional
        Lags shown on the horizontal axis. An int is the largest lag and
        selects every lag from 0 (or 1 when ``zero`` is False) up to and
        including that value. An array_like is used as the explicit set of
        lags. If not provided, the largest lag is
        ``min(ceil(10 * log10(nobs)), nobs - 1)``.
    alpha : float, optional
        If a number is given, confidence intervals for that level are
        requested from ``pacf`` and drawn. For instance, alpha=.05 gives
        95 % confidence intervals, where the standard deviation is computed
        according to 1/sqrt(len(x)). If None, no confidence intervals are
        plotted.
    method : {'ywunbiased', 'ywmle', 'ols'}
        Calculation method, passed on to ``pacf``:

        - yw or ywunbiased : yule walker with bias correction in denominator
          for acovf. Default.
        - ywm or ywmle : yule walker without bias correction
        - ols - regression of time series on lags of it and on constant
        - ld or ldunbiased : Levinson-Durbin recursion with bias correction
        - ldb or ldbiased : Levinson-Durbin recursion without bias correction

    use_vlines : bool, optional
        If True, vertical lines and markers are plotted. If False, only
        markers are plotted. The default marker is 'o'; it can be
        overridden with a ``marker`` kwarg.
    title : str, optional
        Title to place on the plot. Default is 'Partial Autocorrelation'.
    zero : bool, optional
        Whether to include the 0-lag autocorrelation. Default is True.
    vlines_kwargs : dict, optional
        Keyword arguments that are passed to ``vlines``.
    **kwargs : kwargs, optional
        Keyword arguments that are passed directly to the Matplotlib
        ``plot`` and ``axhline`` functions.

    Returns
    -------
    Figure
        If `ax` is None, the created figure. Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr

    Notes
    -----
    Plots lags on the horizontal and the correlations on the vertical axis.
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``.

    kwargs is used to pass matplotlib optional arguments to both the line
    tracing the autocorrelations and the horizontal line at 0. These
    options must be valid for a Line2D object.

    vlines_kwargs is used to pass additional optional arguments to the
    vertical lines connecting each autocorrelation to the axis. These
    options must be valid for a LineCollection object.

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.sunspots.load_pandas().data
    >>> dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    >>> del dta["YEAR"]
    >>> sm.graphics.tsa.plot_pacf(dta.values.squeeze(), lags=40)
    >>> plt.show()

    .. plot:: plots/graphics_tsa_plot_pacf.py
    """
    fig, ax = utils.create_mpl_ax(ax)
    if vlines_kwargs is None:
        vlines_kwargs = {}
    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)

    # pacf returns the estimates alone when alpha is None, and an
    # (estimates, confidence intervals) pair otherwise.
    estimate = pacf(x, nlags=nlags, alpha=alpha, method=method)
    if alpha is None:
        acf_x, confint = estimate, None
    else:
        acf_x, confint = estimate

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs)

    return fig

264 

265 

def seasonal_plot(grouped_x, xticklabels, ylabel=None, ax=None):
    """
    Plot each seasonal group side by side with a line at its mean.

    Consider using one of month_plot or quarter_plot unless you need
    irregular plotting.

    Parameters
    ----------
    grouped_x : iterable of DataFrames
        Should be a GroupBy object (or similar pair of group_names and groups
        as DataFrames) with a DatetimeIndex or PeriodIndex.
    xticklabels : list of str
        List of season labels, one for each group.
    ylabel : str
        Label for the y axis.
    ax : AxesSubplot, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.

    Returns
    -------
    Figure
        The figure returned by ``utils.create_mpl_ax`` for ``ax``.
    """
    fig, ax = utils.create_mpl_ax(ax)
    start = 0
    ticks = []
    for _season, df in grouped_x:
        # sort_index returns a new sorted object rather than sorting in
        # place, so the result has to be kept; the group is left untouched.
        df = df.sort_index()
        nobs = len(df)
        # Groups are laid out one after another along the x axis.
        x_plot = np.arange(start, start + nobs)
        # The tick for a season sits at the centre of its segment.
        ticks.append(x_plot.mean())
        ax.plot(x_plot, df.values, 'k')
        ax.hlines(df.values.mean(), x_plot[0], x_plot[-1], colors='r',
                  linewidth=3)
        start += nobs

    ax.set_xticks(ticks)
    ax.set_xticklabels(xticklabels)
    ax.set_ylabel(ylabel)
    ax.margins(.1, .05)
    return fig

303 

304 

def month_plot(x, dates=None, ylabel=None, ax=None):
    """
    Seasonal plot of monthly data.

    Parameters
    ----------
    x : array_like
        Seasonal data to plot. If dates is None, x must be a pandas object
        with a PeriodIndex or DatetimeIndex with a monthly frequency.
    dates : array_like, optional
        If `x` is not a pandas object, then dates must be supplied. They
        are converted to a monthly PeriodIndex that indexes `x`.
    ylabel : str, optional
        The label for the y-axis. Passed unchanged to ``seasonal_plot``.
    ax : Axes, optional
        Existing axes instance.

    Returns
    -------
    Figure
        If `ax` is provided, the Figure instance attached to `ax`. Otherwise
        a new Figure instance.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd

    >>> dta = sm.datasets.elnino.load_pandas().data
    >>> dta['YEAR'] = dta.YEAR.astype(int).astype(str)
    >>> dta = dta.set_index('YEAR').T.unstack()
    >>> dates = pd.to_datetime(list(map(lambda x: '-'.join(x) + '-1',
    ...                                 dta.index.values)))
    >>> dta.index = pd.DatetimeIndex(dates, freq='MS')
    >>> fig = sm.graphics.tsa.month_plot(dta)

    .. plot:: plots/graphics_tsa_month_plot.py
    """

    if dates is not None:
        from pandas import PeriodIndex, Series
        monthly_index = PeriodIndex(dates, freq="M")
        x = Series(x, index=monthly_index)
    else:
        # No dates supplied: x has to carry a suitable index itself.
        from statsmodels.tools.data import _check_period_index
        _check_period_index(x, freq="M")

    def _month_of(stamp):
        return stamp.month

    # One lower-case initial per calendar month, January through December.
    month_initials = list('jfmamjjasond')
    by_month = x.groupby(_month_of)
    return seasonal_plot(by_month, month_initials, ylabel=ylabel, ax=ax)

354 

355 

def quarter_plot(x, dates=None, ylabel=None, ax=None):
    """
    Seasonal plot of quarterly data.

    Parameters
    ----------
    x : array_like
        Seasonal data to plot. If dates is None, x must be a pandas object
        with a PeriodIndex or DatetimeIndex with a quarterly frequency.
    dates : array_like, optional
        If `x` is not a pandas object, then dates must be supplied. They
        are converted to a quarterly PeriodIndex that indexes `x`.
    ylabel : str, optional
        The label for the y-axis. Passed unchanged to ``seasonal_plot``.
    ax : matplotlib.axes, optional
        Existing axes instance.

    Returns
    -------
    Figure
        If `ax` is provided, the Figure instance attached to `ax`. Otherwise
        a new Figure instance.

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> import pandas as pd

    >>> dta = sm.datasets.elnino.load_pandas().data
    >>> dta['YEAR'] = dta.YEAR.astype(int).astype(str)
    >>> dta = dta.set_index('YEAR').T.unstack()
    >>> dates = pd.to_datetime(list(map(lambda x: '-'.join(x) + '-1',
    ...                                 dta.index.values)))
    >>> dta.index = dates.to_period('Q')
    >>> fig = sm.graphics.tsa.quarter_plot(dta)

    .. plot:: plots/graphics_tsa_quarter_plot.py
    """

    if dates is not None:
        from pandas import PeriodIndex, Series
        quarterly_index = PeriodIndex(dates, freq="Q")
        x = Series(x, index=quarterly_index)
    else:
        # No dates supplied: x has to carry a suitable index itself.
        from statsmodels.tools.data import _check_period_index
        _check_period_index(x, freq="Q")

    def _quarter_of(stamp):
        return stamp.quarter

    quarter_names = ['q1', 'q2', 'q3', 'q4']
    by_quarter = x.groupby(_quarter_of)
    return seasonal_plot(by_quarter, quarter_names, ylabel=ylabel, ax=ax)