Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1import importlib 

2 

3from pandas._config import get_option 

4 

5from pandas.util._decorators import Appender, Substitution 

6 

7from pandas.core.dtypes.common import is_integer, is_list_like 

8from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries 

9 

10from pandas.core.base import PandasObject 

11 

12 

def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    backend=None,
    **kwargs,
):
    """
    Draw a histogram of the values of a Series.

    The work is delegated to the ``hist_series`` function of the selected
    plotting backend. Every argument except ``backend`` is forwarded to it
    unchanged.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object, optional
        Axes to draw on. If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given,
        ``bins + 1`` bin edges are calculated and returned. If a sequence
        is given, it provides the bin edges, including the left edge of the
        first bin and the right edge of the last bin; in this case ``bins``
        is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    plot_backend = _get_plot_backend(backend)
    # ``backend`` only selects the implementation; it is not forwarded.
    options = dict(
        by=by,
        ax=ax,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        figsize=figsize,
        bins=bins,
    )
    return plot_backend.hist_series(self, **options, **kwargs)

87 

88 

def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    backend=None,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist` on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified, changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified, changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default False
        Share the x axis among the subplots and set some x axis labels to
        invisible.
        Note that passing in both an ax and ``sharex=True`` will alter all
        x axis labels for all subplots in a figure.
    sharey : bool, default False
        Share the y axis among the subplots and set some y axis labels to
        invisible.
    figsize : tuple
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given,
        ``bins + 1`` bin edges are calculated and returned. If a sequence
        is given, it provides the bin edges, including the left edge of the
        first bin and the right edge of the last bin; in this case ``bins``
        is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------

    .. plot::
        :context: close-figs

        This example draws a histogram based on the length and width of
        some animals, displayed in three bins

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    plot_backend = _get_plot_backend(backend)
    # ``backend`` only selects the implementation; it is not forwarded.
    options = dict(
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
    )
    return plot_backend.hist_frame(data, **options, **kwargs)

210 

211 

# Shared docstring template for ``boxplot`` and ``boxplot_frame``. It is
# attached with ``Appender`` and the ``%(backend)s`` placeholder is filled in
# by ``Substitution`` (empty for ``boxplot``, ``_backend_doc`` for
# ``boxplot_frame``), so the placeholder line must be kept intact.
_boxplot_doc = """
Make a box plot from DataFrame columns.

Make a box-and-whisker plot from DataFrame columns, optionally grouped
by some other columns. A box plot is a method for graphically depicting
groups of numerical data through their quartiles.
The box extends from the Q1 to Q3 quartile values of the data,
with a line at the median (Q2). The whiskers extend from the edges
of box to show the range of the data. The position of the whiskers
is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box.
Outlier points are those past the end of the whiskers.

For further details see
Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

Parameters
----------
column : str or list of str, optional
    Column name or list of names, or vector.
    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
by : str or array-like, optional
    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
    One box-plot will be done per value of columns in `by`.
ax : object of class matplotlib.axes.Axes, optional
    The matplotlib axes to be used by boxplot.
fontsize : float or str
    Tick label font size in points or as a string (e.g., `large`).
rot : int or float, default 0
    The rotation angle of labels (in degrees)
    with respect to the screen coordinate system.
grid : bool, default True
    Setting this to True will show the grid.
figsize : A tuple (width, height) in inches
    The size of the figure to create in matplotlib.
layout : tuple (rows, columns), optional
    For example, (3, 5) will display the subplots
    using 3 rows and 5 columns, starting from the top-left.
return_type : {'axes', 'dict', 'both'} or None, default 'axes'
    The kind of object to return. The default is ``axes``.

    * 'axes' returns the matplotlib axes the boxplot is drawn on.
    * 'dict' returns a dictionary whose values are the matplotlib
      Lines of the boxplot.
    * 'both' returns a namedtuple with the axes and dict.
    * when grouping with ``by``, a Series mapping columns to
      ``return_type`` is returned.

      If ``return_type`` is `None`, a NumPy array
      of axes with the same shape as ``layout`` is returned.
%(backend)s\

**kwargs
    All other plotting keyword arguments to be passed to
    :func:`matplotlib.pyplot.boxplot`.

Returns
-------
result
    See Notes.

See Also
--------
Series.plot.hist : Make a histogram.
matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

Notes
-----
The return type depends on the `return_type` parameter:

* 'axes' : object of class matplotlib.axes.Axes
* 'dict' : dict of matplotlib.lines.Line2D objects
* 'both' : a namedtuple with structure (ax, lines)

For data grouped with ``by``, return a Series of the above or a numpy
array:

* :class:`~pandas.Series`
* :class:`~numpy.array` (for ``return_type = None``)

Use ``return_type='dict'`` when you want to tweak the appearance
of the lines after plotting. In this case a dict containing the Lines
making up the boxes, caps, fliers, medians, and whiskers is returned.

Examples
--------

Boxplots can be created for every column in the dataframe
by ``df.boxplot()`` or indicating the columns to be used:

.. plot::
    :context: close-figs

    >>> np.random.seed(1234)
    >>> df = pd.DataFrame(np.random.randn(10, 4),
    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])

Boxplots of variables distributions grouped by the values of a third
variable can be created using the option ``by``. For instance:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 2),
    ...                   columns=['Col1', 'Col2'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> boxplot = df.boxplot(by='X')

A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
in order to group the data by combination of the variables in the x-axis:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 3),
    ...                   columns=['Col1', 'Col2', 'Col3'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
    ...                      'B', 'A', 'B', 'A', 'B'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

The layout of boxplot can be adjusted giving a tuple to ``layout``:

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      layout=(2, 1))

Additional formatting can be done to the boxplot, like suppressing the grid
(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
or changing the fontsize (i.e. ``fontsize=15``):

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)

The parameter ``return_type`` can be used to select the type of element
returned by `boxplot`.  When ``return_type='axes'`` is selected,
the matplotlib axes on which the boxplot is drawn are returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
    >>> type(boxplot)
    <class 'matplotlib.axes._subplots.AxesSubplot'>

When grouping with ``by``, a Series mapping columns to ``return_type``
is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type='axes')
    >>> type(boxplot)
    <class 'pandas.core.series.Series'>

If ``return_type`` is `None`, a NumPy array of axes with the same shape
as ``layout`` is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type=None)
    >>> type(boxplot)
    <class 'numpy.ndarray'>
"""

376 

# numpydoc entry for the ``backend`` parameter. ``boxplot_frame`` substitutes
# it for the ``%(backend)s`` placeholder of ``_boxplot_doc``; the trailing
# newline keeps the following ``**kwargs`` entry on its own line.
_backend_doc = (
    "backend : str, default None\n"
    "    Backend to use instead of the backend specified in the option\n"
    "    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to\n"
    "    specify the ``plotting.backend`` for the whole session, set\n"
    "    ``pd.options.plotting.backend``.\n"
    "\n"
    "    .. versionadded:: 1.0.0\n"
)

386 

387 

# No inline docstring on purpose: ``Appender`` attaches ``_boxplot_doc`` and
# ``Substitution`` blanks its ``%(backend)s`` placeholder, because this
# function has no ``backend`` parameter.
@Substitution(backend="")
@Appender(_boxplot_doc)
def boxplot(
    data,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot=0,
    grid=True,
    figsize=None,
    layout=None,
    return_type=None,
    **kwargs,
):
    # This entry point always asks for the "matplotlib" backend.
    plot_backend = _get_plot_backend("matplotlib")
    options = dict(
        column=column,
        by=by,
        ax=ax,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        figsize=figsize,
        layout=layout,
        return_type=return_type,
    )
    return plot_backend.boxplot(data, **options, **kwargs)

417 

418 

# No inline docstring on purpose: ``Appender`` attaches ``_boxplot_doc`` and
# ``Substitution`` fills its ``%(backend)s`` placeholder with the ``backend``
# parameter entry from ``_backend_doc``.
@Substitution(backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot=0,
    grid=True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    plot_backend = _get_plot_backend(backend)
    # ``backend`` only selects the implementation; it is not forwarded.
    options = dict(
        column=column,
        by=by,
        ax=ax,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        figsize=figsize,
        layout=layout,
        return_type=return_type,
    )
    return plot_backend.boxplot_frame(self, **options, **kwargs)

449 

450 

def boxplot_frame_groupby(
    grouped,
    subplots=True,
    column=None,
    fontsize=None,
    rot=0,
    grid=True,
    ax=None,
    figsize=None,
    layout=None,
    sharex=False,
    sharey=True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    The work is delegated to the ``boxplot_frame_groupby`` function of the
    selected plotting backend. Every argument except ``backend`` is
    forwarded to it unchanged.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    sharey : bool, default True
        Whether y-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case ``subplots=False``

    Examples
    --------
    >>> import itertools
    >>> tuples = [t for t in itertools.product(range(1000), range(4))]
    >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
    >>> data = np.random.randn(len(index),4)
    >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
    >>>
    >>> grouped = df.groupby(level='lvl1')
    >>> boxplot_frame_groupby(grouped)
    >>>
    >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
    >>> boxplot_frame_groupby(grouped, subplots=False)
    """
    plot_backend = _get_plot_backend(backend)
    # ``backend`` only selects the implementation; it is not forwarded.
    options = dict(
        subplots=subplots,
        column=column,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        ax=ax,
        figsize=figsize,
        layout=layout,
        sharex=sharex,
        sharey=sharey,
    )
    return plot_backend.boxplot_frame_groupby(grouped, **options, **kwargs)

539 

540 

541class PlotAccessor(PandasObject): 

542 """ 

543 Make plots of Series or DataFrame. 

544 

545 Uses the backend specified by the 

546 option ``plotting.backend``. By default, matplotlib is used. 

547 

548 Parameters 

549 ---------- 

550 data : Series or DataFrame 

551 The object for which the method is called. 

552 x : label or position, default None 

553 Only used if data is a DataFrame. 

554 y : label, position or list of label, positions, default None 

555 Allows plotting of one column versus another. Only used if data is a 

556 DataFrame. 

557 kind : str 

558 The kind of plot to produce: 

559 

560 - 'line' : line plot (default) 

561 - 'bar' : vertical bar plot 

562 - 'barh' : horizontal bar plot 

563 - 'hist' : histogram 

564 - 'box' : boxplot 

565 - 'kde' : Kernel Density Estimation plot 

566 - 'density' : same as 'kde' 

567 - 'area' : area plot 

568 - 'pie' : pie plot 

569 - 'scatter' : scatter plot 

570 - 'hexbin' : hexbin plot. 

571 

572 figsize : a tuple (width, height) in inches 

573 use_index : bool, default True 

574 Use index as ticks for x axis. 

575 title : str or list 

576 Title to use for the plot. If a string is passed, print the string 

577 at the top of the figure. If a list is passed and `subplots` is 

578 True, print each item in the list above the corresponding subplot. 

579 grid : bool, default None (matlab style default) 

580 Axis grid lines. 

581 legend : bool or {'reverse'} 

582 Place legend on axis subplots. 

583 style : list or dict 

584 The matplotlib line style per column. 

585 logx : bool or 'sym', default False 

586 Use log scaling or symlog scaling on x axis. 

587 .. versionchanged:: 0.25.0 

588 

589 logy : bool or 'sym' default False 

590 Use log scaling or symlog scaling on y axis. 

591 .. versionchanged:: 0.25.0 

592 

593 loglog : bool or 'sym', default False 

594 Use log scaling or symlog scaling on both x and y axes. 

595 .. versionchanged:: 0.25.0 

596 

597 xticks : sequence 

598 Values to use for the xticks. 

599 yticks : sequence 

600 Values to use for the yticks. 

601 xlim : 2-tuple/list 

602 ylim : 2-tuple/list 

603 rot : int, default None 

604 Rotation for ticks (xticks for vertical, yticks for horizontal 

605 plots). 

606 fontsize : int, default None 

607 Font size for xticks and yticks. 

608 colormap : str or matplotlib colormap object, default None 

609 Colormap to select colors from. If string, load colormap with that 

610 name from matplotlib. 

611 colorbar : bool, optional 

612 If True, plot colorbar (only relevant for 'scatter' and 'hexbin' 

613 plots). 

614 position : float 

615 Specify relative alignments for bar plot layout. 

616 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

617 (center). 

618 table : bool, Series or DataFrame, default False 

619 If True, draw a table using the data in the DataFrame and the data 

620 will be transposed to meet matplotlib's default layout. 

621 If a Series or DataFrame is passed, use passed data to draw a 

622 table. 

623 yerr : DataFrame, Series, array-like, dict and str 

624 See :ref:`Plotting with Error Bars <visualization.errorbars>` for 

625 detail. 

626 xerr : DataFrame, Series, array-like, dict and str 

627 Equivalent to yerr. 

628 mark_right : bool, default True 

629 When using a secondary_y axis, automatically mark the column 

630 labels with "(right)" in the legend. 

631 include_bool : bool, default is False 

632 If True, boolean values can be plotted. 

633 backend : str, default None 

634 Backend to use instead of the backend specified in the option 

635 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to 

636 specify the ``plotting.backend`` for the whole session, set 

637 ``pd.options.plotting.backend``. 

638 

639 .. versionadded:: 1.0.0 

640 

641 **kwargs 

642 Options to pass to matplotlib plotting method. 

643 

644 Returns 

645 ------- 

646 :class:`matplotlib.axes.Axes` or numpy.ndarray of them 

647 If the backend is not the default matplotlib one, the return value 

648 will be the object returned by the backend. 

649 

650 Notes 

651 ----- 

652 - See matplotlib documentation online for more on this subject 

653 - If `kind` = 'bar' or 'barh', you can specify relative alignments 

654 for bar plot layout by `position` keyword. 

655 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5 

656 (center) 

657 """ 

658 

659 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box") 

660 _series_kinds = ("pie",) 

661 _dataframe_kinds = ("scatter", "hexbin") 

662 _kind_aliases = {"density": "kde"} 

663 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds 

664 

def __init__(self, data):
    """Remember ``data`` as ``self._parent``, the object this accessor plots."""
    self._parent = data

667 

@staticmethod
def _get_call_args(backend_name, data, args, kwargs):
    """
    Normalise the arguments of a plot call into ``(x, y, kind, kwargs)``.

    Keeps calls to this accessor's `__call__` compatible with the previous
    `SeriesPlotMethods.__call__` and `DataFramePlotMethods.__call__`, whose
    signatures differed slightly because `DataFramePlotMethods` accepted
    `x` and `y` parameters.

    Parameters
    ----------
    backend_name : str
        Module name of the plotting backend. Only for
        ``"pandas.plotting._matplotlib"`` are the argument defaults filled
        into the returned keyword arguments.
    data : Series or DataFrame
        Object being plotted; it selects the positional argument order.
    args : tuple
        Positional arguments of the plot call.
    kwargs : dict
        Keyword arguments of the plot call. A new dict is built; the one
        passed in is not modified.

    Returns
    -------
    tuple
        ``(x, y, kind, kwargs)`` where ``x``, ``y`` and ``kind`` have been
        removed from ``kwargs`` (defaulting to ``None``, ``None`` and
        ``"line"``).

    Raises
    ------
    TypeError
        If ``data`` is neither a Series nor a DataFrame, or if positional
        arguments are used with a Series.
    """
    is_series = isinstance(data, ABCSeries)

    # Ordered (name, default) pairs: the order maps positional arguments to
    # names, so it must not be changed.
    if is_series:
        arg_def = [
            ("kind", "line"), ("ax", None), ("figsize", None),
            ("use_index", True), ("title", None), ("grid", None),
            ("legend", False), ("style", None), ("logx", False),
            ("logy", False), ("loglog", False), ("xticks", None),
            ("yticks", None), ("xlim", None), ("ylim", None),
            ("rot", None), ("fontsize", None), ("colormap", None),
            ("table", False), ("yerr", None), ("xerr", None),
            ("label", None), ("secondary_y", False),
        ]
    elif isinstance(data, ABCDataFrame):
        arg_def = [
            ("x", None), ("y", None), ("kind", "line"), ("ax", None),
            ("subplots", False), ("sharex", None), ("sharey", False),
            ("layout", None), ("figsize", None), ("use_index", True),
            ("title", None), ("grid", None), ("legend", True),
            ("style", None), ("logx", False), ("logy", False),
            ("loglog", False), ("xticks", None), ("yticks", None),
            ("xlim", None), ("ylim", None), ("rot", None),
            ("fontsize", None), ("colormap", None), ("table", False),
            ("yerr", None), ("xerr", None), ("secondary_y", False),
            ("sort_columns", False),
        ]
    else:
        raise TypeError(
            f"Called plot accessor for type {type(data).__name__}, "
            "expected Series or DataFrame"
        )

    if is_series and args:
        # Positional use is rejected for Series; the message spells out the
        # equivalent keyword call.
        keyword_args = ", ".join(
            f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)
        )
        positional_args = str(args)[1:-1]
        raise TypeError(
            "`Series.plot()` should not be called with positional "
            "arguments, only keyword arguments. The order of "
            "positional arguments will change in the future. "
            f"Use `Series.plot({keyword_args})` instead of "
            f"`Series.plot({positional_args})`."
        )

    arg_names = [name for name, _ in arg_def]
    pos_args = dict(zip(arg_names, args))

    if backend_name == "pandas.plotting._matplotlib":
        # Defaults first, then positional values, then explicit keywords.
        kwargs = dict(arg_def, **pos_args, **kwargs)
    else:
        # Other backends only receive what the caller actually passed.
        kwargs = dict(pos_args, **kwargs)

    x = kwargs.pop("x", None)
    y = kwargs.pop("y", None)
    kind = kwargs.pop("kind", "line")
    return x, y, kind, kwargs

765 

def __call__(self, *args, **kwargs):
    # No inline docstring: the class docstring is assigned to
    # ``__call__.__doc__`` right after this definition.
    backend = kwargs.pop("backend", None)
    plot_backend = _get_plot_backend(backend)
    backend_name = plot_backend.__name__

    x, y, kind, kwargs = self._get_call_args(
        backend_name, self._parent, args, kwargs
    )
    kind = self._kind_aliases.get(kind, kind)

    # when using another backend, get out of the way
    if backend_name != "pandas.plotting._matplotlib":
        return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

    if kind not in self._all_kinds:
        raise ValueError(f"{kind} is not a valid plot kind")

    # Work on a copy: the data may be reshaped before it reaches the backend
    # (e.g. ``x`` becomes the index and ``y`` selects the plotted columns).
    data = self._parent.copy()
    is_frame = isinstance(data, ABCDataFrame)

    if isinstance(data, ABCSeries):
        kwargs["reuse_plot"] = True

    if kind in self._dataframe_kinds:
        # These kinds need x and y passed through untouched.
        if not is_frame:
            raise ValueError(f"plot kind {kind} can only be used for data frames")
        return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)

    if kind in self._series_kinds:
        if is_frame:
            if y is None and kwargs.get("subplots") is False:
                raise ValueError(
                    f"{kind} requires either y column or 'subplots=True'"
                )
            if y is not None:
                # A position is translated to a label unless the column
                # labels are integers themselves.
                if is_integer(y) and not data.columns.holds_integer():
                    y = data.columns[y]
                # converted to series actually. copy to not modify
                data = data[y].copy()
                data.index.name = y
    elif is_frame:
        # Captured before ``set_index`` so positions refer to the original
        # column order.
        data_cols = data.columns

        if x is not None:
            if is_integer(x) and not data.columns.holds_integer():
                x = data_cols[x]
            elif not isinstance(data[x], ABCSeries):
                raise ValueError("x must be a label or position")
            data = data.set_index(x)

        if y is not None:
            # y may be a single position or a list of positions
            int_y_arg = is_integer(y) or (
                is_list_like(y) and all(is_integer(c) for c in y)
            )
            if int_y_arg and not data.columns.holds_integer():
                y = data_cols[y]

            label_kw = kwargs.get("label", False)

            # Error bars given as a column label/position are resolved
            # against the data before the y columns are selected.
            for err_key in ("xerr", "yerr"):
                err_value = kwargs.get(err_key)
                if isinstance(err_value, str) or is_integer(err_value):
                    try:
                        kwargs[err_key] = data[err_value]
                    except (IndexError, KeyError, TypeError):
                        # best effort: leave the value as passed
                        pass

            # don't overwrite
            data = data[y].copy()

            if isinstance(data, ABCSeries):
                data.name = label_kw or y
            else:
                match = is_list_like(label_kw) and len(label_kw) == len(y)
                if label_kw and not match:
                    raise ValueError(
                        "label should be list-like and same length as y"
                    )
                data.columns = label_kw or data.columns

    return plot_backend.plot(data, kind=kind, **kwargs)

848 

849 __call__.__doc__ = __doc__ 

850 

def line(self, x=None, y=None, **kwargs):
    """
    Plot Series or DataFrame as lines.

    Draws lines using the values of the DataFrame as coordinates. This is
    a shortcut for calling the accessor with ``kind="line"``.

    Parameters
    ----------
    x : int or str, optional
        Columns to use for the horizontal axis.
        Either the location or the label of the columns to be used.
        By default, it will use the DataFrame indices.
    y : int, str, or list of them, optional
        The values to be plotted.
        Either the location or the label of the columns to be used.
        By default, it will use the remaining DataFrame numeric columns.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or :class:`numpy.ndarray`
        Return an ndarray when ``subplots=True``.

    See Also
    --------
    matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

    Examples
    --------

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 3, 2])
        >>> s.plot.line()

    .. plot::
        :context: close-figs

        The following example shows the populations for some animals
        over the years.

        >>> df = pd.DataFrame({
        ...    'pig': [20, 18, 489, 675, 1776],
        ...    'horse': [4, 25, 281, 600, 1900]
        ...    }, index=[1990, 1997, 2003, 2009, 2014])
        >>> lines = df.plot.line()

    .. plot::
        :context: close-figs

        An example with subplots, so an array of axes is returned.

        >>> axes = df.plot.line(subplots=True)
        >>> type(axes)
        <class 'numpy.ndarray'>

    .. plot::
        :context: close-figs

        The following example shows the relationship between both
        populations.

        >>> lines = df.plot.line(x='pig', y='horse')
    """
    # Delegate to ``__call__`` with the plot kind fixed.
    return self(kind="line", x=x, y=y, **kwargs)

919 

def bar(self, x=None, y=None, **kwargs):
    """
    Vertical bar plot.

    A bar plot presents categorical data with rectangular bars whose
    lengths are proportional to the values they represent. It shows
    comparisons among discrete categories: one axis of the plot shows the
    specific categories being compared, and the other axis represents a
    measured value. This is a shortcut for calling the accessor with
    ``kind="bar"``.

    Parameters
    ----------
    x : label or position, optional
        Allows plotting of one column versus another. If not specified,
        the index of the DataFrame is used.
    y : label or position, optional
        Allows plotting of one column versus another. If not specified,
        all numerical columns are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        An ndarray is returned with one :class:`matplotlib.axes.Axes`
        per column when ``subplots=True``.

    See Also
    --------
    DataFrame.plot.barh : Horizontal bar plot.
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.bar : Make a bar plot with matplotlib.

    Examples
    --------
    Basic plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
        >>> ax = df.plot.bar(x='lab', y='val', rot=0)

    Plot a whole dataframe to a bar plot. Each column is assigned a
    distinct color, and each row is nested in a group along the
    horizontal axis.

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.bar(rot=0)

    Instead of nesting, the figure can be split by column with
    ``subplots=True``. In this case, a :class:`numpy.ndarray` of
    :class:`matplotlib.axes.Axes` are returned.

    .. plot::
        :context: close-figs

        >>> axes = df.plot.bar(rot=0, subplots=True)
        >>> axes[1].legend(loc=2)  # doctest: +SKIP

    Plot a single column.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(y='speed', rot=0)

    Plot only selected categories for the DataFrame.

    .. plot::
        :context: close-figs

        >>> ax = df.plot.bar(x='lifespan', rot=0)
    """
    # Delegate to ``__call__`` with the plot kind fixed.
    return self(kind="bar", x=x, y=y, **kwargs)

1004 

def barh(self, x=None, y=None, **kwargs):
    """
    Make a horizontal bar plot.

    A horizontal bar plot is a plot that presents quantitative data with
    rectangular bars with lengths proportional to the values that they
    represent. A bar plot shows comparisons among discrete categories. One
    axis of the plot shows the specific categories being compared, and the
    other axis represents a measured value.

    Parameters
    ----------
    x : label or position, default DataFrame.index
        Column to be used for categories.
    y : label or position, default All numeric columns in dataframe
        Columns to be plotted from the DataFrame.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.plot.bar : Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
    # Delegate to the accessor's own call with the plot kind fixed to "barh".
    return self(kind="barh", x=x, y=y, **kwargs)

1084 

def box(self, by=None, **kwargs):
    r"""
    Make a box plot of the DataFrame columns.

    A box plot is a method for graphically depicting groups of numerical
    data through their quartiles.
    The box extends from the Q1 to Q3 quartile values of the data,
    with a line at the median (Q2). The whiskers extend from the edges
    of box to show the range of the data. The position of the whiskers
    is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
    box. Outlier points are those past the end of the whiskers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    A consideration when using this chart is that the box and the whiskers
    can overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.
    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot : Another method to draw a box plot.
    Series.plot.box : Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot : Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()
    """
    # Delegate to the accessor's own call with the plot kind fixed to "box".
    return self(kind="box", by=by, **kwargs)

1134 

def hist(self, by=None, bins=10, **kwargs):
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.
    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)
    """
    # Delegate to the accessor's own call with the plot kind fixed to "hist".
    return self(kind="hist", by=by, bins=bins, **kwargs)

1181 

def kde(self, bw_method=None, ind=None, **kwargs):
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Delegate to the accessor's own call with the plot kind fixed to "kde".
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)


# ``density`` is an alias bound to the same function object as ``kde``.
density = kde

1291 

def area(self, x=None, y=None, stacked=True, **kwargs):
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # ``stacked`` is an explicit parameter so the signature matches the
    # documented interface; it is forwarded together with the fixed kind.
    return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)

1364 

def pie(self, **kwargs):
    """
    Generate a pie plot.

    A pie plot is a proportional representation of the numerical data in a
    column. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. If no column reference is passed and
    ``subplots=True`` a pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the parent object is a DataFrame and neither a ``y`` column
        nor ``subplots=True`` is passed.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    In the example below we have a DataFrame with the information about
    planet's mass and radius. We pass the 'mass' column to the
    pie function to get a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(6, 3))
    """
    # For a DataFrame parent, fail early unless a column or subplots is given.
    if (
        isinstance(self._parent, ABCDataFrame)
        and kwargs.get("y", None) is None
        and not kwargs.get("subplots", False)
    ):
        raise ValueError("pie requires either y column or 'subplots=True'")
    return self(kind="pie", **kwargs)

1419 

def scatter(self, x, y, s=None, c=None, **kwargs):
    """
    Create a scatter plot with varying marker point size and color.

    The coordinates of each point are defined by two dataframe columns and
    filled circles are used to represent each point. This kind of plot is
    useful to see complex correlations between two variables. Points could
    be for instance natural 2D coordinates like longitude and latitude in
    a map or, in general, any pair of metrics that can be plotted against
    each other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : scalar or array_like, optional
        The size of each point. Possible values are:

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

    c : str, int or array_like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Let's see how to draw a scatter plot using coordinates from the values
    in a DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    And now with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Delegate to the accessor's own call with the plot kind fixed to "scatter".
    return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)

1500 

def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None`
    (the default), this is a histogram of the number of occurrences
    of the observations at ``(x[i], y[i])``.

    If `C` is specified, specifies values at given coordinates
    ``(x[i], y[i])``. These values are accumulated for each hexagonal
    bin and then reduced according to `reduce_C_function`,
    having as default the NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values range from 1 to 4 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1, 5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Only forward the optional settings that were actually supplied, so an
    # unset value is left out of the call entirely instead of passed as None.
    if reduce_C_function is not None:
        kwargs["reduce_C_function"] = reduce_C_function
    if gridsize is not None:
        kwargs["gridsize"] = gridsize

    return self(kind="hexbin", x=x, y=y, C=C, **kwargs)

1589 

1590 

1591_backends = {} 

1592 

1593 

def _find_backend(backend: str):
    """
    Find a pandas plotting backend.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with pkg_resources, or a module name.

    Notes
    -----
    Modifies _backends with imported backends as a side effect.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ValueError
        If no registered entrypoint matches ``backend`` and it cannot be
        imported as a module exposing a top-level ``plot`` attribute.
    """
    import pkg_resources  # Delay import for performance.

    for entry_point in pkg_resources.iter_entry_points("pandas_plotting_backends"):
        if entry_point.name == "matplotlib":
            # matplotlib is an optional dependency. When
            # missing, this would raise.
            continue
        _backends[entry_point.name] = entry_point.load()

    try:
        return _backends[backend]
    except KeyError:
        # Fall back to unregistered, module name approach.
        try:
            module = importlib.import_module(backend)
        except ImportError:
            # We re-raise later on.
            pass
        else:
            if hasattr(module, "plot"):
                # Validate that the interface is implemented when the option
                # is set, rather than at plot time.
                _backends[backend] = module
                return module

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've installed "
        f"the package providing the '{backend}' entrypoint, or that the package has a "
        "top-level `.plot` method."
    )

1643 

1644 

def _get_plot_backend(backend=None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    The plotting system of pandas has been using matplotlib, but the idea here
    is that it can also work with other third-party backends. In the future,
    this function will return the backend from a pandas option, and all the
    rest of the code in this file will use the backend specified there for the
    plotting.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Parameters
    ----------
    backend : str, optional
        Name of the backend to use. When not given (or falsy), the value of
        the ``plotting.backend`` option is used.

    Returns
    -------
    module
        The backend module, taken from the ``_backends`` cache when present
        and otherwise resolved with ``_find_backend`` and then cached.

    Raises
    ------
    ImportError
        If the "matplotlib" backend is selected but
        ``pandas.plotting._matplotlib`` cannot be imported.
    """
    backend = backend or get_option("plotting.backend")

    if backend == "matplotlib":
        # Because matplotlib is an optional dependency and first-party backend,
        # we need to attempt an import here to raise an ImportError if needed.
        try:
            import pandas.plotting._matplotlib as module
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

        _backends["matplotlib"] = module

    # Reuse a backend that has already been resolved.
    if backend in _backends:
        return _backends[backend]

    module = _find_backend(backend)
    _backends[backend] = module
    return module

1678 return module