Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/plotting/_core.py : 25%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import importlib
3from pandas._config import get_option
5from pandas.util._decorators import Appender, Substitution
7from pandas.core.dtypes.common import is_integer, is_list_like
8from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
10from pandas.core.base import PandasObject
def hist_series(
    self,
    by=None,
    ax=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    figsize=None,
    bins=10,
    backend=None,
    **kwargs,
):
    """
    Draw histogram of the input series using matplotlib.

    Parameters
    ----------
    by : object, optional
        If passed, then used to form histograms for separate groups.
    ax : matplotlib axis object
        If not passed, uses gca().
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels.
    figsize : tuple, default None
        Figure size in inches.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        To be passed to the actual plotting function.

    Returns
    -------
    matplotlib.AxesSubplot
        A histogram plot.

    See Also
    --------
    matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    hist_options = dict(
        by=by,
        ax=ax,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        figsize=figsize,
        bins=bins,
    )
    return _get_plot_backend(backend).hist_series(self, **hist_options, **kwargs)
def hist_frame(
    data,
    column=None,
    by=None,
    grid=True,
    xlabelsize=None,
    xrot=None,
    ylabelsize=None,
    yrot=None,
    ax=None,
    sharex=False,
    sharey=False,
    figsize=None,
    layout=None,
    bins=10,
    backend=None,
    **kwargs,
):
    """
    Make a histogram of the DataFrame's columns.

    A `histogram`_ is a representation of the distribution of data.
    This function calls :meth:`matplotlib.pyplot.hist`, on each series in
    the DataFrame, resulting in one histogram per column.

    .. _histogram: https://en.wikipedia.org/wiki/Histogram

    Parameters
    ----------
    data : DataFrame
        The pandas object holding the data.
    column : str or sequence
        If passed, will be used to limit data to a subset of columns.
    by : object, optional
        If passed, then used to form histograms for separate groups.
    grid : bool, default True
        Whether to show axis grid lines.
    xlabelsize : int, default None
        If specified changes the x-axis label size.
    xrot : float, default None
        Rotation of x axis labels. For example, a value of 90 displays the
        x labels rotated 90 degrees clockwise.
    ylabelsize : int, default None
        If specified changes the y-axis label size.
    yrot : float, default None
        Rotation of y axis labels. For example, a value of 90 displays the
        y labels rotated 90 degrees clockwise.
    ax : Matplotlib axes object, default None
        The axes to plot the histogram on.
    sharex : bool, default True if ax is None else False
        In case subplots=True, share x axis and set some x axis labels to
        invisible; defaults to True if ax is None otherwise False if an ax
        is passed in.
        Note that passing in both an ax and sharex=True will alter all x axis
        labels for all subplots in a figure.
    sharey : bool, default False
        In case subplots=True, share y axis and set some y axis labels to
        invisible.
    figsize : tuple
        The size in inches of the figure to create. Uses the value in
        `matplotlib.rcParams` by default.
    layout : tuple, optional
        Tuple of (rows, columns) for the layout of the histograms.
    bins : int or sequence, default 10
        Number of histogram bins to be used. If an integer is given, bins + 1
        bin edges are calculated and returned. If bins is a sequence, gives
        bin edges, including left edge of first bin and right edge of last
        bin. In this case, bins is returned unmodified.
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        :meth:`matplotlib.pyplot.hist`.

    Returns
    -------
    matplotlib.AxesSubplot or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.hist : Plot a histogram using matplotlib.

    Examples
    --------

    .. plot::
        :context: close-figs

        This example draws a histogram based on the length and width of
        some animals, displayed in three bins

        >>> df = pd.DataFrame({
        ...     'length': [1.5, 0.5, 1.2, 0.9, 3],
        ...     'width': [0.7, 0.2, 0.15, 0.2, 1.1]
        ...     }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
        >>> hist = df.hist(bins=3)
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    hist_options = dict(
        column=column,
        by=by,
        grid=grid,
        xlabelsize=xlabelsize,
        xrot=xrot,
        ylabelsize=ylabelsize,
        yrot=yrot,
        ax=ax,
        sharex=sharex,
        sharey=sharey,
        figsize=figsize,
        layout=layout,
        bins=bins,
    )
    return _get_plot_backend(backend).hist_frame(data, **hist_options, **kwargs)
212_boxplot_doc = """
213Make a box plot from DataFrame columns.
215Make a box-and-whisker plot from DataFrame columns, optionally grouped
216by some other columns. A box plot is a method for graphically depicting
217groups of numerical data through their quartiles.
218The box extends from the Q1 to Q3 quartile values of the data,
219with a line at the median (Q2). The whiskers extend from the edges
220of box to show the range of the data. The position of the whiskers
221is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box.
222Outlier points are those past the end of the whiskers.
224For further details see
225Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.
227Parameters
228----------
229column : str or list of str, optional
230 Column name or list of names, or vector.
231 Can be any valid input to :meth:`pandas.DataFrame.groupby`.
232by : str or array-like, optional
233 Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
234 One box-plot will be done per value of columns in `by`.
235ax : object of class matplotlib.axes.Axes, optional
236 The matplotlib axes to be used by boxplot.
237fontsize : float or str
238 Tick label font size in points or as a string (e.g., `large`).
239rot : int or float, default 0
240 The rotation angle of labels (in degrees)
241 with respect to the screen coordinate system.
242grid : bool, default True
243 Setting this to True will show the grid.
244figsize : A tuple (width, height) in inches
245 The size of the figure to create in matplotlib.
246layout : tuple (rows, columns), optional
247 For example, (3, 5) will display the subplots
248 using 3 columns and 5 rows, starting from the top-left.
249return_type : {'axes', 'dict', 'both'} or None, default 'axes'
250 The kind of object to return. The default is ``axes``.
252 * 'axes' returns the matplotlib axes the boxplot is drawn on.
253 * 'dict' returns a dictionary whose values are the matplotlib
254 Lines of the boxplot.
255 * 'both' returns a namedtuple with the axes and dict.
256 * when grouping with ``by``, a Series mapping columns to
257 ``return_type`` is returned.
259 If ``return_type`` is `None`, a NumPy array
260 of axes with the same shape as ``layout`` is returned.
261%(backend)s\
263**kwargs
264 All other plotting keyword arguments to be passed to
265 :func:`matplotlib.pyplot.boxplot`.
267Returns
268-------
269result
270 See Notes.
272See Also
273--------
274Series.plot.hist: Make a histogram.
275matplotlib.pyplot.boxplot : Matplotlib equivalent plot.
277Notes
278-----
279The return type depends on the `return_type` parameter:
281* 'axes' : object of class matplotlib.axes.Axes
282* 'dict' : dict of matplotlib.lines.Line2D objects
283* 'both' : a namedtuple with structure (ax, lines)
285For data grouped with ``by``, return a Series of the above or a numpy
286array:
288* :class:`~pandas.Series`
289* :class:`~numpy.array` (for ``return_type = None``)
291Use ``return_type='dict'`` when you want to tweak the appearance
292of the lines after plotting. In this case a dict containing the Lines
293making up the boxes, caps, fliers, medians, and whiskers is returned.
295Examples
296--------
298Boxplots can be created for every column in the dataframe
299by ``df.boxplot()`` or indicating the columns to be used:
301.. plot::
302 :context: close-figs
304 >>> np.random.seed(1234)
305 >>> df = pd.DataFrame(np.random.randn(10, 4),
306 ... columns=['Col1', 'Col2', 'Col3', 'Col4'])
307 >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3'])
309Boxplots of variables distributions grouped by the values of a third
310variable can be created using the option ``by``. For instance:
312.. plot::
313 :context: close-figs
315 >>> df = pd.DataFrame(np.random.randn(10, 2),
316 ... columns=['Col1', 'Col2'])
317 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
318 ... 'B', 'B', 'B', 'B', 'B'])
319 >>> boxplot = df.boxplot(by='X')
321A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
322in order to group the data by combination of the variables in the x-axis:
324.. plot::
325 :context: close-figs
327 >>> df = pd.DataFrame(np.random.randn(10, 3),
328 ... columns=['Col1', 'Col2', 'Col3'])
329 >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
330 ... 'B', 'B', 'B', 'B', 'B'])
331 >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
332 ... 'B', 'A', 'B', 'A', 'B'])
333 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])
335The layout of boxplot can be adjusted giving a tuple to ``layout``:
337.. plot::
338 :context: close-figs
340 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
341 ... layout=(2, 1))
343Additional formatting can be done to the boxplot, like suppressing the grid
344(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
345or changing the fontsize (i.e. ``fontsize=15``):
347.. plot::
348 :context: close-figs
350 >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15)
352The parameter ``return_type`` can be used to select the type of element
353returned by `boxplot`. When ``return_type='axes'`` is selected,
354the matplotlib axes on which the boxplot is drawn are returned:
356 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
357 >>> type(boxplot)
358 <class 'matplotlib.axes._subplots.AxesSubplot'>
360When grouping with ``by``, a Series mapping columns to ``return_type``
361is returned:
363 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
364 ... return_type='axes')
365 >>> type(boxplot)
366 <class 'pandas.core.series.Series'>
368If ``return_type`` is `None`, a NumPy array of axes with the same shape
369as ``layout`` is returned:
371 >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
372 ... return_type=None)
373 >>> type(boxplot)
374 <class 'numpy.ndarray'>
375"""
# numpydoc entry for the ``backend`` parameter. ``boxplot_frame`` passes it to
# ``Substitution`` so that it replaces the ``%(backend)s`` placeholder in
# ``_boxplot_doc``; ``boxplot`` substitutes an empty string instead.
_backend_doc = """\
backend : str, default None
    Backend to use instead of the backend specified in the option
    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
    specify the ``plotting.backend`` for the whole session, set
    ``pd.options.plotting.backend``.

    .. versionadded:: 1.0.0
"""
@Substitution(backend="")
@Appender(_boxplot_doc)
def boxplot(
    data,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot=0,
    grid=True,
    figsize=None,
    layout=None,
    return_type=None,
    **kwargs,
):
    # No docstring here on purpose: the decorators above build ``__doc__``
    # from ``_boxplot_doc`` (with the ``backend`` entry left empty).
    # This entry point always requests the "matplotlib" backend.
    box_options = dict(
        column=column,
        by=by,
        ax=ax,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        figsize=figsize,
        layout=layout,
        return_type=return_type,
    )
    return _get_plot_backend("matplotlib").boxplot(data, **box_options, **kwargs)
@Substitution(backend=_backend_doc)
@Appender(_boxplot_doc)
def boxplot_frame(
    self,
    column=None,
    by=None,
    ax=None,
    fontsize=None,
    rot=0,
    grid=True,
    figsize=None,
    layout=None,
    return_type=None,
    backend=None,
    **kwargs,
):
    # No docstring here on purpose: the decorators above build ``__doc__``
    # from ``_boxplot_doc`` with ``_backend_doc`` substituted in.
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    box_options = dict(
        column=column,
        by=by,
        ax=ax,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        figsize=figsize,
        layout=layout,
        return_type=return_type,
    )
    return _get_plot_backend(backend).boxplot_frame(self, **box_options, **kwargs)
def boxplot_frame_groupby(
    grouped,
    subplots=True,
    column=None,
    fontsize=None,
    rot=0,
    grid=True,
    ax=None,
    figsize=None,
    layout=None,
    sharex=False,
    sharey=True,
    backend=None,
    **kwargs,
):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : Grouped DataFrame
    subplots : bool
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group.

    column : column name or list of names, or vector
        Can be any valid input to groupby.
    fontsize : int or str
    rot : label rotation angle
    grid : Setting this to True will show the grid
    ax : Matplotlib axis object, default None
    figsize : A tuple (width, height) in inches
    layout : tuple (optional)
        The layout of the plot: (rows, columns).
    sharex : bool, default False
        Whether x-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    sharey : bool, default True
        Whether y-axes will be shared among subplots.

        .. versionadded:: 0.23.1
    backend : str, default None
        Backend to use instead of the backend specified in the option
        ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
        specify the ``plotting.backend`` for the whole session, set
        ``pd.options.plotting.backend``.

        .. versionadded:: 1.0.0

    **kwargs
        All other plotting keyword arguments to be passed to
        matplotlib's boxplot function.

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case ``subplots=False``

    Examples
    --------
    >>> import itertools
    >>> tuples = [t for t in itertools.product(range(1000), range(4))]
    >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
    >>> data = np.random.randn(len(index),4)
    >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
    >>>
    >>> grouped = df.groupby(level='lvl1')
    >>> boxplot_frame_groupby(grouped)
    >>>
    >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
    >>> boxplot_frame_groupby(grouped, subplots=False)
    """
    # ``backend`` only selects the plotting module; it is not forwarded to it.
    box_options = dict(
        subplots=subplots,
        column=column,
        fontsize=fontsize,
        rot=rot,
        grid=grid,
        ax=ax,
        figsize=figsize,
        layout=layout,
        sharex=sharex,
        sharey=sharey,
    )
    backend_module = _get_plot_backend(backend)
    return backend_module.boxplot_frame_groupby(grouped, **box_options, **kwargs)
541class PlotAccessor(PandasObject):
542 """
543 Make plots of Series or DataFrame.
545 Uses the backend specified by the
546 option ``plotting.backend``. By default, matplotlib is used.
548 Parameters
549 ----------
550 data : Series or DataFrame
551 The object for which the method is called.
552 x : label or position, default None
553 Only used if data is a DataFrame.
554 y : label, position or list of label, positions, default None
555 Allows plotting of one column versus another. Only used if data is a
556 DataFrame.
557 kind : str
558 The kind of plot to produce:
560 - 'line' : line plot (default)
561 - 'bar' : vertical bar plot
562 - 'barh' : horizontal bar plot
563 - 'hist' : histogram
564 - 'box' : boxplot
565 - 'kde' : Kernel Density Estimation plot
566 - 'density' : same as 'kde'
567 - 'area' : area plot
568 - 'pie' : pie plot
569 - 'scatter' : scatter plot
570 - 'hexbin' : hexbin plot.
572 figsize : a tuple (width, height) in inches
573 use_index : bool, default True
574 Use index as ticks for x axis.
575 title : str or list
576 Title to use for the plot. If a string is passed, print the string
577 at the top of the figure. If a list is passed and `subplots` is
578 True, print each item in the list above the corresponding subplot.
579 grid : bool, default None (matlab style default)
580 Axis grid lines.
581 legend : bool or {'reverse'}
582 Place legend on axis subplots.
583 style : list or dict
584 The matplotlib line style per column.
585 logx : bool or 'sym', default False
586 Use log scaling or symlog scaling on x axis.
587 .. versionchanged:: 0.25.0
589 logy : bool or 'sym' default False
590 Use log scaling or symlog scaling on y axis.
591 .. versionchanged:: 0.25.0
593 loglog : bool or 'sym', default False
594 Use log scaling or symlog scaling on both x and y axes.
595 .. versionchanged:: 0.25.0
597 xticks : sequence
598 Values to use for the xticks.
599 yticks : sequence
600 Values to use for the yticks.
601 xlim : 2-tuple/list
602 ylim : 2-tuple/list
603 rot : int, default None
604 Rotation for ticks (xticks for vertical, yticks for horizontal
605 plots).
606 fontsize : int, default None
607 Font size for xticks and yticks.
608 colormap : str or matplotlib colormap object, default None
609 Colormap to select colors from. If string, load colormap with that
610 name from matplotlib.
611 colorbar : bool, optional
612 If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
613 plots).
614 position : float
615 Specify relative alignments for bar plot layout.
616 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
617 (center).
618 table : bool, Series or DataFrame, default False
619 If True, draw a table using the data in the DataFrame and the data
620 will be transposed to meet matplotlib's default layout.
621 If a Series or DataFrame is passed, use passed data to draw a
622 table.
623 yerr : DataFrame, Series, array-like, dict and str
624 See :ref:`Plotting with Error Bars <visualization.errorbars>` for
625 detail.
626 xerr : DataFrame, Series, array-like, dict and str
627 Equivalent to yerr.
628 mark_right : bool, default True
629 When using a secondary_y axis, automatically mark the column
630 labels with "(right)" in the legend.
631 include_bool : bool, default is False
632 If True, boolean values can be plotted.
633 backend : str, default None
634 Backend to use instead of the backend specified in the option
635 ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
636 specify the ``plotting.backend`` for the whole session, set
637 ``pd.options.plotting.backend``.
639 .. versionadded:: 1.0.0
641 **kwargs
642 Options to pass to matplotlib plotting method.
644 Returns
645 -------
646 :class:`matplotlib.axes.Axes` or numpy.ndarray of them
647 If the backend is not the default matplotlib one, the return value
648 will be the object returned by the backend.
650 Notes
651 -----
652 - See matplotlib documentation online for more on this subject
653 - If `kind` = 'bar' or 'barh', you can specify relative alignments
654 for bar plot layout by `position` keyword.
655 From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
656 (center)
657 """
659 _common_kinds = ("line", "bar", "barh", "kde", "density", "area", "hist", "box")
660 _series_kinds = ("pie",)
661 _dataframe_kinds = ("scatter", "hexbin")
662 _kind_aliases = {"density": "kde"}
663 _all_kinds = _common_kinds + _series_kinds + _dataframe_kinds
665 def __init__(self, data):
666 self._parent = data
668 @staticmethod
669 def _get_call_args(backend_name, data, args, kwargs):
670 """
671 This function makes calls to this accessor `__call__` method compatible
672 with the previous `SeriesPlotMethods.__call__` and
673 `DataFramePlotMethods.__call__`. Those had slightly different
674 signatures, since `DataFramePlotMethods` accepted `x` and `y`
675 parameters.
676 """
677 if isinstance(data, ABCSeries):
678 arg_def = [
679 ("kind", "line"),
680 ("ax", None),
681 ("figsize", None),
682 ("use_index", True),
683 ("title", None),
684 ("grid", None),
685 ("legend", False),
686 ("style", None),
687 ("logx", False),
688 ("logy", False),
689 ("loglog", False),
690 ("xticks", None),
691 ("yticks", None),
692 ("xlim", None),
693 ("ylim", None),
694 ("rot", None),
695 ("fontsize", None),
696 ("colormap", None),
697 ("table", False),
698 ("yerr", None),
699 ("xerr", None),
700 ("label", None),
701 ("secondary_y", False),
702 ]
703 elif isinstance(data, ABCDataFrame):
704 arg_def = [
705 ("x", None),
706 ("y", None),
707 ("kind", "line"),
708 ("ax", None),
709 ("subplots", False),
710 ("sharex", None),
711 ("sharey", False),
712 ("layout", None),
713 ("figsize", None),
714 ("use_index", True),
715 ("title", None),
716 ("grid", None),
717 ("legend", True),
718 ("style", None),
719 ("logx", False),
720 ("logy", False),
721 ("loglog", False),
722 ("xticks", None),
723 ("yticks", None),
724 ("xlim", None),
725 ("ylim", None),
726 ("rot", None),
727 ("fontsize", None),
728 ("colormap", None),
729 ("table", False),
730 ("yerr", None),
731 ("xerr", None),
732 ("secondary_y", False),
733 ("sort_columns", False),
734 ]
735 else:
736 raise TypeError(
737 f"Called plot accessor for type {type(data).__name__}, "
738 "expected Series or DataFrame"
739 )
741 if args and isinstance(data, ABCSeries):
742 positional_args = str(args)[1:-1]
743 keyword_args = ", ".join(
744 f"{name}={repr(value)}" for (name, default), value in zip(arg_def, args)
745 )
746 msg = (
747 "`Series.plot()` should not be called with positional "
748 "arguments, only keyword arguments. The order of "
749 "positional arguments will change in the future. "
750 f"Use `Series.plot({keyword_args})` instead of "
751 f"`Series.plot({positional_args})`."
752 )
753 raise TypeError(msg)
755 pos_args = {name: value for value, (name, _) in zip(args, arg_def)}
756 if backend_name == "pandas.plotting._matplotlib":
757 kwargs = dict(arg_def, **pos_args, **kwargs)
758 else:
759 kwargs = dict(pos_args, **kwargs)
761 x = kwargs.pop("x", None)
762 y = kwargs.pop("y", None)
763 kind = kwargs.pop("kind", "line")
764 return x, y, kind, kwargs
    def __call__(self, *args, **kwargs):
        # Entry point for ``obj.plot(...)``: resolve the backend, normalise the
        # arguments, reshape the data according to ``x``/``y`` and delegate to
        # ``plot_backend.plot``. Documentation lives in comments because the
        # class body replaces ``__call__.__doc__`` with the class docstring.
        plot_backend = _get_plot_backend(kwargs.pop("backend", None))

        x, y, kind, kwargs = self._get_call_args(
            plot_backend.__name__, self._parent, args, kwargs
        )

        # Resolve aliases such as "density" -> "kde".
        kind = self._kind_aliases.get(kind, kind)

        # when using another backend, get out of the way
        if plot_backend.__name__ != "pandas.plotting._matplotlib":
            return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

        if kind not in self._all_kinds:
            raise ValueError(f"{kind} is not a valid plot kind")

        # The original data structured can be transformed before passed to the
        # backend. For example, for DataFrame is common to set the index as the
        # `x` parameter, and return a Series with the parameter `y` as values.
        data = self._parent.copy()

        if isinstance(data, ABCSeries):
            kwargs["reuse_plot"] = True

        if kind in self._dataframe_kinds:
            # DataFrame-only kinds get ``x`` and ``y`` passed through untouched.
            if isinstance(data, ABCDataFrame):
                return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
            else:
                raise ValueError(f"plot kind {kind} can only be used for data frames")
        elif kind in self._series_kinds:
            if isinstance(data, ABCDataFrame):
                # Only an explicit ``subplots=False`` is rejected here; a
                # missing ``subplots`` key passes this check.
                if y is None and kwargs.get("subplots") is False:
                    raise ValueError(
                        f"{kind} requires either y column or 'subplots=True'"
                    )
                elif y is not None:
                    # An integer ``y`` is a position unless the column labels
                    # are themselves integers.
                    if is_integer(y) and not data.columns.holds_integer():
                        y = data.columns[y]
                    # converted to series actually. copy to not modify
                    data = data[y].copy()
                    data.index.name = y
        elif isinstance(data, ABCDataFrame):
            # Captured before ``set_index`` below, so a positional ``y`` refers
            # to the original column order (including the ``x`` column).
            data_cols = data.columns
            if x is not None:
                if is_integer(x) and not data.columns.holds_integer():
                    x = data_cols[x]
                elif not isinstance(data[x], ABCSeries):
                    raise ValueError("x must be a label or position")
                data = data.set_index(x)
            if y is not None:
                # check if we have y as int or list of ints
                int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
                int_y_arg = is_integer(y) or int_ylist
                if int_y_arg and not data.columns.holds_integer():
                    y = data_cols[y]

                label_kw = kwargs["label"] if "label" in kwargs else False
                for kw in ["xerr", "yerr"]:
                    if kw in kwargs and (
                        isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
                    ):
                        # Best effort: swap a column reference for that
                        # column's data; if the lookup fails the value is
                        # left as passed.
                        try:
                            kwargs[kw] = data[kwargs[kw]]
                        except (IndexError, KeyError, TypeError):
                            pass

                # don't overwrite
                data = data[y].copy()

                if isinstance(data, ABCSeries):
                    # Single column selected: name it after ``label`` if
                    # given (and truthy), otherwise after ``y``.
                    label_name = label_kw or y
                    data.name = label_name
                else:
                    # Several columns selected: a given ``label`` must be
                    # list-like with one entry per ``y`` column.
                    match = is_list_like(label_kw) and len(label_kw) == len(y)
                    if label_kw and not match:
                        raise ValueError(
                            "label should be list-like and same length as y"
                        )
                    label_name = label_kw or data.columns
                    data.columns = label_name

        return plot_backend.plot(data, kind=kind, **kwargs)
849 __call__.__doc__ = __doc__
851 def line(self, x=None, y=None, **kwargs):
852 """
853 Plot Series or DataFrame as lines.
855 This function is useful to plot lines using DataFrame's values
856 as coordinates.
858 Parameters
859 ----------
860 x : int or str, optional
861 Columns to use for the horizontal axis.
862 Either the location or the label of the columns to be used.
863 By default, it will use the DataFrame indices.
864 y : int, str, or list of them, optional
865 The values to be plotted.
866 Either the location or the label of the columns to be used.
867 By default, it will use the remaining DataFrame numeric columns.
868 **kwargs
869 Keyword arguments to pass on to :meth:`DataFrame.plot`.
871 Returns
872 -------
873 :class:`matplotlib.axes.Axes` or :class:`numpy.ndarray`
874 Return an ndarray when ``subplots=True``.
876 See Also
877 --------
878 matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.
880 Examples
881 --------
883 .. plot::
884 :context: close-figs
886 >>> s = pd.Series([1, 3, 2])
887 >>> s.plot.line()
889 .. plot::
890 :context: close-figs
892 The following example shows the populations for some animals
893 over the years.
895 >>> df = pd.DataFrame({
896 ... 'pig': [20, 18, 489, 675, 1776],
897 ... 'horse': [4, 25, 281, 600, 1900]
898 ... }, index=[1990, 1997, 2003, 2009, 2014])
899 >>> lines = df.plot.line()
901 .. plot::
902 :context: close-figs
904 An example with subplots, so an array of axes is returned.
906 >>> axes = df.plot.line(subplots=True)
907 >>> type(axes)
908 <class 'numpy.ndarray'>
910 .. plot::
911 :context: close-figs
913 The following example shows the relationship between both
914 populations.
916 >>> lines = df.plot.line(x='pig', y='horse')
917 """
918 return self(kind="line", x=x, y=y, **kwargs)
920 def bar(self, x=None, y=None, **kwargs):
921 """
922 Vertical bar plot.
924 A bar plot is a plot that presents categorical data with
925 rectangular bars with lengths proportional to the values that they
926 represent. A bar plot shows comparisons among discrete categories. One
927 axis of the plot shows the specific categories being compared, and the
928 other axis represents a measured value.
930 Parameters
931 ----------
932 x : label or position, optional
933 Allows plotting of one column versus another. If not specified,
934 the index of the DataFrame is used.
935 y : label or position, optional
936 Allows plotting of one column versus another. If not specified,
937 all numerical columns are used.
938 **kwargs
939 Additional keyword arguments are documented in
940 :meth:`DataFrame.plot`.
942 Returns
943 -------
944 matplotlib.axes.Axes or np.ndarray of them
945 An ndarray is returned with one :class:`matplotlib.axes.Axes`
946 per column when ``subplots=True``.
948 See Also
949 --------
950 DataFrame.plot.barh : Horizontal bar plot.
951 DataFrame.plot : Make plots of a DataFrame.
952 matplotlib.pyplot.bar : Make a bar plot with matplotlib.
954 Examples
955 --------
956 Basic plot.
958 .. plot::
959 :context: close-figs
961 >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
962 >>> ax = df.plot.bar(x='lab', y='val', rot=0)
964 Plot a whole dataframe to a bar plot. Each column is assigned a
965 distinct color, and each row is nested in a group along the
966 horizontal axis.
968 .. plot::
969 :context: close-figs
971 >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
972 >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
973 >>> index = ['snail', 'pig', 'elephant',
974 ... 'rabbit', 'giraffe', 'coyote', 'horse']
975 >>> df = pd.DataFrame({'speed': speed,
976 ... 'lifespan': lifespan}, index=index)
977 >>> ax = df.plot.bar(rot=0)
979 Instead of nesting, the figure can be split by column with
980 ``subplots=True``. In this case, a :class:`numpy.ndarray` of
981 :class:`matplotlib.axes.Axes` are returned.
983 .. plot::
984 :context: close-figs
986 >>> axes = df.plot.bar(rot=0, subplots=True)
987 >>> axes[1].legend(loc=2) # doctest: +SKIP
989 Plot a single column.
991 .. plot::
992 :context: close-figs
994 >>> ax = df.plot.bar(y='speed', rot=0)
996 Plot only selected categories for the DataFrame.
998 .. plot::
999 :context: close-figs
1001 >>> ax = df.plot.bar(x='lifespan', rot=0)
1002 """
1003 return self(kind="bar", x=x, y=y, **kwargs)
def barh(self, x=None, y=None, **kwargs):
    """
    Make a horizontal bar plot.

    In a horizontal bar plot every category is drawn as a rectangular bar
    whose length is proportional to the value it stands for, which makes
    discrete categories easy to compare. The categories being compared lie
    along one axis of the plot and the measured values along the other.

    Parameters
    ----------
    x : label or position, default DataFrame.index
        Column to be used for categories.
    y : label or position, default All numeric columns in dataframe
        Columns to be plotted from the DataFrame.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.plot.bar: Vertical bar plot.
    DataFrame.plot : Make plots of DataFrame using matplotlib.
    matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

    Examples
    --------
    Basic example

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
        >>> ax = df.plot.barh(x='lab', y='val')

    Plot a whole DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh()

    Plot a column of the DataFrame to a horizontal bar plot

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(y='speed')

    Plot DataFrame versus the desired column

    .. plot::
        :context: close-figs

        >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
        >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
        >>> index = ['snail', 'pig', 'elephant',
        ...          'rabbit', 'giraffe', 'coyote', 'horse']
        >>> df = pd.DataFrame({'speed': speed,
        ...                    'lifespan': lifespan}, index=index)
        >>> ax = df.plot.barh(x='lifespan')
    """
    # Delegate to the accessor's __call__ with the plot kind fixed to "barh".
    result = self(kind="barh", x=x, y=y, **kwargs)
    return result
def box(self, by=None, **kwargs):
    """
    Make a box plot of the DataFrame columns.

    A box plot summarises groups of numerical data graphically through
    their quartiles. The box spans the Q1 to Q3 quartile values of the
    data and a line marks the median (Q2). Whiskers reach out from the
    edges of the box to show the range of the data; by default they are
    placed at 1.5*IQR (IQR = Q3 - Q1) from the edges of the box. Points
    lying past the end of the whiskers are outliers.

    For further details see Wikipedia's
    entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

    Keep in mind when using this chart that the box and the whiskers can
    overlap, which is very common when plotting small sets of data.

    Parameters
    ----------
    by : str or sequence
        Column in the DataFrame to group by.
    **kwargs
        Additional keywords are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    DataFrame.boxplot: Another method to draw a box plot.
    Series.plot.box: Draw a box plot from a Series object.
    matplotlib.pyplot.boxplot: Draw a box plot in matplotlib.

    Examples
    --------
    Draw a box plot from a DataFrame with four columns of randomly
    generated data.

    .. plot::
        :context: close-figs

        >>> data = np.random.randn(25, 4)
        >>> df = pd.DataFrame(data, columns=list('ABCD'))
        >>> ax = df.plot.box()
    """
    # Delegate to the accessor's __call__ with the plot kind fixed to "box".
    result = self(kind="box", by=by, **kwargs)
    return result
def hist(self, by=None, bins=10, **kwargs):
    """
    Draw one histogram of the DataFrame's columns.

    A histogram is a representation of the distribution of data.
    This function groups the values of all given Series in the DataFrame
    into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
    This is useful when the DataFrame's Series are in a similar scale.

    Parameters
    ----------
    by : str or sequence, optional
        Column in the DataFrame to group by.
    bins : int, default 10
        Number of histogram bins to be used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.AxesSubplot`
        Return a histogram plot.

    See Also
    --------
    DataFrame.hist : Draw histograms per DataFrame's Series.
    Series.hist : Draw a histogram with Series' data.

    Examples
    --------
    When we roll a die 6000 times, we expect to get each value around 1000
    times. But when we roll two dice and sum the result, the distribution
    is going to be quite different. A histogram illustrates those
    distributions.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame(
        ...     np.random.randint(1, 7, 6000),
        ...     columns = ['one'])
        >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
        >>> ax = df.plot.hist(bins=12, alpha=0.5)
    """
    # Delegate to the accessor's __call__ with the plot kind fixed to "hist".
    return self(kind="hist", by=by, bins=bins, **kwargs)
def kde(self, bw_method=None, ind=None, **kwargs):
    """
    Generate Kernel Density Estimate plot using Gaussian kernels.

    In statistics, `kernel density estimation`_ (KDE) is a non-parametric
    way to estimate the probability density function (PDF) of a random
    variable. This function uses Gaussian kernels and includes automatic
    bandwidth determination.

    .. _kernel density estimation:
        https://en.wikipedia.org/wiki/Kernel_density_estimation

    Parameters
    ----------
    bw_method : str, scalar or callable, optional
        The method used to calculate the estimator bandwidth. This can be
        'scott', 'silverman', a scalar constant or a callable.
        If None (default), 'scott' is used.
        See :class:`scipy.stats.gaussian_kde` for more information.
    ind : NumPy array or int, optional
        Evaluation points for the estimated PDF. If None (default),
        1000 equally spaced points are used. If `ind` is a NumPy array, the
        KDE is evaluated at the points passed. If `ind` is an integer,
        `ind` number of equally spaced points are used.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray of them

    See Also
    --------
    scipy.stats.gaussian_kde : Representation of a kernel-density
        estimate using Gaussian kernels. This is the function used
        internally to estimate the PDF.

    Examples
    --------
    Given a Series of points randomly sampled from an unknown
    distribution, estimate its PDF using KDE with automatic
    bandwidth determination and plot the results, evaluating them at
    1000 equally spaced points (default):

    .. plot::
        :context: close-figs

        >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
        >>> ax = s.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

    For DataFrame, it works in the same way:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
        ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
        ... })
        >>> ax = df.plot.kde()

    A scalar bandwidth can be specified. Using a small bandwidth value can
    lead to over-fitting, while using a large bandwidth value may result
    in under-fitting:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=0.3)

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(bw_method=3)

    Finally, the `ind` parameter determines the evaluation points for the
    plot of the estimated PDF:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
    """
    # Delegate to the accessor's __call__ with the plot kind fixed to "kde".
    return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
# ``density`` is a second name bound to the same function object as ``kde``.
density = kde
def area(self, x=None, y=None, stacked=True, **kwargs):
    """
    Draw a stacked area plot.

    An area plot displays quantitative data visually.
    This function wraps the matplotlib area function.

    Parameters
    ----------
    x : label or position, optional
        Coordinates for the X axis. By default uses the index.
    y : label or position, optional
        Column to plot. By default uses all columns.
    stacked : bool, default True
        Area plots are stacked by default. Set to False to create an
        unstacked plot. The value is always forwarded to the plot call.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or numpy.ndarray
        Area plot, or array of area plots if subplots is True.

    See Also
    --------
    DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

    Examples
    --------
    Draw an area plot based on basic business metrics:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3, 9, 10, 6],
        ...     'signups': [5, 5, 6, 12, 14, 13],
        ...     'visits': [20, 42, 28, 62, 81, 50],
        ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
        ...                        freq='M'))
        >>> ax = df.plot.area()

    Area plots are stacked by default. To produce an unstacked plot,
    pass ``stacked=False``:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(stacked=False)

    Draw an area plot for a single column:

    .. plot::
        :context: close-figs

        >>> ax = df.plot.area(y='sales')

    Draw with a different `x`:

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...     'sales': [3, 2, 3],
        ...     'visits': [20, 42, 28],
        ...     'day': [1, 2, 3],
        ... })
        >>> ax = df.plot.area(x='day')
    """
    # ``stacked`` is a named parameter so the signature matches the
    # documented interface; it is passed through explicitly.
    return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
def pie(self, **kwargs):
    """
    Generate a pie plot.

    A pie plot shows the numerical data of a column as proportions of a
    whole. This function wraps :meth:`matplotlib.pyplot.pie` for the
    specified column. When no column reference is given and
    ``subplots=True``, one pie plot is drawn for each numerical column
    independently.

    Parameters
    ----------
    y : int or label, optional
        Label or position of the column to plot.
        If not provided, ``subplots=True`` argument must be passed.
    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.axes.Axes or np.ndarray of them
        A NumPy array is returned when `subplots` is True.

    Raises
    ------
    ValueError
        If the plotted object is a DataFrame and neither a ``y`` column
        nor a truthy ``subplots`` argument is supplied.

    See Also
    --------
    Series.plot.pie : Generate a pie plot for a Series.
    DataFrame.plot : Make plots of a DataFrame.

    Examples
    --------
    The example below uses a DataFrame holding the mass and radius of
    some planets. The 'mass' column is passed to the pie function to get
    a pie plot.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
        ...                    'radius': [2439.7, 6051.8, 6378.1]},
        ...                   index=['Mercury', 'Venus', 'Earth'])
        >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

    .. plot::
        :context: close-figs

        >>> plot = df.plot.pie(subplots=True, figsize=(6, 3))
    """
    # The column check applies to DataFrame parents only.
    if isinstance(self._parent, ABCDataFrame):
        column = kwargs.get("y")
        # ``subplots`` is inspected only when no column was given.
        if column is None and not kwargs.get("subplots", False):
            raise ValueError("pie requires either y column or 'subplots=True'")

    result = self(kind="pie", **kwargs)
    return result
def scatter(self, x, y, s=None, c=None, **kwargs):
    """
    Create a scatter plot with varying marker point size and color.

    Each point is drawn as a filled circle whose coordinates come from
    two dataframe columns. Such a plot helps to reveal complex
    correlations between two variables. The points may be, for instance,
    natural 2D coordinates such as longitude and latitude on a map or,
    more generally, any pair of metrics that can be plotted against each
    other.

    Parameters
    ----------
    x : int or str
        The column name or column position to be used as horizontal
        coordinates for each point.
    y : int or str
        The column name or column position to be used as vertical
        coordinates for each point.
    s : scalar or array_like, optional
        The size of each point. Possible values are:

        - A single scalar so all points have the same size.

        - A sequence of scalars, which will be used for each point's size
          recursively. For instance, when passing [2,14] all points size
          will be either 2 or 14, alternatively.

    c : str, int or array_like, optional
        The color of each point. Possible values are:

        - A single color string referred to by name, RGB or RGBA code,
          for instance 'red' or '#a98d19'.

        - A sequence of color strings referred to by name, RGB or RGBA
          code, which will be used for each point's color recursively. For
          instance ['green','yellow'] all points will be filled in green or
          yellow, alternatively.

        - A column name or position whose values will be used to color the
          marker points according to a colormap.

    **kwargs
        Keyword arguments to pass on to :meth:`DataFrame.plot`.

    Returns
    -------
    :class:`matplotlib.axes.Axes` or numpy.ndarray of them

    See Also
    --------
    matplotlib.pyplot.scatter : Scatter plot using multiple input data
        formats.

    Examples
    --------
    Draw a scatter plot whose coordinates are taken from the values in a
    DataFrame's columns.

    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
        ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
        ...                   columns=['length', 'width', 'species'])
        >>> ax1 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='DarkBlue')

    The same plot with the color determined by a column as well.

    .. plot::
        :context: close-figs

        >>> ax2 = df.plot.scatter(x='length',
        ...                       y='width',
        ...                       c='species',
        ...                       colormap='viridis')
    """
    # Delegate to the accessor's __call__ with the plot kind fixed to
    # "scatter"; ``s`` and ``c`` are forwarded even when they are None.
    result = self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
    return result
def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs):
    """
    Generate a hexagonal binning plot.

    Generate a hexagonal binning plot of `x` versus `y`. When `C` is
    `None` (the default), the plot is a histogram of the number of
    occurrences of the observations at ``(x[i], y[i])``.

    When `C` is given, it specifies values at the coordinates
    ``(x[i], y[i])``. Those values are accumulated per hexagonal bin and
    then reduced according to `reduce_C_function`, which defaults to
    NumPy's mean function (:meth:`numpy.mean`).
    (If `C` is specified, it must also be a 1-D sequence
    of the same length as `x` and `y`, or a column label.)

    Parameters
    ----------
    x : int or str
        The column label or position for x points.
    y : int or str
        The column label or position for y points.
    C : int or str, optional
        The column label or position for the value of `(x, y)` point.
    reduce_C_function : callable, default `np.mean`
        Function of one argument that reduces all the values in a bin to
        a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
        Only forwarded to the plot call when it is not None.
    gridsize : int or tuple of (int, int), default 100
        The number of hexagons in the x-direction.
        The corresponding number of hexagons in the y-direction is
        chosen in a way that the hexagons are approximately regular.
        Alternatively, gridsize can be a tuple with two elements
        specifying the number of hexagons in the x-direction and the
        y-direction. Only forwarded to the plot call when it is not None.
    **kwargs
        Additional keyword arguments are documented in
        :meth:`DataFrame.plot`.

    Returns
    -------
    matplotlib.AxesSubplot
        The matplotlib ``Axes`` on which the hexbin is plotted.

    See Also
    --------
    DataFrame.plot : Make plots of a DataFrame.
    matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
        the matplotlib function that is used under the hood.

    Examples
    --------
    The following examples are generated with random data from
    a normal distribution.

    .. plot::
        :context: close-figs

        >>> n = 10000
        >>> df = pd.DataFrame({'x': np.random.randn(n),
        ...                    'y': np.random.randn(n)})
        >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

    The next example uses `C` and `np.sum` as `reduce_C_function`.
    Note that `'observations'` values ranges from 1 to 5 but the result
    plot shows values up to more than 25. This is because of the
    `reduce_C_function`.

    .. plot::
        :context: close-figs

        >>> n = 500
        >>> df = pd.DataFrame({
        ...     'coord_x': np.random.uniform(-3, 3, size=n),
        ...     'coord_y': np.random.uniform(30, 50, size=n),
        ...     'observations': np.random.randint(1,5, size=n)
        ...     })
        >>> ax = df.plot.hexbin(x='coord_x',
        ...                     y='coord_y',
        ...                     C='observations',
        ...                     reduce_C_function=np.sum,
        ...                     gridsize=10,
        ...                     cmap="viridis")
    """
    # Optional settings are added to kwargs only when the caller supplied
    # them, so an omitted value never reaches the plot call as None.
    optional_settings = (
        ("reduce_C_function", reduce_C_function),
        ("gridsize", gridsize),
    )
    for option_name, option_value in optional_settings:
        if option_value is not None:
            kwargs[option_name] = option_value

    result = self(kind="hexbin", x=x, y=y, C=C, **kwargs)
    return result
# Cache of plotting backends that have already been loaded, keyed by the
# backend name (entry point name or module name).
_backends = {}
def _find_backend(backend: str):
    """
    Find a pandas plotting backend.

    Entry points registered in the ``pandas_plotting_backends`` group are
    consulted first; if none of them is named `backend`, `backend` is
    tried as an importable module name and accepted when the module has a
    ``plot`` attribute.

    Parameters
    ----------
    backend : str
        The identifier for the backend. Either an entrypoint item registered
        with pkg_resources, or a module name.

    Notes
    -----
    Modifies _backends with imported backends as a side effect.

    Returns
    -------
    types.ModuleType
        The imported backend.

    Raises
    ------
    ValueError
        If `backend` is neither a registered entry point nor an importable
        module that has a ``plot`` attribute.
    """
    import pkg_resources  # Delay import for performance.

    for ep in pkg_resources.iter_entry_points("pandas_plotting_backends"):
        # matplotlib is an optional dependency; loading its entry point
        # would raise when it is missing, so that entry is never loaded here.
        if ep.name != "matplotlib":
            _backends[ep.name] = ep.load()

    try:
        return _backends[backend]
    except KeyError:
        # Not a registered entry point: fall back to treating the
        # identifier as a module name.
        try:
            candidate = importlib.import_module(backend)
        except ImportError:
            # Handled by the ValueError raised at the end of the function.
            candidate = None

        # Check that the interface is implemented when the option is set,
        # rather than at plot time.
        if candidate is not None and hasattr(candidate, "plot"):
            _backends[backend] = candidate
            return candidate

    raise ValueError(
        f"Could not find plotting backend '{backend}'. Ensure that you've installed "
        f"the package providing the '{backend}' entrypoint, or that the package has a "
        "top-level `.plot` method."
    )
def _get_plot_backend(backend=None):
    """
    Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).

    When `backend` is not given (or is falsy), the name is read from the
    ``plotting.backend`` option. The name "matplotlib" is resolved by
    importing ``pandas.plotting._matplotlib``; any other name is looked up
    in the ``_backends`` cache and, if absent, resolved through
    ``_find_backend`` and then cached.

    The backend is imported lazily, as matplotlib is a soft dependency, and
    pandas can be used without it being installed.

    Parameters
    ----------
    backend : str, optional
        Name of the backend to use instead of the configured option.

    Returns
    -------
    The backend object stored in ``_backends`` for the resolved name.

    Raises
    ------
    ImportError
        If the "matplotlib" backend is selected but importing
        ``pandas.plotting._matplotlib`` fails.
    """
    name = backend or get_option("plotting.backend")

    if name == "matplotlib":
        # Because matplotlib is an optional dependency and first-party
        # backend, an import is attempted here so that a missing
        # installation surfaces as an ImportError.
        try:
            import pandas.plotting._matplotlib as mpl_backend
        except ImportError:
            raise ImportError(
                "matplotlib is required for plotting when the "
                'default backend "matplotlib" is selected.'
            ) from None

        _backends["matplotlib"] = mpl_backend

    try:
        return _backends[name]
    except KeyError:
        # Not cached yet; resolve it below, outside the handler.
        pass

    found = _find_backend(name)
    _backends[name] = found
    return found