1"""
2Data structure for 1-dimensional cross-sectional and time series data
3"""
4from io import StringIO
5from shutil import get_terminal_size
6from textwrap import dedent
7from typing import IO, Any, Callable, Hashable, List, Optional
8import warnings
10import numpy as np
12from pandas._config import get_option
14from pandas._libs import index as libindex, lib, reshape, tslibs
15from pandas.compat.numpy import function as nv
16from pandas.util._decorators import Appender, Substitution
17from pandas.util._validators import validate_bool_kwarg, validate_percentile
19from pandas.core.dtypes.cast import convert_dtypes
20from pandas.core.dtypes.common import (
21 _is_unorderable_exception,
22 ensure_platform_int,
23 is_bool,
24 is_categorical_dtype,
25 is_datetime64_dtype,
26 is_dict_like,
27 is_extension_array_dtype,
28 is_integer,
29 is_iterator,
30 is_list_like,
31 is_object_dtype,
32 is_scalar,
33 is_timedelta64_dtype,
34)
35from pandas.core.dtypes.generic import (
36 ABCDataFrame,
37 ABCDatetimeIndex,
38 ABCSeries,
39 ABCSparseArray,
40)
41from pandas.core.dtypes.inference import is_hashable
42from pandas.core.dtypes.missing import (
43 isna,
44 na_value_for_dtype,
45 notna,
46 remove_na_arraylike,
47)
49import pandas as pd
50from pandas.core import algorithms, base, generic, nanops, ops
51from pandas.core.accessor import CachedAccessor
52from pandas.core.arrays import ExtensionArray, try_cast_to_ea
53from pandas.core.arrays.categorical import Categorical, CategoricalAccessor
54from pandas.core.arrays.sparse import SparseAccessor
55import pandas.core.common as com
56from pandas.core.construction import (
57 create_series_with_explicit_dtype,
58 extract_array,
59 is_empty_data,
60 sanitize_array,
61)
62from pandas.core.groupby import generic as groupby_generic
63from pandas.core.indexers import maybe_convert_indices
64from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
65from pandas.core.indexes.api import (
66 Float64Index,
67 Index,
68 InvalidIndexError,
69 MultiIndex,
70 ensure_index,
71)
72import pandas.core.indexes.base as ibase
73from pandas.core.indexes.datetimes import DatetimeIndex
74from pandas.core.indexes.period import PeriodIndex
75from pandas.core.indexes.timedeltas import TimedeltaIndex
76from pandas.core.indexing import check_bool_indexer
77from pandas.core.internals import SingleBlockManager
78from pandas.core.strings import StringMethods
79from pandas.core.tools.datetimes import to_datetime
81import pandas.io.formats.format as fmt
82import pandas.plotting
84__all__ = ["Series"]
86_shared_doc_kwargs = dict(
87 axes="index",
88 klass="Series",
89 axes_single_arg="{0 or 'index'}",
90 axis="""axis : {0 or 'index'}
91 Parameter needed for compatibility with DataFrame.""",
92 inplace="""inplace : boolean, default False
93 If True, performs operation inplace and returns None.""",
94 unique="np.ndarray",
95 duplicated="Series",
96 optional_by="",
97 optional_mapper="",
98 optional_labels="",
99 optional_axis="",
100 versionadded_to_excel="\n .. versionadded:: 0.20.0\n",
101)
104def _coerce_method(converter):
105 """
106 Install the scalar coercion methods.
107 """
109 def wrapper(self):
110 if len(self) == 1:
111 return converter(self.iloc[0])
112 raise TypeError(f"cannot convert the series to {converter}")
114 wrapper.__name__ = f"__{converter.__name__}__"
115 return wrapper
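
# Illustrative sketch (not part of the original source): how the generated
# coercion dunders behave. ``float(series)`` only succeeds for a length-1
# Series; anything longer raises the TypeError constructed above. Assumes
# only a plain pandas import.
#
#   >>> import pandas as pd
#   >>> float(pd.Series([3]))
#   3.0
#   >>> float(pd.Series([1, 2]))
#   Traceback (most recent call last):
#   ...
#   TypeError: cannot convert the series to <class 'float'>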
118# ----------------------------------------------------------------------
119# Series class
122class Series(base.IndexOpsMixin, generic.NDFrame):
123 """
124 One-dimensional ndarray with axis labels (including time series).
126 Labels need not be unique but must be a hashable type. The object
127 supports both integer- and label-based indexing and provides a host of
128 methods for performing operations involving the index. Statistical
129 methods from ndarray have been overridden to automatically exclude
130 missing data (currently represented as NaN).
132 Operations between Series (+, -, /, *, **) align values based on their
133 associated index values-- they need not be the same length. The result
134 index will be the sorted union of the two indexes.
136 Parameters
137 ----------
138 data : array-like, Iterable, dict, or scalar value
139 Contains data stored in Series.
141 .. versionchanged:: 0.23.0
142 If data is a dict, argument order is maintained for Python 3.6
143 and later.
145 index : array-like or Index (1d)
146 Values must be hashable and have the same length as `data`.
147 Non-unique index values are allowed. Will default to
148 RangeIndex (0, 1, 2, ..., n) if not provided. If both a dict and index
149 sequence are used, the index will override the keys found in the
150 dict.
151 dtype : str, numpy.dtype, or ExtensionDtype, optional
152 Data type for the output Series. If not specified, this will be
153 inferred from `data`.
154 See the :ref:`user guide <basics.dtypes>` for more usages.
155 name : str, optional
156 The name to give to the Series.
157 copy : bool, default False
158 Copy input data.
159 """
161 _typ = "series"
163 _name: Optional[Hashable]
164 _metadata: List[str] = ["name"]
165 _accessors = {"dt", "cat", "str", "sparse"}
166 _deprecations = (
167 base.IndexOpsMixin._deprecations
168 | generic.NDFrame._deprecations
169 | frozenset(["compress", "ptp"])
170 )
172 # Override cache_readonly bc Series is mutable
173 hasnans = property(
174 base.IndexOpsMixin.hasnans.func, doc=base.IndexOpsMixin.hasnans.__doc__
175 )
176 _data: SingleBlockManager
177 div: Callable[["Series", Any], "Series"]
178 rdiv: Callable[["Series", Any], "Series"]
180 # ----------------------------------------------------------------------
181 # Constructors
183 def __init__(
184 self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False
185 ):
187 # we are called internally, so short-circuit
188 if fastpath:
190 # data is an ndarray, index is defined
191 if not isinstance(data, SingleBlockManager):
192 data = SingleBlockManager(data, index, fastpath=True)
193 if copy:
194 data = data.copy()
195 if index is None:
196 index = data.index
198 else:
200 name = ibase.maybe_extract_name(name, data, type(self))
202 if is_empty_data(data) and dtype is None:
203 # gh-17261
204 warnings.warn(
205 "The default dtype for empty Series will be 'object' instead "
206 "of 'float64' in a future version. Specify a dtype explicitly "
207 "to silence this warning.",
208 DeprecationWarning,
209 stacklevel=2,
210 )
211 # uncomment the line below when removing the DeprecationWarning
212 # dtype = np.dtype(object)
214 if index is not None:
215 index = ensure_index(index)
217 if data is None:
218 data = {}
219 if dtype is not None:
220 dtype = self._validate_dtype(dtype)
222 if isinstance(data, MultiIndex):
223 raise NotImplementedError(
224 "initializing a Series from a MultiIndex is not supported"
225 )
226 elif isinstance(data, Index):
228 if dtype is not None:
229 # astype copies
230 data = data.astype(dtype)
231 else:
232 # need to copy to avoid aliasing issues
233 data = data._values.copy()
234 if isinstance(data, ABCDatetimeIndex) and data.tz is not None:
235 # GH#24096 need copy to be deep for datetime64tz case
236 # TODO: See if we can avoid these copies
237 data = data._values.copy(deep=True)
238 copy = False
240 elif isinstance(data, np.ndarray):
241 if len(data.dtype):
242 # GH#13296 we are dealing with a compound dtype, which
243 # should be treated as 2D
244 raise ValueError(
245 "Cannot construct a Series from an ndarray with "
246 "compound dtype. Use DataFrame instead."
247 )
248 pass
249 elif isinstance(data, ABCSeries):
250 if index is None:
251 index = data.index
252 else:
253 data = data.reindex(index, copy=copy)
254 data = data._data
255 elif is_dict_like(data):
256 data, index = self._init_dict(data, index, dtype)
257 dtype = None
258 copy = False
259 elif isinstance(data, SingleBlockManager):
260 if index is None:
261 index = data.index
262 elif not data.index.equals(index) or copy:
263 # GH#19275 SingleBlockManager input should only be called
264 # internally
265 raise AssertionError(
266 "Cannot pass both SingleBlockManager "
267 "`data` argument and a different "
268 "`index` argument. `copy` must be False."
269 )
271 elif is_extension_array_dtype(data):
272 pass
273 elif isinstance(data, (set, frozenset)):
274 raise TypeError(f"'{type(data).__name__}' type is unordered")
275 elif isinstance(data, ABCSparseArray):
276 # handle sparse passed here (and force conversion)
277 data = data.to_dense()
278 else:
279 data = com.maybe_iterable_to_list(data)
281 if index is None:
282 if not is_list_like(data):
283 data = [data]
284 index = ibase.default_index(len(data))
285 elif is_list_like(data):
287 # a scalar numpy array is list-like but doesn't
288 # have a proper length
289 try:
290 if len(index) != len(data):
291 raise ValueError(
292 f"Length of passed values is {len(data)}, "
293 f"index implies {len(index)}."
294 )
295 except TypeError:
296 pass
298 # create/copy the manager
299 if isinstance(data, SingleBlockManager):
300 if dtype is not None:
301 data = data.astype(dtype=dtype, errors="ignore", copy=copy)
302 elif copy:
303 data = data.copy()
304 else:
305 data = sanitize_array(data, index, dtype, copy, raise_cast_failure=True)
307 data = SingleBlockManager(data, index, fastpath=True)
309 generic.NDFrame.__init__(self, data, fastpath=True)
310 self.name = name
311 self._set_axis(0, index, fastpath=True)
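
    # Illustrative sketch (not part of the original source): the constructor
    # branches above in action. Each input type takes a different path
    # (ndarray, dict, scalar-with-index); all end in a SingleBlockManager.
    #
    #   >>> import numpy as np, pandas as pd
    #   >>> pd.Series(np.array([1, 2, 3]))       # ndarray branch
    #   >>> pd.Series({"a": 1, "b": 2})          # dict branch -> _init_dict
    #   >>> pd.Series(5, index=["a", "b", "c"])  # scalar broadcast over index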

    def _init_dict(self, data, index=None, dtype=None):
        """
        Derive the "_data" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series.
        index : Index or index-like, default None
            Index for the new Series: if None, use dict keys.
        dtype : dtype, default None
            The dtype for the new Series: if None, infer from data.

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            keys, values = zip(*data.items())
            values = list(values)
        elif index is not None:
            # fastpath for Series(data=None). Just use broadcasting a scalar
            # instead of reindexing.
            values = na_value_for_dtype(dtype)
            keys = index
        else:
            keys, values = [], []

        # Input is now list-like, so rely on "standard" construction:

        # TODO: passing np.float64 to not break anything yet. See GH-17261
        s = create_series_with_explicit_dtype(
            values, index=keys, dtype=dtype, dtype_if_empty=np.float64
        )

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        return s._data, s.index
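
    # Illustrative sketch (not part of the original source): when both a dict
    # and an index are given, the dict is aligned to the index and missing
    # keys become NaN, per the reindex call above.
    #
    #   >>> import pandas as pd
    #   >>> pd.Series({"a": 1, "b": 2}, index=["b", "c"])
    #   b    2.0
    #   c    NaN
    #   dtype: float64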

    # ----------------------------------------------------------------------

    @property
    def _constructor(self):
        return Series

    @property
    def _constructor_expanddim(self):
        from pandas.core.frame import DataFrame

        return DataFrame

    # types
    @property
    def _can_hold_na(self):
        return self._data._can_hold_na

    _index = None

    def _set_axis(self, axis, labels, fastpath=False):
        """
        Override generic, we want to set the _typ here.
        """

        if not fastpath:
            labels = ensure_index(labels)

        is_all_dates = labels.is_all_dates
        if is_all_dates:
            if not isinstance(labels, (DatetimeIndex, PeriodIndex, TimedeltaIndex)):
                try:
                    labels = DatetimeIndex(labels)
                    # need to set here because we changed the index
                    if fastpath:
                        self._data.set_axis(axis, labels)
                except (tslibs.OutOfBoundsDatetime, ValueError):
                    # labels may exceed datetime bounds,
                    # or not be a DatetimeIndex
                    pass

        self._set_subtyp(is_all_dates)

        object.__setattr__(self, "_index", labels)
        if not fastpath:
            self._data.set_axis(axis, labels)

    def _set_subtyp(self, is_all_dates):
        if is_all_dates:
            object.__setattr__(self, "_subtyp", "time_series")
        else:
            object.__setattr__(self, "_subtyp", "series")

    def _update_inplace(self, result, **kwargs):
        # we want to call the generic version and not the IndexOpsMixin
        return generic.NDFrame._update_inplace(self, result, **kwargs)

    # ndarray compatibility
    @property
    def dtype(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    @property
    def dtypes(self):
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

    @property
    def name(self) -> Optional[Hashable]:
        return self._name

    @name.setter
    def name(self, value: Optional[Hashable]) -> None:
        if not is_hashable(value):
            raise TypeError("Series.name must be a hashable type")
        object.__setattr__(self, "_name", value)

    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        [a, a, b, c]
        Categories (3, object): [a, b, c]

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._data.external_values()

    @property
    def _values(self):
        """
        Return the internal repr of this data (defined by Block.internal_values()).
        These are the values as stored in the Block (ndarray or ExtensionArray
        depending on the Block class).

        Differs from the public ``.values`` for certain data types, because of
        historical backwards compatibility of the public attribute (e.g. period
        returns object ndarray and datetimetz a datetime64[ns] ndarray for
        ``.values`` while it returns an ExtensionArray for ``._values`` in those
        cases).

        Differs from ``.array`` in that this still returns the numpy array if
        the Block is backed by a numpy array, while ``.array`` always returns
        an ExtensionArray.

        Differs from ``._ndarray_values``, which always returns a numpy array
        (it will call ``_ndarray_values`` on the ExtensionArray, if the Series
        was backed by an ExtensionArray).

        Overview:

        dtype       | values        | _values       | array         | _ndarray_values |
        ----------- | ------------- | ------------- | ------------- | --------------- |
        Numeric     | ndarray       | ndarray       | PandasArray   | ndarray         |
        Category    | Categorical   | Categorical   | Categorical   | ndarray[int]    |
        dt64[ns]    | ndarray[M8ns] | ndarray[M8ns] | DatetimeArray | ndarray[M8ns]   |
        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns]   |
        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   | ndarray[int]    |
        Nullable    | EA            | EA            | EA            | ndarray         |

        """
        return self._data.internal_values()

    @Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore
    @property
    def array(self) -> ExtensionArray:
        return self._data._block.array_values()

    def _internal_get_values(self):
        """
        Same as values (but handles sparseness conversions); is a view.

        Returns
        -------
        numpy.ndarray
            Data of the Series.
        """

        return self._data.get_values()

    # ops
    def ravel(self, order="C"):
        """
        Return the flattened underlying data as an ndarray.

        Returns
        -------
        numpy.ndarray or ndarray-like
            Flattened data of the Series.

        See Also
        --------
        numpy.ndarray.ravel
        """
        return self._values.ravel(order=order)

    def __len__(self) -> int:
        """
        Return the length of the Series.
        """
        return len(self._data)

    def view(self, dtype=None):
        """
        Create a new view of the Series.

        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes so as
        not to cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series is instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without a specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        return self._constructor(
            self._values.view(dtype), index=self.index
        ).__finalize__(self)

    # ----------------------------------------------------------------------
    # NDArray Compat
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    def __array_ufunc__(
        self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any
    ):
        # TODO: handle DataFrame
        cls = type(self)

        # for binary ops, use our custom dunder methods
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        # Determine if we should defer.
        no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)

        for item in inputs:
            higher_priority = (
                hasattr(item, "__array_priority__")
                and item.__array_priority__ > self.__array_priority__
            )
            has_array_ufunc = (
                hasattr(item, "__array_ufunc__")
                and type(item).__array_ufunc__ not in no_defer
                and not isinstance(item, self._HANDLED_TYPES)
            )
            if higher_priority or has_array_ufunc:
                return NotImplemented

        # align all the inputs.
        names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
        types = tuple(type(x) for x in inputs)
        # TODO: dataframe
        alignable = [x for x, t in zip(inputs, types) if issubclass(t, Series)]

        if len(alignable) > 1:
            # This triggers alignment.
            # At the moment, there aren't any ufuncs with more than two inputs
            # so this ends up just being x1.index | x2.index, but we write
            # it to handle *args.
            index = alignable[0].index
            for s in alignable[1:]:
                index |= s.index
            inputs = tuple(
                x.reindex(index) if issubclass(t, Series) else x
                for x, t in zip(inputs, types)
            )
        else:
            index = self.index

        inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
        result = getattr(ufunc, method)(*inputs, **kwargs)

        name: Optional[Hashable]
        if len(set(names)) == 1:
            name = names[0]
        else:
            name = None

        def construct_return(result):
            if lib.is_scalar(result):
                return result
            elif result.ndim > 1:
                # e.g. np.subtract.outer
                if method == "outer":
                    # GH#27198
                    raise NotImplementedError
                return result
            return self._constructor(result, index=index, name=name, copy=False)

        if type(result) is tuple:
            # multiple return values
            return tuple(construct_return(x) for x in result)
        elif method == "at":
            # no return value
            return None
        else:
            return construct_return(result)
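
    # Illustrative sketch (not part of the original source): the alignment
    # performed above means a ufunc over two Series with different indexes
    # operates on the union of the indexes, with NaN where either side is
    # missing.
    #
    #   >>> import numpy as np, pandas as pd
    #   >>> s1 = pd.Series([1, 2], index=["a", "b"])
    #   >>> s2 = pd.Series([10, 20], index=["b", "c"])
    #   >>> np.add(s1, s2)
    #   a     NaN
    #   b    12.0
    #   c     NaN
    #   dtype: float64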

    def __array__(self, dtype=None) -> np.ndarray:
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarray`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discarded with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        return np.asarray(self.array, dtype)

    # ----------------------------------------------------------------------
    # Unary Methods

    # coercion
    __float__ = _coerce_method(float)
    __long__ = _coerce_method(int)
    __int__ = _coerce_method(int)

    # ----------------------------------------------------------------------

    def _unpickle_series_compat(self, state):
        if isinstance(state, dict):
            self._data = state["_data"]
            self.name = state["name"]
            self.index = self._data.index

        elif isinstance(state, tuple):

            # < 0.12 series pickle

            nd_state, own_state = state

            # recreate the ndarray
            data = np.empty(nd_state[1], dtype=nd_state[2])
            np.ndarray.__setstate__(data, nd_state)

            # backwards compat
            index, name = own_state[0], None
            if len(own_state) > 1:
                name = own_state[1]

            # recreate
            self._data = SingleBlockManager(data, index, fastpath=True)
            self._index = index
            self.name = name

        else:
            raise Exception(f"cannot unpickle legacy formats -> [{state}]")

    # indexers
    @property
    def axes(self):
        """
        Return a list of the row axis labels.
        """
        return [self.index]

    # ----------------------------------------------------------------------
    # Indexing Methods

    @Appender(generic.NDFrame.take.__doc__)
    def take(self, indices, axis=0, is_copy=None, **kwargs) -> "Series":
        if is_copy is not None:
            warnings.warn(
                "is_copy is deprecated and will be removed in a future version. "
                "'take' always returns a copy, so there is no need to specify this.",
                FutureWarning,
                stacklevel=2,
            )
        nv.validate_take(tuple(), kwargs)

        indices = ensure_platform_int(indices)
        new_index = self.index.take(indices)

        if is_categorical_dtype(self):
            # https://github.com/pandas-dev/pandas/issues/20664
            # TODO: remove when the default Categorical.take behavior changes
            indices = maybe_convert_indices(indices, len(self._get_axis(axis)))
            kwargs = {"allow_fill": False}
        else:
            kwargs = {}
        new_values = self._values.take(indices, **kwargs)

        return self._constructor(
            new_values, index=new_index, fastpath=True
        ).__finalize__(self)

    def _take_with_is_copy(self, indices, axis=0, **kwargs):
        """
        Internal version of the `take` method that sets the `_is_copy`
        attribute to keep track of the parent dataframe (used in indexing
        for the SettingWithCopyWarning). For Series this does the same
        as the public take (it never sets `_is_copy`).

        See the docstring of `take` for full explanation of the parameters.
        """
        return self.take(indices=indices, axis=axis, **kwargs)

    def _ixs(self, i: int, axis: int = 0):
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """

        # dispatch to the values if we need
        values = self._values
        if isinstance(values, np.ndarray):
            return libindex.get_value_at(values, i)
        else:
            return values[i]

    def _slice(self, slobj: slice, axis: int = 0, kind=None):
        slobj = self.index._convert_slice_indexer(slobj, kind=kind or "getitem")
        return self._get_values(slobj)

    def __getitem__(self, key):
        key = com.apply_if_callable(key, self)
        try:
            result = self.index.get_value(self, key)

            if not is_scalar(result):
                if is_list_like(result) and not isinstance(result, Series):

                    # we need to box if loc of the key isn't scalar here
                    # otherwise have inline ndarray/lists
                    try:
                        if not is_scalar(self.index.get_loc(key)):
                            result = self._constructor(
                                result, index=[key] * len(result), dtype=self.dtype
                            ).__finalize__(self)
                    except KeyError:
                        pass
            return result
        except InvalidIndexError:
            pass
        except (KeyError, ValueError):
            if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
                # kludge
                pass
            elif key is Ellipsis:
                return self
            elif com.is_bool_indexer(key):
                pass
            else:

                # we can try to coerce the indexer (or this will raise)
                new_key = self.index._convert_scalar_indexer(key, kind="getitem")
                if type(new_key) != type(key):
                    return self.__getitem__(new_key)
                raise

        if is_iterator(key):
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)

        return self._get_with(key)

    def _get_with(self, key):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            return self._slice(key)
        elif isinstance(key, ABCDataFrame):
            raise TypeError(
                "Indexing a Series with DataFrame is not "
                "supported, use the appropriate DataFrame column"
            )
        elif isinstance(key, tuple):
            try:
                return self._get_values_tuple(key)
            except ValueError:
                # if we don't have a MultiIndex, we may still be able to handle
                # a 1-tuple. see test_1tuple_without_multiindex
                if len(key) == 1:
                    key = key[0]
                    if isinstance(key, slice):
                        return self._get_values(key)
                raise

        if not isinstance(key, (list, np.ndarray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        if key_type == "integer":
            if self.index.is_integer() or self.index.is_floating():
                return self.loc[key]
            else:
                return self._get_values(key)
        elif key_type == "boolean":
            return self._get_values(key)

        if isinstance(key, (list, tuple)):
            # TODO: de-dup with tuple case handled above?
            # handle the dup indexing case GH#4246
            if len(key) == 1 and isinstance(key[0], slice):
                # [slice(0, 5, None)] will break if you convert to ndarray,
                # e.g. as requested by np.median
                # FIXME: hack
                return self._get_values(key)

            return self.loc[key]

        return self.reindex(key)
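
    # Illustrative sketch (not part of the original source): how _get_with
    # dispatches. An integer list is positional on a non-integer index but
    # label-based on an integer index, which is why the key_type check above
    # consults the index type.
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([10, 20, 30], index=["a", "b", "c"])
    #   >>> s[["a", "c"]]   # label list -> .loc path
    #   >>> s[[0, 2]]       # integer list -> positional _get_values path
    #   >>> s[s > 15]       # boolean mask -> _get_values path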

    def _get_values_tuple(self, key):
        # mpl hackaround
        if com.any_none(*key):
            # suppress warning from slicing the index with a 2d indexer.
            # eventually we'll want Series itself to warn.
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", "Support for multi-dim", DeprecationWarning
                )
                return self._get_values(key)

        if not isinstance(self.index, MultiIndex):
            raise ValueError("Can only tuple-index with a MultiIndex")

        # If key is contained, would have returned by now
        indexer, new_index = self.index.get_loc_level(key)
        return self._constructor(self._values[indexer], index=new_index).__finalize__(
            self
        )

    def _get_values(self, indexer):
        try:
            return self._constructor(
                self._data.get_slice(indexer), fastpath=True
            ).__finalize__(self)
        except ValueError:
            # mpl compat if we look up e.g. ser[:, np.newaxis];
            # see tests.series.timeseries.test_mpl_compat_hack
            return self._values[indexer]

    def _get_value(self, label, takeable: bool = False):
        """
        Quickly retrieve single value at passed index label.

        Parameters
        ----------
        label : object
        takeable : interpret the index as indexers, default False

        Returns
        -------
        scalar value
        """
        if takeable:
            return com.maybe_box_datetimelike(self._values[label])
        return self.index.get_value(self._values, label)

    def __setitem__(self, key, value):
        key = com.apply_if_callable(key, self)
        cacher_needs_updating = self._check_is_chained_assignment_possible()

        try:
            self._set_with_engine(key, value)
        except com.SettingWithCopyError:
            raise
        except (KeyError, ValueError):
            values = self._values
            if is_integer(key) and not self.index.inferred_type == "integer":
                values[key] = value
            elif key is Ellipsis:
                self[:] = value
            else:
                self.loc[key] = value

        except TypeError as e:
            if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
                raise ValueError("Can only tuple-index with a MultiIndex")

            # python 3 type errors should be raised
            if _is_unorderable_exception(e):
                raise IndexError(key)

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                try:
                    self._where(~key, value, inplace=True)
                    return
                except InvalidIndexError:
                    pass

            self._set_with(key, value)

        if cacher_needs_updating:
            self._maybe_update_cacher()

    def _set_with_engine(self, key, value):
        values = self._values
        if is_extension_array_dtype(values.dtype):
            # The cython indexing engine does not support ExtensionArrays.
            values[self.index.get_loc(key)] = value
            return
        try:
            self.index._engine.set_value(values, key, value)
            return
        except KeyError:
            values[self.index.get_loc(key)] = value
            return
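
    # Illustrative sketch (not part of the original source): __setitem__ first
    # tries the fast engine path; a boolean mask instead routes through
    # check_bool_indexer and _where, as handled in the TypeError branch above.
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2, 3], index=["a", "b", "c"])
    #   >>> s["b"] = 20   # fast _set_with_engine path
    #   >>> s[s > 2] = 0  # boolean-mask path via _where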

    def _set_with(self, key, value):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind="getitem")
            return self._set_values(indexer, value)

        elif is_scalar(key) and not is_integer(key) and key not in self.index:
            # GH#12862 adding a new key to the Series
            # Note: have to exclude integers because that is ambiguously
            # position-based
            self.loc[key] = value
            return

        else:
            if isinstance(key, tuple):
                try:
                    # TODO: no test cases that get here
                    self._set_values(key, value)
                except Exception:
                    pass

            if is_scalar(key):
                key = [key]

            if isinstance(key, Index):
                key_type = key.inferred_type
                key = key._values
            else:
                key_type = lib.infer_dtype(key, skipna=False)

            if key_type == "integer":
                if self.index.inferred_type == "integer":
                    self._set_labels(key, value)
                else:
                    return self._set_values(key, value)
            elif key_type == "boolean":
                self._set_values(key.astype(np.bool_), value)
            else:
                self._set_labels(key, value)

    def _set_labels(self, key, value):
        key = com.asarray_tuplesafe(key)
        indexer = self.index.get_indexer(key)
        mask = indexer == -1
        if mask.any():
            raise ValueError(f"{key[mask]} not contained in the index")
        self._set_values(indexer, value)

    def _set_values(self, key, value):
        if isinstance(key, Series):
            key = key._values
        self._data = self._data.setitem(indexer=key, value=value)
        self._maybe_update_cacher()

    def _set_value(self, label, value, takeable: bool = False):
        """
        Quickly set single value at passed label.

        If label is not contained, a new object is created with the label
        placed at the end of the result index.

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed.
        value : object
            Scalar value.
        takeable : interpret the index as indexers, default False

        Returns
        -------
        Series
            If label is contained, will be a reference to the calling Series,
            otherwise a new object.
        """
        try:
            if takeable:
                self._values[label] = value
            else:
                self.index._engine.set_value(self._values, label, value)
        except (KeyError, TypeError):

            # set using a non-recursive method
            self.loc[label] = value

        return self
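
    # Illustrative sketch (not part of the original source): setting an
    # unknown label via _set_value falls back to .loc, which enlarges the
    # Series by appending the new label.
    #
    #   >>> import pandas as pd
    #   >>> s = pd.Series([1, 2], index=["a", "b"])
    #   >>> s._set_value("c", 3)  # "c" not present -> appended via .loc
    #   a    1
    #   b    2
    #   c    3
    #   dtype: int64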

    # ----------------------------------------------------------------------
    # Unsorted

    @property
    def _is_mixed_type(self):
        return False

    def repeat(self, repeats, axis=None):
        """
        Repeat elements of a Series.

        Returns a new Series where each element of the current Series
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            Series.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        Series
            Newly created Series with repeated elements.

        See Also
        --------
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> s = pd.Series(['a', 'b', 'c'])
        >>> s
        0    a
        1    b
        2    c
        dtype: object
        >>> s.repeat(2)
        0    a
        0    a
        1    b
        1    b
        2    c
        2    c
        dtype: object
        >>> s.repeat([1, 2, 3])
        0    a
        1    b
        1    b
        2    c
        2    c
        2    c
        dtype: object
        """
        nv.validate_repeat(tuple(), dict(axis=axis))
        new_index = self.index.repeat(repeats)
        new_values = self._values.repeat(repeats)
        return self._constructor(new_values, index=new_index).__finalize__(self)

    def reset_index(self, level=None, drop=False, name=None, inplace=False):
        """
        Generate a new DataFrame or Series with the index reset.

        This is useful when the index needs to be treated as a column, or
        when the index is meaningless and needs to be reset to the default
        before another operation.

        Parameters
        ----------
        level : int, str, tuple, or list, default optional
            For a Series with a MultiIndex, only remove the specified levels
            from the index. Removes all levels by default.
        drop : bool, default False
            Just reset the index, without inserting it as a column in
            the new DataFrame.
        name : object, optional
            The name to use for the column containing the original Series
            values. Uses ``self.name`` by default. This argument is ignored
            when `drop` is True.
        inplace : bool, default False
            Modify the Series in place (do not create a new object).

        Returns
        -------
        Series or DataFrame
            When `drop` is False (the default), a DataFrame is returned.
            The newly created columns will come first in the DataFrame,
            followed by the original Series values.
            When `drop` is True, a `Series` is returned.
            In either case, if ``inplace=True``, no value is returned.

        See Also
        --------
        DataFrame.reset_index: Analogous function for DataFrame.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4], name='foo',
        ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

        Generate a DataFrame with default index.

        >>> s.reset_index()
          idx  foo
        0   a    1
        1   b    2
        2   c    3
        3   d    4

        To specify the name of the new column use `name`.

        >>> s.reset_index(name='values')
          idx  values
        0   a       1
        1   b       2
        2   c       3
        3   d       4

        To generate a new Series with the default index, set `drop` to True.

        >>> s.reset_index(drop=True)
        0    1
        1    2
        2    3
        3    4
        Name: foo, dtype: int64

        To update the Series in place, without generating a new one,
        set `inplace` to True. Note that it also requires ``drop=True``.

        >>> s.reset_index(inplace=True, drop=True)
        >>> s
        0    1
        1    2
        2    3
        3    4
        Name: foo, dtype: int64

        The `level` parameter is useful for a Series with a multi-level
        index.

        >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
        ...           np.array(['one', 'two', 'one', 'two'])]
        >>> s2 = pd.Series(
        ...     range(4), name='foo',
        ...     index=pd.MultiIndex.from_arrays(arrays,
        ...                                     names=['a', 'b']))

        To remove a specific level from the Index, use `level`.

        >>> s2.reset_index(level='a')
               a  foo
        b
        one  bar    0
        two  bar    1
        one  baz    2
        two  baz    3

        If `level` is not set, all levels are removed from the Index.

        >>> s2.reset_index()
             a    b  foo
        0  bar  one    0
        1  bar  two    1
        2  baz  one    2
        3  baz  two    3
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if drop:
            new_index = ibase.default_index(len(self))
            if level is not None:
                if not isinstance(level, (tuple, list)):
                    level = [level]
                level = [self.index._get_level_number(lev) for lev in level]
                if len(level) < self.index.nlevels:
                    new_index = self.index.droplevel(level)

            if inplace:
                self.index = new_index
                # set name if it was passed, otherwise, keep the previous name
                self.name = name or self.name
            else:
                return self._constructor(
                    self._values.copy(), index=new_index
                ).__finalize__(self)
        elif inplace:
            raise TypeError(
                "Cannot reset_index inplace on a Series to create a DataFrame"
            )
        else:
            df = self.to_frame(name)
            return df.reset_index(level=level, drop=drop)

    # ----------------------------------------------------------------------
    # Rendering Methods

    def __repr__(self) -> str:
        """
        Return a string representation for a particular Series.
        """
        buf = StringIO("")
        width, height = get_terminal_size()
        max_rows = (
            height
            if get_option("display.max_rows") == 0
            else get_option("display.max_rows")
        )
        min_rows = (
            height
            if get_option("display.max_rows") == 0
            else get_option("display.min_rows")
        )
        show_dimensions = get_option("display.show_dimensions")

        self.to_string(
            buf=buf,
            name=self.name,
            dtype=self.dtype,
            min_rows=min_rows,
            max_rows=max_rows,
            length=show_dimensions,
        )
        result = buf.getvalue()

        return result

    def to_string(
        self,
        buf=None,
        na_rep="NaN",
        float_format=None,
        header=True,
        index=True,
        length=False,
        dtype=False,
        name=False,
        max_rows=None,
        min_rows=None,
    ):
        """
        Render a string representation of the Series.

        Parameters
        ----------
        buf : StringIO-like, optional
            Buffer to write to.
        na_rep : str, optional
            String representation of NaN to use, default 'NaN'.
        float_format : one-parameter function, optional
            Formatter function to apply to columns' elements if they are
            floats, default None.
        header : bool, default True
            Add the Series header (index name).
        index : bool, optional
            Add index (row) labels, default True.
        length : bool, default False
            Add the Series length.
        dtype : bool, default False
            Add the Series dtype.
        name : bool, default False
            Add the Series name if not None.
        max_rows : int, optional
            Maximum number of rows to show before truncating. If None, show
            all.
        min_rows : int, optional
            The number of rows to display in a truncated repr (when number
            of rows is above `max_rows`).

        Returns
        -------
        str or None
            String representation of Series if ``buf=None``, otherwise None.
        """

        formatter = fmt.SeriesFormatter(
            self,
            name=name,
            length=length,
            header=header,
            index=index,
            dtype=dtype,
            na_rep=na_rep,
            float_format=float_format,
            min_rows=min_rows,
            max_rows=max_rows,
        )
        result = formatter.to_string()

        # catch contract violations
        if not isinstance(result, str):
            raise AssertionError(
                "result must be of type str, type"
                f" of result is {repr(type(result).__name__)}"
            )

        if buf is None:
            return result
        else:
            try:
                buf.write(result)
            except AttributeError:
                with open(buf, "w") as f:
                    f.write(result)

    @Appender(
        """
        Examples
        --------
        >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
        >>> print(s.to_markdown())
        |    | animal   |
        |---:|:---------|
        |  0 | elk      |
        |  1 | pig      |
        |  2 | dog      |
        |  3 | quetzal  |
        """
    )
    @Substitution(klass="Series")
    @Appender(generic._shared_docs["to_markdown"])
    def to_markdown(
        self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs
    ) -> Optional[str]:
        return self.to_frame().to_markdown(buf, mode, **kwargs)

    # ----------------------------------------------------------------------

    def items(self):
        """
        Lazily iterate over (index, value) tuples.

        This method returns an iterable of tuples (index, value). This is
        convenient if you want to create a lazy iterator.

        Returns
        -------
        iterable
            Iterable of tuples containing the (index, value) pairs from a
            Series.

        See Also
        --------
        DataFrame.items : Iterate over (column name, Series) pairs.
        DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.

        Examples
        --------
        >>> s = pd.Series(['A', 'B', 'C'])
        >>> for index, value in s.items():
        ...     print(f"Index : {index}, Value : {value}")
        Index : 0, Value : A
        Index : 1, Value : B
        Index : 2, Value : C
        """
        return zip(iter(self.index), iter(self))

    @Appender(items.__doc__)
    def iteritems(self):
        return self.items()

    # ----------------------------------------------------------------------
    # Misc public methods

    def keys(self):
        """
        Return alias for index.

        Returns
        -------
        Index
            Index of the Series.
        """
        return self.index

    def to_dict(self, into=dict):
        """
        Convert Series to {label -> value} dict or dict-like object.

        Parameters
        ----------
        into : class, default dict
            The collections.abc.Mapping subclass to use as the return
            object. Can be the actual class or an empty
            instance of the mapping type you want. If you want a
            collections.defaultdict, you must pass it initialized.

            .. versionadded:: 0.21.0

        Returns
        -------
        collections.abc.Mapping
            Key-value representation of Series.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.to_dict()
        {0: 1, 1: 2, 2: 3, 3: 4}
        >>> from collections import OrderedDict, defaultdict
        >>> s.to_dict(OrderedDict)
        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
        >>> dd = defaultdict(list)
        >>> s.to_dict(dd)
        defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
        """
        # GH16122
        into_c = com.standardize_mapping(into)
        return into_c(self.items())

    def to_frame(self, name=None):
        """
        Convert Series to DataFrame.

        Parameters
        ----------
        name : object, default None
            The passed name should substitute for the series name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame representation of Series.

        Examples
        --------
        >>> s = pd.Series(["a", "b", "c"],
        ...               name="vals")
        >>> s.to_frame()
          vals
        0    a
        1    b
        2    c
        """
        if name is None:
            df = self._constructor_expanddim(self)
        else:
            df = self._constructor_expanddim({name: self})

        return df

    def _set_name(self, name, inplace=False):
        """
        Set the Series name.

        Parameters
        ----------
        name : str
        inplace : bool
            Whether to modify `self` directly or return a copy.
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        ser = self if inplace else self.copy()
        ser.name = name
        return ser

    @Appender(
        """
Examples
--------
>>> ser = pd.Series([390., 350., 30., 20.],
...                 index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
>>> ser
Falcon    390.0
Falcon    350.0
Parrot     30.0
Parrot     20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(["a", "b", "a", "b"]).mean()
a    210.0
b    185.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(ser > 100).mean()
Max Speed
False     25.0
True     370.0
Name: Max Speed, dtype: float64

**Grouping by Indexes**

We can groupby different levels of a hierarchical index
using the `level` parameter:

>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
...           ['Captive', 'Wild', 'Captive', 'Wild']]
>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
>>> ser
Animal  Type
Falcon  Captive    390.0
        Wild       350.0
Parrot  Captive     30.0
        Wild        20.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level=0).mean()
Animal
Falcon    370.0
Parrot     25.0
Name: Max Speed, dtype: float64
>>> ser.groupby(level="Type").mean()
Type
Captive    210.0
Wild       185.0
Name: Max Speed, dtype: float64
"""
    )
    @Appender(generic._shared_docs["groupby"] % _shared_doc_kwargs)
    def groupby(
        self,
        by=None,
        axis=0,
        level=None,
        as_index: bool = True,
        sort: bool = True,
        group_keys: bool = True,
        squeeze: bool = False,
        observed: bool = False,
    ) -> "groupby_generic.SeriesGroupBy":

        if level is None and by is None:
            raise TypeError("You have to supply one of 'by' and 'level'")
        axis = self._get_axis_number(axis)

        return groupby_generic.SeriesGroupBy(
            obj=self,
            keys=by,
            axis=axis,
            level=level,
            as_index=as_index,
            sort=sort,
            group_keys=group_keys,
            squeeze=squeeze,
            observed=observed,
        )

    # ----------------------------------------------------------------------
    # Statistics, overridden ndarray methods

    # TODO: integrate bottleneck

    def count(self, level=None):
        """
        Return number of non-NA/null observations in the Series.

        Parameters
        ----------
        level : int or level name, default None
            If the axis is a MultiIndex (hierarchical), count along a
            particular level, collapsing into a smaller Series.

        Returns
        -------
        int or Series (if level specified)
            Number of non-null values in the Series.

        Examples
        --------
        >>> s = pd.Series([0.0, 1.0, np.nan])
        >>> s.count()
        2
        """
        if level is None:
            return notna(self.array).sum()

        if isinstance(level, str):
            level = self.index._get_level_number(level)

        lev = self.index.levels[level]
        level_codes = np.array(self.index.codes[level], subok=False, copy=True)

        mask = level_codes == -1
        if mask.any():
            level_codes[mask] = cnt = len(lev)
            lev = lev.insert(cnt, lev._na_value)

        obs = level_codes[notna(self.values)]
        out = np.bincount(obs, minlength=len(lev) or None)
        return self._constructor(out, index=lev, dtype="int64").__finalize__(self)

    def mode(self, dropna=True):
        """
        Return the mode(s) of the dataset.

        Always returns Series even if only one value is returned.

        Parameters
        ----------
        dropna : bool, default True
            Don't consider counts of NaN/NaT.

            .. versionadded:: 0.24.0

        Returns
        -------
        Series
            Modes of the Series in sorted order.
        """
        # TODO: Add option for bins like value_counts()
        return algorithms.mode(self, dropna=dropna)

    def unique(self):
        """
        Return unique values of Series object.

        Uniques are returned in order of appearance. The implementation is
        hash table-based, so uniques are NOT sorted.

        Returns
        -------
        ndarray or ExtensionArray
            The unique values returned as a NumPy array. See Notes.

        See Also
        --------
        unique : Top-level unique method for any 1-d array-like object.
        Index.unique : Return Index with unique values from an Index object.

        Notes
        -----
        Returns the unique values as a NumPy array. In case of an
        extension-array backed Series, a new
        :class:`~api.extensions.ExtensionArray` of that type with just
        the unique values is returned. This includes

            * Categorical
            * Period
            * Datetime with Timezone
            * Interval
            * Sparse
            * IntegerNA

        See Examples section.

        Examples
        --------
        >>> pd.Series([2, 1, 3, 3], name='A').unique()
        array([2, 1, 3])

        >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
        array(['2016-01-01T00:00:00.000000000'], dtype='datetime64[ns]')

        >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
        ...            for _ in range(3)]).unique()
        <DatetimeArray>
        ['2016-01-01 00:00:00-05:00']
        Length: 1, dtype: datetime64[ns, US/Eastern]

        An unordered Categorical will return categories in the order of
        appearance.

        >>> pd.Series(pd.Categorical(list('baabc'))).unique()
        [b, a, c]
        Categories (3, object): [b, a, c]

        An ordered Categorical preserves the category ordering.

        >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
        ...                          ordered=True)).unique()
        [b, a, c]
        Categories (3, object): [a < b < c]
        """
        result = super().unique()
        return result

    def drop_duplicates(self, keep="first", inplace=False):
        """
        Return Series with duplicate values removed.

        Parameters
        ----------
        keep : {'first', 'last', ``False``}, default 'first'
            Method to handle dropping duplicates:

            - 'first' : Drop duplicates except for the first occurrence.
            - 'last' : Drop duplicates except for the last occurrence.
            - ``False`` : Drop all duplicates.

        inplace : bool, default ``False``
            If ``True``, performs operation inplace and returns None.

        Returns
        -------
        Series
            Series with duplicates dropped.

        See Also
        --------
        Index.drop_duplicates : Equivalent method on Index.
        DataFrame.drop_duplicates : Equivalent method on DataFrame.
        Series.duplicated : Related method on Series, indicating duplicate
            Series values.

        Examples
        --------
        Generate a Series with duplicated entries.

        >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
        ...               name='animal')
        >>> s
        0      lama
        1       cow
        2      lama
        3    beetle
        4      lama
        5     hippo
        Name: animal, dtype: object

        With the 'keep' parameter, the selection behaviour of duplicated values
        can be changed. The value 'first' keeps the first occurrence for each
        set of duplicated entries. The default value of keep is 'first'.

        >>> s.drop_duplicates()
        0      lama
        1       cow
        3    beetle
        5     hippo
        Name: animal, dtype: object

        The value 'last' for parameter 'keep' keeps the last occurrence for
        each set of duplicated entries.

        >>> s.drop_duplicates(keep='last')
        1       cow
        3    beetle
        4      lama
        5     hippo
        Name: animal, dtype: object

        The value ``False`` for parameter 'keep' discards all sets of
        duplicated entries. Setting the value of 'inplace' to ``True`` performs
        the operation inplace and returns ``None``.

        >>> s.drop_duplicates(keep=False, inplace=True)
        >>> s
        1       cow
        3    beetle
        5     hippo
        Name: animal, dtype: object
        """
        return super().drop_duplicates(keep=keep, inplace=inplace)

    def duplicated(self, keep="first"):
        """
        Indicate duplicate Series values.

        Duplicated values are indicated as ``True`` values in the resulting
        Series. Either all duplicates, all except the first or all except the
        last occurrence of duplicates can be indicated.

        Parameters
        ----------
        keep : {'first', 'last', False}, default 'first'
            Method to handle dropping duplicates:

            - 'first' : Mark duplicates as ``True`` except for the first
              occurrence.
            - 'last' : Mark duplicates as ``True`` except for the last
              occurrence.
            - ``False`` : Mark all duplicates as ``True``.

        Returns
        -------
        Series
            Series indicating whether each value has occurred in the
            preceding values.

        See Also
        --------
        Index.duplicated : Equivalent method on pandas.Index.
        DataFrame.duplicated : Equivalent method on pandas.DataFrame.
        Series.drop_duplicates : Remove duplicate values from Series.

        Examples
        --------
        By default, for each set of duplicated values, the first occurrence is
        set to False and all others to True:

        >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
        >>> animals.duplicated()
        0    False
        1    False
        2     True
        3    False
        4     True
        dtype: bool

        which is equivalent to

        >>> animals.duplicated(keep='first')
        0    False
        1    False
        2     True
        3    False
        4     True
        dtype: bool

        By using 'last', the last occurrence of each set of duplicated values
        is set to False and all others to True:

        >>> animals.duplicated(keep='last')
        0     True
        1    False
        2     True
        3    False
        4    False
        dtype: bool

        By setting keep to ``False``, all duplicates are True:

        >>> animals.duplicated(keep=False)
        0     True
        1    False
        2     True
        3    False
        4     True
        dtype: bool
        """
        return super().duplicated(keep=keep)
1974 def idxmin(self, axis=0, skipna=True, *args, **kwargs):
1975 """
1976 Return the row label of the minimum value.
1978 If multiple values equal the minimum, the first row label with that
1979 value is returned.
1981 Parameters
1982 ----------
1983 axis : int, default 0
1984 For compatibility with DataFrame.idxmin. Redundant for application
1985 on Series.
1986 skipna : bool, default True
1987 Exclude NA/null values. If the entire Series is NA, the result
1988 will be NA.
1989 *args, **kwargs
1990 Additional arguments and keywords have no effect but might be
1991 accepted for compatibility with NumPy.
1993 Returns
1994 -------
1995 Index
1996 Label of the minimum value.
1998 Raises
1999 ------
2000 ValueError
2001 If the Series is empty.
2003 See Also
2004 --------
2005 numpy.argmin : Return indices of the minimum values
2006 along the given axis.
2007 DataFrame.idxmin : Return index of first occurrence of minimum
2008 over requested axis.
2009 Series.idxmax : Return index *label* of the first occurrence
2010 of maximum of values.
2012 Notes
2013 -----
2014 This method is the Series version of ``ndarray.argmin``. This method
2015 returns the label of the minimum, while ``ndarray.argmin`` returns
2016 the position. To get the position, use ``series.values.argmin()``.
2018 Examples
2019 --------
2020 >>> s = pd.Series(data=[1, None, 4, 1],
2021 ... index=['A', 'B', 'C', 'D'])
2022 >>> s
2023 A 1.0
2024 B NaN
2025 C 4.0
2026 D 1.0
2027 dtype: float64
2029 >>> s.idxmin()
2030 'A'
2032 If `skipna` is False and there is an NA value in the data,
2033 the function returns ``nan``.
2035 >>> s.idxmin(skipna=False)
2036 nan
2037 """
2038 skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
2039 i = nanops.nanargmin(com.values_from_object(self), skipna=skipna)
2040 if i == -1:
2041 return np.nan
2042 return self.index[i]
2044 def idxmax(self, axis=0, skipna=True, *args, **kwargs):
2045 """
2046 Return the row label of the maximum value.
2048 If multiple values equal the maximum, the first row label with that
2049 value is returned.
2051 Parameters
2052 ----------
2053 axis : int, default 0
2054 For compatibility with DataFrame.idxmax. Redundant for application
2055 on Series.
2056 skipna : bool, default True
2057 Exclude NA/null values. If the entire Series is NA, the result
2058 will be NA.
2059 *args, **kwargs
2060 Additional arguments and keywords have no effect but might be
2061 accepted for compatibility with NumPy.
2063 Returns
2064 -------
2065 Index
2066 Label of the maximum value.
2068 Raises
2069 ------
2070 ValueError
2071 If the Series is empty.
2073 See Also
2074 --------
2075 numpy.argmax : Return indices of the maximum values
2076 along the given axis.
2077 DataFrame.idxmax : Return index of first occurrence of maximum
2078 over requested axis.
2079 Series.idxmin : Return index *label* of the first occurrence
2080 of minimum of values.
2082 Notes
2083 -----
2084 This method is the Series version of ``ndarray.argmax``. This method
2085 returns the label of the maximum, while ``ndarray.argmax`` returns
2086 the position. To get the position, use ``series.values.argmax()``.
2088 Examples
2089 --------
2090 >>> s = pd.Series(data=[1, None, 4, 3, 4],
2091 ... index=['A', 'B', 'C', 'D', 'E'])
2092 >>> s
2093 A 1.0
2094 B NaN
2095 C 4.0
2096 D 3.0
2097 E 4.0
2098 dtype: float64
2100 >>> s.idxmax()
2101 'C'
2103 If `skipna` is False and there is an NA value in the data,
2104 the function returns ``nan``.
2106 >>> s.idxmax(skipna=False)
2107 nan
2108 """
2109 skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)
2110 i = nanops.nanargmax(com.values_from_object(self), skipna=skipna)
2111 if i == -1:
2112 return np.nan
2113 return self.index[i]
2115 def round(self, decimals=0, *args, **kwargs):
2116 """
2117 Round each value in a Series to the given number of decimals.
2119 Parameters
2120 ----------
2121 decimals : int, default 0
2122 Number of decimal places to round to. If decimals is negative,
2123 it specifies the number of positions to the left of the decimal point.
2125 Returns
2126 -------
2127 Series
2128 Rounded values of the Series.
2130 See Also
2131 --------
2132 numpy.around : Round values of an np.array.
2133 DataFrame.round : Round values of a DataFrame.
2135 Examples
2136 --------
2137 >>> s = pd.Series([0.1, 1.3, 2.7])
2138 >>> s.round()
2139 0 0.0
2140 1 1.0
2141 2 3.0
2142 dtype: float64
2143 """
2144 nv.validate_round(args, kwargs)
2145 result = com.values_from_object(self).round(decimals)
2146 result = self._constructor(result, index=self.index).__finalize__(self)
2148 return result
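The ``decimals`` parameter may be negative, rounding to the left of the decimal point as described above; a small illustration with hypothetical values:

>>> pd.Series([12.0, 37.0]).round(-1)
0 10.0
1 40.0
dtype: float64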
2150 def quantile(self, q=0.5, interpolation="linear"):
2151 """
2152 Return value at the given quantile.
2154 Parameters
2155 ----------
2156 q : float or array-like, default 0.5 (50% quantile)
2157 The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
2158 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
2159 This optional parameter specifies the interpolation method to use,
2160 when the desired quantile lies between two data points `i` and `j`:
2162 * linear: `i + (j - i) * fraction`, where `fraction` is the
2163 fractional part of the index surrounded by `i` and `j`.
2164 * lower: `i`.
2165 * higher: `j`.
2166 * nearest: `i` or `j` whichever is nearest.
2167 * midpoint: (`i` + `j`) / 2.
2169 Returns
2170 -------
2171 float or Series
2172 If ``q`` is an array, a Series will be returned where the
2173 index is ``q`` and the values are the quantiles, otherwise
2174 a float will be returned.
2176 See Also
2177 --------
2178 core.window.Rolling.quantile
2179 numpy.percentile
2181 Examples
2182 --------
2183 >>> s = pd.Series([1, 2, 3, 4])
2184 >>> s.quantile(.5)
2185 2.5
2186 >>> s.quantile([.25, .5, .75])
2187 0.25 1.75
2188 0.50 2.50
2189 0.75 3.25
2190 dtype: float64
2191 """
2193 validate_percentile(q)
2195 # We dispatch to DataFrame so that core.internals only has to worry
2196 # about 2D cases.
2197 df = self.to_frame()
2199 result = df.quantile(q=q, interpolation=interpolation, numeric_only=False)
2200 if result.ndim == 2:
2201 result = result.iloc[:, 0]
2203 if is_list_like(q):
2204 result.name = self.name
2205 return self._constructor(result, index=Float64Index(q), name=self.name)
2206 else:
2207 # scalar
2208 return result.iloc[0]
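A small illustration of the ``interpolation`` choices documented above, using a hypothetical four-element series whose median falls between the data points 2 and 3:

>>> s = pd.Series([1, 2, 3, 4])
>>> s.quantile(0.5) # linear (default)
2.5
>>> s.quantile(0.5, interpolation='lower') # take i
2
>>> s.quantile(0.5, interpolation='higher') # take j
3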
2210 def corr(self, other, method="pearson", min_periods=None):
2211 """
2212 Compute correlation with `other` Series, excluding missing values.
2214 Parameters
2215 ----------
2216 other : Series
2217 Series with which to compute the correlation.
2218 method : {'pearson', 'kendall', 'spearman'} or callable
2219 Method used to compute correlation:
2221 - pearson : Standard correlation coefficient
2222 - kendall : Kendall Tau correlation coefficient
2223 - spearman : Spearman rank correlation
2224 - callable: Callable taking two 1d ndarrays and returning a float.
2226 .. versionadded:: 0.24.0
2227 Note that the returned matrix from corr will have 1 along the
2228 diagonals and will be symmetric regardless of the callable's
2229 behavior.
2230 min_periods : int, optional
2231 Minimum number of observations needed to have a valid result.
2233 Returns
2234 -------
2235 float
2236 Correlation with other.
2238 Examples
2239 --------
2240 >>> def histogram_intersection(a, b):
2241 ... v = np.minimum(a, b).sum().round(decimals=1)
2242 ... return v
2243 >>> s1 = pd.Series([.2, .0, .6, .2])
2244 >>> s2 = pd.Series([.3, .6, .0, .1])
2245 >>> s1.corr(s2, method=histogram_intersection)
2246 0.3
2247 """
2248 this, other = self.align(other, join="inner", copy=False)
2249 if len(this) == 0:
2250 return np.nan
2252 if method in ["pearson", "spearman", "kendall"] or callable(method):
2253 return nanops.nancorr(
2254 this.values, other.values, method=method, min_periods=min_periods
2255 )
2257 raise ValueError(
2258 "method must be either 'pearson', "
2259 "'spearman', 'kendall', or a callable, "
2260 f"'{method}' was supplied"
2261 )
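A quick contrast of the built-in methods listed above: Pearson measures linear association while Spearman looks only at rank order, so a monotone but non-linear relation separates them (values are illustrative):

>>> s1 = pd.Series([1, 2, 3])
>>> s2 = pd.Series([1, 4, 9])
>>> s1.corr(s2) # doctest: +ELLIPSIS
0.98...
>>> s1.corr(s2, method='spearman')
1.0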
2263 def cov(self, other, min_periods=None):
2264 """
2265 Compute covariance with Series, excluding missing values.
2267 Parameters
2268 ----------
2269 other : Series
2270 Series with which to compute the covariance.
2271 min_periods : int, optional
2272 Minimum number of observations needed to have a valid result.
2274 Returns
2275 -------
2276 float
2277 Covariance between Series and other normalized by N-1
2278 (unbiased estimator).
2280 Examples
2281 --------
2282 >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
2283 >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
2284 >>> s1.cov(s2)
2285 -0.01685762652715874
2286 """
2287 this, other = self.align(other, join="inner", copy=False)
2288 if len(this) == 0:
2289 return np.nan
2290 return nanops.nancov(this.values, other.values, min_periods=min_periods)
2292 def diff(self, periods=1):
2293 """
2294 First discrete difference of element.
2296 Calculates the difference of a Series element compared with another
2297 element in the Series (default is element in previous row).
2299 Parameters
2300 ----------
2301 periods : int, default 1
2302 Periods to shift for calculating difference, accepts negative
2303 values.
2305 Returns
2306 -------
2307 Series
2308 First differences of the Series.
2310 See Also
2311 --------
2312 Series.pct_change: Percent change over given number of periods.
2313 Series.shift: Shift index by desired number of periods with an
2314 optional time freq.
2315 DataFrame.diff: First discrete difference of object.
2317 Notes
2318 -----
2319 For boolean dtypes, this uses :meth:`operator.xor` rather than
2320 :meth:`operator.sub`.
2322 Examples
2323 --------
2324 Difference with previous row
2326 >>> s = pd.Series([1, 1, 2, 3, 5, 8])
2327 >>> s.diff()
2328 0 NaN
2329 1 0.0
2330 2 1.0
2331 3 1.0
2332 4 2.0
2333 5 3.0
2334 dtype: float64
2336 Difference with 3rd previous row
2338 >>> s.diff(periods=3)
2339 0 NaN
2340 1 NaN
2341 2 NaN
2342 3 2.0
2343 4 4.0
2344 5 6.0
2345 dtype: float64
2347 Difference with following row
2349 >>> s.diff(periods=-1)
2350 0 0.0
2351 1 -1.0
2352 2 -1.0
2353 3 -2.0
2354 4 -3.0
2355 5 NaN
2356 dtype: float64
2357 """
2358 result = algorithms.diff(self.array, periods)
2359 return self._constructor(result, index=self.index).__finalize__(self)
2361 def autocorr(self, lag=1):
2362 """
2363 Compute the lag-N autocorrelation.
2365 This method computes the Pearson correlation between
2366 the Series and its shifted self.
2368 Parameters
2369 ----------
2370 lag : int, default 1
2371 Number of lags to apply before performing autocorrelation.
2373 Returns
2374 -------
2375 float
2376 The Pearson correlation between self and self.shift(lag).
2378 See Also
2379 --------
2380 Series.corr : Compute the correlation between two Series.
2381 Series.shift : Shift index by desired number of periods.
2382 DataFrame.corr : Compute pairwise correlation of columns.
2383 DataFrame.corrwith : Compute pairwise correlation between rows or
2384 columns of two DataFrame objects.
2386 Notes
2387 -----
2388 If the Pearson correlation is not well defined, 'NaN' is returned.
2390 Examples
2391 --------
2392 >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
2393 >>> s.autocorr() # doctest: +ELLIPSIS
2394 0.10355...
2395 >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
2396 -0.99999...
2398 If the Pearson correlation is not well defined, then 'NaN' is returned.
2400 >>> s = pd.Series([1, 0, 0, 0])
2401 >>> s.autocorr()
2402 nan
2403 """
2404 return self.corr(self.shift(lag))
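Since the implementation simply delegates, ``autocorr(lag)`` agrees with an explicit ``corr`` against the shifted series; a minimal check using the docstring's series:

>>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
>>> s.autocorr(lag=1) == s.corr(s.shift(1))
True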
2406 def dot(self, other):
2407 """
2408 Compute the dot product between the Series and the columns of other.
2410 This method computes the dot product between the Series and another
2411 one, or the Series and each column of a DataFrame, or the Series and
2412 each column of an array.
2414 It can also be called using `self @ other` in Python >= 3.5.
2416 Parameters
2417 ----------
2418 other : Series, DataFrame or array-like
2419 The other object to compute the dot product with its columns.
2421 Returns
2422 -------
2423 scalar, Series or numpy.ndarray
2424 The dot product of the Series and `other` if `other` is a Series;
2425 a Series of the dot products between the Series and each column of
2426 `other` if `other` is a DataFrame; or a numpy.ndarray of the dot
2427 products between the Series and each column of the array.
2429 See Also
2430 --------
2431 DataFrame.dot: Compute the matrix product with the DataFrame.
2432 Series.mul: Multiplication of series and other, element-wise.
2434 Notes
2435 -----
2436 The Series and other have to share the same index if other is a Series
2437 or a DataFrame.
2439 Examples
2440 --------
2441 >>> s = pd.Series([0, 1, 2, 3])
2442 >>> other = pd.Series([-1, 2, -3, 4])
2443 >>> s.dot(other)
2444 8
2445 >>> s @ other
2446 8
2447 >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
2448 >>> s.dot(df)
2449 0 24
2450 1 14
2451 dtype: int64
2452 >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
2453 >>> s.dot(arr)
2454 array([24, 14])
2455 """
2456 if isinstance(other, (Series, ABCDataFrame)):
2457 common = self.index.union(other.index)
2458 if len(common) > len(self.index) or len(common) > len(other.index):
2459 raise ValueError("matrices are not aligned")
2461 left = self.reindex(index=common, copy=False)
2462 right = other.reindex(index=common, copy=False)
2463 lvals = left.values
2464 rvals = right.values
2465 else:
2466 lvals = self.values
2467 rvals = np.asarray(other)
2468 if lvals.shape[0] != rvals.shape[0]:
2469 raise Exception(
2470 f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
2471 )
2473 if isinstance(other, ABCDataFrame):
2474 return self._constructor(
2475 np.dot(lvals, rvals), index=other.columns
2476 ).__finalize__(self)
2477 elif isinstance(other, Series):
2478 return np.dot(lvals, rvals)
2479 elif isinstance(rvals, np.ndarray):
2480 return np.dot(lvals, rvals)
2481 else: # pragma: no cover
2482 raise TypeError(f"unsupported type: {type(other)}")
2484 def __matmul__(self, other):
2485 """
2486 Matrix multiplication using the binary `@` operator (Python >= 3.5).
2487 """
2488 return self.dot(other)
2490 def __rmatmul__(self, other):
2491 """
2492 Matrix multiplication using the binary `@` operator (Python >= 3.5).
2493 """
2494 return self.dot(np.transpose(other))
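``__rmatmul__`` transposes its operand before delegating to ``dot``, so a 2-D array on the left of ``@`` behaves like ordinary matrix-vector multiplication. A small sketch with hypothetical values:

>>> s = pd.Series([1, 2, 3])
>>> m = np.array([[1, 0, 0],
...               [0, 0, 2]])
>>> m @ s # calls s.__rmatmul__(m), i.e. s.dot(m.T)
array([1, 6])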
2496 @Substitution(klass="Series")
2497 @Appender(base._shared_docs["searchsorted"])
2498 def searchsorted(self, value, side="left", sorter=None):
2499 return algorithms.searchsorted(self._values, value, side=side, sorter=sorter)
2501 # -------------------------------------------------------------------
2502 # Combination
2504 def append(self, to_append, ignore_index=False, verify_integrity=False):
2505 """
2506 Concatenate two or more Series.
2508 Parameters
2509 ----------
2510 to_append : Series or list/tuple of Series
2511 Series to append with self.
2512 ignore_index : bool, default False
2513 If True, do not use the index labels.
2514 verify_integrity : bool, default False
2515 If True, raise Exception on creating index with duplicates.
2517 Returns
2518 -------
2519 Series
2520 Concatenated Series.
2522 See Also
2523 --------
2524 concat : General function to concatenate DataFrame or Series objects.
2526 Notes
2527 -----
2528 Iteratively appending to a Series can be more computationally intensive
2529 than a single concatenate. A better solution is to append values to a
2530 list and then concatenate the list with the original Series all at
2531 once.
2533 Examples
2534 --------
2535 >>> s1 = pd.Series([1, 2, 3])
2536 >>> s2 = pd.Series([4, 5, 6])
2537 >>> s3 = pd.Series([4, 5, 6], index=[3, 4, 5])
2538 >>> s1.append(s2)
2539 0 1
2540 1 2
2541 2 3
2542 0 4
2543 1 5
2544 2 6
2545 dtype: int64
2547 >>> s1.append(s3)
2548 0 1
2549 1 2
2550 2 3
2551 3 4
2552 4 5
2553 5 6
2554 dtype: int64
2556 With `ignore_index` set to True:
2558 >>> s1.append(s2, ignore_index=True)
2559 0 1
2560 1 2
2561 2 3
2562 3 4
2563 4 5
2564 5 6
2565 dtype: int64
2567 With `verify_integrity` set to True:
2569 >>> s1.append(s2, verify_integrity=True)
2570 Traceback (most recent call last):
2571 ...
2572 ValueError: Indexes have overlapping values: [0, 1, 2]
2573 """
2574 from pandas.core.reshape.concat import concat
2576 if isinstance(to_append, (list, tuple)):
2577 to_concat = [self]
2578 to_concat.extend(to_append)
2579 else:
2580 to_concat = [self, to_append]
2581 return concat(
2582 to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
2583 )
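Following the Notes above, the efficient pattern is to collect the pieces in a list and concatenate once rather than appending inside a loop; a minimal sketch of that idiom:

>>> pieces = [pd.Series([i, i + 1]) for i in range(3)] # build parts first
>>> pd.concat(pieces, ignore_index=True) # one concatenation
0 0
1 1
2 1
3 2
4 2
5 3
dtype: int64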
2585 def _binop(self, other, func, level=None, fill_value=None):
2586 """
2587 Perform generic binary operation with optional fill value.
2589 Parameters
2590 ----------
2591 other : Series
2592 func : binary operator
2593 fill_value : float or object
2594 Value to substitute for NA/null values. If both Series are NA in a
2595 location, the result will be NA regardless of the passed fill value.
2596 level : int or level name, default None
2597 Broadcast across a level, matching Index values on the
2598 passed MultiIndex level.
2600 Returns
2601 -------
2602 Series
2603 """
2605 if not isinstance(other, Series):
2606 raise AssertionError("Other operand must be Series")
2608 new_index = self.index
2609 this = self
2611 if not self.index.equals(other.index):
2612 this, other = self.align(other, level=level, join="outer", copy=False)
2613 new_index = this.index
2615 this_vals, other_vals = ops.fill_binop(this.values, other.values, fill_value)
2617 with np.errstate(all="ignore"):
2618 result = func(this_vals, other_vals)
2620 name = ops.get_op_result_name(self, other)
2621 ret = ops._construct_result(self, result, new_index, name)
2622 return ret
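The flex arithmetic methods route through ``_binop``: ``fill_value`` substitutes for a one-sided missing value before the operation, while positions missing from both sides stay NA. A minimal sketch via the public ``Series.add``:

>>> a = pd.Series([1.0, np.nan], index=['x', 'y'])
>>> b = pd.Series([2.0, 3.0], index=['y', 'z'])
>>> a.add(b, fill_value=0)
x 1.0
y 2.0
z 3.0
dtype: float64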
2624 def combine(self, other, func, fill_value=None):
2625 """
2626 Combine the Series with a Series or scalar according to `func`.
2628 Combine the Series and `other` using `func` to perform elementwise
2629 selection for the combined Series.
2630 `fill_value` is used when a value is missing at some index
2631 in one of the two objects being combined.
2633 Parameters
2634 ----------
2635 other : Series or scalar
2636 The value(s) to be combined with the `Series`.
2637 func : function
2638 Function that takes two scalars as inputs and returns an element.
2639 fill_value : scalar, optional
2640 The value to assume when an index is missing from
2641 one Series or the other. The default specifies to use the
2642 appropriate NaN value for the underlying dtype of the Series.
2644 Returns
2645 -------
2646 Series
2647 The result of combining the Series with the other object.
2649 See Also
2650 --------
2651 Series.combine_first : Combine Series values, choosing the calling
2652 Series' values first.
2654 Examples
2655 --------
2656 Consider two datasets ``s1`` and ``s2`` containing
2657 the highest clocked speeds of different birds.
2659 >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
2660 >>> s1
2661 falcon 330.0
2662 eagle 160.0
2663 dtype: float64
2664 >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
2665 >>> s2
2666 falcon 345.0
2667 eagle 200.0
2668 duck 30.0
2669 dtype: float64
2671 Now, combine the two datasets to view the highest speed
2672 of each bird across the two datasets:
2674 >>> s1.combine(s2, max)
2675 duck NaN
2676 eagle 200.0
2677 falcon 345.0
2678 dtype: float64
2680 In the previous example, the resulting value for duck is missing,
2681 because the maximum of a NaN and a float is a NaN.
2682 Setting ``fill_value=0`` substitutes 0 for the one-sided missing
2683 value, so the maximum returned comes from whichever dataset has one.
2685 >>> s1.combine(s2, max, fill_value=0)
2686 duck 30.0
2687 eagle 200.0
2688 falcon 345.0
2689 dtype: float64
2690 """
2691 if fill_value is None:
2692 fill_value = na_value_for_dtype(self.dtype, compat=False)
2694 if isinstance(other, Series):
2695 # If other is a Series, result is based on union of Series,
2696 # so do this element by element
2697 new_index = self.index.union(other.index)
2698 new_name = ops.get_op_result_name(self, other)
2699 new_values = []
2700 for idx in new_index:
2701 lv = self.get(idx, fill_value)
2702 rv = other.get(idx, fill_value)
2703 with np.errstate(all="ignore"):
2704 new_values.append(func(lv, rv))
2705 else:
2706 # Assume that other is a scalar, so apply the function for
2707 # each element in the Series
2708 new_index = self.index
2709 with np.errstate(all="ignore"):
2710 new_values = [func(lv, other) for lv in self._values]
2711 new_name = self.name
2713 if is_categorical_dtype(self.values):
2714 pass
2715 elif is_extension_array_dtype(self.values):
2716 # The function can return something of any type, so check
2717 # if the type is compatible with the calling EA.
2718 new_values = try_cast_to_ea(self._values, new_values)
2719 return self._constructor(new_values, index=new_index, name=new_name)
2721 def combine_first(self, other):
2722 """
2723 Combine Series values, choosing the calling Series' values first.
2725 Parameters
2726 ----------
2727 other : Series
2728 The value(s) to be combined with the `Series`.
2730 Returns
2731 -------
2732 Series
2733 The result of combining the Series with the other object.
2735 See Also
2736 --------
2737 Series.combine : Perform elementwise operation on two Series
2738 using a given function.
2740 Notes
2741 -----
2742 Result index will be the union of the two indexes.
2744 Examples
2745 --------
2746 >>> s1 = pd.Series([1, np.nan])
2747 >>> s2 = pd.Series([3, 4])
2748 >>> s1.combine_first(s2)
2749 0 1.0
2750 1 4.0
2751 dtype: float64
2752 """
2753 new_index = self.index.union(other.index)
2754 this = self.reindex(new_index, copy=False)
2755 other = other.reindex(new_index, copy=False)
2756 if this.dtype.kind == "M" and other.dtype.kind != "M":
2757 other = to_datetime(other)
2759 return this.where(notna(this), other)
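The ``dtype.kind == "M"`` branch above coerces the other operand with ``to_datetime`` when the calling Series is datetime-like; a small sketch with hypothetical values:

>>> s1 = pd.Series([pd.Timestamp('2020-01-01'), pd.NaT])
>>> s2 = pd.Series(['2020-02-01', '2020-02-02'])
>>> s1.combine_first(s2) # strings coerced via to_datetime
0 2020-01-01
1 2020-02-02
dtype: datetime64[ns]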
2761 def update(self, other):
2762 """
2763 Modify Series in place using non-NA values from the passed
2764 Series. Aligns on index.
2766 Parameters
2767 ----------
2768 other : Series
2770 Examples
2771 --------
2772 >>> s = pd.Series([1, 2, 3])
2773 >>> s.update(pd.Series([4, 5, 6]))
2774 >>> s
2775 0 4
2776 1 5
2777 2 6
2778 dtype: int64
2780 >>> s = pd.Series(['a', 'b', 'c'])
2781 >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
2782 >>> s
2783 0 d
2784 1 b
2785 2 e
2786 dtype: object
2788 >>> s = pd.Series([1, 2, 3])
2789 >>> s.update(pd.Series([4, 5, 6, 7, 8]))
2790 >>> s
2791 0 4
2792 1 5
2793 2 6
2794 dtype: int64
2796 If ``other`` contains NaNs the corresponding values are not updated
2797 in the original Series.
2799 >>> s = pd.Series([1, 2, 3])
2800 >>> s.update(pd.Series([4, np.nan, 6]))
2801 >>> s
2802 0 4
2803 1 2
2804 2 6
2805 dtype: int64
2806 """
2807 other = other.reindex_like(self)
2808 mask = notna(other)
2810 self._data = self._data.putmask(mask=mask, new=other, inplace=True)
2811 self._maybe_update_cacher()
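The in-place ``putmask`` above follows roughly the same selection rule as the copying ``Series.mask``; a sketch of the correspondence (``mask`` returns a new, possibly upcast object, while ``update`` edits the existing values in place):

>>> s = pd.Series([1, 2, 3])
>>> other = pd.Series([40, np.nan, 60])
>>> s.mask(other.notna(), other) # copying analogue of s.update(other)
0 40.0
1 2.0
2 60.0
dtype: float64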
2813 # ----------------------------------------------------------------------
2814 # Reindexing, sorting
2816 def sort_values(
2817 self,
2818 axis=0,
2819 ascending=True,
2820 inplace=False,
2821 kind="quicksort",
2822 na_position="last",
2823 ignore_index=False,
2824 ):
2825 """
2826 Sort by the values.
2828 Sort a Series in ascending or descending order by some
2829 criterion.
2831 Parameters
2832 ----------
2833 axis : {0 or 'index'}, default 0
2834 Axis to direct sorting. The value 'index' is accepted for
2835 compatibility with DataFrame.sort_values.
2836 ascending : bool, default True
2837 If True, sort values in ascending order, otherwise descending.
2838 inplace : bool, default False
2839 If True, perform operation in-place.
2840 kind : {'quicksort', 'mergesort' or 'heapsort'}, default 'quicksort'
2841 Choice of sorting algorithm. See also :func:`numpy.sort` for more
2842 information. 'mergesort' is the only stable algorithm.
2843 na_position : {'first' or 'last'}, default 'last'
2844 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
2845 the end.
2846 ignore_index : bool, default False
2847 If True, the resulting axis will be labeled 0, 1, …, n - 1.
2849 .. versionadded:: 1.0.0
2851 Returns
2852 -------
2853 Series
2854 Series ordered by values.
2856 See Also
2857 --------
2858 Series.sort_index : Sort by the Series indices.
2859 DataFrame.sort_values : Sort DataFrame by the values along either axis.
2860 DataFrame.sort_index : Sort DataFrame by indices.
2862 Examples
2863 --------
2864 >>> s = pd.Series([np.nan, 1, 3, 10, 5])
2865 >>> s
2866 0 NaN
2867 1 1.0
2868 2 3.0
2869 3 10.0
2870 4 5.0
2871 dtype: float64
2873 Sort values in ascending order (default behaviour)
2875 >>> s.sort_values(ascending=True)
2876 1 1.0
2877 2 3.0
2878 4 5.0
2879 3 10.0
2880 0 NaN
2881 dtype: float64
2883 Sort values in descending order
2885 >>> s.sort_values(ascending=False)
2886 3 10.0
2887 4 5.0
2888 2 3.0
2889 1 1.0
2890 0 NaN
2891 dtype: float64
2893 Sort values inplace
2895 >>> s.sort_values(ascending=False, inplace=True)
2896 >>> s
2897 3 10.0
2898 4 5.0
2899 2 3.0
2900 1 1.0
2901 0 NaN
2902 dtype: float64
2904 Sort values putting NAs first
2906 >>> s.sort_values(na_position='first')
2907 0 NaN
2908 1 1.0
2909 2 3.0
2910 4 5.0
2911 3 10.0
2912 dtype: float64
2914 Sort a series of strings
2916 >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
2917 >>> s
2918 0 z
2919 1 b
2920 2 d
2921 3 a
2922 4 c
2923 dtype: object
2925 >>> s.sort_values()
2926 3 a
2927 1 b
2928 4 c
2929 2 d
2930 0 z
2931 dtype: object
2932 """
2933 inplace = validate_bool_kwarg(inplace, "inplace")
2934 # Validate the axis parameter
2935 self._get_axis_number(axis)
2937 # GH 5856/5853
2938 if inplace and self._is_cached:
2939 raise ValueError(
2940 "This Series is a view of some other array, to "
2941 "sort in-place you must create a copy"
2942 )
2944 def _try_kind_sort(arr):
2945 # easier to ask forgiveness than permission
2946 try:
2947 # if kind==mergesort, it can fail for object dtype
2948 return arr.argsort(kind=kind)
2949 except TypeError:
2950 # stable sort not available for object dtype
2951 # uses the argsort default quicksort
2952 return arr.argsort(kind="quicksort")
2954 arr = self._values
2955 sorted_index = np.empty(len(self), dtype=np.int32)
2957 bad = isna(arr)
2959 good = ~bad
2960 idx = ibase.default_index(len(self))
2962 argsorted = _try_kind_sort(arr[good])
2964 if is_list_like(ascending):
2965 if len(ascending) != 1:
2966 raise ValueError(
2967 f"Length of ascending ({len(ascending)}) must be 1 for Series"
2968 )
2969 ascending = ascending[0]
2971 if not is_bool(ascending):
2972 raise ValueError("ascending must be boolean")
2974 if not ascending:
2975 argsorted = argsorted[::-1]
2977 if na_position == "last":
2978 n = good.sum()
2979 sorted_index[:n] = idx[good][argsorted]
2980 sorted_index[n:] = idx[bad]
2981 elif na_position == "first":
2982 n = bad.sum()
2983 sorted_index[n:] = idx[good][argsorted]
2984 sorted_index[:n] = idx[bad]
2985 else:
2986 raise ValueError(f"invalid na_position: {na_position}")
2988 result = self._constructor(arr[sorted_index], index=self.index[sorted_index])
2990 if ignore_index:
2991 result.index = ibase.default_index(len(sorted_index))
2993 if inplace:
2994 self._update_inplace(result)
2995 else:
2996 return result.__finalize__(self)
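The body above argsorts only the non-NA values and then splices the NA positions in front or behind, depending on ``na_position``. A rough numpy-only sketch of the ``'last'`` branch:

>>> arr = np.array([3.0, np.nan, 1.0])
>>> bad = np.isnan(arr) # mirrors isna(arr) above
>>> order = np.argsort(arr[~bad], kind='quicksort')
>>> np.concatenate([np.flatnonzero(~bad)[order], np.flatnonzero(bad)])
array([2, 0, 1])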
2998 def sort_index(
2999 self,
3000 axis=0,
3001 level=None,
3002 ascending=True,
3003 inplace=False,
3004 kind="quicksort",
3005 na_position="last",
3006 sort_remaining=True,
3007 ignore_index: bool = False,
3008 ):
3009 """
3010 Sort Series by index labels.
3012 Returns a new Series sorted by label if `inplace` argument is
3013 ``False``, otherwise updates the original series and returns None.
3015 Parameters
3016 ----------
3017 axis : int, default 0
3018 Axis to direct sorting. This can only be 0 for Series.
3019 level : int, optional
3020 If not None, sort on values in specified index level(s).
3021 ascending : bool, default True
3022 Sort ascending vs. descending.
3023 inplace : bool, default False
3024 If True, perform operation in-place.
3025 kind : {'quicksort', 'mergesort', 'heapsort'}, default 'quicksort'
3026 Choice of sorting algorithm. See also :func:`numpy.sort` for more
3027 information. 'mergesort' is the only stable algorithm. For
3028 DataFrames, this option is only applied when sorting on a single
3029 column or label.
3030 na_position : {'first', 'last'}, default 'last'
3031 If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
3032 Not implemented for MultiIndex.
3033 sort_remaining : bool, default True
3034 If True and sorting by level and index is multilevel, sort by other
3035 levels too (in order) after sorting by specified level.
3036 ignore_index : bool, default False
3037 If True, the resulting axis will be labeled 0, 1, …, n - 1.
3039 .. versionadded:: 1.0.0
3041 Returns
3042 -------
3043 Series
3044 The original Series sorted by the labels.
3046 See Also
3047 --------
3048 DataFrame.sort_index: Sort DataFrame by the index.
3049 DataFrame.sort_values: Sort DataFrame by the value.
3050 Series.sort_values : Sort Series by the value.
3052 Examples
3053 --------
3054 >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
3055 >>> s.sort_index()
3056 1 c
3057 2 b
3058 3 a
3059 4 d
3060 dtype: object
3062 Sort Descending
3064 >>> s.sort_index(ascending=False)
3065 4 d
3066 3 a
3067 2 b
3068 1 c
3069 dtype: object
3071 Sort Inplace
3073 >>> s.sort_index(inplace=True)
3074 >>> s
3075 1 c
3076 2 b
3077 3 a
3078 4 d
3079 dtype: object
3081 By default NaNs are put at the end, but use `na_position` to place
3082 them at the beginning
3084 >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
3085 >>> s.sort_index(na_position='first')
3086 NaN d
3087 1.0 c
3088 2.0 b
3089 3.0 a
3090 dtype: object
3092 Specify index level to sort
3094 >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
3095 ... 'baz', 'baz', 'bar', 'bar']),
3096 ... np.array(['two', 'one', 'two', 'one',
3097 ... 'two', 'one', 'two', 'one'])]
3098 >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
3099 >>> s.sort_index(level=1)
3100 bar one 8
3101 baz one 6
3102 foo one 4
3103 qux one 2
3104 bar two 7
3105 baz two 5
3106 foo two 3
3107 qux two 1
3108 dtype: int64
3110 Does not sort by the remaining levels when sorting by a level
3112 >>> s.sort_index(level=1, sort_remaining=False)
3113 qux one 2
3114 foo one 4
3115 baz one 6
3116 bar one 8
3117 qux two 1
3118 foo two 3
3119 baz two 5
3120 bar two 7
3121 dtype: int64
3122 """
3123 # TODO: this can be combined with DataFrame.sort_index impl as
3124 # almost identical
3125 inplace = validate_bool_kwarg(inplace, "inplace")
3126 # Validate the axis parameter
3127 self._get_axis_number(axis)
3128 index = self.index
3130 if level is not None:
3131 new_index, indexer = index.sortlevel(
3132 level, ascending=ascending, sort_remaining=sort_remaining
3133 )
3134 elif isinstance(index, MultiIndex):
3135 from pandas.core.sorting import lexsort_indexer
3137 labels = index._sort_levels_monotonic()
3138 indexer = lexsort_indexer(
3139 labels._get_codes_for_sorting(),
3140 orders=ascending,
3141 na_position=na_position,
3142 )
3143 else:
3144 from pandas.core.sorting import nargsort
3146 # Check monotonic-ness before sort an index
3147 # GH11080
3148 if (ascending and index.is_monotonic_increasing) or (
3149 not ascending and index.is_monotonic_decreasing
3150 ):
3151 if inplace:
3152 return
3153 else:
3154 return self.copy()
3156 indexer = nargsort(
3157 index, kind=kind, ascending=ascending, na_position=na_position
3158 )
3160 indexer = ensure_platform_int(indexer)
3161 new_index = index.take(indexer)
3162 new_index = new_index._sort_levels_monotonic()
3164 new_values = self._values.take(indexer)
3165 result = self._constructor(new_values, index=new_index)
3167 if ignore_index:
3168 result.index = ibase.default_index(len(result))
3170 if inplace:
3171 self._update_inplace(result)
3172 else:
3173 return result.__finalize__(self)
3175 def argsort(self, axis=0, kind="quicksort", order=None):
3176 """
3177 Override ndarray.argsort. Argsort the values, omitting NA/null values,
3178 and place the result in the same locations as the non-NA values.
3180 Parameters
3181 ----------
3182 axis : {0 or "index"}
3183 Has no effect but is accepted for compatibility with numpy.
3184 kind : {'mergesort', 'quicksort', 'heapsort'}, default 'quicksort'
3185 Choice of sorting algorithm. See np.sort for more
3186 information. 'mergesort' is the only stable algorithm.
3187 order : None
3188 Has no effect but is accepted for compatibility with numpy.
3190 Returns
3191 -------
3192 Series
3193 Positions of values within the sort order with -1 indicating
3194 nan values.
3196 See Also
3197 --------
3198 numpy.ndarray.argsort
3199 """
3200 values = self._values
3201 mask = isna(values)
3203 if mask.any():
3204 result = Series(-1, index=self.index, name=self.name, dtype="int64")
3205 notmask = ~mask
3206 result[notmask] = np.argsort(values[notmask], kind=kind)
3207 return self._constructor(result, index=self.index).__finalize__(self)
3208 else:
3209 return self._constructor(
3210 np.argsort(values, kind=kind), index=self.index, dtype="int64"
3211 ).__finalize__(self)
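The docstring above carries no example, so a minimal one: NA positions come back as -1, and every other position holds the rank that would sort the non-NA values:

>>> s = pd.Series([3.0, np.nan, 1.0])
>>> s.argsort()
0 1
1 -1
2 0
dtype: int64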
3213 def nlargest(self, n=5, keep="first"):
3214 """
3215 Return the largest `n` elements.
3217 Parameters
3218 ----------
3219 n : int, default 5
3220 Return this many descending sorted values.
3221 keep : {'first', 'last', 'all'}, default 'first'
3222 When there are duplicate values that cannot all fit in a
3223 Series of `n` elements:
3225 - ``first`` : return the first `n` occurrences in order
3226 of appearance.
3227 - ``last`` : return the last `n` occurrences in reverse
3228 order of appearance.
3229 - ``all`` : keep all occurrences. This can result in a Series of
3230 size larger than `n`.
3232 Returns
3233 -------
3234 Series
3235 The `n` largest values in the Series, sorted in decreasing order.
3237 See Also
3238 --------
3239 Series.nsmallest: Get the `n` smallest elements.
3240 Series.sort_values: Sort Series by values.
3241 Series.head: Return the first `n` rows.
3243 Notes
3244 -----
3245 Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
3246 relative to the size of the ``Series`` object.
3248 Examples
3249 --------
3250 >>> countries_population = {"Italy": 59000000, "France": 65000000,
3251 ... "Malta": 434000, "Maldives": 434000,
3252 ... "Brunei": 434000, "Iceland": 337000,
3253 ... "Nauru": 11300, "Tuvalu": 11300,
3254 ... "Anguilla": 11300, "Montserrat": 5200}
3255 >>> s = pd.Series(countries_population)
3256 >>> s
3257 Italy 59000000
3258 France 65000000
3259 Malta 434000
3260 Maldives 434000
3261 Brunei 434000
3262 Iceland 337000
3263 Nauru 11300
3264 Tuvalu 11300
3265 Anguilla 11300
3266 Montserrat 5200
3267 dtype: int64
3269 The `n` largest elements where ``n=5`` by default.
3271 >>> s.nlargest()
3272 France 65000000
3273 Italy 59000000
3274 Malta 434000
3275 Maldives 434000
3276 Brunei 434000
3277 dtype: int64
3279 The `n` largest elements where ``n=3``. Default `keep` value is 'first'
3280 so Malta will be kept.
3282 >>> s.nlargest(3)
3283 France 65000000
3284 Italy 59000000
3285 Malta 434000
3286 dtype: int64
3288 The `n` largest elements where ``n=3`` and keeping the last duplicates.
3289 Brunei will be kept since it is the last with value 434000 based on
3290 the index order.
3292 >>> s.nlargest(3, keep='last')
3293 France 65000000
3294 Italy 59000000
3295 Brunei 434000
3296 dtype: int64
3298 The `n` largest elements where ``n=3`` with all duplicates kept. Note
3299 that the returned Series has five elements due to the three duplicates.
3301 >>> s.nlargest(3, keep='all')
3302 France 65000000
3303 Italy 59000000
3304 Malta 434000
3305 Maldives 434000
3306 Brunei 434000
3307 dtype: int64
3308 """
3309 return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()
3311 def nsmallest(self, n=5, keep="first"):
3312 """
3313 Return the smallest `n` elements.
3315 Parameters
3316 ----------
3317 n : int, default 5
3318 Return this many ascending sorted values.
3319 keep : {'first', 'last', 'all'}, default 'first'
3320 When there are duplicate values that cannot all fit in a
3321 Series of `n` elements:
3323 - ``first`` : return the first `n` occurrences in order
3324 of appearance.
3325 - ``last`` : return the last `n` occurrences in reverse
3326 order of appearance.
3327 - ``all`` : keep all occurrences. This can result in a Series of
3328 size larger than `n`.
3330 Returns
3331 -------
3332 Series
3333 The `n` smallest values in the Series, sorted in increasing order.
3335 See Also
3336 --------
3337 Series.nlargest: Get the `n` largest elements.
3338 Series.sort_values: Sort Series by values.
3339 Series.head: Return the first `n` rows.
3341 Notes
3342 -----
3343 Faster than ``.sort_values().head(n)`` for small `n` relative to
3344 the size of the ``Series`` object.
3346 Examples
3347 --------
3348 >>> countries_population = {"Italy": 59000000, "France": 65000000,
3349 ... "Brunei": 434000, "Malta": 434000,
3350 ... "Maldives": 434000, "Iceland": 337000,
3351 ... "Nauru": 11300, "Tuvalu": 11300,
3352 ... "Anguilla": 11300, "Montserrat": 5200}
3353 >>> s = pd.Series(countries_population)
3354 >>> s
3355 Italy 59000000
3356 France 65000000
3357 Brunei 434000
3358 Malta 434000
3359 Maldives 434000
3360 Iceland 337000
3361 Nauru 11300
3362 Tuvalu 11300
3363 Anguilla 11300
3364 Montserrat 5200
3365 dtype: int64
3367 The `n` smallest elements where ``n=5`` by default.
3369 >>> s.nsmallest()
3370 Montserrat 5200
3371 Nauru 11300
3372 Tuvalu 11300
3373 Anguilla 11300
3374 Iceland 337000
3375 dtype: int64
3377 The `n` smallest elements where ``n=3``. Default `keep` value is
3378 'first' so Nauru and Tuvalu will be kept.
3380 >>> s.nsmallest(3)
3381 Montserrat 5200
3382 Nauru 11300
3383 Tuvalu 11300
3384 dtype: int64
3386 The `n` smallest elements where ``n=3`` and keeping the last
3387 duplicates. Anguilla and Tuvalu will be kept since they are the last
3388 with value 11300 based on the index order.
3390 >>> s.nsmallest(3, keep='last')
3391 Montserrat 5200
3392 Anguilla 11300
3393 Tuvalu 11300
3394 dtype: int64
3396 The `n` smallest elements where ``n=3`` with all duplicates kept. Note
3397 that the returned Series has four elements due to the three duplicates.
3399 >>> s.nsmallest(3, keep='all')
3400 Montserrat 5200
3401 Nauru 11300
3402 Tuvalu 11300
3403 Anguilla 11300
3404 dtype: int64
3405 """
3406 return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()
3408 def swaplevel(self, i=-2, j=-1, copy=True):
3409 """
3410 Swap levels i and j in a :class:`MultiIndex`.
3412 Default is to swap the two innermost levels of the index.
3414 Parameters
3415 ----------
3416 i, j : int, str
3417 Levels of the index to be swapped. Can pass level name as string.
3418 copy : bool, default True
3419 Whether to copy underlying data.
3421 Returns
3422 -------
3423 Series
3424 Series with levels swapped in MultiIndex.
3425 """
3426 new_index = self.index.swaplevel(i, j)
3427 return self._constructor(self._values, index=new_index, copy=copy).__finalize__(
3428 self
3429 )
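``swaplevel`` has no docstring example; a minimal sketch with a hypothetical two-level index (the levels swap, but the row order is unchanged):

>>> mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
>>> s = pd.Series([10, 20, 30, 40], index=mi)
>>> s.swaplevel()
1 a 10
2 a 20
1 b 30
2 b 40
dtype: int64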
3431 def reorder_levels(self, order):
3432 """
3433 Rearrange index levels using input order.
3435 May not drop or duplicate levels.
3437 Parameters
3438 ----------
3439 order : list of int representing new level order
3440 Reference level by number or key.
3442 Returns
3443 -------
3444 type of caller (new object)
3445 """
3446 if not isinstance(self.index, MultiIndex): # pragma: no cover
3447 raise Exception("Can only reorder levels on a hierarchical axis.")
3449 result = self.copy()
3450 result.index = result.index.reorder_levels(order)
3451 return result
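For a two-level index, ``reorder_levels([1, 0])`` gives the same permutation as ``swaplevel()``; with more levels, ``order`` spells out the full arrangement. A quick self-contained check:

>>> mi = pd.MultiIndex.from_product([['a', 'b'], [1, 2]])
>>> s = pd.Series([10, 20, 30, 40], index=mi)
>>> s.reorder_levels([1, 0]).equals(s.swaplevel())
True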
3453 def explode(self) -> "Series":
3454 """
3455 Transform each element of a list-like to a row, replicating the
3456 index values.
3458 .. versionadded:: 0.25.0
3460 Returns
3461 -------
3462 Series
3463 Exploded lists to rows; index will be duplicated for these rows.
3465 See Also
3466 --------
3467 Series.str.split : Split string values on specified separator.
3468 Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
3469 to produce DataFrame.
3470 DataFrame.melt : Unpivot a DataFrame from wide format to long format.
3471 DataFrame.explode : Explode a DataFrame from list-like
3472 columns to long format.
3474 Notes
3475 -----
3476 This routine will explode list-likes including lists, tuples,
3477 Series, and np.ndarray. The result dtype of the subset rows will
3478 be object. Scalars will be returned unchanged. Empty list-likes will
3479 result in a np.nan for that row.
3481 Examples
3482 --------
3483 >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
3484 >>> s
3485 0 [1, 2, 3]
3486 1 foo
3487 2 []
3488 3 [3, 4]
3489 dtype: object
3491 >>> s.explode()
3492 0 1
3493 0 2
3494 0 3
3495 1 foo
3496 2 NaN
3497 3 3
3498 3 4
3499 dtype: object
3500 """
3501 if not len(self) or not is_object_dtype(self):
3502 return self.copy()
3504 values, counts = reshape.explode(np.asarray(self.array))
3506 result = Series(values, index=self.index.repeat(counts), name=self.name)
3507 return result
3509 def unstack(self, level=-1, fill_value=None):
3510 """
3511 Unstack, a.k.a. pivot, Series with MultiIndex to produce DataFrame.
3512 The level involved will automatically get sorted.
3514 Parameters
3515 ----------
3516 level : int, str, or list of these, default last level
3517 Level(s) to unstack, can pass level name.
3518 fill_value : scalar value, default None
3519 Value to use when replacing NaN values.
3521 Returns
3522 -------
3523 DataFrame
3524 Unstacked Series.
3526 Examples
3527 --------
3528 >>> s = pd.Series([1, 2, 3, 4],
3529 ... index=pd.MultiIndex.from_product([['one', 'two'],
3530 ... ['a', 'b']]))
3531 >>> s
3532 one a 1
3533 b 2
3534 two a 3
3535 b 4
3536 dtype: int64
3538 >>> s.unstack(level=-1)
3539 a b
3540 one 1 2
3541 two 3 4
3543 >>> s.unstack(level=0)
3544 one two
3545 a 1 3
3546 b 2 4
3547 """
3548 from pandas.core.reshape.reshape import unstack
3550 return unstack(self, level, fill_value)
3552 # ----------------------------------------------------------------------
3553 # function application
3555 def map(self, arg, na_action=None):
3556 """
3557 Map values of Series according to input correspondence.
3559 Used for substituting each value in a Series with another value
3560 that may be derived from a function, a ``dict`` or
3561 a :class:`Series`.
3563 Parameters
3564 ----------
3565 arg : function, collections.abc.Mapping subclass or Series
3566 Mapping correspondence.
3567 na_action : {None, 'ignore'}, default None
3568 If 'ignore', propagate NaN values, without passing them to the
3569 mapping correspondence.
3571 Returns
3572 -------
3573 Series
3574 Same index as caller.
3576 See Also
3577 --------
3578 Series.apply : For applying more complex functions on a Series.
3579 DataFrame.apply : Apply a function row-/column-wise.
3580 DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
3582 Notes
3583 -----
3584 When ``arg`` is a dictionary, values in Series that are not in the
3585 dictionary (as keys) are converted to ``NaN``. However, if the
3586 dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
3587 provides a method for default values), then this default is used
3588 rather than ``NaN``.
3590 Examples
3591 --------
3592 >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
3593 >>> s
3594 0 cat
3595 1 dog
3596 2 NaN
3597 3 rabbit
3598 dtype: object
3600 ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
3601 in the ``dict`` are converted to ``NaN``, unless the dict has a default
3602 value (e.g. ``defaultdict``):
3604 >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
3605 0 kitten
3606 1 puppy
3607 2 NaN
3608 3 NaN
3609 dtype: object
3611 It also accepts a function:
3613 >>> s.map('I am a {}'.format)
3614 0 I am a cat
3615 1 I am a dog
3616 2 I am a nan
3617 3 I am a rabbit
3618 dtype: object
3620 To avoid applying the function to missing values (and keep them as
3621 ``NaN``) ``na_action='ignore'`` can be used:
3623 >>> s.map('I am a {}'.format, na_action='ignore')
3624 0 I am a cat
3625 1 I am a dog
3626 2 NaN
3627 3 I am a rabbit
3628 dtype: object
3629 """
3630 new_values = super()._map_values(arg, na_action=na_action)
3631 return self._constructor(new_values, index=self.index).__finalize__(self)
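The Notes above mention that a ``dict`` subclass defining ``__missing__`` supplies its default instead of ``NaN``; a minimal sketch with ``collections.defaultdict``:

>>> from collections import defaultdict
>>> d = defaultdict(lambda: 'unknown', {'cat': 'kitten'})
>>> pd.Series(['cat', 'dog']).map(d) # 'dog' gets the default, not NaN
0 kitten
1 unknown
dtype: object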
3633 def _gotitem(self, key, ndim, subset=None):
3634 """
3635 Sub-classes to define. Return a sliced object.
3637 Parameters
3638 ----------
3639 key : string / list of selections
3640 ndim : 1,2
3641 Requested ndim of result.
3642 subset : object, default None
3643 Subset to act on.
3644 """
3645 return self
3647 _agg_see_also_doc = dedent(
3648 """
3649 See Also
3650 --------
3651 Series.apply : Invoke function on a Series.
3652 Series.transform : Transform function producing a Series with like indexes.
3653 """
3654 )
3656 _agg_examples_doc = dedent(
3657 """
3658 Examples
3659 --------
3660 >>> s = pd.Series([1, 2, 3, 4])
3661 >>> s
3662 0 1
3663 1 2
3664 2 3
3665 3 4
3666 dtype: int64
3668 >>> s.agg('min')
3669 1
3671 >>> s.agg(['min', 'max'])
3672 min 1
3673 max 4
3674 dtype: int64
3675 """
3676 )
3678 @Substitution(
3679 see_also=_agg_see_also_doc,
3680 examples=_agg_examples_doc,
3681 versionadded="\n.. versionadded:: 0.20.0\n",
3682 **_shared_doc_kwargs,
3683 )
3684 @Appender(generic._shared_docs["aggregate"])
3685 def aggregate(self, func, axis=0, *args, **kwargs):
3686 # Validate the axis parameter
3687 self._get_axis_number(axis)
3688 result, how = self._aggregate(func, *args, **kwargs)
3689 if result is None:
3691 # we can be called from an inner function which
3692 # passes this meta-data
3693 kwargs.pop("_axis", None)
3694 kwargs.pop("_level", None)
3696 # try a regular apply, this evaluates lambdas
3697 # row-by-row; however if the lambda expects a Series
3698 # expression, e.g.: lambda x: x-x.quantile(0.25)
3699 # this will fail, so we can try a vectorized evaluation
3701 # we cannot FIRST try the vectorized evaluation, because
3702 # then .agg and .apply would have different semantics if the
3703 # operation is actually defined on the Series, e.g. str
3704 try:
3705 result = self.apply(func, *args, **kwargs)
3706 except (ValueError, AttributeError, TypeError):
3707 result = func(self, *args, **kwargs)
3709 return result
3711 agg = aggregate
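The comments above describe the fallback order: a plain callable is first tried element-wise via ``apply``, and only on failure is it evaluated on the whole Series. A lambda that needs Series methods exercises the second path:

>>> s = pd.Series([1, 2, 3, 4])
>>> s.agg(lambda x: x.max() - x.min()) # element-wise apply fails, falls back to func(self)
3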
3713 @Appender(generic._shared_docs["transform"] % _shared_doc_kwargs)
3714 def transform(self, func, axis=0, *args, **kwargs):
3715 # Validate the axis parameter
3716 self._get_axis_number(axis)
3717 return super().transform(func, *args, **kwargs)
3719 def apply(self, func, convert_dtype=True, args=(), **kwds):
3720 """
3721 Invoke function on values of Series.
3723 Can be ufunc (a NumPy function that applies to the entire Series)
3724 or a Python function that only works on single values.
3726 Parameters
3727 ----------
3728 func : function
3729 Python function or NumPy ufunc to apply.
3730 convert_dtype : bool, default True
3731 Try to find better dtype for elementwise function results. If
3732 False, leave as dtype=object.
3733 args : tuple
3734 Positional arguments passed to func after the series value.
3735 **kwds
3736 Additional keyword arguments passed to func.
3738 Returns
3739 -------
3740 Series or DataFrame
3741 If func returns a Series object the result will be a DataFrame.
3743 See Also
3744 --------
3745 Series.map: For element-wise operations.
3746 Series.agg: Only perform aggregating type operations.
3747 Series.transform: Only perform transforming type operations.
3749 Examples
3750 --------
3751 Create a series with typical summer temperatures for each city.
3753 >>> s = pd.Series([20, 21, 12],
3754 ... index=['London', 'New York', 'Helsinki'])
3755 >>> s
3756 London 20
3757 New York 21
3758 Helsinki 12
3759 dtype: int64
3761 Square the values by defining a function and passing it as an
3762 argument to ``apply()``.
3764 >>> def square(x):
3765 ... return x ** 2
3766 >>> s.apply(square)
3767 London 400
3768 New York 441
3769 Helsinki 144
3770 dtype: int64
3772 Square the values by passing an anonymous function as an
3773 argument to ``apply()``.
3775 >>> s.apply(lambda x: x ** 2)
3776 London 400
3777 New York 441
3778 Helsinki 144
3779 dtype: int64
3781 Define a custom function that needs additional positional
3782 arguments and pass these additional arguments using the
3783 ``args`` keyword.
3785 >>> def subtract_custom_value(x, custom_value):
3786 ... return x - custom_value
3788 >>> s.apply(subtract_custom_value, args=(5,))
3789 London 15
3790 New York 16
3791 Helsinki 7
3792 dtype: int64
3794 Define a custom function that takes keyword arguments
3795 and pass these arguments to ``apply``.
3797 >>> def add_custom_values(x, **kwargs):
3798 ... for month in kwargs:
3799 ... x += kwargs[month]
3800 ... return x
3802 >>> s.apply(add_custom_values, june=30, july=20, august=25)
3803 London 95
3804 New York 96
3805 Helsinki 87
3806 dtype: int64
3808 Use a function from the NumPy library.
3810 >>> s.apply(np.log)
3811 London 2.995732
3812 New York 3.044522
3813 Helsinki 2.484907
3814 dtype: float64
3815 """
3816 if len(self) == 0:
3817 return self._constructor(dtype=self.dtype, index=self.index).__finalize__(
3818 self
3819 )
3821 # dispatch to agg
3822 if isinstance(func, (list, dict)):
3823 return self.aggregate(func, *args, **kwds)
3825 # if we are a string, try to dispatch
3826 if isinstance(func, str):
3827 return self._try_aggregate_string_function(func, *args, **kwds)
3829 # handle ufuncs and lambdas
3830 if kwds or (args and not isinstance(func, np.ufunc)):
3832 def f(x):
3833 return func(x, *args, **kwds)
3835 else:
3836 f = func
3838 with np.errstate(all="ignore"):
3839 if isinstance(f, np.ufunc):
3840 return f(self)
3842 # row-wise access
3843 if is_extension_array_dtype(self.dtype) and hasattr(self._values, "map"):
3844 # GH#23179 some EAs do not have `map`
3845 mapped = self._values.map(f)
3846 else:
3847 values = self.astype(object).values
3848 mapped = lib.map_infer(values, f, convert=convert_dtype)
3850 if len(mapped) and isinstance(mapped[0], Series):
3851 # GH 25959 use pd.array instead of tolist
3852 # so extension arrays can be used
3853 return self._constructor_expanddim(pd.array(mapped), index=self.index)
3854 else:
3855 return self._constructor(mapped, index=self.index).__finalize__(self)
3857 def _reduce(
3858 self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
3859 ):
3860 """
3861 Perform a reduction operation.
3863 If we have an ndarray as a value, then simply perform the operation,
3864 otherwise delegate to the object.
3865 """
3866 delegate = self._values
3868 if axis is not None:
3869 self._get_axis_number(axis)
3871 if isinstance(delegate, Categorical):
3872 return delegate._reduce(name, skipna=skipna, **kwds)
3873 elif isinstance(delegate, ExtensionArray):
3874 # dispatch to ExtensionArray interface
3875 return delegate._reduce(name, skipna=skipna, **kwds)
3876 elif is_datetime64_dtype(delegate):
3877 # use DatetimeIndex implementation to handle skipna correctly
3878 delegate = DatetimeIndex(delegate)
3879 elif is_timedelta64_dtype(delegate) and hasattr(TimedeltaIndex, name):
3880 # use TimedeltaIndex to handle skipna correctly
3881 # TODO: remove hasattr check after TimedeltaIndex has `std` method
3882 delegate = TimedeltaIndex(delegate)
3884 # dispatch to numpy arrays
3885 elif isinstance(delegate, np.ndarray):
3886 if numeric_only:
3887 raise NotImplementedError(
3888 f"Series.{name} does not implement numeric_only."
3889 )
3890 with np.errstate(all="ignore"):
3891 return op(delegate, skipna=skipna, **kwds)
3893 # TODO(EA) dispatch to Index
3894 # remove once all internals extension types are
3895 # moved to ExtensionArrays
3896 return delegate._reduce(
3897 op=op,
3898 name=name,
3899 axis=axis,
3900 skipna=skipna,
3901 numeric_only=numeric_only,
3902 filter_type=filter_type,
3903 **kwds,
3904 )
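For plain ndarray-backed series, the branch above rejects ``numeric_only``; a quick check of that guard (the message is the f-string constructed above):

>>> pd.Series([1, 2]).sum(numeric_only=True)
Traceback (most recent call last):
...
NotImplementedError: Series.sum does not implement numeric_only.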
3906 def _reindex_indexer(self, new_index, indexer, copy):
3907 if indexer is None:
3908 if copy:
3909 return self.copy()
3910 return self
3912 new_values = algorithms.take_1d(
3913 self._values, indexer, allow_fill=True, fill_value=None
3914 )
3915 return self._constructor(new_values, index=new_index)
3917 def _needs_reindex_multi(self, axes, method, level):
3918 """
3919 Check if we do need a multi reindex; this is for compat with
3920 higher dims.
3921 """
3922 return False
3924 @Appender(generic._shared_docs["align"] % _shared_doc_kwargs)
3925 def align(
3926 self,
3927 other,
3928 join="outer",
3929 axis=None,
3930 level=None,
3931 copy=True,
3932 fill_value=None,
3933 method=None,
3934 limit=None,
3935 fill_axis=0,
3936 broadcast_axis=None,
3937 ):
3938 return super().align(
3939 other,
3940 join=join,
3941 axis=axis,
3942 level=level,
3943 copy=copy,
3944 fill_value=fill_value,
3945 method=method,
3946 limit=limit,
3947 fill_axis=fill_axis,
3948 broadcast_axis=broadcast_axis,
3949 )
3951 def rename(
3952 self,
3953 index=None,
3954 *,
3955 axis=None,
3956 copy=True,
3957 inplace=False,
3958 level=None,
3959 errors="ignore",
3960 ):
3961 """
3962 Alter Series index labels or name.
3964 Function / dict values must be unique (1-to-1). Labels not contained in
3965 a dict / Series will be left as-is. Extra labels listed don't throw an
3966 error.
3968 Alternatively, change ``Series.name`` with a scalar value.
3970 See the :ref:`user guide <basics.rename>` for more.
3972 Parameters
3973 ----------
3974 axis : {0 or "index"}
3975 Unused. Accepted for compatibility with DataFrame method only.
3976 index : scalar, hashable sequence, dict-like or function, optional
3977 Functions or dict-like are transformations to apply to
3978 the index.
3979 Scalar or hashable sequence-like will alter the ``Series.name``
3980 attribute.
3982 copy, inplace, level, errors : optional, keyword-only
3983 Forwarded to the base-class ``rename`` when ``index`` is dict-like
3984 or a function; see the signature above.
3986 Returns
3987 -------
3988 Series
3989 Series with index labels or name altered.
3991 See Also
3992 --------
3993 DataFrame.rename : Corresponding DataFrame method.
3994 Series.rename_axis : Set the name of the axis.
3996 Examples
3997 --------
3998 >>> s = pd.Series([1, 2, 3])
3999 >>> s
4000 0 1
4001 1 2
4002 2 3
4003 dtype: int64
4004 >>> s.rename("my_name") # scalar, changes Series.name
4005 0 1
4006 1 2
4007 2 3
4008 Name: my_name, dtype: int64
4009 >>> s.rename(lambda x: x ** 2) # function, changes labels
4010 0 1
4011 1 2
4012 4 3
4013 dtype: int64
4014 >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
4015 0 1
4016 3 2
4017 5 3
4018 dtype: int64
4019 """
4020 if callable(index) or is_dict_like(index):
4021 return super().rename(
4022 index, copy=copy, inplace=inplace, level=level, errors=errors
4023 )
4024 else:
4025 return self._set_name(index, inplace=inplace)
4027 @Substitution(**_shared_doc_kwargs)
4028 @Appender(generic.NDFrame.reindex.__doc__)
4029 def reindex(self, index=None, **kwargs):
4030 return super().reindex(index=index, **kwargs)
4032 def drop(
4033 self,
4034 labels=None,
4035 axis=0,
4036 index=None,
4037 columns=None,
4038 level=None,
4039 inplace=False,
4040 errors="raise",
4041 ):
4042 """
4043 Return Series with specified index labels removed.
4045 Remove elements of a Series based on specifying the index labels.
4046 When using a multi-index, labels on different levels can be removed
4047 by specifying the level.
4049 Parameters
4050 ----------
4051 labels : single label or list-like
4052 Index labels to drop.
4053 axis : {0 or 'index'}, default 0
4054 Redundant for application on Series.
4055 index : single label or list-like
4056 Redundant for application on Series, but 'index' can be used instead
4057 of 'labels'.
4059 .. versionadded:: 0.21.0
4060 columns : single label or list-like
4061 No change is made to the Series; use 'index' or 'labels' instead.
4063 .. versionadded:: 0.21.0
4064 level : int or level name, optional
4065 For MultiIndex, level for which the labels will be removed.
4066 inplace : bool, default False
4067 If True, do operation inplace and return None.
4068 errors : {'ignore', 'raise'}, default 'raise'
4069 If 'ignore', suppress error and only existing labels are dropped.
4071 Returns
4072 -------
4073 Series
4074 Series with specified index labels removed.
4076 Raises
4077 ------
4078 KeyError
4079 If none of the labels are found in the index.
4081 See Also
4082 --------
4083 Series.reindex : Return only specified index labels of Series.
4084 Series.dropna : Return series without null values.
4085 Series.drop_duplicates : Return Series with duplicate values removed.
4086 DataFrame.drop : Drop specified labels from rows or columns.
4088 Examples
4089 --------
4090 >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
4091 >>> s
4092 A 0
4093 B 1
4094 C 2
4095 dtype: int64
4097 Drop labels B and C
4099 >>> s.drop(labels=['B', 'C'])
4100 A 0
4101 dtype: int64
4103 Drop 2nd level label in MultiIndex Series
4105 >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
4106 ... ['speed', 'weight', 'length']],
4107 ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
4108 ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
4109 >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
4110 ... index=midx)
4111 >>> s
4112 lama speed 45.0
4113 weight 200.0
4114 length 1.2
4115 cow speed 30.0
4116 weight 250.0
4117 length 1.5
4118 falcon speed 320.0
4119 weight 1.0
4120 length 0.3
4121 dtype: float64
4123 >>> s.drop(labels='weight', level=1)
4124 lama speed 45.0
4125 length 1.2
4126 cow speed 30.0
4127 length 1.5
4128 falcon speed 320.0
4129 length 0.3
4130 dtype: float64
4131 """
4132 return super().drop(
4133 labels=labels,
4134 axis=axis,
4135 index=index,
4136 columns=columns,
4137 level=level,
4138 inplace=inplace,
4139 errors=errors,
4140 )
4142 @Substitution(**_shared_doc_kwargs)
4143 @Appender(generic.NDFrame.fillna.__doc__)
4144 def fillna(
4145 self,
4146 value=None,
4147 method=None,
4148 axis=None,
4149 inplace=False,
4150 limit=None,
4151 downcast=None,
4152 ) -> Optional["Series"]:
4153 return super().fillna(
4154 value=value,
4155 method=method,
4156 axis=axis,
4157 inplace=inplace,
4158 limit=limit,
4159 downcast=downcast,
4160 )
4162 @Appender(generic._shared_docs["replace"] % _shared_doc_kwargs)
4163 def replace(
4164 self,
4165 to_replace=None,
4166 value=None,
4167 inplace=False,
4168 limit=None,
4169 regex=False,
4170 method="pad",
4171 ):
4172 return super().replace(
4173 to_replace=to_replace,
4174 value=value,
4175 inplace=inplace,
4176 limit=limit,
4177 regex=regex,
4178 method=method,
4179 )
4181 @Appender(generic._shared_docs["shift"] % _shared_doc_kwargs)
4182 def shift(self, periods=1, freq=None, axis=0, fill_value=None):
4183 return super().shift(
4184 periods=periods, freq=freq, axis=axis, fill_value=fill_value
4185 )
4187 def memory_usage(self, index=True, deep=False):
4188 """
4189 Return the memory usage of the Series.
4191 The memory usage can optionally include the contribution of
4192 the index and of elements of `object` dtype.
4194 Parameters
4195 ----------
4196 index : bool, default True
4197 Specifies whether to include the memory usage of the Series index.
4198 deep : bool, default False
4199 If True, introspect the data deeply by interrogating
4200 `object` dtypes for system-level memory consumption, and include
4201 it in the returned value.
4203 Returns
4204 -------
4205 int
4206 Bytes of memory consumed.
4208 See Also
4209 --------
4210 numpy.ndarray.nbytes : Total bytes consumed by the elements of the
4211 array.
4212 DataFrame.memory_usage : Bytes consumed by a DataFrame.
4214 Examples
4215 --------
4216 >>> s = pd.Series(range(3))
4217 >>> s.memory_usage()
4218 152
4220 Not including the index gives the size of the rest of the data, which
4221 is necessarily smaller:
4223 >>> s.memory_usage(index=False)
4224 24
4226 The memory footprint of `object` values is ignored by default:
4228 >>> s = pd.Series(["a", "b"])
4229 >>> s.values
4230 array(['a', 'b'], dtype=object)
4231 >>> s.memory_usage()
4232 144
4233 >>> s.memory_usage(deep=True)
4234 260
4235 """
4236 v = super().memory_usage(deep=deep)
4237 if index:
4238 v += self.index.memory_usage(deep=deep)
4239 return v
4241 def isin(self, values):
4242 """
4243 Check whether `values` are contained in Series.
4245 Return a boolean Series showing whether each element in the Series
4246 matches an element in the passed sequence of `values` exactly.
4248 Parameters
4249 ----------
4250 values : set or list-like
4251 The sequence of values to test. Passing in a single string will
4252 raise a ``TypeError``. Instead, turn a single string into a
4253 list of one element.
4255 Returns
4256 -------
4257 Series
4258 Series of booleans indicating if each element is in values.
4260 Raises
4261 ------
4262 TypeError
4263 * If `values` is a string.
4265 See Also
4266 --------
4267 DataFrame.isin : Equivalent method on DataFrame.
4269 Examples
4270 --------
4271 >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
4272 ... 'hippo'], name='animal')
4273 >>> s.isin(['cow', 'lama'])
4274 0 True
4275 1 True
4276 2 True
4277 3 False
4278 4 True
4279 5 False
4280 Name: animal, dtype: bool
4282 Passing a single string as ``s.isin('lama')`` will raise an error. Use
4283 a list of one element instead:
4285 >>> s.isin(['lama'])
4286 0 True
4287 1 False
4288 2 True
4289 3 False
4290 4 True
4291 5 False
4292 Name: animal, dtype: bool
4293 """
4294 result = algorithms.isin(self, values)
4295 return self._constructor(result, index=self.index).__finalize__(self)
4297 def between(self, left, right, inclusive=True):
4298 """
4299 Return boolean Series equivalent to left <= series <= right.
4301 This function returns a boolean vector containing `True` wherever the
4302 corresponding Series element is between the boundary values `left` and
4303 `right`. NA values are treated as `False`.
4305 Parameters
4306 ----------
4307 left : scalar or list-like
4308 Left boundary.
4309 right : scalar or list-like
4310 Right boundary.
4311 inclusive : bool, default True
4312 Include boundaries.
4314 Returns
4315 -------
4316 Series
4317 Series representing whether each element is between left and
4318 right (inclusive).
4320 See Also
4321 --------
4322 Series.gt : Greater than of series and other.
4323 Series.lt : Less than of series and other.
4325 Notes
4326 -----
4327 This function is equivalent to ``(left <= ser) & (ser <= right)``.
4329 Examples
4330 --------
4331 >>> s = pd.Series([2, 0, 4, 8, np.nan])
4333 Boundary values are included by default:
4335 >>> s.between(1, 4)
4336 0 True
4337 1 False
4338 2 True
4339 3 False
4340 4 False
4341 dtype: bool
4343 With `inclusive` set to ``False`` boundary values are excluded:
4345 >>> s.between(1, 4, inclusive=False)
4346 0 True
4347 1 False
4348 2 False
4349 3 False
4350 4 False
4351 dtype: bool
4353 `left` and `right` can be any scalar value:
4355 >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
4356 >>> s.between('Anna', 'Daniel')
4357 0 False
4358 1 True
4359 2 True
4360 3 False
4361 dtype: bool
4362 """
4363 if inclusive:
4364 lmask = self >= left
4365 rmask = self <= right
4366 else:
4367 lmask = self > left
4368 rmask = self < right
4370 return lmask & rmask
4372 # ----------------------------------------------------------------------
4373 # Convert to types that support pd.NA
4375 def _convert_dtypes(
4376 self: ABCSeries,
4377 infer_objects: bool = True,
4378 convert_string: bool = True,
4379 convert_integer: bool = True,
4380 convert_boolean: bool = True,
4381 ) -> "Series":
4382 input_series = self
4383 if infer_objects:
4384 input_series = input_series.infer_objects()
4385 if is_object_dtype(input_series):
4386 input_series = input_series.copy()
4388 if convert_string or convert_integer or convert_boolean:
4389 inferred_dtype = convert_dtypes(
4390 input_series._values, convert_string, convert_integer, convert_boolean
4391 )
4392 try:
4393 result = input_series.astype(inferred_dtype)
4394 except TypeError:
4395 result = input_series.copy()
4396 else:
4397 result = input_series.copy()
4398 return result
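# Sketch of the public entry point (editor's addition, assuming pandas >=
# 1.0, where this private helper backs Series.convert_dtypes): eligible
# object/numpy dtypes are upgraded to nullable extension dtypes that
# support pd.NA, falling back to a plain copy when astype raises:
#   >>> s = pd.Series([1, 2, None], dtype="object")
#   >>> s.convert_dtypes()
#   0       1
#   1       2
#   2    <NA>
#   dtype: Int64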
4400 @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
4401 def isna(self):
4402 return super().isna()
4404 @Appender(generic._shared_docs["isna"] % _shared_doc_kwargs)
4405 def isnull(self):
4406 return super().isnull()
4408 @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
4409 def notna(self):
4410 return super().notna()
4412 @Appender(generic._shared_docs["notna"] % _shared_doc_kwargs)
4413 def notnull(self):
4414 return super().notnull()
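# Quick illustration (editor's addition): isnull/notnull are aliases of
# isna/notna, and all four delegate to the NDFrame implementations:
#   >>> s = pd.Series([1.0, np.nan])
#   >>> s.isna()
#   0    False
#   1     True
#   dtype: bool
#   >>> s.isna().equals(s.isnull())
#   True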
4416 def dropna(self, axis=0, inplace=False, how=None):
4417 """
4418 Return a new Series with missing values removed.
4420 See the :ref:`User Guide <missing_data>` for more on which values are
4421 considered missing, and how to work with missing data.
4423 Parameters
4424 ----------
4425 axis : {0 or 'index'}, default 0
4426 There is only one axis to drop values from.
4427 inplace : bool, default False
4428 If True, do operation inplace and return None.
4429 how : str, optional
4430 Not in use. Kept for compatibility.
4432 Returns
4433 -------
4434 Series
4435 Series with NA entries dropped from it.
4437 See Also
4438 --------
4439 Series.isna: Indicate missing values.
4440 Series.notna : Indicate existing (non-missing) values.
4441 Series.fillna : Replace missing values.
4442 DataFrame.dropna : Drop rows or columns which contain NA values.
4443 Index.dropna : Drop missing indices.
4445 Examples
4446 --------
4447 >>> ser = pd.Series([1., 2., np.nan])
4448 >>> ser
4449 0 1.0
4450 1 2.0
4451 2 NaN
4452 dtype: float64
4454 Drop NA values from a Series.
4456 >>> ser.dropna()
4457 0 1.0
4458 1 2.0
4459 dtype: float64
4461 Keep the Series with valid entries in the same variable.
4463 >>> ser.dropna(inplace=True)
4464 >>> ser
4465 0 1.0
4466 1 2.0
4467 dtype: float64
4469 Empty strings are not considered NA values. ``None`` is considered an
4470 NA value.
4472 >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
4473 >>> ser
4474 0 NaN
4475 1 2
4476 2 NaT
4477 3
4478 4 None
4479 5 I stay
4480 dtype: object
4481 >>> ser.dropna()
4482 1 2
4483 3
4484 5 I stay
4485 dtype: object
4486 """
4487 inplace = validate_bool_kwarg(inplace, "inplace")
4488 # Validate the axis parameter
4489 self._get_axis_number(axis or 0)
4491 if self._can_hold_na:
4492 result = remove_na_arraylike(self)
4493 if inplace:
4494 self._update_inplace(result)
4495 else:
4496 return result
4497 else:
4498 if inplace:
4499 # do nothing
4500 pass
4501 else:
4502 return self.copy()
4504 # ----------------------------------------------------------------------
4505 # Time series-oriented methods
4507 def to_timestamp(self, freq=None, how="start", copy=True):
4508 """
4509 Cast to DatetimeIndex of Timestamps, at *beginning* of period by default.
4511 Parameters
4512 ----------
4513 freq : str, default frequency of PeriodIndex
4514 Desired frequency.
4515 how : {'s', 'e', 'start', 'end'}
4516 Convention for converting period to timestamp; start of period
4517 vs. end.
4518 copy : bool, default True
4519 Whether or not to return a copy.
4521 Returns
4522 -------
4523 Series with DatetimeIndex
4524 """
4525 new_values = self._values
4526 if copy:
4527 new_values = new_values.copy()
4529 new_index = self.index.to_timestamp(freq=freq, how=how)
4530 return self._constructor(new_values, index=new_index).__finalize__(self)
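# Hedged usage sketch (editor's addition; assumes a PeriodIndex-backed
# Series, which this method requires):
#   >>> idx = pd.period_range("2019-01", periods=3, freq="M")
#   >>> s = pd.Series([1, 2, 3], index=idx)
#   >>> s.to_timestamp(how="start").index
#   DatetimeIndex(['2019-01-01', '2019-02-01', '2019-03-01'],
#                 dtype='datetime64[ns]', freq='MS')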
4532 def to_period(self, freq=None, copy=True):
4533 """
4534 Convert Series from DatetimeIndex to PeriodIndex with desired
4535 frequency (inferred from index if not passed).
4537 Parameters
4538 ----------
4539 freq : str, default None
4540 Frequency associated with the PeriodIndex.
4541 copy : bool, default True
4542 Whether or not to return a copy.
4544 Returns
4545 -------
4546 Series
4547 Series with index converted to PeriodIndex.
4548 """
4549 new_values = self._values
4550 if copy:
4551 new_values = new_values.copy()
4553 new_index = self.index.to_period(freq=freq)
4554 return self._constructor(new_values, index=new_index).__finalize__(self)
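# Complementary sketch (editor's addition): to_period is the inverse of
# to_timestamp for a DatetimeIndex-backed Series; with freq=None the
# frequency is inferred from the index:
#   >>> idx = pd.date_range("2019-01-01", periods=3, freq="D")
#   >>> s = pd.Series([1, 2, 3], index=idx)
#   >>> s.to_period().index
#   PeriodIndex(['2019-01-01', '2019-01-02', '2019-01-03'],
#               dtype='period[D]', freq='D')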
4556 # ----------------------------------------------------------------------
4557 # Accessor Methods
4558 # ----------------------------------------------------------------------
4559 str = CachedAccessor("str", StringMethods)
4560 dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
4561 cat = CachedAccessor("cat", CategoricalAccessor)
4562 plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
4563 sparse = CachedAccessor("sparse", SparseAccessor)
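# Accessor behaviour in practice (editor's addition): CachedAccessor
# instantiates the accessor class lazily on first attribute access and
# caches it on the instance, so subsequent lookups are cheap:
#   >>> s = pd.Series(["a", "b"])
#   >>> s.str.upper()
#   0    A
#   1    B
#   dtype: object
#   >>> pd.Series(pd.date_range("2020", periods=2)).dt.year
#   0    2020
#   1    2020
#   dtype: int64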
4565 # ----------------------------------------------------------------------
4566 # Add plotting methods to Series
4567 hist = pandas.plotting.hist_series
4570Series._setup_axes(["index"], docs={"index": "The index (axis labels) of the Series."})
4571Series._add_numeric_operations()
4572Series._add_series_or_dataframe_operations()
4574# Add arithmetic!
4575ops.add_flex_arithmetic_methods(Series)
4576ops.add_special_arithmetic_methods(Series)
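# Effect of the two calls above (editor's illustration): they attach the
# flex methods such as Series.add/sub/mul (which accept fill_value) and
# the corresponding special methods (__add__ and friends):
#   >>> a = pd.Series([1, 2], index=["x", "y"])
#   >>> b = pd.Series([10], index=["x"])
#   >>> a.add(b, fill_value=0)
#   x    11.0
#   y     2.0
#   dtype: float64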