Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pandas/core/arrays/sparse/accessor.py : 31%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Sparse accessor"""
3import numpy as np
5from pandas.compat._optional import import_optional_dependency
7from pandas.core.dtypes.cast import find_common_type
9from pandas.core.accessor import PandasDelegate, delegate_names
10from pandas.core.arrays.sparse.array import SparseArray
11from pandas.core.arrays.sparse.dtype import SparseDtype
class BaseAccessor:
    """
    Shared base class for the ``.sparse`` accessors.

    The accessed object is stored on ``_parent`` and immediately handed to
    :meth:`_validate`, which every concrete accessor must override.
    """

    # Error text subclasses use when the accessed data is not sparse.
    _validation_msg = "Can only use the '.sparse' accessor with Sparse data."

    def __init__(self, data=None):
        """
        Store ``data`` as ``self._parent`` and validate it.

        Parameters
        ----------
        data : object, optional
            The object the accessor is attached to.
        """
        self._parent = data
        self._validate(data)

    def _validate(self, data):
        """
        Check that ``data`` is acceptable for this accessor.

        Raises
        ------
        NotImplementedError
            Always, in the base class; subclasses supply the real check.
        """
        raise NotImplementedError
@delegate_names(
    SparseArray, ["npoints", "density", "fill_value", "sp_values"], typ="property"
)
class SparseAccessor(BaseAccessor, PandasDelegate):
    """
    Accessor for sparse data stored in a Series.

    Exposes selected ``SparseArray`` properties on the Series and provides
    conversions to and from ``scipy.sparse.coo_matrix`` as well as to a
    dense Series.
    """

    def _validate(self, data):
        """
        Ensure ``data`` has a sparse dtype.

        Raises
        ------
        AttributeError
            If ``data.dtype`` is not a ``SparseDtype``.
        """
        if not isinstance(data.dtype, SparseDtype):
            raise AttributeError(self._validation_msg)

    def _delegate_property_get(self, name, *args, **kwargs):
        """
        Fetch the delegated property ``name`` from the parent's array.
        """
        return getattr(self._parent.array, name)

    def _delegate_method(self, name, *args, **kwargs):
        """
        Dispatch the delegated method ``name`` to this accessor.

        Raises
        ------
        ValueError
            If ``name`` is neither ``"from_coo"`` nor ``"to_coo"``.
        """
        if name == "from_coo":
            return self.from_coo(*args, **kwargs)
        elif name == "to_coo":
            return self.to_coo(*args, **kwargs)
        else:
            # Same exception type as before, now with an explanatory message.
            raise ValueError(f"Unsupported delegated method: {name!r}")

    @classmethod
    def from_coo(cls, A, dense_index=False):
        """
        Create a Series with sparse values from a scipy.sparse.coo_matrix.

        Parameters
        ----------
        A : scipy.sparse.coo_matrix
        dense_index : bool, default False
            If False (default), the index of the resulting Series consists
            of only the coords of the non-null entries of the original
            coo_matrix.
            If True, the index consists of the full sorted (row, col)
            coordinates of the coo_matrix.

        Returns
        -------
        s : Series
            A Series with sparse values.

        Examples
        --------
        >>> from scipy import sparse
        >>> A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])),
        ...                       shape=(3, 4))
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  2.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> ss = pd.Series.sparse.from_coo(A)
        >>> ss
        0  2    1.0
           3    2.0
        1  0    3.0
        dtype: Sparse[float64, nan]
        """
        from pandas.core.arrays.sparse.scipy_sparse import _coo_to_sparse_series
        from pandas import Series

        result = _coo_to_sparse_series(A, dense_index=dense_index)
        # Re-wrap the helper's array and index in a plain Series.
        result = Series(result.array, index=result.index, copy=False)

        return result

    def to_coo(self, row_levels=(0,), column_levels=(1,), sort_labels=False):
        """
        Create a scipy.sparse.coo_matrix from a Series with MultiIndex.

        Use row_levels and column_levels to determine the row and column
        coordinates respectively. row_levels and column_levels are the names
        (labels) or numbers of the levels. {row_levels, column_levels} must be
        a partition of the MultiIndex level names (or numbers).

        Parameters
        ----------
        row_levels : tuple/list
        column_levels : tuple/list
        sort_labels : bool, default False
            Sort the row and column labels before forming the sparse matrix.

        Returns
        -------
        y : scipy.sparse.coo_matrix
        rows : list (row labels)
        columns : list (column labels)

        Examples
        --------
        >>> s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan])
        >>> s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0),
        ...                                      (1, 2, 'a', 1),
        ...                                      (1, 1, 'b', 0),
        ...                                      (1, 1, 'b', 1),
        ...                                      (2, 1, 'b', 0),
        ...                                      (2, 1, 'b', 1)],
        ...                                     names=['A', 'B', 'C', 'D'])
        >>> ss = s.astype("Sparse")
        >>> A, rows, columns = ss.sparse.to_coo(row_levels=['A', 'B'],
        ...                                     column_levels=['C', 'D'],
        ...                                     sort_labels=True)
        >>> A
        <3x4 sparse matrix of type '<class 'numpy.float64'>'
                with 3 stored elements in COOrdinate format>
        >>> A.todense()
        matrix([[ 0.,  0.,  1.,  3.],
                [ 3.,  0.,  0.,  0.],
                [ 0.,  0.,  0.,  0.]])
        >>> rows
        [(1, 1), (1, 2), (2, 1)]
        >>> columns
        [('a', 0), ('a', 1), ('b', 0), ('b', 1)]
        """
        from pandas.core.arrays.sparse.scipy_sparse import _sparse_series_to_coo

        # Unpack so that the documented three-element tuple is always returned.
        A, rows, columns = _sparse_series_to_coo(
            self._parent, row_levels, column_levels, sort_labels=sort_labels
        )
        return A, rows, columns

    def to_dense(self):
        """
        Convert a Series from sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        Series:
            A Series with the same values, stored as a dense array.

        Examples
        --------
        >>> series = pd.Series(pd.arrays.SparseArray([0, 1, 0]))
        >>> series
        0    0
        1    1
        2    0
        dtype: Sparse[int64, 0]

        >>> series.sparse.to_dense()
        0    0
        1    1
        2    0
        dtype: int64
        """
        from pandas import Series

        # Keep the parent's index and name; only the storage changes.
        return Series(
            self._parent.array.to_dense(),
            index=self._parent.index,
            name=self._parent.name,
        )
class SparseFrameAccessor(BaseAccessor, PandasDelegate):
    """
    DataFrame accessor for sparse data.

    .. versionadded:: 0.25.0
    """

    def _validate(self, data):
        """
        Ensure every column of ``data`` has a sparse dtype.

        Raises
        ------
        AttributeError
            If any entry of ``data.dtypes`` is not a ``SparseDtype``.
        """
        dtypes = data.dtypes
        if not all(isinstance(t, SparseDtype) for t in dtypes):
            raise AttributeError(self._validation_msg)

    @classmethod
    def from_spmatrix(cls, data, index=None, columns=None):
        """
        Create a new DataFrame from a scipy sparse matrix.

        .. versionadded:: 0.25.0

        Parameters
        ----------
        data : scipy.sparse.spmatrix
            Must be convertible to csc format.
        index, columns : Index, optional
            Row and column labels to use for the resulting DataFrame.
            Defaults to a RangeIndex.

        Returns
        -------
        DataFrame
            Each column of the DataFrame is stored as a
            :class:`arrays.SparseArray`.

        Raises
        ------
        ValueError
            If the length of ``index`` or ``columns`` does not match the
            shape of ``data``.

        Examples
        --------
        >>> import scipy.sparse
        >>> mat = scipy.sparse.eye(3)
        >>> pd.DataFrame.sparse.from_spmatrix(mat)
             0    1    2
        0  1.0  0.0  0.0
        1  0.0  1.0  0.0
        2  0.0  0.0  1.0
        """
        from pandas import DataFrame

        data = data.tocsc()
        index, columns = cls._prep_index(data, index, columns)
        n_columns = data.shape[1]
        sparrays = [SparseArray.from_spmatrix(data[:, i]) for i in range(n_columns)]
        # Build with positional keys first and assign the labels afterwards.
        result = DataFrame(dict(enumerate(sparrays)), index=index)
        result.columns = columns
        return result

    def to_dense(self):
        """
        Convert a DataFrame with sparse values to dense.

        .. versionadded:: 0.25.0

        Returns
        -------
        DataFrame
            A DataFrame with the same values stored as dense arrays.

        Examples
        --------
        >>> df = pd.DataFrame({"A": pd.arrays.SparseArray([0, 1, 0])})
        >>> df.sparse.to_dense()
           A
        0  0
        1  1
        2  0
        """
        from pandas import DataFrame

        data = {k: v.array.to_dense() for k, v in self._parent.items()}
        return DataFrame(data, index=self._parent.index, columns=self._parent.columns)

    def to_coo(self):
        """
        Return the contents of the frame as a sparse SciPy COO matrix.

        .. versionadded:: 0.25.0

        Returns
        -------
        coo_matrix : scipy.sparse.spmatrix
            If the caller is heterogeneous and contains booleans or objects,
            the result will be of dtype=object. See Notes.

        Raises
        ------
        ValueError
            If any column has a fill value other than 0. A COO matrix treats
            every unstored entry as zero, so such a column cannot be
            represented faithfully.

        Notes
        -----
        The dtype will be the lowest-common-denominator type (implicit
        upcasting); that is to say if the dtypes (even of numeric types)
        are mixed, the one that accommodates all will be chosen.

        e.g. If the dtypes are float16 and float32, dtype will be upcast to
        float32. By numpy.find_common_type convention, mixing int64 and
        uint64 will result in a float64 dtype.
        """
        import_optional_dependency("scipy")
        from scipy.sparse import coo_matrix

        # Hand over a plain list rather than the dtypes Series, whose index is
        # the column labels, so no label-based lookup can be triggered on it.
        dtype = find_common_type(list(self._parent.dtypes))
        if isinstance(dtype, SparseDtype):
            dtype = dtype.subtype

        cols, rows, datas = [], [], []
        # Walk the columns by position: looking a column up by label would
        # return a DataFrame when labels are duplicated.
        for col, (_, series) in enumerate(self._parent.items()):
            sp_arr = series.array
            if sp_arr.fill_value != 0:
                raise ValueError("fill value must be 0 when converting to COO matrix")

            row = sp_arr.sp_index.to_int_index().indices
            cols.append(np.repeat(col, len(row)))
            rows.append(row)
            datas.append(sp_arr.sp_values.astype(dtype, copy=False))

        cols = np.concatenate(cols)
        rows = np.concatenate(rows)
        datas = np.concatenate(datas)
        return coo_matrix((datas, (rows, cols)), shape=self._parent.shape)

    @property
    def density(self) -> float:
        """
        Ratio of non-sparse points to total (dense) data points.

        Computed as the mean of the per-column densities.
        """
        return np.mean([column.array.density for _, column in self._parent.items()])

    @staticmethod
    def _prep_index(data, index, columns):
        """
        Fill in default row/column labels and check them against ``data``.

        Parameters
        ----------
        data : object with a two-element ``shape``
        index, columns : Index or None
            ``None`` is replaced by a default index of matching length.

        Returns
        -------
        index, columns

        Raises
        ------
        ValueError
            If ``len(columns)`` or ``len(index)`` disagrees with
            ``data.shape``.
        """
        import pandas.core.indexes.base as ibase

        N, K = data.shape
        if index is None:
            index = ibase.default_index(N)
        if columns is None:
            columns = ibase.default_index(K)

        if len(columns) != K:
            raise ValueError(f"Column length mismatch: {len(columns)} vs. {K}")
        if len(index) != N:
            raise ValueError(f"Index length mismatch: {len(index)} vs. {N}")
        return index, columns