Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/sparse/compressed.py : 11%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""Base class for sparse matrix formats using compressed storage."""
2__all__ = []
4from warnings import warn
5import operator
7import numpy as np
8from scipy._lib._util import _prune_array
10from .base import spmatrix, isspmatrix, SparseEfficiencyWarning
11from .data import _data_matrix, _minmax_mixin
12from .dia import dia_matrix
13from . import _sparsetools
14from ._sparsetools import (get_csr_submatrix, csr_sample_offsets, csr_todense,
15 csr_sample_values, csr_row_index, csr_row_slice,
16 csr_column_index1, csr_column_index2)
17from ._index import IndexMixin
18from .sputils import (upcast, upcast_char, to_native, isdense, isshape,
19 getdtype, isscalarlike, isintlike, get_index_dtype,
20 downcast_intp_index, get_sum_dtype, check_shape,
21 matrix, asmatrix, is_pydata_spmatrix)
24class _cs_matrix(_data_matrix, _minmax_mixin, IndexMixin):
25 """base matrix class for compressed row- and column-oriented matrices"""
    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        """Construct a compressed matrix.

        ``arg1`` may be another sparse matrix, a shape tuple ``(M, N)``,
        a ``(data, ij)`` pair, a ``(data, indices, indptr)`` triple, or
        anything convertible to a dense ndarray.
        """
        _data_matrix.__init__(self)

        if isspmatrix(arg1):
            # Another sparse matrix: convert (or copy) into this format.
            if arg1.format == self.format and copy:
                arg1 = arg1.copy()
            else:
                arg1 = arg1.asformat(self.format)
            self._set_self(arg1)

        elif isinstance(arg1, tuple):
            if isshape(arg1):
                # It's a tuple of matrix dimensions (M, N)
                # create empty matrix
                self._shape = check_shape(arg1)
                M, N = self.shape
                # Select index dtype large enough to pass array and
                # scalar parameters to sparsetools
                idx_dtype = get_index_dtype(maxval=max(M, N))
                self.data = np.zeros(0, getdtype(dtype, default=float))
                self.indices = np.zeros(0, idx_dtype)
                # indptr has (major dimension + 1) entries; _swap maps
                # (M, N) onto (major, minor) for CSR vs CSC.
                self.indptr = np.zeros(self._swap((M, N))[0] + 1,
                                       dtype=idx_dtype)
            else:
                if len(arg1) == 2:
                    # (data, ij) format: route through COO construction.
                    from .coo import coo_matrix
                    other = self.__class__(coo_matrix(arg1, shape=shape))
                    self._set_self(other)
                elif len(arg1) == 3:
                    # (data, indices, indptr) format
                    (data, indices, indptr) = arg1

                    # Select index dtype large enough to pass array and
                    # scalar parameters to sparsetools
                    maxval = None
                    if shape is not None:
                        maxval = max(shape)
                    idx_dtype = get_index_dtype((indices, indptr),
                                                maxval=maxval,
                                                check_contents=True)

                    self.indices = np.array(indices, copy=copy,
                                            dtype=idx_dtype)
                    self.indptr = np.array(indptr, copy=copy, dtype=idx_dtype)
                    self.data = np.array(data, copy=copy, dtype=dtype)
                else:
                    raise ValueError("unrecognized {}_matrix "
                                     "constructor usage".format(self.format))

        else:
            # must be dense
            try:
                arg1 = np.asarray(arg1)
            except Exception:
                raise ValueError("unrecognized {}_matrix constructor usage"
                                 "".format(self.format))
            from .coo import coo_matrix
            self._set_self(self.__class__(coo_matrix(arg1, dtype=dtype)))

        # Read matrix dimensions given, if any
        if shape is not None:
            self._shape = check_shape(shape)
        else:
            if self.shape is None:
                # shape not already set, try to infer dimensions from the
                # index arrays (only possible for the 3-tuple input path)
                try:
                    major_dim = len(self.indptr) - 1
                    minor_dim = self.indices.max() + 1
                except Exception:
                    raise ValueError('unable to infer matrix dimensions')
                else:
                    self._shape = check_shape(self._swap((major_dim,
                                                          minor_dim)))

        if dtype is not None:
            self.data = self.data.astype(dtype, copy=False)

        # Cheap structural validation (O(1) checks only).
        self.check_format(full_check=False)
107 def getnnz(self, axis=None):
108 if axis is None:
109 return int(self.indptr[-1])
110 else:
111 if axis < 0:
112 axis += 2
113 axis, _ = self._swap((axis, 1 - axis))
114 _, N = self._swap(self.shape)
115 if axis == 0:
116 return np.bincount(downcast_intp_index(self.indices),
117 minlength=N)
118 elif axis == 1:
119 return np.diff(self.indptr)
120 raise ValueError('axis out of bounds')
122 getnnz.__doc__ = spmatrix.getnnz.__doc__
124 def _set_self(self, other, copy=False):
125 """take the member variables of other and assign them to self"""
127 if copy:
128 other = other.copy()
130 self.data = other.data
131 self.indices = other.indices
132 self.indptr = other.indptr
133 self._shape = check_shape(other.shape)
    def check_format(self, full_check=True):
        """check whether the matrix format is valid

        Parameters
        ----------
        full_check : bool, optional
            If `True`, rigorous check, O(N) operations. Otherwise
            basic check, O(1) operations (default True).
        """
        # use _swap to determine proper bounds
        major_name, minor_name = self._swap(('row', 'column'))
        major_dim, minor_dim = self._swap(self.shape)

        # index arrays should have integer data types
        if self.indptr.dtype.kind != 'i':
            warn("indptr array has non-integer dtype ({})"
                 "".format(self.indptr.dtype.name), stacklevel=3)
        if self.indices.dtype.kind != 'i':
            warn("indices array has non-integer dtype ({})"
                 "".format(self.indices.dtype.name), stacklevel=3)

        # normalize both index arrays to a common integer dtype and the
        # data array to native byte order before further validation
        idx_dtype = get_index_dtype((self.indptr, self.indices))
        self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
        self.indices = np.asarray(self.indices, dtype=idx_dtype)
        self.data = to_native(self.data)

        # check array shapes
        for x in [self.data.ndim, self.indices.ndim, self.indptr.ndim]:
            if x != 1:
                raise ValueError('data, indices, and indptr should be 1-D')

        # check index pointer
        if (len(self.indptr) != major_dim + 1):
            raise ValueError("index pointer size ({}) should be ({})"
                             "".format(len(self.indptr), major_dim + 1))
        if (self.indptr[0] != 0):
            raise ValueError("index pointer should start with 0")

        # check index and data arrays
        if (len(self.indices) != len(self.data)):
            raise ValueError("indices and data should have the same size")
        if (self.indptr[-1] > len(self.indices)):
            raise ValueError("Last value of index pointer should be less than "
                             "the size of index and data arrays")

        # drop any storage beyond indptr[-1]
        self.prune()

        if full_check:
            # check format validity (more expensive, O(nnz))
            if self.nnz > 0:
                if self.indices.max() >= minor_dim:
                    raise ValueError("{} index values must be < {}"
                                     "".format(minor_name, minor_dim))
                if self.indices.min() < 0:
                    raise ValueError("{} index values must be >= 0"
                                     "".format(minor_name))
                if np.diff(self.indptr).min() < 0:
                    raise ValueError("index pointer values must form a "
                                     "non-decreasing sequence")

        # if not self.has_sorted_indices():
        #     warn('Indices were not in sorted order. Sorting indices.')
        #     self.sort_indices()
        #     assert(self.has_sorted_indices())
        # TODO check for duplicates?
201 #######################
202 # Boolean comparisons #
203 #######################
205 def _scalar_binopt(self, other, op):
206 """Scalar version of self._binopt, for cases in which no new nonzeros
207 are added. Produces a new spmatrix in canonical form.
208 """
209 self.sum_duplicates()
210 res = self._with_data(op(self.data, other), copy=True)
211 res.eliminate_zeros()
212 return res
    def __eq__(self, other):
        """Elementwise equality; returns a sparse boolean matrix, a dense
        comparison result, or a plain bool for incompatible sparse shapes.
        """
        # Scalar other.
        if isscalarlike(other):
            if np.isnan(other):
                # nothing equals nan: an all-False (empty) boolean matrix
                return self.__class__(self.shape, dtype=np.bool_)

            if other == 0:
                warn("Comparing a sparse matrix with 0 using == is inefficient"
                     ", try using != instead.", SparseEfficiencyWarning,
                     stacklevel=3)
                # (x == 0) computed as NOT (x != 0): dense all-True minus
                # the sparse pattern of inequality
                all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
                inv = self._scalar_binopt(other, operator.ne)
                return all_true - inv
            else:
                return self._scalar_binopt(other, operator.eq)
        # Dense other.
        elif isdense(other):
            return self.todense() == other
        # Pydata sparse other.
        elif is_pydata_spmatrix(other):
            return NotImplemented
        # Sparse other.
        elif isspmatrix(other):
            warn("Comparing sparse matrices using == is inefficient, try using"
                 " != instead.", SparseEfficiencyWarning, stacklevel=3)
            # TODO sparse broadcasting
            if self.shape != other.shape:
                return False
            elif self.format != other.format:
                other = other.asformat(self.format)
            # equality via complement of the '_ne_' kernel result
            res = self._binopt(other, '_ne_')
            all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
            return all_true - res
        else:
            return False
250 def __ne__(self, other):
251 # Scalar other.
252 if isscalarlike(other):
253 if np.isnan(other):
254 warn("Comparing a sparse matrix with nan using != is"
255 " inefficient", SparseEfficiencyWarning, stacklevel=3)
256 all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
257 return all_true
258 elif other != 0:
259 warn("Comparing a sparse matrix with a nonzero scalar using !="
260 " is inefficient, try using == instead.",
261 SparseEfficiencyWarning, stacklevel=3)
262 all_true = self.__class__(np.ones(self.shape), dtype=np.bool_)
263 inv = self._scalar_binopt(other, operator.eq)
264 return all_true - inv
265 else:
266 return self._scalar_binopt(other, operator.ne)
267 # Dense other.
268 elif isdense(other):
269 return self.todense() != other
270 # Pydata sparse other.
271 elif is_pydata_spmatrix(other):
272 return NotImplemented
273 # Sparse other.
274 elif isspmatrix(other):
275 # TODO sparse broadcasting
276 if self.shape != other.shape:
277 return True
278 elif self.format != other.format:
279 other = other.asformat(self.format)
280 return self._binopt(other, '_ne_')
281 else:
282 return True
    def _inequality(self, other, op, op_name, bad_scalar_msg):
        """Shared implementation for <, >, <=, >=.

        ``op`` is the Python operator, ``op_name`` the sparsetools kernel
        name, and ``bad_scalar_msg`` the efficiency warning used when the
        scalar comparison would densify the result.
        """
        # Scalar other.
        if isscalarlike(other):
            if 0 == other and op_name in ('_le_', '_ge_'):
                # result would be all-True on the zero entries
                raise NotImplementedError(" >= and <= don't work with 0.")
            elif op(0, other):
                # implicit zeros compare True, so the result is dense
                warn(bad_scalar_msg, SparseEfficiencyWarning)
                other_arr = np.empty(self.shape, dtype=np.result_type(other))
                other_arr.fill(other)
                other_arr = self.__class__(other_arr)
                return self._binopt(other_arr, op_name)
            else:
                return self._scalar_binopt(other, op)
        # Dense other.
        elif isdense(other):
            return op(self.todense(), other)
        # Sparse other.
        elif isspmatrix(other):
            # TODO sparse broadcasting
            if self.shape != other.shape:
                raise ValueError("inconsistent shapes")
            elif self.format != other.format:
                other = other.asformat(self.format)
            if op_name not in ('_ge_', '_le_'):
                return self._binopt(other, op_name)

            # >= and <= computed as the complement of the strict opposite
            warn("Comparing sparse matrices using >= and <= is inefficient, "
                 "using <, >, or !=, instead.", SparseEfficiencyWarning)
            all_true = self.__class__(np.ones(self.shape, dtype=np.bool_))
            res = self._binopt(other, '_gt_' if op_name == '_le_' else '_lt_')
            return all_true - res
        else:
            raise ValueError("Operands could not be compared.")
318 def __lt__(self, other):
319 return self._inequality(other, operator.lt, '_lt_',
320 "Comparing a sparse matrix with a scalar "
321 "greater than zero using < is inefficient, "
322 "try using >= instead.")
324 def __gt__(self, other):
325 return self._inequality(other, operator.gt, '_gt_',
326 "Comparing a sparse matrix with a scalar "
327 "less than zero using > is inefficient, "
328 "try using <= instead.")
330 def __le__(self, other):
331 return self._inequality(other, operator.le, '_le_',
332 "Comparing a sparse matrix with a scalar "
333 "greater than zero using <= is inefficient, "
334 "try using > instead.")
336 def __ge__(self, other):
337 return self._inequality(other, operator.ge, '_ge_',
338 "Comparing a sparse matrix with a scalar "
339 "less than zero using >= is inefficient, "
340 "try using < instead.")
342 #################################
343 # Arithmetic operator overrides #
344 #################################
    def _add_dense(self, other):
        """Add a dense array of the same shape; returns a dense matrix."""
        if other.shape != self.shape:
            raise ValueError('Incompatible shapes.')
        dtype = upcast_char(self.dtype.char, other.dtype.char)
        # Pick the memory order matching this format's major axis ('C' for
        # CSR, 'F' for CSC) so the buffer can be filled as row-major CSR.
        order = self._swap('CF')[0]
        result = np.array(other, dtype=dtype, order=order, copy=True)
        M, N = self._swap(self.shape)
        # csr_todense wants a C-contiguous target; for an F-ordered buffer
        # its transpose is the C-contiguous view (no copy).
        y = result if result.flags.c_contiguous else result.T
        csr_todense(M, N, self.indptr, self.indices, self.data, y)
        return matrix(result, copy=False)
357 def _add_sparse(self, other):
358 return self._binopt(other, '_plus_')
360 def _sub_sparse(self, other):
361 return self._binopt(other, '_minus_')
    def multiply(self, other):
        """Point-wise multiplication by another matrix, vector, or
        scalar.

        Supports broadcasting between sparse operands and between a sparse
        operand and a dense array, dispatching on operand shapes.
        """
        # Scalar multiplication.
        if isscalarlike(other):
            return self._mul_scalar(other)
        # Sparse matrix or vector.
        if isspmatrix(other):
            if self.shape == other.shape:
                other = self.__class__(other)
                return self._binopt(other, '_elmul_')
            # Single element.
            elif other.shape == (1, 1):
                return self._mul_scalar(other.toarray()[0, 0])
            elif self.shape == (1, 1):
                return other._mul_scalar(self.toarray()[0, 0])
            # A row times a column.
            elif self.shape[1] == 1 and other.shape[0] == 1:
                return self._mul_sparse_matrix(other.tocsc())
            elif self.shape[0] == 1 and other.shape[1] == 1:
                return other._mul_sparse_matrix(self.tocsc())
            # Row vector times matrix. other is a row.
            # (broadcasting implemented as multiplication by a diagonal)
            elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
                other = dia_matrix((other.toarray().ravel(), [0]),
                                   shape=(other.shape[1], other.shape[1]))
                return self._mul_sparse_matrix(other)
            # self is a row.
            elif self.shape[0] == 1 and self.shape[1] == other.shape[1]:
                copy = dia_matrix((self.toarray().ravel(), [0]),
                                  shape=(self.shape[1], self.shape[1]))
                return other._mul_sparse_matrix(copy)
            # Column vector times matrix. other is a column.
            elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
                other = dia_matrix((other.toarray().ravel(), [0]),
                                   shape=(other.shape[0], other.shape[0]))
                return other._mul_sparse_matrix(self)
            # self is a column.
            elif self.shape[1] == 1 and self.shape[0] == other.shape[0]:
                copy = dia_matrix((self.toarray().ravel(), [0]),
                                  shape=(self.shape[0], self.shape[0]))
                return copy._mul_sparse_matrix(other)
            else:
                raise ValueError("inconsistent shapes")

        # Assume other is a dense matrix/array, which produces a single-item
        # object array if other isn't convertible to ndarray.
        other = np.atleast_2d(other)

        if other.ndim != 2:
            return np.multiply(self.toarray(), other)
        # Single element / wrapped object.
        if other.size == 1:
            return self._mul_scalar(other.flat[0])
        # Fast case for trivial sparse matrix.
        elif self.shape == (1, 1):
            return np.multiply(self.toarray()[0, 0], other)

        # General sparse-by-dense broadcasting, computed in COO form.
        from .coo import coo_matrix
        ret = self.tocoo()
        # Matching shapes.
        if self.shape == other.shape:
            data = np.multiply(ret.data, other[ret.row, ret.col])
        # Sparse row vector times...
        elif self.shape[0] == 1:
            if other.shape[1] == 1:  # Dense column vector.
                data = np.multiply(ret.data, other)
            elif other.shape[1] == self.shape[1]:  # Dense matrix.
                data = np.multiply(ret.data, other[:, ret.col])
            else:
                raise ValueError("inconsistent shapes")
            # Replicate the sparse pattern down each broadcast row.
            row = np.repeat(np.arange(other.shape[0]), len(ret.row))
            col = np.tile(ret.col, other.shape[0])
            return coo_matrix((data.view(np.ndarray).ravel(), (row, col)),
                              shape=(other.shape[0], self.shape[1]),
                              copy=False)
        # Sparse column vector times...
        elif self.shape[1] == 1:
            if other.shape[0] == 1:  # Dense row vector.
                data = np.multiply(ret.data[:, None], other)
            elif other.shape[0] == self.shape[0]:  # Dense matrix.
                data = np.multiply(ret.data[:, None], other[ret.row])
            else:
                raise ValueError("inconsistent shapes")
            # Replicate the sparse pattern across each broadcast column.
            row = np.repeat(ret.row, other.shape[1])
            col = np.tile(np.arange(other.shape[1]), len(ret.col))
            return coo_matrix((data.view(np.ndarray).ravel(), (row, col)),
                              shape=(self.shape[0], other.shape[1]),
                              copy=False)
        # Sparse matrix times dense row vector.
        elif other.shape[0] == 1 and self.shape[1] == other.shape[1]:
            data = np.multiply(ret.data, other[:, ret.col].ravel())
        # Sparse matrix times dense column vector.
        elif other.shape[1] == 1 and self.shape[0] == other.shape[0]:
            data = np.multiply(ret.data, other[ret.row].ravel())
        else:
            raise ValueError("inconsistent shapes")
        ret.data = data.view(np.ndarray).ravel()
        return ret
463 ###########################
464 # Multiplication handlers #
465 ###########################
467 def _mul_vector(self, other):
468 M, N = self.shape
470 # output array
471 result = np.zeros(M, dtype=upcast_char(self.dtype.char,
472 other.dtype.char))
474 # csr_matvec or csc_matvec
475 fn = getattr(_sparsetools, self.format + '_matvec')
476 fn(M, N, self.indptr, self.indices, self.data, other, result)
478 return result
480 def _mul_multivector(self, other):
481 M, N = self.shape
482 n_vecs = other.shape[1] # number of column vectors
484 result = np.zeros((M, n_vecs),
485 dtype=upcast_char(self.dtype.char, other.dtype.char))
487 # csr_matvecs or csc_matvecs
488 fn = getattr(_sparsetools, self.format + '_matvecs')
489 fn(M, N, n_vecs, self.indptr, self.indices, self.data,
490 other.ravel(), result.ravel())
492 return result
    def _mul_sparse_matrix(self, other):
        """Sparse matrix product (self @ other) via the matmat kernels."""
        M, K1 = self.shape
        K2, N = other.shape

        major_axis = self._swap((M, N))[0]
        other = self.__class__(other)  # convert to this format

        idx_dtype = get_index_dtype((self.indptr, self.indices,
                                     other.indptr, other.indices))

        # First pass: count the nnz of the result without computing it.
        fn = getattr(_sparsetools, self.format + '_matmat_maxnnz')
        nnz = fn(M, N,
                 np.asarray(self.indptr, dtype=idx_dtype),
                 np.asarray(self.indices, dtype=idx_dtype),
                 np.asarray(other.indptr, dtype=idx_dtype),
                 np.asarray(other.indices, dtype=idx_dtype))

        # Re-select the index dtype now that the output nnz is known.
        idx_dtype = get_index_dtype((self.indptr, self.indices,
                                     other.indptr, other.indices),
                                    maxval=nnz)

        indptr = np.empty(major_axis + 1, dtype=idx_dtype)
        indices = np.empty(nnz, dtype=idx_dtype)
        data = np.empty(nnz, dtype=upcast(self.dtype, other.dtype))

        # Second pass: compute the actual product into the buffers.
        fn = getattr(_sparsetools, self.format + '_matmat')
        fn(M, N, np.asarray(self.indptr, dtype=idx_dtype),
           np.asarray(self.indices, dtype=idx_dtype),
           self.data,
           np.asarray(other.indptr, dtype=idx_dtype),
           np.asarray(other.indices, dtype=idx_dtype),
           other.data,
           indptr, indices, data)

        return self.__class__((data, indices, indptr), shape=(M, N))
    def diagonal(self, k=0):
        rows, cols = self.shape
        # Diagonals entirely outside the matrix are empty.
        if k <= -rows or k >= cols:
            return np.empty(0, dtype=self.data.dtype)
        fn = getattr(_sparsetools, self.format + "_diagonal")
        # Length of the k-th diagonal of a (rows x cols) matrix.
        y = np.empty(min(rows + min(k, 0), cols - max(k, 0)),
                     dtype=upcast(self.dtype))
        fn(k, self.shape[0], self.shape[1], self.indptr, self.indices,
           self.data, y)
        return y

    diagonal.__doc__ = spmatrix.diagonal.__doc__
543 #####################
544 # Other binary ops #
545 #####################
    def _maximum_minimum(self, other, npop, op_name, dense_check):
        """Shared implementation of maximum() and minimum().

        ``npop`` is the numpy ufunc, ``op_name`` the sparsetools kernel
        name, and ``dense_check`` a predicate that is True when the scalar
        operand would make the result dense.
        """
        if isscalarlike(other):
            if dense_check(other):
                # e.g. maximum with a positive scalar lifts all the
                # implicit zeros, so the result is dense
                warn("Taking maximum (minimum) with > 0 (< 0) number results"
                     " to a dense matrix.", SparseEfficiencyWarning,
                     stacklevel=3)
                other_arr = np.empty(self.shape, dtype=np.asarray(other).dtype)
                other_arr.fill(other)
                other_arr = self.__class__(other_arr)
                return self._binopt(other_arr, op_name)
            else:
                # sparsity pattern is preserved; apply to stored data only
                self.sum_duplicates()
                new_data = npop(self.data, np.asarray(other))
                mat = self.__class__((new_data, self.indices, self.indptr),
                                     dtype=new_data.dtype, shape=self.shape)
                return mat
        elif isdense(other):
            return npop(self.todense(), other)
        elif isspmatrix(other):
            return self._binopt(other, op_name)
        else:
            raise ValueError("Operands not compatible.")
570 def maximum(self, other):
571 return self._maximum_minimum(other, np.maximum,
572 '_maximum_', lambda x: np.asarray(x) > 0)
574 maximum.__doc__ = spmatrix.maximum.__doc__
576 def minimum(self, other):
577 return self._maximum_minimum(other, np.minimum,
578 '_minimum_', lambda x: np.asarray(x) < 0)
580 minimum.__doc__ = spmatrix.minimum.__doc__
582 #####################
583 # Reduce operations #
584 #####################
    def sum(self, axis=None, dtype=None, out=None):
        """Sum the matrix over the given axis. If the axis is None, sum
        over both rows and columns, returning a scalar.
        """
        # The spmatrix base class already does axis=0 and axis=1 efficiently
        # so we only do the case axis=None here
        if (not hasattr(self, 'blocksize') and
                axis in self._swap(((1, -1), (0, 2)))[0]):
            # faster than multiplication for large minor axis in CSC/CSR
            res_dtype = get_sum_dtype(self.dtype)
            ret = np.zeros(len(self.indptr) - 1, dtype=res_dtype)

            # reduce over the minor axis; zeros stay zero in ret
            major_index, value = self._minor_reduce(np.add)
            ret[major_index] = value
            ret = asmatrix(ret)
            if axis % 2 == 1:
                ret = ret.T

            if out is not None and out.shape != ret.shape:
                raise ValueError('dimensions do not match')

            # axis=() sum is a no-op that handles dtype/out uniformly
            return ret.sum(axis=(), dtype=dtype, out=out)
        # spmatrix will handle the remaining situations when axis
        # is in {None, -1, 0, 1}
        else:
            return spmatrix.sum(self, axis=axis, dtype=dtype, out=out)

    sum.__doc__ = spmatrix.sum.__doc__
    def _minor_reduce(self, ufunc, data=None):
        """Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.

        Warning: this does not call sum_duplicates()

        Returns
        -------
        major_index : array of ints
            Major indices where nonzero

        value : array of self.dtype
            Reduce result for nonzeros in each major_index
        """
        if data is None:
            data = self.data
        # major positions with at least one stored entry
        major_index = np.flatnonzero(np.diff(self.indptr))
        # reduceat applies the ufunc over each non-empty data segment
        value = ufunc.reduceat(data,
                               downcast_intp_index(self.indptr[major_index]))
        return major_index, value
637 #######################
638 # Getting and Setting #
639 #######################
641 def _get_intXint(self, row, col):
642 M, N = self._swap(self.shape)
643 major, minor = self._swap((row, col))
644 indptr, indices, data = get_csr_submatrix(
645 M, N, self.indptr, self.indices, self.data,
646 major, major + 1, minor, minor + 1)
647 return data.sum(dtype=self.dtype)
649 def _get_sliceXslice(self, row, col):
650 major, minor = self._swap((row, col))
651 if major.step in (1, None) and minor.step in (1, None):
652 return self._get_submatrix(major, minor, copy=True)
653 return self._major_slice(major)._minor_slice(minor)
    def _get_arrayXarray(self, row, col):
        # inner indexing: one value per (row[i], col[i]) pair
        idx_dtype = self.indices.dtype
        M, N = self._swap(self.shape)
        major, minor = self._swap((row, col))
        major = np.asarray(major, dtype=idx_dtype)
        minor = np.asarray(minor, dtype=idx_dtype)

        val = np.empty(major.size, dtype=self.dtype)
        csr_sample_values(M, N, self.indptr, self.indices, self.data,
                          major.size, major.ravel(), minor.ravel(), val)
        # 1-D index arrays yield a flat matrix; 2-D arrays yield a matrix
        # of the same shape
        if major.ndim == 1:
            return asmatrix(val)
        return self.__class__(val.reshape(major.shape))
670 def _get_columnXarray(self, row, col):
671 # outer indexing
672 major, minor = self._swap((row, col))
673 return self._major_index_fancy(major)._minor_index_fancy(minor)
    def _major_index_fancy(self, idx):
        """Index along the major axis where idx is an array of ints.
        """
        idx_dtype = self.indices.dtype
        indices = np.asarray(idx, dtype=idx_dtype).ravel()

        _, N = self._swap(self.shape)
        M = len(indices)
        new_shape = self._swap((M, N))
        if M == 0:
            return self.__class__(new_shape)

        # new indptr from the per-line nnz counts of the selected lines
        row_nnz = np.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = np.zeros(M+1, dtype=idx_dtype)
        np.cumsum(row_nnz[idx], out=res_indptr[1:])

        # gather the selected lines' indices and data
        nnz = res_indptr[-1]
        res_indices = np.empty(nnz, dtype=idx_dtype)
        res_data = np.empty(nnz, dtype=self.dtype)
        csr_row_index(M, indices, self.indptr, self.indices, self.data,
                      res_indices, res_data)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
    def _major_slice(self, idx, copy=False):
        """Index along the major axis where idx is a slice object.
        """
        if idx == slice(None):
            return self.copy() if copy else self

        M, N = self._swap(self.shape)
        start, stop, step = idx.indices(M)
        M = len(range(start, stop, step))
        new_shape = self._swap((M, N))
        if M == 0:
            return self.__class__(new_shape)

        # new indptr from the per-line nnz counts of the sliced lines
        row_nnz = np.diff(self.indptr)
        idx_dtype = self.indices.dtype
        res_indptr = np.zeros(M+1, dtype=idx_dtype)
        np.cumsum(row_nnz[idx], out=res_indptr[1:])

        if step == 1:
            # unit step: the kept entries form one contiguous range
            all_idx = slice(self.indptr[start], self.indptr[stop])
            res_indices = np.array(self.indices[all_idx], copy=copy)
            res_data = np.array(self.data[all_idx], copy=copy)
        else:
            # strided step: gather line by line with the compiled kernel
            nnz = res_indptr[-1]
            res_indices = np.empty(nnz, dtype=idx_dtype)
            res_data = np.empty(nnz, dtype=self.dtype)
            csr_row_slice(start, stop, step, self.indptr, self.indices,
                          self.data, res_indices, res_data)

        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
    def _minor_index_fancy(self, idx):
        """Index along the minor axis where idx is an array of ints.
        """
        idx_dtype = self.indices.dtype
        idx = np.asarray(idx, dtype=idx_dtype).ravel()

        M, N = self._swap(self.shape)
        k = len(idx)
        new_shape = self._swap((M, k))
        if k == 0:
            return self.__class__(new_shape)

        # pass 1: count idx entries and compute new indptr
        col_offsets = np.zeros(N, dtype=idx_dtype)
        res_indptr = np.empty_like(self.indptr)
        csr_column_index1(k, idx, M, N, self.indptr, self.indices,
                          col_offsets, res_indptr)

        # pass 2: copy indices/data for selected idxs
        col_order = np.argsort(idx).astype(idx_dtype, copy=False)
        nnz = res_indptr[-1]
        res_indices = np.empty(nnz, dtype=idx_dtype)
        res_data = np.empty(nnz, dtype=self.dtype)
        csr_column_index2(col_order, col_offsets, len(self.indices),
                          self.indices, self.data, res_indices, res_data)
        return self.__class__((res_data, res_indices, res_indptr),
                              shape=new_shape, copy=False)
761 def _minor_slice(self, idx, copy=False):
762 """Index along the minor axis where idx is a slice object.
763 """
764 if idx == slice(None):
765 return self.copy() if copy else self
767 M, N = self._swap(self.shape)
768 start, stop, step = idx.indices(N)
769 N = len(range(start, stop, step))
770 if N == 0:
771 return self.__class__(self._swap((M, N)))
772 if step == 1:
773 return self._get_submatrix(minor=idx, copy=copy)
774 # TODO: don't fall back to fancy indexing here
775 return self._minor_index_fancy(np.arange(start, stop, step))
    def _get_submatrix(self, major=None, minor=None, copy=False):
        """Return a submatrix of this matrix.

        major, minor: None, int, or slice with step 1
        """
        M, N = self._swap(self.shape)
        # _process_slice normalizes each argument to a [start, stop) pair
        i0, i1 = _process_slice(major, M)
        j0, j1 = _process_slice(minor, N)

        # full-matrix selection: avoid the kernel call entirely
        if i0 == 0 and j0 == 0 and i1 == M and j1 == N:
            return self.copy() if copy else self

        indptr, indices, data = get_csr_submatrix(
            M, N, self.indptr, self.indices, self.data, i0, i1, j0, j1)

        shape = self._swap((i1 - i0, j1 - j0))
        return self.__class__((data, indices, indptr), shape=shape,
                              dtype=self.dtype, copy=False)
796 def _set_intXint(self, row, col, x):
797 i, j = self._swap((row, col))
798 self._set_many(i, j, x)
800 def _set_arrayXarray(self, row, col, x):
801 i, j = self._swap((row, col))
802 self._set_many(i, j, x)
    def _set_arrayXarray_sparse(self, row, col, x):
        """Assign a sparse (COO-like) block ``x`` at array positions,
        broadcasting a 1-wide/1-tall ``x`` across the target region.
        """
        # clear entries that will be overwritten
        self._zero_many(*self._swap((row, col)))

        M, N = row.shape  # matches col.shape
        broadcast_row = M != 1 and x.shape[0] == 1
        broadcast_col = N != 1 and x.shape[1] == 1
        r, c = x.row, x.col

        x = np.asarray(x.data, dtype=self.dtype)
        if broadcast_row:
            # replicate x's single row down all M target rows
            r = np.repeat(np.arange(M), len(r))
            c = np.tile(c, M)
            x = np.tile(x, M)
        if broadcast_col:
            # replicate x's single column across all N target columns
            r = np.repeat(r, N)
            c = np.tile(np.arange(N), len(c))
            x = np.repeat(x, N)
        # only assign entries in the new sparsity structure
        i, j = self._swap((row[r, c], col[r, c]))
        self._set_many(i, j, x)
    def _setdiag(self, values, k):
        """Set the k-th diagonal to ``values`` (scalar broadcasts)."""
        if 0 in self.shape:
            return

        M, N = self.shape
        # 0-d values broadcasts along the whole diagonal
        broadcast = (values.ndim == 0)

        if k < 0:
            if broadcast:
                max_index = min(M + k, N)
            else:
                max_index = min(M + k, N, len(values))
            i = np.arange(max_index, dtype=self.indices.dtype)
            j = np.arange(max_index, dtype=self.indices.dtype)
            # sub-diagonal: shift row indices down by |k|
            i -= k

        else:
            if broadcast:
                max_index = min(M, N - k)
            else:
                max_index = min(M, N - k, len(values))
            i = np.arange(max_index, dtype=self.indices.dtype)
            j = np.arange(max_index, dtype=self.indices.dtype)
            # super-diagonal: shift column indices right by k
            j += k

        if not broadcast:
            # truncate values that fall outside the matrix
            values = values[:len(i)]

        self[i, j] = values
855 def _prepare_indices(self, i, j):
856 M, N = self._swap(self.shape)
858 def check_bounds(indices, bound):
859 idx = indices.max()
860 if idx >= bound:
861 raise IndexError('index (%d) out of range (>= %d)' %
862 (idx, bound))
863 idx = indices.min()
864 if idx < -bound:
865 raise IndexError('index (%d) out of range (< -%d)' %
866 (idx, bound))
868 i = np.array(i, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
869 j = np.array(j, dtype=self.indices.dtype, copy=False, ndmin=1).ravel()
870 check_bounds(i, M)
871 check_bounds(j, N)
872 return i, j, M, N
    def _set_many(self, i, j, x):
        """Sets value at each (i, j) to x

        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.
        """
        i, j, M, N = self._prepare_indices(i, j)
        x = np.array(x, dtype=self.dtype, copy=False, ndmin=1).ravel()

        # look up the position of each (i, j) in self.data; -1 = absent
        n_samples = x.size
        offsets = np.empty(n_samples, dtype=self.indices.dtype)
        ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                                 i, j, offsets)
        if ret == 1:
            # rinse and repeat: duplicates prevented a clean lookup
            self.sum_duplicates()
            csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                               i, j, offsets)

        if -1 not in offsets:
            # only affects existing non-zero cells
            self.data[offsets] = x
            return

        else:
            warn("Changing the sparsity structure of a {}_matrix is expensive."
                 " lil_matrix is more efficient.".format(self.format),
                 SparseEfficiencyWarning, stacklevel=3)
            # replace where possible
            mask = offsets > -1
            self.data[offsets[mask]] = x[mask]
            # only insertions remain
            mask = ~mask
            i = i[mask]
            i[i < 0] += M
            j = j[mask]
            j[j < 0] += N
            self._insert_many(i, j, x[mask])
    def _zero_many(self, i, j):
        """Sets value at each (i, j) to zero, preserving sparsity structure.

        Here (i,j) index major and minor respectively.
        """
        i, j, M, N = self._prepare_indices(i, j)

        # locate each (i, j) in self.data; -1 marks entries not stored
        n_samples = len(i)
        offsets = np.empty(n_samples, dtype=self.indices.dtype)
        ret = csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                                 i, j, offsets)
        if ret == 1:
            # rinse and repeat: duplicates prevented a clean lookup
            self.sum_duplicates()
            csr_sample_offsets(M, N, self.indptr, self.indices, n_samples,
                               i, j, offsets)

        # only assign zeros to the existing sparsity structure
        self.data[offsets[offsets > -1]] = 0
    def _insert_many(self, i, j, x):
        """Inserts new nonzero at each (i, j) with value x

        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        Modifies i, j, x in place.
        """
        # group insertions by major index, preserving input order within
        # each group (stable sort) so "last setting wins" below is correct
        order = np.argsort(i, kind='mergesort')  # stable for duplicates
        i = i.take(order, mode='clip')
        j = j.take(order, mode='clip')
        x = x.take(order, mode='clip')

        do_sort = self.has_sorted_indices

        # Update index data type
        idx_dtype = get_index_dtype((self.indices, self.indptr),
                                    maxval=(self.indptr[-1] + x.size))
        self.indptr = np.asarray(self.indptr, dtype=idx_dtype)
        self.indices = np.asarray(self.indices, dtype=idx_dtype)
        i = np.asarray(i, dtype=idx_dtype)
        j = np.asarray(j, dtype=idx_dtype)

        # Collate old and new in chunks by major index
        indices_parts = []
        data_parts = []
        ui, ui_indptr = np.unique(i, return_index=True)
        ui_indptr = np.append(ui_indptr, len(j))
        new_nnzs = np.diff(ui_indptr)
        prev = 0
        for c, (ii, js, je) in enumerate(zip(ui, ui_indptr, ui_indptr[1:])):
            # old entries up to (but not including) major line ii
            start = self.indptr[prev]
            stop = self.indptr[ii]
            indices_parts.append(self.indices[start:stop])
            data_parts.append(self.data[start:stop])

            # handle duplicate j: keep last setting
            uj, uj_indptr = np.unique(j[js:je][::-1], return_index=True)
            if len(uj) == je - js:
                indices_parts.append(j[js:je])
                data_parts.append(x[js:je])
            else:
                # reversed unique keeps the last occurrence of each j
                indices_parts.append(j[js:je][::-1][uj_indptr])
                data_parts.append(x[js:je][::-1][uj_indptr])
                new_nnzs[c] = len(uj)

            prev = ii

        # remaining old entries
        start = self.indptr[ii]
        indices_parts.append(self.indices[start:])
        data_parts.append(self.data[start:])

        # update attributes
        self.indices = np.concatenate(indices_parts)
        self.data = np.concatenate(data_parts)
        # rebuild indptr by adding the per-line insertion counts
        nnzs = np.empty(self.indptr.shape, dtype=idx_dtype)
        nnzs[0] = idx_dtype(0)
        indptr_diff = np.diff(self.indptr)
        indptr_diff[ui] += new_nnzs
        nnzs[1:] = indptr_diff
        self.indptr = np.cumsum(nnzs, out=nnzs)

        if do_sort:
            # TODO: only sort where necessary
            self.has_sorted_indices = False
            self.sort_indices()

        self.check_format(full_check=False)
1005 ######################
1006 # Conversion methods #
1007 ######################
1009 def tocoo(self, copy=True):
1010 major_dim, minor_dim = self._swap(self.shape)
1011 minor_indices = self.indices
1012 major_indices = np.empty(len(minor_indices), dtype=self.indices.dtype)
1013 _sparsetools.expandptr(major_dim, self.indptr, major_indices)
1014 row, col = self._swap((major_indices, minor_indices))
1016 from .coo import coo_matrix
1017 return coo_matrix((self.data, (row, col)), self.shape, copy=copy,
1018 dtype=self.dtype)
1020 tocoo.__doc__ = spmatrix.tocoo.__doc__
1022 def toarray(self, order=None, out=None):
1023 if out is None and order is None:
1024 order = self._swap('cf')[0]
1025 out = self._process_toarray_args(order, out)
1026 if not (out.flags.c_contiguous or out.flags.f_contiguous):
1027 raise ValueError('Output array must be C or F contiguous')
1028 # align ideal order with output array order
1029 if out.flags.c_contiguous:
1030 x = self.tocsr()
1031 y = out
1032 else:
1033 x = self.tocsc()
1034 y = out.T
1035 M, N = x._swap(x.shape)
1036 csr_todense(M, N, x.indptr, x.indices, x.data, y)
1037 return out
1039 toarray.__doc__ = spmatrix.toarray.__doc__
1041 ##############################################################
1042 # methods that examine or modify the internal data structure #
1043 ##############################################################
1045 def eliminate_zeros(self):
1046 """Remove zero entries from the matrix
1048 This is an *in place* operation
1049 """
1050 M, N = self._swap(self.shape)
1051 _sparsetools.csr_eliminate_zeros(M, N, self.indptr, self.indices,
1052 self.data)
1053 self.prune() # nnz may have changed
    def __get_has_canonical_format(self):
        """Determine whether the matrix has sorted indices and no duplicates

        Returns
            - True: if the above applies
            - False: otherwise

        has_canonical_format implies has_sorted_indices, so if the latter flag
        is False, so will the former be; if the former is found True, the
        latter flag is also set.
        """

        # first check to see if result was cached
        if not getattr(self, '_has_sorted_indices', True):
            # not sorted => not canonical
            self._has_canonical_format = False
        elif not hasattr(self, '_has_canonical_format'):
            # Not cached: ask sparsetools.  Assigning through the property
            # (not the underscore attribute) also sets has_sorted_indices
            # when the result is True.
            self.has_canonical_format = _sparsetools.csr_has_canonical_format(
                len(self.indptr) - 1, self.indptr, self.indices)
        return self._has_canonical_format

    def __set_has_canonical_format(self, val):
        # Canonical implies sorted, so setting True propagates to the
        # has_sorted_indices flag as well.
        self._has_canonical_format = bool(val)
        if val:
            self.has_sorted_indices = True

    has_canonical_format = property(fget=__get_has_canonical_format,
                                    fset=__set_has_canonical_format)
1084 def sum_duplicates(self):
1085 """Eliminate duplicate matrix entries by adding them together
1087 The is an *in place* operation
1088 """
1089 if self.has_canonical_format:
1090 return
1091 self.sort_indices()
1093 M, N = self._swap(self.shape)
1094 _sparsetools.csr_sum_duplicates(M, N, self.indptr, self.indices,
1095 self.data)
1097 self.prune() # nnz may have changed
1098 self.has_canonical_format = True
1100 def __get_sorted(self):
1101 """Determine whether the matrix has sorted indices
1103 Returns
1104 - True: if the indices of the matrix are in sorted order
1105 - False: otherwise
1107 """
1109 # first check to see if result was cached
1110 if not hasattr(self, '_has_sorted_indices'):
1111 self._has_sorted_indices = _sparsetools.csr_has_sorted_indices(
1112 len(self.indptr) - 1, self.indptr, self.indices)
1113 return self._has_sorted_indices
1115 def __set_sorted(self, val):
1116 self._has_sorted_indices = bool(val)
1118 has_sorted_indices = property(fget=__get_sorted, fset=__set_sorted)
1120 def sorted_indices(self):
1121 """Return a copy of this matrix with sorted indices
1122 """
1123 A = self.copy()
1124 A.sort_indices()
1125 return A
1127 # an alternative that has linear complexity is the following
1128 # although the previous option is typically faster
1129 # return self.toother().toother()
1131 def sort_indices(self):
1132 """Sort the indices of this matrix *in place*
1133 """
1135 if not self.has_sorted_indices:
1136 _sparsetools.csr_sort_indices(len(self.indptr) - 1, self.indptr,
1137 self.indices, self.data)
1138 self.has_sorted_indices = True
1140 def prune(self):
1141 """Remove empty space after all non-zero elements.
1142 """
1143 major_dim = self._swap(self.shape)[0]
1145 if len(self.indptr) != major_dim + 1:
1146 raise ValueError('index pointer has invalid length')
1147 if len(self.indices) < self.nnz:
1148 raise ValueError('indices array has fewer than nnz elements')
1149 if len(self.data) < self.nnz:
1150 raise ValueError('data array has fewer than nnz elements')
1152 self.indices = _prune_array(self.indices[:self.nnz])
1153 self.data = _prune_array(self.data[:self.nnz])
    def resize(self, *shape):
        shape = check_shape(shape)
        if hasattr(self, 'blocksize'):
            # BSR: the new shape must be a whole number of blocks; work in
            # units of blocks from here on.
            bm, bn = self.blocksize
            new_M, rm = divmod(shape[0], bm)
            new_N, rn = divmod(shape[1], bn)
            if rm or rn:
                raise ValueError("shape must be divisible into %s blocks. "
                                 "Got %s" % (self.blocksize, shape))
            M, N = self.shape[0] // bm, self.shape[1] // bn
        else:
            new_M, new_N = self._swap(shape)
            M, N = self._swap(self.shape)

        if new_M < M:
            # Shrinking along the major axis: truncate the structure arrays
            # at the boundary of the last surviving major slice.
            self.indices = self.indices[:self.indptr[new_M]]
            self.data = self.data[:self.indptr[new_M]]
            self.indptr = self.indptr[:new_M + 1]
        elif new_M > M:
            # Growing along the major axis: pad indptr so the new trailing
            # rows/columns are empty.
            self.indptr = np.resize(self.indptr, new_M + 1)
            self.indptr[M + 1:].fill(self.indptr[M])

        if new_N < N:
            # Shrinking along the minor axis: drop out-of-range entries and
            # rebuild indptr from the per-major counts of survivors.
            mask = self.indices < new_N
            if not np.all(mask):
                self.indices = self.indices[mask]
                self.data = self.data[mask]
                major_index, val = self._minor_reduce(np.add, mask)
                self.indptr.fill(0)
                self.indptr[1:][major_index] = val
                np.cumsum(self.indptr, out=self.indptr)

        self._shape = shape

    resize.__doc__ = spmatrix.resize.__doc__
1191 ###################
1192 # utility methods #
1193 ###################
1195 # needed by _data_matrix
1196 def _with_data(self, data, copy=True):
1197 """Returns a matrix with the same sparsity structure as self,
1198 but with different data. By default the structure arrays
1199 (i.e. .indptr and .indices) are copied.
1200 """
1201 if copy:
1202 return self.__class__((data, self.indices.copy(),
1203 self.indptr.copy()),
1204 shape=self.shape,
1205 dtype=data.dtype)
1206 else:
1207 return self.__class__((data, self.indices, self.indptr),
1208 shape=self.shape, dtype=data.dtype)
1210 def _binopt(self, other, op):
1211 """apply the binary operation fn to two sparse matrices."""
1212 other = self.__class__(other)
1214 # e.g. csr_plus_csr, csr_minus_csr, etc.
1215 fn = getattr(_sparsetools, self.format + op + self.format)
1217 maxnnz = self.nnz + other.nnz
1218 idx_dtype = get_index_dtype((self.indptr, self.indices,
1219 other.indptr, other.indices),
1220 maxval=maxnnz)
1221 indptr = np.empty(self.indptr.shape, dtype=idx_dtype)
1222 indices = np.empty(maxnnz, dtype=idx_dtype)
1224 bool_ops = ['_ne_', '_lt_', '_gt_', '_le_', '_ge_']
1225 if op in bool_ops:
1226 data = np.empty(maxnnz, dtype=np.bool_)
1227 else:
1228 data = np.empty(maxnnz, dtype=upcast(self.dtype, other.dtype))
1230 fn(self.shape[0], self.shape[1],
1231 np.asarray(self.indptr, dtype=idx_dtype),
1232 np.asarray(self.indices, dtype=idx_dtype),
1233 self.data,
1234 np.asarray(other.indptr, dtype=idx_dtype),
1235 np.asarray(other.indices, dtype=idx_dtype),
1236 other.data,
1237 indptr, indices, data)
1239 A = self.__class__((data, indices, indptr), shape=self.shape)
1240 A.prune()
1242 return A
1244 def _divide_sparse(self, other):
1245 """
1246 Divide this matrix by a second sparse matrix.
1247 """
1248 if other.shape != self.shape:
1249 raise ValueError('inconsistent shapes')
1251 r = self._binopt(other, '_eldiv_')
1253 if np.issubdtype(r.dtype, np.inexact):
1254 # Eldiv leaves entries outside the combined sparsity
1255 # pattern empty, so they must be filled manually.
1256 # Everything outside of other's sparsity is NaN, and everything
1257 # inside it is either zero or defined by eldiv.
1258 out = np.empty(self.shape, dtype=self.dtype)
1259 out.fill(np.nan)
1260 row, col = other.nonzero()
1261 out[row, col] = 0
1262 r = r.tocoo()
1263 out[r.row, r.col] = r.data
1264 out = matrix(out)
1265 else:
1266 # integers types go with nan <-> 0
1267 out = r
1269 return out
1272def _process_slice(sl, num):
1273 if sl is None:
1274 i0, i1 = 0, num
1275 elif isinstance(sl, slice):
1276 i0, i1, stride = sl.indices(num)
1277 if stride != 1:
1278 raise ValueError('slicing with step != 1 not supported')
1279 i0 = min(i0, i1) # give an empty slice when i0 > i1
1280 elif isintlike(sl):
1281 if sl < 0:
1282 sl += num
1283 i0, i1 = sl, sl + 1
1284 if i0 < 0 or i1 > num:
1285 raise IndexError('index out of bounds: 0 <= %d < %d <= %d' %
1286 (i0, i1, num))
1287 else:
1288 raise TypeError('expected slice or scalar')
1290 return i0, i1