Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/sparse/coo.py : 14%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1""" A sparse matrix in COOrdinate or 'triplet' format"""
3__docformat__ = "restructuredtext en"
5__all__ = ['coo_matrix', 'isspmatrix_coo']
7from warnings import warn
9import numpy as np
12from ._sparsetools import coo_tocsr, coo_todense, coo_matvec
13from .base import isspmatrix, SparseEfficiencyWarning, spmatrix
14from .data import _data_matrix, _minmax_mixin
15from .sputils import (upcast, upcast_char, to_native, isshape, getdtype,
16 get_index_dtype, downcast_intp_index, check_shape,
17 check_reshape_kwargs, matrix)
19import operator
22class coo_matrix(_data_matrix, _minmax_mixin):
23 """
24 A sparse matrix in COOrdinate format.
26 Also known as the 'ijv' or 'triplet' format.
28 This can be instantiated in several ways:
29 coo_matrix(D)
30 with a dense matrix D
32 coo_matrix(S)
33 with another sparse matrix S (equivalent to S.tocoo())
35 coo_matrix((M, N), [dtype])
36 to construct an empty matrix with shape (M, N)
37 dtype is optional, defaulting to dtype='d'.
39 coo_matrix((data, (i, j)), [shape=(M, N)])
40 to construct from three arrays:
41 1. data[:] the entries of the matrix, in any order
42 2. i[:] the row indices of the matrix entries
43 3. j[:] the column indices of the matrix entries
45 Where ``A[i[k], j[k]] = data[k]``. When shape is not
46 specified, it is inferred from the index arrays
48 Attributes
49 ----------
50 dtype : dtype
51 Data type of the matrix
52 shape : 2-tuple
53 Shape of the matrix
54 ndim : int
55 Number of dimensions (this is always 2)
56 nnz
57 Number of stored values, including explicit zeros
58 data
59 COO format data array of the matrix
60 row
61 COO format row index array of the matrix
62 col
63 COO format column index array of the matrix
65 Notes
66 -----
68 Sparse matrices can be used in arithmetic operations: they support
69 addition, subtraction, multiplication, division, and matrix power.
71 Advantages of the COO format
72 - facilitates fast conversion among sparse formats
73 - permits duplicate entries (see example)
74 - very fast conversion to and from CSR/CSC formats
76 Disadvantages of the COO format
77 - does not directly support:
78 + arithmetic operations
79 + slicing
81 Intended Usage
82 - COO is a fast format for constructing sparse matrices
83 - Once a matrix has been constructed, convert to CSR or
84 CSC format for fast arithmetic and matrix vector operations
85 - By default when converting to CSR or CSC format, duplicate (i,j)
86 entries will be summed together. This facilitates efficient
87 construction of finite element matrices and the like. (see example)
89 Examples
90 --------
92 >>> # Constructing an empty matrix
93 >>> from scipy.sparse import coo_matrix
94 >>> coo_matrix((3, 4), dtype=np.int8).toarray()
95 array([[0, 0, 0, 0],
96 [0, 0, 0, 0],
97 [0, 0, 0, 0]], dtype=int8)
99 >>> # Constructing a matrix using ijv format
100 >>> row = np.array([0, 3, 1, 0])
101 >>> col = np.array([0, 3, 1, 2])
102 >>> data = np.array([4, 5, 7, 9])
103 >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray()
104 array([[4, 0, 9, 0],
105 [0, 7, 0, 0],
106 [0, 0, 0, 0],
107 [0, 0, 0, 5]])
109 >>> # Constructing a matrix with duplicate indices
110 >>> row = np.array([0, 0, 1, 3, 1, 0, 0])
111 >>> col = np.array([0, 2, 1, 3, 1, 0, 0])
112 >>> data = np.array([1, 1, 1, 1, 1, 1, 1])
113 >>> coo = coo_matrix((data, (row, col)), shape=(4, 4))
114 >>> # Duplicate indices are maintained until implicitly or explicitly summed
115 >>> np.max(coo.data)
116 1
117 >>> coo.toarray()
118 array([[3, 0, 1, 0],
119 [0, 2, 0, 0],
120 [0, 0, 0, 0],
121 [0, 0, 0, 1]])
123 """
124 format = 'coo'
126 def __init__(self, arg1, shape=None, dtype=None, copy=False):
127 _data_matrix.__init__(self)
129 if isinstance(arg1, tuple):
130 if isshape(arg1):
131 M, N = arg1
132 self._shape = check_shape((M, N))
133 idx_dtype = get_index_dtype(maxval=max(M, N))
134 self.row = np.array([], dtype=idx_dtype)
135 self.col = np.array([], dtype=idx_dtype)
136 self.data = np.array([], getdtype(dtype, default=float))
137 self.has_canonical_format = True
138 else:
139 try:
140 obj, (row, col) = arg1
141 except (TypeError, ValueError):
142 raise TypeError('invalid input format')
144 if shape is None:
145 if len(row) == 0 or len(col) == 0:
146 raise ValueError('cannot infer dimensions from zero '
147 'sized index arrays')
148 M = operator.index(np.max(row)) + 1
149 N = operator.index(np.max(col)) + 1
150 self._shape = check_shape((M, N))
151 else:
152 # Use 2 steps to ensure shape has length 2.
153 M, N = shape
154 self._shape = check_shape((M, N))
156 idx_dtype = get_index_dtype(maxval=max(self.shape))
157 self.row = np.array(row, copy=copy, dtype=idx_dtype)
158 self.col = np.array(col, copy=copy, dtype=idx_dtype)
159 self.data = np.array(obj, copy=copy)
160 self.has_canonical_format = False
162 else:
163 if isspmatrix(arg1):
164 if isspmatrix_coo(arg1) and copy:
165 self.row = arg1.row.copy()
166 self.col = arg1.col.copy()
167 self.data = arg1.data.copy()
168 self._shape = check_shape(arg1.shape)
169 else:
170 coo = arg1.tocoo()
171 self.row = coo.row
172 self.col = coo.col
173 self.data = coo.data
174 self._shape = check_shape(coo.shape)
175 self.has_canonical_format = False
176 else:
177 #dense argument
178 M = np.atleast_2d(np.asarray(arg1))
180 if M.ndim != 2:
181 raise TypeError('expected dimension <= 2 array or matrix')
183 self._shape = check_shape(M.shape)
184 if shape is not None:
185 if check_shape(shape) != self._shape:
186 raise ValueError('inconsistent shapes: %s != %s' %
187 (shape, self._shape))
189 self.row, self.col = M.nonzero()
190 self.data = M[self.row, self.col]
191 self.has_canonical_format = True
193 if dtype is not None:
194 self.data = self.data.astype(dtype, copy=False)
196 self._check()
198 def reshape(self, *args, **kwargs):
199 shape = check_shape(args, self.shape)
200 order, copy = check_reshape_kwargs(kwargs)
202 # Return early if reshape is not required
203 if shape == self.shape:
204 if copy:
205 return self.copy()
206 else:
207 return self
209 nrows, ncols = self.shape
211 if order == 'C':
212 # Upcast to avoid overflows: the coo_matrix constructor
213 # below will downcast the results to a smaller dtype, if
214 # possible.
215 dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
217 flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col
218 new_row, new_col = divmod(flat_indices, shape[1])
219 elif order == 'F':
220 dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1)))
222 flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row
223 new_col, new_row = divmod(flat_indices, shape[0])
224 else:
225 raise ValueError("'order' must be 'C' or 'F'")
227 # Handle copy here rather than passing on to the constructor so that no
228 # copy will be made of new_row and new_col regardless
229 if copy:
230 new_data = self.data.copy()
231 else:
232 new_data = self.data
234 return coo_matrix((new_data, (new_row, new_col)),
235 shape=shape, copy=False)
237 reshape.__doc__ = spmatrix.reshape.__doc__
239 def getnnz(self, axis=None):
240 if axis is None:
241 nnz = len(self.data)
242 if nnz != len(self.row) or nnz != len(self.col):
243 raise ValueError('row, column, and data array must all be the '
244 'same length')
246 if self.data.ndim != 1 or self.row.ndim != 1 or \
247 self.col.ndim != 1:
248 raise ValueError('row, column, and data arrays must be 1-D')
250 return int(nnz)
252 if axis < 0:
253 axis += 2
254 if axis == 0:
255 return np.bincount(downcast_intp_index(self.col),
256 minlength=self.shape[1])
257 elif axis == 1:
258 return np.bincount(downcast_intp_index(self.row),
259 minlength=self.shape[0])
260 else:
261 raise ValueError('axis out of bounds')
263 getnnz.__doc__ = spmatrix.getnnz.__doc__
265 def _check(self):
266 """ Checks data structure for consistency """
268 # index arrays should have integer data types
269 if self.row.dtype.kind != 'i':
270 warn("row index array has non-integer dtype (%s) "
271 % self.row.dtype.name)
272 if self.col.dtype.kind != 'i':
273 warn("col index array has non-integer dtype (%s) "
274 % self.col.dtype.name)
276 idx_dtype = get_index_dtype(maxval=max(self.shape))
277 self.row = np.asarray(self.row, dtype=idx_dtype)
278 self.col = np.asarray(self.col, dtype=idx_dtype)
279 self.data = to_native(self.data)
281 if self.nnz > 0:
282 if self.row.max() >= self.shape[0]:
283 raise ValueError('row index exceeds matrix dimensions')
284 if self.col.max() >= self.shape[1]:
285 raise ValueError('column index exceeds matrix dimensions')
286 if self.row.min() < 0:
287 raise ValueError('negative row index found')
288 if self.col.min() < 0:
289 raise ValueError('negative column index found')
291 def transpose(self, axes=None, copy=False):
292 if axes is not None:
293 raise ValueError(("Sparse matrices do not support "
294 "an 'axes' parameter because swapping "
295 "dimensions is the only logical permutation."))
297 M, N = self.shape
298 return coo_matrix((self.data, (self.col, self.row)),
299 shape=(N, M), copy=copy)
301 transpose.__doc__ = spmatrix.transpose.__doc__
303 def resize(self, *shape):
304 shape = check_shape(shape)
305 new_M, new_N = shape
306 M, N = self.shape
308 if new_M < M or new_N < N:
309 mask = np.logical_and(self.row < new_M, self.col < new_N)
310 if not mask.all():
311 self.row = self.row[mask]
312 self.col = self.col[mask]
313 self.data = self.data[mask]
315 self._shape = shape
317 resize.__doc__ = spmatrix.resize.__doc__
319 def toarray(self, order=None, out=None):
320 """See the docstring for `spmatrix.toarray`."""
321 B = self._process_toarray_args(order, out)
322 fortran = int(B.flags.f_contiguous)
323 if not fortran and not B.flags.c_contiguous:
324 raise ValueError("Output array must be C or F contiguous")
325 M,N = self.shape
326 coo_todense(M, N, self.nnz, self.row, self.col, self.data,
327 B.ravel('A'), fortran)
328 return B
330 def tocsc(self, copy=False):
331 """Convert this matrix to Compressed Sparse Column format
333 Duplicate entries will be summed together.
335 Examples
336 --------
337 >>> from numpy import array
338 >>> from scipy.sparse import coo_matrix
339 >>> row = array([0, 0, 1, 3, 1, 0, 0])
340 >>> col = array([0, 2, 1, 3, 1, 0, 0])
341 >>> data = array([1, 1, 1, 1, 1, 1, 1])
342 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc()
343 >>> A.toarray()
344 array([[3, 0, 1, 0],
345 [0, 2, 0, 0],
346 [0, 0, 0, 0],
347 [0, 0, 0, 1]])
349 """
350 from .csc import csc_matrix
351 if self.nnz == 0:
352 return csc_matrix(self.shape, dtype=self.dtype)
353 else:
354 M,N = self.shape
355 idx_dtype = get_index_dtype((self.col, self.row),
356 maxval=max(self.nnz, M))
357 row = self.row.astype(idx_dtype, copy=False)
358 col = self.col.astype(idx_dtype, copy=False)
360 indptr = np.empty(N + 1, dtype=idx_dtype)
361 indices = np.empty_like(row, dtype=idx_dtype)
362 data = np.empty_like(self.data, dtype=upcast(self.dtype))
364 coo_tocsr(N, M, self.nnz, col, row, self.data,
365 indptr, indices, data)
367 x = csc_matrix((data, indices, indptr), shape=self.shape)
368 if not self.has_canonical_format:
369 x.sum_duplicates()
370 return x
372 def tocsr(self, copy=False):
373 """Convert this matrix to Compressed Sparse Row format
375 Duplicate entries will be summed together.
377 Examples
378 --------
379 >>> from numpy import array
380 >>> from scipy.sparse import coo_matrix
381 >>> row = array([0, 0, 1, 3, 1, 0, 0])
382 >>> col = array([0, 2, 1, 3, 1, 0, 0])
383 >>> data = array([1, 1, 1, 1, 1, 1, 1])
384 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr()
385 >>> A.toarray()
386 array([[3, 0, 1, 0],
387 [0, 2, 0, 0],
388 [0, 0, 0, 0],
389 [0, 0, 0, 1]])
391 """
392 from .csr import csr_matrix
393 if self.nnz == 0:
394 return csr_matrix(self.shape, dtype=self.dtype)
395 else:
396 M,N = self.shape
397 idx_dtype = get_index_dtype((self.row, self.col),
398 maxval=max(self.nnz, N))
399 row = self.row.astype(idx_dtype, copy=False)
400 col = self.col.astype(idx_dtype, copy=False)
402 indptr = np.empty(M + 1, dtype=idx_dtype)
403 indices = np.empty_like(col, dtype=idx_dtype)
404 data = np.empty_like(self.data, dtype=upcast(self.dtype))
406 coo_tocsr(M, N, self.nnz, row, col, self.data,
407 indptr, indices, data)
409 x = csr_matrix((data, indices, indptr), shape=self.shape)
410 if not self.has_canonical_format:
411 x.sum_duplicates()
412 return x
414 def tocoo(self, copy=False):
415 if copy:
416 return self.copy()
417 else:
418 return self
420 tocoo.__doc__ = spmatrix.tocoo.__doc__
422 def todia(self, copy=False):
423 from .dia import dia_matrix
425 self.sum_duplicates()
426 ks = self.col - self.row # the diagonal for each nonzero
427 diags, diag_idx = np.unique(ks, return_inverse=True)
429 if len(diags) > 100:
430 # probably undesired, should todia() have a maxdiags parameter?
431 warn("Constructing a DIA matrix with %d diagonals "
432 "is inefficient" % len(diags), SparseEfficiencyWarning)
434 #initialize and fill in data array
435 if self.data.size == 0:
436 data = np.zeros((0, 0), dtype=self.dtype)
437 else:
438 data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype)
439 data[diag_idx, self.col] = self.data
441 return dia_matrix((data,diags), shape=self.shape)
443 todia.__doc__ = spmatrix.todia.__doc__
445 def todok(self, copy=False):
446 from .dok import dok_matrix
448 self.sum_duplicates()
449 dok = dok_matrix((self.shape), dtype=self.dtype)
450 dok._update(zip(zip(self.row,self.col),self.data))
452 return dok
454 todok.__doc__ = spmatrix.todok.__doc__
456 def diagonal(self, k=0):
457 rows, cols = self.shape
458 if k <= -rows or k >= cols:
459 return np.empty(0, dtype=self.data.dtype)
460 diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)),
461 dtype=self.dtype)
462 diag_mask = (self.row + k) == self.col
464 if self.has_canonical_format:
465 row = self.row[diag_mask]
466 data = self.data[diag_mask]
467 else:
468 row, _, data = self._sum_duplicates(self.row[diag_mask],
469 self.col[diag_mask],
470 self.data[diag_mask])
471 diag[row + min(k, 0)] = data
473 return diag
475 diagonal.__doc__ = _data_matrix.diagonal.__doc__
477 def _setdiag(self, values, k):
478 M, N = self.shape
479 if values.ndim and not len(values):
480 return
481 idx_dtype = self.row.dtype
483 # Determine which triples to keep and where to put the new ones.
484 full_keep = self.col - self.row != k
485 if k < 0:
486 max_index = min(M+k, N)
487 if values.ndim:
488 max_index = min(max_index, len(values))
489 keep = np.logical_or(full_keep, self.col >= max_index)
490 new_row = np.arange(-k, -k + max_index, dtype=idx_dtype)
491 new_col = np.arange(max_index, dtype=idx_dtype)
492 else:
493 max_index = min(M, N-k)
494 if values.ndim:
495 max_index = min(max_index, len(values))
496 keep = np.logical_or(full_keep, self.row >= max_index)
497 new_row = np.arange(max_index, dtype=idx_dtype)
498 new_col = np.arange(k, k + max_index, dtype=idx_dtype)
500 # Define the array of data consisting of the entries to be added.
501 if values.ndim:
502 new_data = values[:max_index]
503 else:
504 new_data = np.empty(max_index, dtype=self.dtype)
505 new_data[:] = values
507 # Update the internal structure.
508 self.row = np.concatenate((self.row[keep], new_row))
509 self.col = np.concatenate((self.col[keep], new_col))
510 self.data = np.concatenate((self.data[keep], new_data))
511 self.has_canonical_format = False
513 # needed by _data_matrix
514 def _with_data(self,data,copy=True):
515 """Returns a matrix with the same sparsity structure as self,
516 but with different data. By default the index arrays
517 (i.e. .row and .col) are copied.
518 """
519 if copy:
520 return coo_matrix((data, (self.row.copy(), self.col.copy())),
521 shape=self.shape, dtype=data.dtype)
522 else:
523 return coo_matrix((data, (self.row, self.col)),
524 shape=self.shape, dtype=data.dtype)
526 def sum_duplicates(self):
527 """Eliminate duplicate matrix entries by adding them together
529 This is an *in place* operation
530 """
531 if self.has_canonical_format:
532 return
533 summed = self._sum_duplicates(self.row, self.col, self.data)
534 self.row, self.col, self.data = summed
535 self.has_canonical_format = True
537 def _sum_duplicates(self, row, col, data):
538 # Assumes (data, row, col) not in canonical format.
539 if len(data) == 0:
540 return row, col, data
541 order = np.lexsort((row, col))
542 row = row[order]
543 col = col[order]
544 data = data[order]
545 unique_mask = ((row[1:] != row[:-1]) |
546 (col[1:] != col[:-1]))
547 unique_mask = np.append(True, unique_mask)
548 row = row[unique_mask]
549 col = col[unique_mask]
550 unique_inds, = np.nonzero(unique_mask)
551 data = np.add.reduceat(data, unique_inds, dtype=self.dtype)
552 return row, col, data
554 def eliminate_zeros(self):
555 """Remove zero entries from the matrix
557 This is an *in place* operation
558 """
559 mask = self.data != 0
560 self.data = self.data[mask]
561 self.row = self.row[mask]
562 self.col = self.col[mask]
564 #######################
565 # Arithmetic handlers #
566 #######################
568 def _add_dense(self, other):
569 if other.shape != self.shape:
570 raise ValueError('Incompatible shapes.')
571 dtype = upcast_char(self.dtype.char, other.dtype.char)
572 result = np.array(other, dtype=dtype, copy=True)
573 fortran = int(result.flags.f_contiguous)
574 M, N = self.shape
575 coo_todense(M, N, self.nnz, self.row, self.col, self.data,
576 result.ravel('A'), fortran)
577 return matrix(result, copy=False)
579 def _mul_vector(self, other):
580 #output array
581 result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char,
582 other.dtype.char))
583 coo_matvec(self.nnz, self.row, self.col, self.data, other, result)
584 return result
586 def _mul_multivector(self, other):
587 result = np.zeros((other.shape[1], self.shape[0]),
588 dtype=upcast_char(self.dtype.char, other.dtype.char))
589 for i, col in enumerate(other.T):
590 coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i])
591 return result.T.view(type=type(other))
594def isspmatrix_coo(x):
595 """Is x of coo_matrix type?
597 Parameters
598 ----------
599 x
600 object to check for being a coo matrix
602 Returns
603 -------
604 bool
605 True if x is a coo matrix, False otherwise
607 Examples
608 --------
609 >>> from scipy.sparse import coo_matrix, isspmatrix_coo
610 >>> isspmatrix_coo(coo_matrix([[5]]))
611 True
613 >>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo
614 >>> isspmatrix_coo(csr_matrix([[5]]))
615 False
616 """
617 return isinstance(x, coo_matrix)