Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" A sparse matrix in COOrdinate or 'triplet' format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['coo_matrix', 'isspmatrix_coo'] 

6 

7from warnings import warn 

8 

9import numpy as np 

10 

11 

12from ._sparsetools import coo_tocsr, coo_todense, coo_matvec 

13from .base import isspmatrix, SparseEfficiencyWarning, spmatrix 

14from .data import _data_matrix, _minmax_mixin 

15from .sputils import (upcast, upcast_char, to_native, isshape, getdtype, 

16 get_index_dtype, downcast_intp_index, check_shape, 

17 check_reshape_kwargs, matrix) 

18 

19import operator 

20 

21 

22class coo_matrix(_data_matrix, _minmax_mixin): 

23 """ 

24 A sparse matrix in COOrdinate format. 

25 

26 Also known as the 'ijv' or 'triplet' format. 

27 

28 This can be instantiated in several ways: 

29 coo_matrix(D) 

30 with a dense matrix D 

31 

32 coo_matrix(S) 

33 with another sparse matrix S (equivalent to S.tocoo()) 

34 

35 coo_matrix((M, N), [dtype]) 

36 to construct an empty matrix with shape (M, N) 

37 dtype is optional, defaulting to dtype='d'. 

38 

39 coo_matrix((data, (i, j)), [shape=(M, N)]) 

40 to construct from three arrays: 

41 1. data[:] the entries of the matrix, in any order 

42 2. i[:] the row indices of the matrix entries 

43 3. j[:] the column indices of the matrix entries 

44 

45 Where ``A[i[k], j[k]] = data[k]``. When shape is not 

46 specified, it is inferred from the index arrays 

47 

48 Attributes 

49 ---------- 

50 dtype : dtype 

51 Data type of the matrix 

52 shape : 2-tuple 

53 Shape of the matrix 

54 ndim : int 

55 Number of dimensions (this is always 2) 

56 nnz 

57 Number of stored values, including explicit zeros 

58 data 

59 COO format data array of the matrix 

60 row 

61 COO format row index array of the matrix 

62 col 

63 COO format column index array of the matrix 

64 

65 Notes 

66 ----- 

67 

68 Sparse matrices can be used in arithmetic operations: they support 

69 addition, subtraction, multiplication, division, and matrix power. 

70 

71 Advantages of the COO format 

72 - facilitates fast conversion among sparse formats 

73 - permits duplicate entries (see example) 

74 - very fast conversion to and from CSR/CSC formats 

75 

76 Disadvantages of the COO format 

77 - does not directly support: 

78 + arithmetic operations 

79 + slicing 

80 

81 Intended Usage 

82 - COO is a fast format for constructing sparse matrices 

83 - Once a matrix has been constructed, convert to CSR or 

84 CSC format for fast arithmetic and matrix vector operations 

85 - By default when converting to CSR or CSC format, duplicate (i,j) 

86 entries will be summed together. This facilitates efficient 

87 construction of finite element matrices and the like. (see example) 

88 

89 Examples 

90 -------- 

91 

92 >>> # Constructing an empty matrix 

93 >>> from scipy.sparse import coo_matrix 

94 >>> coo_matrix((3, 4), dtype=np.int8).toarray() 

95 array([[0, 0, 0, 0], 

96 [0, 0, 0, 0], 

97 [0, 0, 0, 0]], dtype=int8) 

98 

99 >>> # Constructing a matrix using ijv format 

100 >>> row = np.array([0, 3, 1, 0]) 

101 >>> col = np.array([0, 3, 1, 2]) 

102 >>> data = np.array([4, 5, 7, 9]) 

103 >>> coo_matrix((data, (row, col)), shape=(4, 4)).toarray() 

104 array([[4, 0, 9, 0], 

105 [0, 7, 0, 0], 

106 [0, 0, 0, 0], 

107 [0, 0, 0, 5]]) 

108 

109 >>> # Constructing a matrix with duplicate indices 

110 >>> row = np.array([0, 0, 1, 3, 1, 0, 0]) 

111 >>> col = np.array([0, 2, 1, 3, 1, 0, 0]) 

112 >>> data = np.array([1, 1, 1, 1, 1, 1, 1]) 

113 >>> coo = coo_matrix((data, (row, col)), shape=(4, 4)) 

114 >>> # Duplicate indices are maintained until implicitly or explicitly summed 

115 >>> np.max(coo.data) 

116 1 

117 >>> coo.toarray() 

118 array([[3, 0, 1, 0], 

119 [0, 2, 0, 0], 

120 [0, 0, 0, 0], 

121 [0, 0, 0, 1]]) 

122 

123 """ 

124 format = 'coo' 

125 

126 def __init__(self, arg1, shape=None, dtype=None, copy=False): 

127 _data_matrix.__init__(self) 

128 

129 if isinstance(arg1, tuple): 

130 if isshape(arg1): 

131 M, N = arg1 

132 self._shape = check_shape((M, N)) 

133 idx_dtype = get_index_dtype(maxval=max(M, N)) 

134 self.row = np.array([], dtype=idx_dtype) 

135 self.col = np.array([], dtype=idx_dtype) 

136 self.data = np.array([], getdtype(dtype, default=float)) 

137 self.has_canonical_format = True 

138 else: 

139 try: 

140 obj, (row, col) = arg1 

141 except (TypeError, ValueError): 

142 raise TypeError('invalid input format') 

143 

144 if shape is None: 

145 if len(row) == 0 or len(col) == 0: 

146 raise ValueError('cannot infer dimensions from zero ' 

147 'sized index arrays') 

148 M = operator.index(np.max(row)) + 1 

149 N = operator.index(np.max(col)) + 1 

150 self._shape = check_shape((M, N)) 

151 else: 

152 # Use 2 steps to ensure shape has length 2. 

153 M, N = shape 

154 self._shape = check_shape((M, N)) 

155 

156 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

157 self.row = np.array(row, copy=copy, dtype=idx_dtype) 

158 self.col = np.array(col, copy=copy, dtype=idx_dtype) 

159 self.data = np.array(obj, copy=copy) 

160 self.has_canonical_format = False 

161 

162 else: 

163 if isspmatrix(arg1): 

164 if isspmatrix_coo(arg1) and copy: 

165 self.row = arg1.row.copy() 

166 self.col = arg1.col.copy() 

167 self.data = arg1.data.copy() 

168 self._shape = check_shape(arg1.shape) 

169 else: 

170 coo = arg1.tocoo() 

171 self.row = coo.row 

172 self.col = coo.col 

173 self.data = coo.data 

174 self._shape = check_shape(coo.shape) 

175 self.has_canonical_format = False 

176 else: 

177 #dense argument 

178 M = np.atleast_2d(np.asarray(arg1)) 

179 

180 if M.ndim != 2: 

181 raise TypeError('expected dimension <= 2 array or matrix') 

182 

183 self._shape = check_shape(M.shape) 

184 if shape is not None: 

185 if check_shape(shape) != self._shape: 

186 raise ValueError('inconsistent shapes: %s != %s' % 

187 (shape, self._shape)) 

188 

189 self.row, self.col = M.nonzero() 

190 self.data = M[self.row, self.col] 

191 self.has_canonical_format = True 

192 

193 if dtype is not None: 

194 self.data = self.data.astype(dtype, copy=False) 

195 

196 self._check() 

197 

198 def reshape(self, *args, **kwargs): 

199 shape = check_shape(args, self.shape) 

200 order, copy = check_reshape_kwargs(kwargs) 

201 

202 # Return early if reshape is not required 

203 if shape == self.shape: 

204 if copy: 

205 return self.copy() 

206 else: 

207 return self 

208 

209 nrows, ncols = self.shape 

210 

211 if order == 'C': 

212 # Upcast to avoid overflows: the coo_matrix constructor 

213 # below will downcast the results to a smaller dtype, if 

214 # possible. 

215 dtype = get_index_dtype(maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1))) 

216 

217 flat_indices = np.multiply(ncols, self.row, dtype=dtype) + self.col 

218 new_row, new_col = divmod(flat_indices, shape[1]) 

219 elif order == 'F': 

220 dtype = get_index_dtype(maxval=(nrows * max(0, ncols - 1) + max(0, nrows - 1))) 

221 

222 flat_indices = np.multiply(nrows, self.col, dtype=dtype) + self.row 

223 new_col, new_row = divmod(flat_indices, shape[0]) 

224 else: 

225 raise ValueError("'order' must be 'C' or 'F'") 

226 

227 # Handle copy here rather than passing on to the constructor so that no 

228 # copy will be made of new_row and new_col regardless 

229 if copy: 

230 new_data = self.data.copy() 

231 else: 

232 new_data = self.data 

233 

234 return coo_matrix((new_data, (new_row, new_col)), 

235 shape=shape, copy=False) 

236 

237 reshape.__doc__ = spmatrix.reshape.__doc__ 

238 

239 def getnnz(self, axis=None): 

240 if axis is None: 

241 nnz = len(self.data) 

242 if nnz != len(self.row) or nnz != len(self.col): 

243 raise ValueError('row, column, and data array must all be the ' 

244 'same length') 

245 

246 if self.data.ndim != 1 or self.row.ndim != 1 or \ 

247 self.col.ndim != 1: 

248 raise ValueError('row, column, and data arrays must be 1-D') 

249 

250 return int(nnz) 

251 

252 if axis < 0: 

253 axis += 2 

254 if axis == 0: 

255 return np.bincount(downcast_intp_index(self.col), 

256 minlength=self.shape[1]) 

257 elif axis == 1: 

258 return np.bincount(downcast_intp_index(self.row), 

259 minlength=self.shape[0]) 

260 else: 

261 raise ValueError('axis out of bounds') 

262 

263 getnnz.__doc__ = spmatrix.getnnz.__doc__ 

264 

265 def _check(self): 

266 """ Checks data structure for consistency """ 

267 

268 # index arrays should have integer data types 

269 if self.row.dtype.kind != 'i': 

270 warn("row index array has non-integer dtype (%s) " 

271 % self.row.dtype.name) 

272 if self.col.dtype.kind != 'i': 

273 warn("col index array has non-integer dtype (%s) " 

274 % self.col.dtype.name) 

275 

276 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

277 self.row = np.asarray(self.row, dtype=idx_dtype) 

278 self.col = np.asarray(self.col, dtype=idx_dtype) 

279 self.data = to_native(self.data) 

280 

281 if self.nnz > 0: 

282 if self.row.max() >= self.shape[0]: 

283 raise ValueError('row index exceeds matrix dimensions') 

284 if self.col.max() >= self.shape[1]: 

285 raise ValueError('column index exceeds matrix dimensions') 

286 if self.row.min() < 0: 

287 raise ValueError('negative row index found') 

288 if self.col.min() < 0: 

289 raise ValueError('negative column index found') 

290 

291 def transpose(self, axes=None, copy=False): 

292 if axes is not None: 

293 raise ValueError(("Sparse matrices do not support " 

294 "an 'axes' parameter because swapping " 

295 "dimensions is the only logical permutation.")) 

296 

297 M, N = self.shape 

298 return coo_matrix((self.data, (self.col, self.row)), 

299 shape=(N, M), copy=copy) 

300 

301 transpose.__doc__ = spmatrix.transpose.__doc__ 

302 

303 def resize(self, *shape): 

304 shape = check_shape(shape) 

305 new_M, new_N = shape 

306 M, N = self.shape 

307 

308 if new_M < M or new_N < N: 

309 mask = np.logical_and(self.row < new_M, self.col < new_N) 

310 if not mask.all(): 

311 self.row = self.row[mask] 

312 self.col = self.col[mask] 

313 self.data = self.data[mask] 

314 

315 self._shape = shape 

316 

317 resize.__doc__ = spmatrix.resize.__doc__ 

318 

319 def toarray(self, order=None, out=None): 

320 """See the docstring for `spmatrix.toarray`.""" 

321 B = self._process_toarray_args(order, out) 

322 fortran = int(B.flags.f_contiguous) 

323 if not fortran and not B.flags.c_contiguous: 

324 raise ValueError("Output array must be C or F contiguous") 

325 M,N = self.shape 

326 coo_todense(M, N, self.nnz, self.row, self.col, self.data, 

327 B.ravel('A'), fortran) 

328 return B 

329 

330 def tocsc(self, copy=False): 

331 """Convert this matrix to Compressed Sparse Column format 

332 

333 Duplicate entries will be summed together. 

334 

335 Examples 

336 -------- 

337 >>> from numpy import array 

338 >>> from scipy.sparse import coo_matrix 

339 >>> row = array([0, 0, 1, 3, 1, 0, 0]) 

340 >>> col = array([0, 2, 1, 3, 1, 0, 0]) 

341 >>> data = array([1, 1, 1, 1, 1, 1, 1]) 

342 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsc() 

343 >>> A.toarray() 

344 array([[3, 0, 1, 0], 

345 [0, 2, 0, 0], 

346 [0, 0, 0, 0], 

347 [0, 0, 0, 1]]) 

348 

349 """ 

350 from .csc import csc_matrix 

351 if self.nnz == 0: 

352 return csc_matrix(self.shape, dtype=self.dtype) 

353 else: 

354 M,N = self.shape 

355 idx_dtype = get_index_dtype((self.col, self.row), 

356 maxval=max(self.nnz, M)) 

357 row = self.row.astype(idx_dtype, copy=False) 

358 col = self.col.astype(idx_dtype, copy=False) 

359 

360 indptr = np.empty(N + 1, dtype=idx_dtype) 

361 indices = np.empty_like(row, dtype=idx_dtype) 

362 data = np.empty_like(self.data, dtype=upcast(self.dtype)) 

363 

364 coo_tocsr(N, M, self.nnz, col, row, self.data, 

365 indptr, indices, data) 

366 

367 x = csc_matrix((data, indices, indptr), shape=self.shape) 

368 if not self.has_canonical_format: 

369 x.sum_duplicates() 

370 return x 

371 

372 def tocsr(self, copy=False): 

373 """Convert this matrix to Compressed Sparse Row format 

374 

375 Duplicate entries will be summed together. 

376 

377 Examples 

378 -------- 

379 >>> from numpy import array 

380 >>> from scipy.sparse import coo_matrix 

381 >>> row = array([0, 0, 1, 3, 1, 0, 0]) 

382 >>> col = array([0, 2, 1, 3, 1, 0, 0]) 

383 >>> data = array([1, 1, 1, 1, 1, 1, 1]) 

384 >>> A = coo_matrix((data, (row, col)), shape=(4, 4)).tocsr() 

385 >>> A.toarray() 

386 array([[3, 0, 1, 0], 

387 [0, 2, 0, 0], 

388 [0, 0, 0, 0], 

389 [0, 0, 0, 1]]) 

390 

391 """ 

392 from .csr import csr_matrix 

393 if self.nnz == 0: 

394 return csr_matrix(self.shape, dtype=self.dtype) 

395 else: 

396 M,N = self.shape 

397 idx_dtype = get_index_dtype((self.row, self.col), 

398 maxval=max(self.nnz, N)) 

399 row = self.row.astype(idx_dtype, copy=False) 

400 col = self.col.astype(idx_dtype, copy=False) 

401 

402 indptr = np.empty(M + 1, dtype=idx_dtype) 

403 indices = np.empty_like(col, dtype=idx_dtype) 

404 data = np.empty_like(self.data, dtype=upcast(self.dtype)) 

405 

406 coo_tocsr(M, N, self.nnz, row, col, self.data, 

407 indptr, indices, data) 

408 

409 x = csr_matrix((data, indices, indptr), shape=self.shape) 

410 if not self.has_canonical_format: 

411 x.sum_duplicates() 

412 return x 

413 

414 def tocoo(self, copy=False): 

415 if copy: 

416 return self.copy() 

417 else: 

418 return self 

419 

420 tocoo.__doc__ = spmatrix.tocoo.__doc__ 

421 

422 def todia(self, copy=False): 

423 from .dia import dia_matrix 

424 

425 self.sum_duplicates() 

426 ks = self.col - self.row # the diagonal for each nonzero 

427 diags, diag_idx = np.unique(ks, return_inverse=True) 

428 

429 if len(diags) > 100: 

430 # probably undesired, should todia() have a maxdiags parameter? 

431 warn("Constructing a DIA matrix with %d diagonals " 

432 "is inefficient" % len(diags), SparseEfficiencyWarning) 

433 

434 #initialize and fill in data array 

435 if self.data.size == 0: 

436 data = np.zeros((0, 0), dtype=self.dtype) 

437 else: 

438 data = np.zeros((len(diags), self.col.max()+1), dtype=self.dtype) 

439 data[diag_idx, self.col] = self.data 

440 

441 return dia_matrix((data,diags), shape=self.shape) 

442 

443 todia.__doc__ = spmatrix.todia.__doc__ 

444 

445 def todok(self, copy=False): 

446 from .dok import dok_matrix 

447 

448 self.sum_duplicates() 

449 dok = dok_matrix((self.shape), dtype=self.dtype) 

450 dok._update(zip(zip(self.row,self.col),self.data)) 

451 

452 return dok 

453 

454 todok.__doc__ = spmatrix.todok.__doc__ 

455 

456 def diagonal(self, k=0): 

457 rows, cols = self.shape 

458 if k <= -rows or k >= cols: 

459 return np.empty(0, dtype=self.data.dtype) 

460 diag = np.zeros(min(rows + min(k, 0), cols - max(k, 0)), 

461 dtype=self.dtype) 

462 diag_mask = (self.row + k) == self.col 

463 

464 if self.has_canonical_format: 

465 row = self.row[diag_mask] 

466 data = self.data[diag_mask] 

467 else: 

468 row, _, data = self._sum_duplicates(self.row[diag_mask], 

469 self.col[diag_mask], 

470 self.data[diag_mask]) 

471 diag[row + min(k, 0)] = data 

472 

473 return diag 

474 

475 diagonal.__doc__ = _data_matrix.diagonal.__doc__ 

476 

477 def _setdiag(self, values, k): 

478 M, N = self.shape 

479 if values.ndim and not len(values): 

480 return 

481 idx_dtype = self.row.dtype 

482 

483 # Determine which triples to keep and where to put the new ones. 

484 full_keep = self.col - self.row != k 

485 if k < 0: 

486 max_index = min(M+k, N) 

487 if values.ndim: 

488 max_index = min(max_index, len(values)) 

489 keep = np.logical_or(full_keep, self.col >= max_index) 

490 new_row = np.arange(-k, -k + max_index, dtype=idx_dtype) 

491 new_col = np.arange(max_index, dtype=idx_dtype) 

492 else: 

493 max_index = min(M, N-k) 

494 if values.ndim: 

495 max_index = min(max_index, len(values)) 

496 keep = np.logical_or(full_keep, self.row >= max_index) 

497 new_row = np.arange(max_index, dtype=idx_dtype) 

498 new_col = np.arange(k, k + max_index, dtype=idx_dtype) 

499 

500 # Define the array of data consisting of the entries to be added. 

501 if values.ndim: 

502 new_data = values[:max_index] 

503 else: 

504 new_data = np.empty(max_index, dtype=self.dtype) 

505 new_data[:] = values 

506 

507 # Update the internal structure. 

508 self.row = np.concatenate((self.row[keep], new_row)) 

509 self.col = np.concatenate((self.col[keep], new_col)) 

510 self.data = np.concatenate((self.data[keep], new_data)) 

511 self.has_canonical_format = False 

512 

513 # needed by _data_matrix 

514 def _with_data(self,data,copy=True): 

515 """Returns a matrix with the same sparsity structure as self, 

516 but with different data. By default the index arrays 

517 (i.e. .row and .col) are copied. 

518 """ 

519 if copy: 

520 return coo_matrix((data, (self.row.copy(), self.col.copy())), 

521 shape=self.shape, dtype=data.dtype) 

522 else: 

523 return coo_matrix((data, (self.row, self.col)), 

524 shape=self.shape, dtype=data.dtype) 

525 

526 def sum_duplicates(self): 

527 """Eliminate duplicate matrix entries by adding them together 

528 

529 This is an *in place* operation 

530 """ 

531 if self.has_canonical_format: 

532 return 

533 summed = self._sum_duplicates(self.row, self.col, self.data) 

534 self.row, self.col, self.data = summed 

535 self.has_canonical_format = True 

536 

537 def _sum_duplicates(self, row, col, data): 

538 # Assumes (data, row, col) not in canonical format. 

539 if len(data) == 0: 

540 return row, col, data 

541 order = np.lexsort((row, col)) 

542 row = row[order] 

543 col = col[order] 

544 data = data[order] 

545 unique_mask = ((row[1:] != row[:-1]) | 

546 (col[1:] != col[:-1])) 

547 unique_mask = np.append(True, unique_mask) 

548 row = row[unique_mask] 

549 col = col[unique_mask] 

550 unique_inds, = np.nonzero(unique_mask) 

551 data = np.add.reduceat(data, unique_inds, dtype=self.dtype) 

552 return row, col, data 

553 

554 def eliminate_zeros(self): 

555 """Remove zero entries from the matrix 

556 

557 This is an *in place* operation 

558 """ 

559 mask = self.data != 0 

560 self.data = self.data[mask] 

561 self.row = self.row[mask] 

562 self.col = self.col[mask] 

563 

564 ####################### 

565 # Arithmetic handlers # 

566 ####################### 

567 

568 def _add_dense(self, other): 

569 if other.shape != self.shape: 

570 raise ValueError('Incompatible shapes.') 

571 dtype = upcast_char(self.dtype.char, other.dtype.char) 

572 result = np.array(other, dtype=dtype, copy=True) 

573 fortran = int(result.flags.f_contiguous) 

574 M, N = self.shape 

575 coo_todense(M, N, self.nnz, self.row, self.col, self.data, 

576 result.ravel('A'), fortran) 

577 return matrix(result, copy=False) 

578 

579 def _mul_vector(self, other): 

580 #output array 

581 result = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, 

582 other.dtype.char)) 

583 coo_matvec(self.nnz, self.row, self.col, self.data, other, result) 

584 return result 

585 

586 def _mul_multivector(self, other): 

587 result = np.zeros((other.shape[1], self.shape[0]), 

588 dtype=upcast_char(self.dtype.char, other.dtype.char)) 

589 for i, col in enumerate(other.T): 

590 coo_matvec(self.nnz, self.row, self.col, self.data, col, result[i]) 

591 return result.T.view(type=type(other)) 

592 

593 

594def isspmatrix_coo(x): 

595 """Is x of coo_matrix type? 

596 

597 Parameters 

598 ---------- 

599 x 

600 object to check for being a coo matrix 

601 

602 Returns 

603 ------- 

604 bool 

605 True if x is a coo matrix, False otherwise 

606 

607 Examples 

608 -------- 

609 >>> from scipy.sparse import coo_matrix, isspmatrix_coo 

610 >>> isspmatrix_coo(coo_matrix([[5]])) 

611 True 

612 

613 >>> from scipy.sparse import coo_matrix, csr_matrix, isspmatrix_coo 

614 >>> isspmatrix_coo(csr_matrix([[5]])) 

615 False 

616 """ 

617 return isinstance(x, coo_matrix)