Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Compressed Sparse Row matrix format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['csr_matrix', 'isspmatrix_csr'] 

6 

7import numpy as np 

8 

9from .base import spmatrix 

10from ._sparsetools import (csr_tocsc, csr_tobsr, csr_count_blocks, 

11 get_csr_submatrix) 

12from .sputils import upcast, get_index_dtype 

13 

14from .compressed import _cs_matrix 

15 

16 

17class csr_matrix(_cs_matrix): 

18 """ 

19 Compressed Sparse Row matrix 

20 

21 This can be instantiated in several ways: 

22 csr_matrix(D) 

23 with a dense matrix or rank-2 ndarray D 

24 

25 csr_matrix(S) 

26 with another sparse matrix S (equivalent to S.tocsr()) 

27 

28 csr_matrix((M, N), [dtype]) 

29 to construct an empty matrix with shape (M, N) 

30 dtype is optional, defaulting to dtype='d'. 

31 

32 csr_matrix((data, (row_ind, col_ind)), [shape=(M, N)]) 

33 where ``data``, ``row_ind`` and ``col_ind`` satisfy the 

34 relationship ``a[row_ind[k], col_ind[k]] = data[k]``. 

35 

36 csr_matrix((data, indices, indptr), [shape=(M, N)]) 

37 is the standard CSR representation where the column indices for 

38 row i are stored in ``indices[indptr[i]:indptr[i+1]]`` and their 

39 corresponding values are stored in ``data[indptr[i]:indptr[i+1]]``. 

40 If the shape parameter is not supplied, the matrix dimensions 

41 are inferred from the index arrays. 

42 

43 Attributes 

44 ---------- 

45 dtype : dtype 

46 Data type of the matrix 

47 shape : 2-tuple 

48 Shape of the matrix 

49 ndim : int 

50 Number of dimensions (this is always 2) 

51 nnz 

52 Number of stored values, including explicit zeros 

53 data 

54 CSR format data array of the matrix 

55 indices 

56 CSR format index array of the matrix 

57 indptr 

58 CSR format index pointer array of the matrix 

59 has_sorted_indices 

60 Whether indices are sorted 

61 

62 Notes 

63 ----- 

64 

65 Sparse matrices can be used in arithmetic operations: they support 

66 addition, subtraction, multiplication, division, and matrix power. 

67 

68 Advantages of the CSR format 

69 - efficient arithmetic operations CSR + CSR, CSR * CSR, etc. 

70 - efficient row slicing 

71 - fast matrix vector products 

72 

73 Disadvantages of the CSR format 

74 - slow column slicing operations (consider CSC) 

75 - changes to the sparsity structure are expensive (consider LIL or DOK) 

76 

77 Examples 

78 -------- 

79 

80 >>> import numpy as np 

81 >>> from scipy.sparse import csr_matrix 

82 >>> csr_matrix((3, 4), dtype=np.int8).toarray() 

83 array([[0, 0, 0, 0], 

84 [0, 0, 0, 0], 

85 [0, 0, 0, 0]], dtype=int8) 

86 

87 >>> row = np.array([0, 0, 1, 2, 2, 2]) 

88 >>> col = np.array([0, 2, 2, 0, 1, 2]) 

89 >>> data = np.array([1, 2, 3, 4, 5, 6]) 

90 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray() 

91 array([[1, 0, 2], 

92 [0, 0, 3], 

93 [4, 5, 6]]) 

94 

95 >>> indptr = np.array([0, 2, 3, 6]) 

96 >>> indices = np.array([0, 2, 2, 0, 1, 2]) 

97 >>> data = np.array([1, 2, 3, 4, 5, 6]) 

98 >>> csr_matrix((data, indices, indptr), shape=(3, 3)).toarray() 

99 array([[1, 0, 2], 

100 [0, 0, 3], 

101 [4, 5, 6]]) 

102 

103 Duplicate entries are summed together: 

104 

105 >>> row = np.array([0, 1, 2, 0]) 

106 >>> col = np.array([0, 1, 1, 0]) 

107 >>> data = np.array([1, 2, 4, 8]) 

108 >>> csr_matrix((data, (row, col)), shape=(3, 3)).toarray() 

109 array([[9, 0, 0], 

110 [0, 2, 0], 

111 [0, 4, 0]]) 

112 

113 As an example of how to construct a CSR matrix incrementally, 

114 the following snippet builds a term-document matrix from texts: 

115 

116 >>> docs = [["hello", "world", "hello"], ["goodbye", "cruel", "world"]] 

117 >>> indptr = [0] 

118 >>> indices = [] 

119 >>> data = [] 

120 >>> vocabulary = {} 

121 >>> for d in docs: 

122 ... for term in d: 

123 ... index = vocabulary.setdefault(term, len(vocabulary)) 

124 ... indices.append(index) 

125 ... data.append(1) 

126 ... indptr.append(len(indices)) 

127 ... 

128 >>> csr_matrix((data, indices, indptr), dtype=int).toarray() 

129 array([[2, 1, 0, 0], 

130 [0, 1, 1, 1]]) 

131 

132 """ 

133 format = 'csr' 

134 

135 def transpose(self, axes=None, copy=False): 

136 if axes is not None: 

137 raise ValueError(("Sparse matrices do not support " 

138 "an 'axes' parameter because swapping " 

139 "dimensions is the only logical permutation.")) 

140 

141 M, N = self.shape 

142 

143 from .csc import csc_matrix 

144 return csc_matrix((self.data, self.indices, 

145 self.indptr), shape=(N, M), copy=copy) 

146 

147 transpose.__doc__ = spmatrix.transpose.__doc__ 

148 

149 def tolil(self, copy=False): 

150 from .lil import lil_matrix 

151 lil = lil_matrix(self.shape,dtype=self.dtype) 

152 

153 self.sum_duplicates() 

154 ptr,ind,dat = self.indptr,self.indices,self.data 

155 rows, data = lil.rows, lil.data 

156 

157 for n in range(self.shape[0]): 

158 start = ptr[n] 

159 end = ptr[n+1] 

160 rows[n] = ind[start:end].tolist() 

161 data[n] = dat[start:end].tolist() 

162 

163 return lil 

164 

165 tolil.__doc__ = spmatrix.tolil.__doc__ 

166 

167 def tocsr(self, copy=False): 

168 if copy: 

169 return self.copy() 

170 else: 

171 return self 

172 

173 tocsr.__doc__ = spmatrix.tocsr.__doc__ 

174 

175 def tocsc(self, copy=False): 

176 idx_dtype = get_index_dtype((self.indptr, self.indices), 

177 maxval=max(self.nnz, self.shape[0])) 

178 indptr = np.empty(self.shape[1] + 1, dtype=idx_dtype) 

179 indices = np.empty(self.nnz, dtype=idx_dtype) 

180 data = np.empty(self.nnz, dtype=upcast(self.dtype)) 

181 

182 csr_tocsc(self.shape[0], self.shape[1], 

183 self.indptr.astype(idx_dtype), 

184 self.indices.astype(idx_dtype), 

185 self.data, 

186 indptr, 

187 indices, 

188 data) 

189 

190 from .csc import csc_matrix 

191 A = csc_matrix((data, indices, indptr), shape=self.shape) 

192 A.has_sorted_indices = True 

193 return A 

194 

195 tocsc.__doc__ = spmatrix.tocsc.__doc__ 

196 

197 def tobsr(self, blocksize=None, copy=True): 

198 from .bsr import bsr_matrix 

199 

200 if blocksize is None: 

201 from .spfuncs import estimate_blocksize 

202 return self.tobsr(blocksize=estimate_blocksize(self)) 

203 

204 elif blocksize == (1,1): 

205 arg1 = (self.data.reshape(-1,1,1),self.indices,self.indptr) 

206 return bsr_matrix(arg1, shape=self.shape, copy=copy) 

207 

208 else: 

209 R,C = blocksize 

210 M,N = self.shape 

211 

212 if R < 1 or C < 1 or M % R != 0 or N % C != 0: 

213 raise ValueError('invalid blocksize %s' % blocksize) 

214 

215 blks = csr_count_blocks(M,N,R,C,self.indptr,self.indices) 

216 

217 idx_dtype = get_index_dtype((self.indptr, self.indices), 

218 maxval=max(N//C, blks)) 

219 indptr = np.empty(M//R+1, dtype=idx_dtype) 

220 indices = np.empty(blks, dtype=idx_dtype) 

221 data = np.zeros((blks,R,C), dtype=self.dtype) 

222 

223 csr_tobsr(M, N, R, C, 

224 self.indptr.astype(idx_dtype), 

225 self.indices.astype(idx_dtype), 

226 self.data, 

227 indptr, indices, data.ravel()) 

228 

229 return bsr_matrix((data,indices,indptr), shape=self.shape) 

230 

231 tobsr.__doc__ = spmatrix.tobsr.__doc__ 

232 

233 # these functions are used by the parent class (_cs_matrix) 

234 # to remove redudancy between csc_matrix and csr_matrix 

235 def _swap(self, x): 

236 """swap the members of x if this is a column-oriented matrix 

237 """ 

238 return x 

239 

240 def __iter__(self): 

241 indptr = np.zeros(2, dtype=self.indptr.dtype) 

242 shape = (1, self.shape[1]) 

243 i0 = 0 

244 for i1 in self.indptr[1:]: 

245 indptr[1] = i1 - i0 

246 indices = self.indices[i0:i1] 

247 data = self.data[i0:i1] 

248 yield csr_matrix((data, indices, indptr), shape=shape, copy=True) 

249 i0 = i1 

250 

251 def getrow(self, i): 

252 """Returns a copy of row i of the matrix, as a (1 x n) 

253 CSR matrix (row vector). 

254 """ 

255 M, N = self.shape 

256 i = int(i) 

257 if i < 0: 

258 i += M 

259 if i < 0 or i >= M: 

260 raise IndexError('index (%d) out of range' % i) 

261 indptr, indices, data = get_csr_submatrix( 

262 M, N, self.indptr, self.indices, self.data, i, i + 1, 0, N) 

263 return csr_matrix((data, indices, indptr), shape=(1, N), 

264 dtype=self.dtype, copy=False) 

265 

266 def getcol(self, i): 

267 """Returns a copy of column i of the matrix, as a (m x 1) 

268 CSR matrix (column vector). 

269 """ 

270 M, N = self.shape 

271 i = int(i) 

272 if i < 0: 

273 i += N 

274 if i < 0 or i >= N: 

275 raise IndexError('index (%d) out of range' % i) 

276 indptr, indices, data = get_csr_submatrix( 

277 M, N, self.indptr, self.indices, self.data, 0, M, i, i + 1) 

278 return csr_matrix((data, indices, indptr), shape=(M, 1), 

279 dtype=self.dtype, copy=False) 

280 

281 def _get_intXarray(self, row, col): 

282 return self.getrow(row)._minor_index_fancy(col) 

283 

284 def _get_intXslice(self, row, col): 

285 if col.step in (1, None): 

286 return self._get_submatrix(row, col, copy=True) 

287 # TODO: uncomment this once it's faster: 

288 # return self.getrow(row)._minor_slice(col) 

289 

290 M, N = self.shape 

291 start, stop, stride = col.indices(N) 

292 

293 ii, jj = self.indptr[row:row+2] 

294 row_indices = self.indices[ii:jj] 

295 row_data = self.data[ii:jj] 

296 

297 if stride > 0: 

298 ind = (row_indices >= start) & (row_indices < stop) 

299 else: 

300 ind = (row_indices <= start) & (row_indices > stop) 

301 

302 if abs(stride) > 1: 

303 ind &= (row_indices - start) % stride == 0 

304 

305 row_indices = (row_indices[ind] - start) // stride 

306 row_data = row_data[ind] 

307 row_indptr = np.array([0, len(row_indices)]) 

308 

309 if stride < 0: 

310 row_data = row_data[::-1] 

311 row_indices = abs(row_indices[::-1]) 

312 

313 shape = (1, int(np.ceil(float(stop - start) / stride))) 

314 return csr_matrix((row_data, row_indices, row_indptr), shape=shape, 

315 dtype=self.dtype, copy=False) 

316 

317 def _get_sliceXint(self, row, col): 

318 if row.step in (1, None): 

319 return self._get_submatrix(row, col, copy=True) 

320 return self._major_slice(row)._get_submatrix(minor=col) 

321 

322 def _get_sliceXarray(self, row, col): 

323 return self._major_slice(row)._minor_index_fancy(col) 

324 

325 def _get_arrayXint(self, row, col): 

326 return self._major_index_fancy(row)._get_submatrix(minor=col) 

327 

328 def _get_arrayXslice(self, row, col): 

329 if col.step not in (1, None): 

330 col = np.arange(*col.indices(self.shape[1])) 

331 return self._get_arrayXarray(row, col) 

332 return self._major_index_fancy(row)._get_submatrix(minor=col) 

333 

334 

335def isspmatrix_csr(x): 

336 """Is x of csr_matrix type? 

337 

338 Parameters 

339 ---------- 

340 x 

341 object to check for being a csr matrix 

342 

343 Returns 

344 ------- 

345 bool 

346 True if x is a csr matrix, False otherwise 

347 

348 Examples 

349 -------- 

350 >>> from scipy.sparse import csr_matrix, isspmatrix_csr 

351 >>> isspmatrix_csr(csr_matrix([[5]])) 

352 True 

353 

354 >>> from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc 

355 >>> isspmatrix_csr(csc_matrix([[5]])) 

356 False 

357 """ 

358 return isinstance(x, csr_matrix)