Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Sparse DIAgonal format""" 

2 

3__docformat__ = "restructuredtext en" 

4 

5__all__ = ['dia_matrix', 'isspmatrix_dia'] 

6 

7import numpy as np 

8 

9from .base import isspmatrix, _formats, spmatrix 

10from .data import _data_matrix 

11from .sputils import (isshape, upcast_char, getdtype, get_index_dtype, 

12 get_sum_dtype, validateaxis, check_shape, matrix) 

13from ._sparsetools import dia_matvec 

14 

15 

16class dia_matrix(_data_matrix): 

17 """Sparse matrix with DIAgonal storage 

18 

19 This can be instantiated in several ways: 

20 dia_matrix(D) 

21 with a dense matrix 

22 

23 dia_matrix(S) 

24 with another sparse matrix S (equivalent to S.todia()) 

25 

26 dia_matrix((M, N), [dtype]) 

27 to construct an empty matrix with shape (M, N), 

28 dtype is optional, defaulting to dtype='d'. 

29 

30 dia_matrix((data, offsets), shape=(M, N)) 

31 where the ``data[k,:]`` stores the diagonal entries for 

32 diagonal ``offsets[k]`` (See example below) 

33 

34 Attributes 

35 ---------- 

36 dtype : dtype 

37 Data type of the matrix 

38 shape : 2-tuple 

39 Shape of the matrix 

40 ndim : int 

41 Number of dimensions (this is always 2) 

42 nnz 

43 Number of stored values, including explicit zeros 

44 data 

45 DIA format data array of the matrix 

46 offsets 

47 DIA format offset array of the matrix 

48 

49 Notes 

50 ----- 

51 

52 Sparse matrices can be used in arithmetic operations: they support 

53 addition, subtraction, multiplication, division, and matrix power. 

54 

55 Examples 

56 -------- 

57 

58 >>> import numpy as np 

59 >>> from scipy.sparse import dia_matrix 

60 >>> dia_matrix((3, 4), dtype=np.int8).toarray() 

61 array([[0, 0, 0, 0], 

62 [0, 0, 0, 0], 

63 [0, 0, 0, 0]], dtype=int8) 

64 

65 >>> data = np.array([[1, 2, 3, 4]]).repeat(3, axis=0) 

66 >>> offsets = np.array([0, -1, 2]) 

67 >>> dia_matrix((data, offsets), shape=(4, 4)).toarray() 

68 array([[1, 0, 3, 0], 

69 [1, 2, 0, 4], 

70 [0, 2, 3, 0], 

71 [0, 0, 3, 4]]) 

72 

73 >>> from scipy.sparse import dia_matrix 

74 >>> n = 10 

75 >>> ex = np.ones(n) 

76 >>> data = np.array([ex, 2 * ex, ex]) 

77 >>> offsets = np.array([-1, 0, 1]) 

78 >>> dia_matrix((data, offsets), shape=(n, n)).toarray() 

79 array([[2., 1., 0., ..., 0., 0., 0.], 

80 [1., 2., 1., ..., 0., 0., 0.], 

81 [0., 1., 2., ..., 0., 0., 0.], 

82 ..., 

83 [0., 0., 0., ..., 2., 1., 0.], 

84 [0., 0., 0., ..., 1., 2., 1.], 

85 [0., 0., 0., ..., 0., 1., 2.]]) 

86 """ 

87 format = 'dia' 

88 

89 def __init__(self, arg1, shape=None, dtype=None, copy=False): 

90 _data_matrix.__init__(self) 

91 

92 if isspmatrix_dia(arg1): 

93 if copy: 

94 arg1 = arg1.copy() 

95 self.data = arg1.data 

96 self.offsets = arg1.offsets 

97 self._shape = check_shape(arg1.shape) 

98 elif isspmatrix(arg1): 

99 if isspmatrix_dia(arg1) and copy: 

100 A = arg1.copy() 

101 else: 

102 A = arg1.todia() 

103 self.data = A.data 

104 self.offsets = A.offsets 

105 self._shape = check_shape(A.shape) 

106 elif isinstance(arg1, tuple): 

107 if isshape(arg1): 

108 # It's a tuple of matrix dimensions (M, N) 

109 # create empty matrix 

110 self._shape = check_shape(arg1) 

111 self.data = np.zeros((0,0), getdtype(dtype, default=float)) 

112 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

113 self.offsets = np.zeros((0), dtype=idx_dtype) 

114 else: 

115 try: 

116 # Try interpreting it as (data, offsets) 

117 data, offsets = arg1 

118 except Exception: 

119 raise ValueError('unrecognized form for dia_matrix constructor') 

120 else: 

121 if shape is None: 

122 raise ValueError('expected a shape argument') 

123 self.data = np.atleast_2d(np.array(arg1[0], dtype=dtype, copy=copy)) 

124 self.offsets = np.atleast_1d(np.array(arg1[1], 

125 dtype=get_index_dtype(maxval=max(shape)), 

126 copy=copy)) 

127 self._shape = check_shape(shape) 

128 else: 

129 #must be dense, convert to COO first, then to DIA 

130 try: 

131 arg1 = np.asarray(arg1) 

132 except Exception: 

133 raise ValueError("unrecognized form for" 

134 " %s_matrix constructor" % self.format) 

135 from .coo import coo_matrix 

136 A = coo_matrix(arg1, dtype=dtype, shape=shape).todia() 

137 self.data = A.data 

138 self.offsets = A.offsets 

139 self._shape = check_shape(A.shape) 

140 

141 if dtype is not None: 

142 self.data = self.data.astype(dtype) 

143 

144 #check format 

145 if self.offsets.ndim != 1: 

146 raise ValueError('offsets array must have rank 1') 

147 

148 if self.data.ndim != 2: 

149 raise ValueError('data array must have rank 2') 

150 

151 if self.data.shape[0] != len(self.offsets): 

152 raise ValueError('number of diagonals (%d) ' 

153 'does not match the number of offsets (%d)' 

154 % (self.data.shape[0], len(self.offsets))) 

155 

156 if len(np.unique(self.offsets)) != len(self.offsets): 

157 raise ValueError('offset array contains duplicate values') 

158 

159 def __repr__(self): 

160 format = _formats[self.getformat()][1] 

161 return "<%dx%d sparse matrix of type '%s'\n" \ 

162 "\twith %d stored elements (%d diagonals) in %s format>" % \ 

163 (self.shape + (self.dtype.type, self.nnz, self.data.shape[0], 

164 format)) 

165 

166 def _data_mask(self): 

167 """Returns a mask of the same shape as self.data, where 

168 mask[i,j] is True when data[i,j] corresponds to a stored element.""" 

169 num_rows, num_cols = self.shape 

170 offset_inds = np.arange(self.data.shape[1]) 

171 row = offset_inds - self.offsets[:,None] 

172 mask = (row >= 0) 

173 mask &= (row < num_rows) 

174 mask &= (offset_inds < num_cols) 

175 return mask 

176 

177 def count_nonzero(self): 

178 mask = self._data_mask() 

179 return np.count_nonzero(self.data[mask]) 

180 

181 def getnnz(self, axis=None): 

182 if axis is not None: 

183 raise NotImplementedError("getnnz over an axis is not implemented " 

184 "for DIA format") 

185 M,N = self.shape 

186 nnz = 0 

187 for k in self.offsets: 

188 if k > 0: 

189 nnz += min(M,N-k) 

190 else: 

191 nnz += min(M+k,N) 

192 return int(nnz) 

193 

194 getnnz.__doc__ = spmatrix.getnnz.__doc__ 

195 count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__ 

196 

197 def sum(self, axis=None, dtype=None, out=None): 

198 validateaxis(axis) 

199 

200 if axis is not None and axis < 0: 

201 axis += 2 

202 

203 res_dtype = get_sum_dtype(self.dtype) 

204 num_rows, num_cols = self.shape 

205 ret = None 

206 

207 if axis == 0: 

208 mask = self._data_mask() 

209 x = (self.data * mask).sum(axis=0) 

210 if x.shape[0] == num_cols: 

211 res = x 

212 else: 

213 res = np.zeros(num_cols, dtype=x.dtype) 

214 res[:x.shape[0]] = x 

215 ret = matrix(res, dtype=res_dtype) 

216 

217 else: 

218 row_sums = np.zeros(num_rows, dtype=res_dtype) 

219 one = np.ones(num_cols, dtype=res_dtype) 

220 dia_matvec(num_rows, num_cols, len(self.offsets), 

221 self.data.shape[1], self.offsets, self.data, one, row_sums) 

222 

223 row_sums = matrix(row_sums) 

224 

225 if axis is None: 

226 return row_sums.sum(dtype=dtype, out=out) 

227 

228 if axis is not None: 

229 row_sums = row_sums.T 

230 

231 ret = matrix(row_sums.sum(axis=axis)) 

232 

233 if out is not None and out.shape != ret.shape: 

234 raise ValueError("dimensions do not match") 

235 

236 return ret.sum(axis=(), dtype=dtype, out=out) 

237 

238 sum.__doc__ = spmatrix.sum.__doc__ 

239 

240 def _mul_vector(self, other): 

241 x = other 

242 

243 y = np.zeros(self.shape[0], dtype=upcast_char(self.dtype.char, 

244 x.dtype.char)) 

245 

246 L = self.data.shape[1] 

247 

248 M,N = self.shape 

249 

250 dia_matvec(M,N, len(self.offsets), L, self.offsets, self.data, x.ravel(), y.ravel()) 

251 

252 return y 

253 

254 def _mul_multimatrix(self, other): 

255 return np.hstack([self._mul_vector(col).reshape(-1,1) for col in other.T]) 

256 

257 def _setdiag(self, values, k=0): 

258 M, N = self.shape 

259 

260 if values.ndim == 0: 

261 # broadcast 

262 values_n = np.inf 

263 else: 

264 values_n = len(values) 

265 

266 if k < 0: 

267 n = min(M + k, N, values_n) 

268 min_index = 0 

269 max_index = n 

270 else: 

271 n = min(M, N - k, values_n) 

272 min_index = k 

273 max_index = k + n 

274 

275 if values.ndim != 0: 

276 # allow also longer sequences 

277 values = values[:n] 

278 

279 if k in self.offsets: 

280 self.data[self.offsets == k, min_index:max_index] = values 

281 else: 

282 self.offsets = np.append(self.offsets, self.offsets.dtype.type(k)) 

283 m = max(max_index, self.data.shape[1]) 

284 data = np.zeros((self.data.shape[0]+1, m), dtype=self.data.dtype) 

285 data[:-1,:self.data.shape[1]] = self.data 

286 data[-1, min_index:max_index] = values 

287 self.data = data 

288 

289 def todia(self, copy=False): 

290 if copy: 

291 return self.copy() 

292 else: 

293 return self 

294 

295 todia.__doc__ = spmatrix.todia.__doc__ 

296 

297 def transpose(self, axes=None, copy=False): 

298 if axes is not None: 

299 raise ValueError(("Sparse matrices do not support " 

300 "an 'axes' parameter because swapping " 

301 "dimensions is the only logical permutation.")) 

302 

303 num_rows, num_cols = self.shape 

304 max_dim = max(self.shape) 

305 

306 # flip diagonal offsets 

307 offsets = -self.offsets 

308 

309 # re-align the data matrix 

310 r = np.arange(len(offsets), dtype=np.intc)[:, None] 

311 c = np.arange(num_rows, dtype=np.intc) - (offsets % max_dim)[:, None] 

312 pad_amount = max(0, max_dim-self.data.shape[1]) 

313 data = np.hstack((self.data, np.zeros((self.data.shape[0], pad_amount), 

314 dtype=self.data.dtype))) 

315 data = data[r, c] 

316 return dia_matrix((data, offsets), shape=( 

317 num_cols, num_rows), copy=copy) 

318 

319 transpose.__doc__ = spmatrix.transpose.__doc__ 

320 

321 def diagonal(self, k=0): 

322 rows, cols = self.shape 

323 if k <= -rows or k >= cols: 

324 return np.empty(0, dtype=self.data.dtype) 

325 idx, = np.nonzero(self.offsets == k) 

326 first_col, last_col = max(0, k), min(rows + k, cols) 

327 if idx.size == 0: 

328 return np.zeros(last_col - first_col, dtype=self.data.dtype) 

329 return self.data[idx[0], first_col:last_col] 

330 

331 diagonal.__doc__ = spmatrix.diagonal.__doc__ 

332 

333 def tocsc(self, copy=False): 

334 from .csc import csc_matrix 

335 if self.nnz == 0: 

336 return csc_matrix(self.shape, dtype=self.dtype) 

337 

338 num_rows, num_cols = self.shape 

339 num_offsets, offset_len = self.data.shape 

340 offset_inds = np.arange(offset_len) 

341 

342 row = offset_inds - self.offsets[:,None] 

343 mask = (row >= 0) 

344 mask &= (row < num_rows) 

345 mask &= (offset_inds < num_cols) 

346 mask &= (self.data != 0) 

347 

348 idx_dtype = get_index_dtype(maxval=max(self.shape)) 

349 indptr = np.zeros(num_cols + 1, dtype=idx_dtype) 

350 indptr[1:offset_len+1] = np.cumsum(mask.sum(axis=0)) 

351 indptr[offset_len+1:] = indptr[offset_len] 

352 indices = row.T[mask.T].astype(idx_dtype, copy=False) 

353 data = self.data.T[mask.T] 

354 return csc_matrix((data, indices, indptr), shape=self.shape, 

355 dtype=self.dtype) 

356 

357 tocsc.__doc__ = spmatrix.tocsc.__doc__ 

358 

359 def tocoo(self, copy=False): 

360 num_rows, num_cols = self.shape 

361 num_offsets, offset_len = self.data.shape 

362 offset_inds = np.arange(offset_len) 

363 

364 row = offset_inds - self.offsets[:,None] 

365 mask = (row >= 0) 

366 mask &= (row < num_rows) 

367 mask &= (offset_inds < num_cols) 

368 mask &= (self.data != 0) 

369 row = row[mask] 

370 col = np.tile(offset_inds, num_offsets)[mask.ravel()] 

371 data = self.data[mask] 

372 

373 from .coo import coo_matrix 

374 A = coo_matrix((data,(row,col)), shape=self.shape, dtype=self.dtype) 

375 A.has_canonical_format = True 

376 return A 

377 

378 tocoo.__doc__ = spmatrix.tocoo.__doc__ 

379 

380 # needed by _data_matrix 

381 def _with_data(self, data, copy=True): 

382 """Returns a matrix with the same sparsity structure as self, 

383 but with different data. By default the structure arrays are copied. 

384 """ 

385 if copy: 

386 return dia_matrix((data, self.offsets.copy()), shape=self.shape) 

387 else: 

388 return dia_matrix((data,self.offsets), shape=self.shape) 

389 

390 def resize(self, *shape): 

391 shape = check_shape(shape) 

392 M, N = shape 

393 # we do not need to handle the case of expanding N 

394 self.data = self.data[:, :N] 

395 

396 if (M > self.shape[0] and 

397 np.any(self.offsets + self.shape[0] < self.data.shape[1])): 

398 # explicitly clear values that were previously hidden 

399 mask = (self.offsets[:, None] + self.shape[0] <= 

400 np.arange(self.data.shape[1])) 

401 self.data[mask] = 0 

402 

403 self._shape = shape 

404 

405 resize.__doc__ = spmatrix.resize.__doc__ 

406 

407 

408def isspmatrix_dia(x): 

409 """Is x of dia_matrix type? 

410 

411 Parameters 

412 ---------- 

413 x 

414 object to check for being a dia matrix 

415 

416 Returns 

417 ------- 

418 bool 

419 True if x is a dia matrix, False otherwise 

420 

421 Examples 

422 -------- 

423 >>> from scipy.sparse import dia_matrix, isspmatrix_dia 

424 >>> isspmatrix_dia(dia_matrix([[5]])) 

425 True 

426 

427 >>> from scipy.sparse import dia_matrix, csr_matrix, isspmatrix_dia 

428 >>> isspmatrix_dia(csr_matrix([[5]])) 

429 False 

430 """ 

431 return isinstance(x, dia_matrix)