Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1"""Base class for sparse matrices with a .data attribute 

2 

3 subclasses must provide a _with_data() method that 

4 creates a new matrix with the same sparsity pattern 

5 as self but with a different data array 

6 

7""" 

8 

9import numpy as np 

10 

11from .base import spmatrix, _ufuncs_with_fixed_point_at_zero 

12from .sputils import isscalarlike, validateaxis, matrix 

13 

14__all__ = [] 

15 

16 

17# TODO implement all relevant operations 

18# use .data.__methods__() instead of /=, *=, etc. 

class _data_matrix(spmatrix):
    """Common behaviour for sparse formats whose stored values live in ``.data``.

    Every method here that produces a new matrix delegates to
    ``self._with_data``, which this class does not define; it has to be
    supplied by the concrete subclass.
    """

    def __init__(self):
        spmatrix.__init__(self)

    def _get_dtype(self):
        # The matrix dtype is simply the dtype of the stored value array.
        return self.data.dtype

    def _set_dtype(self, newtype):
        # Assigns directly onto the value array's ``dtype`` attribute.
        self.data.dtype = newtype

    dtype = property(_get_dtype, _set_dtype)

    def _deduped_data(self):
        """Return ``self.data``, calling ``sum_duplicates()`` first if present."""
        if hasattr(self, 'sum_duplicates'):
            self.sum_duplicates()
        return self.data

    def __abs__(self):
        magnitudes = abs(self._deduped_data())
        return self._with_data(magnitudes)

    def __round__(self, ndigits=0):
        rounded = np.around(self._deduped_data(), decimals=ndigits)
        return self._with_data(rounded)

    def _real(self):
        real_part = self.data.real
        return self._with_data(real_part)

    def _imag(self):
        imag_part = self.data.imag
        return self._with_data(imag_part)

    def __neg__(self):
        is_boolean = self.dtype.kind == 'b'
        if is_boolean:
            raise NotImplementedError('negating a sparse boolean '
                                      'matrix is not supported')
        negated = -self.data
        return self._with_data(negated)

    def __imul__(self, other):  # self *= other
        # Only scalar operands are handled in place; anything else is
        # handed back to Python's operator dispatch.
        if not isscalarlike(other):
            return NotImplemented
        self.data *= other
        return self

    def __itruediv__(self, other):  # self /= other
        if not isscalarlike(other):
            return NotImplemented
        # Division is carried out as multiplication by the reciprocal.
        inverse = 1.0 / other
        self.data *= inverse
        return self

    def astype(self, dtype, casting='unsafe', copy=True):
        target = np.dtype(dtype)
        if self.dtype != target:
            converted = self._deduped_data().astype(
                target, casting=casting, copy=copy)
            return self._with_data(converted, copy=copy)
        # Same dtype already: hand back a copy or the matrix itself.
        if copy:
            return self.copy()
        return self

    astype.__doc__ = spmatrix.astype.__doc__

    def conj(self, copy=True):
        if np.issubdtype(self.dtype, np.complexfloating):
            conjugated = self.data.conj()
            return self._with_data(conjugated, copy=copy)
        # Non-complex dtypes take this path and are returned unchanged.
        if copy:
            return self.copy()
        return self

    conj.__doc__ = spmatrix.conj.__doc__

    def copy(self):
        duplicate = self.data.copy()
        return self._with_data(duplicate, copy=True)

    copy.__doc__ = spmatrix.copy.__doc__

    def count_nonzero(self):
        stored = self._deduped_data()
        return np.count_nonzero(stored)

    count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__

    def power(self, n, dtype=None):
        """
        Raise every stored value to the power `n`, element-wise.

        Parameters
        ----------
        n : scalar
            The exponent. Non-scalar input is rejected.

        dtype : data-type, optional
            If given, the stored values are cast to this type before the
            power is applied. If omitted, no cast is performed.

        Raises
        ------
        NotImplementedError
            If `n` is not scalar-like.
        """
        if not isscalarlike(n):
            raise NotImplementedError("input is not scalar")

        values = self._deduped_data()
        if dtype is not None:
            values = values.astype(dtype)
        return self._with_data(values ** n)

    ###########################
    # Multiplication handlers #
    ###########################

    def _mul_scalar(self, other):
        scaled = self.data * other
        return self._with_data(scaled)

125 

126 

# Attach to _data_matrix one method per NumPy unary ufunc satisfying
# func(0) = 0.  Each generated method applies the ufunc to the stored
# values only and rebuilds the matrix around the result.
for npfunc in _ufuncs_with_fixed_point_at_zero:
    name = npfunc.__name__

    def _create_method(op):
        # ``op`` is passed in as an argument so that every generated method
        # keeps its own ufunc rather than sharing the loop variable.
        def method(self):
            return self._with_data(op(self._deduped_data()), copy=True)

        method.__name__ = name
        method.__doc__ = ("Element-wise %s.\n\n"
                          "See `numpy.%s` for more information." % (name, name))

        return method

    setattr(_data_matrix, name, _create_method(npfunc))

143 

144 

def _find_missing_index(ind, n):
    """Return the first position ``k`` at which ``ind[k] != k``.

    When `ind` is a strictly increasing sequence of non-negative integers,
    the result is the smallest integer in ``[0, n)`` that does not occur in
    `ind`.

    Parameters
    ----------
    ind : iterable of int
        Indices to scan, compared one by one against their own position.
    n : int
        Exclusive upper bound on a valid missing index.

    Returns
    -------
    int
        The first mismatching position; if every position matches,
        ``len(ind)`` when that is smaller than `n`; otherwise -1.
    """
    # Seed ``k`` so that an empty ``ind`` falls through to position 0 instead
    # of leaving the loop variable unbound.
    k = -1
    for k, a in enumerate(ind):
        if k != a:
            return k

    # Every scanned position matched; the next candidate is one past the end.
    k += 1
    return k if k < n else -1

155 

156 

class _minmax_mixin(object):
    """Mixin for min and max methods.

    These are not implemented for dia_matrix, hence the separate class.
    """

    def _min_or_max_axis(self, axis, min_or_max):
        """Reduce along `axis` (0 or 1) with the binary ufunc `min_or_max`.

        Lines that hold fewer stored entries than their length contain
        implicit zeros, so their reduced value is additionally combined
        with 0.  Zero results are dropped and the rest is returned as a
        ``coo_matrix`` of shape ``(1, M)`` for ``axis == 0`` or ``(M, 1)``
        for ``axis == 1``.

        Raises
        ------
        ValueError
            If the matrix has length zero along `axis`.
        """
        N = self.shape[axis]
        if N == 0:
            raise ValueError("zero-size array to reduction operation")
        M = self.shape[1 - axis]

        # Choose the compressed format whose major axis is the one kept.
        mat = self.tocsc() if axis == 0 else self.tocsr()
        mat.sum_duplicates()

        major_index, value = mat._minor_reduce(min_or_max)
        # A line with fewer than N stored entries has at least one implicit
        # zero, which must take part in the reduction.
        not_full = np.diff(mat.indptr)[major_index] < N
        value[not_full] = min_or_max(value[not_full], 0)

        # Keep only the non-zero results.
        mask = value != 0
        major_index = np.compress(mask, major_index)
        value = np.compress(mask, value)

        from . import coo_matrix
        if axis == 0:
            return coo_matrix((value, (np.zeros(len(value)), major_index)),
                              dtype=self.dtype, shape=(1, M))
        else:
            return coo_matrix((value, (major_index, np.zeros(len(value)))),
                              dtype=self.dtype, shape=(M, 1))

    def _min_or_max(self, axis, out, min_or_max):
        """Shared implementation of `min` and `max`.

        `min_or_max` is the binary ufunc to reduce with.  With ``axis=None``
        a scalar is returned; otherwise the work is delegated to
        `_min_or_max_axis`.

        Raises
        ------
        ValueError
            If `out` is not None, if the matrix has a zero-length dimension
            (``axis=None``), or if `axis` is out of range.
        """
        if out is not None:
            raise ValueError(("Sparse matrices do not support "
                              "an 'out' parameter."))

        validateaxis(axis)

        if axis is None:
            if 0 in self.shape:
                raise ValueError("zero-size array to reduction operation")

            zero = self.dtype.type(0)
            if self.nnz == 0:
                return zero
            m = min_or_max.reduce(self._deduped_data().ravel())
            # Count cells with Python integers: a fixed-width product of the
            # shape can overflow for very large matrices.
            num_cells = int(self.shape[0]) * int(self.shape[1])
            if self.nnz != num_cells:
                # At least one implicit zero exists; include it.
                m = min_or_max(zero, m)
            return m

        if axis < 0:
            axis += 2

        if (axis == 0) or (axis == 1):
            return self._min_or_max_axis(axis, min_or_max)
        else:
            raise ValueError("axis out of range")

    def _arg_min_or_max_axis(self, axis, op, compare):
        """Per-line argmin/argmax along `axis`, counting implicit zeros.

        `op` locates the extreme among a line's stored values (e.g.
        ``np.argmax``) and ``compare(value, zero)`` tells whether that
        extreme beats an implicit zero (e.g. ``np.greater``).  Lines with no
        stored entries keep the initial result 0.

        Raises
        ------
        ValueError
            If the matrix has length zero along `axis`.
        """
        if self.shape[axis] == 0:
            raise ValueError("Can't apply the operation along a zero-sized "
                             "dimension.")

        if axis < 0:
            axis += 2

        zero = self.dtype.type(0)

        mat = self.tocsc() if axis == 0 else self.tocsr()
        mat.sum_duplicates()

        ret_size, line_size = mat._swap(mat.shape)
        ret = np.zeros(ret_size, dtype=int)

        # Only lines that hold at least one stored entry need inspecting.
        nz_lines, = np.nonzero(np.diff(mat.indptr))
        for i in nz_lines:
            p, q = mat.indptr[i:i + 2]
            data = mat.data[p:q]
            indices = mat.indices[p:q]
            am = op(data)
            m = data[am]
            if compare(m, zero) or q - p == line_size:
                # The stored extreme beats zero, or the line has no implicit
                # zeros at all.
                ret[i] = indices[am]
            else:
                zero_ind = _find_missing_index(indices, line_size)
                if m == zero:
                    # Tie between an explicit and an implicit zero: take
                    # whichever comes first.
                    ret[i] = min(am, zero_ind)
                else:
                    ret[i] = zero_ind

        if axis == 1:
            ret = ret.reshape(-1, 1)

        return matrix(ret)

    def _arg_min_or_max(self, axis, out, op, compare):
        """Shared implementation of `argmin` and `argmax`.

        With ``axis=None`` the result is the row-major flat index of the
        extreme element; otherwise the work is delegated to
        `_arg_min_or_max_axis`.

        Raises
        ------
        ValueError
            If `out` is not None, or if the matrix has a zero-length
            dimension (``axis=None``).
        """
        if out is not None:
            raise ValueError("Sparse matrices do not support "
                             "an 'out' parameter.")

        validateaxis(axis)

        if axis is not None:
            return self._arg_min_or_max_axis(axis, op, compare)

        if 0 in self.shape:
            raise ValueError("Can't apply the operation to "
                             "an empty matrix.")

        if self.nnz == 0:
            return 0

        zero = self.dtype.type(0)
        mat = self.tocoo()
        mat.sum_duplicates()
        am = op(mat.data)
        m = mat.data[am]
        num_col = int(mat.shape[1])

        if compare(m, zero):
            # Use Python integers: the index arrays may be 32-bit and
            # row * num_col can exceed that range on large matrices.
            return int(mat.row[am]) * num_col + int(mat.col[am])

        # Python-int product, so the cell count itself cannot overflow.
        size = int(mat.shape[0]) * num_col
        if size == mat.nnz:
            # No implicit zeros anywhere.
            return am

        # Widen before linearising, for the same overflow reason as above.
        ind = mat.row.astype(np.int64) * num_col + mat.col
        zero_ind = _find_missing_index(ind, size)
        if m == zero:
            # Tie between an explicit and an implicit zero: first one wins.
            return min(zero_ind, am)
        return zero_ind

    def max(self, axis=None, out=None):
        """
        Return the maximum of the matrix or maximum along an axis.
        This takes all elements into account, not just the non-zero ones.

        Parameters
        ----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the maximum is computed. The default is to
            compute the maximum over all the matrix elements, returning
            a scalar (i.e., `axis` = `None`).

        out : None, optional
            This argument is in the signature *solely* for NumPy
            compatibility reasons. Do not pass in anything except
            for the default value, as this argument is not used.

        Returns
        -------
        amax : coo_matrix or scalar
            Maximum of `a`. If `axis` is None, the result is a scalar value.
            If `axis` is given, the result is a sparse.coo_matrix of dimension
            ``a.ndim - 1``.

        See Also
        --------
        min : The minimum value of a sparse matrix along a given axis.
        numpy.matrix.max : NumPy's implementation of 'max' for matrices

        """
        return self._min_or_max(axis, out, np.maximum)

    def min(self, axis=None, out=None):
        """
        Return the minimum of the matrix or minimum along an axis.
        This takes all elements into account, not just the non-zero ones.

        Parameters
        ----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the minimum is computed. The default is to
            compute the minimum over all the matrix elements, returning
            a scalar (i.e., `axis` = `None`).

        out : None, optional
            This argument is in the signature *solely* for NumPy
            compatibility reasons. Do not pass in anything except for
            the default value, as this argument is not used.

        Returns
        -------
        amin : coo_matrix or scalar
            Minimum of `a`. If `axis` is None, the result is a scalar value.
            If `axis` is given, the result is a sparse.coo_matrix of dimension
            ``a.ndim - 1``.

        See Also
        --------
        max : The maximum value of a sparse matrix along a given axis.
        numpy.matrix.min : NumPy's implementation of 'min' for matrices

        """
        return self._min_or_max(axis, out, np.minimum)

    def argmax(self, axis=None, out=None):
        """Return indices of maximum elements along an axis.

        Implicit zero elements are also taken into account. If there are
        several maximum values, the index of the first occurrence is returned.

        Parameters
        ----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the argmax is computed. If None (default), index
            of the maximum element in the flattened data is returned.
        out : None, optional
            This argument is in the signature *solely* for NumPy
            compatibility reasons. Do not pass in anything except for
            the default value, as this argument is not used.

        Returns
        -------
        ind : numpy.matrix or int
            Indices of maximum elements. If matrix, its size along `axis` is 1.
        """
        return self._arg_min_or_max(axis, out, np.argmax, np.greater)

    def argmin(self, axis=None, out=None):
        """Return indices of minimum elements along an axis.

        Implicit zero elements are also taken into account. If there are
        several minimum values, the index of the first occurrence is returned.

        Parameters
        ----------
        axis : {-2, -1, 0, 1, None}, optional
            Axis along which the argmin is computed. If None (default), index
            of the minimum element in the flattened data is returned.
        out : None, optional
            This argument is in the signature *solely* for NumPy
            compatibility reasons. Do not pass in anything except for
            the default value, as this argument is not used.

        Returns
        -------
        ind : numpy.matrix or int
            Indices of minimum elements. If matrix, its size along `axis` is 1.
        """
        return self._arg_min_or_max(axis, out, np.argmin, np.less)