Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/scipy/sparse/lil.py : 20%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""List of Lists sparse matrix class
2"""
4__docformat__ = "restructuredtext en"
6__all__ = ['lil_matrix', 'isspmatrix_lil']
8from bisect import bisect_left
10import numpy as np
12from .base import spmatrix, isspmatrix
13from ._index import IndexMixin, INT_TYPES, _broadcast_arrays
14from .sputils import (getdtype, isshape, isscalarlike, upcast_scalar,
15 get_index_dtype, check_shape, check_reshape_kwargs,
16 asmatrix)
17from . import _csparsetools
20class lil_matrix(spmatrix, IndexMixin):
21 """Row-based list of lists sparse matrix
23 This is a structure for constructing sparse matrices incrementally.
24 Note that inserting a single item can take linear time in the worst case;
25 to construct a matrix efficiently, make sure the items are pre-sorted by
26 index, per row.
28 This can be instantiated in several ways:
29 lil_matrix(D)
30 with a dense matrix or rank-2 ndarray D
32 lil_matrix(S)
33 with another sparse matrix S (equivalent to S.tolil())
35 lil_matrix((M, N), [dtype])
36 to construct an empty matrix with shape (M, N)
37 dtype is optional, defaulting to dtype='d'.
39 Attributes
40 ----------
41 dtype : dtype
42 Data type of the matrix
43 shape : 2-tuple
44 Shape of the matrix
45 ndim : int
46 Number of dimensions (this is always 2)
47 nnz
48 Number of stored values, including explicit zeros
49 data
50 LIL format data array of the matrix
51 rows
52 LIL format row index array of the matrix
54 Notes
55 -----
57 Sparse matrices can be used in arithmetic operations: they support
58 addition, subtraction, multiplication, division, and matrix power.
60 Advantages of the LIL format
61 - supports flexible slicing
62 - changes to the matrix sparsity structure are efficient
64 Disadvantages of the LIL format
65 - arithmetic operations LIL + LIL are slow (consider CSR or CSC)
66 - slow column slicing (consider CSC)
67 - slow matrix vector products (consider CSR or CSC)
69 Intended Usage
70 - LIL is a convenient format for constructing sparse matrices
71 - once a matrix has been constructed, convert to CSR or
72 CSC format for fast arithmetic and matrix vector operations
73 - consider using the COO format when constructing large matrices
75 Data Structure
76 - An array (``self.rows``) of rows, each of which is a sorted
77 list of column indices of non-zero elements.
78 - The corresponding nonzero values are stored in similar
79 fashion in ``self.data``.
82 """
83 format = 'lil'
85 def __init__(self, arg1, shape=None, dtype=None, copy=False):
86 spmatrix.__init__(self)
87 self.dtype = getdtype(dtype, arg1, default=float)
89 # First get the shape
90 if isspmatrix(arg1):
91 if isspmatrix_lil(arg1) and copy:
92 A = arg1.copy()
93 else:
94 A = arg1.tolil()
96 if dtype is not None:
97 A = A.astype(dtype, copy=False)
99 self._shape = check_shape(A.shape)
100 self.dtype = A.dtype
101 self.rows = A.rows
102 self.data = A.data
103 elif isinstance(arg1,tuple):
104 if isshape(arg1):
105 if shape is not None:
106 raise ValueError('invalid use of shape parameter')
107 M, N = arg1
108 self._shape = check_shape((M, N))
109 self.rows = np.empty((M,), dtype=object)
110 self.data = np.empty((M,), dtype=object)
111 for i in range(M):
112 self.rows[i] = []
113 self.data[i] = []
114 else:
115 raise TypeError('unrecognized lil_matrix constructor usage')
116 else:
117 # assume A is dense
118 try:
119 A = asmatrix(arg1)
120 except TypeError:
121 raise TypeError('unsupported matrix type')
122 else:
123 from .csr import csr_matrix
124 A = csr_matrix(A, dtype=dtype).tolil()
126 self._shape = check_shape(A.shape)
127 self.dtype = A.dtype
128 self.rows = A.rows
129 self.data = A.data
131 def __iadd__(self,other):
132 self[:,:] = self + other
133 return self
135 def __isub__(self,other):
136 self[:,:] = self - other
137 return self
139 def __imul__(self,other):
140 if isscalarlike(other):
141 self[:,:] = self * other
142 return self
143 else:
144 return NotImplemented
146 def __itruediv__(self,other):
147 if isscalarlike(other):
148 self[:,:] = self / other
149 return self
150 else:
151 return NotImplemented
153 # Whenever the dimensions change, empty lists should be created for each
154 # row
156 def getnnz(self, axis=None):
157 if axis is None:
158 return sum([len(rowvals) for rowvals in self.data])
159 if axis < 0:
160 axis += 2
161 if axis == 0:
162 out = np.zeros(self.shape[1], dtype=np.intp)
163 for row in self.rows:
164 out[row] += 1
165 return out
166 elif axis == 1:
167 return np.array([len(rowvals) for rowvals in self.data], dtype=np.intp)
168 else:
169 raise ValueError('axis out of bounds')
171 def count_nonzero(self):
172 return sum(np.count_nonzero(rowvals) for rowvals in self.data)
174 getnnz.__doc__ = spmatrix.getnnz.__doc__
175 count_nonzero.__doc__ = spmatrix.count_nonzero.__doc__
177 def __str__(self):
178 val = ''
179 for i, row in enumerate(self.rows):
180 for pos, j in enumerate(row):
181 val += " %s\t%s\n" % (str((i, j)), str(self.data[i][pos]))
182 return val[:-1]
184 def getrowview(self, i):
185 """Returns a view of the 'i'th row (without copying).
186 """
187 new = lil_matrix((1, self.shape[1]), dtype=self.dtype)
188 new.rows[0] = self.rows[i]
189 new.data[0] = self.data[i]
190 return new
192 def getrow(self, i):
193 """Returns a copy of the 'i'th row.
194 """
195 M, N = self.shape
196 if i < 0:
197 i += M
198 if i < 0 or i >= M:
199 raise IndexError('row index out of bounds')
200 new = lil_matrix((1, N), dtype=self.dtype)
201 new.rows[0] = self.rows[i][:]
202 new.data[0] = self.data[i][:]
203 return new
205 def __getitem__(self, key):
206 # Fast path for simple (int, int) indexing.
207 if (isinstance(key, tuple) and len(key) == 2 and
208 isinstance(key[0], INT_TYPES) and
209 isinstance(key[1], INT_TYPES)):
210 # lil_get1 handles validation for us.
211 return self._get_intXint(*key)
212 # Everything else takes the normal path.
213 return IndexMixin.__getitem__(self, key)
215 def _asindices(self, idx, N):
216 # LIL routines handle bounds-checking for us, so don't do it here.
217 try:
218 x = np.asarray(idx)
219 except (ValueError, TypeError, MemoryError):
220 raise IndexError('invalid index')
221 if x.ndim not in (1, 2):
222 raise IndexError('Index dimension must be <= 2')
223 return x
225 def _get_intXint(self, row, col):
226 v = _csparsetools.lil_get1(self.shape[0], self.shape[1], self.rows,
227 self.data, row, col)
228 return self.dtype.type(v)
230 def _get_sliceXint(self, row, col):
231 row = range(*row.indices(self.shape[0]))
232 return self._get_row_ranges(row, slice(col, col+1))
234 def _get_arrayXint(self, row, col):
235 return self._get_row_ranges(row, slice(col, col+1))
237 def _get_intXslice(self, row, col):
238 return self._get_row_ranges((row,), col)
240 def _get_sliceXslice(self, row, col):
241 row = range(*row.indices(self.shape[0]))
242 return self._get_row_ranges(row, col)
244 def _get_arrayXslice(self, row, col):
245 return self._get_row_ranges(row, col)
247 def _get_intXarray(self, row, col):
248 row = np.array(row, dtype=col.dtype, ndmin=1)
249 return self._get_columnXarray(row, col)
251 def _get_sliceXarray(self, row, col):
252 row = np.arange(*row.indices(self.shape[0]))
253 return self._get_columnXarray(row, col)
255 def _get_columnXarray(self, row, col):
256 # outer indexing
257 row, col = _broadcast_arrays(row[:,None], col)
258 return self._get_arrayXarray(row, col)
260 def _get_arrayXarray(self, row, col):
261 # inner indexing
262 i, j = map(np.atleast_2d, _prepare_index_for_memoryview(row, col))
263 new = lil_matrix(i.shape, dtype=self.dtype)
264 _csparsetools.lil_fancy_get(self.shape[0], self.shape[1],
265 self.rows, self.data,
266 new.rows, new.data,
267 i, j)
268 return new
270 def _get_row_ranges(self, rows, col_slice):
271 """
272 Fast path for indexing in the case where column index is slice.
274 This gains performance improvement over brute force by more
275 efficient skipping of zeros, by accessing the elements
276 column-wise in order.
278 Parameters
279 ----------
280 rows : sequence or range
281 Rows indexed. If range, must be within valid bounds.
282 col_slice : slice
283 Columns indexed
285 """
286 j_start, j_stop, j_stride = col_slice.indices(self.shape[1])
287 col_range = range(j_start, j_stop, j_stride)
288 nj = len(col_range)
289 new = lil_matrix((len(rows), nj), dtype=self.dtype)
291 _csparsetools.lil_get_row_ranges(self.shape[0], self.shape[1],
292 self.rows, self.data,
293 new.rows, new.data,
294 rows,
295 j_start, j_stop, j_stride, nj)
297 return new
299 def _set_intXint(self, row, col, x):
300 _csparsetools.lil_insert(self.shape[0], self.shape[1], self.rows,
301 self.data, row, col, x)
303 def _set_arrayXarray(self, row, col, x):
304 i, j, x = map(np.atleast_2d, _prepare_index_for_memoryview(row, col, x))
305 _csparsetools.lil_fancy_set(self.shape[0], self.shape[1],
306 self.rows, self.data,
307 i, j, x)
309 def _set_arrayXarray_sparse(self, row, col, x):
310 # Special case: full matrix assignment
311 if (x.shape == self.shape and
312 isinstance(row, slice) and row == slice(None) and
313 isinstance(col, slice) and col == slice(None)):
314 x = lil_matrix(x, dtype=self.dtype)
315 self.rows = x.rows
316 self.data = x.data
317 return
318 # Fall back to densifying x
319 x = np.asarray(x.toarray(), dtype=self.dtype)
320 x, _ = _broadcast_arrays(x, row)
321 self._set_arrayXarray(row, col, x)
323 def __setitem__(self, key, x):
324 # Fast path for simple (int, int) indexing.
325 if (isinstance(key, tuple) and len(key) == 2 and
326 isinstance(key[0], INT_TYPES) and
327 isinstance(key[1], INT_TYPES)):
328 x = self.dtype.type(x)
329 if x.size > 1:
330 raise ValueError("Trying to assign a sequence to an item")
331 return self._set_intXint(key[0], key[1], x)
332 # Everything else takes the normal path.
333 IndexMixin.__setitem__(self, key, x)
335 def _mul_scalar(self, other):
336 if other == 0:
337 # Multiply by zero: return the zero matrix
338 new = lil_matrix(self.shape, dtype=self.dtype)
339 else:
340 res_dtype = upcast_scalar(self.dtype, other)
342 new = self.copy()
343 new = new.astype(res_dtype)
344 # Multiply this scalar by every element.
345 for j, rowvals in enumerate(new.data):
346 new.data[j] = [val*other for val in rowvals]
347 return new
349 def __truediv__(self, other): # self / other
350 if isscalarlike(other):
351 new = self.copy()
352 # Divide every element by this scalar
353 for j, rowvals in enumerate(new.data):
354 new.data[j] = [val/other for val in rowvals]
355 return new
356 else:
357 return self.tocsr() / other
359 def copy(self):
360 M, N = self.shape
361 new = lil_matrix(self.shape, dtype=self.dtype)
362 # This is ~14x faster than calling deepcopy() on rows and data.
363 _csparsetools.lil_get_row_ranges(M, N, self.rows, self.data,
364 new.rows, new.data, range(M),
365 0, N, 1, N)
366 return new
368 copy.__doc__ = spmatrix.copy.__doc__
370 def reshape(self, *args, **kwargs):
371 shape = check_shape(args, self.shape)
372 order, copy = check_reshape_kwargs(kwargs)
374 # Return early if reshape is not required
375 if shape == self.shape:
376 if copy:
377 return self.copy()
378 else:
379 return self
381 new = lil_matrix(shape, dtype=self.dtype)
383 if order == 'C':
384 ncols = self.shape[1]
385 for i, row in enumerate(self.rows):
386 for col, j in enumerate(row):
387 new_r, new_c = np.unravel_index(i * ncols + j, shape)
388 new[new_r, new_c] = self[i, j]
389 elif order == 'F':
390 nrows = self.shape[0]
391 for i, row in enumerate(self.rows):
392 for col, j in enumerate(row):
393 new_r, new_c = np.unravel_index(i + j * nrows, shape, order)
394 new[new_r, new_c] = self[i, j]
395 else:
396 raise ValueError("'order' must be 'C' or 'F'")
398 return new
400 reshape.__doc__ = spmatrix.reshape.__doc__
402 def resize(self, *shape):
403 shape = check_shape(shape)
404 new_M, new_N = shape
405 M, N = self.shape
407 if new_M < M:
408 self.rows = self.rows[:new_M]
409 self.data = self.data[:new_M]
410 elif new_M > M:
411 self.rows = np.resize(self.rows, new_M)
412 self.data = np.resize(self.data, new_M)
413 for i in range(M, new_M):
414 self.rows[i] = []
415 self.data[i] = []
417 if new_N < N:
418 for row, data in zip(self.rows, self.data):
419 trunc = bisect_left(row, new_N)
420 del row[trunc:]
421 del data[trunc:]
423 self._shape = shape
425 resize.__doc__ = spmatrix.resize.__doc__
427 def toarray(self, order=None, out=None):
428 d = self._process_toarray_args(order, out)
429 for i, row in enumerate(self.rows):
430 for pos, j in enumerate(row):
431 d[i, j] = self.data[i][pos]
432 return d
434 toarray.__doc__ = spmatrix.toarray.__doc__
436 def transpose(self, axes=None, copy=False):
437 return self.tocsr(copy=copy).transpose(axes=axes, copy=False).tolil(copy=False)
439 transpose.__doc__ = spmatrix.transpose.__doc__
441 def tolil(self, copy=False):
442 if copy:
443 return self.copy()
444 else:
445 return self
447 tolil.__doc__ = spmatrix.tolil.__doc__
449 def tocsr(self, copy=False):
450 from .csr import csr_matrix
452 M, N = self.shape
453 if M == 0 or N == 0:
454 return csr_matrix((M, N), dtype=self.dtype)
456 # construct indptr array
457 if M*N <= np.iinfo(np.int32).max:
458 # fast path: it is known that 64-bit indexing will not be needed.
459 idx_dtype = np.int32
460 indptr = np.empty(M + 1, dtype=idx_dtype)
461 indptr[0] = 0
462 _csparsetools.lil_get_lengths(self.rows, indptr[1:])
463 np.cumsum(indptr, out=indptr)
464 nnz = indptr[-1]
465 else:
466 idx_dtype = get_index_dtype(maxval=N)
467 lengths = np.empty(M, dtype=idx_dtype)
468 _csparsetools.lil_get_lengths(self.rows, lengths)
469 nnz = lengths.sum()
470 idx_dtype = get_index_dtype(maxval=max(N, nnz))
471 indptr = np.empty(M + 1, dtype=idx_dtype)
472 indptr[0] = 0
473 np.cumsum(lengths, dtype=idx_dtype, out=indptr[1:])
475 indices = np.empty(nnz, dtype=idx_dtype)
476 data = np.empty(nnz, dtype=self.dtype)
477 _csparsetools.lil_flatten_to_array(self.rows, indices)
478 _csparsetools.lil_flatten_to_array(self.data, data)
480 # init csr matrix
481 return csr_matrix((data, indices, indptr), shape=self.shape)
483 tocsr.__doc__ = spmatrix.tocsr.__doc__
486def _prepare_index_for_memoryview(i, j, x=None):
487 """
488 Convert index and data arrays to form suitable for passing to the
489 Cython fancy getset routines.
491 The conversions are necessary since to (i) ensure the integer
492 index arrays are in one of the accepted types, and (ii) to ensure
493 the arrays are writable so that Cython memoryview support doesn't
494 choke on them.
496 Parameters
497 ----------
498 i, j
499 Index arrays
500 x : optional
501 Data arrays
503 Returns
504 -------
505 i, j, x
506 Re-formatted arrays (x is omitted, if input was None)
508 """
509 if i.dtype > j.dtype:
510 j = j.astype(i.dtype)
511 elif i.dtype < j.dtype:
512 i = i.astype(j.dtype)
514 if not i.flags.writeable or i.dtype not in (np.int32, np.int64):
515 i = i.astype(np.intp)
516 if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
517 j = j.astype(np.intp)
519 if x is not None:
520 if not x.flags.writeable:
521 x = x.copy()
522 return i, j, x
523 else:
524 return i, j
527def isspmatrix_lil(x):
528 """Is x of lil_matrix type?
530 Parameters
531 ----------
532 x
533 object to check for being a lil matrix
535 Returns
536 -------
537 bool
538 True if x is a lil matrix, False otherwise
540 Examples
541 --------
542 >>> from scipy.sparse import lil_matrix, isspmatrix_lil
543 >>> isspmatrix_lil(lil_matrix([[5]]))
544 True
546 >>> from scipy.sparse import lil_matrix, csr_matrix, isspmatrix_lil
547 >>> isspmatrix_lil(csr_matrix([[5]]))
548 False
549 """
550 return isinstance(x, lil_matrix)