Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/numpy/core/memmap.py : 19%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import numpy as np
2from .numeric import uint8, ndarray, dtype
3from numpy.compat import (
4 os_fspath, contextlib_nullcontext, is_pathlib_path
5)
6from numpy.core.overrides import set_module
8__all__ = ['memmap']
# Alias for the imported ``dtype`` class.  ``memmap.__new__`` takes a
# parameter that is also called ``dtype``, so the class itself has to stay
# reachable under a different name.
dtypedescr = dtype

# Short file-mode codes accepted by ``memmap``.
valid_filemodes = ["r", "c", "r+", "w+"]

# Subset of the short mode codes that open the file for writing.
writeable_filemodes = ["r+", "w+"]

# Long, descriptive spellings and the short mode code each one stands for.
mode_equivalents = dict(
    readonly="r",
    copyonwrite="c",
    readwrite="r+",
    write="w+",
)
@set_module('numpy')
class memmap(ndarray):
    """Create a memory-map to an array stored in a *binary* file on disk.

    Memory-mapped files are used for accessing small segments of large files
    on disk, without reading the entire file into memory.  NumPy's
    memmap's are array-like objects.  This differs from Python's ``mmap``
    module, which uses file-like objects.

    This subclass of ndarray has some unpleasant interactions with
    some operations, because it doesn't quite fit properly as a subclass.
    An alternative to using this subclass is to create the ``mmap``
    object yourself, then create an ndarray with ndarray.__new__ directly,
    passing the object created in its 'buffer=' parameter.

    This class may at some point be turned into a factory function
    which returns a view into an mmap buffer.

    Delete the memmap instance to close the memmap file.


    Parameters
    ----------
    filename : str, file-like object, or pathlib.Path instance
        The file name or file object to be used as the array data buffer.
    dtype : data-type, optional
        The data-type used to interpret the file contents.
        Default is `uint8`.
    mode : {'r+', 'r', 'w+', 'c'}, optional
        The file is opened in this mode:

        +------+-------------------------------------------------------------+
        | 'r'  | Open existing file for reading only.                        |
        +------+-------------------------------------------------------------+
        | 'r+' | Open existing file for reading and writing.                 |
        +------+-------------------------------------------------------------+
        | 'w+' | Create or overwrite existing file for reading and writing.  |
        +------+-------------------------------------------------------------+
        | 'c'  | Copy-on-write: assignments affect data in memory, but       |
        |      | changes are not saved to disk.  The file on disk is         |
        |      | read-only.                                                  |
        +------+-------------------------------------------------------------+

        Default is 'r+'.  The long spellings ``'readonly'``,
        ``'copyonwrite'``, ``'readwrite'`` and ``'write'`` are accepted as
        equivalents of ``'r'``, ``'c'``, ``'r+'`` and ``'w+'``.
    offset : int, optional
        In the file, array data starts at this offset. Since `offset` is
        measured in bytes, it should normally be a multiple of the byte-size
        of `dtype`. When ``mode != 'r'``, even positive offsets beyond end of
        file are valid; The file will be extended to accommodate the
        additional data. By default, ``memmap`` will start at the beginning of
        the file, even if ``filename`` is a file pointer ``fp`` and
        ``fp.tell() != 0``.
    shape : int or sequence of ints, optional
        The desired shape of the array. If ``mode == 'r'`` and the number
        of remaining bytes after `offset` is not a multiple of the byte-size
        of `dtype`, you must specify `shape`. By default, the returned array
        will be 1-D with the number of elements determined by file size
        and data-type.  `shape` is required when ``mode == 'w+'``.
    order : {'C', 'F'}, optional
        Specify the order of the ndarray memory layout:
        :term:`row-major`, C-style or :term:`column-major`,
        Fortran-style.  This only has an effect if the shape is
        greater than 1-D.  The default order is 'C'.

    Attributes
    ----------
    filename : str or pathlib.Path instance
        Path to the mapped file.
    offset : int
        Offset position in the file.
    mode : str
        File mode.

    Methods
    -------
    flush
        Flush any changes in memory to file on disk.
        When you delete a memmap object, flush is called first to write
        changes to disk before removing the object.


    See also
    --------
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    The memmap object can be used anywhere an ndarray is accepted.
    Given a memmap ``fp``, ``isinstance(fp, numpy.ndarray)`` returns
    ``True``.

    Memory-mapped files cannot be larger than 2GB on 32-bit systems.

    When a memmap causes a file to be created or extended beyond its
    current size in the filesystem, the contents of the new part are
    unspecified. On systems with POSIX filesystem semantics, the extended
    part will be filled with zero bytes.

    Examples
    --------
    >>> data = np.arange(12, dtype='float32')
    >>> data.resize((3,4))

    This example uses a temporary file so that doctest doesn't write
    files to your directory. You would use a 'normal' filename.

    >>> from tempfile import mkdtemp
    >>> import os.path as path
    >>> filename = path.join(mkdtemp(), 'newfile.dat')

    Create a memmap with dtype and shape that matches our data:

    >>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
    >>> fp
    memmap([[0., 0., 0., 0.],
            [0., 0., 0., 0.],
            [0., 0., 0., 0.]], dtype=float32)

    Write data to memmap array:

    >>> fp[:] = data[:]
    >>> fp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    >>> fp.filename == path.abspath(filename)
    True

    Deletion flushes memory changes to disk before removing the object:

    >>> del fp

    Load the memmap and verify data was stored:

    >>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> newfp
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Read-only memmap:

    >>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
    >>> fpr.flags.writeable
    False

    Copy-on-write memmap:

    >>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
    >>> fpc.flags.writeable
    True

    It's possible to assign to copy-on-write array, but values are only
    written into the memory copy of the array, and not written to disk:

    >>> fpc
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)
    >>> fpc[0,:] = 0
    >>> fpc
    memmap([[  0.,   0.,   0.,   0.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    File on disk is unchanged:

    >>> fpr
    memmap([[  0.,   1.,   2.,   3.],
            [  4.,   5.,   6.,   7.],
            [  8.,   9.,  10.,  11.]], dtype=float32)

    Offset into a memmap:

    >>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
    >>> fpo
    memmap([  4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.], dtype=float32)

    """

    # Very low priority so that, in mixed operations, other array types take
    # precedence over memmap when the result type is chosen.
    __array_priority__ = -100.0

    def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
                shape=None, order='C'):
        """Open (or create) the file and build the array on top of the map.

        See the class docstring for the meaning of the parameters.

        Raises
        ------
        ValueError
            If `mode` is not a recognised mode, if ``mode == 'w+'`` and no
            `shape` is given, or if `shape` is omitted and the data
            available after `offset` is not a whole number of items.
        """
        # Import here to minimize 'import numpy' overhead
        import mmap
        import operator
        import os.path

        # Translate long spellings ("readonly", ...) to the short codes;
        # anything that is not a long spelling is kept and validated as-is.
        mode = mode_equivalents.get(mode, mode)
        if mode not in valid_filemodes:
            raise ValueError(
                "mode must be one of {!r} (got {!r})"
                .format(valid_filemodes + list(mode_equivalents.keys()), mode)
            )

        if mode == 'w+' and shape is None:
            raise ValueError("shape must be given")

        if hasattr(filename, 'read'):
            # Caller-owned file object: use it, but never close it for them.
            f_ctx = contextlib_nullcontext(filename)
        else:
            # Copy-on-write only needs read access to the file on disk.
            f_ctx = open(os_fspath(filename),
                         ('r' if mode == 'c' else mode) + 'b')

        with f_ctx as fid:
            # Seek to the end to learn the current length of the file.
            fid.seek(0, 2)
            flen = fid.tell()
            descr = dtypedescr(dtype)
            itemsize = descr.itemsize

            if shape is None:
                # Infer a 1-D shape from whatever follows `offset`.
                available = flen - offset
                if available % itemsize:
                    raise ValueError("Size of available data is not a "
                                     "multiple of the data-type size.")
                size = available // itemsize
                shape = (size,)
            else:
                if not isinstance(shape, (tuple, list)):
                    # Integer-like scalars describe a 1-D array.  Anything
                    # else is handed to tuple() below, so other integer
                    # sequences are accepted as well.
                    try:
                        shape = [operator.index(shape)]
                    except TypeError:
                        pass
                shape = tuple(shape)
                # avoid default choice of np.int_, which might overflow
                size = np.intp(1)
                for dim in shape:
                    size *= dim

            # Total number of file bytes needed, counted from the file start.
            nbytes = int(offset + size * itemsize)

            if mode in ('w+', 'r+') and flen < nbytes:
                # Grow the file to the required length by writing its last
                # byte.
                fid.seek(nbytes - 1, 0)
                fid.write(b'\0')
                fid.flush()

            if mode == 'c':
                acc = mmap.ACCESS_COPY
            elif mode == 'r':
                acc = mmap.ACCESS_READ
            else:
                acc = mmap.ACCESS_WRITE

            # mmap offsets must be multiples of the allocation granularity:
            # map from the preceding boundary and skip the remainder inside
            # the buffer instead.
            start = offset - offset % mmap.ALLOCATIONGRANULARITY
            nbytes -= start
            array_offset = offset - start
            mm = mmap.mmap(fid.fileno(), nbytes, access=acc, offset=start)

            self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
                                   offset=array_offset, order=order)
            self._mmap = mm
            self.offset = offset
            self.mode = mode

            if is_pathlib_path(filename):
                # special case - if we were constructed with a pathlib.path,
                # then filename is a path object, not a string
                self.filename = filename.resolve()
            elif hasattr(fid, "name") and isinstance(fid.name, str):
                # py3 returns int for TemporaryFile().name
                self.filename = os.path.abspath(fid.name)
            # same as memmap copies (e.g. memmap + 1)
            else:
                self.filename = None

        return self

    def __array_finalize__(self, obj):
        """Copy the mapping bookkeeping from `obj`, or reset it to ``None``.

        The attributes are inherited only when `obj` carries a ``_mmap``
        attribute and ``np.may_share_memory`` reports that the new array may
        share memory with it; otherwise the new array is not tied to the
        file and all four attributes are set to ``None``.
        """
        if hasattr(obj, '_mmap') and np.may_share_memory(self, obj):
            self._mmap = obj._mmap
            self.filename = obj.filename
            self.offset = obj.offset
            self.mode = obj.mode
        else:
            self._mmap = None
            self.filename = None
            self.offset = None
            self.mode = None

    def flush(self):
        """
        Write any changes in the array to the file on disk.

        For further information, see `memmap`.

        Parameters
        ----------
        None

        See Also
        --------
        memmap

        """
        # Delegates to the base object's ``flush``; does nothing when there
        # is no base or the base has no such method.
        if self.base is not None and hasattr(self.base, 'flush'):
            self.base.flush()

    def __array_wrap__(self, arr, context=None):
        """Wrap a ufunc result, demoting it unless it is `self` itself."""
        arr = super().__array_wrap__(arr, context)

        # Return a memmap if a memmap was given as the output of the
        # ufunc. Leave the arr class unchanged if self is not a memmap
        # to keep original memmap subclasses behavior
        if self is arr or type(self) is not memmap:
            return arr
        # Return scalar instead of 0d memmap, e.g. for np.sum with
        # axis=None
        if arr.shape == ():
            return arr[()]
        # Return ndarray otherwise
        return arr.view(np.ndarray)

    def __getitem__(self, index):
        """Index the array; results with no ``_mmap`` become plain ndarrays."""
        res = super().__getitem__(index)
        if type(res) is memmap and res._mmap is None:
            return res.view(type=ndarray)
        return res