Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/numpy/lib/npyio.py : 7%

import sys
import os
import re
import functools
import itertools
import warnings
import weakref
import contextlib
from operator import itemgetter, index as opindex
from collections.abc import Mapping

import numpy as np
from . import format
from ._datasource import DataSource
from numpy.core import overrides
from numpy.core.multiarray import packbits, unpackbits
from numpy.core.overrides import set_module
from numpy.core._internal import recursive
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like,
    has_nested_fields, flatten_dtype, easy_dtype, _decode_line
    )

from numpy.compat import (
    asbytes, asstr, asunicode, bytes, os_fspath, os_PathLike,
    pickle, contextlib_nullcontext
    )


@set_module('numpy')
def loads(*args, **kwargs):
    # NumPy 1.15.0, 2017-12-10
    warnings.warn(
        "np.loads is deprecated, use pickle.loads instead",
        DeprecationWarning, stacklevel=2)
    return pickle.loads(*args, **kwargs)


__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key)

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
    """
    if not hasattr(file, 'read'):
        file = os_fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)
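

# A minimal standalone sketch (the helper below is illustrative, not part of
# the original module): np.savez stores each array as a '<name>.npy' member
# of an ordinary zip archive, which zipfile_factory opens with the stdlib
# zipfile module.
def _example_npz_members():
    import os
    import tempfile
    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, 'demo.npz')
        np.savez(path, x=np.arange(3))
        zf = zipfile_factory(path)
        # NpzFile strips this '.npy' suffix to build its `files` list.
        assert zf.namelist() == ['x.npy']
        zf.close()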


class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension;
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute access can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)

    # deprecate the Python 2 dict APIs that we supported by accident in
    # Python 3. We forgot to implement itervalues() at all in earlier
    # versions of numpy, so no need to deprecate it here.

    def iteritems(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iteritems is deprecated in python 3, to match the "
            "removal of dict.iteritems. Use .items() instead.",
            DeprecationWarning, stacklevel=2)
        return self.items()

    def iterkeys(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iterkeys is deprecated in python 3, to match the "
            "removal of dict.iterkeys. Use .keys() instead.",
            DeprecationWarning, stacklevel=2)
        return self.keys()
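

# A minimal sketch of the Mapping interface implemented above (the helper
# name is illustrative): iteration and len() go through `files`, and each
# lookup lazily reads a single archive member.
def _example_npzfile_mapping():
    import os
    import tempfile
    with tempfile.TemporaryDirectory() as d:
        path = os.path.join(d, 'demo.npz')
        np.savez(path, a=np.zeros(2), b=np.ones(2))
        with np.load(path) as npz:
            assert sorted(npz) == ['a', 'b']             # __iter__
            assert len(npz) == 2                         # __len__
            assert dict(npz.items())['b'][0] == 1.0      # lazy __getitem__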


@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII'):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    ValueError
        The file contains an object array, but ``allow_pickle=False`` was
        given.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy.core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict' can
        # similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os_fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string or Path, a ``.npy``
        extension will be appended to the filename if it does not already
        have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for
        disallowing pickles include security (loading pickled data can execute
        arbitrary code) and portability (pickled objects may not be loadable
        on different Python installations, for example if the stored objects
        require libraries that are not available, and not all pickled data is
        compatible between Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    [1 2] [1 3]
    """
    if hasattr(file, 'write'):
        file_ctx = contextlib_nullcontext(file)
    else:
        file = os_fspath(file)
        if not file.endswith('.npy'):
            file = file + '.npy'
        file_ctx = open(file, "wb")

    with file_ctx as fid:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr, allow_pickle=allow_pickle,
                           pickle_kwargs=dict(fix_imports=fix_imports))


def _savez_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_dispatcher)
def savez(file, *args, **kwds):
    """Save several arrays into a single file in uncompressed ``.npz`` format.

    If arguments are passed in with no keywords, the corresponding variable
    names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword
    arguments are given, the corresponding variable names, in the ``.npz``
    file will match the keyword names.

    Parameters
    ----------
    file : str or file
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    save : Save a single array to a binary file in NumPy format.
    savetxt : Save an array to a file as plain text.
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain. The archive is not compressed and each file
    in the archive contains one variable in ``.npy`` format. For a
    description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    When saving dictionaries, the dictionary keys become filenames
    inside the ZIP archive. Therefore, keys should be valid filenames.
    E.g., avoid keys that begin with ``/`` or contain ``.``.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)

    Using `savez` with \\*args, the arrays are saved with default names.

    >>> np.savez(outfile, x, y)
    >>> _ = outfile.seek(0)  # Only needed here to simulate closing & reopening file
    >>> npzfile = np.load(outfile)
    >>> npzfile.files
    ['arr_0', 'arr_1']
    >>> npzfile['arr_0']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    Using `savez` with \\**kwds, the arrays are saved with the keyword names.

    >>> outfile = TemporaryFile()
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)
    >>> npzfile = np.load(outfile)
    >>> sorted(npzfile.files)
    ['x', 'y']
    >>> npzfile['x']
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    """
    _savez(file, args, kwds, False)


def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    If keyword arguments are given, then filenames are taken from the keywords.
    If arguments are passed in with no keywords, then stored filenames are
    arr_0, arr_1, etc.

    Parameters
    ----------
    file : str or file
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez_compressed`, the arrays
        will be saved with names "arr_0", "arr_1", and so on. These arguments
        can be any expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain. The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.

    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    if sys.version_info >= (3, 6):
        # Since Python 3.6 it is possible to write directly to a ZIP file.
        for key, val in namedict.items():
            fname = key + '.npy'
            val = np.asanyarray(val)
            # always force zip64, gh-10776
            with zipf.open(fname, 'w', force_zip64=True) as fid:
                format.write_array(fid, val,
                                   allow_pickle=allow_pickle,
                                   pickle_kwargs=pickle_kwargs)
    else:
        # Stage arrays in a temporary file on disk, before writing to zip.

        # Import deferred for startup time improvement
        import tempfile
        # Since target file might be big enough to exceed capacity of a global
        # temporary directory, create temp file side-by-side with the target file.
        file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp')
        fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy')
        os.close(fd)
        try:
            for key, val in namedict.items():
                fname = key + '.npy'
                fid = open(tmpfile, 'wb')
                try:
                    format.write_array(fid, np.asanyarray(val),
                                       allow_pickle=allow_pickle,
                                       pickle_kwargs=pickle_kwargs)
                    fid.close()
                    fid = None
                    zipf.write(tmpfile, arcname=fname)
                except IOError as exc:
                    raise IOError("Failed to write to %s: %s" % (tmpfile, exc))
                finally:
                    if fid:
                        fid.close()
        finally:
            os.remove(tmpfile)

    zipf.close()
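

# A minimal sketch of the naming rule in _savez above (helper name is
# illustrative): positional arrays become 'arr_0', 'arr_1', ..., and a
# keyword that collides with a generated name raises ValueError.
def _example_savez_naming():
    import io
    buf = io.BytesIO()
    np.savez(buf, np.zeros(1), np.ones(1), extra=np.arange(2))
    buf.seek(0)
    with np.load(buf) as npz:
        assert sorted(npz.files) == ['arr_0', 'arr_1', 'extra']
    try:
        np.savez(io.BytesIO(), np.zeros(1), arr_0=np.ones(1))
    except ValueError:
        pass  # "Cannot use un-named variables and keyword arr_0"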


def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """

    def floatconv(x):
        x = x.lower()
        if '0x' in x:
            return float.fromhex(x)
        return float(x)

    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.longdouble):
        return np.longdouble
    elif issubclass(typ, np.floating):
        return floatconv
    elif issubclass(typ, complex):
        return lambda x: complex(asstr(x).replace('+-', '-'))
    elif issubclass(typ, np.bytes_):
        return asbytes
    elif issubclass(typ, np.unicode_):
        return asunicode
    else:
        return asstr
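

# A minimal sketch of the converter selection above (helper name is
# illustrative): small integer dtypes go through float() first so '3.0'
# still parses, and floats accept the hexadecimal strings produced by
# float.hex().
def _example_getconv():
    assert _getconv(np.dtype('i2'))('3.0') == 3
    assert _getconv(np.dtype(float))('0x1.8p3') == 12.0
    assert _getconv(np.dtype(bool))('0') is False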


# amount of lines loadtxt reads in one chunk, can be overridden for testing
_loadtxt_chunksize = 50000


@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding='bytes', max_rows=None):
    r"""
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read. If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings.
    dtype : data-type, optional
        Data-type of the resulting array; default: float. If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array. In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The string used to separate values. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will parse the
        column string into the desired value. E.g., if column 0 is a date
        string: ``converters = {0: datestr2num}``. Converters can also be
        used to provide a default value for missing data (but see also
        `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
        Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple. E.g., ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``. When used with a structured
        data-type, arrays are returned for each field. Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams. The special value 'bytes' enables backward compatibility
        workarounds that ensure you receive byte arrays as results if possible
        and pass 'latin1' encoded strings to converters. Override this value to
        receive unicode arrays and pass strings as input to converters. If set
        to None the system default is used. The default value is 'bytes'.

        .. versionadded:: 1.14.0
    max_rows : int, optional
        Read `max_rows` lines of content after `skiprows` lines. The default
        is to read all the lines.

        .. versionadded:: 1.16.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files. The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94')
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith(b'-') else float(fld)
    ...
    >>> np.loadtxt(s, converters={0: conv, 1: conv})
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])
    """
    # Type conversions for Py3 convenience
    if comments is not None:
        if isinstance(comments, (str, bytes)):
            comments = [comments]
        comments = [_decode_line(x) for x in comments]
        # Compile regex for comments beforehand
        comments = (re.escape(comment) for comment in comments)
        regex_comments = re.compile('|'.join(comments))

    if delimiter is not None:
        delimiter = _decode_line(delimiter)

    user_converters = converters

    if encoding == 'bytes':
        encoding = None
        byte_converters = True
    else:
        byte_converters = False

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints
        try:
            usecols_as_list = list(usecols)
        except TypeError:
            usecols_as_list = [usecols]
        for col_idx in usecols_as_list:
            try:
                opindex(col_idx)
            except TypeError as e:
                e.args = (
                    "usecols must be an int or a sequence of ints but "
                    "it contains at least one element of type %s" %
                    type(col_idx),
                    )
                raise
        # Fall back to existing code
        usecols = usecols_as_list

    fown = False
    try:
        if isinstance(fname, os_PathLike):
            fname = os_fspath(fname)
        if _is_string_like(fname):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            fencoding = getattr(fh, 'encoding', 'latin1')
            fh = iter(fh)
            fown = True
        else:
            fh = iter(fname)
            fencoding = getattr(fname, 'encoding', 'latin1')
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')

    # input may be a python2 io stream
    if encoding is not None:
        fencoding = encoding
    # we must assume local encoding
    # TODO emit portability warning?
    elif fencoding is None:
        import locale
        fencoding = locale.getpreferredencoding()

    # not to be confused with the flatten_dtype we import...
    @recursive
    def flatten_dtype_internal(self, dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = self(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if tp.ndim > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)

    @recursive
    def pack_items(self, items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(self(items[start:start+length], subpacking))
                start += length
            return tuple(ret)

    def split_line(line):
        """Chop off comments, strip, and split at delimiter. """
        line = _decode_line(line, encoding=encoding)

        if comments is not None:
            line = regex_comments.split(line, maxsplit=1)[0]
        line = line.strip('\r\n')
        if line:
            return line.split(delimiter)
        else:
            return []

    def read_data(chunk_size):
        """Parse each line, including the first.

        The file handle, `fh`, comes from the enclosing scope.

        Parameters
        ----------
        chunk_size : int
            At most `chunk_size` lines are read at a time, with iteration
            until all lines are read.

        """
        X = []
        line_iter = itertools.chain([first_line], fh)
        line_iter = itertools.islice(line_iter, max_rows)
        for i, line in enumerate(line_iter):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[j] for j in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]

            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
            if len(X) > chunk_size:
                yield X
                X = []
        if X:
            yield X
    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname,
                          stacklevel=2)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype_internal(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            if byte_converters:
                # converters may use decode to workaround numpy's old
                # behaviour, so encode the string again before passing
                # to the user converter
                def tobytes_first(x, conv):
                    if type(x) is bytes:
                        return conv(x)
                    return conv(x.encode("latin1"))
                converters[i] = functools.partial(tobytes_first, conv=conv)
            else:
                converters[i] = conv

        converters = [conv if conv is not bytes else
                      lambda x: x.encode(fencoding) for conv in converters]

        # read data in chunks and fill it into an array via resize
        # over-allocating and shrinking the array later may be faster but is
        # probably not relevant compared to the cost of actually reading and
        # converting the data
        X = None
        for x in read_data(_loadtxt_chunksize):
            if X is None:
                X = np.array(x, dtype)
            else:
                nshape = list(X.shape)
                pos = nshape[0]
                nshape[0] += len(x)
                X.resize(nshape, refcheck=False)
                X[pos:, ...] = x
    finally:
        if fown:
            fh.close()

    if X is None:
        X = np.array([], dtype)

    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X
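

# A minimal sketch of the ndmin handling at the end of loadtxt (helper name
# is illustrative): a single row squeezes to 1-D by default, ndmin=2 keeps
# the row 2-D, and a single column comes back as a column vector.
def _example_loadtxt_ndmin():
    from io import StringIO
    assert np.loadtxt(StringIO(u"1 2 3")).shape == (3,)
    assert np.loadtxt(StringIO(u"1 2 3"), ndmin=2).shape == (1, 3)
    assert np.loadtxt(StringIO(u"1\n2\n3"), ndmin=2).shape == (3, 1)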


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format. `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
          like `' (%s+%sj)' % (fmt, fmt)`
        * a full string specifying every real and imaginary part, e.g.
          `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the output file. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete, for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0, 5.0, 1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x, y, z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """
    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    class WriteWrap:
        """Convert to bytes on bytestream inputs.

        """
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes

    own_fh = False
    if isinstance(fname, os_PathLike):
        fname = os_fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape. %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format))
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()
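

# A minimal sketch of the complex-number path in savetxt above (helper name
# is illustrative): a single '%' specifier is expanded to '(real+imagj)' per
# column, and the '+-' from a negative imaginary part collapses to '-'.
def _example_savetxt_complex():
    from io import StringIO
    buf = StringIO()
    np.savetxt(buf, np.array([[1 - 2j]]), fmt='%.1f')
    assert buf.getvalue() == ' (1.0-2.0j)\n'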


@set_module('numpy')
def fromregex(file, regexp, dtype, encoding=None):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        Filename or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams.

        .. versionadded:: 1.14.0

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
          dtype=[('num', '<i8'), ('key', 'S3')])
    >>> output['num']
    array([1312, 1534, 444])

    """
    own_fh = False
    if not hasattr(file, "read"):
        file = np.lib._datasource.open(file, 'rt', encoding=encoding)
        own_fh = True

    try:
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        content = file.read()
        if isinstance(content, bytes) and isinstance(regexp, np.compat.unicode):
            regexp = asbytes(regexp)
        elif isinstance(content, np.compat.unicode) and isinstance(regexp, bytes):
            regexp = asstr(regexp)

        if not hasattr(regexp, 'match'):
            regexp = re.compile(regexp)
        seq = regexp.findall(content)
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            # re-interpret as a single-field structured array.
            newdtype = np.dtype(dtype[dtype.names[0]])
            output = np.array(seq, dtype=newdtype)
            output.dtype = dtype
        else:
            output = np.array(seq, dtype=dtype)

        return output
    finally:
        if own_fh:
            file.close()
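

# A minimal sketch of the single-group branch above (helper name is
# illustrative): with one regexp group, findall returns flat strings, so the
# array is built with the field's scalar dtype and then reinterpreted as the
# one-field structured dtype.
def _example_fromregex_single_group():
    from io import StringIO
    out = np.fromregex(StringIO(u"a1 b2"), r"[ab](\d)",
                       [('digit', np.int64)])
    assert out['digit'].tolist() == [1, 2]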


#####--------------------------------------------------------------------------
#---- --- ASCII functions ---
#####--------------------------------------------------------------------------


@set_module('numpy')
def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
               skip_header=0, skip_footer=0, converters=None,
               missing_values=None, filling_values=None, usecols=None,
               names=None, excludelist=None,
               deletechars=''.join(sorted(NameValidator.defaultdeletechars)),
               replace_space='_', autostrip=False, case_sensitive=True,
               defaultfmt="f%i", unpack=None, usemask=False, loose=True,
               invalid_raise=True, max_rows=None, encoding='bytes'):
1547 """
1548 Load data from a text file, with missing values handled as specified.
1550 Each line past the first `skip_header` lines is split at the `delimiter`
1551 character, and characters following the `comments` character are discarded.
1553 Parameters
1554 ----------
1555 fname : file, str, pathlib.Path, list of str, generator
1556 File, filename, list, or generator to read. If the filename
1557 extension is `.gz` or `.bz2`, the file is first decompressed. Note
1558 that generators must return byte strings. The strings
1559 in a list or produced by a generator are treated as lines.
1560 dtype : dtype, optional
1561 Data type of the resulting array.
1562 If None, the dtypes will be determined by the contents of each
1563 column, individually.
1564 comments : str, optional
1565 The character used to indicate the start of a comment.
1566 All the characters occurring on a line after a comment are discarded
1567 delimiter : str, int, or sequence, optional
1568 The string used to separate values. By default, any consecutive
1569 whitespaces act as delimiter. An integer or sequence of integers
1570 can also be provided as width(s) of each field.
1571 skiprows : int, optional
1572 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead.
1573 skip_header : int, optional
1574 The number of lines to skip at the beginning of the file.
1575 skip_footer : int, optional
1576 The number of lines to skip at the end of the file.
1577 converters : variable, optional
1578 The set of functions that convert the data of a column to a value.
1579 The converters can also be used to provide a default value
1580 for missing data: ``converters = {3: lambda s: float(s or 0)}``.
1581 missing : variable, optional
1582 `missing` was removed in numpy 1.10. Please use `missing_values`
1583 instead.
1584 missing_values : variable, optional
1585 The set of strings corresponding to missing data.
1586 filling_values : variable, optional
1587 The set of values to be used as default when the data are missing.
1588 usecols : sequence, optional
1589 Which columns to read, with 0 being the first. For example,
1590 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
1591 names : {None, True, str, sequence}, optional
1592 If `names` is True, the field names are read from the first line after
1593 the first `skip_header` lines. This line can optionally be proceeded
1594 by a comment delimiter. If `names` is a sequence or a single-string of
1595 comma-separated names, the names will be used to define the field names
1596 in a structured dtype. If `names` is None, the names of the dtype
1597 fields will be used, if any.
1598 excludelist : sequence, optional
1599 A list of names to exclude. This list is appended to the default list
1600 ['return','file','print']. Excluded names are appended an underscore:
1601 for example, `file` would become `file_`.
1602 deletechars : str, optional
1603 A string combining invalid characters that must be deleted from the
1604 names.
1605 defaultfmt : str, optional
1606 A format used to define default field names, such as "f%i" or "f_%02i".
1607 autostrip : bool, optional
1608 Whether to automatically strip white spaces from the variables.
1609 replace_space : char, optional
1610 Character(s) used in replacement of white spaces in the variables
1611 names. By default, use a '_'.
1612 case_sensitive : {True, False, 'upper', 'lower'}, optional
1613 If True, field names are case sensitive.
1614 If False or 'upper', field names are converted to upper case.
1615 If 'lower', field names are converted to lower case.
1616 unpack : bool, optional
1617 If True, the returned array is transposed, so that arguments may be
1618 unpacked using ``x, y, z = loadtxt(...)``
1619 usemask : bool, optional
1620 If True, return a masked array.
1621 If False, return a regular array.
1622 loose : bool, optional
1623 If True, do not raise errors for invalid values.
1624 invalid_raise : bool, optional
1625 If True, an exception is raised if an inconsistency is detected in the
1626 number of columns.
1627 If False, a warning is emitted and the offending lines are skipped.
1628 max_rows : int, optional
1629 The maximum number of rows to read. Must not be used with skip_footer
1630 at the same time. If given, the value must be at least 1. Default is
1631 to read the entire file.
1633 .. versionadded:: 1.10.0
1634 encoding : str, optional
1635 Encoding used to decode the inputfile. Does not apply when `fname` is
1636 a file object. The special value 'bytes' enables backward compatibility
1637 workarounds that ensure that you receive byte arrays when possible
1638 and passes latin1 encoded strings to converters. Override this value to
1639 receive unicode arrays and pass strings as input to converters. If set
1640 to None the system default is used. The default value is 'bytes'.
1642 .. versionadded:: 1.14.0
1644 Returns
1645 -------
1646 out : ndarray
1647 Data read from the text file. If `usemask` is True, this is a
1648 masked array.
1650 See Also
1651 --------
1652 numpy.loadtxt : equivalent function when no data is missing.
1654 Notes
1655 -----
1656 * When spaces are used as delimiters, or when no delimiter has been given
1657 as input, there should not be any missing data between two fields.
1658 * When the variables are named (either by a flexible dtype or with `names`),
1659 there must not be any header in the file (else a ValueError
1660 exception is raised).
1661 * Individual values are not stripped of spaces by default.
1662 When using a custom converter, make sure the function does remove spaces.
1664 References
1665 ----------
1666 .. [1] NumPy User Guide, section `I/O with NumPy
1667 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_.
1669 Examples
1670 ---------
1671 >>> from io import StringIO
1672 >>> import numpy as np
1674 Comma delimited file with mixed dtype
1676 >>> s = StringIO(u"1,1.3,abcde")
1677 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
1678 ... ('mystring','S5')], delimiter=",")
1679 >>> data
1680 array((1, 1.3, b'abcde'),
1681 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1683 Using dtype = None
1685 >>> _ = s.seek(0) # needed for StringIO example only
1686 >>> data = np.genfromtxt(s, dtype=None,
1687 ... names = ['myint','myfloat','mystring'], delimiter=",")
1688 >>> data
1689 array((1, 1.3, b'abcde'),
1690 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1692 Specifying dtype and names
1694 >>> _ = s.seek(0)
1695 >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
1696 ... names=['myint','myfloat','mystring'], delimiter=",")
1697 >>> data
1698 array((1, 1.3, b'abcde'),
1699 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')])
1701 An example with fixed-width columns
1703 >>> s = StringIO(u"11.3abcde")
1704 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
1705 ... delimiter=[1,3,5])
1706 >>> data
1707 array((1, 1.3, b'abcde'),
1708 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')])
1710 An example showing how comments are handled
1712 >>> f = StringIO('''
1713 ... text,# of chars
1714 ... hello world,11
1715 ... numpy,5''')
1716 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',')
1717 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')],
1718 dtype=[('f0', 'S12'), ('f1', 'S12')])
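A minimal example of missing data with ``usemask=True`` (a sketch; the
mask shown assumes the default float dtype)

>>> s = StringIO(u"1,,3")
>>> m = np.genfromtxt(s, delimiter=",", usemask=True)
>>> m.mask
array([False,  True, False])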
1720 """
1721 if max_rows is not None:
1722 if skip_footer:
1723 raise ValueError(
1724 "The keywords 'skip_footer' and 'max_rows' can not be "
1725 "specified at the same time.")
1726 if max_rows < 1:
1727 raise ValueError("'max_rows' must be at least 1.")
1729 if usemask:
1730 from numpy.ma import MaskedArray, make_mask_descr
1731 # Check the input dictionary of converters
1732 user_converters = converters or {}
1733 if not isinstance(user_converters, dict):
1734 raise TypeError(
1735 "The input argument 'converter' should be a valid dictionary "
1736 "(got '%s' instead)" % type(user_converters))
1738 if encoding == 'bytes':
1739 encoding = None
1740 byte_converters = True
1741 else:
1742 byte_converters = False
1744 # Initialize the filehandle, the LineSplitter and the NameValidator
1745 try:
1746 if isinstance(fname, os_PathLike):
1747 fname = os_fspath(fname)
1748 if isinstance(fname, str):
1749 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding)
1750 fid_ctx = contextlib.closing(fid)
1751 else:
1752 fid = fname
1753 fid_ctx = contextlib_nullcontext(fid)
1754 fhd = iter(fid)
1755 except TypeError:
1756 raise TypeError(
1757 "fname must be a string, filehandle, list of strings, "
1758 "or generator. Got %s instead." % type(fname))
1760 with fid_ctx:
1761 split_line = LineSplitter(delimiter=delimiter, comments=comments,
1762 autostrip=autostrip, encoding=encoding)
1763 validate_names = NameValidator(excludelist=excludelist,
1764 deletechars=deletechars,
1765 case_sensitive=case_sensitive,
1766 replace_space=replace_space)
1768 # Skip the first `skip_header` rows
1769 try:
1770 for i in range(skip_header):
1771 next(fhd)
1773 # Keep on until we find the first valid values
1774 first_values = None
1776 while not first_values:
1777 first_line = _decode_line(next(fhd), encoding)
1778 if (names is True) and (comments is not None):
1779 if comments in first_line:
1780 first_line = (
1781 ''.join(first_line.split(comments)[1:]))
1782 first_values = split_line(first_line)
1783 except StopIteration:
1784 # return an empty array if the datafile is empty
1785 first_line = ''
1786 first_values = []
1787 warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2)
1789 # Should we take the first values as names?
1790 if names is True:
1791 fval = first_values[0].strip()
1792 if comments is not None:
1793 if fval in comments:
1794 del first_values[0]
1796 # Check the columns to use: make sure `usecols` is a list
1797 if usecols is not None:
1798 try:
1799 usecols = [_.strip() for _ in usecols.split(",")]
1800 except AttributeError:
1801 try:
1802 usecols = list(usecols)
1803 except TypeError:
1804 usecols = [usecols, ]
1805 nbcols = len(usecols or first_values)
1807 # Check the names and overwrite the dtype.names if needed
1808 if names is True:
1809 names = validate_names([str(_.strip()) for _ in first_values])
1810 first_line = ''
1811 elif _is_string_like(names):
1812 names = validate_names([_.strip() for _ in names.split(',')])
1813 elif names:
1814 names = validate_names(names)
1815 # Get the dtype
1816 if dtype is not None:
1817 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names,
1818 excludelist=excludelist,
1819 deletechars=deletechars,
1820 case_sensitive=case_sensitive,
1821 replace_space=replace_space)
1822 # Make sure `names` is a list (for Python 2.5 compatibility)
1823 if names is not None:
1824 names = list(names)
1826 if usecols:
1827 for (i, current) in enumerate(usecols):
1828 # if usecols is a list of names, convert to a list of indices
1829 if _is_string_like(current):
1830 usecols[i] = names.index(current)
1831 elif current < 0:
1832 usecols[i] = current + len(first_values)
1833 # If the dtype is not None, make sure we update it
1834 if (dtype is not None) and (len(dtype) > nbcols):
1835 descr = dtype.descr
1836 dtype = np.dtype([descr[_] for _ in usecols])
1837 names = list(dtype.names)
1838 # If `names` is not None, update the names
1839 elif (names is not None) and (len(names) > nbcols):
1840 names = [names[_] for _ in usecols]
1841 elif (names is not None) and (dtype is not None):
1842 names = list(dtype.names)
1844 # Process the missing values ...............................
1845 # Rename missing_values for convenience
1846 user_missing_values = missing_values or ()
1847 if isinstance(user_missing_values, bytes):
1848 user_missing_values = user_missing_values.decode('latin1')
1850 # Define the list of missing_values (one column: one list)
1851 missing_values = [[''] for _ in range(nbcols)]
1853 # We have a dictionary: process it field by field
1854 if isinstance(user_missing_values, dict):
1855 # Loop on the items
1856 for (key, val) in user_missing_values.items():
1857 # Is the key a string?
1858 if _is_string_like(key):
1859 try:
1860 # Transform it into an integer
1861 key = names.index(key)
1862 except ValueError:
1863 # We couldn't find it: the name must have been dropped
1864 continue
1865 # Redefine the key as needed if it's a column number
1866 if usecols:
1867 try:
1868 key = usecols.index(key)
1869 except ValueError:
1870 pass
1871 # Transform the value into a list of strings
1872 if isinstance(val, (list, tuple)):
1873 val = [str(_) for _ in val]
1874 else:
1875 val = [str(val), ]
1876 # Add the value(s) to the current list of missing
1877 if key is None:
1878 # None acts as default
1879 for miss in missing_values:
1880 miss.extend(val)
1881 else:
1882 missing_values[key].extend(val)
1883 # We have a sequence: each item matches a column
1884 elif isinstance(user_missing_values, (list, tuple)):
1885 for (value, entry) in zip(user_missing_values, missing_values):
1886 value = str(value)
1887 if value not in entry:
1888 entry.append(value)
1889 # We have a string: apply it to all entries
1890 elif isinstance(user_missing_values, str):
1891 user_value = user_missing_values.split(",")
1892 for entry in missing_values:
1893 entry.extend(user_value)
1894 # We have something else: apply it to all entries
1895 else:
1896 for entry in missing_values:
1897 entry.extend([str(user_missing_values)])
1899 # Process the filling_values ...............................
1900 # Rename the input for convenience
1901 user_filling_values = filling_values
1902 if user_filling_values is None:
1903 user_filling_values = []
1904 # Define the default
1905 filling_values = [None] * nbcols
1906 # We have a dictionary: update each entry individually
1907 if isinstance(user_filling_values, dict):
1908 for (key, val) in user_filling_values.items():
1909 if _is_string_like(key):
1910 try:
1911 # Transform it into an integer
1912 key = names.index(key)
1913 except ValueError:
1914 # We couldn't find it: the name must have been dropped.
1915 continue
1916 # Redefine the key if it's a column number and usecols is defined
1917 if usecols:
1918 try:
1919 key = usecols.index(key)
1920 except ValueError:
1921 pass
1922 # Add the value to the list
1923 filling_values[key] = val
1924 # We have a sequence: update on a one-to-one basis
1925 elif isinstance(user_filling_values, (list, tuple)):
1926 n = len(user_filling_values)
1927 if (n <= nbcols):
1928 filling_values[:n] = user_filling_values
1929 else:
1930 filling_values = user_filling_values[:nbcols]
1931 # We have something else: use it for all entries
1932 else:
1933 filling_values = [user_filling_values] * nbcols
1935 # Initialize the converters ................................
1936 if dtype is None:
1937 # Note: we can't use [converter] * nbcols, as that would give nbcols
1938 # references to the same converter instead of nbcols distinct converters.
1939 converters = [StringConverter(None, missing_values=miss, default=fill)
1940 for (miss, fill) in zip(missing_values, filling_values)]
1941 else:
1942 dtype_flat = flatten_dtype(dtype, flatten_base=True)
1943 # Initialize the converters
1944 if len(dtype_flat) > 1:
1945 # Flexible type : get a converter from each dtype
1946 zipit = zip(dtype_flat, missing_values, filling_values)
1947 converters = [StringConverter(dt, locked=True,
1948 missing_values=miss, default=fill)
1949 for (dt, miss, fill) in zipit]
1950 else:
1951 # Set to a default converter (but with different missing values)
1952 zipit = zip(missing_values, filling_values)
1953 converters = [StringConverter(dtype, locked=True,
1954 missing_values=miss, default=fill)
1955 for (miss, fill) in zipit]
1956 # Update the converters to use the user-defined ones
1957 uc_update = []
1958 for (j, conv) in user_converters.items():
1959 # If the converter is specified by column names, use the index instead
1960 if _is_string_like(j):
1961 try:
1962 j = names.index(j)
1963 i = j
1964 except ValueError:
1965 continue
1966 elif usecols:
1967 try:
1968 i = usecols.index(j)
1969 except ValueError:
1970 # Unused converter specified
1971 continue
1972 else:
1973 i = j
1974 # Find the value to test - first_line is not filtered by usecols:
1975 if len(first_line):
1976 testing_value = first_values[j]
1977 else:
1978 testing_value = None
1979 if conv is bytes:
1980 user_conv = asbytes
1981 elif byte_converters:
1982 # converters may use decode to work around numpy's old behaviour,
1983 # so encode the string again before passing it to the user converter
1984 def tobytes_first(x, conv):
1985 if type(x) is bytes:
1986 return conv(x)
1987 return conv(x.encode("latin1"))
1988 user_conv = functools.partial(tobytes_first, conv=conv)
1989 else:
1990 user_conv = conv
1991 converters[i].update(user_conv, locked=True,
1992 testing_value=testing_value,
1993 default=filling_values[i],
1994 missing_values=missing_values[i],)
1995 uc_update.append((i, user_conv))
1996 # Make sure we have the corrected keys in user_converters...
1997 user_converters.update(uc_update)
1999 # FIXME: possible error, as the following variable is never used.
2000 # miss_chars = [_.missing_values for _ in converters]
2002 # Initialize the output lists ...
2003 # ... rows
2004 rows = []
2005 append_to_rows = rows.append
2006 # ... masks
2007 if usemask:
2008 masks = []
2009 append_to_masks = masks.append
2010 # ... invalid
2011 invalid = []
2012 append_to_invalid = invalid.append
2014 # Parse each line
2015 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
2016 values = split_line(line)
2017 nbvalues = len(values)
2018 # Skip an empty line
2019 if nbvalues == 0:
2020 continue
2021 if usecols:
2022 # Select only the columns we need
2023 try:
2024 values = [values[_] for _ in usecols]
2025 except IndexError:
2026 append_to_invalid((i + skip_header + 1, nbvalues))
2027 continue
2028 elif nbvalues != nbcols:
2029 append_to_invalid((i + skip_header + 1, nbvalues))
2030 continue
2031 # Store the values
2032 append_to_rows(tuple(values))
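# A value is recorded as missing when its stripped text appears in the
# per-column list of missing-value markers built above.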
2033 if usemask:
2034 append_to_masks(tuple([v.strip() in m
2035 for (v, m) in zip(values,
2036 missing_values)]))
2037 if len(rows) == max_rows:
2038 break
2040 # Upgrade the converters (if needed)
2041 if dtype is None:
2042 for (i, converter) in enumerate(converters):
2043 current_column = [itemgetter(i)(_m) for _m in rows]
2044 try:
2045 converter.iterupgrade(current_column)
2046 except ConverterLockError:
2047 errmsg = "Converter #%i is locked and cannot be upgraded: " % i
2048 current_column = map(itemgetter(i), rows)
2049 for (j, value) in enumerate(current_column):
2050 try:
2051 converter.upgrade(value)
2052 except (ConverterError, ValueError):
2053 errmsg += "(occurred on line #%i for value '%s')"
2054 errmsg %= (j + 1 + skip_header, value)
2055 raise ConverterError(errmsg)
2057 # Check that we don't have invalid values
2058 nbinvalid = len(invalid)
2059 if nbinvalid > 0:
2060 nbrows = len(rows) + nbinvalid - skip_footer
2061 # Construct the error message
2062 template = " Line #%%i (got %%i columns instead of %i)" % nbcols
2063 if skip_footer > 0:
2064 nbinvalid_skipped = len([_ for _ in invalid
2065 if _[0] > nbrows + skip_header])
2066 invalid = invalid[:nbinvalid - nbinvalid_skipped]
2067 skip_footer -= nbinvalid_skipped
2068#
2069# nbrows -= skip_footer
2070# errmsg = [template % (i, nb)
2071# for (i, nb) in invalid if i < nbrows]
2072# else:
2073 errmsg = [template % (i, nb)
2074 for (i, nb) in invalid]
2075 if len(errmsg):
2076 errmsg.insert(0, "Some errors were detected!")
2077 errmsg = "\n".join(errmsg)
2078 # Raise an exception?
2079 if invalid_raise:
2080 raise ValueError(errmsg)
2081 # Issue a warning?
2082 else:
2083 warnings.warn(errmsg, ConversionWarning, stacklevel=2)
2085 # Strip the last `skip_footer` rows
2086 if skip_footer > 0:
2087 rows = rows[:-skip_footer]
2088 if usemask:
2089 masks = masks[:-skip_footer]
2091 # Convert each value according to the converter:
2092 # We want to modify the list in place to avoid creating a new one...
2093 if loose:
2094 rows = list(
2095 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)]
2096 for (i, conv) in enumerate(converters)]))
2097 else:
2098 rows = list(
2099 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)]
2100 for (i, conv) in enumerate(converters)]))
2102 # Reset the dtype
2103 data = rows
2104 if dtype is None:
2105 # Get the dtypes from the types of the converters
2106 column_types = [conv.type for conv in converters]
2107 # Find the columns with strings...
2108 strcolidx = [i for (i, v) in enumerate(column_types)
2109 if v == np.unicode_]
2111 if byte_converters and strcolidx:
2112 # convert strings back to bytes for backward compatibility
2113 warnings.warn(
2114 "Reading unicode strings without specifying the encoding "
2115 "argument is deprecated. Set the encoding, use None for the "
2116 "system default.",
2117 np.VisibleDeprecationWarning, stacklevel=2)
2118 def encode_unicode_cols(row_tup):
2119 row = list(row_tup)
2120 for i in strcolidx:
2121 row[i] = row[i].encode('latin1')
2122 return tuple(row)
2124 try:
2125 data = [encode_unicode_cols(r) for r in data]
2126 except UnicodeEncodeError:
2127 pass
2128 else:
2129 for i in strcolidx:
2130 column_types[i] = np.bytes_
2132 # Update string types to be the right length
2133 sized_column_types = column_types[:]
2134 for i, col_type in enumerate(column_types):
2135 if np.issubdtype(col_type, np.character):
2136 n_chars = max(len(row[i]) for row in data)
2137 sized_column_types[i] = (col_type, n_chars)
2139 if names is None:
2140 # If the dtype is uniform (before sizing strings)
2141 base = {
2142 c_type
2143 for c, c_type in zip(converters, column_types)
2144 if c._checked}
2145 if len(base) == 1:
2146 uniform_type, = base
2147 (ddtype, mdtype) = (uniform_type, bool)
2148 else:
2149 ddtype = [(defaultfmt % i, dt)
2150 for (i, dt) in enumerate(sized_column_types)]
2151 if usemask:
2152 mdtype = [(defaultfmt % i, bool)
2153 for (i, dt) in enumerate(sized_column_types)]
2154 else:
2155 ddtype = list(zip(names, sized_column_types))
2156 mdtype = list(zip(names, [bool] * len(sized_column_types)))
2157 output = np.array(data, dtype=ddtype)
2158 if usemask:
2159 outputmask = np.array(masks, dtype=mdtype)
2160 else:
2161 # Overwrite the initial dtype names if needed
2162 if names and dtype.names is not None:
2163 dtype.names = names
2164 # Case 1. We have a structured type
2165 if len(dtype_flat) > 1:
2166 # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
2167 # First, create the array using a flattened dtype:
2168 # [('a', int), ('b1', int), ('b2', float)]
2169 # Then, view the array using the specified dtype.
2170 if 'O' in (_.char for _ in dtype_flat):
2171 if has_nested_fields(dtype):
2172 raise NotImplementedError(
2173 "Nested fields involving objects are not supported...")
2174 else:
2175 output = np.array(data, dtype=dtype)
2176 else:
2177 rows = np.array(data, dtype=[('', _) for _ in dtype_flat])
2178 output = rows.view(dtype)
2179 # Now, process the rowmasks the same way
2180 if usemask:
2181 rowmasks = np.array(
2182 masks, dtype=np.dtype([('', bool) for t in dtype_flat]))
2183 # Construct the new dtype
2184 mdtype = make_mask_descr(dtype)
2185 outputmask = rowmasks.view(mdtype)
2186 # Case 2. We have a basic dtype
2187 else:
2188 # We used some user-defined converters
2189 if user_converters:
2190 ishomogeneous = True
2191 descr = []
2192 for i, ttype in enumerate([conv.type for conv in converters]):
2193 # Keep the dtype of the current converter
2194 if i in user_converters:
2195 ishomogeneous &= (ttype == dtype.type)
2196 if np.issubdtype(ttype, np.character):
2197 ttype = (ttype, max(len(row[i]) for row in data))
2198 descr.append(('', ttype))
2199 else:
2200 descr.append(('', dtype))
2201 # Did we change the dtype?
2202 if not ishomogeneous:
2203 # We have more than one field
2204 if len(descr) > 1:
2205 dtype = np.dtype(descr)
2206 # We have only one field: drop the name if not needed.
2207 else:
2208 dtype = np.dtype(ttype)
2209 #
2210 output = np.array(data, dtype)
2211 if usemask:
2212 if dtype.names is not None:
2213 mdtype = [(_, bool) for _ in dtype.names]
2214 else:
2215 mdtype = bool
2216 outputmask = np.array(masks, dtype=mdtype)
2217 # Try to take care of the missing data we missed
2218 names = output.dtype.names
2219 if usemask and names:
2220 for (name, conv) in zip(names, converters):
2221 missing_values = [conv(_) for _ in conv.missing_values
2222 if _ != '']
2223 for mval in missing_values:
2224 outputmask[name] |= (output[name] == mval)
2225 # Construct the final array
2226 if usemask:
2227 output = output.view(MaskedArray)
2228 output._mask = outputmask
2229 if unpack:
2230 return output.squeeze().T
2231 return output.squeeze()
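# A short, self-contained sketch of the per-column missing/filling machinery
# above; the column names, the "N/A" marker and the -1 fill value are invented
# for illustration.
from io import StringIO
import numpy as np

demo = np.genfromtxt(
    StringIO(u"a,b\n1,2\n3,N/A"),
    delimiter=",", names=True, dtype=None,
    missing_values={'b': "N/A"},  # treat "N/A" as missing in column 'b'
    filling_values={'b': -1},     # ...and fill it with -1
    encoding=None)                # skip the 'bytes' compatibility path
assert demo['b'].tolist() == [2, -1]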
2234def ndfromtxt(fname, **kwargs):
2235 """
2236 Load ASCII data stored in a file and return it as a single array.
2238 .. deprecated:: 1.17
2239 `ndfromtxt` is a deprecated alias of `genfromtxt` which
2240 overwrites the ``usemask`` argument with ``False`` even when
2241 explicitly called as ``ndfromtxt(..., usemask=True)``.
2242 Use `genfromtxt` instead.
2244 Parameters
2245 ----------
2246 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2248 See Also
2249 --------
2250 numpy.genfromtxt : generic function.
2252 """
2253 kwargs['usemask'] = False
2254 # Numpy 1.17
2255 warnings.warn(
2256 "np.ndfromtxt is a deprecated alias of np.genfromtxt, "
2257 "prefer the latter.",
2258 DeprecationWarning, stacklevel=2)
2259 return genfromtxt(fname, **kwargs)
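# Migration sketch (invented data): since `ndfromtxt` merely forces
# ``usemask=False``, a plain `genfromtxt` call is the drop-in replacement.
from io import StringIO
import numpy as np

arr = np.genfromtxt(StringIO(u"1 2\n3 4"))  # usemask already defaults to False
assert arr.shape == (2, 2)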
2262def mafromtxt(fname, **kwargs):
2263 """
2264 Load ASCII data stored in a text file and return a masked array.
2266 .. deprecated:: 1.17
2267 `mafromtxt` is a deprecated alias of `genfromtxt` which
2268 overwrites the ``usemask`` argument with ``True`` even when
2269 explicitly called as ``mafromtxt(..., usemask=False)``.
2270 Use `genfromtxt` instead.
2272 Parameters
2273 ----------
2274 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2276 See Also
2277 --------
2278 numpy.genfromtxt : generic function to load ASCII data.
2280 """
2281 kwargs['usemask'] = True
2282 # Numpy 1.17
2283 warnings.warn(
2284 "np.mafromtxt is a deprecated alias of np.genfromtxt, "
2285 "prefer the latter.",
2286 DeprecationWarning, stacklevel=2)
2287 return genfromtxt(fname, **kwargs)
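# Migration sketch (invented data): `genfromtxt(..., usemask=True)` replaces
# the deprecated `mafromtxt` and returns a masked array.
from io import StringIO
import numpy as np

marr = np.genfromtxt(StringIO(u"1,,3"), delimiter=",", usemask=True)
assert marr.filled(0).tolist() == [1.0, 0.0, 3.0]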
2290def recfromtxt(fname, **kwargs):
2291 """
2292 Load ASCII data from a file and return it in a record array.
2294 If ``usemask=False``, a standard `recarray` is returned;
2295 if ``usemask=True``, a `MaskedRecords` array is returned.
2297 Parameters
2298 ----------
2299 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2301 See Also
2302 --------
2303 numpy.genfromtxt : generic function
2305 Notes
2306 -----
2307 By default, `dtype` is None, which means that the data-type of the output
2308 array will be determined from the data.
2310 """
2311 kwargs.setdefault("dtype", None)
2312 usemask = kwargs.get('usemask', False)
2313 output = genfromtxt(fname, **kwargs)
2314 if usemask:
2315 from numpy.ma.mrecords import MaskedRecords
2316 output = output.view(MaskedRecords)
2317 else:
2318 output = output.view(np.recarray)
2319 return output
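# A minimal usage sketch (invented data): with the dtype=None default set
# above, field names default to f0, f1, ... and support attribute access.
from io import StringIO
import numpy as np

rec = np.recfromtxt(StringIO(u"1 2.5\n3 4.5"))
assert rec.f1.tolist() == [2.5, 4.5]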
2322def recfromcsv(fname, **kwargs):
2323 """
2324 Load ASCII data stored in a comma-separated file.
2326 The returned array is a record array (if ``usemask=False``, see
2327 `recarray`) or a masked record array (if ``usemask=True``,
2328 see `ma.mrecords.MaskedRecords`).
2330 Parameters
2331 ----------
2332 fname, kwargs : For a description of input parameters, see `genfromtxt`.
2334 See Also
2335 --------
2336 numpy.genfromtxt : generic function to load ASCII data.
2338 Notes
2339 -----
2340 By default, `dtype` is None, which means that the data-type of the output
2341 array will be determined from the data.
2343 """
2344 # Set default kwargs for genfromtxt as relevant to csv import.
2345 kwargs.setdefault("case_sensitive", "lower")
2346 kwargs.setdefault("names", True)
2347 kwargs.setdefault("delimiter", ",")
2348 kwargs.setdefault("dtype", None)
2349 output = genfromtxt(fname, **kwargs)
2351 usemask = kwargs.get("usemask", False)
2352 if usemask:
2353 from numpy.ma.mrecords import MaskedRecords
2354 output = output.view(MaskedRecords)
2355 else:
2356 output = output.view(np.recarray)
2357 return output
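# A minimal usage sketch (invented data): header names are read via the
# names=True default and lower-cased by the case_sensitive="lower" default.
from io import StringIO
import numpy as np

rec = np.recfromcsv(StringIO(u"A,B\n1,2.5\n3,4.5"))
assert rec.a.tolist() == [1, 3]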