import sys
import os
import re
import functools
import itertools
import warnings
import weakref
import contextlib
from operator import itemgetter, index as opindex
from collections.abc import Mapping

import numpy as np
from . import format
from ._datasource import DataSource
from numpy.core import overrides
from numpy.core.multiarray import packbits, unpackbits
from numpy.core.overrides import set_module
from numpy.core._internal import recursive
from ._iotools import (
    LineSplitter, NameValidator, StringConverter, ConverterError,
    ConverterLockError, ConversionWarning, _is_string_like,
    has_nested_fields, flatten_dtype, easy_dtype, _decode_line
    )

from numpy.compat import (
    asbytes, asstr, asunicode, bytes, os_fspath, os_PathLike,
    pickle, contextlib_nullcontext
    )


@set_module('numpy')
def loads(*args, **kwargs):
    # NumPy 1.15.0, 2017-12-10
    warnings.warn(
        "np.loads is deprecated, use pickle.loads instead",
        DeprecationWarning, stacklevel=2)
    return pickle.loads(*args, **kwargs)


__all__ = [
    'savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
    'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
    'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'
    ]


array_function_dispatch = functools.partial(
    overrides.array_function_dispatch, module='numpy')


class BagObj:
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo:
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key)

    def __dir__(self):
        """
        Enables dir(bagobj) to list the files in an NpzFile.

        This also enables tab-completion in an interpreter or IPython.
        """
        return list(object.__getattribute__(self, '_obj').keys())


def zipfile_factory(file, *args, **kwargs):
    """
    Create a ZipFile.

    Allows for Zip64, and the `file` argument can accept file, str, or
    pathlib.Path objects. `args` and `kwargs` are passed to the zipfile.ZipFile
    constructor.
    """
    if not hasattr(file, 'read'):
        file = os_fspath(file)
    import zipfile
    kwargs['allowZip64'] = True
    return zipfile.ZipFile(file, *args, **kwargs)
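

# A minimal illustrative sketch (comment only, nothing here is executed):
# strings and pathlib.Path objects are converted via os_fspath, while file
# objects pass straight through to zipfile.ZipFile, so all of the following
# call forms are assumed to be accepted:
#
#   zipfile_factory('archive.npz', mode='w')
#   zipfile_factory(pathlib.Path('archive.npz'), mode='w')  # pathlib import assumed
#   zipfile_factory(open('archive.npz', 'rb'))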


class NpzFile(Mapping):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. It assumes that files in the archive have a ``.npy`` extension,
    other files are ignored.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. A list of all files (without ``.npy`` extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive with a ``.npy`` extension.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
    f : BagObj instance
        An object on which attribute look-up can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
    allow_pickle : bool, optional
        Allow loading pickled data. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    pickle_kwargs : dict, optional
        Additional keyword arguments to pass on to pickle.load.
        These are only useful when loading object arrays saved on
        Python 2 when using Python 3.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> _ = outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> sorted(npz.files)
    ['x', 'y']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """

    def __init__(self, fid, own_fid=False, allow_pickle=False,
                 pickle_kwargs=None):
        # Import is postponed to here since zipfile depends on gzip, an
        # optional component of the so-called standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        self.files = []
        self.allow_pickle = allow_pickle
        self.pickle_kwargs = pickle_kwargs
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        self.f = BagObj(self)
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    # Implement the Mapping ABC
    def __iter__(self):
        return iter(self.files)

    def __len__(self):
        return len(self.files)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary. The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.open(key)
            magic = bytes.read(len(format.MAGIC_PREFIX))
            bytes.close()
            if magic == format.MAGIC_PREFIX:
                bytes = self.zip.open(key)
                return format.read_array(bytes,
                                         allow_pickle=self.allow_pickle,
                                         pickle_kwargs=self.pickle_kwargs)
            else:
                return self.zip.read(key)
        else:
            raise KeyError("%s is not a file in the archive" % key)
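
    # Illustrative sketch (assumed behavior): after np.savez(f, x=x) the
    # archive member is 'x.npy', so npz['x'] and npz['x.npy'] resolve to
    # the same array; bare keys get the '.npy' suffix appended above.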


    # deprecate the python 2 dict apis that we supported by accident in
    # python 3. We forgot to implement itervalues() at all in earlier
    # versions of numpy, so no need to deprecate it here.

    def iteritems(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iteritems is deprecated in python 3, to match the "
            "removal of dict.iteritems. Use .items() instead.",
            DeprecationWarning, stacklevel=2)
        return self.items()

    def iterkeys(self):
        # Numpy 1.15, 2018-02-20
        warnings.warn(
            "NpzFile.iterkeys is deprecated in python 3, to match the "
            "removal of dict.iterkeys. Use .keys() instead.",
            DeprecationWarning, stacklevel=2)
        return self.keys()


@set_module('numpy')
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True,
         encoding='ASCII'):
    """
    Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.

    .. warning:: Loading files that contain object arrays uses the ``pickle``
                 module, which is not secure against erroneous or maliciously
                 constructed data. Consider passing ``allow_pickle=False`` to
                 load data that is known not to contain object arrays for the
                 safer handling of untrusted sources.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods. Pickled files require that the
        file-like object support the ``readline()`` method as well.
    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
        If not None, then memory-map the file, using the given mode (see
        `numpy.memmap` for a detailed description of the modes). A
        memory-mapped array is kept on disk. However, it can be accessed
        and sliced like any ndarray. Memory mapping is especially useful
        for accessing small fragments of large files without reading the
        entire file into memory.
    allow_pickle : bool, optional
        Allow loading pickled object arrays stored in npy files. Reasons for
        disallowing pickles include security, as loading pickled data can
        execute arbitrary code. If pickles are disallowed, loading object
        arrays will fail. Default: False

        .. versionchanged:: 1.16.3
            Made default False in response to CVE-2019-6446.

    fix_imports : bool, optional
        Only useful when loading Python 2 generated pickled files on Python 3,
        which includes npy/npz files containing object arrays. If `fix_imports`
        is True, pickle will try to map the old Python 2 names to the new names
        used in Python 3.
    encoding : str, optional
        What encoding to use when reading Python 2 strings. Only useful when
        loading Python 2 generated pickled files in Python 3, which includes
        npy/npz files containing object arrays. Values other than 'latin1',
        'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
        data. Default: 'ASCII'

    Returns
    -------
    result : array, tuple, dict, etc.
        Data stored in the file. For ``.npz`` files, the returned instance
        of NpzFile class must be closed to avoid leaking file descriptors.

    Raises
    ------
    IOError
        If the input file does not exist or cannot be read.
    ValueError
        The file contains an object array, but ``allow_pickle=False``
        was given.

    See Also
    --------
    save, savez, savez_compressed, loadtxt
    memmap : Create a memory-map to an array stored in a file on disk.
    lib.format.open_memmap : Create or load a memory-mapped ``.npy`` file.

    Notes
    -----
    - If the file contains pickle data, then whatever object is stored
      in the pickle is returned.
    - If the file is a ``.npy`` file, then a single array is returned.
    - If the file is a ``.npz`` file, then a dictionary-like object is
      returned, containing ``{filename: array}`` key-value pairs, one for
      each file in the archive.
    - If the file is a ``.npz`` file, the returned value supports the
      context manager protocol in a similar fashion to the open function::

        with load('foo.npz') as data:
            a = data['a']

      The underlying file descriptor is closed when exiting the 'with'
      block.

    Examples
    --------
    Store data to disk, and load it again:

    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
    >>> np.load('/tmp/123.npy')
    array([[1, 2, 3],
           [4, 5, 6]])

    Store compressed data to disk, and load it again:

    >>> a=np.array([[1, 2, 3], [4, 5, 6]])
    >>> b=np.array([1, 2])
    >>> np.savez('/tmp/123.npz', a=a, b=b)
    >>> data = np.load('/tmp/123.npz')
    >>> data['a']
    array([[1, 2, 3],
           [4, 5, 6]])
    >>> data['b']
    array([1, 2])
    >>> data.close()

    Mem-map the stored array, and then access the second row
    directly from disk:

    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
    >>> X[1, :]
    memmap([4, 5, 6])

    """
    if encoding not in ('ASCII', 'latin1', 'bytes'):
        # The 'encoding' value for pickle also affects what encoding
        # the serialized binary data of NumPy arrays is loaded
        # in. Pickle does not pass on the encoding information to
        # NumPy. The unpickling code in numpy.core.multiarray is
        # written to assume that unicode data appearing where binary
        # should be is in 'latin1'. 'bytes' is also safe, as is 'ASCII'.
        #
        # Other encoding values can corrupt binary data, and we
        # purposefully disallow them. For the same reason, the errors=
        # argument is not exposed, as values other than 'strict'
        # can similarly silently corrupt numerical data.
        raise ValueError("encoding must be 'ASCII', 'latin1', or 'bytes'")

    pickle_kwargs = dict(encoding=encoding, fix_imports=fix_imports)

    with contextlib.ExitStack() as stack:
        if hasattr(file, 'read'):
            fid = file
            own_fid = False
        else:
            fid = stack.enter_context(open(os_fspath(file), "rb"))
            own_fid = True

        # Code to distinguish NumPy binary files from pickles.
        _ZIP_PREFIX = b'PK\x03\x04'
        _ZIP_SUFFIX = b'PK\x05\x06'  # empty zip files start with this
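        # Illustrative key (comment only): b'PK\x03\x04' marks a zip
        # archive (.npz), format.MAGIC_PREFIX (b'\x93NUMPY') marks a
        # .npy file, and anything else falls through to the pickle path.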

        N = len(format.MAGIC_PREFIX)
        magic = fid.read(N)
        # If the file size is less than N, we need to make sure not
        # to seek past the beginning of the file
        fid.seek(-min(N, len(magic)), 1)  # back-up
        if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX):
            # zip-file (assume .npz)
            # Potentially transfer file ownership to NpzFile
            stack.pop_all()
            ret = NpzFile(fid, own_fid=own_fid, allow_pickle=allow_pickle,
                          pickle_kwargs=pickle_kwargs)
            return ret
        elif magic == format.MAGIC_PREFIX:
            # .npy file
            if mmap_mode:
                return format.open_memmap(file, mode=mmap_mode)
            else:
                return format.read_array(fid, allow_pickle=allow_pickle,
                                         pickle_kwargs=pickle_kwargs)
        else:
            # Try a pickle
            if not allow_pickle:
                raise ValueError("Cannot load file containing pickled data "
                                 "when allow_pickle=False")
            try:
                return pickle.load(fid, **pickle_kwargs)
            except Exception:
                raise IOError(
                    "Failed to interpret file %s as a pickle" % repr(file))


def _save_dispatcher(file, arr, allow_pickle=None, fix_imports=None):
    return (arr,)


@array_function_dispatch(_save_dispatcher)
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Save an array to a binary file in NumPy ``.npy`` format.

    Parameters
    ----------
    file : file, str, or pathlib.Path
        File or filename to which the data is saved. If file is a file-object,
        then the filename is unchanged. If file is a string or Path, a ``.npy``
        extension will be appended to the filename if it does not already
        have one.
    arr : array_like
        Array data to be saved.
    allow_pickle : bool, optional
        Allow saving object arrays using Python pickles. Reasons for disallowing
        pickles include security (loading pickled data can execute arbitrary
        code) and portability (pickled objects may not be loadable on different
        Python installations, for example if the stored objects require libraries
        that are not available, and not all pickled data is compatible between
        Python 2 and Python 3).
        Default: True
    fix_imports : bool, optional
        Only useful in forcing objects in object arrays on Python 3 to be
        pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
        will try to map the new Python 3 names to the old module names used in
        Python 2, so that the pickle data stream is readable with Python 2.

    See Also
    --------
    savez : Save several arrays into a ``.npz`` archive
    savetxt, load

    Notes
    -----
    For a description of the ``.npy`` format, see :py:mod:`numpy.lib.format`.

    Any data saved to the file is appended to the end of the file.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()

    >>> x = np.arange(10)
    >>> np.save(outfile, x)

    >>> _ = outfile.seek(0)  # Only needed here to simulate closing & reopening file
    >>> np.load(outfile)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])


    >>> with open('test.npy', 'wb') as f:
    ...     np.save(f, np.array([1, 2]))
    ...     np.save(f, np.array([1, 3]))
    >>> with open('test.npy', 'rb') as f:
    ...     a = np.load(f)
    ...     b = np.load(f)
    >>> print(a, b)
    [1 2] [1 3]
517 """ 

518 if hasattr(file, 'write'): 

519 file_ctx = contextlib_nullcontext(file) 

520 else: 

521 file = os_fspath(file) 

522 if not file.endswith('.npy'): 

523 file = file + '.npy' 

524 file_ctx = open(file, "wb") 

525 

526 with file_ctx as fid: 

527 arr = np.asanyarray(arr) 

528 format.write_array(fid, arr, allow_pickle=allow_pickle, 

529 pickle_kwargs=dict(fix_imports=fix_imports)) 

530 

531 

532def _savez_dispatcher(file, *args, **kwds): 

533 yield from args 

534 yield from kwds.values() 

535 

536 

537@array_function_dispatch(_savez_dispatcher) 

538def savez(file, *args, **kwds): 

539 """Save several arrays into a single file in uncompressed ``.npz`` format. 

540 

541 If arguments are passed in with no keywords, the corresponding variable 

542 names, in the ``.npz`` file, are 'arr_0', 'arr_1', etc. If keyword 

543 arguments are given, the corresponding variable names, in the ``.npz`` 

544 file will match the keyword names. 

545 

546 Parameters 

547 ---------- 

548 file : str or file 

549 Either the filename (string) or an open file (file-like object) 

550 where the data will be saved. If file is a string or a Path, the 

551 ``.npz`` extension will be appended to the filename if it is not 

552 already there. 

553 args : Arguments, optional 

554 Arrays to save to the file. Since it is not possible for Python to 

555 know the names of the arrays outside `savez`, the arrays will be saved 

556 with names "arr_0", "arr_1", and so on. These arguments can be any 

557 expression. 

558 kwds : Keyword arguments, optional 

559 Arrays to save to the file. Arrays will be saved in the file with the 

560 keyword names. 

561 

562 Returns 

563 ------- 

564 None 

565 

566 See Also 

567 -------- 

568 save : Save a single array to a binary file in NumPy format. 

569 savetxt : Save an array to a file as plain text. 

570 savez_compressed : Save several arrays into a compressed ``.npz`` archive 

571 

572 Notes 

573 ----- 

574 The ``.npz`` file format is a zipped archive of files named after the 

575 variables they contain. The archive is not compressed and each file 

576 in the archive contains one variable in ``.npy`` format. For a 

577 description of the ``.npy`` format, see :py:mod:`numpy.lib.format`. 

578 

579 When opening the saved ``.npz`` file with `load` a `NpzFile` object is 

580 returned. This is a dictionary-like object which can be queried for 

581 its list of arrays (with the ``.files`` attribute), and for the arrays 

582 themselves. 

583 

584 When saving dictionaries, the dictionary keys become filenames 

585 inside the ZIP archive. Therefore, keys should be valid filenames. 

586 E.g., avoid keys that begin with ``/`` or contain ``.``. 

587 

588 Examples 

589 -------- 

590 >>> from tempfile import TemporaryFile 

591 >>> outfile = TemporaryFile() 

592 >>> x = np.arange(10) 

593 >>> y = np.sin(x) 

594 

595 Using `savez` with \\*args, the arrays are saved with default names. 

596 

597 >>> np.savez(outfile, x, y) 

598 >>> _ = outfile.seek(0) # Only needed here to simulate closing & reopening file 

599 >>> npzfile = np.load(outfile) 

600 >>> npzfile.files 

601 ['arr_0', 'arr_1'] 

602 >>> npzfile['arr_0'] 

603 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 

604 

605 Using `savez` with \\**kwds, the arrays are saved with the keyword names. 

606 

607 >>> outfile = TemporaryFile() 

608 >>> np.savez(outfile, x=x, y=y) 

609 >>> _ = outfile.seek(0) 

610 >>> npzfile = np.load(outfile) 

611 >>> sorted(npzfile.files) 

612 ['x', 'y'] 

613 >>> npzfile['x'] 

614 array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) 

615 """ 

616 _savez(file, args, kwds, False) 



def _savez_compressed_dispatcher(file, *args, **kwds):
    yield from args
    yield from kwds.values()


@array_function_dispatch(_savez_compressed_dispatcher)
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single file in compressed ``.npz`` format.

    If keyword arguments are given, then filenames are taken from the keywords.
    If arguments are passed in with no keywords, then stored filenames are
    arr_0, arr_1, etc.

    Parameters
    ----------
    file : str or file
        Either the filename (string) or an open file (file-like object)
        where the data will be saved. If file is a string or a Path, the
        ``.npz`` extension will be appended to the filename if it is not
        already there.
    args : Arguments, optional
        Arrays to save to the file. Since it is not possible for Python to
        know the names of the arrays outside `savez`, the arrays will be saved
        with names "arr_0", "arr_1", and so on. These arguments can be any
        expression.
    kwds : Keyword arguments, optional
        Arrays to save to the file. Arrays will be saved in the file with the
        keyword names.

    Returns
    -------
    None

    See Also
    --------
    numpy.save : Save a single array to a binary file in NumPy format.
    numpy.savetxt : Save an array to a file as plain text.
    numpy.savez : Save several arrays into an uncompressed ``.npz`` file format
    numpy.load : Load the files created by savez_compressed.

    Notes
    -----
    The ``.npz`` file format is a zipped archive of files named after the
    variables they contain. The archive is compressed with
    ``zipfile.ZIP_DEFLATED`` and each file in the archive contains one variable
    in ``.npy`` format. For a description of the ``.npy`` format, see
    :py:mod:`numpy.lib.format`.


    When opening the saved ``.npz`` file with `load` a `NpzFile` object is
    returned. This is a dictionary-like object which can be queried for
    its list of arrays (with the ``.files`` attribute), and for the arrays
    themselves.

    Examples
    --------
    >>> test_array = np.random.rand(3, 2)
    >>> test_vector = np.random.rand(4)
    >>> np.savez_compressed('/tmp/123', a=test_array, b=test_vector)
    >>> loaded = np.load('/tmp/123.npz')
    >>> print(np.array_equal(test_array, loaded['a']))
    True
    >>> print(np.array_equal(test_vector, loaded['b']))
    True

    """
    _savez(file, args, kwds, True)


def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
    # Import is postponed to here since zipfile depends on gzip, an optional
    # component of the so-called standard library.
    import zipfile

    if not hasattr(file, 'write'):
        file = os_fspath(file)
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict.keys():
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    if compress:
        compression = zipfile.ZIP_DEFLATED
    else:
        compression = zipfile.ZIP_STORED

    zipf = zipfile_factory(file, mode="w", compression=compression)

    if sys.version_info >= (3, 6):
        # Since Python 3.6 it is possible to write directly to a ZIP file.
        for key, val in namedict.items():
            fname = key + '.npy'
            val = np.asanyarray(val)
            # always force zip64, gh-10776
            with zipf.open(fname, 'w', force_zip64=True) as fid:
                format.write_array(fid, val,
                                   allow_pickle=allow_pickle,
                                   pickle_kwargs=pickle_kwargs)
    else:
        # Stage arrays in a temporary file on disk, before writing to zip.

        # Import deferred for startup time improvement
        import tempfile
        # Since target file might be big enough to exceed capacity of a global
        # temporary directory, create temp file side-by-side with the target file.
        file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp')
        fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy')
        os.close(fd)
        try:
            for key, val in namedict.items():
                fname = key + '.npy'
                fid = open(tmpfile, 'wb')
                try:
                    format.write_array(fid, np.asanyarray(val),
                                       allow_pickle=allow_pickle,
                                       pickle_kwargs=pickle_kwargs)
                    fid.close()
                    fid = None
                    zipf.write(tmpfile, arcname=fname)
                except IOError as exc:
                    raise IOError("Failed to write to %s: %s" % (tmpfile, exc))
                finally:
                    if fid:
                        fid.close()
        finally:
            os.remove(tmpfile)

    zipf.close()
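

# Illustrative sketch (assumed behavior): _savez is the shared backend of
# savez and savez_compressed, so e.g.
#   np.savez('f.npz', a, b=b)
# stores the members 'arr_0.npy' (positional) and 'b.npy' (keyword).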


def _getconv(dtype):
    """ Find the correct dtype converter. Adapted from matplotlib """

    def floatconv(x):
        x = x.lower()  # normalize case so '0X...' hex literals are caught too
        if '0x' in x:
            return float.fromhex(x)
        return float(x)

    typ = dtype.type
    if issubclass(typ, np.bool_):
        return lambda x: bool(int(x))
    if issubclass(typ, np.uint64):
        return np.uint64
    if issubclass(typ, np.int64):
        return np.int64
    if issubclass(typ, np.integer):
        return lambda x: int(float(x))
    elif issubclass(typ, np.longdouble):
        return np.longdouble
    elif issubclass(typ, np.floating):
        return floatconv
    elif issubclass(typ, complex):
        return lambda x: complex(asstr(x).replace('+-', '-'))
    elif issubclass(typ, np.bytes_):
        return asbytes
    elif issubclass(typ, np.unicode_):
        return asunicode
    else:
        return asstr
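

# Illustrative sketch of the converters chosen above (assumed behavior):
#   _getconv(np.dtype(bool))('0')         -> False
#   _getconv(np.dtype(np.int32))('1e3')   -> 1000     (int(float(x)))
#   _getconv(np.dtype(float))('0x1.8p1')  -> 3.0      (float.fromhex path)
#   _getconv(np.dtype(complex))('1+-2j')  -> (1-2j)   ('+-' repaired first)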

# amount of lines loadtxt reads in one chunk, can be overridden for testing
_loadtxt_chunksize = 50000


@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
            converters=None, skiprows=0, usecols=None, unpack=False,
            ndmin=0, encoding='bytes', max_rows=None):
    r"""
    Load data from a text file.

    Each row in the text file must have the same number of values.

    Parameters
    ----------
    fname : file, str, or pathlib.Path
        File, filename, or generator to read. If the filename extension is
        ``.gz`` or ``.bz2``, the file is first decompressed. Note that
        generators should return byte strings.
    dtype : data-type, optional
        Data-type of the resulting array; default: float. If this is a
        structured data-type, the resulting array will be 1-dimensional, and
        each row will be interpreted as an element of the array. In this
        case, the number of columns used must match the number of fields in
        the data-type.
    comments : str or sequence of str, optional
        The characters or list of characters used to indicate the start of a
        comment. None implies no comments. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is '#'.
    delimiter : str, optional
        The string used to separate values. For backwards compatibility, byte
        strings will be decoded as 'latin1'. The default is whitespace.
    converters : dict, optional
        A dictionary mapping column number to a function that will parse the
        column string into the desired value. E.g., if column 0 is a date
        string: ``converters = {0: datestr2num}``. Converters can also be
        used to provide a default value for missing data (but see also
        `genfromtxt`): ``converters = {3: lambda s: float(s.strip() or 0)}``.
        Default: None.
    skiprows : int, optional
        Skip the first `skiprows` lines, including comments; default: 0.
    usecols : int or sequence, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
        The default, None, results in all columns being read.

        .. versionchanged:: 1.11.0
            When a single column has to be read it is possible to use
            an integer instead of a tuple. E.g., ``usecols = 3`` reads the
            fourth column the same way as ``usecols = (3,)`` would.
    unpack : bool, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``. When used with a structured
        data-type, arrays are returned for each field. Default is False.
    ndmin : int, optional
        The returned array will have at least `ndmin` dimensions.
        Otherwise mono-dimensional axes will be squeezed.
        Legal values: 0 (default), 1 or 2.

        .. versionadded:: 1.6.0
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input streams.
        The special value 'bytes' enables backward compatibility workarounds
        that ensure you receive byte arrays as results if possible and pass
        'latin1' encoded strings to converters. Override this value to receive
        unicode arrays and pass strings as input to converters. If set to None
        the system default is used. The default value is 'bytes'.

        .. versionadded:: 1.14.0
    max_rows : int, optional
        Read `max_rows` lines of content after `skiprows` lines. The default
        is to read all the lines.

        .. versionadded:: 1.16.0

    Returns
    -------
    out : ndarray
        Data read from the text file.

    See Also
    --------
    load, fromstring, fromregex
    genfromtxt : Load data with missing values handled as specified.
    scipy.io.loadmat : reads MATLAB data files

    Notes
    -----
    This function aims to be a fast reader for simply formatted files. The
    `genfromtxt` function provides more sophisticated handling of, e.g.,
    lines with missing values.

    .. versionadded:: 1.10.0

    The strings produced by the Python float.hex method can be used as
    input for floats.

    Examples
    --------
    >>> from io import StringIO   # StringIO behaves like a file object
    >>> c = StringIO("0 1\n2 3")
    >>> np.loadtxt(c)
    array([[0., 1.],
           [2., 3.]])

    >>> d = StringIO("M 21 72\nF 35 58")
    >>> np.loadtxt(d, dtype={'names': ('gender', 'age', 'weight'),
    ...                      'formats': ('S1', 'i4', 'f4')})
    array([(b'M', 21, 72.), (b'F', 35, 58.)],
          dtype=[('gender', 'S1'), ('age', '<i4'), ('weight', '<f4')])

    >>> c = StringIO("1,0,2\n3,0,4")
    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
    >>> x
    array([1., 3.])
    >>> y
    array([2., 4.])

    This example shows how `converters` can be used to convert a field
    with a trailing minus sign into a negative number.

    >>> s = StringIO('10.01 31.25-\n19.22 64.31\n17.57- 63.94')
    >>> def conv(fld):
    ...     return -float(fld[:-1]) if fld.endswith(b'-') else float(fld)
    ...
    >>> np.loadtxt(s, converters={0: conv, 1: conv})
    array([[ 10.01, -31.25],
           [ 19.22,  64.31],
           [-17.57,  63.94]])
    """
    # Type conversions for Py3 convenience
    if comments is not None:
        if isinstance(comments, (str, bytes)):
            comments = [comments]
        comments = [_decode_line(x) for x in comments]
        # Compile regex for comments beforehand
        comments = (re.escape(comment) for comment in comments)
        regex_comments = re.compile('|'.join(comments))

    if delimiter is not None:
        delimiter = _decode_line(delimiter)

    user_converters = converters

    if encoding == 'bytes':
        encoding = None
        byte_converters = True
    else:
        byte_converters = False

    if usecols is not None:
        # Allow usecols to be a single int or a sequence of ints
        try:
            usecols_as_list = list(usecols)
        except TypeError:
            usecols_as_list = [usecols]
        for col_idx in usecols_as_list:
            try:
                opindex(col_idx)
            except TypeError as e:
                e.args = (
                    "usecols must be an int or a sequence of ints but "
                    "it contains at least one element of type %s" %
                    type(col_idx),
                    )
                raise
        # Fall back to existing code
        usecols = usecols_as_list

    fown = False
    try:
        if isinstance(fname, os_PathLike):
            fname = os_fspath(fname)
        if _is_string_like(fname):
            fh = np.lib._datasource.open(fname, 'rt', encoding=encoding)
            fencoding = getattr(fh, 'encoding', 'latin1')
            fh = iter(fh)
            fown = True
        else:
            fh = iter(fname)
            fencoding = getattr(fname, 'encoding', 'latin1')
    except TypeError:
        raise ValueError('fname must be a string, file handle, or generator')

    # input may be a python2 io stream
    if encoding is not None:
        fencoding = encoding
    # we must assume local encoding
    # TODO emit portability warning?
    elif fencoding is None:
        import locale
        fencoding = locale.getpreferredencoding()

    # not to be confused with the flatten_dtype we import...
    @recursive
    def flatten_dtype_internal(self, dt):
        """Unpack a structured data-type, and produce re-packing info."""
        if dt.names is None:
            # If the dtype is flattened, return.
            # If the dtype has a shape, the dtype occurs
            # in the list more than once.
            shape = dt.shape
            if len(shape) == 0:
                return ([dt.base], None)
            else:
                packing = [(shape[-1], list)]
                if len(shape) > 1:
                    for dim in dt.shape[-2::-1]:
                        packing = [(dim*packing[0][0], packing*dim)]
                return ([dt.base] * int(np.prod(dt.shape)), packing)
        else:
            types = []
            packing = []
            for field in dt.names:
                tp, bytes = dt.fields[field]
                flat_dt, flat_packing = self(tp)
                types.extend(flat_dt)
                # Avoid extra nesting for subarrays
                if tp.ndim > 0:
                    packing.extend(flat_packing)
                else:
                    packing.append((len(flat_dt), flat_packing))
            return (types, packing)
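
    # Illustrative sketch (assumed behavior): for a structured dtype like
    # np.dtype([('x', float), ('pos', float, (2,))]) this returns three
    # float64 column types plus the re-packing info [(1, None), (2, list)].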

    @recursive
    def pack_items(self, items, packing):
        """Pack items into nested lists based on re-packing info."""
        if packing is None:
            return items[0]
        elif packing is tuple:
            return tuple(items)
        elif packing is list:
            return list(items)
        else:
            start = 0
            ret = []
            for length, subpacking in packing:
                ret.append(self(items[start:start+length], subpacking))
                start += length
            return tuple(ret)
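
    # Illustrative sketch (assumed behavior): packing=None unwraps a lone
    # item, while [(3, tuple)] groups three flat items into a nested tuple:
    #   pack_items(['1'], None)                   -> '1'
    #   pack_items(['1', '2', '3'], [(3, tuple)]) -> (('1', '2', '3'),)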

    def split_line(line):
        """Chop off comments, strip, and split at delimiter. """
        line = _decode_line(line, encoding=encoding)

        if comments is not None:
            line = regex_comments.split(line, maxsplit=1)[0]
        line = line.strip('\r\n')
        if line:
            return line.split(delimiter)
        else:
            return []
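
    # Illustrative sketch (assumed behavior, default delimiter and '#'
    # comments):
    #   split_line('1 2 # a comment\n')  -> ['1', '2']
    #   split_line('# only a comment\n') -> []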

    def read_data(chunk_size):
        """Parse each line, including the first.

        The file read, `fh`, is defined in the enclosing scope above.

        Parameters
        ----------
        chunk_size : int
            At most `chunk_size` lines are read at a time, with iteration
            until all lines are read.

        """
        X = []
        line_iter = itertools.chain([first_line], fh)
        line_iter = itertools.islice(line_iter, max_rows)
        for i, line in enumerate(line_iter):
            vals = split_line(line)
            if len(vals) == 0:
                continue
            if usecols:
                vals = [vals[j] for j in usecols]
            if len(vals) != N:
                line_num = i + skiprows + 1
                raise ValueError("Wrong number of columns at line %d"
                                 % line_num)

            # Convert each value according to its column and store
            items = [conv(val) for (conv, val) in zip(converters, vals)]

            # Then pack it according to the dtype's nesting
            items = pack_items(items, packing)
            X.append(items)
            if len(X) > chunk_size:
                yield X
                X = []
        if X:
            yield X

    try:
        # Make sure we're dealing with a proper dtype
        dtype = np.dtype(dtype)
        defconv = _getconv(dtype)

        # Skip the first `skiprows` lines
        for i in range(skiprows):
            next(fh)

        # Read until we find a line with some values, and use
        # it to estimate the number of columns, N.
        first_vals = None
        try:
            while not first_vals:
                first_line = next(fh)
                first_vals = split_line(first_line)
        except StopIteration:
            # End of lines reached
            first_line = ''
            first_vals = []
            warnings.warn('loadtxt: Empty input file: "%s"' % fname, stacklevel=2)
        N = len(usecols or first_vals)

        dtype_types, packing = flatten_dtype_internal(dtype)
        if len(dtype_types) > 1:
            # We're dealing with a structured array, each field of
            # the dtype matches a column
            converters = [_getconv(dt) for dt in dtype_types]
        else:
            # All fields have the same dtype
            converters = [defconv for i in range(N)]
            if N > 1:
                packing = [(N, tuple)]

        # By preference, use the converters specified by the user
        for i, conv in (user_converters or {}).items():
            if usecols:
                try:
                    i = usecols.index(i)
                except ValueError:
                    # Unused converter specified
                    continue
            if byte_converters:
                # converters may use decode to workaround numpy's old behaviour,
                # so encode the string again before passing to the user converter
                def tobytes_first(x, conv):
                    if type(x) is bytes:
                        return conv(x)
                    return conv(x.encode("latin1"))
                converters[i] = functools.partial(tobytes_first, conv=conv)
            else:
                converters[i] = conv

        converters = [conv if conv is not bytes else
                      lambda x: x.encode(fencoding) for conv in converters]

        # read data in chunks and fill it into an array via resize
        # over-allocating and shrinking the array later may be faster but is
        # probably not relevant compared to the cost of actually reading and
        # converting the data
        X = None
        for x in read_data(_loadtxt_chunksize):
            if X is None:
                X = np.array(x, dtype)
            else:
                nshape = list(X.shape)
                pos = nshape[0]
                nshape[0] += len(x)
                X.resize(nshape, refcheck=False)
                X[pos:, ...] = x
    finally:
        if fown:
            fh.close()

    if X is None:
        X = np.array([], dtype)

    # Multicolumn data are returned with shape (1, N, M), i.e.
    # (1, 1, M) for a single row - remove the singleton dimension there
    if X.ndim == 3 and X.shape[:2] == (1, 1):
        X.shape = (1, -1)

    # Verify that the array has at least dimensions `ndmin`.
    # Check correctness of the values of `ndmin`
    if ndmin not in [0, 1, 2]:
        raise ValueError('Illegal value of ndmin keyword: %s' % ndmin)
    # Tweak the size and shape of the arrays - remove extraneous dimensions
    if X.ndim > ndmin:
        X = np.squeeze(X)
    # and ensure we have the minimum number of dimensions asked for
    # - has to be in this order for the odd case ndmin=1, X.squeeze().ndim=0
    if X.ndim < ndmin:
        if ndmin == 1:
            X = np.atleast_1d(X)
        elif ndmin == 2:
            X = np.atleast_2d(X).T

    if unpack:
        if len(dtype_types) > 1:
            # For structured arrays, return an array for each field.
            return [X[field] for field in dtype.names]
        else:
            return X.T
    else:
        return X


def _savetxt_dispatcher(fname, X, fmt=None, delimiter=None, newline=None,
                        header=None, footer=None, comments=None,
                        encoding=None):
    return (X,)


@array_function_dispatch(_savetxt_dispatcher)
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
            footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters
    ----------
    fname : filename or file handle
        If the filename ends in ``.gz``, the file is automatically saved in
        compressed gzip format. `loadtxt` understands gzipped files
        transparently.
    X : 1D or 2D array_like
        Data to be saved to a text file.
    fmt : str or sequence of strs, optional
        A single format (%10.5f), a sequence of formats, or a
        multi-format string, e.g. 'Iteration %d -- %10.5f', in which
        case `delimiter` is ignored. For complex `X`, the legal options
        for `fmt` are:

        * a single specifier, `fmt='%.4e'`, resulting in numbers formatted
          like `' (%s+%sj)' % (fmt, fmt)`
        * a full string specifying every real and imaginary part, e.g.
          `' %.4e %+.4ej %.4e %+.4ej %.4e %+.4ej'` for 3 columns
        * a list of specifiers, one per column - in this case, the real
          and imaginary part must have separate specifiers,
          e.g. `['%.3e + %.3ej', '(%.15e%+.15ej)']` for 2 columns
    delimiter : str, optional
        String or character separating columns.
    newline : str, optional
        String or character separating lines.

        .. versionadded:: 1.5.0
    header : str, optional
        String that will be written at the beginning of the file.

        .. versionadded:: 1.7.0
    footer : str, optional
        String that will be written at the end of the file.

        .. versionadded:: 1.7.0
    comments : str, optional
        String that will be prepended to the ``header`` and ``footer`` strings,
        to mark them as comments. Default: '# ', as expected by e.g.
        ``numpy.loadtxt``.

        .. versionadded:: 1.7.0
    encoding : {None, str}, optional
        Encoding used to encode the output file. Does not apply to output
        streams. If the encoding is something other than 'bytes' or 'latin1'
        you will not be able to load the file in NumPy versions < 1.14. Default
        is 'latin1'.

        .. versionadded:: 1.14.0


    See Also
    --------
    save : Save an array to a binary file in NumPy ``.npy`` format
    savez : Save several arrays into an uncompressed ``.npz`` archive
    savez_compressed : Save several arrays into a compressed ``.npz`` archive

    Notes
    -----
    Further explanation of the `fmt` parameter
    (``%[flag]width[.precision]specifier``):

    flags:
        ``-`` : left justify

        ``+`` : Forces to precede result with + or -.

        ``0`` : Left pad the number with zeros instead of space (see width).

    width:
        Minimum number of characters to be printed. The value is not truncated
        if it has more characters.

    precision:
        - For integer specifiers (eg. ``d,i,o,x``), the minimum number of
          digits.
        - For ``e, E`` and ``f`` specifiers, the number of digits to print
          after the decimal point.
        - For ``g`` and ``G``, the maximum number of significant digits.
        - For ``s``, the maximum number of characters.

    specifiers:
        ``c`` : character

        ``d`` or ``i`` : signed decimal integer

        ``e`` or ``E`` : scientific notation with ``e`` or ``E``.

        ``f`` : decimal floating point

        ``g,G`` : use the shorter of ``e,E`` or ``f``

        ``o`` : signed octal

        ``s`` : string of characters

        ``u`` : unsigned decimal integer

        ``x,X`` : unsigned hexadecimal integer

    This explanation of ``fmt`` is not complete; for an exhaustive
    specification see [1]_.

    References
    ----------
    .. [1] `Format Specification Mini-Language
           <https://docs.python.org/library/string.html#format-specification-mini-language>`_,
           Python Documentation.

    Examples
    --------
    >>> x = y = z = np.arange(0.0,5.0,1.0)
    >>> np.savetxt('test.out', x, delimiter=',')   # X is an array
    >>> np.savetxt('test.out', (x,y,z))   # x,y,z equal sized 1D arrays
    >>> np.savetxt('test.out', x, fmt='%1.4e')   # use exponential notation

    """

    # Py3 conversions first
    if isinstance(fmt, bytes):
        fmt = asstr(fmt)
    delimiter = asstr(delimiter)

    class WriteWrap:
        """Convert to bytes on bytestream inputs.

        """
        def __init__(self, fh, encoding):
            self.fh = fh
            self.encoding = encoding
            self.do_write = self.first_write

        def close(self):
            self.fh.close()

        def write(self, v):
            self.do_write(v)

        def write_bytes(self, v):
            if isinstance(v, bytes):
                self.fh.write(v)
            else:
                self.fh.write(v.encode(self.encoding))

        def write_normal(self, v):
            self.fh.write(asunicode(v))

        def first_write(self, v):
            try:
                self.write_normal(v)
                self.write = self.write_normal
            except TypeError:
                # input is probably a bytestream
                self.write_bytes(v)
                self.write = self.write_bytes
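
        # Illustrative sketch (assumed behavior): the first write probes
        # the stream, then caches the matching writer, e.g.
        #   w = WriteWrap(open('out.txt', 'wb'), 'latin1')
        #   w.write('1 2\n')   # TypeError on text write -> bytes path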

    own_fh = False
    if isinstance(fname, os_PathLike):
        fname = os_fspath(fname)
    if _is_string_like(fname):
        # datasource doesn't support creating a new file ...
        open(fname, 'wt').close()
        fh = np.lib._datasource.open(fname, 'wt', encoding=encoding)
        own_fh = True
    elif hasattr(fname, 'write'):
        # wrap to handle byte output streams
        fh = WriteWrap(fname, encoding or 'latin1')
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)

        # Handle 1-dimensional arrays
        if X.ndim == 0 or X.ndim > 2:
            raise ValueError(
                "Expected 1D or 2D array, got %dD array instead" % X.ndim)
        elif X.ndim == 1:
            # Common case -- 1d array of numbers
            if X.dtype.names is None:
                X = np.atleast_2d(X).T
                ncol = 1

            # Complex dtype -- each field indicates a separate column
            else:
                ncol = len(X.dtype.names)
        else:
            ncol = X.shape[1]

        iscomplex_X = np.iscomplexobj(X)
        # `fmt` can be a string with multiple insertion points or a
        # list of formats.  E.g. '%10.5f\t%10d' or ('%10.5f', '%10d')
        if type(fmt) in (list, tuple):
            if len(fmt) != ncol:
                raise AttributeError('fmt has wrong shape. %s' % str(fmt))
            format = asstr(delimiter).join(map(asstr, fmt))
        elif isinstance(fmt, str):
            n_fmt_chars = fmt.count('%')
            error = ValueError('fmt has wrong number of %% formats: %s' % fmt)
            if n_fmt_chars == 1:
                if iscomplex_X:
                    fmt = [' (%s+%sj)' % (fmt, fmt), ] * ncol
                else:
                    fmt = [fmt, ] * ncol
                format = delimiter.join(fmt)
            elif iscomplex_X and n_fmt_chars != (2 * ncol):
                raise error
            elif ((not iscomplex_X) and n_fmt_chars != ncol):
                raise error
            else:
                format = fmt
        else:
            raise ValueError('invalid fmt: %r' % (fmt,))
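
        # Illustrative sketch (assumed behavior): with ncol=2, the default
        # delimiter ' ' and fmt='%.1f', format becomes '%.1f %.1f'; if X
        # were complex it would become ' (%.1f+%.1fj)  (%.1f+%.1fj)'.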

        if len(header) > 0:
            header = header.replace('\n', '\n' + comments)
            fh.write(comments + header + newline)
        if iscomplex_X:
            for row in X:
                row2 = []
                for number in row:
                    row2.append(number.real)
                    row2.append(number.imag)
                s = format % tuple(row2) + newline
                fh.write(s.replace('+-', '-'))
        else:
            for row in X:
                try:
                    v = format % tuple(row) + newline
                except TypeError:
                    raise TypeError("Mismatch between array dtype ('%s') and "
                                    "format specifier ('%s')"
                                    % (str(X.dtype), format))
                fh.write(v)

        if len(footer) > 0:
            footer = footer.replace('\n', '\n' + comments)
            fh.write(comments + footer + newline)
    finally:
        if own_fh:
            fh.close()


@set_module('numpy')
def fromregex(file, regexp, dtype, encoding=None):
    """
    Construct an array from a text file, using regular expression parsing.

    The returned array is always a structured array, and is constructed from
    all matches of the regular expression in the file. Groups in the regular
    expression are converted to fields of the structured array.

    Parameters
    ----------
    file : str or file
        Filename or file object to read.
    regexp : str or regexp
        Regular expression used to parse the file.
        Groups in the regular expression correspond to fields in the dtype.
    dtype : dtype or list of dtypes
        Dtype for the structured array.
    encoding : str, optional
        Encoding used to decode the input file. Does not apply to input
        streams.

        .. versionadded:: 1.14.0

    Returns
    -------
    output : ndarray
        The output array, containing the part of the content of `file` that
        was matched by `regexp`. `output` is always a structured array.

    Raises
    ------
    TypeError
        When `dtype` is not a valid dtype for a structured array.

    See Also
    --------
    fromstring, loadtxt

    Notes
    -----
    Dtypes for structured arrays can be specified in several forms, but all
    forms specify at least the data type and field name. For details see
    `doc.structured_arrays`.

    Examples
    --------
    >>> f = open('test.dat', 'w')
    >>> _ = f.write("1312 foo\\n1534 bar\\n444 qux")
    >>> f.close()

    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
    >>> output = np.fromregex('test.dat', regexp,
    ...                       [('num', np.int64), ('key', 'S3')])
    >>> output
    array([(1312, b'foo'), (1534, b'bar'), ( 444, b'qux')],
          dtype=[('num', '<i8'), ('key', 'S3')])
    >>> output['num']
    array([1312, 1534, 444])

    """
    own_fh = False
    if not hasattr(file, "read"):
        file = np.lib._datasource.open(file, 'rt', encoding=encoding)
        own_fh = True

    try:
        if not isinstance(dtype, np.dtype):
            dtype = np.dtype(dtype)

        content = file.read()
        if isinstance(content, bytes) and isinstance(regexp, np.compat.unicode):
            regexp = asbytes(regexp)
        elif isinstance(content, np.compat.unicode) and isinstance(regexp, bytes):
            regexp = asstr(regexp)

        if not hasattr(regexp, 'match'):
            regexp = re.compile(regexp)
        seq = regexp.findall(content)
        if seq and not isinstance(seq[0], tuple):
            # Only one group is in the regexp.
            # Create the new array as a single data-type and then
            # re-interpret as a single-field structured array.
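            # e.g. (illustrative): r"(\d+)" with dtype [('num', np.int64)]
            # gives seq like ['1312', '1534'], parsed as int64 and then
            # viewed as the one-field structured dtype.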

1521 newdtype = np.dtype(dtype[dtype.names[0]]) 

1522 output = np.array(seq, dtype=newdtype) 

1523 output.dtype = dtype 

1524 else: 

1525 output = np.array(seq, dtype=dtype) 

1526 

1527 return output 

1528 finally: 

1529 if own_fh: 

1530 file.close() 

1531 

1532 

1533#####-------------------------------------------------------------------------- 

1534#---- --- ASCII functions --- 

1535#####-------------------------------------------------------------------------- 

1536 

1537 

1538@set_module('numpy') 

1539def genfromtxt(fname, dtype=float, comments='#', delimiter=None, 

1540 skip_header=0, skip_footer=0, converters=None, 

1541 missing_values=None, filling_values=None, usecols=None, 

1542 names=None, excludelist=None, 

1543 deletechars=''.join(sorted(NameValidator.defaultdeletechars)), 

1544 replace_space='_', autostrip=False, case_sensitive=True, 

1545 defaultfmt="f%i", unpack=None, usemask=False, loose=True, 

1546 invalid_raise=True, max_rows=None, encoding='bytes'): 

1547 """ 

1548 Load data from a text file, with missing values handled as specified. 

1549 

1550 Each line past the first `skip_header` lines is split at the `delimiter` 

1551 character, and characters following the `comments` character are discarded. 

1552 

1553 Parameters 

1554 ---------- 

1555 fname : file, str, pathlib.Path, list of str, generator 

1556 File, filename, list, or generator to read. If the filename 

1557 extension is `.gz` or `.bz2`, the file is first decompressed. Note 

1558 that generators must return byte strings. The strings 

1559 in a list or produced by a generator are treated as lines. 

1560 dtype : dtype, optional 

1561 Data type of the resulting array. 

1562 If None, the dtypes will be determined by the contents of each 

1563 column, individually. 

1564 comments : str, optional 

1565 The character used to indicate the start of a comment. 

1566 All the characters occurring on a line after a comment are discarded 

1567 delimiter : str, int, or sequence, optional 

1568 The string used to separate values. By default, any consecutive 

1569 whitespaces act as delimiter. An integer or sequence of integers 

1570 can also be provided as width(s) of each field. 

1571 skiprows : int, optional 

1572 `skiprows` was removed in numpy 1.10. Please use `skip_header` instead. 

1573 skip_header : int, optional 

1574 The number of lines to skip at the beginning of the file. 

1575 skip_footer : int, optional 

1576 The number of lines to skip at the end of the file. 

1577 converters : variable, optional 

1578 The set of functions that convert the data of a column to a value. 

1579 The converters can also be used to provide a default value 

1580 for missing data: ``converters = {3: lambda s: float(s or 0)}``. 

1581 missing : variable, optional 

1582 `missing` was removed in numpy 1.10. Please use `missing_values` 

1583 instead. 

1584 missing_values : variable, optional 

1585 The set of strings corresponding to missing data. 

1586 filling_values : variable, optional 

1587 The set of values to be used as default when the data are missing. 

1588 usecols : sequence, optional 

1589 Which columns to read, with 0 being the first. For example, 

1590 ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns. 

1591 names : {None, True, str, sequence}, optional 

1592 If `names` is True, the field names are read from the first line after 

1593 the first `skip_header` lines. This line can optionally be proceeded 

1594 by a comment delimiter. If `names` is a sequence or a single-string of 

1595 comma-separated names, the names will be used to define the field names 

1596 in a structured dtype. If `names` is None, the names of the dtype 

1597 fields will be used, if any. 

1598 excludelist : sequence, optional 

1599 A list of names to exclude. This list is appended to the default list 

1600 ['return','file','print']. Excluded names are appended an underscore: 

1601 for example, `file` would become `file_`. 

1602 deletechars : str, optional 

1603 A string combining invalid characters that must be deleted from the 

1604 names. 

1605 defaultfmt : str, optional 

1606 A format used to define default field names, such as "f%i" or "f_%02i". 

1607 autostrip : bool, optional 

1608 Whether to automatically strip white spaces from the variables. 

1609 replace_space : char, optional 

1610 Character(s) used to replace white spaces in the variable 

1611 names. By default, a '_' is used. 

1612 case_sensitive : {True, False, 'upper', 'lower'}, optional 

1613 If True, field names are case sensitive. 

1614 If False or 'upper', field names are converted to upper case. 

1615 If 'lower', field names are converted to lower case. 

1616 unpack : bool, optional 

1617 If True, the returned array is transposed, so that arguments may be 

1618 unpacked using ``x, y, z = genfromtxt(...)``. 

1619 usemask : bool, optional 

1620 If True, return a masked array. 

1621 If False, return a regular array. 

1622 loose : bool, optional 

1623 If True, do not raise errors for invalid values. 

1624 invalid_raise : bool, optional 

1625 If True, an exception is raised if an inconsistency is detected in the 

1626 number of columns. 

1627 If False, a warning is emitted and the offending lines are skipped. 

1628 max_rows : int, optional 

1629 The maximum number of rows to read. Must not be used together with 

1630 `skip_footer`. If given, the value must be at least 1. Default is 

1631 to read the entire file. 

1632 

1633 .. versionadded:: 1.10.0 

1634 encoding : str, optional 

1635 Encoding used to decode the input file. Does not apply when `fname` is 

1636 a file object. The special value 'bytes' enables backward compatibility 

1637 workarounds that ensure that you receive byte arrays when possible 

1638 and that pass latin1-encoded strings to converters. Override this value to 

1639 receive unicode arrays and pass strings as input to converters. If set 

1640 to None, the system default is used. The default value is 'bytes'. 

1641 

1642 .. versionadded:: 1.14.0 

1643 

1644 Returns 

1645 ------- 

1646 out : ndarray 

1647 Data read from the text file. If `usemask` is True, this is a 

1648 masked array. 

1649 

1650 See Also 

1651 -------- 

1652 numpy.loadtxt : equivalent function when no data is missing. 

1653 

1654 Notes 

1655 ----- 

1656 * When spaces are used as delimiters, or when no delimiter has been given 

1657 as input, there should not be any missing data between two fields. 

1658 * When the variables are named (either by a flexible dtype or with `names`), 

1659 there must not be any header in the file (else a ValueError 

1660 exception is raised). 

1661 * Individual values are not stripped of spaces by default. 

1662 When using a custom converter, make sure the function strips spaces. 

1663 

1664 References 

1665 ---------- 

1666 .. [1] NumPy User Guide, section `I/O with NumPy 

1667 <https://docs.scipy.org/doc/numpy/user/basics.io.genfromtxt.html>`_. 

1668 

1669 Examples 

1670 -------- 

1671 >>> from io import StringIO 

1672 >>> import numpy as np 

1673 

1674 Comma-delimited file with mixed dtype 

1675 

1676 >>> s = StringIO(u"1,1.3,abcde") 

1677 >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'), 

1678 ... ('mystring','S5')], delimiter=",") 

1679 >>> data 

1680 array((1, 1.3, b'abcde'), 

1681 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 

1682 

1683 Using dtype = None 

1684 

1685 >>> _ = s.seek(0) # needed for StringIO example only 

1686 >>> data = np.genfromtxt(s, dtype=None, 

1687 ... names = ['myint','myfloat','mystring'], delimiter=",") 

1688 >>> data 

1689 array((1, 1.3, b'abcde'), 

1690 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 

1691 

1692 Specifying dtype and names 

1693 

1694 >>> _ = s.seek(0) 

1695 >>> data = np.genfromtxt(s, dtype="i8,f8,S5", 

1696 ... names=['myint','myfloat','mystring'], delimiter=",") 

1697 >>> data 

1698 array((1, 1.3, b'abcde'), 

1699 dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', 'S5')]) 

1700 
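A sketch showing column selection with ``usecols`` (negative indices count
from the last column; the exact repr spacing may vary):

>>> _ = s.seek(0)
>>> np.genfromtxt(s, delimiter=",", usecols=(0, -1), dtype=None)
array((1, b'abcde'),
      dtype=[('f0', '<i8'), ('f1', 'S5')])
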

1701 An example with fixed-width columns 

1702 

1703 >>> s = StringIO(u"11.3abcde") 

1704 >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'], 

1705 ... delimiter=[1,3,5]) 

1706 >>> data 

1707 array((1, 1.3, b'abcde'), 

1708 dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', 'S5')]) 

1709 

1710 An example to show comments 

1711 

1712 >>> f = StringIO(''' 

1713 ... text,# of chars 

1714 ... hello world,11 

1715 ... numpy,5''') 

1716 >>> np.genfromtxt(f, dtype='S12,S12', delimiter=',') 

1717 array([(b'text', b''), (b'hello world', b'11'), (b'numpy', b'5')], 

1718 dtype=[('f0', 'S12'), ('f1', 'S12')]) 
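
An example showing how missing values can be filled (a sketch;
``filling_values`` supplies the replacement, and repr spacing may vary):

>>> s = StringIO(u'''1,,3
... 4,5,6''')
>>> np.genfromtxt(s, delimiter=",", names=['a','b','c'],
...               filling_values=-999)
array([(1., -999., 3.), (4., 5., 6.)],
      dtype=[('a', '<f8'), ('b', '<f8'), ('c', '<f8')])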

1719 
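An example using ``usemask`` (a sketch; missing entries come back masked,
and the masked-array repr differs slightly across NumPy versions):

>>> s = StringIO(u'''1,,3
... 4,5,6''')
>>> np.genfromtxt(s, delimiter=",", usemask=True)
masked_array(
  data=[[1.0, --, 3.0],
        [4.0, 5.0, 6.0]],
  mask=[[False,  True, False],
        [False, False, False]],
  fill_value=1e+20)
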

1720 """ 

1721 if max_rows is not None: 

1722 if skip_footer: 

1723 raise ValueError( 

1724 "The keywords 'skip_footer' and 'max_rows' can not be " 

1725 "specified at the same time.") 

1726 if max_rows < 1: 

1727 raise ValueError("'max_rows' must be at least 1.") 

1728 

1729 if usemask: 

1730 from numpy.ma import MaskedArray, make_mask_descr 

1731 # Check the input dictionary of converters 

1732 user_converters = converters or {} 

1733 if not isinstance(user_converters, dict): 

1734 raise TypeError( 

1735 "The input argument 'converter' should be a valid dictionary " 

1736 "(got '%s' instead)" % type(user_converters)) 

1737 
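# Backward compatibility: encoding='bytes' means values are re-encoded
# to latin1 bytes before reaching user converters, and string columns
# are returned as byte strings where possible.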

1738 if encoding == 'bytes': 

1739 encoding = None 

1740 byte_converters = True 

1741 else: 

1742 byte_converters = False 

1743 

1744 # Initialize the filehandle, the LineSplitter and the NameValidator 

1745 try: 

1746 if isinstance(fname, os_PathLike): 

1747 fname = os_fspath(fname) 

1748 if isinstance(fname, str): 

1749 fid = np.lib._datasource.open(fname, 'rt', encoding=encoding) 

1750 fid_ctx = contextlib.closing(fid) 

1751 else: 

1752 fid = fname 

1753 fid_ctx = contextlib_nullcontext(fid) 

1754 fhd = iter(fid) 

1755 except TypeError: 

1756 raise TypeError( 

1757 "fname must be a string, filehandle, list of strings, " 

1758 "or generator. Got %s instead." % type(fname)) 

1759 

1760 with fid_ctx: 

1761 split_line = LineSplitter(delimiter=delimiter, comments=comments, 

1762 autostrip=autostrip, encoding=encoding) 

1763 validate_names = NameValidator(excludelist=excludelist, 

1764 deletechars=deletechars, 

1765 case_sensitive=case_sensitive, 

1766 replace_space=replace_space) 

1767 

1768 # Skip the first `skip_header` rows 

1769 try: 

1770 for i in range(skip_header): 

1771 next(fhd) 

1772 

1773 # Keep on until we find the first valid values 

1774 first_values = None 

1775 

1776 while not first_values: 

1777 first_line = _decode_line(next(fhd), encoding) 

1778 if (names is True) and (comments is not None): 

1779 if comments in first_line: 

1780 first_line = ( 

1781 ''.join(first_line.split(comments)[1:])) 

1782 first_values = split_line(first_line) 

1783 except StopIteration: 

1784 # return an empty array if the datafile is empty 

1785 first_line = '' 

1786 first_values = [] 

1787 warnings.warn('genfromtxt: Empty input file: "%s"' % fname, stacklevel=2) 

1788 

1789 # Should we take the first values as names? 

1790 if names is True: 

1791 fval = first_values[0].strip() 

1792 if comments is not None: 

1793 if fval in comments: 

1794 del first_values[0] 

1795 

1796 # Check the columns to use: make sure `usecols` is a list 

1797 if usecols is not None: 

1798 try: 

1799 usecols = [_.strip() for _ in usecols.split(",")] 

1800 except AttributeError: 

1801 try: 

1802 usecols = list(usecols) 

1803 except TypeError: 

1804 usecols = [usecols, ] 
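# Number of columns actually read: all of them unless `usecols`
# restricts the selection.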

1805 nbcols = len(usecols or first_values) 

1806 

1807 # Check the names and overwrite the dtype.names if needed 

1808 if names is True: 

1809 names = validate_names([str(_.strip()) for _ in first_values]) 

1810 first_line = '' 

1811 elif _is_string_like(names): 

1812 names = validate_names([_.strip() for _ in names.split(',')]) 

1813 elif names: 

1814 names = validate_names(names) 

1815 # Get the dtype 

1816 if dtype is not None: 

1817 dtype = easy_dtype(dtype, defaultfmt=defaultfmt, names=names, 

1818 excludelist=excludelist, 

1819 deletechars=deletechars, 

1820 case_sensitive=case_sensitive, 

1821 replace_space=replace_space) 

1822 # Make sure `names` is a list (historically for Python 2.5 compatibility) 

1823 if names is not None: 

1824 names = list(names) 

1825 

1826 if usecols: 

1827 for (i, current) in enumerate(usecols): 

1828 # if usecols is a list of names, convert to a list of indices 

1829 if _is_string_like(current): 

1830 usecols[i] = names.index(current) 

1831 elif current < 0: 

1832 usecols[i] = current + len(first_values) 

1833 # If the dtype is not None, make sure we update it 

1834 if (dtype is not None) and (len(dtype) > nbcols): 

1835 descr = dtype.descr 

1836 dtype = np.dtype([descr[_] for _ in usecols]) 

1837 names = list(dtype.names) 

1838 # If `names` is not None, update the names 

1839 elif (names is not None) and (len(names) > nbcols): 

1840 names = [names[_] for _ in usecols] 

1841 elif (names is not None) and (dtype is not None): 

1842 names = list(dtype.names) 

1843 

1844 # Process the missing values ............................... 

1845 # Rename missing_values for convenience 

1846 user_missing_values = missing_values or () 

1847 if isinstance(user_missing_values, bytes): 

1848 user_missing_values = user_missing_values.decode('latin1') 

1849 

1850 # Define the list of missing_values (one column: one list) 

1851 missing_values = [list(['']) for _ in range(nbcols)] 

1852 

1853 # We have a dictionary: process it field by field 

1854 if isinstance(user_missing_values, dict): 

1855 # Loop on the items 

1856 for (key, val) in user_missing_values.items(): 

1857 # Is the key a string? 

1858 if _is_string_like(key): 

1859 try: 

1860 # Transform it into an integer 

1861 key = names.index(key) 

1862 except ValueError: 

1863 # We couldn't find it: the name must have been dropped 

1864 continue 

1865 # Redefine the key as needed if it's a column number 

1866 if usecols: 

1867 try: 

1868 key = usecols.index(key) 

1869 except ValueError: 

1870 pass 

1871 # Transform the value into a list of strings 

1872 if isinstance(val, (list, tuple)): 

1873 val = [str(_) for _ in val] 

1874 else: 

1875 val = [str(val), ] 

1876 # Add the value(s) to the current list of missing 

1877 if key is None: 

1878 # None acts as default 

1879 for miss in missing_values: 

1880 miss.extend(val) 

1881 else: 

1882 missing_values[key].extend(val) 

1883 # We have a sequence: each item matches a column 

1884 elif isinstance(user_missing_values, (list, tuple)): 

1885 for (value, entry) in zip(user_missing_values, missing_values): 

1886 value = str(value) 

1887 if value not in entry: 

1888 entry.append(value) 

1889 # We have a string: apply it to all entries 

1890 elif isinstance(user_missing_values, str): 

1891 user_value = user_missing_values.split(",") 

1892 for entry in missing_values: 

1893 entry.extend(user_value) 

1894 # We have something else: apply it to all entries 

1895 else: 

1896 for entry in missing_values: 

1897 entry.extend([str(user_missing_values)]) 

1898 

1899 # Process the filling_values ............................... 

1900 # Rename the input for convenience 

1901 user_filling_values = filling_values 

1902 if user_filling_values is None: 

1903 user_filling_values = [] 

1904 # Define the default 

1905 filling_values = [None] * nbcols 

1906 # We have a dictionary: update each entry individually 

1907 if isinstance(user_filling_values, dict): 

1908 for (key, val) in user_filling_values.items(): 

1909 if _is_string_like(key): 

1910 try: 

1911 # Transform it into an integer 

1912 key = names.index(key) 

1913 except ValueError: 

1914 # We couldn't find it: the name must have been dropped. 

1915 continue 

1916 # Redefine the key if it's a column number and usecols is defined 

1917 if usecols: 

1918 try: 

1919 key = usecols.index(key) 

1920 except ValueError: 

1921 pass 

1922 # Add the value to the list 

1923 filling_values[key] = val 

1924 # We have a sequence: update on a one-to-one basis 

1925 elif isinstance(user_filling_values, (list, tuple)): 

1926 n = len(user_filling_values) 

1927 if (n <= nbcols): 

1928 filling_values[:n] = user_filling_values 

1929 else: 

1930 filling_values = user_filling_values[:nbcols] 

1931 # We have something else: use it for all entries 

1932 else: 

1933 filling_values = [user_filling_values] * nbcols 

1934 

1935 # Initialize the converters ................................ 

1936 if dtype is None: 

1937 # Note: we can't use [...]*nbcols, as that would reuse the same 

1938 # converter nbcols times, instead of creating nbcols distinct converters. 

1939 converters = [StringConverter(None, missing_values=miss, default=fill) 

1940 for (miss, fill) in zip(missing_values, filling_values)] 

1941 else: 

1942 dtype_flat = flatten_dtype(dtype, flatten_base=True) 

1943 # Initialize the converters 

1944 if len(dtype_flat) > 1: 

1945 # Flexible type : get a converter from each dtype 

1946 zipit = zip(dtype_flat, missing_values, filling_values) 

1947 converters = [StringConverter(dt, locked=True, 

1948 missing_values=miss, default=fill) 

1949 for (dt, miss, fill) in zipit] 

1950 else: 

1951 # Set to a default converter (but w/ different missing values) 

1952 zipit = zip(missing_values, filling_values) 

1953 converters = [StringConverter(dtype, locked=True, 

1954 missing_values=miss, default=fill) 

1955 for (miss, fill) in zipit] 

1956 # Update the converters to use the user-defined ones 

1957 uc_update = [] 

1958 for (j, conv) in user_converters.items(): 

1959 # If the converter is specified by column names, use the index instead 

1960 if _is_string_like(j): 

1961 try: 

1962 j = names.index(j) 

1963 i = j 

1964 except ValueError: 

1965 continue 

1966 elif usecols: 

1967 try: 

1968 i = usecols.index(j) 

1969 except ValueError: 

1970 # Unused converter specified 

1971 continue 

1972 else: 

1973 i = j 

1974 # Find the value to test - first_line is not filtered by usecols: 

1975 if len(first_line): 

1976 testing_value = first_values[j] 

1977 else: 

1978 testing_value = None 

1979 if conv is bytes: 

1980 user_conv = asbytes 

1981 elif byte_converters: 

1982 # converters may use decode to work around numpy's old behaviour, 

1983 # so encode the string again before passing it to the user converter 

1984 def tobytes_first(x, conv): 

1985 if type(x) is bytes: 

1986 return conv(x) 

1987 return conv(x.encode("latin1")) 

1988 user_conv = functools.partial(tobytes_first, conv=conv) 

1989 else: 

1990 user_conv = conv 

1991 converters[i].update(user_conv, locked=True, 

1992 testing_value=testing_value, 

1993 default=filling_values[i], 

1994 missing_values=missing_values[i],) 

1995 uc_update.append((i, user_conv)) 

1996 # Make sure we have the corrected keys in user_converters... 

1997 user_converters.update(uc_update) 

1998 

1999 # Fixme: possible error as following variable never used. 

2000 # miss_chars = [_.missing_values for _ in converters] 

2001 

2002 # Initialize the output lists ... 

2003 # ... rows 

2004 rows = [] 

2005 append_to_rows = rows.append 

2006 # ... masks 

2007 if usemask: 

2008 masks = [] 

2009 append_to_masks = masks.append 

2010 # ... invalid 

2011 invalid = [] 

2012 append_to_invalid = invalid.append 

2013 

2014 # Parse each line 

2015 for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): 

2016 values = split_line(line) 

2017 nbvalues = len(values) 

2018 # Skip an empty line 

2019 if nbvalues == 0: 

2020 continue 

2021 if usecols: 

2022 # Select only the columns we need 

2023 try: 

2024 values = [values[_] for _ in usecols] 

2025 except IndexError: 

2026 append_to_invalid((i + skip_header + 1, nbvalues)) 

2027 continue 

2028 elif nbvalues != nbcols: 

2029 append_to_invalid((i + skip_header + 1, nbvalues)) 

2030 continue 

2031 # Store the values 

2032 append_to_rows(tuple(values)) 

2033 if usemask: 

2034 append_to_masks(tuple([v.strip() in m 

2035 for (v, m) in zip(values, 

2036 missing_values)])) 

2037 if len(rows) == max_rows: 

2038 break 

2039 

2040 # Upgrade the converters (if needed) 

2041 if dtype is None: 

2042 for (i, converter) in enumerate(converters): 

2043 current_column = [itemgetter(i)(_m) for _m in rows] 

2044 try: 

2045 converter.iterupgrade(current_column) 

2046 except ConverterLockError: 

2047 errmsg = "Converter #%i is locked and cannot be upgraded: " % i 

2048 current_column = map(itemgetter(i), rows) 

2049 for (j, value) in enumerate(current_column): 

2050 try: 

2051 converter.upgrade(value) 

2052 except (ConverterError, ValueError): 

2053 errmsg += "(occurred line #%i for value '%s')" 

2054 errmsg %= (j + 1 + skip_header, value) 

2055 raise ConverterError(errmsg) 

2056 

2057 # Check that we don't have invalid values 

2058 nbinvalid = len(invalid) 

2059 if nbinvalid > 0: 

2060 nbrows = len(rows) + nbinvalid - skip_footer 

2061 # Construct the error message 

2062 template = " Line #%%i (got %%i columns instead of %i)" % nbcols 

2063 if skip_footer > 0: 

2064 nbinvalid_skipped = len([_ for _ in invalid 

2065 if _[0] > nbrows + skip_header]) 

2066 invalid = invalid[:nbinvalid - nbinvalid_skipped] 

2067 skip_footer -= nbinvalid_skipped 

2073 errmsg = [template % (i, nb) 

2074 for (i, nb) in invalid] 

2075 if len(errmsg): 

2076 errmsg.insert(0, "Some errors were detected!") 

2077 errmsg = "\n".join(errmsg) 

2078 # Raise an exception? 

2079 if invalid_raise: 

2080 raise ValueError(errmsg) 

2081 # Issue a warning? 

2082 else: 

2083 warnings.warn(errmsg, ConversionWarning, stacklevel=2) 

2084 

2085 # Strip the last skip_footer data 

2086 if skip_footer > 0: 

2087 rows = rows[:-skip_footer] 

2088 if usemask: 

2089 masks = masks[:-skip_footer] 

2090 

2091 # Convert each value according to its column's converter 

2092 # (rows are rebuilt column by column, then zipped back into tuples) 

2093 if loose: 

2094 rows = list( 

2095 zip(*[[conv._loose_call(_r) for _r in map(itemgetter(i), rows)] 

2096 for (i, conv) in enumerate(converters)])) 

2097 else: 

2098 rows = list( 

2099 zip(*[[conv._strict_call(_r) for _r in map(itemgetter(i), rows)] 

2100 for (i, conv) in enumerate(converters)])) 

2101 

2102 # Reset the dtype 

2103 data = rows 

2104 if dtype is None: 

2105 # Get the dtypes from the types of the converters 

2106 column_types = [conv.type for conv in converters] 

2107 # Find the columns with strings... 

2108 strcolidx = [i for (i, v) in enumerate(column_types) 

2109 if v == np.unicode_] 

2110 

2111 if byte_converters and strcolidx: 

2112 # convert strings back to bytes for backward compatibility 

2113 warnings.warn( 

2114 "Reading unicode strings without specifying the encoding " 

2115 "argument is deprecated. Set the encoding, use None for the " 

2116 "system default.", 

2117 np.VisibleDeprecationWarning, stacklevel=2) 

2118 def encode_unicode_cols(row_tup): 

2119 row = list(row_tup) 

2120 for i in strcolidx: 

2121 row[i] = row[i].encode('latin1') 

2122 return tuple(row) 

2123 

2124 try: 

2125 data = [encode_unicode_cols(r) for r in data] 

2126 except UnicodeEncodeError: 

2127 pass 

2128 else: 

2129 for i in strcolidx: 

2130 column_types[i] = np.bytes_ 

2131 

2132 # Update string types to be the right length 

2133 sized_column_types = column_types[:] 

2134 for i, col_type in enumerate(column_types): 

2135 if np.issubdtype(col_type, np.character): 

2136 n_chars = max(len(row[i]) for row in data) 

2137 sized_column_types[i] = (col_type, n_chars) 

2138 

2139 if names is None: 

2140 # If the dtype is uniform (before sizing strings) 

2141 base = { 

2142 c_type 

2143 for c, c_type in zip(converters, column_types) 

2144 if c._checked} 

2145 if len(base) == 1: 

2146 uniform_type, = base 

2147 (ddtype, mdtype) = (uniform_type, bool) 

2148 else: 

2149 ddtype = [(defaultfmt % i, dt) 

2150 for (i, dt) in enumerate(sized_column_types)] 

2151 if usemask: 

2152 mdtype = [(defaultfmt % i, bool) 

2153 for (i, dt) in enumerate(sized_column_types)] 

2154 else: 

2155 ddtype = list(zip(names, sized_column_types)) 

2156 mdtype = list(zip(names, [bool] * len(sized_column_types))) 

2157 output = np.array(data, dtype=ddtype) 

2158 if usemask: 

2159 outputmask = np.array(masks, dtype=mdtype) 

2160 else: 

2161 # Overwrite the initial dtype names if needed 

2162 if names and dtype.names is not None: 

2163 dtype.names = names 

2164 # Case #1. We have a structured type 

2165 if len(dtype_flat) > 1: 

2166 # Nested dtype, e.g. [('a', int), ('b', [('b0', int), ('b1', 'f4')])] 

2167 # First, create the array using a flattened dtype: 

2168 # [('a', int), ('b1', int), ('b2', float)] 

2169 # Then, view the array using the specified dtype. 

2170 if 'O' in (_.char for _ in dtype_flat): 

2171 if has_nested_fields(dtype): 

2172 raise NotImplementedError( 

2173 "Nested fields involving objects are not supported...") 

2174 else: 

2175 output = np.array(data, dtype=dtype) 

2176 else: 

2177 rows = np.array(data, dtype=[('', _) for _ in dtype_flat]) 

2178 output = rows.view(dtype) 

2179 # Now, process the rowmasks the same way 

2180 if usemask: 

2181 rowmasks = np.array( 

2182 masks, dtype=np.dtype([('', bool) for t in dtype_flat])) 

2183 # Construct the new dtype 

2184 mdtype = make_mask_descr(dtype) 

2185 outputmask = rowmasks.view(mdtype) 

2186 # Case #2. We have a basic dtype 

2187 else: 

2188 # We used some user-defined converters 

2189 if user_converters: 

2190 ishomogeneous = True 

2191 descr = [] 

2192 for i, ttype in enumerate([conv.type for conv in converters]): 

2193 # Keep the dtype of the current converter 

2194 if i in user_converters: 

2195 ishomogeneous &= (ttype == dtype.type) 

2196 if np.issubdtype(ttype, np.character): 

2197 ttype = (ttype, max(len(row[i]) for row in data)) 

2198 descr.append(('', ttype)) 

2199 else: 

2200 descr.append(('', dtype)) 

2201 # So we changed the dtype? 

2202 if not ishomogeneous: 

2203 # We have more than one field 

2204 if len(descr) > 1: 

2205 dtype = np.dtype(descr) 

2206 # We have only one field: drop the name if not needed. 

2207 else: 

2208 dtype = np.dtype(ttype) 

2209 # 

2210 output = np.array(data, dtype) 

2211 if usemask: 

2212 if dtype.names is not None: 

2213 mdtype = [(_, bool) for _ in dtype.names] 

2214 else: 

2215 mdtype = bool 

2216 outputmask = np.array(masks, dtype=mdtype) 

2217 # Mask any missing values that the converters did not catch 

2218 names = output.dtype.names 

2219 if usemask and names: 

2220 for (name, conv) in zip(names, converters): 

2221 missing_values = [conv(_) for _ in conv.missing_values 

2222 if _ != ''] 

2223 for mval in missing_values: 

2224 outputmask[name] |= (output[name] == mval) 

2225 # Construct the final array 

2226 if usemask: 

2227 output = output.view(MaskedArray) 

2228 output._mask = outputmask 

2229 if unpack: 

2230 return output.squeeze().T 

2231 return output.squeeze() 

2232 

2233 

2234def ndfromtxt(fname, **kwargs): 

2235 """ 

2236 Load ASCII data stored in a file and return it as a single array. 

2237 

2238 .. deprecated:: 1.17 

2239 `ndfromtxt` is a deprecated alias of `genfromtxt` which 

2240 overwrites the ``usemask`` argument with `False` even when 

2241 explicitly called as ``ndfromtxt(..., usemask=True)``. 

2242 Use `genfromtxt` instead. 

2243 

2244 Parameters 

2245 ---------- 

2246 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2247 

2248 See Also 

2249 -------- 

2250 numpy.genfromtxt : generic function. 

2251 

2252 """ 

2253 kwargs['usemask'] = False 

2254 # Numpy 1.17 

2255 warnings.warn( 

2256 "np.ndfromtxt is a deprecated alias of np.genfromtxt, " 

2257 "prefer the latter.", 

2258 DeprecationWarning, stacklevel=2) 

2259 return genfromtxt(fname, **kwargs) 

2260 

2261 

2262def mafromtxt(fname, **kwargs): 

2263 """ 

2264 Load ASCII data stored in a text file and return a masked array. 

2265 

2266 .. deprecated:: 1.17 

2267 np.mafromtxt is a deprecated alias of `genfromtxt` which 

2268 overwrites the ``usemask`` argument with `True` even when 

2269 explicitly called as ``mafromtxt(..., usemask=False)``. 

2270 Use `genfromtxt` instead. 

2271 

2272 Parameters 

2273 ---------- 

2274 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2275 

2276 See Also 

2277 -------- 

2278 numpy.genfromtxt : generic function to load ASCII data. 

2279 

2280 """ 

2281 kwargs['usemask'] = True 

2282 # Numpy 1.17 

2283 warnings.warn( 

2284 "np.mafromtxt is a deprecated alias of np.genfromtxt, " 

2285 "prefer the latter.", 

2286 DeprecationWarning, stacklevel=2) 

2287 return genfromtxt(fname, **kwargs) 

2288 

2289 

2290def recfromtxt(fname, **kwargs): 

2291 """ 

2292 Load ASCII data from a file and return it in a record array. 

2293 

2294 If ``usemask=False``, a standard `recarray` is returned; 

2295 if ``usemask=True``, a `MaskedRecords` array is returned. 

2296 

2297 Parameters 

2298 ---------- 

2299 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2300 

2301 See Also 

2302 -------- 

2303 numpy.genfromtxt : generic function 

2304 

2305 Notes 

2306 ----- 

2307 By default, `dtype` is None, which means that the data-type of the output 

2308 array will be determined from the data. 

2309 
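Examples
--------
A minimal sketch (default field names ``f0``/``f1`` are generated because
no names are supplied; the exact integer width is platform-dependent):

>>> from io import StringIO
>>> import numpy as np
>>> r = np.recfromtxt(StringIO(u'''1 2
... 3 4'''))
>>> r.f0
array([1, 3])
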

2310 """ 

2311 kwargs.setdefault("dtype", None) 

2312 usemask = kwargs.get('usemask', False) 

2313 output = genfromtxt(fname, **kwargs) 

2314 if usemask: 

2315 from numpy.ma.mrecords import MaskedRecords 

2316 output = output.view(MaskedRecords) 

2317 else: 

2318 output = output.view(np.recarray) 

2319 return output 

2320 

2321 

2322def recfromcsv(fname, **kwargs): 

2323 """ 

2324 Load ASCII data stored in a comma-separated file. 

2325 

2326 The returned array is a record array (if ``usemask=False``, see 

2327 `recarray`) or a masked record array (if ``usemask=True``, 

2328 see `ma.mrecords.MaskedRecords`). 

2329 

2330 Parameters 

2331 ---------- 

2332 fname, kwargs : For a description of input parameters, see `genfromtxt`. 

2333 

2334 See Also 

2335 -------- 

2336 numpy.genfromtxt : generic function to load ASCII data. 

2337 

2338 Notes 

2339 ----- 

2340 By default, `dtype` is None, which means that the data-type of the output 

2341 array will be determined from the data. 

2342 
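Examples
--------
A minimal sketch (field names come from the header line and are
lower-cased by default):

>>> from io import StringIO
>>> import numpy as np
>>> r = np.recfromcsv(StringIO(u'''A,B
... 0,1
... 2,3'''))
>>> r.a
array([0, 2])
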

2343 """ 

2344 # Set default kwargs for genfromtxt as relevant to csv import. 

2345 kwargs.setdefault("case_sensitive", "lower") 

2346 kwargs.setdefault("names", True) 

2347 kwargs.setdefault("delimiter", ",") 

2348 kwargs.setdefault("dtype", None) 

2349 output = genfromtxt(fname, **kwargs) 

2350 

2351 usemask = kwargs.get("usemask", False) 

2352 if usemask: 

2353 from numpy.ma.mrecords import MaskedRecords 

2354 output = output.view(MaskedRecords) 

2355 else: 

2356 output = output.view(np.recarray) 

2357 return output