Coverage for src/hdmf/utils.py: 97%

586 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-07-10 23:48 +0000

1import collections 

2import copy as _copy 

3import types 

4import warnings 

5from abc import ABCMeta 

6from enum import Enum 

7 

8import h5py 

9import numpy as np 

10 

11 

# Named groups ("macros") of types. A docval argument specification can refer to a group by its
# string key instead of listing the types. Additional classes can be registered with docval_macro.
__macros = dict(
    array_data=[np.ndarray, list, tuple, h5py.Dataset],
    scalar_data=[str, int, float, bytes, bool],
    data=[],
)

try:
    # zarr is an optional dependency: when it can be imported, zarr.Array is also accepted as
    # array data to support conversion of data from Zarr to HDMF
    import zarr
    __macros['array_data'].append(zarr.Array)
except ImportError:
    pass

24 

25 

class AllowPositional(Enum):
    """Codes that signify how docval handles positional arguments."""

    ALLOWED = 1
    WARNING = 2
    ERROR = 3

28 

# Tuples of the Python / NumPy scalar types accepted for each docval type keyword.
__supported_bool_types = (bool, np.bool_)
__supported_uint_types = (np.uint8, np.uint16, np.uint32, np.uint64)
__supported_int_types = (int, np.int8, np.int16, np.int32, np.int64)

# the extended-precision float types are only added when this NumPy build provides them
__supported_float_types = (float, np.float16, np.float32, np.float64)
if hasattr(np, "float128"):  # pragma: no cover
    __supported_float_types += (np.float128,)
if hasattr(np, "longdouble"):  # pragma: no cover
    # on windows python<=3.5, h5py floats resolve float64s as either np.float64 or np.longdouble
    # non-deterministically. a future version of h5py will fix this. see #112
    __supported_float_types += (np.longdouble,)

# types that may be combined with the 'enum' key of a docval argument specification
__allowed_enum_types = (
    *__supported_bool_types,
    *__supported_uint_types,
    *__supported_int_types,
    *__supported_float_types,
    str,
)

42 

43 

def docval_macro(macro):
    """Class decorator that registers the decorated class under the key ``macro`` in the __macros dict.

    :param macro: Name of the macro. A new, empty list of types is created if the name is not yet known.
    :return: a decorator that appends the class to the macro's list of types and returns the class unchanged
    """

    def _register(cls):
        # setdefault creates the list on first use of a macro name
        __macros.setdefault(macro, list()).append(cls)
        return cls

    return _register

55 

56 

def get_docval_macro(key=None):
    """
    Return the docval macros, i.e., strings that represent a customizable list of types for use in docval.

    :param key: Name of the macro. If key=None, then a deepcopy of the dictionary of all macros is returned.
                Otherwise, a tuple of the types associated with the key is returned.
    """
    if key is not None:
        return tuple(__macros[key])
    return _copy.deepcopy(__macros)

68 

69 

def __type_okay(value, argtype, allow_none=False):
    """Check a value against a type specification.

    Unlike :py:func:`isinstance`, the type may be given as a string. A string is looked up
    in this order: a docval macro name, one of the keywords ``'uint'``, ``'int'``, ``'float'``,
    ``'bool'``, and finally the class name of any class in the MRO of ``value``.

    Args:
        value (any): the value to check
        argtype (type, str, list, tuple): the type(s) to check for; any other object (e.g. None)
            accepts every value
        allow_none (bool): whether or not to allow None as a valid value

    Returns:
        bool: True if value is a valid instance of argtype
    """
    if value is None:
        return allow_none

    if isinstance(argtype, str):
        if argtype in __macros:
            # a macro name stands for a list of types
            return __type_okay(value, __macros[argtype], allow_none=allow_none)
        if argtype == 'uint':
            return __is_uint(value)
        if argtype == 'int':
            return __is_int(value)
        if argtype == 'float':
            return __is_float(value)
        if argtype == 'bool':
            return __is_bool(value)
        # otherwise compare against the class names in the value's MRO
        mro_names = [cls.__name__ for cls in value.__class__.__mro__]
        return argtype in mro_names

    if isinstance(argtype, type):
        # the builtin numeric types are widened to the supported numpy equivalents
        if argtype is int:
            return __is_int(value)
        if argtype is float:
            return __is_float(value)
        if argtype is bool:
            return __is_bool(value)
        return isinstance(value, argtype)

    if isinstance(argtype, (tuple, list)):
        return any(__type_okay(value, candidate) for candidate in argtype)

    # argtype is None
    return True

112 

113 

def __shape_okay_multi(value, argshape):
    """Return True if the shape of value matches argshape, or any one shape if argshape holds several."""
    has_multiple_shapes = type(argshape[0]) in (tuple, list)
    if not has_multiple_shapes:
        return __shape_okay(value, argshape)
    return any(__shape_okay(value, shape) for shape in argshape)

119 

120 

def __shape_okay(value, argshape):
    """Return True if the shape of value matches argshape, where a None entry in argshape matches any length."""
    valshape = get_data_shape(value)
    if len(valshape) != len(argshape):
        return False
    return all(expected in (actual, None) for actual, expected in zip(valshape, argshape))

129 

130 

def __is_uint(value):
    """Return True if value is an instance of one of the supported unsigned integer types."""
    uint_types = __supported_uint_types
    return isinstance(value, uint_types)

133 

134 

def __is_int(value):
    """Return True if value is an instance of one of the supported (signed) integer types."""
    int_types = __supported_int_types
    return isinstance(value, int_types)

137 

138 

def __is_float(value):
    """Return True if value is an instance of one of the supported floating point types."""
    float_types = __supported_float_types
    return isinstance(value, float_types)

141 

142 

def __is_bool(value):
    """Return True if value is an instance of one of the supported boolean types."""
    bool_types = __supported_bool_types
    return isinstance(value, bool_types)

145 

146 

def __format_type(argtype):
    """Return a human-readable string for a docval type specification, for use in error messages.

    :param argtype: a type, a string, a list/tuple of these, or None
    :return: the string itself, the type's name, the formatted members joined as "a, b or c",
             or "any type" for None
    :raises ValueError: if argtype is none of the above
    """
    if isinstance(argtype, str):
        return argtype
    if isinstance(argtype, type):
        return argtype.__name__
    if isinstance(argtype, (tuple, list)):
        names = [__format_type(member) for member in argtype]
        if len(names) <= 1:
            return names[0]
        *leading, last = names
        return "%s or %s" % (", ".join(leading), last)
    if argtype is None:
        return "any type"
    raise ValueError("argtype must be a type, str, list, or tuple")

162 

163 

def __check_enum(argval, arg):
    """
    Helper function to check whether the given argument value validates against the enum specification.

    :param argval: argument value passed to the function/method
    :param arg: argument validator - the specification dictionary for this argument (its 'enum' and
                'name' keys are read)

    :return: None if the value validates successfully, error message if the value does not.
    """
    if argval in arg['enum']:
        return None
    return "forbidden value for '{}' (got {}, expected {})".format(
        arg['name'], __fmt_str_quotes(argval), arg['enum']
    )

176 

177 

def __fmt_str_quotes(x):
    """Return x formatted for an error message: strings are wrapped in single quotes, lists and tuples
    and all other values are converted to their plain string form"""
    if isinstance(x, str):
        return "'%s'" % x
    if isinstance(x, (list, tuple)):
        return format(x)
    return str(x)

185 

186 

def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, allow_extra=False,   # noqa: C901
                 allow_positional=AllowPositional.ALLOWED):
    """
    Internal helper function used by the docval decorator to parse and validate function arguments

    Problems found while parsing are collected into lists instead of being raised, so that the caller
    can report all of them. A TypeError or ValueError raised inside the parsing block is caught and its
    message is appended to the corresponding list.

    :param validator: List of dicts from docval with the description of the arguments
    :param args: List of the values of positional arguments supplied by the caller
    :param kwargs: Dict keyword arguments supplied by the caller where keys are the argument name and
                   values are the argument value.
    :param enforce_type: Boolean indicating whether the type of arguments should be enforced
    :param enforce_shape: Boolean indicating whether the dimensions of array arguments
                          should be enforced if possible.
    :param allow_extra: Boolean indicating whether extra keyword arguments are allowed (if False and extra keyword
                        arguments are specified, then an error is raised).
    :param allow_positional: AllowPositional member indicating whether positional arguments are allowed:
                             AllowPositional.ALLOWED: positional arguments are allowed
                             AllowPositional.WARNING: return warning if positional arguments are supplied
                             AllowPositional.ERROR: return error if positional arguments are supplied
                             Any value that is equal to neither WARNING nor ERROR is treated like ALLOWED.

    :return: Dict with:
        * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
        * 'future_warnings' : List of strings with warning messages
        * 'type_errors' : List of strings with type error messages
        * 'value_errors' : List of strings with value error messages
        * 'syntax_errors' : List of strings with syntax error messages
    """
    ret = dict()
    syntax_errors = list()
    type_errors = list()
    value_errors = list()
    future_warnings = list()
    argsi = 0  # index of the next unconsumed positional value in args
    extras = dict()  # has to be initialized to empty here, to avoid spurious errors reported upon early raises

    try:
        # check for duplicates in docval
        names = [x['name'] for x in validator]
        duplicated = [item for item, count in collections.Counter(names).items()
                      if count > 1]
        if duplicated:
            raise ValueError(
                'The following names are duplicated: {}'.format(duplicated))

        if allow_extra:  # extra keyword arguments are allowed so do not consider them when checking number of args
            if len(args) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d positional' % (len(validator), names, len(args))
                )
        else:  # allow for keyword args
            if len(args) + len(kwargs) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d: %d positional and %d keyword %s'
                    % (len(validator), names, len(args) + len(kwargs), len(args), len(kwargs), sorted(kwargs))
                )

        if args:
            if allow_positional == AllowPositional.WARNING:
                msg = ('Using positional arguments for this method is discouraged and will be deprecated '
                       'in a future major release. Please use keyword arguments to ensure future compatibility.')
                future_warnings.append(msg)
            elif allow_positional == AllowPositional.ERROR:
                msg = 'Only keyword arguments (e.g., func(argname=value, ...)) are allowed for this method.'
                syntax_errors.append(msg)

        # iterate through the docval specification and find a matching value in args / kwargs
        # NOTE: next(it) raises StopIteration once the specification is exhausted (or immediately if it is
        # empty); that exception is what ends both loops below and is caught at the end of the try block
        it = iter(validator)
        arg = next(it)

        # process positional arguments of the docval specification (no default value)
        extras = dict(kwargs)  # keyword arguments not yet matched to a specification entry
        while True:
            if 'default' in arg:
                # first entry with a default: continue with the second loop
                break
            argname = arg['name']
            argval_set = False
            if argname in kwargs:
                # if this positional arg is specified by a keyword arg and there are remaining positional args that
                # have not yet been matched, then it is undetermined what those positional args match to. thus, raise
                # an error
                if argsi < len(args):
                    type_errors.append("got multiple values for argument '%s'" % argname)
                argval = kwargs.get(argname)
                extras.pop(argname, None)
                argval_set = True
            elif argsi < len(args):
                argval = args[argsi]
                argval_set = True

            if not argval_set:
                type_errors.append("missing argument '%s'" % argname)
            else:
                if enforce_type:
                    # allow_none is left at its default here, so None is rejected for arguments without a default
                    if not __type_okay(argval, arg['type']):
                        if argval is None:
                            fmt_val = (argname, __format_type(arg['type']))
                            type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                        else:
                            fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                            type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
                if enforce_shape and 'shape' in arg:
                    valshape = get_data_shape(argval)
                    while valshape is None:
                        if argval is None:
                            break
                        if not hasattr(argval, argname):
                            fmt_val = (argval, argname, arg['shape'])
                            value_errors.append("cannot check shape of object '%s' for argument '%s' "
                                                "(expected shape '%s')" % fmt_val)
                            break
                        # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                        argval = getattr(argval, argname)
                        valshape = get_data_shape(argval)
                    if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                        fmt_val = (argname, valshape, arg['shape'])
                        value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
                if 'enum' in arg:
                    err = __check_enum(argval, arg)
                    if err:
                        value_errors.append(err)

                ret[argname] = argval
            argsi += 1
            arg = next(it)

        # process arguments of the docval specification with a default value
        # NOTE: the default value will be deepcopied, so 'default': list() is safe unlike in normal python
        while True:
            argname = arg['name']
            if argname in kwargs:
                ret[argname] = kwargs.get(argname)
                extras.pop(argname, None)
            elif len(args) > argsi:
                ret[argname] = args[argsi]
                argsi += 1
            else:
                ret[argname] = _copy.deepcopy(arg['default'])
            argval = ret[argname]
            if enforce_type:
                # None is accepted if the default is None or the specification sets 'allow_none': True
                if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                    if argval is None and arg['default'] is None:
                        fmt_val = (argname, __format_type(arg['type']))
                        type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                    else:
                        fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                        type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
            if enforce_shape and 'shape' in arg and argval is not None:
                valshape = get_data_shape(argval)
                while valshape is None:
                    if argval is None:
                        break
                    if not hasattr(argval, argname):
                        fmt_val = (argval, argname, arg['shape'])
                        value_errors.append("cannot check shape of object '%s' for argument '%s' (expected shape '%s')"
                                            % fmt_val)
                        break
                    # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                    argval = getattr(argval, argname)
                    valshape = get_data_shape(argval)
                if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                    fmt_val = (argname, valshape, arg['shape'])
                    value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
            if 'enum' in arg and argval is not None:
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)

            arg = next(it)
    except StopIteration:
        # normal termination: every entry of the specification has been processed
        pass
    except TypeError as e:
        type_errors.append(str(e))
    except ValueError as e:
        value_errors.append(str(e))

    if not allow_extra:
        for key in extras.keys():
            type_errors.append("unrecognized argument: '%s'" % key)
    else:
        # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args.
        # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out
        for key in extras.keys():
            ret[key] = extras[key]
    return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors,
            'syntax_errors': syntax_errors}

368 

369 

# name of the attribute on a docval-decorated function that holds the name-indexed dict of argument specs
docval_idx_name = '__dv_idx__'
# name of the attribute on a docval-decorated function that holds the docval dict (options and argument specs)
docval_attr_name = '__docval__'
# key in the docval dict under which the list of argument specs is stored
__docval_args_loc = 'args'

373 

374 

def get_docval(func, *args):
    '''Get a copy of docval arguments for a function.
    If args are supplied, return only docval arguments with value for 'name' key equal to the args

    :param func: the function to inspect
    :param args: optional names of the docval arguments to return
    :raises ValueError: if names are requested but the function has no docval, or a requested name is
                        not a docval argument of the function
    :return: a tuple of the docval argument specifications (empty if the function has no docval)
    '''
    func_docval = getattr(func, docval_attr_name, None)
    if not func_docval:
        if args:
            raise ValueError('Function %s has no docval arguments' % func.__name__)
        return tuple()

    if not args:
        return tuple(func_docval[__docval_args_loc])

    # look the requested names up in the name-indexed dict cached on the function
    docval_idx = getattr(func, docval_idx_name, None)
    try:
        return tuple(docval_idx[name] for name in args)
    except KeyError as ke:
        raise ValueError('Function %s does not have docval argument %s' % (func.__name__, str(ke)))

392 

393 

394# def docval_wrap(func, is_method=True): 

395# if is_method: 

396# @docval(*get_docval(func)) 

397# def method(self, **kwargs): 

398# 

399# return call_docval_args(func, kwargs) 

400# return method 

401# else: 

402# @docval(*get_docval(func)) 

403# def static_method(**kwargs): 

404# return call_docval_args(func, kwargs) 

405# return method 

406 

407 

def fmt_docval_args(func, kwargs):
    ''' Separate positional and keyword arguments

    Useful for methods that wrap other methods

    :param func: a function with a docval
    :param kwargs: dict of argument names to values; it is copied, not modified
    :raises ValueError: if func has no docval
    :return: tuple (list of positional values, dict of keyword values). Arguments without a default are
             returned positionally (None if absent from kwargs); arguments with a default are returned as
             keywords only if their value is not None. Remaining kwargs are included only if the docval
             allows extra arguments.
    '''
    warnings.warn("fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, "
                  "call the function directly with the kwargs. Please note that fmt_docval_args "
                  "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                  "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                  "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
                  PendingDeprecationWarning)
    func_docval = getattr(func, docval_attr_name, None)
    positional = list()
    keyword = dict()
    remaining = _copy.copy(kwargs)
    if not func_docval:
        raise ValueError('no docval found on %s' % str(func))

    for spec in func_docval[__docval_args_loc]:
        name = spec['name']
        value = remaining.pop(name, None)
        if 'default' not in spec:
            positional.append(value)
        elif value is not None:
            keyword[name] = value
    if func_docval['allow_extra']:
        keyword.update(remaining)
    return positional, keyword

436 

437 

438# def _remove_extra_args(func, kwargs): 

439# """Return a dict of only the keyword arguments that are accepted by the function's docval. 

440# 

441# If the docval specifies allow_extra=True, then the original kwargs are returned. 

442# """ 

443# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that 

444# # kwargs are kept as kwargs instead of parsed into args and kwargs 

445# func_docval = getattr(func, docval_attr_name, None) 

446# if func_docval: 

447# if func_docval['allow_extra']: 

448# # if extra args are allowed, return all args 

449# return kwargs 

450# else: 

451# # save only the arguments listed in the function's docval (skip any others present in kwargs) 

452# ret_kwargs = dict() 

453# for arg in func_docval[__docval_args_loc]: 

454# val = kwargs.get(arg['name'], None) 

455# if val is not None: # do not return arguments that are not present or have value None 

456# ret_kwargs[arg['name']] = val 

457# return ret_kwargs 

458# else: 

459# raise ValueError('No docval found on %s' % str(func)) 

460 

461 

def call_docval_func(func, kwargs):
    """Call the function with only the keyword arguments that are accepted by the function's docval.

    Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True.

    :param func: a function with a docval
    :param kwargs: dict of argument names to values
    :return: the return value of func
    """
    deprecation_msg = (
        "call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, "
        "call the function directly with the kwargs. Please note that call_docval_func "
        "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
        "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
        "is set), then you will need to pop the extra arguments out of kwargs before calling the function."
    )
    warnings.warn(deprecation_msg, PendingDeprecationWarning)

    with warnings.catch_warnings(record=True):
        # record (and thereby discard) the PendingDeprecationWarning from fmt_docval_args so that two
        # PendingDeprecationWarnings saying the same thing are not raised
        warnings.simplefilter("ignore", UserWarning)
        warnings.simplefilter("always", PendingDeprecationWarning)
        positional, keyword = fmt_docval_args(func, kwargs)

    return func(*positional, **keyword)

481 

482 

def __resolve_type(t):
    """Resolve a docval type specification by replacing macro names with the types they stand for.

    :param t: a type, a string, a list/tuple of these, or None
    :return: None and types are returned unchanged; a macro name is returned as a tuple of its types and
             any other string unchanged; a list/tuple is returned as a flat tuple of its resolved members
    :raises ValueError: if t is none of the above
    """
    if t is None or isinstance(t, type):
        return t
    if isinstance(t, str):
        return tuple(__macros[t]) if t in __macros else t
    if isinstance(t, (list, tuple)):
        flattened = list()
        for member in t:
            resolved = __resolve_type(member)
            # a tuple result comes from a macro or a nested sequence, so splice it in
            if isinstance(resolved, tuple):
                flattened.extend(resolved)
            else:
                flattened.append(resolved)
        return tuple(flattened)
    raise ValueError("argtype must be a type, a str, a list, a tuple, or None - got %s" % type(t))

505 

506 

def __check_enum_argtype(argtype):
    """Return True/False whether the given argtype or list/tuple of argtypes is a supported docval enum type"""
    if not isinstance(argtype, (list, tuple)):
        return argtype in __allowed_enum_types
    return all(member in __allowed_enum_types for member in argtype)

512 

513 

def docval(*validator, **options):  # noqa: C901
    '''A decorator for documenting and enforcing type for instance method arguments.

    This decorator takes a list of dictionaries that specify the method parameters. These
    dictionaries are used for enforcing type and building a Sphinx docstring.

    The first arguments are dictionaries that specify the positional
    arguments and keyword arguments of the decorated function. These dictionaries
    must contain the following keys: ``'name'``, ``'type'``, and ``'doc'``. This will define a
    positional argument. To define a keyword argument, specify a default value
    using the key ``'default'``. To validate the dimensions of an input array
    add the optional ``'shape'`` parameter. To restrict an argument to a fixed set of values,
    add the optional ``'enum'`` parameter with a list or tuple of the allowed values.
    To allow a None value for an argument,
    either the default value must be None or a different default value must be provided
    and ``'allow_none': True`` must be passed.

    The decorated method must take ``self`` and ``**kwargs`` as arguments.

    When using this decorator, the functions :py:func:`getargs` and
    :py:func:`popargs` can be used for easily extracting arguments from
    kwargs.

    The following code example demonstrates the use of this decorator:

    .. code-block:: python

       @docval({'name': 'arg1', 'type': str, 'doc': 'this is the first positional argument'},
               {'name': 'arg2', 'type': int, 'doc': 'this is the second positional argument'},
               {'name': 'kwarg1', 'type': (list, tuple), 'doc': 'this is a keyword argument', 'default': list()},
               returns='foo object', rtype='Foo')
       def foo(self, **kwargs):
           arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', **kwargs)
           ...

    :param enforce_type: Enforce types of input parameters (Default=True)
    :param returns: String describing the return values
    :param rtype: String describing the data type of the return values
    :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True)
    :param enforce_shape: Enforce the dimensions of input arrays (Default=True)
    :param validator: :py:func:`dict` objects specifying the method parameters
    :param allow_extra: Allow extra arguments (Default=False)
    :param allow_positional: Allow positional arguments (Default=True). The argument parser compares this
                             value with ``AllowPositional.WARNING`` and ``AllowPositional.ERROR``.
    :param options: additional options for documenting and validating method parameters
    '''
    enforce_type = options.pop('enforce_type', True)
    enforce_shape = options.pop('enforce_shape', True)
    returns = options.pop('returns', None)
    rtype = options.pop('rtype', None)
    is_method = options.pop('is_method', True)
    allow_extra = options.pop('allow_extra', False)
    # NOTE(review): the default is True, whereas __parse_args compares against AllowPositional members;
    # True is equal to neither WARNING nor ERROR, so it behaves like ALLOWED - confirm this is intended
    allow_positional = options.pop('allow_positional', True)

    def dec(func):
        # the remaining options (e.g. 'func_name', 'doc') are stored on the decorated function
        _docval = _copy.copy(options)
        _docval['allow_extra'] = allow_extra
        _docval['allow_positional'] = allow_positional
        func.__name__ = _docval.get('func_name', func.__name__)
        func.__doc__ = _docval.get('doc', func.__doc__)
        pos = list()  # specifications without a default value
        kw = list()  # specifications with a default value
        for a in validator:
            # catch unsupported keys
            allowable_terms = ('name', 'doc', 'type', 'shape', 'enum', 'default', 'allow_none', 'help')
            unsupported_terms = set(a.keys()) - set(allowable_terms)
            if unsupported_terms:
                raise Exception('docval for {}: keys {} are not supported by docval'.format(a['name'],
                                                                                            sorted(unsupported_terms)))
            # check that arg type is valid
            # NOTE: the resolved type is written back into the specification dict passed by the caller
            try:
                a['type'] = __resolve_type(a['type'])
            except Exception as e:
                msg = "docval for %s: error parsing argument type: %s" % (a['name'], e.args[0])
                raise Exception(msg)
            if 'enum' in a:
                # check that value for enum key is a list or tuple (cannot have only one allowed value)
                if not isinstance(a['enum'], (list, tuple)):
                    msg = ('docval for %s: enum value must be a list or tuple (received %s)'
                           % (a['name'], type(a['enum'])))
                    raise Exception(msg)
                # check that arg type is compatible with enum
                if not __check_enum_argtype(a['type']):
                    msg = 'docval for {}: enum checking cannot be used with arg type {}'.format(a['name'], a['type'])
                    raise Exception(msg)
                # check that enum allowed values are allowed by arg type
                if any([not __type_okay(x, a['type']) for x in a['enum']]):
                    msg = ('docval for {}: enum values are of types not allowed by arg type (got {}, '
                           'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type']))
                    raise Exception(msg)
            if a.get('allow_none', False) and 'default' not in a:
                msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(a['name'])
                raise Exception(msg)
            if 'default' in a:
                kw.append(a)
            else:
                pos.append(a)
        # arguments without a default are placed before arguments with a default
        loc_val = pos + kw
        _docval[__docval_args_loc] = loc_val

        def _check_args(args, kwargs):
            """Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
            # this function was separated from func_call() in order to make stepping through lines of code using pdb
            # easier
            parsed = __parse_args(
                loc_val,
                args[1:] if is_method else args,  # for methods, the first positional value is self/cls
                kwargs,
                enforce_type=enforce_type,
                enforce_shape=enforce_shape,
                allow_extra=allow_extra,
                allow_positional=allow_positional
            )

            parse_warnings = parsed.get('future_warnings')
            if parse_warnings:
                msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings))
                warnings.warn(msg, FutureWarning)

            # raise the first non-empty category of errors, checked in this order
            for error_type, ExceptionType in (('type_errors', TypeError),
                                              ('value_errors', ValueError),
                                              ('syntax_errors', SyntaxError)):
                parse_err = parsed.get(error_type)
                if parse_err:
                    msg = '%s: %s' % (func.__qualname__, ', '.join(parse_err))
                    raise ExceptionType(msg)

            return parsed['args']

        # this code is intentionally separated to make stepping through lines of code using pdb easier
        if is_method:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(args[0], **pargs)
        else:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(**pargs)

        _rtype = rtype
        if isinstance(rtype, type):
            _rtype = rtype.__name__
        docstring = __googledoc(func, _docval[__docval_args_loc], returns=returns, rtype=_rtype)
        docval_idx = {a['name']: a for a in _docval[__docval_args_loc]}  # cache a name-indexed dictionary of args
        setattr(func_call, '__doc__', docstring)
        setattr(func_call, '__name__', func.__name__)
        setattr(func_call, docval_attr_name, _docval)
        setattr(func_call, docval_idx_name, docval_idx)
        setattr(func_call, '__module__', func.__module__)
        return func_call

    return dec

663 

664 

def __sig_arg(argval):
    """Return the signature fragment for one docval argument specification: ``name`` for an argument
    without a default, ``name=default`` otherwise (string defaults are wrapped in single quotes)"""
    name = argval['name']
    if 'default' not in argval:
        return name
    default = argval['default']
    shown = "'%s'" % default if isinstance(default, str) else str(default)
    return "%s=%s" % (name, shown)

675 

676 

def __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=None, returns=None, rtype=None):
    '''Generate a docstring for func from its docval argument specifications.

    :param func: the function being documented; its name and existing docstring are used
    :param validator: list of docval argument specification dicts
    :param docstring_fmt: format string with the fields {description} and {args}
    :param arg_fmt: format string for one argument with the fields {name}, {doc} and {type}
    :param ret_fmt: format string with the fields {returns} and {rtype}
    :param returns: description of the return value
    :param rtype: description of the return type
    :return: the signature line followed by the formatted docstring; the return section is appended
             only if ret_fmt, returns and rtype are all given
    '''
    # classes from these modules are referenced without their module path
    unqualified_prefixes = ("h5py", "pandas", "builtins")

    def type_ref(argtype):
        # anything that is not a class (e.g. a type given as a string) is passed through unchanged
        if not isinstance(argtype, type):
            return argtype
        module = argtype.__module__
        name = argtype.__name__
        if module.startswith(unqualified_prefixes):
            return ":py:class:`~{name}`".format(name=name)
        return ":py:class:`~{module}.{name}`".format(name=name, module=module)

    def render_arg(arg):
        name = arg.get('name')
        doc = arg.get('doc')
        argtype = arg['type']
        if isinstance(argtype, (tuple, list)):
            type_str = " or ".join(map(type_ref, argtype))
        else:
            type_str = type_ref(argtype)
        return arg_fmt.format(name=name, doc=doc, type=type_str)

    signature = ", ".join(map(__sig_arg, validator))
    doc = "%s(%s)\n\n" % (func.__name__, signature)
    description = "" if func.__doc__ is None else func.__doc__.strip()
    rendered_args = "\n".join(map(render_arg, validator))
    doc += docstring_fmt.format(description=description, args=rendered_args)

    if ret_fmt is not None and returns is not None and rtype is not None:
        doc += ret_fmt.format(returns=returns, rtype=rtype)
    return doc

708 

709 

def __sphinxdoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func that documents arguments with ``:param``/``:type`` fields and the
    return value with ``:returns``/``:rtype`` fields"""
    docstring_fmt = "{description}\n\n{args}\n"
    arg_fmt = ":param {name}: {doc}\n:type {name}: {type}"
    ret_fmt = ":returns: {returns}\n:rtype: {rtype}"
    return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype)

718 

719 

def __googledoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func with an ``Args:`` section (omitted if there are no arguments) and a
    ``Returns:`` section (added by __builddoc only if both returns and rtype are given)"""
    # NOTE(review): the leading whitespace inside arg_fmt and ret_fmt may have been collapsed when this
    # source was extracted - confirm the intended indentation against the repository
    arg_fmt = " {name} ({type}): {doc}"
    docstring_fmt = "{description}\n\n"
    if len(validator) > 0:
        docstring_fmt += "Args:\n{args}\n"
    ret_fmt = ("\nReturns:\n"
               " {rtype}: {returns}")
    return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype)

728 

729 

def getargs(*argnames):
    """getargs(*argnames, argdict)
    Convenience function to retrieve arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values. The dictionary is not modified.

    :raises ValueError: if a argument name is not found in the dictionary or there is only one argument passed to this
        function or the last argument is not a dictionary
    :return: a single value if there is only one argument, or a list of values corresponding to the given argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, argdict = argnames
    if not isinstance(argdict, dict):
        raise ValueError('Last argument must be a dict')

    values = []
    for key in keys:
        if key not in argdict:
            raise ValueError("Argument not found in dict: '%s'" % key)
        values.append(argdict.get(key))
    # a single key yields the bare value rather than a one-element list
    return values[0] if len(keys) == 1 else values

756 

757 

def popargs(*argnames):
    """popargs(*argnames, argdict)
    Convenience function to retrieve and remove arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values.

    All keys are validated before anything is removed, so the dictionary is left unmodified if a
    ValueError is raised.

    :raises ValueError: if a argument name is not found in the dictionary (this includes a name that is given more
        than once) or there is only one argument passed to this function or the last argument is not a dictionary
    :return: a single value if there is only one argument, or a list of values corresponding to the given argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, argdict = argnames
    if not isinstance(argdict, dict):
        raise ValueError('Last argument must be a dict')

    # validate first: popping as we go would leave the caller's dict half-consumed on failure.
    # a repeated key counts as missing, because its first occurrence would already have been popped
    seen = set()
    for key in keys:
        if key not in argdict or key in seen:
            # %r of the key matches the text of the KeyError that dict.pop would have raised
            raise ValueError('Argument not found in dict: %r' % (key,))
        seen.add(key)

    values = [argdict.pop(key) for key in keys]
    # a single key yields the bare value rather than a one-element list
    return values[0] if len(keys) == 1 else values

785 

786 

def popargs_to_dict(keys, argdict):
    """Convenience function to retrieve and remove arguments from a dictionary in batch into a dictionary.

    On success this is the same as `{key: argdict.pop(key) for key in keys}`. Unlike the plain comprehension, all
    keys are validated before anything is removed, so if a ValueError is raised because a key is missing (or
    requested twice), argdict is left unmodified.

    :param keys: Iterable of keys to pull out of argdict
    :type keys: Iterable
    :param argdict: Dictionary to process
    :type argdict: dict
    :raises ValueError: if an argument name is not found in the dictionary
    :return: a dict of arguments removed
    """
    keys = list(keys)  # keys may be a one-shot iterable and two passes are needed

    # Validate first so that a failed call does not leave the caller's dict half-consumed.
    # A key requested twice would be gone by the time of its second pop, so it counts as missing.
    seen = set()
    for key in keys:
        if key in seen or key not in argdict:
            raise ValueError('Argument not found in dict: %r' % (key,))
        seen.add(key)

    try:
        return {key: argdict.pop(key) for key in keys}
    except KeyError as ke:
        # Defensive: a dict subclass may override pop and still raise KeyError.
        raise ValueError('Argument not found in dict: %s' % str(ke)) from ke

806 

807 

class ExtenderMeta(ABCMeta):
    """A metaclass that will extend the base class initialization
    routine by executing additional functions defined in
    classes that use this metaclass

    Functions are registered with the :py:meth:`pre_init` and :py:meth:`post_init` decorators, which flag the
    function and turn it into a classmethod. When a class using this metaclass is created, every flagged attribute
    found on the new class (including inherited ones) is called with ``(name, bases, classdict)``.

    In general, this class should only be used by core developers.
    """

    # names of the marker attributes set on decorated functions
    __preinit = '__preinit'
    __postinit = '__postinit'

    @classmethod
    def pre_init(cls, func):
        '''A decorator for defining a routine to run before the base type initialization of a type object.

        The decorated function is flagged and returned as a classmethod.
        '''
        setattr(func, cls.__preinit, True)
        return classmethod(func)

    @classmethod
    def post_init(cls, func):
        '''A decorator for defining a routine to run after creation of a type object.

        An example use of this method would be to define a classmethod that gathers
        any defined methods or attributes after the base Python type construction (i.e. after
        :py:func:`type` has been called)
        '''
        setattr(func, cls.__postinit, True)
        return classmethod(func)

    def __init__(cls, name, bases, classdict):
        # run the routines flagged with pre_init before the regular type initialization ...
        for attr_name in dir(cls):
            candidate = getattr(cls, attr_name)
            if hasattr(candidate, cls.__preinit):
                candidate(name, bases, classdict)
        super().__init__(name, bases, classdict)
        # ... and the routines flagged with post_init after it
        for attr_name in dir(cls):
            candidate = getattr(cls, attr_name)
            if hasattr(candidate, cls.__postinit):
                candidate(name, bases, classdict)

846 

847 

def get_data_shape(data, strict_no_data_load=False):
    """
    Helper function used to determine the shape of the given array.

    Objects exposing ``maxshape`` or a non-None ``shape`` report that attribute directly. For nested tuples, lists,
    and sets, the shape is found by descending into the first element along each dimension, assuming that the data
    has a regular, rectangular shape. In the case of out-of-core iterators, this means that the first item
    along each dimension would potentially be loaded into memory. Set strict_no_data_load=True
    to enforce that this does not happen, at the cost that we may not be able to determine
    the shape of the array.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown
             will be set to None. None is returned for dicts, strings, bytes and objects without a length.
    """

    def __get_shape_helper(local_data):
        # Walk down the first element of each nesting level, recording its length.
        dims = []
        current = local_data
        while hasattr(current, '__len__'):
            size = len(current)
            dims.append(size)
            if not size:
                break
            current = next(iter(current))
            if isinstance(current, (str, bytes)):
                # strings are treated as scalar elements, not as another dimension
                break
        return tuple(dims)

    # NOTE: data.maxshape will fail on empty h5py.Dataset without shape or maxshape. this will be fixed in h5py 3.0
    if hasattr(data, 'maxshape'):
        return data.maxshape
    if hasattr(data, 'shape') and data.shape is not None:
        return data.shape
    if isinstance(data, (dict, str, bytes)) or not hasattr(data, '__len__'):
        return None
    if strict_no_data_load and not isinstance(data, (list, tuple, set)):
        # inspecting anything but an in-memory container could trigger a data load
        return None
    return __get_shape_helper(data)

888 

889 

def pystr(s):
    """
    Convert a string of characters to Python str object

    bytes are decoded as UTF-8; any other object is returned unchanged.
    """
    return s.decode('utf-8') if isinstance(s, bytes) else s

898 

899 

def to_uint_array(arr):
    """
    Convert a numpy array or array-like object to a numpy array of unsigned integers with the same dtype itemsize.

    For example, a list of int32 values is converted to a numpy array with dtype uint32. An array that already has an
    unsigned integer dtype is returned as is.

    :raises ValueError: if input array contains values that are not unsigned integers or non-negative integers.
    """
    values = arr if isinstance(arr, np.ndarray) else np.array(arr)
    src_dtype = values.dtype
    if np.issubdtype(src_dtype, np.unsignedinteger):
        return values
    if not np.issubdtype(src_dtype, np.integer):
        raise ValueError('Cannot convert array of dtype %s to uint.' % src_dtype)
    if (values < 0).any():
        raise ValueError('Cannot convert negative integer values to uint.')
    # keep precision: same number of bits as the signed source dtype
    return values.astype(np.dtype('uint%d' % int(src_dtype.itemsize * 8)))

917 

918 

class LabelledDict(dict):
    """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.

    For example, if the key attribute is set as 'name' in __init__, then all objects added to the LabelledDict must have
    a 'name' attribute and a particular object in the LabelledDict can be accessed using the syntax ['object_name'] if
    the object.name == 'object_name'. In this way, LabelledDict acts like a set where values can be retrieved using
    square brackets around the value of the key attribute. An 'add' method makes clear the association between the key
    attribute of the LabelledDict and the values of the LabelledDict.

    LabelledDict also supports retrieval of values with the syntax my_dict['attr == val'], which returns a set of
    objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that
    condition, an empty set is returned. Note that if 'attr' equals the key attribute, then the single matching value
    is returned, not a set, and a KeyError is raised if there is no such key.

    LabelledDict does not support changing items that have already been set. A TypeError will be raised when using
    __setitem__ on keys that already exist in the dict. The setdefault and update methods are not supported. A
    TypeError will be raised when these are called.

    A callable function may be passed to the constructor to be run on an item after adding it to this dict using
    the __setitem__ and add methods.

    A callable function may be passed to the constructor to be run on an item after removing it from this dict using
    the __delitem__ (the del operator), pop, and popitem methods. It will also be run on each removed item when using
    the clear method.

    Usage:
      LabelledDict(label='my_objects', key_attr='name')
      my_dict[obj.name] = obj
      my_dict.add(obj)  # simpler syntax

    Example:
      # MyTestClass is a class with attributes 'prop1' and 'prop2'. MyTestClass.__init__ sets those attributes.
      ld = LabelledDict(label='all_objects', key_attr='prop1')
      obj1 = MyTestClass('a', 'b')
      obj2 = MyTestClass('d', 'b')
      ld[obj1.prop1] = obj1  # obj1 is added to the LabelledDict with the key obj1.prop1. Any other key is not allowed.
      ld.add(obj2)  # Simpler 'add' syntax enforces the required relationship
      ld['a']  # Returns obj1
      ld['prop1 == a']  # Also returns obj1
      ld['prop2 == b']  # Returns set([obj1, obj2]) - the set of all values v in ld where v.prop2 == 'b'
    """

    @docval({'name': 'label', 'type': str, 'doc': 'the label on this dictionary'},
            {'name': 'key_attr', 'type': str, 'doc': 'the attribute name to use as the key', 'default': 'name'},
            {'name': 'add_callable', 'type': types.FunctionType,
             'doc': 'function to call on an element after adding it to this dict using the add or __setitem__ methods',
             'default': None},
            {'name': 'remove_callable', 'type': types.FunctionType,
             'doc': ('function to call on an element after removing it from this dict using the pop, popitem, clear, '
                     'or __delitem__ methods'),
             'default': None})
    def __init__(self, **kwargs):
        label, key_attr, add_callable, remove_callable = getargs('label', 'key_attr', 'add_callable', 'remove_callable',
                                                                 kwargs)
        self.__label = label
        self.__key_attr = key_attr
        self.__add_callable = add_callable
        self.__remove_callable = remove_callable

    @property
    def label(self):
        """Return the label of this LabelledDict"""
        return self.__label

    @property
    def key_attr(self):
        """Return the attribute used as the key for values in this LabelledDict"""
        return self.__key_attr

    def __getitem__(self, args):
        """Get a value from the LabelledDict with the given key.

        Supports syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an
        attribute 'attr' with a string value 'val'. If no objects match that condition, an empty set is returned.
        Note that if 'attr' equals the key attribute of this LabelledDict, then the single matching value is
        returned, not a set.

        The query is split on the first '==' only, so 'val' may itself contain '=='. Keys that are not strings are
        always treated as plain dict keys.

        Raises KeyError if a plain key, or the value of a query on the key attribute, is not in the dict.
        Raises ValueError if the attribute name or the value of a query is empty.
        """
        # Only strings can carry the query syntax. Testing "'==' in args" on other key types (e.g. int) would raise
        # a TypeError instead of performing the lookup.
        if not isinstance(args, str) or '==' not in args:
            return super().__getitem__(args)

        key, val = (part.strip() for part in args.split("==", 1))  # val is a string
        if not key:
            raise ValueError("An attribute name is required before '=='.")
        if not val:
            raise ValueError("A value is required after '=='.")
        if key == self.key_attr:
            # querying the key attribute is an ordinary lookup of val
            return super().__getitem__(val)
        return {item for item in self.values() if getattr(item, key, None) == val}

    def __setitem__(self, key, value):
        """Set a value in the LabelledDict with the given key. The key must equal value.key_attr.

        See LabelledDict.add for a simpler syntax since the key is redundant.
        Raises TypeError if key already exists.
        Raises ValueError if value does not have attribute key_attr.
        Raises KeyError if key does not equal the key_attr attribute of value.
        """
        if key in self:
            raise TypeError("Key '%s' is already in this dict. Cannot reset items in a %s."
                            % (key, self.__class__.__name__))
        self.__check_value(value)
        if key != getattr(value, self.key_attr):
            raise KeyError("Key '%s' must equal attribute '%s' of '%s'." % (key, self.key_attr, value))
        super().__setitem__(key, value)
        if self.__add_callable:
            self.__add_callable(value)

    def add(self, value):
        """Add a value to the dict with the key value.key_attr.

        Raises ValueError if value does not have attribute key_attr.
        """
        # check before getattr so that a missing attribute raises ValueError rather than AttributeError
        self.__check_value(value)
        self.__setitem__(getattr(value, self.key_attr), value)

    def __check_value(self, value):
        """Raise ValueError if value does not have the key attribute of this LabelledDict."""
        if not hasattr(value, self.key_attr):
            raise ValueError("Cannot set value '%s' in %s. Value must have attribute '%s'."
                             % (value, self.__class__.__name__, self.key_attr))

    def pop(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the returned value."""
        ret = super().pop(k)
        if self.__remove_callable:
            self.__remove_callable(ret)
        return ret

    def popitem(self):
        """Remove the last added item. If remove_callable was initialized, call that on the returned value.

        Note: popitem returns a tuple (key, value) but the remove_callable will be called only on the value.

        Note: in Python 3.5 and earlier, dictionaries are not ordered, so popitem removes an arbitrary item.
        """
        ret = super().popitem()
        if self.__remove_callable:
            self.__remove_callable(ret[1])  # execute callable only on dict value
        return ret

    def clear(self):
        """Remove all items. If remove_callable was initialized, call that on each returned value.

        The order of removal depends on the popitem method.
        """
        while len(self):
            self.popitem()

    def __delitem__(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the matching value."""
        item = self[k]
        super().__delitem__(k)
        if self.__remove_callable:
            self.__remove_callable(item)

    def setdefault(self, k):
        """setdefault is not supported. A TypeError will be raised."""
        raise TypeError('setdefault is not supported for %s' % self.__class__.__name__)

    def update(self, other):
        """update is not supported. A TypeError will be raised."""
        raise TypeError('update is not supported for %s' % self.__class__.__name__)

1087 

1088 

@docval_macro('array_data')
class StrDataset(h5py.Dataset):
    """Wrapper to decode strings on reading the dataset

    The wrapped dataset is stored on ``self.dset``; attribute lookups that are not resolved on this object are
    forwarded to it, and values read through indexing are decoded from bytes to str.
    """

    def __init__(self, dset, encoding, errors='strict'):
        # dset is set first because __getattr__ forwards unresolved lookups to it
        self.dset = dset
        # fall back to the encoding recorded in the dataset's string dtype
        self.encoding = h5py.h5t.check_string_dtype(dset.dtype).encoding if encoding is None else encoding
        self.errors = errors

    def __getattr__(self, name):
        return getattr(self.dset, name)

    def __repr__(self):
        # strip the enclosing angle brackets of the wrapped dataset's repr
        inner = repr(self.dset)[1:-1]
        return '<StrDataset for %s>' % inner

    def __len__(self):
        return len(self.dset)

    def __getitem__(self, args):
        raw = self.dset[args]
        codec, on_error = self.encoding, self.errors
        # numpy.char.decode() seems like the obvious thing to use. But it only
        # accepts numpy string arrays, not object arrays of bytes (which we
        # return from HDF5 variable-length strings). And the numpy
        # implementation is not faster than doing it with a loop; in fact, by
        # not converting the result to a numpy unicode array, the
        # naive way can be faster! (Comparing with numpy 1.18.4, June 2020)
        if np.isscalar(raw):
            return raw.decode(codec, on_error)

        decoded = [item.decode(codec, on_error) for item in raw.flat]
        return np.array(decoded, dtype=object).reshape(raw.shape)

1121 ], dtype=object).reshape(bytes_arr.shape)