Coverage for src/hdmf/utils.py: 97%

600 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-04 02:57 +0000

1import collections 

2import copy as _copy 

3import types 

4import warnings 

5from abc import ABCMeta 

6from enum import Enum 

7 

8import h5py 

9import numpy as np 

10 

11 

# Named groups ("macros") of types that can be referenced by name in a docval 'type' entry.
# Lists are used so that docval_macro can register additional classes later.
__macros = dict(
    array_data=[np.ndarray, list, tuple, h5py.Dataset],
    scalar_data=[str, int, float, bytes, bool],
    data=[],
)

try:
    # optionally accept zarr.Array as array data to support conversion of data from Zarr to HDMF
    import zarr
except ImportError:
    pass
else:
    __macros['array_data'].append(zarr.Array)

24 

25 

# code to signify how to handle positional arguments in docval
AllowPositional = Enum('AllowPositional', ['ALLOWED', 'WARNING', 'ERROR'])

# Concrete Python/NumPy types accepted for each simple type category
__supported_bool_types = (bool, np.bool_)
__supported_uint_types = (np.uint8, np.uint16, np.uint32, np.uint64)
__supported_int_types = (int, np.int8, np.int16, np.int32, np.int64)
__supported_float_types = (float, np.float16, np.float32, np.float64)
if hasattr(np, "float128"):  # pragma: no cover
    __supported_float_types += (np.float128,)
if hasattr(np, "longdouble"):  # pragma: no cover
    # on windows python<=3.5, h5py floats resolve float64s as either np.float64 or np.longdouble
    # non-deterministically. a future version of h5py will fix this. see #112
    __supported_float_types += (np.longdouble,)

# Types that may be combined with the docval 'enum' key
__allowed_enum_types = (
    *__supported_bool_types,
    *__supported_uint_types,
    *__supported_int_types,
    *__supported_float_types,
    str,
)

42 

43 

def docval_macro(macro):
    """Class decorator that registers the decorated class under the macro name ``macro``.

    The class is appended to the list stored in the ``__macros`` dict for that key; the list is
    created if the key does not exist yet. The decorated class is returned unchanged.

    :param macro: name of the macro to add the class to
    """

    def _register(cls):
        __macros.setdefault(macro, list()).append(cls)
        return cls

    return _register

55 

56 

def get_docval_macro(key=None):
    """
    Return the docval macros, i.e., strings that represent a customizable list of types for use in docval.

    :param key: Name of the macro. If key=None, then a deep copy of the dictionary of all macros is returned.
                Otherwise, a tuple of the types associated with the key is returned.
    :raises KeyError: if ``key`` is given but is not a registered macro name
    """
    if key is not None:
        return tuple(__macros[key])
    return _copy.deepcopy(__macros)

68 

69 

def __type_okay(value, argtype, allow_none=False):
    """Check a value against a type specification.

    Unlike :py:func:`isinstance`, the specification may be a string. A string is interpreted, in order, as
    a docval macro name, as one of the simple categories ``'uint'``, ``'int'``, ``'float'``, ``'bool'``, or
    as the name of any class in the MRO of ``value``'s class.

    Args:
        value (any): the value to check
        argtype (type, str, list, tuple): the type specification to check against; any other object
            (e.g. None) accepts every non-None value
        allow_none (bool): whether or not to allow None as a valid value

    Returns:
        bool: True if value is a valid instance of argtype
    """
    if value is None:
        return allow_none

    if isinstance(argtype, str):
        if argtype in __macros:
            return __type_okay(value, __macros[argtype], allow_none=allow_none)
        named_checks = {'uint': __is_uint, 'int': __is_int, 'float': __is_float, 'bool': __is_bool}
        check = named_checks.get(argtype)
        if check is not None:
            return check(value)
        # fall back to matching the string against the class names in the value's MRO
        return argtype in [cls.__name__ for cls in value.__class__.__mro__]

    if isinstance(argtype, type):
        # the builtin numeric types also accept their supported numpy counterparts
        if argtype is int:
            return __is_int(value)
        if argtype is float:
            return __is_float(value)
        if argtype is bool:
            return __is_bool(value)
        return isinstance(value, argtype)

    if isinstance(argtype, (tuple, list)):
        return any(__type_okay(value, candidate) for candidate in argtype)

    # argtype is None (or anything else): no restriction
    return True

112 

113 

def __shape_okay_multi(value, argshape):
    """Return True if the shape of ``value`` matches ``argshape`` or, when ``argshape`` holds several
    shapes (its first element is exactly a tuple or list), any one of them."""
    if type(argshape[0]) not in (tuple, list):
        return __shape_okay(value, argshape)
    # multiple shapes are present
    return any(__shape_okay(value, shape) for shape in argshape)

119 

120 

def __shape_okay(value, argshape):
    """Return True if the shape of ``value`` has the same number of dimensions as ``argshape`` and every
    dimension matches; a None entry in ``argshape`` matches any length."""
    valshape = get_data_shape(value)
    if len(valshape) != len(argshape):
        return False
    return all(expected in (actual, None) for actual, expected in zip(valshape, argshape))

129 

130 

def __is_uint(value):
    """Return True if value is an instance of one of the supported unsigned integer types."""
    return isinstance(value, __supported_uint_types)


def __is_int(value):
    """Return True if value is an instance of one of the supported (signed) integer types."""
    return isinstance(value, __supported_int_types)


def __is_float(value):
    """Return True if value is an instance of one of the supported floating point types."""
    return isinstance(value, __supported_float_types)


def __is_bool(value):
    """Return True if value is an instance of one of the supported boolean types."""
    return isinstance(value, __supported_bool_types)

145 

146 

def __format_type(argtype):
    """Return a human-readable description of a docval type specification for use in error messages.

    :param argtype: a str (returned as is), a type (its name), a list/tuple of specifications
                    (formatted as "a, b or c"), or None ("any type")
    :raises ValueError: if argtype is none of the above
    """
    if isinstance(argtype, str):
        return argtype
    if isinstance(argtype, type):
        return argtype.__name__
    if isinstance(argtype, (tuple, list)):
        names = [__format_type(item) for item in argtype]
        if len(names) <= 1:
            return names[0]
        *leading, last = names
        return "%s or %s" % (", ".join(leading), last)
    if argtype is None:
        return "any type"
    raise ValueError("argtype must be a type, str, list, or tuple")

162 

163 

def __check_enum(argval, arg):
    """
    Check whether the given argument value is one of the values allowed by the argument's enum specification.

    :param argval: argument value passed to the function/method
    :param arg: argument validator - the specification dictionary for this argument (uses 'enum' and 'name')

    :return: None if the value validates successfully, error message if the value does not.
    """
    if argval in arg['enum']:
        return None
    return "forbidden value for '{}' (got {}, expected {})".format(
        arg['name'], __fmt_str_quotes(argval), arg['enum']
    )

176 

177 

def __fmt_str_quotes(x):
    """Return a string representation of x in which strings are wrapped in single quotes.

    Lists and tuples are formatted as a whole, a str is wrapped in single quotes, and any other
    value is converted with str().
    """
    if isinstance(x, (list, tuple)):
        formatted = '{}'.format(x)
    elif isinstance(x, str):
        formatted = "'%s'" % x
    else:
        formatted = str(x)
    return formatted

185 

186 

def __unwrap_term_set(argval):
    """Split a possibly TermSetWrapper-wrapped argument into the value to validate and the wrapper.

    :param argval: the argument value supplied by the caller
    :return: tuple ``(value, wrapper)``; ``wrapper`` is None when ``argval`` is not a TermSetWrapper
    """
    from .term_set import TermSetWrapper  # circular import fix

    if isinstance(argval, TermSetWrapper):
        # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type.
        return argval.value, argval
    return argval, None


def __check_arg_shape(argname, argval, arg, value_errors):
    """Check the shape of an argument value against ``arg['shape']``, appending problems to ``value_errors``.

    If the shape of ``argval`` cannot be determined, the attribute named ``argname`` is followed
    repeatedly (e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked).

    :param argname: name of the argument being checked
    :param argval: value whose shape is checked
    :param arg: the docval specification dictionary of the argument (must contain 'shape')
    :param value_errors: list to which error messages are appended
    :return: the object that was last inspected (``argval`` itself unless attributes had to be followed)
    """
    valshape = get_data_shape(argval)
    while valshape is None:
        if argval is None:
            break
        if not hasattr(argval, argname):
            fmt_val = (argval, argname, arg['shape'])
            value_errors.append("cannot check shape of object '%s' for argument '%s' "
                                "(expected shape '%s')" % fmt_val)
            break
        # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
        argval = getattr(argval, argname)
        valshape = get_data_shape(argval)
    if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
        fmt_val = (argname, valshape, arg['shape'])
        value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
    return argval


def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, allow_extra=False,  # noqa: C901
                 allow_positional=AllowPositional.ALLOWED):
    """
    Internal helper function used by the docval decorator to parse and validate function arguments

    :param validator: List of dicts from docval with the description of the arguments
    :param args: List of the values of positional arguments supplied by the caller
    :param kwargs: Dict keyword arguments supplied by the caller where keys are the argument name and
                   values are the argument value.
    :param enforce_type: Boolean indicating whether the type of arguments should be enforced
    :param enforce_shape: Boolean indicating whether the dimensions of array arguments
                          should be enforced if possible.
    :param allow_extra: Boolean indicating whether extra keyword arguments are allowed (if False and extra keyword
                        arguments are specified, then an error is raised).
    :param allow_positional: code indicating whether positional arguments are allowed:
                             AllowPositional.ALLOWED: positional arguments are allowed
                             AllowPositional.WARNING: return warning if positional arguments are supplied
                             AllowPositional.ERROR: return error if positional arguments are supplied

    :return: Dict with:
        * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
        * 'future_warnings' : List of strings with warning messages
        * 'type_errors' : List of strings with type error messages
        * 'value_errors' : List of strings with value error messages
        * 'syntax_errors' : List of strings with syntax error messages
    """

    ret = dict()
    syntax_errors = list()
    type_errors = list()
    value_errors = list()
    future_warnings = list()
    argsi = 0
    extras = dict()  # has to be initialized to empty here, to avoid spurious errors reported upon early raises
    try:
        # check for duplicates in docval
        names = [x['name'] for x in validator]
        duplicated = [item for item, count in collections.Counter(names).items()
                      if count > 1]
        if duplicated:
            raise ValueError(
                'The following names are duplicated: {}'.format(duplicated))

        if allow_extra:  # extra keyword arguments are allowed so do not consider them when checking number of args
            if len(args) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d positional' % (len(validator), names, len(args))
                )
        else:  # allow for keyword args
            if len(args) + len(kwargs) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d: %d positional and %d keyword %s'
                    % (len(validator), names, len(args) + len(kwargs), len(args), len(kwargs), sorted(kwargs))
                )

        if args:
            if allow_positional == AllowPositional.WARNING:
                msg = ('Using positional arguments for this method is discouraged and will be deprecated '
                       'in a future major release. Please use keyword arguments to ensure future compatibility.')
                future_warnings.append(msg)
            elif allow_positional == AllowPositional.ERROR:
                msg = 'Only keyword arguments (e.g., func(argname=value, ...)) are allowed for this method.'
                syntax_errors.append(msg)

        # iterate through the docval specification and find a matching value in args / kwargs.
        # next() raising StopIteration (caught below) is what ends the parsing once the spec is exhausted.
        it = iter(validator)
        arg = next(it)

        # process positional arguments of the docval specification (no default value)
        extras = dict(kwargs)
        while 'default' not in arg:
            argname = arg['name']
            argval_set = False
            if argname in kwargs:
                # if this positional arg is specified by a keyword arg and there are remaining positional args that
                # have not yet been matched, then it is undetermined what those positional args match to. thus, raise
                # an error
                if argsi < len(args):
                    type_errors.append("got multiple values for argument '%s'" % argname)
                argval = kwargs.get(argname)  # kwargs is the dict that stores the object names and the values
                extras.pop(argname, None)
                argval_set = True
            elif argsi < len(args):
                argval = args[argsi]
                argval_set = True

            if not argval_set:
                type_errors.append("missing argument '%s'" % argname)
            else:
                argval, wrapper = __unwrap_term_set(argval)
                if enforce_type and not __type_okay(argval, arg['type']):
                    if argval is None:
                        fmt_val = (argname, __format_type(arg['type']))
                        type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                    else:
                        fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                        type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
                if enforce_shape and 'shape' in arg:
                    # NOTE: as in the original implementation, the value stored in ret below is the object
                    # returned by the shape check, i.e., the unpacked attribute if unpacking was needed
                    argval = __check_arg_shape(argname, argval, arg, value_errors)
                if 'enum' in arg:
                    err = __check_enum(argval, arg)
                    if err:
                        value_errors.append(err)

                if wrapper is not None:
                    # reassign the wrapper so that it can be used to flag HERD "on write"
                    argval = wrapper

                ret[argname] = argval
            argsi += 1
            arg = next(it)

        # process arguments of the docval specification with a default value
        # NOTE: the default value will be deepcopied, so 'default': list() is safe unlike in normal python
        while True:
            argname = arg['name']
            if argname in kwargs:
                ret[argname] = kwargs.get(argname)
                extras.pop(argname, None)
            elif len(args) > argsi:
                ret[argname] = args[argsi]
                argsi += 1
            else:
                ret[argname] = _copy.deepcopy(arg['default'])

            # ret keeps the value as supplied (including any TermSetWrapper); only the local
            # argval is unwrapped / unpacked for validation
            argval, _ = __unwrap_term_set(ret[argname])
            if enforce_type:
                none_allowed = arg['default'] is None or arg.get('allow_none', False)
                if not __type_okay(argval, arg['type'], none_allowed):
                    # a None value with a None default always passes the check above, so only the
                    # generic "incorrect type" message can occur here
                    fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                    type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
            if enforce_shape and 'shape' in arg and argval is not None:
                argval = __check_arg_shape(argname, argval, arg, value_errors)
            if 'enum' in arg and argval is not None:
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)
            arg = next(it)
    except StopIteration:
        pass
    except TypeError as e:
        type_errors.append(str(e))
    except ValueError as e:
        value_errors.append(str(e))

    if not allow_extra:
        type_errors.extend("unrecognized argument: '%s'" % key for key in extras)
    else:
        # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args.
        # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out
        ret.update(extras)
    return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors,
            'syntax_errors': syntax_errors}

387 

388 

# Names of the attributes that the docval decorator sets on the wrapped function
docval_attr_name = '__docval__'  # dict with the docval options and argument specifications
docval_idx_name = '__dv_idx__'  # dict of the argument specifications indexed by argument name
# Key in the docval_attr_name dict under which the list of argument specifications is stored
__docval_args_loc = 'args'

392 

393 

def get_docval(func, *args):
    '''Get a tuple of the docval argument specifications of a function.

    If args are supplied, return only docval arguments with value for 'name' key equal to the args

    :param func: the function to get the docval arguments of
    :param args: optional names of the docval arguments to return
    :raises ValueError: if names are given but the function has no docval or lacks one of the named arguments
    :return: tuple of docval argument dictionaries (empty if the function has no docval and no names are given)
    '''
    func_docval = getattr(func, docval_attr_name, None)
    if not func_docval:
        if args:
            raise ValueError('Function %s has no docval arguments' % func.__name__)
        return tuple()
    if not args:
        return tuple(func_docval[__docval_args_loc])
    # look the requested names up in the name-indexed cache stored on the function
    docval_idx = getattr(func, docval_idx_name, None)
    try:
        return tuple(docval_idx[name] for name in args)
    except KeyError as ke:
        raise ValueError('Function %s does not have docval argument %s' % (func.__name__, str(ke)))

411 

412 

413# def docval_wrap(func, is_method=True): 

414# if is_method: 

415# @docval(*get_docval(func)) 

416# def method(self, **kwargs): 

417# 

418# return call_docval_args(func, kwargs) 

419# return method 

420# else: 

421# @docval(*get_docval(func)) 

422# def static_method(**kwargs): 

423# return call_docval_args(func, kwargs) 

424# return method 

425 

426 

def fmt_docval_args(func, kwargs):
    ''' Separate positional and keyword arguments

    Useful for methods that wrap other methods. Emits a PendingDeprecationWarning.

    :param func: a function decorated with docval
    :param kwargs: dict of argument names to values; it is not modified
    :raises ValueError: if func has no docval
    :return: tuple ``(args, kwargs)`` where args is the list of values for the docval arguments without a
             default (None if missing from kwargs) and kwargs is a dict of the non-None values for the docval
             arguments with a default, plus all remaining entries if the docval allows extra arguments
    '''
    warnings.warn("fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, "
                  "call the function directly with the kwargs. Please note that fmt_docval_args "
                  "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                  "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                  "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
                  PendingDeprecationWarning)
    func_docval = getattr(func, docval_attr_name, None)
    remaining = _copy.copy(kwargs)  # work on a copy so that the caller's dict is left untouched
    if not func_docval:
        raise ValueError('no docval found on %s' % str(func))

    positional = list()
    keyword = dict()
    for spec in func_docval[__docval_args_loc]:
        name = spec['name']
        val = remaining.pop(name, None)
        if 'default' not in spec:
            positional.append(val)
        elif val is not None:
            keyword[name] = val
    if func_docval['allow_extra']:
        keyword.update(remaining)
    return positional, keyword

455 

456 

457# def _remove_extra_args(func, kwargs): 

458# """Return a dict of only the keyword arguments that are accepted by the function's docval. 

459# 

460# If the docval specifies allow_extra=True, then the original kwargs are returned. 

461# """ 

462# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that 

463# # kwargs are kept as kwargs instead of parsed into args and kwargs 

464# func_docval = getattr(func, docval_attr_name, None) 

465# if func_docval: 

466# if func_docval['allow_extra']: 

467# # if extra args are allowed, return all args 

468# return kwargs 

469# else: 

470# # save only the arguments listed in the function's docval (skip any others present in kwargs) 

471# ret_kwargs = dict() 

472# for arg in func_docval[__docval_args_loc]: 

473# val = kwargs.get(arg['name'], None) 

474# if val is not None: # do not return arguments that are not present or have value None 

475# ret_kwargs[arg['name']] = val 

476# return ret_kwargs 

477# else: 

478# raise ValueError('No docval found on %s' % str(func)) 

479 

480 

def call_docval_func(func, kwargs):
    """Call the function with only the keyword arguments that are accepted by the function's docval.

    Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True.
    Emits a PendingDeprecationWarning.

    :param func: a function decorated with docval
    :param kwargs: dict of argument names to values
    :return: the return value of func
    """
    deprecation_msg = (
        "call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, "
        "call the function directly with the kwargs. Please note that call_docval_func "
        "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
        "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
        "is set), then you will need to pop the extra arguments out of kwargs before calling the function."
    )
    warnings.warn(deprecation_msg, PendingDeprecationWarning)

    with warnings.catch_warnings(record=True):
        # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two
        # PendingDeprecationWarnings saying the same thing are not raised
        warnings.simplefilter("ignore", UserWarning)
        warnings.simplefilter("always", PendingDeprecationWarning)
        positional, keyword = fmt_docval_args(func, kwargs)

    return func(*positional, **keyword)

500 

501 

def __resolve_type(t):
    """Resolve a docval type specification, expanding macro names into their types.

    :param t: None, a type, a str (macro names are replaced by a tuple of the macro's types, other strings
              are kept), or a list/tuple of such specifications (resolved recursively and flattened)
    :raises ValueError: if t is none of the above
    :return: None, a type, a str, or a tuple of types/strings
    """
    if t is None or isinstance(t, type):
        return t
    if isinstance(t, str):
        return tuple(__macros[t]) if t in __macros else t
    if isinstance(t, (list, tuple)):
        flattened = list()
        for item in t:
            resolved = __resolve_type(item)
            if isinstance(resolved, tuple):
                flattened.extend(resolved)
            else:
                flattened.append(resolved)
        return tuple(flattened)
    msg = "argtype must be a type, a str, a list, a tuple, or None - got %s" % type(t)
    raise ValueError(msg)

524 

525 

def __check_enum_argtype(argtype):
    """Return True/False whether the given argtype or list/tuple of argtypes is a supported docval enum type"""
    candidates = argtype if isinstance(argtype, (list, tuple)) else (argtype,)
    return all(candidate in __allowed_enum_types for candidate in candidates)

531 

532 

def docval(*validator, **options):  # noqa: C901
    '''A decorator for documenting and enforcing type for instance method arguments.

    This decorator takes a list of dictionaries that specify the method parameters. These
    dictionaries are used for enforcing type and building a docstring.

    The first arguments are dictionaries that specify the positional
    arguments and keyword arguments of the decorated function. These dictionaries
    must contain the following keys: ``'name'``, ``'type'``, and ``'doc'``. This will define a
    positional argument. To define a keyword argument, specify a default value
    using the key ``'default'``. To validate the dimensions of an input array
    add the optional ``'shape'`` parameter. To restrict an argument to a fixed set of values, add the
    optional ``'enum'`` parameter (a list or tuple). To allow a None value for an argument,
    either the default value must be None or a different default value must be provided
    and ``'allow_none': True`` must be passed.

    The decorated method must take ``self`` and ``**kwargs`` as arguments.

    When using this decorator, the functions :py:func:`getargs` and
    :py:func:`popargs` can be used for easily extracting arguments from
    kwargs.

    The following code example demonstrates the use of this decorator:

    .. code-block:: python

       @docval({'name': 'arg1', 'type': str, 'doc': 'this is the first positional argument'},
               {'name': 'arg2', 'type': int, 'doc': 'this is the second positional argument'},
               {'name': 'kwarg1', 'type': (list, tuple), 'doc': 'this is a keyword argument', 'default': list()},
               returns='foo object', rtype='Foo')
       def foo(self, **kwargs):
           arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', kwargs)
           ...

    :param enforce_type: Enforce types of input parameters (Default=True)
    :param returns: String describing the return values
    :param rtype: String describing the data type of the return values
    :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True)
    :param enforce_shape: Enforce the dimensions of input arrays (Default=True)
    :param validator: :py:func:`dict` objects specifying the method parameters
    :param allow_extra: Allow extra arguments (Default=False)
    :param allow_positional: How to treat positional arguments, compared against the members of
                             ``AllowPositional``. The default ``True`` equals neither ``WARNING`` nor ``ERROR``,
                             so positional arguments are accepted silently.
    :param options: additional options for documenting and validating method parameters
    :raises Exception: at decoration time if an argument specification is invalid
    '''
    enforce_type = options.pop('enforce_type', True)
    enforce_shape = options.pop('enforce_shape', True)
    returns = options.pop('returns', None)
    rtype = options.pop('rtype', None)
    is_method = options.pop('is_method', True)
    allow_extra = options.pop('allow_extra', False)
    allow_positional = options.pop('allow_positional', True)

    # keys that an argument specification dictionary may contain
    allowable_terms = frozenset(('name', 'doc', 'type', 'shape', 'enum', 'default', 'allow_none', 'help'))

    def dec(func):
        _docval = _copy.copy(options)
        _docval['allow_extra'] = allow_extra
        _docval['allow_positional'] = allow_positional
        func.__name__ = _docval.get('func_name', func.__name__)
        func.__doc__ = _docval.get('doc', func.__doc__)
        pos = list()
        kw = list()
        for a in validator:
            # catch unsupported keys
            unsupported_terms = set(a.keys()) - allowable_terms
            if unsupported_terms:
                raise Exception('docval for {}: keys {} are not supported by docval'.format(a['name'],
                                                                                            sorted(unsupported_terms)))
            # check that arg type is valid
            # NOTE: the resolved type is written back into the specification dictionary passed by the caller
            try:
                a['type'] = __resolve_type(a['type'])
            except Exception as e:
                msg = "docval for %s: error parsing argument type: %s" % (a['name'], e.args[0])
                raise Exception(msg)
            if 'enum' in a:
                # check that value for enum key is a list or tuple (cannot have only one allowed value)
                if not isinstance(a['enum'], (list, tuple)):
                    msg = ('docval for %s: enum value must be a list or tuple (received %s)'
                           % (a['name'], type(a['enum'])))
                    raise Exception(msg)
                # check that arg type is compatible with enum
                if not __check_enum_argtype(a['type']):
                    msg = 'docval for {}: enum checking cannot be used with arg type {}'.format(a['name'], a['type'])
                    raise Exception(msg)
                # check that enum allowed values are allowed by arg type
                if not all(__type_okay(x, a['type']) for x in a['enum']):
                    msg = ('docval for {}: enum values are of types not allowed by arg type (got {}, '
                           'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type']))
                    raise Exception(msg)
            if a.get('allow_none', False) and 'default' not in a:
                msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(a['name'])
                raise Exception(msg)
            if 'default' in a:
                kw.append(a)
            else:
                pos.append(a)
        # arguments without a default always come before arguments with a default
        loc_val = pos + kw
        _docval[__docval_args_loc] = loc_val

        def _check_args(args, kwargs):
            """Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
            # this function was separated from func_call() in order to make stepping through lines of code using pdb
            # easier

            parsed = __parse_args(
                loc_val,
                args[1:] if is_method else args,  # drop self/cls
                kwargs,
                enforce_type=enforce_type,
                enforce_shape=enforce_shape,
                allow_extra=allow_extra,
                allow_positional=allow_positional
            )

            parse_warnings = parsed.get('future_warnings')
            if parse_warnings:
                msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings))
                warnings.warn(msg, FutureWarning)

            # only the first non-empty error category is raised, in this order
            for error_type, ExceptionType in (('type_errors', TypeError),
                                              ('value_errors', ValueError),
                                              ('syntax_errors', SyntaxError)):
                parse_err = parsed.get(error_type)
                if parse_err:
                    msg = '%s: %s' % (func.__qualname__, ', '.join(parse_err))
                    raise ExceptionType(msg)

            return parsed['args']

        # this code is intentionally separated to make stepping through lines of code using pdb easier
        if is_method:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(args[0], **pargs)
        else:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(**pargs)

        _rtype = rtype
        if isinstance(rtype, type):
            _rtype = rtype.__name__
        docstring = __googledoc(func, _docval[__docval_args_loc], returns=returns, rtype=_rtype)
        docval_idx = {a['name']: a for a in _docval[__docval_args_loc]}  # cache a name-indexed dictionary of args
        setattr(func_call, '__doc__', docstring)
        setattr(func_call, '__name__', func.__name__)
        setattr(func_call, docval_attr_name, _docval)
        setattr(func_call, docval_idx_name, docval_idx)
        setattr(func_call, '__module__', func.__module__)
        return func_call

    return dec

683 

684 

def __sig_arg(argval):
    """Format one docval argument specification for a signature line: ``name`` or ``name=default``.

    String defaults are wrapped in single quotes; other defaults are converted with str().
    """
    if 'default' not in argval:
        return argval['name']
    default = argval['default']
    shown = "'%s'" % default if isinstance(default, str) else str(default)
    return "%s=%s" % (argval['name'], shown)

695 

696 

def __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=None, returns=None, rtype=None):
    '''Generate a docstring consisting of a signature line, the function's description and its arguments.

    :param func: the function to document; its ``__name__`` and ``__doc__`` are used
    :param validator: list of docval argument specification dictionaries
    :param docstring_fmt: format string with the fields ``description`` and ``args``
    :param arg_fmt: format string for a single argument with the fields ``name``, ``doc`` and ``type``
    :param ret_fmt: format string for the return section with the fields ``returns`` and ``rtype``
    :param returns: description of the return value
    :param rtype: description of the return type
    :return: the docstring; the return section is included only if ret_fmt, returns and rtype are all given
    '''
    # types from these modules are referenced without their module path
    short_name_modules = ("h5py", "pandas", "builtins")

    def to_str(argtype):
        if not isinstance(argtype, type):
            return argtype
        module = argtype.__module__
        name = argtype.__name__
        if module.startswith(short_name_modules):
            return ":py:class:`~{name}`".format(name=name)
        return ":py:class:`~{module}.{name}`".format(name=name, module=module)

    def render_arg(arg):
        argtype = arg['type']
        if isinstance(argtype, (tuple, list)):
            type_str = " or ".join(map(to_str, argtype))
        else:
            type_str = to_str(argtype)
        return arg_fmt.format(name=arg.get('name'), doc=arg.get('doc'), type=type_str)

    signature = ", ".join(map(__sig_arg, validator))
    description = "" if func.__doc__ is None else func.__doc__.strip()
    rendered_args = "\n".join(map(render_arg, validator))

    doc = "%s(%s)\n\n" % (func.__name__, signature)
    doc += docstring_fmt.format(description=description, args=rendered_args)
    if ret_fmt is not None and returns is not None and rtype is not None:
        doc += ret_fmt.format(returns=returns, rtype=rtype)
    return doc

728 

729 

def __sphinxdoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func that uses reST field lists (``:param:``, ``:type:``, ``:returns:``, ``:rtype:``)."""
    return __builddoc(
        func,
        validator,
        "{description}\n\n{args}\n",
        ":param {name}: {doc}\n:type {name}: {type}",
        ret_fmt=":returns: {returns}\n:rtype: {rtype}",
        returns=returns,
        rtype=rtype,
    )

738 

739 

def __googledoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func with ``Args:`` and ``Returns:`` sections.

    The ``Args:`` section is omitted if there are no arguments.
    """
    # NOTE(review): the leading whitespace inside the two indented format literals below is reproduced
    # as it appears in the reviewed text, where runs of spaces may have been collapsed - confirm the
    # intended indentation against the real file.
    arg_fmt = " {name} ({type}): {doc}"
    args_section = "Args:\n{args}\n" if len(validator) > 0 else ""
    docstring_fmt = "{description}\n\n" + args_section
    ret_fmt = "\nReturns:\n {rtype}: {returns}"
    return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype)

748 

749 

def getargs(*argnames):
    """getargs(*argnames, argdict)
    Convenience function to retrieve arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values. The dictionary is not modified.

    :raises ValueError: if a argument name is not found in the dictionary or there is only one argument passed to this
        function or the last argument is not a dictionary
    :return: a single value if there is only one argument, or a list of values corresponding to the given argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, kwargs = argnames
    if not isinstance(kwargs, dict):
        raise ValueError('Last argument must be a dict')

    values = []
    for key in keys:
        if key not in kwargs:
            raise ValueError("Argument not found in dict: '%s'" % key)
        values.append(kwargs.get(key))
    # a single requested key yields the bare value rather than a one-element list
    return values[0] if len(keys) == 1 else values

776 

777 

def popargs(*argnames):
    """popargs(*argnames, argdict)
    Look up several arguments in a dictionary and remove them from it, with a single call.

    Every positional argument except the last is a key (argument name); the last positional argument is the
    dictionary to pop from. Keys are popped in the order given, so if a name is missing, the names listed before
    it have already been removed from the dictionary when the error is raised.

    :raises ValueError: if fewer than two arguments are given, if the last argument is not a dictionary, or if any
                        of the requested names is missing from the dictionary
    :return: the value itself when exactly one name is requested, otherwise a list of the values in the order the
             names were given
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *names, kwargs = argnames
    if not isinstance(kwargs, dict):
        raise ValueError('Last argument must be a dict')
    popped = []
    for name in names:
        try:
            popped.append(kwargs.pop(name))
        except KeyError as ke:
            raise ValueError('Argument not found in dict: %s' % str(ke))
    # a single requested name yields the bare value rather than a one-element list
    return popped[0] if len(names) == 1 else popped

805 

806 

def popargs_to_dict(keys, argdict):
    """Remove the given keys from a dictionary and return them, with their values, as a new dictionary.

    Equivalent to `{key: argdict.pop(key) for key in keys}` except that a missing key is reported as a ValueError.
    Keys are popped in iteration order, so keys preceding a missing one have already been removed from argdict
    when the error is raised.

    :param keys: Iterable of keys to pull out of argdict
    :type keys: Iterable
    :param argdict: Dictionary to process
    :type argdict: dict
    :raises ValueError: if an argument name is not found in the dictionary
    :return: a dict of arguments removed
    """
    removed = {}
    for key in keys:
        try:
            value = argdict.pop(key)
        except KeyError as ke:
            raise ValueError('Argument not found in dict: %s' % str(ke))
        removed[key] = value
    return removed

826 

827 

class ExtenderMeta(ABCMeta):
    """A metaclass that extends class creation with hooks declared on the classes that use it.

    Routines decorated with :py:meth:`pre_init` run before the base metaclass initialization and
    routines decorated with :py:meth:`post_init` run after it.

    In general, this class should only be used by core developers.
    """

    # names of the marker attributes set on decorated functions
    __preinit = '__preinit'
    __postinit = '__postinit'

    @classmethod
    def pre_init(cls, func):
        '''A decorator for defining a routine to run before the base initialization of a type object.

        The function is marked and converted to a classmethod; it is later called with
        ``(name, bases, classdict)``.
        '''
        setattr(func, cls.__preinit, True)
        return classmethod(func)

    @classmethod
    def post_init(cls, func):
        '''A decorator for defining a routine to run after creation of a type object.

        An example use of this method would be to define a classmethod that gathers
        any defined methods or attributes after the base Python type construction (i.e. after
        :py:func:`type` has been called)
        '''
        setattr(func, cls.__postinit, True)
        return classmethod(func)

    def __init__(cls, name, bases, classdict):
        # Attributes are fetched one at a time, in dir() order, and a marked hook is invoked as soon as it is
        # found, before the next attribute is looked at.
        for attr_name in dir(cls):
            candidate = getattr(cls, attr_name)
            if hasattr(candidate, cls.__preinit):
                candidate(name, bases, classdict)
        super().__init__(name, bases, classdict)
        for attr_name in dir(cls):
            candidate = getattr(cls, attr_name)
            if hasattr(candidate, cls.__postinit):
                candidate(name, bases, classdict)

866 

867 

def get_data_shape(data, strict_no_data_load=False):
    """
    Determine the shape of the given array-like object.

    The lookup order is: the object's ``maxshape`` attribute if it has one, then its ``shape`` attribute if
    present and not None, and finally a recursive inspection of the first element along each dimension for
    objects supporting ``__len__``. The recursive inspection assumes that the data has a regular, rectangular
    shape. For out-of-core iterators it may load the first item along each dimension into memory; set
    strict_no_data_load=True to prevent that, at the cost that the shape may not be determinable.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown
             will be set to None. None is returned for dicts, strings, bytes, and objects whose shape cannot be
             determined.
    """

    def __shape_of(seq):
        # anything without a length contributes no further dimensions
        if not hasattr(seq, '__len__'):
            return ()
        length = len(seq)
        dims = (length,)
        if length:
            first = next(iter(seq))
            # strings are treated as scalar elements, not as a further dimension
            if not isinstance(first, (str, bytes)):
                dims += __shape_of(first)
        return dims

    # NOTE: data.maxshape will fail on empty h5py.Dataset without shape or maxshape. this will be fixed in h5py 3.0
    if hasattr(data, 'maxshape'):
        return data.maxshape
    declared = getattr(data, 'shape', None)
    if declared is not None:
        return declared
    if isinstance(data, (dict, str, bytes)) or not hasattr(data, '__len__'):
        return None
    # in strict mode only plain in-memory containers are inspected
    if strict_no_data_load and not isinstance(data, (list, tuple, set)):
        return None
    return __shape_of(data)

908 

909 

def pystr(s):
    """
    Return ``s`` as a Python str: bytes are decoded as UTF-8, anything else is returned unchanged.
    """
    return s.decode('utf-8') if isinstance(s, bytes) else s

918 

919 

def to_uint_array(arr):
    """
    Convert a numpy array or array-like object to a numpy array of unsigned integers with the same dtype itemsize.

    For example, a list of int32 values is converted to a numpy array with dtype uint32. An array that already has
    an unsigned integer dtype is returned as is.

    :raises ValueError: if the input has a non-integer dtype or contains negative integer values.
    """
    if not isinstance(arr, np.ndarray):
        arr = np.array(arr)
    source_dtype = arr.dtype
    if np.issubdtype(source_dtype, np.unsignedinteger):
        return arr
    if not np.issubdtype(source_dtype, np.integer):
        raise ValueError('Cannot convert array of dtype %s to uint.' % arr.dtype)
    if (arr < 0).any():
        raise ValueError('Cannot convert negative integer values to uint.')
    bit_width = int(source_dtype.itemsize * 8)  # keep precision
    return arr.astype(np.dtype('uint' + str(bit_width)))

937 

938 

class LabelledDict(dict):
    """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.

    For example, if the key attribute is set as 'name' in __init__, then all objects added to the LabelledDict must have
    a 'name' attribute and a particular object in the LabelledDict can be accessed using the syntax ['object_name'] if
    the object.name == 'object_name'. In this way, LabelledDict acts like a set where values can be retrieved using
    square brackets around the value of the key attribute. An 'add' method makes clear the association between the key
    attribute of the LabelledDict and the values of the LabelledDict.

    LabelledDict also supports retrieval of values with the syntax my_dict['attr == val'], which returns a set of
    objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that
    condition, an empty set is returned. Note that if 'attr' equals the key attribute, then the single matching value
    is returned, not a set, and a KeyError is raised if there is no such key.

    LabelledDict does not support changing items that have already been set. A TypeError will be raised when using
    __setitem__ on keys that already exist in the dict. The setdefault and update methods are not supported. A
    TypeError will be raised when these are called.

    A callable function may be passed to the constructor to be run on an item after adding it to this dict using
    the __setitem__ and add methods.

    A callable function may be passed to the constructor to be run on an item after removing it from this dict using
    the __delitem__ (the del operator), pop, and popitem methods. It will also be run on each removed item when using
    the clear method.

    Usage:
      LabelledDict(label='my_objects', key_attr='name')
      my_dict[obj.name] = obj
      my_dict.add(obj)  # simpler syntax

    Example:
      # MyTestClass is a class with attributes 'prop1' and 'prop2'. MyTestClass.__init__ sets those attributes.
      ld = LabelledDict(label='all_objects', key_attr='prop1')
      obj1 = MyTestClass('a', 'b')
      obj2 = MyTestClass('d', 'b')
      ld[obj1.prop1] = obj1  # obj1 is added to the LabelledDict with the key obj1.prop1. Any other key is not allowed.
      ld.add(obj2)           # Simpler 'add' syntax enforces the required relationship
      ld['a']                # Returns obj1
      ld['prop1 == a']       # Also returns obj1
      ld['prop2 == b']       # Returns set([obj1, obj2]) - the set of all values v in ld where v.prop2 == 'b'
    """

    @docval({'name': 'label', 'type': str, 'doc': 'the label on this dictionary'},
            {'name': 'key_attr', 'type': str, 'doc': 'the attribute name to use as the key', 'default': 'name'},
            {'name': 'add_callable', 'type': types.FunctionType,
             'doc': 'function to call on an element after adding it to this dict using the add or __setitem__ methods',
             'default': None},
            {'name': 'remove_callable', 'type': types.FunctionType,
             'doc': ('function to call on an element after removing it from this dict using the pop, popitem, clear, '
                     'or __delitem__ methods'),
             'default': None})
    def __init__(self, **kwargs):
        label, key_attr, add_callable, remove_callable = getargs('label', 'key_attr', 'add_callable', 'remove_callable',
                                                                 kwargs)
        self.__label = label
        self.__key_attr = key_attr
        self.__add_callable = add_callable
        self.__remove_callable = remove_callable

    @property
    def label(self):
        """Return the label of this LabelledDict"""
        return self.__label

    @property
    def key_attr(self):
        """Return the attribute used as the key for values in this LabelledDict"""
        return self.__key_attr

    def __getitem__(self, args):
        """Get a value from the LabelledDict with the given key.

        Supports syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an
        attribute 'attr' with a string value 'val'. If no objects match that condition, an empty set is returned.
        Note that if 'attr' equals the key attribute of this LabelledDict, then the single matching value is
        returned, not a set.

        Keys that are not strings are never interpreted as queries; they are looked up directly.

        Raises KeyError if a plain key (or the value of a key-attribute query) is not in the dict.
        Raises ValueError if the attribute name or the value of an 'attr == val' query is empty.
        """
        # Only strings can carry the query syntax. Testing "'==' in args" on a non-string key (e.g. an int)
        # would raise a TypeError instead of performing an ordinary lookup.
        if not (isinstance(args, str) and '==' in args):
            return super().__getitem__(args)

        key, val = args.split("==")
        key = key.strip()
        val = val.strip()  # val is a string
        if not key:
            raise ValueError("An attribute name is required before '=='.")
        if not val:
            raise ValueError("A value is required after '=='.")
        if key == self.key_attr:
            # querying on the key attribute is the same as a direct lookup
            return super().__getitem__(val)
        return {item for item in self.values() if getattr(item, key, None) == val}

    def __setitem__(self, key, value):
        """Set a value in the LabelledDict with the given key. The key must equal value.key_attr.

        See LabelledDict.add for a simpler syntax since the key is redundant.
        Raises TypeError if key already exists.
        Raises ValueError if value does not have attribute key_attr.
        Raises KeyError if key does not equal the key_attr attribute of value.
        """
        if key in self:
            raise TypeError("Key '%s' is already in this dict. Cannot reset items in a %s."
                            % (key, self.__class__.__name__))
        self.__check_value(value)
        if key != getattr(value, self.key_attr):
            raise KeyError("Key '%s' must equal attribute '%s' of '%s'." % (key, self.key_attr, value))
        super().__setitem__(key, value)
        if self.__add_callable:
            self.__add_callable(value)

    def add(self, value):
        """Add a value to the dict with the key value.key_attr.

        Raises ValueError if value does not have attribute key_attr.
        """
        # validate first so that a missing attribute is reported as a ValueError rather than an AttributeError
        self.__check_value(value)
        self.__setitem__(getattr(value, self.key_attr), value)

    def __check_value(self, value):
        """Raise ValueError if value does not have the key attribute of this LabelledDict."""
        if not hasattr(value, self.key_attr):
            raise ValueError("Cannot set value '%s' in %s. Value must have attribute '%s'."
                             % (value, self.__class__.__name__, self.key_attr))

    def pop(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the returned value."""
        ret = super().pop(k)
        if self.__remove_callable:
            self.__remove_callable(ret)
        return ret

    def popitem(self):
        """Remove the last added item. If remove_callable was initialized, call that on the returned value.

        Note: popitem returns a tuple (key, value) but the remove_callable will be called only on the value.

        Note: in Python 3.5 and earlier, dictionaries are not ordered, so popitem removes an arbitrary item.
        """
        ret = super().popitem()
        if self.__remove_callable:
            self.__remove_callable(ret[1])  # execute callable only on dict value
        return ret

    def clear(self):
        """Remove all items. If remove_callable was initialized, call that on each returned value.

        The order of removal depends on the popitem method.
        """
        while len(self):
            self.popitem()

    def __delitem__(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the matching value."""
        item = self[k]
        super().__delitem__(k)
        if self.__remove_callable:
            self.__remove_callable(item)

    def setdefault(self, k):
        """setdefault is not supported. A TypeError will be raised."""
        raise TypeError('setdefault is not supported for %s' % self.__class__.__name__)

    def update(self, other):
        """update is not supported. A TypeError will be raised."""
        raise TypeError('update is not supported for %s' % self.__class__.__name__)

1107 

1108 

@docval_macro('array_data')
class StrDataset(h5py.Dataset):
    """Wrapper around a dataset that decodes the stored bytes to str when items are read.

    Attributes not defined on the wrapper are looked up on the wrapped dataset.
    """

    def __init__(self, dset, encoding, errors='strict'):
        """
        :param dset: the dataset to wrap
        :param encoding: the encoding to decode with; if None, it is taken from the string dtype of ``dset``
        :param errors: the error handling scheme passed to ``bytes.decode``
        """
        self.dset = dset
        self.encoding = h5py.h5t.check_string_dtype(dset.dtype).encoding if encoding is None else encoding
        self.errors = errors

    def __getattr__(self, name):
        # only reached when normal attribute lookup fails; defer to the wrapped dataset
        return getattr(self.dset, name)

    def __repr__(self):
        # strip the surrounding angle brackets from the wrapped dataset's repr
        inner = repr(self.dset)[1:-1]
        return '<StrDataset for %s>' % inner

    def __len__(self):
        return len(self.dset)

    def __getitem__(self, args):
        """Read ``args`` from the wrapped dataset and decode the result.

        A scalar result is returned as a single decoded str; otherwise an object array of decoded strings with
        the same shape as the raw result is returned.
        """
        raw = self.dset[args]
        codec, on_error = self.encoding, self.errors
        # numpy.char.decode() seems like the obvious thing to use. But it only
        # accepts numpy string arrays, not object arrays of bytes (which we
        # return from HDF5 variable-length strings). And the numpy
        # implementation is not faster than doing it with a loop; in fact, by
        # not converting the result to a numpy unicode array, the
        # naive way can be faster! (Comparing with numpy 1.18.4, June 2020)
        if np.isscalar(raw):
            return raw.decode(codec, on_error)

        decoded = [item.decode(codec, on_error) for item in raw.flat]
        return np.array(decoded, dtype=object).reshape(raw.shape)