Coverage for src/hdmf/utils.py: 97%
586 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
1import collections
2import copy as _copy
3import types
4import warnings
5from abc import ABCMeta
6from enum import Enum
8import h5py
9import numpy as np
# Registry of docval "macros": each key is a string that can be used as a docval type and that stands
# for the list of concrete types stored under it. The lists are mutable so that more types can be
# registered after import (see docval_macro).
__macros = dict(
    array_data=[np.ndarray, list, tuple, h5py.Dataset],
    scalar_data=[str, int, float, bytes, bool],
    data=[],
)

try:
    # zarr is an optional dependency: when it can be imported, zarr.Array is additionally accepted
    # as array data to support conversion of data from Zarr to HDMF
    import zarr
    __macros['array_data'].append(zarr.Array)
except ImportError:
    pass
# Codes that tell docval how to handle positional arguments passed to a decorated function:
# ALLOWED (accept them), WARNING (accept them but collect a warning), ERROR (collect an error).
AllowPositional = Enum('AllowPositional', ['ALLOWED', 'WARNING', 'ERROR'])
# Concrete types accepted for the 'bool', 'uint', 'int' and 'float' type checks
__supported_bool_types = (bool, np.bool_)
__supported_uint_types = (np.uint8, np.uint16, np.uint32, np.uint64)
__supported_int_types = (int, np.int8, np.int16, np.int32, np.int64)
# np.float128 and np.longdouble are appended (in that order) only when this numpy build provides them.
# longdouble is included because on windows python<=3.5, h5py floats resolve float64s as either
# np.float64 or np.longdouble non-deterministically. a future version of h5py will fix this. see #112
__supported_float_types = (float, np.float16, np.float32, np.float64) + tuple(
    getattr(np, optional_name)
    for optional_name in ("float128", "longdouble")
    if hasattr(np, optional_name)  # pragma: no cover
)
# Types that a docval argument may have when an 'enum' of allowed values is specified
__allowed_enum_types = (
    __supported_bool_types
    + __supported_uint_types
    + __supported_int_types
    + __supported_float_types
    + (str,)
)
def docval_macro(macro):
    """Create a class decorator that registers the decorated class under the key macro in the __macros dict.

    :param macro: Name of the macro (key of __macros). If the key does not exist yet, it is created
                  with an empty list before the class is appended.
    :return: A decorator that appends the class to the list of types of the macro and returns the
             class unchanged.
    """

    def _dec(cls):
        __macros.setdefault(macro, list()).append(cls)
        return cls

    return _dec
def get_docval_macro(key=None):
    """
    Look up the docval macros, i.e., strings that represent a customizable list of types for use in docval.

    :param key: Name of the macro. If key=None, then a deep copy of the dictionary of all macros is returned.
                Otherwise, a tuple of the types associated with the key is returned.
    :raises KeyError: if key is not None and no macro with that name exists
    """
    if key is not None:
        return tuple(__macros[key])
    return _copy.deepcopy(__macros)
def __type_okay(value, argtype, allow_none=False):
    """Check a value against a type

    Unlike :py:func:`isinstance`, the type may also be given as a string. A string can be the name of
    a docval macro, one of the general names ``'uint'``, ``'int'``, ``'float'`` or ``'bool'``, or the
    name of any class in the method resolution order of the value's class. The types ``int``,
    ``float`` and ``bool`` are checked with the same helpers as the corresponding string names.

    Args:
        value (any): the value to check
        argtype (type, str, tuple, list): the type to check for, or a tuple/list of alternatives of
            which at least one has to match; any other object (e.g. None) accepts every value
        allow_none (bool): whether or not to allow None as a valid value

    Returns:
        bool: True if value is a valid instance of argtype
    """
    if value is None:
        return allow_none

    if isinstance(argtype, str):
        # macros take precedence over the general type names
        if argtype in __macros:
            return __type_okay(value, __macros[argtype], allow_none=allow_none)
        if argtype == 'uint':
            return __is_uint(value)
        if argtype == 'int':
            return __is_int(value)
        if argtype == 'float':
            return __is_float(value)
        if argtype == 'bool':
            return __is_bool(value)
        # otherwise interpret the string as the name of a class that the value is an instance of
        return argtype in [cls.__name__ for cls in value.__class__.__mro__]

    if isinstance(argtype, type):
        if argtype is int:
            return __is_int(value)
        if argtype is float:
            return __is_float(value)
        if argtype is bool:
            return __is_bool(value)
        return isinstance(value, argtype)

    if isinstance(argtype, (tuple, list)):
        # value is known to be not None here, so allow_none does not need to be passed on
        for candidate in argtype:
            if __type_okay(value, candidate):
                return True
        return False

    # argtype is None
    return True
def __shape_okay_multi(value, argshape):
    """Check the shape of value against a single allowed shape or against several alternative shapes.

    :param value: the data whose shape is checked
    :param argshape: one shape, or a tuple/list of shapes; it is treated as a collection of
                     alternative shapes if its first element is itself exactly a tuple or a list
    :return: True if the shape of value matches the shape (or at least one of the alternatives)
    """
    has_alternatives = type(argshape[0]) in (tuple, list)
    if not has_alternatives:
        return __shape_okay(value, argshape)
    for candidate in argshape:
        if __shape_okay(value, candidate):
            return True
    return False
def __shape_okay(value, argshape):
    """Check the shape of value, as determined by get_data_shape, against one allowed shape.

    :param value: the data whose shape is checked
    :param argshape: the expected shape; None for a dimension accepts any size of that dimension
    :return: True if the number of dimensions is the same and every dimension matches
    """
    valshape = get_data_shape(value)
    if len(valshape) != len(argshape):
        return False
    return all(expected in (actual, None) for actual, expected in zip(valshape, argshape))
def __is_uint(value):
    """Return True if value is an instance of one of the supported unsigned integer types."""
    matches = isinstance(value, __supported_uint_types)
    return matches
def __is_int(value):
    """Return True if value is an instance of one of the supported (signed) integer types."""
    matches = isinstance(value, __supported_int_types)
    return matches
def __is_float(value):
    """Return True if value is an instance of one of the supported floating point types."""
    matches = isinstance(value, __supported_float_types)
    return matches
def __is_bool(value):
    """Return True if value is an instance of one of the supported boolean types."""
    matches = isinstance(value, __supported_bool_types)
    return matches
def __format_type(argtype):
    """Format a docval type specification as text for use in error messages.

    :param argtype: a str (returned as is), a type (its name is returned), a tuple or list of such
                    specifications (joined as "a, b or c"), or None (rendered as "any type")
    :raises ValueError: if argtype is none of the above
    :return: the formatted type
    """
    if isinstance(argtype, str):
        return argtype
    if isinstance(argtype, type):
        return argtype.__name__
    if isinstance(argtype, (tuple, list)):
        names = [__format_type(member) for member in argtype]
        if len(names) <= 1:
            return names[0]
        *leading, final = names
        return "%s or %s" % (", ".join(leading), final)
    if argtype is None:
        return "any type"
    raise ValueError("argtype must be a type, str, list, or tuple")
def __check_enum(argval, arg):
    """
    Helper function to check whether the given argument value is one of the values allowed by the enum
    specification of the argument.

    :param argval: argument value passed to the function/method
    :param arg: argument validator - the specification dictionary for this argument (the keys 'enum' and
                'name' are used)

    :return: None if the value validates successfully, error message if the value does not.
    """
    if argval in arg['enum']:
        return None
    quoted_value = __fmt_str_quotes(argval)
    return "forbidden value for '{}' (got {}, expected {})".format(arg['name'], quoted_value, arg['enum'])
def __fmt_str_quotes(x):
    """Return x as text for use in messages, with single quotes around x if x is a string.

    A list or tuple is rendered with its default formatting and any other object with str().
    """
    if isinstance(x, (list, tuple)):
        text = format(x)
    elif isinstance(x, str):
        text = "'%s'" % x
    else:
        text = str(x)
    return text
def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, allow_extra=False,  # noqa: C901
                 allow_positional=AllowPositional.ALLOWED):
    """
    Internal helper function used by the docval decorator to parse and validate function arguments

    Problems found while parsing are collected in lists of messages instead of being raised, so that the
    caller can decide how to report them. A TypeError or ValueError raised while parsing stops the
    parsing and its message is recorded as a type error or value error, respectively.

    :param validator: List of dicts from docval with the description of the arguments. Arguments without
                      a 'default' key are expected to come before the arguments with a 'default' key.
    :param args: List of the values of positional arguments supplied by the caller
    :param kwargs: Dict keyword arguments supplied by the caller where keys are the argument name and
                   values are the argument value.
    :param enforce_type: Boolean indicating whether the type of arguments should be enforced
    :param enforce_shape: Boolean indicating whether the dimensions of array arguments
                          should be enforced if possible.
    :param allow_extra: Boolean indicating whether extra keyword arguments are allowed (if False and extra keyword
                        arguments are specified, then an error is raised).
    :param allow_positional: integer code indicating whether positional arguments are allowed:
                             AllowPositional.ALLOWED: positional arguments are allowed
                             AllowPositional.WARNING: return warning if positional arguments are supplied
                             AllowPositional.ERROR: return error if positional arguments are supplied

    :return: Dict with:
        * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
        * 'future_warnings' : List of string with warning messages
        * 'type_errors' : List of string with error messages about missing, unrecognized or wrongly typed arguments
        * 'value_errors' : List of string with error messages about the shape or value of arguments
        * 'syntax_errors' : List of string with error messages about the use of positional arguments
    """
    ret = dict()
    syntax_errors = list()
    type_errors = list()
    value_errors = list()
    future_warnings = list()
    argsi = 0  # index of the next positional argument in args that has not been matched yet
    extras = dict()  # has to be initialized to empty here, to avoid spurious errors reported upon early raises

    try:
        # check for duplicates in docval
        names = [x['name'] for x in validator]
        duplicated = [item for item, count in collections.Counter(names).items()
                      if count > 1]
        if duplicated:
            raise ValueError(
                'The following names are duplicated: {}'.format(duplicated))

        if allow_extra:  # extra keyword arguments are allowed so do not consider them when checking number of args
            if len(args) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d positional' % (len(validator), names, len(args))
                )
        else:  # allow for keyword args
            if len(args) + len(kwargs) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d: %d positional and %d keyword %s'
                    % (len(validator), names, len(args) + len(kwargs), len(args), len(kwargs), sorted(kwargs))
                )

        if args:
            if allow_positional == AllowPositional.WARNING:
                msg = ('Using positional arguments for this method is discouraged and will be deprecated '
                       'in a future major release. Please use keyword arguments to ensure future compatibility.')
                future_warnings.append(msg)
            elif allow_positional == AllowPositional.ERROR:
                msg = 'Only keyword arguments (e.g., func(argname=value, ...)) are allowed for this method.'
                syntax_errors.append(msg)

        # Start from all keyword arguments and remove the ones that are matched to the docval specification
        # below; whatever remains is an extra argument. This has to happen after the early raises above and
        # before the first next(it): an empty docval specification ends the parsing right there and the
        # extra keyword arguments would otherwise be silently dropped even if allow_extra=True.
        extras = dict(kwargs)

        # iterate through the docval specification and find a matching value in args / kwargs
        it = iter(validator)
        arg = next(it)  # the StopIteration raised at the end of the specification ends the parsing (see below)

        # process positional arguments of the docval specification (no default value)
        while True:
            if 'default' in arg:
                break
            argname = arg['name']
            argval_set = False
            if argname in kwargs:
                # if this positional arg is specified by a keyword arg and there are remaining positional args that
                # have not yet been matched, then it is undetermined what those positional args match to. thus, raise
                # an error
                if argsi < len(args):
                    type_errors.append("got multiple values for argument '%s'" % argname)
                argval = kwargs.get(argname)
                extras.pop(argname, None)
                argval_set = True
            elif argsi < len(args):
                argval = args[argsi]
                argval_set = True

            if not argval_set:
                type_errors.append("missing argument '%s'" % argname)
            else:
                if enforce_type:
                    if not __type_okay(argval, arg['type']):
                        if argval is None:
                            fmt_val = (argname, __format_type(arg['type']))
                            type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                        else:
                            fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                            type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
                if enforce_shape and 'shape' in arg:
                    valshape = get_data_shape(argval)
                    while valshape is None:
                        if argval is None:
                            break
                        if not hasattr(argval, argname):
                            fmt_val = (argval, argname, arg['shape'])
                            value_errors.append("cannot check shape of object '%s' for argument '%s' "
                                                "(expected shape '%s')" % fmt_val)
                            break
                        # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                        argval = getattr(argval, argname)
                        valshape = get_data_shape(argval)
                    if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                        fmt_val = (argname, valshape, arg['shape'])
                        value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
                if 'enum' in arg:
                    err = __check_enum(argval, arg)
                    if err:
                        value_errors.append(err)

                # NOTE: argval may have been rebound by the unpacking in the shape check above, in which
                # case the unpacked value (not the object passed by the caller) is stored
                ret[argname] = argval
            argsi += 1
            arg = next(it)

        # process arguments of the docval specification with a default value
        # NOTE: the default value will be deepcopied, so 'default': list() is safe unlike in normal python
        while True:
            argname = arg['name']
            if argname in kwargs:
                ret[argname] = kwargs.get(argname)
                extras.pop(argname, None)
            elif len(args) > argsi:
                ret[argname] = args[argsi]
                argsi += 1
            else:
                ret[argname] = _copy.deepcopy(arg['default'])
            argval = ret[argname]
            if enforce_type:
                # None is accepted if the default value is None or if allow_none is set for the argument
                if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                    if argval is None and arg['default'] is None:
                        fmt_val = (argname, __format_type(arg['type']))
                        type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                    else:
                        fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                        type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
            if enforce_shape and 'shape' in arg and argval is not None:
                valshape = get_data_shape(argval)
                while valshape is None:
                    if argval is None:
                        break
                    if not hasattr(argval, argname):
                        fmt_val = (argval, argname, arg['shape'])
                        value_errors.append("cannot check shape of object '%s' for argument '%s' (expected shape '%s')"
                                            % fmt_val)
                        break
                    # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                    argval = getattr(argval, argname)
                    valshape = get_data_shape(argval)
                if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                    fmt_val = (argname, valshape, arg['shape'])
                    value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
            # here ret[argname] was stored before the shape check, so it keeps the value that was passed in;
            # only the enum check sees a value unpacked by the shape check
            if 'enum' in arg and argval is not None:
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)

            arg = next(it)
    except StopIteration:
        # raised by next(it) once the whole docval specification has been processed
        pass
    except TypeError as e:
        type_errors.append(str(e))
    except ValueError as e:
        value_errors.append(str(e))

    if not allow_extra:
        for key in extras.keys():
            type_errors.append("unrecognized argument: '%s'" % key)
    else:
        # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args.
        # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out
        ret.update(extras)
    return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors,
            'syntax_errors': syntax_errors}
# Names used to store the docval metadata:
#  - docval_idx_name: attribute of the decorated function holding the argument specifications indexed by name
#  - docval_attr_name: attribute of the decorated function holding the docval dictionary
#  - __docval_args_loc: key of the docval dictionary under which the list of argument specifications is stored
docval_idx_name, docval_attr_name, __docval_args_loc = '__dv_idx__', '__docval__', 'args'
def get_docval(func, *args):
    '''Get the docval arguments for a function as a tuple of argument specification dictionaries.

    If args are supplied, return only docval arguments with value for 'name' key equal to the args,
    in the order of the args.

    :param func: the function to get the docval arguments of
    :param args: names of the docval arguments to return; all docval arguments are returned if omitted
    :raises ValueError: if args are supplied and the function has no docval arguments or has no docval
                        argument with one of the given names
    :return: tuple of the argument specifications; an empty tuple if the function has no docval arguments
    '''
    func_docval = getattr(func, docval_attr_name, None)
    if not func_docval:
        if args:
            raise ValueError('Function %s has no docval arguments' % func.__name__)
        return tuple()

    if not args:
        return tuple(func_docval[__docval_args_loc])

    # look the requested names up in the name-indexed cache that is stored next to the docval
    docval_idx = getattr(func, docval_idx_name, None)
    selected = list()
    try:
        for name in args:
            selected.append(docval_idx[name])
    except KeyError as ke:
        raise ValueError('Function %s does not have docval argument %s' % (func.__name__, str(ke)))
    return tuple(selected)
394# def docval_wrap(func, is_method=True):
395# if is_method:
396# @docval(*get_docval(func))
397# def method(self, **kwargs):
398#
399# return call_docval_args(func, kwargs)
400# return method
401# else:
402# @docval(*get_docval(func))
403# def static_method(**kwargs):
404# return call_docval_args(func, kwargs)
405# return method
def fmt_docval_args(func, kwargs):
    ''' Separate positional and keyword arguments

    Useful for methods that wrap other methods

    The values of the docval arguments of func without a default are collected in a list, in docval order
    (None is used for a name that is missing in kwargs). The docval arguments with a default are collected
    in a dict, leaving out those that are missing or None. Other items of kwargs are added to the dict
    only if the docval of func allows extra arguments. kwargs itself is not modified.

    :param func: the function with docval whose arguments should be separated
    :param kwargs: dict of argument names and values
    :raises ValueError: if func has no docval
    :return: tuple of the list of positional argument values and the dict of keyword arguments
    '''
    deprecation_msg = (
        "fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, "
        "call the function directly with the kwargs. Please note that fmt_docval_args "
        "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
        "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
        "is set), then you will need to pop the extra arguments out of kwargs before calling the function."
    )
    warnings.warn(deprecation_msg, PendingDeprecationWarning)

    func_docval = getattr(func, docval_attr_name, None)
    remaining = _copy.copy(kwargs)  # work on a shallow copy so that the caller's dict is left untouched
    if not func_docval:
        raise ValueError('no docval found on %s' % str(func))

    positional = list()
    keyword = dict()
    for spec in func_docval[__docval_args_loc]:
        name = spec['name']
        val = remaining.pop(name, None)
        if 'default' not in spec:
            positional.append(val)
        elif val is not None:
            keyword[name] = val
    if func_docval['allow_extra']:
        keyword.update(remaining)
    return positional, keyword
438# def _remove_extra_args(func, kwargs):
439# """Return a dict of only the keyword arguments that are accepted by the function's docval.
440#
441# If the docval specifies allow_extra=True, then the original kwargs are returned.
442# """
443# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that
444# # kwargs are kept as kwargs instead of parsed into args and kwargs
445# func_docval = getattr(func, docval_attr_name, None)
446# if func_docval:
447# if func_docval['allow_extra']:
448# # if extra args are allowed, return all args
449# return kwargs
450# else:
451# # save only the arguments listed in the function's docval (skip any others present in kwargs)
452# ret_kwargs = dict()
453# for arg in func_docval[__docval_args_loc]:
454# val = kwargs.get(arg['name'], None)
455# if val is not None: # do not return arguments that are not present or have value None
456# ret_kwargs[arg['name']] = val
457# return ret_kwargs
458# else:
459# raise ValueError('No docval found on %s' % str(func))
def call_docval_func(func, kwargs):
    """Call the function with only the keyword arguments that are accepted by the function's docval.

    Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True.

    :param func: the function with docval to call
    :param kwargs: dict of argument names and values
    :return: the return value of func
    """
    deprecation_msg = (
        "call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, "
        "call the function directly with the kwargs. Please note that call_docval_func "
        "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
        "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
        "is set), then you will need to pop the extra arguments out of kwargs before calling the function."
    )
    warnings.warn(deprecation_msg, PendingDeprecationWarning)

    # fmt_docval_args issues its own PendingDeprecationWarning. Warnings issued inside this context are
    # recorded instead of shown, and the recorded list is discarded, so that two
    # PendingDeprecationWarnings saying the same thing are not raised.
    with warnings.catch_warnings(record=True):
        warnings.simplefilter("ignore", UserWarning)
        warnings.simplefilter("always", PendingDeprecationWarning)
        call_args, call_kwargs = fmt_docval_args(func, kwargs)

    return func(*call_args, **call_kwargs)
def __resolve_type(t):
    """Resolve a docval type specification by replacing the names of macros with the types they stand for.

    :param t: None, a type, a str, or a list/tuple of these (may be nested)
    :raises ValueError: if t (or one of its elements) is none of the above
    :return: None, types and strings that are not the name of a macro are returned unchanged. The name of
             a macro is replaced by the tuple of its types. A list/tuple is returned as a flat tuple of
             its resolved elements.
    """
    if t is None:
        return t
    if isinstance(t, str):
        return tuple(__macros[t]) if t in __macros else t
    if isinstance(t, type):
        return t
    if isinstance(t, (list, tuple)):
        flattened = list()
        for member in t:
            resolved = __resolve_type(member)
            # a tuple is the result of a macro or of a nested list/tuple: merge its elements into the result
            if isinstance(resolved, tuple):
                flattened.extend(resolved)
            else:
                flattened.append(resolved)
        return tuple(flattened)
    msg = "argtype must be a type, a str, a list, a tuple, or None - got %s" % type(t)
    raise ValueError(msg)
def __check_enum_argtype(argtype):
    """Return True/False whether the given argtype or every argtype in the given list/tuple is a supported
    docval enum type"""
    candidates = argtype if isinstance(argtype, (list, tuple)) else (argtype,)
    return all(x in __allowed_enum_types for x in candidates)
def docval(*validator, **options):  # noqa: C901
    '''A decorator for documenting and enforcing type for instance method arguments.

    This decorator takes a list of dictionaries that specify the method parameters. These
    dictionaries are used for enforcing type and building the docstring of the decorated method.

    Each dictionary must contain the keys ``'name'``, ``'type'``, and ``'doc'``. Without further keys
    it defines a positional argument. To define a keyword argument, specify a default value
    using the key ``'default'``. To validate the dimensions of an input array
    add the optional ``'shape'`` key. To restrict an argument to a list or tuple of allowed values,
    add the optional ``'enum'`` key. To allow a None value for an argument,
    either the default value must be None or a different default value must be provided
    and ``'allow_none': True`` must be passed. The ``'type'`` of each dictionary is replaced in place
    by its resolved form (names of macros are expanded to the types they stand for).

    The decorated method must take ``self`` and ``**kwargs`` as arguments.

    When using this decorator, the functions :py:func:`getargs` and
    :py:func:`popargs` can be used for easily extracting arguments from
    kwargs.

    The following code example demonstrates the use of this decorator:

    .. code-block:: python

       @docval({'name': 'arg1', 'type': str, 'doc': 'this is the first positional argument'},
               {'name': 'arg2', 'type': int, 'doc': 'this is the second positional argument'},
               {'name': 'kwarg1', 'type': (list, tuple), 'doc': 'this is a keyword argument', 'default': list()},
               returns='foo object', rtype='Foo')
       def foo(self, **kwargs):
           arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', kwargs)
           ...

    :param enforce_type: Enforce types of input parameters (Default=True)
    :param returns: String describing the return values
    :param rtype: String describing the data type of the return values
    :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True)
    :param enforce_shape: Enforce the dimensions of input arrays (Default=True)
    :param validator: :py:func:`dict` objects specifying the method parameters
    :param allow_extra: Allow extra arguments (Default=False)
    :param allow_positional: Allow positional arguments (Default=True). Pass ``AllowPositional.WARNING`` or
                             ``AllowPositional.ERROR`` to warn about or to reject positional arguments.
    :param options: additional options for documenting and validating method parameters
    :raises Exception: when the decorator is applied, if a dictionary is not a valid argument specification
    '''
    # options that are consumed by docval itself; whatever is left in options is stored with the docval
    enforce_type = options.pop('enforce_type', True)
    enforce_shape = options.pop('enforce_shape', True)
    returns = options.pop('returns', None)
    rtype = options.pop('rtype', None)
    is_method = options.pop('is_method', True)
    allow_extra = options.pop('allow_extra', False)
    allow_positional = options.pop('allow_positional', True)

    def dec(func):
        docval_info = _copy.copy(options)
        docval_info['allow_extra'] = allow_extra
        docval_info['allow_positional'] = allow_positional
        # the options 'func_name' and 'doc' override the name and the docstring of the decorated function
        func.__name__ = docval_info.get('func_name', func.__name__)
        func.__doc__ = docval_info.get('doc', func.__doc__)

        supported_keys = {'name', 'doc', 'type', 'shape', 'enum', 'default', 'allow_none', 'help'}
        required_specs = list()  # specifications without a default value
        optional_specs = list()  # specifications with a default value
        for spec in validator:
            # catch unsupported keys
            unknown_keys = set(spec.keys()) - supported_keys
            if unknown_keys:
                raise Exception('docval for {}: keys {} are not supported by docval'.format(spec['name'],
                                                                                            sorted(unknown_keys)))
            # check that arg type is valid
            try:
                spec['type'] = __resolve_type(spec['type'])
            except Exception as e:
                msg = "docval for %s: error parsing argument type: %s" % (spec['name'], e.args[0])
                raise Exception(msg)
            if 'enum' in spec:
                allowed_values = spec['enum']
                # check that value for enum key is a list or tuple (cannot have only one allowed value)
                if not isinstance(allowed_values, (list, tuple)):
                    msg = ('docval for %s: enum value must be a list or tuple (received %s)'
                           % (spec['name'], type(allowed_values)))
                    raise Exception(msg)
                # check that arg type is compatible with enum
                if not __check_enum_argtype(spec['type']):
                    msg = 'docval for {}: enum checking cannot be used with arg type {}'.format(spec['name'],
                                                                                                spec['type'])
                    raise Exception(msg)
                # check that enum allowed values are allowed by arg type
                if not all(__type_okay(x, spec['type']) for x in allowed_values):
                    msg = ('docval for {}: enum values are of types not allowed by arg type (got {}, '
                           'expected {})'.format(spec['name'], [type(x) for x in allowed_values], spec['type']))
                    raise Exception(msg)
            if spec.get('allow_none', False) and 'default' not in spec:
                msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(
                    spec['name'])
                raise Exception(msg)
            if 'default' in spec:
                optional_specs.append(spec)
            else:
                required_specs.append(spec)
        # arguments without a default value always come before the arguments with a default value
        ordered_specs = required_specs + optional_specs
        docval_info[__docval_args_loc] = ordered_specs

        def _check_args(args, kwargs):
            """Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
            # this function was separated from func_call() in order to make stepping through lines of code using pdb
            # easier
            call_args = args[1:] if is_method else args  # the first argument of a method (self/cls) is not checked
            parsed = __parse_args(
                ordered_specs,
                call_args,
                kwargs,
                enforce_type=enforce_type,
                enforce_shape=enforce_shape,
                allow_extra=allow_extra,
                allow_positional=allow_positional
            )

            parse_warnings = parsed.get('future_warnings')
            if parse_warnings:
                msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings))
                warnings.warn(msg, FutureWarning)

            # report the first non-empty category of errors: type errors, then value errors, then syntax errors
            error_categories = (('type_errors', TypeError),
                                ('value_errors', ValueError),
                                ('syntax_errors', SyntaxError))
            for category, exception_class in error_categories:
                messages = parsed.get(category)
                if messages:
                    msg = '%s: %s' % (func.__qualname__, ', '.join(messages))
                    raise exception_class(msg)

            return parsed['args']

        # this code is intentionally separated to make stepping through lines of code using pdb easier
        if is_method:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(args[0], **pargs)
        else:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(**pargs)

        rtype_name = rtype.__name__ if isinstance(rtype, type) else rtype
        docstring = __googledoc(func, ordered_specs, returns=returns, rtype=rtype_name)
        docval_idx = {spec['name']: spec for spec in ordered_specs}  # cache a name-indexed dictionary of args
        func_call.__doc__ = docstring
        func_call.__name__ = func.__name__
        setattr(func_call, docval_attr_name, docval_info)
        setattr(func_call, docval_idx_name, docval_idx)
        func_call.__module__ = func.__module__
        return func_call

    return dec
def __sig_arg(argval):
    """Format one docval argument specification for the signature line of a generated docstring.

    :param argval: the specification dictionary of the argument
    :return: the name of the argument, followed by ``=<default>`` if the specification has a default
             (a default that is a string is put in single quotes)
    """
    name = argval['name']
    if 'default' not in argval:
        return name
    default = argval['default']
    shown_default = "'%s'" % default if isinstance(default, str) else str(default)
    return "%s=%s" % (name, shown_default)
def __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=None, returns=None, rtype=None):
    '''Generate a docstring for func from its docval argument specifications

    The result starts with a signature line built from the name of func and the arguments, followed by
    the formatted description and arguments and, if available, the formatted return information.

    :param func: the function to generate the docstring for; its current docstring is used as description
    :param validator: list of the docval argument specification dictionaries
    :param docstring_fmt: format string with the (optional) fields ``description`` and ``args``
    :param arg_fmt: format string for one argument with the fields ``name``, ``doc`` and ``type``
    :param ret_fmt: format string with the fields ``returns`` and ``rtype``
    :param returns: description of the return value
    :param rtype: description of the type of the return value
    :return: the generated docstring
    '''

    def to_str(argtype):
        # render a type as a Sphinx cross-reference; anything else (e.g. a string) is passed through unchanged
        if not isinstance(argtype, type):
            return argtype
        module = argtype.__module__
        name = argtype.__name__
        if module.startswith(("h5py", "pandas", "builtins")):
            return ":py:class:`~{name}`".format(name=name)
        return ":py:class:`~{module}.{name}`".format(name=name, module=module)

    def __sphinx_arg(arg):
        argtype = arg['type']
        if isinstance(argtype, (tuple, list)):
            type_text = " or ".join(map(to_str, argtype))
        else:
            type_text = to_str(argtype)
        return arg_fmt.format(name=arg.get('name'), doc=arg.get('doc'), type=type_text)

    signature = ", ".join(__sig_arg(arg) for arg in validator)
    description = "" if func.__doc__ is None else func.__doc__.strip()
    formatted_args = "\n".join(__sphinx_arg(arg) for arg in validator)

    doc = "%s(%s)\n\n" % (func.__name__, signature)
    doc += docstring_fmt.format(description=description, args=formatted_args)
    # the return section is only added if all three pieces of information are available
    if ret_fmt is not None and returns is not None and rtype is not None:
        doc += ret_fmt.format(returns=returns, rtype=rtype)
    return doc
def __sphinxdoc(func, validator, returns=None, rtype=None):
    """Generate a docstring for func that documents the arguments with ``:param:`` / ``:type:`` fields
    and the return value with ``:returns:`` / ``:rtype:`` fields.

    :param func: the function to generate the docstring for
    :param validator: list of the docval argument specification dictionaries
    :param returns: description of the return value
    :param rtype: description of the type of the return value
    :return: the generated docstring
    """
    formats = dict(
        docstring_fmt="{description}\n\n{args}\n",
        arg_fmt=":param {name}: {doc}\n:type {name}: {type}",
        ret_fmt=":returns: {returns}\n:rtype: {rtype}",
    )
    return __builddoc(func, validator, formats['docstring_fmt'], formats['arg_fmt'],
                      ret_fmt=formats['ret_fmt'], returns=returns, rtype=rtype)
def __googledoc(func, validator, returns=None, rtype=None):
    """Generate a docstring for func that documents the arguments in an ``Args:`` section and the
    return value in a ``Returns:`` section.

    :param func: the function to generate the docstring for
    :param validator: list of the docval argument specification dictionaries; the ``Args:`` section is
                      left out if the list is empty
    :param returns: description of the return value
    :param rtype: description of the type of the return value
    :return: the generated docstring
    """
    # NOTE(review): runs of whitespace appear to have been collapsed in the listing this code was taken
    # from - confirm the intended number of leading spaces inside arg_fmt and ret_fmt
    arg_fmt = " {name} ({type}): {doc}"
    ret_fmt = "\nReturns:\n {rtype}: {returns}"
    sections = ["{description}\n\n"]
    if len(validator) > 0:
        sections.append("Args:\n{args}\n")
    docstring_fmt = "".join(sections)
    return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype)
def getargs(*argnames):
    """getargs(*argnames, argdict)
    Convenience function to retrieve arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values. The dictionary is not modified.

    :raises ValueError: if an argument name is not found in the dictionary or there is only one argument passed to
        this function or the last argument is not a dictionary
    :return: a single value if there is only one argument name, or a list of values corresponding to the given
        argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, kwargs = argnames
    if not isinstance(kwargs, dict):
        raise ValueError('Last argument must be a dict')

    values = list()
    for key in keys:
        if key not in kwargs:
            raise ValueError("Argument not found in dict: '%s'" % key)
        values.append(kwargs.get(key))
    # a single requested name yields the bare value instead of a one-element list
    return values[0] if len(keys) == 1 else values
def popargs(*argnames):
    """popargs(*argnames, argdict)
    Convenience function to retrieve and remove arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values. The keys are removed from the dictionary one after the other, so keys that precede
    a missing key have already been removed when the error is raised.

    :raises ValueError: if an argument name is not found in the dictionary or there is only one argument passed to
        this function or the last argument is not a dictionary
    :return: a single value if there is only one argument name, or a list of values corresponding to the given
        argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, kwargs = argnames
    if not isinstance(kwargs, dict):
        raise ValueError('Last argument must be a dict')

    popped = list()
    try:
        for key in keys:
            popped.append(kwargs.pop(key))
    except KeyError as ke:
        raise ValueError('Argument not found in dict: %s' % str(ke))
    # a single requested name yields the bare value instead of a one-element list
    return popped[0] if len(keys) == 1 else popped
def popargs_to_dict(keys, argdict):
    """Convenience function to retrieve and remove arguments from a dictionary in batch into a dictionary.

    Same as `{key: argdict.pop(key) for key in keys}` with a custom ValueError

    :param keys: Iterable of keys to pull out of argdict
    :type keys: Iterable
    :param argdict: Dictionary to process
    :type argdict: dict
    :raises ValueError: if an argument name is not found in the dictionary
    :return: a dict of arguments removed
    """
    removed = dict()
    try:
        for key in keys:
            removed[key] = argdict.pop(key)
    except KeyError as ke:
        raise ValueError('Argument not found in dict: %s' % str(ke))
    return removed
class ExtenderMeta(ABCMeta):
    """A metaclass that will extend the base class initialization
    routine by executing additional functions defined in
    classes that use this metaclass

    Functions are marked with the decorators :py:meth:`pre_init` and :py:meth:`post_init`. Whenever a
    class that uses this metaclass is created, the marked functions found on the class are called with
    the name, the bases and the class dictionary of the new class.

    In general, this class should only be used by core developers.
    """

    # names of the attributes that are set on a function to mark it as pre-init / post-init routine
    __preinit = '__preinit'
    __postinit = '__postinit'

    @classmethod
    def pre_init(cls, func):
        '''A decorator for defining a routine to run before the base initialization of a type object.

        The function is marked and returned as a classmethod.
        '''
        setattr(func, cls.__preinit, True)
        return classmethod(func)

    @classmethod
    def post_init(cls, func):
        '''A decorator for defining a routine to run after creation of a type object.

        An example use of this method would be to define a classmethod that gathers
        any defined methods or attributes after the base Python type construction (i.e. after
        :py:func:`type` has been called)

        The function is marked and returned as a classmethod.
        '''
        setattr(func, cls.__postinit, True)
        return classmethod(func)

    def __init__(cls, name, bases, classdict):
        # run the routines marked with pre_init before the initialization of the base metaclass ...
        for attr_name in dir(cls):
            member = getattr(cls, attr_name)
            if hasattr(member, cls.__preinit):
                member(name, bases, classdict)
        super().__init__(name, bases, classdict)
        # ... and the routines marked with post_init after it
        for attr_name in dir(cls):
            member = getattr(cls, attr_name)
            if hasattr(member, cls.__postinit):
                member(name, bases, classdict)
def get_data_shape(data, strict_no_data_load=False):
    """
    Helper function used to determine the shape of the given array.

    For nested tuples, lists, and sets the shape is found by following the first element along each
    dimension, which assumes the data is regular (rectangular). For out-of-core iterators this means the
    first item along each dimension may be loaded into memory. Pass strict_no_data_load=True to prevent
    that, at the cost that the shape may not be determinable.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown
             will be set to None. None is returned for dicts, str/bytes, objects without a length, and when
             strict_no_data_load prevents inspecting the data.
    """

    def _first_element_shape(container):
        # Walk down the chain of first elements, recording the length found at each level.
        dims = list()
        current = container
        while hasattr(current, '__len__'):
            dims.append(len(current))
            if not len(current):
                break  # empty level: nothing further to inspect
            first = next(iter(current))
            if isinstance(first, (str, bytes)):
                break  # strings are treated as scalars, not as another dimension
            current = first
        return tuple(dims)

    # NOTE: data.maxshape will fail on empty h5py.Dataset without shape or maxshape. this will be fixed in h5py 3.0
    if hasattr(data, 'maxshape'):
        return data.maxshape
    if hasattr(data, 'shape') and data.shape is not None:
        return data.shape
    if isinstance(data, dict):
        return None
    if not hasattr(data, '__len__') or isinstance(data, (str, bytes)):
        return None
    if strict_no_data_load and not isinstance(data, (list, tuple, set)):
        # inspecting elements of an arbitrary container could load data
        return None
    return _first_element_shape(data)
def pystr(s):
    """
    Convert a string of characters to Python str object

    A bytes object is decoded as UTF-8; any other object is returned unchanged.
    """
    return s.decode('utf-8') if isinstance(s, bytes) else s
def to_uint_array(arr):
    """
    Convert a numpy array or array-like object to a numpy array of unsigned integers with the same dtype itemsize.

    For example, a list of int32 values is converted to a numpy array with dtype uint32. An array that already has
    an unsigned integer dtype is returned as is.

    :raises ValueError: if input array contains values that are not unsigned integers or non-negative integers.
    """
    values = arr if isinstance(arr, np.ndarray) else np.array(arr)
    dtype = values.dtype
    if np.issubdtype(dtype, np.unsignedinteger):
        return values
    if not np.issubdtype(dtype, np.integer):
        raise ValueError('Cannot convert array of dtype %s to uint.' % dtype)
    if (values < 0).any():
        raise ValueError('Cannot convert negative integer values to uint.')
    # keep precision: pick the unsigned dtype with the same number of bits
    unsigned_dtype = np.dtype('uint' + str(int(dtype.itemsize * 8)))
    return values.astype(unsigned_dtype)
class LabelledDict(dict):
    """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.

    For example, if the key attribute is set as 'name' in __init__, then all objects added to the LabelledDict must have
    a 'name' attribute and a particular object in the LabelledDict can be accessed using the syntax ['object_name'] if
    the object.name == 'object_name'. In this way, LabelledDict acts like a set where values can be retrieved using
    square brackets around the value of the key attribute. An 'add' method makes clear the association between the key
    attribute of the LabelledDict and the values of the LabelledDict.

    LabelledDict also supports retrieval of values with the syntax my_dict['attr == val'], which returns a set of
    objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that
    condition, an empty set is returned. Note that if 'attr' equals the key attribute, then the single matching value
    is returned, not a set, and a KeyError is raised if no value is stored under that key.

    LabelledDict does not support changing items that have already been set. A TypeError will be raised when using
    __setitem__ on keys that already exist in the dict. The setdefault and update methods are not supported. A
    TypeError will be raised when these are called.

    A callable function may be passed to the constructor to be run on an item after adding it to this dict using
    the __setitem__ and add methods.

    A callable function may be passed to the constructor to be run on an item after removing it from this dict using
    the __delitem__ (the del operator), pop, and popitem methods. It will also be run on each removed item when using
    the clear method.

    Usage:
      my_dict = LabelledDict(label='my_objects', key_attr='name')
      my_dict[obj.name] = obj
      my_dict.add(obj)  # simpler syntax

    Example:
      # MyTestClass is a class with attributes 'prop1' and 'prop2'. MyTestClass.__init__ sets those attributes.
      ld = LabelledDict(label='all_objects', key_attr='prop1')
      obj1 = MyTestClass('a', 'b')
      obj2 = MyTestClass('d', 'b')
      ld[obj1.prop1] = obj1  # obj1 is added to the LabelledDict with the key obj1.prop1. Any other key is not allowed.
      ld.add(obj2)           # Simpler 'add' syntax enforces the required relationship
      ld['a']                # Returns obj1
      ld['prop1 == a']       # Also returns obj1
      ld['prop2 == b']       # Returns set([obj1, obj2]) - the set of all values v in ld where v.prop2 == 'b'
    """

    @docval({'name': 'label', 'type': str, 'doc': 'the label on this dictionary'},
            {'name': 'key_attr', 'type': str, 'doc': 'the attribute name to use as the key', 'default': 'name'},
            {'name': 'add_callable', 'type': types.FunctionType,
             'doc': 'function to call on an element after adding it to this dict using the add or __setitem__ methods',
             'default': None},
            {'name': 'remove_callable', 'type': types.FunctionType,
             'doc': ('function to call on an element after removing it from this dict using the pop, popitem, clear, '
                     'or __delitem__ methods'),
             'default': None})
    def __init__(self, **kwargs):
        label, key_attr, add_callable, remove_callable = getargs('label', 'key_attr', 'add_callable', 'remove_callable',
                                                                 kwargs)
        self.__label = label
        self.__key_attr = key_attr
        self.__add_callable = add_callable
        self.__remove_callable = remove_callable

    @property
    def label(self):
        """Return the label of this LabelledDict"""
        return self.__label

    @property
    def key_attr(self):
        """Return the attribute used as the key for values in this LabelledDict"""
        return self.__key_attr

    def __getitem__(self, args):
        """Get a value from the LabelledDict with the given key.

        Supports syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an
        attribute 'attr' with a string value 'val'. If no objects match that condition, an empty set is returned.
        Note that if 'attr' equals the key attribute of this LabelledDict, then the single matching value is
        returned, not a set.

        Keys that are not strings are always looked up directly.

        Raises ValueError if the attribute name or the value is missing from an 'attr == val' query.
        Raises KeyError if a direct lookup (or a query on the key attribute) does not match a stored key.
        """
        key = args
        # Only str keys can carry the 'attr == val' query syntax. Testing "'==' in args" on a non-iterable key
        # (e.g. an int) would raise TypeError instead of performing a regular lookup.
        if isinstance(args, str) and '==' in args:
            key, val = args.split("==")
            key = key.strip()
            val = val.strip()  # val is a string
            if not key:
                raise ValueError("An attribute name is required before '=='.")
            if not val:
                raise ValueError("A value is required after '=='.")
            if key != self.key_attr:
                ret = set()
                for item in self.values():
                    if getattr(item, key, None) == val:
                        ret.add(item)
                return ret
            else:
                return super().__getitem__(val)
        else:
            return super().__getitem__(key)

    def __setitem__(self, key, value):
        """Set a value in the LabelledDict with the given key. The key must equal value.key_attr.

        See LabelledDict.add for a simpler syntax since the key is redundant.
        Raises TypeError if key already exists.
        Raises ValueError if value does not have attribute key_attr.
        Raises KeyError if key does not equal the key_attr attribute of value.
        """
        if key in self:
            raise TypeError("Key '%s' is already in this dict. Cannot reset items in a %s."
                            % (key, self.__class__.__name__))
        self.__check_value(value)
        if key != getattr(value, self.key_attr):
            raise KeyError("Key '%s' must equal attribute '%s' of '%s'." % (key, self.key_attr, value))
        super().__setitem__(key, value)
        if self.__add_callable:
            self.__add_callable(value)

    def add(self, value):
        """Add a value to the dict with the key value.key_attr.

        Raises ValueError if value does not have attribute key_attr.
        """
        self.__check_value(value)
        self.__setitem__(getattr(value, self.key_attr), value)

    def __check_value(self, value):
        """Raise ValueError if value does not have the key attribute of this LabelledDict."""
        if not hasattr(value, self.key_attr):
            raise ValueError("Cannot set value '%s' in %s. Value must have attribute '%s'."
                             % (value, self.__class__.__name__, self.key_attr))

    def pop(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the returned value."""
        ret = super().pop(k)
        if self.__remove_callable:
            self.__remove_callable(ret)
        return ret

    def popitem(self):
        """Remove the last added item. If remove_callable was initialized, call that on the returned value.

        Note: popitem returns a tuple (key, value) but the remove_callable will be called only on the value.

        Note: in Python 3.5 and earlier, dictionaries are not ordered, so popitem removes an arbitrary item.
        """
        ret = super().popitem()
        if self.__remove_callable:
            self.__remove_callable(ret[1])  # execute callable only on dict value
        return ret

    def clear(self):
        """Remove all items. If remove_callable was initialized, call that on each returned value.

        The order of removal depends on the popitem method.
        """
        while len(self):
            self.popitem()

    def __delitem__(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the matching value."""
        item = self[k]
        super().__delitem__(k)
        if self.__remove_callable:
            self.__remove_callable(item)

    def setdefault(self, k):
        """setdefault is not supported. A TypeError will be raised."""
        raise TypeError('setdefault is not supported for %s' % self.__class__.__name__)

    def update(self, other):
        """update is not supported. A TypeError will be raised."""
        raise TypeError('update is not supported for %s' % self.__class__.__name__)
@docval_macro('array_data')
class StrDataset(h5py.Dataset):
    """Wrapper to decode strings on reading the dataset"""

    def __init__(self, dset, encoding, errors='strict'):
        """Wrap ``dset``; if ``encoding`` is None, take the encoding from the string dtype of ``dset``."""
        self.dset = dset
        self.encoding = (h5py.h5t.check_string_dtype(dset.dtype).encoding
                         if encoding is None else encoding)
        self.errors = errors

    def __getattr__(self, name):
        # attributes not found on this wrapper are looked up on the wrapped dataset
        return getattr(self.dset, name)

    def __repr__(self):
        wrapped_repr = repr(self.dset)
        # strip the first and last character of the wrapped dataset's repr
        return '<StrDataset for %s>' % wrapped_repr[1:-1]

    def __len__(self):
        return len(self.dset)

    def __getitem__(self, args):
        """Read ``args`` from the wrapped dataset and decode the result with the configured encoding."""
        raw = self.dset[args]
        # numpy.char.decode() seems like the obvious thing to use. But it only
        # accepts numpy string arrays, not object arrays of bytes (which we
        # return from HDF5 variable-length strings). And the numpy
        # implementation is not faster than doing it with a loop; in fact, by
        # not converting the result to a numpy unicode array, the
        # naive way can be faster! (Comparing with numpy 1.18.4, June 2020)
        if np.isscalar(raw):
            return raw.decode(self.encoding, self.errors)

        decoded = [item.decode(self.encoding, self.errors) for item in raw.flat]
        return np.array(decoded, dtype=object).reshape(raw.shape)