Coverage for src/hdmf/utils.py: 97%
600 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
1import collections
2import copy as _copy
3import types
4import warnings
5from abc import ABCMeta
6from enum import Enum
8import h5py
9import numpy as np
# Registry of docval "macros": each key is a string that stands for a customizable
# list of types when it is used as a docval argument type.
__macros = {
    'array_data': [np.ndarray, list, tuple, h5py.Dataset],
    'scalar_data': [str, int, float, bytes, bool],
    'data': [],
}

try:
    # optionally accept zarr.Array as array data to support conversion of data from Zarr to HDMF
    import zarr
    __macros['array_data'].append(zarr.Array)
except ImportError:
    # zarr is not installed, so 'array_data' keeps only the types listed above
    pass
# code to signify how to handle positional arguments in docval
AllowPositional = Enum('AllowPositional', ['ALLOWED', 'WARNING', 'ERROR'])

__supported_bool_types = (bool, np.bool_)
__supported_uint_types = (np.uint8, np.uint16, np.uint32, np.uint64)
__supported_int_types = (int, np.int8, np.int16, np.int32, np.int64)
# np.float128 and np.longdouble are included only when this numpy build exposes them.
# on windows python<=3.5, h5py floats resolve float64s as either np.float64 or np.longdouble
# non-deterministically. a future version of h5py will fix this. see #112
__supported_float_types = (
    (float, np.float16, np.float32, np.float64)
    + tuple(getattr(np, _name) for _name in ("float128", "longdouble") if hasattr(np, _name))
)
# types that may be combined with the docval 'enum' key
__allowed_enum_types = (
    __supported_bool_types
    + __supported_uint_types
    + __supported_int_types
    + __supported_float_types
    + (str,)
)
def docval_macro(macro):
    """Class decorator that registers the decorated class under the given macro name.

    The class is appended to the list stored at ``__macros[macro]``; the list is created
    first if the macro name is not yet known. The class itself is returned unchanged.

    :param macro: key in the ``__macros`` dict to register the class under
    :return: the decorator to apply to a class
    """

    def _register(cls):
        __macros.setdefault(macro, list()).append(cls)
        return cls

    return _register
def get_docval_macro(key=None):
    """
    Return the docval macros, i.e., strings that represent a customizable list of types for use in docval.

    :param key: Name of the macro. If key=None, then a deep copy of the dictionary of all macros is returned.
                Otherwise, a new tuple of the types associated with the key is returned.
    :raises KeyError: if key is not None and is not a registered macro name
    """
    if key is not None:
        return tuple(__macros[key])
    return _copy.deepcopy(__macros)
def __type_okay(value, argtype, allow_none=False):
    """Check a value against a type

    Unlike :py:func:`isinstance`, the type may be given as a string. A string is interpreted,
    in this order, as a docval macro name, as one of the generic names ``'uint'``, ``'int'``,
    ``'float'`` or ``'bool'``, or as the name of any class in the method resolution order of
    the value's class. The builtin types ``int``, ``float`` and ``bool`` are widened to the
    corresponding tuples of supported (numpy) types.

    Args:
        value (any): the value to check
        argtype (type, str, tuple, list, None): the type(s) to check for; None accepts any value
        allow_none (bool): whether or not to allow None as a valid value

    Returns:
        bool: True if value is a valid instance of argtype
    """
    if value is None:
        return allow_none

    if isinstance(argtype, str):
        if argtype in __macros:
            return __type_okay(value, __macros[argtype], allow_none=allow_none)
        if argtype == 'uint':
            return __is_uint(value)
        if argtype == 'int':
            return __is_int(value)
        if argtype == 'float':
            return __is_float(value)
        if argtype == 'bool':
            return __is_bool(value)
        # fall back to matching the string against the class names in the value's MRO
        return any(klass.__name__ == argtype for klass in value.__class__.__mro__)

    if isinstance(argtype, type):
        if argtype is int:
            return __is_int(value)
        if argtype is float:
            return __is_float(value)
        if argtype is bool:
            return __is_bool(value)
        return isinstance(value, argtype)

    if isinstance(argtype, (tuple, list)):
        # value is known not to be None here, so allow_none does not need to be forwarded
        return any(__type_okay(value, candidate) for candidate in argtype)

    # argtype is None
    return True
def __shape_okay_multi(value, argshape):
    """Return True if the shape of value matches argshape or, when argshape is a collection of shapes, any of them.

    argshape is treated as a collection of shapes when its first element is exactly a tuple or a list.
    """
    if type(argshape[0]) not in (tuple, list):
        return __shape_okay(value, argshape)
    # multiple shapes are present
    return any(__shape_okay(value, candidate) for candidate in argshape)
def __shape_okay(value, argshape):
    """Return True if the shape of value (as reported by get_data_shape) matches argshape.

    The number of dimensions must be equal, and every dimension must either equal the expected
    length or have None as its expected length.
    """
    valshape = get_data_shape(value)
    if len(valshape) != len(argshape):
        return False
    return all(expected in (actual, None) for actual, expected in zip(valshape, argshape))
def __is_uint(value):
    """Return True if value is an instance of one of the supported unsigned integer types."""
    return isinstance(value, __supported_uint_types)


def __is_int(value):
    """Return True if value is an instance of one of the supported (signed) integer types."""
    return isinstance(value, __supported_int_types)


def __is_float(value):
    """Return True if value is an instance of one of the supported floating point types."""
    return isinstance(value, __supported_float_types)


def __is_bool(value):
    """Return True if value is an instance of one of the supported boolean types."""
    return isinstance(value, __supported_bool_types)
def __format_type(argtype):
    """Return a human-readable description of a docval type specification for use in error messages.

    :param argtype: a type, a type name (str), a list/tuple of those, or None (meaning any type)
    :return: the type name, or the names joined as "a, b or c" for a list/tuple
    :raises ValueError: if argtype is none of the supported kinds
    """
    if argtype is None:
        return "any type"
    if isinstance(argtype, str):
        return argtype
    if isinstance(argtype, type):
        return argtype.__name__
    if isinstance(argtype, (tuple, list)):
        names = [__format_type(item) for item in argtype]
        if len(names) <= 1:
            return names[0]
        return "%s or %s" % (", ".join(names[:-1]), names[-1])
    raise ValueError("argtype must be a type, str, list, or tuple")
def __check_enum(argval, arg):
    """
    Helper function to check whether the given argument value validates against the enum specification.

    :param argval: argument value passed to the function/method
    :param arg: argument validator - the specification dictionary for this argument (uses keys 'enum' and 'name')

    :return: None if the value validates successfully, error message if the value does not.
    """
    if argval in arg['enum']:
        return None
    return "forbidden value for '{}' (got {}, expected {})".format(arg['name'], __fmt_str_quotes(argval),
                                                                    arg['enum'])
def __fmt_str_quotes(x):
    """Return a printable form of x in which strings are shown with single quotes around them.

    A str is wrapped in single quotes, a list or tuple is formatted as a whole, and anything
    else is converted with str().
    """
    if isinstance(x, str):
        return "'%s'" % x
    if isinstance(x, (list, tuple)):
        return '{}'.format(x)
    return str(x)
def __parse_args(validator, args, kwargs, enforce_type=True, enforce_shape=True, allow_extra=False,  # noqa: C901
                 allow_positional=AllowPositional.ALLOWED):
    """
    Internal helper function used by the docval decorator to parse and validate function arguments

    Problems are collected rather than raised: this function returns lists of messages and leaves it
    to the caller to turn them into warnings/exceptions.

    :param validator: List of dicts from docval with the description of the arguments
    :param args: List of the values of positional arguments supplied by the caller
    :param kwargs: Dict keyword arguments supplied by the caller where keys are the argument name and
                   values are the argument value.
    :param enforce_type: Boolean indicating whether the type of arguments should be enforced
    :param enforce_shape: Boolean indicating whether the dimensions of array arguments
                          should be enforced if possible.
    :param allow_extra: Boolean indicating whether extra keyword arguments are allowed (if False and extra keyword
                        arguments are specified, then an error is raised).
    :param allow_positional: AllowPositional code indicating whether positional arguments are allowed:
                             AllowPositional.ALLOWED: positional arguments are allowed
                             AllowPositional.WARNING: return warning if positional arguments are supplied
                             AllowPositional.ERROR: return error if positional arguments are supplied
                             Any other value is treated like AllowPositional.ALLOWED.

    :return: Dict with:
        * 'args' : Dict all arguments where keys are the names and values are the values of the arguments.
        * 'future_warnings' : List of string with warning messages (discouraged positional arguments)
        * 'type_errors' : List of string with error messages about missing, duplicated, unrecognized or
          wrongly typed arguments
        * 'value_errors' : List of string with error messages about shapes, enum values and duplicated
          names in the docval specification
        * 'syntax_errors' : List of string with error messages about forbidden positional arguments
    """

    ret = dict()
    syntax_errors = list()
    type_errors = list()
    value_errors = list()
    future_warnings = list()
    argsi = 0
    extras = dict()  # has to be initialized to empty here, to avoid spurious errors reported upon early raises
    try:
        # check for duplicates in docval
        names = [x['name'] for x in validator]
        duplicated = [item for item, count in collections.Counter(names).items()
                      if count > 1]
        if duplicated:
            raise ValueError(
                'The following names are duplicated: {}'.format(duplicated))

        if allow_extra:  # extra keyword arguments are allowed so do not consider them when checking number of args
            if len(args) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d positional' % (len(validator), names, len(args))
                )
        else:  # allow for keyword args
            if len(args) + len(kwargs) > len(validator):
                raise TypeError(
                    'Expected at most %d arguments %r, got %d: %d positional and %d keyword %s'
                    % (len(validator), names, len(args) + len(kwargs), len(args), len(kwargs), sorted(kwargs))
                )

        if args:
            if allow_positional == AllowPositional.WARNING:
                msg = ('Using positional arguments for this method is discouraged and will be deprecated '
                       'in a future major release. Please use keyword arguments to ensure future compatibility.')
                future_warnings.append(msg)
            elif allow_positional == AllowPositional.ERROR:
                msg = 'Only keyword arguments (e.g., func(argname=value, ...)) are allowed for this method.'
                syntax_errors.append(msg)

        # iterate through the docval specification and find a matching value in args / kwargs
        # NOTE: both loops below are terminated by the StopIteration that next(it) raises once the
        # specification is exhausted; it is caught at the end of this try block
        it = iter(validator)
        arg = next(it)

        # process positional arguments of the docval specification (no default value)
        extras = dict(kwargs)
        while True:
            if 'default' in arg:
                break
            argname = arg['name']
            argval_set = False
            if argname in kwargs:
                # if this positional arg is specified by a keyword arg and there are remaining positional args that
                # have not yet been matched, then it is undetermined what those positional args match to. thus, raise
                # an error
                if argsi < len(args):
                    type_errors.append("got multiple values for argument '%s'" % argname)
                argval = kwargs.get(argname)  # kwargs is the dict that stores the object names and the values
                extras.pop(argname, None)
                argval_set = True
            elif argsi < len(args):
                argval = args[argsi]
                argval_set = True

            if not argval_set:
                type_errors.append("missing argument '%s'" % argname)
            else:
                from .term_set import TermSetWrapper  # circular import fix
                wrapper = None
                if isinstance(argval, TermSetWrapper):
                    wrapper = argval
                    # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type.
                    argval = argval.value
                if enforce_type:
                    if not __type_okay(argval, arg['type']):
                        if argval is None:
                            fmt_val = (argname, __format_type(arg['type']))
                            type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                        else:
                            fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                            type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
                if enforce_shape and 'shape' in arg:
                    valshape = get_data_shape(argval)
                    while valshape is None:
                        if argval is None:
                            break
                        if not hasattr(argval, argname):
                            fmt_val = (argval, argname, arg['shape'])
                            value_errors.append("cannot check shape of object '%s' for argument '%s' "
                                                "(expected shape '%s')" % fmt_val)
                            break
                        # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                        argval = getattr(argval, argname)
                        valshape = get_data_shape(argval)
                    if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                        fmt_val = (argname, valshape, arg['shape'])
                        value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
                if 'enum' in arg:
                    err = __check_enum(argval, arg)
                    if err:
                        value_errors.append(err)

                if wrapper is not None:
                    # reassign the wrapper so that it can be used to flag HERD "on write"
                    argval = wrapper

                ret[argname] = argval
            argsi += 1
            arg = next(it)

        # process arguments of the docval specification with a default value
        # NOTE: the default value will be deepcopied, so 'default': list() is safe unlike in normal python
        while True:
            argname = arg['name']
            if argname in kwargs:
                ret[argname] = kwargs.get(argname)
                extras.pop(argname, None)
            elif len(args) > argsi:
                ret[argname] = args[argsi]
                argsi += 1
            else:
                ret[argname] = _copy.deepcopy(arg['default'])
            # ret[argname] keeps the value exactly as supplied (including any TermSetWrapper);
            # argval is only a working copy that gets unwrapped/unpacked for validation
            argval = ret[argname]

            from .term_set import TermSetWrapper  # circular import fix
            if isinstance(argval, TermSetWrapper):
                # we can use this to unwrap the dataset/attribute to use the "item" for docval to validate the type.
                argval = argval.value
            if enforce_type:
                if not __type_okay(argval, arg['type'], arg['default'] is None or arg.get('allow_none', False)):
                    if argval is None and arg['default'] is None:
                        fmt_val = (argname, __format_type(arg['type']))
                        type_errors.append("None is not allowed for '%s' (expected '%s', not None)" % fmt_val)
                    else:
                        fmt_val = (argname, type(argval).__name__, __format_type(arg['type']))
                        type_errors.append("incorrect type for '%s' (got '%s', expected '%s')" % fmt_val)
            if enforce_shape and 'shape' in arg and argval is not None:
                valshape = get_data_shape(argval)
                while valshape is None:
                    if argval is None:
                        break
                    if not hasattr(argval, argname):
                        fmt_val = (argval, argname, arg['shape'])
                        value_errors.append("cannot check shape of object '%s' for argument '%s' (expected shape '%s')"
                                            % fmt_val)
                        break
                    # unpack, e.g. if TimeSeries is passed for arg 'data', then TimeSeries.data is checked
                    argval = getattr(argval, argname)
                    valshape = get_data_shape(argval)
                if valshape is not None and not __shape_okay_multi(argval, arg['shape']):
                    fmt_val = (argname, valshape, arg['shape'])
                    value_errors.append("incorrect shape for '%s' (got '%s', expected '%s')" % fmt_val)
            if 'enum' in arg and argval is not None:
                err = __check_enum(argval, arg)
                if err:
                    value_errors.append(err)
            arg = next(it)
    except StopIteration:
        # normal termination: the docval specification has been fully consumed
        pass
    except TypeError as e:
        type_errors.append(str(e))
    except ValueError as e:
        value_errors.append(str(e))

    if not allow_extra:
        type_errors.extend("unrecognized argument: '%s'" % key for key in extras)
    else:
        # TODO: Extras get stripped out if function arguments are composed with fmt_docval_args.
        # allow_extra needs to be tracked on a function so that fmt_docval_args doesn't strip them out
        ret.update(extras)
    return {'args': ret, 'future_warnings': future_warnings, 'type_errors': type_errors, 'value_errors': value_errors,
            'syntax_errors': syntax_errors}
# names of the attributes that the docval decorator sets on the wrapped function
docval_idx_name = '__dv_idx__'
docval_attr_name = '__docval__'
# key of the docval attribute dict under which the list of argument specifications is stored
__docval_args_loc = 'args'


def get_docval(func, *args):
    '''Get a copy of docval arguments for a function.
    If args are supplied, return only docval arguments with value for 'name' key equal to the args

    :param func: the function to inspect
    :param args: optional names of the docval arguments to return
    :return: a tuple of docval argument specifications (empty if the function has no docval and no names are given)
    :raises ValueError: if names are given but the function has no docval or lacks one of the named arguments
    '''
    func_docval = getattr(func, docval_attr_name, None)
    if not func_docval:
        if args:
            raise ValueError('Function %s has no docval arguments' % func.__name__)
        return tuple()

    if not args:
        return tuple(func_docval[__docval_args_loc])

    # look the requested names up in the name-indexed cache stored next to the docval
    by_name = getattr(func, docval_idx_name, None)
    try:
        return tuple(by_name[name] for name in args)
    except KeyError as ke:
        raise ValueError('Function %s does not have docval argument %s' % (func.__name__, str(ke)))
413# def docval_wrap(func, is_method=True):
414# if is_method:
415# @docval(*get_docval(func))
416# def method(self, **kwargs):
417#
418# return call_docval_args(func, kwargs)
419# return method
420# else:
421# @docval(*get_docval(func))
422# def static_method(**kwargs):
423# return call_docval_args(func, kwargs)
424# return method
def fmt_docval_args(func, kwargs):
    ''' Separate positional and keyword arguments

    Useful for methods that wrap other methods

    Arguments without a docval default are returned positionally (None if absent from kwargs).
    Arguments with a default are returned as keyword arguments only when a non-None value was given.
    Remaining entries of kwargs are passed through only if the docval has allow_extra set.
    The kwargs dict itself is not modified.

    :param func: a function carrying a docval
    :param kwargs: dict of argument names to values
    :return: tuple of (list of positional values, dict of keyword arguments)
    :raises ValueError: if func has no docval
    '''
    warnings.warn("fmt_docval_args will be deprecated in a future version of HDMF. Instead of using fmt_docval_args, "
                  "call the function directly with the kwargs. Please note that fmt_docval_args "
                  "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                  "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                  "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
                  PendingDeprecationWarning)
    spec = getattr(func, docval_attr_name, None)
    positional = list()
    keyword = dict()
    remaining = _copy.copy(kwargs)  # work on a shallow copy so the caller's dict is left intact
    if not spec:
        raise ValueError('no docval found on %s' % str(func))

    for arg in spec[__docval_args_loc]:
        name = arg['name']
        val = remaining.pop(name, None)
        if 'default' not in arg:
            positional.append(val)
        elif val is not None:
            keyword[name] = val
    if spec['allow_extra']:
        keyword.update(remaining)
    return positional, keyword
457# def _remove_extra_args(func, kwargs):
458# """Return a dict of only the keyword arguments that are accepted by the function's docval.
459#
460# If the docval specifies allow_extra=True, then the original kwargs are returned.
461# """
462# # NOTE: this has the same functionality as the to-be-deprecated fmt_docval_args except that
463# # kwargs are kept as kwargs instead of parsed into args and kwargs
464# func_docval = getattr(func, docval_attr_name, None)
465# if func_docval:
466# if func_docval['allow_extra']:
467# # if extra args are allowed, return all args
468# return kwargs
469# else:
470# # save only the arguments listed in the function's docval (skip any others present in kwargs)
471# ret_kwargs = dict()
472# for arg in func_docval[__docval_args_loc]:
473# val = kwargs.get(arg['name'], None)
474# if val is not None: # do not return arguments that are not present or have value None
475# ret_kwargs[arg['name']] = val
476# return ret_kwargs
477# else:
478# raise ValueError('No docval found on %s' % str(func))
def call_docval_func(func, kwargs):
    """Call the function with only the keyword arguments that are accepted by the function's docval.

    Extra keyword arguments are not passed to the function unless the function's docval has allow_extra=True.

    :param func: a function carrying a docval
    :param kwargs: dict of argument names to values
    :return: whatever func returns
    """
    warnings.warn("call_docval_func will be deprecated in a future version of HDMF. Instead of using call_docval_func, "
                  "call the function directly with the kwargs. Please note that call_docval_func "
                  "removes all arguments not accepted by the function's docval, so if you are passing kwargs that "
                  "includes extra arguments and the function's docval does not allow extra arguments (allow_extra=True "
                  "is set), then you will need to pop the extra arguments out of kwargs before calling the function.",
                  PendingDeprecationWarning)
    with warnings.catch_warnings(record=True):
        # catch and ignore only PendingDeprecationWarnings from fmt_docval_args so that two
        # PendingDeprecationWarnings saying the same thing are not raised
        warnings.simplefilter("ignore", UserWarning)
        warnings.simplefilter("always", PendingDeprecationWarning)
        positional, keyword = fmt_docval_args(func, kwargs)

    return func(*positional, **keyword)
def __resolve_type(t):
    """Normalize a docval type specification, expanding macro names into their registered types.

    :param t: a type, a str, a list/tuple of type specifications, or None
    :return: None and types are returned as-is; a macro name becomes a tuple of its types; any other
             str is returned as-is; a list/tuple becomes a flat tuple of its resolved members
    :raises ValueError: if t (or a nested member) is of an unsupported kind
    """
    if t is None or isinstance(t, type):
        return t
    if isinstance(t, str):
        return tuple(__macros[t]) if t in __macros else t
    if isinstance(t, (list, tuple)):
        flat = list()
        for member in t:
            resolved = __resolve_type(member)
            # a tuple result (nested collection or expanded macro) is spliced in rather than nested
            if isinstance(resolved, tuple):
                flat.extend(resolved)
            else:
                flat.append(resolved)
        return tuple(flat)
    raise ValueError("argtype must be a type, a str, a list, a tuple, or None - got %s" % type(t))
def __check_enum_argtype(argtype):
    """Return True/False whether the given argtype or list/tuple of argtypes is a supported docval enum type"""
    candidates = argtype if isinstance(argtype, (list, tuple)) else (argtype,)
    return all(candidate in __allowed_enum_types for candidate in candidates)
def docval(*validator, **options):  # noqa: C901
    '''A decorator for documenting and enforcing type for instance method arguments.

    This decorator takes a list of dictionaries that specify the method parameters. These
    dictionaries are used for enforcing type and building a Sphinx docstring.

    The first arguments are dictionaries that specify the positional
    arguments and keyword arguments of the decorated function. These dictionaries
    must contain the following keys: ``'name'``, ``'type'``, and ``'doc'``. This will define a
    positional argument. To define a keyword argument, specify a default value
    using the key ``'default'``. To validate the dimensions of an input array
    add the optional ``'shape'`` parameter. To restrict an argument to a fixed set of values,
    add the optional ``'enum'`` parameter (a list or tuple of allowed values). To allow a None
    value for an argument, either the default value must be None or a different default value
    must be provided and ``'allow_none': True`` must be passed.

    The decorated method must take ``self`` and ``**kwargs`` as arguments.

    When using this decorator, the functions :py:func:`getargs` and
    :py:func:`popargs` can be used for easily extracting arguments from
    kwargs.

    The following code example demonstrates the use of this decorator:

    .. code-block:: python

       @docval({'name': 'arg1', 'type': str, 'doc': 'this is the first positional argument'},
               {'name': 'arg2', 'type': int, 'doc': 'this is the second positional argument'},
               {'name': 'kwarg1', 'type': (list, tuple), 'doc': 'this is a keyword argument', 'default': list()},
               returns='foo object', rtype='Foo')
       def foo(self, **kwargs):
           arg1, arg2, kwarg1 = getargs('arg1', 'arg2', 'kwarg1', kwargs)
           ...

    Note that the ``'type'`` entry of each specification dictionary is replaced in place by its
    resolved form (macro names expanded to tuples of types).

    :param enforce_type: Enforce types of input parameters (Default=True)
    :param returns: String describing the return values
    :param rtype: String describing the data type of the return values
    :param is_method: True if this is decorating an instance or class method, False otherwise (Default=True)
    :param enforce_shape: Enforce the dimensions of input arrays (Default=True)
    :param validator: :py:func:`dict` objects specifying the method parameters
    :param allow_extra: Allow extra arguments (Default=False)
    :param allow_positional: Allow positional arguments (Default=True). May also be an ``AllowPositional``
                             code: with ``WARNING`` a FutureWarning is issued and with ``ERROR`` a SyntaxError
                             is raised when positional arguments are supplied.
    :param options: additional options for documenting and validating method parameters
    :raises Exception: at decoration time, if an argument specification is invalid
    '''
    enforce_type = options.pop('enforce_type', True)
    enforce_shape = options.pop('enforce_shape', True)
    returns = options.pop('returns', None)
    rtype = options.pop('rtype', None)
    is_method = options.pop('is_method', True)
    allow_extra = options.pop('allow_extra', False)
    allow_positional = options.pop('allow_positional', True)
    # the only keys an argument specification dictionary may contain
    allowable_terms = ('name', 'doc', 'type', 'shape', 'enum', 'default', 'allow_none', 'help')

    def dec(func):
        _docval = _copy.copy(options)
        _docval['allow_extra'] = allow_extra
        _docval['allow_positional'] = allow_positional
        func.__name__ = _docval.get('func_name', func.__name__)
        func.__doc__ = _docval.get('doc', func.__doc__)
        pos = list()
        kw = list()
        for a in validator:
            # catch unsupported keys
            unsupported_terms = set(a.keys()) - set(allowable_terms)
            if unsupported_terms:
                raise Exception('docval for {}: keys {} are not supported by docval'.format(a['name'],
                                                                                            sorted(unsupported_terms)))
            # check that arg type is valid
            try:
                a['type'] = __resolve_type(a['type'])
            except Exception as e:
                msg = "docval for %s: error parsing argument type: %s" % (a['name'], e.args[0])
                raise Exception(msg) from e
            if 'enum' in a:
                # check that value for enum key is a list or tuple (cannot have only one allowed value)
                if not isinstance(a['enum'], (list, tuple)):
                    msg = ('docval for %s: enum value must be a list or tuple (received %s)'
                           % (a['name'], type(a['enum'])))
                    raise Exception(msg)
                # check that arg type is compatible with enum
                if not __check_enum_argtype(a['type']):
                    msg = 'docval for {}: enum checking cannot be used with arg type {}'.format(a['name'], a['type'])
                    raise Exception(msg)
                # check that enum allowed values are allowed by arg type
                if any(not __type_okay(x, a['type']) for x in a['enum']):
                    msg = ('docval for {}: enum values are of types not allowed by arg type (got {}, '
                           'expected {})'.format(a['name'], [type(x) for x in a['enum']], a['type']))
                    raise Exception(msg)
            if a.get('allow_none', False) and 'default' not in a:
                msg = 'docval for {}: allow_none=True can only be set if a default value is provided.'.format(a['name'])
                raise Exception(msg)
            if 'default' in a:
                kw.append(a)
            else:
                pos.append(a)
        # arguments without a default always come before arguments with a default
        loc_val = pos + kw
        _docval[__docval_args_loc] = loc_val

        def _check_args(args, kwargs):
            """Parse and check arguments to decorated function. Raise warnings and errors as appropriate."""
            # this function was separated from func_call() in order to make stepping through lines of code using pdb
            # easier

            parsed = __parse_args(
                loc_val,
                args[1:] if is_method else args,  # drop self/cls for methods
                kwargs,
                enforce_type=enforce_type,
                enforce_shape=enforce_shape,
                allow_extra=allow_extra,
                allow_positional=allow_positional
            )

            parse_warnings = parsed.get('future_warnings')
            if parse_warnings:
                msg = '%s: %s' % (func.__qualname__, ', '.join(parse_warnings))
                warnings.warn(msg, FutureWarning)

            # the first non-empty category wins: type errors, then value errors, then syntax errors
            for error_type, ExceptionType in (('type_errors', TypeError),
                                              ('value_errors', ValueError),
                                              ('syntax_errors', SyntaxError)):
                parse_err = parsed.get(error_type)
                if parse_err:
                    msg = '%s: %s' % (func.__qualname__, ', '.join(parse_err))
                    raise ExceptionType(msg)

            return parsed['args']

        # this code is intentionally separated to make stepping through lines of code using pdb easier
        if is_method:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(args[0], **pargs)
        else:
            def func_call(*args, **kwargs):
                pargs = _check_args(args, kwargs)
                return func(**pargs)

        _rtype = rtype
        if isinstance(rtype, type):
            _rtype = rtype.__name__
        docstring = __googledoc(func, _docval[__docval_args_loc], returns=returns, rtype=_rtype)
        docval_idx = {a['name']: a for a in _docval[__docval_args_loc]}  # cache a name-indexed dictionary of args
        setattr(func_call, '__doc__', docstring)
        setattr(func_call, '__name__', func.__name__)
        setattr(func_call, docval_attr_name, _docval)
        setattr(func_call, docval_idx_name, docval_idx)
        setattr(func_call, '__module__', func.__module__)
        return func_call

    return dec
def __sig_arg(argval):
    """Render one docval argument specification as it appears in a signature: ``name`` or ``name=default``.

    String defaults are shown wrapped in single quotes; other defaults are converted with str().
    """
    name = argval['name']
    if 'default' not in argval:
        return name
    default = argval['default']
    rendered = "'%s'" % default if isinstance(default, str) else str(default)
    return "%s=%s" % (name, rendered)
def __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=None, returns=None, rtype=None):
    '''Generate a docstring for func from its docval argument specifications.

    The result starts with a signature line built from ``func.__name__`` and the arguments, followed
    by ``docstring_fmt`` filled with the stripped original docstring and one ``arg_fmt`` entry per
    argument. ``ret_fmt`` is appended only if ``ret_fmt``, ``returns`` and ``rtype`` are all given.
    '''

    def to_str(argtype):
        # anything that is not a class (e.g. a type name given as str) is passed through unchanged
        if not isinstance(argtype, type):
            return argtype
        module = argtype.__module__
        name = argtype.__name__
        # classes from these modules are referenced by their bare name
        if module.startswith(("h5py", "pandas", "builtins")):
            return ":py:class:`~{name}`".format(name=name)
        return ":py:class:`~{module}.{name}`".format(name=name, module=module)

    def _render_arg(arg):
        argtype = arg['type']
        if isinstance(argtype, (tuple, list)):
            type_str = " or ".join(map(to_str, argtype))
        else:
            type_str = to_str(argtype)
        return arg_fmt.format(name=arg.get('name'), doc=arg.get('doc'), type=type_str)

    sig = "%s(%s)\n\n" % (func.__name__, ", ".join(map(__sig_arg, validator)))
    desc = "" if func.__doc__ is None else func.__doc__.strip()
    arg_lines = "\n".join(map(_render_arg, validator))
    sig += docstring_fmt.format(description=desc, args=arg_lines)

    if ret_fmt is not None and returns is not None and rtype is not None:
        sig += ret_fmt.format(returns=returns, rtype=rtype)
    return sig
def __sphinxdoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func that documents arguments with ``:param:``/``:type:`` fields."""
    return __builddoc(
        func,
        validator,
        "{description}\n\n{args}\n",
        ":param {name}: {doc}\n:type {name}: {type}",
        ret_fmt=":returns: {returns}\n:rtype: {rtype}",
        returns=returns,
        rtype=rtype,
    )
def __googledoc(func, validator, returns=None, rtype=None):
    """Build a docstring for func that documents arguments in an ``Args:`` section and the
    return value in a ``Returns:`` section. The ``Args:`` section is omitted if there are no arguments.
    """
    # NOTE(review): the leading whitespace inside the two entry formats below is reproduced as it
    # appears in the reviewed listing, which may have collapsed runs of spaces - confirm the intended
    # indentation of the entries against the upstream file
    arg_fmt = " {name} ({type}): {doc}"
    ret_fmt = "\nReturns:\n {rtype}: {returns}"
    docstring_fmt = "{description}\n\n" + ("Args:\n{args}\n" if len(validator) > 0 else "")
    return __builddoc(func, validator, docstring_fmt, arg_fmt, ret_fmt=ret_fmt, returns=returns, rtype=rtype)
def getargs(*argnames):
    """getargs(*argnames, argdict)
    Convenience function to retrieve arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values.

    :raises ValueError: if a argument name is not found in the dictionary or there is only one argument passed to this
        function or the last argument is not a dictionary
    :return: a single value if there is only one argument, or a list of values corresponding to the given argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, argdict = argnames
    if not isinstance(argdict, dict):
        raise ValueError('Last argument must be a dict')
    values = []
    for key in keys:
        if key not in argdict:
            raise ValueError("Argument not found in dict: '%s'" % key)
        values.append(argdict.get(key))
    # a single requested key yields the bare value rather than a one-element list
    return values[0] if len(keys) == 1 else values
def popargs(*argnames):
    """popargs(*argnames, argdict)
    Convenience function to retrieve and remove arguments from a dictionary in batch.

    The last argument should be a dictionary, and the other arguments should be the keys (argument names) for which
    to retrieve the values. Keys are removed in the order given, so keys preceding a missing one
    have already been removed from the dictionary when the error is raised.

    :raises ValueError: if a argument name is not found in the dictionary or there is only one argument passed to this
        function or the last argument is not a dictionary
    :return: a single value if there is only one argument, or a list of values corresponding to the given argument names
    """
    if len(argnames) < 2:
        raise ValueError('Must supply at least one key and a dict')
    *keys, argdict = argnames
    if not isinstance(argdict, dict):
        raise ValueError('Last argument must be a dict')
    try:
        values = [argdict.pop(key) for key in keys]
    except KeyError as ke:
        raise ValueError('Argument not found in dict: %s' % str(ke))
    # a single requested key yields the bare value rather than a one-element list
    return values[0] if len(keys) == 1 else values
def popargs_to_dict(keys, argdict):
    """Convenience function to retrieve and remove arguments from a dictionary in batch into a dictionary.

    Same as `{key: argdict.pop(key) for key in keys}` with a custom ValueError

    :param keys: Iterable of keys to pull out of argdict
    :type keys: Iterable
    :param argdict: Dictionary to process
    :type argdict: dict
    :raises ValueError: if an argument name is not found in the dictionary
    :return: a dict of arguments removed
    """
    removed = dict()
    for key in keys:
        try:
            value = argdict.pop(key)
        except KeyError as ke:
            raise ValueError('Argument not found in dict: %s' % str(ke))
        removed[key] = value
    return removed
class ExtenderMeta(ABCMeta):
    """A metaclass that extends type initialization by running extra routines
    defined in the classes that use this metaclass.

    Routines are registered with the :py:meth:`pre_init` and :py:meth:`post_init` decorators.

    In general, this class should only be used by core developers.
    """

    # names of the marker attributes set on decorated functions
    __preinit = '__preinit'
    __postinit = '__postinit'

    @classmethod
    def pre_init(cls, func):
        '''A decorator for defining a routine to run before the base type initialization.

        The function is flagged and returned wrapped as a classmethod; it is called with
        ``(name, bases, classdict)`` when a type using this metaclass is initialized.
        '''
        marker = cls.__preinit
        setattr(func, marker, True)
        return classmethod(func)

    @classmethod
    def post_init(cls, func):
        '''A decorator for defining a routine to run after creation of a type object.

        An example use of this method would be to define a classmethod that gathers
        any defined methods or attributes after the base Python type construction (i.e. after
        :py:func:`type` has been called)
        '''
        marker = cls.__postinit
        setattr(func, marker, True)
        return classmethod(func)

    def __init__(cls, name, bases, classdict):
        # run every attribute flagged by pre_init before the regular type initialization
        for attr_name in dir(cls):
            member = getattr(cls, attr_name)
            if hasattr(member, cls.__preinit):
                member(name, bases, classdict)
        super().__init__(name, bases, classdict)
        # run every attribute flagged by post_init once the type has been initialized
        for attr_name in dir(cls):
            member = getattr(cls, attr_name)
            if hasattr(member, cls.__postinit):
                member(name, bases, classdict)
def get_data_shape(data, strict_no_data_load=False):
    """
    Helper function used to determine the shape of the given array.

    The shape is taken from ``data.maxshape`` if that attribute exists, otherwise from ``data.shape`` if it exists
    and is not None. For other sized containers (nested tuples, lists, sets, ...), the first element along each
    dimension is inspected, assuming that the data has a regular, rectangular shape. In the case of out-of-core
    iterators, this means that the first item along each dimension would potentially be loaded into memory. Set
    strict_no_data_load=True to enforce that this does not happen, at the cost that we may not be able to
    determine the shape of the array.

    :param data: Array for which we should determine the shape.
    :type data: List, numpy.ndarray, DataChunkIterator, any object that support __len__ or .shape.
    :param strict_no_data_load: If True and data is an out-of-core iterator, None may be returned. If False (default),
                                the first element of data may be loaded into memory.
    :return: Tuple of ints indicating the size of known dimensions. Dimensions for which the size is unknown
             will be set to None. None is returned for dicts, str, bytes and objects without a length or shape.
    """

    def _walk_first_elements(container):
        # descend through the first element of each level, recording the length of every sized level
        dims = []
        current = container
        while hasattr(current, '__len__'):
            size = len(current)
            dims.append(size)
            if not size:
                break
            current = next(iter(current))
            if isinstance(current, (str, bytes)):
                # strings are treated as scalar leaves, not as another dimension
                break
        return tuple(dims)

    # NOTE: data.maxshape will fail on empty h5py.Dataset without shape or maxshape. this will be fixed in h5py 3.0
    if hasattr(data, 'maxshape'):
        return data.maxshape
    if hasattr(data, 'shape') and data.shape is not None:
        return data.shape
    if isinstance(data, dict):
        return None
    if not hasattr(data, '__len__') or isinstance(data, (str, bytes)):
        return None
    if strict_no_data_load and not isinstance(data, (list, tuple, set)):
        # inspecting elements could load data into memory, which the caller has forbidden
        return None
    return _walk_first_elements(data)
def pystr(s):
    """
    Return a Python str for the given string of characters.

    A bytes object is decoded as UTF-8; any other value is returned unchanged.
    """
    return s.decode('utf-8') if isinstance(s, bytes) else s
def to_uint_array(arr):
    """
    Convert a numpy array or array-like object to a numpy array of unsigned integers with the same dtype itemsize.

    For example, a list of int32 values is converted to a numpy array with dtype uint32. An array that already has
    an unsigned integer dtype is returned as is.

    :raises ValueError: if input array contains values that are not unsigned integers or non-negative integers.
    """
    values = arr if isinstance(arr, np.ndarray) else np.array(arr)
    source_dtype = values.dtype
    if np.issubdtype(source_dtype, np.unsignedinteger):
        return values
    if not np.issubdtype(source_dtype, np.integer):
        raise ValueError('Cannot convert array of dtype %s to uint.' % source_dtype)
    if (values < 0).any():
        raise ValueError('Cannot convert negative integer values to uint.')
    n_bits = int(source_dtype.itemsize * 8)
    # same bit width as the signed source dtype, so no precision is lost
    return values.astype(np.dtype('uint' + str(n_bits)))
class LabelledDict(dict):
    """A dict wrapper that allows querying by an attribute of the values and running a callable on removed items.

    For example, if the key attribute is set as 'name' in __init__, then all objects added to the LabelledDict must have
    a 'name' attribute and a particular object in the LabelledDict can be accessed using the syntax ['object_name'] if
    the object.name == 'object_name'. In this way, LabelledDict acts like a set where values can be retrieved using
    square brackets around the value of the key attribute. An 'add' method makes clear the association between the key
    attribute of the LabelledDict and the values of the LabelledDict.

    LabelledDict also supports retrieval of values with the syntax my_dict['attr == val'], which returns a set of
    objects in the LabelledDict which have an attribute 'attr' with a string value 'val'. If no objects match that
    condition, an empty set is returned. Note that if 'attr' equals the key attribute, then the single matching value
    is returned, not a set, and a KeyError is raised if no value is stored under the key 'val'.

    LabelledDict does not support changing items that have already been set. A TypeError will be raised when using
    __setitem__ on keys that already exist in the dict. The setdefault and update methods are not supported. A
    TypeError will be raised when these are called.

    A callable function may be passed to the constructor to be run on an item after adding it to this dict using
    the __setitem__ and add methods.

    A callable function may be passed to the constructor to be run on an item after removing it from this dict using
    the __delitem__ (the del operator), pop, and popitem methods. It will also be run on each removed item when using
    the clear method.

    Usage:
      my_dict = LabelledDict(label='my_objects', key_attr='name')
      my_dict[obj.name] = obj
      my_dict.add(obj)  # simpler syntax

    Example:
      # MyTestClass is a class with attributes 'prop1' and 'prop2'. MyTestClass.__init__ sets those attributes.
      ld = LabelledDict(label='all_objects', key_attr='prop1')
      obj1 = MyTestClass('a', 'b')
      obj2 = MyTestClass('d', 'b')
      ld[obj1.prop1] = obj1  # obj1 is added to the LabelledDict with the key obj1.prop1. Any other key is not allowed.
      ld.add(obj2)           # Simpler 'add' syntax enforces the required relationship
      ld['a']                # Returns obj1
      ld['prop1 == a']       # Also returns obj1
      ld['prop2 == b']       # Returns set([obj1, obj2]) - the set of all values v in ld where v.prop2 == 'b'
    """

    @docval({'name': 'label', 'type': str, 'doc': 'the label on this dictionary'},
            {'name': 'key_attr', 'type': str, 'doc': 'the attribute name to use as the key', 'default': 'name'},
            {'name': 'add_callable', 'type': types.FunctionType,
             'doc': 'function to call on an element after adding it to this dict using the add or __setitem__ methods',
             'default': None},
            {'name': 'remove_callable', 'type': types.FunctionType,
             'doc': ('function to call on an element after removing it from this dict using the pop, popitem, clear, '
                     'or __delitem__ methods'),
             'default': None})
    def __init__(self, **kwargs):
        label, key_attr, add_callable, remove_callable = getargs('label', 'key_attr', 'add_callable', 'remove_callable',
                                                                 kwargs)
        self.__label = label
        self.__key_attr = key_attr
        self.__add_callable = add_callable
        self.__remove_callable = remove_callable

    @property
    def label(self):
        """Return the label of this LabelledDict"""
        return self.__label

    @property
    def key_attr(self):
        """Return the attribute used as the key for values in this LabelledDict"""
        return self.__key_attr

    def __getitem__(self, args):
        """Get a value from the LabelledDict with the given key.

        Supports syntax my_dict['attr == val'], which returns a set of objects in the LabelledDict which have an
        attribute 'attr' with a string value 'val'. If no objects match that condition, an empty set is returned.
        Note that if 'attr' equals the key attribute of this LabelledDict, then the single matching value is
        returned, not a set.

        Raises KeyError if a plain key (or the value of a key attribute query) is not in the dict.
        Raises ValueError if the attribute name or the value of an 'attr == val' query is empty.
        """
        key = args
        # Only a str can carry the 'attr == val' query syntax. Testing "'==' in args" on any other kind of key
        # (e.g. an int) would fail with a TypeError instead of the KeyError expected from a dict lookup.
        if isinstance(args, str) and '==' in args:
            key, val = args.split("==")
            key = key.strip()
            val = val.strip()  # val is a string
            if not key:
                raise ValueError("An attribute name is required before '=='.")
            if not val:
                raise ValueError("A value is required after '=='.")
            if key != self.key_attr:
                # query on a non-key attribute: collect every value whose attribute equals the string val
                ret = set()
                for item in self.values():
                    if getattr(item, key, None) == val:
                        ret.add(item)
                return ret
            else:
                # query on the key attribute is a regular lookup by key
                return super().__getitem__(val)
        else:
            return super().__getitem__(key)

    def __setitem__(self, key, value):
        """Set a value in the LabelledDict with the given key. The key must equal value.key_attr.

        See LabelledDict.add for a simpler syntax since the key is redundant.
        Raises TypeError if key already exists.
        Raises ValueError if value does not have attribute key_attr.
        Raises KeyError if key does not equal the key_attr attribute of value.
        """
        if key in self:
            raise TypeError("Key '%s' is already in this dict. Cannot reset items in a %s."
                            % (key, self.__class__.__name__))
        self.__check_value(value)
        if key != getattr(value, self.key_attr):
            raise KeyError("Key '%s' must equal attribute '%s' of '%s'." % (key, self.key_attr, value))
        super().__setitem__(key, value)
        if self.__add_callable:
            self.__add_callable(value)

    def add(self, value):
        """Add a value to the dict with the key value.key_attr.

        Raises ValueError if value does not have attribute key_attr.
        """
        # check before reading the attribute so that a missing attribute gives the ValueError documented above
        self.__check_value(value)
        self.__setitem__(getattr(value, self.key_attr), value)

    def __check_value(self, value):
        """Raise ValueError if value does not have the key attribute of this LabelledDict."""
        if not hasattr(value, self.key_attr):
            raise ValueError("Cannot set value '%s' in %s. Value must have attribute '%s'."
                             % (value, self.__class__.__name__, self.key_attr))

    def pop(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the returned value."""
        ret = super().pop(k)
        if self.__remove_callable:
            self.__remove_callable(ret)
        return ret

    def popitem(self):
        """Remove the last added item. If remove_callable was initialized, call that on the returned value.

        Note: popitem returns a tuple (key, value) but the remove_callable will be called only on the value.

        Note: in Python 3.5 and earlier, dictionaries are not ordered, so popitem removes an arbitrary item.
        """
        ret = super().popitem()
        if self.__remove_callable:
            self.__remove_callable(ret[1])  # execute callable only on dict value
        return ret

    def clear(self):
        """Remove all items. If remove_callable was initialized, call that on each returned value.

        The order of removal depends on the popitem method.
        """
        # go through popitem (rather than dict.clear) so that remove_callable runs on every removed value
        while len(self):
            self.popitem()

    def __delitem__(self, k):
        """Remove an item that matches the key. If remove_callable was initialized, call that on the matching value."""
        item = self[k]
        super().__delitem__(k)
        if self.__remove_callable:
            self.__remove_callable(item)

    def setdefault(self, k):
        """setdefault is not supported. A TypeError will be raised."""
        raise TypeError('setdefault is not supported for %s' % self.__class__.__name__)

    def update(self, other):
        """update is not supported. A TypeError will be raised."""
        raise TypeError('update is not supported for %s' % self.__class__.__name__)
@docval_macro('array_data')
class StrDataset(h5py.Dataset):
    """Wrapper around a dataset that decodes byte strings to str when items are read"""

    def __init__(self, dset, encoding, errors='strict'):
        # dset must be stored first: __getattr__ below relies on self.dset for every attribute not set here
        self.dset = dset
        # fall back to the encoding recorded in the string dtype of the wrapped dataset
        self.encoding = h5py.h5t.check_string_dtype(dset.dtype).encoding if encoding is None else encoding
        self.errors = errors

    def __getattr__(self, name):
        # attributes not found through normal lookup are forwarded to the wrapped dataset
        wrapped = self.dset
        return getattr(wrapped, name)

    def __repr__(self):
        inner = repr(self.dset)[1:-1]  # drop the first and last character of the wrapped repr
        return '<StrDataset for %s>' % inner

    def __len__(self):
        return len(self.dset)

    def __getitem__(self, args):
        raw = self.dset[args]
        # numpy.char.decode() seems like the obvious thing to use. But it only
        # accepts numpy string arrays, not object arrays of bytes (which we
        # return from HDF5 variable-length strings). And the numpy
        # implementation is not faster than doing it with a loop; in fact, by
        # not converting the result to a numpy unicode array, the
        # naive way can be faster! (Comparing with numpy 1.18.4, June 2020)
        if np.isscalar(raw):
            return raw.decode(self.encoding, self.errors)

        decoded = [item.decode(self.encoding, self.errors) for item in raw.flat]
        return np.array(decoded, dtype=object).reshape(raw.shape)