Coverage for src/hdmf/build/objectmapper.py: 95%
814 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-07-10 23:48 +0000
1import logging
2import re
3import warnings
4from collections import OrderedDict
5from copy import copy
7import numpy as np
9from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder
10from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
11 ConstructError)
12from .manager import Proxy, BuildManager
13from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning
14from ..container import AbstractContainer, Data, DataRegion
15from ..data_utils import DataIO, AbstractDataChunkIterator
16from ..query import ReferenceResolver
17from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec
18from ..spec.spec import BaseStorageSpec
19from ..utils import docval, getargs, ExtenderMeta, get_docval
_const_arg = '__constructor_arg'


@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _constructor_arg(**kwargs):
    """Decorator to override the default mapping scheme for a given constructor argument.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the Builder object that is being mapped. The method should return the value to be passed
    to the target AbstractContainer class constructor argument given by *name*.
    """
    carg_name = getargs('name', kwargs)

    def _tag(func):
        # Mark the function so ObjectMapper's class-creation hook can discover it later.
        setattr(func, _const_arg, carg_name)
        return func

    return _tag
_obj_attr = '__object_attr'


@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _object_attr(**kwargs):
    """Decorator to override the default mapping scheme for a given object attribute.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the AbstractContainer object that is being mapped. The method should return the child Builder
    object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the
    attribute given by *name*.
    """
    attr_name = getargs('name', kwargs)

    def _tag(func):
        # Mark the function so ObjectMapper's class-creation hook can discover it later.
        setattr(func, _obj_attr, attr_name)
        return func

    return _tag
66def _unicode(s):
67 """
68 A helper function for converting to Unicode
69 """
70 if isinstance(s, str):
71 return s
72 elif isinstance(s, bytes):
73 return s.decode('utf-8')
74 else:
75 raise ValueError("Expected unicode or ascii string, got %s" % type(s))
78def _ascii(s):
79 """
80 A helper function for converting to ASCII
81 """
82 if isinstance(s, str):
83 return s.encode('ascii', 'backslashreplace')
84 elif isinstance(s, bytes):
85 return s
86 else:
87 raise ValueError("Expected unicode or ascii string, got %s" % type(s))
class ObjectMapper(metaclass=ExtenderMeta):
    '''A class for mapping between Spec objects and AbstractContainer attributes

    '''

    # mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes
    # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
    # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
    # hdmf.validate.validator.__allowable, and backend dtype maps
    __dtypes = {
        "float": np.float32,
        "float32": np.float32,
        "double": np.float64,
        "float64": np.float64,
        "long": np.int64,
        "int64": np.int64,
        "int": np.int32,
        "int32": np.int32,
        "short": np.int16,
        "int16": np.int16,
        "int8": np.int8,
        "uint": np.uint32,
        "uint64": np.uint64,
        "uint32": np.uint32,
        "uint16": np.uint16,
        "uint8": np.uint8,
        "bool": np.bool_,
        # string dtypes map to conversion helper functions rather than numpy types
        "text": _unicode,
        "utf": _unicode,
        "utf8": _unicode,
        "utf-8": _unicode,
        "ascii": _ascii,
        "bytes": _ascii,
        "isodatetime": _ascii,
        "datetime": _ascii,
    }

    # types registered via no_convert(); convert_dtype leaves values of these types unconverted
    __no_convert = set()
    @classmethod
    def __resolve_numeric_dtype(cls, given, specified):
        """
        Determine the dtype to use from the dtype of the given value and the specified dtype.
        This amounts to determining the greater precision of the two arguments, but also
        checks to make sure the same base dtype is being used. A warning is raised if the
        base type of the specified dtype differs from the base type of the given dtype and
        a conversion will result (e.g., float32 -> uint32).

        Returns a tuple of (numpy scalar type to use, warning message or None).
        """
        g = np.dtype(given)
        s = np.dtype(specified)
        if g == s:
            return s.type, None
        if g.itemsize <= s.itemsize:  # given type has precision <= precision of specified type
            # note: this allows float32 -> int32, bool -> int8, int16 -> uint16 which may involve buffer overflows,
            # truncated values, and other unexpected consequences.
            warning_msg = ('Value with data type %s is being converted to data type %s as specified.'
                           % (g.name, s.name))
            return s.type, warning_msg
        elif g.name[:3] == s.name[:3]:
            return g.type, None  # same base type, use higher-precision given type
        else:
            # the given type has higher precision but a different base type than specified;
            # choose a type with the specified base type and at least the given precision
            if np.issubdtype(s, np.unsignedinteger):
                # e.g.: given int64 and spec uint32, return uint64. given float32 and spec uint8, return uint32.
                ret_type = np.dtype('uint' + str(int(g.itemsize * 8)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if np.issubdtype(s, np.floating):
                # e.g.: given int64 and spec float32, return float64. given uint64 and spec float32, return float32.
                ret_type = np.dtype('float' + str(max(int(g.itemsize * 8), 32)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if np.issubdtype(s, np.integer):
                # e.g.: given float64 and spec int8, return int64. given uint32 and spec int8, return int32.
                ret_type = np.dtype('int' + str(int(g.itemsize * 8)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if s.type is np.bool_:
                # a wider non-bool value cannot be coerced into a bool spec
                msg = "expected %s, received %s - must supply %s" % (s.name, g.name, s.name)
                raise ValueError(msg)
            # all numeric types in __dtypes should be caught by the above
            raise ValueError('Unsupported conversion to specification data type: %s' % s.name)
    @classmethod
    def no_convert(cls, obj_type):
        """
        Specify an object type that ObjectMappers should not convert.
        """
        # registered types are passed through convert_dtype unchanged (handled in __check_edgecases)
        cls.__no_convert.add(obj_type)
    @classmethod  # noqa: C901
    def convert_dtype(cls, spec, value, spec_dtype=None):  # noqa: C901
        """
        Convert values to the specified dtype. For example, if a literal int
        is passed in to a field that is specified as a unsigned integer, this function
        will convert the Python int to a numpy unsigned int.

        :param spec: The DatasetSpec or AttributeSpec to which this value is being applied
        :param value: The value being converted to the spec dtype
        :param spec_dtype: Optional override of the dtype in spec.dtype. Used to specify the parent dtype when the given
                           extended spec lacks a dtype.

        :return: The function returns a tuple consisting of 1) the value, and 2) the data type.
                 The value is returned as the function may convert the input value to comply
                 with the dtype specified in the schema.
        """
        if spec_dtype is None:
            spec_dtype = spec.dtype
        # handle None values, DataIO wrappers, compound/ref dtypes, and no-convert types up front
        ret, ret_dtype = cls.__check_edgecases(spec, value, spec_dtype)
        if ret is not None or ret_dtype is not None:
            return ret, ret_dtype
        # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions _unicode or _ascii
        spec_dtype_type = cls.__dtypes[spec_dtype]
        warning_msg = None
        # Numpy Array or Zarr array
        if (isinstance(value, np.ndarray) or
                (hasattr(value, 'astype') and hasattr(value, 'dtype'))):
            if spec_dtype_type is _unicode:
                ret = value.astype('U')
                ret_dtype = "utf8"
            elif spec_dtype_type is _ascii:
                ret = value.astype('S')
                ret_dtype = "ascii"
            else:
                # numeric array: reconcile the array's dtype with the spec dtype
                dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
                if value.dtype == dtype_func:
                    ret = value  # already the resolved dtype; avoid an unnecessary copy
                else:
                    ret = value.astype(dtype_func)
                ret_dtype = ret.dtype.type
        # Tuple or list
        elif isinstance(value, (tuple, list)):
            if len(value) == 0:
                # nothing to convert; just report the target dtype
                if spec_dtype_type is _unicode:
                    ret_dtype = 'utf8'
                elif spec_dtype_type is _ascii:
                    ret_dtype = 'ascii'
                else:
                    ret_dtype = spec_dtype_type
                return value, ret_dtype
            # convert each element recursively and rebuild the original sequence type
            ret = list()
            for elem in value:
                tmp, tmp_dtype = cls.convert_dtype(spec, elem, spec_dtype)
                ret.append(tmp)
            ret = type(value)(ret)
            ret_dtype = tmp_dtype
        # Any DataChunkIterator
        elif isinstance(value, AbstractDataChunkIterator):
            # streamed data cannot be converted here; only resolve the dtype
            ret = value
            if spec_dtype_type is _unicode:
                ret_dtype = "utf8"
            elif spec_dtype_type is _ascii:
                ret_dtype = "ascii"
            else:
                ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
        else:
            # scalar value
            if spec_dtype_type in (_unicode, _ascii):
                ret_dtype = 'ascii'
                if spec_dtype_type is _unicode:
                    ret_dtype = 'utf8'
                ret = spec_dtype_type(value)
            else:
                dtype_func, warning_msg = cls.__resolve_numeric_dtype(type(value), spec_dtype_type)
                ret = dtype_func(value)
                ret_dtype = type(ret)
        if warning_msg:
            full_warning_msg = "Spec '%s': %s" % (spec.path, warning_msg)
            warnings.warn(full_warning_msg, DtypeConversionWarning)
        return ret, ret_dtype
262 @classmethod
263 def __check_convert_numeric(cls, value_type):
264 # dtype 'numeric' allows only ints, floats, and uints
265 value_dtype = np.dtype(value_type)
266 if not (np.issubdtype(value_dtype, np.unsignedinteger) or
267 np.issubdtype(value_dtype, np.floating) or
268 np.issubdtype(value_dtype, np.integer)):
269 raise ValueError("Cannot convert from %s to 'numeric' specification dtype." % value_type)
    @classmethod  # noqa: C901
    def __check_edgecases(cls, spec, value, spec_dtype):  # noqa: C901
        """
        Check edge cases in converting data to a dtype.

        Returns a (value, dtype) tuple when an edge case applies, or (None, None) to signal
        that the caller (convert_dtype) should perform the standard conversion.
        """
        if value is None:
            # Data is missing. Determine dtype from spec
            dt = spec_dtype
            if isinstance(dt, RefSpec):
                dt = dt.reftype
            return None, dt
        if isinstance(spec_dtype, list):
            # compound dtype - Since the I/O layer needs to determine how to handle these,
            # return the list of DtypeSpecs
            return value, spec_dtype
        if isinstance(value, DataIO):
            # data is wrapped for I/O via DataIO
            if value.data is None:
                # Data is missing so DataIO.dtype must be set to determine the dtype
                return value, value.dtype
            else:
                # Determine the dtype from the DataIO.data
                return value, cls.convert_dtype(spec, value.data, spec_dtype)[1]
        if spec_dtype is None or spec_dtype == 'numeric' or type(value) in cls.__no_convert:
            # infer type from value
            if hasattr(value, 'dtype'):  # covers numpy types, Zarr Array, AbstractDataChunkIterator
                if spec_dtype == 'numeric':
                    cls.__check_convert_numeric(value.dtype.type)
                if np.issubdtype(value.dtype, np.str_):
                    ret_dtype = 'utf8'
                elif np.issubdtype(value.dtype, np.string_):
                    ret_dtype = 'ascii'
                elif np.issubdtype(value.dtype, np.dtype('O')):
                    # Only variable-length strings should ever appear as generic objects.
                    # Everything else should have a well-defined type
                    ret_dtype = 'utf8'
                else:
                    ret_dtype = value.dtype.type
                return value, ret_dtype
            if isinstance(value, (list, tuple)):
                if len(value) == 0:
                    msg = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype."
                    raise ValueError(msg)
                return value, cls.__check_edgecases(spec, value[0], spec_dtype)[1]  # infer dtype from first element
            ret_dtype = type(value)
            if spec_dtype == 'numeric':
                cls.__check_convert_numeric(ret_dtype)
            if ret_dtype is str:
                ret_dtype = 'utf8'
            elif ret_dtype is bytes:
                ret_dtype = 'ascii'
            return value, ret_dtype
        if isinstance(spec_dtype, RefSpec):
            # a reference dtype requires the value to already be a ReferenceBuilder
            if not isinstance(value, ReferenceBuilder):
                msg = "got RefSpec for value of type %s" % type(value)
                raise ValueError(msg)
            return value, spec_dtype
        if spec_dtype is not None and spec_dtype not in cls.__dtypes:  # pragma: no cover
            msg = "unrecognized dtype: %s -- cannot convert value" % spec_dtype
            raise ValueError(msg)
        # no edge case applies; convert_dtype performs the standard conversion
        return None, None
    _const_arg = '__constructor_arg'  # attribute name used to tag constructor-argument override functions

    @staticmethod
    @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
            is_method=False)
    def constructor_arg(**kwargs):
        '''Decorator to override the default mapping scheme for a given constructor argument.

        Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
        scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
        first argument the Builder object that is being mapped. The method should return the value to be passed
        to the target AbstractContainer class constructor argument given by *name*.
        '''
        name = getargs('name', kwargs)
        # delegate to the module-level decorator factory
        return _constructor_arg(name)
    _obj_attr = '__object_attr'  # attribute name used to tag object-attribute override functions

    @staticmethod
    @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
            is_method=False)
    def object_attr(**kwargs):
        '''Decorator to override the default mapping scheme for a given object attribute.

        Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
        scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
        first argument the AbstractContainer object that is being mapped. The method should return the child Builder
        object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the
        attribute given by *name*.
        '''
        name = getargs('name', kwargs)
        # delegate to the module-level decorator factory
        return _object_attr(name)
    @staticmethod
    def __is_attr(attr_val):
        # True if the function was tagged by the object_attr decorator
        return hasattr(attr_val, _obj_attr)

    @staticmethod
    def __get_obj_attr(attr_val):
        # the object attribute name a tagged override function maps
        return getattr(attr_val, _obj_attr)

    @staticmethod
    def __is_constructor_arg(attr_val):
        # True if the function was tagged by the constructor_arg decorator
        return hasattr(attr_val, _const_arg)

    @staticmethod
    def __get_cargname(attr_val):
        # the constructor argument name a tagged override function maps
        return getattr(attr_val, _const_arg)
    @ExtenderMeta.post_init
    def __gather_procedures(cls, name, bases, classdict):
        """Collect tagged override functions into constructor_args/obj_attrs when a (sub)class is created."""
        # copy inherited mappings so subclasses do not mutate their parents' dicts
        if hasattr(cls, 'constructor_args'):
            cls.constructor_args = copy(cls.constructor_args)
        else:
            cls.constructor_args = dict()
        if hasattr(cls, 'obj_attrs'):
            cls.obj_attrs = copy(cls.obj_attrs)
        else:
            cls.obj_attrs = dict()
        # register every method tagged by the constructor_arg/object_attr decorators
        for name, func in cls.__dict__.items():
            if cls.__is_constructor_arg(func):
                cls.constructor_args[cls.__get_cargname(func)] = getattr(cls, name)
            elif cls.__is_attr(func):
                cls.obj_attrs[cls.__get_obj_attr(func)] = getattr(cls, name)
    @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec),
             'doc': 'The specification for mapping objects to builders'})
    def __init__(self, **kwargs):
        """ Create a map from AbstractContainer attributes to specifications """
        self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__))
        spec = getargs('spec', kwargs)
        self.__spec = spec
        self.__data_type_key = spec.type_key()
        # forward and reverse maps between subspecs and attribute names / constructor argument names
        self.__spec2attr = dict()
        self.__attr2spec = dict()
        self.__spec2carg = dict()
        self.__carg2spec = dict()
        # populate the maps with the default mapping derived from the spec
        self.__map_spec(spec)
    @property
    def spec(self):
        ''' the Spec used in this ObjectMapper '''
        return self.__spec
417 @_constructor_arg('name')
418 def get_container_name(self, *args):
419 builder = args[0]
420 return builder.name
    @classmethod
    @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the name for'})
    def convert_dt_name(cls, **kwargs):
        '''Construct the attribute name corresponding to a specification'''
        spec = getargs('spec', kwargs)
        name = cls.__get_data_type(spec)
        # convert CamelCase data type names to snake_case attribute names
        s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
        name = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
        # pluralize when the spec allows multiple instances
        if name[-1] != 's' and spec.is_many():
            name += 's'
        return name
    @classmethod
    def __get_fields(cls, name_stack, all_names, spec):
        """Recursively map each subspec to a '__'-joined attribute name in all_names."""
        name = spec.name
        if spec.name is None:
            # unnamed specs are keyed by a name derived from their data type
            name = cls.convert_dt_name(spec)
        name_stack.append(name)
        name = '__'.join(name_stack)
        # TODO address potential name clashes, e.g., quantity '*' subgroups and links of same data_type_inc will
        # have the same name
        all_names[name] = spec
        if isinstance(spec, BaseStorageSpec):
            if not (spec.data_type_def is None and spec.data_type_inc is None):
                # don't get names for components in data_types
                name_stack.pop()
                return
            for subspec in spec.attributes:
                cls.__get_fields(name_stack, all_names, subspec)
            if isinstance(spec, GroupSpec):
                for subspec in spec.datasets:
                    cls.__get_fields(name_stack, all_names, subspec)
                for subspec in spec.groups:
                    cls.__get_fields(name_stack, all_names, subspec)
                for subspec in spec.links:
                    cls.__get_fields(name_stack, all_names, subspec)
        name_stack.pop()
    @classmethod
    @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the object attribute names for'})
    def get_attr_names(cls, **kwargs):
        '''Get the attribute names for each subspecification in a Spec'''
        spec = getargs('spec', kwargs)
        # preserve insertion order of discovered names
        names = OrderedDict()
        for subspec in spec.attributes:
            cls.__get_fields(list(), names, subspec)
        if isinstance(spec, GroupSpec):
            for subspec in spec.groups:
                cls.__get_fields(list(), names, subspec)
            for subspec in spec.datasets:
                cls.__get_fields(list(), names, subspec)
            for subspec in spec.links:
                cls.__get_fields(list(), names, subspec)
        return names
477 def __map_spec(self, spec):
478 attr_names = self.get_attr_names(spec)
479 for k, v in attr_names.items():
480 self.map_spec(k, v)
    @docval({"name": "attr_name", "type": str, "doc": "the name of the object to map"},
            {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
    def map_attr(self, **kwargs):
        """ Map an attribute to spec. Use this to override default behavior """
        attr_name, spec = getargs('attr_name', 'spec', kwargs)
        # maintain both directions of the mapping
        self.__spec2attr[spec] = attr_name
        self.__attr2spec[attr_name] = spec
    @docval({"name": "attr_name", "type": str, "doc": "the name of the attribute"})
    def get_attr_spec(self, **kwargs):
        """ Return the Spec for a given attribute, or None if the attribute is not mapped """
        attr_name = getargs('attr_name', kwargs)
        return self.__attr2spec.get(attr_name)
    @docval({"name": "carg_name", "type": str, "doc": "the name of the constructor argument"})
    def get_carg_spec(self, **kwargs):
        """ Return the Spec for a given constructor argument, or None if the argument is not mapped """
        carg_name = getargs('carg_name', kwargs)
        return self.__carg2spec.get(carg_name)
    @docval({"name": "const_arg", "type": str, "doc": "the name of the constructor argument to map"},
            {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
    def map_const_arg(self, **kwargs):
        """ Map a constructor argument to spec. Use this to override default behavior """
        const_arg, spec = getargs('const_arg', 'spec', kwargs)
        # maintain both directions of the mapping
        self.__spec2carg[spec] = const_arg
        self.__carg2spec[const_arg] = spec
    @docval({"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
    def unmap(self, **kwargs):
        """ Remove any mapping for a specification. Use this to override default mapping """
        spec = getargs('spec', kwargs)
        # remove both the attribute and the constructor-argument mapping, if present
        self.__spec2attr.pop(spec, None)
        self.__spec2carg.pop(spec, None)
    @docval({"name": "attr_carg", "type": str, "doc": "the constructor argument/object attribute to map this spec to"},
            {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
    def map_spec(self, **kwargs):
        """ Map the given specification to the construct argument and object attribute """
        spec, attr_carg = getargs('spec', 'attr_carg', kwargs)
        # the same name is used for both the constructor argument and the object attribute
        self.map_const_arg(attr_carg, spec)
        self.map_attr(attr_carg, spec)
525 def __get_override_carg(self, *args):
526 name = args[0]
527 remaining_args = tuple(args[1:])
528 if name in self.constructor_args:
529 self.logger.debug(" Calling override function for constructor argument '%s'" % name)
530 func = self.constructor_args[name]
531 return func(self, *remaining_args)
532 return None
534 def __get_override_attr(self, name, container, manager):
535 if name in self.obj_attrs:
536 self.logger.debug(" Calling override function for attribute '%s'" % name)
537 func = self.obj_attrs[name]
538 return func(self, container, manager)
539 return None
541 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute for"},
542 returns='the attribute name', rtype=str)
543 def get_attribute(self, **kwargs):
544 ''' Get the object attribute name for the given Spec '''
545 spec = getargs('spec', kwargs)
546 val = self.__spec2attr.get(spec, None)
547 return val
    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"},
            {"name": "container", "type": AbstractContainer, "doc": "the container to get the attribute value from"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"},
            returns='the value of the attribute')
    def get_attr_value(self, **kwargs):
        ''' Get the value of the attribute corresponding to this spec from the given container '''
        spec, container, manager = getargs('spec', 'container', 'manager', kwargs)
        attr_name = self.get_attribute(spec)
        if attr_name is None:
            return None
        # an override registered via the object_attr decorator takes precedence over the container attribute
        attr_val = self.__get_override_attr(attr_name, container, manager)
        if attr_val is None:
            try:
                attr_val = getattr(container, attr_name)
            except AttributeError:
                msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s"
                       % (container.__class__.__name__, container.name, attr_name, spec))
                raise ContainerConfigurationError(msg)
            if attr_val is not None:
                attr_val = self.__convert_string(attr_val, spec)
                spec_dt = self.__get_data_type(spec)
                if spec_dt is not None:
                    # a typed spec: keep only values matching the spec data type
                    try:
                        attr_val = self.__filter_by_spec_dt(attr_val, spec_dt, manager)
                    except ValueError as e:
                        msg = ("%s '%s' attribute '%s' has unexpected type."
                               % (container.__class__.__name__, container.name, attr_name))
                        raise ContainerConfigurationError(msg) from e
            # else: attr_val is an attribute on the Container and its value is None
        # attr_val can be None, an AbstractContainer, or a list of AbstractContainers
        return attr_val
581 @classmethod
582 def __get_data_type(cls, spec):
583 ret = None
584 if isinstance(spec, LinkSpec):
585 ret = spec.target_type
586 elif isinstance(spec, BaseStorageSpec):
587 if spec.data_type_def is not None:
588 ret = spec.data_type_def
589 elif spec.data_type_inc is not None:
590 ret = spec.data_type_inc
591 # else, untyped group/dataset spec
592 # else, attribute spec
593 return ret
    def __convert_string(self, value, spec):
        """Convert string types to the specified dtype."""
        ret = value
        if isinstance(spec, AttributeSpec):
            if 'text' in spec.dtype:
                # coerce scalars with str(); shaped values element-wise
                if spec.shape is not None or spec.dims is not None:
                    ret = list(map(str, value))
                else:
                    ret = str(value)
        elif isinstance(spec, DatasetSpec):
            # TODO: make sure we can handle specs with data_type_inc set
            if spec.data_type_inc is None and spec.dtype is not None:
                string_type = None
                if 'text' in spec.dtype:
                    string_type = str
                elif 'ascii' in spec.dtype:
                    string_type = bytes
                elif 'isodatetime' in spec.dtype:
                    def string_type(x):
                        return x.isoformat()  # method works for both date and datetime
                if string_type is not None:
                    if spec.shape is not None or spec.dims is not None:
                        ret = list(map(string_type, value))
                    else:
                        ret = string_type(value)
                    # copy over any I/O parameters if they were specified
                    if isinstance(value, DataIO):
                        params = value.get_io_params()
                        params['data'] = ret
                        ret = value.__class__(**params)
        return ret
    def __filter_by_spec_dt(self, attr_value, spec_dt, build_manager):
        """Return a list of containers that match the spec data type.

        If attr_value is a container that does not match the spec data type, then None is returned.
        If attr_value is a collection, then a list of only the containers in the collection that match the
        spec data type are returned.
        Otherwise, attr_value is returned unchanged.

        spec_dt is a string representing a spec data type.

        Return None, an AbstractContainer, or a list of AbstractContainers
        """
        if isinstance(attr_value, AbstractContainer):
            if build_manager.is_sub_data_type(attr_value, spec_dt):
                return attr_value
            else:
                return None

        ret = attr_value
        if isinstance(attr_value, (list, tuple, set, dict)):
            if isinstance(attr_value, dict):
                attr_values = attr_value.values()
            else:
                attr_values = attr_value
            ret = []
            # NOTE: this will test collections of non-containers element-wise (e.g. lists of lists of ints)
            for c in attr_values:
                if self.__filter_by_spec_dt(c, spec_dt, build_manager) is not None:
                    ret.append(c)
            if len(ret) == 0:
                ret = None  # no element of the collection matched the spec data type
        else:
            raise ValueError("Unexpected type for attr_value: %s. Only AbstractContainer, list, tuple, set, dict, are "
                             "allowed." % type(attr_value))
        return ret
    def __check_quantity(self, attr_value, spec, container):
        """Warn when attr_value does not satisfy the spec's required/quantity constraints."""
        if attr_value is None and spec.required:
            attr_name = self.get_attribute(spec)
            msg = ("%s '%s' is missing required value for attribute '%s'."
                   % (container.__class__.__name__, container.name, attr_name))
            warnings.warn(msg, MissingRequiredBuildWarning)
            self.logger.debug('MissingRequiredBuildWarning: ' + msg)
        elif attr_value is not None and self.__get_data_type(spec) is not None:
            # quantity is valid only for specs with a data type or target type
            if isinstance(attr_value, AbstractContainer):
                attr_value = [attr_value]
            n = len(attr_value)
            # warn if multiple values were given for a non-many spec, or the count differs from a fixed quantity
            if (n and isinstance(attr_value[0], AbstractContainer) and
                    ((n > 1 and not spec.is_many()) or (isinstance(spec.quantity, int) and n != spec.quantity))):
                attr_name = self.get_attribute(spec)
                msg = ("%s '%s' has %d values for attribute '%s' but spec allows %s."
                       % (container.__class__.__name__, container.name, n, attr_name, repr(spec.quantity)))
                warnings.warn(msg, IncorrectQuantityBuildWarning)
                self.logger.debug('IncorrectQuantityBuildWarning: ' + msg)
683 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"},
684 returns="the name of the constructor argument", rtype=str)
685 def get_const_arg(self, **kwargs):
686 ''' Get the constructor argument for the given Spec '''
687 spec = getargs('spec', kwargs)
688 return self.__spec2carg.get(spec, None)
    @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager to use for managing this build"},
            {"name": "parent", "type": GroupBuilder, "doc": "the parent of the resulting Builder", 'default': None},
            {"name": "source", "type": str,
             "doc": "the source of container being built i.e. file path", 'default': None},
            {"name": "builder", "type": BaseBuilder, "doc": "the Builder to build on", 'default': None},
            {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec extension", 'default': None},
            {"name": "export", "type": bool, "doc": "whether this build is for exporting",
             'default': False},
            returns="the Builder representing the given AbstractContainer", rtype=Builder)
    def build(self, **kwargs):
        '''Convert an AbstractContainer to a Builder representation.

        References are not added but are queued to be added in the BuildManager.
        '''
        container, manager, parent, source = getargs('container', 'manager', 'parent', 'source', kwargs)
        builder, spec_ext, export = getargs('builder', 'spec_ext', 'export', kwargs)
        name = manager.get_builder_name(container)
        if isinstance(self.__spec, GroupSpec):
            # group spec: build the group and recurse into its datasets, subgroups, and links
            self.logger.debug("Building %s '%s' as a group (source: %s)"
                              % (container.__class__.__name__, container.name, repr(source)))
            if builder is None:
                builder = GroupBuilder(name, parent=parent, source=source)
            self.__add_datasets(builder, self.__spec.datasets, container, manager, source, export)
            self.__add_groups(builder, self.__spec.groups, container, manager, source, export)
            self.__add_links(builder, self.__spec.links, container, manager, source, export)
        else:
            # dataset spec
            if builder is None:
                if not isinstance(container, Data):
                    msg = "'container' must be of type Data with DatasetSpec"
                    raise ValueError(msg)
                # resolve dtype/shape against the optional spec extension
                spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext)
                if isinstance(spec_dtype, RefSpec):
                    self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                      % (container.__class__.__name__, container.name, repr(source)))
                    # create dataset builder with data=None as a placeholder. fill in with refs later
                    builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype.reftype)
                    manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager))
                elif isinstance(spec_dtype, list):
                    # a compound dataset
                    self.logger.debug("Building %s '%s' as a dataset of compound dtypes (source: %s)"
                                      % (container.__class__.__name__, container.name, repr(source)))
                    # create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later
                    builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype)
                    manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container,
                                                                          manager))
                else:
                    # a regular dtype
                    if spec_dtype is None and self.__is_reftype(container.data):
                        self.logger.debug("Building %s '%s' containing references as a dataset of unspecified dtype "
                                          "(source: %s)"
                                          % (container.__class__.__name__, container.name, repr(source)))
                        # an unspecified dtype and we were given references
                        # create dataset builder with data=None as a placeholder. fill in with refs later
                        builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype='object')
                        manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager))
                    else:
                        # a dataset that has no references, pass the conversion off to the convert_dtype method
                        self.logger.debug("Building %s '%s' as a dataset (source: %s)"
                                          % (container.__class__.__name__, container.name, repr(source)))
                        try:
                            # use spec_dtype from self.spec when spec_ext does not specify dtype
                            bldr_data, dtype = self.convert_dtype(spec, container.data, spec_dtype=spec_dtype)
                        except Exception as ex:
                            msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name)
                            raise Exception(msg) from ex
                        builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype)

        # Add attributes from the specification extension to the list of attributes
        all_attrs = self.__spec.attributes + getattr(spec_ext, 'attributes', tuple())
        # If the spec_ext refines an existing attribute it will now appear twice in the list. The
        # refinement should only be relevant for validation (not for write). To avoid problems with the
        # write we here remove duplicates and keep the original spec of the two to make write work.
        # TODO: We should add validation in the AttributeSpec to make sure refinements are valid
        # TODO: Check the BuildManager as refinements should probably be resolved rather than be passed in via spec_ext
        all_attrs = list({a.name: a for a in all_attrs[::-1]}.values())
        self.__add_attributes(builder, all_attrs, container, manager, source, export)
        return builder
769 def __check_dset_spec(self, orig, ext):
770 """
771 Check a dataset spec against a refining spec to see which dtype and shape should be used
772 """
773 dtype = orig.dtype
774 shape = orig.shape
775 spec = orig
776 if ext is not None:
777 if ext.dtype is not None:
778 dtype = ext.dtype
779 if ext.shape is not None:
780 shape = ext.shape
781 spec = ext
782 return dtype, shape, spec
    def __is_reftype(self, data):
        """Return True if ``data`` (descending into nested sequences) bottoms out at an
        AbstractContainer, meaning the dataset should be written as object references.

        Data chunk iterators (possibly wrapped in DataIO) are never treated as
        reference data.
        """
        if (isinstance(data, AbstractDataChunkIterator) or
                (isinstance(data, DataIO) and isinstance(data.data, AbstractDataChunkIterator))):
            return False

        # drill down into nested sequences until we reach a leaf value
        # (container, string/bytes, scalar, or an empty sequence)
        tmp = data
        while hasattr(tmp, '__len__') and not isinstance(tmp, (AbstractContainer, str, bytes)):
            tmptmp = None
            for t in tmp:
                # In case of a numeric array stop the iteration at the first element to avoid long-running loop
                if isinstance(t, (int, float, complex, bool)):
                    break
                if hasattr(t, '__len__') and len(t) > 0 and not isinstance(t, (AbstractContainer, str, bytes)):
                    # NOTE(review): assigns tmp[0] rather than the element t that triggered
                    # this branch, and the outer loop then stops -- confirm this is the
                    # intended handling of nested/ragged sequences
                    tmptmp = tmp[0]
                    break
            if tmptmp is not None:
                break
            else:
                if len(tmp) == 0:
                    tmp = None
                else:
                    tmp = tmp[0]
        if isinstance(tmp, AbstractContainer):
            return True
        else:
            return False
811 def __set_dataset_to_refs(self, builder, dtype, shape, container, build_manager):
812 self.logger.debug("Queueing set dataset of references %s '%s' to reference builder(s)"
813 % (builder.__class__.__name__, builder.name))
815 def _filler():
816 builder.data = self.__get_ref_builder(builder, dtype, shape, container, build_manager)
818 return _filler
820 def __set_compound_dataset_to_refs(self, builder, spec, spec_dtype, container, build_manager):
821 self.logger.debug("Queueing convert compound dataset %s '%s' and set any references to reference builders"
822 % (builder.__class__.__name__, builder.name))
824 def _filler():
825 self.logger.debug("Converting compound dataset %s '%s' and setting any references to reference builders"
826 % (builder.__class__.__name__, builder.name))
827 # convert the reference part(s) of a compound dataset to ReferenceBuilders, row by row
828 refs = [(i, subt) for i, subt in enumerate(spec_dtype) if isinstance(subt.dtype, RefSpec)]
829 bldr_data = list()
830 for i, row in enumerate(container.data):
831 tmp = list(row)
832 for j, subt in refs:
833 tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager)
834 bldr_data.append(tuple(tmp))
835 builder.data = bldr_data
837 return _filler
839 def __set_untyped_dataset_to_refs(self, builder, container, build_manager):
840 self.logger.debug("Queueing set untyped dataset %s '%s' to reference builders"
841 % (builder.__class__.__name__, builder.name))
843 def _filler():
844 self.logger.debug("Setting untyped dataset %s '%s' to list of reference builders"
845 % (builder.__class__.__name__, builder.name))
846 bldr_data = list()
847 for d in container.data:
848 if d is None:
849 bldr_data.append(None)
850 else:
851 target_builder = self.__get_target_builder(d, build_manager, builder)
852 bldr_data.append(ReferenceBuilder(target_builder))
853 builder.data = bldr_data
855 return _filler
857 def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
858 bldr_data = None
859 if dtype.is_region(): 859 ↛ 860line 859 didn't jump to line 860, because the condition on line 859 was never true
860 if shape is None:
861 if not isinstance(container, DataRegion):
862 msg = "'container' must be of type DataRegion if spec represents region reference"
863 raise ValueError(msg)
864 self.logger.debug("Setting %s '%s' data to region reference builder"
865 % (builder.__class__.__name__, builder.name))
866 target_builder = self.__get_target_builder(container.data, build_manager, builder)
867 bldr_data = RegionBuilder(container.region, target_builder)
868 else:
869 self.logger.debug("Setting %s '%s' data to list of region reference builders"
870 % (builder.__class__.__name__, builder.name))
871 bldr_data = list()
872 for d in container.data:
873 target_builder = self.__get_target_builder(d.target, build_manager, builder)
874 bldr_data.append(RegionBuilder(d.slice, target_builder))
875 else:
876 self.logger.debug("Setting object reference dataset on %s '%s' data"
877 % (builder.__class__.__name__, builder.name))
878 if isinstance(container, Data):
879 self.logger.debug("Setting %s '%s' data to list of reference builders"
880 % (builder.__class__.__name__, builder.name))
881 bldr_data = list()
882 for d in container.data:
883 target_builder = self.__get_target_builder(d, build_manager, builder)
884 bldr_data.append(ReferenceBuilder(target_builder))
885 else:
886 self.logger.debug("Setting %s '%s' data to reference builder"
887 % (builder.__class__.__name__, builder.name))
888 target_builder = self.__get_target_builder(container, build_manager, builder)
889 bldr_data = ReferenceBuilder(target_builder)
890 return bldr_data
892 def __get_target_builder(self, container, build_manager, builder):
893 target_builder = build_manager.get_builder(container)
894 if target_builder is None:
895 raise ReferenceTargetNotBuiltError(builder, container)
896 return target_builder
    def __add_attributes(self, builder, attributes, container, build_manager, source, export):
        """Set attributes on ``builder`` for each attribute spec.

        Values come from the spec's fixed value, the container (via get_attr_value),
        or the spec default, in that order. Reference-typed attributes are queued on
        the build manager to be resolved after all targets are built; all other
        values are dtype-converted and set immediately.
        """
        if attributes:
            self.logger.debug("Adding attributes from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in attributes:
            self.logger.debug("    Adding attribute for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            if spec.value is not None:
                # the spec pins a constant value; the container is not consulted
                attr_value = spec.value
            else:
                attr_value = self.get_attr_value(spec, container, build_manager)
                if attr_value is None:
                    attr_value = spec.default_value

            # invert a ReferenceResolver so raw builder data is used
            attr_value = self.__check_ref_resolver(attr_value)

            # warn/raise per spec quantity before skipping empty values
            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug("        Skipping empty attribute")
                continue

            if isinstance(spec.dtype, RefSpec):
                if not self.__is_reftype(attr_value):
                    msg = ("invalid type for reference '%s' (%s) - must be AbstractContainer"
                           % (spec.name, type(attr_value)))
                    raise ValueError(msg)

                # resolution deferred: the target may not be built yet
                build_manager.queue_ref(self.__set_attr_to_ref(builder, attr_value, build_manager, spec))
                continue
            else:
                try:
                    attr_value, attr_dtype = self.convert_dtype(spec, attr_value)
                except Exception as ex:
                    msg = 'could not convert %s for %s %s' % (spec.name, type(container).__name__, container.name)
                    raise BuildError(builder, msg) from ex

                # do not write empty or null valued objects
                # (conversion may have produced None, so re-check)
                self.__check_quantity(attr_value, spec, container)
                if attr_value is None:
                    self.logger.debug("        Skipping empty attribute")
                    continue

            builder.set_attribute(spec.name, attr_value)
943 def __set_attr_to_ref(self, builder, attr_value, build_manager, spec):
944 self.logger.debug("Queueing set reference attribute on %s '%s' attribute '%s' to %s"
945 % (builder.__class__.__name__, builder.name, spec.name,
946 attr_value.__class__.__name__))
948 def _filler():
949 self.logger.debug("Setting reference attribute on %s '%s' attribute '%s' to %s"
950 % (builder.__class__.__name__, builder.name, spec.name,
951 attr_value.__class__.__name__))
952 target_builder = self.__get_target_builder(attr_value, build_manager, builder)
953 ref_attr_value = ReferenceBuilder(target_builder)
954 builder.set_attribute(spec.name, ref_attr_value)
956 return _filler
958 def __add_links(self, builder, links, container, build_manager, source, export):
959 if links:
960 self.logger.debug("Adding links from %s '%s' to %s '%s'"
961 % (container.__class__.__name__, container.name,
962 builder.__class__.__name__, builder.name))
963 for spec in links:
964 self.logger.debug(" Adding link for spec name: %s, target_type: %s"
965 % (repr(spec.name), repr(spec.target_type)))
966 attr_value = self.get_attr_value(spec, container, build_manager)
967 self.__check_quantity(attr_value, spec, container)
968 if attr_value is None:
969 self.logger.debug(" Skipping link - no attribute value")
970 continue
971 self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
    def __add_datasets(self, builder, datasets, container, build_manager, source, export):
        """Add sub-datasets of this object's spec to ``builder``.

        Three cases per spec: (1) the value is already a LinkBuilder -> add it as a
        link; (2) untyped, named dataset -> reuse or create a DatasetBuilder in place
        and add its attributes; (3) typed dataset -> delegate to __add_containers.
        """
        if datasets:
            self.logger.debug("Adding datasets from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in datasets:
            self.logger.debug("    Adding dataset for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            attr_value = self.get_attr_value(spec, container, build_manager)
            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug("        Skipping dataset - no attribute value")
                continue
            # invert a ReferenceResolver so raw builder data is used
            attr_value = self.__check_ref_resolver(attr_value)
            if isinstance(attr_value, LinkBuilder):
                self.logger.debug("    Adding %s '%s' for spec name: %s, %s: %s, %s: %s"
                                  % (attr_value.name, attr_value.__class__.__name__,
                                     repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                builder.set_link(attr_value)  # add the existing builder
            elif spec.data_type_def is None and spec.data_type_inc is None:  # untyped, named dataset
                if spec.name in builder.datasets:
                    # dataset already created (e.g. by a custom mapper) -- only add attributes
                    sub_builder = builder.datasets[spec.name]
                    self.logger.debug("    Retrieving existing DatasetBuilder '%s' for spec name %s and adding "
                                      "attributes" % (sub_builder.name, repr(spec.name)))
                else:
                    self.logger.debug("    Converting untyped dataset for spec name %s to spec dtype %s"
                                      % (repr(spec.name), repr(spec.dtype)))
                    try:
                        data, dtype = self.convert_dtype(spec, attr_value)
                    except Exception as ex:
                        msg = 'could not convert \'%s\' for %s \'%s\''
                        msg = msg % (spec.name, type(container).__name__, container.name)
                        raise BuildError(builder, msg) from ex
                    self.logger.debug("    Adding untyped dataset for spec name %s and adding attributes"
                                      % repr(spec.name))
                    sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype)
                    builder.set_dataset(sub_builder)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
            else:
                self.logger.debug("    Adding typed dataset for spec name: %s, %s: %s, %s: %s"
                                  % (repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
    def __add_groups(self, builder, groups, container, build_manager, source, export):
        """Add sub-groups of this object's spec to ``builder``.

        Untyped named groups are built in place, recursing into their attributes,
        datasets, links, and subgroups; typed groups are delegated to
        __add_containers.
        """
        if groups:
            self.logger.debug("Adding groups from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in groups:
            if spec.data_type_def is None and spec.data_type_inc is None:
                self.logger.debug("    Adding untyped group for spec name: %s" % repr(spec.name))
                # we don't need to get attr_name since any named group does not have the concept of value
                sub_builder = builder.groups.get(spec.name)
                if sub_builder is None:
                    sub_builder = GroupBuilder(spec.name, source=source)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
                self.__add_datasets(sub_builder, spec.datasets, container, build_manager, source, export)
                self.__add_links(sub_builder, spec.links, container, build_manager, source, export)
                self.__add_groups(sub_builder, spec.groups, container, build_manager, source, export)
                empty = sub_builder.is_empty()
                # attach the group only if it has content or the spec requires it
                if not empty or (empty and spec.required):
                    if sub_builder.name not in builder.groups:
                        builder.set_group(sub_builder)
            else:
                self.logger.debug("    Adding group for spec name: %s, %s: %s, %s: %s"
                                  % (repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                attr_value = self.get_attr_value(spec, container, build_manager)
                self.__check_quantity(attr_value, spec, container)
                if attr_value is not None:
                    self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
    def __add_containers(self, builder, spec, value, build_manager, source, parent_container, export):
        """Build ``value`` (an AbstractContainer or a list of them) and attach the
        result to ``builder`` as a subgroup, dataset, or link depending on ``spec``
        and the container's parentage/source.

        Raises OrphanContainerBuildError for a linked container that was removed
        from the file, and ValueError for an unmodified container with no source.
        """
        if isinstance(value, AbstractContainer):
            self.logger.debug("    Adding container %s '%s' with parent %s '%s' to %s '%s'"
                              % (value.__class__.__name__, value.name,
                                 parent_container.__class__.__name__, parent_container.name,
                                 builder.__class__.__name__, builder.name))
            if value.parent is None:
                if (value.container_source == parent_container.container_source or
                        build_manager.get_builder(value) is None):
                    # value was removed (or parent not set) and there is a link to it in same file
                    # or value was read from an external link
                    raise OrphanContainerBuildError(builder, value)

            if value.modified or export:
                # writing a newly instantiated container (modified is False only after read) or as if it is newly
                # instantianted (export=True)
                self.logger.debug("    Building newly instantiated %s '%s'" % (value.__class__.__name__, value.name))
                if isinstance(spec, BaseStorageSpec):
                    # pass the spec as an extension so refinements apply during the build
                    new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                else:
                    new_builder = build_manager.build(value, source=source, export=export)
                # use spec to determine what kind of HDF5 object this AbstractContainer corresponds to
                if isinstance(spec, LinkSpec) or value.parent is not parent_container:
                    self.logger.debug("    Adding link to %s '%s' in %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                elif isinstance(spec, DatasetSpec):
                    self.logger.debug("    Adding dataset %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_dataset(new_builder)
                else:
                    self.logger.debug("    Adding subgroup %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_group(new_builder)
            elif value.container_source:  # make a link to an existing container
                if (value.container_source != parent_container.container_source
                        or value.parent is not parent_container):
                    # the value lives elsewhere (other file or other parent): build it and link to it
                    self.logger.debug("    Building %s '%s' (container source: %s) and adding a link to it"
                                      % (value.__class__.__name__, value.name, value.container_source))
                    if isinstance(spec, BaseStorageSpec):
                        new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                    else:
                        new_builder = build_manager.build(value, source=source, export=export)
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                else:
                    self.logger.debug("    Skipping build for %s '%s' because both it and its parents were read "
                                      "from the same source."
                                      % (value.__class__.__name__, value.name))
            else:
                raise ValueError("Found unmodified AbstractContainer with no source - '%s' with parent '%s'" %
                                 (value.name, parent_container.name))
        elif isinstance(value, list):
            # recurse per element; each element is attached independently
            for container in value:
                self.__add_containers(builder, spec, container, build_manager, source, parent_container, export)
        else:  # pragma: no cover
            msg = ("Received %s, expected AbstractContainer or a list of AbstractContainers."
                   % value.__class__.__name__)
            raise ValueError(msg)
    def __get_subspec_values(self, builder, spec, manager):
        """Map each sub-specification of ``spec`` to the value extracted from
        ``builder``: constructed containers for typed sub-builders/references, raw
        values otherwise. Returns a dict of {subspec: value}.
        """
        ret = dict()
        # First get attributes
        attributes = builder.attributes
        for attr_spec in spec.attributes:
            attr_val = attributes.get(attr_spec.name)
            if attr_val is None:
                continue
            if isinstance(attr_val, (GroupBuilder, DatasetBuilder)):
                ret[attr_spec] = manager.construct(attr_val)
            elif isinstance(attr_val, RegionBuilder):  # pragma: no cover
                raise ValueError("RegionReferences as attributes is not yet supported")
            elif isinstance(attr_val, ReferenceBuilder):
                ret[attr_spec] = manager.construct(attr_val.builder)
            else:
                ret[attr_spec] = attr_val
        if isinstance(spec, GroupSpec):
            if not isinstance(builder, GroupBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass GroupBuilder with GroupSpec")
            # first aggregate links by data type and separate them
            # by group and dataset
            groups = dict(builder.groups)  # make a copy so we can separate links
            datasets = dict(builder.datasets)  # make a copy so we can separate links
            links = builder.links
            link_dt = dict()
            for link_builder in links.values():
                target = link_builder.builder
                if isinstance(target, DatasetBuilder):
                    datasets[link_builder.name] = target
                else:
                    groups[link_builder.name] = target
                dt = manager.get_builder_dt(target)
                if dt is not None:
                    link_dt.setdefault(dt, list()).append(target)
            # now assign links to their respective specification
            for subspec in spec.links:
                if subspec.name is not None and subspec.name in links:
                    # named link: resolve by name
                    ret[subspec] = manager.construct(links[subspec.name].builder)
                else:
                    # unnamed link: resolve by target data type
                    sub_builder = link_dt.get(subspec.target_type)
                    if sub_builder is not None:
                        ret[subspec] = self.__flatten(sub_builder, subspec, manager)
            # now process groups and datasets
            self.__get_sub_builders(groups, spec.groups, manager, ret)
            self.__get_sub_builders(datasets, spec.datasets, manager, ret)
        elif isinstance(spec, DatasetSpec):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec")
            if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and
                    type(builder.data[0]) != np.void):
                # if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset
                builder['data'] = builder.data[0]  # use dictionary reference instead of .data to bypass error
            ret[spec] = self.__check_ref_resolver(builder.data)
        return ret
1167 @staticmethod
1168 def __check_ref_resolver(data):
1169 """
1170 Check if this dataset is a reference resolver, and invert it if so.
1171 """
1172 if isinstance(data, ReferenceResolver):
1173 return data.invert()
1174 return data
    def __get_sub_builders(self, sub_builders, subspecs, manager, ret):
        """For each sub-spec, find matching entries of ``sub_builders`` (by name or
        by data type) and record the constructed value(s) in ``ret`` (in place).
        """
        # index builders by data_type
        builder_dt = dict()
        for g in sub_builders.values():
            dt = manager.get_builder_dt(g)
            ns = manager.get_builder_ns(g)
            if dt is None or ns is None:
                continue
            # register the builder under its own type and every ancestor type so
            # specs that reference a parent type still match
            for parent_dt in manager.namespace_catalog.get_hierarchy(ns, dt):
                builder_dt.setdefault(parent_dt, list()).append(g)
        for subspec in subspecs:
            # first get data type for the spec
            if subspec.data_type_def is not None:
                dt = subspec.data_type_def
            elif subspec.data_type_inc is not None:
                dt = subspec.data_type_inc
            else:
                dt = None
            # use name if we can, otherwise use data type
            if subspec.name is None:
                sub_builder = builder_dt.get(dt)
                if sub_builder is not None:
                    sub_builder = self.__flatten(sub_builder, subspec, manager)
                    ret[subspec] = sub_builder
            else:
                sub_builder = sub_builders.get(subspec.name)
                if sub_builder is None:
                    continue
                if dt is None:
                    # recurse
                    ret.update(self.__get_subspec_values(sub_builder, subspec, manager))
                else:
                    ret[subspec] = manager.construct(sub_builder)
1210 def __flatten(self, sub_builder, subspec, manager):
1211 tmp = [manager.construct(b) for b in sub_builder]
1212 if len(tmp) == 1 and not subspec.is_many():
1213 tmp = tmp[0]
1214 return tmp
    @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder),
             'doc': 'the builder to construct the AbstractContainer from'},
            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager for this build'},
            {'name': 'parent', 'type': (Proxy, AbstractContainer),
             'doc': 'the parent AbstractContainer/Proxy for the AbstractContainer being built', 'default': None})
    def construct(self, **kwargs):
        ''' Construct an AbstractContainer from the given Builder '''
        builder, manager, parent = getargs('builder', 'manager', 'parent', kwargs)
        cls = manager.get_cls(builder)
        # gather all subspecs
        subspecs = self.__get_subspec_values(builder, self.spec, manager)
        # get the constructor argument that each specification corresponds to
        const_args = dict()
        # For Data container classes, we need to populate the data constructor argument since
        # there is no sub-specification that maps to that argument under the default logic
        if issubclass(cls, Data):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError('Can only construct a Data object from a DatasetBuilder - got %s' % type(builder))
            const_args['data'] = self.__check_ref_resolver(builder.data)
        for subspec, value in subspecs.items():
            const_arg = self.get_const_arg(subspec)
            if const_arg is not None:
                if isinstance(subspec, BaseStorageSpec) and subspec.is_many():
                    # multiple subspecs may feed the same argument: accumulate into one list
                    existing_value = const_args.get(const_arg)
                    if isinstance(existing_value, list):
                        value = existing_value + value
                const_args[const_arg] = value
        # build kwargs for the constructor
        kwargs = dict()
        for const_arg in get_docval(cls.__init__):
            argname = const_arg['name']
            # a registered constructor-arg override takes precedence over the mapped value
            override = self.__get_override_carg(argname, builder, manager)
            if override is not None:
                val = override
            elif argname in const_args:
                val = const_args[argname]
            else:
                continue
            kwargs[argname] = val
        try:
            obj = self.__new_container__(cls, builder.source, parent, builder.attributes.get(self.__spec.id_key()),
                                         **kwargs)
        except Exception as ex:
            msg = 'Could not construct %s object due to: %s' % (cls.__name__, ex)
            raise ConstructError(builder, msg) from ex
        return obj
    def __new_container__(self, cls, container_source, parent, object_id, **kwargs):
        """A wrapper function for ensuring a container gets everything set appropriately"""
        # create the instance via __new__ (not the constructor) so construct-mode state
        # is in place before __init__ runs
        obj = cls.__new__(cls, container_source=container_source, parent=parent, object_id=object_id,
                          in_construct_mode=True)
        # obj has been created and is in construction mode, indicating that the object is being constructed by
        # the automatic construct process during read, rather than by the user
        obj.__init__(**kwargs)
        obj._in_construct_mode = False  # reset to False to indicate that the construction of the object is complete
        return obj
1273 @docval({'name': 'container', 'type': AbstractContainer,
1274 'doc': 'the AbstractContainer to get the Builder name for'})
1275 def get_builder_name(self, **kwargs):
1276 '''Get the name of a Builder that represents a AbstractContainer'''
1277 container = getargs('container', kwargs)
1278 if self.__spec.name is not None:
1279 ret = self.__spec.name
1280 else:
1281 ret = container.name
1282 return ret