Coverage for src/hdmf/build/objectmapper.py: 95%
817 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
1import logging
2import re
3import warnings
4from collections import OrderedDict
5from copy import copy
7import numpy as np
9from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder
10from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
11 ConstructError)
12from .manager import Proxy, BuildManager
13from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning
14from ..container import AbstractContainer, Data, DataRegion
15from ..term_set import TermSetWrapper
16from ..data_utils import DataIO, AbstractDataChunkIterator
17from ..query import ReferenceResolver
18from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec
19from ..spec.spec import BaseStorageSpec
20from ..utils import docval, getargs, ExtenderMeta, get_docval
# Function-attribute name used to tag ObjectMapper methods as constructor-argument overrides
_const_arg = '__constructor_arg'
@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _constructor_arg(**kwargs):
    '''Decorator to override the default mapping scheme for a given constructor argument.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the Builder object that is being mapped. The method should return the value to be passed
    to the target AbstractContainer class constructor argument given by *name*.
    '''
    carg_name = getargs('name', kwargs)

    def _tag(func):
        # mark the function so __gather_procedures can register it under *carg_name*
        setattr(func, _const_arg, carg_name)
        return func

    return _tag
# Function-attribute name used to tag ObjectMapper methods as object-attribute overrides
_obj_attr = '__object_attr'
@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _object_attr(**kwargs):
    '''Decorator to override the default mapping scheme for a given object attribute.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the AbstractContainer object that is being mapped. The method should return the child Builder
    object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the
    attribute given by *name*.
    '''
    attr_name = getargs('name', kwargs)

    def _tag(func):
        # mark the function so __gather_procedures can register it under *attr_name*
        setattr(func, _obj_attr, attr_name)
        return func

    return _tag
67def _unicode(s):
68 """
69 A helper function for converting to Unicode
70 """
71 if isinstance(s, str):
72 return s
73 elif isinstance(s, bytes):
74 return s.decode('utf-8')
75 else:
76 raise ValueError("Expected unicode or ascii string, got %s" % type(s))
79def _ascii(s):
80 """
81 A helper function for converting to ASCII
82 """
83 if isinstance(s, str):
84 return s.encode('ascii', 'backslashreplace')
85 elif isinstance(s, bytes):
86 return s
87 else:
88 raise ValueError("Expected unicode or ascii string, got %s" % type(s))
91class ObjectMapper(metaclass=ExtenderMeta):
92 '''A class for mapping between Spec objects and AbstractContainer attributes
94 '''
    # mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes
    # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
    # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
    # hdmf.validate.validator.__allowable, and backend dtype maps
    __dtypes = {
        "float": np.float32,
        "float32": np.float32,
        "double": np.float64,
        "float64": np.float64,
        "long": np.int64,
        "int64": np.int64,
        "int": np.int32,
        "int32": np.int32,
        "short": np.int16,
        "int16": np.int16,
        "int8": np.int8,
        "uint": np.uint32,
        "uint64": np.uint64,
        "uint32": np.uint32,
        "uint16": np.uint16,
        "uint8": np.uint8,
        "bool": np.bool_,
        # string-like spec dtypes map to conversion helpers rather than numpy types
        "text": _unicode,
        "utf": _unicode,
        "utf8": _unicode,
        "utf-8": _unicode,
        "ascii": _ascii,
        "bytes": _ascii,
        # datetimes are stored as ASCII (ISO format) strings
        "isodatetime": _ascii,
        "datetime": _ascii,
    }

    # Python types registered via no_convert() that convert_dtype should pass through unchanged
    __no_convert = set()
    @classmethod
    def __resolve_numeric_dtype(cls, given, specified):
        """
        Determine the dtype to use from the dtype of the given value and the specified dtype.
        This amounts to determining the greater precision of the two arguments, but also
        checks to make sure the same base dtype is being used. A warning is raised if the
        base type of the specified dtype differs from the base type of the given dtype and
        a conversion will result (e.g., float32 -> uint32).

        Returns a tuple of (numpy scalar type to use, warning message or None).
        """
        g = np.dtype(given)
        s = np.dtype(specified)
        if g == s:
            # dtypes already agree -- no conversion, no warning
            return s.type, None
        if g.itemsize <= s.itemsize:  # given type has precision < precision of specified type
            # note: this allows float32 -> int32, bool -> int8, int16 -> uint16 which may involve buffer overflows,
            # truncated values, and other unexpected consequences.
            warning_msg = ('Value with data type %s is being converted to data type %s as specified.'
                           % (g.name, s.name))
            return s.type, warning_msg
        elif g.name[:3] == s.name[:3]:
            # same base type (e.g., 'int' vs 'int', 'flo' vs 'flo', 'uin' vs 'uin')
            return g.type, None  # same base type, use higher-precision given type
        else:
            # base types differ and the given type is wider: widen toward the spec's base type
            if np.issubdtype(s, np.unsignedinteger):
                # e.g.: given int64 and spec uint32, return uint64. given float32 and spec uint8, return uint32.
                ret_type = np.dtype('uint' + str(int(g.itemsize * 8)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if np.issubdtype(s, np.floating):
                # e.g.: given int64 and spec float32, return float64. given uint64 and spec float32, return float32.
                ret_type = np.dtype('float' + str(max(int(g.itemsize * 8), 32)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if np.issubdtype(s, np.integer):
                # e.g.: given float64 and spec int8, return int64. given uint32 and spec int8, return int32.
                ret_type = np.dtype('int' + str(int(g.itemsize * 8)))
                warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).'
                               % (g.name, ret_type.name, s.name))
                return ret_type.type, warning_msg
            if s.type is np.bool_:
                # a wider non-bool value cannot be narrowed to bool implicitly
                msg = "expected %s, received %s - must supply %s" % (s.name, g.name, s.name)
                raise ValueError(msg)
            # all numeric types in __dtypes should be caught by the above
            raise ValueError('Unsupported conversion to specification data type: %s' % s.name)
    @classmethod
    def no_convert(cls, obj_type):
        """
        Specify an object type that ObjectMappers should not convert.

        Types registered here are passed through convert_dtype unchanged
        (their dtype is inferred rather than coerced).
        """
        cls.__no_convert.add(obj_type)
    @classmethod  # noqa: C901
    def convert_dtype(cls, spec, value, spec_dtype=None):  # noqa: C901
        """
        Convert values to the specified dtype. For example, if a literal int
        is passed in to a field that is specified as a unsigned integer, this function
        will convert the Python int to a numpy unsigned int.

        :param spec: The DatasetSpec or AttributeSpec to which this value is being applied
        :param value: The value being converted to the spec dtype
        :param spec_dtype: Optional override of the dtype in spec.dtype. Used to specify the parent dtype when the given
            extended spec lacks a dtype.

        :return: The function returns a tuple consisting of 1) the value, and 2) the data type.
            The value is returned as the function may convert the input value to comply
            with the dtype specified in the schema.
        """
        if spec_dtype is None:
            spec_dtype = spec.dtype
        # handle None values, compound dtypes, DataIO wrappers, untyped/'numeric' specs, and RefSpecs
        ret, ret_dtype = cls.__check_edgecases(spec, value, spec_dtype)
        if ret is not None or ret_dtype is not None:
            return ret, ret_dtype
        # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions _unicode or _ascii
        spec_dtype_type = cls.__dtypes[spec_dtype]
        warning_msg = None
        # Numpy Array or Zarr array
        if (isinstance(value, np.ndarray) or
                (hasattr(value, 'astype') and hasattr(value, 'dtype'))):
            if spec_dtype_type is _unicode:
                ret = value.astype('U')
                ret_dtype = "utf8"
            elif spec_dtype_type is _ascii:
                ret = value.astype('S')
                ret_dtype = "ascii"
            else:
                dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
                if value.dtype == dtype_func:
                    # already the resolved dtype -- avoid an unnecessary copy
                    ret = value
                else:
                    ret = value.astype(dtype_func)
                ret_dtype = ret.dtype.type
        # Tuple or list
        elif isinstance(value, (tuple, list)):
            if len(value) == 0:
                # empty sequence: dtype cannot be inferred from elements, use the spec dtype directly
                if spec_dtype_type is _unicode:
                    ret_dtype = 'utf8'
                elif spec_dtype_type is _ascii:
                    ret_dtype = 'ascii'
                else:
                    ret_dtype = spec_dtype_type
                return value, ret_dtype
            ret = list()
            # convert each element recursively; the last element's dtype is reported for the sequence
            for elem in value:
                tmp, tmp_dtype = cls.convert_dtype(spec, elem, spec_dtype)
                ret.append(tmp)
            ret = type(value)(ret)  # restore the input container type (tuple or list)
            ret_dtype = tmp_dtype
        # Any DataChunkIterator
        elif isinstance(value, AbstractDataChunkIterator):
            # iterators are never materialized here; only the dtype is resolved
            ret = value
            if spec_dtype_type is _unicode:
                ret_dtype = "utf8"
            elif spec_dtype_type is _ascii:
                ret_dtype = "ascii"
            else:
                ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type)
        else:
            # scalar value
            if spec_dtype_type in (_unicode, _ascii):
                ret_dtype = 'ascii'
                if spec_dtype_type is _unicode:
                    ret_dtype = 'utf8'
                ret = spec_dtype_type(value)
            else:
                dtype_func, warning_msg = cls.__resolve_numeric_dtype(type(value), spec_dtype_type)
                ret = dtype_func(value)
                ret_dtype = type(ret)
        if warning_msg:
            full_warning_msg = "Spec '%s': %s" % (spec.path, warning_msg)
            warnings.warn(full_warning_msg, DtypeConversionWarning)
        return ret, ret_dtype
263 @classmethod
264 def __check_convert_numeric(cls, value_type):
265 # dtype 'numeric' allows only ints, floats, and uints
266 value_dtype = np.dtype(value_type)
267 if not (np.issubdtype(value_dtype, np.unsignedinteger) or
268 np.issubdtype(value_dtype, np.floating) or
269 np.issubdtype(value_dtype, np.integer)):
270 raise ValueError("Cannot convert from %s to 'numeric' specification dtype." % value_type)
272 @classmethod # noqa: C901
273 def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901
274 """
275 Check edge cases in converting data to a dtype
276 """
277 if value is None:
278 # Data is missing. Determine dtype from spec
279 dt = spec_dtype
280 if isinstance(dt, RefSpec):
281 dt = dt.reftype
282 return None, dt
283 if isinstance(spec_dtype, list):
284 # compound dtype - Since the I/O layer needs to determine how to handle these,
285 # return the list of DtypeSpecs
286 return value, spec_dtype
287 if isinstance(value, DataIO):
288 # data is wrapped for I/O via DataIO
289 if value.data is None:
290 # Data is missing so DataIO.dtype must be set to determine the dtype
291 return value, value.dtype
292 else:
293 # Determine the dtype from the DataIO.data
294 return value, cls.convert_dtype(spec, value.data, spec_dtype)[1]
295 if spec_dtype is None or spec_dtype == 'numeric' or type(value) in cls.__no_convert:
296 # infer type from value
297 if hasattr(value, 'dtype'): # covers numpy types, Zarr Array, AbstractDataChunkIterator
298 if spec_dtype == 'numeric':
299 cls.__check_convert_numeric(value.dtype.type)
300 if np.issubdtype(value.dtype, np.str_):
301 ret_dtype = 'utf8'
302 elif np.issubdtype(value.dtype, np.string_):
303 ret_dtype = 'ascii'
304 elif np.issubdtype(value.dtype, np.dtype('O')): 304 ↛ 307line 304 didn't jump to line 307, because the condition on line 304 was never true
305 # Only variable-length strings should ever appear as generic objects.
306 # Everything else should have a well-defined type
307 ret_dtype = 'utf8'
308 else:
309 ret_dtype = value.dtype.type
310 return value, ret_dtype
311 if isinstance(value, (list, tuple)):
312 if len(value) == 0:
313 msg = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype."
314 raise ValueError(msg)
315 return value, cls.__check_edgecases(spec, value[0], spec_dtype)[1] # infer dtype from first element
316 ret_dtype = type(value)
317 if spec_dtype == 'numeric':
318 cls.__check_convert_numeric(ret_dtype)
319 if ret_dtype is str:
320 ret_dtype = 'utf8'
321 elif ret_dtype is bytes:
322 ret_dtype = 'ascii'
323 return value, ret_dtype
324 if isinstance(spec_dtype, RefSpec):
325 if not isinstance(value, ReferenceBuilder):
326 msg = "got RefSpec for value of type %s" % type(value)
327 raise ValueError(msg)
328 return value, spec_dtype
329 if spec_dtype is not None and spec_dtype not in cls.__dtypes: # pragma: no cover
330 msg = "unrecognized dtype: %s -- cannot convert value" % spec_dtype
331 raise ValueError(msg)
332 return None, None
    # class-level copy of the tag attribute name used by the constructor_arg decorator
    _const_arg = '__constructor_arg'
336 @staticmethod
337 @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
338 is_method=False)
339 def constructor_arg(**kwargs):
340 '''Decorator to override the default mapping scheme for a given constructor argument.
342 Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
343 scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
344 first argument the Builder object that is being mapped. The method should return the value to be passed
345 to the target AbstractContainer class constructor argument given by *name*.
346 '''
347 name = getargs('name', kwargs)
348 return _constructor_arg(name)
    # class-level copy of the tag attribute name used by the object_attr decorator
    _obj_attr = '__object_attr'
352 @staticmethod
353 @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
354 is_method=False)
355 def object_attr(**kwargs):
356 '''Decorator to override the default mapping scheme for a given object attribute.
358 Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
359 scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
360 first argument the AbstractContainer object that is being mapped. The method should return the child Builder
361 object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the
362 attribute given by *name*.
363 '''
364 name = getargs('name', kwargs)
365 return _object_attr(name)
    @staticmethod
    def __is_attr(attr_val):
        # True if the function was tagged by the object_attr decorator
        return hasattr(attr_val, _obj_attr)
    @staticmethod
    def __get_obj_attr(attr_val):
        # the attribute name this override function was registered for via object_attr
        return getattr(attr_val, _obj_attr)
    @staticmethod
    def __is_constructor_arg(attr_val):
        # True if the function was tagged by the constructor_arg decorator
        return hasattr(attr_val, _const_arg)
    @staticmethod
    def __get_cargname(attr_val):
        # the constructor argument name this override function was registered for via constructor_arg
        return getattr(attr_val, _const_arg)
    @ExtenderMeta.post_init
    def __gather_procedures(cls, name, bases, classdict):
        # Class-creation hook: collect methods tagged by the constructor_arg / object_attr
        # decorators into per-class registries of override functions.
        if hasattr(cls, 'constructor_args'):
            # copy the inherited registry so subclasses do not mutate the parent's dict
            cls.constructor_args = copy(cls.constructor_args)
        else:
            cls.constructor_args = dict()
        if hasattr(cls, 'obj_attrs'):
            cls.obj_attrs = copy(cls.obj_attrs)
        else:
            cls.obj_attrs = dict()
        for name, func in cls.__dict__.items():
            if cls.__is_constructor_arg(func):
                # register under the constructor argument name the decorator recorded
                cls.constructor_args[cls.__get_cargname(func)] = getattr(cls, name)
            elif cls.__is_attr(func):
                # register under the object attribute name the decorator recorded
                cls.obj_attrs[cls.__get_obj_attr(func)] = getattr(cls, name)
    @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec),
             'doc': 'The specification for mapping objects to builders'})
    def __init__(self, **kwargs):
        """ Create a map from AbstractContainer attributes to specifications """
        self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__))
        spec = getargs('spec', kwargs)
        self.__spec = spec
        # the key string this spec uses to identify data types (from spec.type_key())
        self.__data_type_key = spec.type_key()
        # bidirectional lookup tables: spec <-> object attribute name, spec <-> constructor argument name
        self.__spec2attr = dict()
        self.__attr2spec = dict()
        self.__spec2carg = dict()
        self.__carg2spec = dict()
        # populate the tables from the spec and all of its subspecs
        self.__map_spec(spec)
    @property
    def spec(self):
        ''' the Spec used in this ObjectMapper '''
        return self.__spec
418 @_constructor_arg('name')
419 def get_container_name(self, *args):
420 builder = args[0]
421 return builder.name
423 @classmethod
424 @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the name for'})
425 def convert_dt_name(cls, **kwargs):
426 '''Construct the attribute name corresponding to a specification'''
427 spec = getargs('spec', kwargs)
428 name = cls.__get_data_type(spec)
429 s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name)
430 name = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower()
431 if name[-1] != 's' and spec.is_many():
432 name += 's'
433 return name
    @classmethod
    def __get_fields(cls, name_stack, all_names, spec):
        # Recursively collect attribute names for *spec* and its subspecs into *all_names*.
        # Nested specs under unnamed parents get compound names joined with '__'.
        name = spec.name
        if spec.name is None:
            # unnamed spec: derive an attribute name from its data type
            name = cls.convert_dt_name(spec)
        name_stack.append(name)
        name = '__'.join(name_stack)
        # TODO address potential name clashes, e.g., quantity '*' subgroups and links of same data_type_inc will
        # have the same name
        all_names[name] = spec
        if isinstance(spec, BaseStorageSpec):
            if not (spec.data_type_def is None and spec.data_type_inc is None):
                # don't get names for components in data_types
                name_stack.pop()
                return
            for subspec in spec.attributes:
                cls.__get_fields(name_stack, all_names, subspec)
            if isinstance(spec, GroupSpec):
                for subspec in spec.datasets:
                    cls.__get_fields(name_stack, all_names, subspec)
                for subspec in spec.groups:
                    cls.__get_fields(name_stack, all_names, subspec)
                for subspec in spec.links:
                    cls.__get_fields(name_stack, all_names, subspec)
        # restore the stack for the caller's next sibling
        name_stack.pop()
461 @classmethod
462 @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the object attribute names for'})
463 def get_attr_names(cls, **kwargs):
464 '''Get the attribute names for each subspecification in a Spec'''
465 spec = getargs('spec', kwargs)
466 names = OrderedDict()
467 for subspec in spec.attributes:
468 cls.__get_fields(list(), names, subspec)
469 if isinstance(spec, GroupSpec):
470 for subspec in spec.groups:
471 cls.__get_fields(list(), names, subspec)
472 for subspec in spec.datasets:
473 cls.__get_fields(list(), names, subspec)
474 for subspec in spec.links:
475 cls.__get_fields(list(), names, subspec)
476 return names
478 def __map_spec(self, spec):
479 attr_names = self.get_attr_names(spec)
480 for k, v in attr_names.items():
481 self.map_spec(k, v)
483 @docval({"name": "attr_name", "type": str, "doc": "the name of the object to map"},
484 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
485 def map_attr(self, **kwargs):
486 """ Map an attribute to spec. Use this to override default behavior """
487 attr_name, spec = getargs('attr_name', 'spec', kwargs)
488 self.__spec2attr[spec] = attr_name
489 self.__attr2spec[attr_name] = spec
    @docval({"name": "attr_name", "type": str, "doc": "the name of the attribute"})
    def get_attr_spec(self, **kwargs):
        """ Return the Spec for a given attribute, or None if the attribute is unmapped """
        attr_name = getargs('attr_name', kwargs)
        return self.__attr2spec.get(attr_name)
    @docval({"name": "carg_name", "type": str, "doc": "the name of the constructor argument"})
    def get_carg_spec(self, **kwargs):
        """ Return the Spec for a given constructor argument, or None if the argument is unmapped """
        carg_name = getargs('carg_name', kwargs)
        return self.__carg2spec.get(carg_name)
503 @docval({"name": "const_arg", "type": str, "doc": "the name of the constructor argument to map"},
504 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
505 def map_const_arg(self, **kwargs):
506 """ Map an attribute to spec. Use this to override default behavior """
507 const_arg, spec = getargs('const_arg', 'spec', kwargs)
508 self.__spec2carg[spec] = const_arg
509 self.__carg2spec[const_arg] = spec
511 @docval({"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
512 def unmap(self, **kwargs):
513 """ Removing any mapping for a specification. Use this to override default mapping """
514 spec = getargs('spec', kwargs)
515 self.__spec2attr.pop(spec, None)
516 self.__spec2carg.pop(spec, None)
518 @docval({"name": "attr_carg", "type": str, "doc": "the constructor argument/object attribute to map this spec to"},
519 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"})
520 def map_spec(self, **kwargs):
521 """ Map the given specification to the construct argument and object attribute """
522 spec, attr_carg = getargs('spec', 'attr_carg', kwargs)
523 self.map_const_arg(attr_carg, spec)
524 self.map_attr(attr_carg, spec)
526 def __get_override_carg(self, *args):
527 name = args[0]
528 remaining_args = tuple(args[1:])
529 if name in self.constructor_args:
530 self.logger.debug(" Calling override function for constructor argument '%s'" % name)
531 func = self.constructor_args[name]
532 return func(self, *remaining_args)
533 return None
535 def __get_override_attr(self, name, container, manager):
536 if name in self.obj_attrs:
537 self.logger.debug(" Calling override function for attribute '%s'" % name)
538 func = self.obj_attrs[name]
539 return func(self, container, manager)
540 return None
542 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute for"},
543 returns='the attribute name', rtype=str)
544 def get_attribute(self, **kwargs):
545 ''' Get the object attribute name for the given Spec '''
546 spec = getargs('spec', kwargs)
547 val = self.__spec2attr.get(spec, None)
548 return val
    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"},
            {"name": "container", "type": AbstractContainer, "doc": "the container to get the attribute value from"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"},
            returns='the value of the attribute')
    def get_attr_value(self, **kwargs):
        ''' Get the value of the attribute corresponding to this spec from the given container '''
        spec, container, manager = getargs('spec', 'container', 'manager', kwargs)
        attr_name = self.get_attribute(spec)
        if attr_name is None:
            # the spec is not mapped to any attribute
            return None
        # registered override functions take precedence over direct attribute access
        attr_val = self.__get_override_attr(attr_name, container, manager)
        if attr_val is None:
            try:
                attr_val = getattr(container, attr_name)
            except AttributeError:
                msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s"
                       % (container.__class__.__name__, container.name, attr_name, spec))
                raise ContainerConfigurationError(msg)
            if isinstance(attr_val, TermSetWrapper):
                # unwrap the raw value from its term-set validation wrapper
                attr_val = attr_val.value
            if attr_val is not None:
                attr_val = self.__convert_string(attr_val, spec)
                spec_dt = self.__get_data_type(spec)
                if spec_dt is not None:
                    # typed spec: keep only the containers matching the spec data type
                    try:
                        attr_val = self.__filter_by_spec_dt(attr_val, spec_dt, manager)
                    except ValueError as e:
                        msg = ("%s '%s' attribute '%s' has unexpected type."
                               % (container.__class__.__name__, container.name, attr_name))
                        raise ContainerConfigurationError(msg) from e
            # else: attr_val is an attribute on the Container and its value is None
        # attr_val can be None, an AbstractContainer, or a list of AbstractContainers
        return attr_val
584 @classmethod
585 def __get_data_type(cls, spec):
586 ret = None
587 if isinstance(spec, LinkSpec):
588 ret = spec.target_type
589 elif isinstance(spec, BaseStorageSpec):
590 if spec.data_type_def is not None:
591 ret = spec.data_type_def
592 elif spec.data_type_inc is not None:
593 ret = spec.data_type_inc
594 # else, untyped group/dataset spec
595 # else, attribute spec
596 return ret
    def __convert_string(self, value, spec):
        """Convert string types to the specified dtype.

        For text-typed attribute specs, coerces the value (or each element) to str.
        For untyped-inc dataset specs, coerces to str, bytes, or an ISO datetime
        string depending on the spec dtype, preserving any DataIO wrapping.
        """
        ret = value
        if isinstance(spec, AttributeSpec):
            if 'text' in spec.dtype:
                if spec.shape is not None or spec.dims is not None:
                    # array-shaped attribute: convert element-wise
                    ret = list(map(str, value))
                else:
                    ret = str(value)
        elif isinstance(spec, DatasetSpec):
            # TODO: make sure we can handle specs with data_type_inc set
            if spec.data_type_inc is None and spec.dtype is not None:
                string_type = None
                if 'text' in spec.dtype:
                    string_type = str
                elif 'ascii' in spec.dtype:
                    string_type = bytes
                elif 'isodatetime' in spec.dtype:
                    def string_type(x):
                        return x.isoformat()  # method works for both date and datetime
                if string_type is not None:
                    if spec.shape is not None or spec.dims is not None:
                        # array-shaped dataset: convert element-wise
                        ret = list(map(string_type, value))
                    else:
                        ret = string_type(value)
                    # copy over any I/O parameters if they were specified
                    if isinstance(value, DataIO):
                        params = value.get_io_params()
                        params['data'] = ret
                        ret = value.__class__(**params)
        return ret
    def __filter_by_spec_dt(self, attr_value, spec_dt, build_manager):
        """Return a list of containers that match the spec data type.

        If attr_value is a container that does not match the spec data type, then None is returned.
        If attr_value is a collection, then a list of only the containers in the collection that match the
        spec data type are returned.
        Otherwise, attr_value is returned unchanged.

        spec_dt is a string representing a spec data type.

        Return None, an AbstractContainer, or a list of AbstractContainers
        """
        if isinstance(attr_value, AbstractContainer):
            if build_manager.is_sub_data_type(attr_value, spec_dt):
                return attr_value
            else:
                return None

        ret = attr_value
        if isinstance(attr_value, (list, tuple, set, dict)):
            if isinstance(attr_value, dict):
                # filter on the values; keys are dropped
                attr_values = attr_value.values()
            else:
                attr_values = attr_value
            ret = []
            # NOTE: this will test collections of non-containers element-wise (e.g. lists of lists of ints)
            for c in attr_values:
                if self.__filter_by_spec_dt(c, spec_dt, build_manager) is not None:
                    ret.append(c)
            if len(ret) == 0:
                # nothing matched -- treat as missing
                ret = None
        else:
            raise ValueError("Unexpected type for attr_value: %s. Only AbstractContainer, list, tuple, set, dict, are "
                             "allowed." % type(attr_value))
        return ret
    def __check_quantity(self, attr_value, spec, container):
        # Warn (without failing the build) when the number of values for a spec
        # violates the spec's quantity: a missing required value, or a count that
        # conflicts with an exact/singular quantity.
        if attr_value is None and spec.required:
            attr_name = self.get_attribute(spec)
            msg = ("%s '%s' is missing required value for attribute '%s'."
                   % (container.__class__.__name__, container.name, attr_name))
            warnings.warn(msg, MissingRequiredBuildWarning)
            self.logger.debug('MissingRequiredBuildWarning: ' + msg)
        elif attr_value is not None and self.__get_data_type(spec) is not None:
            # quantity is valid only for specs with a data type or target type
            if isinstance(attr_value, AbstractContainer):
                attr_value = [attr_value]
            n = len(attr_value)
            if (n and isinstance(attr_value[0], AbstractContainer) and
                    ((n > 1 and not spec.is_many()) or (isinstance(spec.quantity, int) and n != spec.quantity))):
                attr_name = self.get_attribute(spec)
                msg = ("%s '%s' has %d values for attribute '%s' but spec allows %s."
                       % (container.__class__.__name__, container.name, n, attr_name, repr(spec.quantity)))
                warnings.warn(msg, IncorrectQuantityBuildWarning)
                self.logger.debug('IncorrectQuantityBuildWarning: ' + msg)
    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"},
            returns="the name of the constructor argument", rtype=str)
    def get_const_arg(self, **kwargs):
        ''' Get the constructor argument for the given Spec, or None if the spec is unmapped '''
        spec = getargs('spec', kwargs)
        return self.__spec2carg.get(spec, None)
    @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager to use for managing this build"},
            {"name": "parent", "type": GroupBuilder, "doc": "the parent of the resulting Builder", 'default': None},
            {"name": "source", "type": str,
             "doc": "the source of container being built i.e. file path", 'default': None},
            {"name": "builder", "type": BaseBuilder, "doc": "the Builder to build on", 'default': None},
            {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec extension", 'default': None},
            {"name": "export", "type": bool, "doc": "whether this build is for exporting",
             'default': False},
            returns="the Builder representing the given AbstractContainer", rtype=Builder)
    def build(self, **kwargs):
        '''Convert an AbstractContainer to a Builder representation.

        References are not added but are queued to be added in the BuildManager.
        '''
        container, manager, parent, source = getargs('container', 'manager', 'parent', 'source', kwargs)
        builder, spec_ext, export = getargs('builder', 'spec_ext', 'export', kwargs)
        name = manager.get_builder_name(container)
        if isinstance(self.__spec, GroupSpec):
            # group spec: build a GroupBuilder and populate its subgroups/datasets/links
            self.logger.debug("Building %s '%s' as a group (source: %s)"
                              % (container.__class__.__name__, container.name, repr(source)))
            if builder is None:
                builder = GroupBuilder(name, parent=parent, source=source)
            self.__add_datasets(builder, self.__spec.datasets, container, manager, source, export)
            self.__add_groups(builder, self.__spec.groups, container, manager, source, export)
            self.__add_links(builder, self.__spec.links, container, manager, source, export)
        else:
            # dataset spec: the container must be a Data object holding the dataset values
            if builder is None:
                if not isinstance(container, Data):
                    msg = "'container' must be of type Data with DatasetSpec"
                    raise ValueError(msg)
                # resolve dtype/shape, letting a spec extension refine the original spec
                spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext)
                if isinstance(spec_dtype, RefSpec):
                    self.logger.debug("Building %s '%s' as a dataset of references (source: %s)"
                                      % (container.__class__.__name__, container.name, repr(source)))
                    # create dataset builder with data=None as a placeholder. fill in with refs later
                    builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype.reftype)
                    manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager))
                elif isinstance(spec_dtype, list):
                    # a compound dataset
                    self.logger.debug("Building %s '%s' as a dataset of compound dtypes (source: %s)"
                                      % (container.__class__.__name__, container.name, repr(source)))
                    # create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later
                    builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype)
                    manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container,
                                                                          manager))
                else:
                    # a regular dtype
                    if spec_dtype is None and self.__is_reftype(container.data):
                        self.logger.debug("Building %s '%s' containing references as a dataset of unspecified dtype "
                                          "(source: %s)"
                                          % (container.__class__.__name__, container.name, repr(source)))
                        # an unspecified dtype and we were given references
                        # create dataset builder with data=None as a placeholder. fill in with refs later
                        builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype='object')
                        manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager))
                    else:
                        # a dataset that has no references, pass the conversion off to the convert_dtype method
                        self.logger.debug("Building %s '%s' as a dataset (source: %s)"
                                          % (container.__class__.__name__, container.name, repr(source)))
                        try:
                            # use spec_dtype from self.spec when spec_ext does not specify dtype
                            bldr_data, dtype = self.convert_dtype(spec, container.data, spec_dtype=spec_dtype)
                        except Exception as ex:
                            msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name)
                            raise Exception(msg) from ex
                        builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype)

        # Add attributes from the specification extension to the list of attributes
        all_attrs = self.__spec.attributes + getattr(spec_ext, 'attributes', tuple())
        # If the spec_ext refines an existing attribute it will now appear twice in the list. The
        # refinement should only be relevant for validation (not for write). To avoid problems with the
        # write we here remove duplicates and keep the original spec of the two to make write work.
        # TODO: We should add validation in the AttributeSpec to make sure refinements are valid
        # TODO: Check the BuildManager as refinements should probably be resolved rather than be passed in via spec_ext
        all_attrs = list({a.name: a for a in all_attrs[::-1]}.values())
        self.__add_attributes(builder, all_attrs, container, manager, source, export)
        return builder
772 def __check_dset_spec(self, orig, ext):
773 """
774 Check a dataset spec against a refining spec to see which dtype and shape should be used
775 """
776 dtype = orig.dtype
777 shape = orig.shape
778 spec = orig
779 if ext is not None:
780 if ext.dtype is not None:
781 dtype = ext.dtype
782 if ext.shape is not None:
783 shape = ext.shape
784 spec = ext
785 return dtype, shape, spec
    def __is_reftype(self, data):
        """Return True if ``data`` (possibly nested in sequences) holds AbstractContainer references."""
        # Data chunk iterators stream numeric data and never carry container references.
        if (isinstance(data, AbstractDataChunkIterator) or
                (isinstance(data, DataIO) and isinstance(data.data, AbstractDataChunkIterator))):
            return False

        tmp = data
        # Descend into nested sequences until reaching a scalar-like element
        # (an AbstractContainer, a string, or a non-sequence value).
        while hasattr(tmp, '__len__') and not isinstance(tmp, (AbstractContainer, str, bytes)):
            tmptmp = None
            for t in tmp:
                # In case of a numeric array stop the iteration at the first element to avoid long-running loop
                if isinstance(t, (int, float, complex, bool)):
                    break
                if hasattr(t, '__len__') and len(t) > 0 and not isinstance(t, (AbstractContainer, str, bytes)):
                    # NOTE(review): assigns tmp[0] rather than t and then exits the outer loop below,
                    # so a non-empty nested sequence short-circuits the descent — confirm this is the
                    # intended handling for nested (e.g. 2-D) reference data.
                    tmptmp = tmp[0]
                    break
            if tmptmp is not None:
                break
            else:
                if len(tmp) == 0:
                    tmp = None
                else:
                    # descend one level using the first element as representative
                    tmp = tmp[0]
        if isinstance(tmp, AbstractContainer):
            return True
        else:
            return False
814 def __set_dataset_to_refs(self, builder, dtype, shape, container, build_manager):
815 self.logger.debug("Queueing set dataset of references %s '%s' to reference builder(s)"
816 % (builder.__class__.__name__, builder.name))
818 def _filler():
819 builder.data = self.__get_ref_builder(builder, dtype, shape, container, build_manager)
821 return _filler
823 def __set_compound_dataset_to_refs(self, builder, spec, spec_dtype, container, build_manager):
824 self.logger.debug("Queueing convert compound dataset %s '%s' and set any references to reference builders"
825 % (builder.__class__.__name__, builder.name))
827 def _filler():
828 self.logger.debug("Converting compound dataset %s '%s' and setting any references to reference builders"
829 % (builder.__class__.__name__, builder.name))
830 # convert the reference part(s) of a compound dataset to ReferenceBuilders, row by row
831 refs = [(i, subt) for i, subt in enumerate(spec_dtype) if isinstance(subt.dtype, RefSpec)]
832 bldr_data = list()
833 for i, row in enumerate(container.data):
834 tmp = list(row)
835 for j, subt in refs:
836 tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager)
837 bldr_data.append(tuple(tmp))
838 builder.data = bldr_data
840 return _filler
842 def __set_untyped_dataset_to_refs(self, builder, container, build_manager):
843 self.logger.debug("Queueing set untyped dataset %s '%s' to reference builders"
844 % (builder.__class__.__name__, builder.name))
846 def _filler():
847 self.logger.debug("Setting untyped dataset %s '%s' to list of reference builders"
848 % (builder.__class__.__name__, builder.name))
849 bldr_data = list()
850 for d in container.data:
851 if d is None:
852 bldr_data.append(None)
853 else:
854 target_builder = self.__get_target_builder(d, build_manager, builder)
855 bldr_data.append(ReferenceBuilder(target_builder))
856 builder.data = bldr_data
858 return _filler
    def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
        """Return the reference value(s) for ``builder.data``: a RegionBuilder or
        ReferenceBuilder, or a list of them, depending on the RefSpec and the container.
        """
        bldr_data = None
        if dtype.is_region():
            if shape is None:
                # scalar region reference: the container itself must describe a region
                if not isinstance(container, DataRegion):
                    msg = "'container' must be of type DataRegion if spec represents region reference"
                    raise ValueError(msg)
                self.logger.debug("Setting %s '%s' data to region reference builder"
                                  % (builder.__class__.__name__, builder.name))
                target_builder = self.__get_target_builder(container.data, build_manager, builder)
                bldr_data = RegionBuilder(container.region, target_builder)
            else:
                # array of region references: each element supplies a target and a slice
                self.logger.debug("Setting %s '%s' data to list of region reference builders"
                                  % (builder.__class__.__name__, builder.name))
                bldr_data = list()
                for d in container.data:
                    target_builder = self.__get_target_builder(d.target, build_manager, builder)
                    bldr_data.append(RegionBuilder(d.slice, target_builder))
        else:
            self.logger.debug("Setting object reference dataset on %s '%s' data"
                              % (builder.__class__.__name__, builder.name))
            if isinstance(container, Data):
                # a Data container: each element of container.data references another container
                self.logger.debug("Setting %s '%s' data to list of reference builders"
                                  % (builder.__class__.__name__, builder.name))
                bldr_data = list()
                for d in container.data:
                    target_builder = self.__get_target_builder(d, build_manager, builder)
                    bldr_data.append(ReferenceBuilder(target_builder))
            else:
                # a single reference to the container itself
                self.logger.debug("Setting %s '%s' data to reference builder"
                                  % (builder.__class__.__name__, builder.name))
                target_builder = self.__get_target_builder(container, build_manager, builder)
                bldr_data = ReferenceBuilder(target_builder)
        return bldr_data
895 def __get_target_builder(self, container, build_manager, builder):
896 target_builder = build_manager.get_builder(container)
897 if target_builder is None:
898 raise ReferenceTargetNotBuiltError(builder, container)
899 return target_builder
    def __add_attributes(self, builder, attributes, container, build_manager, source, export):
        """Convert a value from ``container`` for each attribute spec and set it on ``builder``.

        Reference-typed attributes are queued on the build manager and resolved after their
        targets are built; all other attributes are dtype-converted and set immediately.
        """
        if attributes:
            self.logger.debug("Adding attributes from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in attributes:
            self.logger.debug("    Adding attribute for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            if spec.value is not None:
                # a fixed value defined on the spec always wins
                attr_value = spec.value
            else:
                attr_value = self.get_attr_value(spec, container, build_manager)
                if attr_value is None:
                    attr_value = spec.default_value

            # invert ReferenceResolver values back to their underlying data
            attr_value = self.__check_ref_resolver(attr_value)

            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug("    Skipping empty attribute")
                continue

            if isinstance(spec.dtype, RefSpec):
                if not self.__is_reftype(attr_value):
                    msg = ("invalid type for reference '%s' (%s) - must be AbstractContainer"
                           % (spec.name, type(attr_value)))
                    raise ValueError(msg)

                # defer setting the attribute until the target builder exists
                build_manager.queue_ref(self.__set_attr_to_ref(builder, attr_value, build_manager, spec))
                continue
            else:
                try:
                    attr_value, attr_dtype = self.convert_dtype(spec, attr_value)
                except Exception as ex:
                    msg = 'could not convert %s for %s %s' % (spec.name, type(container).__name__, container.name)
                    raise BuildError(builder, msg) from ex

                # do not write empty or null valued objects
                self.__check_quantity(attr_value, spec, container)
                if attr_value is None:
                    self.logger.debug("    Skipping empty attribute")
                    continue
            builder.set_attribute(spec.name, attr_value)
945 def __set_attr_to_ref(self, builder, attr_value, build_manager, spec):
946 self.logger.debug("Queueing set reference attribute on %s '%s' attribute '%s' to %s"
947 % (builder.__class__.__name__, builder.name, spec.name,
948 attr_value.__class__.__name__))
950 def _filler():
951 self.logger.debug("Setting reference attribute on %s '%s' attribute '%s' to %s"
952 % (builder.__class__.__name__, builder.name, spec.name,
953 attr_value.__class__.__name__))
954 target_builder = self.__get_target_builder(attr_value, build_manager, builder)
955 ref_attr_value = ReferenceBuilder(target_builder)
956 builder.set_attribute(spec.name, ref_attr_value)
958 return _filler
960 def __add_links(self, builder, links, container, build_manager, source, export):
961 if links:
962 self.logger.debug("Adding links from %s '%s' to %s '%s'"
963 % (container.__class__.__name__, container.name,
964 builder.__class__.__name__, builder.name))
965 for spec in links:
966 self.logger.debug(" Adding link for spec name: %s, target_type: %s"
967 % (repr(spec.name), repr(spec.target_type)))
968 attr_value = self.get_attr_value(spec, container, build_manager)
969 self.__check_quantity(attr_value, spec, container)
970 if attr_value is None:
971 self.logger.debug(" Skipping link - no attribute value")
972 continue
973 self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
    def __add_datasets(self, builder, datasets, container, build_manager, source, export):
        """Convert a value from ``container`` for each dataset spec and add it to ``builder``.

        Three cases are handled: the value is already a LinkBuilder (added as-is); the spec
        is an untyped, named dataset (converted inline); or the spec is typed (delegated to
        ``__add_containers``).
        """
        if datasets:
            self.logger.debug("Adding datasets from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in datasets:
            self.logger.debug("    Adding dataset for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            attr_value = self.get_attr_value(spec, container, build_manager)
            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug("    Skipping dataset - no attribute value")
                continue
            # invert ReferenceResolver values back to their underlying data
            attr_value = self.__check_ref_resolver(attr_value)
            if isinstance(attr_value, LinkBuilder):
                self.logger.debug("    Adding %s '%s' for spec name: %s, %s: %s, %s: %s"
                                  % (attr_value.name, attr_value.__class__.__name__,
                                     repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                builder.set_link(attr_value)  # add the existing builder
            elif spec.data_type_def is None and spec.data_type_inc is None:  # untyped, named dataset
                if spec.name in builder.datasets:
                    # reuse a builder already created for this name; only attributes are added below
                    sub_builder = builder.datasets[spec.name]
                    self.logger.debug("    Retrieving existing DatasetBuilder '%s' for spec name %s and adding "
                                      "attributes" % (sub_builder.name, repr(spec.name)))
                else:
                    self.logger.debug("    Converting untyped dataset for spec name %s to spec dtype %s"
                                      % (repr(spec.name), repr(spec.dtype)))
                    try:
                        data, dtype = self.convert_dtype(spec, attr_value)
                    except Exception as ex:
                        msg = 'could not convert \'%s\' for %s \'%s\''
                        msg = msg % (spec.name, type(container).__name__, container.name)
                        raise BuildError(builder, msg) from ex
                    self.logger.debug("    Adding untyped dataset for spec name %s and adding attributes"
                                      % repr(spec.name))
                    sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype)
                    builder.set_dataset(sub_builder)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
            else:
                self.logger.debug("    Adding typed dataset for spec name: %s, %s: %s, %s: %s"
                                  % (repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
1022 def __add_groups(self, builder, groups, container, build_manager, source, export):
1023 if groups:
1024 self.logger.debug("Adding groups from %s '%s' to %s '%s'"
1025 % (container.__class__.__name__, container.name,
1026 builder.__class__.__name__, builder.name))
1027 for spec in groups:
1028 if spec.data_type_def is None and spec.data_type_inc is None:
1029 self.logger.debug(" Adding untyped group for spec name: %s" % repr(spec.name))
1030 # we don't need to get attr_name since any named group does not have the concept of value
1031 sub_builder = builder.groups.get(spec.name)
1032 if sub_builder is None:
1033 sub_builder = GroupBuilder(spec.name, source=source)
1034 self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
1035 self.__add_datasets(sub_builder, spec.datasets, container, build_manager, source, export)
1036 self.__add_links(sub_builder, spec.links, container, build_manager, source, export)
1037 self.__add_groups(sub_builder, spec.groups, container, build_manager, source, export)
1038 empty = sub_builder.is_empty()
1039 if not empty or (empty and spec.required):
1040 if sub_builder.name not in builder.groups:
1041 builder.set_group(sub_builder)
1042 else:
1043 self.logger.debug(" Adding group for spec name: %s, %s: %s, %s: %s"
1044 % (repr(spec.name),
1045 spec.def_key(), repr(spec.data_type_def),
1046 spec.inc_key(), repr(spec.data_type_inc)))
1047 attr_value = self.get_attr_value(spec, container, build_manager)
1048 self.__check_quantity(attr_value, spec, container)
1049 if attr_value is not None:
1050 self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)
    def __add_containers(self, builder, spec, value, build_manager, source, parent_container, export):
        """Build ``value`` (an AbstractContainer or a list of them) and attach the result to
        ``builder`` as a link, dataset, or subgroup according to ``spec``.

        :raises OrphanContainerBuildError: if a parentless container is referenced but cannot be built
        :raises ValueError: if an unmodified container has no source, or ``value`` has the wrong type
        """
        if isinstance(value, AbstractContainer):
            self.logger.debug("    Adding container %s '%s' with parent %s '%s' to %s '%s'"
                              % (value.__class__.__name__, value.name,
                                 parent_container.__class__.__name__, parent_container.name,
                                 builder.__class__.__name__, builder.name))
            if value.parent is None:
                if (value.container_source == parent_container.container_source or
                        build_manager.get_builder(value) is None):
                    # value was removed (or parent not set) and there is a link to it in same file
                    # or value was read from an external link
                    raise OrphanContainerBuildError(builder, value)

            if value.modified or export:
                # writing a newly instantiated container (modified is False only after read) or as if it is newly
                # instantiated (export=True)
                self.logger.debug("    Building newly instantiated %s '%s'" % (value.__class__.__name__, value.name))
                if isinstance(spec, BaseStorageSpec):
                    new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                else:
                    new_builder = build_manager.build(value, source=source, export=export)
                # use spec to determine what kind of HDF5 object this AbstractContainer corresponds to
                if isinstance(spec, LinkSpec) or value.parent is not parent_container:
                    # a link spec, or the container lives elsewhere in the hierarchy -> link to it
                    self.logger.debug("    Adding link to %s '%s' in %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                elif isinstance(spec, DatasetSpec):
                    self.logger.debug("    Adding dataset %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_dataset(new_builder)
                else:
                    self.logger.debug("    Adding subgroup %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_group(new_builder)
            elif value.container_source:  # make a link to an existing container
                if (value.container_source != parent_container.container_source
                        or value.parent is not parent_container):
                    # the container came from a different file or parent -> build it and link to it
                    self.logger.debug("    Building %s '%s' (container source: %s) and adding a link to it"
                                      % (value.__class__.__name__, value.name, value.container_source))
                    if isinstance(spec, BaseStorageSpec):
                        new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                    else:
                        new_builder = build_manager.build(value, source=source, export=export)
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                else:
                    self.logger.debug("    Skipping build for %s '%s' because both it and its parents were read "
                                      "from the same source."
                                      % (value.__class__.__name__, value.name))
            else:
                raise ValueError("Found unmodified AbstractContainer with no source - '%s' with parent '%s'" %
                                 (value.name, parent_container.name))
        elif isinstance(value, list):
            # recurse on each element of the list
            for container in value:
                self.__add_containers(builder, spec, container, build_manager, source, parent_container, export)
        else:  # pragma: no cover
            msg = ("Received %s, expected AbstractContainer or a list of AbstractContainers."
                   % value.__class__.__name__)
            raise ValueError(msg)
    def __get_subspec_values(self, builder, spec, manager):
        """Map each sub-specification of ``spec`` to its value extracted from ``builder``.

        Returns a dict keyed by sub-spec; values are constructed containers, raw attribute
        values, or (for a DatasetSpec) the dataset's data.
        """
        ret = dict()
        # First get attributes
        attributes = builder.attributes
        for attr_spec in spec.attributes:
            attr_val = attributes.get(attr_spec.name)
            if attr_val is None:
                continue
            if isinstance(attr_val, (GroupBuilder, DatasetBuilder)):
                ret[attr_spec] = manager.construct(attr_val)
            elif isinstance(attr_val, RegionBuilder):  # pragma: no cover
                raise ValueError("RegionReferences as attributes is not yet supported")
            elif isinstance(attr_val, ReferenceBuilder):
                # object-reference attribute: construct the referenced builder's container
                ret[attr_spec] = manager.construct(attr_val.builder)
            else:
                ret[attr_spec] = attr_val
        if isinstance(spec, GroupSpec):
            if not isinstance(builder, GroupBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass GroupBuilder with GroupSpec")
            # first aggregate links by data type and separate them
            # by group and dataset
            groups = dict(builder.groups)  # make a copy so we can separate links
            datasets = dict(builder.datasets)  # make a copy so we can separate links
            links = builder.links
            link_dt = dict()
            for link_builder in links.values():
                target = link_builder.builder
                if isinstance(target, DatasetBuilder):
                    datasets[link_builder.name] = target
                else:
                    groups[link_builder.name] = target
                dt = manager.get_builder_dt(target)
                if dt is not None:
                    link_dt.setdefault(dt, list()).append(target)
            # now assign links to their respective specification
            for subspec in spec.links:
                if subspec.name is not None and subspec.name in links:
                    ret[subspec] = manager.construct(links[subspec.name].builder)
                else:
                    # unnamed link spec: match by the link target's data type
                    sub_builder = link_dt.get(subspec.target_type)
                    if sub_builder is not None:
                        ret[subspec] = self.__flatten(sub_builder, subspec, manager)
            # now process groups and datasets
            self.__get_sub_builders(groups, spec.groups, manager, ret)
            self.__get_sub_builders(datasets, spec.datasets, manager, ret)
        elif isinstance(spec, DatasetSpec):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec")
            if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and
                    type(builder.data[0]) != np.void):
                # if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset
                builder['data'] = builder.data[0]  # use dictionary reference instead of .data to bypass error
            ret[spec] = self.__check_ref_resolver(builder.data)
        return ret
1169 @staticmethod
1170 def __check_ref_resolver(data):
1171 """
1172 Check if this dataset is a reference resolver, and invert it if so.
1173 """
1174 if isinstance(data, ReferenceResolver):
1175 return data.invert()
1176 return data
    def __get_sub_builders(self, sub_builders, subspecs, manager, ret):
        """Match each sub-spec in ``subspecs`` to a builder in ``sub_builders`` and fill
        ``ret`` (a dict of {sub-spec: value}) in place.
        """
        # index builders by data_type
        builder_dt = dict()
        for g in sub_builders.values():
            dt = manager.get_builder_dt(g)
            ns = manager.get_builder_ns(g)
            if dt is None or ns is None:
                continue
            # index the builder under its own data type and every ancestor type
            for parent_dt in manager.namespace_catalog.get_hierarchy(ns, dt):
                builder_dt.setdefault(parent_dt, list()).append(g)
        for subspec in subspecs:
            # first get data type for the spec
            if subspec.data_type_def is not None:
                dt = subspec.data_type_def
            elif subspec.data_type_inc is not None:
                dt = subspec.data_type_inc
            else:
                dt = None
            # use name if we can, otherwise use data type
            if subspec.name is None:
                sub_builder = builder_dt.get(dt)
                if sub_builder is not None:
                    sub_builder = self.__flatten(sub_builder, subspec, manager)
                    ret[subspec] = sub_builder
            else:
                sub_builder = sub_builders.get(subspec.name)
                if sub_builder is None:
                    continue
                if dt is None:
                    # recurse: an untyped, named sub-builder may contain further matches
                    ret.update(self.__get_subspec_values(sub_builder, subspec, manager))
                else:
                    ret[subspec] = manager.construct(sub_builder)
1212 def __flatten(self, sub_builder, subspec, manager):
1213 tmp = [manager.construct(b) for b in sub_builder]
1214 if len(tmp) == 1 and not subspec.is_many():
1215 tmp = tmp[0]
1216 return tmp
    @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder),
             'doc': 'the builder to construct the AbstractContainer from'},
            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager for this build'},
            {'name': 'parent', 'type': (Proxy, AbstractContainer),
             'doc': 'the parent AbstractContainer/Proxy for the AbstractContainer being built', 'default': None})
    def construct(self, **kwargs):
        ''' Construct an AbstractContainer from the given Builder '''
        builder, manager, parent = getargs('builder', 'manager', 'parent', kwargs)
        cls = manager.get_cls(builder)
        # gather all subspecs
        subspecs = self.__get_subspec_values(builder, self.spec, manager)
        # get the constructor argument that each specification corresponds to
        const_args = dict()
        # For Data container classes, we need to populate the data constructor argument since
        # there is no sub-specification that maps to that argument under the default logic
        if issubclass(cls, Data):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError('Can only construct a Data object from a DatasetBuilder - got %s' % type(builder))
            const_args['data'] = self.__check_ref_resolver(builder.data)
        for subspec, value in subspecs.items():
            const_arg = self.get_const_arg(subspec)
            if const_arg is not None:
                if isinstance(subspec, BaseStorageSpec) and subspec.is_many():
                    # several many-quantity subspecs can feed the same argument; concatenate lists
                    existing_value = const_args.get(const_arg)
                    if isinstance(existing_value, list):
                        value = existing_value + value
                const_args[const_arg] = value
        # build kwargs for the constructor
        kwargs = dict()
        for const_arg in get_docval(cls.__init__):
            argname = const_arg['name']
            # constructor-arg overrides (registered via the decorator) take precedence
            override = self.__get_override_carg(argname, builder, manager)
            if override is not None:
                val = override
            elif argname in const_args:
                val = const_args[argname]
            else:
                # no value for this constructor argument; let the constructor's default apply
                continue
            kwargs[argname] = val
        try:
            obj = self.__new_container__(cls, builder.source, parent, builder.attributes.get(self.__spec.id_key()),
                                         **kwargs)
        except Exception as ex:
            msg = 'Could not construct %s object due to: %s' % (cls.__name__, ex)
            raise ConstructError(builder, msg) from ex
        return obj
1265 def __new_container__(self, cls, container_source, parent, object_id, **kwargs):
1266 """A wrapper function for ensuring a container gets everything set appropriately"""
1267 obj = cls.__new__(cls, container_source=container_source, parent=parent, object_id=object_id,
1268 in_construct_mode=True)
1269 # obj has been created and is in construction mode, indicating that the object is being constructed by
1270 # the automatic construct process during read, rather than by the user
1271 obj.__init__(**kwargs)
1272 obj._in_construct_mode = False # reset to False to indicate that the construction of the object is complete
1273 return obj
1275 @docval({'name': 'container', 'type': AbstractContainer,
1276 'doc': 'the AbstractContainer to get the Builder name for'})
1277 def get_builder_name(self, **kwargs):
1278 '''Get the name of a Builder that represents a AbstractContainer'''
1279 container = getargs('container', kwargs)
1280 if self.__spec.name is not None:
1281 ret = self.__spec.name
1282 else:
1283 ret = container.name
1284 return ret