Coverage for src/hdmf/build/objectmapper.py: 95%

817 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-04 02:57 +0000

1import logging 

2import re 

3import warnings 

4from collections import OrderedDict 

5from copy import copy 

6 

7import numpy as np 

8 

9from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder 

10from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError, 

11 ConstructError) 

12from .manager import Proxy, BuildManager 

13from .warnings import MissingRequiredBuildWarning, DtypeConversionWarning, IncorrectQuantityBuildWarning 

14from ..container import AbstractContainer, Data, DataRegion 

15from ..term_set import TermSetWrapper 

16from ..data_utils import DataIO, AbstractDataChunkIterator 

17from ..query import ReferenceResolver 

18from ..spec import Spec, AttributeSpec, DatasetSpec, GroupSpec, LinkSpec, RefSpec 

19from ..spec.spec import BaseStorageSpec 

20from ..utils import docval, getargs, ExtenderMeta, get_docval 

21 

22_const_arg = '__constructor_arg' 

23 

24 

@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _constructor_arg(**kwargs):
    '''Decorator to override the default mapping scheme for a given constructor argument.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the Builder object that is being mapped. The method should return the value to be passed
    to the target AbstractContainer class constructor argument given by *name*.
    '''
    name = getargs('name', kwargs)

    def _dec(func):
        # Tag the function; the ObjectMapper metaclass hook collects tagged
        # functions when a subclass is created.
        setattr(func, _const_arg, name)
        return func

    return _dec

42 

43 

44_obj_attr = '__object_attr' 

45 

46 

@docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'},
        is_method=False)
def _object_attr(**kwargs):
    '''Decorator to override the default mapping scheme for a given object attribute.

    Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default
    scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its
    first argument the AbstractContainer object that is being mapped. The method should return the child Builder
    object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the
    attribute given by *name*.
    '''
    name = getargs('name', kwargs)

    def _dec(func):
        # Tag the function; the ObjectMapper metaclass hook collects tagged
        # functions when a subclass is created.
        setattr(func, _obj_attr, name)
        return func

    return _dec

65 

66 

67def _unicode(s): 

68 """ 

69 A helper function for converting to Unicode 

70 """ 

71 if isinstance(s, str): 

72 return s 

73 elif isinstance(s, bytes): 

74 return s.decode('utf-8') 

75 else: 

76 raise ValueError("Expected unicode or ascii string, got %s" % type(s)) 

77 

78 

79def _ascii(s): 

80 """ 

81 A helper function for converting to ASCII 

82 """ 

83 if isinstance(s, str): 

84 return s.encode('ascii', 'backslashreplace') 

85 elif isinstance(s, bytes): 

86 return s 

87 else: 

88 raise ValueError("Expected unicode or ascii string, got %s" % type(s)) 

89 

90 

class ObjectMapper(metaclass=ExtenderMeta):
    '''A class for mapping between Spec objects and AbstractContainer attributes

    '''

    # mapping from spec dtypes to numpy dtypes or functions for conversion of values to spec dtypes
    # make sure keys are consistent between hdmf.spec.spec.DtypeHelper.primary_dtype_synonyms,
    # hdmf.build.objectmapper.ObjectMapper.__dtypes, hdmf.build.manager.TypeMap._spec_dtype_map,
    # hdmf.validate.validator.__allowable, and backend dtype maps
    __dtypes = {
        "float": np.float32,
        "float32": np.float32,
        "double": np.float64,
        "float64": np.float64,
        "long": np.int64,
        "int64": np.int64,
        "int": np.int32,
        "int32": np.int32,
        "short": np.int16,
        "int16": np.int16,
        "int8": np.int8,
        "uint": np.uint32,
        "uint64": np.uint64,
        "uint32": np.uint32,
        "uint16": np.uint16,
        "uint8": np.uint8,
        "bool": np.bool_,
        # string-like dtypes map to conversion helper functions rather than numpy types
        "text": _unicode,
        "utf": _unicode,
        "utf8": _unicode,
        "utf-8": _unicode,
        "ascii": _ascii,
        "bytes": _ascii,
        "isodatetime": _ascii,
        "datetime": _ascii,
    }

    # types registered via ObjectMapper.no_convert(); values of these types bypass dtype conversion
    __no_convert = set()

130 @classmethod 

131 def __resolve_numeric_dtype(cls, given, specified): 

132 """ 

133 Determine the dtype to use from the dtype of the given value and the specified dtype. 

134 This amounts to determining the greater precision of the two arguments, but also 

135 checks to make sure the same base dtype is being used. A warning is raised if the 

136 base type of the specified dtype differs from the base type of the given dtype and 

137 a conversion will result (e.g., float32 -> uint32). 

138 """ 

139 g = np.dtype(given) 

140 s = np.dtype(specified) 

141 if g == s: 

142 return s.type, None 

143 if g.itemsize <= s.itemsize: # given type has precision < precision of specified type 

144 # note: this allows float32 -> int32, bool -> int8, int16 -> uint16 which may involve buffer overflows, 

145 # truncated values, and other unexpected consequences. 

146 warning_msg = ('Value with data type %s is being converted to data type %s as specified.' 

147 % (g.name, s.name)) 

148 return s.type, warning_msg 

149 elif g.name[:3] == s.name[:3]: 

150 return g.type, None # same base type, use higher-precision given type 

151 else: 

152 if np.issubdtype(s, np.unsignedinteger): 

153 # e.g.: given int64 and spec uint32, return uint64. given float32 and spec uint8, return uint32. 

154 ret_type = np.dtype('uint' + str(int(g.itemsize * 8))) 

155 warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).' 

156 % (g.name, ret_type.name, s.name)) 

157 return ret_type.type, warning_msg 

158 if np.issubdtype(s, np.floating): 

159 # e.g.: given int64 and spec float32, return float64. given uint64 and spec float32, return float32. 

160 ret_type = np.dtype('float' + str(max(int(g.itemsize * 8), 32))) 

161 warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).' 

162 % (g.name, ret_type.name, s.name)) 

163 return ret_type.type, warning_msg 

164 if np.issubdtype(s, np.integer): 

165 # e.g.: given float64 and spec int8, return int64. given uint32 and spec int8, return int32. 

166 ret_type = np.dtype('int' + str(int(g.itemsize * 8))) 

167 warning_msg = ('Value with data type %s is being converted to data type %s (min specification: %s).' 

168 % (g.name, ret_type.name, s.name)) 

169 return ret_type.type, warning_msg 

170 if s.type is np.bool_: 170 ↛ 174line 170 didn't jump to line 174, because the condition on line 170 was never false

171 msg = "expected %s, received %s - must supply %s" % (s.name, g.name, s.name) 

172 raise ValueError(msg) 

173 # all numeric types in __dtypes should be caught by the above 

174 raise ValueError('Unsupported conversion to specification data type: %s' % s.name) 

175 

176 @classmethod 

177 def no_convert(cls, obj_type): 

178 """ 

179 Specify an object type that ObjectMappers should not convert. 

180 """ 

181 cls.__no_convert.add(obj_type) 

182 

183 @classmethod # noqa: C901 

184 def convert_dtype(cls, spec, value, spec_dtype=None): # noqa: C901 

185 """ 

186 Convert values to the specified dtype. For example, if a literal int 

187 is passed in to a field that is specified as a unsigned integer, this function 

188 will convert the Python int to a numpy unsigned int. 

189 

190 :param spec: The DatasetSpec or AttributeSpec to which this value is being applied 

191 :param value: The value being converted to the spec dtype 

192 :param spec_dtype: Optional override of the dtype in spec.dtype. Used to specify the parent dtype when the given 

193 extended spec lacks a dtype. 

194 

195 :return: The function returns a tuple consisting of 1) the value, and 2) the data type. 

196 The value is returned as the function may convert the input value to comply 

197 with the dtype specified in the schema. 

198 """ 

199 if spec_dtype is None: 

200 spec_dtype = spec.dtype 

201 ret, ret_dtype = cls.__check_edgecases(spec, value, spec_dtype) 

202 if ret is not None or ret_dtype is not None: 

203 return ret, ret_dtype 

204 # spec_dtype is a string, spec_dtype_type is a type or the conversion helper functions _unicode or _ascii 

205 spec_dtype_type = cls.__dtypes[spec_dtype] 

206 warning_msg = None 

207 # Numpy Array or Zarr array 

208 if (isinstance(value, np.ndarray) or 

209 (hasattr(value, 'astype') and hasattr(value, 'dtype'))): 

210 if spec_dtype_type is _unicode: 

211 ret = value.astype('U') 

212 ret_dtype = "utf8" 

213 elif spec_dtype_type is _ascii: 

214 ret = value.astype('S') 

215 ret_dtype = "ascii" 

216 else: 

217 dtype_func, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type) 

218 if value.dtype == dtype_func: 

219 ret = value 

220 else: 

221 ret = value.astype(dtype_func) 

222 ret_dtype = ret.dtype.type 

223 # Tuple or list 

224 elif isinstance(value, (tuple, list)): 

225 if len(value) == 0: 

226 if spec_dtype_type is _unicode: 

227 ret_dtype = 'utf8' 

228 elif spec_dtype_type is _ascii: 

229 ret_dtype = 'ascii' 

230 else: 

231 ret_dtype = spec_dtype_type 

232 return value, ret_dtype 

233 ret = list() 

234 for elem in value: 

235 tmp, tmp_dtype = cls.convert_dtype(spec, elem, spec_dtype) 

236 ret.append(tmp) 

237 ret = type(value)(ret) 

238 ret_dtype = tmp_dtype 

239 # Any DataChunkIterator 

240 elif isinstance(value, AbstractDataChunkIterator): 

241 ret = value 

242 if spec_dtype_type is _unicode: 

243 ret_dtype = "utf8" 

244 elif spec_dtype_type is _ascii: 

245 ret_dtype = "ascii" 

246 else: 

247 ret_dtype, warning_msg = cls.__resolve_numeric_dtype(value.dtype, spec_dtype_type) 

248 else: 

249 if spec_dtype_type in (_unicode, _ascii): 

250 ret_dtype = 'ascii' 

251 if spec_dtype_type is _unicode: 

252 ret_dtype = 'utf8' 

253 ret = spec_dtype_type(value) 

254 else: 

255 dtype_func, warning_msg = cls.__resolve_numeric_dtype(type(value), spec_dtype_type) 

256 ret = dtype_func(value) 

257 ret_dtype = type(ret) 

258 if warning_msg: 

259 full_warning_msg = "Spec '%s': %s" % (spec.path, warning_msg) 

260 warnings.warn(full_warning_msg, DtypeConversionWarning) 

261 return ret, ret_dtype 

262 

263 @classmethod 

264 def __check_convert_numeric(cls, value_type): 

265 # dtype 'numeric' allows only ints, floats, and uints 

266 value_dtype = np.dtype(value_type) 

267 if not (np.issubdtype(value_dtype, np.unsignedinteger) or 

268 np.issubdtype(value_dtype, np.floating) or 

269 np.issubdtype(value_dtype, np.integer)): 

270 raise ValueError("Cannot convert from %s to 'numeric' specification dtype." % value_type) 

271 

272 @classmethod # noqa: C901 

273 def __check_edgecases(cls, spec, value, spec_dtype): # noqa: C901 

274 """ 

275 Check edge cases in converting data to a dtype 

276 """ 

277 if value is None: 

278 # Data is missing. Determine dtype from spec 

279 dt = spec_dtype 

280 if isinstance(dt, RefSpec): 

281 dt = dt.reftype 

282 return None, dt 

283 if isinstance(spec_dtype, list): 

284 # compound dtype - Since the I/O layer needs to determine how to handle these, 

285 # return the list of DtypeSpecs 

286 return value, spec_dtype 

287 if isinstance(value, DataIO): 

288 # data is wrapped for I/O via DataIO 

289 if value.data is None: 

290 # Data is missing so DataIO.dtype must be set to determine the dtype 

291 return value, value.dtype 

292 else: 

293 # Determine the dtype from the DataIO.data 

294 return value, cls.convert_dtype(spec, value.data, spec_dtype)[1] 

295 if spec_dtype is None or spec_dtype == 'numeric' or type(value) in cls.__no_convert: 

296 # infer type from value 

297 if hasattr(value, 'dtype'): # covers numpy types, Zarr Array, AbstractDataChunkIterator 

298 if spec_dtype == 'numeric': 

299 cls.__check_convert_numeric(value.dtype.type) 

300 if np.issubdtype(value.dtype, np.str_): 

301 ret_dtype = 'utf8' 

302 elif np.issubdtype(value.dtype, np.string_): 

303 ret_dtype = 'ascii' 

304 elif np.issubdtype(value.dtype, np.dtype('O')): 304 ↛ 307line 304 didn't jump to line 307, because the condition on line 304 was never true

305 # Only variable-length strings should ever appear as generic objects. 

306 # Everything else should have a well-defined type 

307 ret_dtype = 'utf8' 

308 else: 

309 ret_dtype = value.dtype.type 

310 return value, ret_dtype 

311 if isinstance(value, (list, tuple)): 

312 if len(value) == 0: 

313 msg = "Cannot infer dtype of empty list or tuple. Please use numpy array with specified dtype." 

314 raise ValueError(msg) 

315 return value, cls.__check_edgecases(spec, value[0], spec_dtype)[1] # infer dtype from first element 

316 ret_dtype = type(value) 

317 if spec_dtype == 'numeric': 

318 cls.__check_convert_numeric(ret_dtype) 

319 if ret_dtype is str: 

320 ret_dtype = 'utf8' 

321 elif ret_dtype is bytes: 

322 ret_dtype = 'ascii' 

323 return value, ret_dtype 

324 if isinstance(spec_dtype, RefSpec): 

325 if not isinstance(value, ReferenceBuilder): 

326 msg = "got RefSpec for value of type %s" % type(value) 

327 raise ValueError(msg) 

328 return value, spec_dtype 

329 if spec_dtype is not None and spec_dtype not in cls.__dtypes: # pragma: no cover 

330 msg = "unrecognized dtype: %s -- cannot convert value" % spec_dtype 

331 raise ValueError(msg) 

332 return None, None 

333 

334 _const_arg = '__constructor_arg' 

335 

336 @staticmethod 

337 @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, 

338 is_method=False) 

339 def constructor_arg(**kwargs): 

340 '''Decorator to override the default mapping scheme for a given constructor argument. 

341 

342 Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default 

343 scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its 

344 first argument the Builder object that is being mapped. The method should return the value to be passed 

345 to the target AbstractContainer class constructor argument given by *name*. 

346 ''' 

347 name = getargs('name', kwargs) 

348 return _constructor_arg(name) 

349 

350 _obj_attr = '__object_attr' 

351 

352 @staticmethod 

353 @docval({'name': 'name', 'type': str, 'doc': 'the name of the constructor argument'}, 

354 is_method=False) 

355 def object_attr(**kwargs): 

356 '''Decorator to override the default mapping scheme for a given object attribute. 

357 

358 Decorate ObjectMapper methods with this function when extending ObjectMapper to override the default 

359 scheme for mapping between AbstractContainer and Builder objects. The decorated method should accept as its 

360 first argument the AbstractContainer object that is being mapped. The method should return the child Builder 

361 object (or scalar if the object attribute corresponds to an AttributeSpec) that represents the 

362 attribute given by *name*. 

363 ''' 

364 name = getargs('name', kwargs) 

365 return _object_attr(name) 

366 

367 @staticmethod 

368 def __is_attr(attr_val): 

369 return hasattr(attr_val, _obj_attr) 

370 

371 @staticmethod 

372 def __get_obj_attr(attr_val): 

373 return getattr(attr_val, _obj_attr) 

374 

375 @staticmethod 

376 def __is_constructor_arg(attr_val): 

377 return hasattr(attr_val, _const_arg) 

378 

379 @staticmethod 

380 def __get_cargname(attr_val): 

381 return getattr(attr_val, _const_arg) 

382 

383 @ExtenderMeta.post_init 

384 def __gather_procedures(cls, name, bases, classdict): 

385 if hasattr(cls, 'constructor_args'): 

386 cls.constructor_args = copy(cls.constructor_args) 

387 else: 

388 cls.constructor_args = dict() 

389 if hasattr(cls, 'obj_attrs'): 

390 cls.obj_attrs = copy(cls.obj_attrs) 

391 else: 

392 cls.obj_attrs = dict() 

393 for name, func in cls.__dict__.items(): 

394 if cls.__is_constructor_arg(func): 

395 cls.constructor_args[cls.__get_cargname(func)] = getattr(cls, name) 

396 elif cls.__is_attr(func): 

397 cls.obj_attrs[cls.__get_obj_attr(func)] = getattr(cls, name) 

398 

399 @docval({'name': 'spec', 'type': (DatasetSpec, GroupSpec), 

400 'doc': 'The specification for mapping objects to builders'}) 

401 def __init__(self, **kwargs): 

402 """ Create a map from AbstractContainer attributes to specifications """ 

403 self.logger = logging.getLogger('%s.%s' % (self.__class__.__module__, self.__class__.__qualname__)) 

404 spec = getargs('spec', kwargs) 

405 self.__spec = spec 

406 self.__data_type_key = spec.type_key() 

407 self.__spec2attr = dict() 

408 self.__attr2spec = dict() 

409 self.__spec2carg = dict() 

410 self.__carg2spec = dict() 

411 self.__map_spec(spec) 

412 

413 @property 

414 def spec(self): 

415 ''' the Spec used in this ObjectMapper ''' 

416 return self.__spec 

417 

418 @_constructor_arg('name') 

419 def get_container_name(self, *args): 

420 builder = args[0] 

421 return builder.name 

422 

423 @classmethod 

424 @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the name for'}) 

425 def convert_dt_name(cls, **kwargs): 

426 '''Construct the attribute name corresponding to a specification''' 

427 spec = getargs('spec', kwargs) 

428 name = cls.__get_data_type(spec) 

429 s1 = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', name) 

430 name = re.sub('([a-z0-9])([A-Z])', r'\1_\2', s1).lower() 

431 if name[-1] != 's' and spec.is_many(): 

432 name += 's' 

433 return name 

434 

435 @classmethod 

436 def __get_fields(cls, name_stack, all_names, spec): 

437 name = spec.name 

438 if spec.name is None: 

439 name = cls.convert_dt_name(spec) 

440 name_stack.append(name) 

441 name = '__'.join(name_stack) 

442 # TODO address potential name clashes, e.g., quantity '*' subgroups and links of same data_type_inc will 

443 # have the same name 

444 all_names[name] = spec 

445 if isinstance(spec, BaseStorageSpec): 

446 if not (spec.data_type_def is None and spec.data_type_inc is None): 

447 # don't get names for components in data_types 

448 name_stack.pop() 

449 return 

450 for subspec in spec.attributes: 

451 cls.__get_fields(name_stack, all_names, subspec) 

452 if isinstance(spec, GroupSpec): 

453 for subspec in spec.datasets: 

454 cls.__get_fields(name_stack, all_names, subspec) 

455 for subspec in spec.groups: 

456 cls.__get_fields(name_stack, all_names, subspec) 

457 for subspec in spec.links: 

458 cls.__get_fields(name_stack, all_names, subspec) 

459 name_stack.pop() 

460 

461 @classmethod 

462 @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to get the object attribute names for'}) 

463 def get_attr_names(cls, **kwargs): 

464 '''Get the attribute names for each subspecification in a Spec''' 

465 spec = getargs('spec', kwargs) 

466 names = OrderedDict() 

467 for subspec in spec.attributes: 

468 cls.__get_fields(list(), names, subspec) 

469 if isinstance(spec, GroupSpec): 

470 for subspec in spec.groups: 

471 cls.__get_fields(list(), names, subspec) 

472 for subspec in spec.datasets: 

473 cls.__get_fields(list(), names, subspec) 

474 for subspec in spec.links: 

475 cls.__get_fields(list(), names, subspec) 

476 return names 

477 

478 def __map_spec(self, spec): 

479 attr_names = self.get_attr_names(spec) 

480 for k, v in attr_names.items(): 

481 self.map_spec(k, v) 

482 

483 @docval({"name": "attr_name", "type": str, "doc": "the name of the object to map"}, 

484 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) 

485 def map_attr(self, **kwargs): 

486 """ Map an attribute to spec. Use this to override default behavior """ 

487 attr_name, spec = getargs('attr_name', 'spec', kwargs) 

488 self.__spec2attr[spec] = attr_name 

489 self.__attr2spec[attr_name] = spec 

490 

491 @docval({"name": "attr_name", "type": str, "doc": "the name of the attribute"}) 

492 def get_attr_spec(self, **kwargs): 

493 """ Return the Spec for a given attribute """ 

494 attr_name = getargs('attr_name', kwargs) 

495 return self.__attr2spec.get(attr_name) 

496 

497 @docval({"name": "carg_name", "type": str, "doc": "the name of the constructor argument"}) 

498 def get_carg_spec(self, **kwargs): 

499 """ Return the Spec for a given constructor argument """ 

500 carg_name = getargs('carg_name', kwargs) 

501 return self.__carg2spec.get(carg_name) 

502 

503 @docval({"name": "const_arg", "type": str, "doc": "the name of the constructor argument to map"}, 

504 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) 

505 def map_const_arg(self, **kwargs): 

506 """ Map an attribute to spec. Use this to override default behavior """ 

507 const_arg, spec = getargs('const_arg', 'spec', kwargs) 

508 self.__spec2carg[spec] = const_arg 

509 self.__carg2spec[const_arg] = spec 

510 

511 @docval({"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) 

512 def unmap(self, **kwargs): 

513 """ Removing any mapping for a specification. Use this to override default mapping """ 

514 spec = getargs('spec', kwargs) 

515 self.__spec2attr.pop(spec, None) 

516 self.__spec2carg.pop(spec, None) 

517 

518 @docval({"name": "attr_carg", "type": str, "doc": "the constructor argument/object attribute to map this spec to"}, 

519 {"name": "spec", "type": Spec, "doc": "the spec to map the attribute to"}) 

520 def map_spec(self, **kwargs): 

521 """ Map the given specification to the construct argument and object attribute """ 

522 spec, attr_carg = getargs('spec', 'attr_carg', kwargs) 

523 self.map_const_arg(attr_carg, spec) 

524 self.map_attr(attr_carg, spec) 

525 

526 def __get_override_carg(self, *args): 

527 name = args[0] 

528 remaining_args = tuple(args[1:]) 

529 if name in self.constructor_args: 

530 self.logger.debug(" Calling override function for constructor argument '%s'" % name) 

531 func = self.constructor_args[name] 

532 return func(self, *remaining_args) 

533 return None 

534 

535 def __get_override_attr(self, name, container, manager): 

536 if name in self.obj_attrs: 

537 self.logger.debug(" Calling override function for attribute '%s'" % name) 

538 func = self.obj_attrs[name] 

539 return func(self, container, manager) 

540 return None 

541 

542 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute for"}, 

543 returns='the attribute name', rtype=str) 

544 def get_attribute(self, **kwargs): 

545 ''' Get the object attribute name for the given Spec ''' 

546 spec = getargs('spec', kwargs) 

547 val = self.__spec2attr.get(spec, None) 

548 return val 

549 

550 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"}, 

551 {"name": "container", "type": AbstractContainer, "doc": "the container to get the attribute value from"}, 

552 {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"}, 

553 returns='the value of the attribute') 

554 def get_attr_value(self, **kwargs): 

555 ''' Get the value of the attribute corresponding to this spec from the given container ''' 

556 spec, container, manager = getargs('spec', 'container', 'manager', kwargs) 

557 attr_name = self.get_attribute(spec) 

558 if attr_name is None: 558 ↛ 559line 558 didn't jump to line 559, because the condition on line 558 was never true

559 return None 

560 attr_val = self.__get_override_attr(attr_name, container, manager) 

561 if attr_val is None: 

562 try: 

563 attr_val = getattr(container, attr_name) 

564 except AttributeError: 

565 msg = ("%s '%s' does not have attribute '%s' for mapping to spec: %s" 

566 % (container.__class__.__name__, container.name, attr_name, spec)) 

567 raise ContainerConfigurationError(msg) 

568 if isinstance(attr_val, TermSetWrapper): 

569 attr_val = attr_val.value 

570 if attr_val is not None: 

571 attr_val = self.__convert_string(attr_val, spec) 

572 spec_dt = self.__get_data_type(spec) 

573 if spec_dt is not None: 

574 try: 

575 attr_val = self.__filter_by_spec_dt(attr_val, spec_dt, manager) 

576 except ValueError as e: 

577 msg = ("%s '%s' attribute '%s' has unexpected type." 

578 % (container.__class__.__name__, container.name, attr_name)) 

579 raise ContainerConfigurationError(msg) from e 

580 # else: attr_val is an attribute on the Container and its value is None 

581 # attr_val can be None, an AbstractContainer, or a list of AbstractContainers 

582 return attr_val 

583 

584 @classmethod 

585 def __get_data_type(cls, spec): 

586 ret = None 

587 if isinstance(spec, LinkSpec): 

588 ret = spec.target_type 

589 elif isinstance(spec, BaseStorageSpec): 

590 if spec.data_type_def is not None: 

591 ret = spec.data_type_def 

592 elif spec.data_type_inc is not None: 

593 ret = spec.data_type_inc 

594 # else, untyped group/dataset spec 

595 # else, attribute spec 

596 return ret 

597 

598 def __convert_string(self, value, spec): 

599 """Convert string types to the specified dtype.""" 

600 ret = value 

601 if isinstance(spec, AttributeSpec): 

602 if 'text' in spec.dtype: 

603 if spec.shape is not None or spec.dims is not None: 

604 ret = list(map(str, value)) 

605 else: 

606 ret = str(value) 

607 elif isinstance(spec, DatasetSpec): 

608 # TODO: make sure we can handle specs with data_type_inc set 

609 if spec.data_type_inc is None and spec.dtype is not None: 

610 string_type = None 

611 if 'text' in spec.dtype: 

612 string_type = str 

613 elif 'ascii' in spec.dtype: 613 ↛ 614line 613 didn't jump to line 614, because the condition on line 613 was never true

614 string_type = bytes 

615 elif 'isodatetime' in spec.dtype: 

616 def string_type(x): 

617 return x.isoformat() # method works for both date and datetime 

618 if string_type is not None: 

619 if spec.shape is not None or spec.dims is not None: 

620 ret = list(map(string_type, value)) 

621 else: 

622 ret = string_type(value) 

623 # copy over any I/O parameters if they were specified 

624 if isinstance(value, DataIO): 

625 params = value.get_io_params() 

626 params['data'] = ret 

627 ret = value.__class__(**params) 

628 return ret 

629 

630 def __filter_by_spec_dt(self, attr_value, spec_dt, build_manager): 

631 """Return a list of containers that match the spec data type. 

632 

633 If attr_value is a container that does not match the spec data type, then None is returned. 

634 If attr_value is a collection, then a list of only the containers in the collection that match the 

635 spec data type are returned. 

636 Otherwise, attr_value is returned unchanged. 

637 

638 spec_dt is a string representing a spec data type. 

639 

640 Return None, an AbstractContainer, or a list of AbstractContainers 

641 """ 

642 if isinstance(attr_value, AbstractContainer): 

643 if build_manager.is_sub_data_type(attr_value, spec_dt): 

644 return attr_value 

645 else: 

646 return None 

647 

648 ret = attr_value 

649 if isinstance(attr_value, (list, tuple, set, dict)): 

650 if isinstance(attr_value, dict): 

651 attr_values = attr_value.values() 

652 else: 

653 attr_values = attr_value 

654 ret = [] 

655 # NOTE: this will test collections of non-containers element-wise (e.g. lists of lists of ints) 

656 for c in attr_values: 

657 if self.__filter_by_spec_dt(c, spec_dt, build_manager) is not None: 

658 ret.append(c) 

659 if len(ret) == 0: 

660 ret = None 

661 else: 

662 raise ValueError("Unexpected type for attr_value: %s. Only AbstractContainer, list, tuple, set, dict, are " 

663 "allowed." % type(attr_value)) 

664 return ret 

665 

666 def __check_quantity(self, attr_value, spec, container): 

667 if attr_value is None and spec.required: 

668 attr_name = self.get_attribute(spec) 

669 msg = ("%s '%s' is missing required value for attribute '%s'." 

670 % (container.__class__.__name__, container.name, attr_name)) 

671 warnings.warn(msg, MissingRequiredBuildWarning) 

672 self.logger.debug('MissingRequiredBuildWarning: ' + msg) 

673 elif attr_value is not None and self.__get_data_type(spec) is not None: 

674 # quantity is valid only for specs with a data type or target type 

675 if isinstance(attr_value, AbstractContainer): 

676 attr_value = [attr_value] 

677 n = len(attr_value) 

678 if (n and isinstance(attr_value[0], AbstractContainer) and 

679 ((n > 1 and not spec.is_many()) or (isinstance(spec.quantity, int) and n != spec.quantity))): 

680 attr_name = self.get_attribute(spec) 

681 msg = ("%s '%s' has %d values for attribute '%s' but spec allows %s." 

682 % (container.__class__.__name__, container.name, n, attr_name, repr(spec.quantity))) 

683 warnings.warn(msg, IncorrectQuantityBuildWarning) 

684 self.logger.debug('IncorrectQuantityBuildWarning: ' + msg) 

685 

686 @docval({"name": "spec", "type": Spec, "doc": "the spec to get the constructor argument for"}, 

687 returns="the name of the constructor argument", rtype=str) 

688 def get_const_arg(self, **kwargs): 

689 ''' Get the constructor argument for the given Spec ''' 

690 spec = getargs('spec', kwargs) 

691 return self.__spec2carg.get(spec, None) 

692 

693 @docval({"name": "container", "type": AbstractContainer, "doc": "the container to convert to a Builder"}, 

694 {"name": "manager", "type": BuildManager, "doc": "the BuildManager to use for managing this build"}, 

695 {"name": "parent", "type": GroupBuilder, "doc": "the parent of the resulting Builder", 'default': None}, 

696 {"name": "source", "type": str, 

697 "doc": "the source of container being built i.e. file path", 'default': None}, 

698 {"name": "builder", "type": BaseBuilder, "doc": "the Builder to build on", 'default': None}, 

699 {"name": "spec_ext", "type": BaseStorageSpec, "doc": "a spec extension", 'default': None}, 

700 {"name": "export", "type": bool, "doc": "whether this build is for exporting", 

701 'default': False}, 

702 returns="the Builder representing the given AbstractContainer", rtype=Builder) 

703 def build(self, **kwargs): 

704 '''Convert an AbstractContainer to a Builder representation. 

705 

706 References are not added but are queued to be added in the BuildManager. 

707 ''' 

708 container, manager, parent, source = getargs('container', 'manager', 'parent', 'source', kwargs) 

709 builder, spec_ext, export = getargs('builder', 'spec_ext', 'export', kwargs) 

710 name = manager.get_builder_name(container) 

711 if isinstance(self.__spec, GroupSpec): 

712 self.logger.debug("Building %s '%s' as a group (source: %s)" 

713 % (container.__class__.__name__, container.name, repr(source))) 

714 if builder is None: 

715 builder = GroupBuilder(name, parent=parent, source=source) 

716 self.__add_datasets(builder, self.__spec.datasets, container, manager, source, export) 

717 self.__add_groups(builder, self.__spec.groups, container, manager, source, export) 

718 self.__add_links(builder, self.__spec.links, container, manager, source, export) 

719 else: 

720 if builder is None: 720 ↛ 762line 720 didn't jump to line 762, because the condition on line 720 was never false

721 if not isinstance(container, Data): 721 ↛ 722line 721 didn't jump to line 722, because the condition on line 721 was never true

722 msg = "'container' must be of type Data with DatasetSpec" 

723 raise ValueError(msg) 

724 spec_dtype, spec_shape, spec = self.__check_dset_spec(self.spec, spec_ext) 

725 if isinstance(spec_dtype, RefSpec): 

726 self.logger.debug("Building %s '%s' as a dataset of references (source: %s)" 

727 % (container.__class__.__name__, container.name, repr(source))) 

728 # create dataset builder with data=None as a placeholder. fill in with refs later 

729 builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype.reftype) 

730 manager.queue_ref(self.__set_dataset_to_refs(builder, spec_dtype, spec_shape, container, manager)) 

731 elif isinstance(spec_dtype, list): 

732 # a compound dataset 

733 self.logger.debug("Building %s '%s' as a dataset of compound dtypes (source: %s)" 

734 % (container.__class__.__name__, container.name, repr(source))) 

735 # create dataset builder with data=None, dtype=None as a placeholder. fill in with refs later 

736 builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype=spec_dtype) 

737 manager.queue_ref(self.__set_compound_dataset_to_refs(builder, spec, spec_dtype, container, 

738 manager)) 

739 else: 

740 # a regular dtype 

741 if spec_dtype is None and self.__is_reftype(container.data): 

742 self.logger.debug("Building %s '%s' containing references as a dataset of unspecified dtype " 

743 "(source: %s)" 

744 % (container.__class__.__name__, container.name, repr(source))) 

745 # an unspecified dtype and we were given references 

746 # create dataset builder with data=None as a placeholder. fill in with refs later 

747 builder = DatasetBuilder(name, data=None, parent=parent, source=source, dtype='object') 

748 manager.queue_ref(self.__set_untyped_dataset_to_refs(builder, container, manager)) 

749 else: 

750 # a dataset that has no references, pass the conversion off to the convert_dtype method 

751 self.logger.debug("Building %s '%s' as a dataset (source: %s)" 

752 % (container.__class__.__name__, container.name, repr(source))) 

753 try: 

754 # use spec_dtype from self.spec when spec_ext does not specify dtype 

755 bldr_data, dtype = self.convert_dtype(spec, container.data, spec_dtype=spec_dtype) 

756 except Exception as ex: 

757 msg = 'could not resolve dtype for %s \'%s\'' % (type(container).__name__, container.name) 

758 raise Exception(msg) from ex 

759 builder = DatasetBuilder(name, bldr_data, parent=parent, source=source, dtype=dtype) 

760 

761 # Add attributes from the specification extension to the list of attributes 

762 all_attrs = self.__spec.attributes + getattr(spec_ext, 'attributes', tuple()) 

763 # If the spec_ext refines an existing attribute it will now appear twice in the list. The 

764 # refinement should only be relevant for validation (not for write). To avoid problems with the 

765 # write we here remove duplicates and keep the original spec of the two to make write work. 

766 # TODO: We should add validation in the AttributeSpec to make sure refinements are valid 

767 # TODO: Check the BuildManager as refinements should probably be resolved rather than be passed in via spec_ext 

768 all_attrs = list({a.name: a for a in all_attrs[::-1]}.values()) 

769 self.__add_attributes(builder, all_attrs, container, manager, source, export) 

770 return builder 

771 

772 def __check_dset_spec(self, orig, ext): 

773 """ 

774 Check a dataset spec against a refining spec to see which dtype and shape should be used 

775 """ 

776 dtype = orig.dtype 

777 shape = orig.shape 

778 spec = orig 

779 if ext is not None: 

780 if ext.dtype is not None: 

781 dtype = ext.dtype 

782 if ext.shape is not None: 

783 shape = ext.shape 

784 spec = ext 

785 return dtype, shape, spec 

786 

    def __is_reftype(self, data):
        """Return True if *data* (possibly a nested sequence) bottoms out in an AbstractContainer.

        Used to decide whether a value should be written as object reference(s).
        Data chunk iterators (plain or wrapped in DataIO) are never treated as
        references.
        """
        if (isinstance(data, AbstractDataChunkIterator) or
                (isinstance(data, DataIO) and isinstance(data.data, AbstractDataChunkIterator))):
            return False

        # walk down nested sequences, one level at a time, until reaching something
        # that is not a sequence (or is a container/str/bytes)
        tmp = data
        while hasattr(tmp, '__len__') and not isinstance(tmp, (AbstractContainer, str, bytes)):
            tmptmp = None
            for t in tmp:
                # In case of a numeric array stop the iteration at the first element to avoid long-running loop
                if isinstance(t, (int, float, complex, bool)):
                    break
                if hasattr(t, '__len__') and len(t) > 0 and not isinstance(t, (AbstractContainer, str, bytes)):
                    # NOTE(review): this takes tmp[0] rather than the element t that matched,
                    # and the subsequent break leaves tmp as the outer sequence (so the final
                    # isinstance check returns False) -- looks intentional for short-circuiting
                    # non-container nested data, but worth confirming against upstream behavior
                    tmptmp = tmp[0]
                    break
            if tmptmp is not None:
                break
            else:
                if len(tmp) == 0:
                    tmp = None
                else:
                    # descend one level via the first element
                    tmp = tmp[0]
        if isinstance(tmp, AbstractContainer):
            return True
        else:
            return False

813 

814 def __set_dataset_to_refs(self, builder, dtype, shape, container, build_manager): 

815 self.logger.debug("Queueing set dataset of references %s '%s' to reference builder(s)" 

816 % (builder.__class__.__name__, builder.name)) 

817 

818 def _filler(): 

819 builder.data = self.__get_ref_builder(builder, dtype, shape, container, build_manager) 

820 

821 return _filler 

822 

823 def __set_compound_dataset_to_refs(self, builder, spec, spec_dtype, container, build_manager): 

824 self.logger.debug("Queueing convert compound dataset %s '%s' and set any references to reference builders" 

825 % (builder.__class__.__name__, builder.name)) 

826 

827 def _filler(): 

828 self.logger.debug("Converting compound dataset %s '%s' and setting any references to reference builders" 

829 % (builder.__class__.__name__, builder.name)) 

830 # convert the reference part(s) of a compound dataset to ReferenceBuilders, row by row 

831 refs = [(i, subt) for i, subt in enumerate(spec_dtype) if isinstance(subt.dtype, RefSpec)] 

832 bldr_data = list() 

833 for i, row in enumerate(container.data): 

834 tmp = list(row) 

835 for j, subt in refs: 

836 tmp[j] = self.__get_ref_builder(builder, subt.dtype, None, row[j], build_manager) 

837 bldr_data.append(tuple(tmp)) 

838 builder.data = bldr_data 

839 

840 return _filler 

841 

842 def __set_untyped_dataset_to_refs(self, builder, container, build_manager): 

843 self.logger.debug("Queueing set untyped dataset %s '%s' to reference builders" 

844 % (builder.__class__.__name__, builder.name)) 

845 

846 def _filler(): 

847 self.logger.debug("Setting untyped dataset %s '%s' to list of reference builders" 

848 % (builder.__class__.__name__, builder.name)) 

849 bldr_data = list() 

850 for d in container.data: 

851 if d is None: 

852 bldr_data.append(None) 

853 else: 

854 target_builder = self.__get_target_builder(d, build_manager, builder) 

855 bldr_data.append(ReferenceBuilder(target_builder)) 

856 builder.data = bldr_data 

857 

858 return _filler 

859 

    def __get_ref_builder(self, builder, dtype, shape, container, build_manager):
        """Create the reference value(s) for a reference-typed dataset.

        Returns a RegionBuilder (or list of them) for region references, and a
        ReferenceBuilder (or list of them) for object references. Raises
        ReferenceTargetNotBuiltError (via __get_target_builder) if a target has
        not been built yet.
        """
        bldr_data = None
        if dtype.is_region():
            # region references; NOTE(review): this branch is not exercised by the test
            # suite (per coverage) -- verify against a region-reference round trip
            if shape is None:
                # scalar region reference: the container itself must carry region info
                if not isinstance(container, DataRegion):
                    msg = "'container' must be of type DataRegion if spec represents region reference"
                    raise ValueError(msg)
                self.logger.debug("Setting %s '%s' data to region reference builder"
                                  % (builder.__class__.__name__, builder.name))
                target_builder = self.__get_target_builder(container.data, build_manager, builder)
                bldr_data = RegionBuilder(container.region, target_builder)
            else:
                # a dataset of region references: each element supplies its own target and slice
                self.logger.debug("Setting %s '%s' data to list of region reference builders"
                                  % (builder.__class__.__name__, builder.name))
                bldr_data = list()
                for d in container.data:
                    target_builder = self.__get_target_builder(d.target, build_manager, builder)
                    bldr_data.append(RegionBuilder(d.slice, target_builder))
        else:
            self.logger.debug("Setting object reference dataset on %s '%s' data"
                              % (builder.__class__.__name__, builder.name))
            if isinstance(container, Data):
                # Data container: one reference per element of its data
                self.logger.debug("Setting %s '%s' data to list of reference builders"
                                  % (builder.__class__.__name__, builder.name))
                bldr_data = list()
                for d in container.data:
                    target_builder = self.__get_target_builder(d, build_manager, builder)
                    bldr_data.append(ReferenceBuilder(target_builder))
            else:
                # a single container: one scalar reference
                self.logger.debug("Setting %s '%s' data to reference builder"
                                  % (builder.__class__.__name__, builder.name))
                target_builder = self.__get_target_builder(container, build_manager, builder)
                bldr_data = ReferenceBuilder(target_builder)
        return bldr_data

894 

895 def __get_target_builder(self, container, build_manager, builder): 

896 target_builder = build_manager.get_builder(container) 

897 if target_builder is None: 

898 raise ReferenceTargetNotBuiltError(builder, container) 

899 return target_builder 

900 

    def __add_attributes(self, builder, attributes, container, build_manager, source, export):
        """Add the value for each attribute spec in *attributes* from *container* to *builder*.

        A constant value fixed in the spec wins; otherwise the value is pulled from the
        container, falling back to the spec's default. Reference-typed attributes are
        queued with the BuildManager and filled in after their targets are built.
        """
        if attributes:
            self.logger.debug("Adding attributes from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in attributes:
            self.logger.debug(" Adding attribute for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            if spec.value is not None:
                # the spec pins this attribute to a constant value
                attr_value = spec.value
            else:
                attr_value = self.get_attr_value(spec, container, build_manager)
                if attr_value is None:
                    attr_value = spec.default_value

            # unwrap reference resolvers read from another file
            attr_value = self.__check_ref_resolver(attr_value)

            # check the value against the spec's quantity (helper defined elsewhere in this class)
            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug(" Skipping empty attribute")
                continue

            if isinstance(spec.dtype, RefSpec):
                if not self.__is_reftype(attr_value):
                    msg = ("invalid type for reference '%s' (%s) - must be AbstractContainer"
                           % (spec.name, type(attr_value)))
                    raise ValueError(msg)

                # references are resolved after all builders exist, so queue the assignment
                build_manager.queue_ref(self.__set_attr_to_ref(builder, attr_value, build_manager, spec))
                continue
            else:
                try:
                    attr_value, attr_dtype = self.convert_dtype(spec, attr_value)
                except Exception as ex:
                    msg = 'could not convert %s for %s %s' % (spec.name, type(container).__name__, container.name)
                    raise BuildError(builder, msg) from ex

                # do not write empty or null valued objects
                self.__check_quantity(attr_value, spec, container)
                if attr_value is None:
                    self.logger.debug(" Skipping empty attribute")
                    continue
            builder.set_attribute(spec.name, attr_value)

944 

945 def __set_attr_to_ref(self, builder, attr_value, build_manager, spec): 

946 self.logger.debug("Queueing set reference attribute on %s '%s' attribute '%s' to %s" 

947 % (builder.__class__.__name__, builder.name, spec.name, 

948 attr_value.__class__.__name__)) 

949 

950 def _filler(): 

951 self.logger.debug("Setting reference attribute on %s '%s' attribute '%s' to %s" 

952 % (builder.__class__.__name__, builder.name, spec.name, 

953 attr_value.__class__.__name__)) 

954 target_builder = self.__get_target_builder(attr_value, build_manager, builder) 

955 ref_attr_value = ReferenceBuilder(target_builder) 

956 builder.set_attribute(spec.name, ref_attr_value) 

957 

958 return _filler 

959 

960 def __add_links(self, builder, links, container, build_manager, source, export): 

961 if links: 

962 self.logger.debug("Adding links from %s '%s' to %s '%s'" 

963 % (container.__class__.__name__, container.name, 

964 builder.__class__.__name__, builder.name)) 

965 for spec in links: 

966 self.logger.debug(" Adding link for spec name: %s, target_type: %s" 

967 % (repr(spec.name), repr(spec.target_type))) 

968 attr_value = self.get_attr_value(spec, container, build_manager) 

969 self.__check_quantity(attr_value, spec, container) 

970 if attr_value is None: 

971 self.logger.debug(" Skipping link - no attribute value") 

972 continue 

973 self.__add_containers(builder, spec, attr_value, build_manager, source, container, export) 

974 

    def __add_datasets(self, builder, datasets, container, build_manager, source, export):
        """Add the value for each dataset spec in *datasets* from *container* to *builder*.

        Three cases are handled: an already-built LinkBuilder is attached as-is; an
        untyped, named dataset is converted and created (or reused) directly; a typed
        dataset is delegated to __add_containers.
        """
        if datasets:
            self.logger.debug("Adding datasets from %s '%s' to %s '%s'"
                              % (container.__class__.__name__, container.name,
                                 builder.__class__.__name__, builder.name))
        for spec in datasets:
            self.logger.debug(" Adding dataset for spec name: %s (dtype: %s)"
                              % (repr(spec.name), spec.dtype.__class__.__name__))
            attr_value = self.get_attr_value(spec, container, build_manager)
            # check the value against the spec's quantity (helper defined elsewhere in this class)
            self.__check_quantity(attr_value, spec, container)
            if attr_value is None:
                self.logger.debug(" Skipping dataset - no attribute value")
                continue
            # unwrap reference resolvers read from another file
            attr_value = self.__check_ref_resolver(attr_value)
            if isinstance(attr_value, LinkBuilder):
                self.logger.debug(" Adding %s '%s' for spec name: %s, %s: %s, %s: %s"
                                  % (attr_value.name, attr_value.__class__.__name__,
                                     repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                builder.set_link(attr_value)  # add the existing builder
            elif spec.data_type_def is None and spec.data_type_inc is None:  # untyped, named dataset
                if spec.name in builder.datasets:
                    # dataset was already created (e.g., by a custom mapper); just add attributes
                    sub_builder = builder.datasets[spec.name]
                    self.logger.debug(" Retrieving existing DatasetBuilder '%s' for spec name %s and adding "
                                      "attributes" % (sub_builder.name, repr(spec.name)))
                else:
                    self.logger.debug(" Converting untyped dataset for spec name %s to spec dtype %s"
                                      % (repr(spec.name), repr(spec.dtype)))
                    try:
                        data, dtype = self.convert_dtype(spec, attr_value)
                    except Exception as ex:
                        msg = 'could not convert \'%s\' for %s \'%s\''
                        msg = msg % (spec.name, type(container).__name__, container.name)
                        raise BuildError(builder, msg) from ex
                    self.logger.debug(" Adding untyped dataset for spec name %s and adding attributes"
                                      % repr(spec.name))
                    sub_builder = DatasetBuilder(spec.name, data, parent=builder, source=source, dtype=dtype)
                    builder.set_dataset(sub_builder)
                self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export)
            else:
                # typed dataset: build the container value and attach it (or a link to it)
                self.logger.debug(" Adding typed dataset for spec name: %s, %s: %s, %s: %s"
                                  % (repr(spec.name),
                                     spec.def_key(), repr(spec.data_type_def),
                                     spec.inc_key(), repr(spec.data_type_inc)))
                self.__add_containers(builder, spec, attr_value, build_manager, source, container, export)

1021 

1022 def __add_groups(self, builder, groups, container, build_manager, source, export): 

1023 if groups: 

1024 self.logger.debug("Adding groups from %s '%s' to %s '%s'" 

1025 % (container.__class__.__name__, container.name, 

1026 builder.__class__.__name__, builder.name)) 

1027 for spec in groups: 

1028 if spec.data_type_def is None and spec.data_type_inc is None: 

1029 self.logger.debug(" Adding untyped group for spec name: %s" % repr(spec.name)) 

1030 # we don't need to get attr_name since any named group does not have the concept of value 

1031 sub_builder = builder.groups.get(spec.name) 

1032 if sub_builder is None: 

1033 sub_builder = GroupBuilder(spec.name, source=source) 

1034 self.__add_attributes(sub_builder, spec.attributes, container, build_manager, source, export) 

1035 self.__add_datasets(sub_builder, spec.datasets, container, build_manager, source, export) 

1036 self.__add_links(sub_builder, spec.links, container, build_manager, source, export) 

1037 self.__add_groups(sub_builder, spec.groups, container, build_manager, source, export) 

1038 empty = sub_builder.is_empty() 

1039 if not empty or (empty and spec.required): 

1040 if sub_builder.name not in builder.groups: 

1041 builder.set_group(sub_builder) 

1042 else: 

1043 self.logger.debug(" Adding group for spec name: %s, %s: %s, %s: %s" 

1044 % (repr(spec.name), 

1045 spec.def_key(), repr(spec.data_type_def), 

1046 spec.inc_key(), repr(spec.data_type_inc))) 

1047 attr_value = self.get_attr_value(spec, container, build_manager) 

1048 self.__check_quantity(attr_value, spec, container) 

1049 if attr_value is not None: 

1050 self.__add_containers(builder, spec, attr_value, build_manager, source, container, export) 

1051 

    def __add_containers(self, builder, spec, value, build_manager, source, parent_container, export):
        """Build *value* (an AbstractContainer or list of them) and attach the result to *builder*.

        Depending on the spec and the container's parent/source, the built value is attached
        as a link, a dataset, or a subgroup. Lists are handled by recursing per element.

        :raises OrphanContainerBuildError: if *value* has no parent but is referenced in the file
        :raises ValueError: if an unmodified container has no source, or *value* has the wrong type
        """
        if isinstance(value, AbstractContainer):
            self.logger.debug(" Adding container %s '%s' with parent %s '%s' to %s '%s'"
                              % (value.__class__.__name__, value.name,
                                 parent_container.__class__.__name__, parent_container.name,
                                 builder.__class__.__name__, builder.name))
            if value.parent is None:
                if (value.container_source == parent_container.container_source or
                        build_manager.get_builder(value) is None):
                    # value was removed (or parent not set) and there is a link to it in same file
                    # or value was read from an external link
                    raise OrphanContainerBuildError(builder, value)

            if value.modified or export:
                # writing a newly instantiated container (modified is False only after read) or as if it is newly
                # instantiated (export=True)
                self.logger.debug(" Building newly instantiated %s '%s'" % (value.__class__.__name__, value.name))
                if isinstance(spec, BaseStorageSpec):
                    new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                else:
                    new_builder = build_manager.build(value, source=source, export=export)
                # use spec to determine what kind of HDF5 object this AbstractContainer corresponds to
                if isinstance(spec, LinkSpec) or value.parent is not parent_container:
                    self.logger.debug(" Adding link to %s '%s' in %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                elif isinstance(spec, DatasetSpec):
                    self.logger.debug(" Adding dataset %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_dataset(new_builder)
                else:
                    self.logger.debug(" Adding subgroup %s '%s' to %s '%s'"
                                      % (new_builder.__class__.__name__, new_builder.name,
                                         builder.__class__.__name__, builder.name))
                    builder.set_group(new_builder)
            elif value.container_source:  # make a link to an existing container
                if (value.container_source != parent_container.container_source
                        or value.parent is not parent_container):
                    # the value lives in another file or under another parent: build and link
                    self.logger.debug(" Building %s '%s' (container source: %s) and adding a link to it"
                                      % (value.__class__.__name__, value.name, value.container_source))
                    if isinstance(spec, BaseStorageSpec):
                        new_builder = build_manager.build(value, source=source, spec_ext=spec, export=export)
                    else:
                        new_builder = build_manager.build(value, source=source, export=export)
                    builder.set_link(LinkBuilder(new_builder, name=spec.name, parent=builder))
                else:
                    self.logger.debug(" Skipping build for %s '%s' because both it and its parents were read "
                                      "from the same source."
                                      % (value.__class__.__name__, value.name))
            else:
                raise ValueError("Found unmodified AbstractContainer with no source - '%s' with parent '%s'" %
                                 (value.name, parent_container.name))
        elif isinstance(value, list):
            for container in value:
                self.__add_containers(builder, spec, container, build_manager, source, parent_container, export)
        else:  # pragma: no cover
            msg = ("Received %s, expected AbstractContainer or a list of AbstractContainers."
                   % value.__class__.__name__)
            raise ValueError(msg)

1113 

    def __get_subspec_values(self, builder, spec, manager):
        """Map each sub-specification of *spec* to its value extracted from *builder*.

        Returns a dict of {subspec: value} used later by construct() to assemble
        constructor arguments. Group specs are walked recursively (attributes, links,
        groups, datasets); dataset specs yield the builder's data.
        """
        ret = dict()
        # First get attributes
        attributes = builder.attributes
        for attr_spec in spec.attributes:
            attr_val = attributes.get(attr_spec.name)
            if attr_val is None:
                continue
            if isinstance(attr_val, (GroupBuilder, DatasetBuilder)):
                ret[attr_spec] = manager.construct(attr_val)
            elif isinstance(attr_val, RegionBuilder):  # pragma: no cover
                raise ValueError("RegionReferences as attributes is not yet supported")
            elif isinstance(attr_val, ReferenceBuilder):
                ret[attr_spec] = manager.construct(attr_val.builder)
            else:
                ret[attr_spec] = attr_val
        if isinstance(spec, GroupSpec):
            if not isinstance(builder, GroupBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass GroupBuilder with GroupSpec")
            # first aggregate links by data type and separate them
            # by group and dataset
            groups = dict(builder.groups)  # make a copy so we can separate links
            datasets = dict(builder.datasets)  # make a copy so we can separate links
            links = builder.links
            link_dt = dict()
            for link_builder in links.values():
                # treat each link's target as if it lived directly in this group
                target = link_builder.builder
                if isinstance(target, DatasetBuilder):
                    datasets[link_builder.name] = target
                else:
                    groups[link_builder.name] = target
                dt = manager.get_builder_dt(target)
                if dt is not None:
                    link_dt.setdefault(dt, list()).append(target)
            # now assign links to their respective specification
            for subspec in spec.links:
                if subspec.name is not None and subspec.name in links:
                    ret[subspec] = manager.construct(links[subspec.name].builder)
                else:
                    # match unnamed link specs by target data type
                    sub_builder = link_dt.get(subspec.target_type)
                    if sub_builder is not None:
                        ret[subspec] = self.__flatten(sub_builder, subspec, manager)
            # now process groups and datasets
            self.__get_sub_builders(groups, spec.groups, manager, ret)
            self.__get_sub_builders(datasets, spec.datasets, manager, ret)
        elif isinstance(spec, DatasetSpec):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError("__get_subspec_values - must pass DatasetBuilder with DatasetSpec")
            if (spec.shape is None and getattr(builder.data, 'shape', None) == (1,) and
                    type(builder.data[0]) != np.void):
                # if a scalar dataset is expected and a 1-element non-compound dataset is given, then read the dataset
                builder['data'] = builder.data[0]  # use dictionary reference instead of .data to bypass error
            ret[spec] = self.__check_ref_resolver(builder.data)
        return ret

1168 

1169 @staticmethod 

1170 def __check_ref_resolver(data): 

1171 """ 

1172 Check if this dataset is a reference resolver, and invert it if so. 

1173 """ 

1174 if isinstance(data, ReferenceResolver): 

1175 return data.invert() 

1176 return data 

1177 

    def __get_sub_builders(self, sub_builders, subspecs, manager, ret):
        """Match each spec in *subspecs* to builders in *sub_builders* and record results in *ret*.

        Named specs are matched by builder name; unnamed specs are matched by data type,
        considering the full type hierarchy of each builder.
        """
        # index builders by data_type
        builder_dt = dict()
        for g in sub_builders.values():
            dt = manager.get_builder_dt(g)
            ns = manager.get_builder_ns(g)
            if dt is None or ns is None:
                continue
            # register the builder under its own type and every ancestor type so that
            # specs referring to a parent type still find it
            for parent_dt in manager.namespace_catalog.get_hierarchy(ns, dt):
                builder_dt.setdefault(parent_dt, list()).append(g)
        for subspec in subspecs:
            # first get data type for the spec
            if subspec.data_type_def is not None:
                dt = subspec.data_type_def
            elif subspec.data_type_inc is not None:
                dt = subspec.data_type_inc
            else:
                dt = None
            # use name if we can, otherwise use data type
            if subspec.name is None:
                sub_builder = builder_dt.get(dt)
                if sub_builder is not None:
                    sub_builder = self.__flatten(sub_builder, subspec, manager)
                ret[subspec] = sub_builder
            else:
                sub_builder = sub_builders.get(subspec.name)
                if sub_builder is None:
                    continue
                if dt is None:
                    # untyped group/dataset: recurse to pull out its nested values
                    ret.update(self.__get_subspec_values(sub_builder, subspec, manager))
                else:
                    ret[subspec] = manager.construct(sub_builder)

1211 

1212 def __flatten(self, sub_builder, subspec, manager): 

1213 tmp = [manager.construct(b) for b in sub_builder] 

1214 if len(tmp) == 1 and not subspec.is_many(): 

1215 tmp = tmp[0] 

1216 return tmp 

1217 

    @docval({'name': 'builder', 'type': (DatasetBuilder, GroupBuilder),
             'doc': 'the builder to construct the AbstractContainer from'},
            {'name': 'manager', 'type': BuildManager, 'doc': 'the BuildManager for this build'},
            {'name': 'parent', 'type': (Proxy, AbstractContainer),
             'doc': 'the parent AbstractContainer/Proxy for the AbstractContainer being built', 'default': None})
    def construct(self, **kwargs):
        ''' Construct an AbstractContainer from the given Builder

        Extracts sub-specification values from the builder, maps them to constructor
        arguments of the target class, and instantiates the container in construct mode.
        Raises ConstructError if the constructor fails.
        '''
        builder, manager, parent = getargs('builder', 'manager', 'parent', kwargs)
        cls = manager.get_cls(builder)
        # gather all subspecs
        subspecs = self.__get_subspec_values(builder, self.spec, manager)
        # get the constructor argument that each specification corresponds to
        const_args = dict()
        # For Data container classes, we need to populate the data constructor argument since
        # there is no sub-specification that maps to that argument under the default logic
        if issubclass(cls, Data):
            if not isinstance(builder, DatasetBuilder):  # pragma: no cover
                raise ValueError('Can only construct a Data object from a DatasetBuilder - got %s' % type(builder))
            const_args['data'] = self.__check_ref_resolver(builder.data)
        for subspec, value in subspecs.items():
            const_arg = self.get_const_arg(subspec)
            if const_arg is not None:
                if isinstance(subspec, BaseStorageSpec) and subspec.is_many():
                    # multiple specs can feed the same many-valued argument; merge lists
                    existing_value = const_args.get(const_arg)
                    if isinstance(existing_value, list):
                        value = existing_value + value
                const_args[const_arg] = value
        # build kwargs for the constructor
        kwargs = dict()
        for const_arg in get_docval(cls.__init__):
            argname = const_arg['name']
            # constructor-arg overrides (registered via the @_constructor_arg decorator) win
            override = self.__get_override_carg(argname, builder, manager)
            if override is not None:
                val = override
            elif argname in const_args:
                val = const_args[argname]
            else:
                continue
            kwargs[argname] = val
        try:
            obj = self.__new_container__(cls, builder.source, parent, builder.attributes.get(self.__spec.id_key()),
                                         **kwargs)
        except Exception as ex:
            msg = 'Could not construct %s object due to: %s' % (cls.__name__, ex)
            raise ConstructError(builder, msg) from ex
        return obj

1264 

1265 def __new_container__(self, cls, container_source, parent, object_id, **kwargs): 

1266 """A wrapper function for ensuring a container gets everything set appropriately""" 

1267 obj = cls.__new__(cls, container_source=container_source, parent=parent, object_id=object_id, 

1268 in_construct_mode=True) 

1269 # obj has been created and is in construction mode, indicating that the object is being constructed by 

1270 # the automatic construct process during read, rather than by the user 

1271 obj.__init__(**kwargs) 

1272 obj._in_construct_mode = False # reset to False to indicate that the construction of the object is complete 

1273 return obj 

1274 

1275 @docval({'name': 'container', 'type': AbstractContainer, 

1276 'doc': 'the AbstractContainer to get the Builder name for'}) 

1277 def get_builder_name(self, **kwargs): 

1278 '''Get the name of a Builder that represents a AbstractContainer''' 

1279 container = getargs('container', kwargs) 

1280 if self.__spec.name is not None: 

1281 ret = self.__spec.name 

1282 else: 

1283 ret = container.name 

1284 return ret