Coverage for src/hdmf/validate/validator.py: 89%

434 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-07-10 23:48 +0000

1import re 

2from abc import ABCMeta, abstractmethod 

3from copy import copy 

4from itertools import chain 

5from collections import defaultdict, OrderedDict 

6 

7import numpy as np 

8 

9from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType 

10from .errors import ExpectedArrayError, IncorrectQuantityError 

11from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder 

12from ..build.builders import BaseBuilder 

13from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec 

14from ..spec import SpecNamespace 

15from ..spec.spec import BaseStorageSpec, DtypeHelper 

16from ..utils import docval, getargs, pystr, get_data_shape 

17from ..query import ReferenceResolver 

18 

19 

# Synonym table for each primary dtype, supplied by the spec helper.
__synonyms = DtypeHelper.primary_dtype_synonyms

# Extra dtypes that may substitute for the key dtype (safe widening substitutions).
__additional = {
    'float': ['double'],
    'int8': ['short', 'int', 'long'],
    'short': ['int', 'long'],
    'int': ['long'],
    'uint8': ['uint16', 'uint32', 'uint64'],
    'uint16': ['uint32', 'uint64'],
    'uint32': ['uint64'],
    'utf': ['ascii']
}

# if the spec dtype is a key in __allowable, then all types in __allowable[key] are valid
__allowable = dict()
for primary, synonyms in __synonyms.items():
    permitted = copy(synonyms)
    for extra in __additional.get(primary, ()):
        permitted.extend(__synonyms[extra])
    for name in synonyms:
        __allowable[name] = permitted
# 'numeric' accepts any integer or floating-point dtype name
__allowable['numeric'] = set(chain.from_iterable(v for k, v in __allowable.items() if 'int' in k or 'float' in k))

43 

44 

def check_type(expected, received):
    '''Return True if the *received* dtype satisfies the *expected* dtype.

    *expected* should come from the spec
    *received* should come from the data
    '''
    # Compound dtypes: compare field-by-field against the simplified spec dtypes.
    if isinstance(expected, list):
        if len(expected) > len(received):
            raise ValueError('compound type shorter than expected')
        simplified = DtypeHelper.simplify_cpd_type(expected)
        return all(received[idx] in __allowable[exp_dt] for idx, exp_dt in enumerate(simplified))
    # Normalize *received* to a dtype name string.
    if isinstance(received, np.dtype):
        if received.char == 'O':
            # object dtype: only variable-length string data is recognized
            if 'vlen' not in received.metadata:
                raise ValueError("Unrecognized type: '%s'" % received)
            vlen = received.metadata['vlen']
            received = 'utf' if vlen is str else 'ascii'
        elif received.char == 'U':
            received = 'utf'
        elif received.char == 'S':
            received = 'ascii'
        else:
            received = received.name
    elif isinstance(received, type):
        received = received.__name__
    # Normalize *expected* to a dtype name string.
    if isinstance(expected, RefSpec):
        expected = expected.reftype
    elif isinstance(expected, type):
        expected = expected.__name__
    return received in __allowable[expected]

79 

80 

def get_iso8601_regex():
    """Compile and return a regex matching ISO 8601 dates with an optional time component."""
    date_part = r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])'
    time_part = r'(T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?)?$'
    return re.compile(date_part + time_part)

87 

88 

# Compiled once at import time; reused by _check_isodatetime for every value checked.
_iso_re = get_iso8601_regex()


def _check_isodatetime(s, default=None):
    """Return 'isodatetime' when *s* matches ISO 8601, otherwise *default*."""
    try:
        matched = _iso_re.match(pystr(s))
    except Exception:
        # values that cannot be converted or matched are treated as non-dates
        return default
    return 'isodatetime' if matched is not None else default

99 

100 

class EmptyArrayError(Exception):
    """Raised when a dtype cannot be determined because the data is empty."""

103 

104 

def get_type(data):
    """Infer the dtype name (or list of names for compound data) for *data*.

    Raises EmptyArrayError when the dtype cannot be determined because the
    data (or the array-like it wraps) is empty.
    """
    if isinstance(data, str):
        return _check_isodatetime(data, 'utf')
    if isinstance(data, bytes):
        return _check_isodatetime(data, 'ascii')
    if isinstance(data, RegionBuilder):
        return 'region'
    if isinstance(data, ReferenceBuilder):
        return 'object'
    if isinstance(data, ReferenceResolver):
        return data.dtype
    if isinstance(data, np.ndarray):
        if data.size == 0:
            raise EmptyArrayError()
        # dtype of an array is determined from its first element
        return get_type(data[0])
    if isinstance(data, np.bool_):
        return 'bool'
    if not hasattr(data, '__len__'):
        # scalar: report the Python type name
        return type(data).__name__
    if hasattr(data, 'dtype'):
        if isinstance(data.dtype, list):
            # compound dtype: resolve each field from the first row
            return [get_type(data[0][i]) for i in range(len(data.dtype))]
        if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:
            # variable-length data: inspect the first element
            return get_type(data[0])
        return data.dtype
    if len(data) == 0:
        raise EmptyArrayError()
    return get_type(data[0])

134 

135 

def check_shape(expected, received):
    """Return True if the *received* shape is compatible with the *expected* spec shape.

    *expected* may be:
      - None: any shape (including None) is accepted
      - a list/tuple of alternative shapes (each itself a list/tuple): any match accepts
      - a single shape: a list/tuple of dimension sizes, where None is a wildcard dimension
      - an int: a single dimension size
    *received* is a concrete shape tuple, an int dimension size, or None (scalar data).
    """
    if expected is None:
        # spec places no constraint on shape
        return True
    if isinstance(expected, (list, tuple)):
        if len(expected) > 0 and isinstance(expected[0], (list, tuple)):
            # multiple allowable shapes: accept if any alternative matches
            return any(check_shape(alt, received) for alt in expected)
        if received is None:
            # an array was expected but scalar data was received.
            # checked before len(received): previously an empty expected shape
            # with received=None raised TypeError instead of returning False
            return False
        if len(expected) != len(received):
            return False
        # every dimension must match (None in the spec is a wildcard dimension)
        return all(check_shape(e, r) for e, r in zip(expected, received))
    if isinstance(expected, int):
        return expected == received
    # unrecognized expected specification
    return False

159 

160 

class ValidatorMap:
    """A class for keeping track of Validator objects for all data types in a namespace"""

    @docval({'name': 'namespace', 'type': SpecNamespace, 'doc': 'the namespace to builder map for'})
    def __init__(self, **kwargs):
        self.__ns = getargs('namespace', kwargs)
        registered = self.__ns.get_registered_types()
        # the attribute name under which builders store their data type for this namespace
        self.__type_key = self.__ns.get_spec(registered[0]).type_key()
        # build a parent -> direct children map of the data type hierarchy
        hierarchy = defaultdict(list)
        for type_name in registered:
            spec = self.__ns.get_spec(type_name)
            parent, child = spec.data_type_inc, spec.data_type_def
            hierarchy[child] = list()
            if parent is not None:
                hierarchy[parent].append(child)
        # flatten each node's children into the full set of transitive sub-types
        for node in hierarchy:
            self.__rec(hierarchy, node)
        self.__valid_types = dict()
        self.__validators = dict()
        for type_name, sub_types in hierarchy.items():
            validators = list()
            for sub in sub_types:
                sub_spec = self.__ns.get_spec(sub)
                validator = (GroupValidator(sub_spec, self) if isinstance(sub_spec, GroupSpec)
                             else DatasetValidator(sub_spec, self))
                if sub == type_name:
                    # the validator for the type itself (as opposed to one of its sub-types)
                    self.__validators[sub] = validator
                validators.append(validator)
            self.__valid_types[type_name] = tuple(validators)

    def __rec(self, tree, node):
        """Flatten the descendants of *node* into a tuple (memoized in-place in *tree*)."""
        if isinstance(tree[node], tuple):
            # already resolved
            return tree[node]
        resolved = {node}
        for child in tree[node]:
            resolved.update(self.__rec(tree, child))
        tree[node] = tuple(resolved)
        return tree[node]

    @property
    def namespace(self):
        """The SpecNamespace this map was built from."""
        return self.__ns

    @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to use to validate'},
            returns='all valid sub data types for the given spec', rtype=tuple)
    def valid_types(self, **kwargs):
        '''Get all valid types for a given data type'''
        spec = getargs('spec', kwargs)
        if isinstance(spec, Spec):
            # resolve a Spec object down to its data type name
            spec = spec.data_type_def
        try:
            return self.__valid_types[spec]
        except KeyError:
            raise ValueError("no children for '%s'" % spec)

    @docval({'name': 'data_type', 'type': (BaseStorageSpec, str),
             'doc': 'the data type to get the validator for'},
            returns='the validator ``data_type``')
    def get_validator(self, **kwargs):
        """Return the validator for a given data type"""
        dt = getargs('data_type', kwargs)
        if isinstance(dt, BaseStorageSpec):
            # prefer the defined type; fall back to the included type
            dt = dt.data_type_def if dt.data_type_def is not None else dt.data_type_inc
        try:
            return self.__validators[dt]
        except KeyError:
            msg = "data type '%s' not found in namespace %s" % (dt, self.__ns.name)
            raise ValueError(msg)

    @docval({'name': 'builder', 'type': BaseBuilder, 'doc': 'the builder to validate'},
            returns="a list of errors found", rtype=list)
    def validate(self, **kwargs):
        """Validate a builder against a Spec

        ``builder`` must have the attribute used to specifying data type
        by the namespace used to construct this ValidatorMap.
        """
        builder = getargs('builder', kwargs)
        dt = builder.attributes.get(self.__type_key)
        if dt is None:
            msg = "builder must have data type defined with attribute '%s'" % self.__type_key
            raise ValueError(msg)
        return self.get_validator(dt).validate(builder)

251 

252 

class Validator(metaclass=ABCMeta):
    '''A base class for classes that will be used to validate against Spec subclasses'''

    @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        # exposed read-only through the ``spec`` and ``vmap`` properties
        self.__spec = getargs('spec', kwargs)
        self.__vmap = getargs('validator_map', kwargs)

    @property
    def spec(self):
        """The Spec this validator checks against."""
        return self.__spec

    @property
    def vmap(self):
        """The ValidatorMap used to look up validators for nested data types."""
        return self.__vmap

    @abstractmethod
    @docval({'name': 'value', 'type': None, 'doc': 'either in the form of a value or a Builder'},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        pass

    @classmethod
    def get_spec_loc(cls, spec):
        """Return a human-readable location string for *spec*."""
        return spec.path

    @classmethod
    def get_builder_loc(cls, builder):
        """Return the slash-separated path of *builder* up to (excluding) the root builder."""
        names = list()
        node = builder
        while node is not None and node.name != 'root':
            names.append(node.name)
            node = node.parent
        return "/".join(reversed(names))

288 

289 

class AttributeValidator(Validator):
    '''A class for validating values against AttributeSpecs'''

    @docval({'name': 'spec', 'type': AttributeSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({'name': 'value', 'type': None, 'doc': 'the value to validate'},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        value = getargs('value', kwargs)
        spec = self.spec
        errors = list()
        if spec.required and value is None:
            # a required attribute is absent; nothing further can be checked
            errors.append(MissingError(self.get_spec_loc(spec)))
            return errors
        # dtype validation
        if spec.dtype is None:
            # a spec without a dtype cannot be validated meaningfully
            errors.append(Error(self.get_spec_loc(spec)))
        elif isinstance(spec.dtype, RefSpec):
            if isinstance(value, BaseBuilder):
                # check that the referenced builder's data type is (a subtype of) the target type
                target_spec = self.vmap.namespace.catalog.get_spec(spec.dtype.target_type)
                data_type = value.attributes.get(target_spec.type_key())
                hierarchy = self.vmap.namespace.catalog.get_hierarchy(data_type)
                if spec.dtype.target_type not in hierarchy:
                    errors.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
            else:
                # a reference was expected but a plain value was found
                expected = '%s reference' % spec.dtype.reftype
                try:
                    errors.append(DtypeError(self.get_spec_loc(spec), expected, get_type(value)))
                except EmptyArrayError:
                    # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                    pass
        else:
            try:
                dtype = get_type(value)
                if not check_type(spec.dtype, dtype):
                    errors.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
            except EmptyArrayError:
                # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                pass
        # shape validation
        shape = get_data_shape(value)
        if not check_shape(spec.shape, shape):
            if shape is None:
                errors.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(value)))
            else:
                errors.append(ShapeError(self.get_spec_loc(spec), spec.shape, shape))
        return errors

339 

340 

class BaseStorageValidator(Validator):
    '''A base class for validating against Spec objects that have attributes i.e. BaseStorageSpec'''

    @docval({'name': 'spec', 'type': BaseStorageSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # one AttributeValidator per attribute spec, keyed by attribute name
        self.__attribute_validators = {
            attr.name: AttributeValidator(attr, self.vmap)
            for attr in self.spec.attributes
        }

    @docval({"name": "builder", "type": BaseBuilder, "doc": "the builder to validate"},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        builder = getargs('builder', kwargs)
        attributes = builder.attributes
        results = list()
        for name, validator in self.__attribute_validators.items():
            value = attributes.get(name)
            if value is None:
                # only required attributes produce an error when absent
                if validator.spec.required:
                    results.append(MissingError(self.get_spec_loc(validator.spec),
                                                location=self.get_builder_loc(builder)))
                continue
            errors = validator.validate(value)
            for err in errors:
                # point each error at the attribute within this builder
                err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name
            results.extend(errors)
        return results

370 

371 

class DatasetValidator(BaseStorageValidator):
    '''A class for validating DatasetBuilders against DatasetSpecs'''

    @docval({'name': 'spec', 'type': DatasetSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({"name": "builder", "type": DatasetBuilder, "doc": "the builder to validate"},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        builder = getargs('builder', kwargs)
        # the base class validates the attributes
        errors = super().validate(builder)
        data = builder.data
        # dtype validation
        if self.spec.dtype is not None:
            try:
                received_dtype = get_type(data)
                if not check_type(self.spec.dtype, received_dtype):
                    errors.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, received_dtype,
                                             location=self.get_builder_loc(builder)))
            except EmptyArrayError:
                # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                pass
        # shape validation
        shape = get_data_shape(data)
        if not check_shape(self.spec.shape, shape):
            if shape is None:
                errors.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),
                                                 location=self.get_builder_loc(builder)))
            else:
                errors.append(ShapeError(self.get_spec_loc(self.spec), self.spec.shape, shape,
                                         location=self.get_builder_loc(builder)))
        return errors

404 

405 

def _resolve_data_type(spec):
    """Return the data type for *spec*: the target type for links, the spec's own data type otherwise."""
    return spec.target_type if isinstance(spec, LinkSpec) else spec.data_type

410 

411 

class GroupValidator(BaseStorageValidator):
    '''A class for validating GroupBuilders against GroupSpecs'''

    @docval({'name': 'spec', 'type': GroupSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({"name": "builder", "type": GroupBuilder, "doc": "the builder to validate"},  # noqa: C901
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):  # noqa: C901
        builder = getargs('builder', kwargs)
        # attributes first (base class), then the group's children
        errors = super().validate(builder)
        errors.extend(self.__validate_children(builder))
        return self._remove_duplicates(errors)

    def __validate_children(self, parent_builder):
        """Validates the children of the group builder against the children in the spec.

        Children are defined as datasets, groups, and links.

        Validation works by first assigning builder children to spec children
        in a many-to-one relationship using a SpecMatcher (this matching is
        non-trivial due to inheritance, which is why it is isolated in a
        separate class). Once the matching is complete, it is a
        straightforward procedure for validating the set of matching builders
        against each child spec.
        """
        defined_children = chain(self.spec.datasets, self.spec.groups, self.spec.links)
        matcher = SpecMatcher(self.vmap, defined_children)

        actual_children = chain(parent_builder.datasets.values(),
                                parent_builder.groups.values(),
                                parent_builder.links.values())
        matcher.assign_to_specs(actual_children)

        for child_spec, matched in matcher.spec_matches:
            yield from self.__validate_presence_and_quantity(child_spec, len(matched), parent_builder)
            for child_builder in matched:
                yield from self.__validate_child_builder(child_spec, child_builder, parent_builder)

    def __validate_presence_and_quantity(self, child_spec, n_builders, parent_builder):
        """Validate that at least one matching builder exists if the spec is
        required and that the number of builders agrees with the spec quantity
        """
        if n_builders == 0 and child_spec.required:
            yield self.__construct_missing_child_error(child_spec, parent_builder)
        elif self.__incorrect_quantity(n_builders, child_spec):
            yield self.__construct_incorrect_quantity_error(child_spec, parent_builder, n_builders)

    def __construct_missing_child_error(self, child_spec, parent_builder):
        """Returns either a MissingDataType or a MissingError depending on
        whether or not a specific data type can be resolved from the spec
        """
        data_type = _resolve_data_type(child_spec)
        builder_loc = self.get_builder_loc(parent_builder)
        if data_type is None:
            # no data type available: report the missing child by its spec location
            return MissingError(self.get_spec_loc(child_spec), location=builder_loc)
        return MissingDataType(self.get_spec_loc(self.spec), data_type,
                               location=builder_loc, missing_dt_name=child_spec.name)

    @staticmethod
    def __incorrect_quantity(n_found, spec):
        """Returns a boolean indicating whether the number of builder elements matches the specified quantity"""
        too_many = not spec.is_many() and n_found > 1
        wrong_exact_count = isinstance(spec.quantity, int) and n_found != spec.quantity
        return too_many or wrong_exact_count

    def __construct_incorrect_quantity_error(self, child_spec, parent_builder, n_builders):
        """Build an IncorrectQuantityError for *child_spec* under *parent_builder*."""
        return IncorrectQuantityError(self.get_spec_loc(self.spec), _resolve_data_type(child_spec),
                                      expected=child_spec.quantity, received=n_builders,
                                      location=self.get_builder_loc(parent_builder))

    def __validate_child_builder(self, child_spec, child_builder, parent_builder):
        """Validate a child builder against a child spec considering links"""
        if isinstance(child_builder, LinkBuilder):
            if self.__cannot_be_link(child_spec):
                yield self.__construct_illegal_link_error(child_spec, parent_builder)
                return  # do not validate illegally linked objects
            # validate the link's target in place of the link itself
            child_builder = child_builder.builder
        for child_validator in self.__get_child_validators(child_spec):
            yield from child_validator.validate(child_builder)

    def __construct_illegal_link_error(self, child_spec, parent_builder):
        """Build an IllegalLinkError for a link found where the spec forbids one."""
        return IllegalLinkError(self.get_spec_loc(child_spec),
                                location=self.get_builder_loc(parent_builder))

    @staticmethod
    def __cannot_be_link(spec):
        """True when *spec* is not a LinkSpec and does not allow linking."""
        return not (isinstance(spec, LinkSpec) or spec.linkable)

    def __get_child_validators(self, spec):
        """Returns the appropriate list of validators for a child spec

        Due to the fact that child specs can both inherit a data type via data_type_inc
        and also modify the type without defining a new data type via data_type_def,
        we need to validate against both the spec for the base data type and the spec
        at the current hierarchy of the data type in case there have been any
        modifications.

        If a specific data type can be resolved, a validator for that type is acquired
        from the ValidatorMap and included in the returned validators. If the spec is
        a GroupSpec or a DatasetSpec, then a new Validator is created and also
        returned. If the spec is a LinkSpec, no additional Validator is returned
        because the LinkSpec cannot add or modify fields and the target_type will be
        validated by the Validator returned from the ValidatorMap.
        """
        data_type = _resolve_data_type(spec)
        if data_type is not None:
            yield self.vmap.get_validator(data_type)

        if isinstance(spec, GroupSpec):
            yield GroupValidator(spec, self.vmap)
        elif isinstance(spec, DatasetSpec):
            yield DatasetValidator(spec, self.vmap)
        elif isinstance(spec, LinkSpec):
            return
        else:
            msg = "Unable to resolve a validator for spec %s" % spec
            raise ValueError(msg)

    @staticmethod
    def _remove_duplicates(errors):
        """Return a list of validation errors where duplicates have been removed

        In some cases a child of a group to be validated against two specs which can
        redundantly define the same fields/children. If the builder doesn't match the
        spec, it is possible for duplicate errors to be generated.
        """
        # dict keys deduplicate while preserving first-seen order
        return list(OrderedDict.fromkeys(errors))

552 

553 

class SpecMatches:
    """A utility class to hold a spec and the builders matched to it"""

    def __init__(self, spec):
        # the spec being matched; None is used for the "unmatched" bucket
        self.spec = spec
        self.builders = []

    def add(self, builder):
        """Record *builder* as matching this spec."""
        self.builders.append(builder)

564 

class SpecMatcher:
    """Matches a set of builders against a set of specs

    This class is intended to isolate the task of choosing which spec a
    builder should be validated against from the task of performing that
    validation.
    """

    def __init__(self, vmap, specs):
        self.vmap = vmap
        # one SpecMatches bucket per spec, plus one for builders matching nothing
        self._spec_matches = [SpecMatches(s) for s in specs]
        self._unmatched_builders = SpecMatches(None)

    @property
    def unmatched_builders(self):
        """Returns the builders for which no matching spec was found

        These builders can be considered superfluous, and will generate a
        warning in the future.
        """
        return self._unmatched_builders.builders

    @property
    def spec_matches(self):
        """Returns a list of tuples of: (spec, assigned builders)"""
        return [(sm.spec, sm.builders) for sm in self._spec_matches]

    def assign_to_specs(self, builders):
        """Assigns a set of builders against a set of specs (many-to-one)

        In the case that no matching spec is found, a builder will be
        added to a list of unmatched builders.
        """
        for builder in builders:
            match = self._best_matching_spec(builder)
            bucket = self._unmatched_builders if match is None else match
            bucket.add(builder)

    def _best_matching_spec(self, builder):
        """Finds the best matching spec for builder

        The current algorithm is:
        1. filter specs which meet the minimum requirements of consistent name
        and data type
        2. if more than one candidate meets the minimum requirements, find the
        candidates which do not yet have a sufficient number of builders
        assigned (based on the spec quantity)
        3. return the first unsatisfied candidate if any, otherwise return the
        first candidate

        Note that the current algorithm will give different results depending
        on the order of the specs or builders, and also does not consider
        inheritance hierarchy. Future improvements to this matching algorithm
        should resolve these discrepancies.
        """
        candidates = self._filter_by_type(self._filter_by_name(self._spec_matches, builder), builder)
        if not candidates:
            return None
        if len(candidates) == 1:
            return candidates[0]
        unsatisfied = self._filter_by_unsatisfied(candidates)
        return unsatisfied[0] if unsatisfied else candidates[0]

    def _filter_by_name(self, candidates, builder):
        """Returns the candidate specs that either have the same name as the
        builder or do not specify a name.
        """
        return [c for c in candidates
                if c.spec.name is None or c.spec.name == builder.name]

    def _filter_by_type(self, candidates, builder):
        """Returns the candidate specs which have a data type consistent with
        the builder's data type.
        """
        def compatible_type(spec_matches):
            spec = spec_matches.spec
            if isinstance(spec, LinkSpec):
                # links are matched against the spec of their target type
                spec = self.vmap.get_validator(spec.target_type).spec
            if spec.data_type is None:
                # an untyped spec is compatible with any builder
                return True
            valid_types = [v.spec.data_type for v in self.vmap.valid_types(spec.data_type)]
            source = builder.builder if isinstance(builder, LinkBuilder) else builder
            return source.attributes.get(spec.type_key()) in valid_types

        return [c for c in candidates if compatible_type(c)]

    def _filter_by_unsatisfied(self, candidates):
        """Returns the candidate specs which are not yet matched against
        a number of builders which fulfils the quantity for the spec.
        """
        def is_unsatisfied(spec_matches):
            spec = spec_matches.spec
            n_match = len(spec_matches.builders)
            if spec.required and n_match == 0:
                return True
            return isinstance(spec.quantity, int) and n_match < spec.quantity

        return [c for c in candidates if is_unsatisfied(c)]