Coverage for src/hdmf/validate/validator.py: 89% (434 statements)
coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
Branches that were never taken during this test run are marked inline with '# coverage: ...' comments.

import re
from abc import ABCMeta, abstractmethod
from copy import copy
from itertools import chain
from collections import defaultdict, OrderedDict

import numpy as np

from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType
from .errors import ExpectedArrayError, IncorrectQuantityError
from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder
from ..build.builders import BaseBuilder
from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec
from ..spec import SpecNamespace
from ..spec.spec import BaseStorageSpec, DtypeHelper
from ..utils import docval, getargs, pystr, get_data_shape
from ..query import ReferenceResolver

__synonyms = DtypeHelper.primary_dtype_synonyms

__additional = {
    'float': ['double'],
    'int8': ['short', 'int', 'long'],
    'short': ['int', 'long'],
    'int': ['long'],
    'uint8': ['uint16', 'uint32', 'uint64'],
    'uint16': ['uint32', 'uint64'],
    'uint32': ['uint64'],
    'utf': ['ascii']
}

# if the spec dtype is a key in __allowable, then all types in __allowable[key] are valid
__allowable = dict()
for dt, dt_syn in __synonyms.items():
    allow = copy(dt_syn)
    if dt in __additional:
        for addl in __additional[dt]:
            allow.extend(__synonyms[addl])
    for syn in dt_syn:
        __allowable[syn] = allow
__allowable['numeric'] = set(chain.from_iterable(__allowable[k] for k in __allowable if 'int' in k or 'float' in k))


def check_type(expected, received):
    '''
    *expected* should come from the spec
    *received* should come from the data
    '''
    if isinstance(expected, list):
        if len(expected) > len(received):  # coverage: never true
            raise ValueError('compound type shorter than expected')
        for i, exp in enumerate(DtypeHelper.simplify_cpd_type(expected)):
            rec = received[i]
            if rec not in __allowable[exp]:  # coverage: never true
                return False
        return True
    else:
        if isinstance(received, np.dtype):
            if received.char == 'O':  # coverage: never true
                if 'vlen' in received.metadata:
                    received = received.metadata['vlen']
                else:
                    raise ValueError("Unrecognized type: '%s'" % received)
                received = 'utf' if received is str else 'ascii'
            elif received.char == 'U':  # coverage: never true
                received = 'utf'
            elif received.char == 'S':  # coverage: never true
                received = 'ascii'
            else:
                received = received.name
        elif isinstance(received, type):  # coverage: never true
            received = received.__name__
        if isinstance(expected, RefSpec):
            expected = expected.reftype
        elif isinstance(expected, type):  # coverage: never true
            expected = expected.__name__
        return received in __allowable[expected]
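
A few doctest-style calls illustrating the dtype promotion rules above (illustrative only, assuming the standard HDMF primary dtype synonyms; not part of the module):

>>> check_type('int', np.dtype('int64'))      # a wider integer satisfies an 'int' spec
True
>>> check_type('numeric', np.dtype('uint8'))  # 'numeric' accepts any int/uint/float variant
True
>>> check_type('int', np.dtype('float64'))    # a float does not satisfy an integer spec
False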


def get_iso8601_regex():
    isodate_re = (
        r'^(-?(?:[1-9][0-9]*)?[0-9]{4})-(1[0-2]|0[1-9])-(3[01]|0[1-9]|[12][0-9])'  # date
        r'(T(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\.[0-9]+)?(Z|[+-](?:2[0-3]|[01][0-9]):[0-5][0-9])?)?$'  # time
    )
    return re.compile(isodate_re)


_iso_re = get_iso8601_regex()


def _check_isodatetime(s, default=None):
    try:
        if _iso_re.match(pystr(s)) is not None:  # coverage: never true
            return 'isodatetime'
    except Exception:
        pass
    return default


class EmptyArrayError(Exception):
    pass


def get_type(data):
    if isinstance(data, str):
        return _check_isodatetime(data, 'utf')
    elif isinstance(data, bytes):
        return _check_isodatetime(data, 'ascii')
    elif isinstance(data, RegionBuilder):  # coverage: never true
        return 'region'
    elif isinstance(data, ReferenceBuilder):  # coverage: never true
        return 'object'
    elif isinstance(data, ReferenceResolver):
        return data.dtype
    elif isinstance(data, np.ndarray):
        if data.size == 0:
            raise EmptyArrayError()
        return get_type(data[0])
    elif isinstance(data, np.bool_):
        return 'bool'
    if not hasattr(data, '__len__'):
        return type(data).__name__
    else:
        if hasattr(data, 'dtype'):
            if isinstance(data.dtype, list):  # coverage: never true
                return [get_type(data[0][i]) for i in range(len(data.dtype))]
            if data.dtype.metadata is not None and data.dtype.metadata.get('vlen') is not None:  # coverage: never true
                return get_type(data[0])
            return data.dtype
        if len(data) == 0:
            raise EmptyArrayError()
        return get_type(data[0])
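
Illustrative doctest-style calls (not part of the module):

>>> get_type('2023-10-04T02:57:00Z')               # ISO 8601 strings are treated as isodatetime
'isodatetime'
>>> get_type('hello')                              # other str values are utf
'utf'
>>> get_type(b'hello')                             # bytes values are ascii
'ascii'
>>> get_type(np.array([1, 2, 3], dtype=np.int64))  # arrays report the type of their first element
'int64'
>>> # get_type(np.array([])) raises EmptyArrayError: empty data cannot be typed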


def check_shape(expected, received):
    ret = False
    if expected is None:
        ret = True
    else:
        if isinstance(expected, (list, tuple)):
            if isinstance(expected[0], (list, tuple)):
                for sub in expected:  # coverage: loop always exited via break
                    if check_shape(sub, received):
                        ret = True
                        break
            else:
                if len(expected) > 0 and received is None:
                    ret = False
                elif len(expected) == len(received):
                    ret = True
                    for e, r in zip(expected, received):
                        if not check_shape(e, r):  # coverage: never true
                            ret = False
                            break
        elif isinstance(expected, int):  # coverage: never false
            ret = expected == received
    return ret
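
Illustrative doctest-style calls (not part of the module):

>>> check_shape(None, (3, 4))            # no shape constraint in the spec
True
>>> check_shape((None, 3), (5, 3))       # None is a wildcard for a single dimension
True
>>> check_shape([(2,), (2, 2)], (2, 2))  # a list of shapes means "any of these"
True
>>> check_shape((3,), None)              # scalar data cannot satisfy an array shape
False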


class ValidatorMap:
    """A class for keeping track of Validator objects for all data types in a namespace"""

    @docval({'name': 'namespace', 'type': SpecNamespace, 'doc': 'the namespace to build the validator map for'})
    def __init__(self, **kwargs):
        ns = getargs('namespace', kwargs)
        self.__ns = ns
        tree = defaultdict(list)
        types = ns.get_registered_types()
        self.__type_key = ns.get_spec(types[0]).type_key()
        for dt in types:
            spec = ns.get_spec(dt)
            parent = spec.data_type_inc
            child = spec.data_type_def
            tree[child] = list()
            if parent is not None:
                tree[parent].append(child)
        for t in tree:
            self.__rec(tree, t)
        self.__valid_types = dict()
        self.__validators = dict()
        for dt, children in tree.items():
            _list = list()
            for t in children:
                spec = self.__ns.get_spec(t)
                if isinstance(spec, GroupSpec):
                    val = GroupValidator(spec, self)
                else:
                    val = DatasetValidator(spec, self)
                if t == dt:
                    self.__validators[t] = val
                _list.append(val)
            self.__valid_types[dt] = tuple(_list)

    def __rec(self, tree, node):
        if not isinstance(tree[node], tuple):
            sub_types = {node}
            for child in tree[node]:
                sub_types.update(self.__rec(tree, child))
            tree[node] = tuple(sub_types)
        return tree[node]

    @property
    def namespace(self):
        return self.__ns

    @docval({'name': 'spec', 'type': (Spec, str), 'doc': 'the specification to use to validate'},
            returns='all valid sub data types for the given spec', rtype=tuple)
    def valid_types(self, **kwargs):
        '''Get all valid types for a given data type'''
        spec = getargs('spec', kwargs)
        if isinstance(spec, Spec):  # coverage: never true
            spec = spec.data_type_def
        try:
            return self.__valid_types[spec]
        except KeyError:
            raise ValueError("no children for '%s'" % spec)

    @docval({'name': 'data_type', 'type': (BaseStorageSpec, str),
             'doc': 'the data type to get the validator for'},
            returns='the validator for ``data_type``')
    def get_validator(self, **kwargs):
        """Return the validator for a given data type"""
        dt = getargs('data_type', kwargs)
        if isinstance(dt, BaseStorageSpec):  # coverage: never true
            dt_tmp = dt.data_type_def
            if dt_tmp is None:
                dt_tmp = dt.data_type_inc
            dt = dt_tmp
        try:
            return self.__validators[dt]
        except KeyError:
            msg = "data type '%s' not found in namespace %s" % (dt, self.__ns.name)
            raise ValueError(msg)

    @docval({'name': 'builder', 'type': BaseBuilder, 'doc': 'the builder to validate'},
            returns="a list of errors found", rtype=list)
    def validate(self, **kwargs):
        """Validate a builder against a Spec

        ``builder`` must have the attribute used to specify the data type
        by the namespace used to construct this ValidatorMap.
        """
        builder = getargs('builder', kwargs)
        dt = builder.attributes.get(self.__type_key)
        if dt is None:
            msg = "builder must have data type defined with attribute '%s'" % self.__type_key
            raise ValueError(msg)
        validator = self.get_validator(dt)
        return validator.validate(builder)
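
A minimal usage sketch (illustrative; it assumes ValidatorMap is re-exported from hdmf.validate as in current HDMF, that `namespace` is a SpecNamespace you already have, that `builder` is a GroupBuilder carrying the namespace's data-type attribute, and that 'MyDataType' is a hypothetical registered type name):

from hdmf.validate import ValidatorMap

vmap = ValidatorMap(namespace)        # build validators for every registered type in the namespace
errors = vmap.validate(builder)       # dispatch to the validator for the builder's data type
for err in errors:
    print(err)

# A validator for a single type can also be used directly:
errors = vmap.get_validator('MyDataType').validate(builder)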


class Validator(metaclass=ABCMeta):
    '''A base class for classes that will be used to validate against Spec subclasses'''

    @docval({'name': 'spec', 'type': Spec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        self.__spec = getargs('spec', kwargs)
        self.__vmap = getargs('validator_map', kwargs)

    @property
    def spec(self):
        return self.__spec

    @property
    def vmap(self):
        return self.__vmap

    @abstractmethod
    @docval({'name': 'value', 'type': None, 'doc': 'either in the form of a value or a Builder'},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        pass

    @classmethod
    def get_spec_loc(cls, spec):
        return spec.path

    @classmethod
    def get_builder_loc(cls, builder):
        stack = list()
        tmp = builder
        while tmp is not None and tmp.name != 'root':
            stack.append(tmp.name)
            tmp = tmp.parent
        return "/".join(reversed(stack))


class AttributeValidator(Validator):
    '''A class for validating values against AttributeSpecs'''

    @docval({'name': 'spec', 'type': AttributeSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({'name': 'value', 'type': None, 'doc': 'the value to validate'},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        value = getargs('value', kwargs)
        ret = list()
        spec = self.spec
        if spec.required and value is None:  # coverage: never true
            ret.append(MissingError(self.get_spec_loc(spec)))
        else:
            if spec.dtype is None:  # coverage: never true
                ret.append(Error(self.get_spec_loc(spec)))
            elif isinstance(spec.dtype, RefSpec):
                if not isinstance(value, BaseBuilder):  # coverage: never true
                    expected = '%s reference' % spec.dtype.reftype
                    try:
                        value_type = get_type(value)
                        ret.append(DtypeError(self.get_spec_loc(spec), expected, value_type))
                    except EmptyArrayError:
                        # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                        pass
                else:
                    target_spec = self.vmap.namespace.catalog.get_spec(spec.dtype.target_type)
                    data_type = value.attributes.get(target_spec.type_key())
                    hierarchy = self.vmap.namespace.catalog.get_hierarchy(data_type)
                    if spec.dtype.target_type not in hierarchy:  # coverage: never true
                        ret.append(IncorrectDataType(self.get_spec_loc(spec), spec.dtype.target_type, data_type))
            else:
                try:
                    dtype = get_type(value)
                    if not check_type(spec.dtype, dtype):
                        ret.append(DtypeError(self.get_spec_loc(spec), spec.dtype, dtype))
                except EmptyArrayError:
                    # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                    pass
            shape = get_data_shape(value)
            if not check_shape(spec.shape, shape):
                if shape is None:  # coverage: never false
                    ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(value)))
                else:
                    ret.append(ShapeError(self.get_spec_loc(spec), spec.shape, shape))
        return ret


class BaseStorageValidator(Validator):
    '''A base class for validating against Spec objects that have attributes, i.e. BaseStorageSpec'''

    @docval({'name': 'spec', 'type': BaseStorageSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.__attribute_validators = dict()
        for attr in self.spec.attributes:
            self.__attribute_validators[attr.name] = AttributeValidator(attr, self.vmap)

    @docval({"name": "builder", "type": BaseBuilder, "doc": "the builder to validate"},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        builder = getargs('builder', kwargs)
        attributes = builder.attributes
        ret = list()
        for attr, validator in self.__attribute_validators.items():
            attr_val = attributes.get(attr)
            if attr_val is None:
                if validator.spec.required:
                    ret.append(MissingError(self.get_spec_loc(validator.spec),
                                            location=self.get_builder_loc(builder)))
            else:
                errors = validator.validate(attr_val)
                for err in errors:
                    err.location = self.get_builder_loc(builder) + ".%s" % validator.spec.name
                ret.extend(errors)
        return ret


class DatasetValidator(BaseStorageValidator):
    '''A class for validating DatasetBuilders against DatasetSpecs'''

    @docval({'name': 'spec', 'type': DatasetSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({"name": "builder", "type": DatasetBuilder, "doc": "the builder to validate"},
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):
        builder = getargs('builder', kwargs)
        ret = super().validate(builder)
        data = builder.data
        if self.spec.dtype is not None:
            try:
                dtype = get_type(data)
                if not check_type(self.spec.dtype, dtype):
                    ret.append(DtypeError(self.get_spec_loc(self.spec), self.spec.dtype, dtype,
                                          location=self.get_builder_loc(builder)))
            except EmptyArrayError:
                # do not validate dtype of empty array. HDMF does not yet set dtype when writing a list/tuple
                pass
        shape = get_data_shape(data)
        if not check_shape(self.spec.shape, shape):
            if shape is None:  # coverage: never false
                ret.append(ExpectedArrayError(self.get_spec_loc(self.spec), self.spec.shape, str(data),
                                              location=self.get_builder_loc(builder)))
            else:
                ret.append(ShapeError(self.get_spec_loc(self.spec), self.spec.shape, shape,
                                      location=self.get_builder_loc(builder)))
        return ret


def _resolve_data_type(spec):
    if isinstance(spec, LinkSpec):
        return spec.target_type
    return spec.data_type


class GroupValidator(BaseStorageValidator):
    '''A class for validating GroupBuilders against GroupSpecs'''

    @docval({'name': 'spec', 'type': GroupSpec, 'doc': 'the specification to use to validate'},
            {'name': 'validator_map', 'type': ValidatorMap, 'doc': 'the ValidatorMap to use during validation'})
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    @docval({"name": "builder", "type": GroupBuilder, "doc": "the builder to validate"},  # noqa: C901
            returns='a list of Errors', rtype=list)
    def validate(self, **kwargs):  # noqa: C901
        builder = getargs('builder', kwargs)
        errors = super().validate(builder)
        errors.extend(self.__validate_children(builder))
        return self._remove_duplicates(errors)

    def __validate_children(self, parent_builder):
        """Validates the children of the group builder against the children in the spec.

        Children are defined as datasets, groups, and links.

        Validation works by first assigning builder children to spec children
        in a many-to-one relationship using a SpecMatcher (this matching is
        non-trivial due to inheritance, which is why it is isolated in a
        separate class). Once the matching is complete, validating the set of
        matched builders against each child spec is straightforward.
        """
        spec_children = chain(self.spec.datasets, self.spec.groups, self.spec.links)
        matcher = SpecMatcher(self.vmap, spec_children)

        builder_children = chain(parent_builder.datasets.values(),
                                 parent_builder.groups.values(),
                                 parent_builder.links.values())
        matcher.assign_to_specs(builder_children)

        for child_spec, matched_builders in matcher.spec_matches:
            yield from self.__validate_presence_and_quantity(child_spec, len(matched_builders), parent_builder)
            for child_builder in matched_builders:
                yield from self.__validate_child_builder(child_spec, child_builder, parent_builder)

    def __validate_presence_and_quantity(self, child_spec, n_builders, parent_builder):
        """Validate that at least one matching builder exists if the spec is
        required and that the number of builders agrees with the spec quantity
        """
        if n_builders == 0 and child_spec.required:
            yield self.__construct_missing_child_error(child_spec, parent_builder)
        elif self.__incorrect_quantity(n_builders, child_spec):
            yield self.__construct_incorrect_quantity_error(child_spec, parent_builder, n_builders)

    def __construct_missing_child_error(self, child_spec, parent_builder):
        """Returns either a MissingDataType or a MissingError depending on
        whether or not a specific data type can be resolved from the spec
        """
        data_type = _resolve_data_type(child_spec)
        builder_loc = self.get_builder_loc(parent_builder)
        if data_type is not None:
            name_of_erroneous = self.get_spec_loc(self.spec)
            return MissingDataType(name_of_erroneous, data_type,
                                   location=builder_loc, missing_dt_name=child_spec.name)
        else:
            name_of_erroneous = self.get_spec_loc(child_spec)
            return MissingError(name_of_erroneous, location=builder_loc)

    @staticmethod
    def __incorrect_quantity(n_found, spec):
        """Returns True if the number of matched builder elements does not satisfy the specified quantity"""
        if not spec.is_many() and n_found > 1:
            return True
        elif isinstance(spec.quantity, int) and n_found != spec.quantity:
            return True
        return False

    def __construct_incorrect_quantity_error(self, child_spec, parent_builder, n_builders):
        name_of_erroneous = self.get_spec_loc(self.spec)
        data_type = _resolve_data_type(child_spec)
        builder_loc = self.get_builder_loc(parent_builder)
        return IncorrectQuantityError(name_of_erroneous, data_type, expected=child_spec.quantity,
                                      received=n_builders, location=builder_loc)

    def __validate_child_builder(self, child_spec, child_builder, parent_builder):
        """Validate a child builder against a child spec, taking links into account"""
        if isinstance(child_builder, LinkBuilder):
            if self.__cannot_be_link(child_spec):
                yield self.__construct_illegal_link_error(child_spec, parent_builder)
                return  # do not validate illegally linked objects
            child_builder = child_builder.builder
        for child_validator in self.__get_child_validators(child_spec):
            yield from child_validator.validate(child_builder)

    def __construct_illegal_link_error(self, child_spec, parent_builder):
        name_of_erroneous = self.get_spec_loc(child_spec)
        builder_loc = self.get_builder_loc(parent_builder)
        return IllegalLinkError(name_of_erroneous, location=builder_loc)

    @staticmethod
    def __cannot_be_link(spec):
        return not isinstance(spec, LinkSpec) and not spec.linkable

    def __get_child_validators(self, spec):
        """Returns the appropriate list of validators for a child spec

        Because child specs can inherit a data type via data_type_inc and also
        modify that type without defining a new data type via data_type_def,
        we need to validate against both the spec for the base data type and
        the spec at the current level of the hierarchy in case there have been
        any modifications.

        If a specific data type can be resolved, a validator for that type is acquired
        from the ValidatorMap and included in the returned validators. If the spec is
        a GroupSpec or a DatasetSpec, then a new Validator is created and also
        returned. If the spec is a LinkSpec, no additional Validator is returned
        because the LinkSpec cannot add or modify fields and the target_type will be
        validated by the Validator returned from the ValidatorMap.
        """
        if _resolve_data_type(spec) is not None:
            yield self.vmap.get_validator(_resolve_data_type(spec))

        if isinstance(spec, GroupSpec):
            yield GroupValidator(spec, self.vmap)
        elif isinstance(spec, DatasetSpec):
            yield DatasetValidator(spec, self.vmap)
        elif isinstance(spec, LinkSpec):  # coverage: never false
            return
        else:
            msg = "Unable to resolve a validator for spec %s" % spec
            raise ValueError(msg)

    @staticmethod
    def _remove_duplicates(errors):
        """Return a list of validation errors where duplicates have been removed

        In some cases a child of a group may be validated against two specs which can
        redundantly define the same fields/children. If the builder doesn't match the
        spec, it is possible for duplicate errors to be generated.
        """
        ordered_errors = OrderedDict()
        for error in errors:
            ordered_errors[error] = error
        return list(ordered_errors)
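
The OrderedDict idiom above keeps the first occurrence of each equal (and hashable) error while preserving order; with plain values it behaves like this (illustrative only):

>>> items = ['a', 'b', 'a', 'c']
>>> list(OrderedDict((i, i) for i in items))
['a', 'b', 'c']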


class SpecMatches:
    """A utility class to hold a spec and the builders matched to it"""

    def __init__(self, spec):
        self.spec = spec
        self.builders = list()

    def add(self, builder):
        self.builders.append(builder)


class SpecMatcher:
    """Matches a set of builders against a set of specs

    This class is intended to isolate the task of choosing which spec a
    builder should be validated against from the task of performing that
    validation.
    """

    def __init__(self, vmap, specs):
        self.vmap = vmap
        self._spec_matches = [SpecMatches(spec) for spec in specs]
        self._unmatched_builders = SpecMatches(None)

    @property
    def unmatched_builders(self):
        """Returns the builders for which no matching spec was found

        These builders can be considered superfluous, and will generate a
        warning in the future.
        """
        return self._unmatched_builders.builders

    @property
    def spec_matches(self):
        """Returns a list of tuples of: (spec, assigned builders)"""
        return [(sm.spec, sm.builders) for sm in self._spec_matches]

    def assign_to_specs(self, builders):
        """Assigns a set of builders to a set of specs (many-to-one)

        In the case that no matching spec is found, a builder will be
        added to a list of unmatched builders.
        """
        for builder in builders:
            spec_match = self._best_matching_spec(builder)
            if spec_match is None:
                self._unmatched_builders.add(builder)
            else:
                spec_match.add(builder)

    def _best_matching_spec(self, builder):
        """Finds the best matching spec for the builder

        The current algorithm is:
        1. filter specs which meet the minimum requirements of consistent name
           and data type
        2. if more than one candidate meets the minimum requirements, find the
           candidates which do not yet have a sufficient number of builders
           assigned (based on the spec quantity)
        3. return the first unsatisfied candidate if any, otherwise return the
           first candidate

        Note that the current algorithm will give different results depending
        on the order of the specs or builders, and also does not consider the
        inheritance hierarchy. Future improvements to this matching algorithm
        should resolve these discrepancies.
        """
        candidates = self._filter_by_name(self._spec_matches, builder)
        candidates = self._filter_by_type(candidates, builder)
        if len(candidates) == 0:
            return None
        elif len(candidates) == 1:
            return candidates[0]
        else:
            unsatisfied_candidates = self._filter_by_unsatisfied(candidates)
            if len(unsatisfied_candidates) == 0:
                return candidates[0]
            else:
                return unsatisfied_candidates[0]

    def _filter_by_name(self, candidates, builder):
        """Returns the candidate specs that either have the same name as the
        builder or do not specify a name.
        """
        def name_is_consistent(spec_matches):
            spec = spec_matches.spec
            return spec.name is None or spec.name == builder.name

        return list(filter(name_is_consistent, candidates))

    def _filter_by_type(self, candidates, builder):
        """Returns the candidate specs which have a data type consistent with
        the builder's data type.
        """
        def compatible_type(spec_matches):
            spec = spec_matches.spec
            if isinstance(spec, LinkSpec):
                validator = self.vmap.get_validator(spec.target_type)
                spec = validator.spec
            if spec.data_type is None:
                return True
            valid_validators = self.vmap.valid_types(spec.data_type)
            valid_types = [v.spec.data_type for v in valid_validators]
            if isinstance(builder, LinkBuilder):
                dt = builder.builder.attributes.get(spec.type_key())
            else:
                dt = builder.attributes.get(spec.type_key())
            return dt in valid_types

        return list(filter(compatible_type, candidates))

    def _filter_by_unsatisfied(self, candidates):
        """Returns the candidate specs which are not yet matched against
        a number of builders which fulfils the quantity for the spec.
        """
        def is_unsatisfied(spec_matches):
            spec = spec_matches.spec
            n_match = len(spec_matches.builders)
            if spec.required and n_match == 0:
                return True
            if isinstance(spec.quantity, int) and n_match < spec.quantity:  # coverage: never true
                return True
            return False

        return list(filter(is_unsatisfied, candidates))
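
For reference, GroupValidator.__validate_children above drives the matcher roughly as follows (illustrative sketch; `vmap`, `group_spec`, and `group_builder` are assumed to already exist):

from itertools import chain

spec_children = chain(group_spec.datasets, group_spec.groups, group_spec.links)
matcher = SpecMatcher(vmap, spec_children)

builder_children = chain(group_builder.datasets.values(),
                         group_builder.groups.values(),
                         group_builder.links.values())
matcher.assign_to_specs(builder_children)  # many-to-one assignment of builders to child specs

for child_spec, matched_builders in matcher.spec_matches:
    print(child_spec.name, len(matched_builders))
print(matcher.unmatched_builders)          # builders that matched no child spec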