Coverage for src/hdmf/spec/namespace.py: 92%
337 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-10-04 02:57 +0000
1import os.path
2import ruamel.yaml as yaml
3import string
4from abc import ABCMeta, abstractmethod
5from collections import OrderedDict
6from copy import copy
7from datetime import datetime
8from warnings import warn
10from .catalog import SpecCatalog
11from .spec import DatasetSpec, GroupSpec
12from ..utils import docval, getargs, popargs, get_docval
# Shared @docval argument specification for SpecNamespace.__init__.
# NOTE: entries without a 'default' key are required; SpecNamespace.build_namespace
# pops exactly those entries (in this order) and passes them positionally, so the
# order of the required entries must match the positional order of __init__.
_namespace_args = [
    {'name': 'doc', 'type': str, 'doc': 'a description about what this namespace represents'},
    {'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
    {'name': 'schema', 'type': list, 'doc': 'location of schema specification files or other Namespaces'},
    {'name': 'full_name', 'type': str, 'doc': 'extended full name of this namespace', 'default': None},
    {'name': 'version', 'type': (str, tuple, list), 'doc': 'Version number of the namespace', 'default': None},
    {'name': 'date', 'type': (datetime, str),
     'doc': "Date last modified or released. Formatting is %Y-%m-%d %H:%M:%S, e.g, 2017-04-25 17:14:13",
     'default': None},
    {'name': 'author', 'type': (str, list), 'doc': 'Author or list of authors.', 'default': None},
    {'name': 'contact', 'type': (str, list),
     'doc': 'List of emails. Ordering should be the same as for author', 'default': None},
    {'name': 'catalog', 'type': SpecCatalog, 'doc': 'The SpecCatalog object for this SpecNamespace', 'default': None}
]
class SpecNamespace(dict):
    """
    A namespace for specifications
    """

    __types_key = 'data_types'

    UNVERSIONED = None  # value representing missing version

    @docval(*_namespace_args)
    def __init__(self, **kwargs):
        doc, full_name, name, version, date, author, contact, schema, catalog = \
            popargs('doc', 'full_name', 'name', 'version', 'date', 'author', 'contact', 'schema', 'catalog', kwargs)
        super().__init__()
        self['doc'] = doc
        self['schema'] = schema
        if any(c in string.whitespace for c in name):
            raise ValueError("'name' must not contain any whitespace")
        self['name'] = name
        if full_name is not None:
            self['full_name'] = full_name
        if version == str(SpecNamespace.UNVERSIONED):
            # the unversioned version may be written to file as a string and read from file as a string
            warn("Loaded namespace '%s' is unversioned. Please notify the extension author." % name)
            version = SpecNamespace.UNVERSIONED
        if version is None:
            # version is required on write -- see YAMLSpecWriter.write_namespace -- but can be None on read in order to
            # be able to read older files with extensions that are missing the version key.
            warn(("Loaded namespace '%s' is missing the required key 'version'. Version will be set to '%s'. "
                  "Please notify the extension author.") % (name, SpecNamespace.UNVERSIONED))
            version = SpecNamespace.UNVERSIONED
        self['version'] = version
        if date is not None:
            self['date'] = date
        if author is not None:
            self['author'] = author
        if contact is not None:
            self['contact'] = contact
        # fall back on a fresh, empty catalog when none is supplied
        self.__catalog = catalog if catalog is not None else SpecCatalog()

    @classmethod
    def types_key(cls):
        ''' Get the key used for specifying types to include from a file or namespace

        Override this method to use a different name for 'data_types'
        '''
        return cls.__types_key

    @property
    def full_name(self):
        """String with full name or None"""
        return self.get('full_name', None)

    @property
    def contact(self):
        """String or list of strings with the contacts or None"""
        return self.get('contact', None)

    @property
    def author(self):
        """String or list of strings with the authors or None"""
        return self.get('author', None)

    @property
    def version(self):
        """
        String, list, or tuple with the version or SpecNamespace.UNVERSIONED
        if the version is missing or empty
        """
        return self.get('version', None) or SpecNamespace.UNVERSIONED

    @property
    def date(self):
        """Date last modified or released.

        :return: datetime object, string, or None"""
        return self.get('date', None)

    @property
    def name(self):
        """String with short name or None"""
        return self.get('name', None)

    @property
    def doc(self):
        """String describing what this namespace represents"""
        return self['doc']

    @property
    def schema(self):
        """List of dicts with 'source' and/or 'namespace' keys describing the schema of this namespace"""
        return self['schema']

    def get_source_files(self):
        """
        Get the list of names of the source files included the schema of the namespace
        """
        return [item['source'] for item in self.schema if 'source' in item]

    @docval({'name': 'sourcefile', 'type': str, 'doc': 'Name of the source file'},
            returns='Dict with the source file documentation', rtype=dict)
    def get_source_description(self, sourcefile):
        """
        Get the description of a source file as described in the namespace. The result is a
        dict which contains the 'source' and optionally 'title', 'doc' and 'data_types'
        imported from the source file

        Returns None (implicitly) if no schema entry matches the given source file.
        """
        for item in self.schema:
            if item.get('source', None) == sourcefile:
                return item

    @property
    def catalog(self):
        """The SpecCatalog containing all the Specs"""
        return self.__catalog

    @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'})
    def get_spec(self, **kwargs):
        """Get the Spec object for the given data type

        :raises ValueError: if the data type is not registered in this namespace's catalog
        """
        data_type = getargs('data_type', kwargs)
        spec = self.__catalog.get_spec(data_type)
        if spec is None:
            raise ValueError("No specification for '%s' in namespace '%s'" % (data_type, self.name))
        return spec

    @docval(returns="the a tuple of the available data types", rtype=tuple)
    def get_registered_types(self, **kwargs):
        """Get the available types in this namespace"""
        return self.__catalog.get_registered_types()

    @docval({'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the hierarchy of'},
            returns="a tuple with the type hierarchy", rtype=tuple)
    def get_hierarchy(self, **kwargs):
        ''' Get the extension hierarchy for the given data_type in this namespace'''
        data_type = getargs('data_type', kwargs)
        return self.__catalog.get_hierarchy(data_type)

    @classmethod
    def build_namespace(cls, **spec_dict):
        """Construct a namespace (of this class) from a dict of namespace keys/values.

        :raises KeyError: if a required namespace key (one without a docval default) is missing
        """
        kwargs = copy(spec_dict)
        try:
            # required args (docval entries without a 'default') are popped in order
            # and passed positionally; everything else is passed as keyword args
            args = [kwargs.pop(x['name']) for x in get_docval(cls.__init__) if 'default' not in x]
        except KeyError as e:
            # chain the original KeyError so the underlying cause is preserved in the traceback
            raise KeyError("'%s' not found in %s" % (e.args[0], str(spec_dict))) from e
        return cls(*args, **kwargs)
class SpecReader(metaclass=ABCMeta):
    """Abstract base class for objects that read specifications and namespaces from a source."""

    @docval({'name': 'source', 'type': str, 'doc': 'the source from which this reader reads from'})
    def __init__(self, **kwargs):
        self.__source = getargs('source', kwargs)

    @property
    def source(self):
        """The source this reader reads from."""
        return self.__source

    @abstractmethod
    def read_spec(self):
        """Read and return a specification from the source."""

    @abstractmethod
    def read_namespace(self):
        """Read and return the namespace definitions from the source."""
class YAMLSpecReader(SpecReader):
    """A SpecReader that reads namespace and specification YAML files from a directory."""

    @docval({'name': 'indir', 'type': str, 'doc': 'the path spec files are relative to', 'default': '.'})
    def __init__(self, **kwargs):
        super().__init__(source=kwargs['indir'])

    def read_namespace(self, namespace_path):
        """Return the list of namespace dicts stored under the 'namespaces' key of the given YAML file."""
        with open(namespace_path, 'r') as stream:
            safe_loader = yaml.YAML(typ='safe', pure=True)
            namespaces = safe_loader.load(stream).get('namespaces')
        if namespaces is None:
            raise ValueError("no 'namespaces' found in %s" % namespace_path)
        return namespaces

    def read_spec(self, spec_path):
        """Return the parsed spec dict from the given YAML file; it must contain 'groups' or 'datasets'."""
        with open(self.__get_spec_path(spec_path), 'r') as stream:
            safe_loader = yaml.YAML(typ='safe', pure=True)
            specs = safe_loader.load(stream)
        if 'datasets' not in specs and 'groups' not in specs:
            raise ValueError("no 'groups' or 'datasets' found in %s" % spec_path)
        return specs

    def __get_spec_path(self, spec_path):
        # absolute paths are used as-is; relative paths are resolved against this reader's source dir
        if os.path.isabs(spec_path):
            return spec_path
        return os.path.join(self.source, spec_path)
class NamespaceCatalog:
    """A catalog for storing multiple SpecNamespaces and loading their specs from files or other namespaces."""

    @docval({'name': 'group_spec_cls', 'type': type,
             'doc': 'the class to use for group specifications', 'default': GroupSpec},
            {'name': 'dataset_spec_cls', 'type': type,
             'doc': 'the class to use for dataset specifications', 'default': DatasetSpec},
            {'name': 'spec_namespace_cls', 'type': type,
             'doc': 'the class to use for specification namespaces', 'default': SpecNamespace})
    def __init__(self, **kwargs):
        """Create a catalog for storing multiple Namespaces"""
        self.__namespaces = OrderedDict()
        self.__dataset_spec_cls = getargs('dataset_spec_cls', kwargs)
        self.__group_spec_cls = getargs('group_spec_cls', kwargs)
        self.__spec_namespace_cls = getargs('spec_namespace_cls', kwargs)
        # keep track of all spec objects ever loaded, so we don't have
        # multiple object instances of a spec
        self.__loaded_specs = dict()
        self.__included_specs = dict()
        self.__included_sources = dict()

        # non-mangled alias kept for backward compatibility with external users
        self._loaded_specs = self.__loaded_specs

    def __copy__(self):
        """Shallow copy: container dicts are copied, but contained specs/namespaces are shared."""
        ret = NamespaceCatalog(self.__group_spec_cls,
                               self.__dataset_spec_cls,
                               self.__spec_namespace_cls)
        ret.__namespaces = copy(self.__namespaces)
        ret.__loaded_specs = copy(self.__loaded_specs)
        ret.__included_specs = copy(self.__included_specs)
        ret.__included_sources = copy(self.__included_sources)
        return ret

    def merge(self, ns_catalog):
        """Add all namespaces from the given NamespaceCatalog into this one.

        :raises KeyError: if a namespace with the same name already exists here (via add_namespace)
        """
        for name, namespace in ns_catalog.__namespaces.items():
            self.add_namespace(name, namespace)

    @property
    @docval(returns='a tuple of the available namespaces', rtype=tuple)
    def namespaces(self):
        """The namespaces in this NamespaceCatalog"""
        return tuple(self.__namespaces.keys())

    @property
    def dataset_spec_cls(self):
        """The DatasetSpec class used in this NamespaceCatalog"""
        return self.__dataset_spec_cls

    @property
    def group_spec_cls(self):
        """The GroupSpec class used in this NamespaceCatalog"""
        return self.__group_spec_cls

    @property
    def spec_namespace_cls(self):
        """The SpecNamespace class used in this NamespaceCatalog"""
        return self.__spec_namespace_cls

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
            {'name': 'namespace', 'type': SpecNamespace, 'doc': 'the SpecNamespace object'})
    def add_namespace(self, **kwargs):
        """Add a namespace to this catalog

        :raises KeyError: if a namespace with the given name already exists
        """
        name, namespace = getargs('name', 'namespace', kwargs)
        if name in self.__namespaces:
            raise KeyError("namespace '%s' already exists" % name)
        self.__namespaces[name] = namespace
        for dt in namespace.catalog.get_registered_types():
            source = namespace.catalog.get_spec_source_file(dt)
            # do not add types that have already been loaded
            # use dict with None values as ordered set because order of specs does matter
            self.__loaded_specs.setdefault(source, dict()).update({dt: None})

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this namespace'},
            returns="the SpecNamespace with the given name", rtype=SpecNamespace)
    def get_namespace(self, **kwargs):
        """Get the a SpecNamespace

        :raises KeyError: if no namespace with the given name exists
        """
        name = getargs('name', kwargs)
        ret = self.__namespaces.get(name)
        if ret is None:
            raise KeyError("'%s' not a namespace" % name)
        return ret

    @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'},
            {'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'},
            returns="the specification for writing the given object type to HDF5 ", rtype='Spec')
    def get_spec(self, **kwargs):
        '''
        Get the Spec object for the given type from the given Namespace

        :raises KeyError: if the namespace does not exist
        '''
        namespace, data_type = getargs('namespace', 'data_type', kwargs)
        if namespace not in self.__namespaces:
            raise KeyError("'%s' not a namespace" % namespace)
        return self.__namespaces[namespace].get_spec(data_type)

    @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'},
            {'name': 'data_type', 'type': (str, type), 'doc': 'the data_type to get the spec for'},
            returns="a tuple with the type hierarchy", rtype=tuple)
    def get_hierarchy(self, **kwargs):
        '''
        Get the type hierarchy for a given data_type in a given namespace

        :raises KeyError: if the namespace does not exist
        '''
        namespace, data_type = getargs('namespace', 'data_type', kwargs)
        spec_ns = self.__namespaces.get(namespace)
        if spec_ns is None:
            raise KeyError("'%s' not a namespace" % namespace)
        return spec_ns.get_hierarchy(data_type)

    @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace containing the data_type'},
            {'name': 'data_type', 'type': str, 'doc': 'the data_type to check'},
            {'name': 'parent_data_type', 'type': str, 'doc': 'the potential parent data_type'},
            returns="True if *data_type* is a sub `data_type` of *parent_data_type*, False otherwise", rtype=bool)
    def is_sub_data_type(self, **kwargs):
        '''
        Return whether or not *data_type* is a sub `data_type` of *parent_data_type*
        '''
        ns, dt, parent_dt = getargs('namespace', 'data_type', 'parent_data_type', kwargs)
        hier = self.get_hierarchy(ns, dt)
        return parent_dt in hier

    @docval(rtype=tuple)
    def get_sources(self, **kwargs):
        '''
        Get all the source specification files that were loaded in this catalog
        '''
        return tuple(self.__loaded_specs.keys())

    @docval({'name': 'namespace', 'type': str, 'doc': 'the name of the namespace'},
            rtype=tuple)
    def get_namespace_sources(self, **kwargs):
        '''
        Get all the source specifications that were loaded for a given namespace

        :raises KeyError: if no sources were recorded for the namespace
        '''
        namespace = getargs('namespace', kwargs)
        return tuple(self.__included_sources[namespace])

    @docval({'name': 'source', 'type': str, 'doc': 'the name of the source'},
            rtype=tuple)
    def get_types(self, **kwargs):
        '''
        Get the types that were loaded from a given source

        Returns an empty tuple if the source is unknown.
        '''
        source = getargs('source', kwargs)
        ret = self.__loaded_specs.get(source)
        if ret is not None:
            ret = tuple(ret)
        else:
            ret = tuple()
        return ret

    def __load_spec_file(self, reader, spec_source, catalog, types_to_load=None, resolve=True):
        """Read the spec file at spec_source with the given reader and register its types in catalog.

        :param types_to_load: optional collection of type names; when given, only those types are registered
        :return: dict used as an ordered set of the data type names loaded from this source
        :raises ValueError: if the source was already loaded, or a spec is missing its data type def key
        """
        if self.__loaded_specs.get(spec_source) is not None:
            raise ValueError("spec source '%s' already loaded" % spec_source)

        def __reg_spec(spec_cls, spec_dict):
            # build one spec and register it; returns the tuple of type names registered
            dt_def = spec_dict.get(spec_cls.def_key())
            if dt_def is None:
                msg = 'No data type def key found in spec %s' % spec_source
                raise ValueError(msg)
            if types_to_load and dt_def not in types_to_load:
                # type excluded by the caller -- nothing was registered
                # (return an empty tuple, not None, so the caller can iterate the result)
                return tuple()
            if resolve:
                self.__resolve_includes(spec_cls, spec_dict, catalog)
            spec_obj = spec_cls.build_spec(spec_dict)
            return catalog.auto_register(spec_obj, spec_source)

        # the guard above guarantees this source has not been loaded yet, so load unconditionally
        ret = dict()  # this is used as an ordered set -- values are all None
        d = reader.read_spec(spec_source)
        for spec_dict in d.get('datasets', list()):
            self.__convert_spec_cls_keys(GroupSpec, self.__group_spec_cls, spec_dict)
            ret.update({k: None for k in __reg_spec(self.__dataset_spec_cls, spec_dict)})
        for spec_dict in d.get('groups', list()):
            self.__convert_spec_cls_keys(GroupSpec, self.__group_spec_cls, spec_dict)
            ret.update({k: None for k in __reg_spec(self.__group_spec_cls, spec_dict)})
        self.__loaded_specs[spec_source] = ret
        return ret

    def __convert_spec_cls_keys(self, parent_cls, spec_cls, spec_dict):
        """Replace instances of data_type_def/inc in spec_dict with new values from spec_cls."""
        # this is necessary because the def_key and inc_key may be different in each namespace
        # NOTE: this does not handle more than one custom set of keys
        if parent_cls.def_key() in spec_dict:
            spec_dict[spec_cls.def_key()] = spec_dict.pop(parent_cls.def_key())
        if parent_cls.inc_key() in spec_dict:
            spec_dict[spec_cls.inc_key()] = spec_dict.pop(parent_cls.inc_key())

    def __resolve_includes(self, spec_cls, spec_dict, catalog):
        """Replace data type inc strings with the spec definition so the new spec is built with included fields.
        """
        dt_def = spec_dict.get(spec_cls.def_key())
        dt_inc = spec_dict.get(spec_cls.inc_key())
        if dt_inc is not None and dt_def is not None:
            parent_spec = catalog.get_spec(dt_inc)
            if parent_spec is None:
                msg = "Cannot resolve include spec '%s' for type '%s'" % (dt_inc, dt_def)
                raise ValueError(msg)
            # replace the inc key value from string to the inc spec so that the spec can be updated with all of the
            # attributes, datasets, groups, and links of the inc spec when spec_cls.build_spec(spec_dict) is called
            spec_dict[spec_cls.inc_key()] = parent_spec
        # recurse into nested groups and datasets so their includes are resolved too
        for subspec_dict in spec_dict.get('groups', list()):
            self.__resolve_includes(self.__group_spec_cls, subspec_dict, catalog)
        for subspec_dict in spec_dict.get('datasets', list()):
            self.__resolve_includes(self.__dataset_spec_cls, subspec_dict, catalog)

    def __load_namespace(self, namespace, reader, resolve=True):
        """Load all specs for one namespace dict and register the resulting SpecNamespace.

        :return: dict mapping included namespace names to the tuple of type names registered from them
        """
        ns_name = namespace['name']
        if ns_name in self.__namespaces:  # pragma: no cover
            raise KeyError("namespace '%s' already exists" % ns_name)
        catalog = SpecCatalog()
        included_types = dict()
        for s in namespace['schema']:
            # types_key may be different in each spec namespace, so check both the __spec_namespace_cls types key
            # and the parent SpecNamespace types key. NOTE: this does not handle more than one custom types key
            types_to_load = s.get(self.__spec_namespace_cls.types_key(), s.get(SpecNamespace.types_key()))
            if types_to_load is not None:  # schema specifies specific types from 'source' or 'namespace'
                types_to_load = set(types_to_load)
            if 'source' in s:
                # read specs from file
                self.__load_spec_file(reader, s['source'], catalog, types_to_load=types_to_load, resolve=resolve)
                self.__included_sources.setdefault(ns_name, list()).append(s['source'])
            elif 'namespace' in s:
                # load specs from namespace
                try:
                    inc_ns = self.get_namespace(s['namespace'])
                except KeyError as e:
                    raise ValueError("Could not load namespace '%s'" % s['namespace']) from e
                if types_to_load is None:
                    types_to_load = inc_ns.get_registered_types()  # load all types in namespace
                registered_types = set()
                for ndt in types_to_load:
                    self.__register_type(ndt, inc_ns, catalog, registered_types)
                included_types[s['namespace']] = tuple(sorted(registered_types))
            else:
                raise ValueError("Spec '%s' schema must have either 'source' or 'namespace' key" % ns_name)
        # construct namespace
        ns = self.__spec_namespace_cls.build_namespace(catalog=catalog, **namespace)
        self.__namespaces[ns_name] = ns
        return included_types

    def __register_type(self, ndt, inc_ns, catalog, registered_types):
        """Register the named data type (and its dependencies) from inc_ns into catalog."""
        spec = inc_ns.get_spec(ndt)
        spec_file = inc_ns.catalog.get_spec_source_file(ndt)
        self.__register_dependent_types(spec, inc_ns, catalog, registered_types)
        if isinstance(spec, DatasetSpec):
            built_spec = self.dataset_spec_cls.build_spec(spec)
        else:
            built_spec = self.group_spec_cls.build_spec(spec)
        registered_types.add(ndt)
        catalog.register_spec(built_spec, spec_file)

    def __register_dependent_types(self, spec, inc_ns, catalog, registered_types):
        """Ensure that classes for all types used by this type are registered
        """
        # TODO test cross-namespace registration...
        def __register_dependent_types_helper(spec, inc_ns, catalog, registered_types):
            # recursively register types referenced by child specs (inc, nested def, or link target)
            if isinstance(spec, (GroupSpec, DatasetSpec)):
                if spec.data_type_inc is not None:
                    # TODO handle recursive definitions
                    self.__register_type(spec.data_type_inc, inc_ns, catalog, registered_types)
                if spec.data_type_def is not None:  # nested type definition
                    self.__register_type(spec.data_type_def, inc_ns, catalog, registered_types)
            else:  # spec is a LinkSpec
                self.__register_type(spec.target_type, inc_ns, catalog, registered_types)
            if isinstance(spec, GroupSpec):
                for child_spec in (spec.groups + spec.datasets + spec.links):
                    __register_dependent_types_helper(child_spec, inc_ns, catalog, registered_types)

        # register the top-level spec's parent type, then recurse into its children
        if spec.data_type_inc is not None:
            self.__register_type(spec.data_type_inc, inc_ns, catalog, registered_types)
        if isinstance(spec, GroupSpec):
            for child_spec in (spec.groups + spec.datasets + spec.links):
                __register_dependent_types_helper(child_spec, inc_ns, catalog, registered_types)

    @docval({'name': 'namespace_path', 'type': str, 'doc': 'the path to the file containing the namespaces(s) to load'},
            {'name': 'resolve',
             'type': bool,
             'doc': 'whether or not to include objects from included/parent spec objects', 'default': True},
            {'name': 'reader',
             'type': SpecReader,
             'doc': 'the class to user for reading specifications', 'default': None},
            returns='a dictionary describing the dependencies of loaded namespaces', rtype=dict)
    def load_namespaces(self, **kwargs):
        """Load the namespaces in the given file

        :raises IOError: if no reader is given and the namespace file does not exist
        """
        namespace_path, resolve, reader = getargs('namespace_path', 'resolve', 'reader', kwargs)
        if reader is None:
            # load namespace definition from file
            if not os.path.exists(namespace_path):
                msg = "namespace file '%s' not found" % namespace_path
                raise IOError(msg)
            reader = YAMLSpecReader(indir=os.path.dirname(namespace_path))
        ns_path_key = os.path.join(reader.source, os.path.basename(namespace_path))
        ret = self.__included_specs.get(ns_path_key)
        if ret is None:
            ret = dict()
        else:
            # this namespace file was already loaded -- return the cached result
            return ret
        namespaces = reader.read_namespace(namespace_path)
        to_load = list()
        for ns in namespaces:
            if ns['name'] in self.__namespaces:
                if ns['version'] != self.__namespaces.get(ns['name'])['version']:
                    # warn if the cached namespace differs from the already loaded namespace
                    warn("Ignoring cached namespace '%s' version %s because version %s is already loaded."
                         % (ns['name'], ns['version'], self.__namespaces.get(ns['name'])['version']))
            else:
                to_load.append(ns)
        # now load specs into namespace
        for ns in to_load:
            ret[ns['name']] = self.__load_namespace(ns, reader, resolve=resolve)
        self.__included_specs[ns_path_key] = ret
        return ret