Coverage for src/hdmf/term_set.py: 94%
92 statements
« prev ^ index » next coverage.py v7.2.5, created at 2023-08-18 20:49 +0000
« prev ^ index » next coverage.py v7.2.5, created at 2023-08-18 20:49 +0000
1import glob
2import os
3from collections import namedtuple
4from .utils import docval
5import warnings
class TermSet:
    """
    Class for implementing term sets from ontologies and other resources used to define the
    meaning and/or identity of terms.

    :ivar term_schema_path: The path to the LinkML YAML enumeration schema
    :ivar sources: The prefixes for the ontologies used in the TermSet
    :ivar view: SchemaView of the term set schema
    :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files
    :ivar expanded_term_set_path: The path to the schema with the expanded enumerations
    """

    # Record type returned for every term lookup. Built once at class-definition time
    # instead of on every lookup, so iterating a large term set does not create one
    # throw-away class per term.
    _TermInfo = namedtuple("Term_Info", ["id", "description", "meaning"])

    def __init__(self,
                 term_schema_path: str = None,
                 schemasheets_folder: str = None,
                 dynamic: bool = False
                 ):
        """
        :param term_schema_path: The path to the LinkML YAML enumeration schema
        :param schemasheets_folder: The path to the folder containing the LinkML TSV files
        :param dynamic: Boolean parameter denoting whether the schema uses Dynamic Enumerations

        :raises ValueError: if both or neither of ``term_schema_path`` and
            ``schemasheets_folder`` are given, or if ``linkml_runtime`` is not installed
        """
        # Validate the argument combination before touching any optional dependency:
        # exactly one schema source must be supplied.
        if schemasheets_folder is not None and term_schema_path is not None:
            msg = "Cannot have both a path to a Schemasheets folder and a TermSet schema."
            raise ValueError(msg)
        if schemasheets_folder is None and term_schema_path is None:
            # Fail fast with a clear message rather than handing None to SchemaView.
            msg = "Must provide either a path to a TermSet schema or a Schemasheets folder."
            raise ValueError(msg)

        try:
            from linkml_runtime.utils.schemaview import SchemaView
        except ImportError as err:
            msg = "Install linkml_runtime"
            raise ValueError(msg) from err

        self.term_schema_path = term_schema_path
        self.schemasheets_folder = schemasheets_folder

        if self.schemasheets_folder is not None:
            # Generate the YAML schema from the TSV files and use it from here on.
            self.term_schema_path = self.__schemasheets_convert()

        self.view = SchemaView(self.term_schema_path)

        self.expanded_term_set_path = None
        if dynamic:
            # reset view to now include the dynamically populated term_set
            self.expanded_term_set_path = self.__enum_expander()
            self.view = SchemaView(self.expanded_term_set_path)

        self.sources = self.view.schema.prefixes

    def __repr__(self):
        """Return a two-line summary: the class and the schema path in use."""
        return (
            f"class: {str(self.__class__)}\n"
            f"term_schema_path: {str(self.term_schema_path)}\n"
        )

    def __permissible_values(self):
        """
        Private method returning the permissible values of the first enumeration in the schema.

        Only the first enumeration reported by the view is consulted.

        :raises IndexError: if the schema defines no enumerations
        """
        all_enums = self.view.all_enums()
        first_enum = list(all_enums)[0]
        return all_enums[first_enum].permissible_values

    def __perm_value_key_info(self, perm_values_dict: dict, key: str):
        """
        Private method to retrieve the id, description, and the meaning.

        :param perm_values_dict: mapping of term name to its permissible-value entry
        :param key: the term to look up
        :returns: a ``Term_Info`` namedtuple ``(id, description, meaning)`` where ``id`` is the
            term's ``prefix:local_id`` string and ``meaning`` is the full term URI
        :raises KeyError: if ``key`` is not in ``perm_values_dict`` or its prefix is not
            declared in the schema
        :raises IndexError: if the entry's meaning contains no ``:`` separator
        """
        prefix_dict = self.view.schema.prefixes
        entry = perm_values_dict[key]
        description = entry['description']
        enum_meaning = entry['meaning']

        # filter for prefixes: split "prefix:local_id" on the first colon only
        parts = enum_meaning.split(':', 1)
        prefix = parts[0]
        local_id = parts[1]
        prefix_reference = prefix_dict[prefix]['prefix_reference']

        # combine prefix_reference and local id to make full term uri
        meaning = prefix_reference + local_id

        return self._TermInfo(enum_meaning, description, meaning)

    @docval({'name': 'term', 'type': str, 'doc': "term to be validated"})
    def validate(self, **kwargs):
        """
        Validate term in dataset towards a termset.

        Returns True if the term can be looked up in this term set, False otherwise.
        """
        term = kwargs['term']
        try:
            self[term]
        except ValueError:
            return False
        return True

    @property
    def view_set(self):
        """
        Property method to return a view of all terms in the LinkML YAML Schema.

        :returns: dict mapping each term name to its ``Term_Info`` namedtuple
        """
        perm_values_dict = self.__permissible_values()
        return {
            perm_value_key: self.__perm_value_key_info(perm_values_dict=perm_values_dict,
                                                       key=perm_value_key)
            for perm_value_key in perm_values_dict
        }

    def __getitem__(self, term):
        """
        Method to retrieve a term and term information (LinkML description and LinkML meaning) from the set of terms.

        :raises ValueError: if the lookup raises KeyError (e.g. the term is not in the schema)
        """
        perm_values_dict = self.__permissible_values()

        try:
            return self.__perm_value_key_info(perm_values_dict=perm_values_dict, key=term)
        except KeyError as err:
            msg = 'Term not in schema'
            raise ValueError(msg) from err

    def __schemasheets_convert(self):
        """
        Method that will generate a schema from a directory of TSV files using SchemaMaker.

        The schema is written as ``<schema name>.yaml`` inside the schemasheets folder.

        This method returns a path to the new schema to be viewed via SchemaView.
        """
        try:
            import yaml
            from linkml_runtime.utils.schema_as_dict import schema_as_dict
            from schemasheets.schemamaker import SchemaMaker
        except ImportError as err:  # pragma: no cover
            msg = "Install schemasheets."
            raise ValueError(msg) from err
        schema_maker = SchemaMaker()
        tsv_file_paths = glob.glob(os.path.join(self.schemasheets_folder, "*.tsv"))
        schema = schema_maker.create_schema(tsv_file_paths)
        schema_dict = schema_as_dict(schema)
        schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml")

        with open(schemasheet_schema_path, "w") as f:
            yaml.dump(schema_dict, f)

        return schemasheet_schema_path

    def __enum_expander(self):
        """
        Method that will generate a new schema with the enumerations from the LinkML source.
        This new schema will be stored in the same directory as the original schema with
        the Dynamic Enumerations, under the name ``expanded_<original file name>``.

        This method returns a path to the new schema to be viewed via SchemaView.
        """
        try:
            # The DeprecationWarning filter applies only while oaklib is being imported.
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                from oaklib.utilities.subsets.value_set_expander import ValueSetExpander
        except ImportError as err:  # pragma: no cover
            msg = 'Install oaklib.'
            raise ValueError(msg) from err
        expander = ValueSetExpander()
        # TODO: linkml should raise a warning if the schema does not have dynamic enums
        enum_names = list(self.view.all_enums())
        schema_dir, file_name = os.path.split(self.term_schema_path)
        output_path = os.path.join(schema_dir, f"expanded_{file_name}")
        expander.expand_in_place(self.term_schema_path, enum_names, output_path)

        return output_path