Coverage for src/hdmf/term_set.py: 94%

92 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-08-18 20:49 +0000

1import glob 

2import os 

3from collections import namedtuple 

4from .utils import docval 

5import warnings 

6 

7 

class TermSet:
    """
    Class for implementing term sets from ontologies and other resources used to define the
    meaning and/or identity of terms.

    :ivar term_schema_path: The path to the LinkML YAML enumeration schema
    :ivar sources: The prefixes for the ontologies used in the TermSet
    :ivar view: SchemaView of the term set schema
    :ivar schemasheets_folder: The path to the folder containing the LinkML TSV files
    :ivar expanded_term_set_path: The path to the schema with the expanded enumerations
    """

    # Record type returned for every term lookup. Defined once on the class so that
    # all lookups share one type instead of creating a new namedtuple class per call.
    _TermInfo = namedtuple("Term_Info", ["id", "description", "meaning"])

    def __init__(self,
                 term_schema_path: str = None,
                 schemasheets_folder: str = None,
                 dynamic: bool = False
                 ):
        """
        :param term_schema_path: The path to the LinkML YAML enumeration schema
        :param schemasheets_folder: The path to the folder containing the LinkML TSV files
        :param dynamic: Boolean parameter denoting whether the schema uses Dynamic Enumerations

        :raises ValueError: if linkml_runtime is not installed, if both ``term_schema_path`` and
                            ``schemasheets_folder`` are given, or if neither is given
        """
        try:
            from linkml_runtime.utils.schemaview import SchemaView
        except ImportError:
            msg = "Install linkml_runtime"
            raise ValueError(msg)

        self.term_schema_path = term_schema_path
        self.schemasheets_folder = schemasheets_folder

        if self.schemasheets_folder is not None:
            if self.term_schema_path is not None:
                msg = "Cannot have both a path to a Schemasheets folder and a TermSet schema."
                raise ValueError(msg)
            # The schema is generated from the TSV files and written next to them.
            self.term_schema_path = self.__schemasheets_convert()
        elif self.term_schema_path is None:
            # Fail early with a clear message rather than handing None to SchemaView.
            msg = "Must provide either a path to a TermSet schema or a Schemasheets folder."
            raise ValueError(msg)

        self.view = SchemaView(self.term_schema_path)
        self.expanded_term_set_path = None
        if dynamic:
            # reset view to now include the dynamically populated term_set
            self.expanded_term_set_path = self.__enum_expander()
            self.view = SchemaView(self.expanded_term_set_path)

        self.sources = self.view.schema.prefixes

    def __repr__(self):
        return f"class: {self.__class__}\nterm_schema_path: {self.term_schema_path}\n"

    def __permissible_values(self):
        """
        Private method to retrieve the permissible values of the first enumeration in the schema.

        :raises IndexError: if the schema does not define any enumeration
        """
        all_enums = self.view.all_enums()
        first_enum = list(all_enums)[0]
        return all_enums[first_enum].permissible_values

    def __perm_value_key_info(self, perm_values_dict: dict, key: str):
        """
        Private method to retrieve the id, description, and the meaning.

        :param perm_values_dict: mapping of permissible value name to its entry
        :param key: name of the permissible value to look up
        :returns: a ``Term_Info`` namedtuple ``(id, description, meaning)`` where ``id`` is the
                  prefixed meaning as written in the schema and ``meaning`` is the full term URI
        :raises KeyError: if ``key`` or the prefix of its meaning is not present
        :raises IndexError: if the meaning does not contain a ``:`` separator
        """
        prefix_dict = self.view.schema.prefixes
        entry = perm_values_dict[key]
        description = entry['description']
        enum_meaning = entry['meaning']

        # filter for prefixes; indexing (rather than unpacking) keeps the IndexError
        # raised for a meaning that has no ':' separator
        parts = enum_meaning.split(':', 1)
        prefix = parts[0]
        local_id = parts[1]
        prefix_reference = prefix_dict[prefix]['prefix_reference']

        # combine prefix and prefix_reference to make full term uri
        meaning = prefix_reference + local_id

        return self._TermInfo(enum_meaning, description, meaning)

    @docval({'name': 'term', 'type': str, 'doc': "term to be validated"})
    def validate(self, **kwargs):
        """
        Validate term in dataset towards a termset.

        :returns: True if the term can be retrieved from the term set, False otherwise
        """
        term = kwargs['term']
        try:
            self[term]
        except ValueError:
            return False
        return True

    @property
    def view_set(self):
        """
        Property method to return a view of all terms in the LinkML YAML Schema.

        :returns: dict mapping each permissible value name to its ``Term_Info`` namedtuple
        """
        perm_values_dict = self.__permissible_values()
        return {
            perm_value_key: self.__perm_value_key_info(perm_values_dict=perm_values_dict,
                                                       key=perm_value_key)
            for perm_value_key in perm_values_dict.keys()
        }

    def __getitem__(self, term):
        """
        Method to retrieve a term and term information (LinkML description and LinkML meaning) from the set of terms.

        :raises ValueError: if the lookup of the term information raises a KeyError
        """
        perm_values_dict = self.__permissible_values()

        try:
            return self.__perm_value_key_info(perm_values_dict=perm_values_dict, key=term)
        except KeyError:
            msg = 'Term not in schema'
            raise ValueError(msg)

    def __schemasheets_convert(self):
        """
        Method that will generate a schema from a directory of TSV files using SchemaMaker.

        The generated schema is written as ``<schema name>.yaml`` inside ``schemasheets_folder``.

        This method returns a path to the new schema to be viewed via SchemaView.
        """
        try:
            import yaml
            from linkml_runtime.utils.schema_as_dict import schema_as_dict
            from schemasheets.schemamaker import SchemaMaker
        except ImportError:  # pragma: no cover
            msg = "Install schemasheets."
            raise ValueError(msg)
        schema_maker = SchemaMaker()
        tsv_file_paths = glob.glob(os.path.join(self.schemasheets_folder, "*.tsv"))
        schema = schema_maker.create_schema(tsv_file_paths)
        schema_dict = schema_as_dict(schema)
        schemasheet_schema_path = os.path.join(self.schemasheets_folder, f"{schema_dict['name']}.yaml")

        with open(schemasheet_schema_path, "w", encoding="utf-8") as f:
            yaml.dump(schema_dict, f)

        return schemasheet_schema_path

    def __enum_expander(self):
        """
        Method that will generate a new schema with the enumerations from the LinkML source.
        This new schema will be stored in the same directory as the original schema with
        the Dynamic Enumerations, under the original file name prefixed with ``expanded_``.

        This method returns a path to the new schema to be viewed via SchemaView.
        """
        try:
            # DeprecationWarnings emitted while importing oaklib are silenced
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=DeprecationWarning)
                from oaklib.utilities.subsets.value_set_expander import ValueSetExpander
        except ImportError:  # pragma: no cover
            msg = 'Install oaklib.'
            raise ValueError(msg)
        expander = ValueSetExpander()
        # TODO: linkml should raise a warning if the schema does not have dynamic enums
        enum = list(self.view.all_enums())
        schema_dir = os.path.dirname(self.term_schema_path)
        file_name = os.path.basename(self.term_schema_path)
        output_path = os.path.join(schema_dir, f"expanded_{file_name}")
        expander.expand_in_place(self.term_schema_path, enum, output_path)

        return output_path