Coverage for nlp_manager/all_processors.py: 76%

95 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/all_processors.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Factory functions to manage all NLP processor classes.** 

27 

28""" 

29 

30# ============================================================================= 

31# Imports 

32# ============================================================================= 

33 

34from inspect import isabstract 

35from typing import Any, List, Optional, Set, Type 

36 

37from cardinal_pythonlib.json_utils.typing_helpers import ( 

38 JsonArrayType, 

39 JsonObjectType, 

40) 

41 

42from crate_anon.common.stringfunc import ( 

43 get_docstring, 

44 make_twocol_table, 

45 trim_docstring, 

46) 

47from crate_anon.nlp_manager.base_nlp_parser import BaseNlpParser, TableMaker 

48 

49# Mostly, here we are not concerned with importing a specific symbol, but the 

50# side effect on class inheritance (registering classes). So we can import an 

51# arbitrary class or constant: 

52from crate_anon.nlp_manager.nlp_definition import NlpDefinition 

53from crate_anon.nlp_manager.parse_gate import Gate 

54from crate_anon.nlp_manager.parse_medex import Medex 

55from crate_anon.nlp_manager.parse_biochemistry import ( 

56 ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS, 

57) 

58from crate_anon.nlp_manager.parse_clinical import ( 

59 ALL_CLINICAL_NLP_AND_VALIDATORS, 

60) 

61from crate_anon.nlp_manager.parse_cognitive import ( 

62 ALL_COGNITIVE_NLP_AND_VALIDATORS, 

63) 

64from crate_anon.nlp_manager.parse_haematology import ( 

65 ALL_HAEMATOLOGY_NLP_AND_VALIDATORS, 

66) 

67from crate_anon.nlp_manager.parse_substance_misuse import ( 

68 ALL_SUBSTANCE_MISUSE_NLP_AND_VALIDATORS, 

69) 

70from crate_anon.nlp_webserver.server_processor import ServerProcessor 

71 

# Type alias used in annotations below for "a class object" (as opposed to an
# instance of a class).
ClassType = Type[object]

73 

74 

# noinspection PyUnusedLocal
def ignore(something: Any) -> None:
    """
    Deliberately does nothing with its argument.

    Passing a name to this function counts as "using" it, which is how this
    module silences unused-import warnings.

    Args:
        something: any object; it is not inspected or modified
    """
    return None

78 

79 

# To make warnings go away about imports being unused. These names are
# imported for the side effect of loading their modules (which registers the
# processor classes via inheritance), not because the symbols are needed here:
ignore(Gate)
ignore(Medex)
ignore(ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS)
ignore(ALL_CLINICAL_NLP_AND_VALIDATORS)
ignore(ALL_COGNITIVE_NLP_AND_VALIDATORS)
ignore(ALL_HAEMATOLOGY_NLP_AND_VALIDATORS)
ignore(ALL_SUBSTANCE_MISUSE_NLP_AND_VALIDATORS)

88 

89 

90# ============================================================================= 

91# Factory functions 

92# ============================================================================= 

93 

94 

def get_all_subclasses(cls: ClassType) -> List[ClassType]:
    """
    Returns all non-abstract subclasses of ``cls`` (direct and indirect),
    each exactly once.

    Abstract classes are not returned themselves, but are still descended
    into, so that their concrete descendants are found. ``cls`` itself is
    never included.

    Args:
        cls: class into which to recurse

    Returns:
        list of classes, sorted by case-insensitive class name
        (``__name__``)
    """
    # Type hinting, but not quite:
    #   https://stackoverflow.com/questions/35655257
    # Getting derived subclasses: https://stackoverflow.com/questions/3862310
    found = []  # type: List[ClassType]
    visited = set()  # type: Set[ClassType]

    def collect(parent: ClassType) -> None:
        """
        Depth-first walk of the subclass tree beneath ``parent``.
        """
        # noinspection PyArgumentList
        for subclass in parent.__subclasses__():
            if subclass in visited:
                # With multiple inheritance, the same class is reachable via
                # more than one base. It (and everything below it) has
                # already been handled, so don't report it twice.
                continue
            visited.add(subclass)
            if not isabstract(subclass):
                found.append(subclass)
            collect(subclass)  # recursive

    collect(cls)
    # Sort once, at the end, rather than at every level of the recursion.
    # list.sort() is stable, so classes sharing a lower-case name keep their
    # discovery (depth-first) order.
    found.sort(key=lambda c: c.__name__.lower())
    return found

116 

117 

def all_local_parser_classes() -> List[Type[BaseNlpParser]]:
    """
    Return every concrete (non-abstract) subclass of
    :class:`crate_anon.nlp_manager.base_nlp_parser.BaseNlpParser`, leaving
    out any class flagged as a test parser (``is_test_nlp_parser``).

    Also verifies that no two of the returned classes share a short class
    name, or a fully-qualified class name, once converted to lower case.

    Raises:
        :exc:`ValueError` if two classes collide on either kind of name
    """
    # noinspection PyTypeChecker
    candidates = get_all_subclasses(
        BaseNlpParser
    )  # type: List[Type[BaseNlpParser]]
    parsers = [
        candidate
        for candidate in candidates
        if not candidate.is_test_nlp_parser
    ]

    short_names_seen = set()  # type: Set[str]
    full_names_seen = set()  # type: Set[str]
    for parser_cls in parsers:
        # Short name first...
        short_name = parser_cls.classname().lower()
        if short_name in short_names_seen:
            raise ValueError(
                f"Trying to add NLP processor {short_name!r} but a processor "
                f"with the same lower-case name already exists"
            )
        short_names_seen.add(short_name)

        # ... then the fully-qualified name.
        full_name = parser_cls.fully_qualified_classname().lower()
        if full_name in full_names_seen:
            raise ValueError(
                f"Trying to add NLP processor {full_name!r} but a processor "
                f"with the same lower-case fully-qualified name already exists"
            )
        full_names_seen.add(full_name)

    return parsers

152 

153 

def all_tablemaker_classes() -> List[Type[TableMaker]]:
    """
    Return every concrete (non-abstract) subclass of
    :class:`crate_anon.nlp_manager.base_nlp_parser.TableMaker`, as found by
    :func:`get_all_subclasses`.
    """
    # noinspection PyTypeChecker
    tablemakers = get_all_subclasses(
        TableMaker
    )  # type: List[Type[TableMaker]]
    return tablemakers

161 

162 

def get_nlp_parser_class(classname: str) -> Optional[Type[TableMaker]]:
    """
    Look up an NLP parser class (the class itself, not an instance) by name.

    The comparison is case-insensitive, and succeeds if ``classname`` equals
    either the class's short name or its fully-qualified name. The first
    matching class from :func:`all_tablemaker_classes` wins.

    Args:
        classname: the name of the NLP parser class

    Returns:
        the class, or ``None`` if there isn't one with that name

    """
    wanted = classname.lower()
    for candidate in all_tablemaker_classes():
        if candidate.classname().lower() == wanted:
            return candidate
        # Only consult the fully-qualified name if the short name failed.
        if candidate.fully_qualified_classname().lower() == wanted:
            return candidate
    return None

185 

186 

def make_nlp_parser(
    classname: str, nlpdef: NlpDefinition, cfg_processor_name: str
) -> TableMaker:
    """
    Create an NLP processor instance, choosing its class by name.

    Args:
        classname:
            the name of the processor class; resolved via
            :func:`get_nlp_parser_class`, so matching is case-insensitive
        nlpdef:
            a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
        cfg_processor_name:
            the name (suffix) of a CRATE NLP config file section, passed to
            the NLP parser as we create it (for it to get extra config
            information if it wishes)

    Returns:
        a new instance of the matching class, constructed with ``nlpdef``
        and ``cfg_processor_name``

    Raises:
        :exc:`ValueError` if no such processor is found

    """
    parser_class = get_nlp_parser_class(classname)
    if not parser_class:
        raise ValueError(f"Unknown NLP processor type: {classname!r}")
    return parser_class(nlpdef=nlpdef, cfg_processor_name=cfg_processor_name)

215 

216 

def possible_local_processor_names() -> List[str]:
    """
    Returns the class names of all local NLP parser classes, i.e. those
    provided by :func:`all_local_parser_classes`.
    """
    local_classes = all_local_parser_classes()
    return [parser_cls.classname() for parser_cls in local_classes]

222 

223 

def all_nlp_processor_classes() -> List[Type[TableMaker]]:
    """
    Returns all NLP processor classes; this is exactly the result of
    :func:`all_tablemaker_classes`.
    """
    processor_classes = all_tablemaker_classes()
    return processor_classes

229 

230 

def possible_processor_names_including_cloud() -> List[str]:
    """
    Returns the class names of all NLP processor classes, i.e. those
    provided by :func:`all_nlp_processor_classes`.
    """
    every_class = all_nlp_processor_classes()
    return [processor_cls.classname() for processor_cls in every_class]

236 

237 

def all_local_processor_classes_without_external_tools() -> (
    List[Type[BaseNlpParser]]
):
    """
    Returns the local NLP parser classes (see
    :func:`all_local_parser_classes`) whose ``uses_external_tool`` attribute
    is false, i.e. those that don't rely on external tools.
    """
    local_classes = all_local_parser_classes()
    return [
        parser_cls
        for parser_cls in local_classes
        if not parser_cls.uses_external_tool
    ]

247 

248 

def possible_local_processor_names_without_external_tools() -> List[str]:
    """
    Returns the class names of the NLP processors that don't rely on
    external tools, i.e. those provided by
    :func:`all_local_processor_classes_without_external_tools`.
    """
    standalone_classes = all_local_processor_classes_without_external_tools()
    return [parser_cls.classname() for parser_cls in standalone_classes]

258 

259 

def possible_processor_table() -> str:
    """
    Returns a formatted two-column text table listing every NLP processor
    class (from :func:`all_tablemaker_classes`): its class name, and a
    description taken from the class's docstring (trimmed).
    """
    headings = ["NLP name", "Description"]
    table_rows = [
        [processor_cls.classname(), trim_docstring(get_docstring(processor_cls))]
        for processor_cls in all_tablemaker_classes()
    ]  # type: List[List[str]]
    return make_twocol_table(headings, table_rows, rewrap_right_col=False)

272 

273 

def all_crate_python_processors_nlprp_processor_info(
    sql_dialect: Optional[str] = None,
    extra_dict: Optional[JsonObjectType] = None,
) -> JsonArrayType:
    """
    Returns NLPRP processor information for all CRATE Python NLP processors
    that don't rely on external tools (see
    :func:`all_local_processor_classes_without_external_tools`).

    Args:
        sql_dialect:
            preferred SQL dialect for response, or ``None`` for a default
        extra_dict:
            extra dictionary to merge in for each processor; its entries
            overwrite same-named keys in the processor's own information.
            ``None`` or an empty dictionary means nothing is merged.

    Returns:
        list: list of processor information dictionaries, one per processor
        class
    """
    allprocs = []  # type: JsonArrayType
    for cls in all_local_processor_classes_without_external_tools():
        # Each class is instantiated with two ``None`` arguments --
        # presumably "no NlpDefinition" and "no config section name"; confirm
        # against the parser constructor.
        instance = cls(None, None)
        proc_info = instance.nlprp_processor_info(sql_dialect=sql_dialect)
        if extra_dict:
            proc_info.update(extra_dict)
        allprocs.append(proc_info)
    return allprocs

297 

298 

def register_all_crate_python_processors_with_serverprocessor(
    set_parser: bool = True,
) -> None:
    """
    Somewhat ugly. Register every CRATE Python NLP processor that doesn't
    rely on external tools with the ServerProcessor class.

    See also crate_anon/nlp_webserver/procs.py, for a similar thing from JSON.

    Args:
        set_parser:
            Set up a "free-floating" parser too? If true, ``set_parser()``
            is called on each ServerProcessor created here.
    """
    for parser_cls in all_local_processor_classes_without_external_tools():
        nlprp_info = parser_cls(None, None).nlprp_processor_info()
        # Creating the ServerProcessor registers it with the ServerProcessor
        # class. Doing this here saves time per request.
        server_processor = ServerProcessor.from_nlprp_json_dict(nlprp_info)
        if set_parser:
            server_processor.set_parser()