Coverage for nlp_manager/all_processors.py: 76%
95 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/all_processors.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Factory functions to manage all NLP processor classes.**
28"""
30# =============================================================================
31# Imports
32# =============================================================================
34from inspect import isabstract
35from typing import Any, List, Optional, Set, Type
37from cardinal_pythonlib.json_utils.typing_helpers import (
38 JsonArrayType,
39 JsonObjectType,
40)
42from crate_anon.common.stringfunc import (
43 get_docstring,
44 make_twocol_table,
45 trim_docstring,
46)
47from crate_anon.nlp_manager.base_nlp_parser import BaseNlpParser, TableMaker
49# Mostly, here we are not concerned with importing a specific symbol, but the
50# side effect on class inheritance (registering classes). So we can import an
51# arbitrary class or constant:
52from crate_anon.nlp_manager.nlp_definition import NlpDefinition
53from crate_anon.nlp_manager.parse_gate import Gate
54from crate_anon.nlp_manager.parse_medex import Medex
55from crate_anon.nlp_manager.parse_biochemistry import (
56 ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS,
57)
58from crate_anon.nlp_manager.parse_clinical import (
59 ALL_CLINICAL_NLP_AND_VALIDATORS,
60)
61from crate_anon.nlp_manager.parse_cognitive import (
62 ALL_COGNITIVE_NLP_AND_VALIDATORS,
63)
64from crate_anon.nlp_manager.parse_haematology import (
65 ALL_HAEMATOLOGY_NLP_AND_VALIDATORS,
66)
67from crate_anon.nlp_manager.parse_substance_misuse import (
68 ALL_SUBSTANCE_MISUSE_NLP_AND_VALIDATORS,
69)
70from crate_anon.nlp_webserver.server_processor import ServerProcessor
# Alias for "a class object of any kind"; used in the type hints of
# get_all_subclasses().
ClassType = Type[object]
# noinspection PyUnusedLocal
def ignore(something: Any) -> None:
    """
    Accept any object and do nothing with it.

    Used at module level so that names imported only for their side effects
    count as "used" and do not trigger unused-import warnings.

    Args:
        something: any object; it is never inspected
    """
    return None
# Reference every side-effect-only import once, so that linters stop
# reporting them as unused. The call itself does nothing.
ignore(
    (
        Gate,
        Medex,
        ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS,
        ALL_CLINICAL_NLP_AND_VALIDATORS,
        ALL_COGNITIVE_NLP_AND_VALIDATORS,
        ALL_HAEMATOLOGY_NLP_AND_VALIDATORS,
        ALL_SUBSTANCE_MISUSE_NLP_AND_VALIDATORS,
    )
)
90# =============================================================================
91# Factory functions
92# =============================================================================
def get_all_subclasses(cls: ClassType) -> List[ClassType]:
    """
    Returns all non-abstract subclasses of ``cls``, at any depth.

    Each class appears exactly once in the result, even when it can be reached
    through more than one inheritance path (multiple inheritance). Abstract
    classes are left out of the result, but their own subclasses are still
    explored.

    Args:
        cls: class into which to recurse

    Returns:
        list of classes, sorted case-insensitively by class name
    """
    # Type hinting, but not quite:
    #   https://stackoverflow.com/questions/35655257
    # Getting derived subclasses: https://stackoverflow.com/questions/3862310
    found = []  # type: List[ClassType]
    visited = set()  # type: Set[ClassType]

    def _walk(parent: ClassType) -> None:
        # Depth-first walk of the inheritance tree below ``parent``.
        # noinspection PyArgumentList
        for subclass in parent.__subclasses__():
            if subclass in visited:
                # Already reached via another base class (e.g. a diamond);
                # skip so that it is neither reported nor explored twice.
                continue
            visited.add(subclass)
            if not isabstract(subclass):
                found.append(subclass)
            _walk(subclass)  # recursive

    _walk(cls)
    # Sort once, at the end, rather than at every level of the recursion.
    found.sort(key=lambda c: c.__name__.lower())
    return found
def all_local_parser_classes() -> List[Type[BaseNlpParser]]:
    """
    Return every non-abstract subclass of
    :class:`crate_anon.nlp_manager.base_nlp_parser.BaseNlpParser`, leaving out
    those flagged as test parsers (``is_test_nlp_parser``).

    Also verifies that no two of the returned classes share a lower-case
    short name or a lower-case fully-qualified name.

    Returns:
        list of parser classes

    Raises:
        :exc:`ValueError` if two classes have the same lower-case short name
        or the same lower-case fully-qualified name
    """
    # noinspection PyTypeChecker
    candidates = get_all_subclasses(
        BaseNlpParser
    )  # type: List[Type[BaseNlpParser]]
    parsers = [
        candidate
        for candidate in candidates
        if not candidate.is_test_nlp_parser
    ]

    seen_short_names = set()  # type: Set[str]
    seen_full_names = set()  # type: Set[str]
    for parser_cls in parsers:
        # Short names must be unique, ignoring case...
        short_name = parser_cls.classname().lower()
        if short_name in seen_short_names:
            raise ValueError(
                f"Trying to add NLP processor {short_name!r} but a processor "
                f"with the same lower-case name already exists"
            )
        seen_short_names.add(short_name)

        # ... and so must fully-qualified names.
        full_name = parser_cls.fully_qualified_classname().lower()
        if full_name in seen_full_names:
            raise ValueError(
                f"Trying to add NLP processor {full_name!r} but a processor "
                f"with the same lower-case fully-qualified name already exists"
            )
        seen_full_names.add(full_name)
    return parsers
def all_tablemaker_classes() -> List[Type[TableMaker]]:
    """
    Return every non-abstract subclass of
    :class:`crate_anon.nlp_manager.base_nlp_parser.TableMaker`, as found by
    :func:`get_all_subclasses`.
    """
    # noinspection PyTypeChecker
    tablemaker_classes = get_all_subclasses(TableMaker)
    return tablemaker_classes
def get_nlp_parser_class(classname: str) -> Optional[Type[TableMaker]]:
    """
    Look up an NLP parser class (the class itself, not an instance) by name.

    The comparison is case-insensitive. For each candidate class the short
    name is tried first, then the fully-qualified name.

    Args:
        classname: the name of the NLP parser class

    Returns:
        the first matching class, or ``None`` if no class has that name

    """
    wanted = classname.lower()
    for candidate in all_tablemaker_classes():
        if candidate.classname().lower() == wanted:
            return candidate
        if candidate.fully_qualified_classname().lower() == wanted:
            return candidate
    return None
def make_nlp_parser(
    classname: str, nlpdef: NlpDefinition, cfg_processor_name: str
) -> TableMaker:
    """
    Create an NLP processor instance, given the name of its class.

    Args:
        classname:
            the name of the processor (looked up case-insensitively via
            :func:`get_nlp_parser_class`)
        nlpdef:
            a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
        cfg_processor_name:
            the name (suffix) of a CRATE NLP config file section, passed to the
            NLP parser as we create it (for it to get extra config information
            if it wishes)

    Returns:
        a new instance of the processor class whose name matches
        ``classname``

    Raises:
        :exc:`ValueError` if no such processor is found

    """
    parser_cls = get_nlp_parser_class(classname)
    if not parser_cls:
        raise ValueError(f"Unknown NLP processor type: {classname!r}")
    return parser_cls(nlpdef=nlpdef, cfg_processor_name=cfg_processor_name)
def possible_local_processor_names() -> List[str]:
    """
    Returns the class names of all local parser classes, i.e. those returned
    by :func:`all_local_parser_classes`.
    """
    names = []  # type: List[str]
    for parser_cls in all_local_parser_classes():
        names.append(parser_cls.classname())
    return names
def all_nlp_processor_classes() -> List[Type[TableMaker]]:
    """
    Returns all NLP processor classes; currently identical to the result of
    :func:`all_tablemaker_classes`.
    """
    processor_classes = all_tablemaker_classes()
    return processor_classes
def possible_processor_names_including_cloud() -> List[str]:
    """
    Returns the class names of all NLP processor classes, as given by
    :func:`all_nlp_processor_classes`.
    """
    names = []  # type: List[str]
    for processor_cls in all_nlp_processor_classes():
        names.append(processor_cls.classname())
    return names
def all_local_processor_classes_without_external_tools() -> (
    List[Type[BaseNlpParser]]
):
    """
    Returns the local parser classes whose ``uses_external_tool`` attribute
    is false, i.e. those that don't rely on external tools.
    """
    standalone = []  # type: List[Type[BaseNlpParser]]
    for parser_cls in all_local_parser_classes():
        if parser_cls.uses_external_tool:
            continue
        standalone.append(parser_cls)
    return standalone
def possible_local_processor_names_without_external_tools() -> List[str]:
    """
    Returns the class names of all local NLP processors that don't rely on
    external tools (see
    :func:`all_local_processor_classes_without_external_tools`).
    """
    names = []  # type: List[str]
    for parser_cls in all_local_processor_classes_without_external_tools():
        names.append(parser_cls.classname())
    return names
def possible_processor_table() -> str:
    """
    Builds a two-column text table listing every NLP processor class name
    alongside its description, which is taken from the class docstring (and
    trimmed).

    Returns:
        the formatted table, as a string
    """
    rows = [
        [cls.classname(), trim_docstring(get_docstring(cls))]
        for cls in all_tablemaker_classes()
    ]  # type: List[List[str]]
    headings = ["NLP name", "Description"]
    return make_twocol_table(headings, rows, rewrap_right_col=False)
def all_crate_python_processors_nlprp_processor_info(
    sql_dialect: Optional[str] = None,
    extra_dict: Optional[JsonObjectType] = None,
) -> JsonArrayType:
    """
    Returns NLPRP processor information for all CRATE Python NLP processors
    that don't rely on external tools.

    Args:
        sql_dialect:
            preferred SQL dialect for response, or ``None`` for a default
        extra_dict:
            extra dictionary to merge in for each processor; its entries
            overwrite same-named keys in the processor's own information.
            ``None`` (or an empty dictionary) means nothing is merged.

    Returns:
        list: list of processor information dictionaries
    """
    allprocs = []  # type: JsonArrayType
    for cls in all_local_processor_classes_without_external_tools():
        # Both constructor arguments are None -- presumably the NLP definition
        # and the config section name (cf. make_nlp_parser); TODO confirm.
        instance = cls(None, None)
        proc_info = instance.nlprp_processor_info(sql_dialect=sql_dialect)
        if extra_dict:
            proc_info.update(extra_dict)
        allprocs.append(proc_info)
    return allprocs
def register_all_crate_python_processors_with_serverprocessor(
    set_parser: bool = True,
) -> None:
    """
    Somewhat ugly. Registers every CRATE Python NLP processor that doesn't
    rely on external tools with the ServerProcessor class.

    See also crate_anon/nlp_webserver/procs.py, for a similar thing from JSON.

    Args:
        set_parser:
            Set up a "free-floating" parser too?
    """
    for parser_cls in all_local_processor_classes_without_external_tools():
        parser = parser_cls(None, None)
        processor_info = parser.nlprp_processor_info()
        # Creating the ServerProcessor registers it with the ServerProcessor
        # class. Doing this here saves time per request.
        server_processor = ServerProcessor.from_nlprp_json_dict(processor_info)
        if set_parser:
            server_processor.set_parser()