Coverage for nlp_manager/parse_cognitive.py: 100%
55 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/parse_cognitive.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Python regex-based NLP processors for cognitive tests.**
28All inherit from
29:class:`crate_anon.nlp_manager.regex_parser.NumeratorOutOfDenominatorParser`
30and are constructed with these arguments:
nlpdef:
    a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
cfg_processor_name:
    the name of a CRATE NLP config file section (from which we may
    choose to get extra config information)
commit:
    force a COMMIT whenever we insert data? You should specify this
    in multiprocess mode, or you may get database deadlocks.
41"""
43import logging
44from typing import List, Optional, Tuple
46from crate_anon.common.regex_helpers import WORD_BOUNDARY
47from crate_anon.nlp_manager.nlp_definition import NlpDefinition
48from crate_anon.nlp_manager.regex_numbers import IGNORESIGN_INTEGER
49from crate_anon.nlp_manager.regex_parser import (
50 APOSTROPHE,
51 NumeratorOutOfDenominatorParser,
52 ValidatorBase,
53)
54from crate_anon.nlp_manager.regex_units import OUT_OF_SEPARATOR
56log = logging.getLogger(__name__)
59# =============================================================================
60# Mini-mental state examination (MMSE)
61# =============================================================================
class Mmse(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-mental state examination (MMSE) score parser.

    The expected denominator is 30, but a different denominator is accepted
    when the text states one explicitly (e.g. "MMSE 25 / 29").
    """

    NAME = "MMSE"

    # Task name: "MMSE", or "mini-mental" optionally followed by "state"
    # and/or "exam"/"examination".
    MMSE = rf"""
        (?: {WORD_BOUNDARY}
            (?: MMSE | mini[-\s]*mental (?: \s+ state)?
                (?: \s+ exam(?:ination)? )? )
        {WORD_BOUNDARY} )
    """

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are as described in the module docstring; everything
        # MMSE-specific is fixed here and handed to the generic parser.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.MMSE,
            expected_denominator=30,
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, [(numerator, denominator-or-None), ...]).
        cases = [
            ("MMSE", []),  # should fail; no values
            ("MMSE 30/30", [(30, 30)]),
            ("MMSE 25 / 30", [(25, 30)]),
            ("MMSE 25 / 29", [(25, 29)]),
            ("MMSE 25 / 31", [(25, 31)]),
            ("mini-mental state exam 30", [(30, None)]),
            ("minimental 25", [(25, None)]),
            ("MMSE 30", [(30, None)]),
            ("MMSE-27", [(27, None)]),
            ("MMSE score was 30", [(30, None)]),
            ("ACE 79", []),  # a different test; must not match
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)
class MmseValidator(ValidatorBase):
    """
    Validator for :class:`Mmse` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Reuse the parser's own name and task-name regex so that the
        # validator and the parser cannot drift apart.
        regex_strings = [Mmse.MMSE]
        return Mmse.NAME, regex_strings
129# =============================================================================
130# Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III)
131# =============================================================================
class Ace(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III) total score
    parser.

    The expected denominator is 100, but a different denominator is accepted
    when the text states one explicitly (e.g. "ACE 79/95").
    """

    NAME = "ACE"

    # Task name: "ACE" or "Addenbrooke's cognitive examination/evaluation",
    # optionally followed by a version suffix (R, III, 111, or 3).
    # The suffix group uses a possessive "?+" quantifier, and a bare "3" only
    # counts as a suffix when it is not the numerator of "3 out of N"; the
    # test cases in test() show why both are needed.
    ACE = rf"""
        (?: {WORD_BOUNDARY}
            (?: ACE | (?: Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                          (?: (?:evaluation) | exam(?:ination)? ) ) )
            (?: \s* -? \s*
                (?: R | III | 111
                    # or: 3 when not followed by an "out of X" expression
                    | (?: 3 (?! \s* {OUT_OF_SEPARATOR} \s* {IGNORESIGN_INTEGER}))
                ) \b
            )?+
        {WORD_BOUNDARY} )
    """  # noqa: E501

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are as described in the module docstring; everything
        # ACE-specific is fixed here and handed to the generic parser.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.ACE,
            expected_denominator=100,
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, [(numerator, denominator-or-None), ...]).
        cases = [
            # Other instruments must not be picked up:
            ("MMSE", []),
            ("MMSE 30/30", []),
            ("MMSE 25 / 30", []),
            ("mini-mental state exam 30", []),
            ("minimental 25", []),
            ("MMSE 30", []),
            # Plain ACE, short and long names:
            ("ACE 79", [(79, None)]),
            ("ACE 79/100", [(79, 100)]),
            ("ACE 79/95", [(79, 95)]),
            ("ACE 79 / 100", [(79, 100)]),
            ("Addenbrooke's cognitive examination 79", [(79, None)]),
            ("Addenbrookes cognitive evaluation 79", [(79, None)]),
            # Versioned names:
            ("ACE-R 79", [(79, None)]),
            ("ACE-R 79 out of 100", [(79, 100)]),
            ("ACE-III 79", [(79, None)]),
            ("ACE-III score was 79", [(79, None)]),
            ("ACE R 79", [(79, None)]),
            ("ACE III 79", [(79, None)]),
            ("ACE-82", [(82, None)]),
            (
                "ACE 111 99",
                [(99, None)],
            ),  # "ACE 111" (for III) from real data
            # The "ACE-3" spelling of the task name is awkward, because the
            # "3" could equally be a score. These must come out right:
            ("ACE-3 79", [(79, None)]),
            ("ACE 3 79", [(79, None)]),
            ("ACE 3 79/100", [(79, 100)]),
            ("ACE 3 3", [(3, None)]),
            ("ACE 3 3/100", [(3, 100)]),
            # ... and so must a genuine score of 3 (!) on the older ACE:
            ("ACE 3/100", [(3, 100)]),
            ("ACE 3 out of 100", [(3, 100)]),
            # - "ACE 3" alone is ambiguous: the new task, or an old score?
            #   If the "3" were merely an optional part of the task name,
            #   the regex would backtrack and accept it as a score.
            # - We want "ACE 3" NOT to yield a score, so the "3" (or
            #   similar) part of the ACE descriptor gets a "possessive"
            #   quantifier.
            #   - http://www.rexegg.com/regex-quantifiers.html
            #   - Possessive quantifiers come from the "regex" package:
            #     https://pypi.python.org/pypi/regex
            #     (the stdlib "re" of the Python 3.5 era lacked them:
            #     https://docs.python.org/3.5/library/re.html;
            #     "re" has supported them since Python 3.11).
            # - On its own, though, the possessive "3" makes "ACE 3/100"
            #   fail.
            # - Combining the possessive "3" with "3, unless it is
            #   '3 out of ...'" gives the behaviour we want.
            ("ACE 3", []),
            ("ACE 3/MOCA", []),
            ("ACE 3 / MOCA", []),
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)
class AceValidator(ValidatorBase):
    """
    Validator for :class:`Ace` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Reuse the parser's own name and task-name regex so that the
        # validator and the parser cannot drift apart.
        regex_strings = [Ace.ACE]
        return Ace.NAME, regex_strings

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, whether the validator is expected to match).
        cases = [
            ("pass me my mace, my boy", False),
            ("he scored 10 on the ACE today", True),
            ("he scored 10 on the ACE 3 today", True),
            ("he scored 10 on the ACE3 today", True),
            ("ACE 3/100", True),
            ("ACE 3 3/100", True),
            ("ACE3 4", True),
            ("ACE 3", True),
            ("ACE3", True),
            ("ACE 3/MOCA", True),
            ("ACE 3 / MOCA", True),
        ]
        self.test_validator(cases, verbose=verbose)
264# =============================================================================
265# Mini-Addenbrooke's Cognitive Examination (M-ACE)
266# =============================================================================
class MiniAce(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-Addenbrooke's Cognitive Examination (M-ACE) score parser.

    The expected denominator is 30, but a different denominator is accepted
    when the text states one explicitly (e.g. "M-ACE 29/29").
    """

    NAME = "MiniACE"

    # Task name: "mini" or "M", an optional hyphen, then "ACE" or
    # "Addenbrooke's cognitive examination/evaluation".
    MACE = rf"""
        (?: {WORD_BOUNDARY}
            (?: mini | M ) \s* -? \s*
            (?: ACE | (?: Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                          (?: (?:evaluation) | exam(?:ination)? ) ) )
        {WORD_BOUNDARY} )
    """

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are as described in the module docstring; everything
        # M-ACE-specific is fixed here and handed to the generic parser.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.MACE,
            expected_denominator=30,  # mini-ACE is out of 30
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, [(numerator, denominator-or-None), ...]).
        cases = [
            # The MMSE and the full ACE must not be picked up:
            ("MMSE 30", []),
            ("ACE 79", []),
            ("ACE 79/100", []),
            ("Addenbrooke's cognitive examination 79", []),
            ("Addenbrookes cognitive evaluation 79", []),
            # Long names, with straight, curly, and missing apostrophes:
            ("mini-Addenbrooke's cognitive examination 79", [(79, None)]),
            ("mini-Addenbrooke’s cognitive examination 79", [(79, None)]),
            ("mini-Addenbrookes cognitive evaluation 79", [(79, None)]),
            # Abbreviated names:
            ("M-ACE 20", [(20, None)]),
            ("M-ACE score is 20", [(20, None)]),
            ("M-ACE 29/30", [(29, 30)]),
            ("M-ACE 29/29", [(29, 29)]),
            ("MACE 29", [(29, None)]),
            ("MACE-29", [(29, None)]),
            ("mini-ACE 29", [(29, None)]),
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)
class MiniAceValidator(ValidatorBase):
    """
    Validator for :class:`MiniAce` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Reuse the parser's own name and task-name regex so that the
        # validator and the parser cannot drift apart.
        regex_strings = [MiniAce.MACE]
        return MiniAce.NAME, regex_strings
339# =============================================================================
340# Montreal Cognitive Assessment (MOCA)
341# =============================================================================
class Moca(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Montreal Cognitive Assessment (MOCA).

    The default denominator is 30, but it supports other values if given
    explicitly.
    """

    # todo:: MOCA NLP parser: support also "scored X on the MOCA"?
    # Task name: "MOCA" or "Montreal cognitive assessment".
    MOCA = rf"""
        (?: {WORD_BOUNDARY}
            (?: MOCA | (?: Montreal \s+ cognitive \s+ assessment ) )
        {WORD_BOUNDARY} )
    """
    NAME = "MOCA"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.MOCA,
            expected_denominator=30,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, [(numerator, denominator-or-None), ...]).
        self.test_numerator_denominator_parser(
            [
                ("MOCA 30", [(30, None)]),
                ("MOCA 30/30", [(30, 30)]),
                ("MOCA 25/30", [(25, 30)]),
                ("MOCA score was 25", [(25, None)]),
                ("MOCA 25/29", [(25, 29)]),
                ("MOCA-25", [(25, None)]),
                ("Montreal Cognitive Assessment 25/30", [(25, 30)]),
            ],
            # Forward the caller's choice, as the other parsers in this
            # module do; previously the argument was silently ignored.
            verbose=verbose,
        )
class MocaValidator(ValidatorBase):
    """
    Validator for :class:`Moca` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Reuse the parser's own name and task-name regex so that the
        # validator and the parser cannot drift apart.
        regex_strings = [Moca.MOCA]
        return Moca.NAME, regex_strings
404# =============================================================================
405# All classes in this module
406# =============================================================================
# Every (parser class, validator class) pair defined in this module.
ALL_COGNITIVE_NLP_AND_VALIDATORS = [
    (Ace, AceValidator),
    (MiniAce, MiniAceValidator),
    (Mmse, MmseValidator),
    (Moca, MocaValidator),
]