Coverage for nlp_manager/parse_cognitive.py: 100%

55 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/parse_cognitive.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Python regex-based NLP processors for cognitive tests.** 

27 

28All inherit from 

29:class:`crate_anon.nlp_manager.regex_parser.NumeratorOutOfDenominatorParser` 

30and are constructed with these arguments: 

31 

32nlpdef: 

33 a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition` 

34cfg_processor_name: 

35 the name of a CRATE NLP config file section (from which we may 

36 choose to get extra config information) 

37commit: 

38 force a COMMIT whenever we insert data? You should specify this 

39 in multiprocess mode, or you may get database deadlocks. 

40 

41""" 

42 

43import logging 

44from typing import List, Optional, Tuple 

45 

46from crate_anon.common.regex_helpers import WORD_BOUNDARY 

47from crate_anon.nlp_manager.nlp_definition import NlpDefinition 

48from crate_anon.nlp_manager.regex_numbers import IGNORESIGN_INTEGER 

49from crate_anon.nlp_manager.regex_parser import ( 

50 APOSTROPHE, 

51 NumeratorOutOfDenominatorParser, 

52 ValidatorBase, 

53) 

54from crate_anon.nlp_manager.regex_units import OUT_OF_SEPARATOR 

55 

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

57 

58 

59# ============================================================================= 

60# Mini-mental state examination (MMSE) 

61# ============================================================================= 

62 

63 

class Mmse(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-mental state examination (MMSE).

    Scores are taken to be out of 30 by default; a different denominator is
    supported when the text gives one explicitly.
    """

    NAME = "MMSE"
    # Names the test: "MMSE", or "mini-mental" / "minimental", optionally
    # followed by "state" and then "exam" / "examination".
    # NOTE(review): the layout whitespace inside the pattern presumably relies
    # on the superclass compiling it in verbose mode -- confirm.
    MMSE = rf"""
        (?: {WORD_BOUNDARY}
            (?: MMSE | mini[-\s]*mental (?: \s+ state)?
                (?: \s+ exam(?:ination)? )? )
        {WORD_BOUNDARY} )
    """

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # The three constructor arguments are passed through unchanged; the
        # remaining keyword arguments are the MMSE-specific settings.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.MMSE,
            expected_denominator=30,
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, expected list of (numerator, denominator)
        # pairs); a denominator of None means none was given in the text.
        cases = [
            ("MMSE", []),  # should fail; no values
            ("MMSE 30/30", [(30, 30)]),
            ("MMSE 25 / 30", [(25, 30)]),
            ("MMSE 25 / 29", [(25, 29)]),
            ("MMSE 25 / 31", [(25, 31)]),
            ("mini-mental state exam 30", [(30, None)]),
            ("minimental 25", [(25, None)]),
            ("MMSE 30", [(30, None)]),
            ("MMSE-27", [(27, None)]),
            ("MMSE score was 30", [(30, None)]),
            ("ACE 79", []),
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)

117 

118 

class MmseValidator(ValidatorBase):
    """
    Validator for Mmse (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Report the Mmse parser's variable name together with the single
        # regex string that identifies that variable in text.
        variable_name = Mmse.NAME
        regex_strings = [Mmse.MMSE]
        return variable_name, regex_strings

127 

128 

129# ============================================================================= 

130# Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III) 

131# ============================================================================= 

132 

133 

class Ace(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Addenbrooke's Cognitive Examination (ACE, ACE-R, ACE-III) total score.

    Scores are taken to be out of 100 by default; a different denominator is
    supported when the text gives one explicitly.
    """

    # Names the test: "ACE" or "Addenbrooke's cognitive examination" (and
    # variants), optionally followed by a version suffix: R, III, 111, or a
    # 3 that is not itself the numerator of an "out of X" expression.
    # NOTE(review): the layout whitespace and the embedded "#" comment
    # presumably rely on the superclass compiling this in verbose mode --
    # confirm.
    ACE = rf"""
        (?: {WORD_BOUNDARY}
            (?: ACE | (?: Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                          (?: (?:evaluation) | exam(?:ination)? ) ) )
            (?: \s* -? \s*
                (?: R | III | 111
                    # or: 3 when not followed by an "out of X" expression
                    | (?: 3 (?! \s* {OUT_OF_SEPARATOR} \s* {IGNORESIGN_INTEGER}))
                ) \b
            )?+
        {WORD_BOUNDARY} )
    """  # noqa: E501
    # ... note the possessive "?+" above; see tests below.
    NAME = "ACE"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # The three constructor arguments are passed through unchanged; the
        # remaining keyword arguments are the ACE-specific settings.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.ACE,
            expected_denominator=100,
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, expected list of (numerator, denominator)
        # pairs); a denominator of None means none was given in the text.
        cases = [
            ("MMSE", []),
            ("MMSE 30/30", []),
            ("MMSE 25 / 30", []),
            ("mini-mental state exam 30", []),
            ("minimental 25", []),
            ("MMSE 30", []),
            ("ACE 79", [(79, None)]),
            ("ACE 79/100", [(79, 100)]),
            ("ACE 79/95", [(79, 95)]),
            ("ACE 79 / 100", [(79, 100)]),
            ("Addenbrooke's cognitive examination 79", [(79, None)]),
            ("Addenbrookes cognitive evaluation 79", [(79, None)]),
            ("ACE-R 79", [(79, None)]),
            ("ACE-R 79 out of 100", [(79, 100)]),
            ("ACE-III 79", [(79, None)]),
            ("ACE-III score was 79", [(79, None)]),
            ("ACE R 79", [(79, None)]),
            ("ACE III 79", [(79, None)]),
            ("ACE-82", [(82, None)]),
            # "ACE 111" (for III) from real data
            ("ACE 111 99", [(99, None)]),
            # Note the difficulties created by the "ACE-3" representation
            # of the task's name. We have to get these right:
            ("ACE-3 79", [(79, None)]),
            ("ACE 3 79", [(79, None)]),
            ("ACE 3 79/100", [(79, 100)]),
            ("ACE 3 3", [(3, None)]),
            ("ACE 3 3/100", [(3, 100)]),
            # ... but also a score of 3 (!) on the older ACE:
            ("ACE 3/100", [(3, 100)]),
            ("ACE 3 out of 100", [(3, 100)]),
            # - This next one is ambiguous. Reference to new task? To old
            #   score? Making the "3" optional as part of the task name
            #   means that this will be accepted by the regex as a score.
            # - We need a special exception to get "ACE 3" not to give a
            #   score.
            # - We do this with a "possessive" quantifier on the "3" (or
            #   similar) part of the ACE descriptor.
            # - http://www.rexegg.com/regex-quantifiers.html
            # - Possessive quantifiers are in regex, not re:
            #   https://pypi.python.org/pypi/regex
            #   https://docs.python.org/3.5/library/re.html
            # - Ah, no. That makes "ACE 3/100" fail.
            # - But if we combine a possessive "3" with saying "3 unless
            #   it's "3 out of...", then we win.
            ("ACE 3", []),
            ("ACE 3/MOCA", []),
            ("ACE 3 / MOCA", []),
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)

233 

234 

class AceValidator(ValidatorBase):
    """
    Validator for Ace (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Report the Ace parser's variable name together with the single
        # regex string that identifies that variable in text.
        variable_name = Ace.NAME
        regex_strings = [Ace.ACE]
        return variable_name, regex_strings

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, expected boolean outcome for that text).
        cases = [
            ("pass me my mace, my boy", False),
            ("he scored 10 on the ACE today", True),
            ("he scored 10 on the ACE 3 today", True),
            ("he scored 10 on the ACE3 today", True),
            ("ACE 3/100", True),
            ("ACE 3 3/100", True),
            ("ACE3 4", True),
            ("ACE 3", True),
            ("ACE3", True),
            ("ACE 3/MOCA", True),
            ("ACE 3 / MOCA", True),
        ]
        self.test_validator(cases, verbose=verbose)

262 

263 

264# ============================================================================= 

265# Mini-Addenbrooke's Cognitive Examination (M-ACE) 

266# ============================================================================= 

267 

268 

class MiniAce(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Mini-Addenbrooke's Cognitive Examination (M-ACE).

    Scores are taken to be out of 30 by default; a different denominator is
    supported when the text gives one explicitly.
    """

    NAME = "MiniACE"
    # Names the test: "mini" or "M", an optional hyphen, then "ACE" or
    # "Addenbrooke's cognitive examination" (and variants).
    # NOTE(review): the layout whitespace inside the pattern presumably relies
    # on the superclass compiling it in verbose mode -- confirm.
    MACE = rf"""
        (?: {WORD_BOUNDARY}
            (?: mini | M ) \s* -? \s*
            (?: ACE | (?: Addenbrooke{APOSTROPHE}?s \s+ cognitive \s+
                          (?: (?:evaluation) | exam(?:ination)? ) ) )
        {WORD_BOUNDARY} )
    """

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # The three constructor arguments are passed through unchanged; the
        # remaining keyword arguments are the M-ACE-specific settings.
        super().__init__(
            variable_name=self.NAME,
            variable_regex_str=self.MACE,
            expected_denominator=30,  # mini-ACE is out of 30
            take_absolute=True,
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, expected list of (numerator, denominator)
        # pairs); a denominator of None means none was given in the text.
        cases = [
            ("MMSE 30", []),
            ("ACE 79", []),
            ("ACE 79/100", []),
            ("Addenbrooke's cognitive examination 79", []),
            ("Addenbrookes cognitive evaluation 79", []),
            ("mini-Addenbrooke's cognitive examination 79", [(79, None)]),
            ("mini-Addenbrooke’s cognitive examination 79", [(79, None)]),
            ("mini-Addenbrookes cognitive evaluation 79", [(79, None)]),
            ("M-ACE 20", [(20, None)]),
            ("M-ACE score is 20", [(20, None)]),
            ("M-ACE 29/30", [(29, 30)]),
            ("M-ACE 29/29", [(29, 29)]),
            ("MACE 29", [(29, None)]),
            ("MACE-29", [(29, None)]),
            ("mini-ACE 29", [(29, None)]),
        ]
        self.test_numerator_denominator_parser(cases, verbose=verbose)

327 

328 

class MiniAceValidator(ValidatorBase):
    """
    Validator for MiniAce (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Report the MiniAce parser's variable name together with the single
        # regex string that identifies that variable in text.
        variable_name = MiniAce.NAME
        regex_strings = [MiniAce.MACE]
        return variable_name, regex_strings

337 

338 

339# ============================================================================= 

340# Montreal Cognitive Assessment (MOCA) 

341# ============================================================================= 

342 

343 

class Moca(NumeratorOutOfDenominatorParser):
    """
    COGNITIVE.

    Montreal Cognitive Assessment (MOCA).

    The default denominator is 30, but it supports other values if given
    explicitly.
    """

    # todo:: MOCA NLP parser: support also "scored X on the MOCA"?
    # Names the test: "MOCA" or "Montreal cognitive assessment".
    # NOTE(review): the layout whitespace inside the pattern presumably relies
    # on the superclass compiling it in verbose mode -- confirm.
    MOCA = rf"""
        (?: {WORD_BOUNDARY}
            (?: MOCA | (?: Montreal \s+ cognitive \s+ assessment ) )
        {WORD_BOUNDARY} )
    """
    NAME = "MOCA"

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        # The three constructor arguments are passed through unchanged; the
        # remaining keyword arguments are the MOCA-specific settings.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable_name=self.NAME,
            variable_regex_str=self.MOCA,
            expected_denominator=30,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, expected list of (numerator, denominator)
        # pairs); a denominator of None means none was given in the text.
        self.test_numerator_denominator_parser(
            [
                ("MOCA 30", [(30, None)]),
                ("MOCA 30/30", [(30, 30)]),
                ("MOCA 25/30", [(25, 30)]),
                ("MOCA score was 25", [(25, None)]),
                ("MOCA 25/29", [(25, 29)]),
                ("MOCA-25", [(25, None)]),
                ("Montreal Cognitive Assessment 25/30", [(25, 30)]),
            ],
            # Forward the caller's verbosity, as the other parsers in this
            # module do; previously the argument was accepted but ignored.
            verbose=verbose,
        )

392 

393 

class MocaValidator(ValidatorBase):
    """
    Validator for Moca (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Report the Moca parser's variable name together with the single
        # regex string that identifies that variable in text.
        variable_name = Moca.NAME
        regex_strings = [Moca.MOCA]
        return variable_name, regex_strings

402 

403 

404# ============================================================================= 

405# All classes in this module 

406# ============================================================================= 

407 

# Registry of the cognitive-test processors defined in this module: each
# entry pairs a parser class with its corresponding validator class.
ALL_COGNITIVE_NLP_AND_VALIDATORS = [
    (Ace, AceValidator),
    (MiniAce, MiniAceValidator),
    (Mmse, MmseValidator),
    (Moca, MocaValidator),
]