Coverage for nlp_manager/parse_biochemistry.py: 99%

307 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/parse_biochemistry.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Python regex-based NLP processors for biochemistry data.** 

27 

28All inherit from 

29:class:`crate_anon.nlp_manager.regex_parser.SimpleNumericalResultParser` and 

30are constructed with these arguments: 

31 

32nlpdef: 

33 a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition` 

34cfgsection: 

35 the name of a CRATE NLP config file section (from which we may 

36 choose to get extra config information) 

37commit: 

38 force a COMMIT whenever we insert data? You should specify this 

39 in multiprocess mode, or you may get database deadlocks. 

40 

41""" 

42 

43import logging 

44from typing import List, Optional, Tuple, Union 

45 

46from crate_anon.common.regex_helpers import ( 

47 regex_or, 

48 WORD_BOUNDARY, 

49) 

50from crate_anon.nlp_manager.nlp_definition import NlpDefinition 

51from crate_anon.nlp_manager.number import to_float 

52from crate_anon.nlp_manager.regex_parser import ( 

53 make_simple_numeric_regex, 

54 OPTIONAL_POC, 

55 SimpleNumericalResultParser, 

56 ValidatorBase, 

57) 

58from crate_anon.nlp_manager.regex_read_codes import ( 

59 ReadCodes, 

60 regex_components_from_read_codes, 

61) 

62from crate_anon.nlp_manager.regex_units import ( 

63 factor_micromolar_from_mg_per_dl, 

64 factor_millimolar_from_mg_per_dl, 

65 G, 

66 G_PER_L, 

67 MG, 

68 MG_PER_DL, 

69 MG_PER_L, 

70 MICROEQ_PER_L, 

71 MICROMOLAR, 

72 micromolar_from_mg_per_dl, 

73 MICROMOLES_PER_L, 

74 MICROUNITS_PER_ML, 

75 MILLIEQ_PER_L, 

76 MILLIMOLAR, 

77 millimolar_from_mg_per_dl, 

78 MILLIMOLES_PER_L, 

79 MILLIMOLES_PER_MOL, 

80 MILLIUNITS_PER_L, 

81 PERCENT, 

82 UNITS_PER_L, 

83) 

84 

85log = logging.getLogger(__name__) 

86 

87 

88# ============================================================================= 

89# C-reactive protein (CRP) 

90# ============================================================================= 

91 

92 

class Crp(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    Parser for C-reactive protein (CRP) results. Results are expressed in
    mg/L (also assumed when no unit is given); mg/dL is recognized too and
    converted.

    Notes on CRP units:

    - mg/L is the commonest unit in the UK (or at least it is standard at
      Addenbrooke's, Hinchingbrooke, and Dundee).

    - Normal values are <=6 mg/L or <10 mg/L; e.g. 70-250 mg/L is seen in
      pneumonia.

    - References include:

      - https://www.ncbi.nlm.nih.gov/pubmed/7705110
      - https://emedicine.medscape.com/article/2086909-overview

    - 1 mg/dL = 10 mg/L, so the normal range in mg/dL is roughly <=1.

    """

    NAME = "CRP"
    PREFERRED_UNIT_COLUMN = "value_mg_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MG_PER_L: 1,  # preferred unit
        MG_PER_DL: 10,  # 1 mg/dL -> 10 mg/L
    }

    # Free-text names: "C-reactive protein", "C reactive protein", "CRP".
    CRP_BASE = rf"""
        {WORD_BOUNDARY}
            (?: (?: C [-\s]+ reactive [\s]+ protein ) | CRP )
        {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    CRP = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CRP_PLASMA,
            ReadCodes.CRP_SERUM,
        ),
        CRP_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # Full regex: quantity, then a numeric value with optional units.
    REGEX = make_simple_numeric_regex(
        quantity=CRP,
        units=regex_or(MG_PER_DL, MG_PER_L),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "CRP-97" gives 97, not -97 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        # Each case is (text, expected values in mg/L).
        cases = [
            ("CRP", []),  # should fail; no values
            ("CRP 6", [6]),
            ("C-reactive protein 6", [6]),
            ("C reactive protein 6", [6]),
            ("CRP = 6", [6]),
            ("CRP 6 mg/dl", [60]),
            ("CRP: 6", [6]),
            ("CRP equals 6", [6]),
            ("CRP is equal to 6", [6]),
            ("CRP <1", [1]),
            ("CRP less than 1", [1]),
            ("CRP <1 mg/dl", [10]),
            ("CRP >250", [250]),
            ("CRP more than 1", [1]),
            ("CRP greater than 1", [1]),
            ("CRP >250 mg/dl", [2500]),
            ("CRP was 62", [62]),
            ("CRP was 62 mg/l", [62]),
            ("CRP was <1", [1]),
            ("CRP is 19.2", [19.2]),
            ("CRP is >250", [250]),
            ("CRP is 19 mg dl-1", [190]),
            ("CRP is 19 mg dl -1", [190]),
            ("CRP 1.9 mg/L", [1.9]),
            ("CRP-97", [97]),
            ("CRP 1.9 mg L-1", [1.9]),
            ("CRP | 1.9 (H) | mg/L", [1.9]),
            ("Plasma C-reactive protein level (XE2dy) 45 mg/L", [45]),
            ("Serum C reactive protein level (XaINL) 45 mg/L", [45]),
            ("CRP (mg/L) 62", [62]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

197 

198 

class CrpValidator(ValidatorBase):
    """
    Validator companion to :class:`Crp` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for CRP, without the numeric part).
        variable_name = Crp.NAME
        quantity_regexes = [Crp.CRP]
        return variable_name, quantity_regexes

207 

208 

209# ============================================================================= 

210# Sodium (Na) 

211# ============================================================================= 

212# ... handy to check approximately expected distribution of results! 

213 

214 

class Sodium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Parser for sodium (Na) results, expressed in mM.
    """

    NAME = "Sodium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    # Free-text names: "Na", "Sodium".
    SODIUM_BASE = rf"""
        {WORD_BOUNDARY} (?: Na | Sodium ) {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    SODIUM = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.SODIUM,
            ReadCodes.SODIUM_BLOOD,
            ReadCodes.SODIUM_PLASMA,
            ReadCodes.SODIUM_SERUM,
        ),
        SODIUM_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # MG is matched by the regex but deliberately has no entry in
    # UNIT_MAPPING; per the tests below, text such as
    # "docusate sodium 100mg" (a dose) then yields no result.
    REGEX = make_simple_numeric_regex(
        quantity=SODIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "Na-145" gives 145, not -145 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        # Each case is (text, expected values in mmol/L).
        cases = [
            ("Na", []),  # should fail; no values
            ("Na 120", [120]),
            ("sodium 153", [153]),
            ("Na 135 mEq/L", [135]),
            ("Na 139 mM", [139]),
            ("docusate sodium 100mg", []),
            ("Present: Nicola Adams (NA). 1.0 Minutes of last meeting", []),
            ("Present: Nicola Adams (NA) 1.0 Minutes of last meeting", []),
            ("Na (H) 145 mM", [145]),
            ("Na (*) 145 mM", [145]),
            ("Na (X) 145 mM", []),
            ("blah (Na) 145 mM", []),
            ("Na (145) something", [145]),
            ("Na (145 mM), others", [145]),
            ("Na-145", [145]),
            ("Sodium level (X771T) 145", [145]),
            ("Blood sodium level (XaDva) 145", [145]),
            ("Plasma sodium level (XaIRf) 145", [145]),
            ("Serum sodium level (XE2q0) 145", [145]),
            ("Serum sodium level (mmol/L) 137", [137]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

303 

304 

class SodiumValidator(ValidatorBase):
    """
    Validator companion to :class:`Sodium` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for sodium, without the numeric part).
        variable_name = Sodium.NAME
        quantity_regexes = [Sodium.SODIUM]
        return variable_name, quantity_regexes

313 

314 

315# ============================================================================= 

316# Potassium (K) 

317# ============================================================================= 

318 

319 

class Potassium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Parser for potassium (K) results, expressed in mM.
    """

    NAME = "Potassium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    # Free-text names: "K", "Potassium".
    POTASSIUM_BASE = rf"""
        {WORD_BOUNDARY} (?: K | Potassium ) {WORD_BOUNDARY}
    """

    # Quantity regex: the free-text names, then Read-coded descriptions.
    POTASSIUM = regex_or(
        POTASSIUM_BASE,
        *regex_components_from_read_codes(
            ReadCodes.POTASSIUM,
            ReadCodes.POTASSIUM_BLOOD,
            ReadCodes.POTASSIUM_PLASMA,
            ReadCodes.POTASSIUM_SERUM,
        ),
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # MG is matched by the regex but deliberately has no entry in
    # UNIT_MAPPING; per the tests below, text such as
    # "losartan potassium 50mg" (a dose) then yields no result.
    REGEX = make_simple_numeric_regex(
        quantity=POTASSIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "K-3.2" gives 3.2, not -3.2 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        # Each case is (text, expected values in mmol/L).
        cases = [
            ("K", []),  # should fail; no values
            ("K 4", [4]),
            ("Potassium 4.3", [4.3]),
            ("K 4.5 mEq/L", [4.5]),
            ("K 4.5 mM", [4.5]),
            ("losartan potassium 50mg", []),
            ("Present: Kerry Smith (K). 1.0 Minutes of last meeting", []),
            ("Present: Kerry Smith (K) 1.0 Minutes of last meeting", []),
            ("K (H) 5.6 mM", [5.6]),
            ("K (*) 5.6 mM", [5.6]),
            ("K (X) 5.6 mM", []),
            ("blah (K) 5.6 mM", []),
            ("K (5.6) something", [5.6]),
            ("K (5.6 mM), others", [5.6]),
            ("K-3.2", [3.2]),
            ("Potassium level (X771S) 3.2", [3.2]),
            ("Blood potassium level (XaDvZ) 3.2", [3.2]),
            ("Plasma potassium level (XaIRl) 3.2", [3.2]),
            ("Serum potassium level (XE2pz) 3.2", [3.2]),
            ("Serum potassium level (XaIRl) 3.2", []),  # wrong code
        ]
        self.test_numerical_parser(cases, verbose=verbose)

405 

406 

class PotassiumValidator(ValidatorBase):
    """
    Validator companion to :class:`Potassium` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for potassium, without the numeric part).
        variable_name = Potassium.NAME
        quantity_regexes = [Potassium.POTASSIUM]
        return variable_name, quantity_regexes

415 

416 

417# ============================================================================= 

418# Urea 

419# ============================================================================= 

420 

421 

class Urea(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Parser for urea results, expressed in mM.
    """

    NAME = "Urea"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    # Free-text names: "U", "Ur", "Urea".
    UREA_BASE = rf"""
        {WORD_BOUNDARY} U(?:r(?:ea)?)? {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    UREA = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.UREA_BLOOD,
            ReadCodes.UREA_PLASMA,
            ReadCodes.UREA_SERUM,
        ),
        UREA_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # MG is matched by the regex but deliberately has no entry in
    # UNIT_MAPPING.
    REGEX = make_simple_numeric_regex(
        quantity=UREA,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "U-3.2" gives 3.2, not -3.2 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        # Each case is (text, expected values in mmol/L).
        cases = [
            ("Urea", []),  # should fail; no values
            ("U 4", [4]),
            ("Urea 4.3", [4.3]),
            ("U 4.5 mEq/L", [4.5]),
            ("Ur 4.5 mM", [4.5]),
            ("Present: Ursula Rogers (U). 1.0 Minutes of last meeting", []),
            ("Present: Ursula Rogers (UR) 1.0 Minutes of last meeting", []),
            ("U (H) 5.6 mM", [5.6]),
            ("Ur (*) 5.6 mM", [5.6]),
            ("Urea (X) 5.6 mM", []),
            ("blah (U) 5.6 mM", []),
            ("Urea (5.6) something", [5.6]),
            ("Urea (5.6 mM), others", [5.6]),
            ("U-3.2", [3.2]),
            ("Blood urea (X771P) 3.2", [3.2]),
            ("Plasma urea level (XaDvl) 3.2", [3.2]),
            ("Serum urea level (XM0lt) 3.2", [3.2]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

509 

510 

class UreaValidator(ValidatorBase):
    """
    Validator companion to :class:`Urea` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for urea, without the numeric part).
        variable_name = Urea.NAME
        quantity_regexes = [Urea.UREA]
        return variable_name, quantity_regexes

519 

520 

521# ============================================================================= 

522# Creatinine 

523# ============================================================================= 

524 

525 

class Creatinine(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Parser for creatinine results. Values are expressed in micromolar (SI),
    which is also the default; mg/dL is recognized too and converted.
    """

    NAME = "Creatinine"
    PREFERRED_UNIT_COLUMN = "value_micromol_L"

    # Used for the mg/dL -> micromolar conversion.
    # Source: https://pubchem.ncbi.nlm.nih.gov/compound/creatinine
    CREATININE_MOLECULAR_MASS_G_PER_MOL = 113.12

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MICROMOLAR: 1,  # preferred unit
        MICROMOLES_PER_L: 1,
        MICROEQ_PER_L: 1,
        MG_PER_DL: factor_micromolar_from_mg_per_dl(
            CREATININE_MOLECULAR_MASS_G_PER_MOL
        ),
        # but not MG
    }

    # Free-text names: "Cr", "Creat", "Creatinine".
    # "Creatine" might turn up as a typo, but it is a different substance,
    # so it is not accepted.
    CREATININE_BASE = rf"""
        {WORD_BOUNDARY} Cr(?:eat(?:inine)?)? {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    CREATININE = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CREATININE,
            ReadCodes.CREATININE_PLASMA,
            ReadCodes.CREATININE_PLASMA_CORRECTED,
            ReadCodes.CREATININE_SERUM,
            ReadCodes.CREATININE_SERUM_CORRECTED,
        ),
        CREATININE_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # MG is matched by the regex but deliberately has no entry in
    # UNIT_MAPPING; per the tests below, "creatinine 3 mg" yields no result.
    REGEX = make_simple_numeric_regex(
        quantity=CREATININE,
        units=regex_or(
            MICROMOLAR,  # good
            MICROMOLES_PER_L,  # good
            MICROEQ_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
            # ... note that MG_PER_DL must precede MG
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "Cr-75" gives 75, not -75 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        molecular_mass = self.CREATININE_MOLECULAR_MASS_G_PER_MOL

        def to_micromolar(mg_dl: float) -> float:
            # Convert mg/dl to μM
            return micromolar_from_mg_per_dl(mg_dl, molecular_mass)

        # Each case is (text, expected values in micromol/L).
        cases = [
            ("Creatinine", []),  # should fail; no values
            ("Cr 50", [50]),
            ("Creat 125.5", [125.5]),
            ("Creat 75 uEq/L", [75]),
            ("Cr 75 μM", [75]),
            ("Present: Chloe Rogers (CR). 1.0 Minutes of last meeting", []),
            ("Creatinine (H) 200 uM", [200]),
            ("Creatinine (*) 200 micromol/L", [200]),
            ("Creatinine (X) 200 uM", []),
            ("Creatinine 200 micromolar", [200]),
            ("Creatinine 200 micromolar, others", [200]),
            ("blah (creat) 5.6 uM", []),
            ("Creatinine (200) something", [200]),
            ("Creatinine (200 micromolar)", [200]),
            ("Creatinine (200 micromolar), others", [200]),
            ("Cr-75", [75]),
            ("creatinine 3 mg/dl", [to_micromolar(3)]),
            ("creatinine 3 mg", []),
            ("Creatinine level (X771Q) 75", [75]),
            ("Plasma creatinine level (XaETQ) 75", [75]),
            ("Cor plasma creatinine level (XaERX) 75", [75]),
            ("Serum creatinine level (XE2q5) 75", [75]),
            ("Cor serum creatinine level (XaERc) 75", [75]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

633 

634 

class CreatinineValidator(ValidatorBase):
    """
    Validator companion to :class:`Creatinine` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for creatinine, without the numeric part).
        variable_name = Creatinine.NAME
        quantity_regexes = [Creatinine.CREATININE]
        return variable_name, quantity_regexes

643 

644 

645# ============================================================================= 

646# Lithium (Li) 

647# ============================================================================= 

648 

649 

class Lithium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (THERAPEUTIC DRUG MONITORING).

    Parser for lithium (Li) levels, expressed in mM. This is for blood test
    results, not for doses.
    """

    NAME = "Lithium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
        # and not G
    }

    # Free-text names: "Li", "Lithium".
    LITHIUM_BASE = rf"""
        {WORD_BOUNDARY} Li(?:thium)? {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded description first, then the free-text names.
    LITHIUM = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.LITHIUM_SERUM,
        ),
        LITHIUM_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # MG and G are matched by the regex but deliberately have no entry in
    # UNIT_MAPPING; per the tests below, doses such as "li 1200 mg" or
    # "li 1.2 g" then yield no result.
    REGEX = make_simple_numeric_regex(
        quantity=LITHIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
            G,  # bad
        ),
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "Li-0.4" gives 0.4, not -0.4 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the parent class.
        # Each case is (text, expected values in mmol/L).
        cases = [
            ("Li", []),  # should fail; no values
            ("Li 0.4", [0.4]),
            ("li 1200 mg", []),  # that's a dose
            ("li 1.2 g", []),  # that's a dose
            ("lithium 1200 mg", []),  # that's a dose
            ("lithium 153", [153]),  # an unhappy patient...
            ("Li 135 mEq/L", [135]),
            ("Li 139 mM", [139]),
            ("lithium carbonate 800mg", []),
            ("Present: Linda Ingles (LI). 1.0 Minutes of last meeting", []),
            ("Present: Linda Ingles (LI) 1.0 Minutes of last meeting", []),
            ("Li (H) 1.3 mM", [1.3]),
            ("Li (*) 1.3 mM", [1.3]),
            ("Li (X) 1.3 mM", []),
            ("blah (Li) 1.2 mM", []),
            ("Li (1.3) something", [1.3]),
            ("Li (0.4 mM), others", [0.4]),
            ("Li-0.4", [0.4]),
            ("Serum lithium level (XE25g) 0.4", [0.4]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

735 

736 

class LithiumValidator(ValidatorBase):
    """
    Validator companion to :class:`Lithium` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for lithium, without the numeric part).
        variable_name = Lithium.NAME
        quantity_regexes = [Lithium.LITHIUM]
        return variable_name, quantity_regexes

745 

746 

747# ============================================================================= 

748# Thyroid-stimulating hormone (TSH) 

749# ============================================================================= 

750 

751 

class Tsh(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (ENDOCRINOLOGY).

    Parser for thyroid-stimulating hormone (TSH) results, expressed in
    mIU/L (or, equivalently, μIU/mL).
    """

    NAME = "TSH"
    PREFERRED_UNIT_COLUMN = "value_mU_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {
        MILLIUNITS_PER_L: 1,  # preferred unit
        MICROUNITS_PER_ML: 1,
    }

    # Free-text names: "TSH", "thyroid-stimulating hormone" (hyphens and/or
    # whitespace between the words).
    TSH_BASE = rf"""
        {WORD_BOUNDARY}
            (?: TSH | thyroid [-\s]+ stimulating [-\s]+ hormone )
        {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    TSH = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.TSH_PLASMA,
            ReadCodes.TSH_PLASMA_30_MIN,
            ReadCodes.TSH_PLASMA_60_MIN,
            ReadCodes.TSH_PLASMA_90_MIN,
            ReadCodes.TSH_PLASMA_120_MIN,
            ReadCodes.TSH_PLASMA_150_MIN,
            ReadCodes.TSH_SERUM,
            ReadCodes.TSH_SERUM_60_MIN,
            ReadCodes.TSH_SERUM_90_MIN,
            ReadCodes.TSH_SERUM_120_MIN,
            ReadCodes.TSH_SERUM_150_MIN,
        ),
        TSH_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # Full regex: quantity, then a numeric value with optional units.
    REGEX = make_simple_numeric_regex(
        quantity=TSH,
        units=regex_or(
            MILLIUNITS_PER_L,  # good
            MICROUNITS_PER_ML,  # good
        ),
    )

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "TSH-2.3" gives 2.3, not -2.3 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the superclass.
        # Each case is (text, expected values in mU/L).
        cases = [
            ("TSH", []),  # should fail; no values
            ("TSH 1.5", [1.5]),
            ("thyroid-stimulating hormone 1.5", [1.5]),
            ("TSH 1.5 mU/L", [1.5]),
            ("TSH 1.5 mIU/L", [1.5]),
            ("TSH 1.5 μU/mL", [1.5]),
            ("TSH 1.5 μIU/mL", [1.5]),
            ("TSH 1.5 uU/mL", [1.5]),
            ("TSH 1.5 uIU/mL", [1.5]),
            ("TSH-2.3", [2.3]),
            ("Plasma TSH level (XaELW) 2.3", [2.3]),
            ("Serum TSH level (XaELV) 2.3", [2.3]),
            # etc.; not all Read codes tested here
        ]
        self.test_numerical_parser(cases, verbose=verbose)

834 

835 

class TshValidator(ValidatorBase):
    """
    Validator companion to :class:`Tsh` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for TSH, without the numeric part).
        variable_name = Tsh.NAME
        quantity_regexes = [Tsh.TSH]
        return variable_name, quantity_regexes

844 

845 

846# ============================================================================= 

847# Alkaline phosphatase 

848# ============================================================================= 

849 

850 

class AlkPhos(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs/BFTs).

    Parser for alkaline phosphatase (ALP, AlkP, AlkPhos) results, expressed
    in U/L.
    """

    NAME = "AlkPhos"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {UNITS_PER_L: 1}  # preferred unit

    # Free-text names: "ALP"/"AlkP" (optionally followed by a full stop), or
    # "alk"/"alk."/"alkaline" followed by "phos"/"phos."/"phosphatase", with
    # optional hyphens/whitespace in between.
    ALKP_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: ALk?P (?:\. | {WORD_BOUNDARY}) ) |
            (?:
                alk(?:aline | \.)?
                [-\s]*
                phos(?:phatase{WORD_BOUNDARY} | \. | {WORD_BOUNDARY})
            )
        )
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    ALKP = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALKPHOS_PLASMA,
            ReadCodes.ALKPHOS_SERUM,
            ReadCodes.ALKPHOS,  # least specific; at end
        ),
        ALKP_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # Full regex: quantity, then a numeric value with optional units.
    REGEX = make_simple_numeric_regex(quantity=ALKP, units=UNITS_PER_L)

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "ALP-55" gives 55, not -55 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the superclass.
        # Each case is (text, expected values in U/L).
        cases = [
            ("ALP", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("ALP 55", [55]),
            ("Alkaline-Phosphatase 55", [55]),
            ("Alkaline Phosphatase    55 U/L ", [55]),
            ("ALP 55 U/L", [55]),
            ("ALP-55", [55]),
            ("AlkP    55", [55]),
            ("alk.phos.    55", [55]),
            ("alk. phos.    55", [55]),
            ("alkphos 55", [55]),
            ("Alkaline phosphatase level (44F3.) 55", [55]),
            # test "." in regex
            ("Alkaline phosphatase level (44F3x) 55", []),
            ("Plasma alkaline phosphatase level (XaIRj) 55", [55]),
            ("Serum alkaline phosphatase level (XE2px) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

927 

928 

class AlkPhosValidator(ValidatorBase):
    """
    Validator companion to :class:`AlkPhos` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for alkaline phosphatase, without the numeric part).
        variable_name = AlkPhos.NAME
        quantity_regexes = [AlkPhos.ALKP]
        return variable_name, quantity_regexes

937 

938 

939# ============================================================================= 

940# Alanine aminotransferase (ALT) 

941# ============================================================================= 

942 

943 

class ALT(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Parser for alanine aminotransferase (ALT), a.k.a. alanine transaminase
    (ALT), expressed in U/L.

    Also known as serum glutamate-pyruvate transaminase (SGPT) or serum
    glutamate-pyruvic transaminase (SGPT), though not in recent memory!
    """

    NAME = "ALT"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {UNITS_PER_L: 1}  # preferred unit

    # Free-text names: "ALT", "alanine aminotransferase",
    # "alanine transaminase" (hyphens and/or whitespace after "alanine").
    ALT_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            ALT |
            alanine [-\s]+ (?: aminotransferase | transaminase )
        )
        {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded description first, then the free-text names.
    ALT = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALT,
        ),
        ALT_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # Full regex: quantity, then a numeric value with optional units.
    REGEX = make_simple_numeric_regex(quantity=ALT, units=UNITS_PER_L)

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "ALT-55" gives 55, not -55 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the superclass.
        # Each case is (text, expected values in U/L).
        cases = [
            ("ALT", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("ALT 55", [55]),
            ("alanine-aminotransferase 55", [55]),
            ("Alanine aminotransferase    55 U/L ", [55]),
            ("alanine transaminase    55 U/L ", [55]),
            ("ALT 55 U/L", [55]),
            ("ALT-55", [55]),
            ("ALP 55", []),  # wrong thing
            ("ALT/SGPT serum level (44G3.) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

1012 

1013 

class ALTValidator(ValidatorBase):
    """
    Validator companion to :class:`ALT` (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the variable name with the quantity-only regex (i.e. the
        # names for ALT, without the numeric part).
        variable_name = ALT.NAME
        quantity_regexes = [ALT.ALT]
        return variable_name, quantity_regexes

1022 

1023 

1024# ============================================================================= 

1025# Gamma GT (gGT) 

1026# ============================================================================= 

1027 

1028 

class GammaGT(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Parser for gamma-glutamyl transferase (gGT) results, expressed in U/L.
    """

    NAME = "GammaGT"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Multipliers that convert a recognized unit into the preferred unit.
    UNIT_MAPPING = {UNITS_PER_L: 1}  # preferred unit

    # Free-text names: "γ"/"G"/"gamma", optional hyphens/whitespace, then
    # "GT" or "glutamyl transferase".
    GGT_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: γ | G | gamma)
            [-\s]*
            (?:
                GT |
                glutamyl [-\s]+ transferase
            )
        )
        {WORD_BOUNDARY}
    """

    # Quantity regex: Read-coded descriptions first, then the free-text names.
    GGT = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.GAMMA_GT,
            ReadCodes.GAMMA_GT_PLASMA,
            ReadCodes.GAMMA_GT_SERUM,
        ),
        GGT_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )

    # Full regex: quantity, then a numeric value with optional units.
    REGEX = make_simple_numeric_regex(quantity=GGT, units=UNITS_PER_L)

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # Presumably so that e.g. "ggt-55" gives 55, not -55 (see the
            # tests below); confirm against the parent class.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # Self-test; the docstring lives in the superclass.
        # Each case is (text, expected values in U/L).
        cases = [
            ("gGT", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("gGT 55", [55]),
            ("gamma Glutamyl Transferase 19 U/L", [19]),
            ("Gamma GT    55 U/L ", [55]),
            ("GGT 55 U/L", [55]),
            ("ggt-55", [55]),
            ("γGT 55", [55]),
            ("Gamma-glutamyl transferase lev (44G4.) 55", [55]),
            ("Plasma gamma-glutamyl transferase level (XaES4) 55", [55]),
            ("Serum gamma-glutamyl transferase level (XaES3) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)

1099 

1100 

class GammaGTValidator(ValidatorBase):
    """
    Validator counterpart of GammaGT (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = GammaGT.NAME
        regex_strings = [GammaGT.GGT]
        return variable_name, regex_strings

1109 

1110 

1111# ============================================================================= 

1112# Total bilirubin 

1113# ============================================================================= 

1114 

1115 

class Bilirubin(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Parser for total bilirubin results. Units are μM.

    The quantity is recognised either through one of the Read code patterns
    or through the free-text pattern ``BILIRUBIN_BASE``.
    """

    # Free-text names: an optional "t."/"tot"/"tot."/"total" prefix followed by
    # whitespace, then "bil", "bili", "bili." or "bilirubin".
    BILIRUBIN_BASE = rf"""
        {WORD_BOUNDARY}
        (?: t(?: ot(?:al | \.)? | \.) \s+ )?
        bili?(?: \. | rubin{WORD_BOUNDARY})?
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    BILIRUBIN = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.BILIRUBIN_PLASMA_TOTAL,
            ReadCodes.BILIRUBIN_SERUM,
            ReadCodes.BILIRUBIN_SERUM_TOTAL,
            ReadCodes.BILIRUBIN_TOTAL,
        ),
        BILIRUBIN_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=BILIRUBIN,
        units=regex_or(
            MICROMOLAR,  # good
            MICROMOLES_PER_L,  # good
        ),
    )
    NAME = "Bilirubin"
    PREFERRED_UNIT_COLUMN = "value_micromol_L"
    # Both accepted unit spellings are the preferred unit (factor 1).
    UNIT_MAPPING = {MICROMOLAR: 1, MICROMOLES_PER_L: 1}

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("tot Bil", []),  # quantity but no value: nothing expected
            ("was 7", []),  # value but no quantity
            ("tot Bil 6", [6]),
            ("Total Bilirubin: 6", [6]),
            ("Total Bilirubin 6 umol/L", [6]),
            ("bilirubin 17 μM", [17]),
            ("t.bilirubin 17 μM", [17]),
            ("t. bilirubin 17 μM", [17]),
            ("bili. 17 μM", [17]),
            ("bili 17 μM", [17]),
            ("Plasma total bilirubin level (XaETf) 17", [17]),
            ("Serum bilirubin level (44E..) 17", [17]),
            ("Serum total bilirubin level (XaERu) 17", [17]),
            ("Total bilirubin level (XE2qu) 17", [17]),
            # Arbitrary whitespace inside a Read code description:
            ("Total bilirubin \t level \n (XE2qu) 17", [17]),
            # A leading letter must break the word boundary:
            ("xTotal bilirubin level (XE2qu) 17", []),
            ("Serum total bilirubin level (XaERu) 6 umol/L", [6]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1198 

1199 

class BilirubinValidator(ValidatorBase):
    """
    Validator counterpart of Bilirubin (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = Bilirubin.NAME
        regex_strings = [Bilirubin.BILIRUBIN]
        return variable_name, regex_strings

1208 

1209 

1210# ============================================================================= 

1211# Albumin (Alb) 

1212# ============================================================================= 

1213 

1214 

class Albumin(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Parser for albumin (Alb) results. Units are g/L.

    The quantity is recognised either through one of the Read code patterns
    (plasma, serum) or through the free-text pattern ``ALBUMIN_BASE``.
    """

    # Free-text names: "alb", "alb." or "albumin", optionally followed by
    # "level".
    ALBUMIN_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            alb(?:\. | umin{WORD_BOUNDARY})?
            (?: \s+ level{WORD_BOUNDARY})?
        )
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    ALBUMIN = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALBUMIN_PLASMA,
            ReadCodes.ALBUMIN_SERUM,
        ),
        ALBUMIN_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(quantity=ALBUMIN, units=G_PER_L)
    NAME = "Albumin"
    PREFERRED_UNIT_COLUMN = "value_g_L"
    UNIT_MAPPING = {G_PER_L: 1}  # g/L is already the preferred unit

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("Alb", []),  # quantity but no value: nothing expected
            ("was 7", []),  # value but no quantity
            ("ALP 6", []),  # a different quantity
            ("Alb 6", [6]),
            ("Albumin: 48", [48]),
            ("Albumin 48 g/L", [48]),
            ("alb. 48", [48]),
            ("albumin level 48", [48]),
            ("Plasma albumin level (XaIRc) 48", [48]),
            ("Serum albumin level (XE2eA) 48", [48]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1278 

1279 

class AlbuminValidator(ValidatorBase):
    """
    Validator counterpart of Albumin (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = Albumin.NAME
        regex_strings = [Albumin.ALBUMIN]
        return variable_name, regex_strings

1288 

1289 

1290# ============================================================================= 

1291# Glucose 

1292# ============================================================================= 

1293 

1294 

class Glucose(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    Parser for glucose results. Default units are mM; mg/dL is also
    accepted and converted using the molecular mass of glucose.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.
    GLUCOSE_BASE = rf"""
        {WORD_BOUNDARY} glu(?:c(?:ose)?)? {WORD_BOUNDARY}
        # glu, gluc, glucose
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    GLUCOSE = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.GLUCOSE,
            ReadCodes.GLUCOSE_BLOOD,
            ReadCodes.GLUCOSE_BLOOD_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_BLOOD_150_MIN,
            ReadCodes.GLUCOSE_PLASMA_RANDOM,
            ReadCodes.GLUCOSE_PLASMA_FASTING,
            ReadCodes.GLUCOSE_PLASMA_30_MIN,
            ReadCodes.GLUCOSE_PLASMA_60_MIN,
            ReadCodes.GLUCOSE_PLASMA_90_MIN,
            ReadCodes.GLUCOSE_PLASMA_120_MIN,
            ReadCodes.GLUCOSE_PLASMA_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_PLASMA_150_MIN,
            ReadCodes.GLUCOSE_SERUM,
            ReadCodes.GLUCOSE_SERUM_RANDOM,
            ReadCodes.GLUCOSE_SERUM_FASTING,
            ReadCodes.GLUCOSE_SERUM_30_MIN,
            ReadCodes.GLUCOSE_SERUM_60_MIN,
            ReadCodes.GLUCOSE_SERUM_90_MIN,
            ReadCodes.GLUCOSE_SERUM_120_MIN,
            ReadCodes.GLUCOSE_SERUM_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_SERUM_150_MIN,
        ),
        GLUCOSE_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=GLUCOSE,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    GLUCOSE_MOLECULAR_MASS_G_PER_MOL = 180.156
    # ... https://pubchem.ncbi.nlm.nih.gov/compound/D-glucose
    NAME = "Glucose"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        # mg/dL factor is derived from the molecular mass above.
        MG_PER_DL: factor_millimolar_from_mg_per_dl(
            GLUCOSE_MOLECULAR_MASS_G_PER_MOL
        ),
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # 90 mg/dl expressed in mM; the only converted value used below.
        mmol_l_for_90_mg_dl = millimolar_from_mg_per_dl(
            90, self.GLUCOSE_MOLECULAR_MASS_G_PER_MOL
        )

        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("glu", []),  # quantity but no value: nothing expected
            ("glucose 6 mM", [6]),
            ("glucose 6 mmol", [6]),
            ("glucose 6", [6]),
            ("glu 6", [6]),
            ("glucose 90 mg/dl", [mmol_l_for_90_mg_dl]),  # unit conversion
            ("gluc = 6", [6]),
            ("glucose: 6", [6]),
            ("glu equals 6", [6]),
            ("glucose is equal to 6", [6]),
            ("glu <4", [4]),
            ("glucose less than 1", [1]),  # would be bad news...
            ("glu more than 20", [20]),
            ("glucose was 15", [15]),
            ("glucose was 90 mg/dl", [mmol_l_for_90_mg_dl]),
            ("glu is 90 mg dl-1", [mmol_l_for_90_mg_dl]),
            ("glucose is 90 mg dl -1", [mmol_l_for_90_mg_dl]),
            ("glu-5", [5]),
            ("glucose | 20.3 (H) | mmol/L", [20.3]),
            ("Glucose level (X772y) 5", [5]),
            ("Blood glucose level (X772z) 5", [5]),
            # Not all Read codes tested.
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1412 

1413 

class GlucoseValidator(ValidatorBase):
    """
    Validator counterpart of Glucose (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = Glucose.NAME
        regex_strings = [Glucose.GLUCOSE]
        return variable_name, regex_strings

1422 

1423 

1424# ============================================================================= 

1425# LDL cholesterol 

1426# ============================================================================= 

1427 

1428 

class LDLCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Parser for low density lipoprotein (LDL) cholesterol results.
    Default units are mM; mg/dL is also accepted and converted.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.
    LDL_BASE = rf"""
        {WORD_BOUNDARY}
        LDL [-\s]*
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\. |
            {WORD_BOUNDARY}  # allows LDL by itself
        )
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    LDL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.LDL_PLASMA,
            ReadCodes.LDL_PLASMA_FASTING,
            ReadCodes.LDL_PLASMA_RANDOM,
            ReadCodes.LDL_SERUM,
            ReadCodes.LDL_SERUM_FASTING,
            ReadCodes.LDL_SERUM_RANDOM,
        ),
        LDL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=LDL,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
    )
    NAME = "LDL cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # 140 mg/dl expressed in mM; the only converted value used below.
        mmol_l_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("LDL", []),  # quantity but no value: nothing expected
            ("LDL 4 mM", [4]),
            ("LDL chol 4 mmol", [4]),
            ("LDL chol. 4 mmol", [4]),
            ("LDL 4", [4]),
            ("chol 4", []),  # that's total cholesterol
            ("HDL chol 4", []),  # that's HDL cholesterol
            ("LDL cholesterol 140 mg/dl", [mmol_l_for_140_mg_dl]),  # converted
            ("LDL = 4", [4]),
            ("LDL: 4", [4]),
            ("LDL equals 4", [4]),
            ("LDL is equal to 4", [4]),
            ("LDL <4", [4]),
            ("LDLchol less than 4", [4]),
            ("LDL cholesterol more than 20", [20]),
            ("LDL was 4", [4]),
            ("LDL chol was 140 mg/dl", [mmol_l_for_140_mg_dl]),
            ("chol was 140 mg/dl", []),
            ("LDL is 140 mg dl-1", [mmol_l_for_140_mg_dl]),
            ("ldl chol is 140 mg dl -1", [mmol_l_for_140_mg_dl]),
            ("ldl-4", [4]),
            ("LDL chol | 6.2 (H) | mmol/L", [6.2]),
            ("Plasma LDL cholesterol level (XaEVs) 4", [4]),
            ("Plasma rndm LDL cholest level (44d4.) 4", [4]),
            ("Plasma fast LDL cholest level (44d5.) 4", [4]),
            ("Serum LDL cholesterol level (44P6.) 4", [4]),
            ("Serum fast LDL cholesterol lev (44PD.) 4", [4]),
            ("Ser random LDL cholesterol lev (44PE.) 4", [4]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1540 

1541 

class LDLCholesterolValidator(ValidatorBase):
    """
    Validator counterpart of LDLCholesterol (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = LDLCholesterol.NAME
        regex_strings = [LDLCholesterol.LDL]
        return variable_name, regex_strings

1550 

1551 

1552# ============================================================================= 

1553# HDL cholesterol 

1554# ============================================================================= 

1555 

1556 

class HDLCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Parser for high-density lipoprotein (HDL) cholesterol results.
    Default units are mM; mg/dL is also accepted and converted.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.
    HDL_BASE = rf"""
        {WORD_BOUNDARY}
        HDL [-\s]*
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\. |
            {WORD_BOUNDARY}  # allows HDL by itself
        )
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    HDL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.HDL_PLASMA,
            ReadCodes.HDL_PLASMA_FASTING,
            ReadCodes.HDL_PLASMA_RANDOM,
            ReadCodes.HDL_SERUM,
            ReadCodes.HDL_SERUM_FASTING,
            ReadCodes.HDL_SERUM_RANDOM,
        ),
        HDL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=HDL,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
    )
    NAME = "HDL cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # 140 mg/dl expressed in mM; the only converted value used below.
        mmol_l_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("HDL", []),  # quantity but no value: nothing expected
            ("HDL 4 mM", [4]),
            ("HDL chol 4 mmol", [4]),
            ("HDL chol. 4 mmol", [4]),
            ("HDL 4", [4]),
            ("chol 4", []),  # that's total cholesterol
            ("LDL chol 4", []),  # that's LDL cholesterol
            ("HDL cholesterol 140 mg/dl", [mmol_l_for_140_mg_dl]),  # converted
            ("HDL = 4", [4]),
            ("HDL: 4", [4]),
            ("HDL equals 4", [4]),
            ("HDL is equal to 4", [4]),
            ("HDL <4", [4]),
            ("HDLchol less than 4", [4]),
            ("HDL cholesterol more than 20", [20]),
            ("HDL was 4", [4]),
            ("HDL chol was 140 mg/dl", [mmol_l_for_140_mg_dl]),
            ("chol was 140 mg/dl", []),
            ("HDL is 140 mg dl-1", [mmol_l_for_140_mg_dl]),
            ("Hdl chol is 140 mg dl -1", [mmol_l_for_140_mg_dl]),
            ("hdl-4", [4]),
            ("HDL chol | 6.2 (H) | mmol/L", [6.2]),
            ("Plasma HDL cholesterol level (XaEVr) 4", [4]),
            ("Plasma rndm HDL cholest level (44d2.) 4", [4]),
            ("Plasma fast HDL cholest level (44d3.) 4", [4]),
            ("Serum HDL cholesterol level (44P5.) 4", [4]),
            ("Serum fast HDL cholesterol lev (44PB.) 4", [4]),
            ("Ser random HDL cholesterol lev (44PC.) 4", [4]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1668 

1669 

class HDLCholesterolValidator(ValidatorBase):
    """
    Validator counterpart of HDLCholesterol (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = HDLCholesterol.NAME
        regex_strings = [HDLCholesterol.HDL]
        return variable_name, regex_strings

1678 

1679 

1680# ============================================================================= 

1681# Total cholesterol 

1682# ============================================================================= 

1683 

1684 

class TotalCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Parser for total or undifferentiated cholesterol results.
    Default units are mM; mg/dL is also accepted and converted.

    The free-text pattern rejects "chol"/"cholesterol" when it is directly
    preceded by "HDL" or "LDL", since those are separate quantities.
    """

    # The prefix is "tot" or "total" followed by one or more hyphens or
    # whitespace characters. (Previously the "al" group had no "?" and only a
    # single separator was allowed, so "tot chol" and "total  chol" matched
    # without the prefix being part of the matched quantity text.)
    CHOLESTEROL_BASE = rf"""
        {WORD_BOUNDARY}
        (?<!HDL[-\s]+) (?<!LDL[-\s]+)  # not preceded by HDL or LDL
        (?: tot(?:al)? [-\s]+ )?       # optional "tot"/"total" prefix
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\.
        )
    """
    # ... (?<! something ) is a negative lookbehind assertion
    # Quantity regex: Read code alternatives first, free-text form last.
    CHOLESTEROL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CHOLESTEROL_SERUM,
            ReadCodes.CHOLESTEROL_TOTAL_PLASMA,
            ReadCodes.CHOLESTEROL_TOTAL_SERUM,
        ),
        CHOLESTEROL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=CHOLESTEROL,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
    )
    NAME = "Total cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            commit=commit,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        def convert(mg_dl: float) -> float:
            # Convert mg/dl to mM
            return self.FACTOR_MG_DL_TO_MMOL_L * mg_dl

        # Each entry is (input text, list of values expected from it).
        self.test_numerical_parser(
            [
                ("chol", []),  # should fail; no values
                ("chol 4 mM", [4]),
                ("total chol 4 mmol", [4]),
                ("tot chol 4", [4]),  # abbreviated prefix
                ("chol. 4 mmol", [4]),
                ("chol 4", [4]),
                ("HDL chol 4", []),  # that's HDL cholesterol
                ("LDL chol 4", []),  # that's LDL cholesterol
                (
                    "total cholesterol 140 mg/dl",
                    [convert(140)],
                ),  # unit conversion
                ("chol = 4", [4]),
                ("chol: 4", [4]),
                ("chol equals 4", [4]),
                ("chol is equal to 4", [4]),
                ("chol <4", [4]),
                ("chol less than 4", [4]),
                ("cholesterol more than 20", [20]),
                ("chol was 4", [4]),
                ("chol was 140 mg/dl", [convert(140)]),
                ("chol was 140", [140]),  # but probably wrong interpretation!
                ("chol is 140 mg dl-1", [convert(140)]),
                ("chol is 140 mg dl -1", [convert(140)]),
                ("chol-4", [4]),
                ("chol | 6.2 (H) | mmol/L", [6.2]),
                ("Serum cholesterol level (XE2eD) 4", [4]),
                ("Plasma total cholesterol level (XaIRd) 4", [4]),
                ("Serum total cholesterol level (XaJe9) 4", [4]),
            ],
            verbose=verbose,
        )

1789 

1790 

class TotalCholesterolValidator(ValidatorBase):
    """
    Validator counterpart of TotalCholesterol (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = TotalCholesterol.NAME
        regex_strings = [TotalCholesterol.CHOLESTEROL]
        return variable_name, regex_strings

1799 

1800 

1801# ============================================================================= 

1802# Triglycerides 

1803# ============================================================================= 

1804 

1805 

class Triglycerides(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Parser for triglyceride results.
    Default units are mM; mg/dL is also accepted and converted.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.
    TG_BASE = rf"""
        {WORD_BOUNDARY}
        (?: Triglyceride[s]? | TG )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    TG = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.TG,
            ReadCodes.TG_PLASMA,
            ReadCodes.TG_PLASMA_FASTING,
            ReadCodes.TG_PLASMA_RANDOM,
            ReadCodes.TG_SERUM,
            ReadCodes.TG_SERUM_FASTING,
            ReadCodes.TG_SERUM_RANDOM,
        ),
        TG_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=TG,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
    )
    NAME = "Triglycerides"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.01129  # reciprocal of 88.57
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    # ... https://www.ncbi.nlm.nih.gov/books/NBK83505/
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # 140 mg/dl expressed in mM; the only converted value used below.
        mmol_l_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("TG", []),  # quantity but no value: nothing expected
            ("triglycerides", []),  # likewise
            ("TG 4 mM", [4]),
            ("triglycerides 4 mmol", [4]),
            ("triglyceride 4 mmol", [4]),
            ("TG 4", [4]),
            ("TG 140 mg/dl", [mmol_l_for_140_mg_dl]),  # unit conversion
            ("TG = 4", [4]),
            ("TG: 4", [4]),
            ("TG equals 4", [4]),
            ("TG is equal to 4", [4]),
            ("TG <4", [4]),
            ("TG less than 4", [4]),
            ("TG more than 20", [20]),
            ("TG was 4", [4]),
            ("TG was 140 mg/dl", [mmol_l_for_140_mg_dl]),
            ("TG was 140", [140]),  # but probably wrong interpretation!
            ("TG is 140 mg dl-1", [mmol_l_for_140_mg_dl]),
            ("TG is 140 mg dl -1", [mmol_l_for_140_mg_dl]),
            ("TG-4", [4]),
            ("triglycerides | 6.2 (H) | mmol/L", [6.2]),
            ("Triglyceride level (X772O) 4", [4]),
            ("Plasma triglyceride level (44e..) 4", [4]),
            ("Plasma rndm triglyceride level (44e0.) 4", [4]),
            ("Plasma fast triglyceride level (44e1.) 4", [4]),
            ("Serum triglyceride levels (XE2q9) 4", [4]),
            ("Serum fasting triglyceride lev (44Q4.) 4", [4]),
            ("Serum random triglyceride lev (44Q5.) 4", [4]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

1912 

1913 

class TriglyceridesValidator(ValidatorBase):
    """
    Validator counterpart of Triglycerides (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = Triglycerides.NAME
        regex_strings = [Triglycerides.TG]
        return variable_name, regex_strings

1922 

1923 

1924# ============================================================================= 

1925# HbA1c 

1926# ============================================================================= 

1927 

1928 

def hba1c_mmol_per_mol_from_percent(
    percent: Union[float, str]
) -> Optional[float]:
    """
    Convert an HbA1c value from old percentage units -- DCCT (Diabetes Control
    and Complications Trial), UKPDS (United Kingdom Prospective Diabetes Study)
    or NGSP (National Glycohemoglobin Standardization Program) -- to newer IFCC
    (International Federation of Clinical Chemistry) mmol/mol units (mmol HbA1c
    / mol Hb).

    Args:
        percent: DCCT value as a percentage; a string is first converted
            with ``to_float``

    Returns:
        IFCC value in mmol/mol, or ``None`` if the (converted) input is
        falsy, e.g. ``None`` or zero

    The sign of the input is ignored, so -8 is treated as 8.

    Example: 5% becomes 31.1 mmol/mol.

    By Emanuele Osimo, Feb 2019.
    Some modifications by Rudolf Cardinal, Feb 2019.

    References:

    - Emanuele had mmol_per_mol = (percent - 2.14) * 10.929 -- primary source
      awaited.
    - Jeppsson 2002, https://www.ncbi.nlm.nih.gov/pubmed/11916276 -- no, that's
      the chemistry
    - https://www.ifcchba1c.org/
    - http://www.ngsp.org/ifccngsp.asp -- gives master equation of
      NGSP = [0.09148 × IFCC] + 2.152, therefore implying
      IFCC = (NGSP – 2.152) × 10.93135.
    - Little & Rohlfing 2013: https://www.ncbi.nlm.nih.gov/pubmed/23318564;
      also gives NGSP = [0.09148 * IFCC] + 2.152.

    Note also that you may see eAG values (estimated average glucose), in
    mmol/L or mg/dl; see http://www.ngsp.org/A1ceAG.asp; these are not direct
    measurements of HbA1c.

    """
    ngsp_value = to_float(percent) if isinstance(percent, str) else percent
    if not ngsp_value:
        return None
    # abs() deals with e.g. "HbA1c-8%" -> -8
    return (abs(ngsp_value) - 2.152) * 10.93135

1974 

1975 

class HbA1c(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    Parser for glycosylated (glycated) haemoglobin (HbA1c) results.
    Default units are mmol/mol; percentages are also accepted and converted
    with ``hba1c_mmol_per_mol_from_percent``.

    Note: HbA1 is different
    (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2541274).
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.
    HBA1C_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: Glyc(?:osyl)?ated [-\s]+ (?:ha?emoglobin|Hb) ) |
            HbA1c
        )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read code alternatives first, free-text form last.
    HBA1C = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.HBA1C,
            ReadCodes.HBA1C_DCCT,
            ReadCodes.HBA1C_IFCC,
        ),
        HBA1C_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # mmol/L and mg/dl are recognised as units here but are deliberately
    # absent from UNIT_MAPPING; the tests below expect no value for them.
    REGEX = make_simple_numeric_regex(
        quantity=HBA1C,
        units=regex_or(
            MILLIMOLES_PER_MOL,  # standard
            PERCENT,  # good but needs conversion
            MILLIMOLES_PER_L,  # bad; may be an eAG value
            MG_PER_DL,  # bad; may be an eAG value
        ),
    )
    NAME = "HBA1C"
    PREFERRED_UNIT_COLUMN = "value_mmol_mol"
    UNIT_MAPPING = {
        MILLIMOLES_PER_MOL: 1,  # preferred unit
        PERCENT: hba1c_mmol_per_mol_from_percent,  # a function, not a factor
        # but not MILLIMOLES_PER_L
        # and not MG_PER_DL
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # Constructor arguments are described in the module docstring.
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            variable=self.NAME,
            regex_str=self.REGEX,
            units_to_factor=self.UNIT_MAPPING,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            take_absolute=True,
            commit=commit,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # The two percentages used below, expressed in mmol/mol.
        mmol_mol_for_8_percent = hba1c_mmol_per_mol_from_percent(8)
        mmol_mol_for_15_percent = hba1c_mmol_per_mol_from_percent(15)

        # Each entry is (input text, list of values expected from it).
        expectations = [
            ("HbA1c", []),  # quantity but no value: nothing expected
            ("glycosylated haemoglobin", []),  # likewise
            ("HbA1c 31", [31]),
            ("HbA1c 31 mmol/mol", [31]),
            ("HbA1c 31 mg/dl", []),  # wrong units
            ("HbA1c 31 mmol/L", []),  # wrong units
            ("glycosylated haemoglobin 31 mmol/mol", [31]),
            ("glycated hemoglobin 31 mmol/mol", [31]),
            ("HbA1c 8%", [mmol_mol_for_8_percent]),
            ("HbA1c = 8%", [mmol_mol_for_8_percent]),
            ("HbA1c: 31", [31]),
            ("HbA1c equals 31", [31]),
            ("HbA1c is equal to 31", [31]),
            ("HbA1c <31.2", [31.2]),
            ("HbA1c less than 4", [4]),
            ("HbA1c more than 20", [20]),
            ("HbA1c was 31", [31]),
            ("HbA1c was 15%", [mmol_mol_for_15_percent]),
            ("HbA1c-31", [31]),
            ("HbA1c-8%", [mmol_mol_for_8_percent]),
            ("HbA1c | 40 (H) | mmol/mol", [40]),
            ("Haemoglobin A1c level (X772q) 8%", [mmol_mol_for_8_percent]),
            ("HbA1c level (DCCT aligned) (XaERp) 8%", [mmol_mol_for_8_percent]),
            ("HbA1c levl - IFCC standardised (XaPbt) 31 mmol/mol", [31]),
        ]
        self.test_numerical_parser(expectations, verbose=verbose)

2079 

2080 

class HbA1cValidator(ValidatorBase):
    """
    Validator counterpart of HbA1c (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-only regex.
        variable_name = HbA1c.NAME
        regex_strings = [HbA1c.HBA1C]
        return variable_name, regex_strings

2089 

2090 

2091# ============================================================================= 

2092# All classes in this module 

2093# ============================================================================= 

2094 

# Every biochemistry parser in this module paired with its validator class,
# as (parser, validator) tuples, in case-insensitive alphabetical order of
# the parser class name.
ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS = [
    (Albumin, AlbuminValidator),
    (AlkPhos, AlkPhosValidator),
    (ALT, ALTValidator),
    (Bilirubin, BilirubinValidator),
    (Creatinine, CreatinineValidator),
    (Crp, CrpValidator),
    (GammaGT, GammaGTValidator),
    (Glucose, GlucoseValidator),
    (HbA1c, HbA1cValidator),
    (HDLCholesterol, HDLCholesterolValidator),
    (LDLCholesterol, LDLCholesterolValidator),
    (Lithium, LithiumValidator),
    (Potassium, PotassiumValidator),
    (Sodium, SodiumValidator),
    (TotalCholesterol, TotalCholesterolValidator),
    (Triglycerides, TriglyceridesValidator),
    (Tsh, TshValidator),
    (Urea, UreaValidator),
]