Coverage for nlp_manager/parse_biochemistry.py: 99%
307 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/parse_biochemistry.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Python regex-based NLP processors for biochemistry data.**
28All inherit from
29:class:`crate_anon.nlp_manager.regex_parser.SimpleNumericalResultParser` and
30are constructed with these arguments:
32nlpdef:
33 a :class:`crate_anon.nlp_manager.nlp_definition.NlpDefinition`
34cfg_processor_name:
35 the name of a CRATE NLP config file section (from which we may
36 choose to get extra config information)
37commit:
38 force a COMMIT whenever we insert data? You should specify this
39 in multiprocess mode, or you may get database deadlocks.
41"""
43import logging
44from typing import List, Optional, Tuple, Union
46from crate_anon.common.regex_helpers import (
47 regex_or,
48 WORD_BOUNDARY,
49)
50from crate_anon.nlp_manager.nlp_definition import NlpDefinition
51from crate_anon.nlp_manager.number import to_float
52from crate_anon.nlp_manager.regex_parser import (
53 make_simple_numeric_regex,
54 OPTIONAL_POC,
55 SimpleNumericalResultParser,
56 ValidatorBase,
57)
58from crate_anon.nlp_manager.regex_read_codes import (
59 ReadCodes,
60 regex_components_from_read_codes,
61)
62from crate_anon.nlp_manager.regex_units import (
63 factor_micromolar_from_mg_per_dl,
64 factor_millimolar_from_mg_per_dl,
65 G,
66 G_PER_L,
67 MG,
68 MG_PER_DL,
69 MG_PER_L,
70 MICROEQ_PER_L,
71 MICROMOLAR,
72 micromolar_from_mg_per_dl,
73 MICROMOLES_PER_L,
74 MICROUNITS_PER_ML,
75 MILLIEQ_PER_L,
76 MILLIMOLAR,
77 millimolar_from_mg_per_dl,
78 MILLIMOLES_PER_L,
79 MILLIMOLES_PER_MOL,
80 MILLIUNITS_PER_L,
81 PERCENT,
82 UNITS_PER_L,
83)
85log = logging.getLogger(__name__)
88# =============================================================================
89# C-reactive protein (CRP)
90# =============================================================================
class Crp(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    C-reactive protein (CRP). Default units are mg/L; also supports mg/dL
    (converted to mg/L via ``UNIT_MAPPING``).

    CRP units:

    - mg/L is commonest in the UK (or at least standard at Addenbrooke's,
      Hinchingbrooke, and Dundee);

    - values of <=6 mg/L or <10 mg/L are normal, and e.g. 70-250 mg/L in
      pneumonia.

    - Refs include:

      - https://www.ncbi.nlm.nih.gov/pubmed/7705110
      - https://emedicine.medscape.com/article/2086909-overview

    - 1 mg/dL = 10 mg/L, so normal in mg/dL is <=1 roughly.

    """

    NAME = "CRP"
    PREFERRED_UNIT_COLUMN = "value_mg_L"

    # Free-text names for the quantity: "C-reactive protein" / "CRP".
    CRP_BASE = rf"""
        {WORD_BOUNDARY}
            (?: (?: C [-\s]+ reactive [\s]+ protein ) | CRP )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    CRP = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CRP_PLASMA,
            ReadCodes.CRP_SERUM,
        ),
        CRP_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity, optional ignorable text, value, optional units.
    REGEX = make_simple_numeric_regex(
        quantity=CRP,
        units=regex_or(MG_PER_DL, MG_PER_L),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    # Multipliers taking each recognised unit to the preferred unit (mg/L).
    UNIT_MAPPING = {
        MG_PER_L: 1,  # preferred unit
        MG_PER_DL: 10,  # 1 mg/dL -> 10 mg/L
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "CRP-97" to give 97, not -97.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Each case is (input text, list of expected values in mg/L).
        cases = [
            ("CRP", []),  # should fail; no values
            ("CRP 6", [6]),
            ("C-reactive protein 6", [6]),
            ("C reactive protein 6", [6]),
            ("CRP = 6", [6]),
            ("CRP 6 mg/dl", [60]),
            ("CRP: 6", [6]),
            ("CRP equals 6", [6]),
            ("CRP is equal to 6", [6]),
            ("CRP <1", [1]),
            ("CRP less than 1", [1]),
            ("CRP <1 mg/dl", [10]),
            ("CRP >250", [250]),
            ("CRP more than 1", [1]),
            ("CRP greater than 1", [1]),
            ("CRP >250 mg/dl", [2500]),
            ("CRP was 62", [62]),
            ("CRP was 62 mg/l", [62]),
            ("CRP was <1", [1]),
            ("CRP is 19.2", [19.2]),
            ("CRP is >250", [250]),
            ("CRP is 19 mg dl-1", [190]),
            ("CRP is 19 mg dl -1", [190]),
            ("CRP 1.9 mg/L", [1.9]),
            ("CRP-97", [97]),
            ("CRP 1.9 mg L-1", [1.9]),
            ("CRP | 1.9 (H) | mg/L", [1.9]),
            ("Plasma C-reactive protein level (XE2dy) 45 mg/L", [45]),
            ("Serum C reactive protein level (XaINL) 45 mg/L", [45]),
            ("CRP (mg/L) 62", [62]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class CrpValidator(ValidatorBase):
    """
    Validator for Crp (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Crp.NAME
        regex_strings = [Crp.CRP]
        return variable_name, regex_strings
209# =============================================================================
210# Sodium (Na)
211# =============================================================================
212# ... handy to check approximately expected distribution of results!
class Sodium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Sodium (Na), in mM.
    """

    NAME = "Sodium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Free-text names for the quantity: "Na" / "Sodium".
    SODIUM_BASE = rf"""
        {WORD_BOUNDARY} (?: Na | Sodium ) {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    SODIUM = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.SODIUM,
            ReadCodes.SODIUM_BLOOD,
            ReadCodes.SODIUM_PLASMA,
            ReadCodes.SODIUM_SERUM,
        ),
        SODIUM_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # "mg" is recognised as a unit here but deliberately has no entry in
    # UNIT_MAPPING; the tests expect e.g. "docusate sodium 100mg" to give
    # no result.
    REGEX = make_simple_numeric_regex(
        quantity=SODIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    # Multipliers taking each accepted unit to the preferred unit (mM).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "Na-145" to give 145, not -145.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Each case is (input text, list of expected values in mM).
        cases = [
            ("Na", []),  # should fail; no values
            ("Na 120", [120]),
            ("sodium 153", [153]),
            ("Na 135 mEq/L", [135]),
            ("Na 139 mM", [139]),
            ("docusate sodium 100mg", []),
            (
                "Present: Nicola Adams (NA). 1.0 Minutes of last meeting",
                [],
            ),
            ("Present: Nicola Adams (NA) 1.0 Minutes of last meeting", []),
            ("Na (H) 145 mM", [145]),
            ("Na (*) 145 mM", [145]),
            ("Na (X) 145 mM", []),
            ("blah (Na) 145 mM", []),
            ("Na (145) something", [145]),
            ("Na (145 mM), others", [145]),
            ("Na-145", [145]),
            ("Sodium level (X771T) 145", [145]),
            ("Blood sodium level (XaDva) 145", [145]),
            ("Plasma sodium level (XaIRf) 145", [145]),
            ("Serum sodium level (XE2q0) 145", [145]),
            ("Serum sodium level (mmol/L) 137", [137]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class SodiumValidator(ValidatorBase):
    """
    Validator for Sodium (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Sodium.NAME
        regex_strings = [Sodium.SODIUM]
        return variable_name, regex_strings
315# =============================================================================
316# Potassium (K)
317# =============================================================================
class Potassium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Potassium (K), in mM.
    """

    NAME = "Potassium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Free-text names for the quantity: "K" / "Potassium".
    POTASSIUM_BASE = rf"""
        {WORD_BOUNDARY} (?: K | Potassium ) {WORD_BOUNDARY}
    """
    # Quantity regex. NB: here the free-text names come before the
    # Read-code phrases; that order is part of the regex and is kept as is.
    POTASSIUM = regex_or(
        POTASSIUM_BASE,
        *regex_components_from_read_codes(
            ReadCodes.POTASSIUM,
            ReadCodes.POTASSIUM_BLOOD,
            ReadCodes.POTASSIUM_PLASMA,
            ReadCodes.POTASSIUM_SERUM,
        ),
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # "mg" is recognised as a unit here but deliberately has no entry in
    # UNIT_MAPPING; the tests expect e.g. "losartan potassium 50mg" to give
    # no result.
    REGEX = make_simple_numeric_regex(
        quantity=POTASSIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    # Multipliers taking each accepted unit to the preferred unit (mM).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "K-3.2" to give 3.2, not -3.2.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Each case is (input text, list of expected values in mM).
        cases = [
            ("K", []),  # should fail; no values
            ("K 4", [4]),
            ("Potassium 4.3", [4.3]),
            ("K 4.5 mEq/L", [4.5]),
            ("K 4.5 mM", [4.5]),
            ("losartan potassium 50mg", []),
            ("Present: Kerry Smith (K). 1.0 Minutes of last meeting", []),
            ("Present: Kerry Smith (K) 1.0 Minutes of last meeting", []),
            ("K (H) 5.6 mM", [5.6]),
            ("K (*) 5.6 mM", [5.6]),
            ("K (X) 5.6 mM", []),
            ("blah (K) 5.6 mM", []),
            ("K (5.6) something", [5.6]),
            ("K (5.6 mM), others", [5.6]),
            ("K-3.2", [3.2]),
            ("Potassium level (X771S) 3.2", [3.2]),
            ("Blood potassium level (XaDvZ) 3.2", [3.2]),
            ("Plasma potassium level (XaIRl) 3.2", [3.2]),
            ("Serum potassium level (XE2pz) 3.2", [3.2]),
            ("Serum potassium level (XaIRl) 3.2", []),  # wrong code
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class PotassiumValidator(ValidatorBase):
    """
    Validator for Potassium (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Potassium.NAME
        regex_strings = [Potassium.POTASSIUM]
        return variable_name, regex_strings
417# =============================================================================
418# Urea
419# =============================================================================
class Urea(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Urea, in mM.
    """

    NAME = "Urea"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Free-text names for the quantity: "U" / "Ur" / "Urea".
    UREA_BASE = rf"""
        {WORD_BOUNDARY} U(?:r(?:ea)?)? {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    UREA = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.UREA_BLOOD,
            ReadCodes.UREA_PLASMA,
            ReadCodes.UREA_SERUM,
        ),
        UREA_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # "mg" is recognised as a unit here but deliberately has no entry in
    # UNIT_MAPPING.
    REGEX = make_simple_numeric_regex(
        quantity=UREA,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    # Multipliers taking each accepted unit to the preferred unit (mM).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "U-3.2" to give 3.2, not -3.2.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Each case is (input text, list of expected values in mM).
        cases = [
            ("Urea", []),  # should fail; no values
            ("U 4", [4]),
            ("Urea 4.3", [4.3]),
            ("U 4.5 mEq/L", [4.5]),
            ("Ur 4.5 mM", [4.5]),
            (
                "Present: Ursula Rogers (U). 1.0 Minutes of last meeting",
                [],
            ),
            (
                "Present: Ursula Rogers (UR) 1.0 Minutes of last meeting",
                [],
            ),
            ("U (H) 5.6 mM", [5.6]),
            ("Ur (*) 5.6 mM", [5.6]),
            ("Urea (X) 5.6 mM", []),
            ("blah (U) 5.6 mM", []),
            ("Urea (5.6) something", [5.6]),
            ("Urea (5.6 mM), others", [5.6]),
            ("U-3.2", [3.2]),
            ("Blood urea (X771P) 3.2", [3.2]),
            ("Plasma urea level (XaDvl) 3.2", [3.2]),
            ("Serum urea level (XM0lt) 3.2", [3.2]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class UreaValidator(ValidatorBase):
    """
    Validator for Urea (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Urea.NAME
        regex_strings = [Urea.UREA]
        return variable_name, regex_strings
521# =============================================================================
522# Creatinine
523# =============================================================================
class Creatinine(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (U&E).

    Creatinine. Default units are micromolar (SI); also supports mg/dL
    (converted using the molecular mass of creatinine).
    """

    NAME = "Creatinine"
    PREFERRED_UNIT_COLUMN = "value_micromol_L"

    CREATININE_MOLECULAR_MASS_G_PER_MOL = 113.12
    # ... https://pubchem.ncbi.nlm.nih.gov/compound/creatinine

    CREATININE_BASE = rf"""
        {WORD_BOUNDARY} Cr(?:eat(?:inine)?)? {WORD_BOUNDARY}
    """
    # ... Cr, Creat, Creatinine
    # Possible that "creatine" is present as a typo... but it's wrong...

    # Quantity regex: Read-code phrases first, then the free-text names.
    CREATININE = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CREATININE,
            ReadCodes.CREATININE_PLASMA,
            ReadCodes.CREATININE_PLASMA_CORRECTED,
            ReadCodes.CREATININE_SERUM,
            ReadCodes.CREATININE_SERUM_CORRECTED,
        ),
        CREATININE_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # "mg" alone is recognised as a unit but deliberately has no entry in
    # UNIT_MAPPING; the tests expect "creatinine 3 mg" to give no result.
    REGEX = make_simple_numeric_regex(
        quantity=CREATININE,
        units=regex_or(
            MICROMOLAR,  # good
            MICROMOLES_PER_L,  # good
            MICROEQ_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
            # ... note that MG_PER_DL must precede MG
            MG,  # bad
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    # Multipliers taking each accepted unit to the preferred unit (μM).
    UNIT_MAPPING = {
        MICROMOLAR: 1,  # preferred unit
        MICROMOLES_PER_L: 1,
        MICROEQ_PER_L: 1,
        MG_PER_DL: factor_micromolar_from_mg_per_dl(
            CREATININE_MOLECULAR_MASS_G_PER_MOL
        ),
        # but not MG
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "Cr-75" to give 75, not -75.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Expected μM value for the "3 mg/dl" case (convert mg/dl to μM).
        three_mg_dl_as_micromolar = micromolar_from_mg_per_dl(
            3, self.CREATININE_MOLECULAR_MASS_G_PER_MOL
        )

        # Each case is (input text, list of expected values in μM).
        cases = [
            ("Creatinine", []),  # should fail; no values
            ("Cr 50", [50]),
            ("Creat 125.5", [125.5]),
            ("Creat 75 uEq/L", [75]),
            ("Cr 75 μM", [75]),
            (
                "Present: Chloe Rogers (CR). 1.0 Minutes of last meeting",
                [],
            ),
            ("Creatinine (H) 200 uM", [200]),
            ("Creatinine (*) 200 micromol/L", [200]),
            ("Creatinine (X) 200 uM", []),
            ("Creatinine 200 micromolar", [200]),
            ("Creatinine 200 micromolar, others", [200]),
            ("blah (creat) 5.6 uM", []),
            ("Creatinine (200) something", [200]),
            ("Creatinine (200 micromolar)", [200]),
            ("Creatinine (200 micromolar), others", [200]),
            ("Cr-75", [75]),
            ("creatinine 3 mg/dl", [three_mg_dl_as_micromolar]),
            ("creatinine 3 mg", []),
            ("Creatinine level (X771Q) 75", [75]),
            ("Plasma creatinine level (XaETQ) 75", [75]),
            ("Cor plasma creatinine level (XaERX) 75", [75]),
            ("Serum creatinine level (XE2q5) 75", [75]),
            ("Cor serum creatinine level (XaERc) 75", [75]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class CreatinineValidator(ValidatorBase):
    """
    Validator for Creatinine (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Creatinine.NAME
        regex_strings = [Creatinine.CREATININE]
        return variable_name, regex_strings
645# =============================================================================
646# Lithium (Li)
647# =============================================================================
class Lithium(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (THERAPEUTIC DRUG MONITORING).

    Lithium (Li) levels (for blood tests, not doses), in mM.
    """

    NAME = "Lithium"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"

    # Free-text names for the quantity: "Li" / "Lithium".
    LITHIUM_BASE = rf"""
        {WORD_BOUNDARY} Li(?:thium)? {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrase first, then the free-text names.
    LITHIUM = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.LITHIUM_SERUM,
        ),
        LITHIUM_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # "mg" and "g" are recognised as units here but deliberately have no
    # entry in UNIT_MAPPING; the tests expect doses such as "li 1200 mg" or
    # "li 1.2 g" to give no result.
    REGEX = make_simple_numeric_regex(
        quantity=LITHIUM,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MILLIEQ_PER_L,  # good
            MG,  # bad
            G,  # bad
        ),
    )
    # Multipliers taking each accepted unit to the preferred unit (mM).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MILLIEQ_PER_L: 1,
        # but not MG
        # and not G
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "Li-0.4" to give 0.4, not -0.4.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class
        # Each case is (input text, list of expected values in mM).
        cases = [
            ("Li", []),  # should fail; no values
            ("Li 0.4", [0.4]),
            ("li 1200 mg", []),  # that's a dose
            ("li 1.2 g", []),  # that's a dose
            ("lithium 1200 mg", []),  # that's a dose
            ("lithium 153", [153]),  # an unhappy patient...
            ("Li 135 mEq/L", [135]),
            ("Li 139 mM", [139]),
            ("lithium carbonate 800mg", []),
            (
                "Present: Linda Ingles (LI). 1.0 Minutes of last meeting",
                [],
            ),
            ("Present: Linda Ingles (LI) 1.0 Minutes of last meeting", []),
            ("Li (H) 1.3 mM", [1.3]),
            ("Li (*) 1.3 mM", [1.3]),
            ("Li (X) 1.3 mM", []),
            ("blah (Li) 1.2 mM", []),
            ("Li (1.3) something", [1.3]),
            ("Li (0.4 mM), others", [0.4]),
            ("Li-0.4", [0.4]),
            ("Serum lithium level (XE25g) 0.4", [0.4]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class LithiumValidator(ValidatorBase):
    """
    Validator for Lithium (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Lithium.NAME
        regex_strings = [Lithium.LITHIUM]
        return variable_name, regex_strings
747# =============================================================================
748# Thyroid-stimulating hormone (TSH)
749# =============================================================================
class Tsh(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (ENDOCRINOLOGY).

    Thyroid-stimulating hormone (TSH), in mIU/L (or μIU/mL).
    """

    NAME = "TSH"
    PREFERRED_UNIT_COLUMN = "value_mU_L"

    # Free-text names for the quantity: "TSH" / "thyroid-stimulating hormone".
    TSH_BASE = rf"""
        {WORD_BOUNDARY}
            (?: TSH | thyroid [-\s]+ stimulating [-\s]+ hormone )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    TSH = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.TSH_PLASMA,
            ReadCodes.TSH_PLASMA_30_MIN,
            ReadCodes.TSH_PLASMA_60_MIN,
            ReadCodes.TSH_PLASMA_90_MIN,
            ReadCodes.TSH_PLASMA_120_MIN,
            ReadCodes.TSH_PLASMA_150_MIN,
            ReadCodes.TSH_SERUM,
            ReadCodes.TSH_SERUM_60_MIN,
            ReadCodes.TSH_SERUM_90_MIN,
            ReadCodes.TSH_SERUM_120_MIN,
            ReadCodes.TSH_SERUM_150_MIN,
        ),
        TSH_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=TSH,
        units=regex_or(
            MILLIUNITS_PER_L,  # good
            MICROUNITS_PER_ML,  # good
        ),
    )
    # Both accepted units map 1:1 onto the preferred unit.
    UNIT_MAPPING = {
        MILLIUNITS_PER_L: 1,  # preferred unit
        MICROUNITS_PER_ML: 1,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "TSH-2.3" to give 2.3, not -2.3.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (input text, list of expected values in mU/L).
        cases = [
            ("TSH", []),  # should fail; no values
            ("TSH 1.5", [1.5]),
            ("thyroid-stimulating hormone 1.5", [1.5]),
            ("TSH 1.5 mU/L", [1.5]),
            ("TSH 1.5 mIU/L", [1.5]),
            ("TSH 1.5 μU/mL", [1.5]),
            ("TSH 1.5 μIU/mL", [1.5]),
            ("TSH 1.5 uU/mL", [1.5]),
            ("TSH 1.5 uIU/mL", [1.5]),
            ("TSH-2.3", [2.3]),
            ("Plasma TSH level (XaELW) 2.3", [2.3]),
            ("Serum TSH level (XaELV) 2.3", [2.3]),
            # etc.; not all Read codes tested here
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class TshValidator(ValidatorBase):
    """
    Validator for TSH (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = Tsh.NAME
        regex_strings = [Tsh.TSH]
        return variable_name, regex_strings
846# =============================================================================
847# Alkaline phosphatase
848# =============================================================================
class AlkPhos(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs/BFTs).

    Alkaline phosphatase (ALP, AlkP, AlkPhos). Units are U/L.
    """

    NAME = "AlkPhos"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Free-text names: ALP / AlkP (optionally with a trailing "."), or
    # "alk"/"alk."/"alkaline" followed by "phos"/"phos."/"phosphatase".
    ALKP_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: ALk?P (?:\. | {WORD_BOUNDARY}) ) |
            (?:
                alk(?:aline | \.)?
                [-\s]*
                phos(?:phatase{WORD_BOUNDARY} | \. | {WORD_BOUNDARY})
            )
        )
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    ALKP = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALKPHOS_PLASMA,
            ReadCodes.ALKPHOS_SERUM,
            ReadCodes.ALKPHOS,  # least specific; at end
        ),
        ALKP_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(quantity=ALKP, units=UNITS_PER_L)
    UNIT_MAPPING = {
        UNITS_PER_L: 1,  # preferred unit
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "ALP-55" to give 55, not -55.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (input text, list of expected values in U/L).
        cases = [
            ("ALP", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("ALP 55", [55]),
            ("Alkaline-Phosphatase 55", [55]),
            ("Alkaline Phosphatase    55 U/L ", [55]),
            ("ALP 55 U/L", [55]),
            ("ALP-55", [55]),
            ("AlkP    55", [55]),
            ("alk.phos.    55", [55]),
            ("alk. phos.    55", [55]),
            ("alkphos    55", [55]),
            ("Alkaline phosphatase level (44F3.) 55", [55]),
            (
                "Alkaline phosphatase level (44F3x) 55",
                [],
            ),  # test "." in regex
            ("Plasma alkaline phosphatase level (XaIRj) 55", [55]),
            ("Serum alkaline phosphatase level (XE2px) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class AlkPhosValidator(ValidatorBase):
    """
    Validator for AlkPhos (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = AlkPhos.NAME
        regex_strings = [AlkPhos.ALKP]
        return variable_name, regex_strings
939# =============================================================================
940# Alanine aminotransferase (ALT)
941# =============================================================================
class ALT(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Alanine aminotransferase (ALT), a.k.a. alanine transaminase (ALT).
    Units are U/L.

    A.k.a. serum glutamate-pyruvate transaminase (SGPT), or serum
    glutamate-pyruvic transaminase (SGPT), but not a.k.a. those in recent
    memory!
    """

    NAME = "ALT"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Free-text names: "ALT", "alanine aminotransferase", "alanine
    # transaminase".
    ALT_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            ALT |
            alanine [-\s]+ (?: aminotransferase | transaminase )
        )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrase first, then the free-text names.
    # NB: this class attribute shares its name with the class itself.
    ALT = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALT,
        ),
        ALT_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(quantity=ALT, units=UNITS_PER_L)
    UNIT_MAPPING = {
        UNITS_PER_L: 1,  # preferred unit
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "ALT-55" to give 55, not -55.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (input text, list of expected values in U/L).
        cases = [
            ("ALT", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("ALT 55", [55]),
            ("alanine-aminotransferase 55", [55]),
            ("Alanine aminotransferase    55 U/L ", [55]),
            ("alanine transaminase    55 U/L ", [55]),
            ("ALT 55 U/L", [55]),
            ("ALT-55", [55]),
            ("ALP 55", []),  # wrong thing
            ("ALT/SGPT serum level (44G3.) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class ALTValidator(ValidatorBase):
    """
    Validator for ALT (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex
        # (the ``ALT`` attribute of the ``ALT`` class).
        variable_name = ALT.NAME
        regex_strings = [ALT.ALT]
        return variable_name, regex_strings
1024# =============================================================================
1025# Gamma GT (gGT)
1026# =============================================================================
class GammaGT(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Gamma-glutamyl transferase (gGT), in U/L.
    """

    NAME = "GammaGT"
    PREFERRED_UNIT_COLUMN = "value_U_L"

    # Free-text names: "γ" / "G" / "gamma", then "GT" or "glutamyl
    # transferase", with optional hyphens/whitespace between the two parts.
    GGT_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: γ | G | gamma)
            [-\s]*
            (?:
                GT |
                glutamyl [-\s]+ transferase
            )
        )
        {WORD_BOUNDARY}
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    GGT = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.GAMMA_GT,
            ReadCodes.GAMMA_GT_PLASMA,
            ReadCodes.GAMMA_GT_SERUM,
        ),
        GGT_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(quantity=GGT, units=UNITS_PER_L)
    UNIT_MAPPING = {
        UNITS_PER_L: 1,  # preferred unit
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            # The tests below expect e.g. "ggt-55" to give 55, not -55.
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (input text, list of expected values in U/L).
        cases = [
            ("gGT", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("gGT 55", [55]),
            ("gamma Glutamyl Transferase 19 U/L", [19]),
            ("Gamma GT    55 U/L ", [55]),
            ("GGT 55 U/L", [55]),
            ("ggt-55", [55]),
            ("γGT 55", [55]),
            ("Gamma-glutamyl transferase lev (44G4.) 55", [55]),
            ("Plasma gamma-glutamyl transferase level (XaES4) 55", [55]),
            ("Serum gamma-glutamyl transferase level (XaES3) 55", [55]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class GammaGTValidator(ValidatorBase):
    """
    Validator for GammaGT (see help for explanation).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Pair the parser's variable name with its quantity-name regex.
        variable_name = GammaGT.NAME
        regex_strings = [GammaGT.GGT]
        return variable_name, regex_strings
1111# =============================================================================
1112# Total bilirubin
1113# =============================================================================
class Bilirubin(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Total bilirubin. Units are μM.
    """

    NAME = "Bilirubin"
    PREFERRED_UNIT_COLUMN = "value_micromol_L"

    # Free-text names: optional "t." / "tot" / "tot." / "total" prefix, then
    # "bil" / "bili" / "bili." / "bilirubin".
    BILIRUBIN_BASE = rf"""
        {WORD_BOUNDARY}
        (?: t(?: ot(?:al | \.)? | \.) \s+ )?
        bili?(?: \. | rubin{WORD_BOUNDARY})?
    """
    # Quantity regex: Read-code phrases first, then the free-text names.
    BILIRUBIN = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.BILIRUBIN_PLASMA_TOTAL,
            ReadCodes.BILIRUBIN_SERUM,
            ReadCodes.BILIRUBIN_SERUM_TOTAL,
            ReadCodes.BILIRUBIN_TOTAL,
        ),
        BILIRUBIN_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    REGEX = make_simple_numeric_regex(
        quantity=BILIRUBIN,
        units=regex_or(
            MICROMOLAR,  # good
            MICROMOLES_PER_L,  # good
        ),
    )
    # Both accepted units map 1:1 onto the preferred unit (μM).
    UNIT_MAPPING = {
        MICROMOLAR: 1,  # preferred unit
        MICROMOLES_PER_L: 1,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        # see documentation above
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            variable=self.NAME,
            regex_str=self.REGEX,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (input text, list of expected values in μM).
        cases = [
            ("tot Bil", []),  # should fail; no values
            ("was 7", []),  # no quantity
            ("tot Bil 6", [6]),
            ("Total Bilirubin: 6", [6]),
            ("Total Bilirubin 6 umol/L", [6]),
            ("bilirubin 17 μM", [17]),
            ("t.bilirubin 17 μM", [17]),
            ("t. bilirubin 17 μM", [17]),
            ("bili. 17 μM", [17]),
            ("bili 17 μM", [17]),
            ("Plasma total bilirubin level (XaETf) 17", [17]),
            ("Serum bilirubin level (44E..) 17", [17]),
            ("Serum total bilirubin level (XaERu) 17", [17]),
            ("Total bilirubin level (XE2qu) 17", [17]),
            (
                "Total   bilirubin \t  level \n (XE2qu) 17",
                [17],
            ),  # test whitespace
            (
                "xTotal bilirubin level (XE2qu) 17",
                [],
            ),  # test word boundary
            ("Serum total bilirubin level (XaERu) 6 umol/L", [6]),
        ]
        self.test_numerical_parser(cases, verbose=verbose)
class BilirubinValidator(ValidatorBase):
    """
    Validator for Bilirubin (see help for explanation).

    Supplies the Bilirubin parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = Bilirubin.NAME
        regex_strings = [Bilirubin.BILIRUBIN]
        return variable_name, regex_strings
1210# =============================================================================
1211# Albumin (Alb)
1212# =============================================================================
class Albumin(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LFTs).

    Albumin (Alb). Units are g/L.

    Recognizes "alb", "alb." or "albumin", optionally followed by "level",
    as well as the Read-code descriptions for plasma and serum albumin.
    """

    # Free-text names for the quantity.
    # NOTE(review): the free spacing assumes the pattern is compiled in
    # verbose mode -- confirm in the regex helper/base class.
    ALBUMIN_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            alb(?:\. | umin{WORD_BOUNDARY})?
            (?: \s+ level{WORD_BOUNDARY})?
        )
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    ALBUMIN = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.ALBUMIN_PLASMA,
            ReadCodes.ALBUMIN_SERUM,
        ),
        ALBUMIN_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number, with g/L as the only unit.
    REGEX = make_simple_numeric_regex(quantity=ALBUMIN, units=G_PER_L)
    NAME = "Albumin"
    PREFERRED_UNIT_COLUMN = "value_g_L"
    # g/L is the preferred unit, so no scaling is applied.
    UNIT_MAPPING = {G_PER_L: 1}

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for albumin.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in superclass
        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            # Nothing should be extracted:
            ("Alb", []),  # quantity but no value
            ("was 7", []),  # value but no quantity
            ("ALP 6", []),  # a different quantity
            # Free-text forms:
            ("Alb 6", [6]),
            ("Albumin: 48", [48]),
            ("Albumin 48 g/L", [48]),
            ("alb. 48", [48]),
            ("albumin level 48", [48]),
            # Read-code forms:
            ("Plasma albumin level (XaIRc) 48", [48]),
            ("Serum albumin level (XE2eA) 48", [48]),
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class AlbuminValidator(ValidatorBase):
    """
    Validator for Albumin (see help for explanation).

    Supplies the Albumin parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = Albumin.NAME
        regex_strings = [Albumin.ALBUMIN]
        return variable_name, regex_strings
1290# =============================================================================
1291# Glucose
1292# =============================================================================
class Glucose(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    Glucose. Default units are mM; also supports mg/dL.

    Values given in mg/dL are converted to mmol/L using the molecular mass
    of glucose.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.

    # Free-text names for the quantity.
    # NOTE(review): the embedded comment and free spacing assume the pattern
    # is compiled in verbose mode -- confirm in the regex helper/base class.
    GLUCOSE_BASE = rf"""
        {WORD_BOUNDARY} glu(?:c(?:ose)?)? {WORD_BOUNDARY}
        # glu, gluc, glucose
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    GLUCOSE = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.GLUCOSE,
            ReadCodes.GLUCOSE_BLOOD,
            ReadCodes.GLUCOSE_BLOOD_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_BLOOD_150_MIN,
            ReadCodes.GLUCOSE_PLASMA_RANDOM,
            ReadCodes.GLUCOSE_PLASMA_FASTING,
            ReadCodes.GLUCOSE_PLASMA_30_MIN,
            ReadCodes.GLUCOSE_PLASMA_60_MIN,
            ReadCodes.GLUCOSE_PLASMA_90_MIN,
            ReadCodes.GLUCOSE_PLASMA_120_MIN,
            ReadCodes.GLUCOSE_PLASMA_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_PLASMA_150_MIN,
            ReadCodes.GLUCOSE_SERUM,
            ReadCodes.GLUCOSE_SERUM_RANDOM,
            ReadCodes.GLUCOSE_SERUM_FASTING,
            ReadCodes.GLUCOSE_SERUM_30_MIN,
            ReadCodes.GLUCOSE_SERUM_60_MIN,
            ReadCodes.GLUCOSE_SERUM_90_MIN,
            ReadCodes.GLUCOSE_SERUM_120_MIN,
            ReadCodes.GLUCOSE_SERUM_2H_POSTPRANDIAL,
            ReadCodes.GLUCOSE_SERUM_150_MIN,
            # !
        ),
        GLUCOSE_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity, an optional ignorable marker (OPTIONAL_POC),
    # then a number with one of the recognized units.
    REGEX = make_simple_numeric_regex(
        quantity=GLUCOSE,
        units=regex_or(
            MILLIMOLAR,  # needs no conversion
            MILLIMOLES_PER_L,  # needs no conversion
            MG_PER_DL,  # acceptable, but must be converted
        ),
        optional_ignorable_after_quantity=OPTIONAL_POC,
    )
    GLUCOSE_MOLECULAR_MASS_G_PER_MOL = 180.156
    # ... https://pubchem.ncbi.nlm.nih.gov/compound/D-glucose
    NAME = "Glucose"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    # Multiplier from each recognized unit to the preferred unit (mmol/L).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: factor_millimolar_from_mg_per_dl(
            GLUCOSE_MOLECULAR_MASS_G_PER_MOL
        ),
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for glucose.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # Expected result, in mM, for every "90 mg/dl" case below.
        mmol_for_90_mg_dl = millimolar_from_mg_per_dl(
            90, self.GLUCOSE_MOLECULAR_MASS_G_PER_MOL
        )

        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            ("glu", []),  # quantity but no value
            ("glucose 6 mM", [6]),
            ("glucose 6 mmol", [6]),
            ("glucose 6", [6]),
            ("glu 6", [6]),
            ("glucose 90 mg/dl", [mmol_for_90_mg_dl]),  # unit conversion
            ("gluc = 6", [6]),
            ("glucose: 6", [6]),
            ("glu equals 6", [6]),
            ("glucose is equal to 6", [6]),
            ("glu <4", [4]),
            ("glucose less than 1", [1]),  # would be bad news...
            ("glu more than 20", [20]),
            ("glucose was 15", [15]),
            ("glucose was 90 mg/dl", [mmol_for_90_mg_dl]),
            ("glu is 90 mg dl-1", [mmol_for_90_mg_dl]),
            ("glucose is 90 mg dl -1", [mmol_for_90_mg_dl]),
            ("glu-5", [5]),
            ("glucose | 20.3 (H) | mmol/L", [20.3]),
            ("Glucose level (X772y) 5", [5]),
            ("Blood glucose level (X772z) 5", [5]),
            # Not all Read codes tested.
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class GlucoseValidator(ValidatorBase):
    """
    Validator for Glucose (see help for explanation).

    Supplies the Glucose parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = Glucose.NAME
        regex_strings = [Glucose.GLUCOSE]
        return variable_name, regex_strings
1424# =============================================================================
1425# LDL cholesterol
1426# =============================================================================
class LDLCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Low density lipoprotein (LDL) cholesterol.
    Default units are mM; also supports mg/dL.

    Values given in mg/dL are converted to mmol/L with a fixed factor.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.

    # Free-text names: "LDL", optionally joined to "chol", "chol." or
    # "cholesterol" by any run of hyphens/whitespace (including none).
    # NOTE(review): the embedded comment and free spacing assume the pattern
    # is compiled in verbose mode -- confirm in the regex helper/base class.
    LDL_BASE = rf"""
        {WORD_BOUNDARY}
        LDL [-\s]*
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\. |
            {WORD_BOUNDARY}  # allows LDL by itself
        )
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    LDL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.LDL_PLASMA,
            ReadCodes.LDL_PLASMA_FASTING,
            ReadCodes.LDL_PLASMA_RANDOM,
            ReadCodes.LDL_SERUM,
            ReadCodes.LDL_SERUM_FASTING,
            ReadCodes.LDL_SERUM_RANDOM,
        ),
        LDL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number with a recognized unit.
    REGEX = make_simple_numeric_regex(
        quantity=LDL,
        units=regex_or(
            MILLIMOLAR,  # needs no conversion
            MILLIMOLES_PER_L,  # needs no conversion
            MG_PER_DL,  # acceptable, but must be converted
        ),
    )
    NAME = "LDL cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    # Multiplier from each recognized unit to the preferred unit (mmol/L).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for LDL cholesterol.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # Expected result, in mM, for every "140 mg/dl" case below.
        mmol_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            ("LDL", []),  # quantity but no value
            ("LDL 4 mM", [4]),
            ("LDL chol 4 mmol", [4]),
            ("LDL chol. 4 mmol", [4]),
            ("LDL 4", [4]),
            ("chol 4", []),  # that's total cholesterol
            ("HDL chol 4", []),  # that's HDL cholesterol
            ("LDL cholesterol 140 mg/dl", [mmol_for_140_mg_dl]),  # conversion
            ("LDL = 4", [4]),
            ("LDL: 4", [4]),
            ("LDL equals 4", [4]),
            ("LDL is equal to 4", [4]),
            ("LDL <4", [4]),
            ("LDLchol less than 4", [4]),
            ("LDL cholesterol more than 20", [20]),
            ("LDL was 4", [4]),
            ("LDL chol was 140 mg/dl", [mmol_for_140_mg_dl]),
            ("chol was 140 mg/dl", []),
            ("LDL is 140 mg dl-1", [mmol_for_140_mg_dl]),
            ("ldl chol is 140 mg dl -1", [mmol_for_140_mg_dl]),
            ("ldl-4", [4]),
            ("LDL chol | 6.2 (H) | mmol/L", [6.2]),
            # Read-code forms:
            ("Plasma LDL cholesterol level (XaEVs) 4", [4]),
            ("Plasma rndm LDL cholest level (44d4.) 4", [4]),
            ("Plasma fast LDL cholest level (44d5.) 4", [4]),
            ("Serum LDL cholesterol level (44P6.) 4", [4]),
            ("Serum fast LDL cholesterol lev (44PD.) 4", [4]),
            ("Ser random LDL cholesterol lev (44PE.) 4", [4]),
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class LDLCholesterolValidator(ValidatorBase):
    """
    Validator for LDLCholesterol (see help for explanation).

    Supplies the LDLCholesterol parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = LDLCholesterol.NAME
        regex_strings = [LDLCholesterol.LDL]
        return variable_name, regex_strings
1552# =============================================================================
1553# HDL cholesterol
1554# =============================================================================
class HDLCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    High-density lipoprotein (HDL) cholesterol.
    Default units are mM; also supports mg/dL.

    Values given in mg/dL are converted to mmol/L with a fixed factor.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.

    # Free-text names: "HDL", optionally joined to "chol", "chol." or
    # "cholesterol" by any run of hyphens/whitespace (including none).
    # NOTE(review): the embedded comment and free spacing assume the pattern
    # is compiled in verbose mode -- confirm in the regex helper/base class.
    HDL_BASE = rf"""
        {WORD_BOUNDARY}
        HDL [-\s]*
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\. |
            {WORD_BOUNDARY}  # allows HDL by itself
        )
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    HDL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.HDL_PLASMA,
            ReadCodes.HDL_PLASMA_FASTING,
            ReadCodes.HDL_PLASMA_RANDOM,
            ReadCodes.HDL_SERUM,
            ReadCodes.HDL_SERUM_FASTING,
            ReadCodes.HDL_SERUM_RANDOM,
        ),
        HDL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number with a recognized unit.
    REGEX = make_simple_numeric_regex(
        quantity=HDL,
        units=regex_or(
            MILLIMOLAR,  # needs no conversion
            MILLIMOLES_PER_L,  # needs no conversion
            MG_PER_DL,  # acceptable, but must be converted
        ),
    )
    NAME = "HDL cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    # Multiplier from each recognized unit to the preferred unit (mmol/L).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for HDL cholesterol.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # Expected result, in mM, for every "140 mg/dl" case below.
        mmol_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            ("HDL", []),  # quantity but no value
            ("HDL 4 mM", [4]),
            ("HDL chol 4 mmol", [4]),
            ("HDL chol. 4 mmol", [4]),
            ("HDL 4", [4]),
            ("chol 4", []),  # that's total cholesterol
            ("LDL chol 4", []),  # that's LDL cholesterol
            ("HDL cholesterol 140 mg/dl", [mmol_for_140_mg_dl]),  # conversion
            ("HDL = 4", [4]),
            ("HDL: 4", [4]),
            ("HDL equals 4", [4]),
            ("HDL is equal to 4", [4]),
            ("HDL <4", [4]),
            ("HDLchol less than 4", [4]),
            ("HDL cholesterol more than 20", [20]),
            ("HDL was 4", [4]),
            ("HDL chol was 140 mg/dl", [mmol_for_140_mg_dl]),
            ("chol was 140 mg/dl", []),
            ("HDL is 140 mg dl-1", [mmol_for_140_mg_dl]),
            ("Hdl chol is 140 mg dl -1", [mmol_for_140_mg_dl]),
            ("hdl-4", [4]),
            ("HDL chol | 6.2 (H) | mmol/L", [6.2]),
            # Read-code forms:
            ("Plasma HDL cholesterol level (XaEVr) 4", [4]),
            ("Plasma rndm HDL cholest level (44d2.) 4", [4]),
            ("Plasma fast HDL cholest level (44d3.) 4", [4]),
            ("Serum HDL cholesterol level (44P5.) 4", [4]),
            ("Serum fast HDL cholesterol lev (44PB.) 4", [4]),
            ("Ser random HDL cholesterol lev (44PC.) 4", [4]),
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class HDLCholesterolValidator(ValidatorBase):
    """
    Validator for HDLCholesterol (see help for explanation).

    Supplies the HDLCholesterol parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = HDLCholesterol.NAME
        regex_strings = [HDLCholesterol.HDL]
        return variable_name, regex_strings
1680# =============================================================================
1681# Total cholesterol
1682# =============================================================================
class TotalCholesterol(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Total or undifferentiated cholesterol.
    Default units are mM; also supports mg/dL.

    "chol"/"cholesterol" immediately preceded by "HDL" or "LDL" is rejected,
    so that this parser does not pick up HDL or LDL cholesterol results.
    Values given in mg/dL are converted to mmol/L with a fixed factor.
    """

    # Free-text names for the quantity.
    # - (?<! something ) is a negative lookbehind assertion.
    # - The "total" prefix is optional and may be abbreviated to "tot". The
    #   inner group needs its own "?": without it only the full word "total"
    #   could ever be absorbed into the quantity, and "tot chol" matched from
    #   "chol" onwards instead.
    # NOTE(review): variable-width lookbehind is not supported by the
    # standard-library "re" module; presumably these patterns are compiled
    # with the third-party "regex" module, in verbose mode -- confirm.
    CHOLESTEROL_BASE = rf"""
        {WORD_BOUNDARY}
        (?<!HDL[-\s]+) (?<!LDL[-\s]+)  # not preceded by HDL or LDL
        (?: tot(?:al)? [-\s] )?  # optional "tot"/"total" prefix
        (?:
            chol(?:esterol)?{WORD_BOUNDARY} |
            chol\.
        )
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    CHOLESTEROL = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.CHOLESTEROL_SERUM,
            ReadCodes.CHOLESTEROL_TOTAL_PLASMA,
            ReadCodes.CHOLESTEROL_TOTAL_SERUM,
        ),
        CHOLESTEROL_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number with a recognized unit.
    REGEX = make_simple_numeric_regex(
        quantity=CHOLESTEROL,
        units=regex_or(
            MILLIMOLAR,  # good
            MILLIMOLES_PER_L,  # good
            MG_PER_DL,  # good but needs conversion
        ),
    )
    NAME = "Total cholesterol"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.02586
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    # Multiplier from each recognized unit to the preferred unit (mmol/L).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,  # preferred unit
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for total cholesterol.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            commit=commit,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        def convert(mg_dl: float) -> float:
            # Convert mg/dl to mM
            return self.FACTOR_MG_DL_TO_MMOL_L * mg_dl

        self.test_numerical_parser(
            [
                ("chol", []),  # should fail; no values
                ("chol 4 mM", [4]),
                ("total chol 4 mmol", [4]),
                ("tot chol 4", [4]),  # abbreviated "total" prefix
                ("chol. 4 mmol", [4]),
                ("chol 4", [4]),
                ("HDL chol 4", []),  # that's HDL cholesterol
                ("LDL chol 4", []),  # that's LDL cholesterol
                (
                    "total cholesterol 140 mg/dl",
                    [convert(140)],
                ),  # unit conversion
                ("chol = 4", [4]),
                ("chol: 4", [4]),
                ("chol equals 4", [4]),
                ("chol is equal to 4", [4]),
                ("chol <4", [4]),
                ("chol less than 4", [4]),
                ("cholesterol more than 20", [20]),
                ("chol was 4", [4]),
                ("chol was 140 mg/dl", [convert(140)]),
                ("chol was 140", [140]),  # but probably wrong interpretation!
                ("chol is 140 mg dl-1", [convert(140)]),
                ("chol is 140 mg dl -1", [convert(140)]),
                ("chol-4", [4]),
                ("chol | 6.2 (H) | mmol/L", [6.2]),
                ("Serum cholesterol level (XE2eD) 4", [4]),
                ("Plasma total cholesterol level (XaIRd) 4", [4]),
                ("Serum total cholesterol level (XaJe9) 4", [4]),
            ],
            verbose=verbose,
        )
class TotalCholesterolValidator(ValidatorBase):
    """
    Validator for TotalCholesterol (see help for explanation).

    Supplies the TotalCholesterol parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = TotalCholesterol.NAME
        regex_strings = [TotalCholesterol.CHOLESTEROL]
        return variable_name, regex_strings
1801# =============================================================================
1802# Triglycerides
1803# =============================================================================
class Triglycerides(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY (LIPID PROFILE).

    Triglycerides.
    Default units are mM; also supports mg/dL.

    Values given in mg/dL are converted to mmol/L with a fixed factor.
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.

    # Free-text names: "triglyceride", "triglycerides" or "TG".
    # NOTE(review): the free spacing assumes the pattern is compiled in
    # verbose mode -- confirm in the regex helper/base class.
    TG_BASE = rf"""
        {WORD_BOUNDARY}
        (?: Triglyceride[s]? | TG )
        {WORD_BOUNDARY}
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    TG = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.TG,
            ReadCodes.TG_PLASMA,
            ReadCodes.TG_PLASMA_FASTING,
            ReadCodes.TG_PLASMA_RANDOM,
            ReadCodes.TG_SERUM,
            ReadCodes.TG_SERUM_FASTING,
            ReadCodes.TG_SERUM_RANDOM,
        ),
        TG_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number with a recognized unit.
    REGEX = make_simple_numeric_regex(
        quantity=TG,
        units=regex_or(
            MILLIMOLAR,  # needs no conversion
            MILLIMOLES_PER_L,  # needs no conversion
            MG_PER_DL,  # acceptable, but must be converted
        ),
    )
    NAME = "Triglycerides"
    PREFERRED_UNIT_COLUMN = "value_mmol_L"
    FACTOR_MG_DL_TO_MMOL_L = 0.01129  # reciprocal of 88.57
    # ... https://www.ncbi.nlm.nih.gov/books/NBK33478/
    # ... https://www.ncbi.nlm.nih.gov/books/NBK83505/
    # Multiplier from each recognized unit to the preferred unit (mmol/L).
    UNIT_MAPPING = {
        MILLIMOLAR: 1,
        MILLIMOLES_PER_L: 1,
        MG_PER_DL: FACTOR_MG_DL_TO_MMOL_L,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for triglycerides.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # Expected result, in mM, for every "140 mg/dl" case below.
        mmol_for_140_mg_dl = self.FACTOR_MG_DL_TO_MMOL_L * 140

        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            ("TG", []),  # quantity but no value
            ("triglycerides", []),  # quantity but no value
            ("TG 4 mM", [4]),
            ("triglycerides 4 mmol", [4]),
            ("triglyceride 4 mmol", [4]),
            ("TG 4", [4]),
            ("TG 140 mg/dl", [mmol_for_140_mg_dl]),  # unit conversion
            ("TG = 4", [4]),
            ("TG: 4", [4]),
            ("TG equals 4", [4]),
            ("TG is equal to 4", [4]),
            ("TG <4", [4]),
            ("TG less than 4", [4]),
            ("TG more than 20", [20]),
            ("TG was 4", [4]),
            ("TG was 140 mg/dl", [mmol_for_140_mg_dl]),
            ("TG was 140", [140]),  # but probably wrong interpretation!
            ("TG is 140 mg dl-1", [mmol_for_140_mg_dl]),
            ("TG is 140 mg dl -1", [mmol_for_140_mg_dl]),
            ("TG-4", [4]),
            ("triglycerides | 6.2 (H) | mmol/L", [6.2]),
            # Read-code forms:
            ("Triglyceride level (X772O) 4", [4]),
            ("Plasma triglyceride level (44e..) 4", [4]),
            ("Plasma rndm triglyceride level (44e0.) 4", [4]),
            ("Plasma fast triglyceride level (44e1.) 4", [4]),
            ("Serum triglyceride levels (XE2q9) 4", [4]),
            ("Serum fasting triglyceride lev (44Q4.) 4", [4]),
            ("Serum random triglyceride lev (44Q5.) 4", [4]),
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class TriglyceridesValidator(ValidatorBase):
    """
    Validator for Triglycerides (see help for explanation).

    Supplies the Triglycerides parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = Triglycerides.NAME
        regex_strings = [Triglycerides.TG]
        return variable_name, regex_strings
1924# =============================================================================
1925# HbA1c
1926# =============================================================================
def hba1c_mmol_per_mol_from_percent(
    percent: Union[float, str]
) -> Optional[float]:
    """
    Convert an HbA1c value from the older percentage units to IFCC mmol/mol
    units (mmol HbA1c / mol Hb).

    The percentage units are those of the DCCT (Diabetes Control and
    Complications Trial), UKPDS (United Kingdom Prospective Diabetes Study)
    or NGSP (National Glycohemoglobin Standardization Program); IFCC is the
    International Federation of Clinical Chemistry.

    Args:
        percent: DCCT value as a percentage; either a number, or a string
            (which is converted with ``to_float`` first)

    Returns:
        IFCC value in mmol/mol, or ``None`` if the value is falsy (e.g. zero;
        presumably also ``None`` from a failed string conversion)

    Example: 5% becomes 31.1 mmol/mol.

    By Emanuele Osimo, Feb 2019.
    Some modifications by Rudolf Cardinal, Feb 2019.

    References:

    - Emanuele had mmol_per_mol = (percent - 2.14) * 10.929 -- primary source
      awaited.
    - Jeppsson 2002, https://www.ncbi.nlm.nih.gov/pubmed/11916276 -- no, that's
      the chemistry
    - https://www.ifcchba1c.org/
    - http://www.ngsp.org/ifccngsp.asp -- gives master equation of
      NGSP = [0.09148 × IFCC] + 2.152), therefore implying
      IFCC = (NGSP – 2.152) × 10.93135.
    - Little & Rohlfing 2013: https://www.ncbi.nlm.nih.gov/pubmed/23318564;
      also gives NGSP = [0.09148 * IFCC] + 2.152.

    Note also that you may see eAG values (estimated average glucose), in
    mmol/L or mg/dl; see http://www.ngsp.org/A1ceAG.asp; these are not direct
    measurements of HbA1c.

    """
    value = to_float(percent) if isinstance(percent, str) else percent
    if not value:
        return None
    # The sign is discarded: text like "HbA1c-8%" is read as -8.
    return (abs(value) - 2.152) * 10.93135
class HbA1c(SimpleNumericalResultParser):
    """
    BIOCHEMISTRY.

    Glycosylated (glycated) haemoglobin (HbA1c).
    Default units are mmol/mol; also supports %.

    Values in mmol/L or mg/dL are recognized by the regex but have no
    conversion in ``UNIT_MAPPING``, since they may be eAG values rather than
    HbA1c itself.

    Note: HbA1 is different
    (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2541274).
    """

    # By Emanuele Osimo, Feb 2019.
    # Some modifications by Rudolf Cardinal, Feb 2019.

    # Free-text names: "HbA1c", or "glycated"/"glycosylated" followed by
    # "haemoglobin", "hemoglobin" or "Hb".
    # NOTE(review): the free spacing assumes the pattern is compiled in
    # verbose mode -- confirm in the regex helper/base class.
    HBA1C_BASE = rf"""
        {WORD_BOUNDARY}
        (?:
            (?: Glyc(?:osyl)?ated [-\s]+ (?:ha?emoglobin|Hb) ) |
            HbA1c
        )
        {WORD_BOUNDARY}
    """
    # Quantity regex: any of the Read-code forms, or the free-text form.
    HBA1C = regex_or(
        *regex_components_from_read_codes(
            ReadCodes.HBA1C,
            ReadCodes.HBA1C_DCCT,
            ReadCodes.HBA1C_IFCC,
        ),
        HBA1C_BASE,
        wrap_each_in_noncapture_group=True,
        wrap_result_in_noncapture_group=False,
    )
    # Full regex: quantity followed by a number with a recognized unit.
    REGEX = make_simple_numeric_regex(
        quantity=HBA1C,
        units=regex_or(
            MILLIMOLES_PER_MOL,  # the standard unit
            PERCENT,  # acceptable, but must be converted
            MILLIMOLES_PER_L,  # not usable; may be an eAG value
            MG_PER_DL,  # not usable; may be an eAG value
        ),
    )
    NAME = "HBA1C"
    PREFERRED_UNIT_COLUMN = "value_mmol_mol"
    # Conversion to the preferred unit (mmol/mol). The percentage entry is a
    # function rather than a multiplier. MILLIMOLES_PER_L and MG_PER_DL are
    # deliberately left out.
    UNIT_MAPPING = {
        MILLIMOLES_PER_MOL: 1,
        PERCENT: hba1c_mmol_per_mol_from_percent,
    }

    def __init__(
        self,
        nlpdef: Optional[NlpDefinition],
        cfg_processor_name: Optional[str],
        commit: bool = False,
    ) -> None:
        """
        Set up the base numerical parser for HbA1c.

        The three arguments are described in the module docstring; everything
        else comes from this class's attributes.
        """
        super().__init__(
            nlpdef=nlpdef,
            cfg_processor_name=cfg_processor_name,
            commit=commit,
            regex_str=self.REGEX,
            variable=self.NAME,
            target_unit=self.PREFERRED_UNIT_COLUMN,
            units_to_factor=self.UNIT_MAPPING,
            take_absolute=True,
        )

    def test(self, verbose: bool = False) -> None:
        # docstring in parent class

        # Expected results, in mmol/mol, for the percentage cases below.
        mmol_mol_for_8_percent = hba1c_mmol_per_mol_from_percent(8)
        mmol_mol_for_15_percent = hba1c_mmol_per_mol_from_percent(15)

        # Each case is (text, values expected to be extracted from it).
        test_cases = [
            ("HbA1c", []),  # quantity but no value
            ("glycosylated haemoglobin", []),  # quantity but no value
            ("HbA1c 31", [31]),
            ("HbA1c 31 mmol/mol", [31]),
            ("HbA1c 31 mg/dl", []),  # wrong units
            ("HbA1c 31 mmol/L", []),  # wrong units
            ("glycosylated haemoglobin 31 mmol/mol", [31]),
            ("glycated hemoglobin 31 mmol/mol", [31]),
            ("HbA1c 8%", [mmol_mol_for_8_percent]),
            ("HbA1c = 8%", [mmol_mol_for_8_percent]),
            ("HbA1c: 31", [31]),
            ("HbA1c equals 31", [31]),
            ("HbA1c is equal to 31", [31]),
            ("HbA1c <31.2", [31.2]),
            ("HbA1c less than 4", [4]),
            ("HbA1c more than 20", [20]),
            ("HbA1c was 31", [31]),
            ("HbA1c was 15%", [mmol_mol_for_15_percent]),
            ("HbA1c-31", [31]),
            ("HbA1c-8%", [mmol_mol_for_8_percent]),
            ("HbA1c | 40 (H) | mmol/mol", [40]),
            # Read-code forms:
            ("Haemoglobin A1c level (X772q) 8%", [mmol_mol_for_8_percent]),
            (
                "HbA1c level (DCCT aligned) (XaERp) 8%",
                [mmol_mol_for_8_percent],
            ),
            ("HbA1c levl - IFCC standardised (XaPbt) 31 mmol/mol", [31]),
        ]
        self.test_numerical_parser(test_cases, verbose=verbose)
class HbA1cValidator(ValidatorBase):
    """
    Validator for HbA1c (see help for explanation).

    Supplies the HbA1c parser's variable name together with its
    quantity-only regex (i.e. without the numeric/units part).
    """

    @classmethod
    def get_variablename_regexstrlist(cls) -> Tuple[str, List[str]]:
        # Returns (variable name, list of regex strings).
        variable_name = HbA1c.NAME
        regex_strings = [HbA1c.HBA1C]
        return variable_name, regex_strings
2091# =============================================================================
2092# All classes in this module
2093# =============================================================================
# Registry of this module's processors: each entry pairs an NLP parser class
# with its matching validator class. Entries are kept in alphabetical order.
ALL_BIOCHEMISTRY_NLP_AND_VALIDATORS = [
    (Albumin, AlbuminValidator),
    (AlkPhos, AlkPhosValidator),
    (ALT, ALTValidator),
    (Bilirubin, BilirubinValidator),
    (Creatinine, CreatinineValidator),
    (Crp, CrpValidator),
    (GammaGT, GammaGTValidator),
    (Glucose, GlucoseValidator),
    (HbA1c, HbA1cValidator),
    (HDLCholesterol, HDLCholesterolValidator),
    (LDLCholesterol, LDLCholesterolValidator),
    (Lithium, LithiumValidator),
    (Potassium, PotassiumValidator),
    (Sodium, SodiumValidator),
    (TotalCholesterol, TotalCholesterolValidator),
    (Triglycerides, TriglyceridesValidator),
    (Tsh, TshValidator),
    (Urea, UreaValidator),
]