Coverage for nlp_manager/tests/regex_units_tests.py: 100%
64 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/tests/regex_units_tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Unit tests.
28"""
30import unittest
32from crate_anon.nlp_manager.regex_units import (
33 # ---------------------------------------------------------------------
34 # Relationships
35 # ---------------------------------------------------------------------
36 out_of,
37 per,
38 # ---------------------------------------------------------------------
39 # Distance
40 # ---------------------------------------------------------------------
41 CM,
42 FEET,
43 INCHES,
44 M,
45 MM,
46 # ---------------------------------------------------------------------
47 # Mass
48 # ---------------------------------------------------------------------
49 G,
50 KG,
51 LB,
52 MCG,
53 MG,
54 STONES,
55 # ---------------------------------------------------------------------
56 # Volume
57 # ---------------------------------------------------------------------
58 CUBIC_MM,
59 CUBIC_MM_OR_MICROLITRE,
60 DL,
61 FEMTOLITRE,
62 L,
63 MICROLITRE,
64 ML,
65 # ---------------------------------------------------------------------
66 # Reciprocal volume
67 # ---------------------------------------------------------------------
68 PER_CUBIC_MM,
69 # ---------------------------------------------------------------------
70 # Time
71 # ---------------------------------------------------------------------
72 HOUR,
73 # ---------------------------------------------------------------------
74 # Proportion
75 # ---------------------------------------------------------------------
76 PERCENT,
77 # -------------------------------------------------------------------------
78 # Arbitrary count things
79 # -------------------------------------------------------------------------
80 CELLS,
81 MICROUNITS,
82 MILLIUNITS,
83 SCORE,
84 UNITS,
85 # -------------------------------------------------------------------------
86 # Moles
87 # -------------------------------------------------------------------------
88 MICROEQ,
89 MICROMOLES,
90 MILLIEQ,
91 MILLIMOLES,
92 MOLES,
93 # -------------------------------------------------------------------------
94 # Concentration (molarity)
95 # -------------------------------------------------------------------------
96 MILLIMOLAR,
97 MILLIMOLES_PER_L,
98 MICROEQ_PER_L,
99 MICROMOLAR,
100 MICROMOLES_PER_L,
101 MILLIEQ_PER_L,
102 # -------------------------------------------------------------------------
103 # Concentration (mass)
104 # -------------------------------------------------------------------------
105 G_PER_DL,
106 G_PER_L,
107 MG_PER_DL,
108 MG_PER_L,
109 # -------------------------------------------------------------------------
110 # Concentration (arbitrary count and dimensionless things)
111 # -------------------------------------------------------------------------
112 BILLION_PER_L,
113 CELLS_PER_CUBIC_MM,
114 CELLS_PER_CUBIC_MM_OR_MICROLITRE,
115 L_PER_L,
116 MICROUNITS_PER_ML,
117 MILLIMOLES_PER_MOL,
118 MILLIUNITS_PER_L,
119 TRILLION_PER_L,
120 UNITS_PER_L,
121 # -------------------------------------------------------------------------
122 # Speed
123 # -------------------------------------------------------------------------
124 MM_PER_H,
125 # -------------------------------------------------------------------------
126 # Pressure
127 # -------------------------------------------------------------------------
128 MM_HG,
129 # -------------------------------------------------------------------------
130 # Area and related
131 # -------------------------------------------------------------------------
132 SQ_M,
133 KG_PER_SQ_M,
134)
135from crate_anon.nlp_manager.tests.regex_test_helperfunc import (
136 assert_text_regex,
137)
140# =============================================================================
141# Unit tests
142# =============================================================================
class UnitRegexesTests(unittest.TestCase):
    """
    Test case exercising the unit regexes imported from ``regex_units``.
    """

    @staticmethod
    def test_unit_regexes() -> None:
        """
        Run every "unit" regex through ``assert_text_regex``.

        Each check supplies a label, the regex under test, and a list of
        ``(text, strings)`` pairs; the second element is presumably the list
        of matches expected within ``text`` (see ``assert_text_regex`` for
        the exact contract).
        """

        def check(name, regex, cases):
            # One place that pins verbose=True for every check below.
            assert_text_regex(name, regex, cases, verbose=True)

        # ---------------------------------------------------------------------
        # Relationships
        # ---------------------------------------------------------------------

        check("out_of(5)", out_of(5), [
            ("4 out of 5", ["out of 5"]),
            ("4/5", ["/5"]),
            ("4 / 5", ["/ 5"]),
        ])
        check("per(n, d)", per("n", "d"), [
            ("blah n per d blah", ["n per d"]),
            ("blah n/d blah", ["n/d"]),
            ("n / d", ["n / d"]),
            ("n d -1", ["n d -1"]),
            ("n d -1", ["n d -1"]),
            ("n blah d", []),
        ])
        check(
            "per(n, d, numerator_optional=True)",
            per("n", "d", numerator_optional=True),
            [
                ("blah n per d blah", ["n per d"]),
                ("blah n/d blah", ["n/d"]),
                ("n / d", ["n / d"]),
                ("n d -1", ["n d -1"]),
                ("n d -1", ["n d -1"]),
                ("n blah d", []),
                ("/ d", ["/ d"]),
                (" / d", ["/ d"]),
                (" per d", ["per d"]),
            ],
        )

        # ---------------------------------------------------------------------
        # Distance
        # ---------------------------------------------------------------------

        check("CM", CM, [
            ("5 centimetres long", ["centimetres"]),
            ("5 centimeters long", ["centimeters"]),
            ("5cm long", ["cm"]),
        ])
        check("FEET", FEET, [
            ("5 feet long", ["feet"]),
            ("5 foot long", ["foot"]),
            ("5' long", ["'"]),  # ASCII apostrophe
            ("5’ long", ["’"]),  # right single quotation mark (U+2019)
            ("5′ long", ["′"]),  # prime (U+2032)
        ])
        check("INCHES", INCHES, [
            ("5 inches long", ["inches"]),
            ("5 in long", ["in"]),
            ('5" long', ['"']),  # ASCII double quote
            ("5” long", ["”"]),  # right double quotation mark (U+201D)
            ("5″ long", ["″"]),  # double prime (U+2033)
        ])
        check("M", M, [
            ("5 metres long", ["metres"]),
            ("5 meters long", ["meters"]),
            ("5m long", ["m"]),
        ])
        check("MM", MM, [
            ("5 millimetres long", ["millimetres"]),
            ("5 millimeters long", ["millimeters"]),
            ("5mm long", ["mm"]),
        ])

        # ---------------------------------------------------------------------
        # Mass
        # ---------------------------------------------------------------------

        check("G", G, [
            ("5 grams", ["grams"]),
            ("5 g", ["g"]),
        ])
        check("KG", KG, [
            ("5 kilograms", ["kilograms"]),
            ("5 kg", ["kg"]),
        ])
        check("LB", LB, [
            ("5 pounds", ["pounds"]),
            ("5 lb", ["lb"]),
        ])
        check("MCG", MCG, [
            ("5 micrograms", ["micrograms"]),
            ("5 mcg", ["mcg"]),
            ("5 ug", ["ug"]),
            ("5 μg", ["μg"]),
        ])
        check("MG", MG, [
            ("5 milligrams", ["milligrams"]),
            ("5 mg", ["mg"]),
        ])
        check("STONES", STONES, [
            ("5 stones", ["stones"]),
            ("5 stone", ["stone"]),
            ("5 st", ["st"]),
        ])

        # ---------------------------------------------------------------------
        # Volume
        # ---------------------------------------------------------------------

        check("CUBIC_MM", CUBIC_MM, [
            ("mm3", ["mm3"]),
            ("blibble", []),
            ("5 mm^3", ["mm^3"]),
            ("5 cubic mm", ["cubic mm"]),
            ("5 cubic millimetres", ["cubic millimetres"]),
        ])
        check("CUBIC_MM_OR_MICROLITRE", CUBIC_MM_OR_MICROLITRE, [
            ("5 mm^3", ["mm^3"]),
            ("5 cubic mm", ["cubic mm"]),
            ("5 cubic millimetres", ["cubic millimetres"]),
            ("5 microlitre", ["microlitre"]),
            ("5 microL", ["microL"]),
            ("5 microliters", ["microliters"]),
            ("5 μL", ["μL"]),
            ("5 ul", ["ul"]),
        ])
        check("DL", DL, [
            ("5 decilitres", ["decilitres"]),
            ("5 deciliters", ["deciliters"]),
            ("5 dl", ["dl"]),
            ("5 dL", ["dL"]),
        ])
        check("FEMTOLITRE", FEMTOLITRE, [
            ("5 femtolitres", ["femtolitres"]),
            ("5 femtoliters", ["femtoliters"]),
            ("5 fl", ["fl"]),
            ("5 fL", ["fL"]),
        ])
        check("L", L, [
            ("5 litres", ["litres"]),
            ("5 liters", ["liters"]),
            ("5 l", ["l"]),
            ("5 L", ["L"]),
        ])
        check("MICROLITRE", MICROLITRE, [
            ("5 microlitre", ["microlitre"]),
            ("5 microL", ["microL"]),
            ("5 microliters", ["microliters"]),
            ("5 μL", ["μL"]),
            ("5 ul", ["ul"]),
        ])
        check("ML", ML, [
            ("5 millilitres", ["millilitres"]),
            ("5 milliliters", ["milliliters"]),
            ("5 ml", ["ml"]),
            ("5 mL", ["mL"]),
        ])

        # ---------------------------------------------------------------------
        # Reciprocal volume
        # ---------------------------------------------------------------------

        check("PER_CUBIC_MM", PER_CUBIC_MM, [
            ("per cubic mm", ["per cubic mm"]),
            ("5/mm^3", ["/mm^3"]),
            ("5 per cubic mm", ["per cubic mm"]),
            ("5 per cubic millimetres", ["per cubic millimetres"]),
        ])

        # ---------------------------------------------------------------------
        # Time
        # ---------------------------------------------------------------------

        check("HOUR", HOUR, [
            ("5 hours", ["hours"]),
            ("5 hr", ["hr"]),
            ("5 h", ["h"]),
        ])

        # ---------------------------------------------------------------------
        # Proportion
        # ---------------------------------------------------------------------

        check("PERCENT", PERCENT, [
            ("5 percent", ["percent"]),
            ("5 per cent", ["per cent"]),
            ("5 pct", ["pct"]),
            ("5%", ["%"]),
        ])

        # ---------------------------------------------------------------------
        # Arbitrary count things
        # ---------------------------------------------------------------------

        check("CELLS", CELLS, [
            ("cells", ["cells"]),
            ("blibble", []),
            ("5 cells", ["cells"]),
            ("5 cell", ["cell"]),
        ])
        check("MICROUNITS", MICROUNITS, [
            ("5 uU", ["uU"]),
            ("5 μU", ["μU"]),
            ("5 uIU", ["uIU"]),
            ("5 μIU", ["μIU"]),
        ])
        check("MILLIUNITS", MILLIUNITS, [
            ("5 mU", ["mU"]),
            ("5 mIU", ["mIU"]),
        ])
        check("SCORE", SCORE, [
            ("I scored 5", ["scored"]),
            ("MMSE score 5", ["score"]),
        ])
        check("UNITS", UNITS, [
            ("5 U", ["U"]),
            ("5 IU", ["IU"]),
        ])

        # ---------------------------------------------------------------------
        # Moles
        # ---------------------------------------------------------------------

        check("MICROEQ", MICROEQ, [
            ("5 μEq", ["μEq"]),
            ("5 uEq", ["uEq"]),
        ])
        check("MICROMOLES", MICROMOLES, [
            ("5 micromoles", ["micromoles"]),
            ("5 micromol", ["micromol"]),
            ("5 umol", ["umol"]),
            ("5 μmol", ["μmol"]),
        ])
        check("MILLIEQ", MILLIEQ, [
            ("5 mEq", ["mEq"]),
        ])
        check("MILLIMOLES", MILLIMOLES, [
            ("5 millimoles", ["millimoles"]),
            ("5 millimol", ["millimol"]),
            ("5 mmol", ["mmol"]),
        ])
        check("MOLES", MOLES, [
            ("5 moles", ["moles"]),
            ("5 mol", ["mol"]),
        ])

        # ---------------------------------------------------------------------
        # Concentration (molarity)
        # ---------------------------------------------------------------------

        check("MILLIMOLAR", MILLIMOLAR, [
            ("5 mM", ["mM"]),
        ])
        check("MILLIMOLES_PER_L", MILLIMOLES_PER_L, [
            ("5 mmol/L", ["mmol/L"]),
            ("5 millimoles per litre", ["millimoles per litre"]),
        ])
        check("MICROEQ_PER_L", MICROEQ_PER_L, [
            ("5 μEq/L", ["μEq/L"]),
            ("5 microequivalents per litre", []),  # not supported
            ("5 microEq per litre", ["microEq per litre"]),
        ])
        check("MICROMOLAR", MICROMOLAR, [
            ("5 micromolar", ["micromolar"]),
            ("5 μM", ["μM"]),
            ("5 uM", ["uM"]),
        ])
        check("MICROMOLES_PER_L", MICROMOLES_PER_L, [
            ("5 micromol/L", ["micromol/L"]),
            ("5 micromoles/litre", ["micromoles/litre"]),
            ("5 umol/L", ["umol/L"]),
            ("5 μmol/L", ["μmol/L"]),
        ])
        check("MILLIEQ_PER_L", MILLIEQ_PER_L, [
            ("5 mEq/L", ["mEq/L"]),
            ("5 milliequivalents per litre", []),  # not supported
            ("5 milliEq per litre", ["milliEq per litre"]),
        ])

        # ---------------------------------------------------------------------
        # Concentration (mass)
        # ---------------------------------------------------------------------

        check("G_PER_DL", G_PER_DL, [
            ("5 g/dL", ["g/dL"]),
            ("5 grams per deciliter", ["grams per deciliter"]),
        ])
        check("G_PER_L", G_PER_L, [
            ("5 g/L", ["g/L"]),
            ("5 g L-1", ["g L-1"]),
            ("5 grams per liter", ["grams per liter"]),
        ])
        check("MG_PER_DL", MG_PER_DL, [
            ("5 mg/dL", ["mg/dL"]),
            ("5 milligrams per deciliter", ["milligrams per deciliter"]),
        ])
        check("MG_PER_L", MG_PER_L, [
            ("5 mg/L", ["mg/L"]),
            ("5 mg L-1", ["mg L-1"]),
            ("5 milligrams per liter", ["milligrams per liter"]),
        ])

        # ---------------------------------------------------------------------
        # Concentration (arbitrary count and dimensionless things)
        # ---------------------------------------------------------------------

        check("BILLION_PER_L", BILLION_PER_L, [
            ("5 × 10^9/L", ["× 10^9/L"]),
            ("5 * 10e9/L", ["* 10e9/L"]),
            ("5 x 10e9 per litre", ["x 10e9 per litre"]),
        ])
        check("CELLS_PER_CUBIC_MM", CELLS_PER_CUBIC_MM, [
            ("cells/mm3", ["cells/mm3"]),
            ("blibble", []),
            ("9800 / mm3", ["/ mm3"]),
            ("9800 cell/mm3", ["cell/mm3"]),
            ("9800 cells/mm3", ["cells/mm3"]),
            ("9800 cells per cubic mm", ["cells per cubic mm"]),
            ("9800 per cubic mm", ["per cubic mm"]),
            ("9800 per cmm", ["per cmm"]),
        ])
        check(
            "CELLS_PER_CUBIC_MM_OR_MICROLITRE",
            CELLS_PER_CUBIC_MM_OR_MICROLITRE,
            [
                ("9800 / mm3", ["/ mm3"]),
                ("9800 cell/mm3", ["cell/mm3"]),
                ("9800 cells/mm3", ["cells/mm3"]),
                ("9800 cells per cubic mm", ["cells per cubic mm"]),
                ("9800 per cubic mm", ["per cubic mm"]),
                ("9800 per cmm", ["per cmm"]),
                ("9800 per μL", ["per μL"]),
                ("9800 per microliter", ["per microliter"]),
                ("9800 / microlitre", ["/ microlitre"]),
            ],
        )
        check("L_PER_L", L_PER_L, [
            ("5 L/L", ["L/L"]),
            ("5 l/l", ["l/l"]),
            ("5 litre per liter", ["litre per liter"]),
        ])
        check("MICROUNITS_PER_ML", MICROUNITS_PER_ML, [
            ("5 microunits/mL", ["microunits/mL"]),
            ("5 microU/millilitre", ["microU/millilitre"]),
            ("5 uU/mL", ["uU/mL"]),
            ("5 μIU/ml", ["μIU/ml"]),
        ])
        check("MILLIMOLES_PER_MOL", MILLIMOLES_PER_MOL, [
            ("5 mmol/mol", ["mmol/mol"]),
            ("5 millimoles per mole", ["millimoles per mole"]),
        ])
        check("MILLIUNITS_PER_L", MILLIUNITS_PER_L, [
            ("5 milliunits/L", ["milliunits/L"]),
            ("5 milliU/litre", ["milliU/litre"]),
            ("5 mIU/litre", ["mIU/litre"]),
            ("5 mU/L", ["mU/L"]),
        ])
        check("TRILLION_PER_L", TRILLION_PER_L, [
            ("5 × 10^12/L", ["× 10^12/L"]),
            ("5 * 10e12/L", ["* 10e12/L"]),
            ("5 x 10e12 per litre", ["x 10e12 per litre"]),
        ])
        check("UNITS_PER_L", UNITS_PER_L, [
            ("5 units/L", ["units/L"]),
            ("5 U/litre", ["U/litre"]),
            ("5 U/L", ["U/L"]),
        ])

        # ---------------------------------------------------------------------
        # Speed
        # ---------------------------------------------------------------------

        check("MM_PER_H", MM_PER_H, [
            ("5 mm/h", ["mm/h"]),
            ("5 mm per h", ["mm per h"]),
        ])

        # ---------------------------------------------------------------------
        # Pressure
        # ---------------------------------------------------------------------

        check("MM_HG", MM_HG, [
            ("5 mmHg", ["mmHg"]),
            ("5 mm Hg", ["mm Hg"]),
        ])

        # ---------------------------------------------------------------------
        # Area and related
        # ---------------------------------------------------------------------

        check("SQ_M", SQ_M, [
            ("5 square metres", ["square metres"]),
            ("5 sq m", ["sq m"]),
            ("5 m^2", ["m^2"]),
        ])
        check("KG_PER_SQ_M", KG_PER_SQ_M, [
            ("5 kg per square metre", ["kg per square metre"]),
            ("5 kg/sq m", ["kg/sq m"]),
            ("5 kg/m^2", ["kg/m^2"]),
            ("5 kg*m^-2", ["kg*m^-2"]),
        ])