Coverage for nlp_manager/tests/regex_test_helperfunc.py: 87%
31 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/tests/regex_test_helperfunc.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Functions for testing regular expressions.**
28"""
30import logging
32from typing import List, Pattern, Tuple
33from crate_anon.nlp_manager.regex_func import compile_regex
34from crate_anon.nlp_manager.regex_parser import BaseNlpParser, ValidatorBase
36log = logging.getLogger(__name__)
def f_score(precision: float, recall: float, beta: float = 1) -> float:
    """
    Calculates an F score (e.g. an F1 score for ``beta == 1``).
    See https://en.wikipedia.org/wiki/F1_score.

    Args:
        precision: precision of the test, P(really positive | test positive)
        recall: recall of the test, P(test positive | really positive)
        beta: controls the type of the F score (the relative emphasis on
            precision versus recall)

    Returns:
        the F score; 0.0 if the denominator
        ``beta ** 2 * precision + recall`` is zero (for example when
        precision and recall are both zero)

    """
    beta_sq = beta**2
    denominator = (beta_sq * precision) + recall
    if denominator == 0:
        # Avoid ZeroDivisionError: with a zero denominator the numerator is
        # zero as well for non-negative inputs, so report the worst score.
        return 0.0
    return (1 + beta_sq) * precision * recall / denominator
def get_compiled_regex_results(
    compiled_regex: Pattern, text: str
) -> List[str]:
    """
    Finds all the hits for a regex when applied to text.

    Args:
        compiled_regex: a compiled regular expression
        text: text to parse

    Returns:
        a list of all the (entire) hits for this regex in ``text``, in the
        order in which they occur; empty if there are no hits

    """
    # group(0) is the whole match, regardless of any capture groups.
    return [match.group(0) for match in compiled_regex.finditer(text)]
def print_compiled_regex_results(
    compiled_regex: Pattern, text: str, prefix_spaces: int = 4
) -> None:
    """
    Runs a regex over some text and writes the hits to stdout.

    One line is printed: the indentation, the ``repr`` of ``text``, an arrow,
    and the ``repr`` of the list of hits.

    Args:
        compiled_regex: a compiled regular expression
        text: text to parse
        prefix_spaces: number of spaces to begin the printed line with
    """
    hits = get_compiled_regex_results(compiled_regex, text)
    indent = " " * prefix_spaces
    print(f"{indent}{text!r} -> {hits!r}")
def assert_text_regex(
    name: str,
    regex_text: str,
    test_expected_list: List[Tuple[str, List[str]]],
    verbose: bool = False,
) -> None:
    """
    Test a regex upon some text.

    Args:
        name: regex name (for display purposes only)
        regex_text: text that should be compiled to give our regex
        test_expected_list:
            list of tuples ``teststring, expected_results``, where
            ``teststring`` is some text and ``expected_results`` is a list of
            expected hits for the regex within ``teststring``
        verbose: be verbose? (If so, the regex text is logged at debug
            level.)

    Raises:
        AssertionError: if, for any test string, the hits found differ from
            the expected hits

    """
    log.info(f"Testing regex named {name}")
    compiled_regex = compile_regex(regex_text)
    if verbose:
        log.debug(f"... regex text:\n{regex_text}")
    for test_string, expected_values in test_expected_list:
        actual_values = get_compiled_regex_results(compiled_regex, test_string)
        # Raise explicitly rather than via "assert", so that the check is
        # still performed when Python runs with optimization (-O).
        if actual_values != expected_values:
            raise AssertionError(
                f"Regex {name}: Expected {expected_values}, "
                f"got {actual_values}, when parsing {test_string!r}. "
                f"Regex text:\n{regex_text}"
            )
    log.info("... OK")
def run_tests_nlp_and_validator_classes(
    all_nlp_and_validators: List[Tuple[BaseNlpParser, ValidatorBase]]
) -> None:
    """
    Tests multiple pairs of NLP classes and their associated validators.

    All the NLP classes are tested first, then all the validator classes.
    An empty list is accepted and results in nothing being tested.

    Args:
        all_nlp_and_validators:
            list of tuples ``nlp_class, validator_class``. Each element is a
            class (not an instance): it is instantiated as
            ``cls(None, None)`` and its ``test(verbose=True)`` method is
            called.
    """
    # Materialize once so that the input can safely be traversed twice.
    pairs = list(all_nlp_and_validators)
    for nlp_cls, _ in pairs:
        nlp_cls(None, None).test(verbose=True)
    for _, validator_cls in pairs:
        validator_cls(None, None).test(verbose=True)