Coverage for nlp_manager/tests/regex_test_helperfunc.py: 87%

31 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/tests/regex_test_helperfunc.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Functions for testing regular expressions.** 

27 

28""" 

29 

30import logging 

31 

32from typing import List, Pattern, Tuple 

33from crate_anon.nlp_manager.regex_func import compile_regex 

34from crate_anon.nlp_manager.regex_parser import BaseNlpParser, ValidatorBase 

35 

36log = logging.getLogger(__name__) 

37 

38 

def f_score(precision: float, recall: float, beta: float = 1) -> float:
    """
    Calculates an F score (e.g. an F1 score for ``beta == 1``).
    See https://en.wikipedia.org/wiki/F1_score.

    Args:
        precision: precision of the test, P(really positive | test positive)
        recall: recall of the test, P(test positive | really positive)
        beta: controls the type of the F score (the relative emphasis on
            precision versus recall)

    Returns:
        the F score; 0.0 if the denominator ``beta**2 * precision + recall``
        is zero (e.g. when precision and recall are both zero), rather than
        raising :exc:`ZeroDivisionError`

    """
    beta_sq = beta**2
    denominator = (beta_sq * precision) + recall
    if denominator == 0:
        # For non-negative inputs a zero denominator implies recall == 0, so
        # the numerator is zero too; the conventional value is then 0.
        return 0.0
    return (1 + beta_sq) * precision * recall / denominator

58 

59 

def get_compiled_regex_results(
    compiled_regex: Pattern, text: str
) -> List[str]:
    """
    Finds all the hits for a regex when applied to text.

    Args:
        compiled_regex: a compiled regular expression
        text: text to parse

    Returns:
        a list of all the (entire) hits for this regex in ``text``, in the
        order in which they occur; an empty list if there are none

    """
    # group(0) is the whole match, regardless of any capture groups.
    return [m.group(0) for m in compiled_regex.finditer(text)]

78 

79 

def print_compiled_regex_results(
    compiled_regex: Pattern, text: str, prefix_spaces: int = 4
) -> None:
    """
    Runs a regex over some text and writes the hits to stdout.

    The output is a single line: the indent, then the ``repr`` of the text,
    then ``->``, then the ``repr`` of the list of hits.

    Args:
        compiled_regex: a compiled regular expression
        text: text to parse
        prefix_spaces: number of spaces with which to indent the output line
    """
    hits = get_compiled_regex_results(compiled_regex, text)
    indent = " " * prefix_spaces
    print(f"{indent}{text!r} -> {hits!r}")

93 

94 

def assert_text_regex(
    name: str,
    regex_text: str,
    test_expected_list: List[Tuple[str, List[str]]],
    verbose: bool = False,
) -> None:
    """
    Test a regex upon some text.

    Args:
        name: regex name (for display purposes only)
        regex_text: text that should be compiled to give our regex
        test_expected_list:
            list of tuples ``teststring, expected_results``, where
            ``teststring`` is some text and ``expected_results`` is a list of
            expected hits for the regex within ``teststring``
        verbose: be verbose? (If so, the regex text is logged at debug level.)

    Raises:
        AssertionError: if, for any test string, the hits found differ from
            the expected hits

    """
    log.info(f"Testing regex named {name}")
    compiled_regex = compile_regex(regex_text)
    if verbose:
        log.debug(f"... regex text:\n{regex_text}")
    for test_string, expected_values in test_expected_list:
        actual_values = get_compiled_regex_results(compiled_regex, test_string)
        if actual_values != expected_values:
            # Raised explicitly (rather than via an ``assert`` statement) so
            # that the check is not stripped when Python runs with -O.
            raise AssertionError(
                f"Regex {name}: Expected {expected_values}, "
                f"got {actual_values}, when parsing {test_string!r}. "
                f"Regex text:\n{regex_text}"
            )
    log.info("... OK")

133 

134 

def run_tests_nlp_and_validator_classes(
    all_nlp_and_validators: List[Tuple[BaseNlpParser, ValidatorBase]]
) -> None:
    """
    Tests multiple pairs of NLP classes and their associated validators.

    Each item is instantiated as ``cls(None, None)`` and its ``test()`` method
    is called with ``verbose=True``. All the NLP classes are tested first (in
    list order), then all the validator classes. An empty list is a no-op.

    Args:
        all_nlp_and_validators:
            list of tuples ``nlp_class, validator_class``. NOTE(review): the
            items are called like classes, although the annotation names the
            instance types -- confirm and consider ``Type[...]``.
    """
    # Iterate the pairs directly rather than unpacking zip(*pairs), which
    # raises ValueError when the list is empty.
    pairs = list(all_nlp_and_validators)
    for nlp_cls, _ in pairs:
        nlp_cls(None, None).test(verbose=True)
    for _, validator_cls in pairs:
        validator_cls(None, None).test(verbose=True)