Coverage for nlp_manager/tests/regex_numbers_tests.py: 100%
26 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/nlp_manager/tests/regex_numbers_tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Unit tests.
28"""
30import unittest
32from crate_anon.nlp_manager.regex_numbers import (
33 MINUS_SIGN,
34 MULTIPLY,
35 PLUS_SIGN,
36 POWER,
37 POWER_INC_E,
38 # POWER_INC_E_ASTERISK,
39 SIGN,
40 BILLION,
41 TRILLION,
42 PLAIN_INTEGER,
43 PLAIN_INTEGER_W_THOUSAND_COMMAS,
44 SCIENTIFIC_NOTATION_EXPONENT,
45 IGNORESIGN_FLOAT,
46 IGNORESIGN_INTEGER,
47 LIBERAL_NUMBER,
48 SIGNED_FLOAT,
49 SIGNED_INTEGER,
50 UNSIGNED_FLOAT,
51 UNSIGNED_INTEGER,
52)
53from crate_anon.nlp_manager.tests.regex_test_helperfunc import (
54 assert_text_regex,
55)
58# =============================================================================
59# Unit tests
60# =============================================================================
class NumberRegexesTests(unittest.TestCase):
    """
    Exercises the number-related regexes with sample text.
    """

    @staticmethod
    def test_number_regexes() -> None:
        """
        Feed every regex listed below, in order, to ``assert_text_regex``.

        Each table row is ``(label, regex, cases)``. Each case is a
        ``(text, expected)`` pair, where ``expected`` is the list of strings
        that ``assert_text_regex`` is given to compare against for that text.
        """
        be_verbose = True

        regex_checks = [
            # -----------------------------------------------------------------
            # Operators, etc.
            # -----------------------------------------------------------------
            (
                "MULTIPLY",
                MULTIPLY,
                [
                    ("a * b", ["*"]),
                    ("a x b", ["x"]),
                    ("a × b", ["×"]),
                    ("a ⋅ b", ["⋅"]),
                    ("a blah b", []),
                ],
            ),
            (
                "POWER",
                POWER,
                [
                    ("a ^ b", ["^"]),
                    ("a ** b", ["**"]),
                    ("10e5", []),
                    ("10E5", []),
                    ("a blah b", []),
                ],
            ),
            (
                "POWER_INC_E",
                POWER_INC_E,
                [
                    ("a ^ b", ["^"]),
                    ("a ** b", ["**"]),
                    ("10e5", ["e"]),
                    ("10E5", ["E"]),
                    ("a blah b", []),
                ],
            ),
            (
                "BILLION",
                BILLION,
                [
                    ("10 x 10^9/l", ["x 10^9"]),
                ],
            ),
            (
                "PLUS_SIGN",
                PLUS_SIGN,
                [
                    ("a + b", ["+"]),
                    ("a blah b", []),
                ],
            ),
            (
                "MINUS_SIGN",
                MINUS_SIGN,
                [
                    # good:
                    ("a - b", ["-"]),  # ASCII hyphen-minus
                    ("a − b", ["−"]),  # Unicode minus
                    ("a – b", ["–"]),  # en dash
                    # bad:
                    ("a — b", []),  # em dash
                    ("a ‐ b", []),  # Unicode formal hyphen
                    ("a blah b", []),
                ],
            ),
            # Can't test optional regexes very easily! They match nothing.
            (
                "SIGN",
                SIGN,
                [
                    # good:
                    ("a + b", ["+"]),
                    ("a - b", ["-"]),  # ASCII hyphen-minus
                    ("a − b", ["−"]),  # Unicode minus
                    ("a – b", ["–"]),  # en dash
                    # bad:
                    ("a — b", []),  # em dash
                    ("a ‐ b", []),  # Unicode formal hyphen
                    ("a blah b", []),
                ],
            ),
            # -----------------------------------------------------------------
            # Quantities
            # -----------------------------------------------------------------
            (
                "BILLION",
                BILLION,
                [
                    ("* 10^9", ["* 10^9"]),
                    ("× 10e9", ["× 10e9"]),
                    ("x 10 ** 9", ["x 10 ** 9"]),
                ],
            ),
            (
                "TRILLION",
                TRILLION,
                [
                    ("* 10^12", ["* 10^12"]),
                    ("× 10e12", ["× 10e12"]),
                    ("x 10 ** 12", ["x 10 ** 12"]),
                ],
            ),
            # -----------------------------------------------------------------
            # Number elements
            # -----------------------------------------------------------------
            (
                "PLAIN_INTEGER",
                PLAIN_INTEGER,
                [
                    ("a 1234 b", ["1234"]),
                    ("a 1234.5 b", ["1234", "5"]),
                    ("a 12,000 b", ["12", "000"]),
                ],
            ),
            (
                "PLAIN_INTEGER_W_THOUSAND_COMMAS",
                PLAIN_INTEGER_W_THOUSAND_COMMAS,
                [
                    ("a 1234 b", ["1234"]),
                    ("a 1234.5 b", ["1234", "5"]),
                    ("a 12,000 b", ["12,000"]),
                ],
            ),
            (
                "SCIENTIFIC_NOTATION_EXPONENT",
                SCIENTIFIC_NOTATION_EXPONENT,
                [
                    ("a 1234 b", []),
                    ("E-4", ["E-4"]),
                    ("e15", ["e15"]),
                    ("e15.3", ["e15"]),
                ],
            ),
            # -----------------------------------------------------------------
            # Number types
            # -----------------------------------------------------------------
            (
                "IGNORESIGN_FLOAT",
                IGNORESIGN_FLOAT,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", ["1"]),  # NB may be unexpected!
                    ("1.2", ["1.2"]),
                    ("-3.4", ["3.4"]),  # NB may be unexpected!
                    ("+3.4", ["3.4"]),
                    ("-3.4e27.3", ["3.4", "27.3"]),
                    ("3.4e-27", ["3.4", "27"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600.34"]),
                    ("-17,300.6588", ["17,300.6588"]),
                    ("+12345", ["12345"]),
                    ("-12345", ["12345"]),  # NB may be unexpected!
                    ("-12345.67", ["12345.67"]),  # NB may be unexpected!
                    ("12345.67", ["12345.67"]),
                    # NB may be unexpected!
                    ("-12345.67e-5", ["12345.67", "5"]),
                    # NB may be unexpected!
                    ("12345.67e-5", ["12345.67", "5"]),
                ],
            ),
            (
                "IGNORESIGN_INTEGER",
                IGNORESIGN_INTEGER,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", ["1"]),  # will drop sign
                    ("1.2", ["1", "2"]),
                    ("-3.4", ["3", "4"]),
                    ("+3.4", ["3", "4"]),
                    ("-3.4e27.3", ["3", "4", "27", "3"]),
                    ("3.4e-27", ["3", "4", "27"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600", "34"]),
                    ("-17,300.6588", ["17,300", "6588"]),
                    ("-12345", ["12345"]),  # NB may be unexpected!
                    ("-12345.67", ["12345", "67"]),  # NB may be unexpected!
                ],
            ),
            (
                "LIBERAL_NUMBER",
                LIBERAL_NUMBER,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", ["-1"]),
                    ("1.2", ["1.2"]),
                    ("-3.4", ["-3.4"]),
                    ("+3.4", ["+3.4"]),
                    # not valid scientific notation
                    ("-3.4e27.3", ["-3.4e27", "3"]),
                    ("3.4e-27", ["3.4e-27"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600.34"]),
                    ("-17,300.6588", ["-17,300.6588"]),
                    ("+12345", ["+12345"]),
                    ("-12345", ["-12345"]),
                    ("-12345.67", ["-12345.67"]),
                    ("-12345.67e-5", ["-12345.67e-5"]),
                ],
            ),
            (
                "SIGNED_FLOAT",
                SIGNED_FLOAT,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", ["-1"]),
                    ("1.2", ["1.2"]),
                    ("-3.4", ["-3.4"]),
                    ("+3.4", ["+3.4"]),
                    ("-3.4e27.3", ["-3.4", "27.3"]),
                    ("3.4e-27", ["3.4", "-27"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600.34"]),
                    ("-17,300.6588", ["-17,300.6588"]),
                    ("+12345", ["+12345"]),
                    ("-12345", ["-12345"]),
                    ("-12345.67", ["-12345.67"]),
                    # NB may be unexpected!
                    ("-12345.67e-5", ["-12345.67", "-5"]),
                ],
            ),
            (
                "SIGNED_INTEGER",
                SIGNED_INTEGER,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", ["-1"]),
                    ("1.2", ["1", "2"]),
                    ("-3.4", ["-3", "4"]),
                    ("+3.4", ["+3", "4"]),
                    ("-3.4e27.3", ["-3", "4", "27", "3"]),
                    ("3.4e-27", ["3", "4", "-27"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600", "34"]),
                    ("-17,300.6588", ["-17,300", "6588"]),
                    ("+12345", ["+12345"]),
                    ("-12345", ["-12345"]),
                    ("-12345.67", ["-12345", "67"]),  # NB may be unexpected!
                    # NB may be unexpected!
                    ("-12345.67e-5", ["-12345", "67", "-5"]),
                ],
            ),
            (
                "UNSIGNED_FLOAT",
                UNSIGNED_FLOAT,
                [
                    ("1", ["1"]),
                    ("12345", ["12345"]),
                    ("-1", []),
                    ("1.2", ["1.2"]),
                    ("-3.4", []),
                    ("+3.4", ["+3.4"]),
                    ("-3.4e27.3", ["27.3"]),
                    ("3.4e-27", ["3.4"]),
                    ("9,800", ["9,800"]),
                    ("17,600.34", ["17,600.34"]),
                    ("-17,300.6588", []),
                    ("+12345", ["+12345"]),
                    ("-12345", []),
                    ("-12345.67", []),
                    ("12345.67", ["12345.67"]),
                    ("-12345.67e-5", []),
                    ("12345.67e-5", ["12345.67"]),  # NB may be unexpected!
                ],
            ),
            (
                "UNSIGNED_INTEGER",
                UNSIGNED_INTEGER,
                [
                    ("12345", ["12345"]),
                    ("+12345", ["+12345"]),
                    ("-12345", []),
                    ("-12345.67", []),
                    ("-12345.67e-5", []),
                ],
            ),
        ]

        # Rows are checked strictly in table order.
        for label, regex, cases in regex_checks:
            assert_text_regex(label, regex, cases, verbose=be_verbose)