Coverage for crateweb/anonymise_api/tests.py: 100%
354 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/crateweb/anonymise_api/tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26End-to-end API tests. Not an exhaustive test of anonymisation.
28"""
30import secrets
31from tempfile import NamedTemporaryFile
32from typing import Dict
34from cardinal_pythonlib.httpconst import HttpStatus
35from cardinal_pythonlib.nhs import generate_random_nhs_number
36from django.test import override_settings, TestCase
37from faker import Faker
38from rest_framework.response import Response
39from rest_framework.test import APIClient
41from crate_anon.anonymise.constants import AnonymiseConfigKeys as ConfigKeys
42from crate_anon.crateweb.anonymise_api.constants import (
43 ApiKeys,
44 ApiSettingsKeys,
45)
46from crate_anon.crateweb.core.constants import (
47 DJANGO_DEFAULT_CONNECTION,
48 RESEARCH_DB_CONNECTION_NAME,
49)
# Baseline ANONYMISE_API settings applied to the test class below: just a
# hash key, generated afresh each time this module is imported.
DEFAULT_SETTINGS = {
    ApiSettingsKeys.HASH_KEY: secrets.token_urlsafe(16),
}
@override_settings(ANONYMISE_API=DEFAULT_SETTINGS)
class AnonymisationTests(TestCase):
    """
    End-to-end tests that POST JSON to ``/anon_api/scrub/`` and check the
    anonymised text that comes back.

    Every payload carries its text under the key ``"test"``; the anonymised
    result is read back from the same key.
    """

    # Django only allows test queries against the connections listed here.
    databases = {DJANGO_DEFAULT_CONNECTION, RESEARCH_DB_CONNECTION_NAME}

    def setUp(self) -> None:
        """Create the API client and a Faker seeded with a fixed value."""
        super().setUp()

        self.client = APIClient()

        self.fake = Faker(["en-GB"])
        self.fake.seed_instance(1234)

    # -------------------------------------------------------------------------
    # Helpers
    # -------------------------------------------------------------------------

    def scrub_post(self, payload: Dict) -> Response:
        """POST ``payload`` as JSON to the scrub endpoint."""
        return self.client.post("/anon_api/scrub/", payload, format="json")

    def _scrub_test_text(self, payload: Dict) -> str:
        """
        Scrub ``payload`` and return the anonymised text stored under "test".

        Fails the calling test, reporting the response data, unless the
        response status is ``HttpStatus.OK``.
        """
        response = self.scrub_post(payload)
        self.assertEqual(
            response.status_code, HttpStatus.OK, msg=response.data
        )

        return response.data[ApiKeys.ANONYMISED]["test"]

    def _write_word_file(self, *words: str) -> str:
        """
        Write ``words``, one per line, to a new temporary file.

        The file is closed before this method returns, so it can be reopened
        by name, and it is removed again during test cleanup.

        Returns:
            the name of the temporary file
        """
        import os  # local import: needed only for temporary-file cleanup

        with NamedTemporaryFile(delete=False, mode="w") as f:
            filename = f.name
            # delete=False means nothing else will ever remove this file.
            # Register the cleanup straight away so that it is removed even
            # if a write below fails.
            self.addCleanup(os.remove, filename)
            for word in words:
                f.write(f"{word}\n")

        return filename

    # -------------------------------------------------------------------------
    # Denylist
    # -------------------------------------------------------------------------

    def test_denylist_replaced(self) -> None:
        """Denylisted words are replaced; text not on the list survives."""
        name = self.fake.name()
        address = self.fake.address()
        nhs_number = generate_random_nhs_number()

        text = (
            f"{name} {self.fake.text()} {address} {self.fake.text()} "
            f"{nhs_number} {self.fake.text()}"
        )

        payload = {
            ApiKeys.DENYLIST: {
                ApiKeys.WORDS: [name, address],
            },
            ApiKeys.TEXT: {"test": text},
        }

        # Sanity check: the input really contains the values of interest.
        self.assertIn(name, text)
        self.assertIn(address, text)
        self.assertIn(str(nhs_number), text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(name, anonymised)
        self.assertNotIn(address, anonymised)
        # The NHS number was not denylisted, so it must still be present.
        self.assertIn(str(nhs_number), anonymised)

        self.assertEqual(anonymised.count("[~~~]"), 2)

    def test_denylist_files(self) -> None:
        """Words from a denylist file named in the settings are replaced."""
        payload = {
            ApiKeys.DENYLIST: {ApiKeys.FILES: ["test"]},
            ApiKeys.TEXT: {"test": "secret private confidential"},
        }

        filename = self._write_word_file("secret", "private", "confidential")

        # The payload refers to the file by the label "test"; the settings
        # map that label to the actual filename.
        settings = DEFAULT_SETTINGS.copy()
        settings[ApiSettingsKeys.DENYLIST_FILENAMES] = {"test": filename}

        with override_settings(ANONYMISE_API=settings):
            anonymised = self._scrub_test_text(payload)

        self.assertNotIn("secret", anonymised)
        self.assertNotIn("private", anonymised)
        self.assertNotIn("confidential", anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 3)

    def test_denylist_replacement_text(self) -> None:
        """A denylisted word is replaced with the text the payload specifies."""
        word = "secret"

        payload = {
            ApiKeys.DENYLIST: {
                ApiKeys.WORDS: [word],
            },
            ConfigKeys.REPLACE_NONSPECIFIC_INFO_WITH: "[REDACTED]",
            ApiKeys.TEXT: {"test": word},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertEqual(anonymised.count("[REDACTED]"), 1)

    # -------------------------------------------------------------------------
    # Patient information
    # -------------------------------------------------------------------------

    def test_patient_date_replaced(self) -> None:
        """A patient date is replaced with ``[__PPP__]``."""
        date_of_birth = self.fake.date_of_birth().strftime("%d %b %Y")
        text = f"{date_of_birth} {self.fake.text()}"

        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.DATES: [date_of_birth],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(date_of_birth, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(date_of_birth, anonymised)

        self.assertEqual(anonymised.count("[__PPP__]"), 1)

    def test_patient_words_replaced(self) -> None:
        """Each word of a multi-word patient entry is replaced on its own."""
        words = "one two three"

        # The three words are separated by other text in the input.
        text = f"one {self.fake.text()} two {self.fake.text()} three"
        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.WORDS: [words],
            },
            ApiKeys.TEXT: {"test": text},
        }

        all_words = text.split()

        self.assertIn("one", all_words)
        self.assertIn("two", all_words)
        self.assertIn("three", all_words)

        anonymised = self._scrub_test_text(payload)
        anonymised_words = anonymised.split()

        self.assertNotIn("one", anonymised_words)
        self.assertNotIn("two", anonymised_words)
        self.assertNotIn("three", anonymised_words)

        self.assertEqual(anonymised.count("[__PPP__]"), 3)

    def test_patient_replacement_text(self) -> None:
        """A patient word is replaced with the text the payload specifies."""
        word = "secret"
        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.WORDS: [word],
            },
            ConfigKeys.REPLACE_PATIENT_INFO_WITH: "[REDACTED]",
            ApiKeys.TEXT: {"test": word},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertEqual(anonymised.count("[REDACTED]"), 1)

    def test_patient_phrase_replaced(self) -> None:
        """A patient phrase (here an address) is replaced as a single unit."""
        address = self.fake.address()

        text = f"{address} {self.fake.text()}"

        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.PHRASES: [address],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(address, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(address, anonymised)

        self.assertEqual(anonymised.count("[__PPP__]"), 1)

    def test_patient_non_numeric_phrases_replaced(self) -> None:
        """A purely numeric entry among non-numeric phrases is not scrubbed."""
        non_numeric_phrase = "5 High Street"
        numeric_phrase = "5"

        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.NON_NUMERIC_PHRASES: [
                    non_numeric_phrase,
                    numeric_phrase,
                ],
            },
            ApiKeys.TEXT: {
                "test": "Address is 5 High Street haloperidol 5 mg"
            },
        }

        anonymised = self._scrub_test_text(payload)

        # The "5" in "5 mg" must survive even though "5" was supplied.
        self.assertEqual(anonymised, "Address is [__PPP__] haloperidol 5 mg")

    def test_patient_numeric_replaced(self) -> None:
        """A patient number (here a phone number) is replaced."""
        phone = self.fake.phone_number()

        text = f"{phone} {self.fake.text()}"

        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.NUMBERS: [phone],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(phone, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(phone, anonymised)

        self.assertEqual(anonymised.count("[__PPP__]"), 1)

    def test_patient_code_replaced(self) -> None:
        """A patient code (here a postcode) is replaced."""
        postcode = self.fake.postcode()
        text = f"{postcode} {self.fake.text()}"

        payload = {
            ApiKeys.PATIENT: {
                ApiKeys.CODES: [postcode],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(postcode, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)

        self.assertEqual(anonymised.count("[__PPP__]"), 1)

    # -------------------------------------------------------------------------
    # Third-party information
    # -------------------------------------------------------------------------

    def test_third_party_code_replaced(self) -> None:
        """A third-party code is replaced with ``[__TTT__]``."""
        postcode = self.fake.postcode()
        text = f"{postcode} {self.fake.text()}"

        payload = {
            ApiKeys.THIRD_PARTY: {
                ApiKeys.CODES: [postcode],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(postcode, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)

        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_third_party_replacement_text(self) -> None:
        """A third-party code is replaced with the text the payload specifies."""
        postcode = self.fake.postcode()

        payload = {
            ApiKeys.THIRD_PARTY: {
                ApiKeys.CODES: [postcode],
            },
            ApiKeys.TEXT: {"test": postcode},
            ConfigKeys.REPLACE_THIRD_PARTY_INFO_WITH: "[REDACTED]",
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)

        self.assertEqual(anonymised.count("[REDACTED]"), 1)

    # -------------------------------------------------------------------------
    # Word/numeric boundary options
    # -------------------------------------------------------------------------

    def test_anonymise_codes_ignoring_word_boundaries(self) -> None:
        """With the word-boundary option off, an embedded code is replaced."""
        postcode = self.fake.postcode()
        text = f"text{postcode}text"

        payload = {
            ConfigKeys.ANONYMISE_CODES_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.CODES: [postcode],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(postcode, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)

        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_anonymise_dates_ignoring_word_boundaries(self) -> None:
        """With the word-boundary option off, an embedded date is replaced."""
        date_of_birth = self.fake.date_of_birth().strftime("%d %b %Y")
        text = f"text{date_of_birth}text"

        payload = {
            ConfigKeys.ANONYMISE_DATES_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.DATES: [date_of_birth],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(date_of_birth, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(date_of_birth, anonymised)

        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_anonymise_numbers_ignoring_word_boundaries(self) -> None:
        """With boundary options off, a number between letters is replaced."""
        phone = self.fake.phone_number()
        text = f"text{phone}text"

        payload = {
            ConfigKeys.ANONYMISE_NUMBERS_AT_NUMERIC_BOUNDARIES_ONLY: False,
            ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.NUMBERS: [phone],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(phone, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(phone, anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_anonymise_numbers_ignoring_numeric_boundaries(self) -> None:
        """With boundary options off, a number between digits is replaced."""
        phone = self.fake.phone_number()
        text = f"1234{phone}5678"

        payload = {
            ConfigKeys.ANONYMISE_NUMBERS_AT_NUMERIC_BOUNDARIES_ONLY: False,
            ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.NUMBERS: [phone],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(phone, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(phone, anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_anonymise_strings_ignoring_word_boundaries(self) -> None:
        """With the word-boundary option off, an embedded word is replaced."""
        word = "secret"
        text = f"text{word}text"

        payload = {
            ConfigKeys.ANONYMISE_STRINGS_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: [word],
            },
            ApiKeys.TEXT: {"test": text},
        }

        self.assertIn(word, text)

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(word, anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    # -------------------------------------------------------------------------
    # Approximate matching and string-length options
    # -------------------------------------------------------------------------

    def test_string_max_regex_errors(self) -> None:
        """A misspelling within the permitted error count is still replaced."""
        word = "secret"
        typo = "sceret"
        text = typo

        payload = {
            ConfigKeys.STRING_MAX_REGEX_ERRORS: 2,  # delete 1, insert 1
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: [word],
            },
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(typo, anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_min_string_length_for_errors(self) -> None:
        """Misspellings are only matched for words of the minimum length."""
        word1 = "secret"  # 6 characters: shorter than the minimum below
        typo1 = "sceret"

        word2 = "private"  # 7 characters: meets the minimum below
        typo2 = "prviate"
        text = f"{typo1} {typo2}"

        payload = {
            ConfigKeys.STRING_MAX_REGEX_ERRORS: 2,  # delete 1, insert 1
            ConfigKeys.MIN_STRING_LENGTH_FOR_ERRORS: 7,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: [word1, word2],
            },
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertIn(typo1, anonymised)
        self.assertNotIn(typo2, anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_min_string_length_to_scrub_with(self) -> None:
        """Words shorter than the configured minimum are not scrubbed."""
        payload = {
            ConfigKeys.MIN_STRING_LENGTH_TO_SCRUB_WITH: 6,
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: ["Craig Buchanan"],
            },
            ApiKeys.TEXT: {"test": "Craig Buchanan"},
        }

        anonymised = self._scrub_test_text(payload)

        # "Craig" has five characters, "Buchanan" has eight.
        self.assertIn("Craig", anonymised)
        self.assertNotIn("Buchanan", anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    def test_scrub_string_suffixes(self) -> None:
        """A word followed by a configured suffix is replaced as a whole."""
        word = "secret"

        payload = {
            ConfigKeys.SCRUB_STRING_SUFFIXES: ["s"],
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: [word],
            },
            ApiKeys.TEXT: {"test": "secrets"},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn("secrets", anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    # -------------------------------------------------------------------------
    # Allowlist
    # -------------------------------------------------------------------------

    def test_allowlist_words(self) -> None:
        """An allowlisted word survives even if it is also listed to scrub."""
        # A bit of a contrived example but the allowlist should
        # take precedence.
        payload = {
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: ["secret", "private", "confidential"],
            },
            ApiKeys.ALLOWLIST: {"words": ["secret"]},
            ApiKeys.TEXT: {"test": "secret private confidential"},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertIn("secret", anonymised)
        self.assertNotIn("private", anonymised)
        self.assertNotIn("confidential", anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 2)

    def test_allowlist_files(self) -> None:
        """Words from an allowlist file named in the settings survive."""
        payload = {
            ApiKeys.THIRD_PARTY: {
                ApiKeys.WORDS: ["secret", "private", "confidential"],
            },
            ApiKeys.ALLOWLIST: {"files": ["test"]},
            ApiKeys.TEXT: {"test": "secret private confidential"},
        }

        filename = self._write_word_file("secret")

        # The payload refers to the file by the label "test"; the settings
        # map that label to the actual filename.
        settings = DEFAULT_SETTINGS.copy()
        settings[ApiSettingsKeys.ALLOWLIST_FILENAMES] = {"test": filename}

        with override_settings(ANONYMISE_API=settings):
            anonymised = self._scrub_test_text(payload)

        self.assertIn("secret", anonymised)
        self.assertNotIn("private", anonymised)
        self.assertNotIn("confidential", anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 2)

    def test_phrase_alternatives(self) -> None:
        """A phrase is also matched with a listed alternative substituted."""
        payload = {
            ApiKeys.THIRD_PARTY: {
                ApiKeys.PHRASES: ["22 Acacia Avenue"],
            },
            ApiKeys.ALTERNATIVES: [["Avenue", "Ave"]],
            ApiKeys.TEXT: {"test": "22 Acacia Ave"},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn("22 Acacia Ave", anonymised)
        self.assertEqual(anonymised.count("[__TTT__]"), 1)

    # -------------------------------------------------------------------------
    # Nonspecific scrubbing
    # -------------------------------------------------------------------------

    def test_scrub_all_numbers_of_n_digits(self) -> None:
        """With 10-digit numbers set to be scrubbed, an NHS number goes."""
        nhs_number = str(generate_random_nhs_number())

        text = f"{self.fake.text()} {nhs_number} {self.fake.text()}"

        self.assertIn(nhs_number, text)

        payload = {
            ConfigKeys.SCRUB_ALL_NUMBERS_OF_N_DIGITS: [10],
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(nhs_number, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)

    def test_scrub_all_numbers_of_n_digits_ignoring_word_boundaries(
        self,
    ) -> None:
        """As above, but with the number embedded between letters."""
        nhs_number = str(generate_random_nhs_number())

        text = f"text{nhs_number}text"

        self.assertIn(nhs_number, text)

        payload = {
            ConfigKeys.SCRUB_ALL_NUMBERS_OF_N_DIGITS: [10],
            ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(nhs_number, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)

    def test_scrub_all_uk_postcodes(self) -> None:
        """With the postcode option on, a postcode in the text is replaced."""
        postcode = self.fake.postcode()

        text = f"{self.fake.text()} {postcode} {self.fake.text()}"

        self.assertIn(postcode, text)

        payload = {
            ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)

    def test_scrub_all_uk_postcodes_ignoring_word_boundary(self) -> None:
        """As above, but with the postcode embedded between letters."""
        postcode = self.fake.postcode()

        text = f"text{postcode}text"

        self.assertIn(postcode, text)

        payload = {
            ConfigKeys.ANONYMISE_CODES_AT_WORD_BOUNDARIES_ONLY: False,
            ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)

    def test_scrub_all_uk_postcodes_replacement_text(self) -> None:
        """A scrubbed postcode is replaced with the text the payload gives."""
        postcode = self.fake.postcode()

        payload = {
            ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
            ConfigKeys.REPLACE_NONSPECIFIC_INFO_WITH: "[REDACTED]",
            ApiKeys.TEXT: {"test": postcode},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(postcode, anonymised)
        self.assertEqual(anonymised.count("[REDACTED]"), 1)

    def test_scrub_all_dates(self) -> None:
        """With the all-dates option on, a date in the text is replaced."""
        dob = self.fake.date_of_birth().strftime("%d %b %Y")

        text = f"{self.fake.text()} {dob} {self.fake.text()}"

        self.assertIn(dob, text)

        payload = {
            ConfigKeys.SCRUB_ALL_DATES: True,
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(dob, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)

    def test_blur_all_dates(self) -> None:
        """A date is rewritten in the coarser format the payload specifies."""
        dob = self.fake.date_of_birth()
        dob_string = dob.strftime("%d %b %Y")

        text = f"{self.fake.text()} {dob_string} {self.fake.text()}"

        self.assertIn(dob_string, text)

        payload = {
            ConfigKeys.SCRUB_ALL_DATES: True,
            ConfigKeys.REPLACE_ALL_DATES_WITH: "%b '%y",
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(dob_string, anonymised)
        # The same date, formatted with the replacement pattern.
        expected = dob.strftime("%b '%y")
        self.assertEqual(anonymised.count(expected), 1)

    def test_scrub_all_email_addresses(self) -> None:
        """With the e-mail option on, an e-mail address is replaced."""
        email = self.fake.email()

        text = f"{self.fake.text()} {email} {self.fake.text()}"

        self.assertIn(email, text)

        payload = {
            ConfigKeys.SCRUB_ALL_EMAIL_ADDRESSES: True,
            ApiKeys.TEXT: {"test": text},
        }

        anonymised = self._scrub_test_text(payload)

        self.assertNotIn(email, anonymised)
        self.assertEqual(anonymised.count("[~~~]"), 1)