Coverage for crateweb/anonymise_api/tests.py: 100%

354 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/crateweb/anonymise_api/tests.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26End-to-end API tests. Not an exhaustive test of anonymisation. 

27 

28""" 

29 

30import secrets 

31from tempfile import NamedTemporaryFile 

32from typing import Dict 

33 

34from cardinal_pythonlib.httpconst import HttpStatus 

35from cardinal_pythonlib.nhs import generate_random_nhs_number 

36from django.test import override_settings, TestCase 

37from faker import Faker 

38from rest_framework.response import Response 

39from rest_framework.test import APIClient 

40 

41from crate_anon.anonymise.constants import AnonymiseConfigKeys as ConfigKeys 

42from crate_anon.crateweb.anonymise_api.constants import ( 

43 ApiKeys, 

44 ApiSettingsKeys, 

45) 

46from crate_anon.crateweb.core.constants import ( 

47 DJANGO_DEFAULT_CONNECTION, 

48 RESEARCH_DB_CONNECTION_NAME, 

49) 

50 

# Baseline ANONYMISE_API settings applied to the test class: just a hash key,
# generated randomly when this module is imported.
DEFAULT_SETTINGS = {
    ApiSettingsKeys.HASH_KEY: secrets.token_urlsafe(16),
}

52 

53 

54@override_settings(ANONYMISE_API=DEFAULT_SETTINGS) 

55class AnonymisationTests(TestCase): 

56 databases = {DJANGO_DEFAULT_CONNECTION, RESEARCH_DB_CONNECTION_NAME} 

57 

def setUp(self) -> None:
    """Give each test an API client and a seeded en-GB Faker instance."""
    super().setUp()

    # A fixed seed keeps the generated names, addresses etc. reproducible.
    generator = Faker(["en-GB"])
    generator.seed_instance(1234)

    self.client = APIClient()
    self.fake = generator

65 

def scrub_post(self, payload: Dict) -> Response:
    """POST ``payload`` as JSON to the scrub endpoint; return the response."""
    endpoint = "/anon_api/scrub/"
    return self.client.post(endpoint, payload, format="json")

68 

def test_denylist_replaced(self) -> None:
    """
    Denylisted words are removed from the text, while an NHS number that
    is not on the denylist is left in place.
    """
    person = self.fake.name()
    home = self.fake.address()
    nhs_number = generate_random_nhs_number()

    # Draw the filler paragraphs in the order they appear in the text.
    filler = [self.fake.text() for _ in range(3)]
    text = (
        f"{person} {filler[0]} {home} {filler[1]} "
        f"{nhs_number} {filler[2]}"
    )

    denylist = {ApiKeys.WORDS: [person, home]}
    request_body = {
        ApiKeys.DENYLIST: denylist,
        ApiKeys.TEXT: {"test": text},
    }

    # Sanity check: everything of interest is present before scrubbing.
    for fragment in (person, home, str(nhs_number)):
        self.assertIn(fragment, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    for fragment in (person, home):
        self.assertNotIn(fragment, scrubbed)
    self.assertIn(str(nhs_number), scrubbed)

    # One replacement marker each for the name and the address.
    self.assertEqual(scrubbed.count("[~~~]"), 2)

102 

def test_denylist_files(self) -> None:
    """
    Words listed in a denylist file, referenced by name in the request and
    mapped to a path via the ANONYMISE_API settings, are scrubbed.

    The temporary denylist file is removed when the test finishes.
    """
    # Local import: only needed to clean up the temporary file.
    import os

    payload = {
        ApiKeys.DENYLIST: {ApiKeys.FILES: ["test"]},
        ApiKeys.TEXT: {"test": "secret private confidential"},
    }

    # delete=False keeps the file on disk after the handle is closed, so
    # the path placed in the settings below still exists during the request.
    with NamedTemporaryFile(delete=False, mode="w") as f:
        filename = f.name
        # Without this the file would be left behind after every test run.
        self.addCleanup(os.remove, filename)
        f.write("secret\n")
        f.write("private\n")
        f.write("confidential\n")

    filename_map = {"test": filename}
    settings = DEFAULT_SETTINGS.copy()
    settings[ApiSettingsKeys.DENYLIST_FILENAMES] = filename_map

    with override_settings(ANONYMISE_API=settings):
        response = self.scrub_post(payload)
        self.assertEqual(
            response.status_code, HttpStatus.OK, msg=response.data
        )

    anonymised = response.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn("secret", anonymised)
    self.assertNotIn("private", anonymised)
    self.assertNotIn("confidential", anonymised)
    self.assertEqual(anonymised.count("[~~~]"), 3)

131 

def test_denylist_replacement_text(self) -> None:
    """A denylisted word is replaced by the caller-supplied marker text."""
    secret_word = "secret"
    marker = "[REDACTED]"

    request_body = {
        ApiKeys.DENYLIST: {ApiKeys.WORDS: [secret_word]},
        ConfigKeys.REPLACE_NONSPECIFIC_INFO_WITH: marker,
        ApiKeys.TEXT: {"test": secret_word},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]
    self.assertEqual(scrubbed.count(marker), 1)

151 

def test_patient_date_replaced(self) -> None:
    """A patient date present in the text is replaced by ``[__PPP__]``."""
    dob = self.fake.date_of_birth().strftime("%d %b %Y")
    trailing = self.fake.text()
    text = f"{dob} {trailing}"

    patient = {ApiKeys.DATES: [dob]}
    request_body = {
        ApiKeys.PATIENT: patient,
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(dob, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(dob, scrubbed)
    self.assertEqual(scrubbed.count("[__PPP__]"), 1)

175 

def test_patient_words_replaced(self) -> None:
    """
    A single space-separated patient "words" entry results in each of its
    component words being scrubbed individually.
    """
    words = "one two three"
    targets = ("one", "two", "three")

    first_filler = self.fake.text()
    second_filler = self.fake.text()
    text = f"one {first_filler} two {second_filler} three"

    request_body = {
        ApiKeys.PATIENT: {ApiKeys.WORDS: [words]},
        ApiKeys.TEXT: {"test": text},
    }

    # Compare against whole tokens, not substrings.
    tokens_before = text.split()
    for target in targets:
        self.assertIn(target, tokens_before)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]
    tokens_after = scrubbed.split()

    for target in targets:
        self.assertNotIn(target, tokens_after)

    self.assertEqual(scrubbed.count("[__PPP__]"), 3)

206 

def test_patient_replacement_text(self) -> None:
    """A patient word is replaced by the caller-supplied marker text."""
    secret_word = "secret"
    marker = "[REDACTED]"

    request_body = {
        ApiKeys.PATIENT: {ApiKeys.WORDS: [secret_word]},
        ConfigKeys.REPLACE_PATIENT_INFO_WITH: marker,
        ApiKeys.TEXT: {"test": secret_word},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]
    self.assertEqual(scrubbed.count(marker), 1)

224 

def test_patient_phrase_replaced(self) -> None:
    """A patient phrase (an address) is replaced by one ``[__PPP__]``."""
    home = self.fake.address()
    trailing = self.fake.text()
    text = f"{home} {trailing}"

    patient = {ApiKeys.PHRASES: [home]}
    request_body = {
        ApiKeys.PATIENT: patient,
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(home, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(home, scrubbed)
    self.assertEqual(scrubbed.count("[__PPP__]"), 1)

249 

def test_patient_non_numeric_phrases_replaced(self) -> None:
    """
    Of the "non-numeric phrases" supplied, the one containing words is
    scrubbed, whereas the purely numeric one is not (the "5" in "5 mg"
    survives).
    """
    with_words = "5 High Street"
    digits_only = "5"
    original = "Address is 5 High Street haloperidol 5 mg"

    request_body = {
        ApiKeys.PATIENT: {
            ApiKeys.NON_NUMERIC_PHRASES: [with_words, digits_only],
        },
        ApiKeys.TEXT: {"test": original},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]
    self.assertEqual(scrubbed, "Address is [__PPP__] haloperidol 5 mg")

274 

def test_patient_numeric_replaced(self) -> None:
    """A patient number (a phone number) is replaced by ``[__PPP__]``."""
    phone_number = self.fake.phone_number()
    trailing = self.fake.text()
    text = f"{phone_number} {trailing}"

    patient = {ApiKeys.NUMBERS: [phone_number]}
    request_body = {
        ApiKeys.PATIENT: patient,
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(phone_number, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(phone_number, scrubbed)
    self.assertEqual(scrubbed.count("[__PPP__]"), 1)

299 

def test_patient_code_replaced(self) -> None:
    """A patient code (a postcode) is replaced by one ``[__PPP__]``."""
    code = self.fake.postcode()
    trailing = self.fake.text()
    text = f"{code} {trailing}"

    patient = {ApiKeys.CODES: [code]}
    request_body = {
        ApiKeys.PATIENT: patient,
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(code, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count("[__PPP__]"), 1)

323 

def test_third_party_code_replaced(self) -> None:
    """A third-party code (a postcode) is replaced by ``[__TTT__]``."""
    code = self.fake.postcode()
    trailing = self.fake.text()
    text = f"{code} {trailing}"

    third_party = {ApiKeys.CODES: [code]}
    request_body = {
        ApiKeys.THIRD_PARTY: third_party,
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(code, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

347 

def test_third_party_replacement_text(self) -> None:
    """A third-party code is replaced by the caller-supplied marker."""
    code = self.fake.postcode()
    marker = "[REDACTED]"

    request_body = {
        ApiKeys.THIRD_PARTY: {ApiKeys.CODES: [code]},
        ApiKeys.TEXT: {"test": code},
        ConfigKeys.REPLACE_THIRD_PARTY_INFO_WITH: marker,
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count(marker), 1)

369 

def test_anonymise_codes_ignoring_word_boundaries(self) -> None:
    """
    With the codes word-boundary option switched off, a code embedded
    directly inside other text is still scrubbed.
    """
    code = self.fake.postcode()
    text = f"text{code}text"

    request_body = {
        ConfigKeys.ANONYMISE_CODES_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.THIRD_PARTY: {ApiKeys.CODES: [code]},
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(code, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

394 

def test_anonymise_dates_ignoring_word_boundaries(self) -> None:
    """
    With the dates word-boundary option switched off, a date embedded
    directly inside other text is still scrubbed.
    """
    dob = self.fake.date_of_birth().strftime("%d %b %Y")
    text = f"text{dob}text"

    request_body = {
        ConfigKeys.ANONYMISE_DATES_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.THIRD_PARTY: {ApiKeys.DATES: [dob]},
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(dob, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(dob, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

419 

def test_anonymise_numbers_ignoring_word_boundaries(self) -> None:
    """
    With both number-boundary options switched off, a number embedded
    between letters is still scrubbed.
    """
    phone_number = self.fake.phone_number()
    text = f"text{phone_number}text"

    request_body = {
        ConfigKeys.ANONYMISE_NUMBERS_AT_NUMERIC_BOUNDARIES_ONLY: False,
        ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.THIRD_PARTY: {ApiKeys.NUMBERS: [phone_number]},
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(phone_number, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(phone_number, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

444 

def test_anonymise_numbers_ignoring_numeric_boundaries(self) -> None:
    """
    With both number-boundary options switched off, a number embedded
    between other digits is still scrubbed.
    """
    phone_number = self.fake.phone_number()
    text = f"1234{phone_number}5678"

    request_body = {
        ConfigKeys.ANONYMISE_NUMBERS_AT_NUMERIC_BOUNDARIES_ONLY: False,
        ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.THIRD_PARTY: {ApiKeys.NUMBERS: [phone_number]},
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(phone_number, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(phone_number, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

469 

def test_anonymise_strings_ignoring_word_boundaries(self) -> None:
    """
    With the strings word-boundary option switched off, a word embedded
    directly inside other text is still scrubbed.
    """
    secret_word = "secret"
    text = f"text{secret_word}text"

    request_body = {
        ConfigKeys.ANONYMISE_STRINGS_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.THIRD_PARTY: {ApiKeys.WORDS: [secret_word]},
        ApiKeys.TEXT: {"test": text},
    }

    self.assertIn(secret_word, text)

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(secret_word, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

493 

def test_string_max_regex_errors(self) -> None:
    """
    When up to two regex errors are permitted, a misspelling of a
    third-party word is scrubbed as well.
    """
    correct = "secret"
    misspelt = "sceret"

    request_body = {
        ConfigKeys.STRING_MAX_REGEX_ERRORS: 2,  # delete 1, insert 1
        ApiKeys.THIRD_PARTY: {ApiKeys.WORDS: [correct]},
        ApiKeys.TEXT: {"test": f"{misspelt}"},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(misspelt, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

516 

def test_min_string_length_for_errors(self) -> None:
    """
    With a minimum string length for errors of 7, the misspelling of the
    seven-letter word is scrubbed but the misspelling of the six-letter
    word is left in place.
    """
    six_letter_word, six_letter_typo = "secret", "sceret"
    seven_letter_word, seven_letter_typo = "private", "prviate"
    text = f"{six_letter_typo} {seven_letter_typo}"

    request_body = {
        ConfigKeys.STRING_MAX_REGEX_ERRORS: 2,  # delete 1, insert 1
        ConfigKeys.MIN_STRING_LENGTH_FOR_ERRORS: 7,
        ApiKeys.THIRD_PARTY: {
            ApiKeys.WORDS: [six_letter_word, seven_letter_word],
        },
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertIn(six_letter_typo, scrubbed)
    self.assertNotIn(seven_letter_typo, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

544 

def test_min_string_length_to_scrub_with(self) -> None:
    """
    With a minimum scrubbing length of 6, the five-letter "Craig" is kept
    and the longer "Buchanan" is scrubbed.
    """
    full_name = "Craig Buchanan"

    request_body = {
        ConfigKeys.MIN_STRING_LENGTH_TO_SCRUB_WITH: 6,
        ApiKeys.THIRD_PARTY: {ApiKeys.WORDS: [full_name]},
        ApiKeys.TEXT: {"test": full_name},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertIn("Craig", scrubbed)
    self.assertNotIn("Buchanan", scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

564 

def test_scrub_string_suffixes(self) -> None:
    """A word followed by a configured suffix ("s") is scrubbed too."""
    stem = "secret"

    request_body = {
        ConfigKeys.SCRUB_STRING_SUFFIXES: ["s"],
        ApiKeys.THIRD_PARTY: {ApiKeys.WORDS: [stem]},
        ApiKeys.TEXT: {"test": "secrets"},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn("secrets", scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

585 

def test_allowlist_words(self) -> None:
    """
    An allowlisted word is kept even though it is also supplied as a
    third-party word.
    """
    # A bit of a contrived example but the allowlist should
    # take precedence.
    third_party = {ApiKeys.WORDS: ["secret", "private", "confidential"]}
    request_body = {
        ApiKeys.THIRD_PARTY: third_party,
        ApiKeys.ALLOWLIST: {"words": ["secret"]},
        ApiKeys.TEXT: {"test": "secret private confidential"},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertIn("secret", scrubbed)
    for removed in ("private", "confidential"):
        self.assertNotIn(removed, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 2)

608 

def test_allowlist_files(self) -> None:
    """
    A word listed in an allowlist file, referenced by name in the request
    and mapped to a path via the ANONYMISE_API settings, is kept even
    though it is also supplied as a third-party word.

    The temporary allowlist file is removed when the test finishes.
    """
    # Local import: only needed to clean up the temporary file.
    import os

    payload = {
        ApiKeys.THIRD_PARTY: {
            ApiKeys.WORDS: ["secret", "private", "confidential"],
        },
        ApiKeys.ALLOWLIST: {"files": ["test"]},
        ApiKeys.TEXT: {"test": "secret private confidential"},
    }

    # delete=False keeps the file on disk after the handle is closed, so
    # the path placed in the settings below still exists during the request.
    with NamedTemporaryFile(delete=False, mode="w") as f:
        filename = f.name
        # Without this the file would be left behind after every test run.
        self.addCleanup(os.remove, filename)
        f.write("secret\n")

    filename_map = {"test": filename}
    settings = DEFAULT_SETTINGS.copy()
    settings[ApiSettingsKeys.ALLOWLIST_FILENAMES] = filename_map

    with override_settings(ANONYMISE_API=settings):
        response = self.scrub_post(payload)
        self.assertEqual(
            response.status_code, HttpStatus.OK, msg=response.data
        )

    anonymised = response.data[ApiKeys.ANONYMISED]["test"]

    self.assertIn("secret", anonymised)
    self.assertNotIn("private", anonymised)
    self.assertNotIn("confidential", anonymised)
    self.assertEqual(anonymised.count("[__TTT__]"), 2)

638 

def test_phrase_alternatives(self) -> None:
    """
    A phrase is scrubbed when the text uses a declared alternative
    ("Ave") for one of its words ("Avenue").
    """
    abbreviated = "22 Acacia Ave"

    request_body = {
        ApiKeys.THIRD_PARTY: {ApiKeys.PHRASES: ["22 Acacia Avenue"]},
        ApiKeys.ALTERNATIVES: [["Avenue", "Ave"]],
        ApiKeys.TEXT: {"test": abbreviated},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(abbreviated, scrubbed)
    self.assertEqual(scrubbed.count("[__TTT__]"), 1)

657 

def test_scrub_all_numbers_of_n_digits(self) -> None:
    """
    Asking for all 10-digit numbers to be scrubbed removes an NHS number
    that was never supplied as patient or third-party data.
    """
    nhs_number = str(generate_random_nhs_number())

    before = self.fake.text()
    after = self.fake.text()
    text = f"{before} {nhs_number} {after}"

    self.assertIn(nhs_number, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_NUMBERS_OF_N_DIGITS: [10],
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(nhs_number, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)

679 

def test_scrub_all_numbers_of_n_digits_ignoring_word_boundaries(
    self,
) -> None:
    """
    With the numbers word-boundary option switched off, a 10-digit number
    embedded between letters is still scrubbed.
    """
    nhs_number = str(generate_random_nhs_number())
    text = f"text{nhs_number}text"

    self.assertIn(nhs_number, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_NUMBERS_OF_N_DIGITS: [10],
        ConfigKeys.ANONYMISE_NUMBERS_AT_WORD_BOUNDARIES_ONLY: False,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(nhs_number, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)

704 

def test_scrub_all_uk_postcodes(self) -> None:
    """
    Asking for all UK postcodes to be scrubbed removes a postcode that was
    never supplied as patient or third-party data.
    """
    code = self.fake.postcode()

    before = self.fake.text()
    after = self.fake.text()
    text = f"{before} {code} {after}"

    self.assertIn(code, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)

726 

def test_scrub_all_uk_postcodes_ignoring_word_boundary(self) -> None:
    """
    With the codes word-boundary option switched off, a postcode embedded
    between letters is still caught by the scrub-all-postcodes option.
    """
    code = self.fake.postcode()
    text = f"text{code}text"

    self.assertIn(code, text)

    request_body = {
        ConfigKeys.ANONYMISE_CODES_AT_WORD_BOUNDARIES_ONLY: False,
        ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)

749 

def test_scrub_all_uk_postcodes_replacement_text(self) -> None:
    """A scrubbed postcode is replaced by the caller-supplied marker."""
    code = self.fake.postcode()
    marker = "[REDACTED]"

    request_body = {
        ConfigKeys.SCRUB_ALL_UK_POSTCODES: True,
        ConfigKeys.REPLACE_NONSPECIFIC_INFO_WITH: marker,
        ApiKeys.TEXT: {"test": code},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(code, scrubbed)
    self.assertEqual(scrubbed.count(marker), 1)

768 

def test_scrub_all_dates(self) -> None:
    """
    Asking for all dates to be scrubbed removes a date that was never
    supplied as patient or third-party data.
    """
    dob = self.fake.date_of_birth().strftime("%d %b %Y")

    before = self.fake.text()
    after = self.fake.text()
    text = f"{before} {dob} {after}"

    self.assertIn(dob, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_DATES: True,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(dob, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)

790 

def test_blur_all_dates(self) -> None:
    """
    When a date format is supplied as the replacement for all dates, the
    full date is replaced by the same date rendered in that format.
    """
    blur_format = "%b '%y"

    dob = self.fake.date_of_birth()
    full_date = dob.strftime("%d %b %Y")

    before = self.fake.text()
    after = self.fake.text()
    text = f"{before} {full_date} {after}"

    self.assertIn(full_date, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_DATES: True,
        ConfigKeys.REPLACE_ALL_DATES_WITH: blur_format,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(full_date, scrubbed)
    blurred = dob.strftime(blur_format)
    self.assertEqual(scrubbed.count(blurred), 1)

815 

def test_scrub_all_email_addresses(self) -> None:
    """
    Asking for all e-mail addresses to be scrubbed removes an address that
    was never supplied as patient or third-party data.
    """
    address = self.fake.email()

    before = self.fake.text()
    after = self.fake.text()
    text = f"{before} {address} {after}"

    self.assertIn(address, text)

    request_body = {
        ConfigKeys.SCRUB_ALL_EMAIL_ADDRESSES: True,
        ApiKeys.TEXT: {"test": text},
    }

    reply = self.scrub_post(request_body)
    self.assertEqual(reply.status_code, HttpStatus.OK, msg=reply.data)

    scrubbed = reply.data[ApiKeys.ANONYMISED]["test"]

    self.assertNotIn(address, scrubbed)
    self.assertEqual(scrubbed.count("[~~~]"), 1)