Coverage for nlp_manager/tests/cloud_request_sender_tests.py: 100%

234 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/nlp_manager/tests/cloud_request_sender_tests.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Unit tests for CloudRequestSender (crate_anon/nlp_manager/cloud_request_sender.py). 

27""" 

28 

29import logging 

30from typing import Any, Dict, Generator, Optional, Tuple 

31from unittest import mock, TestCase 

32 

33from crate_anon.nlp_manager.cloud_request import CloudRequestProcess 

34from crate_anon.nlp_manager.cloud_request_sender import CloudRequestSender 

35from crate_anon.nlp_manager.constants import ( 

36 FN_SRCDB, 

37 FN_SRCFIELD, 

38 FN_SRCPKFIELD, 

39 FN_SRCPKSTR, 

40 FN_SRCPKVAL, 

41 FN_SRCTABLE, 

42 HashClass, 

43) 

44from crate_anon.nlp_manager.models import FN_SRCHASH, NlpRecord 

45from crate_anon.nlprp.constants import NlprpKeys 

46 

# Sample source texts used as NLP input by the tests below. PATACA is only
# used to produce a hash that differs from PAGODA's (a "changed" record).
PANAMOWA = "A woman, a plan, a canal. Panamowa!"
PAGODA = "A dog! A panic in a pagoda."
PATACA = "A cat! A panic in a pataca."
REVOLT = "Won't lovers revolt now?"

51 

52 

class TestCloudRequestSender(CloudRequestSender):
    """
    CloudRequestSender subclass used by the tests in this module.

    It overrides ``_get_new_cloud_request()`` so that, each time it is
    called, the next pre-built request from ``test_requests`` is returned.
    Tests populate ``test_requests`` themselves before sending.
    """

    # This is a helper, not a test case. Without this flag the "Test" name
    # prefix makes pytest try (and fail, with a warning) to collect it.
    __test__ = False

    def __init__(self, *args, **kwargs):
        """Pass all arguments to the parent and reset the bookkeeping."""
        super().__init__(*args, **kwargs)
        # Number of requests handed out so far; doubles as the index of the
        # next entry of test_requests to return.
        self.call_count = 0
        # Requests to hand out, in order; set by individual tests.
        self.test_requests = []

    def _get_new_cloud_request(self) -> CloudRequestProcess:
        """
        Return the next request from ``test_requests``.

        Raises:
            IndexError: if more requests are asked for than the test
                supplied.
        """
        request = self.test_requests[self.call_count]
        self.call_count += 1

        return request

64 

65 

class CloudRequestSenderTests(TestCase):
    """
    Tests for ``send_requests()`` of CloudRequestSender.

    Most tests drive a TestCloudRequestSender (built in ``setUp()``) whose
    requests are real CloudRequestProcess objects with their
    ``send_process_request`` method mocked out. Each test sets
    ``self.test_text`` to the (text, other_values) pairs that ``get_text()``
    should yield.
    """

    # Logger whose output the log-checking tests inspect.
    LOGGER_NAME = "crate_anon.nlp_manager.cloud_request_sender"

    # -------------------------------------------------------------------------
    # Helpers
    # -------------------------------------------------------------------------

    def get_text(self) -> Generator[Tuple[str, Dict[str, Any]], None, None]:
        """
        Yield the (text, other_values) pairs in ``self.test_text``.

        This is a generator, so ``self.test_text`` is not read until the
        first item is requested. That is why ``setUp()`` can pass
        ``self.get_text()`` to the sender before a test has set
        ``self.test_text``.
        """
        yield from self.test_text

    @staticmethod
    def _record(text: str, pkval: int) -> Tuple[str, Dict[str, Any]]:
        """Return a (text, other_values) pair with the usual PK fields."""
        return text, {FN_SRCPKVAL: pkval, FN_SRCPKSTR: "pkstr"}

    def _three_records(self, middle_text: str = PAGODA) -> list:
        """Return three records (PKs 1-3); the middle text can be varied."""
        return [
            self._record(PANAMOWA, 1),
            self._record(middle_text, 2),
            self._record(REVOLT, 3),
        ]

    def _use_requests(self, count: int) -> None:
        """Give the sender ``count`` fresh CloudRequestProcess objects."""
        self.sender.test_requests = [
            CloudRequestProcess(crinfo=self.crinfo, nlpdef=self.nlpdef)
            for _ in range(count)
        ]

    def _mock_send(self, index: int) -> mock.MagicMock:
        """
        Replace ``send_process_request`` on the sender's request number
        ``index`` with a mock, until the end of the test, and return it.
        """
        patcher = mock.patch.object(
            self.sender.test_requests[index], "send_process_request"
        )
        self.addCleanup(patcher.stop)
        return patcher.start()

    def _assert_sent_once(
        self, mock_send: mock.Mock, cookies: Any = None
    ) -> None:
        """
        Assert that the request was sent exactly once, with these cookies.

        ``cookies`` is None unless an earlier (mocked) send stored cookies
        on its request.
        """
        mock_send.assert_called_once_with(
            queue=True,
            cookies=cookies,
            include_text_in_reply=True,  # has_gate_processors from config
        )

    @staticmethod
    def _content(request: CloudRequestProcess) -> Any:
        """Return the records queued in a request's "process" payload."""
        return request._request_process[NlprpKeys.ARGS][NlprpKeys.CONTENT]

    def _assert_logged(self, logging_cm: Any, level: str, msg: str) -> None:
        """Assert that ``msg`` was logged at ``level`` by the sender."""
        self.assertIn(f"{level}:{self.LOGGER_NAME}:{msg}", logging_cm.output)

    # -------------------------------------------------------------------------
    # Fixture
    # -------------------------------------------------------------------------

    def setUp(self) -> None:
        """Build mocked config objects and the sender under test."""
        # Set some sensible defaults here and be explicit in individual tests
        remote_processors = {("name-version", None): mock.Mock()}
        self.cloud_config = mock.Mock(
            remote_processors=remote_processors,
            limit_before_commit=1000,
            max_records_per_request=1000,
            max_content_length=50000,
            has_gate_processors=True,
        )
        # can't set name attribute in constructor here as it has special
        # meaning
        self.nlpdef = mock.Mock(
            get_cloud_config_or_raise=mock.Mock(return_value=self.cloud_config)
        )
        self.nlpdef.name = ""  # so set it here

        self.hasher = HashClass("hashphrase")

        self.crinfo = mock.Mock(
            get_remote_processors=mock.Mock(return_value=remote_processors),
            cloudcfg=self.cloud_config,
            # if we don't set this explicitly, getsize will get into an
            # infinite loop when trying to recursively weigh the mock object
            nlpdef=mock.Mock(hash=self.hasher.hash),
        )
        self.ifconfig = mock.Mock()
        self.sender = TestCloudRequestSender(
            self.get_text(),
            self.crinfo,
            self.ifconfig,
        )

    # -------------------------------------------------------------------------
    # Tests
    # -------------------------------------------------------------------------

    def test_exits_when_no_available_processors(self) -> None:
        """With no remote processors, nothing is sent or counted."""
        self.test_text = [
            ("", {"": None}),
        ]

        crinfo = mock.Mock(get_remote_processors=mock.Mock(return_value=[]))
        global_recnum_in = 123
        ifconfig = mock.Mock()

        # No need to use TestCloudRequestSender because we should never get as
        # far as creating requests.
        sender = CloudRequestSender(
            self.get_text(),
            crinfo,
            ifconfig,
        )
        (
            cloud_requests,
            records_processed,
            global_recnum_out,
        ) = sender.send_requests(global_recnum_in)

        self.assertEqual(cloud_requests, [])
        self.assertFalse(records_processed)
        self.assertEqual(global_recnum_out, global_recnum_in)

    def test_single_text_sent_in_single_request(self) -> None:
        """One record goes out in one request, with metadata and hash."""
        self.test_text = [self._record(PANAMOWA, 1)]
        global_recnum_in = 123

        self._use_requests(1)
        mock_send = self._mock_send(0)

        (
            cloud_requests,
            records_processed,
            global_recnum_out,
        ) = self.sender.send_requests(global_recnum=global_recnum_in)

        self.assertEqual(cloud_requests[0], self.sender.test_requests[0])
        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 124)

        self._assert_sent_once(mock_send)

        records = self._content(cloud_requests[0])

        self.assertEqual(records[0][NlprpKeys.METADATA][FN_SRCPKVAL], 1)
        self.assertEqual(records[0][NlprpKeys.METADATA][FN_SRCPKSTR], "pkstr")
        self.assertEqual(
            records[0][NlprpKeys.METADATA][FN_SRCHASH],
            self.hasher.hash(PANAMOWA),
        )
        self.assertEqual(records[0][NlprpKeys.TEXT], PANAMOWA)

    def test_multiple_records_sent_in_single_request(self) -> None:
        """Three small records fit into a single request."""
        self.test_text = self._three_records()
        global_recnum_in = 123

        self._use_requests(1)
        mock_send = self._mock_send(0)

        (
            cloud_requests,
            records_processed,
            global_recnum_out,
        ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(cloud_requests[0], self.sender.test_requests[0])
        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        self._assert_sent_once(mock_send)

        records = self._content(cloud_requests[0])

        self.assertEqual(records[0][NlprpKeys.METADATA][FN_SRCPKVAL], 1)
        self.assertEqual(records[1][NlprpKeys.METADATA][FN_SRCPKSTR], "pkstr")
        self.assertEqual(records[2][NlprpKeys.TEXT], REVOLT)

    def test_max_records_per_request(self) -> None:
        """max_records_per_request=1 puts each record in its own request."""
        self.test_text = self._three_records()
        global_recnum_in = 123

        self._use_requests(3)
        self.cloud_config.max_records_per_request = 1

        mock_cookies = mock.Mock()

        # noinspection PyUnusedLocal
        def remember_cookies(*args, **kwargs):
            # Simulate the first send storing cookies on its request.
            self.sender.test_requests[0].cookies = mock_cookies

        mock_send_0 = self._mock_send(0)
        mock_send_0.side_effect = remember_cookies
        mock_send_1 = self._mock_send(1)
        mock_send_2 = self._mock_send(2)

        with self.assertLogs(level=logging.INFO) as logging_cm:
            (
                requests_out,
                records_processed,
                global_recnum_out,
            ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])
        self.assertEqual(requests_out[1], self.sender.test_requests[1])
        self.assertEqual(requests_out[2], self.sender.test_requests[2])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        # First call: no cookies
        self._assert_sent_once(mock_send_0)
        # Later calls should remember cookies from the first request
        self._assert_sent_once(mock_send_1, cookies=mock_cookies)
        self._assert_sent_once(mock_send_2, cookies=mock_cookies)

        content_0 = self._content(requests_out[0])
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PANAMOWA)

        content_1 = self._content(requests_out[1])
        self.assertEqual(content_1[0][NlprpKeys.TEXT], PAGODA)

        content_2 = self._content(requests_out[2])
        self.assertEqual(content_2[0][NlprpKeys.TEXT], REVOLT)

        self._assert_logged(
            logging_cm,
            "INFO",
            "Sent request to be processed: #1 of this block",
        )
        self._assert_logged(
            logging_cm,
            "INFO",
            "Sent request to be processed: #2 of this block",
        )
        self._assert_logged(
            logging_cm,
            "INFO",
            "Sent request to be processed: #3 of this block",
        )

    def test_limit_before_commit_2(self) -> None:
        """limit_before_commit=2 stops the block after two records."""
        self.test_text = self._three_records()
        global_recnum_in = 123

        self._use_requests(1)
        self.cloud_config.limit_before_commit = 2

        mock_send = self._mock_send(0)

        (
            requests_out,
            records_processed,
            global_recnum_out,
        ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 125)

        self._assert_sent_once(mock_send)

        content_0 = self._content(requests_out[0])
        self.assertEqual(len(content_0), 2)
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PANAMOWA)
        self.assertEqual(content_0[1][NlprpKeys.TEXT], PAGODA)

    def test_max_content_length(self) -> None:
        """A record that would exceed max_content_length starts a new
        request."""
        self.test_text = self._three_records()
        global_recnum_in = 123

        self._use_requests(2)

        # json lengths: 274, ?, 533
        self.cloud_config.max_content_length = 500

        mock_send_0 = self._mock_send(0)
        mock_send_1 = self._mock_send(1)

        (
            requests_out,
            records_processed,
            global_recnum_out,
        ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])
        self.assertEqual(requests_out[1], self.sender.test_requests[1])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        # No cookies for either call: the mocked first send stored none.
        self._assert_sent_once(mock_send_0)
        self._assert_sent_once(mock_send_1)

        content_0 = self._content(requests_out[0])
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PANAMOWA)
        self.assertEqual(content_0[1][NlprpKeys.TEXT], PAGODA)

        content_1 = self._content(requests_out[1])
        self.assertEqual(content_1[0][NlprpKeys.TEXT], REVOLT)

    def test_record_bigger_than_max_content_length_skipped(self) -> None:
        """A single record too long to send is skipped with a warning."""
        short_text = "Some text with serialized length greater than 500. "
        long_text = short_text * 6
        self.test_text = self._three_records(middle_text=long_text)
        global_recnum_in = 123

        self._use_requests(2)
        self.cloud_config.max_content_length = 500

        mock_send_0 = self._mock_send(0)
        mock_send_1 = self._mock_send(1)

        with self.assertLogs(level=logging.WARNING) as logging_cm:
            (
                requests_out,
                records_processed,
                global_recnum_out,
            ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])
        self.assertEqual(requests_out[1], self.sender.test_requests[1])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        # No cookies for either call: the mocked first send stored none.
        self._assert_sent_once(mock_send_0)
        self._assert_sent_once(mock_send_1)

        content_0 = self._content(requests_out[0])
        self.assertEqual(len(content_0), 1)
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PANAMOWA)

        content_1 = self._content(requests_out[1])
        self.assertEqual(content_1[0][NlprpKeys.TEXT], REVOLT)

        self._assert_logged(
            logging_cm, "WARNING", "Skipping text that's too long to send"
        )

    def test_skips_previous_record_if_incremental(self) -> None:
        """In incremental mode, an unchanged record is not sent again."""
        self.test_text = self._three_records()
        global_recnum_in = 123

        self.sender._incremental = True
        self._use_requests(1)

        # noinspection PyUnusedLocal
        def get_progress_record(pkval: int, pkstr: str) -> Optional[NlpRecord]:

            # same as before
            if pkval == 1:
                return mock.Mock(
                    srchash=self.hasher.hash(self.test_text[0][0])
                )

            # changed
            if pkval == 2:
                return mock.Mock(srchash=self.hasher.hash(PATACA))

            # new
            return None

        self.ifconfig.get_progress_record = get_progress_record

        mock_send_0 = self._mock_send(0)

        with self.assertLogs(level=logging.DEBUG) as logging_cm:
            (
                requests_out,
                records_processed,
                global_recnum_out,
            ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        self._assert_sent_once(mock_send_0)

        content_0 = self._content(requests_out[0])
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PAGODA)

        self._assert_logged(
            logging_cm, "DEBUG", "Record previously processed; skipping"
        )
        self._assert_logged(logging_cm, "DEBUG", "Record has changed")
        self._assert_logged(logging_cm, "DEBUG", "Record is new")

    def test_log_message_frequency(self) -> None:
        """With _report_every=2, progress is logged for records 2 and 4."""
        self.test_text = [
            (
                PANAMOWA,
                {
                    FN_SRCDB: "db",
                    FN_SRCFIELD: "field",
                    FN_SRCPKFIELD: "pkfield",
                    FN_SRCPKSTR: "pkstr",
                    FN_SRCPKVAL: 1,
                    FN_SRCTABLE: "table",
                },
            ),
            (
                PAGODA,
                {
                    FN_SRCPKSTR: "pkstr",
                    FN_SRCPKVAL: 2,
                },
            ),
            (
                REVOLT,
                {
                    FN_SRCDB: "db",
                    FN_SRCFIELD: "field",
                    FN_SRCPKFIELD: "pkfield",
                    FN_SRCPKSTR: "",
                    FN_SRCPKVAL: 3,
                    FN_SRCTABLE: "table",
                },
            ),
        ]

        global_recnum_in = 1

        self.sender._report_every = 2
        self._use_requests(1)

        self.ifconfig.get_count = mock.Mock(return_value=100)
        self._mock_send(0)

        with self.assertLogs(level=logging.INFO) as logging_cm:
            self.sender.send_requests(global_recnum_in)

        self._assert_logged(
            logging_cm,
            "INFO",
            "Processing db.table.field, PK: pkfield=pkstr (record 2/100)",
        )
        self._assert_logged(
            logging_cm,
            "INFO",
            "Processing db.table.field, PK: pkfield=3 (record 4/100)",
        )

    def test_failed_request_logged(self) -> None:
        """A request marked as failed is dropped and a warning is logged."""
        self.test_text = [
            (
                PANAMOWA,
                {
                    FN_SRCPKSTR: "pkstr",
                    FN_SRCPKVAL: 1,
                },
            ),
        ]

        global_recnum_in = 1

        self._use_requests(1)

        # noinspection PyUnusedLocal
        def fail_request(*args, **kwargs):
            # Simulate the send marking its request as failed.
            self.sender.test_requests[0].request_failed = True

        mock_send_0 = self._mock_send(0)
        mock_send_0.side_effect = fail_request

        with self.assertLogs(level=logging.WARNING) as logging_cm:
            (
                requests_out,
                records_processed,
                global_recnum_out,
            ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out, [])
        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 2)

        self._assert_logged(
            logging_cm, "WARNING", "Continuing after failed request."
        )

    def test_record_with_no_text_skipped(self) -> None:
        """A whitespace-only record is counted but not added to the
        request."""
        self.test_text = self._three_records(middle_text=" \t\t\n\n ")
        global_recnum_in = 123

        self._use_requests(1)
        mock_send = self._mock_send(0)

        (
            requests_out,
            records_processed,
            global_recnum_out,
        ) = self.sender.send_requests(global_recnum_in)

        self.assertEqual(requests_out[0], self.sender.test_requests[0])

        self.assertTrue(records_processed)
        self.assertEqual(global_recnum_out, 126)

        self._assert_sent_once(mock_send)

        content_0 = self._content(requests_out[0])
        self.assertEqual(len(content_0), 2)
        self.assertEqual(content_0[0][NlprpKeys.TEXT], PANAMOWA)
        self.assertEqual(content_0[1][NlprpKeys.TEXT], REVOLT)

853 self.assertEqual(content_0[1][NlprpKeys.TEXT], REVOLT)