Coverage for anonymise/tests/researcher_report_tests.py: 97%
101 statements
« prev ^ index » next coverage.py v7.8.0, created at 2026-01-08 09:05 -0600
« prev ^ index » next coverage.py v7.8.0, created at 2026-01-08 09:05 -0600
1"""
2crate_anon/anonymise/tests/researcher_report_tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Researcher report tests.
28"""
30import os.path
31import random
32from tempfile import TemporaryDirectory
33from typing import List, TYPE_CHECKING
34from unittest import mock
36import factory
37from pypdf import PdfReader
38import pytest
39from sqlalchemy import (
40 Column,
41 DateTime,
42 ForeignKey,
43 Integer,
44 Text,
45)
46from sqlalchemy.orm import relationship
48from crate_anon.anonymise.researcher_report import (
49 mk_researcher_report_pdf,
50 ResearcherReportConfig,
51 TEMPLATE_DIR,
52)
53from crate_anon.testing import AnonTestBase
54from crate_anon.testing.classes import DatabaseTestCase
55from crate_anon.testing.factories import AnonTestBaseFactory, Fake
56from crate_anon.testing.models import SexColType
58if TYPE_CHECKING:
59 from django.conf import LazySettings
60 from factory.builder import Resolver
class AnonNote(AnonTestBase):
    """
    Test table of free-text notes; each row refers to a row in
    ``anon_patient`` via ``patient_id``.
    """

    __tablename__ = "anon_note"

    # Primary key of the note.
    note_id = Column(Integer, primary_key=True, comment="Note ID")
    # Foreign key to the owning patient.
    patient_id = Column(
        Integer,
        ForeignKey("anon_patient.patient_id"),
        comment="Patient ID",
    )
    # Body text and timestamp of the note.
    note = Column(Text, comment="Text of the note")
    note_datetime = Column(DateTime, comment="Date/time of the note")

    # ORM-level link to the AnonPatient row referenced by patient_id.
    patient = relationship("AnonPatient")
class AnonPatient(AnonTestBase):
    """
    Test table of patients, referenced by ``anon_note.patient_id``.
    """

    __tablename__ = "anon_patient"

    # Primary key; autoincrement is off, so the value must be supplied
    # explicitly when a row is created.
    patient_id = Column(
        Integer, primary_key=True, autoincrement=False, comment="Patient ID"
    )
    sex = Column(SexColType, comment="Sex (M, F, X)")
    age = Column(Integer, comment="Age")
class AnonPatientFactory(AnonTestBaseFactory):
    """
    Factory for :class:`AnonPatient` rows, optionally creating a batch of
    notes for each patient (``notes=<count>``).
    """

    class Meta:
        model = AnonPatient

    # Sequence counters start at 0; shift so that patient IDs start at 1.
    patient_id = factory.Sequence(lambda index: index + 1)

    sex = factory.LazyFunction(Fake.en_gb.sex)
    age = factory.LazyFunction(Fake.en_gb.age)

    @factory.post_generation
    def notes(obj: "Resolver", create: bool, extracted: int, **kwargs) -> None:
        """
        Create ``extracted`` notes for the new patient.

        Does nothing unless the patient is actually being created and a
        non-zero note count was passed in. Extra keyword arguments are
        forwarded to :class:`AnonNoteFactory`.
        """
        if create and extracted:
            AnonNoteFactory.create_batch(size=extracted, patient=obj, **kwargs)
class AnonNoteFactory(AnonTestBaseFactory):
    """
    Factory for :class:`AnonNote` rows containing generated text.

    The ``words_per_note`` parameter (default 100) caps the number of words
    in each generated note.
    """

    class Meta:
        model = AnonNote

    class Params:
        words_per_note = 100

    note_datetime = factory.LazyFunction(Fake.en_gb.incrementing_date)

    @factory.lazy_attribute
    def note(obj: "Resolver") -> str:
        """
        Return generated text truncated to at most ``words_per_note`` words.
        """
        # Use en_US because you get Lorem ipsum with en_GB.
        paragraph = Fake.en_us.paragraph(
            # nb_sentences is a count, so use integer division rather than
            # passing a float. This is still way more than we need.
            nb_sentences=obj.words_per_note // 2,
        )

        return " ".join(paragraph.split()[: obj.words_per_note])
@pytest.fixture
def django_test_settings(settings: "LazySettings") -> None:
    """
    Point the Django template engine at the researcher report templates.

    Replaces ``settings.TEMPLATES`` with a single Django-templates backend
    whose only search directory is ``TEMPLATE_DIR``.
    """
    report_template_engine = {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [TEMPLATE_DIR],
    }
    settings.TEMPLATES = [report_template_engine]
class ResearcherReportTests(DatabaseTestCase):
    """
    Tests for the researcher report PDF, run against a small test database
    of patients and notes built with the factories in this file.
    """

    def setUp(self) -> None:
        """
        Populate the test database and create a scratch directory for the
        generated report.
        """
        super().setUp()

        self.num_patients = 100
        self.notes_per_patient = 5
        seed = 1234

        # Seed both the global python RNG and Faker's RNG as we don't use Faker
        # for everything and Factory Boy's interface with Faker doesn't seem to
        # allow for sharing with the global RNG used by python (though Faker on
        # its own does). The value of the seed isn't particularly important
        # unless we're checking particular details but it's better to have one
        # for consistency of tests.
        random.seed(seed)
        factory.random.reseed_random(seed)

        AnonPatientFactory.create_batch(
            self.num_patients, notes=self.notes_per_patient
        )
        self.anon_dbsession.commit()

        self.tempdir = TemporaryDirectory()
        # Remove the directory (and the report written into it) once the
        # test has finished, whether it passed or failed, rather than
        # relying on implicit clean-up at garbage collection.
        self.addCleanup(self.tempdir.cleanup)

    @pytest.mark.usefixtures("django_test_settings")
    def test_report_has_pages_for_each_table(self) -> None:
        """
        Generate the report and check that it has a page for each table,
        each showing the expected number of rows.
        """

        def index_of_list_substring(items: List[str], substr: str) -> int:
            """
            Return the index of the first item containing ``substr``, or -1
            if no item contains it.
            """
            for i, item in enumerate(items):
                if substr in item:
                    return i

            return -1

        anon_config = mock.Mock()

        reportfilename = os.path.join(self.tempdir.name, "tmpreport.pdf")

        # The file is opened for writing while the report is generated, but
        # the handle is not used directly: the report is produced via the
        # filename passed in the config.
        with open(reportfilename, mode="w"):
            mock_db = mock.Mock(
                table_names=["anon_patient", "anon_note"],
                metadata=AnonTestBase.metadata,
            )

            # Replace __post_init__ with a mock while the config is built
            # and used; the database session and database object are then
            # assigned by hand below.
            with mock.patch.multiple(
                "crate_anon.anonymise.researcher_report.ResearcherReportConfig",  # noqa: E501
                __post_init__=mock.Mock(),
            ):
                report_config = ResearcherReportConfig(
                    output_filename=reportfilename,
                    anonconfig=anon_config,
                    use_dd=False,
                )
                report_config.db_session = self.anon_dbsession
                report_config.db = mock_db
                mk_researcher_report_pdf(report_config)

        with open(reportfilename, "rb") as f:
            reader = PdfReader(f)

            patient_found = False
            note_found = False
            for page in reader.pages:
                # Sometimes spaces come back as tabs; fix that.
                lines = page.extract_text().replace("\t", " ").splitlines()

                # The label text here is from
                # crate_anon/anonymise/templates/researcher_report/table.html.
                rows_index = index_of_list_substring(
                    lines,
                    "Number of rows in this table:",
                )

                if rows_index < 0:
                    # Not a table page.
                    continue

                # The row count is read from the line after the label, and
                # the table name from the first line of the page.
                num_rows = int(lines[rows_index + 1])
                table_name = lines[0]

                if table_name == "anon_patient":
                    patient_found = True
                    self.assertEqual(num_rows, self.num_patients)

                elif table_name == "anon_note":
                    note_found = True
                    self.assertEqual(
                        num_rows, self.num_patients * self.notes_per_patient
                    )

            self.assertTrue(patient_found)
            self.assertTrue(note_found)