Coverage for anonymise/tests/researcher_report_tests.py: 97%

101 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2026-01-08 09:05 -0600

1""" 

2crate_anon/anonymise/tests/researcher_report_tests.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Researcher report tests. 

27 

28""" 

29 

30import os.path 

31import random 

32from tempfile import TemporaryDirectory 

33from typing import List, TYPE_CHECKING 

34from unittest import mock 

35 

36import factory 

37from pypdf import PdfReader 

38import pytest 

39from sqlalchemy import ( 

40 Column, 

41 DateTime, 

42 ForeignKey, 

43 Integer, 

44 Text, 

45) 

46from sqlalchemy.orm import relationship 

47 

48from crate_anon.anonymise.researcher_report import ( 

49 mk_researcher_report_pdf, 

50 ResearcherReportConfig, 

51 TEMPLATE_DIR, 

52) 

53from crate_anon.testing import AnonTestBase 

54from crate_anon.testing.classes import DatabaseTestCase 

55from crate_anon.testing.factories import AnonTestBaseFactory, Fake 

56from crate_anon.testing.models import SexColType 

57 

58if TYPE_CHECKING: 

59 from django.conf import LazySettings 

60 from factory.builder import Resolver 

61 

62 

class AnonNote(AnonTestBase):
    """
    Table of notes (``anon_note``); each row refers to a row of
    ``anon_patient`` via ``patient_id``.
    """

    __tablename__ = "anon_note"

    note_id = Column(
        Integer,
        primary_key=True,
        comment="Note ID",
    )
    patient_id = Column(
        Integer,
        ForeignKey("anon_patient.patient_id"),
        comment="Patient ID",
    )
    note = Column(
        Text,
        comment="Text of the note",
    )
    note_datetime = Column(
        DateTime,
        comment="Date/time of the note",
    )

    # ORM link to the owning patient, resolved by class name because
    # AnonPatient is declared after this class.
    patient = relationship("AnonPatient")

74 

75 

class AnonPatient(AnonTestBase):
    """
    Table of patients (``anon_patient``), keyed by an explicitly assigned
    (non-autoincrementing) ``patient_id``.
    """

    __tablename__ = "anon_patient"

    # The primary key is supplied by the caller rather than generated by the
    # database.
    patient_id = Column(
        Integer, primary_key=True, autoincrement=False, comment="Patient ID"
    )
    sex = Column(SexColType, comment="Sex (M, F, X)")
    age = Column(
        Integer,
        comment="Age",
    )

90 

91 

class AnonPatientFactory(AnonTestBaseFactory):
    """
    Factory for :class:`AnonPatient` rows.

    Pass ``notes=<n>`` to also create ``n`` :class:`AnonNote` rows for each
    patient that is created.
    """

    class Meta:
        model = AnonPatient

    # Patient IDs start at 1 and increase with each generated patient.
    patient_id = factory.Sequence(lambda index: index + 1)

    sex = factory.LazyFunction(Fake.en_gb.sex)
    age = factory.LazyFunction(Fake.en_gb.age)

    @factory.post_generation
    def notes(obj: "Resolver", create: bool, extracted: int, **kwargs) -> None:
        """
        Create ``extracted`` notes for the new patient.

        Does nothing unless the patient was actually created (rather than
        just built) and a non-zero number of notes was requested. Any extra
        keyword arguments are forwarded to :class:`AnonNoteFactory`.
        """
        if create and extracted:
            AnonNoteFactory.create_batch(size=extracted, patient=obj, **kwargs)

108 

109 

class AnonNoteFactory(AnonTestBaseFactory):
    """
    Factory for :class:`AnonNote` rows.

    The ``words_per_note`` parameter (default 100) sets the maximum number of
    words in the generated note text.
    """

    class Meta:
        model = AnonNote

    class Params:
        words_per_note = 100

    note_datetime = factory.LazyFunction(Fake.en_gb.incrementing_date)

    @factory.lazy_attribute
    def note(obj: "Resolver") -> str:
        """
        Return fake text truncated to at most ``words_per_note`` words.
        """
        # Use en_US because you get Lorem ipsum with en_GB.
        paragraph = Fake.en_us.paragraph(
            # Integer division: Faker expects an integer sentence count.
            # This is still way more than we need.
            nb_sentences=obj.words_per_note // 2,
        )

        words = paragraph.split()

        return " ".join(words[: obj.words_per_note])

127 

128 

@pytest.fixture
def django_test_settings(settings: "LazySettings") -> None:
    """
    Point the Django template engine at the researcher report templates.

    Replaces ``settings.TEMPLATES`` with a single Django-backend entry whose
    only template directory is ``TEMPLATE_DIR``. The ``settings`` argument is
    presumably the pytest-django settings fixture -- confirm against the
    project's pytest configuration.
    """
    django_backend = {
        "BACKEND": "django.template.backends.django.DjangoTemplates",
        "DIRS": [TEMPLATE_DIR],
    }
    settings.TEMPLATES = [django_backend]

137 

138 

class ResearcherReportTests(DatabaseTestCase):
    """
    Tests that generate a researcher report PDF from a populated test
    database and check its per-table pages.
    """

    def setUp(self) -> None:
        """
        Populate the database with patients and notes, and create a temporary
        directory (removed automatically after the test) for the report.
        """
        super().setUp()

        self.num_patients = 100
        self.notes_per_patient = 5
        seed = 1234

        # Seed both the global python RNG and Faker's RNG as we don't use Faker
        # for everything and Factory Boy's interface with Faker doesn't seem to
        # allow for sharing with the global RNG used by python (though Faker on
        # its own does). The value of the seed isn't particularly important
        # unless we're checking particular details but it's better to have one
        # for consistency of tests.
        random.seed(seed)
        factory.random.reseed_random(seed)

        AnonPatientFactory.create_batch(
            self.num_patients, notes=self.notes_per_patient
        )
        self.anon_dbsession.commit()

        self.tempdir = TemporaryDirectory()
        # Remove the directory (and the report written into it) once the test
        # has finished, whether it passes or fails, rather than leaving it to
        # implicit finalisation.
        self.addCleanup(self.tempdir.cleanup)

    @pytest.mark.usefixtures("django_test_settings")
    def test_report_has_pages_for_each_table(self) -> None:
        """
        The report must contain a page for each table, showing the table's
        name on the first line and the correct row count.
        """

        def index_of_list_substring(items: List[str], substr: str) -> int:
            """
            Return the index of the first item containing ``substr``, or -1
            if no item contains it.
            """
            return next(
                (i for i, item in enumerate(items) if substr in item),
                -1,
            )

        anon_config = mock.Mock()

        reportfilename = os.path.join(self.tempdir.name, "tmpreport.pdf")

        # Create the (empty) output file and keep it open while the report is
        # generated, as before; the handle itself is not used.
        with open(reportfilename, mode="w"):
            mock_db = mock.Mock(
                table_names=["anon_patient", "anon_note"],
                metadata=AnonTestBase.metadata,
            )

            # Replace __post_init__ with a mock so that building the config
            # does not run it; the attributes the report needs are set by
            # hand below.
            with mock.patch.multiple(
                "crate_anon.anonymise.researcher_report.ResearcherReportConfig",  # noqa: E501
                __post_init__=mock.Mock(),
            ):
                report_config = ResearcherReportConfig(
                    output_filename=reportfilename,
                    anonconfig=anon_config,
                    use_dd=False,
                )
                report_config.db_session = self.anon_dbsession
                report_config.db = mock_db
                mk_researcher_report_pdf(report_config)

        with open(reportfilename, "rb") as f:
            reader = PdfReader(f)

            patient_found = False
            note_found = False
            for page in reader.pages:
                # Sometimes spaces come back as tabs; fix that.
                lines = page.extract_text().replace("\t", " ").splitlines()

                # The label text here is from
                # crate_anon/anonymise/templates/researcher_report/table.html.
                rows_index = index_of_list_substring(
                    lines,
                    "Number of rows in this table:",
                )

                if rows_index < 0:
                    # Not a per-table page.
                    continue

                # The row count is on the line after the label; the table
                # name is the first line of the page.
                num_rows = int(lines[rows_index + 1])
                table_name = lines[0]

                if table_name == "anon_patient":
                    patient_found = True
                    self.assertEqual(num_rows, self.num_patients)

                elif table_name == "anon_note":
                    note_found = True
                    self.assertEqual(
                        num_rows, self.num_patients * self.notes_per_patient
                    )

        self.assertTrue(patient_found)
        self.assertTrue(note_found)

228 self.assertTrue(note_found)