Coverage for preprocess/tests/systmone_ddgen_tests.py: 99%

114 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/preprocess/tests/systmone_ddgen_tests.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Unit testing. 

27 

28""" 

29 

30# ============================================================================= 

31# Imports 

32# ============================================================================= 

33 

34import csv 

35from tempfile import NamedTemporaryFile 

36from typing import List, TYPE_CHECKING 

37from unittest import mock, TestCase 

38 

39from crate_anon.anonymise.dd import DataDictionary 

40from crate_anon.anonymise.ddr import DataDictionaryRow 

41from crate_anon.preprocess.systmone_ddgen import ( 

42 core_tablename, 

43 eq, 

44 eq_re, 

45 is_free_text, 

46 is_in_re, 

47 modify_dd_for_systmone, 

48 OMIT_AND_IGNORE_TABLES_REGEX, 

49 SystmOneContext, 

50 SystmOneSRESpecRow, 

51) 

52 

53if TYPE_CHECKING: 

54 from crate_anon.anonymise.config import Config 

55 

56 

57# ============================================================================= 

58# Unit tests 

59# ============================================================================= 

60 

61 

class SystmOneDDGenTests(TestCase):
    """
    Tests for the table-name matching and free-text detection helpers
    imported from systmone_ddgen.
    """

    def test_excluded_tables(self) -> None:
        """
        Check the equality/regex helpers against table names that should
        match the omit-and-ignore patterns for the CPFT context.
        """
        context = SystmOneContext.CPFT_DW
        cpft_tablename = "S1_ReferralsOpen"  # CPFT version
        core_name = core_tablename(
            tablename=cpft_tablename,
            from_context=context,
            allow_unprefixed=True,
        )
        self.assertTrue(eq(core_name, "ReferralsOpen"))
        self.assertTrue(eq_re(core_name, "ReferralsOpen$"))

        omit_regexes = OMIT_AND_IGNORE_TABLES_REGEX[context]
        expected_matches = (
            core_name,
            "Accommodation_20210329",
            "Accommodation_20210329_blah",
            "S1_Accommodation_20210329",
        )
        for tablename in expected_matches:
            self.assertTrue(is_in_re(tablename, omit_regexes))

    def test_freetext_columns(self) -> None:
        """
        Check is_free_text() for table/column pairs in the TPP_SRE and
        CPFT_DW contexts.
        """
        sre = SystmOneContext.TPP_SRE
        cpft = SystmOneContext.CPFT_DW
        triage_table = "FreeText_CYPFRS_TelephoneTriage"
        triage_column = "RiskofAbsconding"

        # Free-text columns in all environments:
        for context in (sre, cpft):
            self.assertTrue(is_free_text("FreeText", "FreeText", context))

        # CPFT but not SRE environment:
        self.assertTrue(is_free_text(triage_table, triage_column, cpft))
        self.assertFalse(is_free_text(triage_table, triage_column, sre))

        # Not even in CPFT:
        self.assertFalse(
            is_free_text("FreeText_Honos_Scoring_Answers", "FreeText", cpft)
        )

103 

104 

class SystmOneDDGenTestCase(TestCase):
    """
    Shared base class: provides a blank specification row dictionary and a
    default SystmOne context for the tests that derive from it.
    """

    def setUp(self) -> None:
        super().setUp()

        # Template for a single specification row. Tests in this file update
        # or copy it, and also use its keys (in this order) as CSV field
        # names.
        self.src_spec_row_dict = {
            "TableName": "",
            "TableDescription": "",
            "ColumnName": "",
            "ColumnDescription": "",
            "ColumnDataType": "",
            "ColumnLength": 0,
            "DateDefining": "Yes",
            "ColumnOrdinal": 0,
            "LinkedTable": "",
            "LinkedColumn1": "",
            "LinkedColumn2": "",
        }

        self.context = SystmOneContext.CPFT_DW

124 

125 

class SystmOneSRESpecRowTests(SystmOneDDGenTestCase):
    """
    Tests for the comment and description text produced by
    SystmOneSRESpecRow.
    """

    def _make_idpatient_row(self) -> "SystmOneSRESpecRow":
        """
        Fill the template spec dictionary with the SRPatient.IDPatient
        details used by both tests and build a spec row from it.
        """
        self.src_spec_row_dict.update(
            {
                "TableName": "SRPatient",
                "ColumnName": "IDPatient",
                "TableDescription": "SRPatient description from spec",
                "ColumnDescription": "IDPatient description from spec",
            }
        )
        return SystmOneSRESpecRow(self.src_spec_row_dict)

    def test_comment_has_table_and_column_descriptions(self) -> None:
        spec_row = self._make_idpatient_row()

        expected_comment = (
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        self.assertEqual(spec_row.comment(self.context), expected_comment)

    def test_description_has_translated_table_column_and_spec_descriptions(
        self,
    ) -> None:
        spec_row = self._make_idpatient_row()

        expected_description = (
            "S1_Patient.IDPatient // "
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        self.assertEqual(
            spec_row.description(self.context), expected_description
        )

165 

166 

class TestDataDictionary(DataDictionary):
    """
    DataDictionary whose rows are supplied directly by the caller, so tests
    can build a dictionary from hand-made DataDictionaryRow objects.
    """

    # The class name matches pytest's default "Test*" class pattern, and a
    # class with an __init__ cannot be collected (pytest emits a collection
    # warning). Mark it explicitly as not being a test class; this attribute
    # has no effect under plain unittest.
    __test__ = False

    def __init__(
        self, config: "Config", rows: List[DataDictionaryRow]
    ) -> None:
        """
        Args:
            config: passed through unchanged to DataDictionary.__init__().
            rows: the rows this dictionary should contain.
        """
        super().__init__(config)

        self.rows = rows

174 

175 

class ModifyDDForSystmOneTests(SystmOneDDGenTestCase):
    """
    Tests for modify_dd_for_systmone() when comments from a specification
    CSV file are appended to an existing data dictionary.
    """

    @staticmethod
    def _make_dd_row(
        config: mock.Mock, src_field: str, comment: str
    ) -> DataDictionaryRow:
        """
        Build a data dictionary row for Source.S1_Patient with the given
        source field name and comment.
        """
        row = DataDictionaryRow(config)
        row.src_db = "Source"
        row.src_table = "S1_Patient"
        row.src_field = src_field
        row.comment = comment
        return row

    def _write_patient_spec_csv(self) -> str:
        """
        Write a specification CSV describing SRPatient.IDPatient and
        SRPatient.NHSNumber to a temporary file.

        The file is removed automatically when the test finishes.

        Returns:
            the name of the temporary CSV file
        """
        import os  # local: only needed for temporary file cleanup

        spec_rows = []
        for column_name in ("IDPatient", "NHSNumber"):
            spec_row = self.src_spec_row_dict.copy()
            spec_row.update(
                TableName="SRPatient",
                ColumnName=column_name,
                TableDescription="SRPatient description from spec",
                ColumnDescription=column_name + " description from spec",
            )
            spec_rows.append(spec_row)

        # delete=False so that the file survives being closed and can be
        # reopened by name; newline="" is what the csv module requires of
        # files it writes to.
        with NamedTemporaryFile(delete=False, mode="w", newline="") as f:
            self.addCleanup(os.remove, f.name)
            writer = csv.DictWriter(
                f, fieldnames=list(self.src_spec_row_dict)
            )
            writer.writeheader()
            writer.writerows(spec_rows)

        return f.name

    def _assert_column_comments_appended(self, dd: DataDictionary) -> None:
        """
        Assert that rows 1 and 2 of the dictionary are the IDPatient and
        NHSNumber rows, with the spec descriptions appended to their
        original comments.
        """
        self.assertEqual(
            dd.rows[1].comment,
            (
                "IDPatient comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: IDPatient description from spec"
            ),
        )
        self.assertEqual(
            dd.rows[2].comment,
            (
                "NHSNumber comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: NHSNumber description from spec"
            ),
        )

    def test_table_comments_from_spec_added_to_data_dictionary(self) -> None:
        mock_config = mock.Mock()
        id_row = self._make_dd_row(
            mock_config, "IDPatient", "IDPatient comment"
        )
        nhs_row = self._make_dd_row(
            mock_config, "NHSNumber", "NHSNumber comment"
        )
        dd = TestDataDictionary(mock_config, [id_row, nhs_row])

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_patient_spec_csv(),
            append_comments=True,
        )

        # Two column rows plus a newly added table comment row
        self.assertEqual(len(dd.rows), 3)

        # Comment row is sorted to the top
        self.assertEqual(dd.rows[0].comment, "SRPatient description from spec")
        self._assert_column_comments_appended(dd)

    def test_ddr_existing_table_comment_appended_with_spec_description(
        self,
    ) -> None:
        mock_config = mock.Mock()
        id_row = self._make_dd_row(
            mock_config, "IDPatient", "IDPatient comment"
        )
        nhs_row = self._make_dd_row(
            mock_config, "NHSNumber", "NHSNumber comment"
        )
        # A row with an empty source field holds the table-level comment
        table_row = self._make_dd_row(
            mock_config, "", "Existing table comment"
        )
        dd = TestDataDictionary(mock_config, [id_row, nhs_row, table_row])

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_patient_spec_csv(),
            append_comments=True,
        )

        # No extra row: the existing table comment row is reused
        self.assertEqual(len(dd.rows), 3)

        # Comment row is sorted to the top
        self.assertEqual(
            dd.rows[0].comment,
            "Existing table comment // SRPatient description from spec",
        )
        self._assert_column_comments_appended(dd)