Coverage for preprocess/tests/systmone_ddgen_tests.py: 99%
114 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/preprocess/tests/systmone_ddgen_tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Unit testing.
28"""
30# =============================================================================
31# Imports
32# =============================================================================
34import csv
35from tempfile import NamedTemporaryFile
36from typing import List, TYPE_CHECKING
37from unittest import mock, TestCase
39from crate_anon.anonymise.dd import DataDictionary
40from crate_anon.anonymise.ddr import DataDictionaryRow
41from crate_anon.preprocess.systmone_ddgen import (
42 core_tablename,
43 eq,
44 eq_re,
45 is_free_text,
46 is_in_re,
47 modify_dd_for_systmone,
48 OMIT_AND_IGNORE_TABLES_REGEX,
49 SystmOneContext,
50 SystmOneSRESpecRow,
51)
53if TYPE_CHECKING:
54 from crate_anon.anonymise.config import Config
57# =============================================================================
58# Unit tests
59# =============================================================================
class SystmOneDDGenTests(TestCase):
    """
    Checks on the table-name and free-text helper functions imported from
    the SystmOne data dictionary generator.
    """

    def test_excluded_tables(self) -> None:
        """
        Exercise the regex helpers used when excluding tables.
        """
        context = SystmOneContext.CPFT_DW
        # "S1_ReferralsOpen" is the CPFT version of the table name.
        core_name = core_tablename(
            tablename="S1_ReferralsOpen",
            from_context=context,
            allow_unprefixed=True,
        )
        self.assertTrue(eq(core_name, "ReferralsOpen"))
        self.assertTrue(eq_re(core_name, "ReferralsOpen$"))

        # Every one of these names must match the omit/ignore regexes.
        omit_regexes = OMIT_AND_IGNORE_TABLES_REGEX[context]
        for tablename in (
            core_name,
            "Accommodation_20210329",
            "Accommodation_20210329_blah",
            "S1_Accommodation_20210329",
        ):
            self.assertTrue(is_in_re(tablename, omit_regexes))

    def test_freetext_columns(self) -> None:
        """
        Check is_free_text() answers for the TPP SRE and CPFT contexts.
        """
        tpp_sre = SystmOneContext.TPP_SRE
        cpft_dw = SystmOneContext.CPFT_DW

        # Reported as free text in both environments:
        for context in (tpp_sre, cpft_dw):
            self.assertTrue(is_free_text("FreeText", "FreeText", context))

        # Reported as free text for CPFT but not for the SRE:
        triage_args = ("FreeText_CYPFRS_TelephoneTriage", "RiskofAbsconding")
        self.assertTrue(is_free_text(*triage_args, cpft_dw))
        self.assertFalse(is_free_text(*triage_args, tpp_sre))

        # Not reported as free text even for CPFT:
        self.assertFalse(
            is_free_text("FreeText_Honos_Scoring_Answers", "FreeText", cpft_dw)
        )
class SystmOneDDGenTestCase(TestCase):
    """
    Base test case providing a blank specification row dictionary and a
    SystmOne context for subclasses to use.
    """

    def setUp(self) -> None:
        """
        Create fresh per-test fixtures: ``self.context`` and
        ``self.src_spec_row_dict``.
        """
        super().setUp()

        self.context = SystmOneContext.CPFT_DW

        # Template specification row; tests copy or update it as needed.
        # Key order matters to tests that use the keys as CSV field names.
        self.src_spec_row_dict = {
            "TableName": "",
            "TableDescription": "",
            "ColumnName": "",
            "ColumnDescription": "",
            "ColumnDataType": "",
            "ColumnLength": 0,
            "DateDefining": "Yes",
            "ColumnOrdinal": 0,
            "LinkedTable": "",
            "LinkedColumn1": "",
            "LinkedColumn2": "",
        }
class SystmOneSRESpecRowTests(SystmOneDDGenTestCase):
    """
    Tests for the comment and description text produced by
    SystmOneSRESpecRow.
    """

    def _make_idpatient_row(self) -> SystmOneSRESpecRow:
        """
        Fill in the shared specification dictionary for SRPatient.IDPatient
        and wrap it in a SystmOneSRESpecRow.
        """
        self.src_spec_row_dict.update(
            {
                "TableName": "SRPatient",
                "ColumnName": "IDPatient",
                "TableDescription": "SRPatient description from spec",
                "ColumnDescription": "IDPatient description from spec",
            }
        )
        return SystmOneSRESpecRow(self.src_spec_row_dict)

    def test_comment_has_table_and_column_descriptions(self) -> None:
        """
        comment() should combine the table and column descriptions.
        """
        expected = (
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        spec_row = self._make_idpatient_row()

        self.assertEqual(spec_row.comment(self.context), expected)

    def test_description_has_translated_table_column_and_spec_descriptions(
        self,
    ) -> None:
        """
        description() should start with the translated table/column name,
        followed by the table and column descriptions.
        """
        expected = (
            "S1_Patient.IDPatient // "
            "TABLE: SRPatient description from spec // "
            "COLUMN: IDPatient description from spec"
        )
        spec_row = self._make_idpatient_row()

        actual = spec_row.description(self.context)
        self.assertEqual(actual, expected)
class TestDataDictionary(DataDictionary):
    """
    Data dictionary whose rows are supplied directly by the test, for use as
    a fixture.

    This is a helper, not a test case.
    """

    # The class name starts with "Test", so under pytest's default
    # collection rules it would be considered for collection (and warned
    # about, because it defines __init__). Opt out explicitly.
    __test__ = False

    def __init__(
        self, config: "Config", rows: List[DataDictionaryRow]
    ) -> None:
        """
        Args:
            config: passed through to the DataDictionary constructor
            rows: rows to install as ``self.rows``
        """
        super().__init__(config)

        self.rows = rows
class ModifyDDForSystmOneTests(SystmOneDDGenTestCase):
    """
    Tests for modify_dd_for_systmone() when comments from an SRE
    specification CSV file are appended to an existing data dictionary.
    """

    @staticmethod
    def _make_dd_row(
        config: "Config", src_field: str, comment: str
    ) -> DataDictionaryRow:
        """
        Build a data dictionary row for table "S1_Patient" in database
        "Source".

        Args:
            config: config object handed to DataDictionaryRow
            src_field: source field name ("" is used by these tests for the
                table-level comment row)
            comment: the row's existing comment
        """
        row = DataDictionaryRow(config)
        row.src_db = "Source"
        row.src_table = "S1_Patient"
        row.src_field = src_field
        row.comment = comment
        return row

    def _write_patient_spec_csv(self) -> str:
        """
        Write a temporary specification CSV with rows for the IDPatient and
        NHSNumber columns of SRPatient.

        Returns:
            the filename of the CSV file, which is deleted again when the
            test finishes
        """
        import os  # local import: only needed for temporary file cleanup

        # delete=False so the file can be reopened by name once closed;
        # newline="" as the csv module asks for files it writes to.
        with NamedTemporaryFile(delete=False, mode="w", newline="") as f:
            # Register cleanup straight away so the file does not outlive
            # the test, even if writing fails.
            self.addCleanup(os.remove, f.name)

            writer = csv.DictWriter(
                f, fieldnames=list(self.src_spec_row_dict)
            )
            writer.writeheader()
            for column_name in ("IDPatient", "NHSNumber"):
                spec_row = self.src_spec_row_dict.copy()
                spec_row.update(
                    TableName="SRPatient",
                    ColumnName=column_name,
                    TableDescription="SRPatient description from spec",
                    ColumnDescription=f"{column_name} description from spec",
                )
                writer.writerow(spec_row)

        return f.name

    def _assert_column_comments_appended(self, dd: DataDictionary) -> None:
        """
        Assert that rows 1 and 2 of ``dd`` carry their original comment
        followed by the table and column descriptions from the CSV.
        """
        self.assertEqual(
            dd.rows[1].comment,
            (
                "IDPatient comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: IDPatient description from spec"
            ),
        )
        self.assertEqual(
            dd.rows[2].comment,
            (
                "NHSNumber comment // "
                "TABLE: SRPatient description from spec // "
                "COLUMN: NHSNumber description from spec"
            ),
        )

    def test_table_comments_from_spec_added_to_data_dictionary(self) -> None:
        """
        With no existing table comment row, one is added from the CSV.
        """
        mock_config = mock.Mock()
        dd = TestDataDictionary(
            mock_config,
            [
                self._make_dd_row(
                    mock_config, "IDPatient", "IDPatient comment"
                ),
                self._make_dd_row(
                    mock_config, "NHSNumber", "NHSNumber comment"
                ),
            ],
        )

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_patient_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)

        # Comment row is sorted to the top
        self.assertEqual(dd.rows[0].comment, "SRPatient description from spec")
        self._assert_column_comments_appended(dd)

    def test_ddr_existing_table_comment_appended_with_spec_description(
        self,
    ) -> None:
        """
        An existing table comment row has the description from the CSV
        appended to it, rather than a further row being added.
        """
        mock_config = mock.Mock()
        dd = TestDataDictionary(
            mock_config,
            [
                self._make_dd_row(
                    mock_config, "IDPatient", "IDPatient comment"
                ),
                self._make_dd_row(
                    mock_config, "NHSNumber", "NHSNumber comment"
                ),
                self._make_dd_row(mock_config, "", "Existing table comment"),
            ],
        )

        modify_dd_for_systmone(
            dd,
            self.context,
            sre_spec_csv_filename=self._write_patient_spec_csv(),
            append_comments=True,
        )

        self.assertEqual(len(dd.rows), 3)

        # Comment row is sorted to the top
        self.assertEqual(
            dd.rows[0].comment,
            "Existing table comment // SRPatient description from spec",
        )
        self._assert_column_comments_appended(dd)