Coverage for preprocess/tests/autoimport_db_tests.py: 100%
80 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/preprocess/tests/autoimport_db_tests.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26Unit testing.
28"""
30# =============================================================================
31# Imports
32# =============================================================================
34import datetime
35from unittest import TestCase
37import pendulum
38from sqlalchemy import (
39 BigInteger,
40 Boolean,
41 Date,
42 DateTime,
43 Float,
44 String,
45)
47from crate_anon.preprocess.autoimport_db import (
48 ColumnTypeDetector,
49 is_date_like_not_datetime_like,
50 is_datetime_or_date_like,
51)
54# =============================================================================
55# Unit tests
56# =============================================================================
58DUMMY_COLNAME = "somecolumn"
60DUMMY_DATE = datetime.date(2000, 12, 31)
61DUMMY_DATE_P = pendulum.Date(2000, 12, 31)
62DUMMY_DATE_STR = "2000-12-31"
64DUMMY_DATETIME = datetime.datetime(2000, 12, 31, 23, 59, 59)
65DUMMY_DATETIME_P = pendulum.DateTime(2000, 12, 31, 23, 59, 59)
66DUMMY_DATETIME_STR = "2000-12-31T23:59:59"
68DUMMY_DATETIME_P_DATEONLY = pendulum.DateTime(2000, 12, 31)
71class AutoImportDBTests(TestCase):
72 """
73 Test automatic column type detection.
74 """
76 def test_is_date_like(self) -> None:
77 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE), True)
78 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE_P), True)
79 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE_STR), True)
81 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATETIME), False)
82 self.assertEqual(
83 is_date_like_not_datetime_like(DUMMY_DATETIME_P), False
84 )
85 self.assertEqual(
86 is_date_like_not_datetime_like(DUMMY_DATETIME_STR), False
87 )
89 self.assertEqual(
90 is_date_like_not_datetime_like(DUMMY_DATETIME_P_DATEONLY), True
91 )
93 def test_is_datetime_like(self) -> None:
94 # Dates are also datetime-like.
95 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE), True)
96 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE_P), True)
97 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE_STR), True)
99 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME), True)
100 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME_P), True)
101 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME_STR), True)
103 self.assertEqual(
104 is_datetime_or_date_like(DUMMY_DATETIME_P_DATEONLY), True
105 )
107 def test_datatype_detection_missing(self) -> None:
108 # No data:
109 d = ColumnTypeDetector(DUMMY_COLNAME)
110 self.assertRaises(ValueError, d.sqlalchemy_column)
112 def test_datatype_detection_null(self) -> None:
113 # Only NULL data:
114 d = ColumnTypeDetector(DUMMY_COLNAME, [None])
115 self.assertRaises(ValueError, d.sqlalchemy_column)
117 def test_datatype_detection_int(self) -> None:
118 # Integers:
119 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, None])
120 c = d.sqlalchemy_column()
121 self.assertEqual(type(c.type), BigInteger)
122 self.assertEqual(c.nullable, True)
123 # ... and rejecting inappropriate nullable=False:
124 self.assertRaises(ValueError, d.sqlalchemy_column, nullable=False)
126 def test_datatype_detection_float(self) -> None:
127 # Float:
128 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, 2.5, None])
129 c = d.sqlalchemy_column()
130 self.assertEqual(type(c.type), Float)
131 self.assertEqual(c.nullable, True)
133 def test_datatype_detection_str_null(self) -> None:
134 # String:
135 d = ColumnTypeDetector(DUMMY_COLNAME, ["hello", "world", None])
136 c = d.sqlalchemy_column()
137 self.assertEqual(type(c.type), String)
138 self.assertEqual(c.nullable, True)
140 def test_datatype_detection_str_not_null(self) -> None:
141 # String, NOT NULL:
142 d = ColumnTypeDetector(DUMMY_COLNAME, ["hello", "world"])
143 c = d.sqlalchemy_column(nullable=False)
144 self.assertEqual(type(c.type), String)
145 self.assertEqual(c.nullable, False)
147 def test_datatype_detection_bad_mix(self) -> None:
148 # Inappropriately mixed data:
149 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, 2.5, "hello", None])
150 self.assertRaises(ValueError, d.sqlalchemy_column)
152 def test_datatype_detection_date(self) -> None:
153 # Dates
154 d = ColumnTypeDetector(
155 DUMMY_COLNAME,
156 [
157 DUMMY_DATE,
158 DUMMY_DATE_P,
159 DUMMY_DATE_STR,
160 DUMMY_DATETIME_P_DATEONLY,
161 None,
162 ],
163 )
164 c = d.sqlalchemy_column()
165 self.assertEqual(type(c.type), Date)
167 def test_datatype_detection_datetime(self) -> None:
168 # Dates
169 d = ColumnTypeDetector(
170 DUMMY_COLNAME,
171 [DUMMY_DATETIME, DUMMY_DATETIME_P, DUMMY_DATETIME_STR, None],
172 )
173 c = d.sqlalchemy_column()
174 self.assertEqual(type(c.type), DateTime)
176 def test_datatype_detection_mixed_date_datetime(self) -> None:
177 # Dates plus datetimes should resolve to datetime.
178 d = ColumnTypeDetector(
179 DUMMY_COLNAME,
180 [
181 DUMMY_DATE,
182 DUMMY_DATE_STR,
183 DUMMY_DATETIME,
184 DUMMY_DATETIME_STR,
185 None,
186 ],
187 )
188 c = d.sqlalchemy_column()
189 self.assertEqual(type(c.type), DateTime)
191 def test_datatype_detection_mixed_str_date_datetime(self) -> None:
192 # Dates plus datetimes plus other strings should resolve to str.
193 d = ColumnTypeDetector(
194 DUMMY_COLNAME,
195 [
196 DUMMY_DATE,
197 DUMMY_DATE_STR,
198 DUMMY_DATETIME,
199 DUMMY_DATETIME_STR,
200 "hello",
201 None,
202 ],
203 )
204 c = d.sqlalchemy_column()
205 self.assertEqual(type(c.type), String)
207 def test_datatype_detection_bool(self) -> None:
208 # Boolean
209 d = ColumnTypeDetector(
210 DUMMY_COLNAME,
211 [
212 True,
213 False,
214 None,
215 ],
216 )
217 c = d.sqlalchemy_column()
218 self.assertEqual(type(c.type), Boolean)