Coverage for preprocess/tests/autoimport_db_tests.py: 100%

80 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/preprocess/tests/autoimport_db_tests.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26Unit testing. 

27 

28""" 

29 

30# ============================================================================= 

31# Imports 

32# ============================================================================= 

33 

34import datetime 

35from unittest import TestCase 

36 

37import pendulum 

38from sqlalchemy import ( 

39 BigInteger, 

40 Boolean, 

41 Date, 

42 DateTime, 

43 Float, 

44 String, 

45) 

46 

47from crate_anon.preprocess.autoimport_db import ( 

48 ColumnTypeDetector, 

49 is_date_like_not_datetime_like, 

50 is_datetime_or_date_like, 

51) 

52 

53 

54# ============================================================================= 

55# Unit tests 

56# ============================================================================= 

57 

58DUMMY_COLNAME = "somecolumn" 

59 

60DUMMY_DATE = datetime.date(2000, 12, 31) 

61DUMMY_DATE_P = pendulum.Date(2000, 12, 31) 

62DUMMY_DATE_STR = "2000-12-31" 

63 

64DUMMY_DATETIME = datetime.datetime(2000, 12, 31, 23, 59, 59) 

65DUMMY_DATETIME_P = pendulum.DateTime(2000, 12, 31, 23, 59, 59) 

66DUMMY_DATETIME_STR = "2000-12-31T23:59:59" 

67 

68DUMMY_DATETIME_P_DATEONLY = pendulum.DateTime(2000, 12, 31) 

69 

70 

71class AutoImportDBTests(TestCase): 

72 """ 

73 Test automatic column type detection. 

74 """ 

75 

76 def test_is_date_like(self) -> None: 

77 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE), True) 

78 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE_P), True) 

79 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATE_STR), True) 

80 

81 self.assertEqual(is_date_like_not_datetime_like(DUMMY_DATETIME), False) 

82 self.assertEqual( 

83 is_date_like_not_datetime_like(DUMMY_DATETIME_P), False 

84 ) 

85 self.assertEqual( 

86 is_date_like_not_datetime_like(DUMMY_DATETIME_STR), False 

87 ) 

88 

89 self.assertEqual( 

90 is_date_like_not_datetime_like(DUMMY_DATETIME_P_DATEONLY), True 

91 ) 

92 

93 def test_is_datetime_like(self) -> None: 

94 # Dates are also datetime-like. 

95 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE), True) 

96 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE_P), True) 

97 self.assertEqual(is_datetime_or_date_like(DUMMY_DATE_STR), True) 

98 

99 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME), True) 

100 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME_P), True) 

101 self.assertEqual(is_datetime_or_date_like(DUMMY_DATETIME_STR), True) 

102 

103 self.assertEqual( 

104 is_datetime_or_date_like(DUMMY_DATETIME_P_DATEONLY), True 

105 ) 

106 

107 def test_datatype_detection_missing(self) -> None: 

108 # No data: 

109 d = ColumnTypeDetector(DUMMY_COLNAME) 

110 self.assertRaises(ValueError, d.sqlalchemy_column) 

111 

112 def test_datatype_detection_null(self) -> None: 

113 # Only NULL data: 

114 d = ColumnTypeDetector(DUMMY_COLNAME, [None]) 

115 self.assertRaises(ValueError, d.sqlalchemy_column) 

116 

117 def test_datatype_detection_int(self) -> None: 

118 # Integers: 

119 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, None]) 

120 c = d.sqlalchemy_column() 

121 self.assertEqual(type(c.type), BigInteger) 

122 self.assertEqual(c.nullable, True) 

123 # ... and rejecting inappropriate nullable=False: 

124 self.assertRaises(ValueError, d.sqlalchemy_column, nullable=False) 

125 

126 def test_datatype_detection_float(self) -> None: 

127 # Float: 

128 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, 2.5, None]) 

129 c = d.sqlalchemy_column() 

130 self.assertEqual(type(c.type), Float) 

131 self.assertEqual(c.nullable, True) 

132 

133 def test_datatype_detection_str_null(self) -> None: 

134 # String: 

135 d = ColumnTypeDetector(DUMMY_COLNAME, ["hello", "world", None]) 

136 c = d.sqlalchemy_column() 

137 self.assertEqual(type(c.type), String) 

138 self.assertEqual(c.nullable, True) 

139 

140 def test_datatype_detection_str_not_null(self) -> None: 

141 # String, NOT NULL: 

142 d = ColumnTypeDetector(DUMMY_COLNAME, ["hello", "world"]) 

143 c = d.sqlalchemy_column(nullable=False) 

144 self.assertEqual(type(c.type), String) 

145 self.assertEqual(c.nullable, False) 

146 

147 def test_datatype_detection_bad_mix(self) -> None: 

148 # Inappropriately mixed data: 

149 d = ColumnTypeDetector(DUMMY_COLNAME, [4, -3, 2.5, "hello", None]) 

150 self.assertRaises(ValueError, d.sqlalchemy_column) 

151 

152 def test_datatype_detection_date(self) -> None: 

153 # Dates 

154 d = ColumnTypeDetector( 

155 DUMMY_COLNAME, 

156 [ 

157 DUMMY_DATE, 

158 DUMMY_DATE_P, 

159 DUMMY_DATE_STR, 

160 DUMMY_DATETIME_P_DATEONLY, 

161 None, 

162 ], 

163 ) 

164 c = d.sqlalchemy_column() 

165 self.assertEqual(type(c.type), Date) 

166 

167 def test_datatype_detection_datetime(self) -> None: 

168 # Dates 

169 d = ColumnTypeDetector( 

170 DUMMY_COLNAME, 

171 [DUMMY_DATETIME, DUMMY_DATETIME_P, DUMMY_DATETIME_STR, None], 

172 ) 

173 c = d.sqlalchemy_column() 

174 self.assertEqual(type(c.type), DateTime) 

175 

176 def test_datatype_detection_mixed_date_datetime(self) -> None: 

177 # Dates plus datetimes should resolve to datetime. 

178 d = ColumnTypeDetector( 

179 DUMMY_COLNAME, 

180 [ 

181 DUMMY_DATE, 

182 DUMMY_DATE_STR, 

183 DUMMY_DATETIME, 

184 DUMMY_DATETIME_STR, 

185 None, 

186 ], 

187 ) 

188 c = d.sqlalchemy_column() 

189 self.assertEqual(type(c.type), DateTime) 

190 

191 def test_datatype_detection_mixed_str_date_datetime(self) -> None: 

192 # Dates plus datetimes plus other strings should resolve to str. 

193 d = ColumnTypeDetector( 

194 DUMMY_COLNAME, 

195 [ 

196 DUMMY_DATE, 

197 DUMMY_DATE_STR, 

198 DUMMY_DATETIME, 

199 DUMMY_DATETIME_STR, 

200 "hello", 

201 None, 

202 ], 

203 ) 

204 c = d.sqlalchemy_column() 

205 self.assertEqual(type(c.type), String) 

206 

207 def test_datatype_detection_bool(self) -> None: 

208 # Boolean 

209 d = ColumnTypeDetector( 

210 DUMMY_COLNAME, 

211 [ 

212 True, 

213 False, 

214 None, 

215 ], 

216 ) 

217 c = d.sqlalchemy_column() 

218 self.assertEqual(type(c.type), Boolean)