Coverage for testing/models.py: 84%

76 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/testing/models.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Factory Boy SQL Alchemy test models.** 

27 

28""" 

29 

30import datetime 

31import enum 

32import os 

33from typing import TYPE_CHECKING 

34 

35from sqlalchemy import ( 

36 BigInteger, 

37 Boolean, 

38 Column, 

39 Date, 

40 DateTime, # NB name clash with pendulum 

41 Enum, 

42 ForeignKey, 

43 Integer, 

44 LargeBinary, 

45 String, 

46 Text, 

47) 

48from sqlalchemy.ext.compiler import compiles 

49from sqlalchemy.orm import relationship 

50 

51from crate_anon.anonymise.constants import ( 

52 COMMENT, 

53 TABLE_KWARGS, 

54) 

55from crate_anon.testing import SourceTestBase 

56 

57if TYPE_CHECKING: 

58 from sqlalchemy.sql.type_api import TypeEngine 

59 from sqlalchemy.sql.compiler import SQLCompiler 

60 

61MAX_EXT_LENGTH_WITH_DOT = 10 

62PATIENT_ID_COMMENT = "Patient ID" 

63 

64 

65# ============================================================================= 

66# BLOB type 

67# ============================================================================= 

68 

69 

70# http://docs.sqlalchemy.org/en/latest/core/custom_types.html 

71# noinspection PyUnusedLocal 

72@compiles(LargeBinary, "mysql") 

73def compile_blob_mysql( 

74 type_: "TypeEngine", compiler: "SQLCompiler", **kw 

75) -> str: 

76 """ 

77 Provides a custom type for the SQLAlchemy ``LargeBinary`` type under MySQL, 

78 by using ``LONGBLOB`` (which overrides the default of ``BLOB``). 

79 

80 MySQL: https://dev.mysql.com/doc/refman/5.7/en/storage-requirements.html 

81 

82 .. code-block:: none 

83 

84 TINYBLOB: up to 2^8 bytes 

85 BLOB: up to 2^16 bytes = 64 KiB 

86 MEDIUMBLOB: up to 2^24 bytes = 16 MiB <-- minimum for docs 

87 LONGBLOB: up to 2^32 bytes = 4 GiB 

88 

89 VARBINARY: up to 65535 = 64 KiB 

90 

91 SQL Server: https://msdn.microsoft.com/en-us/library/ms188362.aspx 

92 

93 .. code-block:: none 

94 

95 BINARY: up to 8000 bytes = 8 KB 

96 VARBINARY(MAX): up to 2^31 - 1 bytes = 2 GiB <-- minimum for docs 

97 IMAGE: deprecated; up to 2^31 - 1 bytes = 2 GiB 

98 https://msdn.microsoft.com/en-us/library/ms187993.aspx 

99 

100 SQL Alchemy: 

101 

102 .. code-block:: none 

103 

104 _Binary: base class 

105 LargeBinary: translates to BLOB in MySQL 

106 VARBINARY, as an SQL base data type 

107 dialects.mysql.base.LONGBLOB 

108 dialects.mssql.base.VARBINARY 

109 

110 Therefore, we can take the LargeBinary type and modify it. 

111 """ 

112 return "LONGBLOB" # would have been "BLOB" 

113 

114 

115# If this goes wrong for future versions of SQL Server, write another 

116# specializer to produce "VARBINARY(MAX)" instead of "IMAGE". I haven't done 

117# that because it may be that SQL Alchemy is reading the SQL Server version 

118# (it definitely executes "select @@version") and specializing accordingly. 

119 

120SexColType = String(length=1) 

121 

122 

123# ============================================================================= 

124# A silly enum 

125# ============================================================================= 

126class EnumColours(enum.Enum): 

127 """ 

128 A silly enum, for testing. 

129 """ 

130 

131 red = 1 

132 green = 2 

133 blue = 3 

134 

135 

136# ============================================================================= 

137# Tables 

138# ============================================================================= 

139 

140 

141class Patient(SourceTestBase): 

142 """ 

143 SQLAlchemy ORM class for fictional patients. 

144 """ 

145 

146 __tablename__ = "patient" 

147 __table_args__ = { 

148 COMMENT: "Fictional patients", 

149 **TABLE_KWARGS, 

150 } 

151 

152 patient_id = Column( 

153 Integer, 

154 primary_key=True, 

155 autoincrement=False, 

156 comment=PATIENT_ID_COMMENT, 

157 ) 

158 sex = Column( 

159 SexColType, 

160 comment="Sex (M, F, X)", 

161 ) 

162 forename = Column(String(50), comment="Forename") 

163 surname = Column(String(50), comment="Surname") 

164 dob = Column(Date, comment="Date of birth (DOB)") 

165 nullfield = Column(Integer, comment="Always NULL") 

166 nhsnum = Column(BigInteger, comment="NHS number") 

167 phone = Column(String(50), comment="Phone number") 

168 postcode = Column(String(50), comment="Postcode") 

169 optout = Column( 

170 Boolean, default=False, comment="Opt out from research database?" 

171 ) 

172 related_patient_id = Column( 

173 Integer, 

174 ForeignKey("patient.patient_id"), 

175 comment="ID of another patient", 

176 ) 

177 related_patient = relationship( 

178 "Patient", uselist=False, remote_side=[patient_id] 

179 ) 

180 related_patient_relationship = Column( 

181 String(50), 

182 comment="Decription of relationship between patient and relation", 

183 ) 

184 colour = Column( 

185 Enum(EnumColours), 

186 nullable=True, 

187 comment="An enum column, which may be red/green/blue", 

188 ) # new in v0.18.41 

189 

190 @property 

191 def related_patient_name(self) -> str: 

192 if self.related_patient is None: 

193 return "" 

194 

195 forename = self.related_patient.forename 

196 surname = self.related_patient.surname 

197 

198 return f"{forename} {surname}" 

199 

200 

201class Note(SourceTestBase): 

202 """ 

203 SQLAlchemy ORM class for fictional notes. 

204 """ 

205 

206 __tablename__ = "note" 

207 __table_args__ = { 

208 COMMENT: "Fictional textual notes", 

209 **TABLE_KWARGS, 

210 } 

211 

212 note_id = Column(Integer, primary_key=True, comment="Note ID") 

213 patient_id = Column( 

214 Integer, ForeignKey("patient.patient_id"), comment=PATIENT_ID_COMMENT 

215 ) 

216 note = Column(Text, comment="Text of the note") 

217 note_datetime = Column(DateTime, comment="Date/time of the note") 

218 

219 patient = relationship("Patient") 

220 

221 

222class BlobDoc(SourceTestBase): 

223 """ 

224 SQLAlchemy ORM class for fictional binary documents. 

225 """ 

226 

227 __tablename__ = "blobdoc" 

228 __table_args__ = { 

229 COMMENT: "Fictional documents as binary large objects", 

230 **TABLE_KWARGS, 

231 } 

232 

233 blob_doc_id = Column( 

234 Integer, primary_key=True, comment="Binary document ID" 

235 ) 

236 patient_id = Column( 

237 Integer, ForeignKey("patient.patient_id"), comment=PATIENT_ID_COMMENT 

238 ) 

239 blob = Column( 

240 LargeBinary, comment="The BLOB (binary large object)" 

241 ) # modified as above! 

242 extension = Column( 

243 String(MAX_EXT_LENGTH_WITH_DOT), comment="Filename extension" 

244 ) 

245 blob_datetime = Column(DateTime, comment="Date/time of the document") 

246 

247 patient = relationship("Patient") 

248 

249 def __init__( 

250 self, patient: Patient, filename: str, blob_datetime: datetime.datetime 

251 ) -> None: 

252 """ 

253 Args: 

254 patient: corresponding :class:`Patient` object 

255 filename: filename containing the binary document to load and 

256 store in the database 

257 blob_datetime: date/time value to give this BLOB 

258 """ 

259 _, extension = os.path.splitext(filename) 

260 with open(filename, "rb") as f: 

261 contents = f.read() # will be of type 'bytes' 

262 # noinspection PyArgumentList 

263 super().__init__( 

264 patient=patient, 

265 blob=contents, 

266 extension=extension, 

267 blob_datetime=blob_datetime, 

268 ) 

269 

270 

271class FilenameDoc(SourceTestBase): 

272 """ 

273 SQLAlchemy ORM class for a table containing the filenames of binary 

274 documents. 

275 """ 

276 

277 __tablename__ = "filenamedoc" 

278 __table_args__ = { 

279 COMMENT: "Filenames of binary documents", 

280 **TABLE_KWARGS, 

281 } 

282 

283 filename_doc_id = Column(Integer, primary_key=True, comment="Filename ID") 

284 patient_id = Column( 

285 Integer, ForeignKey("patient.patient_id"), comment=PATIENT_ID_COMMENT 

286 ) 

287 filename = Column(Text, comment="Filename") 

288 file_datetime = Column(DateTime, comment="Date/time of the document") 

289 

290 patient = relationship("Patient")