Coverage for crateweb/research/archive_backend.py: 46%

107 statements  

« prev     ^ index     » next       coverage.py v7.8.0, created at 2025-08-27 10:34 -0500

1""" 

2crate_anon/crateweb/research/archive_backend.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CRATE. 

10 

11 CRATE is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CRATE is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CRATE. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Support functions for the archive system.** 

27 

28""" 

29 

30import logging 

31 

32# from os import DirEntry, scandir 

33from os.path import abspath, getmtime, isfile, join 

34from typing import Any, Dict, List 

35 

36from cardinal_pythonlib.fileops import mkdir_p 

37from django.conf import settings 

38from django.http.request import HttpRequest 

39from django.http.response import ( 

40 HttpResponse, 

41 HttpResponseBadRequest, 

42) 

43from django.urls import reverse 

44from mako.lookup import TemplateLookup 

45 

46from crate_anon.crateweb.config.constants import UrlNames, UrlKeys 

47from crate_anon.crateweb.core.utils import url_with_querystring 

48from crate_anon.crateweb.core.constants import SettingsKeys 

49from crate_anon.crateweb.research.models import ( 

50 ArchiveAttachmentAudit, 

51 ArchiveTemplateAudit, 

52) 

53 

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

55 

56 

57# ============================================================================= 

58# Constants 

59# ============================================================================= 

60 

61 

class ArchiveContextKeys:
    """
    String keys for the objects made available in the context in which
    archive templates operate. Some of them double as URL parameter keys.

    Every attribute's value is identical to its own name, including case.
    The mixed upper/lower case is there to avoid confusion between the
    attribute and the name it stands for; it is not a statement about
    "constness" within this class.
    """

    # Upper-case key
    CRATE_HOME_URL = "CRATE_HOME_URL"

    # Lower-case keys
    execute = "execute"
    get_attachment_url = "get_attachment_url"
    get_patient_template_url = "get_patient_template_url"
    get_static_url = "get_static_url"
    get_template_url = "get_template_url"
    patient_id = "patient_id"
    query_params = "query_params"
    request = "request"

80 

81 

# Attachments: the default value for the "guess_content_type" option.
DEFAULT_GUESS_CONTENT_TYPE = True

84 

85 

86# ============================================================================= 

87# Configuration-dependent quasi-constants 

88# ============================================================================= 

89 

# All of these are read from the Django settings once, when this module is
# imported: better to crash early than when a user asks. A setting that is
# absent falls back to the default given as the last argument.

# Directories and root template (empty string when unset).
_archive_attachment_dir = getattr(
    settings,
    SettingsKeys.ARCHIVE_ATTACHMENT_DIR,
    "",
)
_archive_root_template = getattr(
    settings,
    SettingsKeys.ARCHIVE_ROOT_TEMPLATE,
    "",
)
_archive_static_dir = getattr(
    settings,
    SettingsKeys.ARCHIVE_STATIC_DIR,
    "",
)
_archive_template_cache_dir = getattr(
    settings,
    SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR,
    "",
)
_archive_template_dir = getattr(
    settings,
    SettingsKeys.ARCHIVE_TEMPLATE_DIR,
    "",
)

# Extra context configured by the administrator (empty dict when unset).
ARCHIVE_CONTEXT = getattr(settings, SettingsKeys.ARCHIVE_CONTEXT, {})

# Cache-control maximum ages (0 when unset).
CACHE_CONTROL_MAX_AGE_ARCHIVE_ATTACHMENTS = getattr(
    settings,
    SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_ATTACHMENTS,
    0,
)
CACHE_CONTROL_MAX_AGE_ARCHIVE_TEMPLATES = getattr(
    settings,
    SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_TEMPLATES,
    0,
)
CACHE_CONTROL_MAX_AGE_ARCHIVE_STATIC = getattr(
    settings,
    SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_STATIC,
    0,
)

117 

118# ============================================================================= 

119# Configuration checks 

120# ============================================================================= 

121 

# The archive counts as configured only when every one of the five
# directory/template settings has a truthy (non-empty) value.
ARCHIVE_IS_CONFIGURED = all(
    (
        _archive_attachment_dir,
        _archive_root_template,
        _archive_static_dir,
        _archive_template_cache_dir,
        _archive_template_dir,
    )
)

129 

130 

def archive_misconfigured_response() -> HttpResponse:
    """
    Builds the error response shown when the archive is not fully configured.

    Returns:
        an :class:`HttpResponseBadRequest` whose message lists the names of
        the archive settings that are currently empty
    """
    # (current value, settings key) pairs, in the order they are reported.
    required = (
        (_archive_attachment_dir, SettingsKeys.ARCHIVE_ATTACHMENT_DIR),
        (_archive_root_template, SettingsKeys.ARCHIVE_ROOT_TEMPLATE),
        (_archive_static_dir, SettingsKeys.ARCHIVE_STATIC_DIR),
        (_archive_template_cache_dir, SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR),
        (_archive_template_dir, SettingsKeys.ARCHIVE_TEMPLATE_DIR),
    )
    missing = [key for value, key in required if not value]  # type: List[str]
    return HttpResponseBadRequest(
        f"Archive not configured. Administrator has not set: {missing!r}"
    )

150 

151 

152# ============================================================================= 

153# Set up caches and Mako lookups. 

154# ============================================================================= 

155 

if not ARCHIVE_IS_CONFIGURED:
    # No usable configuration: there is no template lookup.
    archive_mako_lookup = None
else:
    # Ensure the cache directory exists before handing it to Mako.
    mkdir_p(_archive_template_cache_dir)
    archive_mako_lookup = TemplateLookup(
        directories=[_archive_template_dir],
        module_directory=_archive_template_cache_dir,
        # Raise an error immediately upon typos in templates.
        strict_undefined=True,
    )

165 

166 

167# ============================================================================= 

168# Auditing 

169# ============================================================================= 

170 

171 

def audit_archive_template(
    request: HttpRequest, patient_id: str, query_string: str
) -> None:
    """
    Records that a user viewed an archive template for a patient.

    Args:
        request:
            Django request; its ``user`` is stored in the audit record
        patient_id:
            patient ID
        query_string:
            URL query string, which will include details of the template and
            any other arguments
    """
    # Create the audit record and save it in one step.
    ArchiveTemplateAudit(
        user=request.user,
        patient_id=patient_id,
        query_string=query_string,
    ).save()

191 

192 

def audit_archive_attachment(
    request: HttpRequest, patient_id: str, filename: str
) -> None:
    """
    Records that a user accessed an attachment via a patient's archive view.

    Args:
        request:
            Django request; its ``user`` is stored in the audit record
        patient_id:
            patient ID
        filename:
            filename of the attachment within the archive
    """
    # Create the audit record and save it in one step.
    ArchiveAttachmentAudit(
        user=request.user,
        patient_id=patient_id,
        filename=filename,
    ).save()

211 

212 

213# ============================================================================= 

214# Generic paths 

215# ============================================================================= 

216 

217 

def safe_path(directory: str, filename: str) -> str:
    """
    Ensures that a filename is safe and within a directory -- for example, that
    nobody passes a filename like ``../../../etc/passwd`` to break out of our
    directory.

    The directory is normalised to an absolute path first, and containment is
    checked component by component rather than by raw string prefix. So a
    sibling such as ``/srv/attach_evil`` is NOT treated as being inside
    ``/srv/attach``, and a relative or non-normalised ``directory`` still
    works.

    Args:
        directory: directory, within which filename must be
        filename: filename

    Returns:
        str: the absolute filename if it lies within ``directory`` and is an
        existing file; otherwise ``""``
    """
    # Function-scope import: "commonpath" is not among the file-level imports.
    from os.path import commonpath

    if not directory:
        return ""
    # NOTE: abspath() normalises ".." and "." but does not resolve symlinks.
    base_directory = abspath(directory)
    final_filename = abspath(join(base_directory, filename))
    try:
        is_inside = commonpath([base_directory, final_filename]) == (
            base_directory
        )
    except ValueError:
        # Raised when the paths cannot be compared, e.g. different drives on
        # Windows -- in which case the file is certainly not inside.
        is_inside = False
    if not is_inside:
        return ""
    if not isfile(final_filename):
        return ""
    return final_filename

239 

240 

241# ============================================================================= 

242# Archive paths 

243# ============================================================================= 

244 

245 

def get_archive_template_filepath(template_name: str) -> str:
    """
    Returns the path of a template within the archive template directory.

    The result is simply the template directory joined with the template name;
    this function does not check that the file exists.

    Args:
        template_name: name of the template
    """
    template_path = join(_archive_template_dir, template_name)
    return template_path

258 

259 

def get_archive_attachment_filepath(filename: str) -> str:
    """
    Returns the full path of an archive attachment, as vetted by
    :func:`safe_path` against the archive attachment directory.

    Args:
        filename: name of the attachment
    """
    attachment_path = safe_path(_archive_attachment_dir, filename)
    return attachment_path

268 

269 

def get_archive_static_filepath(filename: str) -> str:
    """
    Returns the full path of an archive static file, as vetted by
    :func:`safe_path` against the archive static directory.

    Args:
        filename: name of the static file
    """
    static_path = safe_path(_archive_static_dir, filename)
    return static_path

278 

279 

280# ============================================================================= 

281# Generic URL generation 

282# ============================================================================= 

283 

284 

def add_file_timestamp_to_url_query(
    filepath: str, qparams: Dict[str, Any]
) -> None:
    """
    Stores a file's modification time in a dictionary of URL query parameters.

    The point is cache-busting: if the file is edited, a different URL is
    generated, so caching browsers will automatically refresh.

    See

    - https://stackoverflow.com/questions/9692665/cache-busting-via-params
    - https://docs.python.org/3/library/os.path.html#os.path.getmtime

    If ``filepath`` is not an existing file, an error is logged and
    ``qparams`` is left unchanged.

    Args:
        filepath: full path to file
        qparams: parameter dictionary, which will be modified
    """
    if isfile(filepath):
        mtime = getmtime(filepath)
        qparams[UrlKeys.MTIME] = str(mtime)
        return
    log.error(
        f"add_file_timestamp_to_url_query: nonexistent file {filepath!r}"
    )

309 

310 

311# ============================================================================= 

312# Archive URL generation 

313# ============================================================================= 

314 

315 

def archive_template_url(
    template_name: str = "", patient_id: str = "", **kwargs
) -> str:
    """
    Builds a URL to inspect part of the archive.

    Args:
        template_name:
            short name of the (configurable) template; if given, the
            template's file timestamp is also added to the URL parameters
        patient_id:
            patient ID
        **kwargs:
            other optional arguments, passed as URL parameters

    Returns:
        A URL.

    """
    # Work on a copy so the caller's keyword arguments are never modified.
    qparams = dict(kwargs)  # type: Dict[str, Any]
    if template_name:
        qparams[UrlKeys.TEMPLATE] = template_name
        add_file_timestamp_to_url_query(
            get_archive_template_filepath(template_name), qparams
        )
    if patient_id:
        qparams[UrlKeys.PATIENT_ID] = patient_id
    return url_with_querystring(reverse(UrlNames.ARCHIVE_TEMPLATE), **qparams)

346 

347 

def archive_root_url() -> str:
    """
    Returns a URL to the root of the archive (the configured root template),
    typically including the "launch for patient" view.
    """
    root_url = archive_template_url(_archive_root_template)
    return root_url

354 

355 

def archive_attachment_url(
    filename: str,
    patient_id: str = "",
    content_type: str = "",
    offered_filename: str = "",
    guess_content_type: bool = None,
) -> str:
    """
    Builds a URL to download an archive attachment (e.g. a PDF).

    Args:
        filename:
            filename on disk, within the archive's attachment directory
        patient_id:
            patient ID (used for auditing)
        content_type:
            HTTP content type; see
            https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
        offered_filename:
            filename offered to user
        guess_content_type:
            if no content_type is specified, should we guess? Pass
            ``None`` for the default, :data:`DEFAULT_GUESS_CONTENT_TYPE`.

    Returns:
        A URL.
    """
    # Patient ID and filename are always sent; the rest only when specified.
    qparams = {UrlKeys.PATIENT_ID: patient_id, UrlKeys.FILENAME: filename}
    if content_type:
        qparams[UrlKeys.CONTENT_TYPE] = content_type
    if offered_filename:
        qparams[UrlKeys.OFFERED_FILENAME] = offered_filename
    if guess_content_type is not None:
        # Sent as an integer version of the flag.
        qparams[UrlKeys.GUESS_CONTENT_TYPE] = int(guess_content_type)
    add_file_timestamp_to_url_query(
        get_archive_attachment_filepath(filename), qparams
    )
    endpoint = reverse(UrlNames.ARCHIVE_ATTACHMENT)
    return url_with_querystring(endpoint, **qparams)

395 

396 

def archive_static_url(filename: str) -> str:
    """
    Builds a URL to download a static file from the archive.

    Args:
        filename:
            filename on disk, within the archive's static directory

    Returns:
        A URL.
    """
    qparams = {UrlKeys.FILENAME: filename}
    add_file_timestamp_to_url_query(
        get_archive_static_filepath(filename), qparams
    )
    endpoint = reverse(UrlNames.ARCHIVE_STATIC)
    return url_with_querystring(endpoint, **qparams)