Coverage for crateweb/research/archive_backend.py: 46%
107 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
« prev ^ index » next coverage.py v7.8.0, created at 2025-08-27 10:34 -0500
1"""
2crate_anon/crateweb/research/archive_backend.py
4===============================================================================
6 Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CRATE.
11 CRATE is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CRATE is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CRATE. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**Support functions for the archive system.**
28"""
30import logging
32# from os import DirEntry, scandir
33from os.path import abspath, getmtime, isfile, join
34from typing import Any, Dict, List
36from cardinal_pythonlib.fileops import mkdir_p
37from django.conf import settings
38from django.http.request import HttpRequest
39from django.http.response import (
40 HttpResponse,
41 HttpResponseBadRequest,
42)
43from django.urls import reverse
44from mako.lookup import TemplateLookup
46from crate_anon.crateweb.config.constants import UrlNames, UrlKeys
47from crate_anon.crateweb.core.utils import url_with_querystring
48from crate_anon.crateweb.core.constants import SettingsKeys
49from crate_anon.crateweb.research.models import (
50 ArchiveAttachmentAudit,
51 ArchiveTemplateAudit,
52)
# Module-level logger, named after this module's import path.
log = logging.getLogger(__name__)
57# =============================================================================
58# Constants
59# =============================================================================
class ArchiveContextKeys:
    """
    Names of objects that become part of the context in which archive
    templates operate. Some are also used as URL parameter keys.

    Each attribute is spelt exactly like its string value, including case.
    That is deliberate, to avoid confusion between the two; the mixture of
    upper and lower case is not meant to indicate which attributes are
    "constants" within this class.
    """

    CRATE_HOME_URL = "CRATE_HOME_URL"
    execute = "execute"
    get_attachment_url = "get_attachment_url"
    get_patient_template_url = "get_patient_template_url"
    get_static_url = "get_static_url"
    get_template_url = "get_template_url"
    patient_id = "patient_id"
    query_params = "query_params"
    request = "request"
# For attachments: the default for "guess_content_type", i.e. whether to guess
# the HTTP content type when none is given explicitly.
# NOTE(review): not consumed within this module; presumably applied by the
# attachment-serving view -- confirm against the caller.
DEFAULT_GUESS_CONTENT_TYPE = True
86# =============================================================================
87# Configuration-dependent quasi-constants
88# =============================================================================
# Read from settings once, at import time. A setting that is absent does not
# raise here: getattr() substitutes the default given ("" / {} / 0).

# Directories
_archive_attachment_dir = getattr(
    settings, SettingsKeys.ARCHIVE_ATTACHMENT_DIR, ""
)
_archive_static_dir = getattr(settings, SettingsKeys.ARCHIVE_STATIC_DIR, "")
_archive_template_dir = getattr(
    settings, SettingsKeys.ARCHIVE_TEMPLATE_DIR, ""
)
_archive_template_cache_dir = getattr(
    settings, SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR, ""
)

# Name of the template used as the root of the archive
_archive_root_template = getattr(
    settings, SettingsKeys.ARCHIVE_ROOT_TEMPLATE, ""
)

# User-supplied additions to the archive context
ARCHIVE_CONTEXT = getattr(settings, SettingsKeys.ARCHIVE_CONTEXT, {})

# Cache-control maximum ages for the three kinds of archive content
CACHE_CONTROL_MAX_AGE_ARCHIVE_ATTACHMENTS = getattr(
    settings, SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_ATTACHMENTS, 0
)
CACHE_CONTROL_MAX_AGE_ARCHIVE_TEMPLATES = getattr(
    settings, SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_TEMPLATES, 0
)
CACHE_CONTROL_MAX_AGE_ARCHIVE_STATIC = getattr(
    settings, SettingsKeys.CACHE_CONTROL_MAX_AGE_ARCHIVE_STATIC, 0
)
118# =============================================================================
119# Configuration checks
120# =============================================================================
# True only when every one of the archive's directory/template settings has
# a non-empty value.
ARCHIVE_IS_CONFIGURED = all(
    (
        _archive_attachment_dir,
        _archive_root_template,
        _archive_static_dir,
        _archive_template_cache_dir,
        _archive_template_dir,
    )
)
def archive_misconfigured_response() -> HttpResponse:
    """
    Builds the error response used when the archive is not fully configured.

    Returns:
        an :class:`HttpResponseBadRequest` whose text lists the names of the
        archive settings that are empty/unset.
    """
    # (setting name, value read at import time), in reporting order.
    checks = (
        (SettingsKeys.ARCHIVE_ATTACHMENT_DIR, _archive_attachment_dir),
        (SettingsKeys.ARCHIVE_ROOT_TEMPLATE, _archive_root_template),
        (SettingsKeys.ARCHIVE_STATIC_DIR, _archive_static_dir),
        (SettingsKeys.ARCHIVE_TEMPLATE_CACHE_DIR, _archive_template_cache_dir),
        (SettingsKeys.ARCHIVE_TEMPLATE_DIR, _archive_template_dir),
    )
    missing: List[str] = [name for name, value in checks if not value]
    return HttpResponseBadRequest(
        f"Archive not configured. Administrator has not set: {missing!r}"
    )
152# =============================================================================
153# Set up caches and Mako lookups.
154# =============================================================================
# Stays None unless the archive is fully configured.
archive_mako_lookup = None
if ARCHIVE_IS_CONFIGURED:
    # The cache directory for compiled templates must exist before use.
    mkdir_p(_archive_template_cache_dir)
    archive_mako_lookup = TemplateLookup(
        directories=[_archive_template_dir],
        module_directory=_archive_template_cache_dir,
        strict_undefined=True,  # raise error immediately upon typos!
    )
167# =============================================================================
168# Auditing
169# =============================================================================
def audit_archive_template(
    request: HttpRequest, patient_id: str, query_string: str
) -> None:
    """
    Records that a user viewed an archive template for a patient.

    Creates an :class:`ArchiveTemplateAudit` from the request's user and the
    details given, and saves it.

    Args:
        request:
            Django request; its ``user`` is recorded
        patient_id:
            patient ID
        query_string:
            URL query string, which will include details of the template and
            any other arguments.
    """
    ArchiveTemplateAudit(
        user=request.user,
        patient_id=patient_id,
        query_string=query_string,
    ).save()
def audit_archive_attachment(
    request: HttpRequest, patient_id: str, filename: str
) -> None:
    """
    Records that a user accessed an attachment via a patient's archive view.

    Creates an :class:`ArchiveAttachmentAudit` from the request's user and
    the details given, and saves it.

    Args:
        request:
            Django request; its ``user`` is recorded
        patient_id:
            patient ID
        filename:
            filename of attachment within archive
    """
    ArchiveAttachmentAudit(
        user=request.user,
        patient_id=patient_id,
        filename=filename,
    ).save()
213# =============================================================================
214# Generic paths
215# =============================================================================
def safe_path(directory: str, filename: str) -> str:
    """
    Ensures that a filename is safe and within a directory -- for example, that
    nobody passes a filename like ``../../../etc/passwd`` to break out of our
    directory.

    The directory is normalized to an absolute path first, and containment is
    judged on whole path components, so that (for a directory ``/srv/files``)
    a sibling such as ``/srv/files_private/x`` is not mistaken for something
    inside it.

    Symbolic links are not resolved; a link inside the directory that points
    elsewhere is still treated as being inside it.

    Args:
        directory: directory, within which filename must be
        filename: filename (normally relative to ``directory``)

    Returns:
        str: the absolute filename if it's within the directory and is an
        existing file; otherwise ``""`` (including when ``directory`` is
        empty).
    """
    if not directory:
        return ""
    base_dir = abspath(directory)
    final_filename = abspath(join(base_dir, filename))
    # join(base_dir, "") is base_dir with a single trailing path separator.
    # Comparing against that (rather than the bare directory name) stops a
    # sibling directory that merely shares a name prefix from matching.
    if not final_filename.startswith(join(base_dir, "")):
        return ""
    if not isfile(final_filename):
        return ""
    return final_filename
241# =============================================================================
242# Archive paths
243# =============================================================================
def get_archive_template_filepath(template_name: str) -> str:
    """
    Returns the path of a template within the archive template directory.

    The name is simply joined onto the configured template directory; no
    check is made here that the resulting file exists.

    Args:
        template_name: name of the template
    """
    template_path = join(_archive_template_dir, template_name)
    return template_path
def get_archive_attachment_filepath(filename: str) -> str:
    """
    Returns the full path of an archive attachment, as vetted by
    :func:`safe_path` against the configured attachment directory.

    Args:
        filename: name of the attachment
    """
    return safe_path(directory=_archive_attachment_dir, filename=filename)
def get_archive_static_filepath(filename: str) -> str:
    """
    Returns the full path of an archive static file, as vetted by
    :func:`safe_path` against the configured static directory.

    Args:
        filename: name of the static file
    """
    return safe_path(directory=_archive_static_dir, filename=filename)
280# =============================================================================
281# Generic URL generation
282# =============================================================================
def add_file_timestamp_to_url_query(
    filepath: str, qparams: Dict[str, Any]
) -> None:
    """
    Adds a file's modification time to a dictionary of URL query parameters.

    The point is cache-busting: when the file is edited, its URL changes, so
    caching browsers fetch the new version automatically.

    See

    - https://stackoverflow.com/questions/9692665/cache-busting-via-params
    - https://docs.python.org/3/library/os.path.html#os.path.getmtime

    If ``filepath`` is not an existing file, an error is logged and
    ``qparams`` is left untouched.

    Args:
        filepath: full path to file
        qparams: parameter dictionary, which will be modified
    """
    if isfile(filepath):
        qparams[UrlKeys.MTIME] = str(getmtime(filepath))
        return
    log.error(
        f"add_file_timestamp_to_url_query: nonexistent file {filepath!r}"
    )
311# =============================================================================
312# Archive URL generation
313# =============================================================================
def archive_template_url(
    template_name: str = "", patient_id: str = "", **kwargs
) -> str:
    """
    Creates a URL to inspect part of the archive.

    When a template name is given, the template's file modification time is
    also added to the query (if the file exists), so that the URL changes
    whenever the template is edited.

    Args:
        template_name:
            short name of the (configurable) template
        patient_id:
            patient ID
        **kwargs:
            other optional arguments, passed as URL parameters

    Returns:
        A URL.

    """
    # Work on a copy so the caller's keyword arguments are never modified.
    qparams: Dict[str, Any] = dict(kwargs)
    if template_name:
        qparams[UrlKeys.TEMPLATE] = template_name
        add_file_timestamp_to_url_query(
            get_archive_template_filepath(template_name), qparams
        )
    if patient_id:
        qparams[UrlKeys.PATIENT_ID] = patient_id
    endpoint = reverse(UrlNames.ARCHIVE_TEMPLATE)
    return url_with_querystring(endpoint, **qparams)
def archive_root_url() -> str:
    """
    Returns a URL to the root of the archive (the configured root template),
    typically including the "launch for patient" view.
    """
    return archive_template_url(template_name=_archive_root_template)
def archive_attachment_url(
    filename: str,
    patient_id: str = "",
    content_type: str = "",
    offered_filename: str = "",
    guess_content_type: bool = None,
) -> str:
    """
    Returns a URL to download an archive attachment (e.g. a PDF).

    The patient ID and filename are always part of the query string; the
    other parameters are included only when supplied. The attachment's file
    modification time is added too (if the file is found), so that the URL
    changes when the file does.

    Args:
        filename:
            filename on disk, within the archive's attachment directory
        patient_id:
            patient ID (used for auditing)
        content_type:
            HTTP content type; see
            https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Type
        offered_filename:
            filename offered to user
        guess_content_type:
            if no content_type is specified, should we guess? Pass
            ``None`` for the default, :data:`DEFAULT_GUESS_CONTENT_TYPE`.
    """
    qparams = {
        UrlKeys.PATIENT_ID: patient_id,
        UrlKeys.FILENAME: filename,
    }
    if content_type:
        qparams[UrlKeys.CONTENT_TYPE] = content_type
    if offered_filename:
        qparams[UrlKeys.OFFERED_FILENAME] = offered_filename
    if guess_content_type is not None:
        # Sent as 0/1 rather than as a Python bool.
        qparams[UrlKeys.GUESS_CONTENT_TYPE] = int(guess_content_type)
    add_file_timestamp_to_url_query(
        get_archive_attachment_filepath(filename), qparams
    )
    endpoint = reverse(UrlNames.ARCHIVE_ATTACHMENT)
    return url_with_querystring(endpoint, **qparams)
def archive_static_url(filename: str) -> str:
    """
    Returns a URL to download a static file from the archive.

    The file's modification time is added to the query string (if the file
    is found), so that the URL changes when the file does.

    Args:
        filename:
            filename on disk, within the archive's static directory
    """
    qparams = {UrlKeys.FILENAME: filename}
    add_file_timestamp_to_url_query(
        get_archive_static_filepath(filename), qparams
    )
    endpoint = reverse(UrlNames.ARCHIVE_STATIC)
    return url_with_querystring(endpoint, **qparams)