Coverage for cc_modules/cc_validators.py: 45%
148 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 15:51 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 15:51 +0100
1"""
2camcops_server/cc_modules/cc_validators.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**String validators and the like.**
28All functions starting ``validate_`` do nothing if the input is good, and raise
29:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()``
30on the exception).
32All validators take a
33:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for
34internationalized error messages.
36WARNING: even the error messages shouldn't contain the error-producing strings.
37"""
39import ipaddress
40import logging
41import re
42from typing import Callable, List, Optional, TYPE_CHECKING
43import urllib.parse
45from cardinal_pythonlib.logs import BraceStyleAdapter
46from colander import EMAIL_RE
48from camcops_server.cc_modules.cc_constants import (
49 MINIMUM_PASSWORD_LENGTH,
50 StringLengths,
51)
52from camcops_server.cc_modules.cc_password import password_prohibited
54if TYPE_CHECKING:
55 from camcops_server.cc_modules.cc_request import CamcopsRequest
57log = BraceStyleAdapter(logging.getLogger(__name__))
60# =============================================================================
61# Typing constants
62# =============================================================================
64STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None]
65# ... string validators raise ValueError if the string is invalid
68# =============================================================================
69# Raising exceptions: sometimes internationalized, sometimes not
70# =============================================================================
def dummy_gettext(x: str) -> str:
    """
    Stand-in for a ``gettext`` function, for use when no request (and hence
    no translator) is available: hands the message back unchanged.
    """
    return x
80# =============================================================================
81# Regex manipulation
82# =============================================================================
def anchor(
    expression: str, anchor_start: bool = True, anchor_end: bool = True
) -> str:
    """
    Wraps a regex fragment in a start anchor (``^``) and/or an end anchor
    (``$``), as requested; with both switched off, the fragment comes back
    unchanged.
    """
    anchored = "{}".format(expression)
    if anchor_start:
        anchored = "^" + anchored
    if anchor_end:
        anchored = anchored + "$"
    return anchored
def zero_or_more(expression: str) -> str:
    """
    Appends the ``*`` quantifier: the returned regex matches zero or more
    repetitions of ``expression``.
    """
    return "{}*".format(expression)
def one_or_more(expression: str) -> str:
    """
    Appends the ``+`` quantifier: the returned regex matches one or more
    repetitions of ``expression``.
    """
    return "{}+".format(expression)
def min_max_copies(expression: str, max_count: int, min_count: int = 1) -> str:
    """
    Appends a bounded repetition quantifier to a regex fragment: for a
    fragment ``x`` the result is ``x{min,max}``.

    Take care with ``min_count == 0``: unless something else in the regex
    prevents it, the result will then match the empty string.
    """
    assert 0 <= min_count <= max_count
    return "{}{{{},{}}}".format(expression, min_count, max_count)
def describe_regex_permitted_char(
    expression: str,
    req: Optional["CamcopsRequest"] = None,
    invalid_prefix: bool = True,
) -> str:
    """
    Describes, for a human reader, the characters permitted by a regular
    expression character selector such as ``[A-Za-z0-9_]`` -- as long as it's
    simple! This won't handle arbitrary regexes (for example, negated
    classes are not recognized).

    Args:
        expression:
            the character class, including its surrounding square brackets
        req:
            optional request, used to translate the fixed parts of the
            message; if absent, the text is left untranslated
        invalid_prefix:
            start the message with "Invalid string."?

    Returns:
        a description in the form "Permitted characters: ..."; ranges are
        shown as written (e.g. ``A-Z``), regex shorthands are named (e.g.
        "digit"), and other characters are shown quoted.

    Raises:
        AssertionError: if the expression is not bracketed, ends with a lone
            backslash, or has an unterminated range.
    """
    assert expression.startswith("[") and expression.endswith("]")
    content = expression[1:-1]  # strip off surrounding []
    length = len(content)
    _ = req.gettext if req else dummy_gettext
    # Backslash shorthands that are named rather than shown as a quoted char.
    shorthand_descriptions = {
        "w": _("word character"),
        "W": _("non-word character"),
        "d": _("digit"),
        "D": _("non-digit"),
        "s": _("whitespace"),
        "S": _("non-whitespace"),
    }
    permitted = []  # type: List[str]
    i = 0
    while i < length:
        char = content[i]
        if char == "\\":
            # backslash preceding another character: regex code or escaped char
            assert i + 1 < length, f"Bad escaping in {expression!r}"
            escaped = content[i + 1]
            permitted.append(
                shorthand_descriptions.get(escaped, repr(escaped))
            )
            i += 2
        elif i + 1 < length and content[i + 1] == "-":
            # range like A-Z
            assert i + 2 < length, f"Bad range specification in {expression!r}"
            permitted.append(content[i : i + 3])
            i += 3
        else:
            # Any other character stands for itself. That includes an
            # unescaped ".", which inside [...] is a literal full stop and
            # not the "any character" wildcard.
            permitted.append(repr(char))
            i += 1
    description = ", ".join(permitted)
    prefix = _("Invalid string.") + " " if invalid_prefix else ""
    return prefix + _("Permitted characters:") + " " + description
def describe_regex_permitted_char_length(
    expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
) -> str:
    """
    Builds the "invalid string" message for a string constrained both by a
    character class and by length: an "Invalid string." lead-in, the
    minimum/maximum length, and then the permitted characters as described by
    :func:`describe_regex_permitted_char`.
    """
    _ = req.gettext if req else dummy_gettext
    sentences = [
        _("Invalid string."),
        _("Minimum length = {}. Maximum length = {}.").format(
            min_length, max_length
        ),
        # The lead-in is already present, so don't ask for it again.
        describe_regex_permitted_char(expression, req, invalid_prefix=False),
    ]
    return " ".join(sentences)
195# =============================================================================
196# Generic validation functions
197# =============================================================================
def validate_by_char_and_length(
    x: str,
    permitted_char_expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
    flags: int = 0,
) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x:
            the string to check
        permitted_char_expression:
            regex character class (e.g. ``[A-Za-z]``) that every character
            of ``x`` must match
        max_length:
            maximum permitted length of ``x``
        min_length:
            minimum permitted length of ``x``
        req:
            optional request, used to translate the error message
        flags:
            flags passed to :func:`re.compile`

    Raises:
        ValueError: if ``x`` contains a character outside the class or has a
            length outside the permitted range. The message describes the
            rule, not the offending string.
    """
    regex = re.compile(
        anchor(
            min_max_copies(
                expression=permitted_char_expression,
                min_count=min_length,
                max_count=max_length,
            )
        ),
        flags=flags,
    )
    # Use fullmatch(), not match(): "$" also matches just before a trailing
    # newline, so match() would let e.g. "abc\n" through.
    if regex.fullmatch(x) is None:
        raise ValueError(
            describe_regex_permitted_char_length(
                permitted_char_expression,
                min_length=min_length,
                max_length=max_length,
                req=req,
            )
        )
232# =============================================================================
233# Generic strings
234# =============================================================================
236ALPHA_CHAR = "[A-Za-z]"
238ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]"
239ALPHANUM_UNDERSCORE_REGEX = re.compile(
240 anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR))
241)
243ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]"
244ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]"
245ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]"
246ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]"
248HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]"
249# \w is a word character; with the re.UNICODE flag, that includes accented
250# characters. Then we allow hyphen, plain apostrophe, Unicode apostrophe,
251# space, dot.
252HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE)
253# ... for "at least one word character somewhere"
256# -----------------------------------------------------------------------------
257# Level 1. Computer-style simple strings with no spaces.
258# -----------------------------------------------------------------------------
def validate_alphanum(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that ``x`` is a generic alphanumeric string, as judged by
    :meth:`str.isalnum`; raises :exc:`ValueError` otherwise.
    """
    if x.isalnum():
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid alphanumeric string"))
def validate_alphanum_underscore(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that may contain only alphanumeric characters and
    underscores (at least one character).

    Raises:
        ValueError: if ``x`` doesn't conform; the message lists the permitted
            characters.
    """
    # Use fullmatch(), not match(): the pattern ends in "$", which also
    # matches just before a trailing newline, so match() would accept "abc\n".
    if ALPHANUM_UNDERSCORE_REGEX.fullmatch(x) is None:
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )
282# -----------------------------------------------------------------------------
283# Level 2. Human-style simple strings, allowing spaces but only minimal
284# punctuation.
285# -----------------------------------------------------------------------------
287# ... see specific validators.
289# -----------------------------------------------------------------------------
290# Level 3. Human-style strings, such as people's names; may involve accented
291# characters, spaces, some punctuation; may be used as Python or SQL search
292# literals (with suitable precautions).
293# -----------------------------------------------------------------------------
295# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
296# 3(a). Human names
297# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def validate_human_name(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.PATIENT_NAME_MAX_LEN,
) -> None:
    """
    Validates a person's name. Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Two checks are applied: every character must belong to
    ``HUMAN_NAME_CHAR_UNICODE`` (with the length within bounds), and the name
    must contain at least one "word" character somewhere.

    Raises:
        ValueError: if either check fails.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req,
    )
    # Use search(), not match(): the mandatory word character may be anywhere
    # in the name, not just at the start (consider "'t Hooft").
    if not HUMAN_MANDATORY_CHAR_REGEX.search(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))
324# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
325# 3(c). Search terms for simple near-alphanumeric SQL content, allowing
326# wildcards.
327# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]"
# ... a hyphen is meaningful inside a regex character class, so it is escaped


def validate_restricted_sql_search_literal(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH,
) -> None:
    """
    Validates a search term that is fairly broad, and can use the SQL
    wildcards ``%`` and ``_``, but should be syntactically safe in terms of
    HTML etc. Arbitrary strings are not permitted; this is a subset of what
    might be possible in SQL.

    Raises :exc:`ValueError` (via :func:`validate_by_char_and_length`) if the
    string doesn't conform.
    """
    permitted = RESTRICTED_SQL_SEARCH_LITERAL_CHAR
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=permitted,
        max_length=max_length,
        min_length=min_length,
    )
354# -----------------------------------------------------------------------------
355# Level 4. Infinitely worrying.
356# -----------------------------------------------------------------------------
# noinspection PyUnusedLocal
def validate_anything(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    The null validator: never raises, whatever it is given. Using it may be
    unwise.
    """
    return None
367# =============================================================================
368# Specific well-known computer formats
369# =============================================================================
371# -----------------------------------------------------------------------------
372# Base 64 encoding
373# -----------------------------------------------------------------------------
375# BASE64_REGEX = re.compile(
376# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"
377# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa
378# )
381# -----------------------------------------------------------------------------
382# Email addresses
383# -----------------------------------------------------------------------------
EMAIL_RE_COMPILED = re.compile(EMAIL_RE)


def validate_email(email: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that ``email`` is a valid e-mail address; raises
    :exc:`ValueError` if not.

    This mirrors the validation used by our web form: Colander's e-mail
    regex, plus a maximum length.
    """
    too_long = len(email) > StringLengths.EMAIL_ADDRESS_MAX_LEN
    # The regex is consulted only if the length is acceptable.
    if too_long or not EMAIL_RE_COMPILED.match(email):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid e-mail address"))
406# -----------------------------------------------------------------------------
407# IP addresses
408# -----------------------------------------------------------------------------
def validate_ip_address(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks that ``x`` is something :func:`ipaddress.ip_address` can parse;
    raises :exc:`ValueError`, with our own message, if it can't.
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        translate = req.gettext if req else dummy_gettext
        message = translate("Invalid IP address")
        raise ValueError(message)
425# -----------------------------------------------------------------------------
426# URLs
427# -----------------------------------------------------------------------------
# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but
# restricted further (http/https schemes only).
VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].\S*$")


def validate_any_url(url: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that ``url`` parses with both a scheme and a network location;
    raises :exc:`ValueError` if not. A warning is logged on every call.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme and parsed.netloc:
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid URL"))
def validate_redirect_url(
    url: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a URL intended for use as a redirection target, against
    ``VALID_REDIRECT_URL_REGEX``. Returns nothing if the URL is acceptable.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Raises:
        ValueError: if the URL doesn't conform.
    """
    # Use fullmatch(), not match(): the pattern ends in "$", which also
    # matches just before a trailing newline, so match() would accept a URL
    # with a newline on the end.
    if VALID_REDIRECT_URL_REGEX.fullmatch(url) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))
461# =============================================================================
462# CamCOPS system-oriented names
463# =============================================================================
465# -----------------------------------------------------------------------------
466# Group names
467# -----------------------------------------------------------------------------
def validate_group_name(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks that ``name`` is acceptable as a group name; raises
    :exc:`ValueError` (via :func:`validate_by_char_and_length`) if not.

    Group descriptions can be anything, but group names shouldn't have odd
    characters in -- this greatly facilitates config file handling etc. (for
    example: no spaces, no commas).
    """
    shortest = StringLengths.GROUP_NAME_MIN_LEN
    longest = StringLengths.GROUP_NAME_MAX_LEN
    validate_by_char_and_length(
        name,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        max_length=longest,
        min_length=shortest,
    )
489# -----------------------------------------------------------------------------
490# Usernames
491# -----------------------------------------------------------------------------
def validate_username(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks that ``name`` is acceptable as a user name: characters from
    ``ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR`` only, with a length within
    the CamCOPS username limits. Raises :exc:`ValueError` (via
    :func:`validate_by_char_and_length`) if not.
    """
    shortest = StringLengths.USERNAME_CAMCOPS_MIN_LEN
    longest = StringLengths.USERNAME_CAMCOPS_MAX_LEN
    validate_by_char_and_length(
        name,
        req=req,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=shortest,
    )
509# -----------------------------------------------------------------------------
510# Devices
511# -----------------------------------------------------------------------------
def validate_device_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks a client device name -- the computer-oriented one, not the
    friendly one. At least one character is required. Raises
    :exc:`ValueError` (via :func:`validate_by_char_and_length`) if the name
    doesn't conform.
    """
    longest = StringLengths.DEVICE_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=1,
    )
530# -----------------------------------------------------------------------------
531# Export recipients
532# -----------------------------------------------------------------------------
def validate_export_recipient_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks the name of an export recipient: alphanumeric characters and
    underscores only, with a length within the export recipient name limits.
    Raises :exc:`ValueError` (via :func:`validate_by_char_and_length`) if the
    name doesn't conform.
    """
    shortest = StringLengths.EXPORT_RECIPIENT_NAME_MIN_LEN
    longest = StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_CHAR,
        max_length=longest,
        min_length=shortest,
    )
547# -----------------------------------------------------------------------------
548# Passwords
549# -----------------------------------------------------------------------------
def validate_new_password(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Applies our password policy to a proposed new password. Raises
    :exc:`ValueError` if the password is blank (empty or whitespace only),
    shorter than ``MINIMUM_PASSWORD_LENGTH``, or rejected by
    :func:`password_prohibited`.
    """
    _ = req.gettext if req else dummy_gettext
    if not (x and x.strip()):
        raise ValueError(_("Passwords can't be blank"))
    too_short = len(x) < MINIMUM_PASSWORD_LENGTH
    if too_short:
        template = _("Passwords can't be shorter than {} characters")
        raise ValueError(template.format(MINIMUM_PASSWORD_LENGTH))
    # No maximum length, because we store a hash.
    # No other character limitations.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))
573# -----------------------------------------------------------------------------
574# HL7
575# -----------------------------------------------------------------------------
def validate_hl7_id_type(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks an HL7 Identifier Type: alphanumeric characters, underscores,
    hyphens and spaces only. An empty string is permitted. Raises
    :exc:`ValueError` (via :func:`validate_by_char_and_length`) if the value
    doesn't conform.
    """
    longest = StringLengths.HL7_ID_TYPE_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
    )
def validate_hl7_aa(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks an HL7 Assigning Authority: alphanumeric characters, underscores,
    hyphens and spaces only. An empty string is permitted. Raises
    :exc:`ValueError` (via :func:`validate_by_char_and_length`) if the value
    doesn't conform.
    """
    longest = StringLengths.HL7_AA_MAX_LEN
    validate_by_char_and_length(
        x,
        req=req,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
    )
606# -----------------------------------------------------------------------------
607# Task table names
608# -----------------------------------------------------------------------------
TASK_TABLENAME_REGEX = re.compile(
    # First character: a letter.
    # ... don't start with a number
    # ... and although tables can and do start with underscores, task tables
    #     don't.
    anchor(ALPHA_CHAR, anchor_start=True, anchor_end=False)
    # Remaining characters: alphanumeric/underscore, up to the length limit.
    + anchor(
        min_max_copies(
            ALPHANUM_UNDERSCORE_CHAR,
            min_count=0,
            max_count=StringLengths.TABLENAME_MAX_LEN - 1,
        ),
        anchor_start=False,
        anchor_end=True,
    )
)


def validate_task_tablename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that could be a task tablename: a letter, followed by
    alphanumeric characters or underscores, with a total length of no more
    than ``StringLengths.TABLENAME_MAX_LEN``.

    Raises:
        ValueError: if ``x`` doesn't conform.
    """
    # Use fullmatch(), not match(): the pattern ends in "$", which also
    # matches just before a trailing newline, so match() would accept "abc\n".
    if TASK_TABLENAME_REGEX.fullmatch(x) is None:
        _ = req.gettext if req else dummy_gettext
        # NOTE(review): this wording (with the duplicated "contain" removed)
        # is also the gettext lookup key; any translation stored under the
        # old wording will need updating.
        raise ValueError(
            _(
                "Task table names must start with a letter, and contain only "
                "alphanumeric characters (A-Z, a-z, 0-9) or "
                "underscores (_)."
            )
        )
645# -----------------------------------------------------------------------------
646# Filenames
647# -----------------------------------------------------------------------------
DOWNLOAD_FILENAME_REGEX = re.compile(r"\w[\w-]*\.\w+")
# A word character; then word characters/hyphens; then a literal full stop
# (escaped, so that it doesn't match arbitrary characters such as "/"); then
# a word-character extension. Apply with fullmatch() so that the whole
# filename must conform.
# NB for str patterns in Python, \w matches Unicode word characters; it is
# equivalent to [A-Za-z0-9_] only under re.ASCII.


def validate_download_filename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a file for user download.

    - Permit e.g. ``CamCOPS_dump_2021-06-04T100622.zip``.
    - Prohibit silly things (like directory/drive delimiters).

    The whole filename must match: a name with exactly one full stop,
    separating the stem from the extension.

    Raises:
        ValueError: if the filename doesn't conform.
    """
    # Use fullmatch(), not match(): match() anchors only the start, so a
    # valid-looking prefix followed by e.g. "/../secret" would pass.
    if DOWNLOAD_FILENAME_REGEX.fullmatch(x) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(
            _(
                "Download filenames must (1) begin with an "
                "alphanumeric/underscore character; (2) contain only "
                "alphanumeric characters, underscores, and hyphens; and "
                "(3) end with a full stop followed by an "
                "alphanumeric/underscore extension."
            )
        )