Coverage for cc_modules/cc_validators.py : 42%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_validators.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**String validators and the like.**
29All functions starting ``validate_`` do nothing if the input is good, and raise
30:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()``
31on the exception).
33All validators take a
34:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for
35internationalized error messages.
37WARNING: even the error messages shouldn't contain the error-producing strings.
38"""
40import ipaddress
41import logging
42import re
43from typing import Callable, List, Optional, TYPE_CHECKING
44import urllib.parse
46from cardinal_pythonlib.logs import BraceStyleAdapter
47from colander import EMAIL_RE
49from camcops_server.cc_modules.cc_constants import (
50 MINIMUM_PASSWORD_LENGTH,
51 StringLengths,
52)
53from camcops_server.cc_modules.cc_password import password_prohibited
55if TYPE_CHECKING:
56 from camcops_server.cc_modules.cc_request import CamcopsRequest
log = BraceStyleAdapter(logging.getLogger(__name__))


# =============================================================================
# Typing constants
# =============================================================================

# Call signature shared by the string validators in this module:
# (string to validate, optional request used for translation) -> None.
STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None]
# ... string validators raise ValueError if the string is invalid
69# =============================================================================
70# Raising exceptions: sometimes internationalized, sometimes not
71# =============================================================================
def dummy_gettext(x: str) -> str:
    """
    Returns the input directly.

    Used by the validators in place of ``req.gettext`` when no request object
    is supplied, so that messages are produced untranslated.

    Args:
        x: the message text.

    Returns:
        ``x``, unchanged.
    """
    return x
80# =============================================================================
81# Regex manipulation
82# =============================================================================
def anchor(expression: str,
           anchor_start: bool = True,
           anchor_end: bool = True) -> str:
    """
    Adds start/end anchors.

    Note that the ``$`` end anchor also matches just before a single trailing
    newline; callers needing a strict whole-string test should use
    ``fullmatch()`` on the compiled regex.

    Args:
        expression: the regex text to wrap.
        anchor_start: prepend ``^``?
        anchor_end: append ``$``?

    Returns:
        The expression with the requested anchors added.
    """
    start = "^" if anchor_start else ""
    end = "$" if anchor_end else ""
    return f"{start}{expression}{end}"
def zero_or_more(expression: str) -> str:
    """
    Regex for zero or more copies.

    Args:
        expression: a regex fragment that can take a quantifier directly
            (e.g. a character class or group).

    Returns:
        The fragment followed by ``*``.
    """
    return f"{expression}*"
def one_or_more(expression: str) -> str:
    """
    Regex for one or more copies.

    Args:
        expression: a regex fragment that can take a quantifier directly
            (e.g. a character class or group).

    Returns:
        The fragment followed by ``+``.
    """
    return f"{expression}+"
def min_max_copies(
        expression: str,
        max_count: int,
        min_count: int = 1) -> str:
    """
    Given a regex expression, permit it a minimum/maximum number of times. For
    example, for a regex group ``x``, produce ``x{min,max}``.

    Be very careful if you use ``min_count == 0`` -- without other
    restrictions, your regex may match an empty string.

    Args:
        expression: a regex fragment that can take a quantifier directly.
        max_count: maximum number of repetitions.
        min_count: minimum number of repetitions; must satisfy
            ``0 <= min_count <= max_count`` (checked by assertion).

    Returns:
        The fragment followed by ``{min_count,max_count}``.
    """
    assert 0 <= min_count <= max_count
    # Doubled braces are literal braces within an f-string.
    return f"{expression}{{{min_count},{max_count}}}"
def describe_regex_permitted_char(
        expression: str,
        req: Optional["CamcopsRequest"] = None,
        invalid_prefix: bool = True) -> str:
    """
    Describes the characters permitted in a regular expression character
    selector -- as long as it's simple! This won't handle arbitrary regexes.

    Args:
        expression: a character class, including its surrounding ``[]``.
        req: optional request, whose ``gettext`` is used to translate the
            fixed parts of the description.
        invalid_prefix: start the description with "Invalid string."?

    Returns:
        A human-readable description such as
        ``Permitted characters: A-Z, a-z, '_'``.
    """
    assert expression.startswith("[") and expression.endswith("]")
    content = expression[1:-1]  # strip off surrounding []
    permitted = []  # type: List[str]
    length = len(content)
    _ = req.gettext if req else dummy_gettext
    i = 0
    while i < length:
        if content[i] == "\\":
            # backslash preceding another character: regex code or escaped char
            assert i + 1 < length, f"Bad escaping in {expression!r}"
            escaped = content[i + 1]
            if escaped == "w":
                permitted.append(_("word character"))
            elif escaped == "W":
                permitted.append(_("non-word character"))
            elif escaped == "d":
                permitted.append(_("digit"))
            elif escaped == "D":
                permitted.append(_("non-digit"))
            elif escaped == "s":
                permitted.append(_("whitespace"))
            elif escaped == "S":
                permitted.append(_("non-whitespace"))
            else:
                permitted.append(repr(escaped))
            i += 2
        elif i + 2 < length and content[i + 1] == "-":
            # Range like A-Z. A hyphen is only a range operator when something
            # follows it; a hyphen that ends the class is a literal character
            # and is handled by the final branch.
            permitted.append(content[i:i + 3])
            i += 3
        else:
            # Any other character is a literal. That includes ".", which has
            # no special meaning inside a character class.
            permitted.append(repr(content[i]))
            i += 1
    description = ", ".join(permitted)
    prefix = _("Invalid string.") + " " if invalid_prefix else ""
    return prefix + _("Permitted characters:") + " " + description
def describe_regex_permitted_char_length(
        expression: str,
        max_length: int,
        min_length: int = 1,
        req: Optional["CamcopsRequest"] = None) -> str:
    """
    Describes a valid string by permitted characters and length.

    Args:
        expression: a character class, including its surrounding ``[]``.
        max_length: maximum permitted string length, as reported to the user.
        min_length: minimum permitted string length, as reported to the user.
        req: optional request, whose ``gettext`` is used for translation.

    Returns:
        "Invalid string.", then the length limits, then the permitted
        characters, separated by spaces.
    """
    _ = req.gettext if req else dummy_gettext
    return (
        _("Invalid string.") +
        " " +
        _("Minimum length = {}. Maximum length = {}.").format(
            min_length, max_length
        ) +
        " " +
        # We have already supplied the "Invalid string." prefix ourselves.
        describe_regex_permitted_char(expression, req, invalid_prefix=False)
    )
195# =============================================================================
196# Generic validation functions
197# =============================================================================
def validate_by_char_and_length(
        x: str,
        permitted_char_expression: str,
        max_length: int,
        min_length: int = 1,
        req: Optional["CamcopsRequest"] = None,
        flags: int = 0) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x: the string to validate.
        permitted_char_expression: a regex character class (including its
            surrounding ``[]``) describing the permitted characters.
        max_length: maximum permitted length.
        min_length: minimum permitted length.
        req: optional request, for translated error messages.
        flags: flags passed to :func:`re.compile`.

    Raises:
        ValueError: if the string is invalid. The message describes the
            rules, not the offending string.
    """
    regex = re.compile(
        anchor(min_max_copies(
            expression=permitted_char_expression,
            min_count=min_length,
            max_count=max_length
        )),
        flags=flags
    )
    # fullmatch(), not match(): the "$" anchor also matches just before a
    # trailing newline, so match() would let e.g. "abc\n" through.
    if not regex.fullmatch(x):
        raise ValueError(describe_regex_permitted_char_length(
            permitted_char_expression,
            min_length=min_length, max_length=max_length, req=req
        ))
224# =============================================================================
225# Generic strings
226# =============================================================================
# Character classes (each a regex fragment matching exactly one character).
ALPHA_CHAR = "[A-Za-z]"

ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]"
ALPHANUM_UNDERSCORE_REGEX = re.compile(
    anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR))
)

ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]"
ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]"
ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]"
ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]"

HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]"
# \w is a word character; with the re.UNICODE flag, that includes accented
# characters. Then we allow hyphen, plain apostrophe, Unicode apostrophe,
# space, dot.
HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE)
# ... for "at least one word character somewhere"
245# ... for "at least one word character somewhere"
248# -----------------------------------------------------------------------------
249# Level 1. Computer-style simple strings with no spaces.
250# -----------------------------------------------------------------------------
def validate_alphanum(x: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a generic alphanumeric string.

    The test is :meth:`str.isalnum`, so the empty string is rejected.

    Args:
        x: the string to validate.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the string is not alphanumeric.
    """
    if not x.isalnum():
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid alphanumeric string"))
def validate_alphanum_underscore(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a string that can be alphanumeric or contain an underscore.

    Args:
        x: the string to validate.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the string is empty or contains any other character.
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches just
    # before a trailing newline, so match() would accept e.g. "abc\n".
    if not ALPHANUM_UNDERSCORE_REGEX.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )
274# -----------------------------------------------------------------------------
275# Level 2. Human-style simple strings, allowing spaces but only minimal
276# punctuation.
277# -----------------------------------------------------------------------------
279# ... see specific validators.
281# -----------------------------------------------------------------------------
282# Level 3. Human-style strings, such as people's names; may involve accented
283# characters, spaces, some punctuation; may be used as Python or SQL search
284# literals (with suitable precautions).
285# -----------------------------------------------------------------------------
287# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
288# 3(a). Human names
289# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
def validate_human_name(
        x: str,
        req: Optional["CamcopsRequest"] = None,
        min_length: int = 0,
        max_length: int = StringLengths.PATIENT_NAME_MAX_LEN) -> None:
    """
    Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Args:
        x: the name to validate.
        req: optional request, for translated error messages.
        min_length: minimum permitted length.
        max_length: maximum permitted length.

    Raises:
        ValueError: if the name contains a character that is not permitted,
            has a bad length, or contains no word character at all.
    """  # noqa
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req
    )
    # search(), not match(): we need a word character *somewhere*, not
    # necessarily first (e.g. a name beginning with an apostrophe).
    if not HUMAN_MANDATORY_CHAR_REGEX.search(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))
314# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
315# 3(c). Search terms for simple near-alphanumeric SQL content, allowing
316# wildcards.
317# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]"
# ... a hyphen is meaningful within a regex character class, so we escape it
def validate_restricted_sql_search_literal(
        x: str,
        req: Optional["CamcopsRequest"] = None,
        min_length: int = 0,
        max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH) -> None:
    """
    Validates a string that can be fairly broad, and can do SQL finding via
    wildcards such as ``%`` and ``_``, but should be syntactically safe in
    terms of HTML etc. It does not permit arbitrary strings; it's a subset of
    what might be possible in SQL.

    Args:
        x: the search literal to validate.
        req: optional request, for translated error messages.
        min_length: minimum permitted length.
        max_length: maximum permitted length.

    Raises:
        ValueError: if the string contains a character that is not permitted
            or has a bad length.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=RESTRICTED_SQL_SEARCH_LITERAL_CHAR,
        min_length=min_length,
        max_length=max_length,
        req=req
    )
343# -----------------------------------------------------------------------------
344# Level 4. Infinitely worrying.
345# -----------------------------------------------------------------------------
# noinspection PyUnusedLocal
def validate_anything(x: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    Lets anything through. May be unwise.

    Args:
        x: the string to "validate" (ignored).
        req: optional request (ignored; present so that the signature matches
            the other validators).
    """
    pass
356# =============================================================================
357# Specific well-known computer formats
358# =============================================================================
360# -----------------------------------------------------------------------------
361# Base 64 encoding
362# -----------------------------------------------------------------------------
364# BASE64_REGEX = re.compile(
365# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"
366# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa
367# )
370# -----------------------------------------------------------------------------
371# Email addresses
372# -----------------------------------------------------------------------------
# Colander's e-mail regex, compiled once at import time.
EMAIL_RE_COMPILED = re.compile(EMAIL_RE)
def validate_email(email: str,
                   req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate an e-mail address.

    We use the same regular expression as our web form (Colander's), plus a
    length constraint.

    Args:
        email: the e-mail address to validate.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the address is too long or does not match the regex.
    """
    # fullmatch(), not match(), so that nothing may follow the address; a "$"
    # anchor on its own would also match just before a trailing newline.
    if (len(email) > StringLengths.EMAIL_ADDRESS_MAX_LEN or
            not EMAIL_RE_COMPILED.fullmatch(email)):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid e-mail address"))
393# -----------------------------------------------------------------------------
394# IP addresses
395# -----------------------------------------------------------------------------
def validate_ip_address(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates an IP address.

    Anything accepted by :func:`ipaddress.ip_address` is accepted here.

    Args:
        x: the candidate IP address.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the string is not a valid IP address.
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        _ = req.gettext if req else dummy_gettext
        # "from None": the underlying exception's message quotes the offending
        # string, which we do not want to appear in chained tracebacks.
        raise ValueError(_("Invalid IP address")) from None
411# -----------------------------------------------------------------------------
412# URLs
413# -----------------------------------------------------------------------------
# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but
# restricted further.
# We end with \Z rather than $, because $ also matches just before a trailing
# newline, which would let "https://host/path\n" through.
VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].[^\s]*\Z")
def validate_any_url(url: str,
                     req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL: it must have both a scheme and a network location.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.

    Args:
        url: the candidate URL.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the URL is invalid.
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    try:
        result = urllib.parse.urlparse(url)
        valid = bool(result.scheme and result.netloc)
    except ValueError:
        # urlparse() itself rejects some malformed input, and its message may
        # quote part of the URL; report our own neutral message instead.
        valid = False
    if not valid:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid URL"))
def validate_redirect_url(url: str,
                          req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL to which we might redirect: it must match
    ``VALID_REDIRECT_URL_REGEX`` (``http``/``https`` only, no whitespace).
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Args:
        url: the candidate URL.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the URL is invalid.
    """
    # fullmatch(), not match(): a "$" anchor also matches just before a
    # trailing newline, and a redirect URL must not contain one.
    if not VALID_REDIRECT_URL_REGEX.fullmatch(url):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))
447# =============================================================================
448# CamCOPS system-oriented names
449# =============================================================================
451# -----------------------------------------------------------------------------
452# Group names
453# -----------------------------------------------------------------------------
def validate_group_name(name: str,
                        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Is the string a valid group name?

    Group descriptions can be anything, but group names shouldn't have odd
    characters in -- this greatly facilitates config file handling etc. (for
    example: no spaces, no commas).

    Args:
        name: the candidate group name.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the name contains a character that is not permitted
            or has a bad length.
    """
    validate_by_char_and_length(
        name,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        min_length=StringLengths.GROUP_NAME_MIN_LEN,
        max_length=StringLengths.GROUP_NAME_MAX_LEN,
        req=req
    )
473# -----------------------------------------------------------------------------
474# Usernames
475# -----------------------------------------------------------------------------
def validate_username(name: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    Is the string a valid user name?

    Args:
        name: the candidate username.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the name contains a character that is not permitted
            or has a bad length.
    """
    validate_by_char_and_length(
        name,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        min_length=StringLengths.USERNAME_CAMCOPS_MIN_LEN,
        max_length=StringLengths.USERNAME_CAMCOPS_MAX_LEN,
        req=req
    )
491# -----------------------------------------------------------------------------
492# Devices
493# -----------------------------------------------------------------------------
def validate_device_name(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate a client device name -- the computer-oriented one, not the
    friendly one.

    Args:
        x: the candidate device name.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the name is empty, too long, or contains a character
            that is not permitted.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        min_length=1,
        max_length=StringLengths.DEVICE_NAME_MAX_LEN,
        req=req
    )
510# -----------------------------------------------------------------------------
511# Export recipients
512# -----------------------------------------------------------------------------
def validate_export_recipient_name(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate the name of an export recipient: alphanumeric characters and
    underscores only, within the configured length limits.

    Args:
        x: the candidate export recipient name.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the name contains a character that is not permitted
            or has a bad length.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_CHAR,
        min_length=StringLengths.EXPORT_RECIPIENT_NAME_MIN_LEN,
        max_length=StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN,
        req=req
    )
525# -----------------------------------------------------------------------------
526# Passwords
527# -----------------------------------------------------------------------------
def validate_new_password(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate a proposed new password. Enforce our password policy.

    The checks, in order: not blank (or whitespace only); at least
    ``MINIMUM_PASSWORD_LENGTH`` characters; not rejected by
    ``password_prohibited()``.

    Args:
        x: the proposed password.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the password fails any check. The message never
            contains the password itself.
    """
    _ = req.gettext if req else dummy_gettext
    if not x or not x.strip():
        raise ValueError(_("Passwords can't be blank"))
    if len(x) < MINIMUM_PASSWORD_LENGTH:
        raise ValueError(
            _("Passwords can't be shorter than {} characters").format(
                MINIMUM_PASSWORD_LENGTH
            )
        )
    # No maximum length, because we store a hash.
    # No other character limitations.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))
549# -----------------------------------------------------------------------------
550# HL7
551# -----------------------------------------------------------------------------
def validate_hl7_id_type(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate HL7 Identifier Type.

    An empty string is permitted (the minimum length is zero).

    Args:
        x: the candidate HL7 Identifier Type.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the string is too long or contains a character that is
            not permitted.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        min_length=0,
        max_length=StringLengths.HL7_ID_TYPE_MAX_LEN,
        req=req
    )
def validate_hl7_aa(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validate HL7 Assigning Authority.

    An empty string is permitted (the minimum length is zero).

    Args:
        x: the candidate HL7 Assigning Authority.
        req: optional request, for translated error messages.

    Raises:
        ValueError: if the string is too long or contains a character that is
            not permitted.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        min_length=0,
        max_length=StringLengths.HL7_AA_MAX_LEN,
        req=req
    )
581# -----------------------------------------------------------------------------
582# Task table names
583# -----------------------------------------------------------------------------
# One letter, then up to (TABLENAME_MAX_LEN - 1) alphanumeric/underscore
# characters, anchored at both ends.
TASK_TABLENAME_REGEX = re.compile(
    anchor(ALPHA_CHAR, anchor_start=True, anchor_end=False) +
    # ... don't start with a number
    # ... and although tables can and do start with underscores, task tables
    #     don't.
    anchor(
        min_max_copies(ALPHANUM_UNDERSCORE_CHAR,
                       min_count=0,
                       max_count=StringLengths.TABLENAME_MAX_LEN - 1),
        anchor_start=False,
        anchor_end=True
    )
)
def validate_task_tablename(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a string that could be a task tablename.

    Args:
        x: the candidate table name.
        req: optional request, for a translated error message.

    Raises:
        ValueError: if the string does not match ``TASK_TABLENAME_REGEX``.
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches just
    # before a trailing newline, so match() would accept e.g. "phq9\n".
    if not TASK_TABLENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_(
            "Task table names must start with a letter, and contain only "
            "alphanumeric characters (A-Z, a-z, 0-9) or "
            "underscores (_)."
        ))