Coverage for cc_modules/cc_validators.py: 45%

148 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-15 15:51 +0100

1""" 

2camcops_server/cc_modules/cc_validators.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CamCOPS. 

10 

11 CamCOPS is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CamCOPS is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**String validators and the like.** 

27 

28All functions starting ``validate_`` do nothing if the input is good, and raise 

29:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()`` 

30on the exception). 

31 

32All validators take a 

33:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for 

34internationalized error messages. 

35 

36WARNING: even the error messages shouldn't contain the error-producing strings. 

37""" 

38 

39import ipaddress 

40import logging 

41import re 

42from typing import Callable, List, Optional, TYPE_CHECKING 

43import urllib.parse 

44 

45from cardinal_pythonlib.logs import BraceStyleAdapter 

46from colander import EMAIL_RE 

47 

48from camcops_server.cc_modules.cc_constants import ( 

49 MINIMUM_PASSWORD_LENGTH, 

50 StringLengths, 

51) 

52from camcops_server.cc_modules.cc_password import password_prohibited 

53 

54if TYPE_CHECKING: 

55 from camcops_server.cc_modules.cc_request import CamcopsRequest 

56 

57log = BraceStyleAdapter(logging.getLogger(__name__)) 

58 

59 

60# ============================================================================= 

61# Typing constants 

62# ============================================================================= 

63 

# Common signature of the string validators in this module: they receive the
# string to check plus an optional request object, and return nothing.
STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None]
# ... string validators raise ValueError if the string is invalid

66 

67 

68# ============================================================================= 

69# Raising exceptions: sometimes internationalized, sometimes not 

70# ============================================================================= 

71 

72 

def dummy_gettext(x: str) -> str:
    """
    No-op substitute for a request's ``gettext``: performs no translation
    and hands the message straight back.
    """
    untranslated = x
    return untranslated

78 

79 

80# ============================================================================= 

81# Regex manipulation 

82# ============================================================================= 

83 

84 

def anchor(
    expression: str, anchor_start: bool = True, anchor_end: bool = True
) -> str:
    """
    Wraps a regex in a start anchor (``^``) and/or an end anchor (``$``),
    as requested by the two flags.
    """
    pieces = []
    if anchor_start:
        pieces.append("^")
    pieces.append(expression)
    if anchor_end:
        pieces.append("$")
    return "".join(pieces)

94 

95 

def zero_or_more(expression: str) -> str:
    """
    Appends the ``*`` quantifier, so the expression may repeat any number of
    times (including none).
    """
    return expression + "*"

101 

102 

def one_or_more(expression: str) -> str:
    """
    Appends the ``+`` quantifier, so the expression must occur at least once.
    """
    return expression + "+"

108 

109 

def min_max_copies(expression: str, max_count: int, min_count: int = 1) -> str:
    """
    Appends a bounded repetition quantifier to a regex expression: for a
    regex group ``x``, the result is ``x{min,max}``.

    Take care with ``min_count == 0``: unless something else restricts it,
    the resulting regex can match the empty string.
    """
    assert min_count >= 0 and min_count <= max_count
    quantifier = "{" + f"{min_count},{max_count}" + "}"
    return expression + quantifier

120 

121 

def describe_regex_permitted_char(
    expression: str,
    req: Optional["CamcopsRequest"] = None,
    invalid_prefix: bool = True,
) -> str:
    """
    Builds a human-readable list of the characters allowed by a regex
    character class (``[...]``). Only simple classes are understood:
    backslash escapes, three-character ranges such as ``A-Z``, and single
    characters. Arbitrary regexes are not supported.
    """
    assert expression.startswith("[") and expression.endswith("]")
    inner = expression[1:-1]  # the part between the square brackets
    descriptions = []  # type: List[str]
    n_chars = len(inner)
    _ = req.gettext if req else dummy_gettext
    pos = 0
    while pos < n_chars:
        current = inner[pos]

        if current == "\\":
            # Backslash plus one more character: either a regex class code
            # (\w, \d, ...) or an escaped literal.
            assert pos + 1 < n_chars, f"Bad escaping in {expression!r}"
            code = inner[pos + 1]
            if code == "w":
                descriptions.append(_("word character"))
            elif code == "W":
                descriptions.append(_("non-word character"))
            elif code == "d":
                descriptions.append(_("digit"))
            elif code == "D":
                descriptions.append(_("non-digit"))
            elif code == "s":
                descriptions.append(_("whitespace"))
            elif code == "S":
                descriptions.append(_("non-whitespace"))
            else:
                descriptions.append(repr(code))
            pos += 2
            continue

        if inner[pos + 1 : pos + 2] == "-":
            # A range like A-Z: report its three characters verbatim.
            assert (
                pos + 2 < n_chars
            ), f"Bad range specification in {expression!r}"
            descriptions.append(inner[pos : pos + 3])
            pos += 3
            continue

        # A single character; an unescaped dot is described specially.
        if current == ".":
            descriptions.append(_("any character"))
        else:
            descriptions.append(repr(current))
        pos += 1

    listing = ", ".join(descriptions)
    if invalid_prefix:
        lead = _("Invalid string.") + " "
    else:
        lead = ""
    return lead + _("Permitted characters:") + " " + listing

172 

173 

def describe_regex_permitted_char_length(
    expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
) -> str:
    """
    Produces an "invalid string" message that states the permitted length
    range followed by the permitted characters of a character class.
    """
    _ = req.gettext if req else dummy_gettext
    heading = _("Invalid string.")
    length_part = _("Minimum length = {}. Maximum length = {}.").format(
        min_length, max_length
    )
    chars_part = describe_regex_permitted_char(
        expression, req, invalid_prefix=False
    )
    return heading + " " + length_part + " " + chars_part

193 

194 

195# ============================================================================= 

196# Generic validation functions 

197# ============================================================================= 

198 

199 

def validate_by_char_and_length(
    x: str,
    permitted_char_expression: str,
    max_length: int,
    min_length: int = 1,
    req: Optional["CamcopsRequest"] = None,
    flags: int = 0,
) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x: the string to validate
        permitted_char_expression: regex character class (e.g. ``[A-Z]``)
            that every character of ``x`` must belong to
        max_length: maximum permitted length
        min_length: minimum permitted length
        req: optional request, used to translate the error message
        flags: flags passed to :func:`re.compile`

    Raises:
        :exc:`ValueError` if the string is invalid; the message describes
        the rules, not the offending string.
    """
    regex = re.compile(
        anchor(
            min_max_copies(
                expression=permitted_char_expression,
                min_count=min_length,
                max_count=max_length,
            )
        ),
        flags=flags,
    )
    # Use fullmatch(), not match(): "$" also matches just before a trailing
    # newline, so match() would wrongly accept e.g. "abc\n".
    if regex.fullmatch(x) is None:
        raise ValueError(
            describe_regex_permitted_char_length(
                permitted_char_expression,
                min_length=min_length,
                max_length=max_length,
                req=req,
            )
        )

230 

231 

232# ============================================================================= 

233# Generic strings 

234# ============================================================================= 

235 

# Regex character classes (each matches exactly one character); they are the
# building blocks for the validators below.
ALPHA_CHAR = "[A-Za-z]"

ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]"
# Compiled pattern: one or more of the above, anchored with ^ and $.
ALPHANUM_UNDERSCORE_REGEX = re.compile(
    anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR))
)

# In the classes below, hyphens (and dots/braces) are backslash-escaped so
# that they are taken literally.
ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]"
ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]"
ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]"
ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]"

HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]"
# \w is a word character; in Unicode matching mode (the default for str
# patterns in Python 3, so re.UNICODE is redundant but harmless), that
# includes accented characters. Then we allow hyphen, plain apostrophe,
# Unicode apostrophe, space, dot.
HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE)
# ... for "at least one word character somewhere"

254 

255 

256# ----------------------------------------------------------------------------- 

257# Level 1. Computer-style simple strings with no spaces. 

258# ----------------------------------------------------------------------------- 

259 

260 

def validate_alphanum(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that a string is purely alphanumeric (per ``str.isalnum()``);
    raises :exc:`ValueError` otherwise.
    """
    if x.isalnum():
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid alphanumeric string"))

268 

269 

def validate_alphanum_underscore(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that can be alphanumeric or contain an underscore.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is empty or contains any other
        character.
    """
    # fullmatch(), not match(): the pattern's "$" also matches just before a
    # trailing newline, so match() would accept e.g. "abc\n".
    if ALPHANUM_UNDERSCORE_REGEX.fullmatch(x) is None:
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )

280 

281 

282# ----------------------------------------------------------------------------- 

283# Level 2. Human-style simple strings, allowing spaces but only minimal 

284# punctuation. 

285# ----------------------------------------------------------------------------- 

286 

287# ... see specific validators. 

288 

289# ----------------------------------------------------------------------------- 

290# Level 3. Human-style strings, such as people's names; may involve accented 

291# characters, spaces, some punctuation; may be used as Python or SQL search 

292# literals (with suitable precautions). 

293# ----------------------------------------------------------------------------- 

294 

295# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

296# 3(a). Human names 

297# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

298 

299 

def validate_human_name(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.PATIENT_NAME_MAX_LEN,
) -> None:
    """
    Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Two checks are applied: every character must belong to
    ``HUMAN_NAME_CHAR_UNICODE`` (with the length limits given), and the name
    must contain at least one word character somewhere. The second check
    means an empty string is rejected even when ``min_length`` is 0.

    Args:
        x: the name to validate
        req: optional request, used to translate error messages
        min_length: minimum permitted length
        max_length: maximum permitted length

    Raises:
        :exc:`ValueError` if the name is invalid.
    """
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req,
    )
    # search(), not match(): the word character may be anywhere, so names
    # that begin with an apostrophe or space (e.g. "'t Hooft") are allowed.
    if HUMAN_MANDATORY_CHAR_REGEX.search(x) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))

322 

323 

324# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

325# 3(c). Search terms for simple near-alphanumeric SQL content, allowing 

326# wildcards. 

327# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

328 

# One permitted character of a restricted SQL search literal: letters,
# digits, hyphen, space, and the SQL wildcards "_" and "%".
RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]"
# ... hyphens are meaningful in regexes, so escape it

331 

332 

def validate_restricted_sql_search_literal(
    x: str,
    req: Optional["CamcopsRequest"] = None,
    min_length: int = 0,
    max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH,
) -> None:
    """
    Validates a search term intended for SQL pattern matching. The SQL
    wildcards ``%`` and ``_`` are allowed, but otherwise only a narrow,
    syntactically safe (e.g. for HTML) set of characters is: this is
    deliberately a subset of what SQL itself would accept.
    """
    validate_by_char_and_length(
        x,
        RESTRICTED_SQL_SEARCH_LITERAL_CHAR,
        max_length=max_length,
        min_length=min_length,
        req=req,
    )

352 

353 

354# ----------------------------------------------------------------------------- 

355# Level 4. Infinitely worrying. 

356# ----------------------------------------------------------------------------- 

357 

358 

# noinspection PyUnusedLocal
def validate_anything(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    A validator that accepts every input without checking it. Use with
    caution.
    """
    return None

365 

366 

367# ============================================================================= 

368# Specific well-known computer formats 

369# ============================================================================= 

370 

371# ----------------------------------------------------------------------------- 

372# Base 64 encoding 

373# ----------------------------------------------------------------------------- 

374 

375# BASE64_REGEX = re.compile( 

376# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" 

377# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa 

378# ) 

379 

380 

381# ----------------------------------------------------------------------------- 

382# Email addresses 

383# ----------------------------------------------------------------------------- 

384 

# Colander's e-mail regex (imported above), compiled once at import time.
EMAIL_RE_COMPILED = re.compile(EMAIL_RE)

386 

387 

def validate_email(email: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks whether a string is an acceptable e-mail address; raises
    :exc:`ValueError` if not.

    The rules mirror those of our web form: Colander's e-mail regex, plus a
    maximum length.
    """
    too_long = len(email) > StringLengths.EMAIL_ADDRESS_MAX_LEN
    if too_long or not EMAIL_RE_COMPILED.match(email):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid e-mail address"))

404 

405 

406# ----------------------------------------------------------------------------- 

407# IP addresses 

408# ----------------------------------------------------------------------------- 

409 

410 

def validate_ip_address(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates an IP address (IPv4 or IPv6).

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if ``x`` is not a valid IP address.
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        _ = req.gettext if req else dummy_gettext
        # "from None": the ipaddress error message quotes the offending
        # input, and our errors must not carry the error-producing string,
        # so don't chain it.
        raise ValueError(_("Invalid IP address")) from None

423 

424 

425# ----------------------------------------------------------------------------- 

426# URLs 

427# ----------------------------------------------------------------------------- 

428 

# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but
# restricted further.
# As written: "http://" or "https://", then one character that is neither
# whitespace nor one of "/$.?#", then any one character, then non-whitespace
# characters to the end.
VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].\S*$")

432 

433 

def validate_any_url(url: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that a string parses as a URL with both a scheme and a network
    location; raises :exc:`ValueError` if it does not. Returns nothing.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.
    A warning is logged on every call.
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    parsed = urllib.parse.urlparse(url)
    if parsed.scheme and parsed.netloc:
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid URL"))

447 

448 

def validate_redirect_url(
    url: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a URL for use as a redirection target: it must match
    ``VALID_REDIRECT_URL_REGEX`` (an ``http``/``https`` URL with no
    whitespace). Returns nothing.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Args:
        url: the URL to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the URL is not acceptable.
    """
    # fullmatch(), not match(): the pattern's "$" also matches just before a
    # trailing newline, which would let "http://host/\n" through.
    if VALID_REDIRECT_URL_REGEX.fullmatch(url) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))

459 

460 

461# ============================================================================= 

462# CamCOPS system-oriented names 

463# ============================================================================= 

464 

465# ----------------------------------------------------------------------------- 

466# Group names 

467# ----------------------------------------------------------------------------- 

468 

469 

def validate_group_name(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks whether a string is an acceptable group name; raises
    :exc:`ValueError` if not.

    Unlike group descriptions (which are free text), group names are limited
    to letters, digits, underscores and hyphens -- so no spaces or commas,
    for example -- which makes them much easier to handle in config files
    and the like.
    """
    limits = StringLengths
    validate_by_char_and_length(
        name,
        ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        max_length=limits.GROUP_NAME_MAX_LEN,
        min_length=limits.GROUP_NAME_MIN_LEN,
        req=req,
    )

487 

488 

489# ----------------------------------------------------------------------------- 

490# Usernames 

491# ----------------------------------------------------------------------------- 

492 

493 

def validate_username(
    name: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks whether a string is an acceptable user name; raises
    :exc:`ValueError` if not.

    Permitted characters are letters, digits, commas, underscores, hyphens
    and braces, within the configured username length limits.
    """
    limits = StringLengths
    validate_by_char_and_length(
        name,
        ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=limits.USERNAME_CAMCOPS_MAX_LEN,
        min_length=limits.USERNAME_CAMCOPS_MIN_LEN,
        req=req,
    )

507 

508 

509# ----------------------------------------------------------------------------- 

510# Devices 

511# ----------------------------------------------------------------------------- 

512 

513 

def validate_device_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks a client device name (the machine-oriented identifier, as opposed
    to the friendly name); raises :exc:`ValueError` if it is invalid.

    At least one character is required; permitted characters are letters,
    digits, commas, underscores, hyphens and braces.
    """
    validate_by_char_and_length(
        x,
        ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=StringLengths.DEVICE_NAME_MAX_LEN,
        min_length=1,
        req=req,
    )

528 

529 

530# ----------------------------------------------------------------------------- 

531# Export recipients 

532# ----------------------------------------------------------------------------- 

533 

534 

def validate_export_recipient_name(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks an export recipient name: letters, digits and underscores only,
    within the configured length limits. Raises :exc:`ValueError` if it is
    invalid.
    """
    limits = StringLengths
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_CHAR,
        max_length=limits.EXPORT_RECIPIENT_NAME_MAX_LEN,
        min_length=limits.EXPORT_RECIPIENT_NAME_MIN_LEN,
        req=req,
    )

545 

546 

547# ----------------------------------------------------------------------------- 

548# Passwords 

549# ----------------------------------------------------------------------------- 

550 

551 

def validate_new_password(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Applies the password policy to a proposed new password, raising
    :exc:`ValueError` on the first rule that it breaks: it must not be blank
    (or whitespace only), must meet the minimum length, and must not be a
    prohibited (too commonly used) password.
    """
    _ = req.gettext if req else dummy_gettext

    has_content = x and x.strip()
    if not has_content:
        raise ValueError(_("Passwords can't be blank"))

    if len(x) < MINIMUM_PASSWORD_LENGTH:
        too_short = _("Passwords can't be shorter than {} characters")
        raise ValueError(too_short.format(MINIMUM_PASSWORD_LENGTH))

    # There is no upper length limit (only a hash is stored) and no
    # restriction on which characters may be used.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))

571 

572 

573# ----------------------------------------------------------------------------- 

574# HL7 

575# ----------------------------------------------------------------------------- 

576 

577 

def validate_hl7_id_type(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Checks an HL7 Identifier Type: letters, digits, underscores, hyphens and
    spaces, up to the configured maximum length. An empty string is
    acceptable. Raises :exc:`ValueError` if invalid.
    """
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=StringLengths.HL7_ID_TYPE_MAX_LEN,
        min_length=0,
        req=req,
    )

591 

592 

def validate_hl7_aa(x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks an HL7 Assigning Authority: letters, digits, underscores, hyphens
    and spaces, up to the configured maximum length. An empty string is
    acceptable. Raises :exc:`ValueError` if invalid.
    """
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=StringLengths.HL7_AA_MAX_LEN,
        min_length=0,
        req=req,
    )

604 

605 

606# ----------------------------------------------------------------------------- 

607# Task table names 

608# ----------------------------------------------------------------------------- 

609 

# Pattern for a task table name: one letter, then up to
# (TABLENAME_MAX_LEN - 1) alphanumeric/underscore characters, anchored at
# both ends.
TASK_TABLENAME_REGEX = re.compile(
    anchor(ALPHA_CHAR, anchor_start=True, anchor_end=False)
    +
    # ... don't start with a number
    # ... and although tables can and do start with underscores, task tables
    #     don't.
    anchor(
        min_max_copies(
            ALPHANUM_UNDERSCORE_CHAR,
            min_count=0,
            max_count=StringLengths.TABLENAME_MAX_LEN - 1,
        ),
        anchor_start=False,
        anchor_end=True,
    )
)

626 

627 

def validate_task_tablename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validates a string that could be a task tablename.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string does not match
        ``TASK_TABLENAME_REGEX``.
    """
    # fullmatch(), not match(): the pattern's "$" also matches just before a
    # trailing newline, so match() would accept e.g. "phq9\n".
    if TASK_TABLENAME_REGEX.fullmatch(x) is None:
        _ = req.gettext if req else dummy_gettext
        # NOTE(review): the message repeats "contain"; it is left unchanged
        # here because it is presumably a translation key -- fix it together
        # with the translation catalogues.
        raise ValueError(
            _(
                "Task table names must start with a letter, and contain only "
                "contain alphanumeric characters (A-Z, a-z, 0-9) or "
                "underscores (_)."
            )
        )

643 

644 

645# ----------------------------------------------------------------------------- 

646# Filenames 

647# ----------------------------------------------------------------------------- 

648 

DOWNLOAD_FILENAME_REGEX = re.compile(r"\w[\w-]*\.\w+")
# A word character, then any number of word characters or hyphens, then a
# literal full stop (escaped: a bare "." would match anything, including
# "/"), then an extension of one or more word characters.
# Note that for str patterns in Python 3, \w is Unicode-aware: it covers
# [A-Za-z0-9_] plus word characters from other scripts.
# The pattern is unanchored; use it with fullmatch().


def validate_download_filename(
    x: str, req: Optional["CamcopsRequest"] = None
) -> None:
    """
    Validate a file for user download.

    - Permit e.g. ``CamCOPS_dump_2021-06-04T100622.zip``.
    - Prohibit silly things (like directory/drive delimiters).

    Args:
        x: the filename to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the filename is not acceptable.
    """
    # fullmatch(): the WHOLE name must fit the pattern. A prefix match would
    # let through names such as "x.zip/../../etc/passwd".
    if DOWNLOAD_FILENAME_REGEX.fullmatch(x) is None:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(
            _(
                "Download filenames must (1) begin with an "
                "alphanumeric/underscore character; (2) contain only "
                "alphanumeric characters, underscores, and hyphens; and "
                "(3) end with a full stop followed by an "
                "alphanumeric/underscore extension."
            )
        )