Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_validators.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com). 

9 

10 This file is part of CamCOPS. 

11 

12 CamCOPS is free software: you can redistribute it and/or modify 

13 it under the terms of the GNU General Public License as published by 

14 the Free Software Foundation, either version 3 of the License, or 

15 (at your option) any later version. 

16 

17 CamCOPS is distributed in the hope that it will be useful, 

18 but WITHOUT ANY WARRANTY; without even the implied warranty of 

19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

20 GNU General Public License for more details. 

21 

22 You should have received a copy of the GNU General Public License 

23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

24 

25=============================================================================== 

26 

27**String validators and the like.** 

28 

29All functions starting ``validate_`` do nothing if the input is good, and raise 

30:exc:`ValueError` if it's bad, with a descriptive error (you can use ``str()`` 

31on the exception). 

32 

33All validators take a 

34:class:`camcops_server.cc_modules.cc_request.CamcopsRequest` parameter, for 

35internationalized error messages. 

36 

37WARNING: even the error messages shouldn't contain the error-producing strings. 

38""" 

39 

40import ipaddress 

41import logging 

42import re 

43from typing import Callable, List, Optional, TYPE_CHECKING 

44import urllib.parse 

45 

46from cardinal_pythonlib.logs import BraceStyleAdapter 

47from colander import EMAIL_RE 

48 

49from camcops_server.cc_modules.cc_constants import ( 

50 MINIMUM_PASSWORD_LENGTH, 

51 StringLengths, 

52) 

53from camcops_server.cc_modules.cc_password import password_prohibited 

54 

55if TYPE_CHECKING: 

56 from camcops_server.cc_modules.cc_request import CamcopsRequest 

57 

# Module-level logger, wrapped in cardinal_pythonlib's BraceStyleAdapter.
log = BraceStyleAdapter(logging.getLogger(__name__))

59 

60 

61# ============================================================================= 

62# Typing constants 

63# ============================================================================= 

64 

# Call signature of a string validator: (string, optional request) -> None.
STRING_VALIDATOR_TYPE = Callable[[str, Optional["CamcopsRequest"]], None]
# ... string validators raise ValueError if the string is invalid

67 

68 

69# ============================================================================= 

70# Raising exceptions: sometimes internationalized, sometimes not 

71# ============================================================================= 

72 

def dummy_gettext(x: str) -> str:
    """
    Stand-in for a ``gettext`` function: hands the message back unchanged
    (i.e. untranslated).
    """
    untranslated = x
    return untranslated

78 

79 

80# ============================================================================= 

81# Regex manipulation 

82# ============================================================================= 

83 

def anchor(expression: str,
           anchor_start: bool = True,
           anchor_end: bool = True) -> str:
    """
    Wraps a regex in anchors.

    Args:
        expression: the regex to wrap
        anchor_start: put ``^`` in front of the expression?
        anchor_end: put ``$`` after the expression?

    Returns:
        the expression with the requested anchors added
    """
    pieces = []
    if anchor_start:
        pieces.append("^")
    pieces.append(expression)
    if anchor_end:
        pieces.append("$")
    return "".join(pieces)

93 

94 

def zero_or_more(expression: str) -> str:
    """
    Appends the ``*`` quantifier, giving a regex for any number of copies
    (including none) of the expression.
    """
    return expression + "*"

100 

101 

def one_or_more(expression: str) -> str:
    """
    Appends the ``+`` quantifier, giving a regex for at least one copy of the
    expression.
    """
    return expression + "+"

107 

108 

def min_max_copies(
        expression: str,
        max_count: int,
        min_count: int = 1) -> str:
    """
    Appends a bounded repetition quantifier to a regex expression: for a
    regex group ``x``, the result is ``x{min,max}``.

    Take care with ``min_count == 0``: unless something else in the regex
    prevents it, the result can match an empty string.

    Args:
        expression: the regex to repeat
        max_count: the largest number of copies permitted
        min_count: the smallest number of copies permitted

    Returns:
        the expression followed by ``{min_count,max_count}``
    """
    assert 0 <= min_count <= max_count
    quantifier = "{" + str(min_count) + "," + str(max_count) + "}"
    return expression + quantifier

122 

123 

def describe_regex_permitted_char(
        expression: str,
        req: Optional["CamcopsRequest"] = None,
        invalid_prefix: bool = True) -> str:
    """
    Describes the characters permitted in a regular expression character
    selector -- as long as it's simple! This won't handle arbitrary regexes.

    Understood: backslash sequences (the word/digit/whitespace shorthands and
    their negations are named; any other escaped character is shown
    literally), ranges such as ``A-Z``, and plain characters.

    Args:
        expression:
            a simple regex character class, e.g. ``[A-Za-z0-9_]``; it must
            start with ``[`` and end with ``]``
        req:
            optional request whose ``gettext`` translates the fixed parts of
            the description; without one, they are left untranslated
        invalid_prefix:
            start the description with "Invalid string."?

    Returns:
        a human-readable description of the permitted characters
    """
    assert expression.startswith("[") and expression.endswith("]")
    content = expression[1:-1]  # strip off surrounding []
    permitted = []  # type: List[str]
    length = len(content)
    _ = req.gettext if req else dummy_gettext
    i = 0
    while i < length:
        if content[i] == "\\":
            # backslash preceding another character: regex code or escaped char
            assert i + 1 < length, f"Bad escaping in {expression!r}"
            escaped = content[i + 1]
            if escaped == "w":
                permitted.append(_("word character"))
            elif escaped == "W":
                permitted.append(_("non-word character"))
            elif escaped == "d":
                permitted.append(_("digit"))
            elif escaped == "D":
                permitted.append(_("non-digit"))
            elif escaped == "s":
                permitted.append(_("whitespace"))
            elif escaped == "S":
                permitted.append(_("non-whitespace"))
            else:
                permitted.append(repr(escaped))
            i += 2
        elif i + 1 < length and content[i + 1] == "-":
            # range like A-Z
            assert i + 2 < length, f"Bad range specification in {expression!r}"
            permitted.append(content[i:i + 3])
            i += 3
        else:
            # A plain character stands for itself. That includes ".": inside
            # a character class it is a literal dot, not "any character".
            permitted.append(repr(content[i]))
            i += 1
    description = ", ".join(permitted)
    prefix = _("Invalid string.") + " " if invalid_prefix else ""
    return prefix + _("Permitted characters:") + " " + description

173 

174 

def describe_regex_permitted_char_length(
        expression: str,
        max_length: int,
        min_length: int = 1,
        req: Optional["CamcopsRequest"] = None) -> str:
    """
    Builds the "invalid string" message for a string that is constrained by
    both its permitted characters and its length.

    Args:
        expression: a simple regex character class, e.g. ``[A-Za-z0-9_]``
        max_length: the maximum length to report
        min_length: the minimum length to report
        req: optional request, used to translate the message

    Returns:
        "Invalid string.", then the length limits, then the permitted
        characters, separated by spaces
    """
    _ = req.gettext if req else dummy_gettext
    headline = _("Invalid string.")
    length_limits = _("Minimum length = {}. Maximum length = {}.").format(
        min_length, max_length
    )
    permitted_chars = describe_regex_permitted_char(
        expression, req, invalid_prefix=False
    )
    return " ".join([headline, length_limits, permitted_chars])

193 

194 

195# ============================================================================= 

196# Generic validation functions 

197# ============================================================================= 

198 

def validate_by_char_and_length(
        x: str,
        permitted_char_expression: str,
        max_length: int,
        min_length: int = 1,
        req: Optional["CamcopsRequest"] = None,
        flags: int = 0) -> None:
    """
    Validate a string based on permitted characters and length.

    Args:
        x:
            the string to validate
        permitted_char_expression:
            a regex character class, e.g. ``[A-Za-z0-9_]``, that every
            character of ``x`` must match
        max_length:
            maximum permitted length of ``x``
        min_length:
            minimum permitted length of ``x``
        req:
            optional request, used to translate the error message
        flags:
            flags passed to :func:`re.compile`

    Raises:
        :exc:`ValueError` if the string is invalid; the message describes
        the permitted characters and lengths, not the offending string
    """
    regex = re.compile(
        anchor(min_max_copies(
            expression=permitted_char_expression,
            min_count=min_length,
            max_count=max_length
        )),
        flags=flags
    )
    # fullmatch(), not match(): "$" also matches just before a trailing
    # newline, so match() would accept e.g. "abc\n" -- an unpermitted
    # character, and potentially one character over max_length.
    if not regex.fullmatch(x):
        raise ValueError(describe_regex_permitted_char_length(
            permitted_char_expression,
            min_length=min_length, max_length=max_length, req=req
        ))

222 

223 

224# ============================================================================= 

225# Generic strings 

226# ============================================================================= 

227 

# Regex character classes (each matches exactly one character), plus a couple
# of regexes built from them.

# An unaccented letter, A-Z or a-z.
ALPHA_CHAR = "[A-Za-z]"

# An unaccented letter, a digit, or an underscore.
ALPHANUM_UNDERSCORE_CHAR = "[A-Za-z0-9_]"
# One or more of those characters, anchored at both ends.
ALPHANUM_UNDERSCORE_REGEX = re.compile(
    anchor(one_or_more(ALPHANUM_UNDERSCORE_CHAR))
)

# As above, with extra punctuation as named (backslash-escaped in the class).
ALPHANUM_UNDERSCORE_HYPHEN_CHAR = r"[A-Za-z0-9_\-]"
ALPHANUM_UNDERSCORE_HYPHEN_DOT_CHAR = r"[A-Za-z0-9_\-\.]"
ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR = r"[A-Za-z0-9,_\-\{\}]"
ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR = r"[A-Za-z0-9_\- ]"

HUMAN_NAME_CHAR_UNICODE = r"[\w\-'’ \.]"
# \w is a word character; with the re.UNICODE flag, that includes accented
# characters. Then we allow hyphen, plain apostrophe, Unicode apostrophe,
# space, dot.
HUMAN_MANDATORY_CHAR_REGEX = re.compile(r"\w+", re.UNICODE)
# ... for "at least one word character somewhere"

246 

247 

248# ----------------------------------------------------------------------------- 

249# Level 1. Computer-style simple strings with no spaces. 

250# ----------------------------------------------------------------------------- 

251 

def validate_alphanum(x: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks that a string is alphanumeric, as judged by :meth:`str.isalnum`.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is not alphanumeric
    """
    if x.isalnum():
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid alphanumeric string"))

260 

261 

def validate_alphanum_underscore(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a string that can be alphanumeric or contain an underscore.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is empty or contains any other
        character
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches
    # just before a trailing newline, so match() would accept "abc\n".
    if not ALPHANUM_UNDERSCORE_REGEX.fullmatch(x):
        raise ValueError(
            describe_regex_permitted_char(ALPHANUM_UNDERSCORE_CHAR, req)
        )

272 

273 

274# ----------------------------------------------------------------------------- 

275# Level 2. Human-style simple strings, allowing spaces but only minimal 

276# punctuation. 

277# ----------------------------------------------------------------------------- 

278 

279# ... see specific validators. 

280 

281# ----------------------------------------------------------------------------- 

282# Level 3. Human-style strings, such as people's names; may involve accented 

283# characters, spaces, some punctuation; may be used as Python or SQL search 

284# literals (with suitable precautions). 

285# ----------------------------------------------------------------------------- 

286 

287# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

288# 3(a). Human names 

289# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

290 

def validate_human_name(
        x: str,
        req: Optional["CamcopsRequest"] = None,
        min_length: int = 0,
        max_length: int = StringLengths.PATIENT_NAME_MAX_LEN) -> None:
    """
    Accepts spaces, accents, etc.

    This is hard. See
    https://stackoverflow.com/questions/888838/regular-expression-for-validating-names-and-surnames

    Args:
        x: the name to validate
        req: optional request, used to translate error messages
        min_length: minimum permitted length
        max_length: maximum permitted length

    Raises:
        :exc:`ValueError` if the name contains a character that isn't
        permitted, has the wrong length, or contains no "word" character at
        all (so an empty name is rejected even when ``min_length`` is 0)
    """  # noqa
    validate_by_char_and_length(
        x,
        permitted_char_expression=HUMAN_NAME_CHAR_UNICODE,
        min_length=min_length,
        max_length=max_length,
        req=req
    )
    # search(), not match(): the word character may be anywhere, so that
    # names starting with punctuation (e.g. "'t Hooft") are accepted.
    if not HUMAN_MANDATORY_CHAR_REGEX.search(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Names require at least one 'word' character"))

312 

313 

314# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

315# 3(c). Search terms for simple near-alphanumeric SQL content, allowing 

316# wildcards. 

317# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

318 

# One permitted character of a restricted SQL search literal: a letter, a
# digit, hyphen, space, or one of the SQL wildcards "_" and "%".
RESTRICTED_SQL_SEARCH_LITERAL_CHAR = r"[A-Za-z0-9\- _%]"
# ... a hyphen is meaningful inside a regex character class, so escape it

321 

322 

def validate_restricted_sql_search_literal(
        x: str,
        req: Optional["CamcopsRequest"] = None,
        min_length: int = 0,
        max_length: int = StringLengths.SQL_SEARCH_LITERAL_MAX_LENGTH) -> None:
    """
    Validates a search term for SQL "finding" operations. The SQL wildcards
    ``%`` and ``_`` are allowed, but the string should remain syntactically
    safe in terms of HTML etc. Arbitrary strings are not permitted; this is
    only a subset of what SQL itself could accept.

    Args:
        x: the search term to validate
        req: optional request, used to translate the error message
        min_length: minimum permitted length
        max_length: maximum permitted length

    Raises:
        :exc:`ValueError` if the string is invalid
    """
    validate_by_char_and_length(
        x,
        RESTRICTED_SQL_SEARCH_LITERAL_CHAR,
        max_length=max_length,
        min_length=min_length,
        req=req,
    )

341 

342 

343# ----------------------------------------------------------------------------- 

344# Level 4. Infinitely worrying. 

345# ----------------------------------------------------------------------------- 

346 

# noinspection PyUnusedLocal
def validate_anything(x: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    A validator that accepts every string, so it never raises. May be
    unwise.
    """
    return None

354 

355 

356# ============================================================================= 

357# Specific well-known computer formats 

358# ============================================================================= 

359 

360# ----------------------------------------------------------------------------- 

361# Base 64 encoding 

362# ----------------------------------------------------------------------------- 

363 

364# BASE64_REGEX = re.compile( 

365# "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$" 

366# # https://stackoverflow.com/questions/475074/regex-to-parse-or-validate-base64-data # noqa 

367# ) 

368 

369 

370# ----------------------------------------------------------------------------- 

371# Email addresses 

372# ----------------------------------------------------------------------------- 

373 

# Colander's e-mail regex, compiled once at import time.
EMAIL_RE_COMPILED = re.compile(EMAIL_RE)

375 

376 

def validate_email(email: str,
                   req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks whether a string is a valid e-mail address.

    The validation is the same as for our web form: Colander's method, plus
    a length constraint.

    Args:
        email: the address to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the address is too long or doesn't match
        Colander's e-mail regex
    """
    acceptable = (
        len(email) <= StringLengths.EMAIL_ADDRESS_MAX_LEN
        and EMAIL_RE_COMPILED.match(email)
    )
    if acceptable:
        return
    _ = req.gettext if req else dummy_gettext
    raise ValueError(_("Invalid e-mail address"))

391 

392 

393# ----------------------------------------------------------------------------- 

394# IP addresses 

395# ----------------------------------------------------------------------------- 

396 

def validate_ip_address(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates an IP address (anything that :func:`ipaddress.ip_address`
    accepts).

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is not a valid IP address
    """
    # https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python  # noqa
    try:
        ipaddress.ip_address(x)
    except ValueError:
        _ = req.gettext if req else dummy_gettext
        # "from None": the ipaddress error message quotes the bad input, and
        # our errors (including their tracebacks) shouldn't reproduce the
        # error-producing string.
        raise ValueError(_("Invalid IP address")) from None

409 

410 

411# ----------------------------------------------------------------------------- 

412# URLs 

413# ----------------------------------------------------------------------------- 

414 

# Per https://mathiasbynens.be/demo/url-regex, using @stephenhay's regex but
# restricted further.
# Reading the pattern: "http://" or "https://"; then one character that is
# not whitespace or any of / $ . ? #; then any one character; then any
# number of non-whitespace characters.
VALID_REDIRECT_URL_REGEX = re.compile(r"^https?://[^\s/$.?#].[^\s]*$")

418 

419 

def validate_any_url(url: str,
                     req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL: it must parse and have both a scheme and a network
    location.
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    However, avoid this one. For example, a URL such as
    xxhttps://127.0.0.1:8088/ can trigger Chrome to launch ``xdg-open``.

    Args:
        url: the URL to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the URL is invalid
    """
    log.warning("Avoid this validator! It allows open-this-file URLs!")
    try:
        result = urllib.parse.urlparse(url)
        valid = bool(result.scheme and result.netloc)
    except ValueError:
        # urlparse() itself rejects some malformed URLs. Treat that as an
        # ordinary failure, so the caller gets our own message rather than
        # one that is untranslated and may quote the input.
        valid = False
    if not valid:
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid URL"))

434 

435 

def validate_redirect_url(url: str,
                          req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a URL that we might redirect to: it must match
    ``VALID_REDIRECT_URL_REGEX`` (which requires ``http://`` or
    ``https://``).
    See https://stackoverflow.com/questions/22238090/validating-urls-in-python

    Args:
        url: the URL to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the URL is invalid
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches
    # just before a trailing newline, so match() would accept a URL with a
    # newline on the end.
    if not VALID_REDIRECT_URL_REGEX.fullmatch(url):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_("Invalid redirection URL"))

445 

446 

447# ============================================================================= 

448# CamCOPS system-oriented names 

449# ============================================================================= 

450 

451# ----------------------------------------------------------------------------- 

452# Group names 

453# ----------------------------------------------------------------------------- 

454 

def validate_group_name(name: str,
                        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks whether a string is a valid group name.

    A group's description can be anything, but its name is kept free of odd
    characters (for example: no spaces, no commas), which greatly
    facilitates config file handling etc.

    Args:
        name: the group name to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the name is invalid
    """
    shortest = StringLengths.GROUP_NAME_MIN_LEN
    longest = StringLengths.GROUP_NAME_MAX_LEN
    validate_by_char_and_length(
        name,
        ALPHANUM_UNDERSCORE_HYPHEN_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )

471 

472 

473# ----------------------------------------------------------------------------- 

474# Usernames 

475# ----------------------------------------------------------------------------- 

476 

def validate_username(name: str,
                      req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks whether a string is a valid user name, by permitted characters
    and length.

    Args:
        name: the username to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the username is invalid
    """
    shortest = StringLengths.USERNAME_CAMCOPS_MIN_LEN
    longest = StringLengths.USERNAME_CAMCOPS_MAX_LEN
    validate_by_char_and_length(
        name,
        ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )

489 

490 

491# ----------------------------------------------------------------------------- 

492# Devices 

493# ----------------------------------------------------------------------------- 

494 

def validate_device_name(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks a client device name. This is the computer-oriented name, not the
    friendly one.

    Args:
        x: the device name to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the name is invalid
    """
    longest = StringLengths.DEVICE_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        ALPHANUM_COMMA_UNDERSCORE_HYPHEN_BRACE_CHAR,
        max_length=longest,
        min_length=1,
        req=req,
    )

508 

509 

510# ----------------------------------------------------------------------------- 

511# Export recipients 

512# ----------------------------------------------------------------------------- 

513 

def validate_export_recipient_name(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks the name of an export recipient, by permitted characters
    (alphanumeric or underscore) and length.

    Args:
        x: the export recipient name to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the name is invalid
    """
    shortest = StringLengths.EXPORT_RECIPIENT_NAME_MIN_LEN
    longest = StringLengths.EXPORT_RECIPIENT_NAME_MAX_LEN
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_CHAR,
        max_length=longest,
        min_length=shortest,
        req=req,
    )

523 

524 

525# ----------------------------------------------------------------------------- 

526# Passwords 

527# ----------------------------------------------------------------------------- 

528 

def validate_new_password(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks a proposed new password against our password policy.

    Args:
        x: the proposed password
        req: optional request, used to translate error messages

    Raises:
        :exc:`ValueError` if the password is blank (empty or whitespace
        only), shorter than ``MINIMUM_PASSWORD_LENGTH``, or reported as
        prohibited by ``password_prohibited()``
    """
    _ = req.gettext if req else dummy_gettext
    if not (x and x.strip()):
        raise ValueError(_("Passwords can't be blank"))
    too_short = len(x) < MINIMUM_PASSWORD_LENGTH
    if too_short:
        message = _("Passwords can't be shorter than {} characters")
        raise ValueError(message.format(MINIMUM_PASSWORD_LENGTH))
    # There is no maximum length, since what we store is a hash; nor are
    # there any other character limitations.
    if password_prohibited(x):
        raise ValueError(_("That password is used too commonly; try again"))

547 

548 

549# ----------------------------------------------------------------------------- 

550# HL7 

551# ----------------------------------------------------------------------------- 

552 

def validate_hl7_id_type(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks an HL7 Identifier Type, by permitted characters and length. An
    empty string is acceptable.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is invalid
    """
    longest = StringLengths.HL7_ID_TYPE_MAX_LEN
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
        req=req,
    )

565 

566 

def validate_hl7_aa(
        x: str, req: Optional["CamcopsRequest"] = None) -> None:
    """
    Checks an HL7 Assigning Authority, by permitted characters and length.
    An empty string is acceptable.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string is invalid
    """
    longest = StringLengths.HL7_AA_MAX_LEN
    validate_by_char_and_length(
        x,
        ALPHANUM_UNDERSCORE_HYPHEN_SPACE_CHAR,
        max_length=longest,
        min_length=0,
        req=req,
    )

579 

580 

581# ----------------------------------------------------------------------------- 

582# Task table names 

583# ----------------------------------------------------------------------------- 

584 

# A task table name: one letter, then up to (TABLENAME_MAX_LEN - 1) further
# letters, digits, or underscores; anchored at both ends.
TASK_TABLENAME_REGEX = re.compile(
    anchor(ALPHA_CHAR, anchor_start=True, anchor_end=False) +
    # ... don't start with a number
    # ... and although tables can and do start with underscores, task tables
    #     don't.
    anchor(
        min_max_copies(ALPHANUM_UNDERSCORE_CHAR,
                       min_count=0,
                       max_count=StringLengths.TABLENAME_MAX_LEN - 1),
        anchor_start=False,
        anchor_end=True
    )
)

598 

599 

def validate_task_tablename(
        x: str,
        req: Optional["CamcopsRequest"] = None) -> None:
    """
    Validates a string that could be a task tablename.

    Args:
        x: the string to validate
        req: optional request, used to translate the error message

    Raises:
        :exc:`ValueError` if the string doesn't match
        ``TASK_TABLENAME_REGEX``
    """
    # fullmatch(), not match(): the regex ends in "$", which also matches
    # just before a trailing newline, so match() would accept "phq9\n".
    if not TASK_TABLENAME_REGEX.fullmatch(x):
        _ = req.gettext if req else dummy_gettext
        raise ValueError(_(
            "Task table names must start with a letter, and contain only "
            "alphanumeric characters (A-Z, a-z, 0-9) or "
            "underscores (_)."
        ))