Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/cardinal_pythonlib/rnc_web.py : 28%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
2# cardinal_pythonlib/rnc_web.py
4"""
5===============================================================================
7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com).
9 This file is part of cardinal_pythonlib.
11 Licensed under the Apache License, Version 2.0 (the "License");
12 you may not use this file except in compliance with the License.
13 You may obtain a copy of the License at
15 https://www.apache.org/licenses/LICENSE-2.0
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
23===============================================================================
25**Support for web scripts.**
27"""
30import base64
31import binascii
32import cgi
33import configparser
34import dateutil.parser
35import dateutil.tz
36import datetime
37import html
38import os
39import re
40import sys
41from typing import (Any, Callable, Dict, Iterable, List, Optional,
42 Tuple, Union)
44from cardinal_pythonlib.wsgi.constants import (
45 TYPE_WSGI_APP_RESULT,
46 TYPE_WSGI_START_RESPONSE,
47 TYPE_WSGI_RESPONSE_HEADERS,
48)
49from cardinal_pythonlib.logs import get_brace_style_log_with_null_handler
# Module-level logger, created by the project's logging helper.
log = get_brace_style_log_with_null_handler(__name__)

# Return type of the ``*_result`` helpers in this module, in the order
# ``(contenttype, extraheaders, output)``.
WSGI_TUPLE_TYPE = Tuple[str, TYPE_WSGI_RESPONSE_HEADERS, bytes]
56# =============================================================================
57# Constants
58# =============================================================================
# Pattern matching a single newline character.
_NEWLINE_REGEX = re.compile("\n", re.MULTILINE)

# Prefix of a ``data:`` URL carrying a base64-encoded PNG image.
BASE64_PNG_URL_PREFIX = "data:image/png;base64,"

# The PNG file signature, as hexadecimal text and as raw bytes; see
# http://en.wikipedia.org/wiki/Portable_Network_Graphics#Technical_details
PNG_SIGNATURE_HEXSTRING = "89504E470D0A1A0A"
PNG_SIGNATURE_HEX = bytes.fromhex(PNG_SIGNATURE_HEXSTRING)
68# =============================================================================
69# Misc
70# =============================================================================
def print_utf8(s: str) -> None:
    """
    Writes a Unicode string to ``sys.stdout`` in UTF-8 encoding.

    The encoded bytes are written straight to the binary layer,
    ``sys.stdout.buffer``. Text that was previously written via
    ``sys.stdout.write()``/``print()`` may still be queued in the text layer,
    so that layer is flushed first; otherwise these bytes could appear
    *before* earlier output.

    Args:
        s: the string to write
    """
    sys.stdout.flush()  # keep ordering with earlier text-layer writes
    sys.stdout.buffer.write(s.encode('utf-8'))
def get_int_or_none(s: str) -> Optional[int]:
    """
    Converts a string to an ``int``.

    Args:
        s: the value to convert

    Returns:
        ``int(s)``, or ``None`` if ``int()`` rejects the value with a
        ``TypeError`` or ``ValueError``
    """
    try:
        value = int(s)
    except (TypeError, ValueError):
        return None
    return value
def get_float_or_none(s: str) -> Optional[float]:
    """
    Converts a string to a ``float``.

    Args:
        s: the value to convert

    Returns:
        ``float(s)``, or ``None`` if ``float()`` rejects the value with a
        ``TypeError`` or ``ValueError``
    """
    try:
        value = float(s)
    except (TypeError, ValueError):
        return None
    return value
def is_1(s: str) -> bool:
    """
    Tests whether the input equals the string literal ``"1"``.

    Args:
        s: the value to test

    Returns:
        ``True`` if ``s == "1"``, otherwise ``False``
    """
    if s == "1":
        return True
    return False
def number_to_dp(number: Optional[float],
                 dp: int,
                 default: Optional[str] = "",
                 en_dash_for_minus: bool = True) -> str:
    """
    Formats a number to a fixed number of decimal places.

    Args:
        number: the number to format (or ``None``)
        dp: number of decimal places
        default: value returned, as is, when ``number`` is ``None``
        en_dash_for_minus: replace every hyphen in the result with an en
            dash, for use as a minus sign?

    Returns:
        the formatted text. Positive infinity gives the infinity symbol;
        negative infinity gives the infinity symbol with a leading minus
        sign (an en dash if ``en_dash_for_minus`` is set).
    """
    if number is None:
        return default
    if number == float("inf"):
        return u"∞"
    text = u"-∞" if number == float("-inf") else f"{number:.{dp}f}"
    if not en_dash_for_minus:
        return text
    return text.replace("-", u"–")  # hyphen becomes en dash for minus sign
131# =============================================================================
132# CGI
133# =============================================================================
def debug_form_contents(form: cgi.FieldStorage,
                        to_stderr: bool = True,
                        to_logger: bool = False) -> None:
    """
    Writes the keys and values of a CGI form to ``stderr`` and/or the module
    log, one ``key = value`` entry per form key.

    Args:
        form: the CGI form
        to_stderr: write each entry, as a line of its own, to ``sys.stderr``?
        to_logger: write each entry to the module log at "info" level?
    """
    for k in form.keys():
        text = f"{k} = {form.getvalue(k)}"
        if to_stderr:
            # One entry per line; without the newline, entries run together.
            sys.stderr.write(text + "\n")
        if to_logger:
            # Pass the text as an argument, as other log calls in this module
            # do, rather than as the format string: form contents are
            # user-supplied and may contain braces.
            log.info("{}", text)
    # But note also: cgi.print_form(form)
def cgi_method_is_post(environ: Dict[str, str]) -> bool:
    """
    Reports whether the request method in a CGI environment is ``POST``.

    Args:
        environ: the CGI environment; its ``REQUEST_METHOD`` entry is read

    Returns:
        ``True`` if ``REQUEST_METHOD`` is present, non-empty, and equal to
        ``POST`` ignoring case; otherwise ``False``
    """
    verb = environ.get("REQUEST_METHOD")
    if verb:
        return verb.upper() == "POST"
    return False
def get_cgi_parameter_str(form: cgi.FieldStorage,
                          key: str,
                          default: str = None) -> str:
    """
    Extracts a string parameter from a CGI form.

    Args:
        form: the CGI form
        key: the parameter name (CASE-SENSITIVE)
        default: value to return if the form has no value for ``key``

    Returns:
        the first value stored under ``key``, or ``default`` if there is none
    """
    values = form.getlist(key)
    return values[0] if len(values) > 0 else default
def get_cgi_parameter_str_or_none(form: cgi.FieldStorage,
                                  key: str) -> Optional[str]:
    """
    Extracts a string parameter from a CGI form, treating "absent" and
    "zero-length" alike.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        the string, or ``None`` if the key doesn't exist or the string is
        zero-length
    """
    value = get_cgi_parameter_str(form, key)
    if value is not None and len(value) > 0:
        return value
    return None
def get_cgi_parameter_list(form: cgi.FieldStorage, key: str) -> List[str]:
    """
    Extracts every value stored under one key in a CGI form.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        the result of ``form.getlist(key)``
    """
    values = form.getlist(key)
    return values
def get_cgi_parameter_bool(form: cgi.FieldStorage, key: str) -> bool:
    """
    Extracts a boolean parameter from a CGI form.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        ``True`` if the parameter's string value is ``"1"``; ``False`` for
        anything else (including an absent parameter)
    """
    raw = get_cgi_parameter_str(form, key)
    return is_1(raw)
def get_cgi_parameter_bool_or_default(form: cgi.FieldStorage,
                                      key: str,
                                      default: bool = None) -> Optional[bool]:
    """
    Extracts a boolean parameter from a CGI form, with a fallback.

    Args:
        form: the CGI form
        key: the parameter name
        default: value to return if the parameter is absent or zero-length

    Returns:
        ``True`` for ``"1"``, ``False`` for any other non-empty string, and
        ``default`` for an absent or zero-length value
    """
    raw = get_cgi_parameter_str(form, key)
    if raw is not None and len(raw) > 0:
        return is_1(raw)
    return default
def get_cgi_parameter_bool_or_none(form: cgi.FieldStorage,
                                   key: str) -> Optional[bool]:
    """
    Extracts a boolean parameter from a CGI form, or ``None`` if it is
    absent or zero-length.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        ``True`` for ``"1"``, ``False`` for any other non-empty string,
        ``None`` for an absent or zero-length value
    """
    result = get_cgi_parameter_bool_or_default(form, key, default=None)
    return result
def get_cgi_parameter_int(form: cgi.FieldStorage, key: str) -> Optional[int]:
    """
    Extracts an integer parameter from a CGI form.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        the integer, or ``None`` if the key is absent or its string value is
        not convertible to ``int``
    """
    raw = get_cgi_parameter_str(form, key)
    return get_int_or_none(raw)
def get_cgi_parameter_float(form: cgi.FieldStorage,
                            key: str) -> Optional[float]:
    """
    Extracts a floating-point parameter from a CGI form.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        the number, or ``None`` if the key is absent or its string value is
        not convertible to ``float``
    """
    raw = get_cgi_parameter_str(form, key)
    return get_float_or_none(raw)
def get_cgi_parameter_datetime(form: cgi.FieldStorage,
                               key: str) -> Optional[datetime.datetime]:
    """
    Extracts a date/time parameter from a CGI form. Applies the LOCAL
    timezone if none specified.

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        a timezone-aware ``datetime``, or ``None`` if the parameter is
        absent, empty, or cannot be parsed as a date/time
    """
    s = get_cgi_parameter_str(form, key)
    if not s:
        # if you dateutil.parser.parse() an empty string,
        # you get today's date
        return None
    try:
        d = dateutil.parser.parse(s)
    except (ValueError, OverflowError):
        # dateutil documents OverflowError (as well as ValueError) for
        # unparseable input, e.g. absurdly large numbers; treat both as
        # "not a date" rather than letting a bad form value raise.
        return None
    if d.tzinfo is None:
        d = d.replace(tzinfo=dateutil.tz.tzlocal())
    return d
def get_cgi_parameter_file(form: cgi.FieldStorage,
                           key: str) -> Optional[bytes]:
    """
    Extracts the contents of a file uploaded via a "file" input in a CGI
    form.

    Args:
        form: the CGI form
        key: the name of the file input

    Returns:
        the file's contents, or ``None`` if no such file was uploaded
    """
    _filename, contents = get_cgi_parameter_filename_and_file(form, key)
    return contents
def get_cgi_parameter_filename_and_file(form: cgi.FieldStorage, key: str) \
        -> Tuple[Optional[str], Optional[bytes]]:
    """
    Extracts a file's name and contents from a "file" input in a CGI form.

    A warning is logged, explaining the reason, whenever nothing can be
    returned.

    Args:
        form: the CGI form
        key: the name of the file input

    Returns:
        tuple ``(name, contents)``, or ``(None, None)`` if no such file was
        uploaded
    """
    nothing = (None, None)
    if key not in form:
        log.warning('get_cgi_parameter_file: form has no key {}', key)
        return nothing

    # For an uploaded file this is a nested FieldStorage instance; see
    # http://docs.python.org/2/library/cgi.html#using-the-cgi-module
    upload = form[key]
    if isinstance(upload, cgi.MiniFieldStorage):
        log.warning('get_cgi_parameter_file: MiniFieldStorage found - did you '
                    'forget to set enctype="multipart/form-data" in '
                    'your form?')
        return nothing
    if not isinstance(upload, cgi.FieldStorage):
        log.warning('get_cgi_parameter_file: no FieldStorage instance with '
                    'key {} found', key)
        return nothing

    # The cgi documentation advises testing the "filename" or "file"
    # attribute to detect an upload and then reading from "file", rather
    # than using the value attribute/getvalue(). See also
    # http://upsilon.cc/~zack/teaching/0607/techweb/02-python-cgi.pdf
    if upload.filename and upload.file:
        return upload.filename, upload.file.read()

    # Nothing usable: say why.
    if not upload.file:
        reason = 'get_cgi_parameter_file: fileitem has no file'
    elif not upload.filename:
        reason = 'get_cgi_parameter_file: fileitem has no filename'
    else:
        reason = 'get_cgi_parameter_file: unknown failure reason'
    log.warning(reason)
    return nothing
def cgi_parameter_exists(form: cgi.FieldStorage, key: str) -> bool:
    """
    Does a CGI form contain a value for the key?

    Args:
        form: the CGI form
        key: the parameter name

    Returns:
        ``True`` if looking up the parameter as a string yields something
        other than ``None``
    """
    return get_cgi_parameter_str(form, key) is not None
def checkbox_checked(b: Any) -> str:
    """
    Returns the HTML attribute text that pre-ticks a checkbox.

    Insert the result where the ``{}`` is in e.g.:

    .. code-block:: none

        <label>
            <input type="checkbox" name="myfield" value="1"{}>
            This will be pre-ticked if you insert " checked" where the braces
            are. The newer, more stringent requirement is ' checked="checked"'.
        </label>

    Args:
        b: any value; only its truthiness is used

    Returns:
        ``' checked="checked"'`` if ``b`` is true; otherwise ``''``
    """
    if b:
        return ' checked="checked"'
    return ''
def option_selected(variable: Any, testvalue: Any) -> str:
    """
    Returns the HTML attribute text that pre-selects a ``<select>`` option.

    Args:
        variable: the current value
        testvalue: the value represented by the option being written

    Returns:
        ``' selected="selected"'`` if ``variable == testvalue``; otherwise
        ``''``
    """
    if variable == testvalue:
        return ' selected="selected"'
    return ''
346# =============================================================================
347# Environment
348# =============================================================================
def getenv_escaped(key: str, default: str = None) -> Optional[str]:
    """
    Reads an environment variable and HTML-escapes its value.

    Args:
        key: name of the environment variable
        default: value to use if the variable is not set

    Returns:
        the escaped value (or escaped ``default``), or ``None`` if neither
        is available
    """
    raw = os.getenv(key, default)
    if raw is None:
        return None
    return html.escape(raw)
def getconfigvar_escaped(config: configparser.ConfigParser,
                         section: str,
                         key: str) -> Optional[str]:
    """
    Reads a value from an INI file via :class:`ConfigParser` and HTML-escapes
    it.

    Args:
        config: the parser holding the INI file contents
        section: INI section name
        key: option name within that section

    Returns:
        the escaped value, or ``None`` if ``config.get()`` returns ``None``
    """
    raw = config.get(section, key)
    if raw is None:
        return None
    # noinspection PyDeprecation
    return html.escape(raw)
def get_cgi_fieldstorage_from_wsgi_env(
        env: Dict[str, str],
        include_query_string: bool = True) -> cgi.FieldStorage:
    """
    Builds a :class:`cgi.FieldStorage` object from the WSGI environment.

    Blank form values are kept. The caller's ``env`` is not modified; a copy
    is handed to ``FieldStorage``.

    Args:
        env: the WSGI environment; must contain ``wsgi.input``
        include_query_string: if false, the copy's ``QUERY_STRING`` is
            blanked before the form is built

    Returns:
        the form
    """
    # https://stackoverflow.com/questions/530526/accessing-post-data-from-wsgi
    environ_for_form = env.copy()
    if not include_query_string:
        environ_for_form['QUERY_STRING'] = ''
    return cgi.FieldStorage(
        fp=env['wsgi.input'],
        environ=environ_for_form,
        keep_blank_values=True,
    )
388# =============================================================================
389# Blobs, pictures...
390# =============================================================================
def is_valid_png(blob: Optional[bytes]) -> bool:
    """
    Does a blob start with the PNG signature?

    Args:
        blob: the binary data (or ``None``)

    Returns:
        ``True`` if the first eight bytes equal ``PNG_SIGNATURE_HEX``;
        ``False`` otherwise, including for ``None`` or empty input
    """
    if blob:
        return blob[:8] == PNG_SIGNATURE_HEX
    return False
def get_png_data_url(blob: Optional[bytes]) -> str:
    """
    Converts a PNG blob into a local (``data:``) URL encapsulating the PNG.

    Args:
        blob: the PNG data

    Returns:
        ``BASE64_PNG_URL_PREFIX`` followed by the base64 encoding of ``blob``
    """
    encoded = base64.b64encode(blob).decode('ascii')
    return BASE64_PNG_URL_PREFIX + encoded
def get_png_img_html(blob: Union[bytes, memoryview],
                     extra_html_class: str = None) -> str:
    """
    Converts a PNG blob to an HTML ``<img>`` tag with the image data embedded
    in its ``src`` attribute.

    Args:
        blob: the PNG data
        extra_html_class: optional value for the tag's ``class`` attribute;
            inserted as given, without escaping

    Returns:
        the HTML
    """
    if extra_html_class:
        class_attribute = f'class="{extra_html_class}" '
    else:
        class_attribute = ""
    url = get_png_data_url(blob)
    return '<img {}src="{}" />'.format(class_attribute, url)
419# =============================================================================
420# HTTP results
421# =============================================================================
423# Also, filenames:
424# https://stackoverflow.com/questions/151079
425# http://greenbytes.de/tech/tc2231/#inlwithasciifilenamepdf
def pdf_result(pdf_binary: bytes,
               extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None,
               filename: str = None) -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for a PDF.

    Args:
        pdf_binary: the PDF data
        extraheaders: optional extra HTTP headers; this list is copied, never
            modified
        filename: optional filename; if given, it is added to the content
            type and an inline ``content-disposition`` header is appended

    Returns:
        tuple ``(contenttype, extraheaders, data)``
    """
    # Work on a copy: appending to the caller's own list would make the
    # content-disposition header accumulate if that list is reused.
    headers = list(extraheaders) if extraheaders else []
    contenttype = 'application/pdf'
    if filename:
        headers.append(
            ('content-disposition', f'inline; filename="{filename}"')
        )
        contenttype += f'; filename="{filename}"'
    return contenttype, headers, pdf_binary
def zip_result(zip_binary: bytes,
               extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None,
               filename: str = None) -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for a ZIP.

    Args:
        zip_binary: the ZIP data
        extraheaders: optional extra HTTP headers; this list is copied, never
            modified
        filename: optional filename; if given, it is added to the content
            type and an inline ``content-disposition`` header is appended

    Returns:
        tuple ``(contenttype, extraheaders, data)``
    """
    # Work on a copy: appending to the caller's own list would make the
    # content-disposition header accumulate if that list is reused.
    headers = list(extraheaders) if extraheaders else []
    contenttype = 'application/zip'
    if filename:
        headers.append(
            ('content-disposition', f'inline; filename="{filename}"')
        )
        contenttype += f'; filename="{filename}"'
    return contenttype, headers, zip_binary
# noinspection PyShadowingNames
def html_result(html: str,
                extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None) \
        -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for UTF-8 HTML.

    Args:
        html: the HTML text
        extraheaders: optional extra HTTP headers

    Returns:
        tuple ``(contenttype, extraheaders, data)``, in which ``data`` is
        ``html`` encoded as UTF-8 and ``extraheaders`` is an empty list if
        none were supplied
    """
    headers = extraheaders or []
    body = html.encode("utf-8")
    return 'text/html; charset=utf-8', headers, body
def xml_result(xml: str,
               extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None) \
        -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for UTF-8 XML.

    Args:
        xml: the XML text
        extraheaders: optional extra HTTP headers

    Returns:
        tuple ``(contenttype, extraheaders, data)``, in which ``data`` is
        ``xml`` encoded as UTF-8 and ``extraheaders`` is an empty list if
        none were supplied
    """
    headers = extraheaders or []
    body = xml.encode("utf-8")
    return 'text/xml; charset=utf-8', headers, body
def text_result(text: str,
                extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None,
                filename: str = None) -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for UTF-8 text.

    Args:
        text: the text, which is encoded as UTF-8
        extraheaders: optional extra HTTP headers; this list is copied, never
            modified
        filename: optional filename; if given, it is added to the content
            type and an inline ``content-disposition`` header is appended

    Returns:
        tuple ``(contenttype, extraheaders, data)``
    """
    # Work on a copy: appending to the caller's own list would make the
    # content-disposition header accumulate if that list is reused.
    headers = list(extraheaders) if extraheaders else []
    contenttype = 'text/plain; charset=utf-8'
    if filename:
        headers.append(
            ('content-disposition', f'inline; filename="{filename}"')
        )
        contenttype += f'; filename="{filename}"'
    return contenttype, headers, text.encode("utf-8")
def tsv_result(text: str,
               extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None,
               filename: str = None) -> WSGI_TUPLE_TYPE:
    """
    Returns ``(contenttype, extraheaders, data)`` tuple for UTF-8 TSV.

    Args:
        text: the tab-separated text, which is encoded as UTF-8
        extraheaders: optional extra HTTP headers; this list is copied, never
            modified
        filename: optional filename; if given, it is added to the content
            type and an inline ``content-disposition`` header is appended

    Returns:
        tuple ``(contenttype, extraheaders, data)``
    """
    # Work on a copy: appending to the caller's own list would make the
    # content-disposition header accumulate if that list is reused.
    headers = list(extraheaders) if extraheaders else []
    contenttype = 'text/tab-separated-values; charset=utf-8'
    if filename:
        headers.append(
            ('content-disposition', f'inline; filename="{filename}"')
        )
        contenttype += f'; filename="{filename}"'
    return contenttype, headers, text.encode("utf-8")
517# =============================================================================
518# CGI
519# =============================================================================
def print_result_for_plain_cgi_script_from_tuple(
        contenttype_headers_content: WSGI_TUPLE_TYPE,
        status: str = '200 OK') -> None:
    """
    Writes an HTTP result, supplied as a single tuple, to stdout.

    Args:
        contenttype_headers_content:
            the tuple ``(contenttype, extraheaders, data)``
        status:
            HTTP status message (default ``"200 OK"``)
    """
    (contenttype, headers, content) = contenttype_headers_content
    print_result_for_plain_cgi_script(
        contenttype=contenttype,
        headers=headers,
        content=content,
        status=status,
    )
def print_result_for_plain_cgi_script(contenttype: str,
                                      headers: TYPE_WSGI_RESPONSE_HEADERS,
                                      content: bytes,
                                      status: str = '200 OK') -> None:
    """
    Writes HTTP request result to stdout.

    ``Status``, ``Content-Type`` and ``Content-Length`` header lines are
    written first, then the supplied extra headers, a blank line, and finally
    the binary content.

    Args:
        contenttype: value for the ``Content-Type`` header
        headers: extra HTTP headers, as ``(name, value)`` pairs; ``None`` is
            treated as "no extra headers"
        content: the response body
        status: HTTP status message (default ``"200 OK"``)
    """
    all_headers = [
        ("Status", status),
        ("Content-Type", contenttype),
        ("Content-Length", str(len(content))),
    ] + list(headers or [])
    sys.stdout.write(
        "\n".join([h[0] + ": " + h[1] for h in all_headers]) + "\n\n")
    # The headers went through the text layer, which may still be holding
    # them; the body bypasses that layer. Flush, or the body can reach the
    # client before the headers.
    sys.stdout.flush()
    sys.stdout.buffer.write(content)
553# =============================================================================
554# WSGI
555# =============================================================================
def wsgi_simple_responder(
        result: Union[str, bytes],
        handler: Callable[[Union[str, bytes]], WSGI_TUPLE_TYPE],
        start_response: TYPE_WSGI_START_RESPONSE,
        status: str = '200 OK',
        extraheaders: TYPE_WSGI_RESPONSE_HEADERS = None) \
        -> TYPE_WSGI_APP_RESULT:
    """
    Simple WSGI app: converts a result via ``handler``, starts the response,
    and returns the body.

    The response headers are ``Content-Type`` and ``Content-Length``,
    followed by the caller's ``extraheaders``, followed by any headers that
    the handler returned.

    Args:
        result: the data to be processed by ``handler``
        handler: a function returning a ``(contenttype, extraheaders, data)``
            tuple, e.g. ``text_result``, ``html_result``
        start_response: standard WSGI ``start_response`` function
        status: status code (default ``"200 OK"``)
        extraheaders: optional extra HTTP headers

    Returns:
        WSGI application result: a one-element list holding the output

    """
    caller_headers = extraheaders or []
    contenttype, handler_headers, output = handler(result)
    response_headers = [
        ('Content-Type', contenttype),
        ('Content-Length', str(len(output))),
    ]
    response_headers.extend(caller_headers)
    if handler_headers is not None:
        response_headers.extend(handler_headers)
    # noinspection PyArgumentList
    start_response(status, response_headers)
    return [output]
591# =============================================================================
592# HTML
593# =============================================================================
def webify(v: Any, preserve_newlines: bool = True) -> str:
    """
    Converts a value into an HTML-safe ``str``.

    The value is converted to a string if it isn't one already, then
    HTML-escaped. Newline characters, and also the literal two-character
    sequence backslash-n, are then replaced.

    Args:
        v: the value
        preserve_newlines: replace newlines with ``<br>``? If false, they are
            replaced with a space.

    Returns:
        the HTML-safe text; ``""`` if ``v`` is ``None``
    """
    if v is None:
        return ""
    replacement = "<br>" if preserve_newlines else " "
    text = v if isinstance(v, str) else str(v)
    escaped = html.escape(text)
    return escaped.replace("\n", replacement).replace("\\n", replacement)
def websafe(value: str) -> str:
    """
    Makes a string safe for inclusion in ASCII-encoded HTML.

    The string is HTML-escaped, and every non-ASCII character is replaced by
    a numeric character reference (e.g. ``&#233;``).

    Args:
        value: the text to convert

    Returns:
        the converted text, as a ``str`` containing only ASCII characters
    """
    # https://stackoverflow.com/questions/1061697
    ascii_bytes = html.escape(value).encode('ascii', 'xmlcharrefreplace')
    # encode() yields bytes; decode so that the result is the declared str.
    return ascii_bytes.decode('ascii')
def replace_nl_with_html_br(string: str) -> str:
    """
    Replaces every newline with ``<br>``.

    Args:
        string: the text to convert

    Returns:
        the converted text
    """
    converted = _NEWLINE_REGEX.sub("<br>", string)
    return converted
def bold_if_not_blank(x: Optional[str]) -> str:
    """
    HTML-emboldens content, unless it is ``None``.

    Args:
        x: the content

    Returns:
        ``x`` wrapped in ``<b>...</b>``; or, if ``x`` is ``None``, its string
        form (the text ``None``), not emboldened. An empty string is still
        wrapped.
    """
    return f"<b>{x}</b>" if x is not None else f"{x}"
def make_urls_hyperlinks(text: str) -> str:
    """
    Adds hyperlinks to text that appears to contain URLs or e-mail addresses.

    URLs using the ``http``, ``https``, ``ftp`` or ``gopher`` schemes are
    wrapped in ``<a href="...">``; e-mail addresses are wrapped in
    ``<a href="mailto:...">``.

    See

    - https://stackoverflow.com/questions/1071191

      - ... except that double-replaces everything; e.g. try with
        ``text = "me@somewhere.com me@somewhere.com"``

    - http://stackp.online.fr/?p=19

    Args:
        text: the text to process

    Returns:
        the text with hyperlinks added
    """
    # Verbose mode is requested via the flags argument, not an inline
    # "(?x)": an inline global flag that is not at the very start of the
    # pattern (here it followed leading whitespace) is an error from
    # Python 3.11.
    find_url = re.compile(r'''
        (                       # start of match group: the whole URL
            (https?|ftp|gopher) # make sure we find a resource type
            ://                 # ...needs to be followed by colon-slash-slash
            (\w+[:.]?){2,}      # at least two domain groups, e.g. (gnosis.)(cx)
            (/?|                # could be just the domain name (maybe w/ slash)
            [^ \n\r"]+          # or stuff then space, newline, tab, quote
            [\w/])              # resource name ends in alphanumeric or slash
            (?=[\s\.,>)'"\]]|$) # assert: followed by white or clause ending,
                                # or by the end of the text
        )                       # end of match group
    ''', re.VERBOSE)
    replace_url = r'<a href="\1">\1</a>'
    find_email = re.compile(r'([.\w\-]+@(\w[\w\-]+\.)+[\w\-]+)')
    # '.' doesn't need escaping inside square brackets
    # https://stackoverflow.com/questions/10397968/escape-dot-in-a-regex-range
    replace_email = r'<a href="mailto:\1">\1</a>'
    text = find_url.sub(replace_url, text)
    text = find_email.sub(replace_email, text)
    return text
def html_table_from_query(rows: Iterable[Iterable[Optional[str]]],
                          descriptions: Iterable[Optional[str]]) -> str:
    """
    Converts rows from an SQL query result to an HTML table.
    Suitable for processing output from the defunct function
    ``rnc_db.fetchall_with_fieldnames(sql)``.

    Every cell is passed through ``webify()``, which HTML-escapes it and
    renders ``None`` as an empty cell.

    Args:
        rows: the data rows, each an iterable of cell values
        descriptions: the column headings

    Returns:
        HTML for a ``<table>`` with one header row and one row per data row
    """
    # Collect the pieces and join once, rather than repeatedly extending a
    # string.
    parts = ["<table>\n"]

    # Header row
    parts.append("<tr>")
    parts.extend(f"<th>{webify(x)}</th>" for x in descriptions)
    parts.append("</tr>\n")

    # Data rows
    for row in rows:
        parts.append("<tr>")
        parts.extend(f"<td>{webify(x)}</td>" for x in row)
        parts.append("</tr>\n")  # close the row (was an opening <tr>)

    parts.append("</table>\n")
    return "".join(parts)