Coverage for cc_modules/cc_xml.py : 39%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1#!/usr/bin/env python
3"""
4camcops_server/cc_modules/cc_xml.py
6===============================================================================
8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).
10 This file is part of CamCOPS.
12 CamCOPS is free software: you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation, either version 3 of the License, or
15 (at your option) any later version.
17 CamCOPS is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
25===============================================================================
27**XML helper functions/classes.**
29"""
31import base64
32import datetime
33import logging
34from typing import Any, List, Optional, TYPE_CHECKING, Union
35import xml.sax.saxutils
37from cardinal_pythonlib.logs import BraceStyleAdapter
38from cardinal_pythonlib.reprfunc import auto_repr
39from cardinal_pythonlib.sqlalchemy.orm_inspect import gen_columns
40import pendulum # avoid name confusion with Date
41from pendulum import DateTime as Pendulum
42from semantic_version.base import Version
43from sqlalchemy.sql.schema import Column
44from sqlalchemy.sql.type_api import TypeEngine
46from camcops_server.cc_modules.cc_simpleobjects import XmlSimpleValue
47from camcops_server.cc_modules.cc_sqla_coltypes import gen_camcops_blob_columns
49if TYPE_CHECKING:
50 from camcops_server.cc_modules.cc_request import CamcopsRequest # noqa: E501,F401
51 from camcops_server.cc_modules.cc_summaryelement import SummaryElement # noqa: E501,F401
53log = BraceStyleAdapter(logging.getLogger(__name__))
56# =============================================================================
57# Constants
58# =============================================================================
# XML name associated with SNOMED CT codes (not referenced within this
# section of the file; presumably used by callers elsewhere).
XML_NAME_SNOMED_CODES = "snomed_ct_codes"

# Namespace declarations that get_xml_tree() attaches to the root element.
#
# NB: an XML namespace name is an opaque identifier that is compared
# character by character; it is not a URL to be fetched. The XML Schema
# instance namespace is defined with the "http" scheme, so writing "https"
# here would declare a *different* namespace and the xsi:nil / xsi:type
# attributes emitted by get_xml_tree() would not be recognized.
# The leading space is deliberate: this text is appended directly after the
# element name.
XML_NAMESPACES = [
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
    # ' xmlns:dt="http://www.w3.org/2001/XMLSchema-datatypes"'
]

# Extra root-element declarations, added only when comments are included.
# The markup-compatibility namespace name is likewise fixed (and uses
# "http"); it makes attributes carrying the "ignore" prefix ignorable.
XML_IGNORE_NAMESPACES = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:ignore="https://camcops.readthedocs.org/ignore"',
    # ... actual URL unimportant
    'mc:Ignorable="ignore"',
]
72# http://www.w3.org/TR/xmlschema-1/
73# http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
class XmlDataTypes:
    """
    Names of the standard XML data types used by this module, as string
    constants.
    """
    # Binary and logical
    BASE64BINARY = "base64Binary"
    BOOLEAN = "boolean"

    # Dates and times
    DATE = "date"
    DATETIME = "dateTime"
    TIME = "time"

    # Numbers
    DOUBLE = "double"
    INTEGER = "integer"

    # Text
    STRING = "string"
90# =============================================================================
91# XML element
92# =============================================================================
class XmlElement:
    """
    A single node in a tree of XML data.
    """

    def __init__(self, name: str, value: Any = None, datatype: str = None,
                 comment: str = None, literal: str = None) -> None:
        """
        Args:
            name: the element's name
            value: the element's content; either a raw value or a list of
                :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects
                (default: ``None``)
            datatype: the element's data type (default: ``None``)
            comment: a description of the element (default: ``None``)
            literal: literal XML; if given, it overrides all other options
        """
        # XML booleans must be lower case ("true"/"false", or 0/1), so a
        # non-NULL boolean value is stored as its lower-cased string form.
        store_lowercased = (
            datatype == XmlDataTypes.BOOLEAN and value is not None
        )
        self.name = name
        self.value = str(value).lower() if store_lowercased else value
        self.datatype = datatype
        self.comment = comment
        self.literal = literal

    def __repr__(self) -> str:
        """
        Describes this element only.
        """
        return auto_repr(self, with_addr=True)
class XmlLiteral(XmlElement):
    """
    An element that carries a piece of literal XML text.
    """

    def __init__(self, literal: str) -> None:
        """
        Args:
            literal: the literal XML text
        """
        # A literal has no element name of its own.
        super().__init__("", literal=literal)
134# =============================================================================
135# Some literals
136# =============================================================================
# Ready-made XML comments, each wrapped as an XmlLiteral.

# ... ancillary records
XML_COMMENT_ANCILLARY = XmlLiteral("<!-- Ancillary records -->")
# ... anonymous task (no patient information)
XML_COMMENT_ANONYMOUS = XmlLiteral("<!-- Anonymous task; no patient info -->")
# ... associated BLOBs
XML_COMMENT_BLOBS = XmlLiteral("<!-- Associated BLOBs -->")
# ... calculated fields
XML_COMMENT_CALCULATED = XmlLiteral("<!-- Calculated fields -->")
# ... associated patient details
XML_COMMENT_PATIENT = XmlLiteral("<!-- Associated patient details -->")
# ... SNOMED-CT codes
XML_COMMENT_SNOMED_CT = XmlLiteral("<!-- SNOMED-CT codes -->")
# ... special notes
XML_COMMENT_SPECIAL_NOTES = XmlLiteral("<!-- Any special notes added -->")
# ... stored fields
XML_COMMENT_STORED = XmlLiteral("<!-- Stored fields -->")
148# =============================================================================
149# XML processing
150# =============================================================================
151# The xml.etree.ElementTree and lxml libraries can both do this sort of thing.
152# However, they do look quite fiddly and we only want to create something
153# simple. Therefore, let's roll our own:
def make_xml_branches_from_columns(
        obj,
        skip_fields: List[str] = None) -> List[XmlElement]:
    """
    Builds one :class:`camcops_server.cc_modules.cc_xml.XmlElement` per
    column of an SQLAlchemy ORM object, taking the element's name, data type
    and comment from the SQLAlchemy Column and its value from the object.

    Args:
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to leave out

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        objects, in the order the columns are generated
    """
    excluded = skip_fields or []  # type: List[str]
    return [
        XmlElement(
            name=col.name,
            value=getattr(obj, attr),
            datatype=get_xml_datatype_from_sqla_column(col),
            comment=col.comment,
        )
        for attr, col in gen_columns(obj)
        if col.name not in excluded
    ]
def make_xml_branches_from_summaries(
        summaries: List["SummaryElement"],
        skip_fields: List[str] = None,
        sort_by_name: bool = True) -> List[XmlElement]:
    """
    Builds one :class:`camcops_server.cc_modules.cc_xml.XmlElement` per
    summary element supplied by a task.

    Args:
        summaries: list of :class:`SummaryElement` objects
        skip_fields: summary element names to leave out
        sort_by_name: order the resulting branches by element name?

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        objects
    """
    excluded = skip_fields or []
    unsorted = [
        XmlElement(
            name=summary.name,
            value=summary.value,
            datatype=get_xml_datatype_from_sqla_column_type(summary.coltype),
            comment=summary.comment,
        )
        for summary in summaries
        if summary.name not in excluded
    ]
    if not sort_by_name:
        return unsorted
    return sorted(unsorted, key=lambda el: el.name)
def make_xml_branches_from_blobs(
        req: "CamcopsRequest",
        obj,
        skip_fields: List[str] = None) -> List[XmlElement]:
    """
    Builds XML branches for the BLOB attributes of an SQLAlchemy ORM object
    (e.g. a task).

    Args:
        req: the :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to leave out

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        objects

    """
    excluded = skip_fields or []  # type: List[str]
    result = []  # type: List[XmlElement]
    for _id_attr, blob_column in gen_camcops_blob_columns(obj):
        if blob_column.name in excluded:
            continue
        # The branch is named after the relationship attribute (through
        # which the BLOB object is reached), not after the column.
        relationship_name = blob_column.blob_relationship_attr_name
        related_blob = getattr(obj, relationship_name)
        if related_blob is None:
            blob_xml = None
        else:
            blob_xml = related_blob.get_xml_element(req)
        result.append(XmlElement(
            name=relationship_name,
            value=blob_xml,
            comment=blob_column.comment,
        ))
    return result
def xml_header(eol: str = '\n') -> str:
    """
    Returns the XML declaration line (version 1.0, UTF-8), terminated by
    ``eol``.
    """
    declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    return f'{declaration}{eol}'
def get_xml_datatype_from_sqla_column_type(coltype: TypeEngine) -> str:
    """
    Maps an SQLAlchemy column type (such as ``Integer``) to the name of the
    corresponding XML schema datatype, via the column type's Python type.
    Compare :func:`get_xml_datatype_from_sqla_column`.

    Raises:
        NotImplementedError: if the Python type matches none of the known
            categories
    """
    # http://www.xml.dvint.com/docs/SchemaDataTypesQR-2.pdf
    # http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
    pt = coltype.python_type
    # pt is a class rather than an instance, hence issubclass().
    # The table is ordered from more specific to less specific, and the first
    # match wins: datetime is a subclass of date, and bool is a subclass of
    # int, so each must be tested before its parent.
    ordered_checks = (
        ((datetime.datetime, Pendulum), XmlDataTypes.DATETIME),
        ((datetime.date, pendulum.Date), XmlDataTypes.DATE),
        ((datetime.time, pendulum.Time), XmlDataTypes.TIME),
        ((bool,), XmlDataTypes.BOOLEAN),
        ((int,), XmlDataTypes.INTEGER),
        ((float,), XmlDataTypes.DOUBLE),
        ((str, Version), XmlDataTypes.STRING),
    )
    for python_types, xml_datatype in ordered_checks:
        if issubclass(pt, python_types):
            return xml_datatype
    # BLOBs are handled separately.
    raise NotImplementedError(
        f"Don't know XML type for SQLAlchemy type {coltype!r} with Python "
        f"type {pt!r}")
def get_xml_datatype_from_sqla_column(column: Column) -> Optional[str]:
    """
    Returns the name of the XML schema datatype for an SQLAlchemy Column, by
    looking up the column's type (such as ``Integer()``).
    Compare :func:`get_xml_datatype_from_sqla_column_type`.
    """
    return get_xml_datatype_from_sqla_column_type(column.type)
def get_xml_blob_element(name: str,
                         blobdata: Optional[bytes],
                         comment: str = None) -> XmlElement:
    """
    Wraps a BLOB, encoded as base-64 text, in an XmlElement.

    Args:
        name: XML element name
        blobdata: the raw binary, or ``None``
        comment: XML comment

    Returns:
        an :class:`camcops_server.cc_modules.cc_xml.XmlElement` of type
        ``base64Binary``; its value is ``None`` when ``blobdata`` is ``None``
        or empty
    """
    # blobdata is raw binary; base-64 output is pure ASCII.
    encoded = base64.b64encode(blobdata).decode("ascii") if blobdata else None
    # http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#base64Binary
    return XmlElement(
        name=name,
        value=encoded,
        datatype=XmlDataTypes.BASE64BINARY,
        comment=comment,
    )
def xml_escape_value(value: str) -> str:
    """
    Returns ``value`` escaped for use as XML character data.
    """
    # http://stackoverflow.com/questions/1091945/
    # https://wiki.python.org/moin/EscapingXml
    escaped = xml.sax.saxutils.escape(value)
    return escaped
def xml_quote_attribute(attr: str) -> str:
    """
    Returns ``attr`` escaped and wrapped in quotes, ready for use as an XML
    attribute value.

    This is stricter than escaping an element's value.
    """
    quoted = xml.sax.saxutils.quoteattr(attr)
    return quoted
def get_xml_tree(element: Union[XmlElement, XmlSimpleValue,
                                List[Union[XmlElement, XmlSimpleValue]]],
                 level: int = 0,
                 indent_spaces: int = 4,
                 eol: str = '\n',
                 include_comments: bool = False) -> str:
    # noinspection HttpUrlsUsage
    """
    Renders an :class:`camcops_server.cc_modules.cc_xml.XmlElement` (or a
    list of them, or a simple value) as XML text.

    Args:
        element: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        level: starting level/depth (used for recursion)
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        ValueError: if ``element`` is not an ``XmlElement``, a list, or an
            ``XmlSimpleValue``

    NULL values are represented with ``xsi:nil``, which requires a
    namespace:

    - https://stackoverflow.com/questions/774192
    - http://books.xmlschemata.org/relaxng/relax-CHP-11-SECT-1.html

    Comments:

    - http://blog.galasoft.ch/posts/2010/02/quick-tip-commenting-out-properties-in-xaml/
    - https://stackoverflow.com/questions/2073140/

    Regarding newlines:

    - Nothing special is done, i.e. newlines are provided in raw format.
    - However, some browsers may fail to display them correctly (i.e. they
      look like they're missing) -- e.g. Firefox, Chrome -- see
      https://stackoverflow.com/questions/2004386. Just try saving and
      inspecting the results with a text editor, or use the browser's "View
      Source" function (which, for Chrome, shows both newlines and line
      numbers too).

    """  # noqa
    indent = ' ' * level * indent_spaces

    if not isinstance(element, XmlElement):
        if isinstance(element, list):
            # Siblings: render each at the same level (recursive).
            return "".join(
                get_xml_tree(child, level,
                             indent_spaces=indent_spaces,
                             eol=eol,
                             include_comments=include_comments)
                for child in element
            )
        if isinstance(element, XmlSimpleValue):
            # The lowest-level thing: a value. No extra indent.
            return xml_escape_value(str(element.value))
        raise ValueError(f"Bad value to get_xml_tree: {element!r}")

    if element.literal:
        # A user-inserted piece of XML. Insert, but indent.
        return indent + element.literal + eol

    # ----- Attributes -----
    root_declarations = []
    if level == 0:
        # Namespaces go on the root element (and are inherited below it).
        root_declarations += XML_NAMESPACES
        if include_comments:
            root_declarations += XML_IGNORE_NAMESPACES
    namespace_text = " ".join(root_declarations)
    type_attr = (
        f' xsi:type="{element.datatype}"' if element.datatype else ""
    )
    if include_comments and element.comment:
        comment_attr = (
            f' ignore:comment={xml_quote_attribute(element.comment)}'
        )
    else:
        comment_attr = ""
    attributes = namespace_text + type_attr + comment_attr

    # ----- NULL value -----
    if element.value is None:
        return f'{indent}<{element.name}{attributes} xsi:nil="true"/>{eol}'

    # ----- Non-NULL value -----
    has_children = isinstance(element.value, (XmlElement, list))
    if has_children:
        content = element.value
    else:
        # XmlSimpleValue is a marker that subsequently distinguishes things
        # that were part of an XmlElement from user-inserted raw XML.
        content = XmlSimpleValue(element.value)
    rendered = get_xml_tree(
        content,
        level=level + 1,
        indent_spaces=indent_spaces,
        eol=eol,
        include_comments=include_comments
    )
    if has_children:
        # Children sit on their own lines; the closing tag is indented to
        # line up with the opening tag.
        return (
            f'{indent}<{element.name}{attributes}>{eol}'
            f'{rendered}{indent}</{element.name}>{eol}'
        )
    # A simple value stays on the same line as its tags.
    return (
        f'{indent}<{element.name}{attributes}>'
        f'{rendered}</{element.name}>{eol}'
    )
def get_xml_document(root: XmlElement,
                     indent_spaces: int = 4,
                     eol: str = '\n',
                     include_comments: bool = False) -> str:
    """
    Renders a complete XML document (the XML declaration followed by the
    element tree) as text.

    Args:
        root: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        AssertionError: if ``root`` is not an
            :class:`camcops_server.cc_modules.cc_xml.XmlElement`
    """
    if isinstance(root, XmlElement):
        header = xml_header(eol)
        body = get_xml_tree(
            root,
            indent_spaces=indent_spaces,
            eol=eol,
            include_comments=include_comments
        )
        return header + body
    raise AssertionError("get_xml_document: root not an XmlElement; "
                         "XML requires a single root")