Coverage for cc_modules/cc_xml.py: 39%
155 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-15 14:23 +0100
1"""
2camcops_server/cc_modules/cc_xml.py
4===============================================================================
6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry.
7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
9 This file is part of CamCOPS.
11 CamCOPS is free software: you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation, either version 3 of the License, or
14 (at your option) any later version.
16 CamCOPS is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
21 You should have received a copy of the GNU General Public License
22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.
24===============================================================================
26**XML helper functions/classes.**
28"""
30import base64
31import datetime
32import logging
33from typing import Any, List, Optional, TYPE_CHECKING, Union
34import xml.sax.saxutils
36from cardinal_pythonlib.logs import BraceStyleAdapter
37from cardinal_pythonlib.reprfunc import auto_repr
38from cardinal_pythonlib.sqlalchemy.orm_inspect import gen_columns
39import pendulum # avoid name confusion with Date
40from pendulum import DateTime as Pendulum
41from semantic_version.base import Version
42from sqlalchemy.sql.schema import Column
43from sqlalchemy.sql.type_api import TypeEngine
45from camcops_server.cc_modules.cc_simpleobjects import XmlSimpleValue
46from camcops_server.cc_modules.cc_sqla_coltypes import (
47 COLATTR_BLOB_RELATIONSHIP_ATTR_NAME,
48 gen_camcops_blob_columns,
49)
51if TYPE_CHECKING:
52 from camcops_server.cc_modules.cc_request import (
53 CamcopsRequest,
54 )
55 from camcops_server.cc_modules.cc_summaryelement import (
56 SummaryElement,
57 )
# Module-level logger: the standard per-module logger, wrapped in
# cardinal_pythonlib's BraceStyleAdapter.
log = BraceStyleAdapter(
    logging.getLogger(__name__),
)
62# =============================================================================
63# Constants
64# =============================================================================
# Name of the XML element used for SNOMED CT codes.
XML_NAME_SNOMED_CODES = "snomed_ct_codes"

# Namespace declarations attached to the root element by get_xml_tree().
#
# XML namespace names are opaque identifiers that are compared
# character-for-character; they are never fetched. The XML Schema instance
# namespace (which gives meaning to "xsi:nil" and "xsi:type") and the
# markup-compatibility namespace (which gives meaning to "mc:Ignorable") are
# both defined with an "http://" scheme, so the "https://" spelling would
# name a different, unknown namespace.
#
# The first entry carries its own leading space because it is appended
# directly after the element name; subsequent entries are joined with spaces.
# noinspection HttpUrlsUsage
XML_NAMESPACES = [
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
    # ' xmlns:dt="http://www.w3.org/2001/XMLSchema-datatypes"'
]
# Extra declarations added to the root element only when comments are
# included in the output (comments are written as "ignore:comment"
# attributes).
# noinspection HttpUrlsUsage
XML_IGNORE_NAMESPACES = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:ignore="https://camcops.readthedocs.org/ignore"',
    # ... actual URL unimportant
    'mc:Ignorable="ignore"',
]
78# http://www.w3.org/TR/xmlschema-1/
79# http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
class XmlDataTypes:
    """
    Namespace class holding the names of the XML data types used by this
    module. Each attribute is the string written into an element's
    ``xsi:type`` attribute.
    """

    # Binary data
    BASE64BINARY = "base64Binary"

    # Scalars
    BOOLEAN = "boolean"
    DOUBLE = "double"
    INTEGER = "integer"
    STRING = "string"

    # Dates and times
    DATE = "date"
    DATETIME = "dateTime"
    TIME = "time"
97# =============================================================================
98# XML element
99# =============================================================================
class XmlElement:
    """
    Represents XML data in a tree.

    An element either carries a ``value`` (a raw value, a single child
    element, or a list of child elements), or carries ``literal`` XML text
    that is emitted as-is.
    """

    def __init__(
        self,
        name: str,
        value: Any = None,
        datatype: Optional[str] = None,
        comment: Optional[str] = None,
        literal: Optional[str] = None,
    ) -> None:
        """
        Args:
            name: name of this XML element
            value: value of this element: may be a raw value or a list of
                :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects
                (default: ``None``)
            datatype: data type of this element (default: ``None``)
            comment: description of this element (default: ``None``)
            literal: literal XML; overrides all other options
        """
        # Special: boolean requires lower case "true"/"false" (or 0/1), so
        # e.g. Python's True ("True") is stored as "true".
        if value is not None and datatype == XmlDataTypes.BOOLEAN:
            value = str(value).lower()
        self.name = name
        self.value = value
        self.datatype = datatype
        self.comment = comment
        self.literal = literal

    def __repr__(self) -> str:
        """
        Shows just this element.
        """
        return auto_repr(self, with_addr=True)
class XmlLiteral(XmlElement):
    """
    An :class:`XmlElement` that holds only a piece of literal XML text.
    """

    def __init__(self, literal: str) -> None:
        """
        Args:
            literal: the literal XML text
        """
        # The element name is left empty; only the literal text is stored.
        super().__init__("", literal=literal)
150# =============================================================================
151# Some literals
152# =============================================================================
# Ready-made XML comments, each wrapped as an XmlLiteral so that it can be
# placed in a list of XmlElement branches.
XML_COMMENT_ANCILLARY = XmlLiteral(literal="<!-- Ancillary records -->")
XML_COMMENT_ANONYMOUS = XmlLiteral(
    literal="<!-- Anonymous task; no patient info -->"
)
XML_COMMENT_BLOBS = XmlLiteral(literal="<!-- Associated BLOBs -->")
XML_COMMENT_CALCULATED = XmlLiteral(literal="<!-- Calculated fields -->")
XML_COMMENT_PATIENT = XmlLiteral(
    literal="<!-- Associated patient details -->"
)
XML_COMMENT_SNOMED_CT = XmlLiteral(literal="<!-- SNOMED-CT codes -->")
XML_COMMENT_SPECIAL_NOTES = XmlLiteral(
    literal="<!-- Any special notes added -->"
)
XML_COMMENT_STORED = XmlLiteral(literal="<!-- Stored fields -->")
164# =============================================================================
165# XML processing
166# =============================================================================
167# The xml.etree.ElementTree and lxml libraries can both do this sort of thing.
168# However, they do look quite fiddly and we only want to create something
169# simple. Therefore, let's roll our own:
def make_xml_branches_from_columns(
    obj: Any, skip_fields: Optional[List[str]] = None
) -> List[XmlElement]:
    """
    Returns a list of XML branches, each an
    :class:`camcops_server.cc_modules.cc_xml.XmlElement`, from an SQLAlchemy
    ORM object, using the list of SQLAlchemy Column objects that
    define/describe its fields.

    Each branch is named after the database column (not the Python
    attribute), takes its value from the corresponding attribute of ``obj``,
    and carries the column's XML datatype and comment.

    Args:
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to skip (default: skip nothing)

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        objects, in the order yielded by ``gen_columns``
    """
    skipped = skip_fields or []  # type: List[str]
    return [
        XmlElement(
            name=column.name,
            value=getattr(obj, attrname),
            datatype=get_xml_datatype_from_sqla_column(column),
            comment=column.comment,
        )
        for attrname, column in gen_columns(obj)
        if column.name not in skipped
    ]
def make_xml_branches_from_summaries(
    summaries: List["SummaryElement"],
    skip_fields: Optional[List[str]] = None,
    sort_by_name: bool = True,
) -> List[XmlElement]:
    """
    Returns a list of XML branches, each an
    :class:`camcops_server.cc_modules.cc_xml.XmlElement`, from a list of
    summary data provided by a task.

    Args:
        summaries: list of :class:`SummaryElement` objects
        skip_fields: summary element names to skip (default: skip nothing)
        sort_by_name: sort branches by element name?

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        objects; in input order unless ``sort_by_name`` is true
    """
    skipped = skip_fields or []  # type: List[str]
    branches = [
        XmlElement(
            name=s.name,
            value=s.value,
            datatype=get_xml_datatype_from_sqla_column_type(s.coltype),
            comment=s.comment,
        )
        for s in summaries
        if s.name not in skipped
    ]  # type: List[XmlElement]
    if sort_by_name:
        # list.sort() is stable, so same-named elements keep input order.
        branches.sort(key=lambda el: el.name)
    return branches
def make_xml_branches_from_blobs(
    req: "CamcopsRequest", obj: Any, skip_fields: Optional[List[str]] = None
) -> List[XmlElement]:
    """
    Return XML branches from those attributes of an SQLAlchemy ORM object
    (e.g. task) that represent BLOBs.

    Each branch is named after the relationship attribute through which the
    BLOB object is reached (not after the ID column). Its value is the
    BLOB's own XML element, or ``None`` if there is no BLOB.

    Args:
        req: the :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to skip (default: skip nothing)

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects

    """
    skipped = skip_fields or []  # type: List[str]
    branches = []  # type: List[XmlElement]
    # The attribute name of the BLOB ID column itself is not needed here.
    for _id_attrname, column in gen_camcops_blob_columns(obj):
        if column.name in skipped:
            continue
        # The column's "info" dictionary names the relationship attribute.
        # If that key is absent, the name defaults to "" and the getattr()
        # below is then expected to fail with AttributeError.
        relationship_attr = column.info.get(
            COLATTR_BLOB_RELATIONSHIP_ATTR_NAME, ""
        )
        blob = getattr(obj, relationship_attr)
        branches.append(
            XmlElement(
                name=relationship_attr,
                value=None if blob is None else blob.get_xml_element(req),
                comment=column.comment,
            )
        )
    return branches
def xml_header(eol: str = "\n") -> str:
    """
    Returns the XML declaration line (version 1.0, UTF-8), terminated by
    ``eol``.

    Args:
        eol: end-of-line string
    """
    declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    return declaration + f"{eol}"
def get_xml_datatype_from_sqla_column_type(coltype: TypeEngine) -> str:
    """
    Maps an SQLAlchemy column type (such as ``Integer``) to the name of an
    XML schema datatype, via the column type's ``python_type``.
    Compare :func:`get_xml_datatype_from_sqla_column`.

    Raises:
        NotImplementedError: if the Python type matches none of the known
            categories (BLOBs are handled separately)
    """
    # http://www.xml.dvint.com/docs/SchemaDataTypesQR-2.pdf
    # http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
    python_type = coltype.python_type
    # python_type is a class rather than an instance, hence issubclass().
    # The table is ordered from more specific to less specific, and the first
    # match wins: datetime precedes date (datetime.datetime is a subclass of
    # datetime.date) and bool precedes int (bool is a subclass of int).
    precedence = (
        ((datetime.datetime, Pendulum), XmlDataTypes.DATETIME),
        ((datetime.date, pendulum.Date), XmlDataTypes.DATE),
        ((datetime.time, pendulum.Time), XmlDataTypes.TIME),
        ((bool,), XmlDataTypes.BOOLEAN),
        ((int,), XmlDataTypes.INTEGER),
        ((float,), XmlDataTypes.DOUBLE),
        ((str, Version), XmlDataTypes.STRING),
    )
    for candidate_types, xml_datatype in precedence:
        if issubclass(python_type, candidate_types):
            return xml_datatype
    # BLOBs are handled separately.
    raise NotImplementedError(
        f"Don't know XML type for SQLAlchemy type {coltype!r} with Python "
        f"type {python_type!r}"
    )
def get_xml_datatype_from_sqla_column(column: Column) -> Optional[str]:
    """
    Returns the XML schema datatype for an SQLAlchemy Column, determined
    from the column's type (such as ``Integer()``).
    Compare :func:`get_xml_datatype_from_sqla_column_type`, to which this
    function delegates.
    """
    return get_xml_datatype_from_sqla_column_type(column.type)
def get_xml_blob_element(
    name: str, blobdata: Optional[bytes], comment: Optional[str] = None
) -> XmlElement:
    """
    Returns an XmlElement representing a base-64-encoded BLOB.

    Args:
        name: XML element name
        blobdata: the raw binary, or ``None``
        comment: XML comment

    Returns:
        an :class:`camcops_server.cc_modules.cc_xml.XmlElement` of type
        ``base64Binary``. Its value is the base-64 text, or ``None`` if
        ``blobdata`` is ``None`` or empty.
    """
    # http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#base64Binary
    # Base-64 output is pure ASCII, so decoding as ASCII cannot fail.
    value = (
        base64.b64encode(blobdata).decode("ascii") if blobdata else None
    )  # type: Optional[str]
    return XmlElement(
        name=name,
        value=value,
        datatype=XmlDataTypes.BASE64BINARY,
        comment=comment,
    )
def xml_escape_value(value: str) -> str:
    """
    Returns ``value`` escaped for use as XML character data, using
    :func:`xml.sax.saxutils.escape`.
    """
    # http://stackoverflow.com/questions/1091945/
    # https://wiki.python.org/moin/EscapingXml
    escaped = xml.sax.saxutils.escape(value)
    return escaped
def xml_quote_attribute(attr: str) -> str:
    """
    Returns ``attr`` escaped and wrapped in quotes, ready to be used as an
    XML attribute value, using :func:`xml.sax.saxutils.quoteattr`.

    More stringent than value escaping.
    """
    quoted = xml.sax.saxutils.quoteattr(attr)
    return quoted
def get_xml_tree(
    element: Union[
        XmlElement, XmlSimpleValue, List[Union[XmlElement, XmlSimpleValue]]
    ],
    level: int = 0,
    indent_spaces: int = 4,
    eol: str = "\n",
    include_comments: bool = False,
) -> str:
    # noinspection HttpUrlsUsage
    """
    Renders an :class:`camcops_server.cc_modules.cc_xml.XmlElement` (or a
    list of them, or a simple value) as XML text, recursively.

    Args:
        element: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        level: starting level/depth (used for recursion)
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        ValueError: if ``element`` is not an ``XmlElement``, a list, or an
            ``XmlSimpleValue``

    NULL values are represented with ``xsi:nil``, which requires a
    namespace; namespace declarations are written on the element at level 0:

    - https://stackoverflow.com/questions/774192
    - http://books.xmlschemata.org/relaxng/relax-CHP-11-SECT-1.html

    Comments are written as ``ignore:comment`` attributes:

    - http://blog.galasoft.ch/posts/2010/02/quick-tip-commenting-out-properties-in-xaml/
    - https://stackoverflow.com/questions/2073140/

    Regarding newlines:

    - Nothing special is done, i.e. newlines are provided in raw format.
    - However, some browsers may fail to display them correctly (i.e. they
      look like they're missing) -- e.g. Firefox, Chrome -- see
      https://stackoverflow.com/questions/2004386. Just try saving and
      inspecting the results with a text editor, or use the browser's "View
      Source" function (which, for Chrome, shows both newlines and line
      numbers too).

    """  # noqa
    indent = " " * level * indent_spaces

    if not isinstance(element, XmlElement):
        if isinstance(element, list):
            # A list of siblings: render each at the same level (recursive).
            return "".join(
                get_xml_tree(
                    sibling,
                    level,
                    indent_spaces=indent_spaces,
                    eol=eol,
                    include_comments=include_comments,
                )
                for sibling in element
            )
        if isinstance(element, XmlSimpleValue):
            # The lowest-level thing: a value. No extra indent.
            return xml_escape_value(str(element.value))
        raise ValueError(f"Bad value to get_xml_tree: {element!r}")

    if element.literal:
        # A user-inserted piece of XML. Insert, but indent.
        return indent + element.literal + eol

    # ------------------------------------------------------------------
    # Attributes
    # ------------------------------------------------------------------
    declarations = []
    if level == 0:  # root
        # Declare namespaces on the root element (descendants inherit):
        declarations.extend(XML_NAMESPACES)
        if include_comments:
            declarations.extend(XML_IGNORE_NAMESPACES)
    namespace_attr = " ".join(declarations)
    type_attr = f' xsi:type="{element.datatype}"' if element.datatype else ""
    if include_comments and element.comment:
        comment_attr = (
            f" ignore:comment={xml_quote_attribute(element.comment)}"
        )
    else:
        comment_attr = ""
    opening = (
        f"{indent}<{element.name}{namespace_attr}{type_attr}{comment_attr}"
    )

    # ------------------------------------------------------------------
    # Assemble
    # ------------------------------------------------------------------
    if element.value is None:
        # NULL handling
        return f'{opening} xsi:nil="true"/>{eol}'

    has_children = isinstance(element.value, (XmlElement, list))
    # XmlSimpleValue is a marker that subsequently distinguishes things that
    # were part of an XmlElement from user-inserted raw XML.
    content = element.value if has_children else XmlSimpleValue(element.value)
    rendered = get_xml_tree(
        content,
        level=level + 1,
        indent_spaces=indent_spaces,
        eol=eol,
        include_comments=include_comments,
    )
    if has_children:
        # Children go on their own lines; the closing tag is indented.
        return f"{opening}>{eol}{rendered}{indent}</{element.name}>{eol}"
    # A simple value stays on the same line as its tags.
    return f"{opening}>{rendered}</{element.name}>{eol}"
def get_xml_document(
    root: XmlElement,
    indent_spaces: int = 4,
    eol: str = "\n",
    include_comments: bool = False,
) -> str:
    """
    Builds a complete XML document as text: the XML declaration header
    followed by the tree rendered from the root
    :class:`camcops_server.cc_modules.cc_xml.XmlElement`.

    Args:
        root: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        AssertionError: if ``root`` is not an ``XmlElement``
    """
    if isinstance(root, XmlElement):
        header = xml_header(eol)
        body = get_xml_tree(
            root,
            indent_spaces=indent_spaces,
            eol=eol,
            include_comments=include_comments,
        )
        return header + body
    raise AssertionError(
        "get_xml_document: root not an XmlElement; "
        "XML requires a single root"
    )