Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2 

3""" 

4camcops_server/cc_modules/cc_xml.py 

5 

6=============================================================================== 

7 

8 Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com). 

9 

10 This file is part of CamCOPS. 

11 

12 CamCOPS is free software: you can redistribute it and/or modify 

13 it under the terms of the GNU General Public License as published by 

14 the Free Software Foundation, either version 3 of the License, or 

15 (at your option) any later version. 

16 

17 CamCOPS is distributed in the hope that it will be useful, 

18 but WITHOUT ANY WARRANTY; without even the implied warranty of 

19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

20 GNU General Public License for more details. 

21 

22 You should have received a copy of the GNU General Public License 

23 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

24 

25=============================================================================== 

26 

27**XML helper functions/classes.** 

28 

29""" 

30 

31import base64 

32import datetime 

33import logging 

34from typing import Any, List, Optional, TYPE_CHECKING, Union 

35import xml.sax.saxutils 

36 

37from cardinal_pythonlib.logs import BraceStyleAdapter 

38from cardinal_pythonlib.reprfunc import auto_repr 

39from cardinal_pythonlib.sqlalchemy.orm_inspect import gen_columns 

40import pendulum # avoid name confusion with Date 

41from pendulum import DateTime as Pendulum 

42from semantic_version.base import Version 

43from sqlalchemy.sql.schema import Column 

44from sqlalchemy.sql.type_api import TypeEngine 

45 

46from camcops_server.cc_modules.cc_simpleobjects import XmlSimpleValue 

47from camcops_server.cc_modules.cc_sqla_coltypes import gen_camcops_blob_columns 

48 

49if TYPE_CHECKING: 

50 from camcops_server.cc_modules.cc_request import CamcopsRequest # noqa: E501,F401 

51 from camcops_server.cc_modules.cc_summaryelement import SummaryElement # noqa: E501,F401 

52 

53log = BraceStyleAdapter(logging.getLogger(__name__)) 

54 

55 

56# ============================================================================= 

57# Constants 

58# ============================================================================= 

59 

# Element name used for the branch holding SNOMED CT codes.
XML_NAME_SNOMED_CODES = "snomed_ct_codes"

# Namespace declarations applied to the root element.
#
# NOTE: XML namespace names are opaque identifiers that are compared
# character by character; they are not URLs to be "upgraded" to HTTPS. The
# XML Schema instance namespace (which gives meaning to ``xsi:nil`` and
# ``xsi:type``) is exactly ``http://www.w3.org/2001/XMLSchema-instance``.
# Each entry carries its own leading space because the joined string is
# appended directly after the element name.
# noinspection HttpUrlsUsage
XML_NAMESPACES = [
    ' xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
    # ' xmlns:dt="http://www.w3.org/2001/XMLSchema-datatypes"'
]

# Extra root-element attributes used when per-field comments are included.
# They declare the Markup Compatibility namespace and mark the "ignore"
# prefix as ignorable, so that ``ignore:comment`` attributes can be skipped by
# consumers that understand ``mc:Ignorable``.
# noinspection HttpUrlsUsage
XML_IGNORE_NAMESPACES = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:ignore="https://camcops.readthedocs.org/ignore"',
    # ... actual URL unimportant
    'mc:Ignorable="ignore"'
]
# http://www.w3.org/TR/xmlschema-1/
# http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html

74 

75 

class XmlDataTypes:
    """
    Namespace of string constants naming standard XML data types.

    The values are the datatype names as they are written into XML output
    (for example ``"dateTime"`` and ``"base64Binary"``).
    """
    BASE64BINARY = "base64Binary"  # binary data, base-64 encoded
    BOOLEAN = "boolean"
    DATE = "date"
    DATETIME = "dateTime"
    DOUBLE = "double"  # floating-point numbers
    INTEGER = "integer"
    STRING = "string"
    TIME = "time"

88 

89 

90# ============================================================================= 

91# XML element 

92# ============================================================================= 

93 

class XmlElement(object):
    """
    A single node in a tree of XML data.
    """

    def __init__(self, name: str, value: Any = None, datatype: str = None,
                 comment: str = None, literal: str = None) -> None:
        """
        Args:
            name: the element's name
            value: the element's value; either a raw value, or a list of
                :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects
                (default: ``None``)
            datatype: the element's data type (default: ``None``)
            comment: a description of the element (default: ``None``)
            literal: literal XML; overrides all other options
        """
        # XML booleans must be written in lower case ("true"/"false", or
        # 0/1), so store the lower-cased string form of any non-null value.
        is_boolean = datatype == XmlDataTypes.BOOLEAN
        self.name = name
        self.value = (
            str(value).lower()
            if is_boolean and value is not None
            else value
        )
        self.datatype = datatype
        self.comment = comment
        self.literal = literal

    def __repr__(self) -> str:
        """
        Describes this element only.
        """
        return auto_repr(self, with_addr=True)

124 

125 

class XmlLiteral(XmlElement):
    """
    An element that carries a fragment of literal XML.
    """

    def __init__(self, literal: str) -> None:
        """
        Args:
            literal: the literal XML text
        """
        # The element name is left empty; only the literal text is supplied.
        super().__init__("", literal=literal)

132 

133 

134# ============================================================================= 

135# Some literals 

136# ============================================================================= 

137 

# Ready-made XML comments, each wrapped as a literal element so that it can be
# placed directly in a list of branches.
XML_COMMENT_ANCILLARY = XmlLiteral("<!-- Ancillary records -->")
XML_COMMENT_ANONYMOUS = XmlLiteral("<!-- Anonymous task; no patient info -->")
XML_COMMENT_BLOBS = XmlLiteral("<!-- Associated BLOBs -->")
XML_COMMENT_CALCULATED = XmlLiteral("<!-- Calculated fields -->")
XML_COMMENT_PATIENT = XmlLiteral("<!-- Associated patient details -->")
XML_COMMENT_SNOMED_CT = XmlLiteral("<!-- SNOMED-CT codes -->")
XML_COMMENT_SPECIAL_NOTES = XmlLiteral("<!-- Any special notes added -->")
XML_COMMENT_STORED = XmlLiteral("<!-- Stored fields -->")

146 

147 

148# ============================================================================= 

149# XML processing 

150# ============================================================================= 

151# The xml.etree.ElementTree and lxml libraries can both do this sort of thing. 

152# However, they do look quite fiddly and we only want to create something 

153# simple. Therefore, let's roll our own: 

154 

def make_xml_branches_from_columns(
        obj,
        skip_fields: List[str] = None) -> List[XmlElement]:
    """
    Builds one :class:`camcops_server.cc_modules.cc_xml.XmlElement` per
    database column of an SQLAlchemy ORM object.

    Each element is named after the database column, carries the object's
    current value for the corresponding attribute, and takes its datatype and
    comment from the SQLAlchemy Column.

    Args:
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to skip

    Returns:
        a list of XML branches, in the order in which the columns are
        generated
    """
    excluded = skip_fields or []  # type: List[str]
    return [
        XmlElement(
            name=column.name,
            value=getattr(obj, attrname),
            datatype=get_xml_datatype_from_sqla_column(column),
            comment=column.comment,
        )
        for attrname, column in gen_columns(obj)
        if column.name not in excluded
    ]

182 

183 

def make_xml_branches_from_summaries(
        summaries: List["SummaryElement"],
        skip_fields: List[str] = None,
        sort_by_name: bool = True) -> List[XmlElement]:
    """
    Builds one :class:`camcops_server.cc_modules.cc_xml.XmlElement` per
    summary element supplied by a task.

    Args:
        summaries: list of :class:`SummaryElement` objects
        skip_fields: summary element names to skip
        sort_by_name: sort branches by element name?

    Returns:
        a list of XML branches, sorted by name if requested and otherwise in
        the order supplied
    """
    excluded = skip_fields or []
    branches = [
        XmlElement(
            name=summary.name,
            value=summary.value,
            datatype=get_xml_datatype_from_sqla_column_type(summary.coltype),
            comment=summary.comment,
        )
        for summary in summaries
        if summary.name not in excluded
    ]
    if sort_by_name:
        branches.sort(key=lambda branch: branch.name)
    return branches

213 

214 

def make_xml_branches_from_blobs(
        req: "CamcopsRequest",
        obj,
        skip_fields: List[str] = None) -> List[XmlElement]:
    """
    Builds XML branches for the BLOB-related columns of an SQLAlchemy ORM
    object (e.g. a task).

    Each branch is named after the column's BLOB relationship attribute. Its
    value is whatever the related BLOB object's ``get_xml_element(req)``
    returns, or ``None`` if there is no related BLOB.

    Args:
        req: the :class:`camcops_server.cc_modules.cc_request.CamcopsRequest`
        obj: the SQLAlchemy ORM object
        skip_fields: database column names to skip

    Returns:
        a list of :class:`camcops_server.cc_modules.cc_xml.XmlElement` objects

    """
    excluded = skip_fields or []  # type: List[str]
    branches = []  # type: List[XmlElement]
    for _id_attrname, column in gen_camcops_blob_columns(obj):
        if column.name in excluded:
            continue
        # The column stores the BLOB's ID; the BLOB itself is reached via the
        # relationship attribute named by the column.
        relationship_name = column.blob_relationship_attr_name
        blob = getattr(obj, relationship_name)
        blob_xml = blob.get_xml_element(req) if blob is not None else None
        branches.append(XmlElement(
            name=relationship_name,
            value=blob_xml,
            comment=column.comment,
        ))
    return branches

246 

247 

def xml_header(eol: str = '\n') -> str:
    """
    Returns the XML declaration (version 1.0, UTF-8), followed by ``eol``.

    Args:
        eol: end-of-line string appended after the declaration
    """
    declaration = '<?xml version="1.0" encoding="UTF-8"?>'
    return f'{declaration}{eol}'

253 

254 

def get_xml_datatype_from_sqla_column_type(coltype: TypeEngine) -> str:
    """
    Maps an SQLAlchemy column type (such as ``Integer``) to the name of an
    XML schema datatype, via the column type's Python type.
    Compare :func:`get_xml_datatype_from_sqla_column`.

    Raises:
        NotImplementedError: if the Python type matches none of the known
            categories
    """
    # http://www.xml.dvint.com/docs/SchemaDataTypesQR-2.pdf
    # http://www.w3.org/TR/2004/REC-xmlschema-2-20041028/datatypes.html
    pt = coltype.python_type
    # pt is a *type*, not an *instance*, hence issubclass(). The table is
    # scanned in order and the first match wins, so it must run from more
    # specific to less specific: datetime is a subclass of date, and bool is a
    # subclass of int.
    ordered_checks = (
        ((datetime.datetime, Pendulum), XmlDataTypes.DATETIME),
        ((datetime.date, pendulum.Date), XmlDataTypes.DATE),
        ((datetime.time, pendulum.Time), XmlDataTypes.TIME),
        ((bool,), XmlDataTypes.BOOLEAN),
        ((int,), XmlDataTypes.INTEGER),
        ((float,), XmlDataTypes.DOUBLE),
        ((str, Version), XmlDataTypes.STRING),
    )
    for python_types, xml_datatype in ordered_checks:
        if issubclass(pt, python_types):
            return xml_datatype
    # BLOBs are handled separately.
    raise NotImplementedError(
        f"Don't know XML type for SQLAlchemy type {coltype!r} with Python "
        f"type {pt!r}")

284 

285 

def get_xml_datatype_from_sqla_column(column: Column) -> Optional[str]:
    """
    Maps an SQLAlchemy Column to the name of an XML schema datatype, by
    looking at the column's type (such as ``Integer()``).
    Compare :func:`get_xml_datatype_from_sqla_column_type`, which does the
    actual mapping.
    """
    return get_xml_datatype_from_sqla_column_type(column.type)

293 

294 

def get_xml_blob_element(name: str,
                         blobdata: Optional[bytes],
                         comment: str = None) -> XmlElement:
    """
    Wraps binary data in an XmlElement of type ``base64Binary``.

    Args:
        name: XML element name
        blobdata: the raw binary, or ``None``
        comment: XML comment

    Returns:
        an element whose value is the base-64 text of ``blobdata``, or
        ``None`` when ``blobdata`` is ``None`` or empty
    """
    # http://www.w3.org/TR/2001/REC-xmlschema-2-20010502/#base64Binary
    # b64encode() yields bytes; base-64 output is pure ASCII, so decode it to
    # obtain text.
    encoded = base64.b64encode(blobdata).decode("ascii") if blobdata else None
    return XmlElement(
        name=name,
        value=encoded,
        datatype=XmlDataTypes.BASE64BINARY,
        comment=comment,
    )

320 

321 

def xml_escape_value(value: str) -> str:
    """
    Escapes text for use as XML element content, using
    :func:`xml.sax.saxutils.escape` with its default settings.
    """
    # http://stackoverflow.com/questions/1091945/
    # https://wiki.python.org/moin/EscapingXml
    escaped = xml.sax.saxutils.escape(value)
    return escaped

329 

330 

def xml_quote_attribute(attr: str) -> str:
    """
    Escapes a string and wraps it in quotes, ready for use as an XML
    attribute value, using :func:`xml.sax.saxutils.quoteattr`.

    This is stricter than escaping element content, and the returned string
    includes the surrounding quote characters.
    """
    quoted = xml.sax.saxutils.quoteattr(attr)
    return quoted

338 

339 

def get_xml_tree(element: Union[XmlElement, XmlSimpleValue,
                                List[Union[XmlElement, XmlSimpleValue]]],
                 level: int = 0,
                 indent_spaces: int = 4,
                 eol: str = '\n',
                 include_comments: bool = False) -> str:
    # noinspection HttpUrlsUsage
    """
    Renders an :class:`camcops_server.cc_modules.cc_xml.XmlElement` (or a list
    of them, or a simple value) as XML text, recursively.

    Args:
        element: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        level: starting level/depth (used for recursion)
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        ValueError: if ``element`` is not an XmlElement, a list, or an
            XmlSimpleValue

    NULL values are written using ``xsi:nil``, which needs a namespace; the
    namespace declarations are attached to the element at level 0:

    - https://stackoverflow.com/questions/774192
    - http://books.xmlschemata.org/relaxng/relax-CHP-11-SECT-1.html

    Comments are written as an ``ignore:comment`` attribute:

    - http://blog.galasoft.ch/posts/2010/02/quick-tip-commenting-out-properties-in-xaml/
    - https://stackoverflow.com/questions/2073140/

    Regarding newlines:

    - Nothing special is done: newlines in values are emitted raw.
    - Some browsers (e.g. Firefox, Chrome) may not display them properly, so
      that they appear to be missing; see
      https://stackoverflow.com/questions/2004386. Save the output and look at
      it in a text editor, or use the browser's "View Source" function (which,
      for Chrome, shows both newlines and line numbers too).

    """  # noqa
    indent = ' ' * level * indent_spaces

    if isinstance(element, XmlElement):
        if element.literal:
            # User-supplied XML: insert as is, but indented.
            return indent + element.literal + eol

        # --- Attributes ---
        root_declarations = []
        if level == 0:  # root
            # Declared on the root element; descendants inherit them.
            root_declarations.extend(XML_NAMESPACES)
            if include_comments:
                root_declarations.extend(XML_IGNORE_NAMESPACES)
        attributes = " ".join(root_declarations)
        if element.datatype:
            attributes += f' xsi:type="{element.datatype}"'
        if include_comments and element.comment:
            attributes += (
                f' ignore:comment={xml_quote_attribute(element.comment)}'
            )

        # --- NULL value: self-closing element marked as nil ---
        if element.value is None:
            return (
                f'{indent}<{element.name}{attributes} '
                f'xsi:nil="true"/>{eol}'
            )

        # --- Element with content ---
        has_children = isinstance(element.value, (XmlElement, list))
        # Raw values are wrapped in XmlSimpleValue: a marker that lets the
        # recursive call tell values belonging to an XmlElement apart from
        # user-inserted raw XML.
        content = element.value if has_children else \
            XmlSimpleValue(element.value)
        rendered_content = get_xml_tree(
            content,
            level=level + 1,
            indent_spaces=indent_spaces,
            eol=eol,
            include_comments=include_comments
        )
        # Child elements go on their own lines, with the closing tag indented
        # to match; simple values stay on the same line as their tags.
        after_open_tag = eol if has_children else ""
        before_close_tag = indent if has_children else ""
        return (
            f'{indent}<{element.name}{attributes}>{after_open_tag}'
            f'{rendered_content}{before_close_tag}</{element.name}>{eol}'
        )

    if isinstance(element, list):
        # Siblings: render each at the current level and concatenate.
        return "".join(
            get_xml_tree(subelement, level,
                         indent_spaces=indent_spaces,
                         eol=eol,
                         include_comments=include_comments)
            for subelement in element
        )

    if isinstance(element, XmlSimpleValue):
        # The lowest-level thing: a value. Escaped, with no extra indent.
        return xml_escape_value(str(element.value))

    raise ValueError(f"Bad value to get_xml_tree: {element!r}")

453 

454 

def get_xml_document(root: XmlElement,
                     indent_spaces: int = 4,
                     eol: str = '\n',
                     include_comments: bool = False) -> str:
    """
    Renders a complete XML document as text: the XML declaration followed by
    the tree beneath the root
    :class:`camcops_server.cc_modules.cc_xml.XmlElement`.

    Args:
        root: root :class:`camcops_server.cc_modules.cc_xml.XmlElement`
        indent_spaces: number of spaces to indent formatted XML
        eol: end-of-line string
        include_comments: include comments describing each field?

    Raises:
        AssertionError: if ``root`` is not an XmlElement
    """
    if not isinstance(root, XmlElement):
        raise AssertionError("get_xml_document: root not an XmlElement; "
                             "XML requires a single root")
    declaration = xml_header(eol)
    body = get_xml_tree(
        root,
        indent_spaces=indent_spaces,
        eol=eol,
        include_comments=include_comments
    )
    return declaration + body