Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

#!/usr/bin/env python

"""
camcops_server/cc_modules/cc_string.py

===============================================================================

    Copyright (C) 2012-2020 Rudolf Cardinal (rudolf@pobox.com).

    This file is part of CamCOPS.

    CamCOPS is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    CamCOPS is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with CamCOPS. If not, see <https://www.gnu.org/licenses/>.

===============================================================================

**Manage the "extra strings" that the server reads from XML files. The server
uses these for displaying tasks, and provides them to client devices.**

"""

31 

32import glob 

33import logging 

34from typing import Dict, List 

35import xml.etree.cElementTree as ElementTree 

36# ... cElementTree is a faster implementation 

37# ... http://docs.python.org/2/library/xml.etree.elementtree.html 

38# ... http://effbot.org/zone/celementtree.htm 

39from xml.etree.ElementTree import Element, tostring 

40 

41from cardinal_pythonlib.logs import BraceStyleAdapter 

42from cardinal_pythonlib.text import unescape_newlines 

43 

44from camcops_server.cc_modules.cc_cache import cache_region_static, fkg 

45from camcops_server.cc_modules.cc_config import get_config 

46from camcops_server.cc_modules.cc_exception import raise_runtime_error 

47 

# Module-level logger, wrapped so that log calls can use "{}"-style
# placeholders (as in log.info("... {}", value) further down this file).
log = BraceStyleAdapter(logging.getLogger(__name__))

49 

50 

# Top-level key (pseudo-"task" name) under which the core CamCOPS strings are
# stored in the dictionary built by all_extra_strings_as_dicts().
APPSTRING_TASKNAME = "camcops"
# Locale key used for strings whose <task> element has no "locale" attribute.
MISSING_LOCALE = ""

53 

54 

# =============================================================================
# XML helper functions
# =============================================================================

58 

def text_contents(e: Element, plain: bool = False, strip: bool = True) -> str:
    """
    Extract the exact text contents of an XML element, including any XML/HTML
    tags within it.

    A normal string looks like

    .. code-block:: xml

        <string name="stringname">words words words</string>

    and we extract its contents ("words words words") with

    .. code-block:: python

        e.text

    However, for this:

    .. code-block:: xml

        <string name="stringname">words <b>bold words</b> words</string>

    we want to extract ``words <b>bold words</b> words`` and that's a little
    trickier. This function does that.

    Text that follows the element's own closing tag (``e.tail``) is *not*
    part of the element's contents and is never included in the result.

    Args:
        e: the :class:`Element` to read
        plain: remove all HTML/XML tags?
        strip: strip leading/trailing whitespace?

    Returns:
        the text contents of the element
    """
    if len(e) == 0:
        # No child elements: the contents are just the element's own text.
        result = e.text or ""
    elif plain:
        # itertext() yields the text of this element and all descendants
        # (including the children's tails), with the tags dropped.
        result = "".join(e.itertext())  # e.g. "words bold words words"
    else:
        # tostring() serializes each child *together with its tail*, so the
        # text between and after child elements is already covered here.
        # e.tail is deliberately excluded: it is the text after e's closing
        # tag and belongs to e's parent, not to e.
        result = (e.text or "") + "".join(
            tostring(child, encoding="unicode") for child in e
        )
    return result.strip() if strip else result

108 

109 

# =============================================================================
# Localization strings
# =============================================================================
# In a change to thinking... Pyramid emphasizes: NO MUTABLE GLOBAL STATE.
# https://docs.pylonsproject.org/projects/pyramid/en/latest/narr/advanced-features.html  # noqa
# This is a good thing. But it means that:
# - because we configure our XML files in our config...
# - and in principle even two different threads coming here may have different
#   configs...
# - ... that string requests need to be attached to a Pyramid Request.

120 

class AS:
    """
    List of appstrings present in ``camcops.xml``.

    Should match ``appstrings.cpp`` in the client, and of course
    ``camcops.xml`` itself.

    Each attribute is the name of a string (the value of a ``name``
    attribute on a ``<string>`` element); the attribute values are the
    lookup keys, not the displayed text.
    """

    # -------------------------------------------------------------------------
    # NHS Data Dictionary elements
    # -------------------------------------------------------------------------

    NHS_PERSON_MARITAL_STATUS_CODE_S = "nhs_person_marital_status_code_S"
    NHS_PERSON_MARITAL_STATUS_CODE_M = "nhs_person_marital_status_code_M"
    NHS_PERSON_MARITAL_STATUS_CODE_D = "nhs_person_marital_status_code_D"
    NHS_PERSON_MARITAL_STATUS_CODE_W = "nhs_person_marital_status_code_W"
    NHS_PERSON_MARITAL_STATUS_CODE_P = "nhs_person_marital_status_code_P"
    NHS_PERSON_MARITAL_STATUS_CODE_N = "nhs_person_marital_status_code_N"

    NHS_ETHNIC_CATEGORY_CODE_A = "nhs_ethnic_category_code_A"
    NHS_ETHNIC_CATEGORY_CODE_B = "nhs_ethnic_category_code_B"
    NHS_ETHNIC_CATEGORY_CODE_C = "nhs_ethnic_category_code_C"
    NHS_ETHNIC_CATEGORY_CODE_D = "nhs_ethnic_category_code_D"
    NHS_ETHNIC_CATEGORY_CODE_E = "nhs_ethnic_category_code_E"
    NHS_ETHNIC_CATEGORY_CODE_F = "nhs_ethnic_category_code_F"
    NHS_ETHNIC_CATEGORY_CODE_G = "nhs_ethnic_category_code_G"
    NHS_ETHNIC_CATEGORY_CODE_H = "nhs_ethnic_category_code_H"
    NHS_ETHNIC_CATEGORY_CODE_J = "nhs_ethnic_category_code_J"
    NHS_ETHNIC_CATEGORY_CODE_K = "nhs_ethnic_category_code_K"
    NHS_ETHNIC_CATEGORY_CODE_L = "nhs_ethnic_category_code_L"
    NHS_ETHNIC_CATEGORY_CODE_M = "nhs_ethnic_category_code_M"
    NHS_ETHNIC_CATEGORY_CODE_N = "nhs_ethnic_category_code_N"
    NHS_ETHNIC_CATEGORY_CODE_P = "nhs_ethnic_category_code_P"
    NHS_ETHNIC_CATEGORY_CODE_R = "nhs_ethnic_category_code_R"
    NHS_ETHNIC_CATEGORY_CODE_S = "nhs_ethnic_category_code_S"
    NHS_ETHNIC_CATEGORY_CODE_Z = "nhs_ethnic_category_code_Z"

    # -------------------------------------------------------------------------
    # String elements for specific restricted tasks (see camcops.xml)
    # -------------------------------------------------------------------------

    BDI_WHICH_SCALE = "bdi_which_scale"
    GAF_SCORE = "gaf_score"
    HADS_ANXIETY_SCORE = "hads_anxiety_score"
    HADS_DEPRESSION_SCORE = "hads_depression_score"
    IESR_A_PREFIX = "iesr_a"
    WSAS_A_PREFIX = "wsas_a"
    ZBI_A_PREFIX = "zbi_a"

    # -------------------------------------------------------------------------
    # Strings shared across several tasks
    # -------------------------------------------------------------------------

    DATA_COLLECTION_ONLY = "data_collection_only"
    DATE_PERTAINS_TO = "date_pertains_to"
    ICD10_SYMPTOMATIC_DISCLAIMER = "icd10_symptomatic_disclaimer"
    SATIS_BAD_Q = "satis_bad_q"
    SATIS_BAD_S = "satis_bad_s"
    SATIS_GOOD_Q = "satis_good_q"
    SATIS_GOOD_S = "satis_good_s"
    SATIS_PT_RATING_Q = "satis_pt_rating_q"
    SATIS_REF_GEN_RATING_Q = "satis_ref_gen_rating_q"
    SATIS_REF_SPEC_RATING_Q = "satis_ref_spec_rating_q"
    SATIS_RATING_A_PREFIX = "satis_rating_a"
    SATIS_SERVICE_BEING_RATED = "satis_service_being_rated"

186 

187 

@cache_region_static.cache_on_arguments(function_key_generator=fkg)
def all_extra_strings_as_dicts(
        config_filename: str) -> Dict[str, Dict[str, Dict[str, str]]]:
    r"""
    Returns strings from all the extra XML string files.

    The result is cached (via a proper cache).

    Args:
        config_filename: a CamCOPS config filename

    Returns: a dictionary like

    .. code-block:: none

        {
            'task1': {
                'stringname1': {
                    "en_GB": "a string in British English",
                    "da_DK": "a string in Danish",
                },
                'stringname2': {
                },
            },
            'task2': {
                ...
            },
            ...
        }

    ... in other words a ``Dict[taskname: str, Dict[stringname: str,
    Dict[locale: str, stringvalue: str]]]``.

    For example, ``result['phq9']['q5'][locale] == "5. Poor appetite or
    overeating"``. There is also a top-level dictionary with the key
    ``APPSTRING_TASKNAME``.

    The locale key is the value of the ``locale`` attribute exactly as
    written in the XML file.

    An error is raised (via ``raise_runtime_error``) if no extra string
    files are found, or if none of them defines the ``APPSTRING_TASKNAME``
    task.

    **XML format**

    The extra string files should look like this:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    If the ``locale`` attribute is not specified, a locale of ``""``
    (``MISSING_LOCALE``) is used internally; this is intended to be the
    fallback position if nothing else is found.

    """
    # -------------------------------------------------------------------------
    # Design notes
    # -------------------------------------------------------------------------
    # The extra string files looked like this prior to 2019-05-05:
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # Designing XML:
    #
    # - an "element" looks like <thing>blah</thing>, or <thing />; the
    #   "element name" is "thing" in this example, and "blah" is called the
    #   "content".
    # - the delimiters of an element are tags: start tags such as <thing>,
    #   end tags such as </thing>, or empty-element tags such as <thing />.
    # - an "attribute" is a name-value pair, e.g.
    #   <tagname attrname=value ...>; "attrname" in this example is called
    #   the "attribute name".
    # - So you can add information via the element structure or the attribute
    #   system.
    #
    # So, as we add language support (2019-05-05), we start with:
    #
    # - element names for types of information (task, string)
    # - attribute values for labelling the content
    # - content for the string data
    #
    # There are many ways we could add language information. Adding an
    # attribute to every string seems verbose, though. We could use one of
    # these systems:
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1">
    #           <locale name="en_GB">
    #               <string name="NAME_1">VALUE</string>
    #               <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #               <!-- ... -->
    #           </locale>
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1" locale="en_GB">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # The second seems a bit clearer (fewer levels). Let's do that. It also
    # makes all existing XML files automatically compatible (with minor code
    # adaptations). If the "locale" parameter is missing, strings go into a
    # "no-locale" state and serve as the default.

    cfg = get_config(config_filename)
    assert cfg.extra_string_files is not None

    # Each configured filespec is a glob pattern. Collect every match,
    # de-duplicate, and sort so that the load order is deterministic.
    filenames = sorted({
        filename
        for filespec in cfg.extra_string_files
        for filename in glob.glob(filespec)
    })  # type: List[str]
    if not filenames:
        raise_runtime_error("No CamCOPS extra string files specified; "
                            "config is misconfigured; aborting")

    allstrings = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
    for filename in filenames:
        log.info("Loading string XML file: {}", filename)
        parser = ElementTree.XMLParser(encoding="UTF-8")
        tree = ElementTree.parse(filename, parser=parser)
        root = tree.getroot()
        # We'll search via an XPath. See
        # https://docs.python.org/3.7/library/xml.etree.elementtree.html#xpath-support  # noqa
        for taskroot in root.findall("./task[@name]"):
            # ... "all elements with the tag 'task' that have an attribute
            # named 'name'" -- so the "name" lookup below cannot fail.
            taskname = taskroot.attrib["name"]
            locale = taskroot.attrib.get("locale", MISSING_LOCALE)
            taskstrings = allstrings.setdefault(taskname, {})  # type: Dict[str, Dict[str, str]]  # noqa
            for e in taskroot.findall("./string[@name]"):
                # ... "all elements with the tag 'string' that have an
                # attribute named 'name'"
                stringname = e.attrib["name"]
                final_string = unescape_newlines(text_contents(e))
                langversions = taskstrings.setdefault(stringname, {})  # type: Dict[str, str]  # noqa
                # A later definition of the same task/string/locale
                # (later in this file, or in a later file) overwrites an
                # earlier one.
                langversions[locale] = final_string

    if APPSTRING_TASKNAME not in allstrings:
        raise_runtime_error(
            "Extra string files do not contain core CamCOPS strings; "
            "config is misconfigured; aborting")

    return allstrings