Coverage for cc_modules/cc_string.py: 61%

96 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-15 14:23 +0100

1""" 

2camcops_server/cc_modules/cc_string.py 

3 

4=============================================================================== 

5 

6 Copyright (C) 2012, University of Cambridge, Department of Psychiatry. 

7 Created by Rudolf Cardinal (rnc1001@cam.ac.uk). 

8 

9 This file is part of CamCOPS. 

10 

11 CamCOPS is free software: you can redistribute it and/or modify 

12 it under the terms of the GNU General Public License as published by 

13 the Free Software Foundation, either version 3 of the License, or 

14 (at your option) any later version. 

15 

16 CamCOPS is distributed in the hope that it will be useful, 

17 but WITHOUT ANY WARRANTY; without even the implied warranty of 

18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

19 GNU General Public License for more details. 

20 

21 You should have received a copy of the GNU General Public License 

22 along with CamCOPS. If not, see <https://www.gnu.org/licenses/>. 

23 

24=============================================================================== 

25 

26**Manage the "extra strings" that the server reads from XML files. The server 

27uses these for displaying tasks, and provides them to client devices.** 

28 

29""" 

30 

31import glob 

32import logging 

33from typing import Dict, List 

34import xml.etree.cElementTree as ElementTree 

35 

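36# ... cElementTree was historically the faster (C) implementation. NOTE(review): xml.etree.cElementTree was deprecated in Python 3.3 and removed in Python 3.9; xml.etree.ElementTree now uses the C accelerator automatically, so the import above should be checked against the supported Python versions. 

37# ... http://docs.python.org/2/library/xml.etree.elementtree.html 

38# ... http://effbot.org/zone/celementtree.htm 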

39from xml.etree.ElementTree import Element, tostring 

40 

41from cardinal_pythonlib.logs import BraceStyleAdapter 

42from cardinal_pythonlib.text import unescape_newlines 

43 

44from camcops_server.cc_modules.cc_cache import cache_region_static, fkg 

45from camcops_server.cc_modules.cc_config import get_config 

46from camcops_server.cc_modules.cc_exception import raise_runtime_error 

47 

# Module-level logger; BraceStyleAdapter allows "{}"-style placeholders in
# log messages (as used by all_extra_strings_as_dicts below).
log = BraceStyleAdapter(logging.getLogger(__name__))


# Top-level key (the pseudo-"task" name) that the extra-strings dictionary
# must contain; all_extra_strings_as_dicts() aborts if it is absent.
APPSTRING_TASKNAME = "camcops"
# Locale key used for strings whose <task> element has no "locale" attribute.
MISSING_LOCALE = ""

53 

54 

55# ============================================================================= 

56# XML helper functions 

57# ============================================================================= 

58 

59 

def text_contents(e: Element, plain: bool = False, strip: bool = True) -> str:
    """
    Extract the exact text contents of an XML element, including any XML/HTML
    tags within it.

    A normal string looks like

    .. code-block:: xml

        <string name="stringname">words words words</string>

    and we extract its contents ("words words words") with

    .. code-block:: python

        e.text

    However, for this:

    .. code-block:: xml

        <string name="stringname">words <b>bold words</b> words</string>

    we want to extract ``words <b>bold words</b> words`` and that's a little
    trickier. This function does that.

    Text that follows the element's own closing tag (``e.tail``) is never
    part of the result: it belongs to the parent element, not to ``e``.

    Args:
        e: the :class:`Element` to read
        plain: remove all HTML/XML tags?
        strip: strip leading/trailing whitespace?

    Returns:
        the text contents of the element (``""`` for an empty element)
    """
    if len(e) == 0:
        # No child elements: the element's own text is the whole content.
        result = e.text or ""
    elif plain:
        # All text within the element, tags dropped,
        # e.g. "words bold words words"
        result = "".join(e.itertext())
    else:
        # tostring() serializes each child together with the child's tail
        # text, so text + serialized children already covers everything
        # between e's start and end tags. e.tail is deliberately NOT appended:
        # it is the text *after* e's end tag.
        result = (e.text or "") + "".join(
            tostring(child, encoding="unicode") for child in e
        )
    return result.strip() if strip else result

109 

110 

111# ============================================================================= 

112# Localization strings 

113# ============================================================================= 

114# In a change to thinking... Pyramid emphasizes: NO MUTABLE GLOBAL STATE. 

115# https://docs.pylonsproject.org/projects/pyramid/en/latest/narr/advanced-features.html # noqa 

116# This is a good thing. But it means that: 

117# - because we configure our XML files in our config... 

118# - and in principle even two different threads coming here may have different 

119# configs... 

120# - ... that string requests need to be attached to a Pyramid Request. 

121 

122 

class AS:
    """
    Names of the appstrings defined in ``camcops.xml``.

    Keep this list in step with ``appstrings.cpp`` in the client and with
    ``camcops.xml`` itself.
    """

    # --- NHS Data Dictionary: person marital status codes --------------------

    NHS_PERSON_MARITAL_STATUS_CODE_S = "nhs_person_marital_status_code_S"
    NHS_PERSON_MARITAL_STATUS_CODE_M = "nhs_person_marital_status_code_M"
    NHS_PERSON_MARITAL_STATUS_CODE_D = "nhs_person_marital_status_code_D"
    NHS_PERSON_MARITAL_STATUS_CODE_W = "nhs_person_marital_status_code_W"
    NHS_PERSON_MARITAL_STATUS_CODE_P = "nhs_person_marital_status_code_P"
    NHS_PERSON_MARITAL_STATUS_CODE_N = "nhs_person_marital_status_code_N"

    # --- NHS Data Dictionary: ethnic category codes --------------------------

    NHS_ETHNIC_CATEGORY_CODE_A = "nhs_ethnic_category_code_A"
    NHS_ETHNIC_CATEGORY_CODE_B = "nhs_ethnic_category_code_B"
    NHS_ETHNIC_CATEGORY_CODE_C = "nhs_ethnic_category_code_C"
    NHS_ETHNIC_CATEGORY_CODE_D = "nhs_ethnic_category_code_D"
    NHS_ETHNIC_CATEGORY_CODE_E = "nhs_ethnic_category_code_E"
    NHS_ETHNIC_CATEGORY_CODE_F = "nhs_ethnic_category_code_F"
    NHS_ETHNIC_CATEGORY_CODE_G = "nhs_ethnic_category_code_G"
    NHS_ETHNIC_CATEGORY_CODE_H = "nhs_ethnic_category_code_H"
    NHS_ETHNIC_CATEGORY_CODE_J = "nhs_ethnic_category_code_J"
    NHS_ETHNIC_CATEGORY_CODE_K = "nhs_ethnic_category_code_K"
    NHS_ETHNIC_CATEGORY_CODE_L = "nhs_ethnic_category_code_L"
    NHS_ETHNIC_CATEGORY_CODE_M = "nhs_ethnic_category_code_M"
    NHS_ETHNIC_CATEGORY_CODE_N = "nhs_ethnic_category_code_N"
    NHS_ETHNIC_CATEGORY_CODE_P = "nhs_ethnic_category_code_P"
    NHS_ETHNIC_CATEGORY_CODE_R = "nhs_ethnic_category_code_R"
    NHS_ETHNIC_CATEGORY_CODE_S = "nhs_ethnic_category_code_S"
    NHS_ETHNIC_CATEGORY_CODE_Z = "nhs_ethnic_category_code_Z"

    # --- Strings for specific restricted tasks (see camcops.xml) -------------

    BDI_WHICH_SCALE = "bdi_which_scale"
    GAF_SCORE = "gaf_score"
    HADS_ANXIETY_SCORE = "hads_anxiety_score"
    HADS_DEPRESSION_SCORE = "hads_depression_score"
    IESR_A_PREFIX = "iesr_a"
    WSAS_A_PREFIX = "wsas_a"
    ZBI_A_PREFIX = "zbi_a"

    # --- Strings shared by several tasks -------------------------------------

    DATA_COLLECTION_ONLY = "data_collection_only"
    DATE_PERTAINS_TO = "date_pertains_to"
    ICD10_SYMPTOMATIC_DISCLAIMER = "icd10_symptomatic_disclaimer"
    SATIS_BAD_Q = "satis_bad_q"
    SATIS_BAD_S = "satis_bad_s"
    SATIS_GOOD_Q = "satis_good_q"
    SATIS_GOOD_S = "satis_good_s"
    SATIS_PT_RATING_Q = "satis_pt_rating_q"
    SATIS_REF_GEN_RATING_Q = "satis_ref_gen_rating_q"
    SATIS_REF_SPEC_RATING_Q = "satis_ref_spec_rating_q"
    SATIS_RATING_A_PREFIX = "satis_rating_a"
    SATIS_SERVICE_BEING_RATED = "satis_service_being_rated"

188 

189 

@cache_region_static.cache_on_arguments(function_key_generator=fkg)
def all_extra_strings_as_dicts(
    config_filename: str,
) -> Dict[str, Dict[str, Dict[str, str]]]:
    r"""
    Load every "extra string" XML file named by the config and return the
    strings as nested dictionaries.

    The result is cached, keyed on the config filename. The config is
    re-read from that filename on a cache miss; that is suboptimal, but a
    filename is a sensibly cacheable argument, whereas a config object is
    not.

    Args:
        config_filename: a CamCOPS config filename

    Returns:
        a ``Dict[taskname: str, Dict[stringname: str, Dict[locale: str,
        stringvalue: str]]]``, i.e. something like

        .. code-block:: none

            {
                "task1": {
                    "stringname1": {
                        "en_GB": "a string in British English",
                        "da_DK": "a string in Danish",
                    },
                    "stringname2": {
                        ...
                    },
                },
                "task2": {
                    ...
                },
                ...
            }

        so that, for example,

        .. code-block:: none

            result["phq9"]["q5"][locale] == "5. Poor appetite or overeating"

        The top level always includes the key ``APPSTRING_TASKNAME``; its
        absence is treated as a configuration error.

    **XML format**

    The extra string files should look like this:

    .. code-block:: xml

        <?xml version="1.0" encoding="UTF-8"?>
        <resources>
            <task name="TASK_1" locale="en_GB">
                <string name="NAME_1">VALUE</string>
                <string name="NAME_2">VALUE WITH\nNEWLINE</string>
                <!-- ... -->
            </task>
            <!-- ... -->
        </resources>

    The ``locale`` attribute value is used verbatim as the innermost key. If
    it is absent, the locale key ``""`` (``MISSING_LOCALE``) is used; such
    strings are intended as the fallback when nothing else is found.

    Files are processed in sorted filename order; if the same
    task/string/locale combination occurs more than once, the last one read
    wins.
    """
    # -------------------------------------------------------------------------
    # Design notes
    # -------------------------------------------------------------------------
    # Prior to 2019-05-05, the extra string files looked like this:
    #
    #   <?xml version="1.0" encoding="UTF-8"?>
    #   <resources>
    #       <task name="TASK_1">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # Designing XML:
    #
    # - an "element" looks like <thing>blah</thing>, or <thing />; the
    #   "element name" is "thing" in this example, and "blah" is called the
    #   "content".
    # - the delimiters of an element are tags: start tags such as <thing>,
    #   end tags such as </thing>, or empty-element tags such as <thing />.
    # - an "attribute" is a name-value pair, e.g. <tagname attrname=value
    #   ...>; "attrname" in this example is called the "attribute name".
    # - So you can add information via the element structure or the attribute
    #   system.
    #
    # So, as we add language support (2019-05-05), we start with:
    #
    # - element names for types of information (task, string)
    # - attribute values for labelling the content
    # - content for the string data
    #
    # There are many ways we could add language information. Adding an
    # attribute to every string seems verbose, though. We could use one of
    # these systems:
    #
    # (1) a <locale> element nested inside each task:
    #
    #   <resources>
    #       <task name="TASK_1">
    #           <locale name="en_GB">
    #               <string name="NAME_1">VALUE</string>
    #               <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #               <!-- ... -->
    #           </locale>
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # (2) a "locale" attribute on the task element:
    #
    #   <resources>
    #       <task name="TASK_1" locale="en_GB">
    #           <string name="NAME_1">VALUE</string>
    #           <string name="NAME_2">VALUE WITH\nNEWLINE</string>
    #           <!-- ... -->
    #       </task>
    #       <!-- ... -->
    #   </resources>
    #
    # The second seems a bit clearer (fewer levels). Let's do that. It also
    # makes all existing XML files automatically compatible (with minor code
    # adaptations). If the "locale" attribute is missing, strings go into a
    # "no-locale" state and serve as the default.

    config = get_config(config_filename)
    assert config.extra_string_files is not None

    # Expand each glob pattern; a set removes files matched more than once.
    unique_paths = set()
    for pattern in config.extra_string_files:
        unique_paths.update(glob.glob(pattern))
    xml_paths = sorted(unique_paths)  # type: List[str]
    if not xml_paths:
        raise_runtime_error(
            "No CamCOPS extra string files specified; "
            "config is misconfigured; aborting"
        )

    strings_by_task = {}  # type: Dict[str, Dict[str, Dict[str, str]]]
    for xml_path in xml_paths:
        log.info("Loading string XML file: {}", xml_path)
        tree = ElementTree.parse(
            xml_path, parser=ElementTree.XMLParser(encoding="UTF-8")
        )
        # XPath searches; see
        # https://docs.python.org/3.7/library/xml.etree.elementtree.html#xpath-support  # noqa
        # "./task[@name]": child <task> elements that have a "name" attribute.
        for task_element in tree.getroot().findall("./task[@name]"):
            task_attributes = task_element.attrib
            locale = task_attributes.get("locale", MISSING_LOCALE)
            strings_for_task = strings_by_task.setdefault(
                task_attributes.get("name"), {}
            )  # type: Dict[str, Dict[str, str]]
            # "./string[@name]": child <string> elements that have a "name"
            # attribute.
            for string_element in task_element.findall("./string[@name]"):
                value = unescape_newlines(text_contents(string_element))
                by_locale = strings_for_task.setdefault(
                    string_element.attrib.get("name"), {}
                )  # type: Dict[str, str]
                by_locale[locale] = value

    if APPSTRING_TASKNAME not in strings_by_task:
        raise_runtime_error(
            "Extra string files do not contain core CamCOPS strings; "
            "config is misconfigured; aborting"
        )

    return strings_by_task