Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1#!/usr/bin/env python 

2# cardinal_pythonlib/snomed.py 

3 

4""" 

5=============================================================================== 

6 

7 Original code copyright (C) 2009-2021 Rudolf Cardinal (rudolf@pobox.com). 

8 

9 This file is part of cardinal_pythonlib. 

10 

11 Licensed under the Apache License, Version 2.0 (the "License"); 

12 you may not use this file except in compliance with the License. 

13 You may obtain a copy of the License at 

14 

15 https://www.apache.org/licenses/LICENSE-2.0 

16 

17 Unless required by applicable law or agreed to in writing, software 

18 distributed under the License is distributed on an "AS IS" BASIS, 

19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

20 See the License for the specific language governing permissions and 

21 limitations under the License. 

22 

23=============================================================================== 

24 

25**Functions to assist with SNOMED-CT.** 

26 

27See http://snomed.org/. 

28 

29Note that the licensing arrangements for SNOMED-CT mean that the actual codes 

30must be separate (and not part of this code). 

31 

32A full SNOMED CT download is about 1.1 Gb; see 

33https://digital.nhs.uk/services/terminology-and-classifications/snomed-ct. 

34Within a file such as ``uk_sct2cl_26.0.2_20181107000001.zip``, relevant files 

35include: 

36 

37.. code-block:: none 

38 

39 # Files with "Amoxicillin" in include two snapshots and two full files: 

40 

41 SnomedCT_UKClinicalRF2_PRODUCTION_20181031T000001Z/Full/Terminology/sct2_Description_Full-en-GB_GB1000000_20181031.txt 

42 # ... 234,755 lines 

43 

44 SnomedCT_InternationalRF2_PRODUCTION_20180731T120000Z/Full/Terminology/sct2_Description_Full-en_INT_20180731.txt 

45 # ... 2,513,953 lines; this is the main file. 

46 

47Note grammar: 

48 

49- http://snomed.org/scg 

50- https://confluence.ihtsdotools.org/display/DOCSCG 

51- https://confluence.ihtsdotools.org/download/attachments/33494865/SnomedCtExpo_Expressions_20161028_s2_20161101.pdf # noqa 

52- https://confluence.ihtsdotools.org/display/SLPG/SNOMED+CT+Expression+Constraint+Language 

53 

54Test basic expressions: 

55 

56.. code-block:: python 

57 

58 import logging 

59 from cardinal_pythonlib.logs import main_only_quicksetup_rootlogger 

60 from cardinal_pythonlib.snomed import * 

61 main_only_quicksetup_rootlogger(level=logging.DEBUG) 

62  

63 # --------------------------------------------------------------------- 

64 # From the SNOMED-CT examples (http://snomed.org/scg), with some values 

65 # fixed from the term browser: 

66 # --------------------------------------------------------------------- 

67  

68 diabetes = SnomedConcept(73211009, "Diabetes mellitus (disorder)") 

69 diabetes_expr = SnomedExpression(diabetes) 

70 print(diabetes_expr.longform) 

71 print(diabetes_expr.shortform) 

72  

73 pain = SnomedConcept(22253000, "Pain (finding)") 

74 finding_site = SnomedConcept(363698007, "Finding site") 

75 foot = SnomedConcept(56459004, "Foot") 

76  

77 pain_in_foot = SnomedExpression(pain, {finding_site: foot}) 

78 print(pain_in_foot.longform) 

79 print(pain_in_foot.shortform) 

80  

81 amoxicillin_medicine = SnomedConcept(27658006, "Product containing amoxicillin (medicinal product)") 

82 amoxicillin_substance = SnomedConcept(372687004, "Amoxicillin (substance)") 

83 has_dose_form = SnomedConcept(411116001, "Has manufactured dose form (attribute)") 

84 capsule = SnomedConcept(385049006, "Capsule (basic dose form)") 

85 has_active_ingredient = SnomedConcept(127489000, "Has active ingredient (attribute)") 

86 has_basis_of_strength_substance = SnomedConcept(732943007, "Has basis of strength substance (attribute)") 

87 mass = SnomedConcept(118538004, "Mass, a measure of quantity of matter (property) (qualifier value)") 

88 unit_of_measure = SnomedConcept(767524001, "Unit of measure (qualifier value)") 

89 milligrams = SnomedConcept(258684004, "milligram (qualifier value)") 

90  

91 amoxicillin_500mg_capsule = SnomedExpression( 

92 amoxicillin_medicine, [ 

93 SnomedAttributeSet({has_dose_form: capsule}), 

94 SnomedAttributeGroup({ 

95 has_active_ingredient: amoxicillin_substance, 

96 has_basis_of_strength_substance: SnomedExpression( 

97 amoxicillin_substance, { 

98 mass: 500, 

99 unit_of_measure: milligrams, 

100 } 

101 ), 

102 }), 

103 ] 

104 ) 

105 print(amoxicillin_500mg_capsule.longform) 

106 print(amoxicillin_500mg_capsule.shortform) 

107 

108""" # noqa 

109 

110from typing import Dict, Iterable, List, Union 

111 

112from cardinal_pythonlib.reprfunc import simple_repr 

113 

114 

115# ============================================================================= 

116# Constants 

117# ============================================================================= 

118 

# Single-character tokens used when escaping strings and when rendering
# SNOMED-CT expressions as text.
BACKSLASH = "\\"
COLON = ":"
COMMA = ","
EQUALS = "="
HASH = "#"
LBRACE = "{"
LBRACKET = "("  # NB: a round bracket (parenthesis)
PIPE = "|"
PLUS = "+"
QM = '"'  # double quotation mark
RBRACE = "}"
RBRACKET = ")"  # NB: a round bracket (parenthesis)
TAB = "\t"
NEWLINE = "\n"

# Permitted length, in decimal digits, of a SNOMED-CT concept identifier.
ID_MIN_DIGITS = 6
ID_MAX_DIGITS = 18

# Type aliases: what may be used as an attribute value, and the dictionary
# shorthand (attribute name -> attribute value) accepted by the classes below.
VALUE_TYPE = Union["SnomedConcept", "SnomedExpression", int, float, str]
DICT_ATTR_TYPE = Dict["SnomedConcept", VALUE_TYPE]

SNOMED_XML_NAME = "snomed_ct_expression"

141 

142 

143# ============================================================================= 

144# Quoting strings 

145# ============================================================================= 

146 

def double_quoted(s: str) -> str:
    r"""
    Wraps a string in double quotes, escaping the characters that need it.

    Newline, tab, double quote and backslash are written as ``\n``, ``\t``,
    ``\"`` and ``\\`` respectively. Any other character whose code point is
    below 32 is written as a two-digit upper-case hex escape (e.g. ``\x1F``
    for ASCII 31). All other characters are copied through unchanged.

    Args:
        s: the string to quote

    Returns:
        the escaped string, enclosed in double quotes

    See:

    - http://code.activestate.com/lists/python-list/272714/ -- does not work
      as null values get escaped in different ways in modern Python, and in a
      slightly unpredictable way
    - https://mail.python.org/pipermail/python-list/2003-April/236940.html --
      won't deal with repr() using triple-quotes
    - https://stackoverflow.com/questions/1675181/get-str-repr-with-double-quotes-python
      -- probably the right general approach

    Test code:

    .. code-block:: python

        from cardinal_pythonlib.snomed import double_quoted

        def test(s):
            print(f"double_quoted({s!r}) -> {double_quoted(s)}")

        test("ab'cd")
        test("ab'c\"d")
        test('ab"cd')

    """  # noqa
    # Characters that have a conventional "named" escape sequence.
    named_escapes = {
        NEWLINE: r"\n",
        TAB: r"\t",
        QM: r'\"',
        BACKSLASH: r"\\",
    }

    def escaped(ch: str) -> str:
        # Named escapes take priority over the generic control-character form.
        if ch in named_escapes:
            return named_escapes[ch]
        code = ord(ch)
        if code < 32:
            # two-digit hex format, e.g. \x1F for ASCII 31
            return fr"\x{code:02X}"
        return ch

    # Collect the pieces and join once, rather than concatenating repeatedly.
    quote = '"'
    pieces = [quote]
    pieces.extend(escaped(ch) for ch in s)
    pieces.append(quote)
    return "".join(pieces)

202 

203 

204# ============================================================================= 

205# SNOMED-CT concepts 

206# ============================================================================= 

207 

class SnomedBase:
    """
    Shared text-rendering interface for the SNOMED-CT classes.

    Subclasses must override :meth:`as_string`; the properties and
    ``__str__`` defined here are all expressed in terms of it.
    """

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Args:
            longform: print SNOMED-CT concepts in long form?

        Raises:
            NotImplementedError: always, in this base class
        """
        raise NotImplementedError("implement in subclass")

    @property
    def longform(self) -> str:
        """
        Returns the long form (``as_string(True)``).
        """
        return self.as_string(True)

    @property
    def shortform(self) -> str:
        """
        Returns the short form, without terms (``as_string(False)``).
        """
        return self.as_string(False)

    def __str__(self) -> str:
        # str() gives the long form.
        return self.as_string(True)

234 

235 

class SnomedConcept(SnomedBase):
    """
    Represents a SNOMED concept with its description (associated term).
    """

    def __init__(self, identifier: int, term: str) -> None:
        """
        Args:
            identifier: SNOMED-CT identifier (code); a positive integer of
                between ``ID_MIN_DIGITS`` and ``ID_MAX_DIGITS`` digits
            term: associated term (description); must not contain a pipe
                character, since pipes delimit the term in the long form

        Raises:
            AssertionError: if any argument fails validation
        """
        assert isinstance(identifier, int), (
            f"SNOMED-CT concept identifier is not an integer: {identifier!r}"
        )
        # The digit count below is taken from str(identifier), which would
        # count a leading minus sign as a digit (so -12345 would look like a
        # six-digit identifier). Reject non-positive values explicitly.
        assert identifier > 0, (
            f"SNOMED-CT concept identifier is not positive: {identifier!r}"
        )
        ndigits = len(str(identifier))
        assert ID_MIN_DIGITS <= ndigits <= ID_MAX_DIGITS, (
            f"SNOMED-CT concept identifier has wrong number of digits: "
            f"{identifier!r}"
        )
        assert PIPE not in term, (
            f"SNOMED-CT term has invalid pipe character: {term!r}"
        )
        self.identifier = identifier
        self.term = term

    def __repr__(self) -> str:
        return simple_repr(self, ["identifier", "term"])

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Args:
            longform: if true, the identifier followed by the term enclosed
                in pipes; otherwise the identifier alone
        """
        if longform:
            return f"{self.identifier} {PIPE}{self.term}{PIPE}"
        else:
            return str(self.identifier)

    def concept_reference(self, longform: bool = True) -> str:
        """
        Returns one of the string representations.

        Args:
            longform: in long form, with the description (associated term)?
        """
        return self.as_string(longform)

278 

279 

280# ============================================================================= 

281# SNOMED-CT expressions 

282# ============================================================================= 

283 

class SnomedValue(SnomedBase):
    """
    Represents a value: either a concrete value (e.g. int, float, str), or a
    SNOMED-CT concept/expression.

    Implements the grammar elements: attributeValue, expressionValue,
    stringValue, numericValue, integerValue, decimalValue.
    """

    def __init__(self, value: VALUE_TYPE) -> None:
        """
        Args:
            value: the value; a :class:`SnomedConcept`,
                :class:`SnomedExpression`, int, float, or str

        Raises:
            AssertionError: if the value is of another type
        """
        assert isinstance(value, (SnomedConcept, SnomedExpression,
                                  int, float, str)), (
            f"Invalid value type to SnomedValue: {value!r}"
        )
        self.value = value

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the string form.

        Concepts are rendered as concept references; nested expressions are
        wrapped in round brackets; numbers are prefixed with a hash; strings
        are double-quoted with escapes.

        Args:
            longform: print SNOMED-CT concepts in long form?

        Raises:
            ValueError: if the stored value is of an unsupported type (only
                reachable if the constructor's assertion was bypassed)
        """
        x = self.value
        if isinstance(x, SnomedConcept):
            return x.concept_reference(longform)
        elif isinstance(x, SnomedExpression):
            # Nested expressions are bracketed.
            return f"{LBRACKET} {x.as_string(longform)} {RBRACKET}"
        elif isinstance(x, (int, float)):
            text = str(x)
            if "e" in text.lower():
                # str() of a very large/small float uses exponent notation
                # (e.g. "1e-07"), which the grammar's numericValue (digits
                # with an optional decimal point) does not allow. Re-render
                # positionally; all other numbers are left exactly as str()
                # gives them.
                from decimal import Decimal
                text = format(Decimal(text), "f")
            return HASH + text
        elif isinstance(x, str):
            # On the basis that SNOMED's "QM" (quote mark) is 0x22, the double
            # quote:
            return double_quoted(x)
        else:
            raise ValueError("Bad input value type")

    def __repr__(self) -> str:
        return simple_repr(self, ["value"])

322 

323 

class SnomedFocusConcept(SnomedBase):
    """
    Represents a SNOMED-CT focus concept, which is one or more concepts.
    """

    def __init__(self,
                 concept: Union[SnomedConcept, Iterable[SnomedConcept]]) \
            -> None:
        """
        Args:
            concept: the core concept(s); a :class:`SnomedConcept` or an
                iterable of them (at least one)

        Raises:
            AssertionError: if no concepts are supplied, or if any item is
                not a :class:`SnomedConcept`
        """
        if isinstance(concept, SnomedConcept):
            self.concepts = [concept]
        else:
            self.concepts = list(concept)
        # A focus concept is "one or more concepts"; an empty one would
        # render as an empty string and yield a malformed expression.
        assert self.concepts, "SnomedFocusConcept requires at least one concept"
        assert all(isinstance(x, SnomedConcept) for x in self.concepts)

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the concept references joined by ``" + "``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        sep = " " + PLUS + " "
        return sep.join(c.concept_reference(longform) for c in self.concepts)

    def __repr__(self) -> str:
        return simple_repr(self, ["concepts"])

349 

350 

class SnomedAttribute(SnomedBase):
    """
    A single SNOMED-CT attribute: an attribute name paired with a value.
    """

    def __init__(self, name: SnomedConcept, value: VALUE_TYPE) -> None:
        """
        Args:
            name: the attribute name, a :class:`SnomedConcept`
            value: the attribute value; anything that is not already a
                :class:`SnomedValue` is wrapped in one
        """
        assert isinstance(name, SnomedConcept)
        wrapped = value if isinstance(value, SnomedValue) else SnomedValue(value)
        self.name = name
        self.value = wrapped

    def as_string(self, longform: bool = True) -> str:
        """
        Returns ``name = value``.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        name_text = self.name.concept_reference(longform)
        value_text = self.value.as_string(longform)
        return f"{name_text} {EQUALS} {value_text}"

    def __repr__(self) -> str:
        return simple_repr(self, ["name", "value"])

377 

378 

class SnomedAttributeSet(SnomedBase):
    """
    An ordered collection of :class:`SnomedAttribute` objects.
    """

    def __init__(self, attributes: Union[DICT_ATTR_TYPE,
                                         Iterable[SnomedAttribute]]) -> None:
        """
        Args:
            attributes: either a dictionary mapping attribute names to
                values (each pair becomes a :class:`SnomedAttribute`), or an
                iterable of ready-made :class:`SnomedAttribute` objects
        """
        if isinstance(attributes, dict):
            # Dictionary shorthand: build one attribute per key/value pair.
            attributes = (
                SnomedAttribute(name, value)
                for name, value in attributes.items()
            )
        self.attributes = list(attributes)
        assert all(isinstance(a, SnomedAttribute) for a in self.attributes)

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the attributes, comma-separated.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        rendered = [a.as_string(longform) for a in self.attributes]
        return (COMMA + " ").join(rendered)

    def __repr__(self) -> str:
        return simple_repr(self, ["attributes"])

404 

405 

class SnomedAttributeGroup(SnomedBase):
    """
    A :class:`SnomedAttributeSet` marked as a group (rendered in braces).
    """

    def __init__(self, attribute_set: Union[DICT_ATTR_TYPE,
                                            SnomedAttributeSet]) -> None:
        """
        Args:
            attribute_set: the :class:`SnomedAttributeSet` to group, or a
                dictionary from which one is built
        """
        grouped = (
            SnomedAttributeSet(attribute_set)
            if isinstance(attribute_set, dict)
            else attribute_set
        )
        assert isinstance(grouped, SnomedAttributeSet)
        self.attribute_set = grouped

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the attribute set enclosed in braces.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        inner = self.attribute_set.as_string(longform)
        return f"{LBRACE} {inner} {RBRACE}"

    def __repr__(self) -> str:
        return simple_repr(self, ["attribute_set"])

427 

428 

class SnomedRefinement(SnomedBase):
    """
    A SNOMED-CT "refinement": at most one ungrouped attribute set, plus any
    number of attribute groups.
    """

    def __init__(self,
                 refinements: Union[DICT_ATTR_TYPE,
                                    Iterable[Union[SnomedAttributeSet,
                                                   SnomedAttributeGroup]]]) \
            -> None:
        """
        Args:
            refinements: a dictionary (treated as a single
                :class:`SnomedAttributeSet`), or an iterable containing zero
                or one :class:`SnomedAttributeSet` and any number of
                :class:`SnomedAttributeGroup` objects

        Raises:
            ValueError: if more than one :class:`SnomedAttributeSet` is
                supplied, or an item is of neither type
        """
        items = (
            [SnomedAttributeSet(refinements)]
            if isinstance(refinements, dict)
            else refinements
        )
        self.attrsets = []  # type: List[SnomedBase]
        self.attrgroups = []  # type: List[SnomedBase]
        for item in items:
            if isinstance(item, SnomedAttributeSet):
                if self.attrsets:
                    raise ValueError(
                        "Only one SnomedAttributeSet allowed to SnomedRefinement"
                    )
                self.attrsets.append(item)
                continue
            if isinstance(item, SnomedAttributeGroup):
                self.attrgroups.append(item)
                continue
            raise ValueError(f"Unknown object to SnomedRefinement: {item!r}")

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the ungrouped attribute set (if any) followed by the groups,
        comma-separated.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        # Ungrouped before grouped; see 6.5 in "SNOMED CT Compositional Grammar
        # v2.3.1"
        ordered = self.attrsets + self.attrgroups
        rendered = [part.as_string(longform) for part in ordered]
        return (COMMA + " ").join(rendered)

    def __repr__(self) -> str:
        return simple_repr(self, ["attrsets", "attrgroups"])

469 

470 

class SnomedExpression(SnomedBase):
    """
    An expression containing several SNOMED-CT codes in relationships.
    """

    def __init__(self,
                 focus_concept: Union[SnomedConcept, SnomedFocusConcept],
                 refinement: Union[SnomedRefinement,
                                   DICT_ATTR_TYPE,
                                   List[Union[SnomedAttributeSet,
                                              SnomedAttributeGroup]],
                                   None] = None) \
            -> None:
        """
        Args:
            focus_concept: the core concept(s); a :class:`SnomedFocusConcept`,
                or a single :class:`SnomedConcept` to be wrapped in one
            refinement: optional additional information; a
                :class:`SnomedRefinement` or a dictionary or list that can be
                converted to one

        Raises:
            AssertionError: if either argument is of an unsupported type
        """
        if isinstance(focus_concept, SnomedConcept):
            focus_concept = SnomedFocusConcept(focus_concept)
        assert isinstance(focus_concept, SnomedFocusConcept)
        if isinstance(refinement, (dict, list)):
            refinement = SnomedRefinement(refinement)
        if refinement is not None:
            assert isinstance(refinement, SnomedRefinement)
        self.focus_concept = focus_concept
        self.refinement = refinement

    def as_string(self, longform: bool = True) -> str:
        """
        Returns the focus concept, followed by ``" : "`` and the refinement
        when there is a non-empty one.

        Args:
            longform: print SNOMED-CT concepts in long form?
        """
        s = self.focus_concept.as_string(longform)
        # Test for None explicitly: a refinement object is always truthy,
        # even when it holds nothing.
        if self.refinement is not None:
            refinement_text = self.refinement.as_string(longform)
            # An empty refinement (e.g. built from {}) renders as "";
            # omit the separator rather than emit a dangling " : ".
            if refinement_text:
                s += " " + COLON + " " + refinement_text
        return s

    def __repr__(self) -> str:
        return simple_repr(self, ["focus_concept", "refinement"])