Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2from __future__ import unicode_literals 

3import six 

4from .containers import Factory 

5 

6 

def parse(line, encoding='utf-8', factory=Factory):
    """Parse an HL7 message and return an :py:class:`hl7.Message` that
    allows indexed access to the data elements.

    A custom :py:class:`hl7.Factory` subclass may be supplied through
    *factory*; it is used when constructing the message and its
    components.

    .. note::

        HL7 usually contains only ASCII, but can use other character
        sets (HL7 Standards Document, Section 1.7.1), however as of v2.8,
        UTF-8 is the preferred character set [#]_.

        python-hl7 works on Python unicode strings. :py:func:`hl7.parse`
        accepts a unicode string as-is; a bytestring is first decoded
        with the optional ``encoding`` parameter. ``encoding`` defaults
        to UTF-8, so UTF-8 bytestrings need no extra work, but for other
        character sets such as 'cp1252' or 'latin1' ``encoding`` must be
        set appropriately.

    >>> h = hl7.parse(message)

    To decode a non-UTF-8 byte string::

        hl7.parse(message, encoding='latin1')

    :rtype: :py:class:`hl7.Message`

    .. [#] http://wiki.hl7.org/index.php?title=Character_Set_used_in_v2_messages

    """
    # Work on unicode data only: bytestrings are decoded, anything else
    # is used unchanged.
    text = line.decode(encoding) if isinstance(line, six.binary_type) else line
    # Leading/trailing whitespace carries no meaning for the message.
    message = text.strip()
    # Build the parse plan from the message itself, then split the
    # message recursively according to that plan.
    return _split(message, create_parse_plan(message, factory))

48 

49 

def _split(text, plan):
    """Recursive function to split the *text* into an n-deep list,
    according to the :py:class:`hl7._ParsePlan`.

    :param text: the unicode text of the current message part.
    :param plan: the plan for the current level, or a falsy value once
        every level has been consumed.
    :returns: *text* unchanged when there is no plan left; otherwise the
        result of ``plan.container(...)`` holding the recursively split
        children.
    """
    # Base condition: every level of the plan has been used up, so the
    # text is a leaf value.
    if not plan:
        return text

    # Neither this level's separator nor any child separator occurs in
    # the text, so it becomes the single element of the container.
    if not plan.applies(text):
        return plan.container([text])

    data = []
    # Parsing of the first segment is awkward because it contains
    # the separator characters in a field
    if plan.containers[0] == plan.factory.create_segment and text[:3] in ('MSH', 'FHS'):
        create_field = plan.factory.create_field
        seg = text[:3]
        sep0 = text[3]
        sep_end_off = text.find(sep0, 4)
        if sep_end_off == -1:
            # The header ends right after the encoding characters.
            # Without this, the -1 from find() would chop the last
            # encoding character off and leave the whole segment in
            # ``text`` to be split a second time.
            sep_end_off = len(text)
        seps = text[4:sep_end_off]
        text = text[sep_end_off + 1:]
        data = [
            create_field('', [seg]),
            create_field('', [sep0]),
            create_field(sep0, [seps]),
        ]

    if text:
        data.extend(_split(part, plan.next()) for part in text.split(plan.separator))
    # Return the instance of the current message part according
    # to the plan
    return plan.container(data)

78 

79 

def create_parse_plan(strmsg, factory=Factory):
    """Creates a plan on how to parse the HL7 message according to
    the details stored within the message.

    The separators are read from the start of *strmsg*: the character
    right after ``MSH`` is the field separator, and the characters that
    follow it (up to the next field separator, or the end of the text
    when there is none) are taken, in order, as the component,
    repetition, escape and sub-component characters. Any that are
    missing fall back to ``^``, ``~``, ``\\`` and ``&``.

    :param strmsg: the unicode text of the message; must start with
        ``MSH``.
    :param factory: object providing the ``create_*`` container
        constructors used by the plan.
    :returns: a :py:class:`hl7._ParsePlan`.
    :raises AssertionError: if *strmsg* does not start with ``MSH``.
    """
    # We will always use a carriage return to separate segments
    separators = ['\r']

    # Extract the rest of the separators. Defaults used if not present.
    # A one-element tuple is required here: ``in ('MSH')`` would be a
    # substring test against the string 'MSH' and let '', 'M', 'SH', ...
    # through.
    assert strmsg[:3] in ('MSH',)
    sep0 = strmsg[3]
    seps_end = strmsg.find(sep0, 4)
    if seps_end == -1:
        # No further field separator: the separator definition runs to
        # the end of the text. Slicing with -1 would silently drop the
        # last character (and leave nothing at all for "MSH|").
        seps_end = len(strmsg)
    seps = list(strmsg[3:seps_end])

    def pick(index, default):
        # Character declared by the message at *index*, else the default.
        return seps[index] if len(seps) > index else default

    separators.append(seps[0])         # field separator
    separators.append(pick(2, '~'))    # repetition separator
    separators.append(pick(1, '^'))    # component separator
    separators.append(pick(4, '&'))    # sub-component separator
    esc = pick(3, '\\')                # escape character

    # The ordered list of containers to create
    containers = [
        factory.create_message,
        factory.create_segment,
        factory.create_field,
        factory.create_repetition,
        factory.create_component,
    ]
    return _ParsePlan(separators, containers, esc, factory)

113 

114 

class _ParsePlan(object):
    """Describes how an HL7 message is to be parsed. Instances are
    normally obtained from :func:`hl7.create_parse_plan` rather than
    built by hand.
    """
    # field, component, repetition, escape, subcomponent

    def __init__(self, separators, containers, esc, factory):
        # TODO measure what this assertion costs; the plan is generated
        # internally, so the two lists should always line up
        assert len(containers) == len(separators)
        self.separators, self.containers = separators, containers
        self.esc, self.factory = esc, factory

    @property
    def separator(self):
        """The separator for the current level of the plan."""
        return self.separators[0]

    def container(self, data):
        """Build the container for the current level of the plan around
        *data* and return it.
        """
        build = self.containers[0]
        return build(self.separator, data, self.esc, self.separators, self.factory)

    def next(self):
        """Return the plan for the next level down: a new plan holding
        the remaining separators and containers, or ``None`` when this
        is the last level.
        """
        if len(self.containers) <= 1:
            # Nothing is left below this level.
            return None
        # Go through self.__class__ so that subclasses of the plan
        # produce instances of themselves.
        remaining_separators = self.separators[1:]
        remaining_containers = self.containers[1:]
        return self.__class__(remaining_separators, remaining_containers, self.esc, self.factory)

    def applies(self, text):
        """Return True if this level's separator, or any separator of
        the levels below it, occurs in *text*.
        """
        return any(sep in text for sep in self.separators)

161 return False