Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# mako/filters.py 

2# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file> 

3# 

4# This module is part of Mako and is released under 

5# the MIT License: http://www.opensource.org/licenses/mit-license.php 

6 

7 

8import codecs 

9import re 

10 

11from mako import compat 

12from mako.compat import codepoint2name 

13from mako.compat import name2codepoint 

14from mako.compat import quote_plus 

15from mako.compat import unquote_plus 

16 

# Replacement table used by xml_escape(): maps each markup-significant
# character to the reference that is substituted for it.  The two quote
# characters are written as numeric character references.
xml_escapes = {
    "&": "&amp;",
    ">": "&gt;",
    "<": "&lt;",
    '"': "&#34;",  # named equivalent: &quot;
    "'": "&#39;",  # named equivalent: &apos;
}

# XXX: &quot; is valid in HTML and XML
#      &apos; is not valid HTML, but is valid XML

27 

28 

def legacy_html_escape(s):
    """Legacy HTML escape for non-unicode mode.

    Replaces ``&``, ``>``, ``<``, ``"`` and ``'`` in ``s`` with character
    references and returns the resulting string.
    """
    # "&" has to come first: the later replacements introduce ampersands
    # of their own, which must not be escaped a second time.
    substitutions = (
        ("&", "&amp;"),
        (">", "&gt;"),
        ("<", "&lt;"),
        ('"', "&#34;"),
        ("'", "&#39;"),
    )
    for char, reference in substitutions:
        s = s.replace(char, reference)
    return s

37 

38 

# Use markupsafe's escape() as html_escape when the package can be imported;
# otherwise fall back to the legacy implementation defined in this module.
try:
    import markupsafe

    html_escape = markupsafe.escape
except ImportError:
    html_escape = legacy_html_escape

45 

46 

def xml_escape(string):
    """Escape ``&``, ``<``, ``>``, ``"`` and ``'`` in ``string``.

    Each matched character is replaced by its entry in ``xml_escapes``.
    """

    def _reference_for(match):
        # The pattern matches exactly one character, which is the table key.
        return xml_escapes[match.group()]

    return re.sub(r'([&<"\'>])', _reference_for, string)

49 

50 

def url_escape(string):
    """URL-quote ``string`` after encoding it to UTF-8.

    The text is first converted to its UTF-8 octets, which are then passed
    to ``quote_plus``.
    """
    octets = string.encode("utf8")
    return quote_plus(octets)

55 

56 

def legacy_url_escape(string):
    """URL-quote ``string`` by handing it directly to ``quote_plus``.

    Unlike ``url_escape``, no UTF-8 encoding step is applied beforehand.
    """
    return quote_plus(string)

60 

61 

def url_unescape(string):
    """Decode a URL-quoted ``string`` (``+`` and ``%XX`` sequences).

    :param string: the quoted value to decode.
    :return: the unquoted text.  A non-ASCII byte string result is decoded
        as UTF-8; a result that is already text is returned unchanged.
    """
    text = unquote_plus(string)
    # Only byte strings can (and need to) be decoded.  When ``unquote_plus``
    # already returns text, as it does on Python 3, calling ``.decode`` on
    # a non-ASCII result would raise AttributeError.
    if isinstance(text, bytes) and not is_ascii_str(text):
        text = text.decode("utf8")
    return text

67 

68 

def trim(string):
    """Return ``string`` with leading and trailing whitespace removed."""
    stripped = string.strip()
    return stripped

71 

72 

class Decode(object):
    """Factory of decoding filters, selected by attribute name.

    Looking up any attribute, e.g. ``decode.utf8``, produces a function
    that converts its argument to text, using the attribute name as the
    encoding for binary input.
    """

    def __getattr__(self, key):
        def decode(x):
            # Text needs no work.
            if isinstance(x, compat.text_type):
                return x
            # Binary data is decoded with the encoding named by ``key``.
            if isinstance(x, compat.binary_type):
                return compat.text_type(x, encoding=key)
            # Any other object is stringified and run through again.
            return decode(str(x))

        return decode


decode = Decode()

87 

88 

# Matches a string made up solely of code points 0x00-0x7F (or empty).
_ASCII_re = re.compile(r"\A[\x00-\x7f]*\Z")


def is_ascii_str(text):
    """Tell whether ``text`` is a ``str`` holding only ASCII characters.

    :param text: any object.
    :return: ``True`` if ``text`` is a ``str`` instance whose characters
        all fall in the range 0x00-0x7F (the empty string qualifies),
        ``False`` otherwise.
    """
    # Compare against None so callers get a real bool rather than a
    # regex match object (or None) leaking out of the predicate.
    return isinstance(text, str) and _ASCII_re.match(text) is not None

94 

95 

96################################################################ 

97 

98 

class XMLEntityEscaper(object):
    """Convert between characters and XML/HTML character references.

    :param codepoint2name: mapping of integer code point to entity name.
    :param name2codepoint: mapping of entity name to integer code point.
    """

    def __init__(self, codepoint2name, name2codepoint):
        # Keyed by code point so the table can be passed to ``translate()``.
        self.codepoint2entity = {
            c: compat.text_type("&%s;" % n)
            for c, n in codepoint2name.items()
        }
        self.name2codepoint = name2codepoint

    def escape_entities(self, text):
        """Replace characters with their character entity references.

        Only characters corresponding to a named entity are replaced.
        """
        return compat.text_type(text).translate(self.codepoint2entity)

    def __escape(self, m):
        # Substitution callback for ``escape``: use the named entity when
        # one is known, otherwise a hexadecimal character reference.
        codepoint = ord(m.group())
        try:
            return self.codepoint2entity[codepoint]
        except (KeyError, IndexError):
            return "&#x%X;" % codepoint

    # The four markup characters plus every non-ASCII character.
    __escapable = re.compile(r'["&<>]|[^\x00-\x7f]')

    def escape(self, text):
        """Replace characters with their character references.

        Replace characters by their named entity references.
        Non-ASCII characters, if they do not have a named entity reference,
        are replaced by numerical character references.

        The return value is guaranteed to be ASCII; it is the result of
        ``.encode("ascii")``, i.e. a byte string.
        """
        return self.__escapable.sub(
            self.__escape, compat.text_type(text)
        ).encode("ascii")

    # XXX: This regexp will not match all valid XML entity names__.
    # (It punts on details involving CombiningChars and Extenders.)
    #
    # .. __: http://www.w3.org/TR/2000/REC-xml-20001006#NT-EntityRef
    #
    # Hex digits are accepted in either case so that the uppercase
    # references produced by ``escape`` ("&#x%X;") can be unescaped again.
    __characterrefs = re.compile(
        r"""& (?:
                  \#(\d+)
                | \#x([\da-fA-F]+)
                | ( (?!\d) [:\w] [-.:\w]+ )
              ) ;""",
        re.X | re.UNICODE,
    )

    def __unescape(self, m):
        # Substitution callback for ``unescape``.  Exactly one of the three
        # groups is set: decimal digits, hex digits, or an entity name.
        dval, hval, name = m.groups()
        if dval:
            codepoint = int(dval)
        elif hval:
            codepoint = int(hval, 16)
        else:
            codepoint = self.name2codepoint.get(name, 0xFFFD)
            # U+FFFD = "REPLACEMENT CHARACTER"
        try:
            return chr(codepoint)
        except (ValueError, OverflowError):
            # A numeric reference outside the valid code point range is
            # treated like an unknown entity name instead of aborting the
            # whole substitution.
            return u"\ufffd"

    def unescape(self, text):
        """Unescape character references.

        All character references (both entity references and numerical
        character references) are unescaped.  Unknown entity names and
        out-of-range numeric references become U+FFFD.
        """
        return self.__characterrefs.sub(self.__unescape, text)

171 

172 

# Module-wide escaper built from the entity tables imported from mako.compat.
_html_entities_escaper = XMLEntityEscaper(codepoint2name, name2codepoint)

# Filter functions bound to the module-wide escaper instance.
html_entities_escape = _html_entities_escaper.escape_entities
html_entities_unescape = _html_entities_escaper.unescape

177 

178 

def htmlentityreplace_errors(ex):
    """An encoding error handler.

    This python codecs error handler replaces unencodable
    characters with HTML entities, or, if no HTML entity exists for
    the character, XML character references::

        >>> u'The cost was \u20ac12.'.encode('latin1', 'htmlentityreplace')
        'The cost was &euro;12.'

    :param ex: the exception instance passed in by the codec machinery.
    :return: a ``(replacement_text, resume_position)`` tuple for a
        ``UnicodeEncodeError``.
    :raises: ``ex`` itself when it is not a ``UnicodeEncodeError``.
    """
    if isinstance(ex, UnicodeEncodeError):
        # Handle encoding errors
        bad_text = ex.object[ex.start : ex.end]
        text = _html_entities_escaper.escape(bad_text)
        if isinstance(text, bytes):
            # escape() returns ASCII-encoded bytes.  Decode them explicitly:
            # converting bytes with str() on Python 3 produces the
            # "b'...'" repr, which would end up in the encoded output.
            text = text.decode("ascii")
        return (compat.text_type(text), ex.end)
    raise ex


codecs.register_error("htmlentityreplace", htmlentityreplace_errors)

198 

199 

200# TODO: options to make this dynamic per-compilation will be added in a later 

201# release 

# Maps each filter name to the expression string registered for it.
DEFAULT_ESCAPES = {
    "x": "filters.xml_escape",
    "h": "filters.html_escape",
    "u": "filters.url_escape",
    "trim": "filters.trim",
    "entity": "filters.html_entities_escape",
    "unicode": "unicode",
    "decode": "decode",
    "str": "str",
    "n": "n",
}

# Under Python 3 the "unicode" filter is pointed at "str" instead.
if compat.py3k:
    DEFAULT_ESCAPES["unicode"] = "str"

# Same table, with the "h" and "u" filters swapped for the legacy variants.
NON_UNICODE_ESCAPES = dict(
    DEFAULT_ESCAPES,
    h="filters.legacy_html_escape",
    u="filters.legacy_url_escape",
)

219NON_UNICODE_ESCAPES["u"] = "filters.legacy_url_escape"