Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/mako/lexer.py : 10%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# mako/lexer.py
2# Copyright 2006-2020 the Mako authors and contributors <see AUTHORS file>
3#
4# This module is part of Mako and is released under
5# the MIT License: http://www.opensource.org/licenses/mit-license.php
7"""provides the Lexer class for parsing template strings into parse trees."""
9import codecs
10import re
12from mako import compat
13from mako import exceptions
14from mako import parsetree
15from mako.pygen import adjust_whitespace
# Module-level cache of compiled regular expressions.  Lexer.match() stores
# each pattern here under the (regexp, flags) tuple it was called with.
17_regexp_cache = {}
20class Lexer(object):
def __init__(
    self,
    text,
    filename=None,
    disable_unicode=False,
    input_encoding=None,
    preprocessor=None,
):
    """Prepare lexer state for one template source.

    :param text: the template source; it is decoded (if needed) when
        ``parse()`` runs.
    :param filename: stored on the lexer, handed to the root
        ``TemplateNode`` and attached to every node and exception.
    :param disable_unicode: stored as-is; rejected when running on
        Python 3.
    :param input_encoding: initial value of ``self.encoding``.
    :param preprocessor: ``None``, a single object, or an iterable of
        callables; ``parse()`` calls each one on the text in order.
    :raises exceptions.UnsupportedError: if ``disable_unicode`` is set
        while ``compat.py3k`` is true.
    """
    # the source text and the tree that parsing will populate
    self.text = text
    self.filename = filename
    self.template = parsetree.TemplateNode(self.filename)

    # position bookkeeping maintained by match_reg()
    self.matched_lineno, self.matched_charpos = 1, 0
    self.lineno, self.match_position = 1, 0

    # stacks of currently open tags / control lines / ternary branches
    self.tag, self.control_line, self.ternary_stack = [], [], []

    self.disable_unicode = disable_unicode
    self.encoding = input_encoding

    if compat.py3k and disable_unicode:
        raise exceptions.UnsupportedError(
            "Mako for Python 3 does not " "support disabling Unicode"
        )

    # normalise the preprocessor argument so parse() can always iterate it
    if preprocessor is None:
        preprocessor = []
    elif not hasattr(preprocessor, "__iter__"):
        preprocessor = [preprocessor]
    self.preprocessor = preprocessor
@property
def exception_kwargs(self):
    """Location keyword arguments for exceptions raised by the lexer.

    :return: a dict with the keys ``source``, ``lineno``, ``pos`` and
        ``filename`` taken from the current text, the line and column of
        the most recent match, and the template filename.
    """
    return dict(
        source=self.text,
        lineno=self.matched_lineno,
        pos=self.matched_charpos,
        filename=self.filename,
    )
def match(self, regexp, flags=None):
    """Match ``regexp`` at the current position, compiling it at most once.

    Compiled patterns are kept in the module-level ``_regexp_cache`` under
    the ``(regexp, flags)`` key, then handed to ``match_reg()``.

    :param regexp: regular expression source string.
    :param flags: optional ``re`` flags; when falsy the pattern is
        compiled without a flags argument.
    :return: the result of ``match_reg()`` for the compiled pattern.
    """
    cache_key = (regexp, flags)
    if cache_key not in _regexp_cache:
        _regexp_cache[cache_key] = (
            re.compile(regexp, flags) if flags else re.compile(regexp)
        )
    return self.match_reg(_regexp_cache[cache_key])
def match_reg(self, reg):
    """Match a compiled pattern at the current text position.

    On success the lexer state is advanced:

    * ``match_position`` moves to the end of the match (one character
      further for a zero-width match, so the lexer always progresses);
    * ``matched_lineno`` records the line the match started on;
    * ``matched_charpos`` records the distance from the newline preceding
      the match start (1 for a match at the start of a line);
    * ``lineno`` grows by the number of newlines that were consumed.

    On failure nothing is modified.

    :param reg: a compiled regular expression object.
    :return: the match object, or ``None`` when the pattern does not match.
    """
    mp = self.match_position

    match = reg.match(self.text, mp)
    if match:
        start, end = match.span()
        # never stay in place on an empty match
        self.match_position = end + 1 if end == start else end
        self.matched_lineno = self.lineno

        # locate the newline at or before the character preceding the
        # match; rfind yields -1 when there is none, which is the same
        # value a backwards scan running off the start would end on.
        cp = mp - 1
        if 0 <= cp < self.textlength:
            cp = self.text.rfind("\n", 0, cp + 1)
        self.matched_charpos = mp - cp

        self.lineno += self.text.count("\n", mp, self.match_position)
    return match
def parse_until_text(self, watch_nesting, *text):
    """Consume text up to the first usable terminator.

    Comments (``#`` to end of line) and quoted string literals are skipped
    whole, so terminators inside them are ignored.

    :param watch_nesting: when true, a terminator found while any of
        ``{``/``(``/``[`` is still open is treated as ordinary text.
    :param text: one or more regular expression fragments, any of which
        ends the scan.
    :return: ``(consumed_text, terminator)`` where ``consumed_text`` runs
        from the starting position up to, but excluding, the terminator.
    :raises exceptions.SyntaxException: if no terminator can be reached.
    """
    origin = self.match_position
    stop_re = r"|".join(text)
    braces = parens = brackets = 0

    while True:
        # a comment running to the end of the line
        if self.match(r"#.*\n"):
            continue

        # a complete single- or triple-quoted string literal
        if self.match(
            r"(\"\"\"|\'\'\'|\"|\')[^\\]*?(\\.[^\\]*?)*\1", re.S
        ):
            continue

        hit = self.match(r"(%s)" % stop_re)
        if hit:
            still_nested = braces > 0 or parens > 0 or brackets > 0
            if not (watch_nesting and still_nested):
                terminator = hit.group(1)
                stop_at = self.match_position - len(terminator)
                return (self.text[origin:stop_at], terminator)
            # otherwise the terminator is plain text and is counted below
        else:
            # plain text up to the next quote, comment or terminator
            hit = self.match(r"(.*?)(?=\"|\'|#|%s)" % stop_re, re.S)

        if hit:
            chunk = hit.group(1)
            braces += chunk.count("{") - chunk.count("}")
            parens += chunk.count("(") - chunk.count(")")
            brackets += chunk.count("[") - chunk.count("]")
            continue

        raise exceptions.SyntaxException(
            "Expected: %s" % ",".join(text), **self.exception_kwargs
        )
def append_node(self, nodecls, *args, **kwargs):
    """Create a parse-tree node and attach it to the tree being built.

    ``source``, ``lineno`` and ``pos`` default to the current text and the
    position of the latest match; ``filename`` is always overwritten with
    the lexer's filename.

    The new node is appended to the innermost open tag (or to the template
    root when no tag is open).  While a control line is open it is also
    recorded on that control line and, unless it is itself a ternary
    keyword of that control line, on the latest ternary branch.  Tag and
    ControlLine nodes additionally update the lexer's stacks.

    :param nodecls: node class (or factory) to call.
    :raises exceptions.SyntaxException: for a non-primary control keyword
        that is not a legal ternary of the enclosing control line.
    """
    kwargs.setdefault("source", self.text)
    kwargs.setdefault("lineno", self.matched_lineno)
    kwargs.setdefault("pos", self.matched_charpos)
    kwargs["filename"] = self.filename
    node = nodecls(*args, **kwargs)

    container = self.tag[-1] if self.tag else self.template
    container.nodes.append(node)

    # build a set of child nodes for the control line
    # (used for loop variable detection)
    # also build a set of child nodes on ternary control lines
    # (used for determining if a pass needs to be auto-inserted)
    if self.control_line:
        control_frame = self.control_line[-1]
        control_frame.nodes.append(node)
        opens_ternary = isinstance(
            node, parsetree.ControlLine
        ) and control_frame.is_ternary(node.keyword)
        if (
            not opens_ternary
            and self.ternary_stack
            and self.ternary_stack[-1]
        ):
            self.ternary_stack[-1][-1].nodes.append(node)

    if isinstance(node, parsetree.Tag):
        if self.tag:
            node.parent = self.tag[-1]
        self.tag.append(node)
        return

    if not isinstance(node, parsetree.ControlLine):
        return

    if node.isend:
        self.control_line.pop()
        self.ternary_stack.pop()
    elif node.is_primary:
        self.control_line.append(node)
        self.ternary_stack.append([])
    elif self.control_line:
        if self.control_line[-1].is_ternary(node.keyword):
            self.ternary_stack[-1].append(node)
        else:
            raise exceptions.SyntaxException(
                "Keyword '%s' not a legal ternary for keyword '%s'"
                % (node.keyword, self.control_line[-1].keyword),
                **self.exception_kwargs
            )
# magic encoding comment, e.g. "# -*- coding: utf-8 -*-" followed by a newline
_coding_re = re.compile(r"#.*coding[:=]\s*([-\w.]+).*\r?\n")

def decode_raw_stream(self, text, decode_raw, known_encoding, filename):
    """Work out the encoding of ``text`` and optionally decode it.

    The encoding comes from, in order of preference: a magic encoding
    comment at the start of the text, ``known_encoding``, then ``"utf-8"``.
    Byte input that starts with a UTF-8 BOM has the BOM stripped and is
    always treated as utf-8.

    :param text: template source, either already-decoded text or bytes.
    :param decode_raw: when true, byte input is decoded with the detected
        encoding; otherwise the bytes are returned unchanged.
    :param known_encoding: fallback encoding when no magic comment exists.
    :param filename: used only for error reporting.
    :return: ``(encoding, text)``.
    :raises exceptions.CompileException: if a BOM is present together with
        a magic comment naming something other than ``utf-8``, or if
        decoding fails.
    """
    if isinstance(text, compat.text_type):
        magic = self._coding_re.match(text)
        declared = magic.group(1) if magic else None
        return (declared or known_encoding or "utf-8"), text

    had_bom = text.startswith(codecs.BOM_UTF8)
    if had_bom:
        text = text[len(codecs.BOM_UTF8) :]

    # the magic comment is always sniffed through a lenient utf-8 decode
    preview = text.decode("utf-8", "ignore")
    magic = self._coding_re.match(preview)

    if had_bom:
        parsed_encoding = "utf-8"
        if magic is not None and magic.group(1) != "utf-8":
            raise exceptions.CompileException(
                "Found utf-8 BOM in file, with conflicting "
                "magic encoding comment of '%s'" % magic.group(1),
                preview,
                0,
                0,
                filename,
            )
    elif magic:
        parsed_encoding = magic.group(1)
    else:
        parsed_encoding = known_encoding or "utf-8"

    if not decode_raw:
        return parsed_encoding, text

    try:
        decoded = text.decode(parsed_encoding)
    except UnicodeDecodeError:
        raise exceptions.CompileException(
            "Unicode decode operation of encoding '%s' failed"
            % parsed_encoding,
            preview,
            0,
            0,
            filename,
        )
    return parsed_encoding, decoded
def parse(self):
    """Lex the whole template and return the populated root node.

    The text is decoded, run through every preprocessor, and then consumed
    by trying each ``match_*`` method in a fixed order until the end of
    the text is reached.

    :return: ``self.template`` with all parsed nodes attached.
    :raises exceptions.SyntaxException: if a tag or control keyword is
        still open at the end of the text.
    :raises exceptions.CompileException: if no matcher can consume the
        text at the current position.
    """
    self.encoding, self.text = self.decode_raw_stream(
        self.text, not self.disable_unicode, self.encoding, self.filename
    )

    for preproc in self.preprocessor:
        self.text = preproc(self.text)

    # match_reg() reads self.textlength, so record it before the first
    # match is attempted.
    self.textlength = len(self.text)

    # push the match marker past the
    # encoding comment.
    self.match_reg(self._coding_re)

    # tried in this order at every position; the first success wins
    matchers = (
        self.match_expression,
        self.match_control_line,
        self.match_comment,
        self.match_tag_start,
        self.match_tag_end,
        self.match_python_block,
        self.match_text,
    )

    while True:
        if self.match_position > self.textlength:
            break

        if self.match_end():
            break
        if any(matcher() for matcher in matchers):
            continue

        if self.match_position > self.textlength:
            break
        # pass the same location arguments as every other
        # CompileException raised by this class, so this surfaces as the
        # intended exception instead of failing on a bare message.
        raise exceptions.CompileException(
            "assertion failed",
            self.text,
            self.matched_lineno,
            self.matched_charpos,
            self.filename,
        )

    if self.tag:
        raise exceptions.SyntaxException(
            "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
            **self.exception_kwargs
        )
    if self.control_line:
        raise exceptions.SyntaxException(
            "Unterminated control keyword: '%s'"
            % self.control_line[-1].keyword,
            self.text,
            self.control_line[-1].lineno,
            self.control_line[-1].pos,
            self.filename,
        )
    return self.template
def match_tag_start(self):
    """Match an opening ``<%keyword attr="value" ...>`` tag.

    On a match a ``Tag`` node is appended with its parsed attributes
    (``\\r\\n`` in values normalised to ``\\n``).  A self-closing tag is
    popped again immediately.  For ``<%text>`` the body up to
    ``</%text>`` is appended verbatim as a ``Text`` node and the closing
    tag is consumed.

    :return: ``False`` when no tag starts here, otherwise ``True`` (or the
        result of ``match_tag_end()`` for ``<%text>``).
    :raises exceptions.SyntaxException: if ``<%text>`` is never closed.
    """
    opening = self.match(
        r"""
        \<% # opening tag
        ([\w\.\:]+) # keyword
        ((?:\s+\w+|\s*=\s*|".*?"|'.*?')*) # attrname, = \
        # sign, string expression
        \s* # more whitespace
        (/)?> # closing
        """,
        re.I | re.S | re.X,
    )
    if not opening:
        return False

    keyword, attr, isend = opening.groups()
    self.keyword = keyword

    attributes = {}
    if attr:
        attributes = {
            key: (single or double).replace("\r\n", "\n")
            for key, single, double in re.findall(
                r"\s*(\w+)\s*=\s*(?:'([^']*)'|\"([^\"]*)\")", attr
            )
        }
    self.append_node(parsetree.Tag, keyword, attributes)

    if isend:
        self.tag.pop()
    elif keyword == "text":
        # everything up to the closing tag is literal text
        body = self.match(r"(.*?)(?=\</%text>)", re.S)
        if not body:
            raise exceptions.SyntaxException(
                "Unclosed tag: <%%%s>" % self.tag[-1].keyword,
                **self.exception_kwargs
            )
        self.append_node(parsetree.Text, body.group(1))
        return self.match_tag_end()
    return True
def match_tag_end(self):
    """Match a closing ``</%keyword>`` tag and pop the open-tag stack.

    :return: ``True`` if a closing tag was consumed, ``False`` if none
        starts at the current position.
    :raises exceptions.SyntaxException: if no tag is open, or the closing
        keyword differs from the innermost open tag.
    """
    closing = self.match(r"\</%[\t ]*(.+?)[\t ]*>")
    if not closing:
        return False

    if not self.tag:
        raise exceptions.SyntaxException(
            "Closing tag without opening tag: </%%%s>"
            % closing.group(1),
            **self.exception_kwargs
        )
    if self.tag[-1].keyword != closing.group(1):
        raise exceptions.SyntaxException(
            "Closing tag </%%%s> does not match tag: <%%%s>"
            % (closing.group(1), self.tag[-1].keyword),
            **self.exception_kwargs
        )
    self.tag.pop()
    return True
def match_end(self):
    """Report whether the current position is the end of the text.

    :return: ``False`` when not at the end; otherwise the matched string
        if it is non-empty, else ``True``.
    """
    at_end = self.match(r"\Z", re.S)
    if not at_end:
        return False
    return at_end.group() or True
def match_text(self):
    """Match plain template text up to the next template construct.

    The text stops before a control/comment line, an expression, or a tag
    start or end; an escaped newline is consumed and dropped; otherwise it
    runs to the end of the string.  Non-empty text is appended as a
    ``Text`` node.

    :return: ``True`` if the pattern matched (even with empty text),
        otherwise ``False``.
    """
    found = self.match(
        r"""
        (.*?) # anything, followed by:
        (
        (?<=\n)(?=[ \t]*(?=%|\#\#)) # an eval or line-based
        # comment preceded by a
        # consumed newline and whitespace
        |
        (?=\${) # an expression
        |
        (?=</?[%&]) # a substitution or block or call start or end
        # - don't consume
        |
        (\\\r?\n) # an escaped newline - throw away
        |
        \Z # end of string
        )""",
        re.X | re.S,
    )
    if not found:
        return False

    plain = found.group(1)
    if plain:
        self.append_node(parsetree.Text, plain)
    return True
def match_python_block(self):
    """Match a ``<% ... %>`` or ``<%! ... %>`` Python block.

    The code up to the closing ``%>`` is whitespace-adjusted and appended
    as a ``Code`` node positioned at the opening delimiter; the node's
    second argument is true for the ``<%!`` form.

    :return: ``True`` if a block was consumed, else ``False``.
    """
    opener = self.match(r"<%(!)?")
    if not opener:
        return False

    line, pos = self.matched_lineno, self.matched_charpos
    source, _terminator = self.parse_until_text(False, r"%>")
    # the trailing newline helps
    # compiler.parse() not complain about indentation
    source = adjust_whitespace(source) + "\n"
    self.append_node(
        parsetree.Code,
        source,
        opener.group(1) == "!",
        lineno=line,
        pos=pos,
    )
    return True
def match_expression(self):
    """Match a ``${expression | escapes}`` substitution.

    The expression text (with ``\\r\\n`` normalised to ``\\n``) and the
    stripped escape list, if any, are appended as an ``Expression`` node
    positioned at the opening ``${``.

    :return: ``True`` if an expression was consumed, else ``False``.
    """
    opener = self.match(r"\${")
    if not opener:
        return False

    line, pos = self.matched_lineno, self.matched_charpos
    text, end = self.parse_until_text(True, r"\|", r"}")
    escapes = ""
    if end == "|":
        # a pipe introduces the filter list, which runs to the brace
        escapes, end = self.parse_until_text(True, r"}")
    self.append_node(
        parsetree.Expression,
        text.replace("\r\n", "\n"),
        escapes.strip(),
        lineno=line,
        pos=pos,
    )
    return True
def match_control_line(self):
    """Match a ``% keyword ...`` control line or a ``## ...`` comment line.

    A line may be continued with a trailing backslash; the continuation is
    recognised for both LF and CRLF line endings.  ``%`` lines become
    ``ControlLine`` nodes (after checking that an ``end<keyword>`` closes
    the innermost open control line); ``##`` lines become ``Comment``
    nodes.

    :return: ``True`` if a line was consumed, else ``False``.
    :raises exceptions.SyntaxException: for a ``%`` line without a
        keyword, an ``end`` keyword with nothing open, or an ``end``
        keyword that does not match the innermost open control line.
    """
    match = self.match(
        # "\\\r?\n" is backslash + optional CR + LF: an escaped newline
        # that continues the control line on the next physical line.
        r"(?<=^)[\t ]*(%(?!%)|##)[\t ]*((?:(?:\\\r?\n)|[^\r\n])*)"
        r"(?:\r?\n|\Z)",
        re.M,
    )
    if not match:
        return False

    operator, text = match.group(1, 2)
    if operator == "%":
        m2 = re.match(r"(end)?(\w+)\s*(.*)", text)
        if not m2:
            raise exceptions.SyntaxException(
                "Invalid control line: '%s'" % text,
                **self.exception_kwargs
            )
        isend, keyword = m2.group(1, 2)
        isend = isend is not None

        if isend:
            if not self.control_line:
                raise exceptions.SyntaxException(
                    "No starting keyword '%s' for '%s'"
                    % (keyword, text),
                    **self.exception_kwargs
                )
            elif self.control_line[-1].keyword != keyword:
                raise exceptions.SyntaxException(
                    "Keyword '%s' doesn't match keyword '%s'"
                    % (text, self.control_line[-1].keyword),
                    **self.exception_kwargs
                )
        self.append_node(parsetree.ControlLine, keyword, isend, text)
    else:
        self.append_node(parsetree.Comment, text)
    return True
def match_comment(self):
    """Match a multi-line ``<%doc> ... </%doc>`` comment.

    The text between the delimiters is appended as a ``Comment`` node.

    :return: ``True`` if a comment block was consumed, else ``False``.
    """
    block = self.match(r"<%doc>(.*?)</%doc>", re.S)
    if not block:
        return False
    self.append_node(parsetree.Comment, block.group(1))
    return True