Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/chameleon/tokenize.py : 60%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
# http://code.activestate.com/recipes/65125-xml-lexing-shallow-parsing/
# by Paul Prescod
# licensed under the PSF License
#
# modified to capture all non-overlapping parts of tokens
7import re
# Python 2 compatibility: when the ``unicode`` builtin exists, rebind the
# module-level name ``str`` to it; on Python 3 the lookup raises NameError
# and ``str`` is left alone.
try:
    unicode
except NameError:
    pass
else:
    str = unicode
class recollector:
    """Collect named regular-expression fragments.

    A fragment may refer to fragments registered earlier through
    ``%(name)s`` placeholders; these are expanded when the fragment is
    added, so ``res`` always holds fully expanded pattern source.
    """

    def __init__(self):
        # Fragment name -> expanded pattern source.
        self.res = {}

    def add(self, name, reg):
        """Register the fragment ``reg`` under ``name``.

        The unexpanded template is compiled first, purely as a validity
        check (an invalid template raises before anything is stored).
        Placeholders are then filled in from the fragments registered so
        far; an unknown placeholder name raises ``KeyError``.
        """
        re.compile(reg)
        expanded = reg % self.res
        self.res[name] = expanded
collector = recollector()
a = collector.add

# Fragments are registered strictly in this order: each entry may only use
# ``%(name)s`` placeholders for fragments that appear earlier in the table,
# because placeholders are expanded at registration time.
_FRAGMENTS = (
    ("TextSE", "[^<]+"),
    ("UntilHyphen", "[^-]*-"),
    ("Until2Hyphens", "%(UntilHyphen)s(?:[^-]%(UntilHyphen)s)*-"),
    ("CommentCE", "%(Until2Hyphens)s>?"),
    ("UntilRSBs", "[^\\]]*](?:[^\\]]+])*]+"),
    ("CDATA_CE", "%(UntilRSBs)s(?:[^\\]>]%(UntilRSBs)s)*>"),
    ("S", "[ \\n\\t\\r]+"),
    ("Simple", "[^\"'>/]+"),
    ("NameStrt", "[A-Za-z_:@]|[^\\x00-\\x7F]"),
    ("NameChar", "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"),
    ("Name", "(?:%(NameStrt)s)(?:%(NameChar)s)*"),
    ("QuoteSE", "\"[^\"]*\"|'[^']*'"),
    ("DT_IdentSE", "%(S)s%(Name)s(?:%(S)s(?:%(Name)s|%(QuoteSE)s))*"),
    ("MarkupDeclCE", "(?:[^\\]\"'><]+|%(QuoteSE)s)*>"),
    ("S1", "[\\n\\r\\t ]"),
    ("UntilQMs", "[^?]*\\?+"),
    ("PI_Tail", "\\?>|%(S1)s%(UntilQMs)s(?:[^>?]%(UntilQMs)s)*>"),
    (
        "DT_ItemSE",
        "<(?:!(?:--%(Until2Hyphens)s>|[^-]%(MarkupDeclCE)s)|"
        "\\?%(Name)s(?:%(PI_Tail)s))|%%%(Name)s;|%(S)s",
    ),
    (
        "DocTypeCE",
        "%(DT_IdentSE)s(?:%(S)s)?(?:\\[(?:%(DT_ItemSE)s)*](?:%(S)s)?)?>?",
    ),
    (
        "DeclCE",
        "--(?:%(CommentCE)s)?|\\[CDATA\\[(?:%(CDATA_CE)s)?|"
        "DOCTYPE(?:%(DocTypeCE)s)?",
    ),
    ("PI_CE", "%(Name)s(?:%(PI_Tail)s)?"),
    ("EndTagCE", "%(Name)s(?:%(S)s)?>?"),
    ("AttValSE", r"\"[^\"]*\"|'[^']*'|[^\s=<>`]+"),
    (
        "ElemTagCE",
        "(%(Name)s)(?:(%(S)s)(%(Name)s)(((?:%(S)s)?=(?:%(S)s)?)"
        "(?:%(AttValSE)s|%(Simple)s)|(?!(?:%(S)s)?=)))*(?:%(S)s)?(/?>)?",
    ),
    (
        "MarkupSPE",
        "<(?:!(?:%(DeclCE)s)?|"
        "\\?(?:%(PI_CE)s)?|/(?:%(EndTagCE)s)?|(?:%(ElemTagCE)s)?)",
    ),
    ("XML_SPE", "%(TextSE)s|%(MarkupSPE)s"),
    ("XML_MARKUP_ONLY_SPE", "%(MarkupSPE)s"),
    ("ElemTagSPE", "<|%(Name)s"),
)

for _fragment_name, _fragment_source in _FRAGMENTS:
    a(_fragment_name, _fragment_source)

# Compiled forms of the two top-level patterns: text-or-markup, and
# markup only.
_expanded = collector.res
re_xml_spe = re.compile(_expanded['XML_SPE'])
re_markup_only_spe = re.compile(_expanded['XML_MARKUP_ONLY_SPE'])
def iter_xml(body, filename=None):
    """Yield one ``Token`` per successive ``re_xml_spe`` match in ``body``.

    Each token carries the matched text, the offset at which the match
    starts in ``body``, ``body`` itself as the source, and ``filename``.
    """
    for found in re_xml_spe.finditer(body):
        yield Token(found.group(), found.start(), body, filename)
def iter_text(body, filename=None):
    """Yield a single ``Token`` spanning all of ``body``, at offset 0."""
    whole = Token(body, 0, body, filename)
    yield whole
class Token(str):
    """A string that remembers where it sits in a larger source text.

    Attributes:
        pos: offset of the token's first character within ``source``.
        source: the full text the token was taken from, or ``None``.
        filename: name associated with the source; ``""`` when not given.

    The overridden string operations return ``Token`` instances whose
    ``pos`` is adjusted so that it keeps pointing at the right place in
    ``source``.
    """

    __slots__ = "pos", "source", "filename"

    def __new__(cls, string, pos=0, source=None, filename=None):
        inst = str.__new__(cls, string)
        inst.pos = pos
        inst.source = source
        inst.filename = filename or ""
        return inst

    def __getslice__(self, i, j):
        # Python 2 calls this for simple ``tok[i:j]`` slices; Python 3
        # never does.  Delegate to ``__getitem__`` so both share one
        # position computation (``str.__getslice__`` does not exist on
        # Python 3).
        return self.__getitem__(slice(i, j))

    def __getitem__(self, index):
        """Return a character, or a ``Token`` for a slice.

        For slices the resulting token's ``pos`` is offset by the
        normalized start index, so negative or out-of-range starts still
        produce a position inside the token.
        """
        s = str.__getitem__(self, index)
        if isinstance(index, slice):
            # ``indices`` resolves None/negative/out-of-range bounds the
            # same way the slicing itself does.
            start = max(index.indices(len(self))[0], 0)
            return Token(s, self.pos + start, self.source, self.filename)
        return s

    def __add__(self, other):
        """Concatenate, keeping this token's position; ``None`` is a no-op."""
        if other is None:
            return self

        return Token(
            str.__add__(self, other), self.pos, self.source, self.filename)

    def __eq__(self, other):
        return str.__eq__(self, other)

    def __hash__(self):
        return str.__hash__(self)

    def replace(self, *args):
        """Like ``str.replace``; the result keeps this token's position."""
        s = str.replace(self, *args)
        return Token(s, self.pos, self.source, self.filename)

    def split(self, *args):
        """Like ``str.split``, returning ``Token`` pieces.

        Each piece's ``pos`` is the offset at which that piece actually
        starts, i.e. the separators between pieces are accounted for.
        """
        pieces = str.split(self, *args)
        sep = args[0] if args else None
        cursor = 0
        for i, piece in enumerate(pieces):
            if sep is None:
                # Whitespace runs have variable width: locate the piece.
                # Pieces start with a non-whitespace character and only
                # whitespace lies between ``cursor`` and the piece, so the
                # first match is the right one.
                cursor = str.find(self, piece, cursor)
            pieces[i] = Token(
                piece, self.pos + cursor, self.source, self.filename)
            cursor += len(piece)
            if sep is not None:
                # Skip the explicit separator that followed this piece.
                cursor += len(sep)
        return pieces

    def strip(self, *args):
        return self.lstrip(*args).rstrip(*args)

    def lstrip(self, *args):
        """Like ``str.lstrip``; ``pos`` advances past the removed prefix."""
        s = str.lstrip(self, *args)
        return Token(
            s, self.pos + len(self) - len(s), self.source, self.filename)

    def rstrip(self, *args):
        """Like ``str.rstrip``; the start position is unchanged."""
        s = str.rstrip(self, *args)
        return Token(s, self.pos, self.source, self.filename)

    @property
    def location(self):
        """Return ``(line, column)`` of the token within ``source``.

        Lines are counted from 1 and columns from 0.  Without a source
        the result is ``(0, pos)``.
        """
        if self.source is None:
            return 0, self.pos

        body = self.source[:self.pos]
        line = body.count('\n')
        # ``rfind`` gives -1 when there is no newline, which makes the
        # column equal to ``pos`` on the first line.
        return line + 1, self.pos - body.rfind('\n', 0) - 1