1
2 """
3 Classes used by Tokenizer and Parser
4 """
5 __all__ = ['Token', 'Tokenre']
6 __docformat__ = 'restructuredtext'
7 __author__ = '$LastChangedBy: doerwalter $'
8 __date__ = '$LastChangedDate: 2007-08-02 22:58:23 +0200 (Do, 02 Aug 2007) $'
9 __version__ = '0.9.2a2 $LastChangedRevision: 160 $'
10
11 import re
12 import util
13
14
16 """
17 constants for Tokenizer and Parser to use
18 values are just identifiers!
19
20 a CSS Token consisting of
21
22 line
23 startline of the token
24 col
25 startcol of the token
26 type
27 of the token
28 value
29 literal value of the token including backslashes
30 normalvalue
31 normalized value of the token
32
33 - no ``\`` like ``c\olor``
34 - for type Token.S normalvalue is always u' ' - a single space
35 - lowercase
36
37 literal
38 REMOVED in 0.9.1 (literal value of the token including backslashes)
39
40 So e.g. a token t might be initialized with::
41
42 t = Token(1, 1, Token.IDENT, u'c\olor')
43
44 resulting in a token with attributes::
45
46 t.line == 1
47 t.col == 1
48 t.type == Token.IDENT
49 t.value == u'c\olor'
50 t.normalvalue == u'color'
51
52 includes some CSS3 parts
53 http://www.w3.org/TR/css3-selectors/
54 """
# --- Token type constants ---------------------------------------------------
# Per the docstring above, these values are *identifiers* used to tag tokens,
# not patterns matched against the input; the brace names like {ident} are
# descriptive macro names from the CSS tokenizer grammar.
EOF = u'EOF'

IDENT = u'{ident}'

# at-rule keywords
ATKEYWORD = u'@{ident}'
IMPORT_SYM = u'@import'
PAGE_SYM = u'@page'
MEDIA_SYM = u'@media'
CHARSET_SYM = u'@charset'
NAMESPACE_SYM = u'@namespace'

# value-carrying token types
STRING = u'{string}'
HASH = u'HASH #{name}'
NUMBER = u'{num}'
PERCENTAGE = u'PERCENTAGE {num}%'
DIMENSION = u'DIMENSION {num}{ident}'

URI = u'url\({w}{string}{w}\)|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)'

UNICODE_RANGE = u'U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})?'
CDO = u'<!--'
CDC = u'-->'
SEMICOLON = u';'
LBRACE = u'{'
RBRACE = u'}'
LBRACKET = u'['
RBRACKET = u']'
# NOTE(review): "PARANTHESIS" is misspelled but kept -- renaming would break
# every caller that references these names.
LPARANTHESIS = u'('
RPARANTHESIS = u')'
# whitespace token; per the docstring its normalvalue is always u' '
S = u'[ ]'  # was ur'[ ]' -- identical value, u'' stays parseable everywhere
COMMENT = u'COMMENT'
FUNCTION = u'{ident}\('

IMPORTANT_SYM = u'!{w}important'

# catch-all single-character token type
DELIM = u'DELIM'

# CSS3 selector parts (http://www.w3.org/TR/css3-selectors/)
UNIVERSAL = u'*'
CLASS = u'.'

# combinators
GREATER = u'>'
PLUS = u'+'
TILDE = u'~'

# attribute matchers
INCLUDES = u'~='
DASHMATCH = u'|='

PREFIXMATCH = u'^='
SUFFIXMATCH = u'$='
SUBSTRINGMATCH = u'*='
PSEUDO_ELEMENT = u'::'


INVALID = u'INVALID'

# NOTE(review): only constant without a u'' prefix in the original -- kept
# as-is to avoid changing the runtime value's type under Python 2.
URL = 'URL'

COMMA = u','
121
122 - def __init__(self, line=1, col=1, type=None, value=u''):
127
128
131
143
144 value = property(_getvalue, _setvalue,
145 doc='value and normalized value')
146
147
149 """
150 how to compare a token to another
151 """
152 if self.line == token.line and\
153 self.col == token.col and\
154 self.type == token.type and\
155 self.value == token.value:
156 return True
157 else:
158 return False
159
161 """
162 string representation of Token
163 """
164 return u'%03d:%03d %s: %s' % (
165 self.line, self.col, self.type, self.value)
166
167
168
170 """
171 regexes for CSS tokens; on initialization all string attributes are
172 compiled (anchored, case-insensitive) and replaced by their pattern's ``match`` callable
173 """
174
# --- Regular expression sources for CSS tokens ------------------------------
# Brace-delimited names like {num} or {ident} are macros: _expand_macros()
# recursively substitutes each {x} with the value of the attribute named x,
# and _compile_regexes() then compiles every expanded string anchored
# ('^...$') and case-insensitively, rebinding the attribute to the compiled
# pattern's match callable.
DIMENSION = r'{num}{ident}'
HASH = r'#{name}'
URI = u'url\({w}{string}{w}\)|url\({w}{url}{w}\)'

atkeyword = r'^@[-]?{nmstart}{nmchar}*'
ident = r'[-]?{nmstart}{nmchar}*'
name = r'{nmchar}+'
nmstart = r'[_a-z]|{nonascii}|{escape}'
nonascii = r'[^\0-\177]'  # anything outside US-ASCII (octal range)
unicode = r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?'
escape = r'{unicode}|\\[ -~\200-\777]'

int = r'[-]?\d+'  # NOTE(review): attribute name shadows the builtin; kept for compatibility
nmchar = r'[\w-]|{nonascii}|{escape}'
num = r'[-]?\d+|\d*\.\d+'
number = r'{num}'
string = r'{string1}|{string2}'
string1 = r'"(\\\"|[^\"])*"'
string2 = r"'(\\\'|[^\'])*'"
url = u'([!#$%&*-~]|{nonascii}|{escape})*'
nl = r'\n|\r\n|\r|\f'
w = r'\s*'  # optional whitespace run (the {w} macro)
199 """
200 compile class attribute values to re.match objects
201 """
202 res = {}
203 for x in dir(self):
204 v = self.__getattribute__(x)
205 if isinstance(v, basestring) and not x.startswith('_'):
206 res[x] = v
207
208 self._compile_regexes(self._expand_macros(res))
209
211 """
212 Expand macros in token dictionary
213 """
214 def macro_value(m):
215 return '(?:%s)' % res[m.groupdict()['macro']]
216
217
218 res = tokdict.copy()
219 for key, value in tokdict.items():
220 while re.search(r'{[a-z][a-z0-9-]*}', value):
221 value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}',
222 macro_value, value)
223 tokdict[key] = value
224 return tokdict
225
227 """
228 Compile all regular expressions into callable objects
229 """
230 for key, value in tokdict.items():
231 self.__setattr__(key, re.compile('^%s$' % value, re.I).match)
232
233
234 if __name__ == '__main__':
235 t = Token()
236 print t
237