1
2 """
3 Classes used by Tokenizer and Parser
4 """
5 __all__ = ['Token', 'Tokenre']
6 __docformat__ = 'restructuredtext'
7 __author__ = '$LastChangedBy: cthedot $'
8 __date__ = '$LastChangedDate: 2007-08-11 23:53:52 +0200 (Sa, 11 Aug 2007) $'
9 __version__ = '$LastChangedRevision: 215 $'
10
11 import re
12 import util
13
15 """
16 constants for Tokenizer and Parser to use
17 values are just identifiers!
18
19 a CSS Token consisting of
20
21 line
22 startline of the token
23 col
24 startcol of the token
25 type
26 of the token
27 value
28 literal value of the token including backslashes
29 normalvalue
30 normalized value of the token
31
32 - no ``\`` like ``c\olor``
33 - for type Token.S normalvalue is always u' ' - a single space
34 - lowercase
35
36 So e.g. a token t might be initialized with::
37
38 t = Token(1, 1, Token.IDENT, u'c\olor')
39
40 resulting in a token with the following attributes::
41
42 t.line == 1
43 t.col == 1
44 t.type == Token.IDENT
45 t.value == u'c\olor'
46 t.normalvalue == u'color'
47
48 includes some CSS3 parts
49 http://www.w3.org/TR/css3-selectors/
50 """
# NOTE(review): the integer prefixed to each line below is an extraction
# artifact (a numbered listing), not code — presumably these are class
# attributes of Token whose `class` header line is missing from this extract.
# The values are identifier strings for token types, not compiled patterns.
# sentinel token type for end of input
51 EOF = u'EOF'
52
# identifier token; `{ident}` is a macro name expanded elsewhere (see Tokenre)
53 IDENT = u'{ident}'
54
# @-rule keywords (CSS 2.1 at-keywords)
55 ATKEYWORD = u'@{ident}'
56 IMPORT_SYM = u'@import'
57 PAGE_SYM = u'@page'
58 MEDIA_SYM = u'@media'
59 CHARSET_SYM = u'@charset'
60 NAMESPACE_SYM = u'@namespace'
61
# literal value tokens
62 STRING = u'{string}'
63 HASH = u'HASH #{name}'
64 NUMBER = u'{num}'
65 PERCENTAGE = u'PERCENTAGE {num}%'
66 DIMENSION = u'DIMENSION {num}{ident}'
67
68 URI = u'url\({w}{string}{w}\)|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)'
69
70 UNICODE_RANGE = u'U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})?'
# structural / punctuation tokens
71 CDO = u'<!--'
72 CDC = u'-->'
73 SEMICOLON = u';'
74 LBRACE = u'{'
75 RBRACE = u'}'
76 LBRACKET = u'['
77 RBRACKET = u']'
# NOTE(review): "PARANTHESIS" is a misspelling of "parenthesis", but these
# names are public identifiers — renaming would break callers.
78 LPARANTHESIS = u'('
79 RPARANTHESIS = u')'
# whitespace token; per the docstring above, normalvalue of S is a single space
80 S = ur'[ ]'
81 COMMENT = u'COMMENT'
82 FUNCTION = u'{ident}\('
83
84 IMPORTANT_SYM = u'!{w}important'
85
# any other single character not matched by another rule
86 DELIM = u'DELIM'
87
# selector tokens (universal selector and class selector marker)
88 UNIVERSAL = u'*'
89 CLASS = u'.'
90
91
# combinators (child, adjacent sibling, general sibling)
92 GREATER = u'>'
93 PLUS = u'+'
94 TILDE = u'~'
95
96
# attribute-selector match operators (CSS 2.1)
97 INCLUDES = u'~='
98 DASHMATCH = u'|='
99
# attribute-selector match operators added by CSS3 Selectors
100 PREFIXMATCH = u'^='
101 SUFFIXMATCH = u'$='
102 SUBSTRINGMATCH = u'*='
103 PSEUDO_ELEMENT = u'::'
104
105
106 INVALID = u'INVALID'
107
108
109 COMMA = u','
110
111
112
113
114
# NOTE(review): the fragment below is incomplete in this extract — the body
# of __init__, the _getvalue/_setvalue accessors, and the `def` lines for
# the comparison and repr methods are missing (the numbering gaps and the
# leading `-` diff marker indicate removed lines). Code left byte-identical;
# comments only.
- def __init__(self, line=1, col=1, type=None, value=u''):
120
123
135
# `value` is exposed as a property; per its doc, setting it presumably also
# maintains the normalized form (normalvalue) — accessors not visible here.
136 value = property(_getvalue, _setvalue,
137                  doc='value and normalized value')
138
# presumably the body of __eq__ (or a similarly named comparison method) —
# its `def` line is missing from this extract; TODO confirm against the
# original file. Tokens compare equal only when line, col, type AND literal
# value all match.
140 """
141 how to compare a token to another
142 """
143 if self.line == token.line and\
144 self.col == token.col and\
145 self.type == token.type and\
146 self.value == token.value:
147 return True
148 else:
149 return False
150
# presumably the body of __repr__/__str__ — `def` line missing from this
# extract. Formats as "LLL:CCC type: value" with zero-padded 3-digit
# line/column numbers.
152 """
153 string representation of Token
154 """
155 return u'%03d:%03d %s: %s' % (
156 self.line, self.col, self.type, self.value)
157
158
# NOTE(review): presumably the docstring and attributes of class Tokenre —
# the `class` header line is missing from this extract (numbering gap at
# 159). Attribute values are regex *templates* containing {macro} references
# that __init__ expands and compiles into re.match callables.
160 """
161 regexes for CSS tokens, on initialization all attributes will
162 be compiled to re.match objects
163 """
164
# composite token patterns built from the macros below
165 DIMENSION = r'{num}{ident}'
166 HASH = r'#{name}'
167 URI = u'url\({w}{string}{w}\)|url\({w}{url}{w}\)'
168
# macro definitions (CSS 2.1 core-syntax productions); {name} references
# are resolved recursively by _expand_macros
169 atkeyword = r'^@[-]?{nmstart}{nmchar}*'
170 ident = r'[-]?{nmstart}{nmchar}*'
171 name = r'{nmchar}+'
172 nmstart = r'[_a-z]|{nonascii}|{escape}'
# octal escapes \0-\177 = ASCII range, so "nonascii" is any char >= \200
173 nonascii = r'[^\0-\177]'
174 unicode = r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?'
175 escape = r'{unicode}|\\[ -~\200-\777]'
176
177 int = r'[-]?\d+'
178 nmchar = r'[\w-]|{nonascii}|{escape}'
179 num = r'[-]?\d+|\d*\.\d+'
180 number = r'{num}'
181 string = r'{string1}|{string2}'
# double-quoted and single-quoted string bodies (escaped quote or non-quote)
182 string1 = r'"(\\\"|[^\"])*"'
183 string2 = r"'(\\\'|[^\'])*'"
184 url = u'([!#$%&*-~]|{nonascii}|{escape})*'
185 nl = r'\n|\r\n|\r|\f'
186 w = r'\s*'
187
# NOTE(review): method bodies below lack their `def` lines in this extract
# (numbering gaps at 188, 200, 216) — presumably __init__, _expand_macros
# and _compile_regexes of Tokenre; TODO confirm against the original file.
# Code left byte-identical; comments only.
#
# presumably __init__: collect all public string-valued class attributes,
# expand their {macro} references, then compile them to match callables.
189 """
190 compile class attribute values to re.match objects
191 """
192 res = {}
193 for x in dir(self):
194 v = self.__getattribute__(x)
# only public string attributes are treated as regex templates
# (basestring => Python 2 source)
195 if isinstance(v, basestring) and not x.startswith('_'):
196 res[x] = v
197
198 self._compile_regexes(self._expand_macros(res))
199
# presumably _expand_macros(self, tokdict): repeatedly substitute {name}
# references with '(?:...)'-wrapped definitions until none remain.
201 """
202 Expand macros in token dictionary
203 """
# NOTE(review): macro_value closes over `res` (assigned two lines below as a
# copy of tokdict), so expansions always read the ORIGINAL definitions even
# while tokdict entries are rewritten — looks intentional but worth
# confirming; a self-referential macro would loop forever here.
204 def macro_value(m):
205 return '(?:%s)' % res[m.groupdict()['macro']]
206
207
208 res = tokdict.copy()
209 for key, value in tokdict.items():
210 while re.search(r'{[a-z][a-z0-9-]*}', value):
211 value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}',
212 macro_value, value)
213 tokdict[key] = value
214 return tokdict
215
# presumably _compile_regexes(self, tokdict): anchor each expanded pattern
# with ^...$ and bind the case-insensitive compiled .match as an instance
# attribute, shadowing the template string on the class.
217 """
218 Compile all regular expressions into callable objects
219 """
220 for key, value in tokdict.items():
221 self.__setattr__(key, re.compile('^%s$' % value, re.I).match)
222
223
# smoke test: construct a default Token and show its repr
# (Python 2 `print` statement — consistent with u'' literals above)
224 if __name__ == '__main__':
225 t = Token()
226 print t
227