Package cssutils :: Module token
[hide private]
[frames] | [no frames]

Source Code for Module cssutils.token

  1  #!/usr/bin/env python 
  2  """ 
  3  Classes used by Tokenizer and Parser 
  4  """ 
  5  __all__ = ['Token', 'Tokenre'] 
  6  __docformat__ = 'restructuredtext' 
  7  __author__ = '$LastChangedBy: doerwalter $' 
  8  __date__ = '$LastChangedDate: 2007-08-02 22:58:23 +0200 (Do, 02 Aug 2007) $' 
  9  __version__ = '0.9.2a2 $LastChangedRevision: 160 $' 
 10   
 11  import re 
 12  import util 
 13   
 14   
class Token(object):
    """
    constants for Tokenizer and Parser to use
    values are just identifiers!

    a CSS Token consisting of

    line
        startline of the token
    col
        startcol of the token
    type
        of the token
    value
        literal value of the token including backslashes
    normalvalue
        normalized value of the token

        - no ``\`` like ``c\olor``
        - for type Token.S normalvalue is always u' ' - a single space
        - lowercase

    literal
        REMOVED in 0.9.1 (literal value of the token including backslashes)

    So e.g. a token t might be initialized with::

        t = Token(1, 1, Token.IDENT, u'c\olor')

    resulting in a token with attributes::

        t.line == 1
        t.col == 1
        t.type == Token.IDENT
        t.value == u'c\olor'
        t.normalvalue == u'color'

    includes some CSS3 parts
    http://www.w3.org/TR/css3-selectors/
    """
    EOF = u'EOF'  # EndOfFile

    IDENT = u'{ident}'

    ATKEYWORD = u'@{ident}'
    IMPORT_SYM = u'@import'
    PAGE_SYM = u'@page'  # not used
    MEDIA_SYM = u'@media'
    CHARSET_SYM = u'@charset'
    NAMESPACE_SYM = u'@namespace'

    STRING = u'{string}'
    HASH = u'HASH #{name}'
    NUMBER = u'{num}'
    PERCENTAGE = u'PERCENTAGE {num}%'
    DIMENSION = u'DIMENSION {num}{ident}'
    # TODO
    URI = u'url\({w}{string}{w}\)|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\)'
    # TODO?
    UNICODE_RANGE = u'U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})?'
    CDO = u'<!--'
    CDC = u'-->'
    SEMICOLON = u';'
    LBRACE = u'{'
    RBRACE = u'}'
    LBRACKET = u'['
    RBRACKET = u']'
    LPARANTHESIS = u'('
    RPARANTHESIS = u')'
    # was ``ur'[ ]'`` - the redundant r-prefix is dropped (identical value);
    # the commented-out remainder of the original charset was: \t\r\n\f]+
    S = u'[ ]'
    COMMENT = u'COMMENT'  # no comment between !important but S is handled
    FUNCTION = u'{ident}\('

    IMPORTANT_SYM = u'!{w}important'

    DELIM = u'DELIM'

    UNIVERSAL = u'*'
    CLASS = u'.'

    # combinators
    GREATER = u'>'
    PLUS = u'+'
    TILDE = u'~'

    # atts:
    INCLUDES = u'~='
    DASHMATCH = u'|='
    # CSS3
    PREFIXMATCH = u'^='
    SUFFIXMATCH = u'$='
    SUBSTRINGMATCH = u'*='
    PSEUDO_ELEMENT = u'::'

    # TODO?
    INVALID = u'INVALID'
    #{invalid} return INVALID;

    URL = 'URL'

    COMMA = u','  # TODO!
    #EQUALS = u'='
    #DASH = u'-'
    #PIPE = u'|'
    #":not(" return NOT;

    def __init__(self, line=1, col=1, type=None, value=u''):
        """
        :param line: 1-based start line of the token
        :param col: 1-based start column of the token
        :param type: one of the type constants defined on this class
        :param value: literal token value; assigning it also derives
            ``normalvalue`` (see ``_setvalue``)
        """
        self.line = line
        self.col = col
        self.type = type
        self.value = value

    def _getvalue(self):
        # getter backing the ``value`` property
        return self._value

    def _setvalue(self, value):
        # setter backing the ``value`` property: keeps the literal value
        # and derives ``normalvalue`` depending on the token type
        if self.type == Token.S:
            # any whitespace run normalizes to a single space
            self.normalvalue = u' '
            self._value = value
        elif self.type == Token.IDENT:
            # identifiers get escapes resolved and are lowercased
            self.normalvalue = util.Base._normalize(value)
            self._value = value
        else:
            self.normalvalue = self._value = value

    value = property(_getvalue, _setvalue,
                     doc='value and normalized value')

    def __eq__(self, token):
        """
        Tokens compare equal when line, col, type and value all match.

        BUGFIX: comparing against an object without the token attributes
        used to raise AttributeError; now returns NotImplemented so Python
        falls back to its default comparison.
        """
        if not isinstance(token, Token):
            return NotImplemented
        return (self.line == token.line and
                self.col == token.col and
                self.type == token.type and
                self.value == token.value)

    def __ne__(self, token):
        """
        BUGFIX: Python 2 does not derive ``!=`` from ``__eq__``, so two
        equal tokens previously still compared unequal (by identity).
        """
        result = self.__eq__(token)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        """
        string representation of a Token: ``line:col type: value``
        """
        return u'%03d:%03d %s: %s' % (
            self.line, self.col, self.type, self.value)
169 -class Tokenre(object):
170 """ 171 regexes for CSS tokens, on initialization all attributes will 172 be compiled to re.match objects 173 """ 174 # custom 175 DIMENSION = r'{num}{ident}' 176 HASH = r'#{name}' 177 URI = u'url\({w}{string}{w}\)|url\({w}{url}{w}\)' 178 # see spec 179 atkeyword = r'^@[-]?{nmstart}{nmchar}*' #? 180 ident = r'[-]?{nmstart}{nmchar}*' 181 name = r'{nmchar}+' 182 nmstart = r'[_a-z]|{nonascii}|{escape}' 183 nonascii = r'[^\0-\177]' 184 unicode = r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?' 185 escape = r'{unicode}|\\[ -~\200-\777]' 186 # escape = r'{unicode}|\\[ -~\200-\4177777]' 187 int = r'[-]?\d+' 188 nmchar = r'[\w-]|{nonascii}|{escape}' 189 num = r'[-]?\d+|\d*\.\d+' 190 number = r'{num}' 191 string = r'{string1}|{string2}' 192 string1 = r'"(\\\"|[^\"])*"' 193 string2 = r"'(\\\'|[^\'])*'" 194 url = u'([!#$%&*-~]|{nonascii}|{escape})*' 195 nl = r'\n|\r\n|\r|\f' 196 w = r'\s*' 197
198 - def __init__(self):
199 """ 200 compile class attribute values to re.match objects 201 """ 202 res = {} 203 for x in dir(self): 204 v = self.__getattribute__(x) 205 if isinstance(v, basestring) and not x.startswith('_'): 206 res[x] = v 207 208 self._compile_regexes(self._expand_macros(res))
209
210 - def _expand_macros(self, tokdict):
211 """ 212 Expand macros in token dictionary 213 """ 214 def macro_value(m): 215 return '(?:%s)' % res[m.groupdict()['macro']]
216 217 # copy for macros 218 res = tokdict.copy() 219 for key, value in tokdict.items(): 220 while re.search(r'{[a-z][a-z0-9-]*}', value): 221 value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}', 222 macro_value, value) 223 tokdict[key] = value 224 return tokdict
225
226 - def _compile_regexes(self, tokdict):
227 """ 228 Compile all regular expressions into callable objects 229 """ 230 for key, value in tokdict.items(): 231 self.__setattr__(key, re.compile('^%s$' % value, re.I).match)
if __name__ == '__main__':
    # tiny smoke test: build a default token and show its repr;
    # BUGFIX: ``print(t)`` instead of the Py2-only ``print t`` statement -
    # output is identical for a single argument under Python 2 and 3
    t = Token()
    print(t)