Package cssutils :: Module token
[hide private]
[frames] | [no frames]

Source Code for Module cssutils.token

  1  #!/usr/bin/env python 
  2  """ 
  3  Classes used by Tokenizer and Parser 
  4  """ 
  5  __all__ = ['Token', 'Tokenre'] 
  6  __docformat__ = 'restructuredtext' 
  7  __author__ = '$LastChangedBy: cthedot $' 
  8  __date__ = '$LastChangedDate: 2007-08-11 23:53:52 +0200 (Sa, 11 Aug 2007) $' 
  9  __version__ = '$LastChangedRevision: 215 $' 
 10   
 11  import re 
 12  import util 
 13   
class Token(object):
    r"""
    Constants for Tokenizer and Parser to use -- the values are just
    identifiers!

    A CSS Token consists of:

    line
        start line of the token
    col
        start column of the token
    type
        type of the token (one of the constants defined on this class)
    value
        literal value of the token including backslashes
    normalvalue
        normalized value of the token:

        - no ``\`` escapes like ``c\olor``
        - for type Token.S normalvalue is always u' ' - a single space
        - lowercase

    So e.g. a token t might be initialized with::

        t = Token(1, 1, Token.IDENT, u'c\olor')

    resulting in a token with the following attributes::

        t.line == 1
        t.col == 1
        t.type == Token.IDENT
        t.value == u'c\olor'
        t.normalvalue == u'color'

    Includes some CSS3 parts:
    http://www.w3.org/TR/css3-selectors/
    """
    EOF = u'EOF'  # EndOfFile

    IDENT = u'{ident}'

    ATKEYWORD = u'@{ident}'
    IMPORT_SYM = u'@import'
    PAGE_SYM = u'@page'  # not used
    MEDIA_SYM = u'@media'
    CHARSET_SYM = u'@charset'
    NAMESPACE_SYM = u'@namespace'

    STRING = u'{string}'
    HASH = u'HASH #{name}'
    NUMBER = u'{num}'
    PERCENTAGE = u'PERCENTAGE {num}%'
    DIMENSION = u'DIMENSION {num}{ident}'
    # TODO
    URI = u'url\\({w}{string}{w}\\)|url\\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\\)'
    # TODO?
    UNICODE_RANGE = u'U\\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})?'
    CDO = u'<!--'
    CDC = u'-->'
    SEMICOLON = u';'
    LBRACE = u'{'
    RBRACE = u'}'
    LBRACKET = u'['
    RBRACKET = u']'
    LPARANTHESIS = u'('
    RPARANTHESIS = u')'
    # was ``ur'[ ]'``: the ``ur`` prefix is a SyntaxError on Python 3
    # and raw mode is irrelevant here (no backslash in the literal)
    S = u'[ ]'  # \t\r\n\f]+'
    COMMENT = u'COMMENT'  # no comment between !important but S is handled
    FUNCTION = u'{ident}\\('

    IMPORTANT_SYM = u'!{w}important'

    DELIM = u'DELIM'

    UNIVERSAL = u'*'
    CLASS = u'.'

    # combinators
    GREATER = u'>'
    PLUS = u'+'
    TILDE = u'~'

    # atts:
    INCLUDES = u'~='
    DASHMATCH = u'|='
    # CSS3
    PREFIXMATCH = u'^='
    SUFFIXMATCH = u'$='
    SUBSTRINGMATCH = u'*='
    PSEUDO_ELEMENT = u'::'

    # TODO?
    INVALID = u'INVALID'
    #{invalid} return INVALID;

    COMMA = u','  # TODO!
    #EQUALS = u'='
    #DASH = u'-'
    #PIPE = u'|'
    #":not(" return NOT;

    def __init__(self, line=1, col=1, type=None, value=u''):
        """
        :param line: start line of the token (default 1)
        :param col: start column of the token (default 1)
        :param type: one of the type constants defined on this class
        :param value: literal token value; assigning it also derives
            ``normalvalue`` via the ``value`` property setter
        """
        self.line = line
        self.col = col
        self.type = type
        self.value = value  # property assignment, sets normalvalue too

    def _getvalue(self):
        return self._value

    def _setvalue(self, value):
        # keep the literal value and derive the normalized one
        # depending on the token type
        if self.type == Token.S:
            # any whitespace run normalizes to a single space
            self.normalvalue = u' '
            self._value = value
        elif self.type == Token.IDENT:
            # resolve escapes like ``c\olor`` and lowercase
            self.normalvalue = util.Base._normalize(value)
            self._value = value
        else:
            self.normalvalue = self._value = value

    value = property(_getvalue, _setvalue,
                     doc='value and normalized value')

    def __eq__(self, token):
        """
        Tokens are equal if line, col, type and value all match.

        Returns NotImplemented for non-Token operands so Python falls
        back to identity comparison instead of raising AttributeError.
        """
        if not isinstance(token, Token):
            return NotImplemented
        return (self.line == token.line and
                self.col == token.col and
                self.type == token.type and
                self.value == token.value)

    def __ne__(self, token):
        # Python 2 does not derive != from ==, so define it explicitly
        # to keep the pair consistent
        result = self.__eq__(token)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        """
        string representation of Token
        """
        return u'%03d:%03d %s: %s' % (
            self.line, self.col, self.type, self.value)
class Tokenre(object):
    r"""
    Regexes for CSS tokens; on initialization all string class
    attributes are macro-expanded, compiled and replaced on the
    instance by ``re.match``-style callables.

    ``{name}`` fragments inside a pattern are macros resolved against
    the other attributes (see the CSS 2.1 grammar).
    """
    # custom
    DIMENSION = r'{num}{ident}'
    HASH = r'#{name}'
    URI = r'url\({w}{string}{w}\)|url\({w}{url}{w}\)'
    # see spec
    atkeyword = r'^@[-]?{nmstart}{nmchar}*'  #? (leading ^ is redundant)
    ident = r'[-]?{nmstart}{nmchar}*'
    name = r'{nmchar}+'
    nmstart = r'[_a-z]|{nonascii}|{escape}'
    nonascii = r'[^\0-\177]'
    unicode = r'\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])?'
    # NOTE: \377, not \777 -- Python 2's sre silently masked octal
    # escapes with 0xff (so \777 meant \377 anyway) while Python 3's
    # re raises "octal escape value outside of range" for \777; \377
    # keeps the effective behavior and compiles on both versions
    escape = r'{unicode}|\\[ -~\200-\377]'
    # escape = r'{unicode}|\\[ -~\200-\4177777]'
    int = r'[-]?\d+'
    nmchar = r'[\w-]|{nonascii}|{escape}'
    num = r'[-]?\d+|\d*\.\d+'
    number = r'{num}'
    string = r'{string1}|{string2}'
    string1 = r'"(\\\"|[^\"])*"'
    string2 = r"'(\\\'|[^\'])*'"
    url = r'([!#$%&*-~]|{nonascii}|{escape})*'
    nl = r'\n|\r\n|\r|\f'
    w = r'\s*'

    def __init__(self):
        """
        compile class attribute values to re.match objects
        """
        try:
            stringbase = basestring  # Python 2: covers str and unicode
        except NameError:
            stringbase = str  # Python 3: basestring is gone
        res = {}
        for x in dir(self):
            v = self.__getattribute__(x)
            if isinstance(v, stringbase) and not x.startswith('_'):
                res[x] = v

        self._compile_regexes(self._expand_macros(res))

    def _expand_macros(self, tokdict):
        """
        Expand ``{macro}`` references in the token dictionary (in
        place) and return it.
        """
        def macro_value(m):
            # wrap in a non-capturing group so alternations inside a
            # macro do not leak into the surrounding pattern
            return '(?:%s)' % res[m.groupdict()['macro']]

        # unexpanded copy: macros are resolved against the original
        # definitions; nested macros are handled by the while loop
        res = tokdict.copy()
        for key, value in tokdict.items():
            while re.search(r'{[a-z][a-z0-9-]*}', value):
                value = re.sub(r'{(?P<macro>[a-z][a-z0-9-]*)}',
                               macro_value, value)
            tokdict[key] = value
        return tokdict

    def _compile_regexes(self, tokdict):
        """
        Compile all regular expressions into callable match objects.
        """
        for key, value in tokdict.items():
            # '^(?:...)$' instead of '^...$': patterns with a
            # top-level '|' (e.g. num, string) would otherwise anchor
            # only their first/last alternative, letting e.g. num
            # match '12px'
            self.__setattr__(key,
                             re.compile('^(?:%s)$' % value, re.I).match)
if __name__ == '__main__':
    # tiny smoke test when run as a script
    t = Token()
    # print() behaves identically for a single argument on Python 2
    # and Python 3; the old ``print t`` statement is a Python 3
    # SyntaxError
    print(t)