Package cssutils :: Package tests :: Module test_tokenize
[hide private]
[frames | no frames]

Source Code for Module cssutils.tests.test_tokenize

  1  # -*- coding: iso-8859-1 -*- 
  2  """ 
  3  testcases for cssutils.tokenize.Tokenizer 
  4  """ 
  5  __author__ = '$LastChangedBy: cthedot $' 
  6  __date__ = '$LastChangedDate: 2007-09-01 15:56:36 +0200 (Sa, 01 Sep 2007) $' 
  7  __version__ = '$LastChangedRevision: 302 $' 
  8   
  9  import xml.dom 
 10   
 11  import basetest 
 12   
 13  from cssutils.tokenize import Tokenizer 
 14  from cssutils.token import Token 
 15   
class TokenizerTestCase(basetest.BaseTestCase):
    """Table-driven test cases for ``cssutils.tokenize.Tokenizer``."""

    def setUp(self):
        """Create a fresh ``Tokenizer`` and keep a handle on ``Token``."""
        self.tokenizer = Tokenizer()
        # The token type constants (tt.S, tt.IDENT, ...) are read off Token.
        self.ttype = Token

    def test_tokenize(self):
        """Tokenizer tests

        Part one feeds each CSS snippet of a table to ``tokenize`` and
        compares the result with the expected token list.  Every expected
        token is written as ``(line, col, type, value[, normalvalue])``;
        the optional fifth item is only checked for the first token.

        Part two switches the tokenizer's log to raising mode and checks
        that each malformed snippet raises the listed exception.
        """
        tt = self.ttype
        token_cases = {
            # SPACES are condensed to 1 SPACE only!
            u' ': [(1, 1, tt.S, u' ', u' ')],
            # NOTE(review): this key is identical to the previous one in the
            # text under review, so the dict keeps only one of them; the
            # original presumably used a longer run of spaces -- confirm.
            u' ': [(1, 1, tt.S, u' ', u' ')],
            u'\r': [(1, 1, tt.S, u'\r', u' ')],
            u'\n': [(1, 1, tt.S, u'\n', u' ')],
            u'\r\n': [(1, 1, tt.S, u'\r\n', u' ')],
            u'\f': [(1, 1, tt.S, u'\f', u' ')],
            u'\t': [(1, 1, tt.S, u'\t', u' ')],
            u'\r\n\r\n\f\t ': [(1, 1, tt.S, u'\r\n\r\n\f\t ', u' ')],

            # IDENT
            u'a': [(1, 1, tt.IDENT, u'a')],
            u'a-b': [(1, 1, tt.IDENT, u'a-b')],
            u'a-': [(1, 1, tt.IDENT, u'a-')],
            u'-b': [(1, 1, tt.IDENT, u'-b')],

            # ATKEYWORD
            u'@a @_ @ab1 @-ab @1': [
                (1, 1, tt.ATKEYWORD, u'@a'),
                (1, 3, tt.S, u' '),
                (1, 4, tt.ATKEYWORD, u'@_'),
                (1, 6, tt.S, u' '),
                (1, 7, tt.ATKEYWORD, u'@ab1'),
                (1, 11, tt.S, u' '),
                (1, 12, tt.ATKEYWORD, u'@-ab'),
                (1, 16, tt.S, u' '),
                (1, 17, tt.DELIM, u'@'),
                (1, 18, tt.NUMBER, u'1')],
            u'x x1 -x .-x #_x -': [
                (1, 1, tt.IDENT, u'x'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IDENT, u'x1'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.IDENT, u'-x'),
                (1, 8, tt.S, u' '),
                (1, 9, tt.CLASS, u'.'),
                (1, 10, tt.IDENT, u'-x'),
                (1, 12, tt.S, u' '),
                (1, 13, tt.HASH, u'#_x'),
                (1, 16, tt.S, u' '),
                (1, 17, tt.DELIM, u'-')],
            u'@import': [(1, 1, tt.IMPORT_SYM, u'@import')],
            u'@page': [(1, 1, tt.PAGE_SYM, u'@page')],
            u'@media': [(1, 1, tt.MEDIA_SYM, u'@media')],
            u'@charset': [(1, 1, tt.CHARSET_SYM, u'@charset')],
            # simple escapes, should \ be removed???
            # (the type is spelled as a literal here, not as tt.IDENT)
            u'\\{\\}\\(\\)\\;\\}\\:\\,': [
                (1, 1, u'{ident}', u'\\{\\}\\(\\)\\;\\}\\:\\,')],

            # comment
            u'/*x*//': [
                (1, 1, tt.COMMENT, u'/*x*/'),
                (1, 6, tt.DELIM, u'/')],
            u'/* */ */': [
                (1, 1, tt.COMMENT, u'/* */'),
                (1, 6, tt.S, u' '),
                (1, 7, tt.UNIVERSAL, u'*'),
                (1, 8, tt.DELIM, u'/')],
            u'1/*\\*/2': [
                (1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.COMMENT, u'/*\\*/'),
                (1, 7, tt.NUMBER, u'2')],

            # STRING
            u'"x"': [(1, 1, tt.STRING, u'"x"')],
            u'"\\""': [(1, 1, tt.STRING, u'"\\""')],
            u'"x\\""a': [
                (1, 1, tt.STRING, u'"x\\""'),
                (1, 6, tt.IDENT, u'a')],
            u"'x'": [(1, 1, tt.STRING, u"'x'")],
            u"'\\''": [(1, 1, tt.STRING, u"'\\''")],
            # backslash followed by a newline inside a string: the expected
            # value contains neither the backslash nor the newline
            u'''"1\\\n2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\r2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\r\n2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\f2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"\\"1\\\n\\\r\\\f\\\r\n2"''': [
                (1, 1, tt.STRING, u'"\\"12"')],

            # ESCAPES
            # full length 6 digit escape
            u'\\000029a': [(1, 1, tt.IDENT, u'\\000029a')],
            # escape short form
            u'\\29': [(1, 1, tt.IDENT, u'\\29')],
            # escape ends as non hexdigit follows
            u'\\29x': [(1, 1, tt.IDENT, u'\\29x')],
            # escape ends with explicit space
            u'\\29 a': [(1, 1, tt.IDENT, u'\\29 a')],
            # escape ends with explicit space but \r\n as single space
            u'\\29\r\na': [(1, 1, tt.IDENT, u'\\29 a')],
            # escape ends, double space becomes single
            # NOTE(review): the S columns of the next five cases point one
            # past the end of the inputs as written here, which suggests the
            # inputs originally ended in two spaces -- confirm.
            u'\\1 ': [(1, 1, tt.IDENT, u'\\1 '), (1, 4, tt.S, u' ')],
            u'\\12 ': [(1, 1, tt.IDENT, u'\\12 '), (1, 5, tt.S, u' ')],
            u'\\123 ': [(1, 1, tt.IDENT, u'\\123 '), (1, 6, tt.S, u' ')],
            u'\\1234 ': [(1, 1, tt.IDENT, u'\\1234 '), (1, 7, tt.S, u' ')],
            u'\\12345 ': [
                (1, 1, tt.IDENT, u'\\12345 '),
                (1, 8, tt.S, u' ')],
            u'\\123456 ': [
                (1, 1, tt.IDENT, u'\\123456'),
                (1, 8, tt.S, u' ')],
            # NOTE(review): identical to the previous key as written here,
            # so only one of the two entries survives in the dict -- confirm
            # whether the original differed in whitespace.
            u'\\123456 ': [
                (1, 1, tt.IDENT, u'\\123456'),
                (1, 8, tt.S, u' ')],
            # escape ends with space but space stays as escaped itself
            u'\\29\\ ': [(1, 1, tt.IDENT, u'\\29\\ ')],
            # escape inside string, escape end removed!
            u'"\\29 a "': [(1, 1, tt.STRING, u'"\\29 a "')],

            # HTML CDO and CDC are filtered out by tokenizer
            u'1 <!-- x --> 2': [
                (1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.S, u' '),
                #(1, 3, tt.CDO, u'<!--'),
                (1, 7, tt.S, u' '),
                (1, 8, tt.IDENT, u'x'),
                (1, 9, tt.S, u' '),
                #(1, 10, tt.CDC, u'-->'),
                (1, 13, tt.S, u' '),
                (1, 14, tt.NUMBER, u'2')],
            u'<!--"--><!--"-->': [
                #(1, 1, tt.CDO, u'<!--'),
                (1, 5, tt.STRING, u'"--><!--"'),
                #(1, 14, tt.CDC, u'-->')
                ],

            # PERCENTAGE
            u'1 2% 3': [
                (1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.PERCENTAGE, u'2%'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.NUMBER, u'3')],
            u'"2%"': [(1, 1, tt.STRING, u'"2%"')],

            # IMPORTANT_SYM
            u' !important ': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.IMPORTANT_SYM, u'!important'),
                (1, 12, tt.S, u' ')],
            u'x !important': [
                (1, 1, tt.IDENT, u'x'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IMPORTANT_SYM, u'!important')],
            u' ! ': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.DELIM, u'!'),
                (1, 3, tt.S, u' ')],
            u'!important': [(1, 1, tt.IMPORTANT_SYM, u'!important')],
            u' !important': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.IMPORTANT_SYM, u'!important')
                ],
            u' ! x important !/*important*/important': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.DELIM, u'!'),
                (1, 3, tt.S, u' '),
                (1, 4, tt.IDENT, u'x'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.IDENT, u'important'),
                (1, 15, tt.S, u' '),
                (1, 16, tt.DELIM, u'!'),
                (1, 17, tt.COMMENT, u'/*important*/'),
                (1, 30, tt.IDENT, u'important')
                ],

            # num
            # ('.1' and '-.1' are expected back as '0.1' and '-0.1')
            u'1 1.1 -1 -1.1 .1 -.1 1.': [
                (1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.NUMBER, u'1.1'),
                (1, 6, tt.S, u' '),
                (1, 7, tt.NUMBER, u'-1'),
                (1, 9, tt.S, u' '),
                (1, 10, tt.NUMBER, u'-1.1'),
                (1, 14, tt.S, u' '),
                (1, 15, tt.NUMBER, u'0.1'),
                (1, 17, tt.S, u' '),
                (1, 18, tt.NUMBER, u'-0.1'),
                (1, 21, tt.S, u' '),
                (1, 22, tt.NUMBER, u'1'),
                (1, 23, tt.CLASS, u'.')
                ],
            # Attribute INCLUDES & DASHMATCH + CSS3
            u'a=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.DELIM, u'='),
                (1, 3, tt.NUMBER, u'1')],
            u'a~=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.INCLUDES, u'~='),
                (1, 4, tt.NUMBER, u'1')],
            u'a|=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.DASHMATCH, u'|='),
                (1, 4, tt.NUMBER, u'1')],
            u'a^=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.PREFIXMATCH, u'^='),
                (1, 4, tt.NUMBER, u'1')],
            u'a$=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.SUFFIXMATCH, u'$='),
                (1, 4, tt.NUMBER, u'1')],
            u'a*=1': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.SUBSTRINGMATCH, u'*='),
                (1, 4, tt.NUMBER, u'1')],

            # CSS3 pseudo
            u'::': [(1, 1, tt.PSEUDO_ELEMENT, u'::')],

            # SPECIALS
            u'*+>~{},': [
                (1, 1, tt.UNIVERSAL, u'*'),
                (1, 2, tt.PLUS, u'+'),
                (1, 3, tt.GREATER, u'>'),
                (1, 4, tt.TILDE, u'~'),
                (1, 5, tt.LBRACE, u'{'),
                (1, 6, tt.RBRACE, u'}'),
                (1, 7, tt.COMMA, u',')],

            # DELIM
            u'!%:&$|': [
                (1, 1, tt.DELIM, u'!'),
                (1, 2, tt.DELIM, u'%'),
                (1, 3, tt.DELIM, u':'),
                (1, 4, tt.DELIM, u'&'),
                (1, 5, tt.DELIM, u'$'),
                (1, 6, tt.DELIM, u'|')],

            # DIMENSION
            u'5em': [(1, 1, tt.DIMENSION, u'5em')],
            u' 5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'5em')],
            u'5em ': [(1, 1, tt.DIMENSION, u'5em'), (1, 4, tt.S, u' ')],

            u'-5em': [(1, 1, tt.DIMENSION, u'-5em')],
            u' -5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'-5em')],
            u'-5em ': [(1, 1, tt.DIMENSION, u'-5em'), (1, 5, tt.S, u' ')],

            u'.5em': [(1, 1, tt.DIMENSION, u'0.5em')],
            u' .5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'0.5em')],
            u'.5em ': [(1, 1, tt.DIMENSION, u'0.5em'), (1, 5, tt.S, u' ')],

            u'-.5em': [(1, 1, tt.DIMENSION, u'-0.5em')],
            u' -.5em': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.DIMENSION, u'-0.5em')],
            u'-.5em ': [
                (1, 1, tt.DIMENSION, u'-0.5em'),
                (1, 6, tt.S, u' ')],

            u'5em5_-': [(1, 1, tt.DIMENSION, u'5em5_-')],

            u'a a5 a5a 5 5a 5a5': [
                (1, 1, tt.IDENT, u'a'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IDENT, u'a5'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.IDENT, u'a5a'),
                (1, 9, tt.S, u' '),
                (1, 10, tt.NUMBER, u'5'),
                (1, 11, tt.S, u' '),
                (1, 12, tt.DIMENSION, u'5a'),
                (1, 14, tt.S, u' '),
                (1, 15, tt.DIMENSION, u'5a5')],

            # URI
            u'url()': [(1, 1, tt.URI, u'url()')],
            u'url();': [(1, 1, tt.URI, u'url()'), (1, 6, tt.SEMICOLON, ';')],
            u'url("x")': [(1, 1, tt.URI, u'url("x")')],
            u'url( "x")': [(1, 1, tt.URI, u'url("x")')],
            u'url("x" )': [(1, 1, tt.URI, u'url("x")')],
            u'url( "x" )': [(1, 1, tt.URI, u'url("x")')],
            u' url("x")': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.URI, u'url("x")')],
            u'url("x") ': [
                (1, 1, tt.URI, u'url("x")'),
                (1, 9, tt.S, u' '),
                ],
            u'url(ab)': [(1, 1, tt.URI, u'url(ab)')],
            u'url($#/ab)': [(1, 1, tt.URI, u'url($#/ab)')],
            # NOTE(review): in a non-raw Python literal '\123' is an octal
            # escape (the letter 'S'), so this case feeds 'url(S3/a/b)' to
            # the tokenizer.  If a CSS escape was intended the backslash
            # needs doubling -- left unchanged pending confirmation.
            u'url(\1233/a/b)': [(1, 1, tt.URI, u'url(\1233/a/b)')],
            # not URI
            u'url("1""2")': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.STRING, u'"1"'),
                (1, 8, tt.STRING, u'"2"'),
                (1, 11, tt.RPARANTHESIS, u')'),
                ],
            u'url(a"2")': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.IDENT, u'a'),
                (1, 6, tt.STRING, u'"2"'),
                (1, 9, tt.RPARANTHESIS, u')'),
                ],
            u'url(a b)': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.IDENT, u'a'),
                (1, 6, tt.S, u' '),
                (1, 7, tt.IDENT, u'b'),
                (1, 8, tt.RPARANTHESIS, u')'),
                ],

            # FUNCTION
            u' counter("x")': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.FUNCTION, u'counter('),
                (1, 10, tt.STRING, u'"x"'),
                (1, 13, tt.RPARANTHESIS, u')')],
            # HASH
            u'# #a #_a #-a #1': [
                (1, 1, tt.DELIM, u'#'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.HASH, u'#a'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.HASH, u'#_a'),
                (1, 9, tt.S, u' '),
                (1, 10, tt.HASH, u'#-a'),
                (1, 13, tt.S, u' '),
                (1, 14, tt.HASH, u'#1')
                ],
            u'#1a1 ': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' '),
                ],
            u'#1a1\n': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n'),
                ],
            u'#1a1{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.LBRACE, u'{'),
                ],
            u'#1a1 {': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.LBRACE, u'{'),
                ],
            u'#1a1\n{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n'),
                (2, 1, tt.LBRACE, u'{'),
                ],
            u'#1a1\n {': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n '),
                (2, 2, tt.LBRACE, u'{'),
                ],
            u'#1a1 \n{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' \n'),
                (2, 1, tt.LBRACE, u'{'),
                ],
            # STRINGS with NL
            u'"x\n': [(1, 1, tt.INVALID, u'"x\n')],
            u'"x\r': [(1, 1, tt.INVALID, u'"x\r')],
            u'"x\f': [(1, 1, tt.INVALID, u'"x\f')],
            u'"x\n ': [
                (1, 1, tt.INVALID, u'"x\n'),
                (2, 1, tt.S, u' ')
                ]
            }

        for css, rows in token_cases.items():
            # Name the input in every failure message; without it a failing
            # table entry cannot be told apart from its neighbours.
            label = 'tokenizing %r' % (css,)
            tokens = self.tokenizer.tokenize(css)
            # Only the first four items build the Token; a fifth item is the
            # expected normalvalue and is checked separately below.
            expected = [Token(*row[:4]) for row in rows]
            self.assertEqual(expected, tokens, label)
            # check normalvalue of the first token where one is given
            if rows and len(rows[0]) > 4:
                self.assertEqual(expected[0].normalvalue, rows[0][4], label)

        # Malformed input: unterminated comment/strings and escape sequences
        # that the tokenizer is expected to reject.
        error_cases = {
            u'/*a': xml.dom.SyntaxErr,
            u'"a': xml.dom.SyntaxErr,
            u"'a": xml.dom.SyntaxErr,
            u"\\0 a": xml.dom.SyntaxErr,
            u"\\00": xml.dom.SyntaxErr,
            u"\\000": xml.dom.SyntaxErr,
            u"\\0000": xml.dom.SyntaxErr,
            u"\\00000": xml.dom.SyntaxErr,
            u"\\000000": xml.dom.SyntaxErr,
            u"\\0000001": xml.dom.SyntaxErr
            }
        # Errors must surface as exceptions for assertRaises to see them.
        # NOTE(review): the flag is not reset afterwards; if the log object
        # is shared beyond this tokenizer the setting outlives the test --
        # confirm.
        self.tokenizer.log.raiseExceptions = True  #!!
        for css, exception in error_cases.items():
            self.assertRaises(exception, self.tokenizer.tokenize, css)
if __name__ == '__main__':
    # Run as a script: hand over to unittest's command-line runner, which
    # by default loads the tests from the __main__ module.
    from unittest import main as run_tests
    run_tests()