Package cssutils :: Package tests :: Module test_tokenize
[hide private]
[frames | no frames]

Source Code for Module cssutils.tests.test_tokenize

  1  # -*- coding: iso-8859-1 -*- 
  2  """ 
  3  testcases for cssutils.tokenize.Tokenizer 
  4  """ 
  5  __author__ = '$LastChangedBy: doerwalter $' 
  6  __date__ = '$LastChangedDate: 2007-08-02 22:58:23 +0200 (Do, 02 Aug 2007) $' 
  7  __version__ = '0.9.2b2, $LastChangedRevision: 160 $' 
  8   
  9  import xml.dom 
 10   
 11  import basetest 
 12   
 13  from cssutils.tokenize import Tokenizer 
 14  from cssutils.token import Token 
 15   
 16   
class TokenizerTestCase(basetest.BaseTestCase):
    """Test case exercising cssutils.tokenize.Tokenizer."""

    def setUp(self):
        """Create the Tokenizer under test and alias Token as self.ttype."""
        #log = cssutils.errorhandler.ErrorHandler()
        self.tokenizer = Tokenizer()
        self.ttype = Token

    def test_tokenize(self):
        """Tokenizer tests

        Two table-driven checks are run:

        1. every CSS snippet of the first table is tokenized and the result
           is compared with a list of Token objects built from the expected
           tuples,
        2. every CSS snippet of the second table must raise the exception
           it is mapped to once ``log.raiseExceptions`` has been set on the
           tokenizer.
        """

        # testcss: expected token list with
        #     (line, col, type, value[, normalvalue])
        # Only the first four fields are passed to Token(); the optional
        # fifth one is used by the normalvalue check after the table.

        tt = self.ttype
        tests = {
            # SPACES are condensed to 1 SPACE only!
            # NOTE(review): the next two keys are identical as written, so
            # the second entry silently replaces the first one in the dict;
            # the second key presumably held several spaces originally -
            # confirm against the upstream file.
            u' ': [(1, 1, tt.S, u' ', u' ')],
            u' ': [(1, 1, tt.S, u' ', u' ')],
            u'\r': [(1, 1, tt.S, u'\r', u' ')],
            u'\n': [(1, 1, tt.S, u'\n', u' ')],
            u'\r\n': [(1, 1, tt.S, u'\r\n', u' ')],
            u'\f': [(1, 1, tt.S, u'\f', u' ')],
            u'\t': [(1, 1, tt.S, u'\t', u' ')],
            u'\r\n\r\n\f\t ': [(1, 1, tt.S, u'\r\n\r\n\f\t ', u' ')],

            # IDENT
            u'a': [(1, 1, tt.IDENT, u'a')],
            u'a-b': [(1, 1, tt.IDENT, u'a-b')],
            u'a-': [(1, 1, tt.IDENT, u'a-')],
            u'-b': [(1, 1, tt.IDENT, u'-b')],

            # ATKEYWORD
            u'@a @_ @ab1 @-ab @1': [(1, 1, tt.ATKEYWORD, u'@a'),
                (1, 3, tt.S, u' '), (1, 4, tt.ATKEYWORD, u'@_'),
                (1, 6, tt.S, u' '), (1, 7, tt.ATKEYWORD, u'@ab1'),
                (1, 11, tt.S, u' '), (1, 12, tt.ATKEYWORD, u'@-ab'),
                (1, 16, tt.S, u' '), (1, 17, tt.DELIM, u'@'),
                (1, 18, tt.NUMBER, u'1')],
            u'x x1 -x .-x #_x -': [(1, 1, tt.IDENT, u'x'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IDENT, u'x1'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.IDENT, u'-x'),
                (1, 8, tt.S, u' '),
                (1, 9, tt.CLASS, u'.'),
                (1, 10, tt.IDENT, u'-x'),
                (1, 12, tt.S, u' '),
                (1, 13, tt.HASH, u'#_x'),
                (1, 16, tt.S, u' '),
                (1, 17, tt.DELIM, u'-')],
            u'@import': [(1, 1, tt.IMPORT_SYM, u'@import')],
            u'@page': [(1, 1, tt.PAGE_SYM, u'@page')],
            u'@media': [(1, 1, tt.MEDIA_SYM, u'@media')],
            u'@charset': [(1, 1, tt.CHARSET_SYM, u'@charset')],
            # simple escapes, should \ be removed???
            # (the expected type is spelled as the literal u'{ident}' here
            # instead of tt.IDENT)
            u'\\{\\}\\(\\)\\;\\}\\:\\,': [
                (1, 1, u'{ident}', u'\\{\\}\\(\\)\\;\\}\\:\\,')],

            # comment
            u'/*x*//': [(1, 1, tt.COMMENT, u'/*x*/'), (1, 6, tt.DELIM, u'/')],
            u'/* */ */': [(1, 1, tt.COMMENT, u'/* */'), (1, 6, tt.S, u' '),
                (1, 7, tt.UNIVERSAL, u'*'), (1, 8, tt.DELIM, u'/')],
            u'1/*\\*/2': [(1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.COMMENT, u'/*\\*/'),
                (1, 7, tt.NUMBER, u'2')],

            # STRING
            u'"x"': [(1, 1, tt.STRING, u'"x"')],
            u'"\\""': [(1, 1, tt.STRING, u'"\\""')],
            u'"x\\""a': [(1, 1, tt.STRING, u'"x\\""'), (1, 6, tt.IDENT, u'a')],
            u"'x'": [(1, 1, tt.STRING, u"'x'")],
            u"'\\''": [(1, 1, tt.STRING, u"'\\''")],
            # backslash followed by a newline character inside a string:
            # the expected value contains neither of the two
            u'''"1\\\n2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\r2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\r\n2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"1\\\f2"''': [(1, 1, tt.STRING, u'"12"')],
            u'''"\\"1\\\n\\\r\\\f\\\r\n2"''': [(1, 1, tt.STRING, u'"\\"12"')],

            # ESCAPES
            # full length 6 digit escape
            u'\\000029a': [(1, 1, tt.IDENT, u'\\000029a')],
            # escape short form
            u'\\29': [(1, 1, tt.IDENT, u'\\29')],
            # escape ends as non hexdigit follows
            u'\\29x': [(1, 1, tt.IDENT, u'\\29x')],
            # escape ends with explicit space
            u'\\29 a': [(1, 1, tt.IDENT, u'\\29 a')],
            # escape ends with explicit space but \r\n as single space
            u'\\29\r\na': [(1, 1, tt.IDENT, u'\\29 a')],
            # escape ends, double space becomes single
            # NOTE(review): in the next five entries the expected S token
            # starts one column past the end of the key as written, which
            # suggests each key originally ended in two spaces - confirm
            # against the upstream file.
            u'\\1 ': [(1, 1, tt.IDENT, u'\\1 '), (1, 4, tt.S, u' ')],
            u'\\12 ': [(1, 1, tt.IDENT, u'\\12 '), (1, 5, tt.S, u' ')],
            u'\\123 ': [(1, 1, tt.IDENT, u'\\123 '), (1, 6, tt.S, u' ')],
            u'\\1234 ': [(1, 1, tt.IDENT, u'\\1234 '), (1, 7, tt.S, u' ')],
            u'\\12345 ': [(1, 1, tt.IDENT, u'\\12345 '), (1, 8, tt.S, u' ')],
            # NOTE(review): the next two entries are identical as written
            # (duplicate dict key); one of them presumably differed in its
            # whitespace originally - confirm.
            u'\\123456 ': [(1, 1, tt.IDENT, u'\\123456'), (1, 8, tt.S, u' ')],
            u'\\123456 ': [(1, 1, tt.IDENT, u'\\123456'), (1, 8, tt.S, u' ')],
            # escape ends with space but space stays as escaped itself
            u'\\29\\ ': [(1, 1, tt.IDENT, u'\\29\\ ')],
            # escape inside string, escape end removed!
            u'"\\29 a "': [(1, 1, tt.STRING, u'"\\29 a "')],

            # HTML CDO and CDC
            u'1 <!-- x --> 2': [(1, 1, tt.NUMBER, u'1'), (1, 2, tt.S, u' '),
                (1, 3, tt.CDO, u'<!--'), (1, 7, tt.S, u' '),
                (1, 8, tt.IDENT, u'x'), (1, 9, tt.S, u' '),
                (1, 10, tt.CDC, u'-->'), (1, 13, tt.S, u' '),
                (1, 14, tt.NUMBER, u'2')],
            u'<!--"--><!--"-->': [(1, 1, tt.CDO, u'<!--'),
                (1, 5, tt.STRING, u'"--><!--"'),
                (1, 14, tt.CDC, u'-->')],

            # PERCENTAGE
            u'1 2% 3': [(1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.PERCENTAGE, u'2%'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.NUMBER, u'3')],
            u'"2%"': [(1, 1, tt.STRING, u'"2%"')],

            # IMPORTANT_SYM
            u' !important ': [(1, 1, tt.S, u' '),
                (1, 2, tt.IMPORTANT_SYM, u'!important'),
                (1, 12, tt.S, u' ')],
            u'x !important': [(1,1, tt.IDENT, u'x'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IMPORTANT_SYM, u'!important')],
            u' ! ': [(1, 1, tt.S, u' '), (1, 2, tt.DELIM, u'!'),
                (1, 3, tt.S, u' ')],
            u'!important': [(1,1, tt.IMPORTANT_SYM, u'!important')],
            u' !important': [
                (1,1, tt.S, u' '),
                (1,2, tt.IMPORTANT_SYM, u'!important')
                ],
            u' ! x important !/*important*/important': [
                (1,1, tt.S, u' '),
                (1,2, tt.DELIM, u'!'),
                (1,3, tt.S, u' '),
                (1,4, tt.IDENT, u'x'),
                (1,5, tt.S, u' '),
                (1,6, tt.IDENT, u'important'),
                (1,15, tt.S, u' '),
                (1,16, tt.DELIM, u'!'),
                (1,17, tt.COMMENT, u'/*important*/'),
                (1,30, tt.IDENT, u'important')
                ],

            # num
            # (expected values carry a leading 0 for '.1' and '-.1')
            u'1 1.1 -1 -1.1 .1 -.1 1.': [(1, 1, tt.NUMBER, u'1'),
                (1, 2, tt.S, u' '), (1, 3, tt.NUMBER, u'1.1'),
                (1, 6, tt.S, u' '), (1, 7, tt.NUMBER, u'-1'),
                (1, 9, tt.S, u' '), (1, 10, tt.NUMBER, u'-1.1'),
                (1, 14, tt.S, u' '), (1, 15, tt.NUMBER, u'0.1'),
                (1, 17, tt.S, u' '), (1, 18, tt.NUMBER, u'-0.1'),
                (1, 21, tt.S, u' '),
                (1, 22, tt.NUMBER, u'1'), (1, 23, tt.CLASS, u'.')
                ],
            # Attribute INCLUDES & DASHMATCH + CSS3
            u'a=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.DELIM, u'='),
                (1, 3, tt.NUMBER, u'1')],
            u'a~=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.INCLUDES, u'~='),
                (1, 4, tt.NUMBER, u'1')],
            u'a|=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.DASHMATCH, u'|='),
                (1, 4, tt.NUMBER, u'1')],
            u'a^=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.PREFIXMATCH, u'^='),
                (1, 4, tt.NUMBER, u'1')],
            u'a$=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.SUFFIXMATCH, u'$='),
                (1, 4, tt.NUMBER, u'1')],
            u'a*=1': [(1, 1, tt.IDENT, u'a'), (1, 2, tt.SUBSTRINGMATCH, u'*='),
                (1, 4, tt.NUMBER, u'1')],

            # CSS3 pseudo
            u'::': [(1, 1, tt.PSEUDO_ELEMENT, u'::')],

            # SPECIALS
            u'*+>~{},': [(1, 1, tt.UNIVERSAL, u'*'),
                (1, 2, tt.PLUS, u'+'),
                (1, 3, tt.GREATER, u'>'),
                (1, 4, tt.TILDE, u'~'),
                (1, 5, tt.LBRACE, u'{'),
                (1, 6, tt.RBRACE, u'}'),
                (1, 7, tt.COMMA, u',')],

            # DELIM
            u'!%:&$|': [(1, 1, tt.DELIM, u'!'),
                (1, 2, tt.DELIM, u'%'),
                (1, 3, tt.DELIM, u':'),
                (1, 4, tt.DELIM, u'&'),
                (1, 5, tt.DELIM, u'$'),
                (1, 6, tt.DELIM, u'|')],


            # DIMENSION
            u'5em': [(1, 1, tt.DIMENSION, u'5em')],
            u' 5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'5em')],
            u'5em ': [(1, 1, tt.DIMENSION, u'5em'), (1, 4, tt.S, u' ')],

            u'-5em': [(1, 1, tt.DIMENSION, u'-5em')],
            u' -5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'-5em')],
            u'-5em ': [(1, 1, tt.DIMENSION, u'-5em'), (1, 5, tt.S, u' ')],

            u'.5em': [(1, 1, tt.DIMENSION, u'0.5em')],
            u' .5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'0.5em')],
            u'.5em ': [(1, 1, tt.DIMENSION, u'0.5em'), (1, 5, tt.S, u' ')],

            u'-.5em': [(1, 1, tt.DIMENSION, u'-0.5em')],
            u' -.5em': [(1, 1, tt.S, u' '), (1, 2, tt.DIMENSION, u'-0.5em')],
            u'-.5em ': [(1, 1, tt.DIMENSION, u'-0.5em'), (1, 6, tt.S, u' ')],

            u'5em5_-': [(1, 1, tt.DIMENSION, u'5em5_-')],

            u'a a5 a5a 5 5a 5a5': [(1, 1, tt.IDENT, u'a'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.IDENT, u'a5'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.IDENT, u'a5a'),
                (1, 9, tt.S, u' '),
                (1, 10, tt.NUMBER, u'5'),
                (1, 11, tt.S, u' '),
                (1, 12, tt.DIMENSION, u'5a'),
                (1, 14, tt.S, u' '),
                (1, 15, tt.DIMENSION, u'5a5')],

            # URI
            # (whitespace directly inside the parentheses is absent from
            # the expected URI value)
            u'url("x")': [(1, 1, tt.URI, u'url("x")')],
            u'url( "x")': [(1, 1, tt.URI, u'url("x")')],
            u'url("x" )': [(1, 1, tt.URI, u'url("x")')],
            u'url( "x" )': [(1, 1, tt.URI, u'url("x")')],
            u' url("x")': [
                (1, 1, tt.S, u' '),
                (1, 2, tt.URI, u'url("x")')],
            u'url("x") ': [
                (1, 1, tt.URI, u'url("x")'),
                (1, 9, tt.S, u' '),
                ],
            u'url(ab)': [(1, 1, tt.URI, u'url(ab)')],
            u'url($#/ab)': [(1, 1, tt.URI, u'url($#/ab)')],
            # NOTE(review): in a non-raw Python literal '\123' is an octal
            # escape (the letter 'S'), so no backslash reaches the
            # tokenizer here; u'\\1233' may have been intended - confirm.
            u'url(\1233/a/b)': [(1, 1, tt.URI, u'url(\1233/a/b)')],
            # not URI
            u'url("1""2")': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.STRING, u'"1"'),
                (1, 8, tt.STRING, u'"2"'),
                (1, 11, tt.RPARANTHESIS, u')'),
                ],
            u'url(a"2")': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.IDENT, u'a'),
                (1, 6, tt.STRING, u'"2"'),
                (1, 9, tt.RPARANTHESIS, u')'),
                ],
            u'url(a b)': [
                (1, 1, tt.FUNCTION, u'url('),
                (1, 5, tt.IDENT, u'a'),
                (1, 6, tt.S, u' '),
                (1, 7, tt.IDENT, u'b'),
                (1, 8, tt.RPARANTHESIS, u')'),
                ],

            # FUNCTION
            u' counter("x")': [
                (1,1, tt.S, u' '),
                (1, 2, tt.FUNCTION, u'counter('),
                (1, 10, tt.STRING, u'"x"'),
                (1, 13, tt.RPARANTHESIS, u')')],
            # HASH
            u'# #a #_a #-a #1': [
                (1, 1, tt.DELIM, u'#'),
                (1, 2, tt.S, u' '),
                (1, 3, tt.HASH, u'#a'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.HASH, u'#_a'),
                (1, 9, tt.S, u' '),
                (1, 10, tt.HASH, u'#-a'),
                (1, 13, tt.S, u' '),
                (1, 14, tt.HASH, u'#1')
                ],
            u'#1a1 ': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' '),
                ],
            u'#1a1\n': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n'),
                ],
            u'#1a1{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.LBRACE, u'{'),
                ],
            u'#1a1 {': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' '),
                (1, 6, tt.LBRACE, u'{'),
                ],
            # the following entries expect the token after a newline to be
            # reported on line 2
            u'#1a1\n{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n'),
                (2, 1, tt.LBRACE, u'{'),
                ],
            u'#1a1\n {': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u'\n '),
                (2, 2, tt.LBRACE, u'{'),
                ],
            u'#1a1 \n{': [
                (1, 1, tt.HASH, u'#1a1'),
                (1, 5, tt.S, u' \n'),
                (2, 1, tt.LBRACE, u'{'),
                ],
            # STRINGS with NL
            u'"x\n': [(1,1, tt.INVALID, u'"x\n')],
            u'"x\r': [(1,1, tt.INVALID, u'"x\r')],
            u'"x\f': [(1,1, tt.INVALID, u'"x\f')],
            u'"x\n ': [
                (1,1, tt.INVALID, u'"x\n'),
                (2,1, tt.S, u' ')
                ]

            }

        for css in tests:
            tokens = self.tokenizer.tokenize(css)
            # build the expected tokens from the first four tuple fields
            expected = [Token(x[0], x[1], x[2], x[3]) for x in tests[css]]
            self.assertEqual(expected, tokens)
            # check normalvalue for single tokens
            # NOTE(review): this inspects the Token built from the expected
            # data (expected[0]), not the tokenizer output (tokens[0]) -
            # confirm which one was intended.
            if len(tests[css][0]) > 4:
                expectednv = tests[css][0][4]
                self.assertEqual(expected[0].normalvalue, expectednv)

        # inputs that must raise the mapped exception: an unclosed comment,
        # unclosed strings and backslash escapes made of zeros
        tests = {
            u'/*a': xml.dom.SyntaxErr,
            u'"a': xml.dom.SyntaxErr,
            u"'a": xml.dom.SyntaxErr,
            u"\\0 a": xml.dom.SyntaxErr,
            u"\\00": xml.dom.SyntaxErr,
            u"\\000": xml.dom.SyntaxErr,
            u"\\0000": xml.dom.SyntaxErr,
            u"\\00000": xml.dom.SyntaxErr,
            u"\\000000": xml.dom.SyntaxErr,
            u"\\0000001": xml.dom.SyntaxErr
            }
        # presumably makes the tokenizer's log raise errors instead of only
        # recording them, which assertRaises below depends on - confirm
        self.tokenizer.log.raiseExceptions = True #!!
        for css, exception in tests.items():
            self.assertRaises(exception, self.tokenizer.tokenize, css)
if __name__ == '__main__':
    # Run every test case of this module when it is executed as a script;
    # the runner is imported here because nothing else in the module uses it.
    from unittest import main as run_tests

    run_tests()