Package cssutils :: Module util
[hide private]
[frames | no frames]

Source Code for Module cssutils.util

  1  """base classes and helper functions for css and stylesheets packages 
  2  """ 
  3  __all__ = [] 
  4  __docformat__ = 'restructuredtext' 
  5  __version__ = '$Id: util.py 1321 2008-06-29 20:59:58Z cthedot $' 
  6   
  7  import codecs 
  8  from itertools import ifilter 
  9  import re 
 10  import types 
 11  import urllib2 
 12  import xml.dom 
 13  import cssutils 
 14  from tokenize2 import Tokenizer 
15 # COMMENT OUT IF RUNNING THIS TEST STANDALONE! 16 import encutils 17 18 -class Base(object):
19 """ 20 Base class for most CSS and StyleSheets classes 21 22 **Superceded by Base2 which is used for new seq handling class.** 23 See cssutils.util.Base2 24 25 Contains helper methods for inheriting classes helping parsing 26 27 ``_normalize`` is static as used by Preferences. 28 """ 29 __tokenizer2 = Tokenizer() 30 31 _log = cssutils.log 32 _prods = cssutils.tokenize2.CSSProductions 33 34 # for more on shorthand properties see 35 # http://www.dustindiaz.com/css-shorthand/ 36 # format: shorthand: [(propname, mandatorycheck?)*] 37 _SHORTHANDPROPERTIES = { 38 u'background': [], 39 u'border': [], 40 u'border-left': [], 41 u'border-right': [], 42 u'border-top': [], 43 u'border-bottom': [], 44 u'border-color': [], 45 u'border-style': [], 46 u'border-width': [], 47 u'cue': [], 48 u'font': [], 49 # [('font-weight', True), 50 # ('font-size', True), 51 # ('line-height', False), 52 # ('font-family', True)], 53 u'list-style': [], 54 u'margin': [], 55 u'outline': [], 56 u'padding': [], 57 u'pause': [] 58 } 59 60 # simple escapes, all non unicodes 61 __escapes = re.compile(ur'(\\[^0-9a-fA-F])').sub 62 # all unicode (see cssproductions "unicode") 63 __unicodes = re.compile(ur'\\[0-9a-fA-F]{1,6}[\t|\r|\n|\f|\x20]?').sub 64 65 @staticmethod
66 - def _normalize(x):
67 """ 68 normalizes x, namely: 69 70 - remove any \ before non unicode sequences (0-9a-zA-Z) so for 71 x=="c\olor\" return "color" (unicode escape sequences should have 72 been resolved by the tokenizer already) 73 - lowercase 74 """ 75 if x: 76 def removeescape(matchobj): 77 return matchobj.group(0)[1:]
78 x = Base.__escapes(removeescape, x) 79 return x.lower() 80 else: 81 return x
82
83 - def _checkReadonly(self):
84 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly" 85 if hasattr(self, '_readonly') and self._readonly: 86 raise xml.dom.NoModificationAllowedErr( 87 u'%s is readonly.' % self.__class__) 88 return True 89 return False
90
91 - def _splitNamespacesOff(self, text_namespaces_tuple):
92 """ 93 returns tuple (text, dict-of-namespaces) or if no namespaces are 94 in cssText returns (cssText, {}) 95 96 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and 97 CSSStyleSheet 98 """ 99 if isinstance(text_namespaces_tuple, tuple): 100 return text_namespaces_tuple[0], _SimpleNamespaces( 101 text_namespaces_tuple[1]) 102 else: 103 return text_namespaces_tuple, _SimpleNamespaces()
104
105 - def _tokenize2(self, textortokens):
106 """ 107 returns tokens of textortokens which may already be tokens in which 108 case simply returns input 109 """ 110 if not textortokens: 111 return None 112 elif isinstance(textortokens, basestring): 113 # needs to be tokenized 114 return self.__tokenizer2.tokenize( 115 textortokens) 116 elif types.GeneratorType == type(textortokens): 117 # already tokenized 118 return textortokens 119 elif isinstance(textortokens, tuple): 120 # a single token (like a comment) 121 return [textortokens] 122 else: 123 # already tokenized but return generator 124 return (x for x in textortokens)
125
126 - def _nexttoken(self, tokenizer, default=None):
127 "returns next token in generator tokenizer or the default value" 128 try: 129 return tokenizer.next() 130 except (StopIteration, AttributeError): 131 return default
132
133 - def _type(self, token):
134 "returns type of Tokenizer token" 135 if token: 136 return token[0] 137 else: 138 return None
139
140 - def _tokenvalue(self, token, normalize=False):
141 "returns value of Tokenizer token" 142 if token and normalize: 143 return Base._normalize(token[1]) 144 elif token: 145 return token[1] 146 else: 147 return None
148
149 - def _stringtokenvalue(self, token):
150 """ 151 for STRING returns the actual content without surrounding "" or '' 152 and without respective escapes, e.g.:: 153 154 "with \" char" => with " char 155 """ 156 if token: 157 value = token[1] 158 return value.replace('\\'+value[0], value[0])[1:-1] 159 else: 160 return None
161
162 - def _uritokenvalue(self, token):
163 """ 164 for URI returns the actual content without surrounding url() 165 or url(""), url('') and without respective escapes, e.g.:: 166 167 url("\"") => " 168 """ 169 if token: 170 value = token[1][4:-1].strip() 171 if (value[0] in '\'"') and (value[0] == value[-1]): 172 # a string "..." or '...' 173 value = value.replace('\\'+value[0], value[0])[1:-1] 174 return value 175 else: 176 return None
177
178 - def _tokensupto2(self, 179 tokenizer, 180 starttoken=None, 181 blockstartonly=False, # { 182 blockendonly=False, # } 183 mediaendonly=False, 184 importmediaqueryendonly=False, # ; or STRING 185 mediaqueryendonly=False, # { or STRING 186 semicolon=False, # ; 187 propertynameendonly=False, # : 188 propertyvalueendonly=False, # ! ; } 189 propertypriorityendonly=False, # ; } 190 selectorattendonly=False, # ] 191 funcendonly=False, # ) 192 listseponly=False, # , 193 separateEnd=False # returns (resulttokens, endtoken) 194 ):
195 """ 196 returns tokens upto end of atrule and end index 197 end is defined by parameters, might be ; } ) or other 198 199 default looks for ending "}" and ";" 200 """ 201 ends = u';}' 202 endtypes = () 203 brace = bracket = parant = 0 # {}, [], () 204 205 if blockstartonly: # { 206 ends = u'{' 207 brace = -1 # set to 0 with first { 208 elif blockendonly: # } 209 ends = u'}' 210 brace = 1 211 elif mediaendonly: # } 212 ends = u'}' 213 brace = 1 # rules } and mediarules } 214 elif importmediaqueryendonly: 215 # end of mediaquery which may be ; or STRING 216 ends = u';' 217 endtypes = ('STRING',) 218 elif mediaqueryendonly: 219 # end of mediaquery which may be { or STRING 220 # special case, see below 221 ends = u'{' 222 brace = -1 # set to 0 with first { 223 endtypes = ('STRING',) 224 elif semicolon: 225 ends = u';' 226 elif propertynameendonly: # : and ; in case of an error 227 ends = u':;' 228 elif propertyvalueendonly: # ; or !important 229 ends = u';!' 230 elif propertypriorityendonly: # ; 231 ends = u';' 232 elif selectorattendonly: # ] 233 ends = u']' 234 if starttoken and self._tokenvalue(starttoken) == u'[': 235 bracket = 1 236 elif funcendonly: # ) 237 ends = u')' 238 parant = 1 239 elif listseponly: # , 240 ends = u',' 241 242 resulttokens = [] 243 if starttoken: 244 resulttokens.append(starttoken) 245 if tokenizer: 246 for token in tokenizer: 247 typ, val, line, col = token 248 if 'EOF' == typ: 249 resulttokens.append(token) 250 break 251 if u'{' == val: 252 brace += 1 253 elif u'}' == val: 254 brace -= 1 255 elif u'[' == val: 256 bracket += 1 257 elif u']' == val: 258 bracket -= 1 259 # function( or single ( 260 elif u'(' == val or \ 261 Base._prods.FUNCTION == typ: 262 parant += 1 263 elif u')' == val: 264 parant -= 1 265 266 resulttokens.append(token) 267 268 if (brace == bracket == parant == 0) and ( 269 val in ends or typ in endtypes): 270 break 271 elif mediaqueryendonly and brace == -1 and ( 272 bracket == parant == 0) and typ in endtypes: 273 # 
mediaqueryendonly with STRING 274 break 275 276 if separateEnd: 277 # TODO: use this method as generator, then this makes sense 278 if resulttokens: 279 return resulttokens[:-1], resulttokens[-1] 280 else: 281 return resulttokens, None 282 else: 283 return resulttokens
284
285 - def _valuestr(self, t):
286 """ 287 returns string value of t (t may be a string, a list of token tuples 288 or a single tuple in format (type, value, line, col). 289 Mainly used to get a string value of t for error messages. 290 """ 291 if not t: 292 return u'' 293 elif isinstance(t, basestring): 294 return t 295 else: 296 return u''.join([x[1] for x in t])
297
298 - def _adddefaultproductions(self, productions, new=None):
299 """ 300 adds default productions if not already present, used by 301 _parse only 302 303 each production should return the next expected token 304 normaly a name like "uri" or "EOF" 305 some have no expectation like S or COMMENT, so simply return 306 the current value of self.__expected 307 """ 308 def ATKEYWORD(expected, seq, token, tokenizer=None): 309 "TODO: add default impl for unexpected @rule?" 310 if expected != 'EOF': 311 # TODO: parentStyleSheet=self 312 rule = cssutils.css.CSSUnknownRule() 313 rule.cssText = self._tokensupto2(tokenizer, token) 314 if rule.wellformed: 315 seq.append(rule) 316 return expected 317 else: 318 new['wellformed'] = False 319 self._log.error(u'Expected EOF.', token=token) 320 return expected
321 322 def COMMENT(expected, seq, token, tokenizer=None): 323 "default implementation for COMMENT token adds CSSCommentRule" 324 seq.append(cssutils.css.CSSComment([token])) 325 return expected 326 327 def S(expected, seq, token, tokenizer=None): 328 "default implementation for S token, does nothing" 329 return expected 330 331 def EOF(expected=None, seq=None, token=None, tokenizer=None): 332 "default implementation for EOF token" 333 return 'EOF' 334 335 p = {'ATKEYWORD': ATKEYWORD, 336 'COMMENT': COMMENT, 337 'S': S, 338 'EOF': EOF # only available if fullsheet 339 } 340 p.update(productions) 341 return p 342
343 - def _parse(self, expected, seq, tokenizer, productions, default=None, 344 new=None):
345 """ 346 puts parsed tokens in seq by calling a production with 347 (seq, tokenizer, token) 348 349 expected 350 a name what token or value is expected next, e.g. 'uri' 351 seq 352 to add rules etc to 353 tokenizer 354 call tokenizer.next() to get next token 355 productions 356 callbacks {tokentype: callback} 357 default 358 default callback if tokentype not in productions 359 new 360 used to init default productions 361 362 returns (wellformed, expected) which the last prod might have set 363 """ 364 wellformed = True 365 if tokenizer: 366 prods = self._adddefaultproductions(productions, new) 367 for token in tokenizer: 368 p = prods.get(token[0], default) 369 if p: 370 expected = p(expected, seq, token, tokenizer) 371 else: 372 wellformed = False 373 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token) 374 return wellformed, expected
375
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        adds default productions if not already present, used by
        _parse only

        each production should return the next expected token
        normaly a name like "uri" or "EOF"
        some have no expectation like S or COMMENT, so simply return
        the current value of self.__expected

        productions
            dict {tokentype: callback}, these override the defaults
        new
            optional dict, ``new['wellformed']`` is set to False by the
            default productions if a token is found where EOF is expected;
            may be None in which case the error is logged only

        returns a new dict containing defaults and given productions
        """
        def notwellformed():
            # ``new`` is optional, so it must not be written to blindly
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            "default impl, adds CSSCommentRule if not token == EOF"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.', token=token)
            # the comment is kept in seq even if it was not expected
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, does nothing if not token == EOF"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.', token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
             'COMMENT': COMMENT,
             'S': S,
             'EOF': EOF # only available if fullsheet
             }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing

    NOTE: only ``append``, ``appendItem`` and ``replace`` check the readonly
    flag, item assignment and deletion (``seq[i] = ...``, ``del seq[i]``)
    do not.
    """
    def __init__(self, readonly=True):
        """
        starts with an empty list of items

        readonly
            if true ``append``, ``appendItem`` and ``replace`` raise
            AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        ``seq[i] = (val, typ, line, col)`` replaces position i with a new
        Item, the assigned value must be a sequence of exactly 4 items
        """
        # unpacked here and not in the signature as tuple parameters are
        # deprecated (PEP 3113)
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item()"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        "if not readonly add item which must be an Item"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index (default: the last one) with
        a new Item built from val, typ, line and col
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr listing a tuple (type, value) for each item"
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
            self.__class__.__name__,
            u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self), id(self))
515
class Item(object):
    """
    One entry of a seq list (successor to the plain tuple items used in the
    old seq). All attributes are set once in the constructor and are only
    readable afterwards:

    type
        a sematic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line
        the value given as ``line``, None by default
    col
        the value given as ``col``, None by default
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value, self.__type = value, type
        self.__line, self.__col = line, col

    # read-only accessors, there are no setters on purpose

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        details = (self.__module__, self.__class__.__name__,
                   self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % details
545
class ListSeq(object):
    """
    (EXPERIMENTAL)
    Mixin-like base for list classes such as css.SelectorList or
    stylesheets.MediaList.

    Gives the inheriting class list behaviour on top of its attribute
    ``seq``:

    - item in x => bool
    - len(x) => integer
    - get and del x[i]
    - for item in x

    ``x[i] = item`` and ``append(item)`` raise NotImplementedError here and
    must be overwritten in the inheriting class.
    """
    def __init__(self):
        # a plain list is sufficient, ``Seq`` is not needed here
        self.seq = []

    def __contains__(self, item):
        return item in self.seq

    def __len__(self):
        return len(self.seq)

    def __getitem__(self, index):
        return self.seq[index]

    def __delitem__(self, index):
        del self.seq[index]

    def __iter__(self):
        # generator method: ``self.seq`` is looked up when iteration starts
        for entry in self.seq:
            yield entry

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
591
592 593 -class _Namespaces(object):
594 """ 595 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet. 596 Works on effective namespaces, so e.g. if:: 597 598 @namespace p1 "uri"; 599 @namespace p2 "uri"; 600 601 only the second rule is effective and kept. 602 603 namespaces 604 a dictionary {prefix: namespaceURI} containing the effective namespaces 605 only. These are the latest set in the CSSStyleSheet. 606 parentStyleSheet 607 the parent CSSStyleSheet 608 """
609 - def __init__(self, parentStyleSheet, *args):
610 "no initial values are set, only the relevant sheet is" 611 self.parentStyleSheet = parentStyleSheet
612
613 - def __contains__(self, prefix):
614 return prefix in self.namespaces
615
616 - def __delitem__(self, prefix):
617 """deletes CSSNamespaceRule(s) with rule.prefix == prefix 618 619 prefix '' and None are handled the same 620 """ 621 if not prefix: 622 prefix = u'' 623 delrule = self.__findrule(prefix) 624 for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE, 625 self.parentStyleSheet.cssRules)): 626 if rule == delrule: 627 self.parentStyleSheet.deleteRule(i) 628 return 629 630 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
631
632 - def __getitem__(self, prefix):
633 try: 634 return self.namespaces[prefix] 635 except KeyError, e: 636 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
637
638 - def __iter__(self):
639 return self.namespaces.__iter__()
640
641 - def __len__(self):
642 return len(self.namespaces)
643
644 - def __setitem__(self, prefix, namespaceURI):
645 "replaces prefix or sets new rule, may raise NoModificationAllowedErr" 646 if not prefix: 647 prefix = u'' # None or '' 648 rule = self.__findrule(prefix) 649 if not rule: 650 self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule( 651 prefix=prefix, 652 namespaceURI=namespaceURI), 653 inOrder=True) 654 else: 655 if prefix in self.namespaces: 656 rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr 657 if namespaceURI in self.namespaces.values(): 658 rule.prefix = prefix
659
660 - def __findrule(self, prefix):
661 # returns namespace rule where prefix == key 662 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 663 reversed(self.parentStyleSheet.cssRules)): 664 if rule.prefix == prefix: 665 return rule
666
667 - def __getNamespaces(self):
668 namespaces = {} 669 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 670 reversed(self.parentStyleSheet.cssRules)): 671 if rule.namespaceURI not in namespaces.values(): 672 namespaces[rule.prefix] = rule.namespaceURI 673 return namespaces
674 675 namespaces = property(__getNamespaces, 676 doc=u'Holds only effective @namespace rules in self.parentStyleSheets' 677 '@namespace rules.') 678
679 - def get(self, prefix, default):
680 return self.namespaces.get(prefix, default)
681
682 - def items(self):
683 return self.namespaces.items()
684
685 - def keys(self):
686 return self.namespaces.keys()
687
688 - def values(self):
689 return self.namespaces.values()
690
691 - def prefixForNamespaceURI(self, namespaceURI):
692 """ 693 returns effective prefix for given namespaceURI or raises IndexError 694 if this cannot be found""" 695 for prefix, uri in self.namespaces.items(): 696 if uri == namespaceURI: 697 return prefix 698 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
699
700 - def __str__(self):
701 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % ( 702 self.__class__.__name__, str(self.parentStyleSheet), id(self))
703
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces kept in a plain private dict. Used by objects like Selector
    for as long as they are not connected to a CSSStyleSheet.
    """
    def __init__(self, *args):
        # accepts whatever ``dict()`` accepts as positional arguments
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        mapping = self.__namespaces
        mapping[prefix] = namespaceURI

    def __getnamespaces(self):
        return self.__namespaces

    namespaces = property(__getnamespaces,
        doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        details = (self.__class__.__name__, self.namespaces, id(self))
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % details

    def __repr__(self):
        details = (self.__class__.__name__, self.namespaces)
        return u"cssutils.util.%s(%r)" % details
726
727 728 -def _defaultFetcher(url):
729 """Retrieve data from ``url``. cssutils default implementation of fetch 730 URL function. 731 732 Returns ``(encoding, string)`` or ``None`` 733 """ 734 try: 735 res = urllib2.urlopen(url) 736 except WindowsError, e: 737 # e.g if file URL and not found 738 cssutils.log.warn(e, error=WindowsError) 739 except (WindowsError, ValueError), e: 740 # invalid url, e.g. "1" 741 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError) 742 except urllib2.HTTPError, e: 743 # http error, e.g. 404, e can be raised 744 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' % 745 (url, e.code, e.msg), error=e) 746 except urllib2.URLError, e: 747 # URLError like mailto: or other IO errors, e can be raised 748 cssutils.log.warn(u'URLError, %s' % e.reason, error=e) 749 else: 750 if res: 751 mimeType, encoding = encutils.getHTTPInfo(res) 752 if mimeType != u'text/css': 753 cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' % 754 (url, mimeType), error=ValueError) 755 return encoding, res.read()
756
757 -def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
758 """ 759 Read cssText from url and decode it using all relevant methods (HTTP 760 header, BOM, @charset). Returns encoding (which is needed to set encoding 761 of stylesheet properly) and decoded text 762 763 ``fetcher`` 764 see cssutils.registerFetchUrl for details 765 ``overrideEncoding`` 766 If given this encoding is used and all other encoding information is 767 ignored (HTTP, BOM etc) 768 ``parentEncoding`` 769 Encoding of parent stylesheet (while e.g. reading @import references sheets) 770 or document if available. 771 772 Priority or encoding information 773 -------------------------------- 774 **cssutils only**: overrideEncoding 775 776 1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols) 777 2. BOM and/or @charset (see below) 778 3. <link charset=""> or other metadata from the linking mechanism (if any) 779 4. charset of referring style sheet or document (if any) 780 5. Assume UTF-8 781 782 """ 783 if not fetcher: 784 fetcher = _defaultFetcher 785 r = fetcher(url) 786 if r and len(r) == 2 and r[1] is not None: 787 httpEncoding, content = r 788 UTF8_BOM = u'\xEF\xBB\xBF' 789 790 if overrideEncoding: 791 # 0. override encoding 792 encoding = overrideEncoding 793 elif httpEncoding: 794 # 1. HTTP 795 encoding = httpEncoding 796 else: 797 try: 798 if content.startswith(u'@charset "utf-8";') or \ 799 content.startswith(UTF8_BOM + u'@charset "utf-8";'): 800 # 2. BOM/@charset: explicitly UTF-8 801 contentEncoding = 'utf-8' 802 else: 803 # other encoding with ascii content as not UnicodeDecodeError 804 contentEncoding = False 805 except UnicodeDecodeError, e: 806 # other encoding in any way (with other than ascii content) 807 contentEncoding = False 808 809 if contentEncoding: 810 encoding = contentEncoding 811 else: 812 # contentEncoding may be UTF-8 but this may not be explicit 813 (contentEncoding, explicit) = cssutils.codec.detectencoding_str(content) 814 # contentEncoding may be None for empty string! 
815 if contentEncoding and explicit: 816 # 2. BOM/@charset: explicitly not UTF-8 817 encoding = contentEncoding 818 else: 819 # 4. parent stylesheet or document 820 # may also be None in which case 5. is used in next step anyway 821 encoding = parentEncoding 822 try: 823 # encoding may still be wrong if encoding *is lying*! 824 decodedContent = codecs.lookup("css")[1](content, encoding=encoding)[0] 825 except UnicodeDecodeError, e: 826 cssutils.log.warn(e, neverraise=True) 827 decodedContent = None 828 829 return encoding, decodedContent 830 else: 831 return None, None
832