Package cssutils :: Module util
[hide private]
[frames] | no frames]

Source Code for Module cssutils.util

  1  """base classes and helper functions for css and stylesheets packages 
  2  """ 
  3  __all__ = [] 
  4  __docformat__ = 'restructuredtext' 
  5  __version__ = '$Id: util.py 1345 2008-07-09 20:51:59Z cthedot $' 
  6   
  7  import codecs 
  8  from itertools import ifilter 
  9  import re 
 10  import types 
 11  import urllib2 
 12  import xml.dom 
 13  import cssutils 
 14  from tokenize2 import Tokenizer 
15 # COMMENT OUT IF RUNNING THIS TEST STANDALONE! 16 import encutils 17 18 -class Base(object):
19 """ 20 Base class for most CSS and StyleSheets classes 21 22 **Superceded by Base2 which is used for new seq handling class.** 23 See cssutils.util.Base2 24 25 Contains helper methods for inheriting classes helping parsing 26 27 ``_normalize`` is static as used by Preferences. 28 """ 29 __tokenizer2 = Tokenizer() 30 31 _log = cssutils.log 32 _prods = cssutils.tokenize2.CSSProductions 33 34 # for more on shorthand properties see 35 # http://www.dustindiaz.com/css-shorthand/ 36 # format: shorthand: [(propname, mandatorycheck?)*] 37 _SHORTHANDPROPERTIES = { 38 u'background': [], 39 u'border': [], 40 u'border-left': [], 41 u'border-right': [], 42 u'border-top': [], 43 u'border-bottom': [], 44 u'border-color': [], 45 u'border-style': [], 46 u'border-width': [], 47 u'cue': [], 48 u'font': [], 49 # [('font-weight', True), 50 # ('font-size', True), 51 # ('line-height', False), 52 # ('font-family', True)], 53 u'list-style': [], 54 u'margin': [], 55 u'outline': [], 56 u'padding': [], 57 u'pause': [] 58 } 59 60 # simple escapes, all non unicodes 61 __simpleescapes = re.compile(ur'(\\[^0-9a-fA-F])').sub 62 63 @staticmethod
64 - def _normalize(x):
65 """ 66 normalizes x, namely: 67 68 - remove any \ before non unicode sequences (0-9a-zA-Z) so for 69 x=="c\olor\" return "color" (unicode escape sequences should have 70 been resolved by the tokenizer already) 71 - lowercase 72 """ 73 if x: 74 def removeescape(matchobj): 75 return matchobj.group(0)[1:]
76 x = Base.__simpleescapes(removeescape, x) 77 return x.lower() 78 else: 79 return x
80
81 - def _checkReadonly(self):
82 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly" 83 if hasattr(self, '_readonly') and self._readonly: 84 raise xml.dom.NoModificationAllowedErr( 85 u'%s is readonly.' % self.__class__) 86 return True 87 return False
88
def _splitNamespacesOff(self, text_namespaces_tuple):
    """
    Split an optional namespaces mapping off the given value.

    text_namespaces_tuple
        either a plain text or a tuple ``(text, namespaces)``

    Returns ``(text, _SimpleNamespaces)``; if no tuple was given the
    namespaces object is empty.

    Used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
    CSSStyleSheet.
    """
    if not isinstance(text_namespaces_tuple, tuple):
        # plain text only, no namespaces given
        return text_namespaces_tuple, _SimpleNamespaces()
    text = text_namespaces_tuple[0]
    namespaces = text_namespaces_tuple[1]
    return text, _SimpleNamespaces(namespaces)
102
103 - def _tokenize2(self, textortokens):
104 """ 105 returns tokens of textortokens which may already be tokens in which 106 case simply returns input 107 """ 108 if not textortokens: 109 return None 110 elif isinstance(textortokens, basestring): 111 # needs to be tokenized 112 return self.__tokenizer2.tokenize( 113 textortokens) 114 elif types.GeneratorType == type(textortokens): 115 # already tokenized 116 return textortokens 117 elif isinstance(textortokens, tuple): 118 # a single token (like a comment) 119 return [textortokens] 120 else: 121 # already tokenized but return generator 122 return (x for x in textortokens)
123
124 - def _nexttoken(self, tokenizer, default=None):
125 "returns next token in generator tokenizer or the default value" 126 try: 127 return tokenizer.next() 128 except (StopIteration, AttributeError): 129 return default
130
131 - def _type(self, token):
132 "returns type of Tokenizer token" 133 if token: 134 return token[0] 135 else: 136 return None
137
138 - def _tokenvalue(self, token, normalize=False):
139 "returns value of Tokenizer token" 140 if token and normalize: 141 return Base._normalize(token[1]) 142 elif token: 143 return token[1] 144 else: 145 return None
146
147 - def _stringtokenvalue(self, token):
148 """ 149 for STRING returns the actual content without surrounding "" or '' 150 and without respective escapes, e.g.:: 151 152 "with \" char" => with " char 153 """ 154 if token: 155 value = token[1] 156 return value.replace('\\'+value[0], value[0])[1:-1] 157 else: 158 return None
159
160 - def _uritokenvalue(self, token):
161 """ 162 for URI returns the actual content without surrounding url() 163 or url(""), url('') and without respective escapes, e.g.:: 164 165 url("\"") => " 166 """ 167 if token: 168 value = token[1][4:-1].strip() 169 if (value[0] in '\'"') and (value[0] == value[-1]): 170 # a string "..." or '...' 171 value = value.replace('\\'+value[0], value[0])[1:-1] 172 return value 173 else: 174 return None
175
def _tokensupto2(self,
                 tokenizer,
                 starttoken=None,
                 blockstartonly=False, # {
                 blockendonly=False, # }
                 mediaendonly=False,
                 importmediaqueryendonly=False, # ; or STRING
                 mediaqueryendonly=False, # { or STRING
                 semicolon=False, # ;
                 propertynameendonly=False, # :
                 propertyvalueendonly=False, # ! ; }
                 propertypriorityendonly=False, # ; }
                 selectorattendonly=False, # ]
                 funcendonly=False, # )
                 listseponly=False, # ,
                 separateEnd=False # returns (resulttokens, endtoken)
                 ):
    """
    Collect tokens from ``tokenizer`` up to and including an end token.

    The end is defined by the first of the ``...only`` flags which is set
    (the flags are checked in the order of the ``if``/``elif`` chain
    below); without any flag the end is ``;`` or ``}``. While collecting,
    the nesting of ``{}``, ``[]`` and ``()`` (including FUNCTION tokens)
    is counted and an end token only ends the run while all three
    counters are 0. An EOF token always ends the run and is included in
    the result.

    tokenizer
        iterable of tokens ``(type, value, line, col)``; if falsy only
        ``starttoken`` (if given) is returned
    starttoken
        if given it is put first into the result; it is not counted for
        the nesting except for ``[`` with ``selectorattendonly``
    separateEnd
        if true returns ``(tokens-without-last, lasttoken)`` or
        ``([], None)`` if no tokens were collected, else returns the
        list of all collected tokens
    """
    # default end values, may be overwritten by the flags below
    ends = u';}'
    endtypes = ()
    brace = bracket = parant = 0 # {}, [], ()

    if blockstartonly: # {
        ends = u'{'
        brace = -1 # set to 0 with first {
    elif blockendonly: # }
        ends = u'}'
        brace = 1
    elif mediaendonly: # }
        ends = u'}'
        brace = 1 # rules } and mediarules }
    elif importmediaqueryendonly:
        # end of mediaquery which may be ; or STRING
        ends = u';'
        endtypes = ('STRING',)
    elif mediaqueryendonly:
        # end of mediaquery which may be { or STRING
        # special case, see below
        ends = u'{'
        brace = -1 # set to 0 with first {
        endtypes = ('STRING',)
    elif semicolon:
        ends = u';'
    elif propertynameendonly: # : and ; in case of an error
        ends = u':;'
    elif propertyvalueendonly: # ; or !important
        ends = u';!'
    elif propertypriorityendonly: # ;
        ends = u';'
    elif selectorattendonly: # ]
        ends = u']'
        # the opening [ was consumed by the caller already
        if starttoken and self._tokenvalue(starttoken) == u'[':
            bracket = 1
    elif funcendonly: # )
        ends = u')'
        parant = 1
    elif listseponly: # ,
        ends = u','

    resulttokens = []
    if starttoken:
        resulttokens.append(starttoken)
    if tokenizer:
        for token in tokenizer:
            typ, val, line, col = token
            if 'EOF' == typ:
                # EOF ends in any case and is part of the result
                resulttokens.append(token)
                break
            # keep track of the nesting level
            if u'{' == val:
                brace += 1
            elif u'}' == val:
                brace -= 1
            elif u'[' == val:
                bracket += 1
            elif u']' == val:
                bracket -= 1
            # function( or single (
            elif u'(' == val or \
                 Base._prods.FUNCTION == typ:
                parant += 1
            elif u')' == val:
                parant -= 1

            resulttokens.append(token)

            # NOTE(review): ``val in ends`` is a substring test on the
            # string ``ends``, not a comparison with single characters
            if (brace == bracket == parant == 0) and (
                val in ends or typ in endtypes):
                break
            elif mediaqueryendonly and brace == -1 and (
                 bracket == parant == 0) and typ in endtypes:
                # mediaqueryendonly with STRING
                break

    if separateEnd:
        # TODO: use this method as generator, then this makes sense
        if resulttokens:
            return resulttokens[:-1], resulttokens[-1]
        else:
            return resulttokens, None
    else:
        return resulttokens
282
283 - def _valuestr(self, t):
284 """ 285 returns string value of t (t may be a string, a list of token tuples 286 or a single tuple in format (type, value, line, col). 287 Mainly used to get a string value of t for error messages. 288 """ 289 if not t: 290 return u'' 291 elif isinstance(t, basestring): 292 return t 293 else: 294 return u''.join([x[1] for x in t])
295
296 - def _adddefaultproductions(self, productions, new=None):
297 """ 298 adds default productions if not already present, used by 299 _parse only 300 301 each production should return the next expected token 302 normaly a name like "uri" or "EOF" 303 some have no expectation like S or COMMENT, so simply return 304 the current value of self.__expected 305 """ 306 def ATKEYWORD(expected, seq, token, tokenizer=None): 307 "TODO: add default impl for unexpected @rule?" 308 if expected != 'EOF': 309 # TODO: parentStyleSheet=self 310 rule = cssutils.css.CSSUnknownRule() 311 rule.cssText = self._tokensupto2(tokenizer, token) 312 if rule.wellformed: 313 seq.append(rule) 314 return expected 315 else: 316 new['wellformed'] = False 317 self._log.error(u'Expected EOF.', token=token) 318 return expected
319 320 def COMMENT(expected, seq, token, tokenizer=None): 321 "default implementation for COMMENT token adds CSSCommentRule" 322 seq.append(cssutils.css.CSSComment([token])) 323 return expected 324 325 def S(expected, seq, token, tokenizer=None): 326 "default implementation for S token, does nothing" 327 return expected 328 329 def EOF(expected=None, seq=None, token=None, tokenizer=None): 330 "default implementation for EOF token" 331 return 'EOF' 332 333 p = {'ATKEYWORD': ATKEYWORD, 334 'COMMENT': COMMENT, 335 'S': S, 336 'EOF': EOF # only available if fullsheet 337 } 338 p.update(productions) 339 return p 340
341 - def _parse(self, expected, seq, tokenizer, productions, default=None, 342 new=None):
343 """ 344 puts parsed tokens in seq by calling a production with 345 (seq, tokenizer, token) 346 347 expected 348 a name what token or value is expected next, e.g. 'uri' 349 seq 350 to add rules etc to 351 tokenizer 352 call tokenizer.next() to get next token 353 productions 354 callbacks {tokentype: callback} 355 default 356 default callback if tokentype not in productions 357 new 358 used to init default productions 359 360 returns (wellformed, expected) which the last prod might have set 361 """ 362 wellformed = True 363 if tokenizer: 364 prods = self._adddefaultproductions(productions, new) 365 for token in tokenizer: 366 p = prods.get(token[0], default) 367 if p: 368 expected = p(expected, seq, token, tokenizer) 369 else: 370 wellformed = False 371 self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token) 372 return wellformed, expected
373
class Base2(Base):
    """
    Base class for new seq handling, used by Selector for now only
    """
    def __init__(self):
        "starts with an empty (readonly) Seq"
        self._seq = Seq()

    def _setSeq(self, newseq):
        """
        sets newseq and makes it readonly
        """
        newseq._readonly = True
        self._seq = newseq

    seq = property(lambda self: self._seq, doc="seq for most classes")

    def _tempSeq(self, readonly=False):
        "get a writeable Seq() which is added later"
        return Seq(readonly=readonly)

    def _adddefaultproductions(self, productions, new=None):
        """
        Return ``productions`` completed with default productions for
        ATKEYWORD, COMMENT, S and EOF; used by ``_parse`` only.

        productions
            dict {tokentype: callback}, entries given here win over the
            defaults
        new
            optional dict; if given ``new['wellformed']`` is set to
            ``False`` by the default productions when a token is found
            although EOF is expected. If ``None`` only the error is
            logged.

        Each production is called with
        ``(expected, seq, token, tokenizer)`` and should return the next
        expected token, normally a name like "uri" or "EOF". Some have no
        expectation like S or COMMENT so these simply return the
        ``expected`` value they were called with.
        """
        def notwellformed():
            # ``new`` is optional so only flag it if it was given
            if new is not None:
                new['wellformed'] = False

        def ATKEYWORD(expected, seq, token, tokenizer=None):
            "default impl for unexpected @rule"
            if expected != 'EOF':
                # TODO: parentStyleSheet=self
                rule = cssutils.css.CSSUnknownRule()
                rule.cssText = self._tokensupto2(tokenizer, token)
                if rule.wellformed:
                    seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
                               line=token[2], col=token[3])
                return expected
            else:
                notwellformed()
                self._log.error(u'Expected EOF.', token=token)
                return expected

        def COMMENT(expected, seq, token, tokenizer=None):
            """
            default impl, adds a CSSComment to seq; if EOF is expected
            an error is logged first but the comment is added anyway
            """
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found comment.',
                                token=token)
            seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
            return expected

        def S(expected, seq, token, tokenizer=None):
            "default impl, only logs an error if EOF is expected"
            if expected == 'EOF':
                notwellformed()
                self._log.error(u'Expected EOF but found whitespace.',
                                token=token)
            return expected

        def EOF(expected=None, seq=None, token=None, tokenizer=None):
            "default implementation for EOF token"
            return 'EOF'

        defaultproductions = {'ATKEYWORD': ATKEYWORD,
                              'COMMENT': COMMENT,
                              'S': S,
                              'EOF': EOF # only available if fullsheet
                              }
        defaultproductions.update(productions)
        return defaultproductions
class Seq(object):
    """
    property seq of Base2 inheriting classes, holds a list of Item objects.

    used only by Selector for now

    is normally readonly, only writable during parsing
    """
    def __init__(self, readonly=True):
        """
        starts with an empty list of items

        readonly
            if true (default) ``append``, ``appendItem`` and ``replace``
            raise AttributeError
        """
        self._seq = []
        self._readonly = readonly

    def __delitem__(self, i):
        # NOTE(review): not guarded by the readonly flag
        del self._seq[i]

    def __getitem__(self, i):
        return self._seq[i]

    def __setitem__(self, i, item):
        """
        ``seq[i] = (val, typ, line, col)`` puts a new Item at index i,
        the tuple must have exactly these four values

        NOTE(review): not guarded by the readonly flag
        """
        # unpack in the body, tuple parameters are no valid syntax anymore
        # in newer Python versions
        val, typ, line, col = item
        self._seq[i] = Item(val, typ, line, col)

    def __iter__(self):
        return iter(self._seq)

    def __len__(self):
        return len(self._seq)

    def append(self, val, typ, line=None, col=None):
        "if not readonly add new Item()"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(Item(val, typ, line, col))

    def appendItem(self, item):
        "if not readonly add item which must be an Item"
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq.append(item)

    def replace(self, index=-1, val=None, typ=None, line=None, col=None):
        """
        if not readonly replace Item at index with a new Item built from
        all given values (values not given are set to None, nothing is
        kept from the replaced Item)
        """
        if self._readonly:
            raise AttributeError('Seq is readonly.')
        else:
            self._seq[index] = Item(val, typ, line, col)

    def __repr__(self):
        "returns a repr listing a tuple (type, value) for each item"
        return u'cssutils.%s.%s([\n %s])' % (self.__module__,
            self.__class__.__name__,
            u',\n '.join([u'(%r, %r)' % (item.type, item.value)
                          for item in self._seq]
            ))

    def __str__(self):
        return "<cssutils.%s.%s object length=%r at 0x%x>" % (
                self.__module__, self.__class__.__name__, len(self), id(self))
513
class Item(object):
    """
    A single entry of a seq list (successor to tuple items in old seq).

    All attributes are set on construction and are readonly afterwards:

    type
        a semantic type like "element", "attribute"
    value
        the actual value which may be a string, number etc or an instance
        of e.g. a CSSComment
    line
        optional line, ``None`` if not given
    col
        optional column, ``None`` if not given
    """
    def __init__(self, value, type, line=None, col=None):
        self.__value = value
        self.__type = type
        self.__line = line
        self.__col = col

    # readonly access to the private values

    @property
    def type(self):
        return self.__type

    @property
    def value(self):
        return self.__value

    @property
    def line(self):
        return self.__line

    @property
    def col(self):
        return self.__col

    def __repr__(self):
        location = (self.__module__, self.__class__.__name__)
        content = (self.__value, self.__type, self.__line, self.__col)
        return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
            location + content)
543
class ListSeq(object):
    """
    (EXPERIMENTAL)
    Base class for list like classes, e.g. css.SelectorList or
    stylesheets.MediaList.

    Gives list like behaviour to the inheriting class, all working on its
    attribute ``seq``:

    - item in x => bool
    - len(x) => integer
    - get, set and del x[i]
    - for item in x
    - append(item)

    ``__setitem__`` and ``append`` must be overwritten in the inheriting
    class, here both raise NotImplementedError only.
    """
    def __init__(self):
        # a simple list is enough here, no need to use ``Seq``
        self.seq = []

    def __len__(self):
        return len(self.seq)

    def __contains__(self, item):
        return item in self.seq

    def __iter__(self):
        "yields the items of ``seq``"
        for entry in self.seq:
            yield entry

    def __getitem__(self, index):
        return self.seq[index]

    def __delitem__(self, index):
        del self.seq[index]

    def __setitem__(self, index, item):
        "must be overwritten"
        raise NotImplementedError

    def append(self, item):
        "must be overwritten"
        raise NotImplementedError
589
590 591 -class _Namespaces(object):
592 """ 593 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet. 594 Works on effective namespaces, so e.g. if:: 595 596 @namespace p1 "uri"; 597 @namespace p2 "uri"; 598 599 only the second rule is effective and kept. 600 601 namespaces 602 a dictionary {prefix: namespaceURI} containing the effective namespaces 603 only. These are the latest set in the CSSStyleSheet. 604 parentStyleSheet 605 the parent CSSStyleSheet 606 """
607 - def __init__(self, parentStyleSheet, *args):
608 "no initial values are set, only the relevant sheet is" 609 self.parentStyleSheet = parentStyleSheet
610
611 - def __contains__(self, prefix):
612 return prefix in self.namespaces
613
614 - def __delitem__(self, prefix):
615 """deletes CSSNamespaceRule(s) with rule.prefix == prefix 616 617 prefix '' and None are handled the same 618 """ 619 if not prefix: 620 prefix = u'' 621 delrule = self.__findrule(prefix) 622 for i, rule in enumerate(ifilter(lambda r: r.type == r.NAMESPACE_RULE, 623 self.parentStyleSheet.cssRules)): 624 if rule == delrule: 625 self.parentStyleSheet.deleteRule(i) 626 return 627 628 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
629
630 - def __getitem__(self, prefix):
631 try: 632 return self.namespaces[prefix] 633 except KeyError, e: 634 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
635
636 - def __iter__(self):
637 return self.namespaces.__iter__()
638
639 - def __len__(self):
640 return len(self.namespaces)
641
642 - def __setitem__(self, prefix, namespaceURI):
643 "replaces prefix or sets new rule, may raise NoModificationAllowedErr" 644 if not prefix: 645 prefix = u'' # None or '' 646 rule = self.__findrule(prefix) 647 if not rule: 648 self.parentStyleSheet.insertRule(cssutils.css.CSSNamespaceRule( 649 prefix=prefix, 650 namespaceURI=namespaceURI), 651 inOrder=True) 652 else: 653 if prefix in self.namespaces: 654 rule.namespaceURI = namespaceURI # raises NoModificationAllowedErr 655 if namespaceURI in self.namespaces.values(): 656 rule.prefix = prefix
657
658 - def __findrule(self, prefix):
659 # returns namespace rule where prefix == key 660 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 661 reversed(self.parentStyleSheet.cssRules)): 662 if rule.prefix == prefix: 663 return rule
664
665 - def __getNamespaces(self):
666 namespaces = {} 667 for rule in ifilter(lambda r: r.type == r.NAMESPACE_RULE, 668 reversed(self.parentStyleSheet.cssRules)): 669 if rule.namespaceURI not in namespaces.values(): 670 namespaces[rule.prefix] = rule.namespaceURI 671 return namespaces
672 673 namespaces = property(__getNamespaces, 674 doc=u'Holds only effective @namespace rules in self.parentStyleSheets' 675 '@namespace rules.') 676
677 - def get(self, prefix, default):
678 return self.namespaces.get(prefix, default)
679
680 - def items(self):
681 return self.namespaces.items()
682
683 - def keys(self):
684 return self.namespaces.keys()
685
686 - def values(self):
687 return self.namespaces.values()
688
689 - def prefixForNamespaceURI(self, namespaceURI):
690 """ 691 returns effective prefix for given namespaceURI or raises IndexError 692 if this cannot be found""" 693 for prefix, uri in self.namespaces.items(): 694 if uri == namespaceURI: 695 return prefix 696 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
697
698 - def __str__(self):
699 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % ( 700 self.__class__.__name__, str(self.parentStyleSheet), id(self))
701
class _SimpleNamespaces(_Namespaces):
    """
    Namespaces for objects like Selector which are not (yet) connected to
    a CSSStyleSheet; the mapping {prefix: namespaceURI} is kept in a plain
    dict instead of being read from the rules of a sheet.
    """
    def __init__(self, *args):
        "``args`` are passed on to ``dict()`` to build the initial mapping"
        self.__namespaces = dict(*args)

    def __setitem__(self, prefix, namespaceURI):
        "sets or replaces namespaceURI of prefix"
        self.__namespaces[prefix] = namespaceURI

    def __getNamespaces(self):
        return self.__namespaces

    namespaces = property(__getNamespaces,
                          doc=u'Dict Wrapper for self.sheets @namespace rules.')

    def __str__(self):
        classname = self.__class__.__name__
        return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
                classname, self.namespaces, id(self))

    def __repr__(self):
        classname = self.__class__.__name__
        return u"cssutils.util.%s(%r)" % (classname, self.namespaces)
724
725 726 -def _defaultFetcher(url):
727 """Retrieve data from ``url``. cssutils default implementation of fetch 728 URL function. 729 730 Returns ``(encoding, string)`` or ``None`` 731 """ 732 try: 733 res = urllib2.urlopen(url) 734 except WindowsError, e: 735 # e.g if file URL and not found 736 cssutils.log.warn(e, error=WindowsError) 737 except (WindowsError, ValueError), e: 738 # invalid url, e.g. "1" 739 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError) 740 except urllib2.HTTPError, e: 741 # http error, e.g. 404, e can be raised 742 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' % 743 (url, e.code, e.msg), error=e) 744 except urllib2.URLError, e: 745 # URLError like mailto: or other IO errors, e can be raised 746 cssutils.log.warn(u'URLError, %s' % e.reason, error=e) 747 else: 748 if res: 749 mimeType, encoding = encutils.getHTTPInfo(res) 750 if mimeType != u'text/css': 751 cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' % 752 (url, mimeType), error=ValueError) 753 return encoding, res.read()
754
def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
    """
    Read cssText from url and decode it using all relevant methods (HTTP
    header, BOM, @charset). Returns encoding (which is needed to set encoding
    of stylesheet properly) and decoded text

    Returns ``(None, None)`` if the fetcher returns nothing usable (not a
    pair or a pair without content). The decoded text is ``None`` if the
    content cannot be decoded with the encoding found, a warning is
    logged in this case.

    ``fetcher``
        see cssutils.registerFetchUrl for details, defaults to
        ``_defaultFetcher``. Is called with ``url`` and expected to return
        ``(encoding, content)``
    ``overrideEncoding``
        If given this encoding is used and all other encoding information is
        ignored (HTTP, BOM etc)
    ``parentEncoding``
        Encoding of parent stylesheet (while e.g. reading @import references sheets)
        or document if available.

    Priority or encoding information
    --------------------------------
    **cssutils only**: overrideEncoding

    1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
    2. BOM and/or @charset (see below)
    3. <link charset=""> or other metadata from the linking mechanism (if any)
    4. charset of referring style sheet or document (if any)
    5. Assume UTF-8

    """
    if not fetcher:
        fetcher = _defaultFetcher
    r = fetcher(url)
    if r and len(r) == 2 and r[1] is not None:
        httpEncoding, content = r
        # NOTE(review): a unicode string of three characters, not the three
        # BOM bytes - confirm this is intended
        UTF8_BOM = u'\xEF\xBB\xBF'

        if overrideEncoding:
            # 0. override encoding
            encoding = overrideEncoding
        elif httpEncoding:
            # 1. HTTP
            encoding = httpEncoding
        else:
            try:
                # presumably content is a byte string here so comparing it
                # with unicode raises UnicodeDecodeError for non ascii
                # content, see the except clause
                if content.startswith(u'@charset "utf-8";') or \
                   content.startswith(UTF8_BOM + u'@charset "utf-8";'):
                    # 2. BOM/@charset: explicitly UTF-8
                    contentEncoding = 'utf-8'
                else:
                    # other encoding with ascii content as not UnicodeDecodeError
                    contentEncoding = False
            except UnicodeDecodeError, e:
                # other encoding in any way (with other than ascii content)
                contentEncoding = False

            if contentEncoding:
                encoding = contentEncoding
            else:
                # contentEncoding may be UTF-8 but this may not be explicit
                (contentEncoding, explicit) = cssutils.codec.detectencoding_str(content)
                # contentEncoding may be None for empty string!
                if contentEncoding and explicit:
                    # 2. BOM/@charset: explicitly not UTF-8
                    encoding = contentEncoding
                else:
                    # 4. parent stylesheet or document
                    # may also be None in which case 5. is used in next step anyway
                    encoding = parentEncoding
        try:
            # encoding may still be wrong if encoding *is lying*!
            # element [1] of the codec lookup result is the decoder function
            decodedContent = codecs.lookup("css")[1](content, encoding=encoding)[0]
        except UnicodeDecodeError, e:
            cssutils.log.warn(e, neverraise=True)
            decodedContent = None

        return encoding, decodedContent
    else:
        return None, None
830