1 """base classes and helper functions for css and stylesheets packages
2 """
3 __all__ = []
4 __docformat__ = 'restructuredtext'
5 __version__ = '$Id: util.py 1345 2008-07-09 20:51:59Z cthedot $'
6
7 import codecs
8 from itertools import ifilter
9 import re
10 import types
11 import urllib2
12 import xml.dom
13 import cssutils
14 from tokenize2 import Tokenizer
15
16 import encutils
17
18 -class Base(object):
19 """
20 Base class for most CSS and StyleSheets classes
21
22 **Superceded by Base2 which is used for new seq handling class.**
23 See cssutils.util.Base2
24
25 Contains helper methods for inheriting classes helping parsing
26
27 ``_normalize`` is static as used by Preferences.
28 """
29 __tokenizer2 = Tokenizer()
30
31 _log = cssutils.log
32 _prods = cssutils.tokenize2.CSSProductions
33
34
35
36
37 _SHORTHANDPROPERTIES = {
38 u'background': [],
39 u'border': [],
40 u'border-left': [],
41 u'border-right': [],
42 u'border-top': [],
43 u'border-bottom': [],
44 u'border-color': [],
45 u'border-style': [],
46 u'border-width': [],
47 u'cue': [],
48 u'font': [],
49
50
51
52
53 u'list-style': [],
54 u'margin': [],
55 u'outline': [],
56 u'padding': [],
57 u'pause': []
58 }
59
60
61 __simpleescapes = re.compile(ur'(\\[^0-9a-fA-F])').sub
62
63 @staticmethod
65 """
66 normalizes x, namely:
67
68 - remove any \ before non unicode sequences (0-9a-zA-Z) so for
69 x=="c\olor\" return "color" (unicode escape sequences should have
70 been resolved by the tokenizer already)
71 - lowercase
72 """
73 if x:
74 def removeescape(matchobj):
75 return matchobj.group(0)[1:]
76 x = Base.__simpleescapes(removeescape, x)
77 return x.lower()
78 else:
79 return x
80
82 "raises xml.dom.NoModificationAllowedErr if rule/... is readonly"
83 if hasattr(self, '_readonly') and self._readonly:
84 raise xml.dom.NoModificationAllowedErr(
85 u'%s is readonly.' % self.__class__)
86 return True
87 return False
88
90 """
91 returns tuple (text, dict-of-namespaces) or if no namespaces are
92 in cssText returns (cssText, {})
93
94 used in Selector, SelectorList, CSSStyleRule, CSSMediaRule and
95 CSSStyleSheet
96 """
97 if isinstance(text_namespaces_tuple, tuple):
98 return text_namespaces_tuple[0], _SimpleNamespaces(
99 text_namespaces_tuple[1])
100 else:
101 return text_namespaces_tuple, _SimpleNamespaces()
102
104 """
105 returns tokens of textortokens which may already be tokens in which
106 case simply returns input
107 """
108 if not textortokens:
109 return None
110 elif isinstance(textortokens, basestring):
111
112 return self.__tokenizer2.tokenize(
113 textortokens)
114 elif types.GeneratorType == type(textortokens):
115
116 return textortokens
117 elif isinstance(textortokens, tuple):
118
119 return [textortokens]
120 else:
121
122 return (x for x in textortokens)
123
125 "returns next token in generator tokenizer or the default value"
126 try:
127 return tokenizer.next()
128 except (StopIteration, AttributeError):
129 return default
130
132 "returns type of Tokenizer token"
133 if token:
134 return token[0]
135 else:
136 return None
137
139 "returns value of Tokenizer token"
140 if token and normalize:
141 return Base._normalize(token[1])
142 elif token:
143 return token[1]
144 else:
145 return None
146
148 """
149 for STRING returns the actual content without surrounding "" or ''
150 and without respective escapes, e.g.::
151
152 "with \" char" => with " char
153 """
154 if token:
155 value = token[1]
156 return value.replace('\\'+value[0], value[0])[1:-1]
157 else:
158 return None
159
161 """
162 for URI returns the actual content without surrounding url()
163 or url(""), url('') and without respective escapes, e.g.::
164
165 url("\"") => "
166 """
167 if token:
168 value = token[1][4:-1].strip()
169 if (value[0] in '\'"') and (value[0] == value[-1]):
170
171 value = value.replace('\\'+value[0], value[0])[1:-1]
172 return value
173 else:
174 return None
175
def _tokensupto2(self,
                 tokenizer,
                 starttoken=None,
                 blockstartonly=False,
                 blockendonly=False,
                 mediaendonly=False,
                 importmediaqueryendonly=False,
                 mediaqueryendonly=False,
                 semicolon=False,
                 propertynameendonly=False,
                 propertyvalueendonly=False,
                 propertypriorityendonly=False,
                 selectorattendonly=False,
                 funcendonly=False,
                 listseponly=False,
                 separateEnd=False
                 ):
    """
    Collect tokens from ``tokenizer`` up to and including an end token.

    Without any flag the end token is the first ";" or "}" found while
    no brace, bracket or parenthesis is open. The ``...only`` flags select
    a different set of end values (and for the media query variants also
    the token type STRING); only the first flag which is set is used, in
    the order the parameters are declared.

    tokenizer
        iterable of ``(type, value, line, col)`` tokens, may be empty/None
    starttoken
        if given, it is put first into the result without being inspected
        (except by ``selectorattendonly`` which checks it for "[")
    separateEnd
        if set returns ``(tokens, endtoken)`` where ``endtoken`` is the
        last collected token (``None`` if nothing was collected),
        otherwise a single list of all collected tokens

    An EOF token always ends the collection and is part of the result.
    """
    # defaults: stop at ";" or "}" on nesting level 0
    ends = u';}'
    endtypes = ()
    brace = bracket = parant = 0

    if blockstartonly:
        # the opening "{" itself raises brace from -1 to 0
        ends = u'{'
        brace = -1
    elif blockendonly or mediaendonly:
        # already inside a block, so its "}" lowers brace from 1 to 0
        ends = u'}'
        brace = 1
    elif importmediaqueryendonly:
        ends = u';'
        endtypes = ('STRING',)
    elif mediaqueryendonly:
        ends = u'{'
        brace = -1
        endtypes = ('STRING',)
    elif semicolon:
        ends = u';'
    elif propertynameendonly:
        ends = u':;'
    elif propertyvalueendonly:
        ends = u';!'
    elif propertypriorityendonly:
        ends = u';'
    elif selectorattendonly:
        ends = u']'
        if starttoken and self._tokenvalue(starttoken) == u'[':
            bracket = 1
    elif funcendonly:
        # already inside the function, its ")" lowers parant from 1 to 0
        ends = u')'
        parant = 1
    elif listseponly:
        ends = u','

    collected = []
    if starttoken:
        collected.append(starttoken)

    if tokenizer:
        # value -> (brace delta, bracket delta)
        nesting = {u'{': (1, 0), u'}': (-1, 0),
                   u'[': (0, 1), u']': (0, -1)}
        for token in tokenizer:
            typ, val, _line, _col = token
            if typ == 'EOF':
                collected.append(token)
                break

            if val in nesting:
                bracedelta, bracketdelta = nesting[val]
                brace += bracedelta
                bracket += bracketdelta
            elif val == u'(' or typ == Base._prods.FUNCTION:
                # a FUNCTION token opens a parenthesis too
                parant += 1
            elif val == u')':
                parant -= 1

            collected.append(token)

            nothingopen = (bracket == 0 and parant == 0)
            if nothingopen and brace == 0 and (
                    val in ends or typ in endtypes):
                break
            if mediaqueryendonly and nothingopen and brace == -1 and (
                    typ in endtypes):
                # end type found while brace still has its start value
                break

    if not separateEnd:
        return collected
    if collected:
        return collected[:-1], collected[-1]
    return collected, None
282
284 """
285 returns string value of t (t may be a string, a list of token tuples
286 or a single tuple in format (type, value, line, col).
287 Mainly used to get a string value of t for error messages.
288 """
289 if not t:
290 return u''
291 elif isinstance(t, basestring):
292 return t
293 else:
294 return u''.join([x[1] for x in t])
295
297 """
298 adds default productions if not already present, used by
299 _parse only
300
301 each production should return the next expected token
302 normaly a name like "uri" or "EOF"
303 some have no expectation like S or COMMENT, so simply return
304 the current value of self.__expected
305 """
306 def ATKEYWORD(expected, seq, token, tokenizer=None):
307 "TODO: add default impl for unexpected @rule?"
308 if expected != 'EOF':
309
310 rule = cssutils.css.CSSUnknownRule()
311 rule.cssText = self._tokensupto2(tokenizer, token)
312 if rule.wellformed:
313 seq.append(rule)
314 return expected
315 else:
316 new['wellformed'] = False
317 self._log.error(u'Expected EOF.', token=token)
318 return expected
319
320 def COMMENT(expected, seq, token, tokenizer=None):
321 "default implementation for COMMENT token adds CSSCommentRule"
322 seq.append(cssutils.css.CSSComment([token]))
323 return expected
324
325 def S(expected, seq, token, tokenizer=None):
326 "default implementation for S token, does nothing"
327 return expected
328
329 def EOF(expected=None, seq=None, token=None, tokenizer=None):
330 "default implementation for EOF token"
331 return 'EOF'
332
333 p = {'ATKEYWORD': ATKEYWORD,
334 'COMMENT': COMMENT,
335 'S': S,
336 'EOF': EOF
337 }
338 p.update(productions)
339 return p
340
def _parse(self, expected, seq, tokenizer, productions, default=None,
           new=None):
    """
    Feed every token of ``tokenizer`` to the production registered for
    its type; a production is called with
    ``(expected, seq, token, tokenizer)`` and its return value becomes
    the new ``expected``.

    expected
        name of the token or value which is expected next, e.g. 'uri'
    seq
        passed on to the productions which may add rules etc to it
    tokenizer
        iterable of tokens, ``token[0]`` being the token type
    productions
        callbacks ``{tokentype: callback}``, completed with the result of
        ``self._adddefaultproductions``
    default
        callback used if the token type has no production
    new
        passed on to ``self._adddefaultproductions``

    A token without any callback is logged as error and makes the result
    not wellformed.

    returns ``(wellformed, expected)``; if ``tokenizer`` is empty this is
    ``(True, expected)`` with ``expected`` unchanged
    """
    wellformed = True
    if not tokenizer:
        return wellformed, expected

    callbacks = self._adddefaultproductions(productions, new)
    for token in tokenizer:
        callback = callbacks.get(token[0], default)
        if not callback:
            wellformed = False
            self._log.error(u'Unexpected token (%s, %s, %s, %s)' % token)
            continue
        expected = callback(expected, seq, token, tokenizer)

    return wellformed, expected
373
376 """
377 Base class for new seq handling, used by Selector for now only
378 """
381
383 """
384 sets newseq and makes it readonly
385 """
386 newseq._readonly = True
387 self._seq = newseq
388
389 seq = property(lambda self: self._seq, doc="seq for most classes")
390
392 "get a writeable Seq() which is added later"
393 return Seq(readonly=readonly)
394
396 """
397 adds default productions if not already present, used by
398 _parse only
399
400 each production should return the next expected token
401 normaly a name like "uri" or "EOF"
402 some have no expectation like S or COMMENT, so simply return
403 the current value of self.__expected
404 """
405 def ATKEYWORD(expected, seq, token, tokenizer=None):
406 "default impl for unexpected @rule"
407 if expected != 'EOF':
408
409 rule = cssutils.css.CSSUnknownRule()
410 rule.cssText = self._tokensupto2(tokenizer, token)
411 if rule.wellformed:
412 seq.append(rule, cssutils.css.CSSRule.UNKNOWN_RULE,
413 line=token[2], col=token[3])
414 return expected
415 else:
416 new['wellformed'] = False
417 self._log.error(u'Expected EOF.', token=token)
418 return expected
419
420 def COMMENT(expected, seq, token, tokenizer=None):
421 "default impl, adds CSSCommentRule if not token == EOF"
422 if expected == 'EOF':
423 new['wellformed'] = False
424 self._log.error(u'Expected EOF but found comment.', token=token)
425 seq.append(cssutils.css.CSSComment([token]), 'COMMENT')
426 return expected
427
428 def S(expected, seq, token, tokenizer=None):
429 "default impl, does nothing if not token == EOF"
430 if expected == 'EOF':
431 new['wellformed'] = False
432 self._log.error(u'Expected EOF but found whitespace.', token=token)
433 return expected
434
435 def EOF(expected=None, seq=None, token=None, tokenizer=None):
436 "default implementation for EOF token"
437 return 'EOF'
438
439 defaultproductions = {'ATKEYWORD': ATKEYWORD,
440 'COMMENT': COMMENT,
441 'S': S,
442 'EOF': EOF
443 }
444 defaultproductions.update(productions)
445 return defaultproductions
446
447
448 -class Seq(object):
449 """
450 property seq of Base2 inheriting classes, holds a list of Item objects.
451
452 used only by Selector for now
453
454 is normally readonly, only writable during parsing
455 """
457 """
458 only way to write to a Seq is to initialize it with new items
459 each itemtuple has (value, type, line) where line is optional
460 """
461 self._seq = []
462 self._readonly = readonly
463
466
469
472
474 return iter(self._seq)
475
477 return len(self._seq)
478
def append(self, val, typ, line=None, col=None):
    """
    Wrap the given values in a new Item() and add it at the end.

    Raises AttributeError if this Seq is readonly.
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    newitem = Item(val, typ, line, col)
    self._seq.append(newitem)
485
487 "if not readonly add item which must be an Item"
488 if self._readonly:
489 raise AttributeError('Seq is readonly.')
490 else:
491 self._seq.append(item)
492
def replace(self, index=-1, val=None, typ=None, line=None, col=None):
    """
    Put a new Item built from ``val``, ``typ``, ``line`` and ``col`` at
    position ``index`` (default: the last position).

    The Item currently at ``index`` is dropped completely, none of its
    values are carried over.

    Raises AttributeError if this Seq is readonly.
    """
    if self._readonly:
        raise AttributeError('Seq is readonly.')
    newitem = Item(val, typ, line, col)
    self._seq[index] = newitem
502
504 "returns a repr same as a list of tuples of (value, type)"
505 return u'cssutils.%s.%s([\n %s])' % (self.__module__,
506 self.__class__.__name__,
507 u',\n '.join([u'(%r, %r)' % (item.type, item.value)
508 for item in self._seq]
509 ))
511 return "<cssutils.%s.%s object length=%r at 0x%x>" % (
512 self.__module__, self.__class__.__name__, len(self), id(self))
513
515 """
516 an item in the seq list of classes (successor to tuple items in old seq)
517
518 each item has attributes:
519
520 type
521 a sematic type like "element", "attribute"
522 value
523 the actual value which may be a string, number etc or an instance
524 of e.g. a CSSComment
525 *line*
526 **NOT IMPLEMENTED YET, may contain the line in the source later**
527 """
def __init__(self, value, type, line=None, col=None):
    # the values are kept in private attributes, the class offers them
    # via its properties type, value, line and col
    self.__value, self.__type = value, type
    self.__line, self.__col = line, col
533
534 type = property(lambda self: self.__type)
535 value = property(lambda self: self.__value)
536 line = property(lambda self: self.__line)
537 col = property(lambda self: self.__col)
538
540 return "%s.%s(value=%r, type=%r, line=%r, col=%r)" % (
541 self.__module__, self.__class__.__name__,
542 self.__value, self.__type, self.__line, self.__col)
543
546 """
547 (EXPERIMENTAL)
548 A base class used for list classes like css.SelectorList or
549 stylesheets.MediaList
550
551 adds list like behaviour running on inhering class' property ``seq``
552
553 - item in x => bool
554 - len(x) => integer
555 - get, set and del x[i]
556 - for item in x
557 - append(item)
558
559 some methods must be overwritten in inheriting class
560 """
563
566
569
571 return self.seq[index]
572
574 def gen():
575 for x in self.seq:
576 yield x
577 return gen()
578
581
583 "must be overwritten"
584 raise NotImplementedError
585
587 "must be overwritten"
588 raise NotImplementedError
589
592 """
593 A dictionary like wrapper for @namespace rules used in a CSSStyleSheet.
594 Works on effective namespaces, so e.g. if::
595
596 @namespace p1 "uri";
597 @namespace p2 "uri";
598
599 only the second rule is effective and kept.
600
601 namespaces
602 a dictionary {prefix: namespaceURI} containing the effective namespaces
603 only. These are the latest set in the CSSStyleSheet.
604 parentStyleSheet
605 the parent CSSStyleSheet
606 """
607 - def __init__(self, parentStyleSheet, *args):
610
613
629
631 try:
632 return self.namespaces[prefix]
633 except KeyError, e:
634 raise xml.dom.NamespaceErr('Prefix %r not found.' % prefix)
635
638
641
657
664
672
673 namespaces = property(__getNamespaces,
674 doc=u'Holds only effective @namespace rules in self.parentStyleSheets'
675 '@namespace rules.')
676
677 - def get(self, prefix, default):
679
682
685
688
690 """
691 returns effective prefix for given namespaceURI or raises IndexError
692 if this cannot be found"""
693 for prefix, uri in self.namespaces.items():
694 if uri == namespaceURI:
695 return prefix
696 raise IndexError(u'NamespaceURI %r not found.' % namespaceURI)
697
699 return u"<cssutils.util.%s object parentStyleSheet=%r at 0x%x>" % (
700 self.__class__.__name__, str(self.parentStyleSheet), id(self))
701
704 """
705 namespaces used in objects like Selector as long as they are not connected
706 to a CSSStyleSheet
707 """
709 self.__namespaces = dict(*args)
710
713
714 namespaces = property(lambda self: self.__namespaces,
715 doc=u'Dict Wrapper for self.sheets @namespace rules.')
716
718 return u"<cssutils.util.%s object namespaces=%r at 0x%x>" % (
719 self.__class__.__name__, self.namespaces, id(self))
720
722 return u"cssutils.util.%s(%r)" % (self.__class__.__name__,
723 self.namespaces)
724
727 """Retrieve data from ``url``. cssutils default implementation of fetch
728 URL function.
729
730 Returns ``(encoding, string)`` or ``None``
731 """
732 try:
733 res = urllib2.urlopen(url)
734 except WindowsError, e:
735
736 cssutils.log.warn(e, error=WindowsError)
737 except (WindowsError, ValueError), e:
738
739 cssutils.log.warn(u'ValueError, %s' % e.message, error=ValueError)
740 except urllib2.HTTPError, e:
741
742 cssutils.log.warn(u'HTTPError opening url=%r: %s %s' %
743 (url, e.code, e.msg), error=e)
744 except urllib2.URLError, e:
745
746 cssutils.log.warn(u'URLError, %s' % e.reason, error=e)
747 else:
748 if res:
749 mimeType, encoding = encutils.getHTTPInfo(res)
750 if mimeType != u'text/css':
751 cssutils.log.error(u'Expected "text/css" mime type for url=%s but found: %r' %
752 (url, mimeType), error=ValueError)
753 return encoding, res.read()
754
755 -def _readUrl(url, fetcher=None, overrideEncoding=None, parentEncoding=None):
756 """
757 Read cssText from url and decode it using all relevant methods (HTTP
758 header, BOM, @charset). Returns encoding (which is needed to set encoding
759 of stylesheet properly) and decoded text
760
761 ``fetcher``
762 see cssutils.registerFetchUrl for details
763 ``overrideEncoding``
764 If given this encoding is used and all other encoding information is
765 ignored (HTTP, BOM etc)
766 ``parentEncoding``
767 Encoding of parent stylesheet (while e.g. reading @import references sheets)
768 or document if available.
769
770 Priority or encoding information
771 --------------------------------
772 **cssutils only**: overrideEncoding
773
774 1. An HTTP "charset" parameter in a "Content-Type" field (or similar parameters in other protocols)
775 2. BOM and/or @charset (see below)
776 3. <link charset=""> or other metadata from the linking mechanism (if any)
777 4. charset of referring style sheet or document (if any)
778 5. Assume UTF-8
779
780 """
781 if not fetcher:
782 fetcher = _defaultFetcher
783 r = fetcher(url)
784 if r and len(r) == 2 and r[1] is not None:
785 httpEncoding, content = r
786 UTF8_BOM = u'\xEF\xBB\xBF'
787
788 if overrideEncoding:
789
790 encoding = overrideEncoding
791 elif httpEncoding:
792
793 encoding = httpEncoding
794 else:
795 try:
796 if content.startswith(u'@charset "utf-8";') or \
797 content.startswith(UTF8_BOM + u'@charset "utf-8";'):
798
799 contentEncoding = 'utf-8'
800 else:
801
802 contentEncoding = False
803 except UnicodeDecodeError, e:
804
805 contentEncoding = False
806
807 if contentEncoding:
808 encoding = contentEncoding
809 else:
810
811 (contentEncoding, explicit) = cssutils.codec.detectencoding_str(content)
812
813 if contentEncoding and explicit:
814
815 encoding = contentEncoding
816 else:
817
818
819 encoding = parentEncoding
820 try:
821
822 decodedContent = codecs.lookup("css")[1](content, encoding=encoding)[0]
823 except UnicodeDecodeError, e:
824 cssutils.log.warn(e, neverraise=True)
825 decodedContent = None
826
827 return encoding, decodedContent
828 else:
829 return None, None
830