1 """
2 lxml-based doctest output comparison.
3
4 To use this you must call ``lxmldoctest.install()``, which will cause
5 doctest to use this in all subsequent calls.
6
7 This changes the way output is checked and comparisons are made for
8 XML or HTML-like content.
9
10 XML or HTML content is noticed because the example starts with ``<``
11 (it's HTML if it starts with ``<html``). You can also use the
12 ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
13
14 Some rough wildcard-like things are allowed. Whitespace is generally
15 ignored (except in attributes). In text (attributes and text in the
16 body) you can use ``...`` as a wildcard. In an example it also
17 matches any trailing tags in the element, though it does not match
18 leading tags. You may create a tag ``<any>`` or include an ``any``
19 attribute in the tag. An ``any`` tag matches any tag, while the
20 attribute matches any and all attributes.
21
22 When a match fails, the reformatted example and the text actually gotten are
23 displayed (indented), and a rough diff-like output is given. Anything
24 marked with ``-`` is in the output but wasn't supposed to be, and
25 similarly ``+`` means it's in the example but wasn't in the output.
26 """
27
28 from lxml import etree
29 from lxml.html import document_fromstring
30 import re
31 import doctest
32 import cgi
33
34 __all__ = ['PARSE_HTML', 'PARSE_XML', 'LXMLOutputChecker',
35 'LHTMLOutputChecker', 'install', 'temp_install']
36
37 PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
38 PARSE_XML = doctest.register_optionflag('PARSE_XML')
39
40 OutputChecker = doctest.OutputChecker
41
43 if v is None:
44 return None
45 else:
46 return v.strip()
47
50
51
52 _repr_re = re.compile(r'^<[^>]+ (at|object) ')
53 _norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
54
56
57 empty_tags = (
58 'param', 'img', 'area', 'br', 'basefont', 'input',
59 'base', 'meta', 'link', 'col')
60
63
65 alt_self = getattr(self, '_temp_override_self', None)
66 if alt_self is not None:
67 super_method = self._temp_call_super_check_output
68 self = alt_self
69 else:
70 super_method = OutputChecker.check_output
71 parser = self.get_parser(want, got, optionflags)
72 if not parser:
73 return super_method(
74 self, want, got, optionflags)
75 try:
76 want_doc = parser(want)
77 except etree.XMLSyntaxError:
78 return False
79 try:
80 got_doc = parser(got)
81 except etree.XMLSyntaxError:
82 return False
83 return self.compare_docs(want_doc, got_doc)
84
98
100 s = s.strip()
101 return (s.startswith('<')
102 and not _repr_re.search(s))
103
105 if want.tag != got.tag and want.tag != 'any':
106 return False
107 if not self.text_compare(want.text, got.text, True):
108 return False
109 if not self.text_compare(want.tail, got.tail, True):
110 return False
111 if 'any' not in want.attrib:
112 want_keys = sorted(want.attrib.keys())
113 got_keys = sorted(got.attrib.keys())
114 if want_keys != got_keys:
115 return False
116 for key in want_keys:
117 if not self.text_compare(want.attrib[key], got.attrib[key], False):
118 return False
119 if want.text != '...' or len(want):
120 want_children = list(want)
121 got_children = list(got)
122 while want_children or got_children:
123 if not want_children or not got_children:
124 return False
125 want_first = want_children.pop(0)
126 got_first = got_children.pop(0)
127 if not self.compare_docs(want_first, got_first):
128 return False
129 if not got_children and want_first.tail == '...':
130 break
131 return True
132
def text_compare(self, want, got, strip):
    """
    Return True if the text ``got`` matches the expected text ``want``.

    ``None`` is treated as the empty string for both arguments.  If
    ``strip`` is true, both texts are passed through ``norm_whitespace``
    and stripped before comparison.

    Every ``...`` in ``want`` acts as a wildcard matching any run of
    characters (including none, and including newlines); everything
    else in ``want`` must match literally, and the match must cover
    the whole of ``got``.
    """
    want = want or ''
    got = got or ''
    if strip:
        want = norm_whitespace(want).strip()
        got = norm_whitespace(got).strip()
    # Escape everything so the expected text is matched literally, then
    # turn each (now escaped) ``...`` back into a wildcard.
    pattern = re.escape(want).replace(r'\.\.\.', '.*')
    # \A and \Z anchor at the true start/end of the string; ``$`` would also
    # accept a trailing newline in ``got``.  DOTALL lets the wildcard span
    # newlines, which a bare ``.*`` cannot.
    matcher = r'\A(?:' + pattern + r')\Z'
    return re.match(matcher, got, re.DOTALL) is not None
145
147 want = example.want
148 parser = self.get_parser(want, got, optionflags)
149 errors = []
150 if parser is not None:
151 try:
152 want_doc = parser(want)
153 except etree.XMLSyntaxError, e:
154 errors.append('In example: %s' % e)
155 try:
156 got_doc = parser(got)
157 except etree.XMLSyntaxError, e:
158 errors.append('In actual output: %s' % e)
159 if parser is None or errors:
160 value = OutputChecker.output_difference(
161 self, example, got, optionflags)
162 if errors:
163 errors.append(value)
164 return '\n'.join(errors)
165 else:
166 return value
167 html = parser is document_fromstring
168 diff_parts = []
169 diff_parts.append('Expected:')
170 diff_parts.append(self.format_doc(want_doc, html, 2))
171 diff_parts.append('Got:')
172 diff_parts.append(self.format_doc(got_doc, html, 2))
173 diff_parts.append('Diff:')
174 diff_parts.append(self.collect_diff(want_doc, got_doc, html, 2))
175 return '\n'.join(diff_parts)
176
178 if not html:
179 return False
180 if el.tag not in self.empty_tags:
181 return False
182 if el.text or len(el):
183
184 return False
185 return True
186
221
228
239
245
247 parts = []
248 if not len(want) and not len(got):
249 parts.append(' '*indent)
250 parts.append(self.collect_diff_tag(want, got))
251 if not self.html_empty_tag(got, html):
252 parts.append(self.collect_diff_text(want.text, got.text))
253 parts.append(self.collect_diff_end_tag(want, got))
254 parts.append(self.collect_diff_text(want.tail, got.tail))
255 parts.append('\n')
256 return ''.join(parts)
257 parts.append(' '*indent)
258 parts.append(self.collect_diff_tag(want, got))
259 parts.append('\n')
260 if strip(want.text) or strip(got.text):
261 parts.append(' '*indent)
262 parts.append(self.collect_diff_text(want.text, got.text))
263 parts.append('\n')
264 want_children = list(want)
265 got_children = list(got)
266 while want_children or got_children:
267 if not want_children:
268 parts.append(self.format_doc(got_children.pop(0), html, indent+2, '-'))
269 continue
270 if not got_children:
271 parts.append(self.format_doc(want_children.pop(0), html, indent+2, '+'))
272 continue
273 parts.append(self.collect_diff(
274 want_children.pop(0), got_children.pop(0), html, indent+2))
275 parts.append(' '*indent)
276 parts.append(self.collect_diff_end_tag(want, got))
277 parts.append('\n')
278 if strip(want.tail) or strip(got.tail):
279 parts.append(' '*indent)
280 parts.append(self.collect_diff_text(want.tail, got.tail))
281 parts.append('\n')
282 return ''.join(parts)
283
285 if want.tag != got.tag and want.tag != 'any':
286 tag = '%s (got: %s)' % (want.tag, got.tag)
287 else:
288 tag = got.tag
289 attrs = []
290 any = want.tag == 'any' or 'any' in want.attrib
291 for name, value in sorted(got.attrib.items()):
292 if name not in want.attrib and not any:
293 attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
294 else:
295 if name in want.attrib:
296 text = self.collect_diff_text(value, want.attrib[name], False)
297 else:
298 text = self.format_text(value, False)
299 attrs.append('%s="%s"' % (name, text))
300 if not any:
301 for name, value in sorted(want.attrib.items()):
302 if name in got.attrib:
303 continue
304 attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
305 if attrs:
306 tag = '<%s %s>' % (tag, ' '.join(attrs))
307 else:
308 tag = '<%s>' % tag
309 return tag
310
312 if want.tag != got.tag:
313 tag = '%s (got: %s)' % (want.tag, got.tag)
314 else:
315 tag = got.tag
316 return '</%s>' % tag
317
def collect_diff_text(self, want, got, strip=True):
    """
    Render one piece of text for the diff output.

    When ``got`` does not match ``want`` (as decided by
    ``self.text_compare``), the result is the formatted string
    ``"<want> (got: <got>)"``.  When they match, the result is the
    formatted ``got`` text, or the empty string if ``got`` is empty
    or ``None``.  ``strip`` is forwarded to both ``text_compare`` and
    ``format_text``.
    """
    if not self.text_compare(want, got, strip):
        mismatch = '%s (got: %s)' % (want, got)
        return self.format_text(mismatch, strip)
    # Texts agree: show what was gotten, or nothing when there is no text.
    return self.format_text(got, strip) if got else ''
325
329
331 """
332 Install doctestcompare for all future doctests.
333
334 If html is true, then by default the HTML parser will be used;
335 otherwise the XML parser is used.
336 """
337 if html:
338 doctest.OutputChecker = LHTMLOutputChecker
339 else:
340 doctest.OutputChecker = LXMLOutputChecker
341
343 """
344 Use this *inside* a doctest to enable this checker for this
345 doctest only.
346
347 If html is true, then by default the HTML parser will be used;
348 otherwise the XML parser is used.
349 """
350 if html:
351 Checker = LHTMLOutputChecker
352 else:
353 Checker = LXMLOutputChecker
354 frame = _find_doctest_frame()
355 dt_self = frame.f_locals['self']
356 checker = Checker()
357 old_checker = dt_self._checker
358 dt_self._checker = checker
359
360
361
362
363
364
365
366
367
368 check_func = frame.f_locals['check'].im_func
369
370
371 doctest.etree = etree
372 _RestoreChecker(dt_self, old_checker, checker,
373 check_func, checker.check_output.im_func,
374 del_module)
375
def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
             del_module):
    """
    Record the objects involved in a temporary checker swap and activate it.

    The previous checker (``old_checker``) is kept as ``self.checker`` and
    is tagged with two attributes: ``_temp_call_super_check_output``
    (bound to ``self.call_super``) and ``_temp_override_self`` (the
    ``new_checker``).  The remaining arguments are stored unchanged on the
    instance, after which ``install_clone()`` and ``install_dt_self()``
    are invoked, in that order.
    """
    # Plain bookkeeping first: keep every collaborator on the instance.
    self.dt_self = dt_self
    self.check_func = check_func
    self.clone_func = clone_func
    self.del_module = del_module
    # Tag the old checker so it can hand control over to the new one.
    self.checker = old_checker
    old_checker._temp_call_super_check_output = self.call_super
    old_checker._temp_override_self = new_checker
    # Activate the swap only once all state above is in place.
    self.install_clone()
    self.install_dt_self()
389 self.func_code = self.check_func.func_code
390 self.func_globals = self.check_func.func_globals
391 self.check_func.func_code = self.clone_func.func_code
393 self.check_func.func_code = self.func_code
395 self.prev_func = self.dt_self._DocTestRunner__record_outcome
396 self.dt_self._DocTestRunner__record_outcome = self
398 self.dt_self._DocTestRunner__record_outcome = self.prev_func
400 if self.del_module:
401 import sys
402 del sys.modules[self.del_module]
403 if '.' in self.del_module:
404 package, module = self.del_module.rsplit('.', 1)
405 package_mod = sys.modules[package]
406 delattr(package_mod, module)
421
423 import sys
424 frame = sys._getframe(1)
425 while frame:
426 l = frame.f_locals
427 if 'BOOM' in l:
428
429 return frame
430 frame = frame.f_back
431 raise LookupError(
432 "Could not find doctest (only use this function *inside* a doctest)")
433