# Source code for aglyph.compat.ipyetree

# -*- coding: UTF-8 -*-

# Copyright (c) 2006-2015 Matthew Zipay <mattz@ninthtest.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

"""This module defines an :class:`xml.etree.ElementTree.XMLParser` that
delegates to the .NET
`System.Xml.XmlReader
<http://msdn.microsoft.com/en-us/library/system.xml.xmlreader>`_ XML
parser to parse an Aglyph XML context document.

`IronPython <http://ironpython.net/>`_ is not able to load CPython's
:mod:`xml.parsers.expat` module, and so the default parser used by
ElementTree does not exist.

.. versionadded:: 2.0.0
   To address the missing :mod:`xml.parsers.expat` module, this module\
   now defines the :class:`CLRXMLParser` class, which replaces\
   :class:`XmlReaderTreeBuilder` and is used by\
   :class:`aglyph.context.XMLContext` as the default parser when\
   running under IronPython.

Alternatively, IronPython developers may wish to install ``expat`` or an
``expat``-compatible library as a site package. **However, this has not
been tested with Aglyph.**

"""

__author__ = "Matthew Zipay <mattz@ninthtest.net>"
__version__ = "2.1.0"

import logging
import platform
import warnings
import xml.etree.ElementTree as ET

from aglyph import AglyphDeprecationWarning, AglyphError
from aglyph.compat import DoctypeTreeBuilder, is_ironpython

# Public API: the CLR-backed parser plus its deprecated former name.
__all__ = ["CLRXMLParser", "XmlReaderTreeBuilder"]

# Module-level logger, named after this module; used for the import-time
# messages emitted below.
_logger = logging.getLogger(__name__)

if (is_ironpython):
    import clr

    clr.AddReference("System.Xml")

    from System.IO import StringReader
    from System.Text.RegularExpressions import Regex, RegexOptions
    from System.Xml import (
        DtdProcessing,
        ValidationType,
        XmlNodeType,
        XmlReader,
        XmlReaderSettings
    )

    _logger.info(
        "loaded System.Xml, System.IO, and System.Text CLR namespaces")

    # .NET regular expression (note the .NET-style named group) that pulls
    # the encoding name out of the text of an XML declaration.
    CRE_ENCODING = Regex("encoding=['\"](?<enc_name>.*?)['\"]",
                         RegexOptions.Compiled)

    class CLRXMLParser(ET.XMLParser):
        """An :class:`xml.etree.ElementTree.XMLParser` that delegates
        parsing to the .NET `System.Xml.XmlReader
        <http://msdn.microsoft.com/en-us/library/system.xml.xmlreader>`_
        parser.

        All data passed to :meth:`feed` is buffered; the document is
        parsed in one pass when :meth:`close` is called. A parser
        instance is single-use: :meth:`close` discards the buffer.

        """

        __logger = logging.getLogger("%s.CLRXMLParser" % __name__)

        def __init__(self, target=None, validating=False):
            """
            :param xml.etree.ElementTree.TreeBuilder target:\
               the target object (if omitted, a\
               ``DoctypeTreeBuilder`` instance is used)
            :param bool validating:\
               specify ``True`` to use a DTD-validating parser

            """
            # NOTE(review): the base-class initializer is not called here,
            # presumably because it needs the expat module that the module
            # docstring says IronPython cannot load -- confirm.
            self.__logger.debug("TRACE target=%r, validating=%r",
                                target, validating)
            settings = XmlReaderSettings()
            settings.IgnoreComments = True
            settings.IgnoreProcessingInstructions = True
            settings.IgnoreWhitespace = True
            if (not validating):
                settings.DtdProcessing = DtdProcessing.Ignore
                # "None" cannot be written as an attribute name in Python
                # source, hence the getattr() lookup
                settings.ValidationType = getattr(ValidationType, "None")
            else:
                settings.DtdProcessing = DtdProcessing.Parse
                settings.ValidationType = ValidationType.DTD
            self.settings = settings
            self.version = "%s %s" % (platform.platform(),
                                      platform.python_compiler())
            self.__logger.debug("ET parser version is %r", self.version)
            self._target = (target if (target is not None)
                            else DoctypeTreeBuilder())
            self._buffer = []
            # used to decode text nodes unless the XML declaration names a
            # different encoding (see _parse_xml_declaration)
            self._document_encoding = "UTF-8"  # default
            self.__logger.debug("RETURN")

        def feed(self, data):
            """Add more XML data to be parsed.

            :param str data: raw XML read from a stream

            .. note::
               All *data* across calls to this method are buffered
               internally; the parser itself is not actually created
               until the :meth:`close` method is called.

            """
            self._buffer.append(data)

        def close(self):
            """Parse the XML from the internal buffer and finish the
            target.

            :return:\
               the value returned by the target's ``close()`` method\
               (for a tree builder, the root element of the document)

            The internal buffer is released by this call, so the parser
            cannot be fed again afterwards.

            """
            self.__logger.debug("TRACE")
            xml_string = "".join(self._buffer)
            self._buffer = None
            reader = XmlReader.Create(StringReader(xml_string), self.settings)
            try:
                while (reader.Read()):
                    # Dispatch on the node type rather than calling
                    # reader.IsStartElement(): that method advances the
                    # reader to the next content node, which would skip
                    # the XML declaration before its encoding is read.
                    node_type = reader.NodeType
                    if (node_type == XmlNodeType.XmlDeclaration):
                        self._parse_xml_declaration(reader.Value)
                    elif (node_type == XmlNodeType.Element):
                        self._start_element(reader)
                    elif (node_type in (XmlNodeType.Text,
                                        XmlNodeType.CDATA)):
                        # text is decoded with the document encoding
                        # (UTF-8 unless the XML declaration says otherwise)
                        self._target.data(
                            reader.Value.decode(self._document_encoding))
                    elif (node_type == XmlNodeType.EndElement):
                        self._target.end(reader.LocalName)
            finally:
                # release the .NET reader even when parsing fails
                reader.Close()
            root = self._target.close()
            self.__logger.debug("RETURN")
            return root

        def _parse_xml_declaration(self, xml_decl):
            """Parse the document encoding from *xml_decl*.

            :param str xml_decl: the document XML declaration

            *xml_decl* is reported by `System.Xml.XmlReader
            <http://msdn.microsoft.com/en-us/library/system.xml.xmlreader>`_
            as a node of the type `XmlNodeType.XmlDeclaration
            <http://msdn.microsoft.com/en-us/library/system.xml.xmlnodetype>`_.
            If it names an encoding, that encoding replaces the default
            used to decode text nodes; otherwise the default is kept.

            """
            self.__logger.debug("TRACE %r", xml_decl)
            enc_name = CRE_ENCODING.Match(xml_decl).Groups["enc_name"].Value
            if (enc_name):
                self.__logger.info("document encoding is %r", enc_name)
                self._document_encoding = enc_name
            self.__logger.debug("RETURN")

        def _start_element(self, reader):
            """Notify the tree builder that a start element has been
            encountered.

            *reader* is a reference to a .NET `System.Xml.XmlReader
            <http://msdn.microsoft.com/en-us/library/system.xml.xmlreader>`_
            positioned on the element.

            If the element is an empty element (e.g. ``<name/>``), the
            tree builder is also notified that the element has been
            closed.

            """
            name = reader.LocalName
            attributes = {}
            while (reader.MoveToNextAttribute()):
                attributes[reader.Name] = reader.Value
            # step back from the last attribute to the owning element so
            # that IsEmptyElement below is evaluated on the element itself
            reader.MoveToElement()
            self._target.start(name, attributes)
            if (reader.IsEmptyElement):
                self._target.end(name)
else:
    _logger.warning(
        "not running under IronPython; .NET CLR is not available")

    class CLRXMLParser(ET.XMLParser):
        """A dummy class that will raise :class:`aglyph.AglyphError` if
        instantiated.

        """

        def __new__(cls, *args, **keywords):
            raise AglyphError(".NET CLR is not available")
class XmlReaderTreeBuilder(CLRXMLParser):
    """Build an `ElementTree
    <http://effbot.org/zone/element-index.htm>`_ using the .NET
    `System.Xml.XmlReader
    <http://msdn.microsoft.com/en-us/library/system.xml.xmlreader>`_
    XML parser.

    .. versionchanged:: 2.0.0
       It is no longer necessary for IronPython applications to use\
       this class explicitly. :class:`aglyph.context.XMLContext` now\
       uses :class:`CLRXMLParser` by default if running under\
       IronPython.

    .. deprecated:: 2.0.0
       This class has been renamed to :class:`CLRXMLParser`.\
       ``XmlReaderTreeBuilder`` will be **removed** in release 3.0.0.

    """

    def __init__(self, validating=False):
        """
        :param bool validating:\
           specify ``True`` to use a validating parser

        Issues an :class:`aglyph.AglyphDeprecationWarning` on every
        instantiation.

        """
        # stacklevel=2 attributes the warning to the code that
        # instantiated this class rather than to this module
        warnings.warn(
            AglyphDeprecationWarning(
                "aglyph.compat.ipyetree.XmlReaderTreeBuilder",
                replacement="aglyph.compat.ipyetree.CLRXMLParser"),
            stacklevel=2)
        super(XmlReaderTreeBuilder, self).__init__(validating=validating)