Package pyxb :: Package binding :: Module saxer
[hide private]
[frames | no frames]

Source Code for Module pyxb.binding.saxer

  1  # Copyright 2009, Peter A. Bigot 
  2  # 
  3  # Licensed under the Apache License, Version 2.0 (the "License"); you may 
  4  # not use this file except in compliance with the License. You may obtain a 
  5  # copy of the License at: 
  6  # 
  7  #            http://www.apache.org/licenses/LICENSE-2.0 
  8  # 
  9  # Unless required by applicable law or agreed to in writing, software 
 10  # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 
 11  # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 
 12  # License for the specific language governing permissions and limitations 
 13  # under the License. 
 14   
 15  """This module contains support for generating bindings from an XML stream 
 16  using a SAX parser.""" 
 17   
 18  import xml.sax 
 19  import xml.sax.handler 
 20  import pyxb.namespace 
 21  import pyxb.utils.saxutils 
 22  import pyxb.utils.saxdom 
 23  import pyxb.utils.utility 
 24  import basis 
 25   
class _SAXElementState (pyxb.utils.saxutils.SAXElementState):
    """State required to generate bindings for a specific element.

    If the document being parsed includes references to unrecognized elements,
    a DOM instance of the element and its content is created and treated as a
    wildcard element.

    An instance is in one of two modes:

      - I{binding mode}: L{startBindingElement} / L{endBindingElement} create
        the binding instance for the element.
      - I{DOM mode}: L{enterDOMMode} / L{startDOMElement} / L{endDOMElement}
        build a L{pyxb.utils.saxdom} node for the element instead.
    """

    # An expanded name corresponding to xsi:nil
    __XSINilTuple = pyxb.namespace.XMLSchema_instance.createExpandedName('nil').uriTuple()

    # The binding object being created for this element.  When the
    # element type has simple content, the binding instance cannot be
    # created until the end of the element has been reached and the
    # content of the element has been accumulated for use in the
    # instance constructor.  When the element type has complex
    # content, the binding instance must be created at the start of
    # the element, so contained elements can be properly stored.
    __bindingObject = None

    # The nearest enclosing complex type definition
    def enclosingCTD (self):
        """The nearest enclosing complex type definition, as used for
        resolving local element/attribute names.

        @return: An instance of L{basis.complexTypeDefinition}, or C{None} if
        the element is top-level
        """
        return self.__enclosingCTD
    __enclosingCTD = None

    # The factory that is called to create a binding instance for this
    # element; None if the binding instance was created at the start
    # of the element.
    __delayedConstructor = None

    # An xml.sax.xmlreader.Attributes instance providing the
    # attributes for the element.  (In DOM mode this is replaced by a
    # pyxb.utils.saxdom.NamedNodeMap; see startDOMElement.)
    __attributes = None

    # An xml.dom.Node corresponding to the (sub-)document
    __domDocument = None

    # Depth counter used while in DOM mode; None when not in DOM mode.
    __domDepth = None

    def __init__ (self, **kw):
        """Create the state for a newly opened element.

        All keywords are passed through to the superclass constructor.  When
        the parent state is also a L{_SAXElementState}, the enclosing complex
        type definition and any active DOM document are inherited from it.
        """
        super(_SAXElementState, self).__init__(**kw)
        self.__bindingObject = None
        parent_state = self.parentState()
        if isinstance(parent_state, _SAXElementState):
            self.__enclosingCTD = parent_state.enclosingCTD()
            # A non-None document here means an ancestor entered DOM mode,
            # so this element is part of the same DOM subtree.
            self.__domDocument = parent_state.__domDocument
            if self.__domDocument is not None:
                self.__domDepth = parent_state.__domDepth + 1

    def setEnclosingCTD (self, enclosing_ctd):
        """Set the enclosing complex type definition for this element.

        @param enclosing_ctd: The scope for a local element.
        @type enclosing_ctd: L{basis.complexTypeDefinition}
        @return: C{self}
        """
        self.__enclosingCTD = enclosing_ctd
        # The documented contract is to return self; honor it.
        return self

    # Create the binding instance for this element.
    def __constructElement (self, new_object_factory, attrs, content=None):
        """Create, record, and return the binding instance for this element.

        @param new_object_factory: Callable invoked as
        C{new_object_factory(*content, **kw)} to create the instance.
        @param attrs: The attributes of the element; an C{xsi:nil} attribute,
        if present, is converted to a boolean and passed as the C{_nil}
        keyword.
        @param content: Optional list of positional constructor arguments;
        C{None} is treated as an empty list.
        @return: The new binding instance (also stored on this state).
        """
        kw = {}

        # Note whether the node is marked nil
        if self.__XSINilTuple in attrs:
            kw['_nil'] = pyxb.binding.datatypes.boolean(attrs.getValue(self.__XSINilTuple))

        if content is None:
            content = []
        self.__bindingObject = new_object_factory(*content, **kw)
        if isinstance(self.__bindingObject, pyxb.utils.utility.Locatable_mixin):
            self.__bindingObject._setLocation(self.location())

        # Record the namespace context so users of the binding can
        # interpret QNames within the attributes and content.  Use the
        # public accessor (as the DOM methods of this class do) rather
        # than a private name this class does not itself define.
        self.__bindingObject._setNamespaceContext(self.namespaceContext())

        # Set the attributes.
        if isinstance(self.__bindingObject, pyxb.binding.basis.complexTypeDefinition):
            # NB: attrs implements the SAX AttributesNS interface, meaning
            # that names are pairs of (namespaceURI, localName), just like we
            # want them to be.
            ignored_namespaces = ( pyxb.namespace.XMLNamespaces, pyxb.namespace.XMLSchema_instance )
            for attr_name in attrs.getNames():
                attr_en = pyxb.namespace.ExpandedName(attr_name)
                # Ignore xmlns and xsi attributes; we've already handled those
                if attr_en.namespace() in ignored_namespaces:
                    continue
                self.__bindingObject._setAttribute(attr_en, attrs.getValue(attr_name))

        return self.__bindingObject

    def inDOMMode (self):
        """Return C{True} iff this element is being represented as a DOM node
        (i.e., a DOM document is currently associated with this state)."""
        return self.__domDocument is not None

    def enterDOMMode (self, attrs):
        """Actions upon first encountering an element for which we cannot create a binding.

        Invoking this transitions the parser into DOM mode, creating a new DOM
        document that will represent this element including its content.

        @param attrs: The XML attributes associated with the element
        @return: The value returned by L{startDOMElement}
        """
        assert not self.__domDocument
        self.__domDocument = pyxb.utils.saxdom.Document(namespace_context=self.namespaceContext())
        self.__domDepth = 0
        return self.startDOMElement(attrs)

    def startDOMElement (self, attrs):
        """Actions upon entering an element that is part of a DOM subtree.

        The SAX attributes are converted to a
        L{pyxb.utils.saxdom.NamedNodeMap} that is held until the element is
        closed.

        @param attrs: The XML attributes associated with the element
        """
        self.__domDepth += 1
        self.__attributes = pyxb.utils.saxdom.NamedNodeMap()
        ns_ctx = self.namespaceContext()
        for name in attrs.getNames():
            attr_en = pyxb.namespace.ExpandedName(name)
            self.__attributes._addItem(pyxb.utils.saxdom.Attr(expanded_name=attr_en, namespace_context=ns_ctx, value=attrs.getValue(name), location=self.location()))

    def endDOMElement (self):
        """Actions upon leaving an element that is part of a DOM subtree.

        Builds the DOM element from the stored attributes and accumulated
        content, leaves DOM mode if this was the element that entered it, and
        adds the DOM element to the content of the parent state.

        @return: The L{pyxb.utils.saxdom.Element} created for this element
        """
        ns_ctx = self.namespaceContext()
        element = pyxb.utils.saxdom.Element(namespace_context=ns_ctx, expanded_name=self.expandedName(), attributes=self.__attributes, location=self.location())
        for ( content, element_use, maybe_element ) in self.content():
            if isinstance(content, xml.dom.Node):
                element.appendChild(content)
            else:
                # Anything that is not already a node is wrapped as text.
                element.appendChild(pyxb.utils.saxdom.Text(content, namespace_context=ns_ctx))
        self.__domDepth -= 1
        if 0 == self.__domDepth:
            # This is the element that entered DOM mode: finish the document
            # and return this state to binding mode.
            self.__domDocument.appendChild(element)
            self.__domDepth = None
            self.__domDocument = None
        parent_state = self.parentState()
        # Guard against a missing parent, as endBindingElement does.
        if parent_state is not None:
            parent_state.addElementContent(element, None)
        return element

    def startBindingElement (self, type_class, new_object_factory, element_use, attrs):
        """Actions upon entering an element that will produce a binding instance.

        The element use is recorded.  If the type is a subclass of
        L{basis.simpleTypeDefinition}, a delayed constructor is recorded so
        the binding instance can be created upon completion of the element;
        otherwise, a binding instance is created and stored.  The attributes
        are used to initialize the binding instance (now, or upon element
        end).

        @param type_class: The Python type of the binding instance
        @type type_class: subclass of L{basis._TypeBinding_mixin}
        @param new_object_factory: A callable object that creates an instance of the C{type_class}
        @param element_use: The element use with which the binding instance is associated.  Will be C{None} for top-level elements
        @type element_use: L{basis.element}
        @param attrs: The XML attributes associated with the element
        @type attrs: C{xml.sax.xmlreader.Attributes}
        @return: The generated binding instance, or C{None} if creation is delayed
        """
        self.__delayedConstructor = None
        self.__elementUse = element_use
        # Retained in both branches: the delayed constructor needs the
        # attributes when the element ends.
        self.__attributes = attrs
        if type_class._IsSimpleTypeContent():
            self.__delayedConstructor = new_object_factory
        else:
            self.__constructElement(new_object_factory, attrs)
        return self.__bindingObject

    def endBindingElement (self):
        """Perform any end-of-element processing.

        For simple type instances, this creates the binding instance from the
        accumulated text content.  Otherwise the accumulated content is
        appended to the binding instance created at element start.  In both
        cases the instance is added to the parent state's content and, when
        C{pyxb._ParsingRequiresValid} is set, validated.

        @return: The generated binding instance
        """
        if self.__delayedConstructor is not None:
            args = []
            for (content, element_use, maybe_element) in self.content():
                assert not maybe_element
                assert element_use is None
                assert isinstance(content, basestring)
                args.append(content)
            assert 1 >= len(args), 'Unexpected STD content %s' % (args,)
            self.__constructElement(self.__delayedConstructor, self.__attributes, args)
        else:
            for (content, element_use, maybe_element) in self.content():
                self.__bindingObject.append(content, element_use, maybe_element, require_validation=pyxb._ParsingRequiresValid)
        parent_state = self.parentState()
        if parent_state is not None:
            parent_state.addElementContent(self.__bindingObject, self.__elementUse)
        # As CreateFromDOM does, validate the resulting element
        if pyxb._ParsingRequiresValid:
            self.__bindingObject.validateBinding()
        return self.__bindingObject
220
class PyXBSAXHandler (pyxb.utils.saxutils.BaseSAXHandler):
    """A SAX handler class which generates a binding instance for a document
    through a streaming parser.

    An example of using this to parse the document held in the string C{xmls} is::

      import pyxb.binding.saxer
      import StringIO

      saxer = pyxb.binding.saxer.make_parser()
      handler = saxer.getContentHandler()
      saxer.parse(StringIO.StringIO(xmls))
      instance = handler.rootObject()

    """

    # Whether invocation of handler methods should be traced
    __trace = False

    # An expanded name corresponding to xsi:type
    __XSITypeTuple = pyxb.namespace.XMLSchema_instance.createExpandedName('type').uriTuple()

    __domHandler = None
    __domDepth = None

    def rootObject (self):
        """Return the binding object corresponding to the top-most
        element in the document

        @return: An instance of L{basis._TypeBinding_mixin} (most usually a
        L{basis.complexTypeDefinition}).

        @raise pyxb.UnrecognizedElementError: No binding could be found to
        match the top-level element in the document."""
        if not isinstance(self.__rootObject, basis._TypeBinding_mixin):
            # Happens if the top-level element got processed as a DOM instance.
            raise pyxb.UnrecognizedElementError(self.__rootObject)
        return self.__rootObject
    __rootObject = None

    def reset (self):
        """Reset the state of the handler in preparation for processing a new
        document.

        @return: C{self}
        """
        super(PyXBSAXHandler, self).reset()
        self.__rootObject = None
        return self

    def __init__ (self, **kw):
        """Create a parser instance for converting XML to bindings.

        @keyword element_state_constructor: Defaults to L{_SAXElementState}
        when the caller does not supply a value; a caller-supplied value is
        left untouched.  The keywords are then passed to the L{superclass
        constructor<pyxb.utils.saxutils.BaseSAXHandler.__init__>}.
        """

        kw.setdefault('element_state_constructor', _SAXElementState)
        super(PyXBSAXHandler, self).__init__(**kw)
        self.reset()

    def startElementNS (self, name, qname, attrs):
        """SAX callback invoked when an element is opened.

        Determines the binding to use for the element (honoring C{xsi:type}
        when present) and starts a binding instance for it.  If no binding can
        be determined, the element state is switched into DOM mode instead.

        @param name: The C{(namespaceURI, localName)} pair naming the element
        @param qname: The qualified name as it appeared in the document
        @param attrs: The attributes of the element
        """
        (this_state, parent_state, ns_ctx, name_en) = super(PyXBSAXHandler, self).startElementNS(name, qname, attrs)

        # Delegate processing if in DOM mode
        if this_state.inDOMMode():
            return this_state.startDOMElement(attrs)

        # Start knowing nothing
        type_class = None

        # Process an xsi:type attribute, if present
        if self.__XSITypeTuple in attrs:
            xsi_type = attrs.getValue(self.__XSITypeTuple)
            type_class = ns_ctx.interpretQName(xsi_type).typeBinding()

        # Resolve the element within the appropriate context.  Note
        # that global elements have no use, only the binding.
        if parent_state.enclosingCTD() is not None:
            (element_binding, element_use) = parent_state.enclosingCTD()._ElementBindingUseForName(name_en)
        else:
            element_use = None
            element_binding = name_en.elementBinding()

        # Non-root elements should have an element use, from which we can
        # extract the binding.  (Keep any current binding, since it may be a
        # member of a substitution group.)
        if (element_use is not None) and (element_binding is None):
            assert self.__rootObject is not None
            element_binding = element_use.elementBinding()
            assert element_binding is not None

        # Get the factory method for the binding type for the element instance
        if type_class is not None:
            # @todo: validate xsi:type against abstract
            new_object_factory = type_class.Factory
        elif element_binding is None:
            # Bother.  We don't know what this thing is.  But that's not an
            # error, if the schema accepts wildcards.  For consistency with
            # the DOM-based interface, we need to build a DOM node.
            return this_state.enterDOMMode(attrs)
        else:
            # Invoke binding __call__ method not Factory, so can check for
            # abstract elements.
            element_binding = element_binding.elementForName(name)
            new_object_factory = element_binding
            type_class = element_binding.typeDefinition()

        # Update the enclosing complex type definition for this
        # element state.
        assert type_class is not None
        if issubclass(type_class, pyxb.binding.basis.complexTypeDefinition):
            this_state.setEnclosingCTD(type_class)
        else:
            this_state.setEnclosingCTD(parent_state.enclosingCTD())

        # Process the element start.  This may or may not return a
        # binding object.
        binding_object = this_state.startBindingElement(type_class, new_object_factory, element_use, attrs)

        # If the top-level element has complex content, this sets the
        # root object.  If it has simple content, see endElementNS.
        if self.__rootObject is None:
            self.__rootObject = binding_object

    def endElementNS (self, name, qname):
        """SAX callback invoked when an element is closed.

        Completes the DOM node or binding instance for the element and, if no
        root object has been recorded yet, records the result as the root
        object.

        @param name: The C{(namespaceURI, localName)} pair naming the element
        @param qname: The qualified name as it appeared in the document
        """
        this_state = super(PyXBSAXHandler, self).endElementNS(name, qname)
        if this_state.inDOMMode():
            # Delegate processing if in DOM mode.  Note that completing this
            # element may take us out of DOM mode.  In any case, the returned
            # binding object is a DOM element instance.
            binding_object = this_state.endDOMElement()
        else:
            # Process the element end.  This will return a binding object,
            # either the one created at the start or the one created at
            # the end.
            binding_object = this_state.endBindingElement()
        assert binding_object is not None

        # If we don't have a root object, save it.  No, there is not a
        # problem doing this on the close of the element.  If the
        # top-level element has complex content, the object was
        # created on start, and the root object has been assigned.  If
        # it has simple content, then there are no internal elements
        # that could slip in and set this before we get to it here.
        #
        # NOTE(review): when the top-level element itself is handled in DOM
        # mode, no root object is assigned at start, so the first element to
        # *close* (possibly a descendant) appears to be what is recorded
        # here -- confirm whether that is intended.
        if self.__rootObject is None:
            self.__rootObject = binding_object
370
def make_parser (*args, **kw):
    """Invoke L{pyxb.utils.saxutils.make_parser} with L{PyXBSAXHandler} as
    the default C{content_handler_constructor}.

    A C{content_handler_constructor} supplied by the caller is left
    untouched; all positional and keyword arguments are forwarded unchanged.

    @return: Whatever L{pyxb.utils.saxutils.make_parser} returns.
    """
    if 'content_handler_constructor' not in kw:
        kw['content_handler_constructor'] = PyXBSAXHandler
    parser = pyxb.utils.saxutils.make_parser(*args, **kw)
    return parser

## Local Variables:
## fill-column:78
## End: