Package pyxsd :: Module schemaBase
[hide private]
[frames | no frames]

Source Code for Module pyxsd.schemaBase

  1  import sets 
  2   
class SchemaBase(object):
    """
    Serves as the base class for all schema type classes created. The pythonic
    instance tree is built from this class. This class also contains the means
    to do non-fatal parser error checking: problems found while matching the
    xml against the schema are printed, and parsing continues where possible.

    A little bit of the work this class does is also done in pyXSD. The schema
    and xml file do not line up perfectly. The top level element in the schema
    and the schema tag both contain information relevant to the top-level tag
    in the XML. For this reason, the tree building/checking must be started in
    the same location the method `makeInstanceFromTag` is called in this class.

    Instance attributes set by this class:

    - `_children_`- list of child instances, in the order they were added.
    - `_value_`- list of non-empty text lines of the element, or None.
    - `_name_`- the element's tag name with any '{namespace}' prefix removed.
    - `_attribs_`- dictionary of the xml attributes that were accepted.
    """

    #=======================================================
    #
    def __init__(self):
        """
        Creates the instances that are in the tree. These objects are
        initialized from within SchemaBase.

        No parameters
        """
        self._children_ = []

        self._value_ = None

    #=======================================================
    #
    def makeInstanceFromTag(cls, elementTag):
        """
        A classmethod. It takes in a schema type class and its corresponding
        xml element. It then instantiates the class. It adds a name from the
        name in the xml element, and then it hands the instance and the element
        to other methods to add attributes, elements, and values to this
        instance. It adds these according to the schema classes, and not the
        element. A non-fatal (when possible) error is printed when the xml
        element does not correspond to the schema class.

        Parameters:

        - `cls`- The schema type class to use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `elementTag`- The xml element that corresponds to `cls`

        Returns the new instance.
        """
        instance = cls()

        # ElementTree spells qualified names as '{uri}local'; keep only 'local'.
        instance._name_ = elementTag.tag.split('}')[-1]

        cls.addAttributesTo(instance, elementTag)
        cls.addElementsTo(instance, elementTag)
        cls.addValueTo(instance, elementTag)

        return instance

    makeInstanceFromTag = classmethod(makeInstanceFromTag)

    #=======================================================
    #
    def addAttributesTo(cls, instance, elementTag):
        """
        A classmethod. Called by `makeInstanceFromTag()`. Adds attributes
        according to the schema by calling `getAttributesFromTag()`. The
        attributes are then checked with `checkAttributes()`.

        Parameters:

        - `cls`- The schema type class to use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `instance` - The instance of `cls` that is having attributes added to it.
        - `elementTag`- The xml element that corresponds to `cls`
        """
        tagsUsed = instance.getAttributesFromTag(elementTag)

        instance.checkAttributes(tagsUsed, elementTag)

    addAttributesTo = classmethod(addAttributesTo)

    #=======================================================
    #
    def getAttributesFromTag(self, elementTag):
        """
        Adds attributes to the *_attribs_* dictionary in the instance. Only
        attributes named by the type class's descriptors are added, by
        assigning them on the instance with `setattr`. The only exception to
        this procedure is for attributes whose name contains 'xmlns' or
        'xsi:' (namespace and schemaLocation attributes), which are copied
        unconditionally.

        Parameters:

        - `elementTag`: the xml element that the instance represents

        Returns the list of attribute names that were used. Each name appears
        once, so that `checkAttributes()` can compare the length of this list
        with the number of attributes on the element.
        """
        self._attribs_ = {}

        usedAttributes = []

        attributes = elementTag.attrib

        for attr in attributes.keys():
            if 'xmlns' in attr or 'xsi:' in attr:
                setattr(self, attr, attributes[attr])
                usedAttributes.append(attr)
                self._attribs_[attr] = attributes[attr]

        for name in self.descAttributeNames():
            if name not in attributes:
                continue

            setattr(self, name, attributes[name])

            # An attribute accepted by the loop above must not be counted
            # twice, or checkAttributes() reports a spurious surplus.
            if name not in usedAttributes:
                usedAttributes.append(name)

            self._attribs_[name] = attributes[name]

        return usedAttributes

    #=======================================================
    #
    def addElementsTo(cls, instance, elementTag):
        """
        A classmethod. Checks order on the child elements, with different
        functions for `sequences` and `choices`. Iterates through all the
        elements specified in the class of the schema, and matches these
        elements with the elements from the xml by name. Redirects elements
        whose type is not a SchemaBase subclass (primitive types) to
        `primitiveValueFor()`. Calls makeInstanceFromTag() on all the other
        children.

        Parameters:

        - `cls`- The schema type class to use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `instance` - The instance of `cls` that is having elements added to it.
        - `elementTag`- The xml element that corresponds to `cls`

        Returns `instance`.
        """
        # list(element) works on every ElementTree version; getchildren()
        # was deprecated and later removed.
        subElements = list(elementTag)

        elemDescriptors = instance._getElements()

        if not subElements:  # This element has no children
            return instance

        if not elemDescriptors:
            # Children in the xml but none in the schema: report it instead
            # of failing on elemDescriptors[0] below.
            print("Parser Error: the '%s' element has child elements in the xml, "
                  "but its schema type does not specify any."
                  % elementTag.tag.split('}')[-1])
            return instance

        if elemDescriptors[0].sOrC == "sequence":
            cls.checkElementOrderInSequence(elemDescriptors, subElements)

        if elemDescriptors[0].sOrC == "choice":
            cls.checkElementOrderInChoice(elemDescriptors[0], subElements)

        #-----------------------------

        for descriptor in elemDescriptors:

            descriptorName = descriptor.name

            for subElement in subElements:

                subElementName = subElement.tag.split('}')[-1]

                if descriptorName != subElementName:
                    continue

                subElCls = descriptor.getType()

                if subElCls is None:  # An Error Message
                    print("Parser Error: There is no type in the schema that corresponds to the type stated in the %s element"
                          % descriptorName)
                    continue

                #-----------------------------
                # for elements with primitive types

                if not issubclass(subElCls, SchemaBase):
                    subInstance = cls.primitiveValueFor(subElCls, subElement)
                    if subInstance is None:
                        # primitiveValueFor() already reported the problem.
                        continue
                    subInstance._name_ = subElementName
                    instance._children_.append(subInstance)
                    setattr(instance, subElementName, subInstance)
                    continue

                #-----------------------------

                subInstance = subElCls.makeInstanceFromTag(subElement)
                subInstance._name_ = subElementName
                instance._children_.append(subInstance)

        return instance

    addElementsTo = classmethod(addElementsTo)

    #=======================================================
    #
    def addValueTo(cls, instance, elementTag):
        """
        A classmethod. Checks to see if the tag has a text value, and assigns
        it to the element instance if it does. Uses the ElementTree attribute
        `.text` to retrieve this information from the tag.

        The text is split into lines; each line is stripped of surrounding
        whitespace and empty lines are dropped. `instance._value_` becomes the
        list of remaining lines, or None when nothing remains. When the tag
        has no text at all, `instance._value_` is left untouched.

        Parameters:

        - `cls`- The schema type class to use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `instance` - The instance of `cls` that is having values added to it.
        - `elementTag`- The xml element that corresponds to `cls`
        """
        text = elementTag.text

        if not text:
            return

        # Splitting unconditionally also covers text with no embedded
        # newline, which yields a one-entry list.
        stripped = [line.strip() for line in text.split('\n')]
        values = [line for line in stripped if line]

        if values:
            instance._value_ = values
        else:
            instance._value_ = None

    addValueTo = classmethod(addValueTo)

    #=======================================================
    #
    def checkElementOrderInChoice(cls, elemDescriptor, subElements):
        """
        A classmethod. Checks to see that elements in a choice field, which is
        specified in the schema, follow the rules of such a field. Gets
        minOccurs and maxOccurs from the choice element in the schema, and
        checks the number of elements from there. Problems are printed; nothing
        is raised or returned.

        Parameters:

        - `cls`- The schema type class in use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `elemDescriptor`- The schema descriptor of the choice; must provide `getMinOccurs()` and `getMaxOccurs()`.
        - `subElements`- All of the children of an element that is being processed in addElementsTo().
        """
        # Not every class carries a schema 'name'; fall back to the class name.
        clsName = getattr(cls, 'name', cls.__name__)

        minOccurs = elemDescriptor.getMinOccurs()
        if minOccurs < 0:
            print("Parser Error: the value of 'minOccurs' in %s must be greater than or equal to zero." % clsName)
            print("The program will assign minOccurs the default vaule of 1 and attempt to proceed.")
            print("")
            minOccurs = 1

        maxOccurs = elemDescriptor.getMaxOccurs()
        if maxOccurs < 0:
            print("Parser Error: the value of 'maxOccurs' in %s must be greater than or equal to zero." % clsName)
            print("The program will assign maxOccurs the default vaule of 1 and attempt to proceed.")
            print("")
            maxOccurs = 1

        if len(subElements) < minOccurs:

            print("Parser Error: the program cannot find any elements in the xml that are specified in the choice field for %s" % clsName)
            print("")

        elif len(subElements) > maxOccurs:

            print("Parser Error: the parser found too many elements for a choice element in %s." % clsName)
            print("This choice element can only have one element in it.")
            print("")

        return

    checkElementOrderInChoice = classmethod(checkElementOrderInChoice)

    #=======================================================
    #
    def checkElementOrderInSequence(cls, descriptors, subElements):
        """
        A classmethod. Checks the element order in sequence fields to make sure
        that the order specified in the schema is preserved in the xml. Prints
        non-fatal errors when a problem is found. Checks minOccurs and
        maxOccurs on each element as well.

        The child names are walked once, front to back: for every descriptor,
        `consume()` removes the run of children carrying that descriptor's
        name from the front of the remaining list and reports its length.

        Parameters:

        - `cls`- The schema type class in use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `descriptors`- a list of schema-specified elements that define parameters for an element. Each must provide `name`, `getMinOccurs()` and `getMaxOccurs()`.
        - `subElements`- All of the children of an element that is being processed in addElementsTo(). Correspond to elements in `descriptors`
        """
        clsName = getattr(cls, 'name', cls.__name__)

        # [-1] keeps the whole tag when it carries no '{namespace}' prefix.
        subElementNames = [sub.tag.split('}')[-1] for sub in subElements]

        for descriptor in descriptors:
            dname = descriptor.name

            count, subElementNames = cls.consume(dname, subElementNames)

            if count == 0:
                if descriptor.getMinOccurs() == 0:
                    if dname not in subElementNames:
                        # Optional element that is simply absent.
                        continue
                print("Parser Error: Order Error - Expected element name '%s' in different position." % dname)
                print("")
                continue

            if count < descriptor.getMinOccurs():
                # complain
                print("Parser Error: The Element '%s' in '%s' occurs less" % (dname, clsName))
                print("than the specified number of minOccurs (%i) in the schema." % descriptor.getMinOccurs())
                print("Note: it is possible that there is a problem with the order of")
                print("elements and not the minOccurs value.")
                print("")
                continue

            if count > descriptor.getMaxOccurs():
                # complain
                print("Parser Error: The element '%s' in '%s' occurs more" % (dname, clsName))
                print(" than the specified number of maxOccurs in the schema.")
                if descriptor.getMaxOccurs() == 1:
                    print("Your maxOccurs value is 1, which is the default value.")
                    print("Perhaps you meant to assign this vairablea different value?")
                print("")
                continue

    checkElementOrderInSequence = classmethod(checkElementOrderInSequence)

    #=======================================================
    #
    def consume(cls, dname, subElements):
        """
        A classmethod. Used to check the number of times an element type in the
        schema is used with the xml elements. Used by
        checkElementOrderInSequence().

        Parameters:

        - `cls`- The schema type class in use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `dname`- The name of the descriptor that is currently being checked.
        - `subElements`- the list of sub-element names being checked.

        Returns a tuple `(count, rest)`: `count` is the number of leading
        entries of `subElements` equal to `dname`, and `rest` is a new list
        holding the entries after that run. `subElements` is not modified.
        """
        count = 0
        total = len(subElements)

        # Advance an index and slice once, rather than re-slicing per match.
        while count < total and subElements[count] == dname:
            count += 1

        return count, subElements[count:]

    consume = classmethod(consume)

    #=======================================================
    #
    def primitiveValueFor(cls, subElCls, subElement):
        """
        A classmethod. Used to check and assign primitive values to an
        instance. Called by addElementsTo().
        NOTE: this method may not work correctly for all elements with
        primitive data types. If you find an error in this method or any other
        error in the program, please submit this error and the appropriate
        correction on the `pyXSD website <http://pyxsd.org>`_.

        The value handed to `subElCls` is chosen as follows:

        - no text, no attributes and no children: `True`
        - otherwise, non-empty text: the text
        - otherwise, exactly one attribute: that attribute's value
        - otherwise, exactly one child: that child element
        - anything else: an error is printed and None is returned

        Parameters:

        - `cls`- The schema type class in use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.
        - `subElCls`- the schema type class that corresponds to the subElement that is being processed.
        - `subElement`- The subElement that has a primitive data type.

        Returns the `subElCls` instance, with `_attribs_`, `_value_` and
        `_children_` set from the element, or None when no value could be
        determined.
        """
        dataTypeChildren = list(subElement)

        dataTypeText = subElement.text

        # list() so the pairs can be indexed on every Python version.
        dataTypeAttrib = list(subElement.items())

        if dataTypeText is None and len(dataTypeAttrib) == 0 \
               and len(dataTypeChildren) == 0:

            dataTypeVal = True

        elif dataTypeText:

            dataTypeVal = dataTypeText

        elif len(dataTypeAttrib) == 1:

            dataTypeVal = dataTypeAttrib[0][1]

        elif len(dataTypeChildren) == 1:

            dataTypeVal = dataTypeChildren[0]

        else:

            print("An error occured while reading the data in the %s element."
                  % subElement.tag.split('}')[-1])

            return None

        dataTypeValInst = subElCls(dataTypeVal)

        dataTypeValInst._attribs_ = subElement.attrib

        dataTypeValInst._value_ = dataTypeText

        dataTypeValInst._children_ = dataTypeChildren

        return dataTypeValInst

    primitiveValueFor = classmethod(primitiveValueFor)

    #=======================================================
    #
    def addBaseDescriptors(cls):
        """
        A classmethod. Collects the `Attribute` descriptors defined directly on
        `cls`, then adds those of every base class that also provides
        `addBaseDescriptors`, recursively. A descriptor already found on a more
        derived class (or on an earlier base) is never replaced.

        Parameters:

        - `cls`- The schema type class in use. NOTE: since this function is a classmethod, `cls` is the first argument, not the instance of the class.

        Returns a dictionary mapping attribute names to descriptors.
        """
        descriptors = {}
        for key, value in vars(cls).items():
            if isinstance(value, Attribute):
                descriptors[key] = value

        for bcls in cls.__bases__:
            if hasattr(bcls, 'addBaseDescriptors'):
                for key, value in bcls.addBaseDescriptors().items():
                    # setdefault keeps the entry that was found first.
                    descriptors.setdefault(key, value)

        return descriptors

    addBaseDescriptors = classmethod(addBaseDescriptors)

    #=======================================================
    #
    def descAttributes(self):
        """
        Returns a dictionary of the descriptor attributes of this instance's
        class and its bases, as collected by `addBaseDescriptors()`.
        Uses lazy evaluation by storing the dictionary in the instance under
        '_descAttrs_', which it returns on later calls.

        No parameters.
        """
        if '_descAttrs_' in self.__dict__:
            return self.__dict__['_descAttrs_']

        attrs = self.__class__.addBaseDescriptors()

        # Written through __dict__ so the cache is a plain instance entry.
        self.__dict__['_descAttrs_'] = attrs

        return attrs

    #=======================================================
    #
    def descAttributeNames(self):
        """
        Returns a list that has all of the names of attribute descriptors.
        Calls descAttributes(), and returns a list of the keys from that
        dictionary.
        """
        return list(self.descAttributes().keys())

    #=======================================================
    #
    def checkAttributes(self, usedAttrs, elementTag):
        """
        Checks that every attribute of the xml element was processed, and that
        every attribute whose descriptor reports a use of 'required' was
        supplied. Problems are printed; nothing is raised or returned.

        Parameters:

        - `usedAttrs`- a list containing the names of attributes that were put into the instance.
        - `elementTag`- The ElementTree tag for the instance that is being checked.
        """
        descriptorAttributes = self.descAttributes()

        attrInElementTag = list(elementTag.attrib.keys())

        clsName = self.__class__.__name__

        if len(usedAttrs) > len(attrInElementTag):
            print("Parser Error: For an unknown reason, in %s, the program parsed more attributes than there are in the XML file."
                  % clsName)
        elif len(usedAttrs) < len(attrInElementTag):
            print("Parser Error: Not all attributes in the XML file in %s were parsed." % clsName)
            print("Attributes not processed:")
            for attrET in attrInElementTag:
                if attrET not in usedAttrs:
                    print("  %s" % attrET)

        elementName = elementTag.tag.split('}')[-1]

        for descriptorAttrName, descriptor in descriptorAttributes.items():
            attrUse = descriptor.getUse()
            if attrUse == 'required' and descriptorAttrName not in usedAttrs:
                print("Parser Error: the %s in the %s element is required but was not found."
                      % (descriptorAttrName, elementName))

    #=======================================================
    #
    def dumpCls(cls):
        """
        For debugging purposes only. Prints out the contents of a class.
        A staticmethod.

        Parameters:

        - `cls`- The class to dump the contents of. NOTE: this is the only argument, since it is a staticmethod.
        """
        print(" In dumpCls[%s] bases = %s " %
              (cls.__name__, cls.__bases__))

        for key, value in cls.__dict__.items():
            print(" %s - %s" % (key, repr(value)))

    dumpCls = staticmethod(dumpCls)
554 555 #======================================================= 556 # 557 558 import elementRepresentatives.elementRepresentative 559 from elementRepresentatives.attribute import Attribute 560