1
2
3 """
4 =================
5 pyXSD Version 0.1
6 =================
7
8 PyXSD was developed in order to map XML and the related schema (XSD) files into the
9 programming language Python. The program builds a Pythonic representation of the XML
10 tree according to the specifications in the schema and raises non-fatal parser errors
11 whenever possible in order to help the user validate their XML document. The program
12 allows the user to specify *transform* classes, which manipulate and transform the
13 XML tree in various ways. The program then writes the tree back out into XML. This
14 program was written in order to replace many of the old tools that were written in
15 FORTRAN with the more modern XML format and the more modern and powerful Python
16 programming language. PyXSD allows users to create their own transform classes with
17 the help of a transform library. These classes are fairly simple to write, making the
18 system highly adaptable to very specific uses, as one might find in many scientific
19 applications; however, the program has potential uses in other fields, since XML is
20 widely used. The program allows the user to specify the desired transform classes,
21 along with their arguments and sequence of application, so the user can create
22 customised tools. The program can be used either as a standalone command line program
23 or as a library in other programs.
24
25 For more information on pyXSD, see the `pyXSD website <http://pyxsd.org>`_
26 if you haven't already.
27
28 Overview:
29 =========
30
31 - Creates python classes for all types defined in an XSD schema file (xml)
32 - Reads in a xml file and builds a new pythonic tree according to classes. This tree of instances maintains the same overall structure of the original xml document.
33 - Provides some xml/schema parsing with non-fatal errors in order to help the user write a valid xml document, without requiring it
34 - Transforms the pythonic representation according to built-in and add-on 'transform' classes that the user specifies
35 - Sends data to a writer to write the pythonic tree back into an xml file
36
37
38 Features:
39 =========
40
41 - Transforms allow users to easily adapt pyXSD to a vast number of applications
42 + Provides a framework and libraries to write transform so the user can more easily write these transform functions
43 + Allows the user to specify the desired transform classes with arguments and the order in a file so the user can create a sort of custom tool
44 + Allows for transforms that can export to other formats, giving pyXSD powerful flexibility
45 - The pythonic data tree format uses a very simple structure that allows for an easily understood API, so that users can easily manipulate this tree in transforms and use the writer in other programs
46 - Can be used as a standalone program at the command line or as a library in other programs
47 - uses the cElementTree library for fast reading of the xml and xsd files
48
49 """
50
51 __module__ = "pyXSD"
52 __version__ = "0.1"
53 __author__ = "Karl Norby and Mike Summers"
54
55
56 import sets, sys, traceback, imp, urllib, os.path
57
# Bind ``ET`` to the first ElementTree implementation that can be imported.
# The C-accelerated builds are tried first, then the pure-Python ones. Only
# ImportError is treated as "not installed", so any other failure raised
# while importing a candidate module is not silently hidden.
try:
    import cElementTree as ET
except ImportError:
    try:
        from xml.etree import cElementTree as ET
    except ImportError:
        try:
            from xml.etree import ElementTree as ET
        except ImportError:
            try:
                import elementtree.ElementTree as ET
            except ImportError:
                raise ImportError("Your system needs the ElementTree or cElementTree library in order to run this package")
71
72
73 from elementRepresentatives.elementRepresentative import ElementRepresentative
74 from writers.xmlTreeWriter import XmlTreeWriter
75 from schemaBase import SchemaBase
76
78
79 """main class of the program that is in charge of data flow.
80 Has command line support when it is called as a script."""
81
def __init__(self, xmlFileInput, xsdFile=None, xmlFileOutput=False, transformOutputName=None,
             transforms=None, classFile=None, verbose=False, quiet=False):
    """This class is init'ed from the command line normally. Use this information for uses of pyXSD as a library.

    Constructing the object runs the whole pipeline: the xml is read, the
    schema is located and parsed into classes, the xml is parsed against
    those classes, the parsed tree is optionally written out, and finally
    the transforms are executed.

    Parameters:

    `- xmlFileInput`- The filename of the xml file to input. Can include path information. A non-string value is stored as given and passed to the ElementTree parser unchanged.
    `- xsdFile`- The filename/path information for the schema file. Will attempt to use the schemaLocation tag in the xml if not specified.
    `- xmlFileOutput`- location for xml output to be sent after it is parsed. If it is None and `xmlFileInput` is a string, a default name is derived from the input name. Will not output if value is set to _No_Output_
    `- transformOutputName`- location of the xml output after transform.
    `- transforms`- A list containing the transform calls in the order they will be performed. Defaults to an empty list.
    `- classFile`- The location of the overlay class file. Experimental.
    `- verbose`- A boolean value. If set to true, will output more information.
    `- quiet`- A boolean value. If set to true, will output less information and errors than normal.

    """

    self.verbose = verbose
    self.quiet = quiet
    self.classes = {}

    inputIsPath = isinstance(xmlFileInput, basestring)

    if inputIsPath:
        self.xmlFileInput = os.path.abspath(xmlFileInput)
        self.xmlPath, self.xmlFileInputName = os.path.split(self.xmlFileInput)
        # Makes modules that sit next to the xml file importable.
        sys.path.append(self.xmlPath)
    else:
        self.xmlFileInput = xmlFileInput

    self.xsdFile = xsdFile
    self.xmlFileOutput = xmlFileOutput

    self.xmlRoot = self.getXmlTree()

    # A value of None can never equal '_No_Output_', so one test is enough.
    if self.xmlFileOutput is None and inputIsPath:
        self.xmlFileOutput = self.getXmlOutputFileName()

    # A fresh list per instance; a literal [] default would be shared by
    # every instance created without an explicit transforms argument.
    if transforms is None:
        transforms = []
    self.transforms = transforms

    if xsdFile is None:
        self.xsdFile = self.getSchemaInfo('l')
    # NOTE(review): this call is placed at method level, so an explicitly
    # supplied xsdFile is also passed through getSchemaFile() - confirm
    # against the original layout.
    self.getSchemaFile()

    self.nameSpace = self.getSchemaInfo('n')

    self.parseXSD()

    if classFile:
        if self.verbose:
            print("Attempting to load overlay classes from the file '%s'..." % classFile)
        self.loadClassesFromFile(classFile)

    rootInstance = self.parseXML()

    if not self.xmlFileOutput == '_No_Output_':
        rootInstance = self.writeParsedXMLFile(rootInstance)

    self.transformOutputName = transformOutputName
    self.executeAndWriteTransforms(rootInstance)
143
166
167
169 try:
170 self.xsdFile, messages = urllib.urlretrieve(self.xsdFile)
171 except Exception, e:
172 if not self.quiet:
173 print "Warning: Something seems wrong with the xsd filename..."
174 if verbose:
175 print "the function urlretrieve in library 'urllib' enconutered the following errors:"
176 print e
177 return
178
179
181 """
182 Function to write the xml output after it is parsed. Called from the __init__.
183
184 Parameters:
185
186 -`rootInstance`: The root instance of a tree. Must be formatted in program's tree structure.
187
188 """
189 if isinstance(self.xmlFileOutput, basestring):
190 self.xmlFileOutput = open(self.xmlFileOutput, 'w')
191 if self.xmlFileOutput:
192 self.writeXML(rootInstance, self.xmlFileOutput)
193 return rootInstance
194
195
196
198
199 """
200 Reads the given xsd file and creates a set of classes that corespond to the complex and simple type definitions.
201
202 No parameters.
203 """
204 if self.verbose:
205 print "Sending the schema file to the ElementTree Parser..."
206
207
208 tree = ET.parse(self.xsdFile)
209
210 root = tree.getroot()
211
212 if self.verbose:
213 print "Sending the schema ElementTree to the ElementRepresntative module..."
214
215 schemaER = ElementRepresentative.factory(root, None)
216
217 for simpleType in schemaER.simpleTypes.values():
218
219 cls = simpleType.clsFor(self)
220
221 self.classes[simpleType.name] = cls
222
223 if self.verbose:
224 print "Class created for the %s type..." % simpleType.name
225
226 for complexType in schemaER.complexTypes.values():
227
228 cls = complexType.clsFor(self)
229
230 self.classes[complexType.name] = cls
231
232 if self.verbose:
233 print "Class created for the %s type..." % complexType.name
234
235 return
236
237
238
239
241
242 """
243 Reads the given xml file in the context of the xsd file.
244 Produces instances of the above classes.
245 Does validation.
246 returns a a schema instance object.
247
248 no parameters
249 """
250
251 if self.verbose:
252 print "Starting to parse the xml file."
253
254 schemaClass = self.getClasses()['schema']
255
256 schemaClassInstance = schemaClass()
257
258 rootName = self.xmlRoot.tag.split('}')[1]
259
260 topLevelDescriptors = schemaClassInstance._getElements()
261
262 if len(topLevelDescriptors) > 1:
263 if not quiet:
264 print "Error: Invalid XML Schema-there is more than one root element in this document."
265 print "There are %s root elements in this documents:" % repr(len(topLevelDescriptors))
266 for element in topLevelDescriptors:
267 print element.name
268 print "The parser will proceed and attempt to parse only %s" % repr(topLevelDescriptors[0].name)
269 print
270
271 if len(topLevelDescriptors) == 0:
272 raise "Error: Invalid XML Schema-the parser could not find any root elements in the schema"
273 return None
274
275 rootElement = topLevelDescriptors[0]
276 rootElementName = rootElement.name
277
278 if rootElementName == rootName:
279 subCls = rootElement.getType()
280 self.generateCorrectSchemaTags()
281 subInstance = subCls.makeInstanceFromTag(self.xmlRoot)
282 setattr(schemaClassInstance, rootElementName, subInstance)
283
284 return subInstance
285
286
287
316
317
318
def writeXML(self, rootInstance, output):
    """
    Hands a pythonic instance tree over to XmlTreeWriter.

    parameters:

    - `rootInstance`: The root instance of a tree. Must be formatted in program's tree structure.
    - `output`: The file object to write the tree to. A string is taken as a filename and opened for writing first.

    """
    destination = output
    if isinstance(destination, basestring):
        destination = open(destination, 'w')
    # NOTE(review): a file opened here is never closed by this method;
    # whether XmlTreeWriter closes it cannot be seen from this file.
    treeWriter = XmlTreeWriter(rootInstance, destination)
    if self.verbose:
        print("Data sent to the writer...")
334
335
336
337
339 """
340 Returns the dictionary of classes created by ElementRepresentative for each type specified in the schema.
341
342 no parameters
343 """
344 return self.classes
345
346
348 """
349 Loads a file with overlay classes into the class dictionary.
350 Overlay classes add to and override the schema type classes to allow for a user to create their own types without changing the schema file itself.
351 **Consider this functionality experimental.**
352
353 parameters:
354
355 - `classFile`: A string that specifies the location of a user-created overlay class file
356
357 """
358 try:
359 fp, pathname, description = imp.find_module(classFile)
360 except ImportError:
361 raise ImportError, "the file '%s' was not found. Please check your spelling." % classFile
362 module = imp.load_module(classFile, fp, pathname, description)
363 newClasses = {}
364 for var in vars(module):
365 if isinstance(var, object):
366 if issubclass(var, SchemaBase):
367 try:
368 className = var.name
369 except:
370 if not self.quiet:
371 print "Load Error: the class %s must have a 'name' attribute. Will attempt to use '__name__' instead." % var
372 if var.__name__:
373 try:
374 className = var.__name__
375 except:
376 if not self.quiet:
377 print "Load Fail: the class %s could not be loaded. The program will continue to load classes." % var
378 continue
379 newClasses[className] = var
380 if self.self.verbose:
381 print "Loaded the %s class" % className
382 self.classes.update(newClasses)
383
384
386 """
387 Sends the xml file into the ElementTree library's parser. Allows for the program to get the schemaLocation before parsing the xml against the schema.
388
389 No parameters.
390 """
391
392 try:
393
394 tree = ET.parse(self.xmlFileInput)
395 if self.verbose:
396 print "XML file parsed by the ElementTree library Suceessfully..."
397 except Exception,e:
398 print
399 print "Program Error: The ElementTree library's parse function was unable to read your"
400 print "XML File correctly. The following are its errors (the program will halt):"
401 print
402
403 tree = ET.parse(self.xmlFileInput)
404
405 return tree.getroot()
406
407
408
410 """
411 Creates a default name for xml file that is parsed without any transforms.
412 Uses the name from the inputed xml file.
413
414 no parameters
415 """
416
417 inputName = self.xmlFileInput
418
419 path, inputName = os.path.split(inputName)
420
421 inputNameSplit = inputName.split('.')
422
423 nonExtensionName = inputNameSplit[-2]
424
425 nonExtensionName = nonExtensionName + 'Parsed'
426
427 return os.path.join(path, (nonExtensionName + '.xml'))
428
429
431 """
432 Loads a transform class from its class name.
433 The file that it is located in must be the same as the className, except the first
434 letter in the filename must be lowercase. The transform must be located in a
435 directory called `transforms` that is in the installation folder of pyXSD, the
436 directory you called the program from, or in the directory where the xml file is.
437
438 parameters:
439
440 - `className`: A string of the transform class name being called
441 """
442 fileName = className[:1].lower() + className[1:]
443 transformMod = __import__(('transforms' + '.' + fileName), globals(), locals(), [className])
444 return transformMod
445
446
447
448
450 """
451 Calls the transforms specified by the user. Each transform is loaded into memory by
452 getTransformModuleAndLoad(). The transform class is passed the instance of the root
453 element when it is initialized. The transform object is called with the specified
454 arguements and the new root instance is set to whatever the transform returns,
455 which is usually the root, but it is not required. Any user who uses a transform
456 that does not return the root tree instance should be aware that any transform
457 that uses the root instance will fail to work and raise a fatal error. Transforms
458 add a great amount of power to the program, but users might need to tweak their
459 transform calls and any user-written classes in order to get them to work correctly.
460
461 Further documentation is located in the doc/ directory and on the `pyXSD website <http://pyxsd.org>`_.
462 These documents can help users write transform classes and calls.
463
464 Parameters:
465
466 - `transforms`: a list containing the transform calls in the order that they should be called.
467 - `root`: the root instance of a tree. Must be formatted in program's tree structure.
468
469 """
470 currentRoot = root
471 def echoArgs(*args, **kwargs):
472 return args, kwargs
473
474 for transform in transforms:
475
476 if not '(' in sets.Set(transform):
477 raise "Transform Call Error: the transform call '%s' does not use correct syntax." % transform
478
479 transformSplit = transform.split('(')
480 args, kwargs = eval(' echoArgs(' + '('.join(transformSplit[1:]))
481 transformer = self.getTransformModuleAndLoad(transformSplit[0])
482 argString = str(args)
483
484
485 if not len(kwargs) == 0:
486
487 keywordArgString = ""
488 for key, value in kwargs.iteritems():
489 keywordArgString = keywordArgString + '%s=%s,' % (key, value)
490 argString = argString.rstrip(',').rstrip(')') + keywordArgString + ')'
491
492 if self.verbose:
493 print "Starting the transform '%s' with the following args: %s" % (transformSplit[0], argString)
494 currentRoot = eval('transformer.%s(currentRoot)%s' % (transformSplit[0], argString))
495
496 return currentRoot
497
498
499
501 """
502 Creates a default name for xml file that is written after all of the transforms.
503 Uses the name from the inputed xml file.
504
505 no parameters
506 """
507
508 inputName = self.xmlFileInputName
509
510 if '.' in sets.Set(self.xmlFileInputName):
511
512 inputNameSplit = self.xmlFileInputName.split('.')
513
514 nonExtensionName = inputNameSplit[-2]
515
516 else:
517
518 nonExtensionName = self.xmlFileInputName
519
520 nonExtensionName = nonExtensionName + 'Transformed'
521
522 newName = nonExtensionName + '.xml'
523
524 newName = os.path.join(self.xmlPath, self.xmlFileInputName)
525
526 if self.verbose:
527
528 print "Setting the transformed xml file name to the default:", newName
529
530 return newName
531
532
533
def getSchemaInfo(self, nameOrLocation):
    """
    Extracts information from the *schemaLocation* tag or the *noNamespaceSchemaLocation* tag.
    Depending on the value of parameter `nameOrLocation`, the function outputs the namespace,
    schema location, or the tag type. This function is meant for use with other functions
    to easily grab bits of data that are used in various locations in the program.

    The *schemaLocation* value is split on any run of whitespace and must
    yield exactly one namespace/location pair. If it does not, a message
    is printed and the attribute is replaced on the root element by a
    *noNamespaceSchemaLocation* attribute holding the same text. When a
    *noNamespaceSchemaLocation* attribute is present it takes precedence.

    parameters:

    - `nameOrLocation`: a one letter string that is either 'l', 'n', or 't'. If the variable is
      'l', the location of the schema is returned. If it is 'n', the namespace is returned, if there
      is one. 't' returns the tag name to indicate if the xml uses *schemaLocation* or *noNamespaceSchemaLocation*

    Returns None for the namespace of a *noNamespaceSchemaLocation*
    document, for an unrecognised `nameOrLocation`, and when the root
    element carries neither attribute.

    """

    xsiNS = 'http://www.w3.org/2001/XMLSchema-instance'
    locationName = self.makeFullName(xsiNS, 'schemaLocation')
    noNamespaceName = self.makeFullName(xsiNS, 'noNamespaceSchemaLocation')
    attributes = self.xmlRoot.attrib

    schemaLocationSplit = None

    if locationName in attributes:

        schemaLocationTag = attributes[locationName]

        # split() with no argument handles spaces, tabs and line breaks,
        # including a line break followed by indentation.
        schemaLocationSplit = schemaLocationTag.split()

        if len(schemaLocationSplit) != 2:

            print("Parser Error: the 'schemaLocation' tag must be a pair of values seperated by a space or line break")
            print("with the namespace stated first, followed by the location of the schema.")
            print("The program will attempt to use the 'noNamespaceSchemaLocation' tag instead.")
            print("")
            del attributes[locationName]
            attributes[noNamespaceName] = schemaLocationTag
            schemaLocationSplit = None

    if noNamespaceName in attributes:

        answers = {'t': noNamespaceName,
                   'n': None,
                   'l': attributes[noNamespaceName]}

    elif schemaLocationSplit is not None:

        answers = {'t': locationName,
                   'n': schemaLocationSplit[0],
                   'l': schemaLocationSplit[-1]}

    else:

        # Neither attribute is on the root element; this path used to
        # fail on the unbound name 'schemaLocationSplit'.
        return None

    return answers.get(nameOrLocation)
600
601
602
604 """
605 Makes a string that looks similar to some of the names in ElementTree when it contains namespace information.
606
607 parameters:
608
609 - `ns`: a string of the namespace used. For this function, this variable is usually set to a url.
610 - `text`: a string of the name of the tag that the full name is being created for.
611
612 """
613
614
615 return "{%s}%s" % (ns, text)
616
617
618
619 from optparse import OptionParser
620
621
622
624 """
625 This function is called when pyXSD is being called from the command line.
626 It runs the OptionParser found under optparse in the standard library. Some
627 checks are performed on the data collected, and if these checks pass, it initializes
628 the pyXSD class.
629
630 No parameters
631 """
632 usage = "usage: ./pyXSD.py [options] arg"
633 parser = OptionParser(usage, version="PyXSD 0.1")
634
635 parser.add_option("-i", "--inputXml", type="string", dest="inputXmlFile", default="stdin",
636 help="filename for the xml file to read in. Reads from stdin by default." )
637 parser.add_option("-s", "--inputXsd", "--schema", type="string", dest="inputXsdFile", default=None,
638 help="filename for the xsd (schema) file to read in. Trys to determine location from the input xml file by default." )
639
640 parser.add_option("-p", "--parsedXml", "--parsedOutput", type="string", dest="parsedOutputFile", default=None,
641 help="filename for the xml file that contains the parsed output of the xml file, which contains no further transformation. By default, the filename is the xml input filename followed by 'Parsed.'" )
642 parser.add_option("-k", "--ParsedFile", action="store_false", dest="outputParsed", default=True,
643 help="outputs a parsed version of the xml file without transform. Use for debugging. Off by default. If no filename is specified, it will be determined from the xml filename.")
644 parser.add_option("-o", "--transformOutput", action="store", type="string", dest="transformOutputFile", default="stdout",
645 help="filename for the output after the xml has been parsed and transformed. Output is sent to stdout by default. Any specified filename will override this option." )
646 parser.add_option("-d", "--useDefaultFile", action="store_true", dest="transformDefaultOutput",
647 help="Uses the default filename for transformed output. If not specified and no filename is specified, uses stdout" )
648 parser.add_option("-t", "--transform", action="store", type="string", dest="transformCall", default=None,
649 help="the transform class with args. See the documentation for syntax and further information." )
650 parser.add_option("-T", "--transformFile", action="store", type="string", dest="transformFile", default=None,
651 help="file with transform class calls. See the documentation for information on the this function" )
652 parser.add_option("-c", "--overlayClassesFile", action="store", type="string", dest="classFile", default=None,
653 help="Experimental. Allows for user defined schemas to override and add to the types defined in the schema file. See the documentation for information on the this function" )
654 parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False,
655 help="uses the verbose mode. Experts Only. (limited functionality)" )
656 parser.add_option("-q", "--quiet", action="store_true", dest="quiet", default=False,
657 help="uses the quiet mode. Few errors reported. (limited functionalily)" )
658
659 (options, args) = parser.parse_args()
660
661 if len(args) > 0:
662 parser.error("The arguement(s) '%s' is/are not valid. See the syntax help under -h." % args)
663
664 if options.transformDefaultOutput:
665 if options.transformOutputFile == 'stdout':
666 options.transformOutputFile = None
667
668 if options.inputXmlFile == "stdin":
669 if sys.stdin.isatty():
670 parser.error("if no input xml file is specified, the xml must\n\t\t be fed in through the stdin (i.e. pipes)")
671 inputXmlFile = 'stdin.xml'
672 newFile = open(inputXmlFile, 'w')
673 newFile.write(sys.stdin.read())
674 newFile.close()
675 else:
676 inputXmlFile = options.inputXmlFile
677
678 if options.transformCall and options.transformFile:
679 parser.error("A transform file and a transform call cannot both be specified.")
680
681 linestrip = lambda x: x.strip('>').strip('\n').strip()
682 transforms = []
683
684 if options.transformCall:
685 if '>' in sets.Set(options.transformCall):
686 transforms = options.transformCall.split('>')
687 transforms = map(linestrip, transforms)
688 else:
689 transforms.append(options.transformCall)
690 if options.transformFile:
691 transformFile = open(options.transformFile, 'r')
692 transforms = transformFile.readlines()
693 transformFile.close()
694 transforms = map(linestrip, transforms)
695
696 if options.outputParsed:
697 options.parsedOutputFile = "_No_Output_"
698
699 if options.quiet and options.verbose:
700 parser.error("Both the verbose mode and the quiet mode cannot be on at the same time")
701 xmlParser = PyXSD(inputXmlFile, options.inputXsdFile, options.parsedOutputFile, options.transformOutputFile, transforms, options.classFile, options.verbose, options.quiet)
702
# Start the command line interface only when this file is run as a script,
# never when it is imported as a library.
if __name__ == "__main__":
    main()
706