1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.1"
61 __versionTime__ = "21 Mar 2016 05:04 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import functools
74 import itertools
75 import traceback
76
77
78
79 __all__ = [
80 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
81 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
82 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
83 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
84 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
85 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
86 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
87 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
88 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
89 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
90 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
91 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
92 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
93 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
94 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
95 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
96 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
97 ]
98
99 PY_3 = sys.version.startswith('3')
100 if PY_3:
101 _MAX_INT = sys.maxsize
102 basestring = str
103 unichr = chr
104 _ustr = str
105
106
107 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
108
109 else:
110 _MAX_INT = sys.maxint
111 range = xrange
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
116 then encodes it with the default encoding, rewriting characters that cannot be encoded as backslash-u escapes.
117 """
118 if isinstance(obj,unicode):
119 return obj
120
121 try:
122
123
124 return str(obj)
125
126 except UnicodeEncodeError:
127
128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
129 xmlcharref = Regex('&#\d+;')
130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
131 return xmlcharref.transformString(ret)
132
133
134 singleArgBuiltins = []
135 import __builtin__
136 for fname in "sum len sorted reversed list tuple set any all min max".split():
137 try:
138 singleArgBuiltins.append(getattr(__builtin__,fname))
139 except AttributeError:
140 continue
141
142 _generatorType = type((y for y in range(1)))
145 """Escape &, <, >, ", ', etc. in a string of data."""
146
147
148 from_symbols = '&><"\''
149 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
150 for from_,to_ in zip(from_symbols, to_symbols):
151 data = data.replace(from_, to_)
152 return data
153
156
# Character-set constants used when building Word-style expressions.
nums = string.digits
alphas = string.ascii_uppercase + string.ascii_lowercase
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
# a single backslash, spelled by code point
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
165 """base exception class for all parsing runtime exceptions"""
166
167
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """Record where and why a parse attempt failed.

    When C{msg} is omitted, C{pstr} is taken to be the message itself and
    the stored parse string is left empty; otherwise C{pstr} is the string
    being parsed and C{msg} is the failure message.

    @param pstr: the parsed string, or the message when C{msg} is None
    @param loc: character offset of the failure (default 0)
    @param msg: failure message, or None
    @param elem: the parser element associated with the failure, if any
    """
    # a lone string argument doubles as the message
    message, text = (pstr, "") if msg is None else (msg, pstr)
    self.loc = loc
    self.msg = message
    self.pstr = text
    self.parserElement = elem
177
179 """supported attributes by name are:
180 - lineno - returns the line number of the exception text
181 - col - returns the column number of the exception text
182 - line - returns the line containing the exception text
183 """
184 if( aname == "lineno" ):
185 return lineno( self.loc, self.pstr )
186 elif( aname in ("col", "column") ):
187 return col( self.loc, self.pstr )
188 elif( aname == "line" ):
189 return line( self.loc, self.pstr )
190 else:
191 raise AttributeError(aname)
192
194 return "%s (at char %d), (line:%d, col:%d)" % \
195 ( self.msg, self.loc, self.lineno, self.column )
209 return "lineno col line".split() + dir(type(self))
210
212 """exception thrown when parse expressions don't match class;
213 supported attributes by name are:
214 - lineno - returns the line number of the exception text
215 - col - returns the column number of the exception text
216 - line - returns the line containing the exception text
217 """
218 pass
219
221 """user-throwable exception thrown when inconsistent parse content
222 is found; stops all parsing immediately"""
223 pass
224
226 """just like C{L{ParseFatalException}}, but thrown internally when an
227 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
228 an unbacktrackable syntax error has been found"""
232
247 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__(self, parseElementList):
    """Keep the supplied list of parser elements as this exception's trace.

    @param parseElementList: the parser elements to report
    """
    trace = parseElementList
    self.parseElementTrace = trace
250
252 return "RecursiveGrammarException: %s" % self.parseElementTrace
253
260 return repr(self.tup)
262 self.tup = (self.tup[0],i)
263
265 """Structured parse results, to provide multiple means of access to the parsed data:
266 - as a list (C{len(results)})
267 - by list index (C{results[0], results[1]}, etc.)
268 - by attribute (C{results.<resultsName>})
269 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Allocate a results object, or hand back C{toklist} if it already is one.

    A newly allocated instance is flagged so that C{__init__} runs its
    one-time setup; an instance passed through unchanged keeps whatever
    flag it already carries.
    """
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        fresh.__doinit = True
        return fresh
    return toklist
276
277
278
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Populate the token list and, when a results name is given, register it.

    The first section runs only once per object (guarded by the flag set in
    C{__new__}); the naming section runs on every call.

    @param toklist: a list (copied), a generator (consumed), None (treated
        as empty) or any other single value (wrapped in a list)
    @param name: results name to register; falsy values skip naming
    @param asList: when true the named value is stored as a nested
        C{ParseResults}; otherwise the first token is stored directly
    @param modal: when false, C{name} is recorded as an accumulating name
    @param isinstance: the builtin, bound as a default argument
    """
    if self.__doinit:
        # one-time setup of the private state
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    # nothing more to do without a usable results name
    if not name:
        return

    if not modal:
        self.__accumNames[name] = 0
    if isinstance(name, int):
        name = _ustr(name)
    self.__name = name

    # empty input (None, '' or []) gets a name but no named entry
    is_blank = (isinstance(toklist, (type(None), basestring, list))
                and toklist in (None, '', []))
    if is_blank:
        return

    if isinstance(toklist, basestring):
        toklist = [toklist]

    if not asList:
        try:
            self[name] = toklist[0]
        except (KeyError, TypeError, IndexError):
            self[name] = toklist
        return

    if isinstance(toklist, ParseResults):
        nested = toklist.copy()
    else:
        nested = ParseResults(toklist[0])
    self[name] = _ParseResultsWithOffset(nested, 0)
    self[name].__name = name
317
319 if isinstance( i, (int,slice) ):
320 return self.__toklist[i]
321 else:
322 if i not in self.__accumNames:
323 return self.__tokdict[i][-1][0]
324 else:
325 return ParseResults([ v[0] for v in self.__tokdict[i] ])
326
328 if isinstance(v,_ParseResultsWithOffset):
329 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 sub = v[0]
331 elif isinstance(k,(int,slice)):
332 self.__toklist[k] = v
333 sub = v
334 else:
335 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
336 sub = v
337 if isinstance(sub,ParseResults):
338 sub.__parent = wkref(self)
339
341 if isinstance(i,(int,slice)):
342 mylen = len( self.__toklist )
343 del self.__toklist[i]
344
345
346 if isinstance(i, int):
347 if i < 0:
348 i += mylen
349 i = slice(i, i+1)
350
351 removed = list(range(*i.indices(mylen)))
352 removed.reverse()
353
354
355
356
357
358
359 for name,occurrences in self.__tokdict.items():
360 for j in removed:
361 for k, (value, position) in enumerate(occurrences):
362 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
363 else:
364 del self.__tokdict[i]
365
367 return k in self.__tokdict
368
def __len__(self):
    """Return the number of tokens in the positional token list."""
    return len(self.__toklist)

def __bool__(self):
    """Return True when the positional token list is non-empty."""
    return bool(self.__toklist)

# same truth test under the alternate special-method name
__nonzero__ = __bool__

def __iter__(self):
    """Iterate over the positional tokens in order."""
    return iter(self.__toklist)

def __reversed__(self):
    """Iterate over a reversed copy of the positional tokens."""
    backwards = self.__toklist[::-1]
    return iter(backwards)
375 """Returns all named result keys."""
376 if hasattr(self.__tokdict, "iterkeys"):
377 return self.__tokdict.iterkeys()
378 else:
379 return iter(self.__tokdict)
380
382 """Returns all named result values."""
383 return (self[k] for k in self.iterkeys())
384
386 return ((k, self[k]) for k in self.iterkeys())
387
388 if PY_3:
389 keys = iterkeys
390 values = itervalues
391 items = iteritems
392 else:
394 """Returns all named result keys."""
395 return list(self.iterkeys())
396
398 """Returns all named result values."""
399 return list(self.itervalues())
400
402 """Returns all named result keys and values as a list of tuples."""
403 return list(self.iteritems())
404
406 """Since keys() returns an iterator, this method is helpful in bypassing
407 code that looks for the existence of any defined results names."""
408 return bool(self.__tokdict)
409
def pop(self, *args, **kwargs):
    """Remove and return an item, using list or dict semantics.

    With no argument the last token is popped. An integer argument, a
    lone argument, or a key that is present is looked up with C{self[key]},
    deleted, and returned. Otherwise the second positional argument (or the
    C{default} keyword) is returned, as with C{dict.pop()}.

    @raise TypeError: if any keyword other than C{default} is passed
    """
    positional = args if args else [-1]
    for kw, kwval in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        positional = (positional[0], kwval)

    key = positional[0]
    lookup_wanted = (isinstance(key, int)
                     or len(positional) == 1
                     or key in self)
    if not lookup_wanted:
        # key is absent and a fallback was supplied
        return positional[1]

    found = self[key]
    del self[key]
    return found
436
def get(self, key, defaultValue=None):
    """Return the named result for C{key}, or C{defaultValue} (None unless
    given) when no such name is present."""
    return self[key] if key in self else defaultValue
445
def insert(self, index, insStr):
    """Insert C{insStr} at position C{index} in the list of parsed tokens,
    then shift the recorded offsets of named results located after it."""
    tokens = self.__toklist
    tokens.insert(index, insStr)

    # NOTE(review): an entry whose position equals index keeps its offset
    # (strict '>' comparison) -- confirm that this is intended.
    for slots in self.__tokdict.values():
        for slot_no, (value, position) in enumerate(slots):
            shift = 1 if position > index else 0
            slots[slot_no] = _ParseResultsWithOffset(value, position + shift)
457
459 """Add single element to end of ParseResults list of elements."""
460 self.__toklist.append(item)
461
463 """Add sequence of elements to end of ParseResults list of elements."""
464 if isinstance(itemseq, ParseResults):
465 self += itemseq
466 else:
467 self.__toklist.extend(itemseq)
468
470 """Clear all elements and results names."""
471 del self.__toklist[:]
472 self.__tokdict.clear()
473
475 try:
476 return self[name]
477 except KeyError:
478 return ""
479
480 if name in self.__tokdict:
481 if name not in self.__accumNames:
482 return self.__tokdict[name][-1][0]
483 else:
484 return ParseResults([ v[0] for v in self.__tokdict[name] ])
485 else:
486 return ""
487
489 ret = self.copy()
490 ret += other
491 return ret
492
494 if other.__tokdict:
495 offset = len(self.__toklist)
496 addoffset = lambda a: offset if a<0 else a+offset
497 otheritems = other.__tokdict.items()
498 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
499 for (k,vlist) in otheritems for v in vlist]
500 for k,v in otherdictitems:
501 self[k] = v
502 if isinstance(v[0],ParseResults):
503 v[0].__parent = wkref(self)
504
505 self.__toklist += other.__toklist
506 self.__accumNames.update( other.__accumNames )
507 return self
508
510 if isinstance(other,int) and other == 0:
511
512 return self.copy()
513 else:
514
515 return other + self
516
518 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
519
521 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
522
524 out = []
525 for item in self.__toklist:
526 if out and sep:
527 out.append(sep)
528 if isinstance( item, ParseResults ):
529 out += item._asStringList()
530 else:
531 out.append( _ustr(item) )
532 return out
533
535 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
536 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
537
539 """Returns the named parse results as a nested dictionary."""
540 if PY_3:
541 item_fn = self.items
542 else:
543 item_fn = self.iteritems
544
545 def toItem(obj):
546 if isinstance(obj, ParseResults):
547 if obj.haskeys():
548 return obj.asDict()
549 else:
550 return [toItem(v) for v in obj]
551 else:
552 return obj
553
554 return dict((k,toItem(v)) for k,v in item_fn())
555
557 """Returns a new copy of a C{ParseResults} object."""
558 ret = ParseResults( self.__toklist )
559 ret.__tokdict = self.__tokdict.copy()
560 ret.__parent = self.__parent
561 ret.__accumNames.update( self.__accumNames )
562 ret.__name = self.__name
563 return ret
564
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """Render the parse results as an XML string.

    The enclosing tag is C{doctag} when given, else this object's results
    name, else C{ITEM}. Tokens are tagged with the results name registered
    at their offset, or C{ITEM} when unnamed.

    @param doctag: tag name for the enclosing element
    @param namedItemsOnly: when true, unnamed tokens are omitted, and an
        unnamed result with no C{doctag} renders as the empty string
    @param indent: prefix written before this element's tags
    @param formatted: when false, newlines and indentation are left out
    """
    # map token offset -> results name
    name_at = {}
    for key, entries in self.__tokdict.items():
        for entry in entries:
            name_at[entry[1]] = key

    if formatted:
        nl = "\n"
        nextLevelIndent = indent + " "
    else:
        # unformatted output drops all layout whitespace
        nl = ""
        indent = ""
        nextLevelIndent = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out = [nl, indent, "<", selfTag, ">"]

    for i, res in enumerate(self.__toklist):
        tag = name_at.get(i)
        if isinstance(res, ParseResults):
            # nested results render themselves one level deeper
            out.append(res.asXML(tag,
                                 namedItemsOnly and doctag is None,
                                 nextLevelIndent,
                                 formatted))
            continue

        # plain token: wrap it in its own element
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        xmlBodyText = _xml_escape(_ustr(res))
        out.extend([nl, nextLevelIndent, "<", tag, ">",
                    xmlBodyText,
                    "</", tag, ">"])

    out.extend([nl, indent, "</", selfTag, ">"])
    return "".join(out)
623
625 for k,vlist in self.__tokdict.items():
626 for v,loc in vlist:
627 if sub is v:
628 return k
629 return None
630
632 """Returns the results name for this token expression."""
633 if self.__name:
634 return self.__name
635 elif self.__parent:
636 par = self.__parent()
637 if par:
638 return par.__lookup(self)
639 else:
640 return None
641 elif (len(self) == 1 and
642 len(self.__tokdict) == 1 and
643 self.__tokdict.values()[0][0][1] in (0,-1)):
644 return self.__tokdict.keys()[0]
645 else:
646 return None
647
def dump(self, indent='', depth=0):
    """Build a diagnostic listing of this C{ParseResults}.

    The first line is the token list. If named results exist, each is
    listed in sorted order below it, with nested C{ParseResults} dumped
    recursively. Otherwise, if any token is itself a C{ParseResults}, the
    tokens are listed by index.

    @param indent: string prefixed to each line, for embedding in a larger display
    @param depth: current nesting level, used to pad the entries
    @return: the listing as a single string
    """
    pad = ' ' * depth
    out = [indent + _ustr(self.asList())]

    if self.haskeys():
        for k, v in sorted(self.items()):
            out.append('\n')
            out.append("%s%s- %s: " % (indent, pad, k))
            if isinstance(v, ParseResults) and v:
                out.append(v.dump(indent, depth + 1))
            else:
                out.append(_ustr(v))
    elif any(isinstance(vv, ParseResults) for vv in self):
        deeper = ' ' * (depth + 1)
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                shown = vv.dump(indent, depth + 1)
            else:
                shown = _ustr(vv)
            out.append("\n%s%s[%d]:\n%s%s%s" % (indent, pad, i, indent, deeper, shown))

    return "".join(out)
677
def pprint(self, *args, **kwargs):
    """Pretty-print the results as a nested list via the C{pprint} module.

    Any extra positional or keyword arguments are passed straight through
    to C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})
    """
    as_nested_list = self.asList()
    pprint.pprint(as_nested_list, *args, **kwargs)
683
684
686 return ( self.__toklist,
687 ( self.__tokdict.copy(),
688 self.__parent is not None and self.__parent() or None,
689 self.__accumNames,
690 self.__name ) )
691
693 self.__toklist = state[0]
694 (self.__tokdict,
695 par,
696 inAccumNames,
697 self.__name) = state[1]
698 self.__accumNames = {}
699 self.__accumNames.update(inAccumNames)
700 if par is not None:
701 self.__parent = wkref(par)
702 else:
703 self.__parent = None
704
706 return self.__toklist, self.__name, self.__asList, self.__modal
707
709 return (dir(type(self)) + list(self.keys()))
710
# Register ParseResults as a virtual MutableMapping subclass.
# The ABC is looked up in collections.abc first, because the
# collections.MutableMapping alias was removed in Python 3.10;
# Python 2 has no collections.abc, hence the fallback.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
712
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A newline character is reported in the column following the last
    character of the line it terminates, so the result agrees with
    C{lineno} for the same location. (Previously any C{loc} pointing at a
    newline reported column 1, even at the end of a non-empty line.)

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    @param loc: zero-based character offset into C{strg}
    @param strg: the string being examined
    @return: one-based column number of C{loc}
    """
    # rfind gives the index of the newline before loc, or -1 on the first
    # line; the distance from it is the one-based column.
    return loc - strg.rfind("\n", 0, loc)
725
727 """Returns current line number within a string, counting newlines as line separators.
728 The first line is number 1.
729
730 Note: the default parsing behavior is to expand tabs in the input string
731 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
732 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
733 consistent view of the parsed string, the parse location, and line and column
734 positions within the parsed string.
735 """
736 return strg.count("\n",0,loc) + 1
737
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    @param loc: character offset into C{strg}
    @param strg: the string being examined
    @return: the text between the newline before C{loc} and the first
        newline at or after it, excluding both newlines
    """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    # no later newline: the line runs to the end of the string
    return strg[start:stop] if stop >= 0 else strg[start:]
747
749 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
750
752 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
753
755 print ("Exception raised:" + _ustr(exc))
756
758 """'Do-nothing' debug action, to suppress debugging output during parsing."""
759 pass
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783 'decorator to trim function calls to match the arity of the target'
785 if func in singleArgBuiltins:
786 return lambda s,l,t: func(t)
787 limit = [0]
788 foundArity = [False]
789 def wrapper(*args):
790 while 1:
791 try:
792 ret = func(*args[limit[0]:])
793 foundArity[0] = True
794 return ret
795 except TypeError:
796
797 if foundArity[0]:
798 raise
799 else:
800 try:
801 tb = sys.exc_info()[-1]
802 exc_source_line = traceback.extract_tb(tb)[-1][-1]
803 if not exc_source_line.endswith('#~@$^*)+_(&%#!=-`~;:"[]{}'):
804 raise
805 finally:
806 del tb
807
808 if limit[0] <= maxargs:
809 limit[0] += 1
810 continue
811 raise
812 return wrapper
813
815 """Abstract base level parser element class."""
816 DEFAULT_WHITE_CHARS = " \n\t\r"
817 verbose_stacktrace = False
818
819 @staticmethod
824
825 @staticmethod
827 """
828 Set class to be used for inclusion of string literals into a parser.
829 """
830 ParserElement.literalStringClass = cls
831
833 self.parseAction = list()
834 self.failAction = None
835
836 self.strRepr = None
837 self.resultsName = None
838 self.saveAsList = savelist
839 self.skipWhitespace = True
840 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
841 self.copyDefaultWhiteChars = True
842 self.mayReturnEmpty = False
843 self.keepTabs = False
844 self.ignoreExprs = list()
845 self.debug = False
846 self.streamlined = False
847 self.mayIndexError = True
848 self.errmsg = ""
849 self.modalResults = True
850 self.debugActions = ( None, None, None )
851 self.re = None
852 self.callPreparse = True
853 self.callDuringTry = False
854
856 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
857 for the same parsing pattern, using copies of the original parse element."""
858 cpy = copy.copy( self )
859 cpy.parseAction = self.parseAction[:]
860 cpy.ignoreExprs = self.ignoreExprs[:]
861 if self.copyDefaultWhiteChars:
862 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
863 return cpy
864
866 """Define name for this expression, for use in debugging."""
867 self.name = name
868 self.errmsg = "Expected " + self.name
869 if hasattr(self,"exception"):
870 self.exception.msg = self.errmsg
871 return self
872
874 """Define name for referencing matching tokens as a nested attribute
875 of the returned parse results.
876 NOTE: this returns a *copy* of the original C{ParserElement} object;
877 this is so that the client can define a basic element, such as an
878 integer, and reference it in multiple places with different names.
879
880 You can also set results names using the abbreviated syntax,
881 C{expr("name")} in place of C{expr.setResultsName("name")} -
882 see L{I{__call__}<__call__>}.
883 """
884 newself = self.copy()
885 if name.endswith("*"):
886 name = name[:-1]
887 listAllMatches=True
888 newself.resultsName = name
889 newself.modalResults = not listAllMatches
890 return newself
891
893 """Method to invoke the Python pdb debugger when this element is
894 about to be parsed. Set C{breakFlag} to True to enable, False to
895 disable.
896 """
897 if breakFlag:
898 _parseMethod = self._parse
899 def breaker(instring, loc, doActions=True, callPreParse=True):
900 import pdb
901 pdb.set_trace()
902 return _parseMethod( instring, loc, doActions, callPreParse )
903 breaker._originalParseMethod = _parseMethod
904 self._parse = breaker
905 else:
906 if hasattr(self._parse,"_originalParseMethod"):
907 self._parse = self._parse._originalParseMethod
908 return self
909
911 """Define action to perform when successfully matching parse element definition.
912 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
913 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
914 - s = the original string being parsed (see note below)
915 - loc = the location of the matching substring
916 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
917 If the functions in fns modify the tokens, they can return them as the return
918 value from fn, and the modified list of tokens will replace the original.
919 Otherwise, fn does not need to return any value.
920
921 Note: the default parsing behavior is to expand tabs in the input string
922 before starting the parsing process. See L{I{parseString}<parseString>} for more information
923 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
924 consistent view of the parsed string, the parse location, and line and column
925 positions within the parsed string.
926 """
927 self.parseAction = list(map(_trim_arity, list(fns)))
928 self.callDuringTry = kwargs.get("callDuringTry", False)
929 return self
930
932 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
933 self.parseAction += list(map(_trim_arity, list(fns)))
934 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
935 return self
936
938 """Add a boolean predicate function to expression's list of parse actions. See
939 L{I{setParseAction}<setParseAction>}. Optional keyword argument C{message} can
940 be used to define a custom message to be used in the raised exception."""
941 msg = kwargs.get("message") or "failed user-defined condition"
942 for fn in fns:
943 def pa(s,l,t):
944 if not bool(_trim_arity(fn)(s,l,t)):
945 raise ParseException(s,l,msg)
946 return t
947 self.parseAction.append(pa)
948 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
949 return self
950
952 """Define action to perform if parsing fails at this expression.
953 Fail action fn is a callable function that takes the arguments
954 C{fn(s,loc,expr,err)} where:
955 - s = string being parsed
956 - loc = location where expression match was attempted and failed
957 - expr = the parse expression that failed
958 - err = the exception thrown
959 The function returns no value. It may throw C{L{ParseFatalException}}
960 if it is desired to stop parsing immediately."""
961 self.failAction = fn
962 return self
963
965 exprsFound = True
966 while exprsFound:
967 exprsFound = False
968 for e in self.ignoreExprs:
969 try:
970 while 1:
971 loc,dummy = e._parse( instring, loc )
972 exprsFound = True
973 except ParseException:
974 pass
975 return loc
976
978 if self.ignoreExprs:
979 loc = self._skipIgnorables( instring, loc )
980
981 if self.skipWhitespace:
982 wt = self.whiteChars
983 instrlen = len(instring)
984 while loc < instrlen and instring[loc] in wt:
985 loc += 1
986
987 return loc
988
989 - def parseImpl( self, instring, loc, doActions=True ):
991
992 - def postParse( self, instring, loc, tokenlist ):
994
995
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this element at C{loc} and run its parse actions.

    Steps, in order: the start debug action (if set and debugging or a fail
    action is active), C{preParse} (if requested), C{parseImpl},
    C{postParse}, packaging into C{ParseResults}, the parse actions, and
    the success debug action.

    @param instring: the string being parsed
    @param loc: offset at which to attempt the match
    @param doActions: whether parse actions should run (elements with
        C{callDuringTry} set run them regardless)
    @param callPreParse: whether C{preParse} may be called first
    @return: tuple of (location after the match, C{ParseResults})
    @raise ParseException: when C{parseImpl} raises C{IndexError} on a
        guarded path; other parse exceptions propagate unchanged
    """
    debugging = self.debug
    instrumented = debugging or self.failAction

    # the start action fires before any whitespace/ignorable skipping
    if instrumented and self.debugActions[0]:
        self.debugActions[0](instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    tokensStart = preloc

    if instrumented:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            # report the failure to the debug and fail hooks, then re-raise
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        # IndexError is deliberately left unguarded on this path
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName,
                             asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for action in self.parseAction:
                replaced = action(instring, tokensStart, retTokens)
                if replaced is not None:
                    # an action that returns a value replaces the tokens
                    retTokens = ParseResults(
                        replaced,
                        self.resultsName,
                        asList=self.saveAsList and isinstance(replaced, (ParseResults, list)),
                        modal=self.modalResults)
        except ParseBaseException as err:
            # only debugging elements report action failures
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging and self.debugActions[1]:
        self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1067
1073
1075 try:
1076 self.tryParse(instring, loc)
1077 except (ParseException, IndexError):
1078 return False
1079 else:
1080 return True
1081
1082
1083
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results are keyed on (element, string, location, callPreParse,
    doActions) in C{ParserElement._exprArgCache}. A cached success is
    returned with a copy of its tokens; a cached exception is raised again.
    On a miss the real parse runs and its outcome, success or
    C{ParseBaseException}, is stored before being returned or re-raised.
    """
    lookup = (self, instring, loc, callPreParse, doActions)

    if lookup in ParserElement._exprArgCache:
        cached = ParserElement._exprArgCache[lookup]
        if isinstance(cached, Exception):
            raise cached
        end_loc, cached_tokens = cached[0], cached[1]
        # hand out a copy so callers cannot alter the cached tokens
        return (end_loc, cached_tokens.copy())

    try:
        value = self._parseNoCache(instring, loc, doActions, callPreParse)
        ParserElement._exprArgCache[lookup] = (value[0], value[1].copy())
        return value
    except ParseBaseException as pe:
        # clear the traceback before storing the exception in the cache
        pe.__traceback__ = None
        ParserElement._exprArgCache[lookup] = pe
        raise
1100
1101 _parse = _parseNoCache
1102
1103
1104 _exprArgCache = {}
1105 @staticmethod
1108
1109 _packratEnabled = False
1110 @staticmethod
1112 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1113 Repeated parse attempts at the same string location (which happens
1114 often in many complex grammars) can immediately return a cached value,
1115 instead of re-executing parsing/validating code. Memoizing is done of
1116 both valid results and parsing exceptions.
1117
1118 This speedup may break existing programs that use parse actions that
1119 have side-effects. For this reason, packrat parsing is disabled when
1120 you first import pyparsing. To activate the packrat feature, your
1121 program must call the class method C{ParserElement.enablePackrat()}. If
1122 your program uses C{psyco} to "compile as you go", you must call
1123 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1124 Python will crash. For best results, call C{enablePackrat()} immediately
1125 after importing pyparsing.
1126 """
1127 if not ParserElement._packratEnabled:
1128 ParserElement._packratEnabled = True
1129 ParserElement._parse = ParserElement._parseCache
1130
1132 """Execute the parse expression with the given string.
1133 This is the main interface to the client code, once the complete
1134 expression has been built.
1135
1136 If you want the grammar to require that the entire input string be
1137 successfully parsed, then set C{parseAll} to True (equivalent to ending
1138 the grammar with C{L{StringEnd()}}).
1139
1140 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1141 in order to report proper column numbers in parse actions.
1142 If the input string contains tabs and
1143 the grammar uses parse actions that use the C{loc} argument to index into the
1144 string being parsed, you can ensure you have a consistent view of the input
1145 string by:
1146 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1147 (see L{I{parseWithTabs}<parseWithTabs>})
1148 - define your parse action using the full C{(s,loc,toks)} signature, and
1149 reference the input string using the parse action's C{s} argument
1150 - explicitly expand the tabs in your input string before calling
1151 C{parseString}
1152 """
1153 ParserElement.resetCache()
1154 if not self.streamlined:
1155 self.streamline()
1156
1157 for e in self.ignoreExprs:
1158 e.streamline()
1159 if not self.keepTabs:
1160 instring = instring.expandtabs()
1161 try:
1162 loc, tokens = self._parse( instring, 0 )
1163 if parseAll:
1164 loc = self.preParse( instring, loc )
1165 se = Empty() + StringEnd()
1166 se._parse( instring, loc )
1167 except ParseBaseException as exc:
1168 if ParserElement.verbose_stacktrace:
1169 raise
1170 else:
1171
1172 raise exc
1173 else:
1174 return tokens
1175
1177 """Scan the input string for expression matches. Each match will return the
1178 matching tokens, start location, and end location. May be called with optional
1179 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1180 C{overlap} is specified, then overlapping matches will be reported.
1181
1182 Note that the start and end locations are reported relative to the string
1183 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1184 strings with embedded tabs."""
1185 if not self.streamlined:
1186 self.streamline()
1187 for e in self.ignoreExprs:
1188 e.streamline()
1189
1190 if not self.keepTabs:
1191 instring = _ustr(instring).expandtabs()
1192 instrlen = len(instring)
1193 loc = 0
1194 preparseFn = self.preParse
1195 parseFn = self._parse
1196 ParserElement.resetCache()
1197 matches = 0
1198 try:
1199 while loc <= instrlen and matches < maxMatches:
1200 try:
1201 preloc = preparseFn( instring, loc )
1202 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1203 except ParseException:
1204 loc = preloc+1
1205 else:
1206 if nextLoc > loc:
1207 matches += 1
1208 yield tokens, preloc, nextLoc
1209 if overlap:
1210 nextloc = preparseFn( instring, loc )
1211 if nextloc > loc:
1212 loc = nextLoc
1213 else:
1214 loc += 1
1215 else:
1216 loc = nextLoc
1217 else:
1218 loc = preloc+1
1219 except ParseBaseException as exc:
1220 if ParserElement.verbose_stacktrace:
1221 raise
1222 else:
1223
1224 raise exc
1225
1258
1260 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1261 to match the given parse expression. May be called with optional
1262 C{maxMatches} argument, to clip searching after 'n' matches are found.
1263 """
1264 try:
1265 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1266 except ParseBaseException as exc:
1267 if ParserElement.verbose_stacktrace:
1268 raise
1269 else:
1270
1271 raise exc
1272
1274 """Implementation of + operator - returns C{L{And}}"""
1275 if isinstance( other, basestring ):
1276 other = ParserElement.literalStringClass( other )
1277 if not isinstance( other, ParserElement ):
1278 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1279 SyntaxWarning, stacklevel=2)
1280 return None
1281 return And( [ self, other ] )
1282
1284 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1285 if isinstance( other, basestring ):
1286 other = ParserElement.literalStringClass( other )
1287 if not isinstance( other, ParserElement ):
1288 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1289 SyntaxWarning, stacklevel=2)
1290 return None
1291 return other + self
1292
1294 """Implementation of - operator, returns C{L{And}} with error stop"""
1295 if isinstance( other, basestring ):
1296 other = ParserElement.literalStringClass( other )
1297 if not isinstance( other, ParserElement ):
1298 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1299 SyntaxWarning, stacklevel=2)
1300 return None
1301 return And( [ self, And._ErrorStop(), other ] )
1302
1304 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1305 if isinstance( other, basestring ):
1306 other = ParserElement.literalStringClass( other )
1307 if not isinstance( other, ParserElement ):
1308 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1309 SyntaxWarning, stacklevel=2)
1310 return None
1311 return other - self
1312
1314 """Implementation of * operator, allows use of C{expr * 3} in place of
1315 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1316 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1317 may also include C{None} as in:
1318 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1319 to C{expr*n + L{ZeroOrMore}(expr)}
1320 (read as "at least n instances of C{expr}")
1321 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1322 (read as "0 to n instances of C{expr}")
1323 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1324 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1325
1326 Note that C{expr*(None,n)} does not raise an exception if
1327 more than n exprs exist in the input stream; that is,
1328 C{expr*(None,n)} does not enforce a maximum number of expr
1329 occurrences. If this behavior is desired, then write
1330 C{expr*(None,n) + ~expr}
1331
1332 """
1333 if isinstance(other,int):
1334 minElements, optElements = other,0
1335 elif isinstance(other,tuple):
1336 other = (other + (None, None))[:2]
1337 if other[0] is None:
1338 other = (0, other[1])
1339 if isinstance(other[0],int) and other[1] is None:
1340 if other[0] == 0:
1341 return ZeroOrMore(self)
1342 if other[0] == 1:
1343 return OneOrMore(self)
1344 else:
1345 return self*other[0] + ZeroOrMore(self)
1346 elif isinstance(other[0],int) and isinstance(other[1],int):
1347 minElements, optElements = other
1348 optElements -= minElements
1349 else:
1350 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1351 else:
1352 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1353
1354 if minElements < 0:
1355 raise ValueError("cannot multiply ParserElement by negative value")
1356 if optElements < 0:
1357 raise ValueError("second tuple value must be greater or equal to first tuple value")
1358 if minElements == optElements == 0:
1359 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1360
1361 if (optElements):
1362 def makeOptionalList(n):
1363 if n>1:
1364 return Optional(self + makeOptionalList(n-1))
1365 else:
1366 return Optional(self)
1367 if minElements:
1368 if minElements == 1:
1369 ret = self + makeOptionalList(optElements)
1370 else:
1371 ret = And([self]*minElements) + makeOptionalList(optElements)
1372 else:
1373 ret = makeOptionalList(optElements)
1374 else:
1375 if minElements == 1:
1376 ret = self
1377 else:
1378 ret = And([self]*minElements)
1379 return ret
1380
1383
1385 """Implementation of | operator - returns C{L{MatchFirst}}"""
1386 if isinstance( other, basestring ):
1387 other = ParserElement.literalStringClass( other )
1388 if not isinstance( other, ParserElement ):
1389 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1390 SyntaxWarning, stacklevel=2)
1391 return None
1392 return MatchFirst( [ self, other ] )
1393
1395 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1396 if isinstance( other, basestring ):
1397 other = ParserElement.literalStringClass( other )
1398 if not isinstance( other, ParserElement ):
1399 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1400 SyntaxWarning, stacklevel=2)
1401 return None
1402 return other | self
1403
1405 """Implementation of ^ operator - returns C{L{Or}}"""
1406 if isinstance( other, basestring ):
1407 other = ParserElement.literalStringClass( other )
1408 if not isinstance( other, ParserElement ):
1409 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1410 SyntaxWarning, stacklevel=2)
1411 return None
1412 return Or( [ self, other ] )
1413
1415 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1416 if isinstance( other, basestring ):
1417 other = ParserElement.literalStringClass( other )
1418 if not isinstance( other, ParserElement ):
1419 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1420 SyntaxWarning, stacklevel=2)
1421 return None
1422 return other ^ self
1423
1425 """Implementation of & operator - returns C{L{Each}}"""
1426 if isinstance( other, basestring ):
1427 other = ParserElement.literalStringClass( other )
1428 if not isinstance( other, ParserElement ):
1429 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1430 SyntaxWarning, stacklevel=2)
1431 return None
1432 return Each( [ self, other ] )
1433
1435 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1436 if isinstance( other, basestring ):
1437 other = ParserElement.literalStringClass( other )
1438 if not isinstance( other, ParserElement ):
1439 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1440 SyntaxWarning, stacklevel=2)
1441 return None
1442 return other & self
1443
1445 """Implementation of ~ operator - returns C{L{NotAny}}"""
1446 return NotAny( self )
1447
1449 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1450 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1451 could be written as::
1452 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1453
1454 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1455 passed as C{True}.
1456
1457 If C{name} is omitted, same as calling C{L{copy}}.
1458 """
1459 if name is not None:
1460 return self.setResultsName(name)
1461 else:
1462 return self.copy()
1463
1465 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1466 cluttering up returned output.
1467 """
1468 return Suppress( self )
1469
1471 """Disables the skipping of whitespace before matching the characters in the
1472 C{ParserElement}'s defined pattern. This is normally only used internally by
1473 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1474 """
1475 self.skipWhitespace = False
1476 return self
1477
1479 """Overrides the default whitespace chars
1480 """
1481 self.skipWhitespace = True
1482 self.whiteChars = chars
1483 self.copyDefaultWhiteChars = False
1484 return self
1485
1487 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1488 Must be called before C{parseString} when the input grammar contains elements that
1489 match C{<TAB>} characters."""
1490 self.keepTabs = True
1491 return self
1492
1494 """Define expression to be ignored (e.g., comments) while doing pattern
1495 matching; may be called repeatedly, to define multiple comment or other
1496 ignorable patterns.
1497 """
1498 if isinstance(other, basestring):
1499 other = Suppress(other)
1500
1501 if isinstance( other, Suppress ):
1502 if other not in self.ignoreExprs:
1503 self.ignoreExprs.append(other)
1504 else:
1505 self.ignoreExprs.append( Suppress( other.copy() ) )
1506 return self
1507
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install the three debugging callbacks and switch debugging on.

    Any action given as a false value (for example C{None}) is replaced by
    the matching module-level default action.  Returns C{self}.
    """
    onStart = startAction if startAction else _defaultStartDebugAction
    onSuccess = successAction if successAction else _defaultSuccessDebugAction
    onException = exceptionAction if exceptionAction else _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1515
1517 """Enable display of debugging messages while doing pattern matching.
1518 Set C{flag} to True to enable, False to disable."""
1519 if flag:
1520 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1521 else:
1522 self.debug = False
1523 return self
1524
1527
1530
1532 self.streamlined = True
1533 self.strRepr = None
1534 return self
1535
1538
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - not read by this implementation (default=C{None})

    Delegates to C{self.checkRecursion}, passing a new empty list.
    """
    self.checkRecursion( [] )
1542
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a filename
       to be opened in text mode
     - parseAll - (default=False) - passed through to C{parseString}

    If a filename is specified (instead of a file object), the entire file
    is opened, read, and closed before parsing.  Returns whatever
    C{parseString} returns; C{ParseBaseException}s raised by it propagate
    to the caller.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename; the with-block
        # closes the file even if reading fails
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # raise the exception object explicitly instead of a bare raise
            # (presumably to shorten the reported stack trace - TODO confirm)
            raise exc
1562
1564 if isinstance(other, ParserElement):
1565 return self is other or vars(self) == vars(other)
1566 elif isinstance(other, basestring):
1567 try:
1568 self.parseString(_ustr(other), parseAll=True)
1569 return True
1570 except ParseBaseException:
1571 return False
1572 else:
1573 return super(ParserElement,self)==other
1574
1576 return not (self == other)
1577
1579 return hash(id(self))
1580
1582 return self == other
1583
1585 return not (self == other)
1586
def runTests(self, tests, parseAll=False):
    """Run this expression against a series of sample strings and print a report.

    For each sample the report contains the sample itself followed by either
    the C{dump()} of the parsed results or, when a C{ParseException} is
    raised, a caret marking the failure location and the exception text.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        # a single multiline string is split into one stripped sample per line
        tests = map(str.strip, tests.splitlines())
    for sample in tests:
        report = [sample]
        try:
            parsed = self.parseString(sample, parseAll=parseAll)
            report.append(parsed.dump())
        except ParseException as err:
            if '\n' not in sample:
                report.append(' '*err.loc + '^')
            else:
                # multiline sample: show only the offending line, caret under the column
                report.append(line(err.loc, sample))
                report.append(' '*(col(err.loc,sample)-1) + '^')
            report.append(str(err))
        report.append('')
        print('\n'.join(report))
1611
1612
1613 -class Token(ParserElement):
1614 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1617
1618
1619 -class Empty(Token):
1620 """An empty token, will always match."""
1622 super(Empty,self).__init__()
1623 self.name = "Empty"
1624 self.mayReturnEmpty = True
1625 self.mayIndexError = False
1626
1629 """A token that will never match."""
1631 super(NoMatch,self).__init__()
1632 self.name = "NoMatch"
1633 self.mayReturnEmpty = True
1634 self.mayIndexError = False
1635 self.errmsg = "Unmatchable token"
1636
1637 - def parseImpl( self, instring, loc, doActions=True ):
1639
1642 """Token to exactly match a specified string."""
1644 super(Literal,self).__init__()
1645 self.match = matchString
1646 self.matchLen = len(matchString)
1647 try:
1648 self.firstMatchChar = matchString[0]
1649 except IndexError:
1650 warnings.warn("null string passed to Literal; use Empty() instead",
1651 SyntaxWarning, stacklevel=2)
1652 self.__class__ = Empty
1653 self.name = '"%s"' % _ustr(self.match)
1654 self.errmsg = "Expected " + self.name
1655 self.mayReturnEmpty = False
1656 self.mayIndexError = False
1657
1658
1659
1660
1661
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text C{self.match} at C{loc}.

    Returns C{(loc + self.matchLen, self.match)} on success and raises
    C{ParseException} otherwise.  C{instring[loc]} is indexed directly, so
    a C{loc} past the end of the string raises C{IndexError}.
    """
    # compare the first character before doing the full startswith test
    if instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1667 _L = Literal
1668 ParserElement.literalStringClass = Literal
1671 """Token to exactly match a specified string as a keyword, that is, it must be
1672 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1673 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1674 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1675 Accepts two optional constructor arguments in addition to the keyword string:
1676 C{identChars} is a string of characters that would be valid identifier characters,
1677 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1678 matching, default is C{False}.
1679 """
1680 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1681
1683 super(Keyword,self).__init__()
1684 self.match = matchString
1685 self.matchLen = len(matchString)
1686 try:
1687 self.firstMatchChar = matchString[0]
1688 except IndexError:
1689 warnings.warn("null string passed to Keyword; use Empty() instead",
1690 SyntaxWarning, stacklevel=2)
1691 self.name = '"%s"' % self.match
1692 self.errmsg = "Expected " + self.name
1693 self.mayReturnEmpty = False
1694 self.mayIndexError = False
1695 self.caseless = caseless
1696 if caseless:
1697 self.caselessmatch = matchString.upper()
1698 identChars = identChars.upper()
1699 self.identChars = set(identChars)
1700
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring non-identifier characters around it.

    The keyword text must be present at C{loc} (compared upper-cased when
    C{self.caseless} is set), and neither the character right after it nor
    the character right before C{loc} may be in C{self.identChars}.
    Returns C{(loc + self.matchLen, self.match)}; raises C{ParseException}
    when any of these conditions fails.
    """
    matchEnd = loc + self.matchLen
    if self.caseless:
        if instring[loc:matchEnd].upper() == self.caselessmatch:
            # keyword ends the string, or is followed by a non-identifier character
            if loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars:
                # keyword starts the string, or is preceded by a non-identifier character
                if loc == 0 or instring[loc-1].upper() not in self.identChars:
                    return matchEnd, self.match
    elif instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            if loc >= len(instring)-self.matchLen or instring[matchEnd] not in self.identChars:
                if loc == 0 or instring[loc-1] not in self.identChars:
                    return matchEnd, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1714
1719
1720 @staticmethod
1725
1727 """Token to match a specified string, ignoring case of letters.
1728 Note: the matched results will always be in the case of the given
1729 match string, NOT the case of the input text.
1730 """
1732 super(CaselessLiteral,self).__init__( matchString.upper() )
1733
1734 self.returnString = matchString
1735 self.name = "'%s'" % self.returnString
1736 self.errmsg = "Expected " + self.name
1737
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.match} at C{loc} by comparing the upper-cased input slice.

    Returns C{(loc + self.matchLen, self.returnString)}, i.e. the text in
    the stored return string's own case, or raises C{ParseException}.
    """
    matchEnd = loc + self.matchLen
    candidate = instring[loc:matchEnd]
    if candidate.upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return matchEnd, self.returnString
1742
1746
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc} without regard to case.

    The upper-cased input slice must equal C{self.caselessmatch}, and the
    keyword must either end the string or be followed by a character whose
    upper-case form is not in C{self.identChars}.  Returns
    C{(loc + self.matchLen, self.match)}; raises C{ParseException} otherwise.
    """
    matchEnd = loc + self.matchLen
    if instring[loc:matchEnd].upper() == self.caselessmatch:
        if loc >= len(instring)-self.matchLen or instring[matchEnd].upper() not in self.identChars:
            return matchEnd, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1752
1754 """Token for matching words composed of allowed character sets.
1755 Defined with string containing all allowed initial characters,
1756 an optional string containing allowed body characters (if omitted,
1757 defaults to the initial character set), and an optional minimum,
1758 maximum, and/or exact length. The default value for C{min} is 1 (a
1759 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1760 are 0, meaning no maximum or exact length restriction. An optional
1761 C{excludeChars} parameter can list characters that might be found in
1762 the input C{bodyChars} string; useful to define a word of all printables
1763 except for one or two characters, for instance.
1764 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Define a word token from sets of allowed characters.

    Parameters:
     - initChars - string of characters allowed as the first character
     - bodyChars - string of characters allowed after the first one; when
       empty or omitted, the (filtered) C{initChars} are used
     - min - minimum length (default=1); a value below 1 raises C{ValueError}
     - max - maximum length; 0 or less means no maximum
     - exact - when greater than 0, used as both minimum and maximum length
     - asKeyword - stored as C{self.asKeyword}; when a regular expression is
       built for this word it is wrapped in word-boundary anchors
     - excludeChars - characters removed from C{initChars} and C{bodyChars}
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    # an exact length overrides both bounds
    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # Only the default-length case with no space among the allowed characters
    # is translated into a regular expression for parseImpl to use.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: if the generated pattern cannot be compiled,
            # parseImpl uses its character-by-character scan instead;
            # KeyboardInterrupt/SystemExit are no longer swallowed here
            self.re = None
1818
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc} and return C{(end location, matched text)}.

    When C{self.re} is set, the compiled regular expression does all the
    work.  Otherwise the first character must be in C{self.initChars} and
    following characters are consumed while they are in C{self.bodyChars},
    up to C{self.maxLen} characters.  C{ParseException} is raised when the
    word is shorter than C{self.minLen}, when a maximum was specified and
    another body character follows, or when C{self.asKeyword} is set and a
    body character adjoins the word on either side.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    wordStart = loc
    allowed = self.bodyChars
    total = len(instring)
    scanLimit = min( wordStart + self.maxLen, total )
    loc = wordStart + 1
    while loc < scanLimit and instring[loc] in allowed:
        loc += 1

    tooShort = loc - wordStart < self.minLen
    # a word longer than the specified maximum is a failure, not a truncated match
    overLong = self.maxSpecified and loc < total and instring[loc] in allowed
    adjoining = self.asKeyword and (
        (wordStart > 0 and instring[wordStart-1] in allowed) or
        (loc < total and instring[loc] in allowed) )

    if tooShort or overLong or adjoining:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[wordStart:loc]
1853
1855 try:
1856 return super(Word,self).__str__()
1857 except:
1858 pass
1859
1860
1861 if self.strRepr is None:
1862
1863 def charsAsStr(s):
1864 if len(s)>4:
1865 return s[:4]+"..."
1866 else:
1867 return s
1868
1869 if ( self.initCharsOrig != self.bodyCharsOrig ):
1870 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1871 else:
1872 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1873
1874 return self.strRepr
1875
1876
1877 -class Regex(Token):
1878 """Token for matching strings that match a given regular expression.
1879 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1880 """
1881 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    which is then used directly; in that case C{flags} is only stored, not
    applied.  Any other type raises C{ValueError}.  An invalid pattern
    string issues a C{SyntaxWarning} and re-raises the C{re} error.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the public name of the exception re.compile raises
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the pattern text itself; str() of a compiled pattern yields
        # the object's repr, not the regular expression
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1915
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regular expression at C{loc}.

    Returns C{(end of match, ParseResults)} where the results hold the whole
    matched text and one named entry per named group of the pattern.
    Raises C{ParseException} when the expression does not match at C{loc}.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    # expose every named group of the pattern as a results name
    for groupName, groupText in found.groupdict().items():
        tokens[groupName] = groupText
    return found.end(), tokens
1928
1930 try:
1931 return super(Regex,self).__str__()
1932 except:
1933 pass
1934
1935 if self.strRepr is None:
1936 self.strRepr = "Re:(%s)" % repr(self.pattern)
1937
1938 return self.strRepr
1939
1942 """Token for matching strings that are delimited by quoting characters.
1943 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    r"""Build the regular expression that recognizes the quoted string.

       Parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
        - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

       Both quote strings are stripped of surrounding whitespace; if either
       is empty afterwards a C{SyntaxWarning} is issued and C{SyntaxError}
       is raised.
    """
    super(QuotedString,self).__init__()

    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # single-line strings additionally exclude CR/LF from the ordinary-character class
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        excludedNewlines = ''
    else:
        self.flags = 0
        excludedNewlines = r'\n\r'

    openingQuote = re.escape(self.quoteChar)
    endQuoteStart = _escapeRegexRangeChars(self.endQuoteChar[0])
    if escChar is not None:
        escInClass = _escapeRegexRangeChars(escChar) or ''
    else:
        escInClass = ''

    pieces = [ r'%s(?:[^%s%s%s]' % (openingQuote, endQuoteStart, excludedNewlines, escInClass) ]

    if len(self.endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote that are not
        # continued by the end quote's next character
        partialEnds = [ "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                     _escapeRegexRangeChars(self.endQuoteChar[i]))
                        for i in range(len(self.endQuoteChar)-1,0,-1) ]
        pieces.append( '|(?:' + ')|(?:'.join(partialEnds) + ')' )
    if escQuote:
        pieces.append( r'|(?:%s)' % re.escape(escQuote) )
    if escChar:
        pieces.append( r'|(?:%s.)' % re.escape(escChar) )
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    pieces.append( r')*%s' % re.escape(self.endQuoteChar) )
    self.pattern = ''.join(pieces)

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2017
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string at C{loc}.

    Returns C{(end of match, text)}.  When C{self.unquoteResults} is set the
    quote delimiters are stripped and, in this order, escaped whitespace
    literals are converted (if C{self.convertWhitespaceEscapes}), escape
    characters are removed (if C{self.escChar}) and escaped quotes are
    replaced by the end quote string (if C{self.escQuote}).  Raises
    C{ParseException} when no quoted string starts at C{loc}.
    """
    # cheap first-character test before running the regular expression
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    else:
        result = None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                ws_map = {
                    r'\t' : '\t',
                    r'\n' : '\n',
                    r'\f' : '\f',
                    r'\r' : '\r',
                }
                for wslit,wschar in ws_map.items():
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters; the replacement template is a raw
            # string so that \g is not treated as a string-literal escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2052
2054 try:
2055 return super(QuotedString,self).__str__()
2056 except:
2057 pass
2058
2059 if self.strRepr is None:
2060 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2061
2062 return self.strRepr
2063
2066 """Token for matching words composed of characters *not* in a given set.
2067 Defined with string containing all disallowed characters, and an optional
2068 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2069 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2070 are 0, meaning no maximum or exact length restriction.
2071 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a token made of characters that are not in C{notChars}.

    Parameters:
     - notChars - string of the disallowed characters
     - min - minimum length (default=1); a value below 1 raises C{ValueError}
     - max - maximum length; 0 or less means no maximum
     - exact - when greater than 0, used as both minimum and maximum length

    Whitespace skipping is switched off for this token.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    # an exact length overrides both bounds
    if exact > 0:
        self.minLen = self.maxLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2095
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from C{loc} until one of C{self.notChars} is met.

    At most C{self.maxLen} characters are taken.  Returns
    C{(end location, matched text)}; raises C{ParseException} when the
    character at C{loc} is disallowed or fewer than C{self.minLen}
    characters were matched.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    scanLimit = min( begin+self.maxLen, len(instring) )
    loc = begin + 1
    while loc < scanLimit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2112
2114 try:
2115 return super(CharsNotIn, self).__str__()
2116 except:
2117 pass
2118
2119 if self.strRepr is None:
2120 if len(self.notChars) > 4:
2121 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2122 else:
2123 self.strRepr = "!W:(%s)" % self.notChars
2124
2125 return self.strRepr
2126
2128 """Special matching class for matching whitespace. Normally, whitespace is ignored
2129 by pyparsing grammars. This class is included when some whitespace structures
2130 are significant. Define with a string containing the whitespace characters to be
2131 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2132 as defined for the C{L{Word}} class."""
2133 whiteStrs = {
2134 " " : "<SPC>",
2135 "\t": "<TAB>",
2136 "\n": "<LF>",
2137 "\r": "<CR>",
2138 "\f": "<FF>",
2139 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Set up a token that matches the whitespace characters in C{ws}.

    Parameters:
     - ws - string of whitespace characters to match
     - min - minimum run length
     - max - maximum run length; 0 (or less) means no upper limit
     - exact - if greater than 0, overrides both C{min} and C{max}
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # the characters being matched are removed from this element's own
    # whitespace characters
    remaining = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(remaining) )

    # name is built from the display labels in White.whiteStrs
    labels = [White.whiteStrs[c] for c in self.matchWhite]
    self.name = "".join(labels)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        # an exact length pins both bounds
        self.maxLen = exact
        self.minLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2159
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of characters drawn from C{self.matchWhite}.

    Returns a tuple of the location just past the run and the matched
    substring.  Raises C{ParseException} if the character at C{loc} is not
    one of the matched characters, or if the run is shorter than
    C{self.minLen}.  C{doActions} is not used here.
    """
    wanted = self.matchWhite
    if instring[ loc ] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan past the maximum length or the end of the input
    limit = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2174
2178 super(_PositionToken,self).__init__()
2179 self.name=self.__class__.__name__
2180 self.mayReturnEmpty = True
2181 self.mayIndexError = False
2182
2184 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2188
2190 if col(loc,instring) != self.col:
2191 instrlen = len(instring)
2192 if self.ignoreExprs:
2193 loc = self._skipIgnorables( instring, loc )
2194 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2195 loc += 1
2196 return loc
2197
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the configured column C{self.col}.

    Returns the new location and the text between C{loc} and that
    location.  Raises C{ParseException} when the column reported by
    C{col()} for C{loc} is already greater than C{self.col}.
    """
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    # move forward by the number of columns still missing
    target = loc + self.col - current
    return target, instring[ loc: target ]
2205
2207 """Matches if current position is at the beginning of a line within the parse string"""
2212
2214 preloc = super(LineStart,self).preParse(instring,loc)
2215 if instring[preloc] == "\n":
2216 loc += 1
2217 return loc
2218
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, without consuming input, when C{loc} is at a line start.

    C{loc} qualifies when it is 0, when it equals the location that
    C{preParse} yields from position 0, or when the preceding character is
    a newline.  Returns C{(loc, [])}; otherwise raises C{ParseException}.
    """
    at_line_start = (
        loc == 0
        or loc == self.preParse( instring, 0 )
        or instring[loc-1] == "\n"
    )
    if not at_line_start:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2225
2227 """Matches if current position is at the end of a line within the parse string"""
2232
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input string.

    Returns C{(loc+1, "\\n")} when the character at C{loc} is a newline and
    C{(loc+1, [])} when C{loc} equals the input length.  Any other position
    raises C{ParseException}.
    """
    end = len(instring)
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    if loc == end:
        return loc+1, []
    # either a non-newline character, or a location past the end
    raise ParseException(instring, loc, self.errmsg, self)
2243
2245 """Matches if current position is at the beginning of the parse string"""
2249
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, without consuming input, at the start of the parse string.

    C{loc} qualifies when it is 0 or when it equals the location that
    C{preParse} yields from position 0.  Returns C{(loc, [])}; otherwise
    raises C{ParseException}.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2256
2258 """Matches if current position is at the end of the parse string"""
2262
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only when C{loc} is at or beyond the end of the parse string.

    Returns C{(loc+1, [])} when C{loc} equals the input length and
    C{(loc, [])} when C{loc} is already past it.  Raises C{ParseException}
    when input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc+1, []
    # loc > end is the only remaining case; the former trailing
    # "else: raise" branch could never be reached
    return loc, []
2272
2274 """Matches if the current position is at the beginning of a Word, and
2275 is not preceded by any character in a given set of C{wordChars}
2276 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2277 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2278 the string being parsed, or at the beginning of a line.
2279 """
2281 super(WordStart,self).__init__()
2282 self.wordChars = set(wordChars)
2283 self.errmsg = "Not at the start of a word"
2284
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, without consuming input, when C{loc} begins a word.

    Location 0 always matches.  Elsewhere the preceding character must not
    be in C{self.wordChars} and the character at C{loc} must be; otherwise
    C{ParseException} is raised.  Returns C{(loc, [])}.
    """
    if loc == 0:
        return loc, []

    word_chars = self.wordChars
    if instring[loc-1] in word_chars or instring[loc] not in word_chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2291
2293 """Matches if the current position is at the end of a Word, and
2294 is not followed by any character in a given set of C{wordChars}
2295 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2296 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2297 the string being parsed, or at the end of a line.
2298 """
2300 super(WordEnd,self).__init__()
2301 self.wordChars = set(wordChars)
2302 self.skipWhitespace = False
2303 self.errmsg = "Not at the end of a word"
2304
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, without consuming input, when C{loc} ends a word.

    An empty input, or a C{loc} at or beyond the end of the input, always
    matches.  Elsewhere the character at C{loc} must not be in
    C{self.wordChars} and the preceding character must be; otherwise
    C{ParseException} is raised.  Returns C{(loc, [])}.
    """
    instrlen = len(instring)
    if instrlen <= 0 or loc >= instrlen:
        return loc, []

    word_chars = self.wordChars
    if instring[loc] in word_chars or instring[loc-1] not in word_chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2312
2315 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in C{self.exprs} as a list.

    Parameters:
     - exprs - a single string (wrapped in a C{Literal}), a generator or
       sequence of expressions (a sequence made up only of strings has
       each string wrapped in a C{Literal}), any other iterable, or a
       single non-iterable object (stored as a one-element list)
     - savelist - passed through to the base class initializer
    """
    # collections.Sequence was removed in Python 3.10; the ABCs live in
    # collections.abc on Python 3 and only in collections on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if a sequence of strings was provided, wrap each with Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2334
2336 return self.exprs[i]
2337
2339 self.exprs.append( other )
2340 self.strRepr = None
2341 return self
2342
2344 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2345 all contained expressions."""
2346 self.skipWhitespace = False
2347 self.exprs = [ e.copy() for e in self.exprs ]
2348 for e in self.exprs:
2349 e.leaveWhitespace()
2350 return self
2351
2353 if isinstance( other, Suppress ):
2354 if other not in self.ignoreExprs:
2355 super( ParseExpression, self).ignore( other )
2356 for e in self.exprs:
2357 e.ignore( self.ignoreExprs[-1] )
2358 else:
2359 super( ParseExpression, self).ignore( other )
2360 for e in self.exprs:
2361 e.ignore( self.ignoreExprs[-1] )
2362 return self
2363
2365 try:
2366 return super(ParseExpression,self).__str__()
2367 except:
2368 pass
2369
2370 if self.strRepr is None:
2371 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2372 return self.strRepr
2373
2375 super(ParseExpression,self).streamline()
2376
2377 for e in self.exprs:
2378 e.streamline()
2379
2380
2381
2382
2383 if ( len(self.exprs) == 2 ):
2384 other = self.exprs[0]
2385 if ( isinstance( other, self.__class__ ) and
2386 not(other.parseAction) and
2387 other.resultsName is None and
2388 not other.debug ):
2389 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2390 self.strRepr = None
2391 self.mayReturnEmpty |= other.mayReturnEmpty
2392 self.mayIndexError |= other.mayIndexError
2393
2394 other = self.exprs[-1]
2395 if ( isinstance( other, self.__class__ ) and
2396 not(other.parseAction) and
2397 other.resultsName is None and
2398 not other.debug ):
2399 self.exprs = self.exprs[:-1] + other.exprs[:]
2400 self.strRepr = None
2401 self.mayReturnEmpty |= other.mayReturnEmpty
2402 self.mayIndexError |= other.mayIndexError
2403
2404 self.errmsg = "Expected " + _ustr(self)
2405
2406 return self
2407
2411
def validate( self, validateTrace=None ):
    """Validate every contained expression, then check for recursion.

    Parameters:
     - validateTrace - list of elements already being validated; it is
       copied, never modified.  Defaults to an empty trace.
    """
    # None replaces the former mutable default ([]), which was shared
    # between calls
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
2417
2422
2423 -class And(ParseExpression):
2424 """Requires all given C{ParseExpression}s to be found in the given order.
2425 Expressions may be separated by whitespace.
2426 May be constructed using the C{'+'} operator.
2427 """
2428
2434
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    The whitespace characters and whitespace-skipping flag are copied from
    the first contained expression, and the element may return empty only
    if every contained expression may.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    leading = self.exprs[0]
    self.setWhitespaceChars( leading.whiteChars )
    self.skipWhitespace = leading.skipWhitespace
    self.callPreparse = True
2441
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order, accumulating tokens.

    The first expression is parsed with C{callPreParse=False}.  Once an
    C{And._ErrorStop} marker has been passed, a failure in any later
    expression is re-raised as C{ParseSyntaxException}.  Returns the final
    location and the accumulated tokens.
    """
    loc, collected = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    past_error_stop = False
    for sub in self.exprs[1:]:
        if isinstance(sub, And._ErrorStop):
            # marker only: switches on the stricter error handling below
            past_error_stop = True
            continue

        if not past_error_stop:
            loc, subtokens = sub._parse( instring, loc, doActions )
        else:
            try:
                loc, subtokens = sub._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )

        if subtokens or subtokens.haskeys():
            collected += subtokens
    return loc, collected
2466
2468 if isinstance( other, basestring ):
2469 other = Literal( other )
2470 return self.append( other )
2471
2473 subRecCheckList = parseElementList[:] + [ self ]
2474 for e in self.exprs:
2475 e.checkRecursion( subRecCheckList )
2476 if not e.mayReturnEmpty:
2477 break
2478
2480 if hasattr(self,"name"):
2481 return self.name
2482
2483 if self.strRepr is None:
2484 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2485
2486 return self.strRepr
2487
2488
2489 -class Or(ParseExpression):
2490 """Requires that at least one C{ParseExpression} is found.
2491 If two expressions match, the expression that matches the longest string will be used.
2492 May be constructed using the C{'^'} operator.
2493 """
def __init__( self, exprs, savelist = False ):
    """Build an alternation from C{exprs}.

    The element may return empty if any alternative may, or if there are
    no alternatives at all.
    """
    super(Or,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2500
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and parse with the one that reaches furthest.

    Each alternative is first tried with C{tryParse}.  The successful ones
    are then parsed for real, longest lookahead first, and the first real
    parse that succeeds is returned.  If none succeeds, the exception with
    the greatest location is re-raised with this element's error message.
    """
    furthest_loc = -1
    furthest_err = None
    candidates = []
    for alt in self.exprs:
        try:
            endloc = alt.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_err = err
                furthest_loc = err.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_err = ParseException(instring,len(instring),alt.errmsg,self)
                furthest_loc = len(instring)
        else:
            # remember how far this alternative got, for ranking below
            candidates.append((endloc, alt))

    # longest match first; the sort is stable, so alternatives reaching
    # the same location keep their listed order
    candidates.sort(key=lambda pair: -pair[0])
    for _, alt in candidates:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthest_loc:
                furthest_err = err
                furthest_loc = err.loc

    if furthest_err is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_err.msg = self.errmsg
    raise furthest_err
2537
2538
2540 if isinstance( other, basestring ):
2541 other = ParserElement.literalStringClass( other )
2542 return self.append( other )
2543
2545 if hasattr(self,"name"):
2546 return self.name
2547
2548 if self.strRepr is None:
2549 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2550
2551 return self.strRepr
2552
2554 subRecCheckList = parseElementList[:] + [ self ]
2555 for e in self.exprs:
2556 e.checkRecursion( subRecCheckList )
2557
2560 """Requires that at least one C{ParseExpression} is found.
2561 If two expressions match, the first one listed is the one that will match.
2562 May be constructed using the C{'|'} operator.
2563 """
def __init__( self, exprs, savelist = False ):
    """Build a first-match alternation from C{exprs}.

    The element may return empty if any alternative may, or if there are
    no alternatives at all.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not self.exprs:
        self.mayReturnEmpty = True
    else:
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
2570
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses.

    If every alternative fails, the exception with the greatest location
    is re-raised with this element's error message.  With no alternatives
    a C{ParseException} is raised directly.
    """
    furthest_loc = -1
    furthest_err = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthest_loc:
                furthest_err = err
                furthest_loc = err.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_err = ParseException(instring,len(instring),alt.errmsg,self)
                furthest_loc = len(instring)

    # only reached when no alternative matched
    if furthest_err is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_err.msg = self.errmsg
    raise furthest_err
2594
2596 if isinstance( other, basestring ):
2597 other = ParserElement.literalStringClass( other )
2598 return self.append( other )
2599
2601 if hasattr(self,"name"):
2602 return self.name
2603
2604 if self.strRepr is None:
2605 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2606
2607 return self.strRepr
2608
2610 subRecCheckList = parseElementList[:] + [ self ]
2611 for e in self.exprs:
2612 e.checkRecursion( subRecCheckList )
2613
2614
2615 -class Each(ParseExpression):
2616 """Requires all given C{ParseExpression}s to be found, but in any order.
2617 Expressions may be separated by whitespace.
2618 May be constructed using the C{'&'} operator.
2619 """
def __init__( self, exprs, savelist = True ):
    """Build an any-order group from C{exprs}.

    The element may return empty only if every contained expression may.
    C{initExprGroups} marks that the expression groups used by
    C{parseImpl} still have to be computed.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2625
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they appear.

    A lookahead pass using C{tryParse} works out the order in which the
    expressions match.  The expressions are then parsed for real in that
    order and their results merged into a single C{ParseResults}.  Raises
    C{ParseException} if a required expression never matched.
    """
    if self.initExprGroups:
        # classify the contained expressions once, on first use
        wrappers = [ e for e in self.exprs if isinstance(e,Optional) ]
        self.opt1map = dict((id(w.expr),w) for w in wrappers)
        unwrapped = [ w.expr for w in wrappers ]
        may_be_empty = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = unwrapped + may_be_empty
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    scanloc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failures = 0
        for cand in candidates:
            try:
                scanloc = cand.tryParse( instring, scanloc )
            except ParseException:
                failures += 1
            else:
                # record the Optional wrapper when the match was its inner expr
                matchOrder.append(self.opt1map.get(id(cand),cand))
                if cand in pendingReqd:
                    pendingReqd.remove(cand)
                elif cand in pendingOpt:
                    pendingOpt.remove(cand)
        if failures == len(candidates):
            break

    if pendingReqd:
        missing = ", ".join(_ustr(e) for e in pendingReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # append Optional wrappers whose inner expression never matched, so
    # that they are still parsed below
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

    resultlist = []
    for matched in matchOrder:
        loc,results = matched._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for res in resultlist:
        # keys already present get old and new values combined, and are
        # re-assigned after the merge
        dups = {}
        for key in res.keys():
            if key in finalResults:
                combined = ParseResults(finalResults[key])
                combined += ParseResults(res[key])
                dups[key] = combined
        finalResults += ParseResults(res)
        for key,value in dups.items():
            finalResults[key] = value
    return loc, finalResults
2684
2686 if hasattr(self,"name"):
2687 return self.name
2688
2689 if self.strRepr is None:
2690 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2691
2692 return self.strRepr
2693
2695 subRecCheckList = parseElementList[:] + [ self ]
2696 for e in self.exprs:
2697 e.checkRecursion( subRecCheckList )
2698
2701 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression.

    A string C{expr} is converted to a C{Literal}.  When C{expr} is not
    C{None}, its parsing flags, whitespace settings and ignore expressions
    are copied onto this element.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    # mirror the wrapped expression's settings
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2716
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    The wrapped expression is parsed with C{callPreParse=False}.  Raises
    C{ParseException} when no expression has been set.
    """
    wrapped = self.expr
    if wrapped is None:
        raise ParseException("",loc,self.errmsg,self)
    return wrapped._parse( instring, loc, doActions, callPreParse=False )
2722
2724 self.skipWhitespace = False
2725 self.expr = self.expr.copy()
2726 if self.expr is not None:
2727 self.expr.leaveWhitespace()
2728 return self
2729
2731 if isinstance( other, Suppress ):
2732 if other not in self.ignoreExprs:
2733 super( ParseElementEnhance, self).ignore( other )
2734 if self.expr is not None:
2735 self.expr.ignore( self.ignoreExprs[-1] )
2736 else:
2737 super( ParseElementEnhance, self).ignore( other )
2738 if self.expr is not None:
2739 self.expr.ignore( self.ignoreExprs[-1] )
2740 return self
2741
2747
2749 if self in parseElementList:
2750 raise RecursiveGrammarException( parseElementList+[self] )
2751 subRecCheckList = parseElementList[:] + [ self ]
2752 if self.expr is not None:
2753 self.expr.checkRecursion( subRecCheckList )
2754
def validate( self, validateTrace=None ):
    """Validate the wrapped expression, then check for recursion.

    Parameters:
     - validateTrace - list of elements already being validated; it is
       copied, never modified.  Defaults to an empty trace.
    """
    # None replaces the former mutable default ([]), which was shared
    # between calls
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion( [] )
2760
2762 try:
2763 return super(ParseElementEnhance,self).__str__()
2764 except:
2765 pass
2766
2767 if self.strRepr is None and self.expr is not None:
2768 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2769 return self.strRepr
2770
2773 """Lookahead matching of the given parse expression. C{FollowedBy}
2774 does *not* advance the parsing position within the input string, it only
2775 verifies that the specified parse expression matches at the current
2776 position. C{FollowedBy} always returns a null token list."""
2780
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc}.

    The location returned by C{tryParse} is discarded, so the result is
    always C{(loc, [])}.  Any exception from C{tryParse} propagates.
    """
    lookahead = self.expr.tryParse
    lookahead( instring, loc )
    return loc, []
2784
2785
2786 -class NotAny(ParseElementEnhance):
2787 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2788 does *not* advance the parsing position within the input string, it only
2789 verifies that the specified parse expression does *not* match at the current
2790 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2791 always returns a null token list. May be constructed using the '~' operator."""
2793 super(NotAny,self).__init__(expr)
2794
2795 self.skipWhitespace = False
2796 self.mayReturnEmpty = True
2797 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2798
2799 - def parseImpl( self, instring, loc, doActions=True ):
2803
2805 if hasattr(self,"name"):
2806 return self.name
2807
2808 if self.strRepr is None:
2809 self.strRepr = "~{" + _ustr(self.expr) + "}"
2810
2811 return self.strRepr
2812
2815 """Repetition of one or more of the given expression.
2816
2817 Parameters:
2818 - expr - expression that must match one or more times
2819 - stopOn - (default=None) - expression for a terminating sentinel
2820 (only required if the sentinel would ordinarily match the repetition
2821 expression)
2822 """
def __init__( self, expr, stopOn=None):
    """Wrap C{expr} for repetition, with an optional C{stopOn} sentinel.

    A string C{stopOn} is converted to a C{Literal}.  C{self.not_ender}
    holds the negated sentinel (built with the C{~} operator), or C{None}
    when no sentinel was given.
    """
    super(OneOrMore, self).__init__(expr)
    if stopOn is None:
        self.not_ender = None
    else:
        sentinel = Literal(stopOn) if isinstance(stopOn, basestring) else stopOn
        self.not_ender = ~sentinel
2829
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as often as it repeats.

    The first match is mandatory and its failure propagates.  Further
    matches are collected until a C{ParseException} or C{IndexError}
    ends the repetition.  When a stop sentinel is set, its negated
    lookahead is tried before every match attempt.
    """
    parse_expr = self.expr._parse
    skip_ignorables = self._skipIgnorables
    guard = None
    if self.not_ender is not None:
        guard = self.not_ender.tryParse

    # mandatory first match: the sentinel check and the parse may both
    # raise, and neither is caught here
    if guard is not None:
        guard(instring, loc)
    loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )
    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if guard is not None:
                guard(instring, loc)
            if has_ignorables:
                preloc = skip_ignorables( instring, loc )
            else:
                preloc = loc
            loc, more = parse_expr( instring, preloc, doActions )
            if more or more.haskeys():
                tokens += more
    except (ParseException,IndexError):
        # the repetition simply ends at the first failure
        pass

    return loc, tokens
2858
2860 if hasattr(self,"name"):
2861 return self.name
2862
2863 if self.strRepr is None:
2864 self.strRepr = "{" + _ustr(self.expr) + "}..."
2865
2866 return self.strRepr
2867
2872
2874 """Optional repetition of zero or more of the given expression.
2875
2876 Parameters:
2877 - expr - expression that must match zero or more times
2878 - stopOn - (default=None) - expression for a terminating sentinel
2879 (only required if the sentinel would ordinarily match the repetition
2880 expression)
2881 """
2882 - def __init__( self, expr, stopOn=None):
2885
2886 - def parseImpl( self, instring, loc, doActions=True ):
2891
2893 if hasattr(self,"name"):
2894 return self.name
2895
2896 if self.strRepr is None:
2897 self.strRepr = "[" + _ustr(self.expr) + "]..."
2898
2899 return self.strRepr
2900
2907
2908 _optionalNotMatched = _NullToken()
2910 """Optional matching of the given expression.
2911
2912 Parameters:
2913 - expr - expression that must match zero or one time
2914 - default (optional) - value to be returned if the optional expression
2915 is not found.
2916 """
2918 super(Optional,self).__init__( expr, savelist=False )
2919 self.defaultValue = default
2920 self.mayReturnEmpty = True
2921
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression, falling back to the default value.

    When the wrapped expression fails with C{ParseException} or
    C{IndexError}, C{loc} is left unchanged.  The tokens are then empty if
    no default was configured; otherwise they hold the default value,
    stored under the wrapped expression's results name when it has one.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ fallback ])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [ fallback ]
    return loc, tokens
2935
2937 if hasattr(self,"name"):
2938 return self.name
2939
2940 if self.strRepr is None:
2941 self.strRepr = "[" + _ustr(self.expr) + "]"
2942
2943 return self.strRepr
2944
2945 -class SkipTo(ParseElementEnhance):
2946 """Token for skipping over all undefined text until the matched expression is found.
2947
2948 Parameters:
2949 - expr - target expression marking the end of the data to be skipped
2950 - include - (default=False) if True, the target expression is also parsed
2951 (the skipped text and target expression are returned as a 2-element list).
2952 - ignore - (default=None) used to define grammars (typically quoted strings and
2953 comments) that might contain false matches to the target expression
2954 - failOn - (default=None) define expressions that are not allowed to be
2955 included in the skipped text; if found before the target expression is found,
2956 the SkipTo is not a match
2957 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Set up skipping of input text up to the target expression C{other}.

    Parameters:
     - other - target expression that ends the skipped text
     - include - stored as C{includeMatch}
     - ignore - stored as C{ignoreExpr}
     - failOn - stored as C{failOn}; a string is converted to a C{Literal}
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.includeMatch = include
    self.asList = False
    self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
    self.errmsg = "No match found for "+_ustr(self.expr)
2970
def parseImpl( self, instring, loc, doActions=True ):
    """Scan forward from C{loc} until the target expression matches.

    Returns the location where scanning stopped and a C{ParseResults}
    holding the skipped text.  When C{includeMatch} is set, the target is
    also parsed and its tokens appended.  Raises C{ParseException} if the
    end of the input is passed without the target matching.
    """
    startloc = loc
    instrlen = len(instring)
    target_parse = self.expr._parse
    fail_on_matches = self.failOn.canParseNext if self.failOn is not None else None
    skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    scanloc = loc
    while scanloc <= instrlen:
        # stop scanning as soon as the failOn expression matches here
        # NOTE(review): leaving via break bypasses the while/else failure
        # branch below - confirm this is the intended failOn behavior
        if fail_on_matches is not None and fail_on_matches(instring, scanloc):
            break

        if skip_ignored is not None:
            # advance past any run of ignorable expressions
            while True:
                try:
                    scanloc = skip_ignored(instring, scanloc)
                except ParseBaseException:
                    break

        try:
            target_parse(instring, scanloc, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # no match here, try one character further on
            scanloc += 1
        else:
            # target matches at scanloc, stop scanning
            break

    else:
        # scanned past the end of the input without matching the target
        raise ParseException(instring, loc, self.errmsg, self)

    # everything from the start up to the stopping point is the skipped text
    loc = scanloc
    skipresult = ParseResults(instring[startloc:loc])

    if self.includeMatch:
        loc, mat = target_parse(instring,loc,doActions,callPreParse=False)
        skipresult += mat

    return loc, skipresult
3017
3018 -class Forward(ParseElementEnhance):
3019 """Forward declaration of an expression to be defined later -
3020 used for recursive grammars, such as algebraic infix notation.
3021 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3022
3023 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3024 Specifically, '|' has a lower precedence than '<<', so that::
3025 fwdExpr << a | b | c
3026 will actually be evaluated as::
3027 (fwdExpr << a) | b | c
3028 thereby leaving b and c out as parseable alternatives. It is recommended that you
3029 explicitly group the values inserted into the C{Forward}::
3030 fwdExpr << (a | b | c)
3031 Converting to use the '<<=' operator instead will avoid this problem.
3032 """
3035
3037 if isinstance( other, basestring ):
3038 other = ParserElement.literalStringClass(other)
3039 self.expr = other
3040 self.strRepr = None
3041 self.mayIndexError = self.expr.mayIndexError
3042 self.mayReturnEmpty = self.expr.mayReturnEmpty
3043 self.setWhitespaceChars( self.expr.whiteChars )
3044 self.skipWhitespace = self.expr.skipWhitespace
3045 self.saveAsList = self.expr.saveAsList
3046 self.ignoreExprs.extend(self.expr.ignoreExprs)
3047 return self
3048
3050 return self << other
3051
3053 self.skipWhitespace = False
3054 return self
3055
3057 if not self.streamlined:
3058 self.streamlined = True
3059 if self.expr is not None:
3060 self.expr.streamline()
3061 return self
3062
def validate( self, validateTrace=None ):
    """Validate the assigned expression, then check for recursion.

    The assigned expression is only validated when this element is not
    already in C{validateTrace}.

    Parameters:
     - validateTrace - list of elements already being validated; it is
       copied, never modified.  Defaults to an empty trace.
    """
    # None replaces the former mutable default ([]), which was shared
    # between calls
    if validateTrace is None:
        validateTrace = []
    if self not in validateTrace:
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
3069
3071 if hasattr(self,"name"):
3072 return self.name
3073 return self.__class__.__name__ + ": ..."
3074
3075
3076 self._revertClass = self.__class__
3077 self.__class__ = _ForwardNoRecurse
3078 try:
3079 if self.expr is not None:
3080 retString = _ustr(self.expr)
3081 else:
3082 retString = "None"
3083 finally:
3084 self.__class__ = self._revertClass
3085 return self.__class__.__name__ + ": " + retString
3086
3088 if self.expr is not None:
3089 return super(Forward,self).copy()
3090 else:
3091 ret = Forward()
3092 ret <<= self
3093 return ret
3094
3098
3100 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3101 - def __init__( self, expr, savelist=False ):
3104
3106 """Converter to concatenate all matching tokens to a single string.
3107 By default, the matching patterns must also be contiguous in the input string;
3108 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3109 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its matched tokens are joined into one string.

    Parameters:
     - expr - expression whose tokens are combined
     - joinString - string placed between the joined tokens
     - adjacent - when true, C{leaveWhitespace()} is applied first
    """
    super(Combine,self).__init__( expr )
    if adjacent:
        self.leaveWhitespace()
    # set after the optional leaveWhitespace() call, so whitespace
    # skipping is always on for the Combine itself
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
3119
3126
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with a single joined string.

    Works on a copy of C{tokenlist} whose list items are removed and
    replaced by the joined string.  The copy is returned wrapped in a list
    when this element has a results name and the copy has keys; otherwise
    it is returned directly.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [ combined ]
    return combined
3136
3137 -class Group(TokenConverter):
3138 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3140 super(Group,self).__init__( expr )
3141 self.saveAsList = True
3142
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
3145
3146 -class Dict(TokenConverter):
3147 """Converter to return a repetitive expression as a list, but also as a dictionary.
3148 Each element can also be referenced using the first token in the expression as its key.
3149 Useful for tabular report scraping when the first column can be used as a item key.
3150 """
3152 super(Dict,self).__init__( expr )
3153 self.saveAsList = True
3154
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to C{tokenlist} for each non-empty token group.

    The key is the group's first token; an integer key is converted to its
    stripped string form.  The value depends on the group:
     - one token: the empty string
     - two tokens, the second not a C{ParseResults}: the second token
     - otherwise: a copy of the group without its first token, or that
       copy's only item when it has exactly one item and no keys

    Returns C{tokenlist}, wrapped in a list when this element has a
    results name.
    """
    for i,tok in enumerate(tokenlist):
        if len(tok) == 0:
            continue

        ikey = tok[0]
        if isinstance(ikey,int):
            ikey = _ustr(tok[0]).strip()

        if len(tok)==1:
            value = ""
        elif len(tok)==2 and not isinstance(tok[1],ParseResults):
            value = tok[1]
        else:
            rest = tok.copy() # ParseResults(i)
            del rest[0]
            if len(rest)!= 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                value = rest
            else:
                value = rest[0]
        # the group's index is stored alongside the value
        tokenlist[ikey] = _ParseResultsWithOffset(value,i)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3178
3181 """Converter for ignoring the results of a parsed expression."""
3182 - def postParse( self, instring, loc, tokenlist ):
3184
3187
3190 """Wrapper for parse actions, to ensure they are only called once."""
3192 self.callable = _trim_arity(methodCall)
3193 self.called = False
3195 if not self.called:
3196 results = self.callable(s,l,t)
3197 self.called = True
3198 return results
3199 raise ParseException(s,l,"")
3202
3204 """Decorator for debugging parse actions."""
3205 f = _trim_arity(f)
def z(*paArgs):
    """Call the wrapped parse action, tracing entry and exit on stderr.

    The last three positional arguments are taken as C{(s, l, t)}.  With
    more than three arguments, the class name of the first one is
    prefixed to the reported function name.  An exception from the parse
    action is reported and re-raised.
    """
    # func_name exists only on Python 2 function objects; __name__ works
    # on Python 2 and 3, with repr() as a fallback for callables that
    # have neither
    thisFunc = getattr(f, "__name__", None) or repr(f)
    s,l,t = paArgs[-3:]
    if len(paArgs)>3:
        thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
    sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
    try:
        ret = f(*paArgs)
    except Exception as exc:
        sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
        raise
    sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
    return ret
3219 try:
3220 z.__name__ = f.__name__
3221 except AttributeError:
3222 pass
3223 return z
3224
3225
3226
3227
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions.

    Parameters:
     - expr - expression for a single list element
     - delim - delimiter between elements (default C{","})
     - combine - when C{True}, the result is wrapped in C{Combine} and the
       delimiters are kept; otherwise the delimiters are suppressed and
       the elements are returned as separate tokens

    The returned expression is named after C{expr} and C{delim}.
    """
    expr_text = _ustr(expr)
    dlName = expr_text+" ["+_ustr(delim)+" "+expr_text+"]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3241
3243 """Helper to define a counted list of expressions.
3244 This helper defines a pattern of the form::
3245 integer expr expr expr...
3246 where the leading integer tells how many expr expressions follow.
3247 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3248 """
3249 arrayExpr = Forward()
3250 def countFieldParseAction(s,l,t):
3251 n = t[0]
3252 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3253 return []
3254 if intExpr is None:
3255 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3256 else:
3257 intExpr = intExpr.copy()
3258 intExpr.setName("arrayLen")
3259 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3260 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3261
3263 ret = []
3264 for i in L:
3265 if isinstance(i,list):
3266 ret.extend(_flatten(i))
3267 else:
3268 ret.append(i)
3269 return ret
3270
3272 """Helper to define an expression that is indirectly defined from
3273 the tokens matched in a previous expression, that is, it looks
3274 for a 'repeat' of a previous expression. For example::
3275 first = Word(nums)
3276 second = matchPreviousLiteral(first)
3277 matchExpr = first + ":" + second
3278 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3279 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3280 If this is not desired, use C{matchPreviousExpr}.
3281 Do *not* use with packrat parsing enabled.
3282 """
3283 rep = Forward()
3284 def copyTokenToRepeater(s,l,t):
3285 if t:
3286 if len(t) == 1:
3287 rep << t[0]
3288 else:
3289
3290 tflat = _flatten(t.asList())
3291 rep << And(Literal(tt) for tt in tflat)
3292 else:
3293 rep << Empty()
3294 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3295 rep.setName('(prev) ' + _ustr(expr))
3296 return rep
3297
3299 """Helper to define an expression that is indirectly defined from
3300 the tokens matched in a previous expression, that is, it looks
3301 for a 'repeat' of a previous expression. For example::
3302 first = Word(nums)
3303 second = matchPreviousExpr(first)
3304 matchExpr = first + ":" + second
3305 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3306 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3307 the expressions are evaluated first, and then compared, so
3308 C{"1"} is compared with C{"10"}.
3309 Do *not* use with packrat parsing enabled.
3310 """
3311 rep = Forward()
3312 e2 = expr.copy()
3313 rep <<= e2
3314 def copyTokenToRepeater(s,l,t):
3315 matchTokens = _flatten(t.asList())
3316 def mustMatchTheseTokens(s,l,t):
3317 theseTokens = _flatten(t.asList())
3318 if theseTokens != matchTokens:
3319 raise ParseException("",0,"")
3320 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3321 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3322 rep.setName('(prev) ' + _ustr(expr))
3323 return rep
3324
3326
3327 for c in r"\^-]":
3328 s = s.replace(c,_bslash+c)
3329 s = s.replace("\n",r"\n")
3330 s = s.replace("\t",r"\t")
3331 return _ustr(s)
3332
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns a C{NoMatch} when no symbols are given; an argument that is neither
       a string, a sequence nor a generator additionally triggers a C{SyntaxWarning}.
    """
    # ``collections.Sequence`` was removed in Python 3.10; the ABCs live in
    # ``collections.abc`` on Python 3 and directly in ``collections`` on Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Drop duplicates, and move any symbol that is masked by an earlier, shorter
    # prefix in front of that prefix so the longer alternative is tried first.
    # After a change the same index is re-examined; it only advances (for/else)
    # once the current symbol conflicts with nothing after it.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best-effort optimization only: fall through to the MatchFirst form,
            # but let KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3396
3398 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3399 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3400 in the proper order. The key pattern can include delimiting markers or punctuation,
3401 as long as they are suppressed, thereby leaving the significant key text. The value
3402 pattern can include named results, so that the C{Dict} results can include named token
3403 fields.
3404 """
3405 return Dict( ZeroOrMore( Group ( key + value ) ) )
3406
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text.  By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def reportLocation(s, loc, t):
        return loc

    def sliceAsString(s, l, t):
        return s[t._original_start:t._original_end]

    def sliceKeepingNames(s, l, t):
        # replace the token list in place with the single slice of source text
        t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    startMarker = Empty().setParseAction(reportLocation)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")
    wrapped.setParseAction(sliceAsString if asString else sliceKeepingNames)
    return wrapped
3430
3432 """Helper to undo pyparsing's default grouping of And expressions, even
3433 if all but one are non-empty."""
3434 return TokenConverter(expr).setParseAction(lambda t:t[0])
3435
3437 """Helper to decorate a returned token with its starting and ending locations in the input string.
3438 This helper adds the following results names:
3439 - locn_start = location where matched expression begins
3440 - locn_end = location where matched expression ends
3441 - value = the actual parsed results
3442
3443 Be careful if the input text contains C{<TAB>} characters, you may want to call
3444 C{L{ParserElement.parseWithTabs}}
3445 """
3446 locator = Empty().setParseAction(lambda s,l,t: l)
3447 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3448
3449
3450
# Shared module-level instances of Empty, LineStart, LineEnd, StringStart and
# StringEnd, each labelled with a readable name via setName().
3451 empty = Empty().setName("empty")
3452 lineStart = LineStart().setName("lineStart")
3453 lineEnd = LineEnd().setName("lineEnd")
3454 stringStart = StringStart().setName("stringStart")
3455 stringEnd = StringEnd().setName("stringEnd")
3456
# Private grammar for a regex-style "[...]" character-set string; srange()
# parses its argument with _reBracketExpr and expands the "body" results.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# \x## or \0x## hex escape -> the character with that code point. The digits are
# taken as everything after the x/X marker; str.lstrip(r'\0x') would strip a
# *set* of characters, which eats leading zero digits (r"\x00" -> "") and leaves
# an upper-case "X" in place, making int(..., 16) fail for both.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
# \0## octal escape -> the character with that code point
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
# any one character of the set: an escape, a printable other than '\' and ']', or a unicode word character
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# "a-z" style range, grouped as [low, high] with the dash suppressed
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3465 r"""Helper to easily define string ranges for use in Word construction. Borrows
3466 syntax from regexp '[]' string range definitions::
3467 srange("[0-9]") -> "0123456789"
3468 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3469 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3470 The input string must be enclosed in []'s, and the returned string is the expanded
3471 character set joined into a single string.
3472 The values enclosed in the []'s may be::
3473 a single character
3474 an escaped character with a leading backslash (such as \- or \])
3475 an escaped hex character with a leading '\x' (\x21, which is a '!' character)
3476 (\0x## is also supported for backwards compatibility)
3477 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3478 a range of any of the above, separated by a dash ('a-z', etc.)
3479 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3480 """
3481 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
3482 try:
3483 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
3484 except:
3485 return ""
3486
3488 """Helper method for defining parse actions that require matching at a specific
3489 column in the input text.
3490 """
3491 def verifyCol(strg,locn,toks):
3492 if col(locn,strg) != n:
3493 raise ParseException(strg,locn,"matched token not at column %d" % n)
3494 return verifyCol
3495
3497 """Helper method for common parse actions that simply return a literal value. Especially
3498 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3499 """
3500 return lambda s,l,t: [replStr]
3501
3503 """Helper parse action for removing quotation marks from parsed quoted strings.
3504 To use, add this parse action to quoted string using::
3505 quotedString.setParseAction( removeQuotes )
3506 """
3507 return t[0][1:-1]
3508
3510 """Helper parse action to convert tokens to upper case."""
3511 return [ tt.upper() for tt in map(_ustr,t) ]
3512
3514 """Helper parse action to convert tokens to lower case."""
3515 return [ tt.lower() for tt in map(_ustr,t) ]
3516
3545
3549
3553
3555 """Helper to create a validating parse action to be used with start tags created
3556 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3557 with a required attribute value, to avoid false matches on common tags such as
3558 C{<TD>} or C{<DIV>}.
3559
3560 Call C{withAttribute} with a series of attribute names and values. Specify the list
3561 of filter attributes names and values as:
3562 - keyword arguments, as in C{(align="right")}, or
3563 - as an explicit dict with C{**} operator, when an attribute name is also a Python
3564 reserved word, as in C{**{"class":"Customer", "align":"right"}}
3565 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3566 For attribute names with a namespace prefix, you must use the second form. Attribute
3567 names are matched insensitive to upper/lower case.
3568
3569 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
3570
3571 To verify that the attribute exists, but without specifying a value, pass
3572 C{withAttribute.ANY_VALUE} as the value.
3573 """
3574 if args:
3575 attrs = args[:]
3576 else:
3577 attrs = attrDict.items()
3578 attrs = [(k,v) for k,v in attrs]
3579 def pa(s,l,tokens):
3580 for attrName,attrValue in attrs:
3581 if attrName not in tokens:
3582 raise ParseException(s,l,"no matching attribute " + attrName)
3583 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3584 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3585 (attrName, tokens[attrName], attrValue))
3586 return pa
3587 withAttribute.ANY_VALUE = object()
3588
def withClass(classname, namespace=''):
    """Shorthand for C{L{withAttribute}} that matches on the C{class} attribute,
       which cannot be passed as a plain keyword argument because C{class} is a
       reserved word in Python. When C{namespace} is given, the attribute
       name checked is C{"<namespace>:class"}.
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    return withAttribute(**{attrName : classname})
3595
# Associativity markers for operator definitions; the operator-precedence
# helper below compares each operator's rightLeftAssoc entry against
# opAssoc.LEFT and opAssoc.RIGHT. Each marker is a distinct object() sentinel.
3596 opAssoc = _Constants()
3597 opAssoc.LEFT = object()
3598 opAssoc.RIGHT = object()
3601 """Helper method for constructing grammars of expressions made up of
3602 operators working in a precedence hierarchy. Operators may be unary or
3603 binary, left- or right-associative. Parse actions can also be attached
3604 to operator expressions.
3605
3606 Parameters:
3607 - baseExpr - expression representing the most basic element for the nested
3608 - opList - list of tuples, one for each operator precedence level in the
3609 expression grammar; each tuple is of the form
3610 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3611 - opExpr is the pyparsing expression for the operator;
3612 may also be a string, which will be converted to a Literal;
3613 if numTerms is 3, opExpr is a tuple of two expressions, for the
3614 two operators separating the 3 terms
3615 - numTerms is the number of terms for this operator (must
3616 be 1, 2, or 3)
3617 - rightLeftAssoc is the indicator whether the operator is
3618 right or left associative, using the pyparsing-defined
3619 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3620 - parseAction is the parse action to be associated with
3621 expressions matching this operator expression (the
3622 parse action tuple member may be omitted)
3623 - lpar - expression for matching left-parentheses (default=Suppress('('))
3624 - rpar - expression for matching right-parentheses (default=Suppress(')'))
3625 """
3626 ret = Forward()
3627 lastExpr = baseExpr | ( lpar + ret + rpar )
3628 for i,operDef in enumerate(opList):
3629 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3630 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
3631 if arity == 3:
3632 if opExpr is None or len(opExpr) != 2:
3633 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3634 opExpr1, opExpr2 = opExpr
3635 thisExpr = Forward().setName(termName)
3636 if rightLeftAssoc == opAssoc.LEFT:
3637 if arity == 1:
3638 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3639 elif arity == 2:
3640 if opExpr is not None:
3641 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3642 else:
3643 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3644 elif arity == 3:
3645 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3646 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3647 else:
3648 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3649 elif rightLeftAssoc == opAssoc.RIGHT:
3650 if arity == 1:
3651
3652 if not isinstance(opExpr, Optional):
3653 opExpr = Optional(opExpr)
3654 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3655 elif arity == 2:
3656 if opExpr is not None:
3657 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3658 else:
3659 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3660 elif arity == 3:
3661 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3662 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3663 else:
3664 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3665 else:
3666 raise ValueError("operator must indicate right or left associativity")
3667 if pa:
3668 matchExpr.setParseAction( pa )
3669 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
3670 lastExpr = thisExpr
3671 ret <<= lastExpr
3672 return ret
# operatorPrecedence is a second name bound to the same function object as infixNotation.
3673 operatorPrecedence = infixNotation
3674
# Quoted-string expressions. Inside the quotes the regexes accept: any character
# other than the quote itself, newline, carriage return or backslash; a doubled
# quote character; a \x hex escape; or a backslash followed by any character.
# quotedString accepts either the double- or the single-quoted form.
3675 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
3676 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
3677 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# A 'u' prefix immediately followed by a quoted string, combined into one token.
# NOTE(review): _L is defined elsewhere in this file - presumably an alias for Literal; confirm.
3678 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
3681 """Helper method for defining nested lists enclosed in opening and closing
3682 delimiters ("(" and ")" are the default).
3683
3684 Parameters:
3685 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3686 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3687 - content - expression for items within the nested lists (default=None)
3688 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3689
3690 If an expression is not provided for the content argument, the nested
3691 expression will capture all whitespace-delimited content between delimiters
3692 as a list of separate values.
3693
3694 Use the C{ignoreExpr} argument to define expressions that may contain
3695 opening or closing characters that should not be treated as opening
3696 or closing characters for nesting, such as quotedString or a comment
3697 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
3698 The default is L{quotedString}, but if no expressions are to be ignored,
3699 then pass C{None} for this argument.
3700 """
3701 if opener == closer:
3702 raise ValueError("opening and closing strings cannot be the same")
3703 if content is None:
3704 if isinstance(opener,basestring) and isinstance(closer,basestring):
3705 if len(opener) == 1 and len(closer)==1:
3706 if ignoreExpr is not None:
3707 content = (Combine(OneOrMore(~ignoreExpr +
3708 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3709 ).setParseAction(lambda t:t[0].strip()))
3710 else:
3711 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3712 ).setParseAction(lambda t:t[0].strip()))
3713 else:
3714 if ignoreExpr is not None:
3715 content = (Combine(OneOrMore(~ignoreExpr +
3716 ~Literal(opener) + ~Literal(closer) +
3717 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3718 ).setParseAction(lambda t:t[0].strip()))
3719 else:
3720 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3721 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3722 ).setParseAction(lambda t:t[0].strip()))
3723 else:
3724 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3725 ret = Forward()
3726 if ignoreExpr is not None:
3727 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3728 else:
3729 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3730 ret.setName('nested %s%s expression' % (opener,closer))
3731 return ret
3732
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to check
        if l >= len(s):
            return
        column = col(l,s)
        if column == indentStack[-1]:
            return
        if column > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper block: remember its column
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    if indent:
        # indented form: INDENT, one or more peer statements, then UNDENT
        blockBody = ( Optional(NL) + INDENT +
                      (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT )
    else:
        # left-most form: peer statements only
        blockBody = ( Optional(NL) +
                      (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    smExpr = Group( blockBody )

    # a backslash immediately before a line end is ignored inside statements
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3784
# Character sets built with srange() from the code points 0xc0-0xd6, 0xd8-0xf6
# and 0xf8-0xff (alphas8bit) and 0xa1-0xbf, 0xd7 and 0xf7 (punc8bit).
3785 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
3786 punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
3787
# Open/close tag pair produced by makeHTMLTags for a tag name that starts with
# a letter and continues with letters, digits, '_' or ':'.
3788 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# Entity name -> replacement character: gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'".
3789 _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# Matches "&<name>;" for any name in _htmlEntityMap; the name is captured in the regex group "entity".
3790 commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
3792 """Helper parser action to replace common HTML entities with their special characters"""
3793 return _htmlEntityMap.get(t.entity)
3794
3795
# Ready-made comment expressions.
# "/*" up to the first "*/" that closes it.
3796 cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
3797
# "<!--" up to the nearest "-->", spanning newlines.
3798 htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# Everything up to (not including) the next newline; leading whitespace is kept.
3799 restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" to end of line, continuing over a backslash-newline pair.
3800 dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# Either a "/* ... */" block or a "//"-introduced comment.
3801 cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
3802
# javaStyleComment is the same object as cppStyleComment.
3803 javaStyleComment = cppStyleComment
# "#" to end of line.
3804 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# One unquoted list item: runs of printable non-comma characters; interior
# spaces/tabs are kept only when not followed by a comma or the end of the line.
3805 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
3806 Optional( Word(" \t") +
3807 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# Comma-delimited list of quoted strings or unquoted items; a missing item yields "".
3808 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3809
3810
# Script entry point: builds a small "select <columns> from <tables>" grammar
# and feeds it a batch of sample statements, several of them deliberately
# malformed, through runTests().
3811 if __name__ == "__main__":
3812
    # keywords are matched without regard to case
3813 selectToken = CaselessLiteral( "select" )
3814 fromToken = CaselessLiteral( "from" )
3815
    # identifiers may be dotted; each dotted name is combined into a single
    # token and converted to upper case by the upcaseTokens parse action
3816 ident = Word( alphas, alphanums + "_$" )
3817 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3818 columnNameList = Group( delimitedList( columnName ) ).setName("columns")
3819 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3820 tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    # the column part is either '*' or a column list; results are named "columns" and "tables"
3821 simpleSQL = ( selectToken + \
3822 ( '*' | columnNameList ).setResultsName( "columns" ) + \
3823 fromToken + \
3824 tableNameList.setResultsName( "tables" ) )
3825
    # one sample statement per line
3826 simpleSQL.runTests("""\
3827 SELECT * from XYZZY, ABC
3828 select * from SYS.XYZZY
3829 Select A from Sys.dual
3830 Select AA,BB,CC from Sys.dual
3831 Select A, B, C from Sys.dual
3832 Select A, B, C from Sys.dual
3833 Xelect A, B, C from Sys.dual
3834 Select A, B, C frox Sys.dual
3835 Select
3836 Select ^^^ frox Sys.dual
3837 Select A, B, C from Sys.dual, Table2""")
3838
3838