1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.0"
61 __versionTime__ = "7 Feb 2016 14:09"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import functools
74 import itertools
75 import traceback
76
77
78
79 __all__ = [
80 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
81 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
82 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
83 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
84 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
85 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
86 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
87 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
88 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
89 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
90 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
91 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
92 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
93 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
94 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
95 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
96 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
97 ]
98
# True when running under Python 3.  Compare the structured version tuple
# instead of string-matching the human-readable sys.version banner.
PY_3 = sys.version_info[0] == 3
100 if PY_3:
101 _MAX_INT = sys.maxsize
102 basestring = str
103 unichr = chr
104 _ustr = str
105
106
107 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
108
109 else:
110 _MAX_INT = sys.maxint
111 range = xrange
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
116 then < returns the unicode object | encodes it with the default encoding | ... >.
117 """
118 if isinstance(obj,unicode):
119 return obj
120
121 try:
122
123
124 return str(obj)
125
126 except UnicodeEncodeError:
127
128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
129 xmlcharref = Regex('&#\d+;')
130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
131 return xmlcharref.transformString(ret)
132
133
134 singleArgBuiltins = []
135 import __builtin__
136 for fname in "sum len sorted reversed list tuple set any all min max".split():
137 try:
138 singleArgBuiltins.append(getattr(__builtin__,fname))
139 except AttributeError:
140 continue
141
142 _generatorType = type((y for y in range(1)))
145 """Escape &, <, >, ", ', etc. in a string of data."""
146
147
148 from_symbols = '&><"\''
149 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
150 for from_,to_ in zip(from_symbols, to_symbols):
151 data = data.replace(from_, to_)
152 return data
153
156
157 alphas = string.ascii_uppercase + string.ascii_lowercase
158 nums = "0123456789"
159 hexnums = nums + "ABCDEFabcdef"
160 alphanums = alphas + nums
161 _bslash = chr(92)
162 printables = "".join(c for c in string.printable if c not in string.whitespace)
165 """base exception class for all parsing runtime exceptions"""
166
167
def __init__(self, pstr, loc=0, msg=None, elem=None):
    """Store the details of a parsing failure on the exception.

    pstr -- the string being parsed; when msg is None this argument is
            used as the message instead and the stored string is "".
    loc  -- location recorded for the failure (default 0).
    msg  -- message text, or None to take the message from pstr.
    elem -- saved unchanged as self.parserElement.
    """
    self.loc = loc
    if msg is None:
        # Single-argument form: the lone positional value is the message.
        self.msg = pstr
        self.pstr = ""
    else:
        self.msg = msg
        self.pstr = pstr
    self.parserElement = elem
177
179 """supported attributes by name are:
180 - lineno - returns the line number of the exception text
181 - col - returns the column number of the exception text
182 - line - returns the line containing the exception text
183 """
184 if( aname == "lineno" ):
185 return lineno( self.loc, self.pstr )
186 elif( aname in ("col", "column") ):
187 return col( self.loc, self.pstr )
188 elif( aname == "line" ):
189 return line( self.loc, self.pstr )
190 else:
191 raise AttributeError(aname)
192
194 return "%s (at char %d), (line:%d, col:%d)" % \
195 ( self.msg, self.loc, self.lineno, self.column )
209 return "lineno col line".split() + dir(type(self))
210
212 """exception thrown when parse expressions don't match class;
213 supported attributes by name are:
214 - lineno - returns the line number of the exception text
215 - col - returns the column number of the exception text
216 - line - returns the line containing the exception text
217 """
218 pass
219
221 """user-throwable exception thrown when inconsistent parse content
222 is found; stops all parsing immediately"""
223 pass
224
226 """just like C{L{ParseFatalException}}, but thrown internally when an
227 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
228 an unbacktrackable syntax error has been found"""
232
247 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__(self, parseElementList):
    """Keep parseElementList as self.parseElementTrace."""
    self.parseElementTrace = parseElementList
250
252 return "RecursiveGrammarException: %s" % self.parseElementTrace
253
260 return repr(self.tup)
262 self.tup = (self.tup[0],i)
263
265 """Structured parse results, to provide multiple means of access to the parsed data:
266 - as a list (C{len(results)})
267 - by list index (C{results[0], results[1]}, etc.)
268 - by attribute (C{results.<resultsName>})
269 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True):
    """Create a new instance, or hand back toklist when it already is one.

    When toklist is already an instance of cls it is returned unchanged.
    Otherwise a bare object is allocated and flagged (__doinit = True) so
    that __init__ knows it still has to initialise the token storage.
    """
    if isinstance(toklist, cls):
        return toklist
    retobj = object.__new__(cls)
    retobj.__doinit = True
    return retobj
276
277
278
def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
    """Initialise token storage and, if given, register a results name.

    toklist -- None, a list (copied), a generator (materialised) or any
               other single value (wrapped in a one-element list).
    name    -- optional results name; an int name is converted with _ustr.
    asList  -- when true the named value is stored as a nested
               ParseResults, otherwise the first token is stored directly.
    modal   -- when false the name is recorded in the accumulating-names
               table.
    isinstance -- bound as a default argument; callers are not expected
               to pass it.
    """
    if self.__doinit:
        # First initialisation of an object freshly made by __new__.
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name:
        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name, int):
            name = _ustr(name)
        self.__name = name
        # Nothing is stored under the name for an "empty" token value
        # (None, '' or []).
        is_empty = (isinstance(toklist, (type(None), basestring, list))
                    and toklist in (None, '', []))
        if not is_empty:
            if isinstance(toklist, basestring):
                toklist = [toklist]
            if asList:
                if isinstance(toklist, ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError, TypeError, IndexError):
                    # toklist is not indexable (or is empty): store it whole.
                    self[name] = toklist
317
319 if isinstance( i, (int,slice) ):
320 return self.__toklist[i]
321 else:
322 if i not in self.__accumNames:
323 return self.__tokdict[i][-1][0]
324 else:
325 return ParseResults([ v[0] for v in self.__tokdict[i] ])
326
328 if isinstance(v,_ParseResultsWithOffset):
329 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 sub = v[0]
331 elif isinstance(k,int):
332 self.__toklist[k] = v
333 sub = v
334 else:
335 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
336 sub = v
337 if isinstance(sub,ParseResults):
338 sub.__parent = wkref(self)
339
341 if isinstance(i,(int,slice)):
342 mylen = len( self.__toklist )
343 del self.__toklist[i]
344
345
346 if isinstance(i, int):
347 if i < 0:
348 i += mylen
349 i = slice(i, i+1)
350
351 removed = list(range(*i.indices(mylen)))
352 removed.reverse()
353
354
355
356
357
358
359 for name,occurrences in self.__tokdict.items():
360 for j in removed:
361 for k, (value, position) in enumerate(occurrences):
362 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
363 else:
364 del self.__tokdict[i]
365
367 return k in self.__tokdict
368
def __len__(self):
    """Number of tokens in the token list."""
    return len(self.__toklist)


def __bool__(self):
    """True when the token list is non-empty."""
    return not not self.__toklist


# Python 2 spelling of the truth-value hook.
__nonzero__ = __bool__


def __iter__(self):
    """Iterate over the tokens in order."""
    return iter(self.__toklist)


def __reversed__(self):
    """Iterate over a reversed copy of the token list."""
    return iter(self.__toklist[::-1])
375 """Returns all named result keys."""
376 if hasattr(self.__tokdict, "iterkeys"):
377 return self.__tokdict.iterkeys()
378 else:
379 return iter(self.__tokdict)
380
382 """Returns all named result values."""
383 return (self[k] for k in self.iterkeys())
384
386 return ((k, self[k]) for k in self.iterkeys())
387
388 if PY_3:
389 keys = iterkeys
390 values = itervalues
391 items = iteritems
392 else:
394 """Returns all named result keys."""
395 return list(self.iterkeys())
396
398 """Returns all named result values."""
399 return list(self.itervalues())
400
402 """Returns all named result keys and values as a list of tuples."""
403 return list(self.iteritems())
404
406 """Since keys() returns an iterator, this method is helpful in bypassing
407 code that looks for the existence of any defined results names."""
408 return bool(self.__tokdict)
409
def pop(self, *args, **kwargs):
    """Removes and returns item at specified index (default=last).
    Supports both list and dict semantics for pop(). If passed no
    argument or an integer argument, it will use list semantics
    and pop tokens from the list of parsed tokens. If passed a
    non-integer argument (most likely a string), it will use dict
    semantics and pop the corresponding value from any defined
    results names. A second default return value argument is
    supported, just as in dict.pop().

    The only keyword accepted is C{default}; any other keyword raises
    C{TypeError}."""
    if not args:
        args = [-1]
    for k, v in kwargs.items():
        if k == 'default':
            args = (args[0], v)
        else:
            raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
    key = args[0]
    if isinstance(key, int) or len(args) == 1 or key in self:
        ret = self[key]
        del self[key]
        return ret
    # Named key is absent and a default was supplied.
    return args[1]
436
def get(self, key, defaultValue=None):
    """Returns named result matching the given key, or if there is no
    such name, then returns the given C{defaultValue} or C{None} if no
    C{defaultValue} is specified."""
    return self[key] if key in self else defaultValue
445
def insert(self, index, insStr):
    """Inserts new element at location index in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # Shift the recorded position of every named result that sits after the
    # insertion point so the names keep tracking their tokens.
    # NOTE(review): a name recorded exactly at `index` is not shifted
    # (comparison is `>`), although its token moves to index + 1 - confirm
    # whether that is intended.
    for name, occurrences in self.__tokdict.items():
        for k, (value, position) in enumerate(occurrences):
            occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
457
459 """Add single element to end of ParseResults list of elements."""
460 self.__toklist.append(item)
461
463 """Add sequence of elements to end of ParseResults list of elements."""
464 if isinstance(itemseq, ParseResults):
465 self += itemseq
466 else:
467 self.__toklist.extend(itemseq)
468
470 """Clear all elements and results names."""
471 del self.__toklist[:]
472 self.__tokdict.clear()
473
475 try:
476 return self[name]
477 except KeyError:
478 return ""
479
480 if name in self.__tokdict:
481 if name not in self.__accumNames:
482 return self.__tokdict[name][-1][0]
483 else:
484 return ParseResults([ v[0] for v in self.__tokdict[name] ])
485 else:
486 return ""
487
489 ret = self.copy()
490 ret += other
491 return ret
492
494 if other.__tokdict:
495 offset = len(self.__toklist)
496 addoffset = lambda a: offset if a<0 else a+offset
497 otheritems = other.__tokdict.items()
498 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
499 for (k,vlist) in otheritems for v in vlist]
500 for k,v in otherdictitems:
501 self[k] = v
502 if isinstance(v[0],ParseResults):
503 v[0].__parent = wkref(self)
504
505 self.__toklist += other.__toklist
506 self.__accumNames.update( other.__accumNames )
507 return self
508
510 if isinstance(other,int) and other == 0:
511
512 return self.copy()
513 else:
514
515 return other + self
516
518 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
519
521 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
522
524 out = []
525 for item in self.__toklist:
526 if out and sep:
527 out.append(sep)
528 if isinstance( item, ParseResults ):
529 out += item._asStringList()
530 else:
531 out.append( _ustr(item) )
532 return out
533
535 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
536 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
537
539 """Returns the named parse results as a nested dictionary."""
540 if PY_3:
541 item_fn = self.items
542 else:
543 item_fn = self.iteritems
544 return dict((k,v.asDict()) if isinstance(v, ParseResults) else (k,v) for k,v in item_fn())
545
547 """Returns a new copy of a C{ParseResults} object."""
548 ret = ParseResults( self.__toklist )
549 ret.__tokdict = self.__tokdict.copy()
550 ret.__parent = self.__parent
551 ret.__accumNames.update( self.__accumNames )
552 ret.__name = self.__name
553 return ret
554
def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    doctag         -- tag for the enclosing element; falls back to this
                      object's own results name, then to "ITEM".
    namedItemsOnly -- when true, tokens without a results name are omitted
                      (and "" is returned if this object itself has no tag).
    indent         -- prefix written before this element's tags.
    formatted      -- when false, no newlines or indentation are emitted.
    """
    nl = "\n"
    out = []
    # Map token position -> results name recorded for that position.
    namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
                      for v in vlist)
    nextLevelIndent = indent + " "

    # Collapse all whitespace when unformatted output is requested.
    if not formatted:
        indent = ""
        nextLevelIndent = ""
        nl = ""

    selfTag = None
    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out += [nl, indent, "<", selfTag, ">"]

    for i, res in enumerate(self.__toklist):
        if isinstance(res, ParseResults):
            # Nested results render themselves, tagged with their recorded
            # name when there is one (None otherwise).
            out += [res.asXML(namedItems.get(i),
                              namedItemsOnly and doctag is None,
                              nextLevelIndent,
                              formatted)]
        else:
            # Individual token: use its recorded name if it has one.
            resTag = namedItems.get(i)
            if not resTag:
                if namedItemsOnly:
                    continue
                resTag = "ITEM"
            xmlBodyText = _xml_escape(_ustr(res))
            out += [nl, nextLevelIndent, "<", resTag, ">",
                    xmlBodyText,
                    "</", resTag, ">"]

    out += [nl, indent, "</", selfTag, ">"]
    return "".join(out)
613
615 for k,vlist in self.__tokdict.items():
616 for v,loc in vlist:
617 if sub is v:
618 return k
619 return None
620
622 """Returns the results name for this token expression."""
623 if self.__name:
624 return self.__name
625 elif self.__parent:
626 par = self.__parent()
627 if par:
628 return par.__lookup(self)
629 else:
630 return None
631 elif (len(self) == 1 and
632 len(self.__tokdict) == 1 and
633 self.__tokdict.values()[0][0][1] in (0,-1)):
634 return self.__tokdict.keys()[0]
635 else:
636 return None
637
def dump(self, indent='', depth=0):
    """Diagnostic method for listing out the contents of a C{ParseResults}.
    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data.

    C{depth} is the nesting level used to offset the listing of named
    results; nested results are dumped recursively with C{depth+1}."""
    out = []
    NL = '\n'
    out.append(indent + _ustr(self.asList()))
    if self.haskeys():
        # Named results: one "- name: value" entry per key, in sorted order.
        for k, v in sorted(self.items()):
            out.append(NL)
            out.append("%s%s- %s: " % (indent, (' ' * depth), k))
            if isinstance(v, ParseResults) and v:
                out.append(v.dump(indent, depth + 1))
            else:
                out.append(_ustr(v))
    elif any(isinstance(vv, ParseResults) for vv in self):
        # No names, but nested results exist: list every item by index.
        for i, vv in enumerate(self):
            if isinstance(vv, ParseResults):
                shown = vv.dump(indent, depth + 1)
            else:
                shown = _ustr(vv)
            out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' ' * (depth)), i, indent, (' ' * (depth + 1)), shown))

    return "".join(out)
667
def pprint(self, *args, **kwargs):
    """Pretty-printer for parsed results as a list, using the C{pprint} module.
    Accepts additional positional or keyword args as defined for the
    C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    # Import under an alias: this method shares its name with the module, so
    # a bare `pprint` lookup could resolve to the method itself.
    from pprint import pprint as _pprint
    _pprint(self.asList(), *args, **kwargs)
673
674
676 return ( self.__toklist,
677 ( self.__tokdict.copy(),
678 self.__parent is not None and self.__parent() or None,
679 self.__accumNames,
680 self.__name ) )
681
683 self.__toklist = state[0]
684 (self.__tokdict,
685 par,
686 inAccumNames,
687 self.__name) = state[1]
688 self.__accumNames = {}
689 self.__accumNames.update(inAccumNames)
690 if par is not None:
691 self.__parent = wkref(par)
692 else:
693 self.__parent = None
694
696 return self.__toklist, self.__name, self.__asList, self.__modal
697
699 return (dir(type(self)) + list(self.keys()))
700
701 collections.MutableMapping.register(ParseResults)
702
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A location that sits on a newline character is reported as column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # Distance from the last newline before loc (rfind gives -1 when there
    # is none, which makes the first line 1-based as well).
    return loc - strg.rfind("\n", 0, loc)
715
717 """Returns current line number within a string, counting newlines as line separators.
718 The first line is number 1.
719
720 Note: the default parsing behavior is to expand tabs in the input string
721 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
722 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
723 consistent view of the parsed string, the parse location, and line and column
724 positions within the parsed string.
725 """
726 return strg.count("\n",0,loc) + 1
727
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # lastCR is -1 on the first line, so lastCR + 1 is always the line start.
    if nextCR >= 0:
        return strg[lastCR + 1:nextCR]
    return strg[lastCR + 1:]
737
739 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
740
742 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
743
745 print ("Exception raised:" + _ustr(exc))
746
748 """'Do-nothing' debug action, to suppress debugging output during parsing."""
749 pass
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773 'decorator to trim function calls to match the arity of the target'
775 if func in singleArgBuiltins:
776 return lambda s,l,t: func(t)
777 limit = [0]
778 foundArity = [False]
779 def wrapper(*args):
780 while 1:
781 try:
782 ret = func(*args[limit[0]:])
783 foundArity[0] = True
784 return ret
785 except TypeError:
786
787 if foundArity[0]:
788 raise
789 else:
790 try:
791 tb = sys.exc_info()[-1]
792 exc_source_line = traceback.extract_tb(tb)[-1][-1]
793 if not exc_source_line.endswith('#~@$^*)+_(&%#!=-`~;:"[]{}'):
794 raise
795 finally:
796 del tb
797
798 if limit[0] <= maxargs:
799 limit[0] += 1
800 continue
801 raise
802 return wrapper
803
805 """Abstract base level parser element class."""
806 DEFAULT_WHITE_CHARS = " \n\t\r"
807 verbose_stacktrace = False
808
809 @staticmethod
814
815 @staticmethod
817 """
818 Set class to be used for inclusion of string literals into a parser.
819 """
820 ParserElement.literalStringClass = cls
821
823 self.parseAction = list()
824 self.failAction = None
825
826 self.strRepr = None
827 self.resultsName = None
828 self.saveAsList = savelist
829 self.skipWhitespace = True
830 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
831 self.copyDefaultWhiteChars = True
832 self.mayReturnEmpty = False
833 self.keepTabs = False
834 self.ignoreExprs = list()
835 self.debug = False
836 self.streamlined = False
837 self.mayIndexError = True
838 self.errmsg = ""
839 self.modalResults = True
840 self.debugActions = ( None, None, None )
841 self.re = None
842 self.callPreparse = True
843 self.callDuringTry = False
844
846 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
847 for the same parsing pattern, using copies of the original parse element."""
848 cpy = copy.copy( self )
849 cpy.parseAction = self.parseAction[:]
850 cpy.ignoreExprs = self.ignoreExprs[:]
851 if self.copyDefaultWhiteChars:
852 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
853 return cpy
854
856 """Define name for this expression, for use in debugging."""
857 self.name = name
858 self.errmsg = "Expected " + self.name
859 if hasattr(self,"exception"):
860 self.exception.msg = self.errmsg
861 return self
862
864 """Define name for referencing matching tokens as a nested attribute
865 of the returned parse results.
866 NOTE: this returns a *copy* of the original C{ParserElement} object;
867 this is so that the client can define a basic element, such as an
868 integer, and reference it in multiple places with different names.
869
870 You can also set results names using the abbreviated syntax,
871 C{expr("name")} in place of C{expr.setResultsName("name")} -
872 see L{I{__call__}<__call__>}.
873 """
874 newself = self.copy()
875 if name.endswith("*"):
876 name = name[:-1]
877 listAllMatches=True
878 newself.resultsName = name
879 newself.modalResults = not listAllMatches
880 return newself
881
883 """Method to invoke the Python pdb debugger when this element is
884 about to be parsed. Set C{breakFlag} to True to enable, False to
885 disable.
886 """
887 if breakFlag:
888 _parseMethod = self._parse
889 def breaker(instring, loc, doActions=True, callPreParse=True):
890 import pdb
891 pdb.set_trace()
892 return _parseMethod( instring, loc, doActions, callPreParse )
893 breaker._originalParseMethod = _parseMethod
894 self._parse = breaker
895 else:
896 if hasattr(self._parse,"_originalParseMethod"):
897 self._parse = self._parse._originalParseMethod
898 return self
899
901 """Define action to perform when successfully matching parse element definition.
902 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
903 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
904 - s = the original string being parsed (see note below)
905 - loc = the location of the matching substring
906 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
907 If the functions in fns modify the tokens, they can return them as the return
908 value from fn, and the modified list of tokens will replace the original.
909 Otherwise, fn does not need to return any value.
910
911 Note: the default parsing behavior is to expand tabs in the input string
912 before starting the parsing process. See L{I{parseString}<parseString>} for more information
913 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
914 consistent view of the parsed string, the parse location, and line and column
915 positions within the parsed string.
916 """
917 self.parseAction = list(map(_trim_arity, list(fns)))
918 self.callDuringTry = kwargs.get("callDuringTry", False)
919 return self
920
922 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
923 self.parseAction += list(map(_trim_arity, list(fns)))
924 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
925 return self
926
928 """Add a boolean predicate function to expression's list of parse actions. See
929 L{I{setParseAction}<setParseAction>}. Optional keyword argument C{message} can
930 be used to define a custom message to be used in the raised exception."""
931 msg = kwargs.get("message") or "failed user-defined condition"
932 for fn in fns:
933 def pa(s,l,t):
934 if not bool(_trim_arity(fn)(s,l,t)):
935 raise ParseException(s,l,msg)
936 return t
937 self.parseAction.append(pa)
938 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
939 return self
940
942 """Define action to perform if parsing fails at this expression.
943 Fail action fn is a callable function that takes the arguments
944 C{fn(s,loc,expr,err)} where:
945 - s = string being parsed
946 - loc = location where expression match was attempted and failed
947 - expr = the parse expression that failed
948 - err = the exception thrown
949 The function returns no value. It may throw C{L{ParseFatalException}}
950 if it is desired to stop parsing immediately."""
951 self.failAction = fn
952 return self
953
955 exprsFound = True
956 while exprsFound:
957 exprsFound = False
958 for e in self.ignoreExprs:
959 try:
960 while 1:
961 loc,dummy = e._parse( instring, loc )
962 exprsFound = True
963 except ParseException:
964 pass
965 return loc
966
968 if self.ignoreExprs:
969 loc = self._skipIgnorables( instring, loc )
970
971 if self.skipWhitespace:
972 wt = self.whiteChars
973 instrlen = len(instring)
974 while loc < instrlen and instring[loc] in wt:
975 loc += 1
976
977 return loc
978
979 - def parseImpl( self, instring, loc, doActions=True ):
981
982 - def postParse( self, instring, loc, tokenlist ):
984
985
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this element at loc and run its parse actions, without caching.

    instring     -- the string being parsed.
    loc          -- location at which to attempt the match.
    doActions    -- parse actions run when this is true (or when
                    self.callDuringTry is set).
    callPreParse -- when true (and self.callPreparse is set) preParse() is
                    applied to loc before matching.

    Returns a (new location, ParseResults) tuple.  An IndexError raised by
    parseImpl is converted into a ParseException positioned at the end of
    the string.  Debug actions and the fail action, when configured, are
    called around the match attempt.
    """
    debugging = self.debug

    if debugging or self.failAction:
        # Slow path: notify the "start" debug action, and report any
        # failure to the debug/fail actions before re-raising.
        if self.debugActions[0]:
            self.debugActions[0](instring, loc, self)
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        # Guard on the position actually handed to parseImpl (preloc): the
        # pre-parse step may have advanced to the end of the string even
        # when the incoming loc had not.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        try:
            for fn in self.parseAction:
                tokens = fn(instring, tokensStart, retTokens)
                if tokens is not None:
                    # A parse action that returns a value replaces the tokens.
                    retTokens = ParseResults(tokens,
                                             self.resultsName,
                                             asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                             modal=self.modalResults)
        except ParseBaseException as err:
            # Only the debugging mode reports parse-action failures.
            if debugging and self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            raise

    if debugging:
        if self.debugActions[1]:
            self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1057
1063
1065 try:
1066 self.tryParse(instring, loc)
1067 except (ParseException, IndexError):
1068 return False
1069 else:
1070 return True
1071
1072
1073
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing wrapper around _parseNoCache.

    Results are stored in ParserElement._exprArgCache under the key
    (self, instring, loc, callPreParse, doActions).  A cached success is
    returned as (location, copy of the results); a cached failure (a stored
    exception object) is raised again.  On a cache miss the parse is
    performed and its outcome - result or ParseBaseException - is stored.
    """
    lookup = (self, instring, loc, callPreParse, doActions)
    cache = ParserElement._exprArgCache
    try:
        value = cache[lookup]
    except KeyError:
        pass
    else:
        if isinstance(value, Exception):
            raise value
        # Return a copy so callers cannot mutate the cached results.
        return (value[0], value[1].copy())

    try:
        value = self._parseNoCache(instring, loc, doActions, callPreParse)
    except ParseBaseException as pe:
        # Drop the traceback before caching the exception object.
        pe.__traceback__ = None
        ParserElement._exprArgCache[lookup] = pe
        raise
    ParserElement._exprArgCache[lookup] = (value[0], value[1].copy())
    return value
1090
1091 _parse = _parseNoCache
1092
1093
1094 _exprArgCache = {}
1095 @staticmethod
1098
1099 _packratEnabled = False
1100 @staticmethod
1102 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1103 Repeated parse attempts at the same string location (which happens
1104 often in many complex grammars) can immediately return a cached value,
1105 instead of re-executing parsing/validating code. Memoizing is done of
1106 both valid results and parsing exceptions.
1107
1108 This speedup may break existing programs that use parse actions that
1109 have side-effects. For this reason, packrat parsing is disabled when
1110 you first import pyparsing. To activate the packrat feature, your
1111 program must call the class method C{ParserElement.enablePackrat()}. If
1112 your program uses C{psyco} to "compile as you go", you must call
1113 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1114 Python will crash. For best results, call C{enablePackrat()} immediately
1115 after importing pyparsing.
1116 """
1117 if not ParserElement._packratEnabled:
1118 ParserElement._packratEnabled = True
1119 ParserElement._parse = ParserElement._parseCache
1120
1122 """Execute the parse expression with the given string.
1123 This is the main interface to the client code, once the complete
1124 expression has been built.
1125
1126 If you want the grammar to require that the entire input string be
1127 successfully parsed, then set C{parseAll} to True (equivalent to ending
1128 the grammar with C{L{StringEnd()}}).
1129
1130 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1131 in order to report proper column numbers in parse actions.
1132 If the input string contains tabs and
1133 the grammar uses parse actions that use the C{loc} argument to index into the
1134 string being parsed, you can ensure you have a consistent view of the input
1135 string by:
1136 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1137 (see L{I{parseWithTabs}<parseWithTabs>})
1138 - define your parse action using the full C{(s,loc,toks)} signature, and
1139 reference the input string using the parse action's C{s} argument
1140 - explicitly expand the tabs in your input string before calling
1141 C{parseString}
1142 """
1143 ParserElement.resetCache()
1144 if not self.streamlined:
1145 self.streamline()
1146
1147 for e in self.ignoreExprs:
1148 e.streamline()
1149 if not self.keepTabs:
1150 instring = instring.expandtabs()
1151 try:
1152 loc, tokens = self._parse( instring, 0 )
1153 if parseAll:
1154 loc = self.preParse( instring, loc )
1155 se = Empty() + StringEnd()
1156 se._parse( instring, loc )
1157 except ParseBaseException as exc:
1158 if ParserElement.verbose_stacktrace:
1159 raise
1160 else:
1161
1162 raise exc
1163 else:
1164 return tokens
1165
1167 """Scan the input string for expression matches. Each match will return the
1168 matching tokens, start location, and end location. May be called with optional
1169 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1170 C{overlap} is specified, then overlapping matches will be reported.
1171
1172 Note that the start and end locations are reported relative to the string
1173 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1174 strings with embedded tabs."""
1175 if not self.streamlined:
1176 self.streamline()
1177 for e in self.ignoreExprs:
1178 e.streamline()
1179
1180 if not self.keepTabs:
1181 instring = _ustr(instring).expandtabs()
1182 instrlen = len(instring)
1183 loc = 0
1184 preparseFn = self.preParse
1185 parseFn = self._parse
1186 ParserElement.resetCache()
1187 matches = 0
1188 try:
1189 while loc <= instrlen and matches < maxMatches:
1190 try:
1191 preloc = preparseFn( instring, loc )
1192 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1193 except ParseException:
1194 loc = preloc+1
1195 else:
1196 if nextLoc > loc:
1197 matches += 1
1198 yield tokens, preloc, nextLoc
1199 if overlap:
1200 nextloc = preparseFn( instring, loc )
1201 if nextloc > loc:
1202 loc = nextLoc
1203 else:
1204 loc += 1
1205 else:
1206 loc = nextLoc
1207 else:
1208 loc = preloc+1
1209 except ParseBaseException as exc:
1210 if ParserElement.verbose_stacktrace:
1211 raise
1212 else:
1213
1214 raise exc
1215
1248
1250 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1251 to match the given parse expression. May be called with optional
1252 C{maxMatches} argument, to clip searching after 'n' matches are found.
1253 """
1254 try:
1255 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1256 except ParseBaseException as exc:
1257 if ParserElement.verbose_stacktrace:
1258 raise
1259 else:
1260
1261 raise exc
1262
1264 """Implementation of + operator - returns C{L{And}}"""
1265 if isinstance( other, basestring ):
1266 other = ParserElement.literalStringClass( other )
1267 if not isinstance( other, ParserElement ):
1268 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1269 SyntaxWarning, stacklevel=2)
1270 return None
1271 return And( [ self, other ] )
1272
1274 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1275 if isinstance( other, basestring ):
1276 other = ParserElement.literalStringClass( other )
1277 if not isinstance( other, ParserElement ):
1278 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1279 SyntaxWarning, stacklevel=2)
1280 return None
1281 return other + self
1282
1284 """Implementation of - operator, returns C{L{And}} with error stop"""
1285 if isinstance( other, basestring ):
1286 other = ParserElement.literalStringClass( other )
1287 if not isinstance( other, ParserElement ):
1288 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1289 SyntaxWarning, stacklevel=2)
1290 return None
1291 return And( [ self, And._ErrorStop(), other ] )
1292
1294 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1295 if isinstance( other, basestring ):
1296 other = ParserElement.literalStringClass( other )
1297 if not isinstance( other, ParserElement ):
1298 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1299 SyntaxWarning, stacklevel=2)
1300 return None
1301 return other - self
1302
1304 """Implementation of * operator, allows use of C{expr * 3} in place of
1305 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1306 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1307 may also include C{None} as in:
1308 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1309 to C{expr*n + L{ZeroOrMore}(expr)}
1310 (read as "at least n instances of C{expr}")
1311 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1312 (read as "0 to n instances of C{expr}")
1313 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1314 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1315
1316 Note that C{expr*(None,n)} does not raise an exception if
1317 more than n exprs exist in the input stream; that is,
1318 C{expr*(None,n)} does not enforce a maximum number of expr
1319 occurrences. If this behavior is desired, then write
1320 C{expr*(None,n) + ~expr}
1321
1322 """
1323 if isinstance(other,int):
1324 minElements, optElements = other,0
1325 elif isinstance(other,tuple):
1326 other = (other + (None, None))[:2]
1327 if other[0] is None:
1328 other = (0, other[1])
1329 if isinstance(other[0],int) and other[1] is None:
1330 if other[0] == 0:
1331 return ZeroOrMore(self)
1332 if other[0] == 1:
1333 return OneOrMore(self)
1334 else:
1335 return self*other[0] + ZeroOrMore(self)
1336 elif isinstance(other[0],int) and isinstance(other[1],int):
1337 minElements, optElements = other
1338 optElements -= minElements
1339 else:
1340 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1341 else:
1342 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1343
1344 if minElements < 0:
1345 raise ValueError("cannot multiply ParserElement by negative value")
1346 if optElements < 0:
1347 raise ValueError("second tuple value must be greater or equal to first tuple value")
1348 if minElements == optElements == 0:
1349 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1350
1351 if (optElements):
1352 def makeOptionalList(n):
1353 if n>1:
1354 return Optional(self + makeOptionalList(n-1))
1355 else:
1356 return Optional(self)
1357 if minElements:
1358 if minElements == 1:
1359 ret = self + makeOptionalList(optElements)
1360 else:
1361 ret = And([self]*minElements) + makeOptionalList(optElements)
1362 else:
1363 ret = makeOptionalList(optElements)
1364 else:
1365 if minElements == 1:
1366 ret = self
1367 else:
1368 ret = And([self]*minElements)
1369 return ret
1370
1373
1375 """Implementation of | operator - returns C{L{MatchFirst}}"""
1376 if isinstance( other, basestring ):
1377 other = ParserElement.literalStringClass( other )
1378 if not isinstance( other, ParserElement ):
1379 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1380 SyntaxWarning, stacklevel=2)
1381 return None
1382 return MatchFirst( [ self, other ] )
1383
1385 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1386 if isinstance( other, basestring ):
1387 other = ParserElement.literalStringClass( other )
1388 if not isinstance( other, ParserElement ):
1389 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1390 SyntaxWarning, stacklevel=2)
1391 return None
1392 return other | self
1393
1395 """Implementation of ^ operator - returns C{L{Or}}"""
1396 if isinstance( other, basestring ):
1397 other = ParserElement.literalStringClass( other )
1398 if not isinstance( other, ParserElement ):
1399 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1400 SyntaxWarning, stacklevel=2)
1401 return None
1402 return Or( [ self, other ] )
1403
1405 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1406 if isinstance( other, basestring ):
1407 other = ParserElement.literalStringClass( other )
1408 if not isinstance( other, ParserElement ):
1409 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1410 SyntaxWarning, stacklevel=2)
1411 return None
1412 return other ^ self
1413
1415 """Implementation of & operator - returns C{L{Each}}"""
1416 if isinstance( other, basestring ):
1417 other = ParserElement.literalStringClass( other )
1418 if not isinstance( other, ParserElement ):
1419 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1420 SyntaxWarning, stacklevel=2)
1421 return None
1422 return Each( [ self, other ] )
1423
1425 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1426 if isinstance( other, basestring ):
1427 other = ParserElement.literalStringClass( other )
1428 if not isinstance( other, ParserElement ):
1429 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1430 SyntaxWarning, stacklevel=2)
1431 return None
1432 return other & self
1433
1435 """Implementation of ~ operator - returns C{L{NotAny}}"""
1436 return NotAny( self )
1437
1439 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1440 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1441 could be written as::
1442 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1443
1444 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1445 passed as C{True}.
1446
1447 If C{name} is omitted, same as calling C{L{copy}}.
1448 """
1449 if name is not None:
1450 return self.setResultsName(name)
1451 else:
1452 return self.copy()
1453
1455 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1456 cluttering up returned output.
1457 """
1458 return Suppress( self )
1459
1461 """Disables the skipping of whitespace before matching the characters in the
1462 C{ParserElement}'s defined pattern. This is normally only used internally by
1463 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1464 """
1465 self.skipWhitespace = False
1466 return self
1467
1469 """Overrides the default whitespace chars
1470 """
1471 self.skipWhitespace = True
1472 self.whiteChars = chars
1473 self.copyDefaultWhiteChars = False
1474 return self
1475
1477 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1478 Must be called before C{parseString} when the input grammar contains elements that
1479 match C{<TAB>} characters."""
1480 self.keepTabs = True
1481 return self
1482
1484 """Define expression to be ignored (e.g., comments) while doing pattern
1485 matching; may be called repeatedly, to define multiple comment or other
1486 ignorable patterns.
1487 """
1488 if isinstance(other, basestring):
1489 other = Suppress(other)
1490
1491 if isinstance( other, Suppress ):
1492 if other not in self.ignoreExprs:
1493 self.ignoreExprs.append(other)
1494 else:
1495 self.ignoreExprs.append( Suppress( other.copy() ) )
1496 return self
1497
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Turn on debugging for this expression, using the given callbacks.

    Any callback passed as a false value (for instance C{None}) is replaced
    by the corresponding module-level default debug action.  Returns
    C{self} so that calls can be chained.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction

    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1505
1507 """Enable display of debugging messages while doing pattern matching.
1508 Set C{flag} to True to enable, False to disable."""
1509 if flag:
1510 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1511 else:
1512 self.debug = False
1513 return self
1514
1517
1520
1522 self.streamlined = True
1523 self.strRepr = None
1524 return self
1525
1528
def validate(self, validateTrace=None):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted for interface compatibility but is not
    read here; the recursion check always starts from a new, empty list.
    Its default is C{None} rather than a shared mutable list.
    """
    self.checkRecursion([])
1532
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    If C{file_or_filename} has a C{read()} method it is used directly;
    otherwise the argument is treated as a filename, and the file is
    opened, read completely and closed before parsing.  The contents are
    passed to C{parseString} together with C{parseAll}, and its result is
    returned.

    A C{ParseBaseException} raised while parsing is propagated to the
    caller.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat the argument as a filename; the
        # with-block closes the file even if read() fails
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # verbose stack traces are off: re-raise the caught exception
            # object explicitly instead of using a bare raise
            raise exc
1552
1554 if isinstance(other, ParserElement):
1555 return self is other or self.__dict__ == other.__dict__
1556 elif isinstance(other, basestring):
1557 try:
1558 self.parseString(_ustr(other), parseAll=True)
1559 return True
1560 except ParseBaseException:
1561 return False
1562 else:
1563 return super(ParserElement,self)==other
1564
1566 return not (self == other)
1567
1569 return hash(id(self))
1570
1572 return self == other
1573
1575 return not (self == other)
1576
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests

    Output is printed; nothing is returned.
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself rather than through str.strip,
        # so that non-str text (e.g. Python 2 unicode) is accepted as well
        tests = [t.strip() for t in tests.splitlines()]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show the failing line with a caret
                # under the failing column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc, t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1601
1602
1603 -class Token(ParserElement):
1604 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1607
1608
1609 -class Empty(Token):
1610 """An empty token, will always match."""
1612 super(Empty,self).__init__()
1613 self.name = "Empty"
1614 self.mayReturnEmpty = True
1615 self.mayIndexError = False
1616
1619 """A token that will never match."""
1621 super(NoMatch,self).__init__()
1622 self.name = "NoMatch"
1623 self.mayReturnEmpty = True
1624 self.mayIndexError = False
1625 self.errmsg = "Unmatchable token"
1626
1627 - def parseImpl( self, instring, loc, doActions=True ):
1629
1632 """Token to exactly match a specified string."""
1634 super(Literal,self).__init__()
1635 self.match = matchString
1636 self.matchLen = len(matchString)
1637 try:
1638 self.firstMatchChar = matchString[0]
1639 except IndexError:
1640 warnings.warn("null string passed to Literal; use Empty() instead",
1641 SyntaxWarning, stacklevel=2)
1642 self.__class__ = Empty
1643 self.name = '"%s"' % _ustr(self.match)
1644 self.errmsg = "Expected " + self.name
1645 self.mayReturnEmpty = False
1646 self.mayIndexError = False
1647
1648
1649
1650
1651
def parseImpl(self, instring, loc, doActions=True):
    """Match the literal string at C{loc}.

    Returns C{(loc + matchLen, match)} on success and raises
    C{ParseException} otherwise.  The first character is compared before
    the full C{startswith} test is attempted.
    """
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    # a one-character literal is fully verified by the test above
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
1657 _L = Literal
1658 ParserElement.literalStringClass = Literal
1661 """Token to exactly match a specified string as a keyword, that is, it must be
1662 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1663 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1664 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1665 Accepts two optional constructor arguments in addition to the keyword string:
1666 C{identChars} is a string of characters that would be valid identifier characters,
1667 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1668 matching, default is C{False}.
1669 """
1670 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1671
1673 super(Keyword,self).__init__()
1674 self.match = matchString
1675 self.matchLen = len(matchString)
1676 try:
1677 self.firstMatchChar = matchString[0]
1678 except IndexError:
1679 warnings.warn("null string passed to Keyword; use Empty() instead",
1680 SyntaxWarning, stacklevel=2)
1681 self.name = '"%s"' % self.match
1682 self.errmsg = "Expected " + self.name
1683 self.mayReturnEmpty = False
1684 self.mayIndexError = False
1685 self.caseless = caseless
1686 if caseless:
1687 self.caselessmatch = matchString.upper()
1688 identChars = identChars.upper()
1689 self.identChars = set(identChars)
1690
def parseImpl(self, instring, loc, doActions=True):
    """Match the keyword at C{loc}, requiring keyword boundaries on both sides.

    The keyword text must be present at C{loc}, must not be followed by a
    character from C{identChars}, and must not be preceded by one.  When
    C{self.caseless} is set, the comparison and both boundary tests are
    made on upper-cased text.  Returns C{(loc + matchLen, match)} or raises
    C{ParseException}.
    """
    end = loc + self.matchLen
    idChars = self.identChars
    # True when the keyword runs up to the end of the input, in which case
    # there is no following character to examine
    atInputEnd = loc >= len(instring) - self.matchLen

    if self.caseless:
        found = (
            instring[loc:end].upper() == self.caselessmatch
            and (atInputEnd or instring[end].upper() not in idChars)
            and (loc == 0 or instring[loc - 1].upper() not in idChars)
        )
    else:
        found = (
            instring[loc] == self.firstMatchChar
            and (self.matchLen == 1 or instring.startswith(self.match, loc))
            and (atInputEnd or instring[end] not in idChars)
            and (loc == 0 or instring[loc - 1] not in idChars)
        )

    if found:
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1704
1709
1710 @staticmethod
1715
1717 """Token to match a specified string, ignoring case of letters.
1718 Note: the matched results will always be in the case of the given
1719 match string, NOT the case of the input text.
1720 """
1722 super(CaselessLiteral,self).__init__( matchString.upper() )
1723
1724 self.returnString = matchString
1725 self.name = "'%s'" % self.returnString
1726 self.errmsg = "Expected " + self.name
1727
def parseImpl(self, instring, loc, doActions=True):
    """Match the literal at C{loc} without regard to letter case.

    The input slice is upper-cased and compared with C{self.match}.  On
    success the text returned is C{self.returnString}, not the text found
    in the input.  Raises C{ParseException} on a mismatch.
    """
    stop = loc + self.matchLen
    if instring[loc:stop].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return stop, self.returnString
1732
1736
def parseImpl(self, instring, loc, doActions=True):
    """Match the keyword at C{loc} without regard to letter case.

    The upper-cased input must equal C{self.caselessmatch}, must not be
    followed by a character from C{identChars}, and must not be preceded
    by one either.  The leading-boundary test mirrors the caseless branch
    of C{Keyword.parseImpl}; without it the keyword would also match when
    it is the tail of a longer identifier (e.g. C{"end"} inside C{"bend"}).

    Returns C{(loc + matchLen, match)} or raises C{ParseException}.
    """
    end = loc + self.matchLen
    if (instring[loc:end].upper() == self.caselessmatch
            # nothing follows the keyword, or what follows is not an identifier char
            and (loc >= len(instring) - self.matchLen
                 or instring[end].upper() not in self.identChars)
            # nothing precedes the keyword, or what precedes is not an identifier char
            and (loc == 0 or instring[loc - 1].upper() not in self.identChars)):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1742
1744 """Token for matching words composed of allowed character sets.
1745 Defined with string containing all allowed initial characters,
1746 an optional string containing allowed body characters (if omitted,
1747 defaults to the initial character set), and an optional minimum,
1748 maximum, and/or exact length. The default value for C{min} is 1 (a
1749 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1750 are 0, meaning no maximum or exact length restriction. An optional
1751 C{excludeChars} parameter can list characters that might be found in
1752 the input C{bodyChars} string; useful to define a word of all printables
1753 except for one or two characters, for instance.
1754 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """Set up a matcher for words built from the given character sets.

    Parameters:
     - initChars - characters allowed as the first character of a word
     - bodyChars - characters allowed after the first one (default=C{None},
       meaning the same set as C{initChars})
     - min - minimum word length (default=1); values below 1 raise C{ValueError}
     - max - maximum word length (default=0, meaning no maximum)
     - exact - exact word length (default=0, meaning unset); overrides
       C{min} and C{max} when greater than 0
     - asKeyword - stored as C{self.asKeyword}; when a regular expression is
       built it is wrapped in C{\\b} word-boundary anchors
     - excludeChars - characters to remove from C{initChars} and C{bodyChars}

    When neither character set contains a space and C{min}, C{max} and
    C{exact} are left at their defaults, an equivalent regular expression
    is built and compiled into C{self.re}.
    """
    super(Word, self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars:
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        # no explicit body set: body characters default to the initial set
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        # an exact length overrides both bounds
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile(self.reString)
        except Exception:
            # best effort: if the generated pattern does not compile, leave
            # self.re unset so matching does not use the regular expression.
            # Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            self.re = None
1808
def parseImpl(self, instring, loc, doActions=True):
    """Match a word at C{loc}.

    If a compiled regular expression is available in C{self.re}, it does
    all the work.  Otherwise the word is scanned character by character:
    the first character must be in C{initChars}, following characters in
    C{bodyChars}, up to C{maxLen} characters in total.

    Returns C{(newloc, word)}; raises C{ParseException} when no acceptable
    word is found.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)
        return found.end(), found.group()

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    total = len(instring)
    allowed = self.bodyChars
    limit = min(begin + self.maxLen, total)

    loc += 1
    while loc < limit and instring[loc] in allowed:
        loc += 1

    # fewer characters than the required minimum
    tooShort = loc - begin < self.minLen
    # an explicit maximum was given and the word continues beyond it
    tooLong = self.maxSpecified and loc < total and instring[loc] in allowed
    # keyword mode: a body character directly before or after the word
    notIsolated = self.asKeyword and (
        (begin > 0 and instring[begin - 1] in allowed)
        or (loc < total and instring[loc] in allowed)
    )

    if tooShort or tooLong or notIsolated:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
1843
1845 try:
1846 return super(Word,self).__str__()
1847 except:
1848 pass
1849
1850
1851 if self.strRepr is None:
1852
1853 def charsAsStr(s):
1854 if len(s)>4:
1855 return s[:4]+"..."
1856 else:
1857 return s
1858
1859 if ( self.initCharsOrig != self.bodyCharsOrig ):
1860 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1861 else:
1862 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1863
1864 return self.strRepr
1865
1866
1867 -class Regex(Token):
1868 """Token for matching strings that match a given regular expression.
1869 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1870 """
1871 compiledREtype = type(re.compile("[A-Z]"))
def __init__(self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    which is then used directly; C{flags} is only recorded in that case.

    Raises C{ValueError} if C{pattern} is neither a string nor a compiled
    RE object, and re-raises the C{re} error (after issuing a
    C{SyntaxWarning}) if a string pattern does not compile.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # use the pattern text of the compiled object; str() of a compiled
        # RE gives its repr, not the regular expression itself
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1905
def parseImpl(self, instring, loc, doActions=True):
    """Apply the compiled regular expression at C{loc}.

    Returns C{(end, results)}, where C{results} is a C{ParseResults} built
    from the whole match, with every named group of the expression added
    under its group name.  Raises C{ParseException} if there is no match.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    # expose the named groups of the match as named results
    for groupName, groupValue in found.groupdict().items():
        tokens[groupName] = groupValue
    return found.end(), tokens
1918
1920 try:
1921 return super(Regex,self).__str__()
1922 except:
1923 pass
1924
1925 if self.strRepr is None:
1926 self.strRepr = "Re:(%s)" % repr(self.pattern)
1927
1928 return self.strRepr
1929
1932 """Token for matching strings that are delimited by quoting characters.
1933 """
def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

       The arguments are combined into one regular expression, which is
       compiled into C{self.re}.  A C{SyntaxError} is raised (after a
       C{SyntaxWarning}) if either quote string is empty once surrounding
       whitespace has been stripped.
    """
    super(QuotedString, self).__init__()

    # surrounding whitespace is not part of a quote delimiter
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # single-line strings additionally exclude line breaks from the body
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        bodyTemplate = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        bodyTemplate = r'%s(?:[^%s\n\r%s]'

    openingQuote = re.escape(self.quoteChar)
    endQuoteStart = _escapeRegexRangeChars(self.endQuoteChar[0])
    if escChar is not None:
        escExcluded = _escapeRegexRangeChars(escChar)
    else:
        escExcluded = ''
    # opening quote, then the start of the group of alternatives allowed in
    # the body; the first alternative is any single character that does
    # not start the end quote and is not the escape character
    self.pattern = bodyTemplate % (openingQuote, endQuoteStart, escExcluded)

    if len(self.endQuoteChar) > 1:
        # multi-character end quote: also accept every proper prefix of it
        # that is followed by a character which breaks the end quote
        partialEnds = [
            "%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                         _escapeRegexRangeChars(self.endQuoteChar[i]))
            for i in range(len(self.endQuoteChar)-1, 0, -1)
        ]
        self.pattern += '|(?:' + ')|(?:'.join(partialEnds) + ')'
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        # used by parseImpl to replace an escape sequence with the
        # character that follows the escape character
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2006
def parseImpl(self, instring, loc, doActions=True):
    """Match a quoted string at C{loc}.

    Returns C{(end, text)}.  When C{self.unquoteResults} is set, the quote
    delimiters are removed from the text, escape sequences are replaced by
    the escaped character (if C{escChar} is defined), and every C{escQuote}
    sequence is replaced by the end quote (if C{escQuote} is defined).
    Raises C{ParseException} if no quoted string starts at C{loc}.
    """
    # compare the first character before running the regular expression
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring, loc)
    else:
        result = None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # replace escaped characters; the replacement must be a raw
            # string, because "\g" is not a valid escape in a plain literal
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2030
2032 try:
2033 return super(QuotedString,self).__str__()
2034 except:
2035 pass
2036
2037 if self.strRepr is None:
2038 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2039
2040 return self.strRepr
2041
2044 """Token for matching words composed of characters *not* in a given set.
2045 Defined with string containing all disallowed characters, and an optional
2046 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2047 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2048 are 0, meaning no maximum or exact length restriction.
2049 """
def __init__(self, notChars, min=1, max=0, exact=0):
    """Set up a matcher for runs of characters not contained in C{notChars}.

    Parameters:
     - notChars - string of the characters that end a match
     - min - minimum length (default=1); values below 1 raise C{ValueError}
     - max - maximum length (default=0, meaning no maximum)
     - exact - exact length (default=0, meaning unset); overrides C{min}
       and C{max} when greater than 0

    Whitespace skipping is switched off for this expression.
    """
    super(CharsNotIn, self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    if exact > 0:
        # an exact length fixes both bounds
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = (self.minLen == 0)
    self.mayIndexError = False
2073
def parseImpl(self, instring, loc, doActions=True):
    """Match a run of characters that are not in C{self.notChars}.

    The run starts at C{loc} and extends up to the first excluded
    character, the end of the input, or C{maxLen} characters, whichever
    comes first.  Returns C{(newloc, text)}; raises C{ParseException} if
    the character at C{loc} is excluded or the run is shorter than
    C{minLen}.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    limit = min(begin + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[begin:loc]
2090
2092 try:
2093 return super(CharsNotIn, self).__str__()
2094 except:
2095 pass
2096
2097 if self.strRepr is None:
2098 if len(self.notChars) > 4:
2099 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2100 else:
2101 self.strRepr = "!W:(%s)" % self.notChars
2102
2103 return self.strRepr
2104
2106 """Special matching class for matching whitespace. Normally, whitespace is ignored
2107 by pyparsing grammars. This class is included when some whitespace structures
2108 are significant. Define with a string containing the whitespace characters to be
2109 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2110 as defined for the C{L{Word}} class."""
2111 whiteStrs = {
2112 " " : "<SPC>",
2113 "\t": "<TAB>",
2114 "\n": "<LF>",
2115 "\r": "<CR>",
2116 "\f": "<FF>",
2117 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Set up the whitespace characters to match and the length limits.

    C{ws} is the string of whitespace characters this element matches.
    C{max} and C{exact} are only applied when positive; a positive C{exact}
    replaces both the minimum and the maximum length.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # characters this element must match are removed from the skippable set
    skippable = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(skippable) )

    self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    # a non-positive max means "no upper bound"
    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT
    if exact > 0:
        self.minLen = self.maxLen = exact
2137
def parseImpl( self, instring, loc, doActions=True ):
    """Match a run of the characters in C{self.matchWhite}.

    Returns the end location and the matched substring.  Raises
    C{ParseException} if the character at C{loc} is not one of the
    characters to match, or if the run is shorter than C{self.minLen}.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan past the maximum length or the end of the input
    limit = min( begin + self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] in wanted:
        loc += 1

    if loc - begin < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, instring[begin:loc]
2152
2156 super(_PositionToken,self).__init__()
2157 self.name=self.__class__.__name__
2158 self.mayReturnEmpty = True
2159 self.mayIndexError = False
2160
2162 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2166
2168 if col(loc,instring) != self.col:
2169 instrlen = len(instring)
2170 if self.ignoreExprs:
2171 loc = self._skipIgnorables( instring, loc )
2172 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2173 loc += 1
2174 return loc
2175
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the configured column C{self.col}.

    Returns the new location and the text that was stepped over.  Raises
    C{ParseException} when the current column is already past the target.
    """
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    # move forward by the number of columns still missing
    target = loc + self.col - current
    return target, instring[ loc: target ]
2183
2185 """Matches if current position is at the beginning of a line within the parse string"""
2190
2192 preloc = super(LineStart,self).preParse(instring,loc)
2193 if instring[preloc] == "\n":
2194 loc += 1
2195 return loc
2196
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when C{loc} is at the start of a line.

    The position qualifies if it is 0, if it equals the result of
    pre-parsing from position 0, or if the previous character is a
    newline.  Otherwise C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse( instring, 0 ):
        return loc, []
    if instring[loc-1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2203
2205 """Matches if current position is at the end of a line within the parse string"""
2210
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input string.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
    is exactly at the end of the input.  Any other position raises
    C{ParseException}.
    """
    end = len(instring)
    if loc == end:
        return loc+1, []
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
2221
2223 """Matches if current position is at the beginning of the parse string"""
2227
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when C{loc} is at the start of the input.

    A non-zero C{loc} is still accepted if it equals the result of
    pre-parsing from position 0 (i.e. everything before it is skippable).
    Otherwise C{ParseException} is raised.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2234
2236 """Matches if current position is at the end of the parse string"""
2240
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, returning an empty token list, at or past the end of input.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when C{loc} is already beyond it.  Raises
    C{ParseException} when input remains at C{loc}.
    """
    end = len(instring)
    if loc < end:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == end:
        return loc+1, []
    # loc > end: the three comparisons are exhaustive, so no further
    # fallback branch is needed
    return loc, []
2250
2252 """Matches if the current position is at the beginning of a Word, and
2253 is not preceded by any character in a given set of C{wordChars}
2254 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2255 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2256 the string being parsed, or at the beginning of a line.
2257 """
2259 super(WordStart,self).__init__()
2260 self.wordChars = set(wordChars)
2261 self.errmsg = "Not at the start of a word"
2262
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when C{loc} is at the start of a word.

    Position 0 always qualifies.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current character must be; otherwise
    C{ParseException} is raised.
    """
    if loc == 0:
        return loc, []
    chars = self.wordChars
    if instring[loc-1] in chars or instring[loc] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2269
2271 """Matches if the current position is at the end of a Word, and
2272 is not followed by any character in a given set of C{wordChars}
2273 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2274 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2275 the string being parsed, or at the end of a line.
2276 """
2278 super(WordEnd,self).__init__()
2279 self.wordChars = set(wordChars)
2280 self.skipWhitespace = False
2281 self.errmsg = "Not at the end of a word"
2282
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, when C{loc} is at the end of a word.

    An empty input, or a position at or past the end of the input, always
    qualifies.  Elsewhere the current character must not be in
    C{self.wordChars} and the previous character must be; otherwise
    C{ParseException} is raised.
    """
    total = len(instring)
    if total <= 0 or loc >= total:
        return loc, []
    chars = self.wordChars
    if instring[loc] in chars or instring[loc-1] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2290
2293 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Normalize C{exprs} into the list C{self.exprs}.

    Accepted forms:
     - a generator, which is materialized into a list first
     - a single string, wrapped in one C{Literal}
     - a sequence; if every item is a string, each is wrapped in C{Literal}
     - any other iterable, converted with C{list()}
     - a non-iterable object, stored as a one-element list
    """
    # collections.Sequence was removed from the top-level collections
    # module in Python 3.10; fall back to the old location on interpreters
    # that have no collections.abc
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if sequence of strings provided, wrap with Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2312
2314 return self.exprs[i]
2315
2317 self.exprs.append( other )
2318 self.strRepr = None
2319 return self
2320
2322 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2323 all contained expressions."""
2324 self.skipWhitespace = False
2325 self.exprs = [ e.copy() for e in self.exprs ]
2326 for e in self.exprs:
2327 e.leaveWhitespace()
2328 return self
2329
2331 if isinstance( other, Suppress ):
2332 if other not in self.ignoreExprs:
2333 super( ParseExpression, self).ignore( other )
2334 for e in self.exprs:
2335 e.ignore( self.ignoreExprs[-1] )
2336 else:
2337 super( ParseExpression, self).ignore( other )
2338 for e in self.exprs:
2339 e.ignore( self.ignoreExprs[-1] )
2340 return self
2341
2343 try:
2344 return super(ParseExpression,self).__str__()
2345 except:
2346 pass
2347
2348 if self.strRepr is None:
2349 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2350 return self.strRepr
2351
2353 super(ParseExpression,self).streamline()
2354
2355 for e in self.exprs:
2356 e.streamline()
2357
2358
2359
2360
2361 if ( len(self.exprs) == 2 ):
2362 other = self.exprs[0]
2363 if ( isinstance( other, self.__class__ ) and
2364 not(other.parseAction) and
2365 other.resultsName is None and
2366 not other.debug ):
2367 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2368 self.strRepr = None
2369 self.mayReturnEmpty |= other.mayReturnEmpty
2370 self.mayIndexError |= other.mayIndexError
2371
2372 other = self.exprs[-1]
2373 if ( isinstance( other, self.__class__ ) and
2374 not(other.parseAction) and
2375 other.resultsName is None and
2376 not other.debug ):
2377 self.exprs = self.exprs[:-1] + other.exprs[:]
2378 self.strRepr = None
2379 self.mayReturnEmpty |= other.mayReturnEmpty
2380 self.mayIndexError |= other.mayIndexError
2381
2382 self.errmsg = "Expected " + _ustr(self)
2383
2384 return self
2385
2389
def validate( self, validateTrace=None ):
    """Validate every contained expression, then check this one for recursion.

    C{validateTrace} is the list of elements already visited on the way
    here; it is copied, never modified.  Omitting it starts a new trace.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    for e in self.exprs:
        e.validate(tmp)
    self.checkRecursion( [] )
2395
2400
2401 -class And(ParseExpression):
2402 """Requires all given C{ParseExpression}s to be found in the given order.
2403 Expressions may be separated by whitespace.
2404 May be constructed using the C{'+'} operator.
2405 """
2406
2412
def __init__( self, exprs, savelist = True ):
    """Build the sequence and take the whitespace settings of its first expression.

    The sequence may return empty only if every contained expression may.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # whitespace handling follows the leading expression
    leader = self.exprs[0]
    self.setWhitespaceChars( leader.whiteChars )
    self.skipWhitespace = leader.skipWhitespace
    self.callPreparse = True
2419
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order, accumulating the tokens.

    The first expression is parsed with C{callPreParse=False}.  After an
    C{And._ErrorStop} marker has been seen, a failure in any later
    expression is re-raised as C{ParseSyntaxException}.

    Returns the final location and the accumulated results.
    """
    head = self.exprs[0]
    loc, resultlist = head._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            # marker only: it consumes nothing and contributes no tokens
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = expr._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = expr._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2444
2446 if isinstance( other, basestring ):
2447 other = Literal( other )
2448 return self.append( other )
2449
2451 subRecCheckList = parseElementList[:] + [ self ]
2452 for e in self.exprs:
2453 e.checkRecursion( subRecCheckList )
2454 if not e.mayReturnEmpty:
2455 break
2456
2458 if hasattr(self,"name"):
2459 return self.name
2460
2461 if self.strRepr is None:
2462 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2463
2464 return self.strRepr
2465
2466
2467 -class Or(ParseExpression):
2468 """Requires that at least one C{ParseExpression} is found.
2469 If two expressions match, the expression that matches the longest string will be used.
2470 May be constructed using the C{'^'} operator.
2471 """
def __init__( self, exprs, savelist = False ):
    """Build the alternation.

    It may return empty if it has no alternatives, or if any alternative
    may return empty.
    """
    super(Or,self).__init__(exprs, savelist)
    self.mayReturnEmpty = not self.exprs or any(e.mayReturnEmpty for e in self.exprs)
2478
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and return the result of the longest match.

    Each alternative is first trial-parsed with C{tryParse}.  The ones that
    match are then parsed for real, longest first, and the first success is
    returned.  If nothing succeeds, the exception that got furthest into the
    input is re-raised with this element's error message.
    """
    furthestLoc = -1
    furthestErr = None
    candidates = []
    for alt in self.exprs:
        try:
            endloc = alt.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestLoc:
                furthestErr = err
                furthestLoc = err.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestLoc:
                furthestErr = ParseException(instring,inputLen,alt.errmsg,self)
                furthestLoc = inputLen
        else:
            # remember every match so they can be retried longest to shortest
            candidates.append((endloc, alt))

    if candidates:
        # stable sort: equally long matches keep their declaration order
        candidates.sort(key=lambda pair: pair[0], reverse=True)
        for _, alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestErr.msg = self.errmsg
    raise furthestErr
2515
2516
2518 if isinstance( other, basestring ):
2519 other = ParserElement.literalStringClass( other )
2520 return self.append( other )
2521
2523 if hasattr(self,"name"):
2524 return self.name
2525
2526 if self.strRepr is None:
2527 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2528
2529 return self.strRepr
2530
2532 subRecCheckList = parseElementList[:] + [ self ]
2533 for e in self.exprs:
2534 e.checkRecursion( subRecCheckList )
2535
2538 """Requires that at least one C{ParseExpression} is found.
2539 If two expressions match, the first one listed is the one that will match.
2540 May be constructed using the C{'|'} operator.
2541 """
def __init__( self, exprs, savelist = False ):
    """Build the ordered alternation.

    It may return empty if it has no alternatives, or if any alternative
    may return empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
2548
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at C{loc}.

    If none matches, the exception that got furthest into the input is
    re-raised with this element's error message; with no alternatives at
    all a C{ParseException} is raised.
    """
    furthestLoc = -1
    furthestErr = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthestLoc:
                furthestErr = err
                furthestLoc = err.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestLoc:
                furthestErr = ParseException(instring,inputLen,alt.errmsg,self)
                furthestLoc = inputLen

    # only reached when no alternative matched
    if furthestErr is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestErr.msg = self.errmsg
    raise furthestErr
2572
2574 if isinstance( other, basestring ):
2575 other = ParserElement.literalStringClass( other )
2576 return self.append( other )
2577
2579 if hasattr(self,"name"):
2580 return self.name
2581
2582 if self.strRepr is None:
2583 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2584
2585 return self.strRepr
2586
2588 subRecCheckList = parseElementList[:] + [ self ]
2589 for e in self.exprs:
2590 e.checkRecursion( subRecCheckList )
2591
2592
2593 -class Each(ParseExpression):
2594 """Requires all given C{ParseExpression}s to be found, but in any order.
2595 Expressions may be separated by whitespace.
2596 May be constructed using the C{'&'} operator.
2597 """
def __init__( self, exprs, savelist = True ):
    """Build the any-order group.

    It may return empty only if every contained expression may.  The
    expression groups used by C{parseImpl} are built on the first parse,
    which C{initExprGroups} requests.
    """
    super(Each,self).__init__(exprs, savelist)
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2603
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they occur.

    A trial pass finds an order in which the expressions match, advancing a
    trial position past each match.  The expressions are then parsed for
    real in that order, starting from C{loc}, and their results are merged.

    Raises C{ParseException} if a required expression was never matched.
    Returns the final location and the merged C{ParseResults}.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
        opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = opt1 + opt2
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    matchOrder = []

    keepMatching = True
    while keepMatching:
        tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
        failed = []
        for e in tmpExprs:
            try:
                # tryParse returns the location after the match, so the
                # next expression is tried where this one ended rather than
                # at the same start position again
                tmpLoc = e.tryParse( instring, tmpLoc )
            except (ParseException, IndexError):
                failed.append(e)
            else:
                # an unwrapped Optional is recorded as the Optional itself
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        # stop once a full pass matches nothing
        if len(failed) == len(tmpExprs):
            keepMatching = False

    if tmpReqd:
        missing = ", ".join(_ustr(e) for e in tmpReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for r in resultlist:
        # a name seen in an earlier result has both sets of values combined
        dups = {}
        for k in r.keys():
            if k in finalResults:
                tmp = ParseResults(finalResults[k])
                tmp += ParseResults(r[k])
                dups[k] = tmp
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2660
2662 if hasattr(self,"name"):
2663 return self.name
2664
2665 if self.strRepr is None:
2666 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2667
2668 return self.strRepr
2669
2671 subRecCheckList = parseElementList[:] + [ self ]
2672 for e in self.exprs:
2673 e.checkRecursion( subRecCheckList )
2674
2677 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap C{expr} and copy its parsing attributes onto this element.

    A string C{expr} is converted to a C{Literal}.  When C{expr} is
    C{None}, nothing is copied.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    # take over the wrapped expression's settings
    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2692
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    The wrapped expression is parsed with C{callPreParse=False}.  Raises
    C{ParseException} when no expression has been set.
    """
    inner = self.expr
    if inner is None:
        raise ParseException("",loc,self.errmsg,self)
    return inner._parse( instring, loc, doActions, callPreParse=False )
2698
2700 self.skipWhitespace = False
2701 self.expr = self.expr.copy()
2702 if self.expr is not None:
2703 self.expr.leaveWhitespace()
2704 return self
2705
2707 if isinstance( other, Suppress ):
2708 if other not in self.ignoreExprs:
2709 super( ParseElementEnhance, self).ignore( other )
2710 if self.expr is not None:
2711 self.expr.ignore( self.ignoreExprs[-1] )
2712 else:
2713 super( ParseElementEnhance, self).ignore( other )
2714 if self.expr is not None:
2715 self.expr.ignore( self.ignoreExprs[-1] )
2716 return self
2717
2723
2725 if self in parseElementList:
2726 raise RecursiveGrammarException( parseElementList+[self] )
2727 subRecCheckList = parseElementList[:] + [ self ]
2728 if self.expr is not None:
2729 self.expr.checkRecursion( subRecCheckList )
2730
def validate( self, validateTrace=None ):
    """Validate the wrapped expression, then check this one for recursion.

    C{validateTrace} is the list of elements already visited on the way
    here; it is copied, never modified.  Omitting it starts a new trace.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    tmp = validateTrace[:]+[self]
    if self.expr is not None:
        self.expr.validate(tmp)
    self.checkRecursion( [] )
2736
2738 try:
2739 return super(ParseElementEnhance,self).__str__()
2740 except:
2741 pass
2742
2743 if self.strRepr is None and self.expr is not None:
2744 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2745 return self.strRepr
2746
2749 """Lookahead matching of the given parse expression. C{FollowedBy}
2750 does *not* advance the parsing position within the input string, it only
2751 verifies that the specified parse expression matches at the current
2752 position. C{FollowedBy} always returns a null token list."""
2756
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc}, consuming nothing.

    The trial parse raises if the expression does not match.  On success
    the location is returned unchanged with an empty token list.
    """
    lookahead = self.expr
    lookahead.tryParse( instring, loc )
    return ( loc, [] )
2760
2761
2762 -class NotAny(ParseElementEnhance):
2763 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2764 does *not* advance the parsing position within the input string, it only
2765 verifies that the specified parse expression does *not* match at the current
2766 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2767 always returns a null token list. May be constructed using the '~' operator."""
2769 super(NotAny,self).__init__(expr)
2770
2771 self.skipWhitespace = False
2772 self.mayReturnEmpty = True
2773 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2774
2775 - def parseImpl( self, instring, loc, doActions=True ):
2779
2781 if hasattr(self,"name"):
2782 return self.name
2783
2784 if self.strRepr is None:
2785 self.strRepr = "~{" + _ustr(self.expr) + "}"
2786
2787 return self.strRepr
2788
2791 """Repetition of one or more of the given expression.
2792
2793 Parameters:
2794 - expr - expression that must match one or more times
2795 - stopOn - (default=None) - expression for a terminating sentinel
2796 (only required if the sentinel would ordinarily match the repetition
2797 expression)
2798 """
def __init__( self, expr, stopOn=None):
    """Wrap C{expr} for repetition, with an optional stop sentinel.

    A string C{stopOn} is converted to a C{Literal}.  The negated sentinel
    is stored as C{self.not_ender}, or C{None} when no sentinel was given.
    """
    super(OneOrMore, self).__init__(expr)
    sentinel = Literal(stopOn) if isinstance(stopOn, basestring) else stopOn
    if sentinel is None:
        self.not_ender = None
    else:
        self.not_ender = ~sentinel
2805
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as many more times as possible.

    The first match is mandatory, so its failure propagates.  Later
    repetitions end quietly at the first C{ParseException} or
    C{IndexError}.  When a stop sentinel is configured, it is checked
    before every repetition, the first included.

    Returns the final location and the accumulated tokens.
    """
    parseExpr = self.expr._parse
    skipIgnorables = self._skipIgnorables
    rejectEnder = None
    if self.not_ender is not None:
        rejectEnder = self.not_ender.tryParse

    # must be at least one (but first see if we are at the stopOn sentinel;
    # if so, fail)
    if rejectEnder is not None:
        rejectEnder(instring, loc)
    loc, tokens = parseExpr( instring, loc, doActions, callPreParse=False )
    try:
        mustSkip = bool(self.ignoreExprs)
        while True:
            if rejectEnder is not None:
                rejectEnder(instring, loc)
            nextloc = skipIgnorables( instring, loc ) if mustSkip else loc
            loc, moretokens = parseExpr( instring, nextloc, doActions )
            if moretokens or moretokens.haskeys():
                tokens += moretokens
    except (ParseException,IndexError):
        # no further repetition matches: keep what has been collected
        pass

    return loc, tokens
2834
2836 if hasattr(self,"name"):
2837 return self.name
2838
2839 if self.strRepr is None:
2840 self.strRepr = "{" + _ustr(self.expr) + "}..."
2841
2842 return self.strRepr
2843
2848
2850 """Optional repetition of zero or more of the given expression.
2851
2852 Parameters:
2853 - expr - expression that must match zero or more times
2854 - stopOn - (default=None) - expression for a terminating sentinel
2855 (only required if the sentinel would ordinarily match the repetition
2856 expression)
2857 """
2858 - def __init__( self, expr, stopOn=None):
2861
2862 - def parseImpl( self, instring, loc, doActions=True ):
2867
2869 if hasattr(self,"name"):
2870 return self.name
2871
2872 if self.strRepr is None:
2873 self.strRepr = "[" + _ustr(self.expr) + "]..."
2874
2875 return self.strRepr
2876
2883
2884 _optionalNotMatched = _NullToken()
2886 """Optional matching of the given expression.
2887
2888 Parameters:
2889 - expr - expression that may match zero times or one time
2890 - default (optional) - value to be returned if the optional expression
2891 is not found.
2892 """
2894 super(Optional,self).__init__( expr, savelist=False )
2895 self.defaultValue = default
2896 self.mayReturnEmpty = True
2897
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression if it matches; otherwise use the default.

    When the expression fails with C{ParseException} or C{IndexError}, the
    location is left unchanged.  The tokens are then empty if no default
    was configured, or hold the default value, also stored under the
    wrapped expression's results name if it has one.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ fallback ])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [ fallback ]
    return loc, tokens
2911
2913 if hasattr(self,"name"):
2914 return self.name
2915
2916 if self.strRepr is None:
2917 self.strRepr = "[" + _ustr(self.expr) + "]"
2918
2919 return self.strRepr
2920
2921 -class SkipTo(ParseElementEnhance):
2922 """Token for skipping over all undefined text until the matched expression is found.
2923
2924 Parameters:
2925 - expr - target expression marking the end of the data to be skipped
2926 - include - (default=False) if True, the target expression is also parsed
2927 (the skipped text and target expression are returned as a 2-element list).
2928 - ignore - (default=None) used to define grammars (typically quoted strings and
2929 comments) that might contain false matches to the target expression
2930 - failOn - (default=None) define expressions that are not allowed to be
2931 included in the skipped text; if found before the target expression is found,
2932 the SkipTo is not a match
2933 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Store the target expression and the skipping options.

    C{other} is the target expression, C{include} selects whether the
    target itself is also parsed, C{ignore} is an expression to step over
    while scanning, and C{failOn} is an expression that stops the scan.
    A string C{failOn} is converted to a C{Literal}.
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.includeMatch = include
    self.asList = False
    self.mayReturnEmpty = True
    self.mayIndexError = False
    # isinstance() is False for None, so no separate None check is needed
    self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
    self.errmsg = "No match found for "+_ustr(self.expr)
2946
def parseImpl( self, instring, loc, doActions=True ):
    """Scan forward from C{loc} until the target expression matches.

    At each candidate position the scan first stops if the C{failOn}
    expression can parse there, then steps over any matches of the ignore
    expression, then tries the target expression without parse actions.

    Returns the new location and a C{ParseResults} holding the skipped
    text, plus the target's tokens when C{includeMatch} is set.  Raises
    C{ParseException} if the end of the input is passed without a match.
    """
    startloc = loc
    instrlen = len(instring)
    expr_parse = self.expr._parse
    self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
    self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

    tmploc = loc
    while tmploc <= instrlen:
        if self_failOn_canParseNext is not None:
            # break if failOn expression matches; the bound method must be
            # called here, there is no local named self_failOn
            if self_failOn_canParseNext(instring, tmploc):
                break

        if self_ignoreExpr_tryParse is not None:
            # advance past ignore expressions
            while 1:
                try:
                    tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                except ParseBaseException:
                    break

        try:
            expr_parse(instring, tmploc, doActions=False, callPreParse=False)
        except (ParseException, IndexError):
            # no match, advance loc in string
            tmploc += 1
        else:
            # matched skipto expr, done
            break

    else:
        # ran off the end of the input string without matching skipto expr, fail
        raise ParseException(instring, loc, self.errmsg, self)

    # build up return values
    loc = tmploc
    skiptext = instring[startloc:loc]
    skipresult = ParseResults(skiptext)

    if self.includeMatch:
        # parse the target for real so that its parse actions run
        loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
        skipresult += mat

    return loc, skipresult
2993
2994 -class Forward(ParseElementEnhance):
2995 """Forward declaration of an expression to be defined later -
2996 used for recursive grammars, such as algebraic infix notation.
2997 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
2998
2999 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3000 Specifically, '|' has a lower precedence than '<<', so that::
3001 fwdExpr << a | b | c
3002 will actually be evaluated as::
3003 (fwdExpr << a) | b | c
3004 thereby leaving b and c out as parseable alternatives. It is recommended that you
3005 explicitly group the values inserted into the C{Forward}::
3006 fwdExpr << (a | b | c)
3007 Converting to use the '<<=' operator instead will avoid this problem.
3008 """
3011
3013 if isinstance( other, basestring ):
3014 other = ParserElement.literalStringClass(other)
3015 self.expr = other
3016 self.strRepr = None
3017 self.mayIndexError = self.expr.mayIndexError
3018 self.mayReturnEmpty = self.expr.mayReturnEmpty
3019 self.setWhitespaceChars( self.expr.whiteChars )
3020 self.skipWhitespace = self.expr.skipWhitespace
3021 self.saveAsList = self.expr.saveAsList
3022 self.ignoreExprs.extend(self.expr.ignoreExprs)
3023 return self
3024
3026 return self << other
3027
3029 self.skipWhitespace = False
3030 return self
3031
3033 if not self.streamlined:
3034 self.streamlined = True
3035 if self.expr is not None:
3036 self.expr.streamline()
3037 return self
3038
def validate( self, validateTrace=None ):
    """Validate the assigned expression, then check this one for recursion.

    The expression is only validated if this element is not already in
    C{validateTrace}, the list of elements visited on the way here.  The
    list is copied, never modified.  Omitting it starts a new trace.
    """
    # None instead of a shared mutable [] default
    if validateTrace is None:
        validateTrace = []
    if self not in validateTrace:
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
3045
3047 if hasattr(self,"name"):
3048 return self.name
3049 return self.__class__.__name__ + ": ..."
3050
3051
3052 self._revertClass = self.__class__
3053 self.__class__ = _ForwardNoRecurse
3054 try:
3055 if self.expr is not None:
3056 retString = _ustr(self.expr)
3057 else:
3058 retString = "None"
3059 finally:
3060 self.__class__ = self._revertClass
3061 return self.__class__.__name__ + ": " + retString
3062
3064 if self.expr is not None:
3065 return super(Forward,self).copy()
3066 else:
3067 ret = Forward()
3068 ret <<= self
3069 return ret
3070
3074
3076 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3077 - def __init__( self, expr, savelist=False ):
3080
3082 """Converter to concatenate all matching tokens to a single string.
3083 By default, the matching patterns must also be contiguous in the input string;
3084 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3085 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its tokens are joined into one string.

    C{joinString} is the separator used when joining.  When C{adjacent} is
    true, C{leaveWhitespace()} is applied first; whitespace skipping is
    then switched back on for the C{Combine} itself.
    """
    super(Combine,self).__init__( expr )
    self.joinString = joinString
    self.adjacent = adjacent
    # suppress whitespace-stripping in contained parse expressions, but
    # re-enable it on the Combine itself
    if adjacent:
        self.leaveWhitespace()
    self.skipWhitespace = True
    self.callPreparse = True
3095
3102
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with one string joined by C{self.joinString}.

    The result starts as a copy of C{tokenlist} with its list items
    removed.  It is returned wrapped in a list when this element has a
    results name and the result has keys.
    """
    retToks = tokenlist.copy()
    del retToks[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    retToks += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and retToks.haskeys():
        return [ retToks ]
    return retToks
3112
3113 -class Group(TokenConverter):
3114 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3116 super(Group,self).__init__( expr )
3117 self.saveAsList = True
3118
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens wrapped in a single-element list."""
    grouped = [ tokenlist ]
    return grouped
3121
3122 -class Dict(TokenConverter):
3123 """Converter to return a repetitive expression as a list, but also as a dictionary.
3124 Each element can also be referenced using the first token in the expression as its key.
3125 Useful for tabular report scraping when the first column can be used as a item key.
3126 """
3128 super(Dict,self).__init__( expr )
3129 self.saveAsList = True
3130
def postParse( self, instring, loc, tokenlist ):
    """Add a named entry to C{tokenlist} for each of its non-empty items.

    The first element of an item is the key; an integer key is converted to
    a stripped string.  The value is an empty string for a one-element
    item, the second element for a two-element item whose second element is
    not a C{ParseResults}, and otherwise the rest of the item (unwrapped
    when it is a single element without keys).

    Returns C{tokenlist}, wrapped in a list when this element has a
    results name.
    """
    for offset, entry in enumerate(tokenlist):
        size = len(entry)
        if size == 0:
            continue
        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()
        if size == 1:
            value = ""
        elif size == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            # everything after the key becomes the value
            rest = entry.copy()
            del rest[0]
            if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                value = rest
            else:
                value = rest[0]
        tokenlist[key] = _ParseResultsWithOffset(value,offset)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3154
3157 """Converter for ignoring the results of a parsed expression."""
3158 - def postParse( self, instring, loc, tokenlist ):
3160
3163
3166 """Wrapper for parse actions, to ensure they are only called once."""
3168 self.callable = _trim_arity(methodCall)
3169 self.called = False
3171 if not self.called:
3172 results = self.callable(s,l,t)
3173 self.called = True
3174 return results
3175 raise ParseException(s,l,"")
3178
3180 """Decorator for debugging parse actions."""
3181 f = _trim_arity(f)
3182 def z(*paArgs):
3183 thisFunc = f.func_name
3184 s,l,t = paArgs[-3:]
3185 if len(paArgs)>3:
3186 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3187 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3188 try:
3189 ret = f(*paArgs)
3190 except Exception as exc:
3191 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3192 raise
3193 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3194 return ret
3195 try:
3196 z.__name__ = f.__name__
3197 except AttributeError:
3198 pass
3199 return z
3200
3201
3202
3203
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = "%s [%s %s]..." % ( _ustr(expr), _ustr(delim), _ustr(expr) )
    if not combine:
        # delimiters are matched but left out of the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        # delimiters are kept and everything is joined into one token
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3217
3219 """Helper to define a counted list of expressions.
3220 This helper defines a pattern of the form::
3221 integer expr expr expr...
3222 where the leading integer tells how many expr expressions follow.
3223 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3224 """
3225 arrayExpr = Forward()
3226 def countFieldParseAction(s,l,t):
3227 n = t[0]
3228 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3229 return []
3230 if intExpr is None:
3231 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3232 else:
3233 intExpr = intExpr.copy()
3234 intExpr.setName("arrayLen")
3235 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3236 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3237
3239 ret = []
3240 for i in L:
3241 if isinstance(i,list):
3242 ret.extend(_flatten(i))
3243 else:
3244 ret.append(i)
3245 return ret
3246
3248 """Helper to define an expression that is indirectly defined from
3249 the tokens matched in a previous expression, that is, it looks
3250 for a 'repeat' of a previous expression. For example::
3251 first = Word(nums)
3252 second = matchPreviousLiteral(first)
3253 matchExpr = first + ":" + second
3254 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3255 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3256 If this is not desired, use C{matchPreviousExpr}.
3257 Do *not* use with packrat parsing enabled.
3258 """
3259 rep = Forward()
3260 def copyTokenToRepeater(s,l,t):
3261 if t:
3262 if len(t) == 1:
3263 rep << t[0]
3264 else:
3265
3266 tflat = _flatten(t.asList())
3267 rep << And(Literal(tt) for tt in tflat)
3268 else:
3269 rep << Empty()
3270 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3271 rep.setName('(prev) ' + _ustr(expr))
3272 return rep
3273
3275 """Helper to define an expression that is indirectly defined from
3276 the tokens matched in a previous expression, that is, it looks
3277 for a 'repeat' of a previous expression. For example::
3278 first = Word(nums)
3279 second = matchPreviousExpr(first)
3280 matchExpr = first + ":" + second
3281 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3282 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3283 the expressions are evaluated first, and then compared, so
3284 C{"1"} is compared with C{"10"}.
3285 Do *not* use with packrat parsing enabled.
3286 """
3287 rep = Forward()
3288 e2 = expr.copy()
3289 rep <<= e2
3290 def copyTokenToRepeater(s,l,t):
3291 matchTokens = _flatten(t.asList())
3292 def mustMatchTheseTokens(s,l,t):
3293 theseTokens = _flatten(t.asList())
3294 if theseTokens != matchTokens:
3295 raise ParseException("",0,"")
3296 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3297 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3298 rep.setName('(prev) ' + _ustr(expr))
3299 return rep
3300
3302
3303 for c in r"\^-]":
3304 s = s.replace(c,_bslash+c)
3305 s = s.replace("\n",r"\n")
3306 s = s.replace("\t",r"\t")
3307 return _ustr(s)
3308
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list (any sequence or
          generator) of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} if no symbols are given.  If C{strs} is not a string,
       sequence or generator, a C{SyntaxWarning} is issued and C{NoMatch()} is
       returned.
    """
    # The ABC aliases in the top-level 'collections' namespace were removed in
    # Python 3.10; fall back to the old location only for Python 2.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that would be masked by an earlier,
    # shorter prefix (e.g. "<" before "<=") ahead of that prefix.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # no duplicate/masking found for this symbol - advance to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters - a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best-effort optimization only: fall through to MatchFirst, but do
            # not swallow KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3372
3374 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3375 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3376 in the proper order. The key pattern can include delimiting markers or punctuation,
3377 as long as they are suppressed, thereby leaving the significant key text. The value
3378 pattern can include named results, so that the C{Dict} results can include named token
3379 fields.
3380 """
3381 return Dict( ZeroOrMore( Group ( key + value ) ) )
3382
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text.  By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def currentLocation(s,loc,t):
        # parse action for the zero-width markers: the token is the location itself
        return loc

    startMarker = Empty().setParseAction(currentLocation)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the token list in place with the raw text, then drop the
            # two bookkeeping names so that only the caller's names remain
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]

    wrapped.setParseAction(extractText)
    return wrapped
3409
3411 """Helper to undo pyparsing's default grouping of And expressions, even
3412 if all but one are non-empty."""
3413 return TokenConverter(expr).setParseAction(lambda t:t[0])
3414
3416 """Helper to decorate a returned token with its starting and ending locations in the input string.
3417 This helper adds the following results names:
3418 - locn_start = location where matched expression begins
3419 - locn_end = location where matched expression ends
3420 - value = the actual parsed results
3421
3422 Be careful if the input text contains C{<TAB>} characters, you may want to call
3423 C{L{ParserElement.parseWithTabs}}
3424 """
3425 locator = Empty().setParseAction(lambda s,l,t: l)
3426 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3427
3428
3429
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for regex-style "[...]" character set strings
# a backslash followed by one punctuation character yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the hex digits as everything after the 'x'/'X' marker.  (Stripping the
# character set '\0x' from the left would also eat leading zero digits, so that
# '\x00' became an empty string, and would leave an upper-case 'X' in place.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
# drop only the leading backslash; the remaining digits are read as base 8
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# a range is grouped so that it can be told apart from a single character
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3444 r"""Helper to easily define string ranges for use in Word construction. Borrows
3445 syntax from regexp '[]' string range definitions::
3446 srange("[0-9]") -> "0123456789"
3447 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3448 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3449 The input string must be enclosed in []'s, and the returned string is the expanded
3450 character set joined into a single string.
3451 The values enclosed in the []'s may be::
3452 a single character
3453 an escaped character with a leading backslash (such as \- or \])
3454 an escaped hex character with a leading '\x' (\x21, which is a '!' character)
3455 (\0x## is also supported for backwards compatibility)
3456 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3457 a range of any of the above, separated by a dash ('a-z', etc.)
3458 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3459 """
3460 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
3461 try:
3462 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
3463 except:
3464 return ""
3465
3467 """Helper method for defining parse actions that require matching at a specific
3468 column in the input text.
3469 """
3470 def verifyCol(strg,locn,toks):
3471 if col(locn,strg) != n:
3472 raise ParseException(strg,locn,"matched token not at column %d" % n)
3473 return verifyCol
3474
3476 """Helper method for common parse actions that simply return a literal value. Especially
3477 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3478 """
3479
3480
3481
3482 return functools.partial(next, itertools.repeat([replStr]))
3483
3485 """Helper parse action for removing quotation marks from parsed quoted strings.
3486 To use, add this parse action to quoted string using::
3487 quotedString.setParseAction( removeQuotes )
3488 """
3489 return t[0][1:-1]
3490
3492 """Helper parse action to convert tokens to upper case."""
3493 return [ tt.upper() for tt in map(_ustr,t) ]
3494
3496 """Helper parse action to convert tokens to lower case."""
3497 return [ tt.lower() for tt in map(_ustr,t) ]
3498
3500 """Method to be called from within a parse action to determine the end
3501 location of the parsed tokens."""
3502 import inspect
3503 fstack = inspect.stack()
3504 try:
3505
3506 for f in fstack[2:]:
3507 if f[3] == "_parseNoCache":
3508 endloc = f[0].f_locals["loc"]
3509 return endloc
3510 else:
3511 raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
3512 finally:
3513 del fstack
3514
3543
3547
3551
3553 """Helper to create a validating parse action to be used with start tags created
3554 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3555 with a required attribute value, to avoid false matches on common tags such as
3556 C{<TD>} or C{<DIV>}.
3557
3558 Call C{withAttribute} with a series of attribute names and values. Specify the list
3559 of filter attributes names and values as:
3560 - keyword arguments, as in C{(align="right")}, or
3561 - as an explicit dict with C{**} operator, when an attribute name is also a Python
3562 reserved word, as in C{**{"class":"Customer", "align":"right"}}
3563 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3564 For attribute names with a namespace prefix, you must use the second form. Attribute
3565 names are matched insensitive to upper/lower case.
3566
3567 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
3568
3569 To verify that the attribute exists, but without specifying a value, pass
3570 C{withAttribute.ANY_VALUE} as the value.
3571 """
3572 if args:
3573 attrs = args[:]
3574 else:
3575 attrs = attrDict.items()
3576 attrs = [(k,v) for k,v in attrs]
3577 def pa(s,l,tokens):
3578 for attrName,attrValue in attrs:
3579 if attrName not in tokens:
3580 raise ParseException(s,l,"no matching attribute " + attrName)
3581 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3582 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3583 (attrName, tokens[attrName], attrValue))
3584 return pa
3585 withAttribute.ANY_VALUE = object()
3586
def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} when the attribute to match is C{class},
       which cannot be given as a plain keyword argument because C{class} is a
       reserved word in Python.

       Parameters:
        - classname - required value of the class attribute
        - namespace - (default='') - optional namespace prefix; when given, the
          attribute tested is C{"<namespace>:class"}
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    return withAttribute(**{attrName : classname})
3593
# associativity markers: each is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
3599 """Helper method for constructing grammars of expressions made up of
3600 operators working in a precedence hierarchy. Operators may be unary or
3601 binary, left- or right-associative. Parse actions can also be attached
3602 to operator expressions.
3603
3604 Parameters:
3605 - baseExpr - expression representing the most basic element for the nested
3606 - opList - list of tuples, one for each operator precedence level in the
3607 expression grammar; each tuple is of the form
3608 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3609 - opExpr is the pyparsing expression for the operator;
3610 may also be a string, which will be converted to a Literal;
3611 if numTerms is 3, opExpr is a tuple of two expressions, for the
3612 two operators separating the 3 terms
3613 - numTerms is the number of terms for this operator (must
3614 be 1, 2, or 3)
3615 - rightLeftAssoc is the indicator whether the operator is
3616 right or left associative, using the pyparsing-defined
3617 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3618 - parseAction is the parse action to be associated with
3619 expressions matching this operator expression (the
3620 parse action tuple member may be omitted)
3621 - lpar - expression for matching left-parentheses (default=Suppress('('))
3622 - rpar - expression for matching right-parentheses (default=Suppress(')'))
3623 """
3624 ret = Forward()
3625 lastExpr = baseExpr | ( lpar + ret + rpar )
3626 for i,operDef in enumerate(opList):
3627 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3628 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
3629 if arity == 3:
3630 if opExpr is None or len(opExpr) != 2:
3631 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3632 opExpr1, opExpr2 = opExpr
3633 thisExpr = Forward().setName(termName)
3634 if rightLeftAssoc == opAssoc.LEFT:
3635 if arity == 1:
3636 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3637 elif arity == 2:
3638 if opExpr is not None:
3639 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3640 else:
3641 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3642 elif arity == 3:
3643 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3644 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3645 else:
3646 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3647 elif rightLeftAssoc == opAssoc.RIGHT:
3648 if arity == 1:
3649
3650 if not isinstance(opExpr, Optional):
3651 opExpr = Optional(opExpr)
3652 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3653 elif arity == 2:
3654 if opExpr is not None:
3655 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3656 else:
3657 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3658 elif arity == 3:
3659 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3660 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3661 else:
3662 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3663 else:
3664 raise ValueError("operator must indicate right or left associativity")
3665 if pa:
3666 matchExpr.setParseAction( pa )
3667 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
3668 lastExpr = thisExpr
3669 ret <<= lastExpr
3670 return ret
# second name bound to the same function object as infixNotation
operatorPrecedence = infixNotation

# Quoted string expressions.  Between the delimiters each pattern accepts: any
# character other than the quote, newline, carriage return or backslash; a
# doubled quote; a backslash-x hex escape; or a backslash followed by any
# single character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as a single regular expression
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u', combined into one token
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
3679 """Helper method for defining nested lists enclosed in opening and closing
3680 delimiters ("(" and ")" are the default).
3681
3682 Parameters:
3683 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3684 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3685 - content - expression for items within the nested lists (default=None)
3686 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3687
3688 If an expression is not provided for the content argument, the nested
3689 expression will capture all whitespace-delimited content between delimiters
3690 as a list of separate values.
3691
3692 Use the C{ignoreExpr} argument to define expressions that may contain
3693 opening or closing characters that should not be treated as opening
3694 or closing characters for nesting, such as quotedString or a comment
3695 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
3696 The default is L{quotedString}, but if no expressions are to be ignored,
3697 then pass C{None} for this argument.
3698 """
3699 if opener == closer:
3700 raise ValueError("opening and closing strings cannot be the same")
3701 if content is None:
3702 if isinstance(opener,basestring) and isinstance(closer,basestring):
3703 if len(opener) == 1 and len(closer)==1:
3704 if ignoreExpr is not None:
3705 content = (Combine(OneOrMore(~ignoreExpr +
3706 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3707 ).setParseAction(lambda t:t[0].strip()))
3708 else:
3709 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3710 ).setParseAction(lambda t:t[0].strip()))
3711 else:
3712 if ignoreExpr is not None:
3713 content = (Combine(OneOrMore(~ignoreExpr +
3714 ~Literal(opener) + ~Literal(closer) +
3715 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3716 ).setParseAction(lambda t:t[0].strip()))
3717 else:
3718 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3719 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3720 ).setParseAction(lambda t:t[0].strip()))
3721 else:
3722 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3723 ret = Forward()
3724 if ignoreExpr is not None:
3725 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3726 else:
3727 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3728 ret.setName('nested %s%s expression' % (opener,closer))
3729 return ret
3730
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} itself is modified: a backslash followed
       by a line end is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        # nothing to verify once the end of the input has been reached
        if l >= len(s):
            return
        thisCol = col(l,s)
        if thisCol == indentStack[-1]:
            return
        if thisCol > indentStack[-1]:
            # deeper than the current block: fatal, stops the parse
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        thisCol = col(l,s)
        if thisCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a deeper block: remember its column
        indentStack.append( thisCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        thisCol = col(l,s)
        # must be left of the current level and no deeper than the enclosing one
        if not indentStack or thisCol >= indentStack[-1] or thisCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements at the same column, each individually grouped
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3782
# 8-bit character sets, expanded by srange: code points 0xc0-0xff except 0xd7
# and 0xf7, and code points 0xa1-0xbf plus 0xd7 and 0xf7
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions for a tag whose name is any Word of
# alphas followed by alphanums, '_' or ':'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character (names and characters are paired in order)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# '&name;' for any name in the map; the name is captured as the 'entity' group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
3790 """Helper parser action to replace common HTML entities with their special characters"""
3791 return _htmlEntityMap.get(t.entity)
3792
3793
# ready-made expressions for common comment styles
# '/*' through the first following '*/'
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# '<!--' through the first following '-->', newlines included
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# everything up to the end of the current line, leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# '//' to end of line; a backslash-newline pair is consumed as part of the comment
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a '/* ... */' comment or a '//' comment
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same object as cppStyleComment, under a second name
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# one item of a comma-separated list: runs of printable non-comma characters,
# joined by spaces/tabs only when those are not followed by a comma or line end
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# items may be quoted strings or bare items; a missing item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3807
3808
3809 if __name__ == "__main__":
3810
3811 selectToken = CaselessLiteral( "select" )
3812 fromToken = CaselessLiteral( "from" )
3813
3814 ident = Word( alphas, alphanums + "_$" )
3815 columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3816 columnNameList = Group( delimitedList( columnName ) ).setName("columns")
3817 tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
3818 tableNameList = Group( delimitedList( tableName ) ).setName("tables")
3819 simpleSQL = ( selectToken + \
3820 ( '*' | columnNameList ).setResultsName( "columns" ) + \
3821 fromToken + \
3822 tableNameList.setResultsName( "tables" ) )
3823
3824 simpleSQL.runTests("""\
3825 SELECT * from XYZZY, ABC
3826 select * from SYS.XYZZY
3827 Select A from Sys.dual
3828 Select AA,BB,CC from Sys.dual
3829 Select A, B, C from Sys.dual
3830 Select A, B, C from Sys.dual
3831 Xelect A, B, C from Sys.dual
3832 Select A, B, C frox Sys.dual
3833 Select
3834 Select ^^^ frox Sys.dual
3835 Select A, B, C from Sys.dual, Table2""")
3836