1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
# Release number of this copy of the module.
__version__ = "2.1.4"
# Timestamp string recorded for this release.
__versionTime__ = "13 May 2016 18:25 UTC"
# Author credit and contact address.
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import functools
74 import itertools
75 import traceback
76
77
78
# Public names exported by a star-import of this module: first the
# expression/exception classes, then the lower-case helpers and constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
'pyparsing_common',
]
99
100 system_version = tuple(sys.version_info)[:3]
101 PY_3 = system_version[0] == 3
102 if PY_3:
103 _MAX_INT = sys.maxsize
104 basestring = str
105 unichr = chr
106 _ustr = str
107
108
109 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
110
111 else:
112 _MAX_INT = sys.maxint
113 range = xrange
116 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
117 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
118 then < returns the unicode object | encodes it with the default encoding | ... >.
119 """
120 if isinstance(obj,unicode):
121 return obj
122
123 try:
124
125
126 return str(obj)
127
128 except UnicodeEncodeError:
129
130 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
131 xmlcharref = Regex('&#\d+;')
132 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
133 return xmlcharref.transformString(ret)
134
135
136 singleArgBuiltins = []
137 import __builtin__
138 for fname in "sum len sorted reversed list tuple set any all min max".split():
139 try:
140 singleArgBuiltins.append(getattr(__builtin__,fname))
141 except AttributeError:
142 continue
143
144 _generatorType = type((y for y in range(1)))
147 """Escape &, <, >, ", ', etc. in a string of data."""
148
149
150 from_symbols = '&><"\''
151 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
152 for from_,to_ in zip(from_symbols, to_symbols):
153 data = data.replace(from_, to_)
154 return data
155
158
# Character-set string constants exported by the module.
alphas = "".join((string.ascii_uppercase, string.ascii_lowercase))
nums = "0123456789"
hexnums = "".join((nums, "ABCDEFabcdef"))
alphanums = "".join((alphas, nums))
# chr(92) is the backslash character
_bslash = chr(92)
# every printable character that is not whitespace, in string.printable order
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
167 """base exception class for all parsing runtime exceptions"""
168
169
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Record where and why a parse failure occurred.

    When C{msg} is omitted, C{pstr} is taken to be the message itself and
    the stored input string is left empty.
    """
    if msg is None:
        # single-string form: the lone string is the message
        self.msg, self.pstr = pstr, ""
    else:
        self.msg, self.pstr = msg, pstr
    self.loc = loc
    self.parserElement = elem
179
181 """supported attributes by name are:
182 - lineno - returns the line number of the exception text
183 - col - returns the column number of the exception text
184 - line - returns the line containing the exception text
185 """
186 if( aname == "lineno" ):
187 return lineno( self.loc, self.pstr )
188 elif( aname in ("col", "column") ):
189 return col( self.loc, self.pstr )
190 elif( aname == "line" ):
191 return line( self.loc, self.pstr )
192 else:
193 raise AttributeError(aname)
194
196 return "%s (at char %d), (line:%d, col:%d)" % \
197 ( self.msg, self.loc, self.lineno, self.column )
211 return "lineno col line".split() + dir(type(self))
212
214 """exception thrown when parse expressions don't match class;
215 supported attributes by name are:
216 - lineno - returns the line number of the exception text
217 - col - returns the column number of the exception text
218 - line - returns the line containing the exception text
219 """
220 pass
221
223 """user-throwable exception thrown when inconsistent parse content
224 is found; stops all parsing immediately"""
225 pass
226
228 """just like C{L{ParseFatalException}}, but thrown internally when an
229 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
230 an unbacktrackable syntax error has been found"""
234
249 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Keep the given list of parse elements as C{parseElementTrace}."""
    setattr(self, "parseElementTrace", parseElementList)
252
254 return "RecursiveGrammarException: %s" % self.parseElementTrace
255
262 return repr(self.tup)
264 self.tup = (self.tup[0],i)
265
267 """Structured parse results, to provide multiple means of access to the parsed data:
268 - as a list (C{len(results)})
269 - by list index (C{results[0], results[1]}, etc.)
270 - by attribute (C{results.<resultsName>})
271 """
def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
    """Create a new instance, unless C{toklist} is already an instance of
    C{cls}, in which case that same object is handed back unchanged."""
    if not isinstance(toklist, cls):
        fresh = object.__new__(cls)
        # flag read by __init__ to decide whether to set up the containers
        fresh.__doinit = True
        return fresh
    return toklist
278
279
280
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Initialise the token list and, when C{name} is given, store the
    tokens under that results name.

    C{isinstance} is bound as a default argument to the builtin of the same
    name (presumably a lookup-speed optimisation - confirm before removing).
    """
    # __new__ sets __doinit only on freshly created objects; when __new__
    # handed back an existing instance the flag is already False, so the
    # containers below are not reset.
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            # shallow copy, so the caller's list is not shared
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            # materialise generators into a list
            self.__toklist = list(toklist)
        else:
            # any other single value becomes a one-element token list
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            # names recorded here return all of their values on lookup
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name)
        self.__name = name
        # store nothing under the name when the tokens are None, '' or []
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist could not be indexed at 0: store it whole
                    self[name] = toklist
319
321 if isinstance( i, (int,slice) ):
322 return self.__toklist[i]
323 else:
324 if i not in self.__accumNames:
325 return self.__tokdict[i][-1][0]
326 else:
327 return ParseResults([ v[0] for v in self.__tokdict[i] ])
328
330 if isinstance(v,_ParseResultsWithOffset):
331 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
332 sub = v[0]
333 elif isinstance(k,(int,slice)):
334 self.__toklist[k] = v
335 sub = v
336 else:
337 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
338 sub = v
339 if isinstance(sub,ParseResults):
340 sub.__parent = wkref(self)
341
343 if isinstance(i,(int,slice)):
344 mylen = len( self.__toklist )
345 del self.__toklist[i]
346
347
348 if isinstance(i, int):
349 if i < 0:
350 i += mylen
351 i = slice(i, i+1)
352
353 removed = list(range(*i.indices(mylen)))
354 removed.reverse()
355
356
357
358
359
360
361 for name,occurrences in self.__tokdict.items():
362 for j in removed:
363 for k, (value, position) in enumerate(occurrences):
364 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
365 else:
366 del self.__tokdict[i]
367
369 return k in self.__tokdict
370
def __len__( self ):
    """Number of entries in the token list."""
    return len( self.__toklist )
def __bool__(self):
    """True when the token list is non-empty."""
    return bool(self.__toklist)
# same truth test under its Python 2 name
__nonzero__ = __bool__
def __iter__( self ):
    """Iterate over the entries of the token list."""
    return iter( self.__toklist )
def __reversed__( self ):
    """Iterate over a reversed copy of the token list."""
    backwards = self.__toklist[::-1]
    return iter( backwards )
377 """Returns all named result keys."""
378 if hasattr(self.__tokdict, "iterkeys"):
379 return self.__tokdict.iterkeys()
380 else:
381 return iter(self.__tokdict)
382
384 """Returns all named result values."""
385 return (self[k] for k in self.iterkeys())
386
388 return ((k, self[k]) for k in self.iterkeys())
389
390 if PY_3:
391 keys = iterkeys
392 values = itervalues
393 items = iteritems
394 else:
396 """Returns all named result keys."""
397 return list(self.iterkeys())
398
400 """Returns all named result values."""
401 return list(self.itervalues())
402
404 """Returns all named result keys and values as a list of tuples."""
405 return list(self.iteritems())
406
408 """Since keys() returns an iterator, this method is helpful in bypassing
409 code that looks for the existence of any defined results names."""
410 return bool(self.__tokdict)
411
def pop( self, *args, **kwargs):
    """Removes and returns item at specified index (default=last).
    Supports both list and dict semantics for pop(). If passed no
    argument or an integer argument, it will use list semantics
    and pop tokens from the list of parsed tokens. If passed a
    non-integer argument (most likely a string), it will use dict
    semantics and pop the corresponding value from any defined
    results names. A second default return value argument is
    supported, just as in dict.pop()."""
    positional = args if args else [-1]
    for kw, value in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        # default=... behaves like a second positional argument
        positional = (positional[0], value)

    key = positional[0]
    # fall back to the default only for a missing non-integer key when a
    # default was actually supplied
    if not isinstance(key, int) and len(positional) != 1 and key not in self:
        return positional[1]

    popped = self[key]
    del self[key]
    return popped
438
def get(self, key, defaultValue=None):
    """Look up the named result C{key}; when no such name is defined,
    hand back C{defaultValue} (C{None} unless the caller supplies one)."""
    return self[key] if key in self else defaultValue
447
def insert( self, index, insStr ):
    """Inserts new element at location index in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # bump the stored offset of every named entry located after the insert point
    for occurrences in self.__tokdict.values():
        occurrences[:] = [ _ParseResultsWithOffset(value, position + (position > index))
                           for value, position in occurrences ]
459
461 """Add single element to end of ParseResults list of elements."""
462 self.__toklist.append(item)
463
465 """Add sequence of elements to end of ParseResults list of elements."""
466 if isinstance(itemseq, ParseResults):
467 self += itemseq
468 else:
469 self.__toklist.extend(itemseq)
470
472 """Clear all elements and results names."""
473 del self.__toklist[:]
474 self.__tokdict.clear()
475
477 try:
478 return self[name]
479 except KeyError:
480 return ""
481
482 if name in self.__tokdict:
483 if name not in self.__accumNames:
484 return self.__tokdict[name][-1][0]
485 else:
486 return ParseResults([ v[0] for v in self.__tokdict[name] ])
487 else:
488 return ""
489
491 ret = self.copy()
492 ret += other
493 return ret
494
496 if other.__tokdict:
497 offset = len(self.__toklist)
498 addoffset = lambda a: offset if a<0 else a+offset
499 otheritems = other.__tokdict.items()
500 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
501 for (k,vlist) in otheritems for v in vlist]
502 for k,v in otherdictitems:
503 self[k] = v
504 if isinstance(v[0],ParseResults):
505 v[0].__parent = wkref(self)
506
507 self.__toklist += other.__toklist
508 self.__accumNames.update( other.__accumNames )
509 return self
510
512 if isinstance(other,int) and other == 0:
513
514 return self.copy()
515 else:
516
517 return other + self
518
520 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
521
523 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
524
526 out = []
527 for item in self.__toklist:
528 if out and sep:
529 out.append(sep)
530 if isinstance( item, ParseResults ):
531 out += item._asStringList()
532 else:
533 out.append( _ustr(item) )
534 return out
535
537 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
538 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
539
541 """Returns the named parse results as a nested dictionary."""
542 if PY_3:
543 item_fn = self.items
544 else:
545 item_fn = self.iteritems
546
547 def toItem(obj):
548 if isinstance(obj, ParseResults):
549 if obj.haskeys():
550 return obj.asDict()
551 else:
552 return [toItem(v) for v in obj]
553 else:
554 return obj
555
556 return dict((k,toItem(v)) for k,v in item_fn())
557
559 """Returns a new copy of a C{ParseResults} object."""
560 ret = ParseResults( self.__toklist )
561 ret.__tokdict = self.__tokdict.copy()
562 ret.__parent = self.__parent
563 ret.__accumNames.update( self.__accumNames )
564 ret.__name = self.__name
565 return ret
566
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
    # map each recorded token offset to the results name stored at that offset
    offsetToName = {}
    for tagName, occurrences in self.__tokdict.items():
        for entry in occurrences:
            offsetToName[entry[1]] = tagName

    if formatted:
        newline = "\n"
        childIndent = indent + " "
    else:
        # collapse output: no line breaks and no indentation
        newline = indent = childIndent = ""

    # tag for this element: an explicit doctag wins, then this object's own name
    selfTag = doctag if doctag is not None else self.__name
    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    childrenNamedOnly = namedItemsOnly and doctag is None
    pieces = [ newline, indent, "<", selfTag, ">" ]

    for pos, tok in enumerate(self.__toklist):
        tokTag = offsetToName.get(pos)
        if isinstance(tok, ParseResults):
            # nested results render themselves, one indent level deeper
            pieces.append( tok.asXML(tokTag, childrenNamedOnly, childIndent, formatted) )
            continue
        # individual token: unnamed ones are skipped or tagged "ITEM"
        if not tokTag:
            if namedItemsOnly:
                continue
            tokTag = "ITEM"
        pieces += [ newline, childIndent, "<", tokTag, ">",
                    _xml_escape(_ustr(tok)),
                    "</", tokTag, ">" ]

    pieces += [ newline, indent, "</", selfTag, ">" ]
    return "".join(pieces)
625
627 for k,vlist in self.__tokdict.items():
628 for v,loc in vlist:
629 if sub is v:
630 return k
631 return None
632
634 """Returns the results name for this token expression."""
635 if self.__name:
636 return self.__name
637 elif self.__parent:
638 par = self.__parent()
639 if par:
640 return par.__lookup(self)
641 else:
642 return None
643 elif (len(self) == 1 and
644 len(self.__tokdict) == 1 and
645 self.__tokdict.values()[0][0][1] in (0,-1)):
646 return self.__tokdict.keys()[0]
647 else:
648 return None
649
def dump(self,indent='',depth=0):
    """Diagnostic listing of the contents of a C{ParseResults}: the token
    list first, then either each named result or, when there are no names,
    each positional entry if any entry is itself a C{ParseResults}.
    C{indent} is prefixed to the output so that it can be embedded in a
    nested display of other data."""
    pieces = [ indent + _ustr(self.asList()) ]
    if self.haskeys():
        for key, val in sorted(self.items()):
            # the list is never empty here, so every entry starts on a new line
            pieces.append('\n')
            pieces.append( "%s%s- %s: " % (indent,(' '*depth), key) )
            if isinstance(val, ParseResults) and val:
                pieces.append( val.dump(indent,depth+1) )
            else:
                pieces.append( _ustr(val) )
    elif any(isinstance(sub, ParseResults) for sub in self):
        for idx, sub in enumerate(self):
            if isinstance(sub, ParseResults):
                shown = sub.dump(indent,depth+1)
            else:
                shown = _ustr(sub)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),idx,indent,(' '*(depth+1)),shown))

    return "".join(pieces)
679
def pprint(self, *args, **kwargs):
    """Pretty-print the results as a nested list via the C{pprint} module.
    Any extra positional or keyword args are forwarded unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
685
686
688 return ( self.__toklist,
689 ( self.__tokdict.copy(),
690 self.__parent is not None and self.__parent() or None,
691 self.__accumNames,
692 self.__name ) )
693
695 self.__toklist = state[0]
696 (self.__tokdict,
697 par,
698 inAccumNames,
699 self.__name) = state[1]
700 self.__accumNames = {}
701 self.__accumNames.update(inAccumNames)
702 if par is not None:
703 self.__parent = wkref(par)
704 else:
705 self.__parent = None
706
708 return self.__toklist, self.__name, self.__asList, self.__modal
709
711 return (dir(type(self)) + list(self.keys()))
712
# Register ParseResults as a virtual MutableMapping subclass.  The ABC lives
# in collections.abc; the bare collections.MutableMapping alias was removed
# in Python 3.10, so it is only used as a fallback for old interpreters.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
714
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A newline character is reported at its own column on the line it ends
    (for C{"abc\\ndef"}, location 3 is column 4), and the character after it
    is column 1 of the next line.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # rfind() gives the index of the last newline before loc, or -1 when
    # there is none, so the subtraction is 1-based on every line.
    return loc - strg.rfind("\n", 0, loc)
727
729 """Returns current line number within a string, counting newlines as line separators.
730 The first line is number 1.
731
732 Note: the default parsing behavior is to expand tabs in the input string
733 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
734 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
735 consistent view of the parsed string, the parse location, and line and column
736 positions within the parsed string.
737 """
738 return strg.count("\n",0,loc) + 1
739
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
    """
    # first character after the previous newline (0 when there is none)
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
749
751 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
752
754 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
755
757 print ("Exception raised:" + _ustr(exc))
758
760 """'Do-nothing' debug action, to suppress debugging output during parsing."""
761 pass
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785 'decorator to trim function calls to match the arity of the target'
787 if func in singleArgBuiltins:
788 return lambda s,l,t: func(t)
789 limit = [0]
790 foundArity = [False]
791
792
793 if system_version[:2] >= (3,5):
794 def extract_stack():
795
796 offset = -3 if system_version == (3,5,0) else -2
797 frame_summary = traceback.extract_stack()[offset]
798 return [(frame_summary.filename, frame_summary.lineno)]
799 def extract_tb(tb):
800 frames = traceback.extract_tb(tb)
801 frame_summary = frames[-1]
802 return [(frame_summary.filename, frame_summary.lineno)]
803 else:
804 extract_stack = traceback.extract_stack
805 extract_tb = traceback.extract_tb
806
807
808
809
810 LINE_DIFF = 6
811
812
813 this_line = extract_stack()[-1]
814 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
815
816 def wrapper(*args):
817 while 1:
818 try:
819 ret = func(*args[limit[0]:])
820 foundArity[0] = True
821 return ret
822 except TypeError:
823
824 if foundArity[0]:
825 raise
826 else:
827 try:
828 tb = sys.exc_info()[-1]
829 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
830 raise
831 finally:
832 del tb
833
834 if limit[0] <= maxargs:
835 limit[0] += 1
836 continue
837 raise
838
839
840 func_name = "<parse action>"
841 try:
842 func_name = getattr(func, '__name__',
843 getattr(func, '__class__').__name__)
844 except Exception:
845 func_name = str(func)
846 wrapper.__name__ = func_name
847
848 return wrapper
849
851 """Abstract base level parser element class."""
852 DEFAULT_WHITE_CHARS = " \n\t\r"
853 verbose_stacktrace = False
854
855 @staticmethod
860
861 @staticmethod
863 """
864 Set class to be used for inclusion of string literals into a parser.
865 """
866 ParserElement.literalStringClass = cls
867
869 self.parseAction = list()
870 self.failAction = None
871
872 self.strRepr = None
873 self.resultsName = None
874 self.saveAsList = savelist
875 self.skipWhitespace = True
876 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
877 self.copyDefaultWhiteChars = True
878 self.mayReturnEmpty = False
879 self.keepTabs = False
880 self.ignoreExprs = list()
881 self.debug = False
882 self.streamlined = False
883 self.mayIndexError = True
884 self.errmsg = ""
885 self.modalResults = True
886 self.debugActions = ( None, None, None )
887 self.re = None
888 self.callPreparse = True
889 self.callDuringTry = False
890
892 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
893 for the same parsing pattern, using copies of the original parse element."""
894 cpy = copy.copy( self )
895 cpy.parseAction = self.parseAction[:]
896 cpy.ignoreExprs = self.ignoreExprs[:]
897 if self.copyDefaultWhiteChars:
898 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
899 return cpy
900
902 """Define name for this expression, for use in debugging."""
903 self.name = name
904 self.errmsg = "Expected " + self.name
905 if hasattr(self,"exception"):
906 self.exception.msg = self.errmsg
907 return self
908
910 """Define name for referencing matching tokens as a nested attribute
911 of the returned parse results.
912 NOTE: this returns a *copy* of the original C{ParserElement} object;
913 this is so that the client can define a basic element, such as an
914 integer, and reference it in multiple places with different names.
915
916 You can also set results names using the abbreviated syntax,
917 C{expr("name")} in place of C{expr.setResultsName("name")} -
918 see L{I{__call__}<__call__>}.
919 """
920 newself = self.copy()
921 if name.endswith("*"):
922 name = name[:-1]
923 listAllMatches=True
924 newself.resultsName = name
925 newself.modalResults = not listAllMatches
926 return newself
927
929 """Method to invoke the Python pdb debugger when this element is
930 about to be parsed. Set C{breakFlag} to True to enable, False to
931 disable.
932 """
933 if breakFlag:
934 _parseMethod = self._parse
935 def breaker(instring, loc, doActions=True, callPreParse=True):
936 import pdb
937 pdb.set_trace()
938 return _parseMethod( instring, loc, doActions, callPreParse )
939 breaker._originalParseMethod = _parseMethod
940 self._parse = breaker
941 else:
942 if hasattr(self._parse,"_originalParseMethod"):
943 self._parse = self._parse._originalParseMethod
944 return self
945
947 """Define action to perform when successfully matching parse element definition.
948 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
949 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
950 - s = the original string being parsed (see note below)
951 - loc = the location of the matching substring
952 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
953 If the functions in fns modify the tokens, they can return them as the return
954 value from fn, and the modified list of tokens will replace the original.
955 Otherwise, fn does not need to return any value.
956
957 Optional keyword arguments::
958 - callDuringTry = (default=False) indicate if parse action should be run during lookaheads and alternate testing
959
960 Note: the default parsing behavior is to expand tabs in the input string
961 before starting the parsing process. See L{I{parseString}<parseString>} for more information
962 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
963 consistent view of the parsed string, the parse location, and line and column
964 positions within the parsed string.
965 """
966 self.parseAction = list(map(_trim_arity, list(fns)))
967 self.callDuringTry = kwargs.get("callDuringTry", False)
968 return self
969
971 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
972 self.parseAction += list(map(_trim_arity, list(fns)))
973 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
974 return self
975
977 """Add a boolean predicate function to expression's list of parse actions. See
978 L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
979 functions passed to C{addCondition} need to return boolean success/fail of the condition.
980
981 Optional keyword arguments::
982 - message = define a custom message to be used in the raised exception
983 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
984 """
985 msg = kwargs.get("message", "failed user-defined condition")
986 exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
987 for fn in fns:
def pa(s,l,t, fn=fn):
    # fn is bound as a default argument so that each generated action keeps
    # its own predicate; a plain closure would look fn up at call time and
    # every action would then test only the last predicate of the loop.
    if not bool(_trim_arity(fn)(s,l,t)):
        raise exc_type(s,l,msg)
991 self.parseAction.append(pa)
992 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
993 return self
994
996 """Define action to perform if parsing fails at this expression.
997 Fail acton fn is a callable function that takes the arguments
998 C{fn(s,loc,expr,err)} where:
999 - s = string being parsed
1000 - loc = location where expression match was attempted and failed
1001 - expr = the parse expression that failed
1002 - err = the exception thrown
1003 The function returns no value. It may throw C{L{ParseFatalException}}
1004 if it is desired to stop parsing immediately."""
1005 self.failAction = fn
1006 return self
1007
1009 exprsFound = True
1010 while exprsFound:
1011 exprsFound = False
1012 for e in self.ignoreExprs:
1013 try:
1014 while 1:
1015 loc,dummy = e._parse( instring, loc )
1016 exprsFound = True
1017 except ParseException:
1018 pass
1019 return loc
1020
1022 if self.ignoreExprs:
1023 loc = self._skipIgnorables( instring, loc )
1024
1025 if self.skipWhitespace:
1026 wt = self.whiteChars
1027 instrlen = len(instring)
1028 while loc < instrlen and instring[loc] in wt:
1029 loc += 1
1030
1031 return loc
1032
1033 - def parseImpl( self, instring, loc, doActions=True ):
1035
1036 - def postParse( self, instring, loc, tokenlist ):
1038
1039
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Attempt to match this element in C{instring} at C{loc}.

    Runs the optional pre-parse step, C{parseImpl}, C{postParse} and then
    any parse actions, and returns C{(loc, retTokens)} with the location
    after the match and the resulting C{ParseResults}.  An C{IndexError}
    from C{parseImpl} is converted to a C{ParseException} located at the
    end of the input.  C{ParseBaseException}s propagate to the caller,
    after the debug exception action and the fail action (if set) have run.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slow path: debug callbacks and/or a fail action must see failures
        if (self.debugActions[0] ):
            # "about to try" callback
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # ran off the end of the input: report as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            # notify the exception callback and the fail action, then re-raise
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # guard against IndexError only when the element is flagged as able
        # to raise it, or when loc is already at/after the end of the input
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning non-None replaces the tokens
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                # exception raised inside a parse action: notify, then re-raise
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action returning non-None replaces the tokens
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        # "matched" callback
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1111
1117
1119 try:
1120 self.tryParse(instring, loc)
1121 except (ParseException, IndexError):
1122 return False
1123 else:
1124 return True
1125
1126
1127
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Outcomes are stored in C{ParserElement._exprArgCache}, keyed on this
    element together with all four arguments.  A stored exception is raised
    again; a stored success is returned with a copy of its results.
    """
    key = (self,instring,loc,callPreParse,doActions)
    if key not in ParserElement._exprArgCache:
        try:
            outcome = self._parseNoCache( instring, loc, doActions, callPreParse )
            # keep a private copy so the caller's object and the cached one stay separate
            ParserElement._exprArgCache[ key ] = (outcome[0],outcome[1].copy())
            return outcome
        except ParseBaseException as pe:
            # clear the traceback before the exception is kept in the cache
            pe.__traceback__ = None
            ParserElement._exprArgCache[ key ] = pe
            raise

    cached = ParserElement._exprArgCache[ key ]
    if isinstance(cached, Exception):
        raise cached
    return (cached[0],cached[1].copy())
1144
1145 _parse = _parseNoCache
1146
1147
1148 _exprArgCache = {}
1149 @staticmethod
1152
1153 _packratEnabled = False
1154 @staticmethod
1156 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1157 Repeated parse attempts at the same string location (which happens
1158 often in many complex grammars) can immediately return a cached value,
1159 instead of re-executing parsing/validating code. Memoizing is done of
1160 both valid results and parsing exceptions.
1161
1162 This speedup may break existing programs that use parse actions that
1163 have side-effects. For this reason, packrat parsing is disabled when
1164 you first import pyparsing. To activate the packrat feature, your
1165 program must call the class method C{ParserElement.enablePackrat()}. If
1166 your program uses C{psyco} to "compile as you go", you must call
1167 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1168 Python will crash. For best results, call C{enablePackrat()} immediately
1169 after importing pyparsing.
1170 """
1171 if not ParserElement._packratEnabled:
1172 ParserElement._packratEnabled = True
1173 ParserElement._parse = ParserElement._parseCache
1174
1176 """Execute the parse expression with the given string.
1177 This is the main interface to the client code, once the complete
1178 expression has been built.
1179
1180 If you want the grammar to require that the entire input string be
1181 successfully parsed, then set C{parseAll} to True (equivalent to ending
1182 the grammar with C{L{StringEnd()}}).
1183
1184 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1185 in order to report proper column numbers in parse actions.
1186 If the input string contains tabs and
1187 the grammar uses parse actions that use the C{loc} argument to index into the
1188 string being parsed, you can ensure you have a consistent view of the input
1189 string by:
1190 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1191 (see L{I{parseWithTabs}<parseWithTabs>})
1192 - define your parse action using the full C{(s,loc,toks)} signature, and
1193 reference the input string using the parse action's C{s} argument
1194 - explicitly expand the tabs in your input string before calling
1195 C{parseString}
1196 """
1197 ParserElement.resetCache()
1198 if not self.streamlined:
1199 self.streamline()
1200
1201 for e in self.ignoreExprs:
1202 e.streamline()
1203 if not self.keepTabs:
1204 instring = instring.expandtabs()
1205 try:
1206 loc, tokens = self._parse( instring, 0 )
1207 if parseAll:
1208 loc = self.preParse( instring, loc )
1209 se = Empty() + StringEnd()
1210 se._parse( instring, loc )
1211 except ParseBaseException as exc:
1212 if ParserElement.verbose_stacktrace:
1213 raise
1214 else:
1215
1216 raise exc
1217 else:
1218 return tokens
1219
1221 """Scan the input string for expression matches. Each match will return the
1222 matching tokens, start location, and end location. May be called with optional
1223 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1224 C{overlap} is specified, then overlapping matches will be reported.
1225
1226 Note that the start and end locations are reported relative to the string
1227 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1228 strings with embedded tabs."""
1229 if not self.streamlined:
1230 self.streamline()
1231 for e in self.ignoreExprs:
1232 e.streamline()
1233
1234 if not self.keepTabs:
1235 instring = _ustr(instring).expandtabs()
1236 instrlen = len(instring)
1237 loc = 0
1238 preparseFn = self.preParse
1239 parseFn = self._parse
1240 ParserElement.resetCache()
1241 matches = 0
1242 try:
1243 while loc <= instrlen and matches < maxMatches:
1244 try:
1245 preloc = preparseFn( instring, loc )
1246 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1247 except ParseException:
1248 loc = preloc+1
1249 else:
1250 if nextLoc > loc:
1251 matches += 1
1252 yield tokens, preloc, nextLoc
1253 if overlap:
1254 nextloc = preparseFn( instring, loc )
1255 if nextloc > loc:
1256 loc = nextLoc
1257 else:
1258 loc += 1
1259 else:
1260 loc = nextLoc
1261 else:
1262 loc = preloc+1
1263 except ParseBaseException as exc:
1264 if ParserElement.verbose_stacktrace:
1265 raise
1266 else:
1267
1268 raise exc
1269
1302
1304 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1305 to match the given parse expression. May be called with optional
1306 C{maxMatches} argument, to clip searching after 'n' matches are found.
1307 """
1308 try:
1309 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1310 except ParseBaseException as exc:
1311 if ParserElement.verbose_stacktrace:
1312 raise
1313 else:
1314
1315 raise exc
1316
1318 """Implementation of + operator - returns C{L{And}}"""
1319 if isinstance( other, basestring ):
1320 other = ParserElement.literalStringClass( other )
1321 if not isinstance( other, ParserElement ):
1322 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1323 SyntaxWarning, stacklevel=2)
1324 return None
1325 return And( [ self, other ] )
1326
1328 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1329 if isinstance( other, basestring ):
1330 other = ParserElement.literalStringClass( other )
1331 if not isinstance( other, ParserElement ):
1332 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1333 SyntaxWarning, stacklevel=2)
1334 return None
1335 return other + self
1336
1338 """Implementation of - operator, returns C{L{And}} with error stop"""
1339 if isinstance( other, basestring ):
1340 other = ParserElement.literalStringClass( other )
1341 if not isinstance( other, ParserElement ):
1342 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1343 SyntaxWarning, stacklevel=2)
1344 return None
1345 return And( [ self, And._ErrorStop(), other ] )
1346
1348 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1349 if isinstance( other, basestring ):
1350 other = ParserElement.literalStringClass( other )
1351 if not isinstance( other, ParserElement ):
1352 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1353 SyntaxWarning, stacklevel=2)
1354 return None
1355 return other - self
1356
1358 """Implementation of * operator, allows use of C{expr * 3} in place of
1359 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1360 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1361 may also include C{None} as in:
1362 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1363 to C{expr*n + L{ZeroOrMore}(expr)}
1364 (read as "at least n instances of C{expr}")
1365 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1366 (read as "0 to n instances of C{expr}")
1367 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1368 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1369
1370 Note that C{expr*(None,n)} does not raise an exception if
1371 more than n exprs exist in the input stream; that is,
1372 C{expr*(None,n)} does not enforce a maximum number of expr
1373 occurrences. If this behavior is desired, then write
1374 C{expr*(None,n) + ~expr}
1375
1376 """
1377 if isinstance(other,int):
1378 minElements, optElements = other,0
1379 elif isinstance(other,tuple):
1380 other = (other + (None, None))[:2]
1381 if other[0] is None:
1382 other = (0, other[1])
1383 if isinstance(other[0],int) and other[1] is None:
1384 if other[0] == 0:
1385 return ZeroOrMore(self)
1386 if other[0] == 1:
1387 return OneOrMore(self)
1388 else:
1389 return self*other[0] + ZeroOrMore(self)
1390 elif isinstance(other[0],int) and isinstance(other[1],int):
1391 minElements, optElements = other
1392 optElements -= minElements
1393 else:
1394 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1395 else:
1396 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1397
1398 if minElements < 0:
1399 raise ValueError("cannot multiply ParserElement by negative value")
1400 if optElements < 0:
1401 raise ValueError("second tuple value must be greater or equal to first tuple value")
1402 if minElements == optElements == 0:
1403 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1404
1405 if (optElements):
1406 def makeOptionalList(n):
1407 if n>1:
1408 return Optional(self + makeOptionalList(n-1))
1409 else:
1410 return Optional(self)
1411 if minElements:
1412 if minElements == 1:
1413 ret = self + makeOptionalList(optElements)
1414 else:
1415 ret = And([self]*minElements) + makeOptionalList(optElements)
1416 else:
1417 ret = makeOptionalList(optElements)
1418 else:
1419 if minElements == 1:
1420 ret = self
1421 else:
1422 ret = And([self]*minElements)
1423 return ret
1424
1427
1429 """Implementation of | operator - returns C{L{MatchFirst}}"""
1430 if isinstance( other, basestring ):
1431 other = ParserElement.literalStringClass( other )
1432 if not isinstance( other, ParserElement ):
1433 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1434 SyntaxWarning, stacklevel=2)
1435 return None
1436 return MatchFirst( [ self, other ] )
1437
1439 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1440 if isinstance( other, basestring ):
1441 other = ParserElement.literalStringClass( other )
1442 if not isinstance( other, ParserElement ):
1443 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1444 SyntaxWarning, stacklevel=2)
1445 return None
1446 return other | self
1447
1449 """Implementation of ^ operator - returns C{L{Or}}"""
1450 if isinstance( other, basestring ):
1451 other = ParserElement.literalStringClass( other )
1452 if not isinstance( other, ParserElement ):
1453 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1454 SyntaxWarning, stacklevel=2)
1455 return None
1456 return Or( [ self, other ] )
1457
1459 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1460 if isinstance( other, basestring ):
1461 other = ParserElement.literalStringClass( other )
1462 if not isinstance( other, ParserElement ):
1463 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1464 SyntaxWarning, stacklevel=2)
1465 return None
1466 return other ^ self
1467
1469 """Implementation of & operator - returns C{L{Each}}"""
1470 if isinstance( other, basestring ):
1471 other = ParserElement.literalStringClass( other )
1472 if not isinstance( other, ParserElement ):
1473 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1474 SyntaxWarning, stacklevel=2)
1475 return None
1476 return Each( [ self, other ] )
1477
1479 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1480 if isinstance( other, basestring ):
1481 other = ParserElement.literalStringClass( other )
1482 if not isinstance( other, ParserElement ):
1483 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1484 SyntaxWarning, stacklevel=2)
1485 return None
1486 return other & self
1487
1489 """Implementation of ~ operator - returns C{L{NotAny}}"""
1490 return NotAny( self )
1491
1493 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1494 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1495 could be written as::
1496 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1497
1498 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1499 passed as C{True}.
1500
1501 If C{name} is omitted, same as calling C{L{copy}}.
1502 """
1503 if name is not None:
1504 return self.setResultsName(name)
1505 else:
1506 return self.copy()
1507
1509 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1510 cluttering up returned output.
1511 """
1512 return Suppress( self )
1513
1515 """Disables the skipping of whitespace before matching the characters in the
1516 C{ParserElement}'s defined pattern. This is normally only used internally by
1517 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1518 """
1519 self.skipWhitespace = False
1520 return self
1521
1523 """Overrides the default whitespace chars
1524 """
1525 self.skipWhitespace = True
1526 self.whiteChars = chars
1527 self.copyDefaultWhiteChars = False
1528 return self
1529
1531 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1532 Must be called before C{parseString} when the input grammar contains elements that
1533 match C{<TAB>} characters."""
1534 self.keepTabs = True
1535 return self
1536
1538 """Define expression to be ignored (e.g., comments) while doing pattern
1539 matching; may be called repeatedly, to define multiple comment or other
1540 ignorable patterns.
1541 """
1542 if isinstance(other, basestring):
1543 other = Suppress(other)
1544
1545 if isinstance( other, Suppress ):
1546 if other not in self.ignoreExprs:
1547 self.ignoreExprs.append(other)
1548 else:
1549 self.ignoreExprs.append( Suppress( other.copy() ) )
1550 return self
1551
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install custom debugging callbacks and turn debugging on.

    Parameters:
     - startAction - stored as C{debugActions[0]}
     - successAction - stored as C{debugActions[1]}
     - exceptionAction - stored as C{debugActions[2]}

    Any callback passed as a false value (e.g. C{None}) is replaced by the
    matching module-level default action.  Returns C{self} so calls can be
    chained.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction

    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1559
1561 """Enable display of debugging messages while doing pattern matching.
1562 Set C{flag} to True to enable, False to disable."""
1563 if flag:
1564 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1565 else:
1566 self.debug = False
1567 return self
1568
1571
1574
1576 self.streamlined = True
1577 self.strRepr = None
1578 return self
1579
1582
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    Parameters:
     - validateTrace - accepted for signature compatibility but not read by
       this implementation; defaults to C{None} rather than a shared
       mutable list

    Delegates to C{self.checkRecursion} with a fresh empty list; returns C{None}.
    """
    self.checkRecursion( [] )
1586
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a path
       accepted by C{open()}
     - parseAll - passed through to C{parseString}

    Returns whatever C{parseString} returns; C{ParseBaseException}s raised
    by the parse propagate to the caller.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a filename; the with-block
        # guarantees the handle is closed even if read() fails
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # re-raised explicitly from this frame instead of with a bare
            # raise -- presumably to shorten the traceback shown to the user
            raise exc
1606
1608 if isinstance(other, ParserElement):
1609 return self is other or vars(self) == vars(other)
1610 elif isinstance(other, basestring):
1611 return self.matches(other)
1612 else:
1613 return super(ParserElement,self)==other
1614
1616 return not (self == other)
1617
1619 return hash(id(self))
1620
1622 return self == other
1623
1625 return not (self == other)
1626
def matches(self, s, parseAll=True):
    """Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser, as in:

        expr = Word(nums)
        assert expr.matches("100")

    Parameters:
     - s - the string to test; converted with C{_ustr} before parsing
     - parseAll - (default=True) - flag to pass to C{L{parseString}}

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.
    """
    try:
        self.parseString(_ustr(s), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
1642
def runTests(self, tests, parseAll=False, comment='#', printResults=True):
    """Run the parse expression over a batch of test strings and report, for each
    one, the test text followed by either the dumped parse results or the
    location and message of the parse failure.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
       (a multiline string is split into lines and each line is stripped)
     - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
     - comment - (default='#') - expression for indicating embedded comments in the test
       string; pass None to disable comment filtering
     - printResults - (default=True) prints test output to stdout; if False, returns a
       (success, results) tuple, where success indicates that all tests succeeded, and the
       results contain a list of lines of each test's output as it would have been
       printed to stdout

    When C{printResults} is true the method returns C{None}.
    """
    if isinstance(tests, basestring):
        tests = [str.strip(rawLine) for rawLine in tests.splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)

    allResults = []
    pendingComments = []
    allPassed = True

    for test in tests:
        isComment = comment is not None and comment.matches(test, False)
        # a blank line directly after comment lines stays with that comment run
        if isComment or (pendingComments and not test):
            pendingComments.append(test)
            continue
        if not test:
            continue

        # each report starts with the comments collected since the last test
        report = ['\n'.join(pendingComments), test]
        pendingComments = []
        try:
            report.append(self.parseString(test, parseAll=parseAll).dump())
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in test:
                # multi-line test: show the failing line, caret under the failing column
                report.append(line(pe.loc, test))
                report.append(' '*(col(pe.loc,test)-1) + '^' + fatal)
            else:
                report.append(' '*pe.loc + '^' + fatal)
            report.append("FAIL: " + str(pe))
            allPassed = False

        if not printResults:
            allResults.append(report)
        else:
            report.append('')
            print('\n'.join(report))

    if not printResults:
        return allPassed, allResults
1693
1694
1695 -class Token(ParserElement):
1696 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1699
1700
1701 -class Empty(Token):
1702 """An empty token, will always match."""
1704 super(Empty,self).__init__()
1705 self.name = "Empty"
1706 self.mayReturnEmpty = True
1707 self.mayIndexError = False
1708
1711 """A token that will never match."""
1713 super(NoMatch,self).__init__()
1714 self.name = "NoMatch"
1715 self.mayReturnEmpty = True
1716 self.mayIndexError = False
1717 self.errmsg = "Unmatchable token"
1718
1719 - def parseImpl( self, instring, loc, doActions=True ):
1721
1724 """Token to exactly match a specified string."""
1726 super(Literal,self).__init__()
1727 self.match = matchString
1728 self.matchLen = len(matchString)
1729 try:
1730 self.firstMatchChar = matchString[0]
1731 except IndexError:
1732 warnings.warn("null string passed to Literal; use Empty() instead",
1733 SyntaxWarning, stacklevel=2)
1734 self.__class__ = Empty
1735 self.name = '"%s"' % _ustr(self.match)
1736 self.errmsg = "Expected " + self.name
1737 self.mayReturnEmpty = False
1738 self.mayIndexError = False
1739
1740
1741
1742
1743
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text stored in C{self.match} at position C{loc}.

    Returns C{(loc + self.matchLen, self.match)} on success and raises
    C{ParseException} otherwise.  C{doActions} is not used here.
    """
    # cheap single-character comparison before the full startswith test
    if instring[loc] != self.firstMatchChar:
        raise ParseException(instring, loc, self.errmsg, self)
    if self.matchLen != 1 and not instring.startswith(self.match, loc):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.match
1749 _L = Literal
1750 ParserElement.literalStringClass = Literal
1753 """Token to exactly match a specified string as a keyword, that is, it must be
1754 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1755 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1756 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1757 Accepts two optional constructor arguments in addition to the keyword string:
1758 C{identChars} is a string of characters that would be valid identifier characters,
1759 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1760 matching, default is C{False}.
1761 """
1762 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1763
1765 super(Keyword,self).__init__()
1766 self.match = matchString
1767 self.matchLen = len(matchString)
1768 try:
1769 self.firstMatchChar = matchString[0]
1770 except IndexError:
1771 warnings.warn("null string passed to Keyword; use Empty() instead",
1772 SyntaxWarning, stacklevel=2)
1773 self.name = '"%s"' % self.match
1774 self.errmsg = "Expected " + self.name
1775 self.mayReturnEmpty = False
1776 self.mayIndexError = False
1777 self.caseless = caseless
1778 if caseless:
1779 self.caselessmatch = matchString.upper()
1780 identChars = identChars.upper()
1781 self.identChars = set(identChars)
1782
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring that it is not embedded in a
    longer run of identifier characters.

    The text at C{loc} must equal the keyword (compared in upper case when
    C{self.caseless} is set), and neither the character just after the
    keyword nor the character just before C{loc}, when present, may be in
    C{self.identChars}.

    Returns C{(loc + self.matchLen, self.match)}; raises C{ParseException}
    when the keyword does not match.  C{doActions} is not used here.
    """
    end = loc + self.matchLen
    idChars = self.identChars

    if self.caseless:
        found = (
            instring[loc:end].upper() == self.caselessmatch
            and (loc >= len(instring) - self.matchLen or instring[end].upper() not in idChars)
            and (loc == 0 or instring[loc - 1].upper() not in idChars)
        )
    else:
        found = (
            instring[loc] == self.firstMatchChar
            and (self.matchLen == 1 or instring.startswith(self.match, loc))
            and (loc >= len(instring) - self.matchLen or instring[end] not in idChars)
            and (loc == 0 or instring[loc - 1] not in idChars)
        )

    if found:
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1796
1801
1802 @staticmethod
1807
1809 """Token to match a specified string, ignoring case of letters.
1810 Note: the matched results will always be in the case of the given
1811 match string, NOT the case of the input text.
1812 """
1814 super(CaselessLiteral,self).__init__( matchString.upper() )
1815
1816 self.returnString = matchString
1817 self.name = "'%s'" % self.returnString
1818 self.errmsg = "Expected " + self.name
1819
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal at C{loc} without regard to letter case.

    The input slice is upper-cased and compared with C{self.match}.  On
    success returns C{(loc + self.matchLen, self.returnString)}, so the
    token is C{self.returnString} rather than the input text; otherwise
    raises C{ParseException}.  C{doActions} is not used here.
    """
    end = loc + self.matchLen
    if instring[loc:end].upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return end, self.returnString
1824
1828
def parseImpl( self, instring, loc, doActions=True ):
    """Caseless keyword match at C{loc}.

    The upper-cased input slice must equal C{self.caselessmatch}, and the
    keyword must stand alone: neither the character following it nor the
    character preceding C{loc}, when present, may be in C{self.identChars}
    (compared in upper case).  The preceding-character test mirrors the
    caseless branch of the keyword matcher that takes a C{caseless} flag,
    so that e.g. C{"if"} is not matched inside C{"xif"}.

    Returns C{(loc + self.matchLen, self.match)}; raises C{ParseException}
    when the keyword does not match.  C{doActions} is not used here.
    """
    end = loc + self.matchLen
    if ( (instring[ loc:end ].upper() == self.caselessmatch) and
         (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
         (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
        return end, self.match
    raise ParseException(instring, loc, self.errmsg, self)
1834
1836 """Token for matching words composed of allowed character sets.
1837 Defined with string containing all allowed initial characters,
1838 an optional string containing allowed body characters (if omitted,
1839 defaults to the initial character set), and an optional minimum,
1840 maximum, and/or exact length. The default value for C{min} is 1 (a
1841 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1842 are 0, meaning no maximum or exact length restriction. An optional
1843 C{excludeChars} parameter can list characters that might be found in
1844 the input C{bodyChars} string; useful to define a word of all printables
1845 except for one or two characters, for instance.
1846 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Configure the character sets and length limits of the word.

    Parameters:
     - initChars - string of characters allowed as the first character
     - bodyChars - string of characters allowed after the first; when
       omitted or empty, C{initChars} is used
     - min - minimum length (default 1); values below 1 raise C{ValueError}
     - max - maximum length; 0 (default) means no maximum
     - exact - exact length; when > 0 it overrides both C{min} and C{max}
     - asKeyword - when true, the word must not be adjacent to other body
       characters (see C{parseImpl})
     - excludeChars - characters removed from C{initChars} and C{bodyChars}
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    # an exact length takes precedence over min/max
    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # With default length limits and no space in either character set, build
    # an equivalent regular expression; parseImpl uses self.re when it is set.
    # NOTE(review): self.re is only assigned in this branch -- presumably a
    # base-class default covers the other case; confirm.
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # best effort: if the pattern cannot be compiled, parseImpl falls
            # back to character-by-character matching; KeyboardInterrupt and
            # SystemExit are deliberately not swallowed
            self.re = None
1900
def parseImpl( self, instring, loc, doActions=True ):
    """Match a word at C{loc}.

    When a compiled regular expression is available in C{self.re} it is
    used directly.  Otherwise the first character must be in
    C{self.initChars} and following characters are consumed while they are
    in C{self.bodyChars}, up to C{self.maxLen} characters in total.

    Returns C{(end location, matched text)}; raises C{ParseException} when
    no word matches, the word is shorter than C{self.minLen}, an explicit
    maximum was given and another body character follows, or
    C{self.asKeyword} is set and the word is adjacent to a body character.
    C{doActions} is not used here.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    allowed = self.bodyChars
    total = len(instring)
    # never scan beyond the maximum word length or the end of the input
    limit = min(start + self.maxLen, total)
    pos = start + 1
    while pos < limit and instring[pos] in allowed:
        pos += 1

    followedByBodyChar = pos < total and instring[pos] in allowed
    failed = pos - start < self.minLen
    if self.maxSpecified and followedByBodyChar:
        failed = True
    if self.asKeyword and ((start > 0 and instring[start - 1] in allowed) or followedByBodyChar):
        failed = True

    if failed:
        # note: the exception location is the scan position, not the start
        raise ParseException(instring, pos, self.errmsg, self)

    return pos, instring[start:pos]
1935
1937 try:
1938 return super(Word,self).__str__()
1939 except:
1940 pass
1941
1942
1943 if self.strRepr is None:
1944
1945 def charsAsStr(s):
1946 if len(s)>4:
1947 return s[:4]+"..."
1948 else:
1949 return s
1950
1951 if ( self.initCharsOrig != self.bodyCharsOrig ):
1952 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1953 else:
1954 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1955
1956 return self.strRepr
1957
1958
1959 -class Regex(Token):
1960 """Token for matching strings that match a given regular expression.
1961 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1962 """
1963 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object,
    in which case it is used directly and its pattern text is recorded in
    C{self.pattern} and C{self.reString}.

    Raises C{ValueError} if C{pattern} is neither a string nor a compiled
    regular expression; re-raises the C{re} module's error (after issuing a
    C{SyntaxWarning}) if a string pattern does not compile.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class as sre_constants.error
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # record the pattern source text, not the str() of the compiled object
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1996 self.mayReturnEmpty = True
1997
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regular expression at C{loc}.

    Returns C{(end of match, ParseResults)} where the results hold the
    whole matched text and, for every named group in the pattern, an entry
    under that group's name.  Raises C{ParseException} when the expression
    does not match at C{loc}.  C{doActions} is not used here.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    # expose named groups as named results
    for groupName, groupText in found.groupdict().items():
        tokens[groupName] = groupText
    return found.end(), tokens
2010
2012 try:
2013 return super(Regex,self).__str__()
2014 except:
2015 pass
2016
2017 if self.strRepr is None:
2018 self.strRepr = "Re:(%s)" % repr(self.pattern)
2019
2020 return self.strRepr
2021
2024 """Token for matching strings that are delimited by quoting characters.
2025 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    r"""Defined with the following parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=None)
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Raises C{SyntaxError} (after a C{SyntaxWarning}) when C{quoteChar} or
    C{endQuoteChar} is empty once surrounding whitespace is stripped.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is stripped from both delimiters
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # Ordinary content characters: anything except the first end-quote
    # character, the escape character and, for single-line strings, CR/LF.
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        excludedLineBreaks = ''
    else:
        self.flags = 0
        excludedLineBreaks = r'\n\r'
    openQuote = re.escape(self.quoteChar)
    endFirst = _escapeRegexRangeChars(self.endQuoteChar[0])
    excludedEsc = ''
    if escChar is not None:
        excludedEsc = _escapeRegexRangeChars(escChar) or ''
    self.pattern = r'%s(?:[^%s%s%s]' % (openQuote, endFirst, excludedLineBreaks, excludedEsc)

    # For a multi-character end quote, also accept a proper prefix of the
    # end quote that is followed by a character that breaks the sequence
    # (longest prefix first).
    closing = self.endQuoteChar
    for i in range(len(closing) - 1, 0, -1):
        self.pattern += '|(?:%s[^%s])' % (re.escape(closing[:i]),
                                          _escapeRegexRangeChars(closing[i]))

    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        # used by parseImpl to drop the escape character when unquoting
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2099
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string that begins exactly at C{loc}.

    Returns a C{(newloc, text)} tuple.  When C{self.unquoteResults} is set,
    the quote delimiters are stripped from C{text}, escaped whitespace
    literals are converted (if C{self.convertWhitespaceEscapes} is set),
    escape characters are removed and escaped quotes are replaced by the
    closing delimiter.  Raises C{ParseException} when the compiled pattern
    does not match at C{loc}.
    """
    # cheap first-character test before running the compiled regex
    result = self.re.match(instring, loc) if instring[loc] == self.firstQuoteChar else None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:
        # strip off the opening and closing delimiters; the match group is
        # always a string, so no further type check is required
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        # replace escaped whitespace literals with the real characters
        if '\\' in ret and self.convertWhitespaceEscapes:
            ws_map = {
                r'\t' : '\t',
                r'\n' : '\n',
                r'\f' : '\f',
                r'\r' : '\r',
            }
            for wslit,wschar in ws_map.items():
                ret = ret.replace(wslit, wschar)

        # drop the escape character, keeping the character it protected;
        # the replacement must be a raw string so the group reference is
        # not read as an (invalid) string escape
        if self.escChar:
            ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

        # replace escaped quotes with the closing delimiter
        if self.escQuote:
            ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2134
2136 try:
2137 return super(QuotedString,self).__str__()
2138 except:
2139 pass
2140
2141 if self.strRepr is None:
2142 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2143
2144 return self.strRepr
2145
2148 """Token for matching words composed of characters *not* in a given set.
2149 Defined with string containing all disallowed characters, and an optional
2150 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2151 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2152 are 0, meaning no maximum or exact length restriction.
2153 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Configure a token matching runs of characters not in C{notChars}.

    C{min} must be at least 1 (C{ValueError} otherwise).  A C{max} of 0
    means "no upper bound"; a positive C{exact} overrides both C{min}
    and C{max}.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

    # the name is derived from the string form, which reads self.notChars
    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
2177
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from C{loc} until one in C{self.notChars} is met.

    Returns C{(newloc, matched_text)}.  Raises C{ParseException} if the
    first character is disallowed or fewer than C{self.minLen} characters
    were consumed; at most C{self.maxLen} characters are taken.
    """
    forbidden = self.notChars
    if instring[loc] in forbidden:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan past the length limit or the end of the input
    limit = min( begin+self.maxLen, len(instring) )
    end = begin + 1
    while end < limit:
        if instring[end] in forbidden:
            break
        end += 1

    if end - begin < self.minLen:
        raise ParseException(instring, end, self.errmsg, self)

    return end, instring[begin:end]
2194
2196 try:
2197 return super(CharsNotIn, self).__str__()
2198 except:
2199 pass
2200
2201 if self.strRepr is None:
2202 if len(self.notChars) > 4:
2203 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2204 else:
2205 self.strRepr = "!W:(%s)" % self.notChars
2206
2207 return self.strRepr
2208
2210 """Special matching class for matching whitespace. Normally, whitespace is ignored
2211 by pyparsing grammars. This class is included when some whitespace structures
2212 are significant. Define with a string containing the whitespace characters to be
2213 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2214 as defined for the C{L{Word}} class."""
2215 whiteStrs = {
2216 " " : "<SPC>",
2217 "\t": "<TAB>",
2218 "\n": "<LF>",
2219 "\r": "<CR>",
2220 "\f": "<FF>",
2221 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Configure a token that matches a run of the whitespace chars in C{ws}.

    A C{max} of 0 means "no upper bound"; a positive C{exact} overrides
    both C{min} and C{max}.
    """
    super(White,self).__init__()
    self.matchWhite = ws
    # characters this token matches must not be skipped as leading whitespace
    skippable = [c for c in self.whiteChars if c not in self.matchWhite]
    self.setWhitespaceChars( "".join(skippable) )

    # build a readable name out of the per-character labels
    self.name = "".join([White.whiteStrs[c] for c in self.matchWhite])
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = self.maxLen = exact
    else:
        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT
2241
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters from C{self.matchWhite} starting at C{loc}.

    Returns C{(newloc, matched_text)}.  Raises C{ParseException} if the
    character at C{loc} is not in the set or the run is shorter than
    C{self.minLen}; at most C{self.maxLen} characters are taken.
    """
    white = self.matchWhite
    if instring[loc] not in white:
        raise ParseException(instring, loc, self.errmsg, self)

    begin = loc
    # never scan past the length limit or the end of the input
    limit = min( begin + self.maxLen, len(instring) )
    end = begin + 1
    while end < limit and instring[end] in white:
        end += 1

    if end - begin < self.minLen:
        raise ParseException(instring, end, self.errmsg, self)

    return end, instring[begin:end]
2256
2260 super(_PositionToken,self).__init__()
2261 self.name=self.__class__.__name__
2262 self.mayReturnEmpty = True
2263 self.mayIndexError = False
2264
2266 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2270
2272 if col(loc,instring) != self.col:
2273 instrlen = len(instring)
2274 if self.ignoreExprs:
2275 loc = self._skipIgnorables( instring, loc )
2276 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2277 loc += 1
2278 return loc
2279
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to the target column C{self.col}.

    Returns C{(newloc, skipped_text)}.  Raises C{ParseException} when the
    current column is already beyond the target column.
    """
    current = col( loc, instring )
    target = self.col
    if current > target:
        raise ParseException( instring, loc, "Text not in expected column", self )
    # number of characters still separating us from the target column
    advance = target - current
    return loc + advance, instring[ loc: loc + advance ]
2287
2289 """Matches if current position is at the beginning of a line within the parse string"""
2294
2296 preloc = super(LineStart,self).preParse(instring,loc)
2297 if instring[preloc] == "\n":
2298 loc += 1
2299 return loc
2300
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only at the beginning of a line.

    A position qualifies when it is 0, when it equals what
    C{self.preParse(instring, 0)} returns, or when the preceding character
    is a newline.  Returns C{(loc, [])}; raises C{ParseException} otherwise.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse( instring, 0 ):
        return loc, []
    if instring[loc-1] == "\n":
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2307
2309 """Matches if current position is at the end of a line within the parse string"""
2314
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at C{loc}, or the end of the input string.

    Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} when C{loc}
    is exactly the end of the input.  Raises C{ParseException} in every
    other case, including C{loc} beyond the end.
    """
    size = len(instring)
    if loc == size:
        return loc+1, []
    if loc < size and instring[loc] == "\n":
        return loc+1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
2325
2327 """Matches if current position is at the beginning of the parse string"""
2331
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, only at the start of the parse string.

    Position 0 always qualifies; any other position qualifies only if it
    equals what C{self.preParse(instring, 0)} returns.  Returns C{(loc, [])};
    raises C{ParseException} otherwise.
    """
    # preParse is only consulted when we are not at position 0
    if loc != 0 and loc != self.preParse( instring, 0 ):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2338
2340 """Matches if current position is at the end of the parse string"""
2344
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only when C{loc} is at or past the end of the parse string.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the input and
    C{(loc, [])} when it is already beyond it.  Raises C{ParseException}
    while input remains.
    """
    instrlen = len(instring)
    if loc < instrlen:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == instrlen:
        # step past the end so a second end-of-string match sees loc > len
        return loc+1, []
    # loc > instrlen is the only remaining case
    return loc, []
2354
2356 """Matches if the current position is at the beginning of a Word, and
2357 is not preceded by any character in a given set of C{wordChars}
2358 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2359 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2360 the string being parsed, or at the beginning of a line.
2361 """
2363 super(WordStart,self).__init__()
2364 self.wordChars = set(wordChars)
2365 self.errmsg = "Not at the start of a word"
2366
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, at the start of a word.

    Position 0 always qualifies.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current one must be.  Returns
    C{(loc, [])}; raises C{ParseException} otherwise.
    """
    if loc == 0:
        return loc, []
    chars = self.wordChars
    if instring[loc-1] in chars or instring[loc] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2373
2375 """Matches if the current position is at the end of a Word, and
2376 is not followed by any character in a given set of C{wordChars}
2377 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2378 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2379 the string being parsed, or at the end of a line.
2380 """
2382 super(WordEnd,self).__init__()
2383 self.wordChars = set(wordChars)
2384 self.skipWhitespace = False
2385 self.errmsg = "Not at the end of a word"
2386
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, consuming nothing, at the end of a word.

    An empty input, or a position at or past the end of the input, always
    qualifies.  Elsewhere the current character must not be in
    C{self.wordChars} and the previous one must be.  Returns C{(loc, [])};
    raises C{ParseException} otherwise.
    """
    size = len(instring)
    if size == 0 or loc >= size:
        return loc, []
    chars = self.wordChars
    if instring[loc] in chars or instring[loc-1] not in chars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
2394
2397 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the sub-expressions of a compound expression in C{self.exprs}.

    C{exprs} may be a single string (wrapped in a C{Literal}), a generator
    or other iterable of expressions, a sequence consisting only of strings
    (each wrapped in a C{Literal}), or a single non-iterable expression.
    """
    # collections.Sequence was removed in Python 3.10; the ABC lives in
    # collections.abc on Python 3 and directly in collections on Python 2
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, _generatorType ):
        exprs = list(exprs)

    if isinstance( exprs, basestring ):
        self.exprs = [ Literal( exprs ) ]
    elif isinstance( exprs, Sequence ):
        # if sequence of strings provided, wrap with Literal
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = map(Literal, exprs)
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            self.exprs = [ exprs ]
    self.callPreparse = False
2416
2418 return self.exprs[i]
2419
2421 self.exprs.append( other )
2422 self.strRepr = None
2423 return self
2424
2426 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2427 all contained expressions."""
2428 self.skipWhitespace = False
2429 self.exprs = [ e.copy() for e in self.exprs ]
2430 for e in self.exprs:
2431 e.leaveWhitespace()
2432 return self
2433
2435 if isinstance( other, Suppress ):
2436 if other not in self.ignoreExprs:
2437 super( ParseExpression, self).ignore( other )
2438 for e in self.exprs:
2439 e.ignore( self.ignoreExprs[-1] )
2440 else:
2441 super( ParseExpression, self).ignore( other )
2442 for e in self.exprs:
2443 e.ignore( self.ignoreExprs[-1] )
2444 return self
2445
2447 try:
2448 return super(ParseExpression,self).__str__()
2449 except:
2450 pass
2451
2452 if self.strRepr is None:
2453 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2454 return self.strRepr
2455
2457 super(ParseExpression,self).streamline()
2458
2459 for e in self.exprs:
2460 e.streamline()
2461
2462
2463
2464
2465 if ( len(self.exprs) == 2 ):
2466 other = self.exprs[0]
2467 if ( isinstance( other, self.__class__ ) and
2468 not(other.parseAction) and
2469 other.resultsName is None and
2470 not other.debug ):
2471 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2472 self.strRepr = None
2473 self.mayReturnEmpty |= other.mayReturnEmpty
2474 self.mayIndexError |= other.mayIndexError
2475
2476 other = self.exprs[-1]
2477 if ( isinstance( other, self.__class__ ) and
2478 not(other.parseAction) and
2479 other.resultsName is None and
2480 not other.debug ):
2481 self.exprs = self.exprs[:-1] + other.exprs[:]
2482 self.strRepr = None
2483 self.mayReturnEmpty |= other.mayReturnEmpty
2484 self.mayIndexError |= other.mayIndexError
2485
2486 self.errmsg = "Expected " + _ustr(self)
2487
2488 return self
2489
2493
def validate( self, validateTrace=[] ):
    """Validate every sub-expression, then check this one for recursion.

    Each entry of C{self.exprs} is validated with a copy of
    C{validateTrace} extended by this expression; the caller's list is
    never modified.
    """
    trace = validateTrace[:] + [self]
    for child in self.exprs:
        child.validate(trace)
    self.checkRecursion( [] )
2499
2504
2505 -class And(ParseExpression):
2506 """Requires all given C{ParseExpression}s to be found in the given order.
2507 Expressions may be separated by whitespace.
2508 May be constructed using the C{'+'} operator.
2509 """
2510
2516
def __init__( self, exprs, savelist = True ):
    """Build a sequence expression from C{exprs}.

    The sequence may return empty only if every sub-expression may; the
    whitespace handling is copied from the first sub-expression.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # leading-whitespace behaviour is dictated by the first sub-expression
    leader = self.exprs[0]
    self.setWhitespaceChars( leader.whiteChars )
    self.skipWhitespace = leader.skipWhitespace
    self.callPreparse = True
2523
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every sub-expression in order and concatenate their tokens.

    Once an C{And._ErrorStop} marker has been passed, a failure in any
    later sub-expression is re-raised as C{ParseSyntaxException}.
    Returns C{(newloc, tokens)}.
    """
    # the first sub-expression is parsed with callPreParse=False
    loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for e in self.exprs[1:]:
        if isinstance(e, And._ErrorStop):
            # marker only: nothing to parse, but failures are fatal from here on
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, exprtokens = e._parse( instring, loc, doActions )
        else:
            try:
                loc, exprtokens = e._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException as pe:
                pe.__traceback__ = None
                raise ParseSyntaxException(pe)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        # keep results that carry tokens or named results
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
2548
2550 if isinstance( other, basestring ):
2551 other = Literal( other )
2552 return self.append( other )
2553
2555 subRecCheckList = parseElementList[:] + [ self ]
2556 for e in self.exprs:
2557 e.checkRecursion( subRecCheckList )
2558 if not e.mayReturnEmpty:
2559 break
2560
2562 if hasattr(self,"name"):
2563 return self.name
2564
2565 if self.strRepr is None:
2566 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
2567
2568 return self.strRepr
2569
2570
2571 -class Or(ParseExpression):
2572 """Requires that at least one C{ParseExpression} is found.
2573 If two expressions match, the expression that matches the longest string will be used.
2574 May be constructed using the C{'^'} operator.
2575 """
def __init__( self, exprs, savelist = False ):
    """Build a longest-match alternation from C{exprs}.

    The alternation may return empty if any alternative may; with no
    alternatives at all it is flagged as possibly empty.
    """
    super(Or,self).__init__(exprs, savelist)
    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
2582
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and parse with the one matching the most text.

    Each alternative is first probed with C{tryParse}; the successful ones
    are then parsed for real, longest first (ties keep their listed order),
    and the first that succeeds supplies the result.  If nothing succeeds,
    the exception raised furthest into the input is re-raised with this
    expression's message.
    """
    furthestLoc = -1
    furthestExc = None
    candidates = []
    for alt in self.exprs:
        try:
            endloc = alt.tryParse( instring, loc )
        except ParseException as err:
            err.__traceback__ = None
            if err.loc > furthestLoc:
                furthestExc = err
                furthestLoc = err.loc
        except IndexError:
            if len(instring) > furthestLoc:
                furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                furthestLoc = len(instring)
        else:
            # remember how far this alternative got, for the longest-first ordering
            candidates.append((endloc, alt))

    if candidates:
        # stable descending sort: equal lengths keep their original order
        candidates.sort(key=lambda pair: pair[0], reverse=True)
        for _,alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc

    if furthestExc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestExc.msg = self.errmsg
    raise furthestExc
2619
2620
2622 if isinstance( other, basestring ):
2623 other = ParserElement.literalStringClass( other )
2624 return self.append( other )
2625
2627 if hasattr(self,"name"):
2628 return self.name
2629
2630 if self.strRepr is None:
2631 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
2632
2633 return self.strRepr
2634
2636 subRecCheckList = parseElementList[:] + [ self ]
2637 for e in self.exprs:
2638 e.checkRecursion( subRecCheckList )
2639
2642 """Requires that at least one C{ParseExpression} is found.
2643 If two expressions match, the first one listed is the one that will match.
2644 May be constructed using the C{'|'} operator.
2645 """
def __init__( self, exprs, savelist = False ):
    """Build a first-match alternation from C{exprs}.

    The alternation may return empty if any alternative may; with no
    alternatives at all it is flagged as possibly empty.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True
2652
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that parses at C{loc}.

    If every alternative fails, the exception raised furthest into the
    input is re-raised with this expression's message; with no alternatives
    at all a C{ParseException} saying so is raised.
    """
    furthestLoc = -1
    furthestExc = None
    for alt in self.exprs:
        try:
            return alt._parse( instring, loc, doActions )
        except ParseException as err:
            if err.loc > furthestLoc:
                furthestExc = err
                furthestLoc = err.loc
        except IndexError:
            if len(instring) > furthestLoc:
                furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                furthestLoc = len(instring)

    # only reached when no alternative matched
    if furthestExc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthestExc.msg = self.errmsg
    raise furthestExc
2676
2678 if isinstance( other, basestring ):
2679 other = ParserElement.literalStringClass( other )
2680 return self.append( other )
2681
2683 if hasattr(self,"name"):
2684 return self.name
2685
2686 if self.strRepr is None:
2687 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
2688
2689 return self.strRepr
2690
2692 subRecCheckList = parseElementList[:] + [ self ]
2693 for e in self.exprs:
2694 e.checkRecursion( subRecCheckList )
2695
2696
2697 -class Each(ParseExpression):
2698 """Requires all given C{ParseExpression}s to be found, but in any order.
2699 Expressions may be separated by whitespace.
2700 May be constructed using the C{'&'} operator.
2701 """
def __init__( self, exprs, savelist = True ):
    """Build an any-order conjunction from C{exprs}.

    The expression may return empty only if every sub-expression may.  The
    grouping of sub-expressions used by C{parseImpl} is deferred to the
    first parse, signalled by C{self.initExprGroups}.
    """
    super(Each,self).__init__(exprs, savelist)
    self.skipWhitespace = True
    self.initExprGroups = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
2707
def parseImpl( self, instring, loc, doActions=True ):
    """Match all sub-expressions, in whatever order they appear in the input.

    A first pass uses C{tryParse} to discover the order in which the
    sub-expressions match; a second pass re-parses them in that order
    (running parse actions if requested) and merges the results.  Raises
    C{ParseException} if any required sub-expression never matched.
    """
    if self.initExprGroups:
        # classify the sub-expressions once, on the first parse
        opt1map = {}
        wrappedOptionals = []
        emptyCapable = []
        multiOptional = []
        multiRequired = []
        plainRequired = []
        for e in self.exprs:
            if isinstance(e,Optional):
                # match the wrapped expression, but remember its wrapper
                opt1map[id(e.expr)] = e
                wrappedOptionals.append(e.expr)
            elif e.mayReturnEmpty:
                emptyCapable.append(e)
            if isinstance(e,ZeroOrMore):
                multiOptional.append(e.expr)
            if isinstance(e,OneOrMore):
                multiRequired.append(e.expr)
            if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)):
                plainRequired.append(e)
        self.opt1map = opt1map
        self.optionals = wrappedOptionals + emptyCapable
        self.multioptionals = multiOptional
        self.multirequired = multiRequired
        self.required = plainRequired + multiRequired
        self.initExprGroups = False

    probeLoc = loc
    pendingReqd = self.required[:]
    pendingOpt = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        sweep = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
        failures = 0
        for e in sweep:
            try:
                probeLoc = e.tryParse( instring, probeLoc )
            except ParseException:
                failures += 1
            else:
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in pendingReqd:
                    pendingReqd.remove(e)
                elif e in pendingOpt:
                    pendingOpt.remove(e)
        if failures == len(sweep):
            break

    if pendingReqd:
        missing = ", ".join(_ustr(e) for e in pendingReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

    # second pass: parse for real, in the discovered order
    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for r in resultlist:
        # names already present are accumulated rather than overwritten
        dups = {}
        for k in r.keys():
            if k in finalResults:
                merged = ParseResults(finalResults[k])
                merged += ParseResults(r[k])
                dups[k] = merged
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2766
2768 if hasattr(self,"name"):
2769 return self.name
2770
2771 if self.strRepr is None:
2772 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
2773
2774 return self.strRepr
2775
2777 subRecCheckList = parseElementList[:] + [ self ]
2778 for e in self.exprs:
2779 e.checkRecursion( subRecCheckList )
2780
2783 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression, inheriting its parsing characteristics.

    A string C{expr} is wrapped in a C{Literal}.  When C{expr} is C{None}
    only C{self.expr} and C{self.strRepr} are set.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    # mirror the wrapped expression's whitespace handling and flags
    self.setWhitespaceChars( expr.whiteChars )
    for attr in ("mayIndexError", "mayReturnEmpty", "skipWhitespace", "saveAsList", "callPreparse"):
        setattr(self, attr, getattr(expr, attr))
    self.ignoreExprs.extend(expr.ignoreExprs)
2798
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression.

    Returns whatever C{self.expr._parse} returns.  Raises C{ParseException}
    when no expression has been assigned; the exception carries the real
    input string so that its location information is meaningful.
    """
    if self.expr is None:
        raise ParseException(instring,loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2804
2806 self.skipWhitespace = False
2807 self.expr = self.expr.copy()
2808 if self.expr is not None:
2809 self.expr.leaveWhitespace()
2810 return self
2811
2813 if isinstance( other, Suppress ):
2814 if other not in self.ignoreExprs:
2815 super( ParseElementEnhance, self).ignore( other )
2816 if self.expr is not None:
2817 self.expr.ignore( self.ignoreExprs[-1] )
2818 else:
2819 super( ParseElementEnhance, self).ignore( other )
2820 if self.expr is not None:
2821 self.expr.ignore( self.ignoreExprs[-1] )
2822 return self
2823
2829
2831 if self in parseElementList:
2832 raise RecursiveGrammarException( parseElementList+[self] )
2833 subRecCheckList = parseElementList[:] + [ self ]
2834 if self.expr is not None:
2835 self.expr.checkRecursion( subRecCheckList )
2836
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression, then check this one for recursion.

    The wrapped expression (if any) receives a copy of C{validateTrace}
    extended by this expression; the caller's list is never modified.
    """
    trace = validateTrace[:] + [self]
    inner = self.expr
    if inner is not None:
        inner.validate(trace)
    self.checkRecursion( [] )
2842
2844 try:
2845 return super(ParseElementEnhance,self).__str__()
2846 except:
2847 pass
2848
2849 if self.strRepr is None and self.expr is not None:
2850 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2851 return self.strRepr
2852
2855 """Lookahead matching of the given parse expression. C{FollowedBy}
2856 does *not* advance the parsing position within the input string, it only
2857 verifies that the specified parse expression matches at the current
2858 position. C{FollowedBy} always returns a null token list."""
2862
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc} without consuming.

    Any exception raised by the wrapped expression's C{tryParse}
    propagates; on success the position is unchanged and the token list
    is empty.
    """
    lookahead = self.expr
    # only success or failure matters; the end position is discarded
    lookahead.tryParse( instring, loc )
    return loc, []
2866
2867
2868 -class NotAny(ParseElementEnhance):
2869 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2870 does *not* advance the parsing position within the input string, it only
2871 verifies that the specified parse expression does *not* match at the current
2872 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2873 always returns a null token list. May be constructed using the '~' operator."""
2875 super(NotAny,self).__init__(expr)
2876
2877 self.skipWhitespace = False
2878 self.mayReturnEmpty = True
2879 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2880
2881 - def parseImpl( self, instring, loc, doActions=True ):
2885
2887 if hasattr(self,"name"):
2888 return self.name
2889
2890 if self.strRepr is None:
2891 self.strRepr = "~{" + _ustr(self.expr) + "}"
2892
2893 return self.strRepr
2894
2897 """Repetition of one or more of the given expression.
2898
2899 Parameters:
2900 - expr - expression that must match one or more times
2901 - stopOn - (default=None) - expression for a terminating sentinel
2902 (only required if the sentinel would ordinarily match the repetition
2903 expression)
2904 """
def __init__( self, expr, stopOn=None):
    """Wrap C{expr} for repetition, optionally ending at a sentinel.

    When C{stopOn} is given (a string is wrapped in a C{Literal}), its
    negation is stored in C{self.not_ender}; otherwise C{self.not_ender}
    is C{None}.
    """
    super(OneOrMore, self).__init__(expr)
    if stopOn is None:
        self.not_ender = None
        return
    sentinel = Literal(stopOn) if isinstance(stopOn, basestring) else stopOn
    self.not_ender = ~sentinel
2911
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as many more times as possible.

    The first match is mandatory and its failure propagates.  Later
    repetitions stop silently at the first C{ParseException} or
    C{IndexError}, including one raised by the stop-on check.  Returns
    C{(newloc, tokens)}.
    """
    parse_item = self.expr._parse
    skip_ignorables = self._skipIgnorables
    check_ender = self.not_ender is not None
    if check_ender:
        try_not_ender = self.not_ender.tryParse
        # must be at least one, but fail first if we are at the stopOn sentinel
        try_not_ender(instring, loc)

    loc, tokens = parse_item( instring, loc, doActions, callPreParse=False )
    try:
        hasIgnoreExprs = bool(self.ignoreExprs)
        while True:
            if check_ender:
                try_not_ender(instring, loc)
            preloc = skip_ignorables( instring, loc ) if hasIgnoreExprs else loc
            loc, tmptokens = parse_item( instring, preloc, doActions )
            # keep results that carry tokens or named results
            if tmptokens or tmptokens.haskeys():
                tokens += tmptokens
    except (ParseException,IndexError):
        # no further repetition: return what has been collected so far
        pass

    return loc, tokens
2940
2942 if hasattr(self,"name"):
2943 return self.name
2944
2945 if self.strRepr is None:
2946 self.strRepr = "{" + _ustr(self.expr) + "}..."
2947
2948 return self.strRepr
2949
2954
2956 """Optional repetition of zero or more of the given expression.
2957
2958 Parameters:
2959 - expr - expression that must match zero or more times
2960 - stopOn - (default=None) - expression for a terminating sentinel
2961 (only required if the sentinel would ordinarily match the repetition
2962 expression)
2963 """
2964 - def __init__( self, expr, stopOn=None):
2967
2968 - def parseImpl( self, instring, loc, doActions=True ):
2973
2975 if hasattr(self,"name"):
2976 return self.name
2977
2978 if self.strRepr is None:
2979 self.strRepr = "[" + _ustr(self.expr) + "]..."
2980
2981 return self.strRepr
2982
2989
2990 _optionalNotMatched = _NullToken()
2992 """Optional matching of the given expression.
2993
2994 Parameters:
2995 - expr - expression that must match zero or one time
2996 - default (optional) - value to be returned if the optional expression
2997 is not found.
2998 """
3000 super(Optional,self).__init__( expr, savelist=False )
3001 self.defaultValue = default
3002 self.mayReturnEmpty = True
3003
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression if possible, else supply the default.

    On C{ParseException} or C{IndexError} the position is left unchanged
    and the tokens are the default value (also stored under the wrapped
    expression's results name, if it has one), or empty when no default
    was given.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ default ])
            tokens[self.expr.resultsName] = default
        else:
            tokens = [ default ]
    return loc, tokens
3017
3019 if hasattr(self,"name"):
3020 return self.name
3021
3022 if self.strRepr is None:
3023 self.strRepr = "[" + _ustr(self.expr) + "]"
3024
3025 return self.strRepr
3026
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Store the target expression C{other} and the skipping options.

        A string C{failOn} is wrapped in a C{Literal}.
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.includeMatch = include
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.asList = False
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} for the target expression.

        Returns C{(newloc, results)} where the results start with the
        skipped text and, if C{self.includeMatch} is set, are followed by
        the tokens of the target expression.  Raises C{ParseException} when
        the scan runs off the end of the input without a match.
        """
        startloc = loc
        instrlen = len(instring)
        parse_target = self.expr._parse
        fail_matches = self.failOn.canParseNext if self.failOn is not None else None
        skip_ignored = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        scanloc = loc
        while scanloc <= instrlen:
            # stop scanning if the failOn expression matches here; leaving
            # the loop via break bypasses the else clause below
            if fail_matches is not None and fail_matches(instring, scanloc):
                break

            if skip_ignored is not None:
                # advance past ignore expressions
                while True:
                    try:
                        scanloc = skip_ignored(instring, scanloc)
                    except ParseBaseException:
                        break

            try:
                parse_target(instring, scanloc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                scanloc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = scanloc
        skipresult = ParseResults(instring[startloc:loc])

        if self.includeMatch:
            # parse the target for real, this time honouring doActions
            loc, mat = parse_target(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3099
3100 -class Forward(ParseElementEnhance):
3101 """Forward declaration of an expression to be defined later -
3102 used for recursive grammars, such as algebraic infix notation.
3103 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3104
3105 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3106 Specifically, '|' has a lower precedence than '<<', so that::
3107 fwdExpr << a | b | c
3108 will actually be evaluated as::
3109 (fwdExpr << a) | b | c
3110 thereby leaving b and c out as parseable alternatives. It is recommended that you
3111 explicitly group the values inserted into the C{Forward}::
3112 fwdExpr << (a | b | c)
3113 Converting to use the '<<=' operator instead will avoid this problem.
3114 """
3117
3119 if isinstance( other, basestring ):
3120 other = ParserElement.literalStringClass(other)
3121 self.expr = other
3122 self.strRepr = None
3123 self.mayIndexError = self.expr.mayIndexError
3124 self.mayReturnEmpty = self.expr.mayReturnEmpty
3125 self.setWhitespaceChars( self.expr.whiteChars )
3126 self.skipWhitespace = self.expr.skipWhitespace
3127 self.saveAsList = self.expr.saveAsList
3128 self.ignoreExprs.extend(self.expr.ignoreExprs)
3129 return self
3130
3132 return self << other
3133
3135 self.skipWhitespace = False
3136 return self
3137
3139 if not self.streamlined:
3140 self.streamlined = True
3141 if self.expr is not None:
3142 self.expr.streamline()
3143 return self
3144
def validate( self, validateTrace=[] ):
    """Validate the assigned expression once, then check for recursion.

    If this expression is already in C{validateTrace} the descent is
    skipped, which stops endless recursion through self-referencing
    grammars; the recursion check is run in either case.
    """
    if self not in validateTrace:
        trace = validateTrace[:] + [self]
        target = self.expr
        if target is not None:
            target.validate(trace)
    self.checkRecursion([])
3151
3153 if hasattr(self,"name"):
3154 return self.name
3155 return self.__class__.__name__ + ": ..."
3156
3157
3158 self._revertClass = self.__class__
3159 self.__class__ = _ForwardNoRecurse
3160 try:
3161 if self.expr is not None:
3162 retString = _ustr(self.expr)
3163 else:
3164 retString = "None"
3165 finally:
3166 self.__class__ = self._revertClass
3167 return self.__class__.__name__ + ": " + retString
3168
3170 if self.expr is not None:
3171 return super(Forward,self).copy()
3172 else:
3173 ret = Forward()
3174 ret <<= self
3175 return ret
3176
3180
3182 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
3183 - def __init__( self, expr, savelist=False ):
3186
3188 """Converter to concatenate all matching tokens to a single string.
3189 By default, the matching patterns must also be contiguous in the input string;
3190 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3191 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap C{expr} so that its tokens are joined into a single string.

    With C{adjacent} true, C{leaveWhitespace()} is applied first;
    whitespace skipping is then switched back on for this expression
    itself.  C{joinString} is the separator used when joining.
    """
    super(Combine,self).__init__( expr )
    if adjacent:
        self.leaveWhitespace()
    # must follow leaveWhitespace(): skipping is enabled again for this
    # expression regardless of the adjacent setting
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
3201
3208
def postParse( self, instring, loc, tokenlist ):
    """Replace the matched tokens with their concatenation.

    The returned results are a copy of C{tokenlist} whose items have been
    replaced by a single joined string.  When this expression has a results
    name and the copy carries named results, it is returned wrapped in a
    list.
    """
    combined = tokenlist.copy()
    del combined[:]
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and combined.haskeys():
        return [ combined ]
    return combined
3218
3219 -class Group(TokenConverter):
3220 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3222 super(Group,self).__init__( expr )
3223 self.saveAsList = True
3224
def postParse( self, instring, loc, tokenlist ):
    """Return the matched tokens wrapped in a one-element list, so that they
    appear as a single nested group in the results."""
    grouped = [tokenlist]
    return grouped
3227
3228 -class Dict(TokenConverter):
3229 """Converter to return a repetitive expression as a list, but also as a dictionary.
3230 Each element can also be referenced using the first token in the expression as its key.
3231 Useful for tabular report scraping when the first column can be used as an item key.
3232 """
3234 super(Dict,self).__init__( expr )
3235 self.saveAsList = True
3236
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to C{tokenlist} for every non-empty sub-token.

    The first item of each sub-token is the key (integer keys are converted
    to a stripped string). The stored value is:
     - C{""} when the sub-token holds only the key
     - the second item, when there are exactly two items and the second is
       not a C{ParseResults}
     - otherwise a copy of the sub-token with the key removed, unwrapped to
       its only item when exactly one item remains and it carries no keys

    Returns C{tokenlist} itself, wrapped in a list if this element has a
    results name.
    """
    for offset, entry in enumerate(tokenlist):
        if not len(entry):
            continue

        key = entry[0]
        if isinstance(key, int):
            key = _ustr(entry[0]).strip()

        entryLen = len(entry)
        if entryLen == 1:
            value = ""
        elif entryLen == 2 and not isinstance(entry[1], ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            keepWhole = len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys())
            value = remainder if keepWhole else remainder[0]

        tokenlist[key] = _ParseResultsWithOffset(value, offset)

    return [tokenlist] if self.resultsName else tokenlist
3260
3263 """Converter for ignoring the results of a parsed expression."""
3264 - def postParse( self, instring, loc, tokenlist ):
3266
3269
3272 """Wrapper for parse actions, to ensure they are only called once."""
3274 self.callable = _trim_arity(methodCall)
3275 self.called = False
3277 if not self.called:
3278 results = self.callable(s,l,t)
3279 self.called = True
3280 return results
3281 raise ParseException(s,l,"")
3284
3286 """Decorator for debugging parse actions."""
3287 f = _trim_arity(f)
3288 def z(*paArgs):
3289 thisFunc = f.__name__
3290 s,l,t = paArgs[-3:]
3291 if len(paArgs)>3:
3292 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3293 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3294 try:
3295 ret = f(*paArgs)
3296 except Exception as exc:
3297 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3298 raise
3299 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3300 return ret
3301 try:
3302 z.__name__ = f.__name__
3303 except AttributeError:
3304 pass
3305 return z
3306
3307
3308
3309
def delimitedList( expr, delim=",", combine=False ):
    """Helper to build an expression matching one or more C{expr} separated by
    C{delim} (the delimiter defaults to ',').

    With C{combine=False} (the default) the delimiters are suppressed and the
    matching tokens are returned as a list of tokens; intervening whitespace
    and comments are allowed between elements and delimiters.
    With C{combine=True} the whole match, delimiters included, is wrapped in
    a C{L{Combine}} and returned as a single token string.
    """
    dlName = "%s [%s %s]..." % (_ustr(expr), _ustr(delim), _ustr(expr))
    if combine:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)
3323
3325 """Helper to define a counted list of expressions.
3326 This helper defines a pattern of the form::
3327 integer expr expr expr...
3328 where the leading integer tells how many expr expressions follow.
3329 The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.
3330 """
3331 arrayExpr = Forward()
3332 def countFieldParseAction(s,l,t):
3333 n = t[0]
3334 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3335 return []
3336 if intExpr is None:
3337 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3338 else:
3339 intExpr = intExpr.copy()
3340 intExpr.setName("arrayLen")
3341 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3342 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
3343
3345 ret = []
3346 for i in L:
3347 if isinstance(i,list):
3348 ret.extend(_flatten(i))
3349 else:
3350 ret.append(i)
3351 return ret
3352
3354 """Helper to define an expression that is indirectly defined from
3355 the tokens matched in a previous expression, that is, it looks
3356 for a 'repeat' of a previous expression. For example::
3357 first = Word(nums)
3358 second = matchPreviousLiteral(first)
3359 matchExpr = first + ":" + second
3360 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3361 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3362 If this is not desired, use C{matchPreviousExpr}.
3363 Do *not* use with packrat parsing enabled.
3364 """
3365 rep = Forward()
3366 def copyTokenToRepeater(s,l,t):
3367 if t:
3368 if len(t) == 1:
3369 rep << t[0]
3370 else:
3371
3372 tflat = _flatten(t.asList())
3373 rep << And(Literal(tt) for tt in tflat)
3374 else:
3375 rep << Empty()
3376 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3377 rep.setName('(prev) ' + _ustr(expr))
3378 return rep
3379
3381 """Helper to define an expression that is indirectly defined from
3382 the tokens matched in a previous expression, that is, it looks
3383 for a 'repeat' of a previous expression. For example::
3384 first = Word(nums)
3385 second = matchPreviousExpr(first)
3386 matchExpr = first + ":" + second
3387 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3388 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3389 the expressions are evaluated first, and then compared, so
3390 C{"1"} is compared with C{"10"}.
3391 Do *not* use with packrat parsing enabled.
3392 """
3393 rep = Forward()
3394 e2 = expr.copy()
3395 rep <<= e2
3396 def copyTokenToRepeater(s,l,t):
3397 matchTokens = _flatten(t.asList())
3398 def mustMatchTheseTokens(s,l,t):
3399 theseTokens = _flatten(t.asList())
3400 if theseTokens != matchTokens:
3401 raise ParseException("",0,"")
3402 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3403 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3404 rep.setName('(prev) ' + _ustr(expr))
3405 return rep
3406
3408
3409 for c in r"\^-]":
3410 s = s.replace(c,_bslash+c)
3411 s = s.replace("\n",r"\n")
3412 s = s.replace("\t",r"\t")
3413 return _ustr(s)
3414
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
       if creating a C{Regex} raises an exception)

    If C{strs} yields no symbols (or is not a string, sequence or generator,
    in which case a C{SyntaxWarning} is issued), a C{NoMatch} is returned.
    """
    # the ABC aliases in the top-level ``collections`` module were removed in
    # Python 3.10; fall back to the old location for Python 2
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if isequal(other, cur):
                del symbols[i+j+1]
                break
            elif masks(cur, other):
                del symbols[i+j+1]
                symbols.insert(i,other)
                break
        else:
            # nothing after position i duplicates or is masked by it
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best-effort optimization only: fall back to MatchFirst, but do not
            # swallow KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3478
3480 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3481 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3482 in the proper order. The key pattern can include delimiting markers or punctuation,
3483 as long as they are suppressed, thereby leaving the significant key text. The value
3484 pattern can include named results, so that the C{Dict} results can include named token
3485 fields.
3486 """
3487 return Dict( ZeroOrMore( Group ( key + value ) ) )
3488
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values."""
    # zero-width markers that record the parse location before and after expr;
    # the end marker has callPreparse turned off
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def extractKeepingNames(s,l,t):
        # replace the token list in place with the source slice, removing the
        # two marker entries from the results
        t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    if asString:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]
    else:
        extractText = extractKeepingNames
    wrapped.setParseAction(extractText)
    return wrapped
3512
3514 """Helper to undo pyparsing's default grouping of And expressions, even
3515 if all but one are non-empty."""
3516 return TokenConverter(expr).setParseAction(lambda t:t[0])
3517
3519 """Helper to decorate a returned token with its starting and ending locations in the input string.
3520 This helper adds the following results names:
3521 - locn_start = location where matched expression begins
3522 - locn_end = location where matched expression ends
3523 - value = the actual parsed results
3524
3525 Be careful if the input text contains C{<TAB>} characters, you may want to call
3526 C{L{ParserElement.parseWithTabs}}
3527 """
3528 locator = Empty().setParseAction(lambda s,l,t: l)
3529 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
3530
3531
3532
3533 empty = Empty().setName("empty")
3534 lineStart = LineStart().setName("lineStart")
3535 lineEnd = LineEnd().setName("lineEnd")
3536 stringStart = StringStart().setName("stringStart")
3537 stringEnd = StringEnd().setName("stringEnd")
3538
# Grammar for the regex-style bracket expressions ("[a-z0-9_]") accepted by srange().
# An escaped punctuation character: backslash followed by one character, yielding that character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# An escaped hex character (\x21 or \0x21). The prefix is removed with an anchored
# regex rather than str.lstrip(r'\0x'): lstrip treats its argument as a set of
# characters, so it also consumed leading '0' digits (r"\x00" became "" and int()
# raised ValueError) and left an upper-case 'X' in place although the pattern allows it.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]", "", t[0]),16)))
# An escaped octal character (\041): everything after the backslash is read base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
# A range such as a-z, grouped so it can be told apart from single characters.
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3547 r"""Helper to easily define string ranges for use in Word construction. Borrows
3548 syntax from regexp '[]' string range definitions::
3549 srange("[0-9]") -> "0123456789"
3550 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3551 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3552 The input string must be enclosed in []'s, and the returned string is the expanded
3553 character set joined into a single string.
3554 The values enclosed in the []'s may be::
3555 a single character
3556 an escaped character with a leading backslash (such as \- or \])
3557 an escaped hex character with a leading '\x' (\x21, which is a '!' character)
3558 (\0x## is also supported for backwards compatibility)
3559 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3560 a range of any of the above, separated by a dash ('a-z', etc.)
3561 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3562 """
3563 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
3564 try:
3565 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
3566 except:
3567 return ""
3568
3570 """Helper method for defining parse actions that require matching at a specific
3571 column in the input text.
3572 """
3573 def verifyCol(strg,locn,toks):
3574 if col(locn,strg) != n:
3575 raise ParseException(strg,locn,"matched token not at column %d" % n)
3576 return verifyCol
3577
3579 """Helper method for common parse actions that simply return a literal value. Especially
3580 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3581 """
3582 return lambda s,l,t: [replStr]
3583
3585 """Helper parse action for removing quotation marks from parsed quoted strings.
3586 To use, add this parse action to quoted string using::
3587 quotedString.setParseAction( removeQuotes )
3588 """
3589 return t[0][1:-1]
3590
3592 """Helper parse action to convert tokens to upper case."""
3593 return [ tt.upper() for tt in map(_ustr,t) ]
3594
3596 """Helper parse action to convert tokens to lower case."""
3597 return [ tt.lower() for tt in map(_ustr,t) ]
3598
3627
3631
3635
3637 """Helper to create a validating parse action to be used with start tags created
3638 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3639 with a required attribute value, to avoid false matches on common tags such as
3640 C{<TD>} or C{<DIV>}.
3641
3642 Call C{withAttribute} with a series of attribute names and values. Specify the list
3643 of filter attributes names and values as:
3644 - keyword arguments, as in C{(align="right")}, or
3645 - as an explicit dict with C{**} operator, when an attribute name is also a Python
3646 reserved word, as in C{**{"class":"Customer", "align":"right"}}
3647 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3648 For attribute names with a namespace prefix, you must use the second form. Attribute
3649 names are matched insensitive to upper/lower case.
3650
3651 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
3652
3653 To verify that the attribute exists, but without specifying a value, pass
3654 C{withAttribute.ANY_VALUE} as the value.
3655 """
3656 if args:
3657 attrs = args[:]
3658 else:
3659 attrs = attrDict.items()
3660 attrs = [(k,v) for k,v in attrs]
3661 def pa(s,l,tokens):
3662 for attrName,attrValue in attrs:
3663 if attrName not in tokens:
3664 raise ParseException(s,l,"no matching attribute " + attrName)
3665 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3666 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3667 (attrName, tokens[attrName], attrValue))
3668 return pa
3669 withAttribute.ANY_VALUE = object()
3670
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.

    Parameters:
     - classname - required value of the class attribute
     - namespace - (default='') optional namespace prefix; when given, the
       attribute tested is C{"<namespace>:class"} instead of C{"class"}
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    return withAttribute(**{attrName: classname})
3677
3678 opAssoc = _Constants()
3679 opAssoc.LEFT = object()
3680 opAssoc.RIGHT = object()
3683 """Helper method for constructing grammars of expressions made up of
3684 operators working in a precedence hierarchy. Operators may be unary or
3685 binary, left- or right-associative. Parse actions can also be attached
3686 to operator expressions.
3687
3688 Parameters:
3689 - baseExpr - expression representing the most basic element for the nested
3690 - opList - list of tuples, one for each operator precedence level in the
3691 expression grammar; each tuple is of the form
3692 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3693 - opExpr is the pyparsing expression for the operator;
3694 may also be a string, which will be converted to a Literal;
3695 if numTerms is 3, opExpr is a tuple of two expressions, for the
3696 two operators separating the 3 terms
3697 - numTerms is the number of terms for this operator (must
3698 be 1, 2, or 3)
3699 - rightLeftAssoc is the indicator whether the operator is
3700 right or left associative, using the pyparsing-defined
3701 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3702 - parseAction is the parse action to be associated with
3703 expressions matching this operator expression (the
3704 parse action tuple member may be omitted)
3705 - lpar - expression for matching left-parentheses (default=Suppress('('))
3706 - rpar - expression for matching right-parentheses (default=Suppress(')'))
3707 """
3708 ret = Forward()
3709 lastExpr = baseExpr | ( lpar + ret + rpar )
3710 for i,operDef in enumerate(opList):
3711 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3712 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
3713 if arity == 3:
3714 if opExpr is None or len(opExpr) != 2:
3715 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3716 opExpr1, opExpr2 = opExpr
3717 thisExpr = Forward().setName(termName)
3718 if rightLeftAssoc == opAssoc.LEFT:
3719 if arity == 1:
3720 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3721 elif arity == 2:
3722 if opExpr is not None:
3723 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3724 else:
3725 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3726 elif arity == 3:
3727 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3728 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3729 else:
3730 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3731 elif rightLeftAssoc == opAssoc.RIGHT:
3732 if arity == 1:
3733
3734 if not isinstance(opExpr, Optional):
3735 opExpr = Optional(opExpr)
3736 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3737 elif arity == 2:
3738 if opExpr is not None:
3739 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3740 else:
3741 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3742 elif arity == 3:
3743 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3744 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3745 else:
3746 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3747 else:
3748 raise ValueError("operator must indicate right or left associativity")
3749 if pa:
3750 matchExpr.setParseAction( pa )
3751 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
3752 lastExpr = thisExpr
3753 ret <<= lastExpr
3754 return ret
3755 operatorPrecedence = infixNotation
3756
3757 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
3758 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
3759 quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
3760 Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
3761 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
3764 """Helper method for defining nested lists enclosed in opening and closing
3765 delimiters ("(" and ")" are the default).
3766
3767 Parameters:
3768 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3769 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3770 - content - expression for items within the nested lists (default=None)
3771 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3772
3773 If an expression is not provided for the content argument, the nested
3774 expression will capture all whitespace-delimited content between delimiters
3775 as a list of separate values.
3776
3777 Use the C{ignoreExpr} argument to define expressions that may contain
3778 opening or closing characters that should not be treated as opening
3779 or closing characters for nesting, such as quotedString or a comment
3780 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
3781 The default is L{quotedString}, but if no expressions are to be ignored,
3782 then pass C{None} for this argument.
3783 """
3784 if opener == closer:
3785 raise ValueError("opening and closing strings cannot be the same")
3786 if content is None:
3787 if isinstance(opener,basestring) and isinstance(closer,basestring):
3788 if len(opener) == 1 and len(closer)==1:
3789 if ignoreExpr is not None:
3790 content = (Combine(OneOrMore(~ignoreExpr +
3791 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3792 ).setParseAction(lambda t:t[0].strip()))
3793 else:
3794 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3795 ).setParseAction(lambda t:t[0].strip()))
3796 else:
3797 if ignoreExpr is not None:
3798 content = (Combine(OneOrMore(~ignoreExpr +
3799 ~Literal(opener) + ~Literal(closer) +
3800 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3801 ).setParseAction(lambda t:t[0].strip()))
3802 else:
3803 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3804 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3805 ).setParseAction(lambda t:t[0].strip()))
3806 else:
3807 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3808 ret = Forward()
3809 if ignoreExpr is not None:
3810 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3811 else:
3812 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3813 ret.setName('nested %s%s expression' % (opener,closer))
3814 return ret
3815
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most statements
       (default=True)

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: a backslash followed
    by a line end is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        # at end of input there is nothing left to compare
        if l >= len(s):
            return
        curCol = col(l,s)
        if curCol == indentStack[-1]:
            return
        if curCol > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    # one or more statements, each required to sit at the current indent level
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        blockBody = Optional(NL) + INDENT + statements + UNDENT
    else:
        blockBody = Optional(NL) + statements
    smExpr = Group( blockBody )

    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
3867
3868 alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
3869 punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
3870
3871 anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
3872 _htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
3873 commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
3875 """Helper parser action to replace common HTML entities with their special characters"""
3876 return _htmlEntityMap.get(t.entity)
3877
3878
3879 cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
3880
3881 htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
3882 restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
3883 dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
3884 cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
3885
3886 javaStyleComment = cppStyleComment
3887 pythonStyleComment = Regex(r"#.*").setName("Python style comment")
3888 _commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
3889 Optional( Word(" \t") +
3890 ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
3891 commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
3895 """
3896 Here are some common low-level expressions that may be useful in jump-starting parser development:
3897 - numeric forms (integers, reals, scientific notation)
3898 - parse actions for converting numeric strings to Python int and/or float types
3899 - common programming identifiers
3900 """
3901
3903 """
3904 Parse action for converting parsed integers to Python int
3905 """
3906 return int(t[0])
3907
3909 """
3910 Parse action for converting parsed numbers to Python float
3911 """
3912 return float(t[0])
3913
3914 integer = Word(nums).setName("integer").setParseAction(convertToInteger)
3915 """expression that parses an unsigned integer and returns an int"""
3916
3917 signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
3918 """expression that parses an integer with optional leading sign and returns an int"""
3919
3920 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
3921 """expression that parses a floating point number and returns a float"""
3922
3923 sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientfic notation").setParseAction(convertToFloat)
3924 """expression that parses a floating point number with optional scientific notation and returns a float"""
3925
3926
3927 numeric = (sciReal | real | signedInteger).streamline()
3928 """any numeric expression, returns the corresponding Python type"""
3929
3930 number = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("number").setParseAction(convertToFloat)
3931 """any int or real number, returned as float"""
3932
3933 identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
3934 """typical code identifier"""
3935
3936
3937 if __name__ == "__main__":
3938
3939 selectToken = CaselessLiteral("select")
3940 fromToken = CaselessLiteral("from")
3941
3942 ident = Word(alphas, alphanums + "_$")
3943
3944 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
3945 columnNameList = Group(delimitedList(columnName)).setName("columns")
3946 columnSpec = ('*' | columnNameList)
3947
3948 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
3949 tableNameList = Group(delimitedList(tableName)).setName("tables")
3950
3951 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")
3952
3953
3954 simpleSQL.runTests("""
3955 # '*' as column list and dotted table name
3956 select * from SYS.XYZZY
3957
3958 # caseless match on "SELECT", and casts back to "select"
3959 SELECT * from XYZZY, ABC
3960
3961 # list of column names, and mixed case SELECT keyword
3962 Select AA,BB,CC from Sys.dual
3963
3964 # multiple tables
3965 Select A, B, C from Sys.dual, Table2
3966
3967 # invalid SELECT keyword - should fail
3968 Xelect A, B, C from Sys.dual
3969
3970 # incomplete command - should fail
3971 Select
3972
3973 # invalid column name - should fail
3974 Select ^^^ frox Sys.dual
3975
3976 """)
3977
3978 pyparsing_common.numeric.runTests("""
3979 100
3980 -100
3981 +100
3982 3.14159
3983 6.02e23
3984 1e-12
3985 """)
3986
3987
3988 pyparsing_common.number.runTests("""
3989 100
3990 -100
3991 +100
3992 3.14159
3993 6.02e23
3994 1e-12
3995 """)
3996