Package pyparsing ::
Module pyparsing
|
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34
35 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
36
37 from pyparsing import Word, alphas
38
39 # define grammar of a greeting
40 greet = Word( alphas ) + "," + Word( alphas ) + "!"
41
42 hello = "Hello, World!"
43 print hello, "->", greet.parseString( hello )
44
45 The program outputs the following::
46
47 Hello, World! -> ['Hello', ',', 'World', '!']
48
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51
52 The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57 - quoted strings
58 - embedded comments
59 """
60
61 __version__ = "1.5.7"
62 __versionTime__ = "3 August 2012 05:00"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72
73
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor', 'ungroup',
92 ]
93
94 """
95 Detect if we are running version 3.X and make appropriate changes
96 Robert A. Clark
97 """
98 _PY3K = sys.version_info[0] > 2
99 if _PY3K:
100 _MAX_INT = sys.maxsize
101 basestring = str
102 unichr = chr
103 _ustr = str
104 else:
105 _MAX_INT = sys.maxint
106 range = xrange
107 set = lambda s : dict( [(c,0) for c in s] )
108
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
112 then < returns the unicode object | encodes it with the default encoding | ... >.
113 """
114 if isinstance(obj,unicode):
115 return obj
116
117 try:
118
119
120 return str(obj)
121
122 except UnicodeEncodeError:
123
124
125
126
127
128 return unicode(obj)
129
130
131
132
133
134
135
136
# Builtin callables that are invoked with the token list alone when they are
# used as parse actions.
singleArgBuiltins = []
try:
    import __builtin__
except ImportError:
    # Python 3 renamed the module to "builtins"; bind it under the old name
    # so the lookup below (and any other user of the name) keeps working.
    import builtins as __builtin__
for fname in "sum len sorted reversed list tuple set any all min max".split():
    try:
        singleArgBuiltins.append(getattr(__builtin__,fname))
    except AttributeError:
        # this interpreter does not provide the builtin - skip it
        continue
144
146 """Escape &, <, >, ", ', etc. in a string of data."""
147
148
149 from_symbols = '&><"\''
150 to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
151 for from_,to_ in zip(from_symbols, to_symbols):
152 data = data.replace(from_, to_)
153 return data
154
157
# Character-set constants used when building grammar expressions.
alphas = string.ascii_lowercase + string.ascii_uppercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)   # the backslash character
# every printable character except whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
164
166 """base exception class for all parsing runtime exceptions"""
167
168
def __init__( self, pstr, loc=0, msg=None, elem=None ):
    """Store the details of a parsing failure on the exception.

    Parameters:
     - pstr - the string being parsed; when C{msg} is omitted this single
       string is taken to be the message instead
     - loc - location stored as C{self.loc} (default=0)
     - msg - the error message, or C{None} if C{pstr} carries the message
     - elem - stored as C{self.parserElement}
    """
    self.loc = loc
    if msg is None:
        # single-string form: the lone string is the message and there is
        # no input text to report against
        self.msg = pstr
        self.pstr = ""
    else:
        self.msg = msg
        self.pstr = pstr
    self.parserElement = elem
178
180 """supported attributes by name are:
181 - lineno - returns the line number of the exception text
182 - col - returns the column number of the exception text
183 - line - returns the line containing the exception text
184 """
185 if( aname == "lineno" ):
186 return lineno( self.loc, self.pstr )
187 elif( aname in ("col", "column") ):
188 return col( self.loc, self.pstr )
189 elif( aname == "line" ):
190 return line( self.loc, self.pstr )
191 else:
192 raise AttributeError(aname)
193
195 return "%s (at char %d), (line:%d, col:%d)" % \
196 ( self.msg, self.loc, self.lineno, self.column )
210 return "loc msg pstr parserElement lineno col line " \
211 "markInputline __str__ __repr__".split()
212
214 """exception thrown when parse expressions don't match class;
215 supported attributes by name are:
216 - lineno - returns the line number of the exception text
217 - col - returns the column number of the exception text
218 - line - returns the line containing the exception text
219 """
220 pass
221
223 """user-throwable exception thrown when inconsistent parse content
224 is found; stops all parsing immediately"""
225 pass
226
228 """just like C{L{ParseFatalException}}, but thrown internally when an
229 C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
230 an unbacktrackable syntax error has been found"""
234
235
236
237
238
239
240
241
242
243
244
245
246
247
249 """exception thrown by C{validate()} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Keep the given list of parse elements as C{self.parseElementTrace}."""
    self.parseElementTrace = parseElementList
252
254 return "RecursiveGrammarException: %s" % self.parseElementTrace
255
262 return repr(self.tup)
264 self.tup = (self.tup[0],i)
265
267 """Structured parse results, to provide multiple means of access to the parsed data:
268 - as a list (C{len(results)})
269 - by list index (C{results[0], results[1]}, etc.)
270 - by attribute (C{results.<resultsName>})
271 """
272
def __new__(cls, toklist, name=None, asList=True, modal=True ):
    """Return C{toklist} unchanged when it is already an instance of C{cls};
    otherwise allocate a fresh object and flag it as needing initialization.

    The remaining arguments are accepted only so the signature matches
    C{__init__}; they are not used here.
    """
    if isinstance(toklist, cls):
        return toklist
    retobj = object.__new__(cls)
    # __init__ checks this flag before setting up the instance state
    retobj.__doinit = True
    return retobj
279
280
281
def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
    """Set up the token list and, when a results name is given, register the
    tokens under that name.

    Parameters:
     - toklist - a list of tokens (copied) or a single value (wrapped in a list)
     - name - optional results name; an int name is converted with C{_ustr}
     - asList - when true the named entry is stored as a C{ParseResults};
       otherwise the first token (or C{toklist} itself) is stored
     - modal - when false the name is recorded in the accumulating-names table
     - isinstance - the builtin, bound as a default argument
    """
    if self.__doinit:
        # only a freshly allocated object (flagged by __new__) gets its state
        # created; an existing instance handed back by __new__ is left intact
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            self.__accumNames[name] = 0
        if isinstance(name,int):
            name = _ustr(name)
        self.__name = name
        if toklist not in (None,'',[]):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist cannot be indexed - store it as given
                    self[name] = toklist
314
316 if isinstance( i, (int,slice) ):
317 return self.__toklist[i]
318 else:
319 if i not in self.__accumNames:
320 return self.__tokdict[i][-1][0]
321 else:
322 return ParseResults([ v[0] for v in self.__tokdict[i] ])
323
325 if isinstance(v,_ParseResultsWithOffset):
326 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
327 sub = v[0]
328 elif isinstance(k,int):
329 self.__toklist[k] = v
330 sub = v
331 else:
332 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
333 sub = v
334 if isinstance(sub,ParseResults):
335 sub.__parent = wkref(self)
336
338 if isinstance(i,(int,slice)):
339 mylen = len( self.__toklist )
340 del self.__toklist[i]
341
342
343 if isinstance(i, int):
344 if i < 0:
345 i += mylen
346 i = slice(i, i+1)
347
348 removed = list(range(*i.indices(mylen)))
349 removed.reverse()
350
351 for name in self.__tokdict:
352 occurrences = self.__tokdict[name]
353 for j in removed:
354 for k, (value, position) in enumerate(occurrences):
355 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
356 else:
357 del self.__tokdict[i]
358
360 return k in self.__tokdict
361
def __len__( self ):
    """Number of tokens in the token list."""
    return len( self.__toklist )

def __bool__(self):
    """True when the token list is not empty."""
    return len( self.__toklist ) > 0

# Python 2 looks up __nonzero__ for truth testing
__nonzero__ = __bool__

def __iter__( self ):
    """Iterate over the tokens in order."""
    return iter( self.__toklist )

def __reversed__( self ):
    """Iterate over a reversed copy of the token list."""
    return iter( self.__toklist[::-1] )
368 """Returns all named result keys."""
369 return self.__tokdict.keys()
370
def pop( self, index=-1 ):
    """Removes and returns item at specified index (default=last).
    Will work with either numeric indices or dict-key indices."""
    ret = self[index]
    del self[index]
    return ret
377
def get(self, key, defaultValue=None):
    """Returns named result matching the given key, or if there is no
    such name, then returns the given C{defaultValue} or C{None} if no
    C{defaultValue} is specified."""
    if key in self:
        return self[key]
    return defaultValue
386
def insert( self, index, insStr ):
    """Inserts new element at location index in the list of parsed tokens."""
    self.__toklist.insert(index, insStr)
    # shift the recorded positions of named results that sit after the
    # insertion point
    # NOTE(review): the comparison is strict, so a name recorded exactly at
    # `index` keeps its old position - confirm this is intended
    for occurrences in self.__tokdict.values():
        for k, (value, position) in enumerate(occurrences):
            occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
395
397 """Returns all named result keys and values as a list of tuples."""
398 return [(k,self[k]) for k in self.__tokdict]
399
401 """Returns all named result values."""
402 return [ v[-1][0] for v in self.__tokdict.values() ]
403
405 if True:
406 if name in self.__tokdict:
407 if name not in self.__accumNames:
408 return self.__tokdict[name][-1][0]
409 else:
410 return ParseResults([ v[0] for v in self.__tokdict[name] ])
411 else:
412 return ""
413 return None
414
416 ret = self.copy()
417 ret += other
418 return ret
419
421 if other.__tokdict:
422 offset = len(self.__toklist)
423 addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
424 otheritems = other.__tokdict.items()
425 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
426 for (k,vlist) in otheritems for v in vlist]
427 for k,v in otherdictitems:
428 self[k] = v
429 if isinstance(v[0],ParseResults):
430 v[0].__parent = wkref(self)
431
432 self.__toklist += other.__toklist
433 self.__accumNames.update( other.__accumNames )
434 return self
435
437 if isinstance(other,int) and other == 0:
438 return self.copy()
439
441 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
442
444 out = []
445 for i in self.__toklist:
446 if isinstance(i, ParseResults):
447 out.append(_ustr(i))
448 else:
449 out.append(repr(i))
450 return '[' + ', '.join(out) + ']'
451
453 out = []
454 for item in self.__toklist:
455 if out and sep:
456 out.append(sep)
457 if isinstance( item, ParseResults ):
458 out += item._asStringList()
459 else:
460 out.append( _ustr(item) )
461 return out
462
464 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
465 out = []
466 for res in self.__toklist:
467 if isinstance(res,ParseResults):
468 out.append( res.asList() )
469 else:
470 out.append( res )
471 return out
472
474 """Returns the named parse results as dictionary."""
475 return dict( self.items() )
476
478 """Returns a new copy of a C{ParseResults} object."""
479 ret = ParseResults( self.__toklist )
480 ret.__tokdict = self.__tokdict.copy()
481 ret.__parent = self.__parent
482 ret.__accumNames.update( self.__accumNames )
483 ret.__name = self.__name
484 return ret
485
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    Parameters:
     - doctag - tag for the enclosing element; when C{None} the results name
       of this object is used, falling back to C{"ITEM"}
     - namedItemsOnly - when true, tokens without a results name are omitted
       (and an unnamed object with no C{doctag} yields C{""})
     - indent - prefix placed before this element's tags
     - formatted - when false, no newlines or indentation are emitted
    """
    nl = "\n"
    out = []
    # map token position -> results name
    namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                    for v in vlist ] )
    # NOTE(review): one-space indent step copied as it appears in the
    # listing; whitespace may have been collapsed - confirm
    nextLevelIndent = indent + " "

    # collapse out indents if formatting is not desired
    if not formatted:
        indent = ""
        nextLevelIndent = ""
        nl = ""

    if doctag is not None:
        selfTag = doctag
    elif self.__name:
        selfTag = self.__name
    else:
        selfTag = None

    if not selfTag:
        if namedItemsOnly:
            return ""
        selfTag = "ITEM"

    out += [ nl, indent, "<", selfTag, ">" ]

    for i,res in enumerate(self.__toklist):
        if isinstance(res,ParseResults):
            # nested results render themselves; pass the name (or None)
            out += [ res.asXML(namedItems.get(i),
                               namedItemsOnly and doctag is None,
                               nextLevelIndent,
                               formatted)]
        else:
            # individual token, see if there is a name for it
            resTag = namedItems.get(i)
            if not resTag:
                if namedItemsOnly:
                    continue
                resTag = "ITEM"
            xmlBodyText = _xml_escape(_ustr(res))
            out += [ nl, nextLevelIndent, "<", resTag, ">",
                     xmlBodyText,
                     "</", resTag, ">" ]

    out += [ nl, indent, "</", selfTag, ">" ]
    return "".join(out)
545
547 for k,vlist in self.__tokdict.items():
548 for v,loc in vlist:
549 if sub is v:
550 return k
551 return None
552
554 """Returns the results name for this token expression."""
555 if self.__name:
556 return self.__name
557 elif self.__parent:
558 par = self.__parent()
559 if par:
560 return par.__lookup(self)
561 else:
562 return None
563 elif (len(self) == 1 and
564 len(self.__tokdict) == 1 and
565 self.__tokdict.values()[0][0][1] in (0,-1)):
566 return self.__tokdict.keys()[0]
567 else:
568 return None
569
def dump(self,indent='',depth=0):
    """Diagnostic method for listing out the contents of a C{ParseResults}.
    Accepts an optional C{indent} argument so that this string can be embedded
    in a nested display of other data.

    The first line is the token list; each named result follows on its own
    line in sorted order, and named results that themselves have keys are
    dumped recursively with C{depth+1}."""
    out = [ indent+_ustr(self.asList()) ]
    # copy before sorting so this works whatever sequence type items() returns
    keys = list(self.items())
    keys.sort()
    for k,v in keys:
        out.append('\n')
        # NOTE(review): one-space depth step copied as it appears in the
        # listing; whitespace may have been collapsed - confirm
        out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
        if isinstance(v,ParseResults) and v.keys():
            out.append( v.dump(indent,depth+1) )
        else:
            out.append(_ustr(v))
    return "".join(out)
590
591
593 return ( self.__toklist,
594 ( self.__tokdict.copy(),
595 self.__parent is not None and self.__parent() or None,
596 self.__accumNames,
597 self.__name ) )
598
600 self.__toklist = state[0]
601 (self.__tokdict,
602 par,
603 inAccumNames,
604 self.__name) = state[1]
605 self.__accumNames = {}
606 self.__accumNames.update(inAccumNames)
607 if par is not None:
608 self.__parent = wkref(par)
609 else:
610 self.__parent = None
611
614
616 """Returns current column within a string, counting newlines as line separators.
617 The first column is number 1.
618
619 Note: the default parsing behavior is to expand tabs in the input string
620 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
621 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
622 consistent view of the parsed string, the parse location, and line and column
623 positions within the parsed string.
624 """
625 return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
626
628 """Returns current line number within a string, counting newlines as line separators.
629 The first line is number 1.
630
631 Note: the default parsing behavior is to expand tabs in the input string
632 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
633 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
634 consistent view of the parsed string, the parse location, and line and column
635 positions within the parsed string.
636 """
637 return strg.count("\n",0,loc) + 1
638
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text does not include the newline characters that delimit it.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # lastCR is -1 when loc is on the first line, so lastCR+1 is the line start
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    # no newline after loc: the line runs to the end of the string
    return strg[lastCR+1:]
648
650 print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
651
653 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
654
656 print ("Exception raised:" + _ustr(exc))
657
659 """'Do-nothing' debug action, to suppress debugging output during parsing."""
660 pass
661
662 'decorator to trim function calls to match the arity of the target'
663 if not _PY3K:
665 if func in singleArgBuiltins:
666 return lambda s,l,t: func(t)
667 limit = [0]
668 def wrapper(*args):
669 while 1:
670 try:
671 return func(*args[limit[0]:])
672 except TypeError:
673 if limit[0] <= maxargs:
674 limit[0] += 1
675 continue
676 raise
677 return wrapper
678 else:
680 if func in singleArgBuiltins:
681 return lambda s,l,t: func(t)
682 limit = maxargs
683 def wrapper(*args):
684
685 while 1:
686 try:
687 return func(*args[limit:])
688 except TypeError:
689 if limit:
690 limit -= 1
691 continue
692 raise
693 return wrapper
694
696 """Abstract base level parser element class."""
697 DEFAULT_WHITE_CHARS = " \n\t\r"
698 verbose_stacktrace = False
699
704 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
705
707 """
708 Set class to be used for inclusion of string literals into a parser.
709 """
710 ParserElement.literalStringClass = cls
711 inlineLiteralsUsing = staticmethod(inlineLiteralsUsing)
712
714 self.parseAction = list()
715 self.failAction = None
716
717 self.strRepr = None
718 self.resultsName = None
719 self.saveAsList = savelist
720 self.skipWhitespace = True
721 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
722 self.copyDefaultWhiteChars = True
723 self.mayReturnEmpty = False
724 self.keepTabs = False
725 self.ignoreExprs = list()
726 self.debug = False
727 self.streamlined = False
728 self.mayIndexError = True
729 self.errmsg = ""
730 self.modalResults = True
731 self.debugActions = ( None, None, None )
732 self.re = None
733 self.callPreparse = True
734 self.callDuringTry = False
735
737 """Make a copy of this C{ParserElement}. Useful for defining different parse actions
738 for the same parsing pattern, using copies of the original parse element."""
739 cpy = copy.copy( self )
740 cpy.parseAction = self.parseAction[:]
741 cpy.ignoreExprs = self.ignoreExprs[:]
742 if self.copyDefaultWhiteChars:
743 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
744 return cpy
745
747 """Define name for this expression, for use in debugging."""
748 self.name = name
749 self.errmsg = "Expected " + self.name
750 if hasattr(self,"exception"):
751 self.exception.msg = self.errmsg
752 return self
753
755 """Define name for referencing matching tokens as a nested attribute
756 of the returned parse results.
757 NOTE: this returns a *copy* of the original C{ParserElement} object;
758 this is so that the client can define a basic element, such as an
759 integer, and reference it in multiple places with different names.
760
761 You can also set results names using the abbreviated syntax,
762 C{expr("name")} in place of C{expr.setResultsName("name")} -
763 see L{I{__call__}<__call__>}.
764 """
765 newself = self.copy()
766 if name.endswith("*"):
767 name = name[:-1]
768 listAllMatches=True
769 newself.resultsName = name
770 newself.modalResults = not listAllMatches
771 return newself
772
774 """Method to invoke the Python pdb debugger when this element is
775 about to be parsed. Set C{breakFlag} to True to enable, False to
776 disable.
777 """
778 if breakFlag:
779 _parseMethod = self._parse
780 def breaker(instring, loc, doActions=True, callPreParse=True):
781 import pdb
782 pdb.set_trace()
783 return _parseMethod( instring, loc, doActions, callPreParse )
784 breaker._originalParseMethod = _parseMethod
785 self._parse = breaker
786 else:
787 if hasattr(self._parse,"_originalParseMethod"):
788 self._parse = self._parse._originalParseMethod
789 return self
790
792 """Define action to perform when successfully matching parse element definition.
793 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
794 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
795 - s = the original string being parsed (see note below)
796 - loc = the location of the matching substring
797 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
798 If the functions in fns modify the tokens, they can return them as the return
799 value from fn, and the modified list of tokens will replace the original.
800 Otherwise, fn does not need to return any value.
801
802 Note: the default parsing behavior is to expand tabs in the input string
803 before starting the parsing process. See L{I{parseString}<parseString>} for more information
804 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
805 consistent view of the parsed string, the parse location, and line and column
806 positions within the parsed string.
807 """
808 self.parseAction = list(map(_trim_arity, list(fns)))
809 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
810 return self
811
813 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
814 self.parseAction += list(map(_trim_arity, list(fns)))
815 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
816 return self
817
819 """Define action to perform if parsing fails at this expression.
820 Fail action fn is a callable function that takes the arguments
821 C{fn(s,loc,expr,err)} where:
822 - s = string being parsed
823 - loc = location where expression match was attempted and failed
824 - expr = the parse expression that failed
825 - err = the exception thrown
826 The function returns no value. It may throw C{L{ParseFatalException}}
827 if it is desired to stop parsing immediately."""
828 self.failAction = fn
829 return self
830
832 exprsFound = True
833 while exprsFound:
834 exprsFound = False
835 for e in self.ignoreExprs:
836 try:
837 while 1:
838 loc,dummy = e._parse( instring, loc )
839 exprsFound = True
840 except ParseException:
841 pass
842 return loc
843
845 if self.ignoreExprs:
846 loc = self._skipIgnorables( instring, loc )
847
848 if self.skipWhitespace:
849 wt = self.whiteChars
850 instrlen = len(instring)
851 while loc < instrlen and instring[loc] in wt:
852 loc += 1
853
854 return loc
855
856 - def parseImpl( self, instring, loc, doActions=True ):
858
859 - def postParse( self, instring, loc, tokenlist ):
861
862
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this element at C{loc} in C{instring} without consulting the cache.

    Runs the optional pre-parse step, C{parseImpl} and C{postParse}, wraps
    the tokens in a C{ParseResults}, then applies the parse actions.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - passed to C{parseImpl}; parse actions run only when this
       or C{self.callDuringTry} is true
     - callPreParse - when false, C{preParse} is skipped

    Returns a C{(loc, tokens)} tuple: the location reported by C{parseImpl}
    and the resulting C{ParseResults}.

    An C{IndexError} from C{parseImpl} is converted to a C{ParseException}
    located at the end of the string.
    """
    debugging = ( self.debug )

    if debugging or self.failAction:
        # slow path: debug/fail callbacks must observe the attempt
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # no IndexError guard needed on this path
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        # a parse action that returns a value replaces the tokens
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    # a parse action that returns a value replaces the tokens
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
939
945
946
947
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on the
    element and every argument. A cached success is returned with a copy of
    its tokens; a cached C{ParseBaseException} is raised again.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    if lookup in ParserElement._exprArgCache:
        value = ParserElement._exprArgCache[ lookup ]
        if isinstance(value, Exception):
            # a failure was recorded for this exact attempt - repeat it
            raise value
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            # cache a copy of the tokens; the caller gets the original
            ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            pe = sys.exc_info()[1]
            ParserElement._exprArgCache[ lookup ] = pe
            raise
964
965 _parse = _parseNoCache
966
967
968 _exprArgCache = {}
971 resetCache = staticmethod(resetCache)
972
973 _packratEnabled = False
975 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
976 Repeated parse attempts at the same string location (which happens
977 often in many complex grammars) can immediately return a cached value,
978 instead of re-executing parsing/validating code. Memoizing is done of
979 both valid results and parsing exceptions.
980
981 This speedup may break existing programs that use parse actions that
982 have side-effects. For this reason, packrat parsing is disabled when
983 you first import pyparsing. To activate the packrat feature, your
984 program must call the class method C{ParserElement.enablePackrat()}. If
985 your program uses C{psyco} to "compile as you go", you must call
986 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
987 Python will crash. For best results, call C{enablePackrat()} immediately
988 after importing pyparsing.
989 """
990 if not ParserElement._packratEnabled:
991 ParserElement._packratEnabled = True
992 ParserElement._parse = ParserElement._parseCache
993 enablePackrat = staticmethod(enablePackrat)
994
996 """Execute the parse expression with the given string.
997 This is the main interface to the client code, once the complete
998 expression has been built.
999
1000 If you want the grammar to require that the entire input string be
1001 successfully parsed, then set C{parseAll} to True (equivalent to ending
1002 the grammar with C{L{StringEnd()}}).
1003
1004 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1005 in order to report proper column numbers in parse actions.
1006 If the input string contains tabs and
1007 the grammar uses parse actions that use the C{loc} argument to index into the
1008 string being parsed, you can ensure you have a consistent view of the input
1009 string by:
1010 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1011 (see L{I{parseWithTabs}<parseWithTabs>})
1012 - define your parse action using the full C{(s,loc,toks)} signature, and
1013 reference the input string using the parse action's C{s} argument
1014 - explicitly expand the tabs in your input string before calling
1015 C{parseString}
1016 """
1017 ParserElement.resetCache()
1018 if not self.streamlined:
1019 self.streamline()
1020
1021 for e in self.ignoreExprs:
1022 e.streamline()
1023 if not self.keepTabs:
1024 instring = instring.expandtabs()
1025 try:
1026 loc, tokens = self._parse( instring, 0 )
1027 if parseAll:
1028 loc = self.preParse( instring, loc )
1029 se = Empty() + StringEnd()
1030 se._parse( instring, loc )
1031 except ParseBaseException:
1032 if ParserElement.verbose_stacktrace:
1033 raise
1034 else:
1035
1036 exc = sys.exc_info()[1]
1037 raise exc
1038 else:
1039 return tokens
1040
1042 """Scan the input string for expression matches. Each match will return the
1043 matching tokens, start location, and end location. May be called with optional
1044 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1045 C{overlap} is specified, then overlapping matches will be reported.
1046
1047 Note that the start and end locations are reported relative to the string
1048 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1049 strings with embedded tabs."""
1050 if not self.streamlined:
1051 self.streamline()
1052 for e in self.ignoreExprs:
1053 e.streamline()
1054
1055 if not self.keepTabs:
1056 instring = _ustr(instring).expandtabs()
1057 instrlen = len(instring)
1058 loc = 0
1059 preparseFn = self.preParse
1060 parseFn = self._parse
1061 ParserElement.resetCache()
1062 matches = 0
1063 try:
1064 while loc <= instrlen and matches < maxMatches:
1065 try:
1066 preloc = preparseFn( instring, loc )
1067 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1068 except ParseException:
1069 loc = preloc+1
1070 else:
1071 if nextLoc > loc:
1072 matches += 1
1073 yield tokens, preloc, nextLoc
1074 if overlap:
1075 nextloc = preparseFn( instring, loc )
1076 if nextloc > loc:
1077 loc = nextLoc
1078 else:
1079 loc += 1
1080 else:
1081 loc = nextLoc
1082 else:
1083 loc = preloc+1
1084 except ParseBaseException:
1085 if ParserElement.verbose_stacktrace:
1086 raise
1087 else:
1088
1089 exc = sys.exc_info()[1]
1090 raise exc
1091
1125
1127 """Another extension to C{L{scanString}}, simplifying the access to the tokens found
1128 to match the given parse expression. May be called with optional
1129 C{maxMatches} argument, to clip searching after 'n' matches are found.
1130 """
1131 try:
1132 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1133 except ParseBaseException:
1134 if ParserElement.verbose_stacktrace:
1135 raise
1136 else:
1137
1138 exc = sys.exc_info()[1]
1139 raise exc
1140
1142 """Implementation of + operator - returns C{L{And}}"""
1143 if isinstance( other, basestring ):
1144 other = ParserElement.literalStringClass( other )
1145 if not isinstance( other, ParserElement ):
1146 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1147 SyntaxWarning, stacklevel=2)
1148 return None
1149 return And( [ self, other ] )
1150
1152 """Implementation of + operator when left operand is not a C{L{ParserElement}}"""
1153 if isinstance( other, basestring ):
1154 other = ParserElement.literalStringClass( other )
1155 if not isinstance( other, ParserElement ):
1156 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1157 SyntaxWarning, stacklevel=2)
1158 return None
1159 return other + self
1160
1162 """Implementation of - operator, returns C{L{And}} with error stop"""
1163 if isinstance( other, basestring ):
1164 other = ParserElement.literalStringClass( other )
1165 if not isinstance( other, ParserElement ):
1166 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1167 SyntaxWarning, stacklevel=2)
1168 return None
1169 return And( [ self, And._ErrorStop(), other ] )
1170
1172 """Implementation of - operator when left operand is not a C{L{ParserElement}}"""
1173 if isinstance( other, basestring ):
1174 other = ParserElement.literalStringClass( other )
1175 if not isinstance( other, ParserElement ):
1176 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1177 SyntaxWarning, stacklevel=2)
1178 return None
1179 return other - self
1180
1182 """Implementation of * operator, allows use of C{expr * 3} in place of
1183 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1184 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1185 may also include C{None} as in:
1186 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1187 to C{expr*n + L{ZeroOrMore}(expr)}
1188 (read as "at least n instances of C{expr}")
1189 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1190 (read as "0 to n instances of C{expr}")
1191 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1192 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1193
1194 Note that C{expr*(None,n)} does not raise an exception if
1195 more than n exprs exist in the input stream; that is,
1196 C{expr*(None,n)} does not enforce a maximum number of expr
1197 occurrences. If this behavior is desired, then write
1198 C{expr*(None,n) + ~expr}
1199
1200 """
1201 if isinstance(other,int):
1202 minElements, optElements = other,0
1203 elif isinstance(other,tuple):
1204 other = (other + (None, None))[:2]
1205 if other[0] is None:
1206 other = (0, other[1])
1207 if isinstance(other[0],int) and other[1] is None:
1208 if other[0] == 0:
1209 return ZeroOrMore(self)
1210 if other[0] == 1:
1211 return OneOrMore(self)
1212 else:
1213 return self*other[0] + ZeroOrMore(self)
1214 elif isinstance(other[0],int) and isinstance(other[1],int):
1215 minElements, optElements = other
1216 optElements -= minElements
1217 else:
1218 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1219 else:
1220 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1221
1222 if minElements < 0:
1223 raise ValueError("cannot multiply ParserElement by negative value")
1224 if optElements < 0:
1225 raise ValueError("second tuple value must be greater or equal to first tuple value")
1226 if minElements == optElements == 0:
1227 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1228
1229 if (optElements):
1230 def makeOptionalList(n):
1231 if n>1:
1232 return Optional(self + makeOptionalList(n-1))
1233 else:
1234 return Optional(self)
1235 if minElements:
1236 if minElements == 1:
1237 ret = self + makeOptionalList(optElements)
1238 else:
1239 ret = And([self]*minElements) + makeOptionalList(optElements)
1240 else:
1241 ret = makeOptionalList(optElements)
1242 else:
1243 if minElements == 1:
1244 ret = self
1245 else:
1246 ret = And([self]*minElements)
1247 return ret
1248
1251
1253 """Implementation of | operator - returns C{L{MatchFirst}}"""
1254 if isinstance( other, basestring ):
1255 other = ParserElement.literalStringClass( other )
1256 if not isinstance( other, ParserElement ):
1257 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1258 SyntaxWarning, stacklevel=2)
1259 return None
1260 return MatchFirst( [ self, other ] )
1261
1263 """Implementation of | operator when left operand is not a C{L{ParserElement}}"""
1264 if isinstance( other, basestring ):
1265 other = ParserElement.literalStringClass( other )
1266 if not isinstance( other, ParserElement ):
1267 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1268 SyntaxWarning, stacklevel=2)
1269 return None
1270 return other | self
1271
1273 """Implementation of ^ operator - returns C{L{Or}}"""
1274 if isinstance( other, basestring ):
1275 other = ParserElement.literalStringClass( other )
1276 if not isinstance( other, ParserElement ):
1277 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1278 SyntaxWarning, stacklevel=2)
1279 return None
1280 return Or( [ self, other ] )
1281
1283 """Implementation of ^ operator when left operand is not a C{L{ParserElement}}"""
1284 if isinstance( other, basestring ):
1285 other = ParserElement.literalStringClass( other )
1286 if not isinstance( other, ParserElement ):
1287 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1288 SyntaxWarning, stacklevel=2)
1289 return None
1290 return other ^ self
1291
1293 """Implementation of & operator - returns C{L{Each}}"""
1294 if isinstance( other, basestring ):
1295 other = ParserElement.literalStringClass( other )
1296 if not isinstance( other, ParserElement ):
1297 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1298 SyntaxWarning, stacklevel=2)
1299 return None
1300 return Each( [ self, other ] )
1301
1303 """Implementation of & operator when left operand is not a C{L{ParserElement}}"""
1304 if isinstance( other, basestring ):
1305 other = ParserElement.literalStringClass( other )
1306 if not isinstance( other, ParserElement ):
1307 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1308 SyntaxWarning, stacklevel=2)
1309 return None
1310 return other & self
1311
1313 """Implementation of ~ operator - returns C{L{NotAny}}"""
1314 return NotAny( self )
1315
1317 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
1318 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1319 could be written as::
1320 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1321
1322 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1323 passed as C{True}.
1324 """
1325 return self.setResultsName(name)
1326
1328 """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1329 cluttering up returned output.
1330 """
1331 return Suppress( self )
1332
1334 """Disables the skipping of whitespace before matching the characters in the
1335 C{ParserElement}'s defined pattern. This is normally only used internally by
1336 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1337 """
1338 self.skipWhitespace = False
1339 return self
1340
1342 """Overrides the default whitespace chars
1343 """
1344 self.skipWhitespace = True
1345 self.whiteChars = chars
1346 self.copyDefaultWhiteChars = False
1347 return self
1348
1350 """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
1351 Must be called before C{parseString} when the input grammar contains elements that
1352 match C{<TAB>} characters."""
1353 self.keepTabs = True
1354 return self
1355
1357 """Define expression to be ignored (e.g., comments) while doing pattern
1358 matching; may be called repeatedly, to define multiple comment or other
1359 ignorable patterns.
1360 """
1361 if isinstance( other, Suppress ):
1362 if other not in self.ignoreExprs:
1363 self.ignoreExprs.append( other.copy() )
1364 else:
1365 self.ignoreExprs.append( Suppress( other.copy() ) )
1366 return self
1367
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Install the three debugging callbacks and switch debugging on.

    Any action given as a false value (such as C{None}) is replaced by the
    matching module-level default action. Returns C{self}.
    """
    if not startAction:
        startAction = _defaultStartDebugAction
    if not successAction:
        successAction = _defaultSuccessDebugAction
    if not exceptionAction:
        exceptionAction = _defaultExceptionDebugAction
    self.debugActions = (startAction, successAction, exceptionAction)
    self.debug = True
    return self
1375
1377 """Enable display of debugging messages while doing pattern matching.
1378 Set C{flag} to True to enable, False to disable."""
1379 if flag:
1380 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1381 else:
1382 self.debug = False
1383 return self
1384
1387
1390
1392 self.streamlined = True
1393 self.strRepr = None
1394 return self
1395
1398
def validate( self, validateTrace=[] ):
    """Check this expression for structural problems such as infinitely recursive definitions.

    Delegates to C{checkRecursion}, always starting from a new empty list;
    C{validateTrace} is accepted but not read by this implementation.
    """
    freshTrace = []
    self.checkRecursion( freshTrace )
1402
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or a path
       that is passed to C{open()}
     - parseAll - forwarded unchanged to C{parseString}

    Returns whatever C{parseString} returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # no usable read() - treat the argument as a path
        f = open(file_or_filename, "r")
        try:
            file_contents = f.read()
        finally:
            # close even when read() fails so the handle is never leaked
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # re-raised from this frame (presumably to shorten the reported traceback)
        exc = sys.exc_info()[1]
        raise exc
1420
1423
1425 if aname == "myException":
1426 self.myException = ret = self.getException();
1427 return ret;
1428 else:
1429 raise AttributeError("no such attribute " + aname)
1430
1432 if isinstance(other, ParserElement):
1433 return self is other or self.__dict__ == other.__dict__
1434 elif isinstance(other, basestring):
1435 try:
1436 self.parseString(_ustr(other), parseAll=True)
1437 return True
1438 except ParseBaseException:
1439 return False
1440 else:
1441 return super(ParserElement,self)==other
1442
1444 return not (self == other)
1445
1447 return hash(id(self))
1448
1450 return self == other
1451
1453 return not (self == other)
1454
1455
1456 -class Token(ParserElement):
1457 """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
1460
1462 s = super(Token,self).setName(name)
1463 self.errmsg = "Expected " + self.name
1464 return s
1465
1466
1468 """An empty token, will always match."""
1470 super(Empty,self).__init__()
1471 self.name = "Empty"
1472 self.mayReturnEmpty = True
1473 self.mayIndexError = False
1474
1475
1477 """A token that will never match."""
1479 super(NoMatch,self).__init__()
1480 self.name = "NoMatch"
1481 self.mayReturnEmpty = True
1482 self.mayIndexError = False
1483 self.errmsg = "Unmatchable token"
1484
def parseImpl( self, instring, loc, doActions=True ):
    """Never match: raise C{self.myException} with C{loc} and C{pstr} set to this attempt."""
    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1490
1491
1493 """Token to exactly match a specified string."""
1495 super(Literal,self).__init__()
1496 self.match = matchString
1497 self.matchLen = len(matchString)
1498 try:
1499 self.firstMatchChar = matchString[0]
1500 except IndexError:
1501 warnings.warn("null string passed to Literal; use Empty() instead",
1502 SyntaxWarning, stacklevel=2)
1503 self.__class__ = Empty
1504 self.name = '"%s"' % _ustr(self.match)
1505 self.errmsg = "Expected " + self.name
1506 self.mayReturnEmpty = False
1507 self.mayIndexError = False
1508
1509
1510
1511
1512
def parseImpl( self, instring, loc, doActions=True ):
    """Match the literal text C{self.match} at C{loc}.

    Returns C{(loc + self.matchLen, self.match)} on success; otherwise
    raises C{self.myException} with C{loc} and C{pstr} filled in.
    """
    # cheap single-character test first; the full comparison only runs when it passes
    if instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith( self.match, loc ):
            return loc + self.matchLen, self.match

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1522 _L = Literal
1523 ParserElement.literalStringClass = Literal
1524
1526 """Token to exactly match a specified string as a keyword, that is, it must be
1527 immediately followed by a non-keyword character. Compare with C{L{Literal}}::
1528 Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
1529 Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
1530 Accepts two optional constructor arguments in addition to the keyword string:
1531 C{identChars} is a string of characters that would be valid identifier characters,
1532 defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
1533 matching, default is C{False}.
1534 """
1535 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1536
1538 super(Keyword,self).__init__()
1539 self.match = matchString
1540 self.matchLen = len(matchString)
1541 try:
1542 self.firstMatchChar = matchString[0]
1543 except IndexError:
1544 warnings.warn("null string passed to Keyword; use Empty() instead",
1545 SyntaxWarning, stacklevel=2)
1546 self.name = '"%s"' % self.match
1547 self.errmsg = "Expected " + self.name
1548 self.mayReturnEmpty = False
1549 self.mayIndexError = False
1550 self.caseless = caseless
1551 if caseless:
1552 self.caselessmatch = matchString.upper()
1553 identChars = identChars.upper()
1554 self.identChars = set(identChars)
1555
def parseImpl( self, instring, loc, doActions=True ):
    """Match the keyword at C{loc}, requiring non-identifier characters on both sides.

    The keyword matches only if the character just after it (when there is
    one) and the character just before C{loc} (when C{loc} is not 0) are not
    in C{self.identChars}. With C{self.caseless} set, text and neighbours are
    upper-cased before comparison.

    Returns C{(loc + self.matchLen, self.match)}; raises C{self.myException}
    with C{loc} and C{pstr} filled in when there is no match.
    """
    end = loc + self.matchLen
    identChars = self.identChars
    if self.caseless:
        matched = ( instring[loc:end].upper() == self.caselessmatch and
                    (end >= len(instring) or instring[end].upper() not in identChars) and
                    (loc == 0 or instring[loc-1].upper() not in identChars) )
    else:
        matched = ( instring[loc] == self.firstMatchChar and
                    (self.matchLen == 1 or instring.startswith( self.match, loc )) and
                    (end >= len(instring) or instring[end] not in identChars) and
                    (loc == 0 or instring[loc-1] not in identChars) )
    if matched:
        return end, self.match

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
1573
1578
1583 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1584
1586 """Token to match a specified string, ignoring case of letters.
1587 Note: the matched results will always be in the case of the given
1588 match string, NOT the case of the input text.
1589 """
1591 super(CaselessLiteral,self).__init__( matchString.upper() )
1592
1593 self.returnString = matchString
1594 self.name = "'%s'" % self.returnString
1595 self.errmsg = "Expected " + self.name
1596
def parseImpl( self, instring, loc, doActions=True ):
    """Match C{self.match} at C{loc} after upper-casing the candidate text.

    Returns C{(loc + self.matchLen, self.returnString)} on success, so the
    reported token is the stored string, not the input text. Otherwise raises
    C{self.myException} with C{loc} and C{pstr} filled in.
    """
    end = loc + self.matchLen
    candidate = instring[loc:end]
    if candidate.upper() != self.match:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return end, self.returnString
1605
1609
def parseImpl( self, instring, loc, doActions=True ):
    """Case-insensitively match the keyword at C{loc}.

    The upper-cased input slice must equal C{self.caselessmatch}, and the
    keyword must not be embedded in a longer identifier: the character right
    after it (if any) and the character right before C{loc} (if any) must not
    be in C{self.identChars} once upper-cased.

    Returns C{(loc + self.matchLen, self.match)}; raises C{self.myException}
    with C{loc} and C{pstr} filled in when there is no match.
    """
    end = loc + self.matchLen
    identChars = self.identChars
    if ( instring[loc:end].upper() == self.caselessmatch and
         (end >= len(instring) or instring[end].upper() not in identChars) and
         # leading check added so this agrees with Keyword's caseless branch;
         # without it "xif" would match the keyword "if" at offset 1
         (loc == 0 or instring[loc-1].upper() not in identChars) ):
        return end, self.match

    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc
1619
1621 """Token for matching words composed of allowed character sets.
1622 Defined with string containing all allowed initial characters,
1623 an optional string containing allowed body characters (if omitted,
1624 defaults to the initial character set), and an optional minimum,
1625 maximum, and/or exact length. The default value for C{min} is 1 (a
1626 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1627 are 0, meaning no maximum or exact length restriction. An optional
1628 C{exclude} parameter can list characters that might be found in
1629 the input C{bodyChars} string; useful to define a word of all printables
1630 except for one or two characters, for instance.
1631 """
def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
    """Define a word by its allowed leading and body characters.

    Parameters:
     - initChars - characters allowed in the first position
     - bodyChars - characters allowed after the first position; when omitted
       or empty, the initial character set is used
     - min - minimum length, must be >= 1 (default=1)
     - max - maximum length; 0 means no maximum (default=0)
     - exact - exact length; when > 0 it overrides both min and max (default=0)
     - asKeyword - when true, the word may not touch other body characters
     - excludeChars - characters removed from C{initChars} and C{bodyChars}

    Raises C{ValueError} if C{min} is less than 1.
    """
    super(Word,self).__init__()
    if excludeChars:
        initChars = ''.join([c for c in initChars if c not in excludeChars])
        if bodyChars:
            bodyChars = ''.join([c for c in bodyChars if c not in excludeChars])
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars :
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        # no blanks and no length limits: one compiled regex can do the whole match
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            # Only a single initial character may be emitted as a literal prefix.
            # Testing the length of bodyChars here would turn a multi-character
            # initChars into a literal sequence instead of a character class.
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile( self.reString )
        except Exception:
            # regex unusable - parseImpl falls back to its character loop
            self.re = None
1685
def parseImpl( self, instring, loc, doActions=True ):
    """Match one word starting at C{loc}.

    When C{self.re} is set the compiled regex does all the work. Otherwise
    the first character must be in C{self.initChars}, following characters
    are consumed while they are in C{self.bodyChars} (up to C{self.maxLen}),
    and the result is rejected if it is shorter than C{self.minLen}, if a
    specified maximum was reached but more body characters follow, or if
    C{self.asKeyword} is set and a body character touches either end.

    Returns C{(endLoc, matchedText)}; raises C{self.myException} on failure.
    """
    compiled = self.re
    if compiled:
        found = compiled.match( instring, loc )
        if found:
            return found.end(), found.group()
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    if instring[loc] not in self.initChars:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    start = loc
    bodychars = self.bodyChars
    instrlen = len(instring)
    stop = min( start + self.maxLen, instrlen )
    loc = start + 1
    while loc < stop and instring[loc] in bodychars:
        loc += 1

    tooShort = loc - start < self.minLen
    overMax = self.maxSpecified and loc < instrlen and instring[loc] in bodychars
    touching = self.asKeyword and (
        (start > 0 and instring[start-1] in bodychars) or
        (loc < instrlen and instring[loc] in bodychars) )

    if tooShort or overMax or touching:
        # the failure is reported at the position where scanning stopped
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
1730
1732 try:
1733 return super(Word,self).__str__()
1734 except:
1735 pass
1736
1737
1738 if self.strRepr is None:
1739
1740 def charsAsStr(s):
1741 if len(s)>4:
1742 return s[:4]+"..."
1743 else:
1744 return s
1745
1746 if ( self.initCharsOrig != self.bodyCharsOrig ):
1747 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1748 else:
1749 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1750
1751 return self.strRepr
1752
1753
1755 """Token for matching strings that match a given regular expression.
1756 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1757 """
1758 compiledREtype = type(re.compile("[A-Z]"))
def __init__( self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already compiled regular expression object, in
    which case it is used directly and its source text is recorded as
    C{self.pattern}.

    Raises C{ValueError} for any other pattern type, and re-raises the
    C{re} compilation error (after a C{SyntaxWarning}) for an invalid pattern string.
    """
    super(Regex,self).__init__()

    if isinstance(pattern, basestring):
        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # keep the regex source text; str() of a compiled pattern yields the
        # object's repr, which is useless as a pattern or display name
        self.pattern = \
        self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1792
def parseImpl( self, instring, loc, doActions=True ):
    """Apply the compiled regex at C{loc}.

    On success returns C{(matchEnd, results)}, where C{results} is a
    C{ParseResults} built from the matched text, with one named entry per
    named group of the regex. On failure raises C{self.myException} with
    C{loc} and C{pstr} filled in.
    """
    found = self.re.match( instring, loc )
    if not found:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    namedGroups = found.groupdict()
    tokens = ParseResults( found.group() )
    for groupName in namedGroups:
        tokens[groupName] = namedGroups[groupName]
    return found.end(), tokens
1808
1810 try:
1811 return super(Regex,self).__str__()
1812 except:
1813 pass
1814
1815 if self.strRepr is None:
1816 self.strRepr = "Re:(%s)" % repr(self.pattern)
1817
1818 return self.strRepr
1819
1820
1822 """Token for matching strings that are delimited by quoting characters.
1823 """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

       Raises C{SyntaxError} (after a C{SyntaxWarning}) if either delimiter
       is empty once surrounding whitespace has been stripped.
    """
    super(QuotedString,self).__init__()

    # surrounding whitespace is stripped from both delimiters before use
    quoteChar = quoteChar.strip()
    if not len(quoteChar):
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not len(endQuoteChar):
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # pieces shared by the single-line and multi-line forms of the pattern
    openingRe = re.escape(quoteChar)
    endFirstInClass = _escapeRegexRangeChars(endQuoteChar[0])
    escInClass = ''
    if escChar is not None:
        escInClass = _escapeRegexRangeChars(escChar) or ''

    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        lineBreaksInClass = ''
    else:
        # single-line strings additionally exclude CR and LF from the body
        self.flags = 0
        lineBreaksInClass = r'\n\r'

    fragments = [ r'%s(?:[^%s%s%s]' % (openingRe, endFirstInClass, lineBreaksInClass, escInClass) ]

    if len(endQuoteChar) > 1:
        # allow proper prefixes of a multi-character end quote inside the body,
        # longest prefix first
        partialEnds = []
        for i in range(len(endQuoteChar)-1, 0, -1):
            partialEnds.append( "%s[^%s]" % (re.escape(endQuoteChar[:i]),
                                            _escapeRegexRangeChars(endQuoteChar[i])) )
        fragments.append( '|(?:' + ')|(?:'.join(partialEnds) + ')' )
    if escQuote:
        fragments.append( r'|(?:%s)' % re.escape(escQuote) )
    if escChar:
        fragments.append( r'|(?:%s.)' % re.escape(escChar) )
        charset = ''.join(set(quoteChar[0]+endQuoteChar[0])).replace('^',r'\^').replace('-',r'\-')
        self.escCharReplacePattern = re.escape(escChar)+("([%s])" % charset)
    fragments.append( r')*%s' % re.escape(endQuoteChar) )
    self.pattern = ''.join(fragments)

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1897
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string starting at C{loc}.

    Returns C{(matchEnd, text)}. With C{self.unquoteResults} set, the
    delimiters are removed, escape-character sequences are reduced to the
    escaped character, and C{self.escQuote} sequences are replaced by
    C{self.endQuoteChar}. Otherwise the raw matched text is returned.

    Raises C{self.myException} with C{loc} and C{pstr} filled in when there is no match.
    """
    # cheap first-character test before running the regex
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    else:
        result = None
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off the opening and closing delimiters
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; raw string so the group reference
            # is not read as an (invalid) string escape sequence
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1924
1926 try:
1927 return super(QuotedString,self).__str__()
1928 except:
1929 pass
1930
1931 if self.strRepr is None:
1932 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
1933
1934 return self.strRepr
1935
1936
1938 """Token for matching words composed of characters *not* in a given set.
1939 Defined with string containing all disallowed characters, and an optional
1940 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
1941 minimum value < 1 is not valid); the default values for C{max} and C{exact}
1942 are 0, meaning no maximum or exact length restriction.
1943 """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Define a run of characters that are NOT in C{notChars}.

    Parameters:
     - notChars - the disallowed characters
     - min - minimum length, must be >= 1 (default=1)
     - max - maximum length; 0 means no maximum (default=0)
     - exact - exact length; when > 0 it overrides both min and max (default=0)

    Whitespace skipping is turned off for this element.
    Raises C{ValueError} if C{min} is less than 1.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
1967
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from C{loc} while they are not in C{self.notChars}.

    At most C{self.maxLen} characters are taken. Returns
    C{(endLoc, matchedText)}; raises C{self.myException} if the first
    character is disallowed or fewer than C{self.minLen} characters matched.
    """
    notchars = self.notChars
    if instring[loc] in notchars:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    start = loc
    stop = min( start + self.maxLen, len(instring) )
    loc = start + 1
    while loc < stop and instring[loc] not in notchars:
        loc += 1

    if loc - start < self.minLen:
        # the failure is reported at the position where scanning stopped
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
1992
1994 try:
1995 return super(CharsNotIn, self).__str__()
1996 except:
1997 pass
1998
1999 if self.strRepr is None:
2000 if len(self.notChars) > 4:
2001 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2002 else:
2003 self.strRepr = "!W:(%s)" % self.notChars
2004
2005 return self.strRepr
2006
2008 """Special matching class for matching whitespace. Normally, whitespace is ignored
2009 by pyparsing grammars. This class is included when some whitespace structures
2010 are significant. Define with a string containing the whitespace characters to be
2011 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2012 as defined for the C{L{Word}} class."""
2013 whiteStrs = {
2014 " " : "<SPC>",
2015 "\t": "<TAB>",
2016 "\n": "<LF>",
2017 "\r": "<CR>",
2018 "\f": "<FF>",
2019 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """Define a token that matches a run of the whitespace characters in C{ws}.

    The characters in C{ws} are removed from this element's own skippable
    whitespace, and the element is named by joining the C{White.whiteStrs}
    label of each character in C{ws}.

    Parameters:
     - ws - the whitespace characters to match
     - min - minimum length (default=1)
     - max - maximum length; 0 means no maximum (default=0)
     - exact - exact length; when > 0 it overrides both min and max (default=0)
    """
    super(White,self).__init__()
    self.matchWhite = ws

    # characters being matched must not also be skipped as leading whitespace
    stillSkipped = [c for c in self.whiteChars if c not in ws]
    self.setWhitespaceChars( "".join(stillSkipped) )

    labels = [White.whiteStrs[c] for c in ws]
    self.name = "".join(labels)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    if exact > 0:
        # an exact length pins both bounds
        self.minLen = exact
        self.maxLen = exact
    else:
        self.minLen = min
        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT
2039
def parseImpl( self, instring, loc, doActions=True ):
    """Consume a run of characters from C{self.matchWhite} starting at C{loc}.

    At most C{self.maxLen} characters are taken. Returns
    C{(endLoc, matchedText)}; raises C{self.myException} if the first
    character is not in the set or fewer than C{self.minLen} characters matched.
    """
    wanted = self.matchWhite
    if instring[loc] not in wanted:
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    start = loc
    stop = min( start + self.maxLen, len(instring) )
    loc = start + 1
    while loc < stop and instring[loc] in wanted:
        loc += 1

    if loc - start < self.minLen:
        # the failure is reported at the position where scanning stopped
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure

    return loc, instring[start:loc]
2062
2063
2066 super(_PositionToken,self).__init__()
2067 self.name=self.__class__.__name__
2068 self.mayReturnEmpty = True
2069 self.mayIndexError = False
2070
2072 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2076
2078 if col(loc,instring) != self.col:
2079 instrlen = len(instring)
2080 if self.ignoreExprs:
2081 loc = self._skipIgnorables( instring, loc )
2082 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2083 loc += 1
2084 return loc
2085
def parseImpl( self, instring, loc, doActions=True ):
    """Advance from C{loc} to column C{self.col}.

    Returns C{(newLoc, skippedText)}, where the skipped text is the slice
    between C{loc} and the target position. Raises C{ParseException} if the
    current column is already past C{self.col}.
    """
    thiscol = col( loc, instring )
    shortfall = self.col - thiscol
    if shortfall < 0:
        raise ParseException( instring, loc, "Text not in expected column", self )
    newloc = loc + shortfall
    return newloc, instring[ loc:newloc ]
2093
2095 """Matches if current position is at the beginning of a line within the parse string"""
2100
2102 preloc = super(LineStart,self).preParse(instring,loc)
2103 if instring[preloc] == "\n":
2104 loc += 1
2105 return loc
2106
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when C{loc} is at the start of a line.

    That is the case when C{loc} is 0, when C{loc} equals the position
    C{preParse} reaches from the start of the string, or when the preceding
    character is a newline. Otherwise raises C{self.myException}.
    """
    if loc == 0:
        return loc, []
    if loc == self.preParse( instring, 0 ):
        return loc, []
    if instring[loc-1] == "\n":
        return loc, []

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
2117
2119 """Matches if current position is at the end of a line within the parse string"""
2124
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed at a newline character or exactly at the end of the string.

    Returns C{(loc+1, "\\n")} when the character at C{loc} is a newline and
    C{(loc+1, [])} when C{loc} equals the string length. Any other position
    raises C{self.myException} with C{loc} and C{pstr} filled in.
    """
    instrlen = len(instring)
    if loc == instrlen:
        return loc+1, []
    if loc < instrlen and instring[loc] == "\n":
        return loc+1, "\n"

    failure = self.myException
    failure.loc, failure.pstr = loc, instring
    raise failure
2142
2144 """Matches if current position is at the beginning of the parse string"""
2148
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when C{loc} is at the start of the parse string.

    A non-zero C{loc} is still accepted when it equals the position that
    C{preParse} reaches from offset 0. Otherwise raises C{self.myException}.
    """
    if loc != 0 and loc != self.preParse( instring, 0 ):
        failure = self.myException
        failure.loc, failure.pstr = loc, instring
        raise failure
    return loc, []
2159
2161 """Matches if current position is at the end of the parse string"""
2165
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens only at or beyond the end of the parse string.

    Returns C{(loc+1, [])} when C{loc} equals the string length and
    C{(loc, [])} when C{loc} is already past it. Raises C{self.myException}
    with C{loc} and C{pstr} filled in when input remains.
    """
    instrlen = len(instring)
    if loc < instrlen:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == instrlen:
        return loc+1, []
    # only loc > instrlen is left; the old trailing else branch could never run
    return loc, []
2182
2184 """Matches if the current position is at the beginning of a Word, and
2185 is not preceded by any character in a given set of C{wordChars}
2186 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2187 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
2188 the string being parsed, or at the beginning of a line.
2189 """
2191 super(WordStart,self).__init__()
2192 self.wordChars = set(wordChars)
2193 self.errmsg = "Not at the start of a word"
2194
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, returning C{(loc, [])}, when C{loc} starts a word.

    Position 0 always matches.  Elsewhere the previous character must not
    be in C{self.wordChars} and the current character must be; otherwise
    C{self.myException} is updated with the location and input, and raised.
    """
    if loc == 0:
        return loc, []

    wordChars = self.wordChars
    if instring[loc-1] in wordChars or instring[loc] not in wordChars:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2204
2206 """Matches if the current position is at the end of a Word, and
2207 is not followed by any character in a given set of C{wordChars}
2208 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
2209 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
2210 the string being parsed, or at the end of a line.
2211 """
2213 super(WordEnd,self).__init__()
2214 self.wordChars = set(wordChars)
2215 self.skipWhitespace = False
2216 self.errmsg = "Not at the end of a word"
2217
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed, returning C{(loc, [])}, when C{loc} is at the end of a word.

    A position at or past the end of the input (including an empty input)
    always matches.  Inside the input, the current character must not be
    in C{self.wordChars} and the previous character must be; otherwise
    C{self.myException} is updated with the location and input, and raised.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        # At loc 0 there is no previous character: instring[loc-1] would wrap
        # around to the LAST character of the input, so the outcome used to
        # depend on how the string ends.  No word can end at position 0.
        if (loc == 0 or
            instring[loc] in self.wordChars or
            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
    return loc, []
2229
2230
2232 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions as the list C{self.exprs}.

    A list is kept as-is (same object), a string is wrapped in a single
    C{Literal}, any other iterable is copied into a list, and a
    non-iterable object becomes a one-element list.
    """
    super(ParseExpression,self).__init__(savelist)

    if isinstance( exprs, list ):
        contained = exprs
    elif isinstance( exprs, basestring ):
        contained = [ Literal( exprs ) ]
    else:
        try:
            contained = list( exprs )
        except TypeError:
            # not iterable: treat it as a single expression
            contained = [ exprs ]

    self.exprs = contained
    self.callPreparse = False
2245
2247 return self.exprs[i]
2248
2250 self.exprs.append( other )
2251 self.strRepr = None
2252 return self
2253
2255 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
2256 all contained expressions."""
2257 self.skipWhitespace = False
2258 self.exprs = [ e.copy() for e in self.exprs ]
2259 for e in self.exprs:
2260 e.leaveWhitespace()
2261 return self
2262
2264 if isinstance( other, Suppress ):
2265 if other not in self.ignoreExprs:
2266 super( ParseExpression, self).ignore( other )
2267 for e in self.exprs:
2268 e.ignore( self.ignoreExprs[-1] )
2269 else:
2270 super( ParseExpression, self).ignore( other )
2271 for e in self.exprs:
2272 e.ignore( self.ignoreExprs[-1] )
2273 return self
2274
2276 try:
2277 return super(ParseExpression,self).__str__()
2278 except:
2279 pass
2280
2281 if self.strRepr is None:
2282 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
2283 return self.strRepr
2284
2286 super(ParseExpression,self).streamline()
2287
2288 for e in self.exprs:
2289 e.streamline()
2290
2291
2292
2293
2294 if ( len(self.exprs) == 2 ):
2295 other = self.exprs[0]
2296 if ( isinstance( other, self.__class__ ) and
2297 not(other.parseAction) and
2298 other.resultsName is None and
2299 not other.debug ):
2300 self.exprs = other.exprs[:] + [ self.exprs[1] ]
2301 self.strRepr = None
2302 self.mayReturnEmpty |= other.mayReturnEmpty
2303 self.mayIndexError |= other.mayIndexError
2304
2305 other = self.exprs[-1]
2306 if ( isinstance( other, self.__class__ ) and
2307 not(other.parseAction) and
2308 other.resultsName is None and
2309 not other.debug ):
2310 self.exprs = self.exprs[:-1] + other.exprs[:]
2311 self.strRepr = None
2312 self.mayReturnEmpty |= other.mayReturnEmpty
2313 self.mayIndexError |= other.mayIndexError
2314
2315 return self
2316
2320
def validate( self, validateTrace=[] ):
    """Validate every contained expression, then check for recursion.

    Each child receives a copy of C{validateTrace} extended with this
    expression; the default list is never modified.
    """
    childTrace = validateTrace[:] + [self]
    for child in self.exprs:
        child.validate(childTrace)
    self.checkRecursion( [] )
2326
2331
2332 -class And(ParseExpression):
2333 """Requires all given C{ParseExpression}s to be found in the given order.
2334 Expressions may be separated by whitespace.
2335 May be constructed using the C{'+'} operator.
2336 """
2337
2342
def __init__( self, exprs, savelist = True ):
    """Build an C{And} from the given expressions.

    C{mayReturnEmpty} is true only if every contained expression may
    return empty.  Whitespace handling is inherited from the first
    contained expression.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    # Read from self.exprs, not the raw argument: the base class has already
    # normalized strings, generators and single expressions into a list, and
    # the raw argument may not be indexable or may hold plain strings.
    leading = self.exprs[0]
    self.setWhitespaceChars( leading.whiteChars )
    self.skipWhitespace = leading.skipWhitespace
    self.callPreparse = True
2353
def parseImpl( self, instring, loc, doActions=True ):
    """Parse every contained expression in order, accumulating the tokens.

    The first expression is parsed with C{callPreParse=False}.  An
    C{And._ErrorStop} marker is not parsed itself; once one has been seen,
    a failure of any later expression is raised as a
    C{ParseSyntaxException}.  Returns the final location and the
    accumulated results.
    """
    loc, accumulated = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
    pastErrorStop = False
    for sub in self.exprs[1:]:
        if isinstance(sub, And._ErrorStop):
            pastErrorStop = True
            continue
        if not pastErrorStop:
            loc, subTokens = sub._parse( instring, loc, doActions )
        else:
            try:
                loc, subTokens = sub._parse( instring, loc, doActions )
            except ParseSyntaxException:
                raise
            except ParseBaseException:
                # promote any other parse failure to a syntax error
                cause = sys.exc_info()[1]
                raise ParseSyntaxException(cause)
            except IndexError:
                raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
        if subTokens or subTokens.keys():
            accumulated += subTokens
    return loc, accumulated
2378
2380 if isinstance( other, basestring ):
2381 other = Literal( other )
2382 return self.append( other )
2383
2385 subRecCheckList = parseElementList[:] + [ self ]
2386 for e in self.exprs:
2387 e.checkRecursion( subRecCheckList )
2388 if not e.mayReturnEmpty:
2389 break
2390
2392 if hasattr(self,"name"):
2393 return self.name
2394
2395 if self.strRepr is None:
2396 self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2397
2398 return self.strRepr
2399
2400
2401 -class Or(ParseExpression):
2402 """Requires that at least one C{ParseExpression} is found.
2403 If two expressions match, the expression that matches the longest string will be used.
2404 May be constructed using the C{'^'} operator.
2405 """
def __init__( self, exprs, savelist = False ):
    """Build an C{Or}; it may return empty if any alternative may."""
    super(Or,self).__init__(exprs, savelist)
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
    else:
        # no alternative can match empty (also the case with no alternatives)
        self.mayReturnEmpty = False
2413
def parseImpl( self, instring, loc, doActions=True ):
    """Try every alternative and parse with the one that matches furthest.

    Each alternative is first probed with C{tryParse}; on a tie the earlier
    alternative wins.  If none matches, the failure that got furthest into
    the input is raised, or a generic C{ParseException} when there were no
    alternatives at all.
    """
    furthestFailLoc = -1
    furthestFailure = None
    bestEnd = -1
    for alternative in self.exprs:
        try:
            endLoc = alternative.tryParse( instring, loc )
        except ParseException:
            err = sys.exc_info()[1]
            if err.loc > furthestFailLoc:
                furthestFailure = err
                furthestFailLoc = err.loc
        except IndexError:
            inputLen = len(instring)
            if inputLen > furthestFailLoc:
                furthestFailure = ParseException(instring,inputLen,alternative.errmsg,self)
                furthestFailLoc = inputLen
        else:
            if endLoc > bestEnd:
                bestEnd = endLoc
                bestAlternative = alternative

    if bestEnd >= 0:
        # re-parse for real, this time honoring doActions
        return bestAlternative._parse( instring, loc, doActions )

    if furthestFailure is not None:
        raise furthestFailure
    raise ParseException(instring, loc, "no defined alternatives to match", self)
2442
2444 if isinstance( other, basestring ):
2445 other = ParserElement.literalStringClass( other )
2446 return self.append( other )
2447
2449 if hasattr(self,"name"):
2450 return self.name
2451
2452 if self.strRepr is None:
2453 self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2454
2455 return self.strRepr
2456
2458 subRecCheckList = parseElementList[:] + [ self ]
2459 for e in self.exprs:
2460 e.checkRecursion( subRecCheckList )
2461
2462
2464 """Requires that at least one C{ParseExpression} is found.
2465 If two expressions match, the first one listed is the one that will match.
2466 May be constructed using the C{'|'} operator.
2467 """
def __init__( self, exprs, savelist = False ):
    """Build a C{MatchFirst}.

    With a falsy C{exprs} argument the expression may return empty;
    otherwise it may return empty only if some alternative may.
    """
    super(MatchFirst,self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return

    self.mayReturnEmpty = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
2478
def parseImpl( self, instring, loc, doActions=True ):
    """Return the result of the first alternative that matches at C{loc}.

    Alternatives are tried in order.  If none matches, the failure that
    got furthest into the input is raised; with no alternatives at all a
    generic C{ParseException} is raised.
    """
    maxExcLoc = -1
    maxException = None
    for e in self.exprs:
        try:
            return e._parse( instring, loc, doActions )
        except ParseException:
            # fetch the exception via sys.exc_info() (as Or and And do) so
            # the statement is valid on both Python 2 and Python 3
            err = sys.exc_info()[1]
            if err.loc > maxExcLoc:
                maxException = err
                maxExcLoc = err.loc
        except IndexError:
            if len(instring) > maxExcLoc:
                maxException = ParseException(instring,len(instring),e.errmsg,self)
                maxExcLoc = len(instring)

    # only reached when no alternative matched
    if maxException is not None:
        raise maxException
    raise ParseException(instring, loc, "no defined alternatives to match", self)
2501
2503 if isinstance( other, basestring ):
2504 other = ParserElement.literalStringClass( other )
2505 return self.append( other )
2506
2508 if hasattr(self,"name"):
2509 return self.name
2510
2511 if self.strRepr is None:
2512 self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2513
2514 return self.strRepr
2515
2517 subRecCheckList = parseElementList[:] + [ self ]
2518 for e in self.exprs:
2519 e.checkRecursion( subRecCheckList )
2520
2521
2522 -class Each(ParseExpression):
2523 """Requires all given C{ParseExpression}s to be found, but in any order.
2524 Expressions may be separated by whitespace.
2525 May be constructed using the C{'&'} operator.
2526 """
def __init__( self, exprs, savelist = True ):
    """Build an C{Each}; it may return empty only if every member may.

    Grouping of the members into required/optional sets is deferred to the
    first parse, signalled by C{self.initExprGroups}.
    """
    super(Each,self).__init__(exprs, savelist)
    for member in self.exprs:
        if not member.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    else:
        self.mayReturnEmpty = True
    self.skipWhitespace = True
    self.initExprGroups = True
2536
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions, in whatever order they appear.

    On the first call the members are sorted into required, optional and
    repeating groups.  A trial pass using C{tryParse} then discovers the
    order in which the members match; a C{ParseException} is raised if a
    required member never matches.  Finally the members are parsed for
    real in the discovered order and their results merged, combining the
    values of results names that occur more than once.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        members = self.exprs
        wrappedOptionals = [ m.expr for m in members if isinstance(m,Optional) ]
        emptyMatchers = [ m for m in members if m.mayReturnEmpty and m not in wrappedOptionals ]
        self.optionals = wrappedOptionals + emptyMatchers
        self.multioptionals = [ m.expr for m in members if isinstance(m,ZeroOrMore) ]
        self.multirequired = [ m.expr for m in members if isinstance(m,OneOrMore) ]
        self.required = [ m for m in members if not isinstance(m,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    scanLoc = loc
    pendingRequired = self.required[:]
    pendingOptional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
        failures = 0
        for candidate in candidates:
            try:
                scanLoc = candidate.tryParse( instring, scanLoc )
            except ParseException:
                failures += 1
                continue
            matchOrder.append(candidate)
            if candidate in pendingRequired:
                pendingRequired.remove(candidate)
            elif candidate in pendingOptional:
                pendingOptional.remove(candidate)
        if failures == len(candidates):
            break

    if pendingRequired:
        missing = ", ".join( [ _ustr(m) for m in pendingRequired ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # unmatched Optionals are still parsed, so that they run like any other member
    matchOrder += [m for m in self.exprs if isinstance(m,Optional) and m.expr in pendingOptional]

    parsed = []
    for member in matchOrder:
        loc, memberResults = member._parse(instring,loc,doActions)
        parsed.append(memberResults)

    merged = ParseResults([])
    for memberResults in parsed:
        repeated = {}
        for name in memberResults.keys():
            if name in merged.keys():
                combined = ParseResults(merged[name])
                combined += ParseResults(memberResults[name])
                repeated[name] = combined
        merged += ParseResults(memberResults)
        # re-assign names seen before so they hold the combined values
        for name, combined in repeated.items():
            merged[name] = combined
    return loc, merged
2594
2596 if hasattr(self,"name"):
2597 return self.name
2598
2599 if self.strRepr is None:
2600 self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"
2601
2602 return self.strRepr
2603
2605 subRecCheckList = parseElementList[:] + [ self ]
2606 for e in self.exprs:
2607 e.checkRecursion( subRecCheckList )
2608
2609
2611 """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression (a string is converted to a C{Literal}).

    When an expression is given, its index-error, empty-match, whitespace,
    save-as-list and pre-parse settings are copied onto the wrapper, and
    its ignore expressions are appended to the wrapper's own.
    """
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2626
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression (without pre-parsing).

    Raises C{ParseException} when no expression has been set.
    """
    wrapped = self.expr
    if wrapped is None:
        raise ParseException("",loc,self.errmsg,self)
    return wrapped._parse( instring, loc, doActions, callPreParse=False )
2632
2634 self.skipWhitespace = False
2635 self.expr = self.expr.copy()
2636 if self.expr is not None:
2637 self.expr.leaveWhitespace()
2638 return self
2639
2641 if isinstance( other, Suppress ):
2642 if other not in self.ignoreExprs:
2643 super( ParseElementEnhance, self).ignore( other )
2644 if self.expr is not None:
2645 self.expr.ignore( self.ignoreExprs[-1] )
2646 else:
2647 super( ParseElementEnhance, self).ignore( other )
2648 if self.expr is not None:
2649 self.expr.ignore( self.ignoreExprs[-1] )
2650 return self
2651
2657
2659 if self in parseElementList:
2660 raise RecursiveGrammarException( parseElementList+[self] )
2661 subRecCheckList = parseElementList[:] + [ self ]
2662 if self.expr is not None:
2663 self.expr.checkRecursion( subRecCheckList )
2664
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression (if any), then check for recursion.

    The wrapped expression receives a copy of C{validateTrace} extended
    with this element; the default list is never modified.
    """
    childTrace = validateTrace[:] + [self]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(childTrace)
    self.checkRecursion( [] )
2670
2672 try:
2673 return super(ParseElementEnhance,self).__str__()
2674 except:
2675 pass
2676
2677 if self.strRepr is None and self.expr is not None:
2678 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
2679 return self.strRepr
2680
2681
2683 """Lookahead matching of the given parse expression. C{FollowedBy}
2684 does *not* advance the parsing position within the input string, it only
2685 verifies that the specified parse expression matches at the current
2686 position. C{FollowedBy} always returns a null token list."""
2690
def parseImpl( self, instring, loc, doActions=True ):
    """Check that the wrapped expression matches at C{loc}, consuming nothing.

    Any exception raised by the trial parse propagates; on success the
    location is returned unchanged together with an empty token list.
    """
    lookahead = self.expr
    lookahead.tryParse( instring, loc )
    return loc, []
2694
2695
2696 -class NotAny(ParseElementEnhance):
2697 """Lookahead to disallow matching with the given parse expression. C{NotAny}
2698 does *not* advance the parsing position within the input string, it only
2699 verifies that the specified parse expression does *not* match at the current
2700 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
2701 always returns a null token list. May be constructed using the '~' operator."""
2703 super(NotAny,self).__init__(expr)
2704
2705 self.skipWhitespace = False
2706 self.mayReturnEmpty = True
2707 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2708
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed only if the wrapped expression does NOT match at C{loc}.

    When the trial parse fails with C{ParseException} or C{IndexError},
    the location is returned unchanged with an empty token list.  When it
    succeeds, C{self.myException} is updated with the location and input,
    and raised.
    """
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        return loc, []

    # the unwanted expression matched
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2721
2723 if hasattr(self,"name"):
2724 return self.name
2725
2726 if self.strRepr is None:
2727 self.strRepr = "~{" + _ustr(self.expr) + "}"
2728
2729 return self.strRepr
2730
2731
2733 """Optional repetition of zero or more of the given expression."""
2737
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression as many times as possible, possibly zero.

    Repetition stops at the first C{ParseException} or C{IndexError}; the
    location and tokens gathered so far are returned (an empty list if
    the very first attempt fails).
    """
    collected = []
    try:
        loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipIgnorables = ( len(self.ignoreExprs) > 0 )
        while True:
            nextStart = loc
            if skipIgnorables:
                nextStart = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextStart, doActions )
            if moreTokens or moreTokens.keys():
                collected += moreTokens
    except (ParseException,IndexError):
        # a failed repetition simply ends the loop
        pass

    return loc, collected
2755
2757 if hasattr(self,"name"):
2758 return self.name
2759
2760 if self.strRepr is None:
2761 self.strRepr = "[" + _ustr(self.expr) + "]..."
2762
2763 return self.strRepr
2764
2769
2770
2772 """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression one or more times.

    The first match is mandatory, so its exception propagates.  Further
    repetitions stop at the first C{ParseException} or C{IndexError}, and
    the location and tokens gathered so far are returned.
    """
    # must match at least once - this call is deliberately outside the try
    loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        skipIgnorables = ( len(self.ignoreExprs) > 0 )
        while True:
            nextStart = loc
            if skipIgnorables:
                nextStart = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextStart, doActions )
            if moreTokens or moreTokens.keys():
                collected += moreTokens
    except (ParseException,IndexError):
        # a failed repetition simply ends the loop
        pass

    return loc, collected
2790
2792 if hasattr(self,"name"):
2793 return self.name
2794
2795 if self.strRepr is None:
2796 self.strRepr = "{" + _ustr(self.expr) + "}..."
2797
2798 return self.strRepr
2799
2804
2811
2812 _optionalNotMatched = _NullToken()
2814 """Optional matching of the given expression.
2815 A default return string can also be specified, if the optional expression
2816 is not found.
2817 """
2819 super(Optional,self).__init__( exprs, savelist=False )
2820 self.defaultValue = default
2821 self.mayReturnEmpty = True
2822
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression if possible, else supply the default.

    When the wrapped expression fails with C{ParseException} or
    C{IndexError}, the location is left unchanged.  The tokens are then
    empty if no default was given; otherwise they hold the default value,
    stored under the wrapped expression's results name when it has one.
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        default = self.defaultValue
        if default is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ default ])
            tokens[self.expr.resultsName] = default
        else:
            tokens = [ default ]
    return loc, tokens
2836
2838 if hasattr(self,"name"):
2839 return self.name
2840
2841 if self.strRepr is None:
2842 self.strRepr = "[" + _ustr(self.expr) + "]"
2843
2844 return self.strRepr
2845
2846
2847 -class SkipTo(ParseElementEnhance):
2848 """Token for skipping over all undefined text until the matched expression is found.
2849 If C{include} is set to true, the matched expression is also parsed (the skipped text
2850 and matched expression are returned as a 2-element list). The C{ignore}
2851 argument is used to define grammars (typically quoted strings and comments) that
2852 might contain false matches.
2853 """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Set up a skip to the target expression C{other}.

    C{include} selects whether the target itself is parsed as part of the
    match, C{ignore} is an expression to step over while scanning, and
    C{failOn} (an expression, or a string converted to a C{Literal})
    aborts the skip when it matches.
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.includeMatch = include
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.asList = False
    # None is not a string, so it is stored unchanged like any expression
    if isinstance(failOn, basestring):
        self.failOn = Literal(failOn)
    else:
        self.failOn = failOn
    self.errmsg = "No match found for "+_ustr(self.expr)
2866
def parseImpl( self, instring, loc, doActions=True ):
    """Advance one character at a time until the target expression matches.

    At each position: a match of C{self.failOn} aborts with a
    C{ParseException}; any run of C{self.ignoreExpr} matches is stepped
    over; then the target is tried without parse actions.  On success the
    skipped text is returned, and if C{self.includeMatch} is set the
    target is parsed for real and its tokens, when non-empty, are returned
    with the skipped text.  If the end of the input is passed without a
    match, C{self.myException} is raised.
    """
    scanStart = loc
    inputLen = len(instring)
    target = self.expr
    abortOnFail = False
    while loc <= inputLen:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    pass
                else:
                    # flag it so the handler below re-raises instead of advancing
                    abortOnFail = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                abortOnFail = False
            if self.ignoreExpr is not None:
                while True:
                    try:
                        loc = self.ignoreExpr.tryParse(instring,loc)
                    except ParseBaseException:
                        break
            # probe only: no parse actions, no preParse
            target._parse( instring, loc, doActions=False, callPreParse=False )
            skipped = instring[scanStart:loc]
            if not self.includeMatch:
                return loc, [ skipped ]
            loc, matched = target._parse(instring,loc,doActions,callPreParse=False)
            if not matched:
                return loc, [ skipped ]
            combined = ParseResults( skipped )
            combined += matched
            return loc, [ combined ]
        except (ParseException,IndexError):
            if abortOnFail:
                raise
            loc += 1

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2911
2912 -class Forward(ParseElementEnhance):
2913 """Forward declaration of an expression to be defined later -
2914 used for recursive grammars, such as algebraic infix notation.
2915 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
2916
2917 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
2918 Specifically, '|' has a lower precedence than '<<', so that::
2919 fwdExpr << a | b | c
2920 will actually be evaluated as::
2921 (fwdExpr << a) | b | c
2922 thereby leaving b and c out as parseable alternatives. It is recommended that you
2923 explicitly group the values inserted into the C{Forward}::
2924 fwdExpr << (a | b | c)
2925 Converting to use the '<<=' operator instead will avoid this problem.
2926 """
2929
2931 if isinstance( other, basestring ):
2932 other = ParserElement.literalStringClass(other)
2933 self.expr = other
2934 self.mayReturnEmpty = other.mayReturnEmpty
2935 self.strRepr = None
2936 self.mayIndexError = self.expr.mayIndexError
2937 self.mayReturnEmpty = self.expr.mayReturnEmpty
2938 self.setWhitespaceChars( self.expr.whiteChars )
2939 self.skipWhitespace = self.expr.skipWhitespace
2940 self.saveAsList = self.expr.saveAsList
2941 self.ignoreExprs.extend(self.expr.ignoreExprs)
2942 return None
2943 __ilshift__ = __lshift__
2944
2946 self.skipWhitespace = False
2947 return self
2948
2950 if not self.streamlined:
2951 self.streamlined = True
2952 if self.expr is not None:
2953 self.expr.streamline()
2954 return self
2955
def validate( self, validateTrace=[] ):
    """Validate the forwarded expression, guarding against endless recursion.

    The contained expression is only descended into when this element is
    not already part of C{validateTrace}; the recursion check is always
    run afterwards.  The default list is never modified.
    """
    alreadyVisited = self in validateTrace
    if not alreadyVisited:
        childTrace = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(childTrace)
    self.checkRecursion([])
2962
2964 if hasattr(self,"name"):
2965 return self.name
2966
2967 self._revertClass = self.__class__
2968 self.__class__ = _ForwardNoRecurse
2969 try:
2970 if self.expr is not None:
2971 retString = _ustr(self.expr)
2972 else:
2973 retString = "None"
2974 finally:
2975 self.__class__ = self._revertClass
2976 return self.__class__.__name__ + ": " + retString
2977
2979 if self.expr is not None:
2980 return super(Forward,self).copy()
2981 else:
2982 ret = Forward()
2983 ret << self
2984 return ret
2985
2989
2991 """Abstract subclass of C{ParseExpression}, for converting parsed results."""
2992 - def __init__( self, expr, savelist=False ):
2995
2996 -class Upcase(TokenConverter):
2997 """Converter to upper case all matching tokens."""
2999 super(Upcase,self).__init__(*args)
3000 warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
3001 DeprecationWarning,stacklevel=2)
3002
def postParse( self, instring, loc, tokenlist ):
    """Return a list holding the upper-cased form of every token.

    Calls each token's own C{upper()} method rather than the module-level
    C{string.upper} function, which no longer exists in Python 3; the
    result is the same.
    """
    return [ tok.upper() for tok in tokenlist ]
3005
3006
3008 """Converter to concatenate all matching tokens to a single string.
3009 By default, the matching patterns must also be contiguous in the input string;
3010 this can be disabled by specifying C{'adjacent=False'} in the constructor.
3011 """
def __init__( self, expr, joinString="", adjacent=True ):
    """Set up a C{Combine} around C{expr}.

    With C{adjacent} true, C{leaveWhitespace()} is called before
    whitespace skipping is switched back on for the C{Combine} itself.
    C{joinString} is the separator used when the tokens are concatenated.
    """
    super(Combine,self).__init__( expr )
    if adjacent:
        self.leaveWhitespace()
    # re-enable skipping on the Combine itself (leaveWhitespace turned it off)
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
3021
3028
def postParse( self, instring, loc, tokenlist ):
    """Collapse the matched tokens into one concatenated string token.

    Works on a copy of C{tokenlist} whose list contents are replaced by
    the single concatenated string.  The copy is wrapped in a list when
    this expression has a results name and the copy has any keys.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    combined = tokenlist.copy()
    del combined[:]
    combined += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and len(combined.keys())>0:
        return [ combined ]
    return combined
3038
3039 -class Group(TokenConverter):
3040 """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
3042 super(Group,self).__init__( expr )
3043 self.saveAsList = True
3044
def postParse( self, instring, loc, tokenlist ):
    """Nest the matched tokens one level deeper, as a single list element."""
    grouped = [ tokenlist ]
    return grouped
3047
3048 -class Dict(TokenConverter):
3049 """Converter to return a repetitive expression as a list, but also as a dictionary.
3050 Each element can also be referenced using the first token in the expression as its key.
3051 Useful for tabular report scraping when the first column can be used as a item key.
3052 """
3054 super(Dict,self).__init__( exprs )
3055 self.saveAsList = True
3056
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to C{tokenlist} for every non-empty token group.

    The first element of each group is the key (an integer key is
    converted to its stripped string form).  The stored value is C{""} for
    a one-element group, the second element for a plain two-element group,
    and otherwise the remainder of the group, unwrapped when that
    remainder is a single element without keys.  The token list is
    wrapped in a list when this expression has a results name.
    """
    for position, entry in enumerate(tokenlist):
        if len(entry) == 0:
            continue

        key = entry[0]
        if isinstance(key,int):
            key = _ustr(entry[0]).strip()

        entryLen = len(entry)
        if entryLen == 1:
            value = ""
        elif entryLen == 2 and not isinstance(entry[1],ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder,ParseResults) and remainder.keys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value,position)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3080
3081
3083 """Converter for ignoring the results of a parsed expression."""
3084 - def postParse( self, instring, loc, tokenlist ):
3086
3089
3090
3092 """Wrapper for parse actions, to ensure they are only called once."""
3094 self.callable = _trim_arity(methodCall)
3095 self.called = False
3097 if not self.called:
3098 results = self.callable(s,l,t)
3099 self.called = True
3100 return results
3101 raise ParseException(s,l,"")
3104
3106 """Decorator for debugging parse actions."""
3107 f = _trim_arity(f)
3108 def z(*paArgs):
3109 thisFunc = f.func_name
3110 s,l,t = paArgs[-3:]
3111 if len(paArgs)>3:
3112 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
3113 sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
3114 try:
3115 ret = f(*paArgs)
3116 except Exception:
3117 exc = sys.exc_info()[1]
3118 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3119 raise
3120 sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3121 return ret
3122 try:
3123 z.__name__ = f.__name__
3124 except AttributeError:
3125 pass
3126 return z
3127
3128
3129
3130
3132 """Helper to define a delimited list of expressions - the delimiter defaults to ','.
3133 By default, the list elements and delimiters can have intervening whitespace, and
3134 comments, but this can be overridden by passing C{combine=True} in the constructor.
3135 If C{combine} is set to C{True}, the matching tokens are returned as a single token
3136 string, with the delimiters included; otherwise, the matching tokens are returned
3137 as a list of tokens, with the delimiters suppressed.
3138 """
3139 dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
3140 if combine:
3141 return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)
3142 else:
3143 return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
3144
3146 """Helper to define a counted list of expressions.
3147 This helper defines a pattern of the form::
3148 integer expr expr expr...
3149 where the leading integer tells how many expr expressions follow.
3150 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3151 """
3152 arrayExpr = Forward()
3153 def countFieldParseAction(s,l,t):
3154 n = t[0]
3155 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
3156 return []
3157 if intExpr is None:
3158 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
3159 else:
3160 intExpr = intExpr.copy()
3161 intExpr.setName("arrayLen")
3162 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
3163 return ( intExpr + arrayExpr )
3164
3166 ret = []
3167 for i in L:
3168 if isinstance(i,list):
3169 ret.extend(_flatten(i))
3170 else:
3171 ret.append(i)
3172 return ret
3173
3175 """Helper to define an expression that is indirectly defined from
3176 the tokens matched in a previous expression, that is, it looks
3177 for a 'repeat' of a previous expression. For example::
3178 first = Word(nums)
3179 second = matchPreviousLiteral(first)
3180 matchExpr = first + ":" + second
3181 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
3182 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
3183 If this is not desired, use C{matchPreviousExpr}.
3184 Do *not* use with packrat parsing enabled.
3185 """
3186 rep = Forward()
3187 def copyTokenToRepeater(s,l,t):
3188 if t:
3189 if len(t) == 1:
3190 rep << t[0]
3191 else:
3192
3193 tflat = _flatten(t.asList())
3194 rep << And( [ Literal(tt) for tt in tflat ] )
3195 else:
3196 rep << Empty()
3197 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3198 return rep
3199
3201 """Helper to define an expression that is indirectly defined from
3202 the tokens matched in a previous expression, that is, it looks
3203 for a 'repeat' of a previous expression. For example::
3204 first = Word(nums)
3205 second = matchPreviousExpr(first)
3206 matchExpr = first + ":" + second
3207 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
3208 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
3209 the expressions are evaluated first, and then compared, so
3210 C{"1"} is compared with C{"10"}.
3211 Do *not* use with packrat parsing enabled.
3212 """
3213 rep = Forward()
3214 e2 = expr.copy()
3215 rep << e2
3216 def copyTokenToRepeater(s,l,t):
3217 matchTokens = _flatten(t.asList())
3218 def mustMatchTheseTokens(s,l,t):
3219 theseTokens = _flatten(t.asList())
3220 if theseTokens != matchTokens:
3221 raise ParseException("",0,"")
3222 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
3223 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
3224 return rep
3225
3227
3228 for c in r"\^-]":
3229 s = s.replace(c,_bslash+c)
3230 s = s.replace("\n",r"\n")
3231 s = s.replace("\t",r"\t")
3232 return _ustr(s)
3233
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list (or any other
       iterable) of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
       if creating a C{Regex} raises an exception)

    An argument that is neither a string nor iterable triggers a
    C{SyntaxWarning} and is treated as an empty set of literals.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,basestring):
        symbols = strs.split()
    else:
        try:
            # always work on a private copy - the list is reordered below
            symbols = list(strs)
        except TypeError:
            warnings.warn("Invalid argument to oneOf, expected string or list",
                    SyntaxWarning, stacklevel=2)
            # without this the code below would fail on an unbound name
            symbols = []

    # Drop duplicates and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix, so longer literals are tested first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position - move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # best effort only - fall back to MatchFirst, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3292
3294 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3295 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
3296 in the proper order. The key pattern can include delimiting markers or punctuation,
3297 as long as they are suppressed, thereby leaving the significant key text. The value
3298 pattern can include named results, so that the C{Dict} results can include named token
3299 fields.
3300 """
3301 return Dict( ZeroOrMore( Group ( key + value ) ) )
3302
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.

       With the default C{asString=True}, the result is a plain string holding the
       original parsed text.

       With C{asString=False}, the result is a C{L{ParseResults}} whose only token is the
       original matched text, with any results names matched inside C{expr} left in place.
       Pass C{asString=False} when C{expr} contains expressions with results names whose
       values must be preserved."""
    # Zero-width markers whose parse action returns the current parse location.
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    stopMarker = startMarker.copy()
    # NOTE(review): presumably disabled so the end location is recorded before
    # any pre-parse skipping takes place - confirm against ParserElement.
    stopMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + stopMarker("_original_end")

    def sliceAsString(s,l,t):
        return s[t._original_start:t._original_end]

    def sliceIntoResults(s,l,t):
        # replace the token list with the raw text, then drop the bookkeeping names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        wrapped.setParseAction(sliceAsString)
    else:
        wrapped.setParseAction(sliceIntoResults)
    return wrapped
3331
3333 """Helper to undo pyparsing's default grouping of And expressions, even
3334 if all but one are non-empty."""
3335 return TokenConverter(expr).setParseAction(lambda t:t[0])
3336
3337
# convenience constants for positional expressions
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

# Grammar for regexp-style '[...]' character-set strings.
# A backslash followed by one punctuation character yields that character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# Take the hex digits as everything after the first 'x'/'X'.  str.lstrip(r'\0x')
# strips a *set* of characters, so it also ate leading zero digits ('\x00' became
# '' and int() raised) and it never removed an uppercase 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# A grouped pair (ParseResults) is expanded to every character from p[0] through
# p[1] inclusive; anything else is passed through unchanged.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3353
3355 r"""Helper to easily define string ranges for use in Word construction. Borrows
3356 syntax from regexp '[]' string range definitions::
3357 srange("[0-9]") -> "0123456789"
3358 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
3359 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
3360 The input string must be enclosed in []'s, and the returned string is the expanded
3361 character set joined into a single string.
3362 The values enclosed in the []'s may be::
3363 a single character
3364 an escaped character with a leading backslash (such as \- or \])
3365 an escaped hex character with a leading '\x' (\x21, which is a '!' character)
3366 (\0x## is also supported for backwards compatibility)
3367 an escaped octal character with a leading '\0' (\041, which is a '!' character)
3368 a range of any of the above, separated by a dash ('a-z', etc.)
3369 any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
3370 """
3371 try:
3372 return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
3373 except:
3374 return ""
3375
3377 """Helper method for defining parse actions that require matching at a specific
3378 column in the input text.
3379 """
3380 def verifyCol(strg,locn,toks):
3381 if col(locn,strg) != n:
3382 raise ParseException(strg,locn,"matched token not at column %d" % n)
3383 return verifyCol
3384
3386 """Helper method for common parse actions that simply return a literal value. Especially
3387 useful when used with C{L{transformString<ParserElement.transformString>}()}.
3388 """
3389 def _replFunc(*args):
3390 return [replStr]
3391 return _replFunc
3392
3394 """Helper parse action for removing quotation marks from parsed quoted strings.
3395 To use, add this parse action to quoted string using::
3396 quotedString.setParseAction( removeQuotes )
3397 """
3398 return t[0][1:-1]
3399
3401 """Helper parse action to convert tokens to upper case."""
3402 return [ tt.upper() for tt in map(_ustr,t) ]
3403
3405 """Helper parse action to convert tokens to lower case."""
3406 return [ tt.lower() for tt in map(_ustr,t) ]
3407
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Parse action that discards the tokens produced so far and replaces them
       with the slice of the input string from C{startLoc} up to the location
       reported by C{getTokensEndLoc()}, overriding any nested parse actions.
       The modified token object is returned."""
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    matchedText = s[startLoc:stopLoc]
    # clear the tokens in place, then add the raw text back as the only token
    del t[:]
    t += ParseResults(matchedText)
    return t
3419
3421 """Method to be called from within a parse action to determine the end
3422 location of the parsed tokens."""
3423 import inspect
3424 fstack = inspect.stack()
3425 try:
3426
3427 for f in fstack[2:]:
3428 if f[3] == "_parseNoCache":
3429 endloc = f[0].f_locals["loc"]
3430 return endloc
3431 else:
3432 raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
3433 finally:
3434 del fstack
3435
3464
3468
3472
3474 """Helper to create a validating parse action to be used with start tags created
3475 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
3476 with a required attribute value, to avoid false matches on common tags such as
3477 C{<TD>} or C{<DIV>}.
3478
3479 Call C{withAttribute} with a series of attribute names and values. Specify the list
3480 of filter attributes names and values as:
3481 - keyword arguments, as in C{(align="right")}, or
3482 - as an explicit dict with C{**} operator, when an attribute name is also a Python
3483 reserved word, as in C{**{"class":"Customer", "align":"right"}}
3484 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3485 For attribute names with a namespace prefix, you must use the second form. Attribute
3486 names are matched insensitive to upper/lower case.
3487
3488 To verify that the attribute exists, but without specifying a value, pass
3489 C{withAttribute.ANY_VALUE} as the value.
3490 """
3491 if args:
3492 attrs = args[:]
3493 else:
3494 attrs = attrDict.items()
3495 attrs = [(k,v) for k,v in attrs]
3496 def pa(s,l,tokens):
3497 for attrName,attrValue in attrs:
3498 if attrName not in tokens:
3499 raise ParseException(s,l,"no matching attribute " + attrName)
3500 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
3501 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
3502 (attrName, tokens[attrName], attrValue))
3503 return pa
3504 withAttribute.ANY_VALUE = object()
3505
# Holder for the two associativity markers.  Each marker is a distinct
# object() instance; the operator-precedence helper compares its
# rightLeftAssoc argument against opAssoc.LEFT and opAssoc.RIGHT.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
3509
3511 """Helper method for constructing grammars of expressions made up of
3512 operators working in a precedence hierarchy. Operators may be unary or
3513 binary, left- or right-associative. Parse actions can also be attached
3514 to operator expressions.
3515
3516 Parameters:
3517 - baseExpr - expression representing the most basic element for the nested
3518 - opList - list of tuples, one for each operator precedence level in the
3519 expression grammar; each tuple is of the form
3520 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3521 - opExpr is the pyparsing expression for the operator;
3522 may also be a string, which will be converted to a Literal;
3523 if numTerms is 3, opExpr is a tuple of two expressions, for the
3524 two operators separating the 3 terms
3525 - numTerms is the number of terms for this operator (must
3526 be 1, 2, or 3)
3527 - rightLeftAssoc is the indicator whether the operator is
3528 right or left associative, using the pyparsing-defined
3529 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
3530 - parseAction is the parse action to be associated with
3531 expressions matching this operator expression (the
3532 parse action tuple member may be omitted)
3533 """
3534 ret = Forward()
3535 lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
3536 for i,operDef in enumerate(opList):
3537 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
3538 if arity == 3:
3539 if opExpr is None or len(opExpr) != 2:
3540 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
3541 opExpr1, opExpr2 = opExpr
3542 thisExpr = Forward()
3543 if rightLeftAssoc == opAssoc.LEFT:
3544 if arity == 1:
3545 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
3546 elif arity == 2:
3547 if opExpr is not None:
3548 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
3549 else:
3550 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
3551 elif arity == 3:
3552 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3553 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3554 else:
3555 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3556 elif rightLeftAssoc == opAssoc.RIGHT:
3557 if arity == 1:
3558
3559 if not isinstance(opExpr, Optional):
3560 opExpr = Optional(opExpr)
3561 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
3562 elif arity == 2:
3563 if opExpr is not None:
3564 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
3565 else:
3566 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
3567 elif arity == 3:
3568 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3569 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3570 else:
3571 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
3572 else:
3573 raise ValueError("operator must indicate right or left associativity")
3574 if pa:
3575 matchExpr.setParseAction( pa )
3576 thisExpr << ( matchExpr | lastExpr )
3577 lastExpr = thisExpr
3578 ret << lastExpr
3579 return ret
3580
# Quoted-string expressions.  Inside the quotes each pattern accepts: any character
# other than the quote, newline, carriage return or backslash; a doubled quote;
# a \x hex escape; or a backslash followed by any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")
# Either of the two forms above; the adjacent literals are joined at compile time
# into a single pattern.
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")'''
    r'''|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
).setName("quotedString using single or double quotes")
# A quoted string immediately preceded by the letter 'u', combined into one token.
unicodeString = Combine(_L('u') + quotedString.copy())
3585
3587 """Helper method for defining nested lists enclosed in opening and closing
3588 delimiters ("(" and ")" are the default).
3589
3590 Parameters:
3591 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3592 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3593 - content - expression for items within the nested lists (default=None)
3594 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3595
3596 If an expression is not provided for the content argument, the nested
3597 expression will capture all whitespace-delimited content between delimiters
3598 as a list of separate values.
3599
3600 Use the C{ignoreExpr} argument to define expressions that may contain
3601 opening or closing characters that should not be treated as opening
3602 or closing characters for nesting, such as quotedString or a comment
3603 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
3604 The default is L{quotedString}, but if no expressions are to be ignored,
3605 then pass C{None} for this argument.
3606 """
3607 if opener == closer:
3608 raise ValueError("opening and closing strings cannot be the same")
3609 if content is None:
3610 if isinstance(opener,basestring) and isinstance(closer,basestring):
3611 if len(opener) == 1 and len(closer)==1:
3612 if ignoreExpr is not None:
3613 content = (Combine(OneOrMore(~ignoreExpr +
3614 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3615 ).setParseAction(lambda t:t[0].strip()))
3616 else:
3617 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3618 ).setParseAction(lambda t:t[0].strip()))
3619 else:
3620 if ignoreExpr is not None:
3621 content = (Combine(OneOrMore(~ignoreExpr +
3622 ~Literal(opener) + ~Literal(closer) +
3623 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3624 ).setParseAction(lambda t:t[0].strip()))
3625 else:
3626 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
3627 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3628 ).setParseAction(lambda t:t[0].strip()))
3629 else:
3630 raise ValueError("opening and closing arguments must be strings if no content expression is given")
3631 ret = Forward()
3632 if ignoreExpr is not None:
3633 ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
3634 else:
3635 ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
3636 return ret
3637
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified in place: a backslash followed
       by a line end is added to its ignore expressions.
    """
    def requirePeerColumn(s,l,t):
        # at or past the end of the input there is nothing to check
        if l >= len(s): return
        here = col(l,s)
        expected = indentStack[-1]
        if here == expected:
            return
        if here > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def pushDeeperColumn(s,l,t):
        here = col(l,s)
        if here <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # entering a nested block - remember its column
        indentStack.append( here )

    def popOnUnindent(s,l,t):
        if l >= len(s): return
        here = col(l,s)
        if not indentStack or here >= indentStack[-1] or here > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(pushDeeperColumn)
    PEER   = Empty().setParseAction(requirePeerColumn)
    UNDENT = Empty().setParseAction(popOnUnindent)

    # one or more statements, each required to start at the current block column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3689
# Character sets built from hex-escaped ranges between 0xa1 and 0xff.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Generic HTML tag expressions and a small set of named character entities.
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(
    _L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";"
).streamline()
_htmlEntityMap = {
    "gt" : ">",
    "lt" : "<",
    "amp" : "&",
    "nbsp" : " ",
    "quot" : '"',
    }
# Parse action: the replacement character for a known entity, otherwise None.
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# Ready-made expressions for common comment styles.
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")

# Comma-separated list: each item is a quoted string or a run of non-comma
# words (inner spaces/tabs kept); an empty item yields "".
_noncomma = "".join( c for c in printables if c != "," )
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="" )
).setName("commaSeparatedList")
3713
3714
if __name__ == "__main__":

    def test( teststring ):
        """Parse C{teststring} with the C{simpleSQL} grammar defined below and print
           the resulting tokens, or print the failing line with a caret under the
           error column if parsing fails."""
        try:
            results = simpleSQL.parseString( teststring )
            resultList = results.asList()
            print (teststring + "->" + str(resultList))
            print ("tokens = " + str(results))
            print ("tokens.columns = " + str(results.columns))
            print ("tokens.tables = " + str(results.tables))
            print (results.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() is used instead of binding the exception in the except clause
            failure = sys.exc_info()[1]
            print (teststring + "->")
            print (failure.line)
            print (" "*(failure.column-1) + "^")
            print (failure)
        print()

    # minimal "select <columns> from <tables>" grammar used for the self-test
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # valid and deliberately malformed statements, run in this order
    for sample in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2 ",
            ):
        test( sample )
3758