Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.0.4" 
  61  __versionTime__ = "28 Oct 2015 21:50" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75   
  76  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  77   
  78  __all__ = [ 
  79  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  80  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  81  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  82  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  83  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  84  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase', 
  85  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  86  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  87  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  88  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  89  'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno', 
  90  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  91  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  92  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  93  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  94  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  95  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  96  ] 
  97   
  98  PY_3 = sys.version.startswith('3') 
  99  if PY_3: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104   
 105      # build list of single arg builtins, that can be used as parse actions 
 106      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 107   
 108  else: 
 109      _MAX_INT = sys.maxint 
 110      range = xrange 
111 112 - def _ustr(obj):
113 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 114 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 115 then < returns the unicode object | encodes it with the default encoding | ... >. 116 """ 117 if isinstance(obj,unicode): 118 return obj 119 120 try: 121 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 122 # it won't break any existing code. 123 return str(obj) 124 125 except UnicodeEncodeError: 126 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 127 # state that "The return value must be a string object". However, does a 128 # unicode object (being a subclass of basestring) count as a "string 129 # object"? 130 # If so, then return a unicode object: 131 return unicode(obj)
132 # Else encode it... but how? There are many choices... :) 133 # Replace unprintables with escape codes? 134 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 135 # Replace unprintables with question marks? 136 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 137 # ... 138 139 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 140 singleArgBuiltins = [] 141 import __builtin__ 142 for fname in "sum len sorted reversed list tuple set any all min max".split(): 143 try: 144 singleArgBuiltins.append(getattr(__builtin__,fname)) 145 except AttributeError: 146 continue 147 148 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with C{&}, C{>}, C{<}, C{"} and C{'} replaced by XML entities."""
    # '&' must come first: the later replacements introduce ampersands
    # that must not be escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for raw, entity in replacements:
        escaped = escaped.replace(raw, entity)
    return escaped
159
class _Constants(object):
    """Empty class that defines no attributes or methods of its own."""
# Character-set constants exported for building grammars.
nums = "0123456789"
alphas = string.ascii_lowercase + string.ascii_uppercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
# every printable character that is not whitespace
printables = "".join(filter(lambda ch: ch not in string.whitespace, string.printable))
# a single backslash character
_bslash = chr(92)
class ParseBaseException(Exception):
    """Common base class for every exception raised while parsing.

    Instances carry the input string (C{pstr}), the offset at which the
    problem occurred (C{loc}), a message (C{msg}) and the parser element
    involved (C{parserElement}).
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # called with a single string: treat it as the message and
            # leave the parsed text empty
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # computed lazily from loc/pstr so that constructing the exception stays cheap
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the stripped input line on which the exception occurred,
           with C{markerString} inserted at the exception column.  A false
           C{markerString} returns the stripped line without a marker.
        """
        text = self.line
        if markerString:
            at = self.column - 1
            text = text[:at] + markerString + text[at:]
        return text.strip()

    def __dir__(self):
        return [
            "loc", "msg", "pstr", "parserElement",
            "lineno", "col", "line",
            "markInputline", "__str__", "__repr__",
        ]
class ParseException(ParseBaseException):
    """Exception raised when a parse expression does not match the input.

       Supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
226
class ParseFatalException(ParseBaseException):
    """User-throwable exception for inconsistent parse content;
       raising it stops all parsing immediately."""
231
class ParseSyntaxException(ParseFatalException):
    """Same as C{L{ParseFatalException}}, but raised internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) signals that parsing must
       stop at once because an unbacktrackable syntax error has been found."""
    def __init__(self, pe):
        # re-package the details of the exception being promoted
        super(ParseSyntaxException, self).__init__(
            pstr=pe.pstr,
            loc=pe.loc,
            msg=pe.msg,
            elem=pe.parserElement)
239
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive

       The sequence of parse elements passed to the constructor is kept
       unchanged in C{parseElementTrace}.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the trace in a 1-tuple: with a bare '%' operand a tuple-valued
        # trace would be unpacked as the format arguments, raising TypeError
        # (or silently printing only its single element).
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
260
class _ParseResultsWithOffset(object):
    """Two-item record stored in C{tup} and readable by index; C{setOffset}
       replaces the second item while keeping the first."""
    def __init__(self,p1,p2):
        self.tup = (p1, p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return "%r" % (self.tup,)

    def setOffset(self,i):
        first = self.tup[0]
        self.tup = (first, i)
270
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})

       Tokens are held in an internal list; named results are held in an
       internal dict mapping each name to a list of
       C{_ParseResultsWithOffset} entries.
       """
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        # tells __init__ that the token storage still has to be created
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        # 'isinstance' is bound as a default argument so it is a fast local lookup
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                # non-modal names report every match, not only the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer or slice keys index the token list; any other key looks up
           a results name (raising C{KeyError} if it is not defined)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # modal name: only the most recent match is reported
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        """True if C{k} is a defined results name (tokens are not searched)."""
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().  Any keyword argument other
           than C{default} raises C{TypeError}."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        # NOTE(review): only positions strictly greater than index are shifted;
        # confirm whether an entry located exactly at index should move too.
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute-style access to a results name; an undefined name
           yields the empty string instead of raising C{AttributeError}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)
            # negative offsets are replaced by the current length; the rest are shifted by it
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        """Support C{0 + results}, so the C{sum()} builtin can merge results.

           Any other left operand is unsupported: C{NotImplemented} is
           returned so that Python raises C{TypeError}, rather than the
           addition silently evaluating to C{None}.
        """
        if isinstance(other,int) and other == 0:
            return self.copy()
        return NotImplemented

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        """Flatten the tokens (recursing into nested results) into a list of
           strings, inserting C{sep} between top-level items when given."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested C{ParseResults} become nested lists; all other tokens are
           returned unchanged."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        if PY_3:
            return dict( self.items() )
        else:
            return dict( self.iteritems() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object 'sub' itself is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so take the
               # first (and only) entry through an iterator instead
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,('  '*(depth)),i,indent,('  '*(depth+1)),_ustr(vv)))
        else:
            out.append(_ustr(self))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   # store the parent object itself (or None), not the weak reference
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        return dir(super(ParseResults,self)) + list(self.keys())

# The ABCs live in collections.abc on Python 3.3+ and were removed from the
# top-level collections module in Python 3.10; older interpreters only have
# collections.MutableMapping.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.  When C{loc} points at a newline character
   itself, the column is reported as 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind() yields -1 when no earlier newline exists, which makes the
    # first character of the string column 1
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
712
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one newline before loc means loc sits on line 2, and so on
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
724
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    # rfind() gives -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # loc is on the last line: take everything up to the end of the string
        return strg[start:]
    return strg[start:end]
734
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
       expression, the location, and that location's (line, column)."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    print ("Match %s at loc %s%s" % (expr_text, loc_text, position))
737
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the matched tokens as a list."""
    expr_text = _ustr(expr)
    tokens_text = str(toks.asList())
    print ("Matched %s -> %s" % (expr_text, tokens_text))
740
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: prints the exception."""
    print ("Exception raised:%s" % (_ustr(exc),))
743
def nullDebugAction(*args):
    """No-op debug action: accepts any positional arguments and ignores
       them, for suppressing debugging output during parsing."""
    return None
747 748 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 749 #~ 'decorator to trim function calls to match the arity of the target' 750 #~ def _trim_arity(func, maxargs=3): 751 #~ if func in singleArgBuiltins: 752 #~ return lambda s,l,t: func(t) 753 #~ limit = 0 754 #~ foundArity = False 755 #~ def wrapper(*args): 756 #~ nonlocal limit,foundArity 757 #~ while 1: 758 #~ try: 759 #~ ret = func(*args[limit:]) 760 #~ foundArity = True 761 #~ return ret 762 #~ except TypeError: 763 #~ if limit == maxargs or foundArity: 764 #~ raise 765 #~ limit += 1 766 #~ continue 767 #~ return wrapper 768 769 # this version is Python 2.x-3.x cross-compatible 770 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can always be called as C{wrapper(s, l, t)}.

    If C{func} is one of C{singleArgBuiltins} it is simply called with the
    last argument.  Otherwise the returned wrapper calls C{func} with all
    the arguments it was given and, each time that raises C{TypeError},
    retries with one more leading argument dropped, until a call succeeds
    or more than C{maxargs} arguments have been dropped.  The number of
    arguments to drop is remembered across calls of the same wrapper.

    Once one call has succeeded, a later C{TypeError} is re-raised without
    further retries.  Before that point, a C{TypeError} raised inside
    C{func} itself cannot be told apart from an arity mismatch, so it
    also triggers a retry with fewer arguments.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells that the nested function can
    # update without 'nonlocal' (which Python 2 does not have)
    limit = [0]
    foundArity = [False]
    def wrapper(*args):
        while 1:
            try:
                # drop the first limit[0] arguments
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                if limit[0] <= maxargs and not foundArity[0]:
                    limit[0] += 1
                    continue
                raise
    return wrapper
789 -class ParserElement(object):
790 """Abstract base level parser element class.""" 791 DEFAULT_WHITE_CHARS = " \n\t\r" 792 verbose_stacktrace = False 793 794 @staticmethod
795 - def setDefaultWhitespaceChars( chars ):
796 """Overrides the default whitespace chars 797 """ 798 ParserElement.DEFAULT_WHITE_CHARS = chars
799 800 @staticmethod
801 - def inlineLiteralsUsing(cls):
802 """ 803 Set class to be used for inclusion of string literals into a parser. 804 """ 805 ParserElement.literalStringClass = cls
806
807 - def __init__( self, savelist=False ):
808 self.parseAction = list() 809 self.failAction = None 810 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 811 self.strRepr = None 812 self.resultsName = None 813 self.saveAsList = savelist 814 self.skipWhitespace = True 815 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 816 self.copyDefaultWhiteChars = True 817 self.mayReturnEmpty = False # used when checking for left-recursion 818 self.keepTabs = False 819 self.ignoreExprs = list() 820 self.debug = False 821 self.streamlined = False 822 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 823 self.errmsg = "" 824 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 825 self.debugActions = ( None, None, None ) #custom debug actions 826 self.re = None 827 self.callPreparse = True # used to avoid redundant calls to preParse 828 self.callDuringTry = False
829
830 - def copy( self ):
831 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 832 for the same parsing pattern, using copies of the original parse element.""" 833 cpy = copy.copy( self ) 834 cpy.parseAction = self.parseAction[:] 835 cpy.ignoreExprs = self.ignoreExprs[:] 836 if self.copyDefaultWhiteChars: 837 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 838 return cpy
839
840 - def setName( self, name ):
841 """Define name for this expression, for use in debugging.""" 842 self.name = name 843 self.errmsg = "Expected " + self.name 844 if hasattr(self,"exception"): 845 self.exception.msg = self.errmsg 846 return self
847
848 - def setResultsName( self, name, listAllMatches=False ):
849 """Define name for referencing matching tokens as a nested attribute 850 of the returned parse results. 851 NOTE: this returns a *copy* of the original C{ParserElement} object; 852 this is so that the client can define a basic element, such as an 853 integer, and reference it in multiple places with different names. 854 855 You can also set results names using the abbreviated syntax, 856 C{expr("name")} in place of C{expr.setResultsName("name")} - 857 see L{I{__call__}<__call__>}. 858 """ 859 newself = self.copy() 860 if name.endswith("*"): 861 name = name[:-1] 862 listAllMatches=True 863 newself.resultsName = name 864 newself.modalResults = not listAllMatches 865 return newself
866
867 - def setBreak(self,breakFlag = True):
868 """Method to invoke the Python pdb debugger when this element is 869 about to be parsed. Set C{breakFlag} to True to enable, False to 870 disable. 871 """ 872 if breakFlag: 873 _parseMethod = self._parse 874 def breaker(instring, loc, doActions=True, callPreParse=True): 875 import pdb 876 pdb.set_trace() 877 return _parseMethod( instring, loc, doActions, callPreParse )
878 breaker._originalParseMethod = _parseMethod 879 self._parse = breaker 880 else: 881 if hasattr(self._parse,"_originalParseMethod"): 882 self._parse = self._parse._originalParseMethod 883 return self
884
def setParseAction(self, *fns, **kwargs):
    """Replace this element's parse actions with the given callables.

    Each parse action is a callable taking 0-3 arguments, invoked as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    A parse action that modifies the tokens can return them, and the
    returned value replaces the original tokens.  Otherwise no return
    value is needed.

    The optional keyword argument C{callDuringTry} (default C{False}) is
    stored on the element as C{self.callDuringTry}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>}
    for more information on parsing strings containing C{<TAB>}s, and
    suggested methods to maintain a consistent view of the parsed string,
    the parse location, and line and column positions within the parsed
    string.

    Returns C{self}.
    """
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
905
def addParseAction(self, *fns, **kwargs):
    """Append parse actions to this element's existing list of parse actions.

    See L{I{setParseAction}<setParseAction>} for the callable signatures.
    C{callDuringTry} stays set once it has been enabled; passing
    C{callDuringTry=True} here turns it on.

    Returns C{self}.
    """
    self.parseAction.extend(_trim_arity(fn) for fn in fns)
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
911
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to this expression's list of parse actions.

    Each function in C{fns} is called like a parse action (see
    L{I{setParseAction}<setParseAction>}); if it returns a false value a
    C{ParseException} is raised at the match location, otherwise the tokens
    are passed through unchanged.

    Optional keyword arguments:
     - C{message} = custom message to use in the raised exception
       (default C{"failed user-defined condition"})
     - C{callDuringTry} = if true, enables C{self.callDuringTry}

    Returns C{self}.
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(predicate):
        # Bind the predicate through a factory argument.  A closure over the
        # loop variable would be looked up at call time, so every generated
        # action would end up testing only the last function in fns.
        def pa(s, l, t):
            if not bool(predicate(s, l, t)):
                raise ParseException(s, l, msg)
            return t
        return pa

    for fn in fns:
        # Normalize the arity once per condition rather than on every call.
        self.parseAction.append(makeConditionAction(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction(self, fn):
    """Register the action to perform if parsing fails at this expression.

    The fail action C{fn} is a callable taking the arguments
    C{fn(s,loc,expr,err)} where:
     - s    = string being parsed
     - loc  = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err  = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}.
    """
    self.failAction = fn
    return self
938
def _skipIgnorables(self, instring, loc):
    """Advance C{loc} past every leading match of the expressions in C{self.ignoreExprs}.

    Each ignorable expression is applied repeatedly until it raises
    C{ParseException}; the whole list is then retried for as long as the
    previous pass consumed at least one match.  Returns the new location.
    """
    consumedAny = True
    while consumedAny:
        consumedAny = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    consumedAny = True
            except ParseException:
                # This ignorable no longer matches here; try the next one.
                pass
    return loc
951
def preParse(self, instring, loc):
    """Return the location at which matching should actually start.

    Starting from C{loc}, skips any ignorable expressions (when
    C{self.ignoreExprs} is non-empty) and then, when C{self.skipWhitespace}
    is set, any run of characters found in C{self.whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
963
def parseImpl(self, instring, loc, doActions=True):
    """Base-class matching step: consume nothing and produce no tokens.

    Returns the unchanged location paired with a new empty token list.
    """
    tokens = []
    return loc, tokens
966
def postParse(self, instring, loc, tokenlist):
    """Hook applied to the tokens produced by C{parseImpl}.

    This base implementation returns C{tokenlist} unchanged.
    """
    return tokenlist
969 970 #~ @profile
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this element at C{loc} in C{instring}, without memoizing the result.

    Steps performed:
     1. optionally run C{preParse} (when both C{callPreParse} and
        C{self.callPreparse} are true) to find the real start location
     2. call C{parseImpl}, converting an C{IndexError} into a
        C{ParseException} located at the end of the input
     3. pass the tokens through C{postParse} and wrap them in a
        C{ParseResults}
     4. run the parse actions when C{doActions} or C{self.callDuringTry}
        is set; a non-C{None} return value replaces the tokens

    When C{self.debug} is set, the start/success/exception debug actions
    are invoked around these steps; C{self.failAction}, if defined, is
    called when C{parseImpl} fails.

    Returns a C{(loc, ParseResults)} tuple; raises a C{ParseBaseException}
    subclass if matching fails.
    """
    debugging = self.debug

    if debugging or self.failAction:
        if self.debugActions[0]:
            self.debugActions[0](instring, loc, self)
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            if self.debugActions[2]:
                self.debugActions[2](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        # Test preloc, not loc: preParse may have skipped to the end of the
        # input, in which case an element that indexes instring directly
        # would otherwise leak a raw IndexError to the caller.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn(instring, tokensStart, retTokens)
                    if tokens is not None:
                        retTokens = ParseResults(tokens,
                                                 self.resultsName,
                                                 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                 modal=self.modalResults)
            except ParseBaseException as err:
                if self.debugActions[2]:
                    self.debugActions[2](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                tokens = fn(instring, tokensStart, retTokens)
                if tokens is not None:
                    retTokens = ParseResults(tokens,
                                             self.resultsName,
                                             asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                             modal=self.modalResults)

    if debugging:
        if self.debugActions[1]:
            self.debugActions[1](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1042
def tryParse(self, instring, loc):
    """Attempt a match at C{loc} with parse actions disabled.

    Returns the location following the match.  A C{ParseFatalException}
    raised by the attempt is replaced by an ordinary C{ParseException} at
    C{loc}.
    """
    try:
        result = self._parse(instring, loc, doActions=False)
        return result[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)
1048 1049 # this method gets repeatedly called during backtracking with the same arguments - 1050 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """Memoizing front end for C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on the
    element and the call arguments.  Both successful results and raised
    C{ParseBaseException}s are cached; a cached exception is re-raised on
    a later hit.  A copy of the C{ParseResults} is stored and a copy is
    handed out on each hit.
    """
    key = (self, instring, loc, callPreParse, doActions)
    if key not in ParserElement._exprArgCache:
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
            ParserElement._exprArgCache[key] = (result[0], result[1].copy())
            return result
        except ParseBaseException as pe:
            # Drop the traceback before the exception is kept in the cache.
            pe.__traceback__ = None
            ParserElement._exprArgCache[key] = pe
            raise

    cached = ParserElement._exprArgCache[key]
    if isinstance(cached, Exception):
        raise cached
    return (cached[0], cached[1].copy())
# Default parse entry point is the non-memoizing implementation.
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}

@staticmethod
def resetCache():
    """Empty the shared C{ParserElement._exprArgCache} in place."""
    ParserElement._exprArgCache.clear()
# Packrat (memoized) parsing is off until enablePackrat() is called.
_packratEnabled = False

@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.

    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Memoizing is done of
    both valid results and parsing exceptions.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call the class method C{ParserElement.enablePackrat()}.  If
    your program uses C{psyco} to "compile as you go", you must call
    C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
    Python will crash.  For best results, call C{enablePackrat()} immediately
    after importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1097
def parseString(self, instring, parseAll=False):
    """Execute the parse expression with the given string.

    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, tokens = self._parse(instring, 0)
        if parseAll:
            # Require that only skippable text remains after the match.
            endLoc = self.preParse(instring, endLoc)
            (Empty() + StringEnd())._parse(instring, endLoc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1142
def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """Scan the input string for expression matches.

    This is a generator; each match yields a C{(tokens, start, end)}
    tuple giving the matching tokens, start location, and end location.
    May be called with optional C{maxMatches} argument, to clip scanning
    after 'n' matches are found.  If C{overlap} is specified, then
    overlapping matches will be reported.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on
    parsing strings with embedded tabs.
    """
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    textLen = len(instring)
    loc = 0
    skip = self.preParse
    parse = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= textLen and found < maxMatches:
            try:
                start = skip(instring, loc)
                end, tokens = parse(instring, start, callPreParse=False)
            except ParseException:
                # No match here; retry one character past the skipped text.
                loc = start + 1
                continue

            if end <= loc:
                # The match did not advance past the scan position.
                loc = start + 1
                continue

            found += 1
            yield tokens, start, end
            if not overlap:
                loc = end
            elif skip(instring, loc) > loc:
                loc = end
            else:
                loc += 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1192
def transformString(self, instring):
    """Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.

    To use C{transformString}, define a grammar and attach a parse action
    to it that modifies the returned token list.  Invoking
    C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the
    parse action.  C{transformString()} returns the resulting transformed
    string.

    Note: this sets C{self.keepTabs} to True on the element.
    """
    pieces = []
    consumed = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # Copy the unmatched text that precedes this match verbatim.
            pieces.append(instring[consumed:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            consumed = end
        pieces.append(instring[consumed:])
        nonEmpty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(nonEmpty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1225
def searchString(self, instring, maxMatches=_MAX_INT):
    """Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression.

    Returns a C{ParseResults} holding the tokens of each match, without
    the start and end locations.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1239
def __add__(self, other):
    """Implementation of + operator - returns C{L{And}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1249
def __radd__(self, other):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1259
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop

    The result is C{And([self, And._ErrorStop(), other])}.  A string
    operand is first converted with C{ParserElement.literalStringClass}.
    Any other non-C{ParserElement} operand produces a C{SyntaxWarning} and
    a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return And([self, And._ErrorStop(), other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1269
def __rsub__(self, other):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1279
def __mul__(self, other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum below
    the minimum, or a multiplier of 0 or (0,0).
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # Pad short tuples with None so that (n,) behaves like (n, None).
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # From here on optElements counts the optional repetitions.
            optElements -= minElements
        else:
            # Interpolate the type names into the message; passing them as
            # extra exception arguments leaves the '%s' placeholders unfilled.
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # Nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep.
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the multiplier is the left operand.

    Delegates to C{__mul__}, so C{3 * expr} gives the same result as
    C{expr * 3}.
    """
    product = self.__mul__(other)
    return product
1350
def __or__(self, other):
    """Implementation of | operator - returns C{L{MatchFirst}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return MatchFirst([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1360
def __ror__(self, other):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1370
def __xor__(self, other):
    """Implementation of ^ operator - returns C{L{Or}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Or([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1380
def __rxor__(self, other):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1390
def __and__(self, other):
    """Implementation of & operator - returns C{L{Each}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return Each([self, other])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1400
def __rand__(self, other):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result.
    """
    if isinstance(other, basestring):
        other = ParserElement.literalStringClass(other)
    if isinstance(other, ParserElement):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
                  SyntaxWarning, stacklevel=2)
    return None
1410
def __invert__(self):
    """Implementation of ~ operator - returns C{L{NotAny}}

    C{~expr} is shorthand for C{NotAny(expr)}.
    """
    negated = NotAny(self)
    return negated
1414
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
      userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    could be written as::
      userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted, same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1430
def suppress(self):
    """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
    cluttering up returned output.

    Returns a new C{Suppress} wrapping this element.
    """
    wrapped = Suppress(self)
    return wrapped
1436
def leaveWhitespace(self):
    """Disables the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.

    This is normally only used internally by the pyparsing module, but may
    be needed in some whitespace-sensitive grammars.

    Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1444
def setWhitespaceChars(self, chars):
    """Overrides the default whitespace chars.

    Re-enables whitespace skipping with C{chars} as the characters to
    skip, and clears C{copyDefaultWhiteChars}.

    Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1452
def parseWithTabs(self):
    """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.

    Must be called before C{parseString} when the input grammar contains
    elements that match C{<TAB>} characters.

    Returns C{self}.
    """
    self.keepTabs = True
    return self
1459
def ignore(self, other):
    """Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A copy of C{other} is stored, wrapped in C{Suppress} unless it already
    is one.  A C{Suppress} that is already in C{self.ignoreExprs} is not
    added again.

    Returns C{self}.
    """
    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other.copy())
    return self
1471
def setDebugActions(self, startAction, successAction, exceptionAction):
    """Enable display of debugging messages while doing pattern matching.

    Stores the three callbacks as C{self.debugActions}, in the order
    (start, success, exception), and turns on C{self.debug}.  Any callback
    given as a false value is replaced by the corresponding module default
    debug action.

    Returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1479
def setDebug(self, flag=True):
    """Enable display of debugging messages while doing pattern matching.

    Set C{flag} to True to enable (installing the default debug actions),
    False to disable.

    Returns C{self}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction,
                         _defaultSuccessDebugAction,
                         _defaultExceptionDebugAction)
    return self
1488
def __str__(self):
    """Return this element's C{name}."""
    label = self.name
    return label
1491
def __repr__(self):
    """Return the same text as the string conversion of this element, via C{_ustr}."""
    text = _ustr(self)
    return text
1494
def streamline(self):
    """Mark this element as streamlined and clear its cached C{strRepr}.

    Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
1499
def checkRecursion(self, parseElementList):
    """Recursion check hook; this base implementation does nothing."""
    return None
1502
def validate(self, validateTrace=[]):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation runs C{checkRecursion} with a fresh empty list;
    C{validateTrace} is accepted but not used here.
    """
    seen = []
    self.checkRecursion(seen)
1506
def parseFile(self, file_or_filename, parseAll=False):
    """Execute the parse expression on the given file or filename.

    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.
    C{parseAll} is passed through to C{parseString}.

    Returns whatever C{parseString} returns for the file's contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # Not a file-like object: treat it as a path.  The with-block
        # guarantees the handle is closed even if read() raises.
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1526
def __eq__(self, other):
    """Equality test against another element or against a string.

     - another C{ParserElement}: equal if it is the same object or both
       have equal C{__dict__}s
     - a string: equal if this expression parses the whole string
       (C{parseString} with C{parseAll=True})
     - anything else: result of comparing C{super(ParserElement,self)}
       with C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement, self) == other
1538
def __ne__(self, other):
    """Inequality test: the negation of C{self == other}."""
    same = (self == other)
    return not same
1541
def __hash__(self):
    """Hash on object identity, i.e. C{hash(id(self))}."""
    identity = id(self)
    return hash(identity)
1544
def __req__(self, other):
    """Reflected equality test; returns the result of C{self == other}."""
    same = (self == other)
    return same
1547
def __rne__(self, other):
    """Reflected inequality test; returns the negation of C{self == other}."""
    same = (self == other)
    return not same
1550
def runTests(self, tests):
    """Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed.  Quick and easy way to
    run a parse expression against a list of sample strings.

    For each string in C{tests} this prints the string, then either the
    C{dump()} of the parsed results or, on a C{ParseException}, a C{'^'}
    marker under the failure location followed by the exception, and
    finally a blank line.
    """
    # Every print is written as print(<single expression>), which produces
    # identical output as a Python 2 print statement and as the Python 3
    # print function; bare print statements do not compile on Python 3.
    for t in tests:
        print(t)
        try:
            print(self.parseString(t).dump())
        except ParseException as pe:
            if '\n' in t:
                # Multi-line input: show only the offending line, with the
                # marker placed at the failure column.
                print(line(pe.loc, t))
                print(' '*(col(pe.loc, t)-1) + '^')
            else:
                print(' '*pe.loc + '^')
            print(pe)
        # print("") rather than print(): on Python 2 the latter would
        # print an empty tuple instead of a blank line.
        print("")
1568
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""

    def __init__(self):
        # Initialize the base element with savelist disabled.
        super(Token, self).__init__(savelist=False)
1574
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # Matching consumes no input, so the result may be empty.
        self.mayReturnEmpty = True
        self.mayIndexError = False
1583
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise C{ParseException} at C{loc} with this element's C{errmsg}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1596
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # An empty match string: warn, and turn this instance into an Empty.
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal text at C{loc}.

        Returns C{(loc + matchLen, match)} on success; raises
        C{ParseException} otherwise.
        """
        matched = (instring[loc] == self.firstMatchChar and
                   (self.matchLen == 1 or instring.startswith(self.match, loc)))
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

# Short internal alias, and the class used to convert bare strings that are
# combined with parser elements.
_L = Literal
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False):
        super(Keyword, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # Caseless comparison is done in upper case on both sides.
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc}, requiring non-identifier characters
        (or the string boundary) on both sides of it.

        Returns C{(loc + matchLen, match)} on success; raises
        C{ParseException} otherwise.
        """
        end = loc + self.matchLen
        idents = self.identChars
        if self.caseless:
            matched = (instring[loc:end].upper() == self.caselessmatch and
                       (loc >= len(instring)-self.matchLen or instring[end].upper() not in idents) and
                       (loc == 0 or instring[loc-1].upper() not in idents))
        else:
            matched = (instring[loc] == self.firstMatchChar and
                       (self.matchLen == 1 or instring.startswith(self.match, loc)) and
                       (loc >= len(instring)-self.matchLen or instring[end] not in idents) and
                       (loc == 0 or instring[loc-1] not in idents))
        if matched:
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy whose C{identChars} is reset to the current
        C{Keyword.DEFAULT_KEYWORD_CHARS}."""
        duplicate = super(Keyword, self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1682
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """

    def __init__(self, matchString):
        # The base Literal stores the upper-cased text for comparison.
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """Match the literal at C{loc} ignoring case.

        Returns C{(loc + matchLen, returnString)} on success; raises
        C{ParseException} otherwise.
        """
        end = loc + self.matchLen
        if instring[loc:end].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1699
class CaselessKeyword(Keyword):
    """Caseless version of C{L{Keyword}}: constructs the base C{Keyword}
    with C{caseless=True}."""

    def __init__(self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS):
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at C{loc} ignoring case, requiring a
        non-identifier character (or the end of the string) after it.

        Returns C{(loc + matchLen, match)} on success; raises
        C{ParseException} otherwise.
        """
        end = loc + self.matchLen
        matched = (instring[loc:end].upper() == self.caselessmatch and
                   (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars))
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.match
1709
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{excludeChars} parameter can list characters that are to be removed from
       the given C{initChars} and C{bodyChars} strings; useful to define a word
       of all printables except for one or two characters, for instance.
       If C{asKeyword} is true, the word must not be directly adjacent to
       further body characters.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # Fast path: with default length limits and no blank in either
        # character set, the word is matched with a precompiled regex.
        # NOTE(review): parseImpl tests self.re even when this branch is not
        # taken, so the attribute is presumably initialized by a base class.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character scan,
                # but no longer swallow KeyboardInterrupt/SystemExit
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; return C{(endloc, matchedText)} or raise
        C{ParseException}."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if instring[ loc ] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # an explicit max was given and the word continues past it
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # reject if a body character touches either end of the match
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        # Try the base-class representation first; if it fails for any
        # ordinary reason, build (and cache) a "W:(...)" description.
        try:
            return super(Word,self).__str__()
        except Exception:
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first 4 chars
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1832
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
       A precompiled regular expression object is accepted as well.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        Raises C{re.error} (after issuing a C{SyntaxWarning}) if a string
        pattern does not compile, and C{ValueError} if C{pattern} is neither
        a string nor a compiled RE object."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the same exception class that sre_constants exposes
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # use the source text of the compiled pattern; str(pattern) would
            # give the object's repr rather than the regular expression
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the regex at C{loc}; return C{(endloc, ParseResults)} where
        the results hold the whole match plus one named entry per named
        group, or raise C{ParseException} if there is no match."""
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc,ret

    def __str__( self ):
        # Try the base-class representation first; otherwise build and
        # cache a "Re:(...)" description.
        try:
            return super(Regex,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1896
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} if C{quoteChar} or C{endQuoteChar} is empty
           after stripping whitespace, and C{re.error} if the generated
           pattern does not compile.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # escape char (if any) is excluded from the "ordinary character" class
        if escChar is not None:
            escCharInClass = _escapeRegexRangeChars(escChar)
        else:
            escCharInClass = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharInClass )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the
            # body, as long as the prefix is not continued into the full quote
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            # re.error is the same exception class that sre_constants exposes
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}; return C{(endloc, text)} where
        the text is unquoted and unescaped if C{unquoteResults} is set, or
        raise C{ParseException}."""
        # cheap first-character test before running the regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        # Try the base-class representation first; otherwise build and
        # cache a description naming the two quote delimiters.
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2008
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # leading whitespace is not skipped before matching
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until a disallowed character or the
        maximum length is reached; return C{(endloc, matchedText)} or raise
        C{ParseException} if fewer than C{minLen} characters matched."""
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        # Try the base-class representation first; otherwise build and
        # cache a "!W:(...)" description (long sets abbreviated to 4 chars).
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2071
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # display names used to build the element's name from its characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters that are to be matched must not be skipped as whitespace
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters at C{loc};
        return C{(endloc, matchedText)} or raise C{ParseException}."""
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        begin = loc
        limit = min( begin + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[begin:loc]
2119
class _PositionToken(Token):
    """Base class for the position-testing tokens defined after it
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, ...): named after its concrete
    class, may return empty, and flagged as not raising C{IndexError}."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2127
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions, then whitespace, stopping as soon as
        the target column is reached; return the new location."""
        if col(loc, instring) == self.col:
            return loc
        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between C{loc} and the target column, or raise
        C{ParseException} if C{loc} is already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[loc:target]
2150
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__(self):
        super(LineStart, self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """Return C{loc + 1} if the inherited pre-parse stops on a newline,
        otherwise return C{loc} unchanged."""
        skipped = super(LineStart, self).preParse(instring, loc)
        return loc + 1 if instring[skipped] == "\n" else loc

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at string start, directly after
        the leading skippable text, or directly after a newline; otherwise
        raise C{ParseException}."""
        if (loc == 0 or
                loc == self.preParse(instring, 0) or
                instring[loc - 1] == "\n"):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2170
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__(self):
        super(LineEnd, self).__init__()
        # newlines are significant here, so they are not skipped as whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Return C{(loc+1, "\\n")} on a newline, C{(loc+1, [])} exactly at
        the end of the string, and raise C{ParseException} otherwise."""
        total = len(instring)
        if loc < total and instring[loc] == "\n":
            return loc + 1, "\n"
        if loc == total:
            return loc + 1, []
        raise ParseException(instring, loc, self.errmsg, self)
2188
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list at location 0, or at the location
        reached by pre-parsing from 0; otherwise raise C{ParseException}."""
        # a non-zero loc is still acceptable when everything before it is
        # just whitespace and ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2201
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Raise C{ParseException} if text remains at C{loc}; return
        C{(loc+1, [])} exactly at the end of the string and C{(loc, [])}
        when C{loc} is already beyond it."""
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen is the only case left (the original trailing
        # else-branch could never be reached)
        return loc, []
2217
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} if C{loc} is 0, or if the character at C{loc}
        is a word character and the one before it is not; otherwise raise
        C{ParseException}."""
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                instring[loc] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2236
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # whitespace before the position must not be skipped
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Return C{(loc, [])} if C{loc} is at or past the end of the string,
        or if the character before C{loc} is a word character and the one at
        C{loc} is not; otherwise raise C{ParseException}."""
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            if (instring[loc] in self.wordChars or
                # at loc 0 no word can have ended; without this test
                # instring[loc-1] would wrap around to the LAST character
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2257
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Holds the list of contained expressions in C{self.exprs}."""
    def __init__( self, exprs, savelist = False ):
        """C{exprs} may be a single string, a generator, a sequence of
        expressions and/or strings, any other iterable, or a single
        expression; it is normalized to a list stored in C{self.exprs}."""
        super(ParseExpression,self).__init__(savelist)
        # the Sequence ABC lives in collections.abc; the alias in collections
        # was removed in Python 3.10, older interpreters only have the latter
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, Sequence ):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(Literal, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, reset the cached string
        representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original contained expressions are untouched
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this expression and on every
        contained expression; a C{Suppress} that is already registered is
        not added again.  Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        # Try the base-class representation first; otherwise build and
        # cache a "ClassName:(exprs)" description.
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten directly nested
        expressions of the same class; returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate unchanged to the base-class C{setResultsName}."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this expression
        for recursion."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
        copies as well."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2365
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}: once C{And.parseImpl} has passed it,
        failures of later expressions are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first contained expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order, accumulating their
        tokens; returns C{(endloc, tokens)}."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                # past the '-' marker: convert failures to ParseSyntaxException
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place (C{+=}); a plain string is first wrapped
        with C{ParserElement.literalStringClass}, the same way C{Or} and
        C{MatchFirst} wrap strings in their in-place operators."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the
        first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2430
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as possibly empty
        self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at C{loc} and re-parse with the one that
        reached the furthest location (the first one wins a tie).  If none
        matches, raise the exception that occurred furthest into the input."""
        furthestErrLoc = -1
        furthestErr = None
        bestEnd = -1
        bestExpr = None
        for candidate in self.exprs:
            try:
                endLoc = candidate.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestErrLoc:
                    furthestErr = err
                    furthestErrLoc = err.loc
            except IndexError:
                # ran off the end of the input: report at end of string
                if len(instring) > furthestErrLoc:
                    furthestErr = ParseException(instring, len(instring), candidate.errmsg, self)
                    furthestErrLoc = len(instring)
            else:
                if endLoc > bestEnd:
                    bestEnd = endLoc
                    bestExpr = candidate

        if bestEnd >= 0:
            return bestExpr._parse(instring, loc, doActions)
        if furthestErr is not None:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ixor__(self, other):
        """Append C{other} in place (C{^=}), wrapping a plain string with
        C{ParserElement.literalStringClass} first."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(trail)
2491
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is flagged as possibly empty
        self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at C{loc}.
        If none does, raise the exception that occurred furthest into the
        input."""
        furthestLoc = -1
        furthestErr = None
        for alternative in self.exprs:
            try:
                return alternative._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                # ran off the end of the input: report at end of string
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring, len(instring), alternative.errmsg, self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is not None:
            raise furthestErr
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other):
        """Append C{other} in place (C{|=}), wrapping a plain string with
        C{ParserElement.literalStringClass} first."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        return self.append(other)

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alternative in self.exprs:
            alternative.checkRecursion(trail)
2546
class Each(ParseExpression):
    """Requires all given C{ParseExpression}s to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'&'} operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are computed lazily on the first parseImpl call
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the contained expressions match, then parse
        them in that order and merge their results.

        Returns C{(endloc, ParseResults)}; raises C{ParseException} if a
        required expression could not be matched."""
        if self.initExprGroups:
            # classify the contained expressions once
            opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
            opt2 = [ e for e in self.exprs if e.mayReturnEmpty and e not in opt1 ]
            self.optionals = opt1 + opt2
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # trial pass(es): repeat until a full round matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            # pass self as the failing element, as every other raise here does
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing, self )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

        # real pass: parse in the discovered order
        resultlist = []
        for e in matchOrder:
            loc,results = e._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # names already present are combined rather than overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults:
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Check every contained expression for recursion."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2630
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
       Wraps a single contained expression, C{self.expr}, which may be C{None}."""
    def __init__( self, expr, savelist=False ):
        """C{expr} is the wrapped expression; a plain string is converted to
        a C{Literal}.  Parsing-related attributes are taken over from it."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression (without pre-parsing); raise
        C{ParseException} if there is no wrapped expression."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a copy of the wrapped
        expression; returns C{self}."""
        self.skipWhitespace = False
        # copy only when there is something to copy; calling copy() before
        # the None test would fail on an empty wrapper
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this element and on the wrapped
        expression; a C{Suppress} that is already registered is not added
        again.  Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression; returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already in
        C{parseElementList}; otherwise continue with the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for
        recursion."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        # Try the base-class representation first; otherwise build and
        # cache a "ClassName:(expr)" description.
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2702
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string; it only
    checks that the given expression matches at the current position, and
    always returns a null token list."""

    def __init__( self, expr ):
        """Wrap C{expr}; a lookahead is always allowed to return empty."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at C{loc} and return C{loc} unchanged
        with no tokens.  An exception raised by the trial parse propagates
        to the caller."""
        self.expr.tryParse( instring, loc )
        noTokens = []
        return loc, noTokens
2716
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string; it only
    verifies that the given expression does *not* match at the current
    position.  C{NotAny} also does *not* skip over leading whitespace, and it
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__( self, expr ):
        """Wrap C{expr} and prepare the "unwanted token" error message."""
        super(NotAny,self).__init__(expr)
        # Set the flag directly rather than calling leaveWhitespace(), which
        # would also propagate the setting into the wrapped expression.
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when the wrapped expression fails at C{loc};
        raise C{ParseException} when it matches."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent - that is the success case
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        """Return the assigned name, or a cached C{"~{expr}"} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
2748
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__( self, expr ):
        """Wrap C{expr}; zero repetitions is a valid (empty) match."""
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression repeatedly until it fails.

        Returns the location after the last successful match and the
        accumulated tokens (an empty list if nothing matched at all).
        """
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
            hasIgnorables = len(self.ignoreExprs) > 0
            while True:
                # skip ignorable text between repetitions, if any is defined
                nextLoc = self._skipIgnorables( instring, loc ) if hasIgnorables else loc
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.haskeys():
                    collected += more
        except (ParseException,IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        """Return the assigned name, or a cached C{"[expr]..."} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned
        element so that its results are saved as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2787
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (a failure here propagates),
        then keep matching until it fails.

        Returns the location after the last successful match and the
        accumulated tokens.
        """
        # the first match is mandatory, so it sits outside the try block
        loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnorables = len(self.ignoreExprs) > 0
            while True:
                # skip ignorable text between repetitions, if any is defined
                nextLoc = self._skipIgnorables( instring, loc ) if hasIgnorables else loc
                loc, more = self.expr._parse( instring, nextLoc, doActions )
                if more or more.haskeys():
                    collected += more
        except (ParseException,IndexError):
            # a failure after the first match just ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        """Return the assigned name, or a cached C{"{expr}..."} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the returned
        element so that its results are saved as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2822
class _NullToken(object):
    """Placeholder object that is always false and renders as an empty string."""

    def __str__(self):
        return ""

    def __bool__(self):
        return False
    # Python 2 spells the truth-value hook __nonzero__
    __nonzero__ = __bool__


# module-wide sentinel instance used as the default for Optional's C{default} argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default value to return can also be given, for use when the
       optional expression is not found.
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        """Wrap C{expr}; C{default} is returned as the token when C{expr}
        does not match.  The C{_optionalNotMatched} sentinel means "no
        default"."""
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the wrapped expression's match, or fall back without
        advancing C{loc}.

        On failure the result is an empty list when no default was given,
        otherwise the default value - also stored under the wrapped
        expression's results name when it has one.
        """
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name, or a cached C{"[expr]"} rendering."""
        if not hasattr(self,"name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
2863
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is true, the matched expression is parsed as well (the
       skipped text and the matched expression come back as a 2-element list).
       The C{ignore} argument defines grammars (typically quoted strings and
       comments) that might contain false matches.  If C{failOn} is given and
       it matches at a position reached before the target is found, the skip
       is abandoned with a C{ParseException}.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Store the target expression and options; a string C{failOn} is
        converted to a C{Literal}."""
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        self.failOn = Literal(failOn) if isinstance(failOn, basestring) else failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from C{loc} until the target
        expression matches.

        Returns the new location and a one-element list holding either the
        skipped text or, with C{include} set and a non-empty match, a
        C{ParseResults} of the skipped text plus the matched tokens.
        Raises C{ParseException} if the target is never found or C{failOn}
        matches first.
        """
        begin = loc
        limit = len(instring)
        target = self.expr
        abort = False
        while loc <= limit:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        # flag the failure so the outer handler re-raises it
                        abort = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    abort = False
                if self.ignoreExpr is not None:
                    # step over every consecutive ignorable at this position
                    while True:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # trial match only: parse actions are not run here
                target._parse( instring, loc, doActions=False, callPreParse=False )
                skipped = instring[begin:loc]
                if not self.includeMatch:
                    return loc, [ skipped ]
                loc, matched = target._parse(instring,loc,doActions,callPreParse=False)
                if not matched:
                    return loc, [ skipped ]
                combined = ParseResults( skipped )
                combined += matched
                return loc, [ combined ]
            except (ParseException,IndexError):
                if abort:
                    raise
                loc += 1
        raise ParseException(instring, loc, self.errmsg, self)
2926
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       Once the expression is known, it is assigned to the C{Forward} variable with the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        """Create the placeholder, optionally already bound to C{other}."""
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Bind C{other} as the real expression and adopt its settings.

        A plain string is converted with C{ParserElement.literalStringClass}.
        Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        self.mayIndexError = other.mayIndexError
        self.setWhitespaceChars( other.whiteChars )
        self.skipWhitespace = other.skipWhitespace
        self.saveAsList = other.saveAsList
        self.ignoreExprs.extend(other.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """C{fwd <<= expr} does the same as C{fwd << expr}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the bound
        expression is not touched.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the bound expression once; the flag is set before
        descending into it.  Returns C{self}."""
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the bound expression unless this element is already in
        the trace, then run the recursion check."""
        if self not in validateTrace:
            trace = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"Forward: <expr>"}.

        While the bound expression is rendered this instance's class is
        temporarily switched to C{_ForwardNoRecurse}, whose C{__str__}
        returns C{"..."}.
        """
        if hasattr(self,"name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            inner = _ustr(self.expr) if self.expr is not None else "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + inner

    def copy(self):
        """Copy a bound C{Forward} via the base class; for an unbound one,
        return a new C{Forward} that refers back to this instance."""
        if self.expr is not None:
            return super(Forward,self).copy()
        ret = Forward()
        ret <<= self
        return ret
3002
class _ForwardNoRecurse(Forward):
    """Stand-in class that C{Forward.__str__} switches an instance to while
    it renders the bound expression."""
    def __str__( self ):
        # fixed placeholder text instead of rendering the expression again
        return "..."
3006
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}.  C{savelist} is accepted but not forwarded to the
        base class; C{saveAsList} is switched off explicitly."""
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
3012
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: constructing an instance emits a C{DeprecationWarning}
    pointing to the C{upcaseTokens} parse action.
    """
    def __init__(self, *args):
        """Forward C{args} to C{TokenConverter} and issue the deprecation warning."""
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return the tokens upper-cased, as a list.

        Each token is passed through C{_ustr} first (as C{upcaseTokens}
        does), because calling the unbound C{str.upper} on a token that is
        not a plain C{str} raises C{TypeError}.
        """
        return [ _ustr(tok).upper() for tok in tokenlist ]
3022
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        """Wrap C{expr}; C{joinString} is passed to the token-joining step
        in C{postParse}."""
        super(Combine,self).__init__( expr )
        # With adjacent=True, whitespace skipping is turned off inside the
        # contained expression; it is then re-enabled on the Combine itself.
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        """Register an ignorable.  With C{adjacent} set it is registered via
        C{ParserElement.ignore} on this element only; otherwise the inherited
        C{ignore} is used.  Returns C{self}."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with one joined string.

        The result starts as a copy of C{tokenlist} with its list items
        removed.  It is returned wrapped in a list when this element has a
        results name and the result has keys.
        """
        joined = tokenlist.copy()
        del joined[:]
        text = "".join(tokenlist._asStringList(self.joinString))
        joined += ParseResults([ text ], modal=self.modalResults)

        if self.resultsName and joined.haskeys():
            return [ joined ]
        return joined
3055
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""

    def __init__( self, expr ):
        """Wrap C{expr} and mark the results to be saved as a list."""
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Nest the matched tokens inside a one-element list."""
        grouped = [ tokenlist ]
        return grouped
3064
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as an item key.
    """
    def __init__( self, expr ):
        """Wrap C{expr} and mark the results to be saved as a list."""
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a keyed entry to C{tokenlist} for every non-empty item.

        The key is the item's first token (an C{int} key is converted to a
        stripped string).  The value is C{""} for a single-token item, the
        second token for a two-token item whose second token is not a
        C{ParseResults}, and otherwise the rest of the item - unwrapped
        when it is a single token without keys.  Returns C{tokenlist},
        wrapped in a list when this element has a results name.
        """
        for position, entry in enumerate(tokenlist):
            width = len(entry)
            if width == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                key = _ustr(entry[0]).strip()
            if width == 1:
                value = ""
            elif width == 2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                rest = entry.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest,ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3097
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse( self, instring, loc, tokenlist ):
        """Discard whatever was matched by returning an empty list."""
        nothing = []
        return nothing

    def suppress( self ):
        """Already suppressing, so return C{self} unchanged."""
        return self
3106
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        """Wrap C{methodCall} (normalised through C{_trim_arity})."""
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        """Run the wrapped action the first time; raise C{ParseException}
        on later calls until C{reset()} is called.  The "called" flag is
        set only after the action has returned."""
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped action to run again."""
        self.called = False
3121
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an ">>entering" line to C{sys.stderr}
    before calling the action and a "<<leaving" line afterwards, showing
    either the return value or the exception (which is re-raised).
    """
    f = _trim_arity(f)
    def z(*paArgs):
        # __name__ exists on both Python 2 and 3; func_name is Python 2 only
        thisFunc = f.__name__
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       With C{combine=True} the matching tokens are returned as a single token
       string, delimiters included; otherwise they are returned as a list of
       tokens with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3160
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens are returned as one list of the expr tokens - the
       leading count token is suppressed.  C{intExpr}, if given, is copied
       and used to parse the count instead of the default C{Word(nums)}.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # rebuild the array expression for the count that was just parsed
        count = t[0]
        if count:
            arrayExpr << Group(And([expr]*count))
        else:
            arrayExpr << Group(empty)
        return []

    if intExpr is not None:
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr )
def _flatten(L):
    """Return a new flat list of the items in C{L}, expanding nested
    C{list} instances recursively (other containers are kept as items)."""
    flat = []
    for item in L:
        if not isinstance(item,list):
            flat.append(item)
            continue
        flat.extend(_flatten(item))
    return flat
3189
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # re-point the repeater at whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
            rep << And( literals )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched; the repeater must produce the same tokens
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            if _flatten(t.asList()) != matchTokens:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Prefix the characters backslash, C{^}, C{-} and C{]} in C{s} with
    C{_bslash}, and replace newline and tab characters by the two-character
    sequences C{\\n} and C{\\t}."""
    # the backslash has to be handled first so that the backslashes added
    # for the other characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)
3249
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} when no symbols are supplied; an argument of an
       unsupported type additionally triggers a C{SyntaxWarning}.
    """
    # The ABCs live in collections.abc on Python 3 (the aliases in
    # `collections` were removed in 3.10); Python 2 only has the old location.
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that a shorter earlier symbol
    # would mask (i.e. is a prefix of) in front of that shorter symbol.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing conflicted with symbols[i]; move on to the next one
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            # best effort only - fall back to MatchFirst, but let
            # KeyboardInterrupt / SystemExit through
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3313
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3323
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    def reportLocation(s,loc,t):
        return loc

    # empty markers placed before and after expr record the match boundaries
    startMarker = Empty().setParseAction(reportLocation)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    def textAsString(s,l,t):
        return s[t._original_start:t._original_end]

    def textIntoTokens(s,l,t):
        # replace the token list by the raw text and drop the two marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    wrapped.setParseAction(textAsString if asString else textIntoTokens)
    return wrapped
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions:
       C{expr} is wrapped in a C{TokenConverter} whose parse action returns
       the first of the matched tokens."""
    def firstToken(t):
        return t[0]
    converter = TokenConverter(expr)
    return converter.setParseAction(firstToken)
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    def reportLocation(s,l,t):
        return l

    locator = Empty().setParseAction(reportLocation)
    startPart = locator("locn_start")
    valuePart = expr("value")
    # the end marker must not skip whitespace
    endPart = locator.copy().leaveWhitespace()("locn_end")
    return Group(startPart + valuePart + endPart)


# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the regex-style '[...]' character-set strings parsed by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the hex digits that follow the x/X marker.  Stripping the prefix with
# lstrip(r'\0x') would also remove leading zero digits of the value itself
# (so \x00 became int("")) and would not strip an upper-case X.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       If the string cannot be parsed or expanded, an empty string is returned.
    """
    def _expanded(part):
        # a range arrives as a two-item ParseResults; anything else is one character
        if not isinstance(part,ParseResults):
            return part
        return ''.join(unichr(c) for c in range(ord(part[0]),ord(part[1])+1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # malformed input yields "" by design; KeyboardInterrupt and
        # SystemExit are no longer swallowed
        return ""
3408
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.  The returned parse action raises
       C{ParseException} when the match does not start at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actual = col(locn,strg)
        if actual == n:
            return
        raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.

       The returned action accepts any number of positional arguments
       (including the full C{(s, l, t)} form) and returns a new
       one-element list C{[replStr]} on every call.
    """
    def _replFunc(*args):
        # build a fresh list each time so callers never share one mutable result
        return [replStr]
    return _replFunc
3426
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
       Returns the first token without its first and last character.
    """
    quoted = t[0]
    return quoted[1:-1]
3433
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.  Every token is
    passed through C{_ustr} first; a new list is returned."""
    return [ _ustr(tok).upper() for tok in t ]
3437
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.  Every token is
    passed through C{_ustr} first; a new list is returned."""
    return [ _ustr(tok).lower() for tok in t ]
3441
def keepOriginalText(s,startLoc,t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions.  The token list C{t} is
       emptied in place, refilled with the matched slice of C{s}, and returned."""
    # getTokensEndLoc() inspects the call stack, so it is called directly
    # from this function, exactly one frame below the parse action.
    try:
        stopLoc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:stopLoc]
    del t[:]
    t += ParseResults(originalText)
    return t
3453
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens.

       Looks up the call stack, starting two frames above this function, for
       a frame of a function named C{_parseNoCache} and returns the value of
       its local variable C{loc}.  Raises C{ParseFatalException} when no
       such frame exists.
    """
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for record in frames[2:]:
            if record[3] != "_parseNoCache":
                continue
            return record[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del frames
3469
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       C{tagStr} is either a tag-name string (converted to a C{Keyword},
       caseless unless C{xml} is true) or an expression, in which case its
       C{name} is used as the tag name.  With C{xml} true, attribute values
       must be double-quoted strings; otherwise any quoted string or an
       unquoted word is accepted, the value is optional, and attribute names
       are lower-cased.

       Returns the tuple C{(openTag, closeTag)}; both expressions get a
       results name derived from the tag name and a C{tag} attribute
       holding it.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    else:
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = Suppress("<") + tagStr("tag") + \
                Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue ) ))) + \
                Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/') + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # Name the expressions after the tag name itself (resname), not after
    # the Keyword object: formatting the Keyword would give <"tag"> rather
    # than <tag>.
    openTag = openTag.setResultsName("start"+"".join(resname.replace(":"," ").title().split())).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+"".join(resname.replace(":"," ").title().split())).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3498
def makeHTMLTags(tagStr):
    """Helper that builds the opening and closing tag expressions for an HTML
       tag, given the tag name (or an expression matching it).  Returns the
       C{(openTag, closeTag)} pair produced by C{_makeTags} in non-XML mode."""
    return _makeTags(tagStr, xml=False)
3502
def makeXMLTags(tagStr):
    """Helper that builds the opening and closing tag expressions for an XML
       tag, given the tag name (or an expression matching it).  Returns the
       C{(openTag, closeTag)} pair produced by C{_makeTags} in XML mode."""
    return _makeTags(tagStr, xml=True)
3506
def withAttribute(*args,**attrDict):
    """Helper that creates a validating parse action for start tags built with
       C{L{makeXMLTags}} or C{L{makeHTMLTags}}.  Attach the returned parse action
       to a start tag to require particular attribute values, which avoids false
       matches on common tags such as C{<TD>} or C{<DIV>}.

       The required attribute names and values may be given as:
        - keyword arguments, as in C{(align="right")}, or
        - an explicit dict with the C{**} operator, when an attribute name is also a
          Python reserved word, as in C{**{"class":"Customer", "align":"right"}}
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       When positional name-value tuples are given, keyword arguments are ignored.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify only that an attribute is present, whatever its value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required
       attribute is missing or has a different value.
    """
    source = args if args else attrDict.items()
    # snapshot as a list of (name, value) pairs; also validates each item is a pair
    required = [(name, value) for name, value in source]

    def pa(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa

# sentinel meaning "the attribute must be present, any value is acceptable"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} for matching on a C{class}
       attribute - needed because C{class} is a reserved word in Python and so
       cannot be passed as a plain keyword argument.

       Parameters:
        - classname - required value of the class attribute
        - namespace - optional namespace prefix; when given, the attribute
          tested is C{"<namespace>:class"} (default='')
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
3548 3549 opAssoc = _Constants() 3550 opAssoc.LEFT = object() 3551 opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar, listed from the tightest-binding level to the
          loosest; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Raises C{ValueError} if an operator definition has an unsupported
       number of terms, an unrecognized associativity, or (for numTerms=3)
       an opExpr that is not a pair of expressions.
    """
    ret = Forward()
    # innermost level: a bare operand, or a whole expression in parentheses
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()

        # Left-associative levels take the next-tighter level (lastExpr) as
        # their trailing operands; right-associative levels recurse into
        # themselves (thisExpr) instead.
        if rightLeftAssoc == opAssoc.LEFT:
            isLeft = True
            operand = lastExpr
        elif rightLeftAssoc == opAssoc.RIGHT:
            isLeft = False
            operand = thisExpr
        else:
            raise ValueError("operator must indicate right or left associativity")

        if arity == 1:
            if isLeft:
                # postfix operator(s) following the operand
                lookahead = lastExpr + opExpr
                grouped = lastExpr + OneOrMore( opExpr )
            else:
                # prefix operator; try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = opExpr.expr + thisExpr
                grouped = opExpr + thisExpr
        elif arity == 2:
            if opExpr is not None:
                lookahead = lastExpr + opExpr + operand
                grouped = lastExpr + OneOrMore( opExpr + operand )
            else:
                # no operator expression: operands are simply adjacent
                lookahead = lastExpr + operand
                grouped = lastExpr + OneOrMore( operand )
        elif arity == 3:
            lookahead = lastExpr + opExpr1 + operand + opExpr2 + operand
            grouped = lastExpr + opExpr1 + operand + opExpr2 + operand
        else:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        # the lookahead keeps a lone operand from being wrapped in a Group
        matchExpr = FollowedBy(lookahead) + Group( grouped )
        if pa:
            matchExpr.setParseAction( pa )
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
3625 operatorPrecedence = infixNotation 3626 3627 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes") 3628 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes") 3629 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes") 3630 unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If no expression is given for C{content}, the nested expression captures
       all whitespace-delimited content between delimiters as a list of separate
       values; in that case C{opener} and C{closer} must both be strings.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if C{opener} and C{closer} are the same, or if
       C{content} is omitted while either delimiter is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        stripToken = lambda t:t[0].strip()
        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is not None:
            # one-character delimiters can simply be excluded from the character set
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(stripToken))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters need negative lookahead before each character
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(stripToken))

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3683
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified in place: a backslash
       followed by a line end is added to its ignorable expressions.
    """
    def checkPeerIndent(s,l,t):
        # nothing to compare once the end of the input is reached
        if l >= len(s):
            return
        curCol = col(l,s)
        expectedCol = indentStack[-1]
        if curCol > expectedCol:
            raise ParseFatalException(s,l,"illegal nesting")
        if curCol != expectedCol:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        # deeper than the current level: open a new indentation level
        indentStack.append( curCol )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        curCol = col(l,s)
        if not indentStack or curCol >= indentStack[-1] or curCol > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # let statements continue across lines ending in a backslash
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets written in srange's own \0x## hex notation
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# generic opening/closing HTML tags, matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# parse action: returns the replacement character for a matched entity,
# or None when the entity name is not in _htmlEntityMap
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with the simpleSQL grammar defined below and print
           the resulting tokens; on a parse failure, print the offending line
           with a caret under the error column, followed by the error."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            for label, value in ( ("tokens = ", tokens),
                                  ("tokens.columns = ", tokens.columns),
                                  ("tokens.tables = ", tokens.tables) ):
                print (label + str(value))
            print (tokens.asXML("SQL",True))
        except ParseBaseException as err:
            caretLine = " "*(err.column-1) + "^"
            print (teststring + "->")
            print (err.line)
            print (caretLine)
            print (err)
        print()

    # a miniature "SELECT columns FROM tables" grammar to exercise the module
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    # valid statements first, then a few deliberately malformed ones
    for sample in (
            "SELECT * from XYZZY, ABC",
            "select * from SYS.XYZZY",
            "Select A from Sys.dual",
            "Select AA,BB,CC from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Select A, B, C from Sys.dual",
            "Xelect A, B, C from Sys.dual",
            "Select A, B, C frox Sys.dual",
            "Select",
            "Select ^^^ frox Sys.dual",
            "Select A, B, C from Sys.dual, Table2 ",
            ):
        test( sample )

    """
    CHANGES
    UnitTests.py
    """