Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.0" 
  61  __versionTime__ = "7 Feb 2016 14:09" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75  import traceback 
  76   
  77  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  78   
  79  __all__ = [ 
  80  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  81  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  82  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  83  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  84  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  85  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  86  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
  87  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
  88  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
  89  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
  90  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
  91  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
  92  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
  93  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
  94  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
  95  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
  96  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
  97  ] 
  98   
  99  PY_3 = sys.version.startswith('3') 
 100  if PY_3: 
 101      _MAX_INT = sys.maxsize 
 102      basestring = str 
 103      unichr = chr 
 104      _ustr = str 
 105   
 106      # build list of single arg builtins, that can be used as parse actions 
 107      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 108   
 109  else: 
 110      _MAX_INT = sys.maxint 
 111      range = xrange 
112 113 - def _ustr(obj):
114 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 115 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 116 then < returns the unicode object | encodes it with the default encoding | ... >. 117 """ 118 if isinstance(obj,unicode): 119 return obj 120 121 try: 122 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 123 # it won't break any existing code. 124 return str(obj) 125 126 except UnicodeEncodeError: 127 # Else encode it 128 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 129 xmlcharref = Regex('&#\d+;') 130 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 131 return xmlcharref.transformString(ret)
132 133 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 134 singleArgBuiltins = [] 135 import __builtin__ 136 for fname in "sum len sorted reversed list tuple set any all min max".split(): 137 try: 138 singleArgBuiltins.append(getattr(__builtin__,fname)) 139 except AttributeError: 140 continue 141 142 _generatorType = type((y for y in range(1)))
143 144 -def _xml_escape(data):
145 """Escape &, <, >, ", ', etc. in a string of data.""" 146 147 # ampersand must be replaced first 148 from_symbols = '&><"\'' 149 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 150 for from_,to_ in zip(from_symbols, to_symbols): 151 data = data.replace(from_, to_) 152 return data
153
class _Constants(object):
    # Empty class: defines no attributes or methods of its own.
    # NOTE(review): presumably instantiated elsewhere in the module as a plain
    # attribute namespace - confirm against the rest of the file.
    pass
156 157 alphas = string.ascii_uppercase + string.ascii_lowercase 158 nums = "0123456789" 159 hexnums = nums + "ABCDEFabcdef" 160 alphanums = alphas + nums 161 _bslash = chr(92) 162 printables = "".join(c for c in string.printable if c not in string.whitespace)
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor is kept as small and fast as possible.
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        """Store the parse location, message, input string and parser element.

        When C{msg} is omitted, the first argument is taken to be the message
        and the stored input string is empty.
        """
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.parserElement = elem

    def __getattr__(self, aname):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
              (also available as C{column})
            - line - returns the line containing the exception text

           Any other missing attribute raises C{AttributeError}.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty C{markerString} leaves the line unmarked; the result is
           stripped of surrounding whitespace either way.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = "".join((text[:split_at], markerString, text[split_at:]))
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes alongside the class attributes
        computed = "lineno col line".split()
        return computed + dir(type(self))
210
class ParseException(ParseBaseException):
    """exception thrown when a parse expression does not match;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
219
class ParseFatalException(ParseBaseException):
    """user-throwable exception, to be raised when inconsistent parse
       content is found; stops all parsing immediately"""
224
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""

    def __init__(self, pe):
        """Build this exception from the fields of an existing exception C{pe}."""
        pstr, loc, msg, elem = pe.pstr, pe.loc, pe.msg, pe.parserElement
        super(ParseSyntaxException, self).__init__(pstr, loc, msg, elem)
232
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        """Remember the chain of parse elements that was being examined."""
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: a bare tuple on the right of '%' would
        # be unpacked as several format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
253
254 -class _ParseResultsWithOffset(object):
255 - def __init__(self,p1,p2):
256 self.tup = (p1,p2)
257 - def __getitem__(self,i):
258 return self.tup[i]
259 - def __repr__(self):
260 return repr(self.tup)
261 - def setOffset(self,i):
262 self.tup = (self.tup[0],i)
263
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})
       """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True):
        # An existing instance is returned as-is instead of being wrapped;
        # __doinit tells __init__ whether it is looking at a fresh object.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance):
        """Initialise the token list and, when C{name} is given, the named entry.

        - toklist - a list (copied), a generator (consumed) or a single token
        - name - optional results name under which the tokens are stored
        - asList - when true, the named value is stored as a nested C{ParseResults}
        - modal - when false, C{name} is recorded as an accumulating name, so
          lookups return every stored value instead of only the last one
        - isinstance - the builtin bound as a default argument (fast local lookup)
        """
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            # skip empty values (None, '' or []) - nothing to store under the name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])):
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(), 0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        self[name] = toklist

    def __getitem__(self, i):
        """Return a token for an int/slice index, or the named result for any other key.

        For a key that is not an accumulating name the last stored value is
        returned; for an accumulating name all stored values are returned as
        a new C{ParseResults}.
        """
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        if i not in self.__accumNames:
            return self.__tokdict[i][-1][0]
        return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        """Store C{v} as a token (int key) or append it to the named results for C{k}."""
        if isinstance(v, _ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__(self, i):
        """Delete tokens (int/slice index) or a results name (any other key)."""
        if isinstance(i, (int, slice)):
            mylen = len(self.__toklist)
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name, occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__(self, k):
        # membership tests look at results names, not at the token list
        return k in self.__tokdict

    def __len__(self):
        return len(self.__toklist)

    def __bool__(self):
        return (not not self.__toklist)
    __nonzero__ = __bool__

    def __iter__(self):
        return iter(self.__toklist)

    def __reversed__(self):
        return iter(self.__toklist[::-1])

    def iterkeys(self):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues(self):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems(self):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys(self):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values(self):
            """Returns all named result values."""
            return list(self.itervalues())

        def items(self):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys(self):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop(self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().

           The only accepted keyword argument is C{default}; any other one
           raises C{TypeError}."""
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                len(args) == 1 or
                args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert(self, index, insStr):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name, occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append(self, item):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend(self, itemseq):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear(self):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__(self, name):
        """Attribute-style access to named results; unknown names yield C{""}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__(self, other):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other):
        """Merge the tokens, named results and accumulating names of C{other} into self."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # named results of `other` are shifted past the tokens already here
            addoffset = lambda a: offset if a < 0 else a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                              for (k, vlist) in otheritems for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update(other.__accumNames)
        return self

    def __radd__(self, other):
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__(self):
        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))

    def __str__(self):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList(self, sep=''):
        """Return the tokens as a flat list of strings, with C{sep} between top-level items."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(_ustr(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens.
           Nested C{ParseResults} become nested lists; other tokens are
           returned unchanged."""
        return [res.asList() if isinstance(res, ParseResults) else res for res in self.__toklist]

    def asDict(self):
        """Returns the named parse results as a nested dictionary."""
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems
        return dict((k, v.asDict()) if isinstance(v, ParseResults) else (k, v) for k, v in item_fn())

    def copy(self):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update(self.__accumNames)
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

           - doctag - tag for the enclosing element; defaults to this object's
             results name, or C{ITEM} when it has none
           - namedItemsOnly - when true, tokens without a results name are skipped
           - indent - prefix written before the enclosing element's tags
           - formatted - when false, newlines and indentation are omitted
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1], k) for (k, vlist) in self.__tokdict.items()
                          for v in vlist)
        # NOTE(review): whitespace runs were collapsed in the listing this was
        # reconstructed from - confirm the width of this indent literal.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        for i, res in enumerate(self.__toklist):
            if isinstance(res, ParseResults):
                if i in namedItems:
                    out += [res.asXML(namedItems[i],
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
                else:
                    out += [res.asXML(None,
                                      namedItemsOnly and doctag is None,
                                      nextLevelIndent,
                                      formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [nl, nextLevelIndent, "<", resTag, ">",
                        xmlBodyText,
                        "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        # reverse lookup: the results name under which the object `sub` is stored
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression.

           Falls back to the name under which the parent stores this object,
           then to the single results name defined at offset 0 or -1 when this
           object holds exactly one token; otherwise returns C{None}."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
                len(self.__tokdict) == 1 and
                # next(iter(...)) instead of [0]: Python 3 dict views are not indexable
                next(iter(self.__tokdict.values()))[0][1] in (0, -1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        NL = '\n'
        out.append(indent + _ustr(self.asList()))
        # NOTE(review): whitespace runs were collapsed in the listing this was
        # reconstructed from - confirm the width of the ' ' padding literals.
        if self.haskeys():
            items = sorted(self.items())
            for k, v in items:
                if out:
                    out.append(NL)
                out.append("%s%s- %s: " % (indent, (' ' * depth), k))
                if isinstance(v, ParseResults):
                    if v:
                        out.append(v.dump(indent, depth + 1))
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv, ParseResults) for vv in self):
            v = self
            for i, vv in enumerate(v):
                if isinstance(vv, ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' ' * (depth)), i, indent, (' ' * (depth + 1)), vv.dump(indent, depth + 1)))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' ' * (depth)), i, indent, (' ' * (depth + 1)), _ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the weakly referenced parent is dereferenced so it can be pickled
        return (self.__toklist,
                (self.__tokdict.copy(),
                 self.__parent is not None and self.__parent() or None,
                 self.__accumNames,
                 self.__name))

    def __setstate__(self, state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        # expose results names next to the regular class attributes
        return (dir(type(self)) + list(self.keys()))
700 701 collections.MutableMapping.register(ParseResults)
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting exactly on a newline character reports column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind gives -1 when no newline precedes loc, which makes columns 1-based
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
715
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
727
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       """
    # rfind returns -1 when there is no earlier newline, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
737
def _defaultStartDebugAction(instring, loc, expr):
    """Print which expression is about to be tried at C{loc}, with its line and column."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print ("Match " + expr_text + " at loc " + loc_text + position)
740
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Print the expression that matched together with its tokens as a list."""
    expr_text = _ustr(expr)
    token_text = str(toks.asList())
    print ("Matched " + expr_text + " -> " + token_text)
743
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Print the exception that was raised; the other arguments are not used."""
    exc_text = _ustr(exc)
    print ("Exception raised:" + exc_text)
746
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments, ignores them and returns C{None}."""
    return None
750 751 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 752 #~ 'decorator to trim function calls to match the arity of the target' 753 #~ def _trim_arity(func, maxargs=3): 754 #~ if func in singleArgBuiltins: 755 #~ return lambda s,l,t: func(t) 756 #~ limit = 0 757 #~ foundArity = False 758 #~ def wrapper(*args): 759 #~ nonlocal limit,foundArity 760 #~ while 1: 761 #~ try: 762 #~ ret = func(*args[limit:]) 763 #~ foundArity = True 764 #~ return ret 765 #~ except TypeError: 766 #~ if limit == maxargs or foundArity: 767 #~ raise 768 #~ limit += 1 769 #~ continue 770 #~ return wrapper 771 772 # this version is Python 2.x-3.x cross-compatible 773 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with more arguments than it accepts.

    Members of C{singleArgBuiltins} are wrapped in a three-argument lambda
    that passes only the last argument on.  Any other callable gets a wrapper
    that calls it with the full argument tuple first and, each time that call
    itself raises C{TypeError}, retries with one more leading argument
    dropped.  The number of dropped arguments is remembered between calls.

    A C{TypeError} is re-raised unchanged when a call has already succeeded
    once, when it did not originate on the marked call line below, or when
    the retry budget controlled by C{maxargs} is used up.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells shared with the nested function
    # (no 'nonlocal' statement, to stay compatible with Python 2)
    limit = [0]
    foundArity = [False]
    def wrapper(*args):
        while 1:
            try:
                # The trailing marker comment on the next line is matched against
                # the traceback's source text below - it must stay on this line.
                ret = func(*args[limit[0]:]) #~@$^*)+_(&%#!=-`~;:"[]{}
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # source text of the innermost frame in the traceback
                        exc_source_line = traceback.extract_tb(tb)[-1][-1]
                        if not exc_source_line.endswith('#~@$^*)+_(&%#!=-`~;:"[]{}'):
                            raise
                    finally:
                        # drop the traceback reference explicitly
                        del tb

                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise
    return wrapper
804 -class ParserElement(object):
805 """Abstract base level parser element class.""" 806 DEFAULT_WHITE_CHARS = " \n\t\r" 807 verbose_stacktrace = False 808 809 @staticmethod
def setDefaultWhitespaceChars(chars):
    """Overrides the default whitespace chars by rebinding
    C{ParserElement.DEFAULT_WHITE_CHARS} to C{chars}.
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
814 815 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Set class to be used for inclusion of string literals into a parser;
    it is stored as C{ParserElement.literalStringClass}.
    """
    setattr(ParserElement, "literalStringClass", cls)
821
def __init__(self, savelist=False):
    """Set every per-element setting to its default value.

    C{savelist} is stored as C{saveAsList}.
    """
    # actions and callbacks
    self.parseAction = []
    self.failAction = None
    self.debugActions = (None, None, None)  # custom debug actions
    self.callDuringTry = False

    # naming and results handling
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True  # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # whitespace, tabs and ignorable expressions
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []

    # parsing state and optimisation flags
    self.mayReturnEmpty = False  # used when checking for left-recursion
    self.mayIndexError = True  # used to optimize exception handling for subclasses that don't advance parse index
    self.callPreparse = True  # used to avoid redundant calls to preParse
    self.streamlined = False
    self.debug = False
    self.re = None
844
def copy(self):
    """Make a copy of this C{ParserElement}.  Useful for defining different parse actions
       for the same parsing pattern, using copies of the original parse element.

       The copy is shallow, except that the parse-action and ignore-expression
       lists are duplicated; when C{copyDefaultWhiteChars} is set, the copy
       takes the current default whitespace characters."""
    duplicate = copy.copy(self)
    duplicate.parseAction = self.parseAction[:]
    duplicate.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        duplicate.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return duplicate
854
def setName(self, name):
    """Define name for this expression, for use in debugging.

    Also updates the error message to C{"Expected <name>"}, including the
    message of an already attached C{exception} attribute.  Returns C{self}."""
    self.name = name
    self.errmsg = "Expected " + self.name
    if not hasattr(self, "exception"):
        return self
    self.exception.msg = self.errmsg
    return self
862
def setResultsName(self, name, listAllMatches=False):
    """Define name for referencing matching tokens as a nested attribute
       of the returned parse results.
       NOTE: this returns a *copy* of the original C{ParserElement} object;
       this is so that the client can define a basic element, such as an
       integer, and reference it in multiple places with different names.

       A trailing C{"*"} on C{name} is stripped and has the same effect as
       passing C{listAllMatches=True}.

       You can also set results names using the abbreviated syntax,
       C{expr("name")} in place of C{expr.setResultsName("name")} -
       see L{I{__call__}<__call__>}.
    """
    clone = self.copy()
    collect_all = listAllMatches
    if name.endswith("*"):
        name = name[:-1]
        collect_all = True
    clone.resultsName = name
    clone.modalResults = not collect_all
    return clone
881
def setBreak(self, breakFlag=True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable.

       Enabling replaces C{_parse} with a wrapper that remembers the original
       method; disabling restores it when such a wrapper is installed.
       Returns C{self}.
    """
    if not breakFlag:
        current = self._parse
        if hasattr(current, "_originalParseMethod"):
            self._parse = current._originalParseMethod
        return self

    wrapped = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)

    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
899
def setParseAction( self, *fns, **kwargs ):
    """Replace this element's parse actions with C{fns}.

    A parse action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If a function modifies the tokens, it can return them as its return
    value, and the modified list of tokens will replace the original.
    Otherwise, it does not need to return any value.

    The optional keyword argument C{callDuringTry} (default C{False}) is
    stored on the element as C{self.callDuringTry}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>}
    for more information on parsing strings containing C{<TAB>}s, and
    suggested methods to maintain a consistent view of the parsed string,
    the parse location, and line and column positions within the parsed string.
    """
    self.parseAction = [ _trim_arity(action) for action in fns ]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
920
def addParseAction( self, *fns, **kwargs ):
    """Append C{fns} to this element's existing list of parse actions.

    See L{I{setParseAction}<setParseAction>} for the parse action calling
    conventions.  C{callDuringTry} is sticky: once it has been enabled, a
    later call cannot turn it off.
    """
    wrapped = [ _trim_arity(action) for action in fns ]
    self.parseAction += wrapped
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
926
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to this expression's list of parse actions.

    Each function in C{fns} follows the parse action calling conventions
    described in L{I{setParseAction}<setParseAction>}.  When a predicate
    returns a false value, a C{ParseException} is raised at the match
    location; otherwise the tokens are passed through unchanged.

    Optional keyword arguments:
     - message - custom message for the raised exception
       (default "failed user-defined condition")
     - callDuringTry - stored on the element; once enabled it stays enabled
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeCondition(predicate):
        # Bind each predicate in its own scope.  Defining pa directly inside
        # the loop would make every pa look up the loop variable at call time
        # and so test only the last function in fns.
        def pa(s,l,t):
            if not bool(predicate(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        # normalize arity once, up front, instead of on every invocation
        self.parseAction.append(makeCondition(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Register a callback to run when parsing fails at this expression.

    The fail action is called as C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    Its return value is not used.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}.
    """
    self.failAction = fn
    return self
953
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every leading match of the ignore expressions.

    Each ignorable is matched repeatedly until it fails; the whole list is
    retried for as long as the previous pass matched something, because
    skipping one ignorable can expose another.  Returns the new location.
    """
    progressed = True
    while progressed:
        progressed = False
        for ignorable in self.ignoreExprs:
            try:
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    progressed = True
            except ParseException:
                # this ignorable no longer matches here; try the next one
                pass
    return loc
966
def preParse( self, instring, loc ):
    """Return the location at which matching should actually start.

    Skips any ignorable expressions first, then (when C{skipWhitespace} is
    set) any run of characters found in C{whiteChars}.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
978
def parseImpl( self, instring, loc, doActions=True ):
    """Default match implementation: consume nothing and produce no tokens.

    Returns the unchanged location paired with an empty token list.
    """
    return (loc, [])
981
def postParse( self, instring, loc, tokenlist ):
    """Hook called with the tokens produced by C{parseImpl}.

    This default implementation returns C{tokenlist} unchanged.
    """
    return tokenlist
#~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Core (non-memoized) parse of this element at C{loc}.

    Steps:
     1. optionally run C{preParse} to skip ignorables/whitespace
     2. call C{parseImpl} at the resulting location, converting an
        C{IndexError} into a C{ParseException} positioned at end of string
     3. pass the tokens through C{postParse} and wrap them in C{ParseResults}
     4. run the parse actions (when C{doActions} or C{self.callDuringTry});
        a non-None return value from an action replaces the current tokens

    When C{self.debug} is set or a fail action is defined, the slower branch
    is taken that also invokes the debug actions and the fail action.

    Returns a C{(loc, ParseResults)} tuple; raises C{ParseBaseException}
    subclasses on failure.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # Guard on preloc, not loc: preParse may have advanced to the end of
        # the string, and parseImpl is called at preloc.  Testing the
        # pre-skip loc let elements with mayIndexError=False leak a raw
        # IndexError instead of a ParseException.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1057
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with C{doActions=False}.

    Returns the location following the match.  A C{ParseFatalException} is
    downgraded to an ordinary C{ParseException} reported at C{loc}; other
    exceptions raised by C{_parse} propagate unchanged.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1063
def canParseNext(self, instring, loc):
    """Return C{True} if C{tryParse} succeeds at C{loc}, C{False} if it
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
# Memoizing variant of _parseNoCache.  During backtracking the same element
# is tried repeatedly with identical arguments, so both successful results
# and parse exceptions are remembered and replayed.
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Parse at C{loc}, consulting and updating C{ParserElement._exprArgCache}.

    Cached successes are returned with a fresh copy of the stored results;
    cached exceptions are re-raised.
    """
    key = (self,instring,loc,callPreParse,doActions)
    if key not in ParserElement._exprArgCache:
        try:
            outcome = self._parseNoCache( instring, loc, doActions, callPreParse )
            # store a copy so later changes to the returned results do not alter the cache
            ParserElement._exprArgCache[ key ] = (outcome[0],outcome[1].copy())
            return outcome
        except ParseBaseException as pe:
            pe.__traceback__ = None
            ParserElement._exprArgCache[ key ] = pe
            raise

    cached = ParserElement._exprArgCache[ key ]
    if isinstance(cached, Exception):
        raise cached
    return (cached[0],cached[1].copy())
# default parse entry point is the non-memoizing implementation;
# enablePackrat() rebinds it to _parseCache
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
# (keys and values are written by _parseCache)
_exprArgCache = {}
@staticmethod
def resetCache():
    """Discard every entry in the shared C{_exprArgCache}."""
    ParserElement._exprArgCache.clear()
# set to True by the first call to enablePackrat()
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Switch on "packrat" parsing, which memoizes the parsing logic.

    Repeated parse attempts at the same string location (common in complex
    grammars) then return a cached value instead of re-executing the
    parsing/validating code.  Both valid results and parsing exceptions
    are memoized.

    This speedup may break existing programs that use parse actions with
    side-effects, which is why packrat parsing is disabled when pyparsing
    is first imported.  To activate it, call the class method
    C{ParserElement.enablePackrat()}.  If your program uses C{psyco} to
    "compile as you go", you must call C{enablePackrat} before calling
    C{psyco.full()}; otherwise Python will crash.  For best results, call
    C{enablePackrat()} immediately after importing pyparsing.

    Calling this method more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1120
def parseString( self, instring, parseAll=False ):
    """Parse C{instring} from its beginning with this expression.

    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # require that only skippable text remains after the match
            loc = self.preParse( instring, loc )
            endCheck = Empty() + StringEnd()
            endCheck._parse( instring, loc )
        return tokens
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1165
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
       C{overlap} is specified, then overlapping matches will be reported.

       This is a generator: it yields C{(tokens, start, end)} tuples lazily.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once; they are called on every iteration
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # skip ignorables/whitespace ourselves so that the start of
                # the match (preloc) can be reported to the caller
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here; retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lowercase) is a separate
                        # variable from 'nextLoc'; it holds the re-preparsed
                        # position and is only used for the comparison below
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match consumed nothing beyond loc; step forward to guarantee progress
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1215
def transformString( self, instring ):
    """Extension to C{L{scanString}} that rewrites matched text.

    To use C{transformString}, define a grammar and attach a parse action
    to it that modifies the returned token list.  Invoking
    C{transformString()} on a target string will then scan for matches and
    replace each matched span with the tokens returned for it; text between
    matches is copied through unchanged.  Returns the transformed string.

    Side effect: sets C{self.keepTabs = True}.
    """
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks,list):
                    pieces += toks
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(pieces)))
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1248
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}: collect just the tokens of
    every match into a single C{ParseResults}, discarding the start and
    end locations.  May be called with optional C{maxMatches} argument,
    to clip searching after 'n' matches are found.
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults( found )
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
1262
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1272
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1282
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1292
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1302
1303 - def __mul__(self,other):
1304 """Implementation of * operator, allows use of C{expr * 3} in place of 1305 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1306 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1307 may also include C{None} as in: 1308 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1309 to C{expr*n + L{ZeroOrMore}(expr)} 1310 (read as "at least n instances of C{expr}") 1311 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1312 (read as "0 to n instances of C{expr}") 1313 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1314 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1315 1316 Note that C{expr*(None,n)} does not raise an exception if 1317 more than n exprs exist in the input stream; that is, 1318 C{expr*(None,n)} does not enforce a maximum number of expr 1319 occurrences. If this behavior is desired, then write 1320 C{expr*(None,n) + ~expr} 1321 1322 """ 1323 if isinstance(other,int): 1324 minElements, optElements = other,0 1325 elif isinstance(other,tuple): 1326 other = (other + (None, None))[:2] 1327 if other[0] is None: 1328 other = (0, other[1]) 1329 if isinstance(other[0],int) and other[1] is None: 1330 if other[0] == 0: 1331 return ZeroOrMore(self) 1332 if other[0] == 1: 1333 return OneOrMore(self) 1334 else: 1335 return self*other[0] + ZeroOrMore(self) 1336 elif isinstance(other[0],int) and isinstance(other[1],int): 1337 minElements, optElements = other 1338 optElements -= minElements 1339 else: 1340 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1341 else: 1342 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1343 1344 if minElements < 0: 1345 raise ValueError("cannot multiply ParserElement by negative value") 1346 if optElements < 0: 1347 raise ValueError("second tuple value must be greater or equal to first tuple value") 1348 if minElements == optElements == 0: 1349 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1350 1351 if (optElements): 1352 def makeOptionalList(n): 1353 if n>1: 1354 return Optional(self + makeOptionalList(n-1)) 1355 else: 1356 return Optional(self)
1357 if minElements: 1358 if minElements == 1: 1359 ret = self + makeOptionalList(optElements) 1360 else: 1361 ret = And([self]*minElements) + makeOptionalList(optElements) 1362 else: 1363 ret = makeOptionalList(optElements) 1364 else: 1365 if minElements == 1: 1366 ret = self 1367 else: 1368 ret = And([self]*minElements) 1369 return ret 1370
1371 - def __rmul__(self, other):
1372 return self.__mul__(other)
1373
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1383
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1393
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1403
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1413
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1423
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and C{None} is returned.
    """
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1433
def __invert__( self ):
    """Implementation of ~ operator - wraps this element in a C{L{NotAny}}."""
    negated = NotAny( self )
    return negated
1437
1438 - def __call__(self, name=None):
1439 """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}:: 1440 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1441 could be written as:: 1442 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1443 1444 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1445 passed as C{True}. 1446 1447 If C{name} is omitted, same as calling C{L{copy}}. 1448 """ 1449 if name is not None: 1450 return self.setResultsName(name) 1451 else: 1452 return self.copy()
1453
def suppress( self ):
    """Return this C{ParserElement} wrapped in C{Suppress}, so that its
       output is suppressed; useful to keep punctuation from cluttering up
       returned output.
    """
    wrapped = Suppress( self )
    return wrapped
1459
def leaveWhitespace( self ):
    """Turn off whitespace skipping before this element's pattern is matched.

       This is normally only used internally by the pyparsing module, but
       may be needed in some whitespace-sensitive grammars.  Returns C{self}.
    """
    setattr( self, "skipWhitespace", False )
    return self
1467
def setWhitespaceChars( self, chars ):
    """Override the default whitespace chars for this element.

       Re-enables whitespace skipping and stops the element from tracking
       the class-wide default whitespace set.  Returns C{self}.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self
1475
def parseWithTabs( self ):
    """Override the default behavior of expanding C{<TAB>}s to spaces before
       parsing the input string.  Must be called before C{parseString} when
       the input grammar contains elements that match C{<TAB>} characters.
       Returns C{self}."""
    setattr( self, "keepTabs", True )
    return self
1482
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       Strings are wrapped in C{Suppress}.  An expression that is already a
       C{Suppress} is added only if not already present; any other
       expression is copied and wrapped in a new C{Suppress}.  Returns C{self}.
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
1497
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks.

       Any callback passed as a false value (e.g. C{None}) is replaced by
       the corresponding module default action.  Returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1505
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1514
1515 - def __str__( self ):
1516 return self.name
1517
def __repr__( self ):
    """Return the C{_ustr} conversion of this element."""
    text = _ustr(self)
    return text
1520
def streamline( self ):
    """Mark this element as streamlined and drop its cached string
    representation.  Returns C{self}."""
    self.strRepr = None
    self.streamlined = True
    return self
1525
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; this base implementation does nothing."""
    return None
1528
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This implementation ignores C{validateTrace} and starts the recursion
    check from an empty element list.
    """
    visited = []
    self.checkRecursion( visited )
1532
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.

    If C{file_or_filename} has a C{read()} method it is used as-is (and is
    not closed here).  Otherwise it is treated as a filename: the file is
    opened in text mode, read entirely, and closed before parsing.

    C{parseAll} is passed through to L{I{parseString}<parseString>}, whose
    result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object; the context manager guarantees the handle
        # is closed even if read() raises
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1552
def __eq__(self,other):
    """Equality test.

    Against another C{ParserElement}: true for the same object or for equal
    instance dictionaries.  Against a string: true when this expression
    parses the whole string (C{parseAll=True}).  Anything else is compared
    through C{super(ParserElement,self)}.
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1564
1565 - def __ne__(self,other):
1566 return not (self == other)
1567
1568 - def __hash__(self):
1569 return hash(id(self))
1570
1571 - def __req__(self,other):
1572 return self == other
1573
1574 - def __rne__(self,other):
1575 return not (self == other)
1576
def runTests(self, tests, parseAll=False):
    """Execute the parse expression on a series of test strings, showing each
       test, the parsed results or where the parse failed. Quick and easy way to
       run a parse expression against a list of sample strings.

       Output is printed; nothing is returned.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself rather than via the unbound
        # str.strip, which rejects unicode strings under Python 2
        tests = [ entry.strip() for entry in tests.splitlines() ]
    for t in tests:
        out = [t]
        try:
            out.append(self.parseString(t, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in t:
                # multi-line test: show only the offending line, with a caret under the error column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc,t)-1) + '^')
            else:
                out.append(' '*pe.loc + '^')
            out.append(str(pe))
        out.append('')
        print('\n'.join(out))
1601
class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__( self ):
        # tokens initialize the base element with savelist=False
        super(Token,self).__init__( savelist=False )
1607
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
1616
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.errmsg = "Unmatchable token"
        self.name = "NoMatch"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise C{ParseException} at C{loc}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
1629
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            # cache the first character for the quick pre-check in parseImpl
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            # an empty literal is switched to the Empty class; the
            # attributes assigned below are still set on the instance
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        # returns (location after the match, the match string) or raises ParseException
        if (instring[loc] == self.firstMatchChar and
            (self.matchLen==1 or instring.startswith(self.match,loc)) ):
            return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# short alias for Literal
_L = Literal
# class used by the ParserElement operator methods to convert plain string operands
ParserElement.literalStringClass = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # NOTE(review): only a warning is issued for an empty keyword;
            # firstMatchChar is left unset in that case
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparison is done in upper case, for both the match
            # string and the identifier characters
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        # A match requires the keyword text at loc, plus a non-identifier
        # character (or string boundary) both after and before it.
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        c = super(Keyword,self).copy()
        # NOTE(review): the copy's identChars is reset to the class default
        # string (not a set), discarding any custom identChars - confirm intended
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1715
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal stores the upper-cased form for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Match case-insensitively at C{loc}; the returned token is the
        defining literal, not the input text."""
        end = loc+self.matchLen
        if instring[ loc:end ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1732
class CaselessKeyword(Keyword):
    """Caseless version of C{Keyword}.

    Constructs the underlying C{Keyword} with C{caseless=True}.  The token
    returned on a match is C{self.match}, whatever the case of the input text.
    """
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, ignoring case.

        The keyword must not be adjacent to an identifier character on either
        side, which is the same boundary rule C{Keyword.parseImpl} applies in
        caseless mode.  Returns C{(loc + matchLen, self.match)}; raises
        C{ParseException} when there is no match.
        """
        end = loc + self.matchLen
        if ( (instring[ loc:end ].upper() == self.caselessmatch) and
             # following character (if any) must not continue an identifier
             (loc >= len(instring)-self.matchLen or instring[end].upper() not in self.identChars) and
             # preceding character (if any) must not be part of an identifier;
             # without this check "if" would match inside "xif"
             (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1742
class Word(Token):
    """Token for matching words built from sets of allowed characters.
       Defined with a string of all allowed initial characters, an optional
       string of allowed body characters (when omitted, the initial character
       set is used), and an optional minimum, maximum, and/or exact length.
       The default for C{min} is 1 (a minimum < 1 is not valid); the defaults
       for C{max} and C{exact} are 0, meaning no maximum or exact length
       restriction.  An optional C{excludeChars} parameter lists characters to
       be removed from the given character strings; useful to define a word of
       all printables except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(ch for ch in initChars if ch not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(ch for ch in bodyChars if ch not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        # an omitted or empty body set falls back to the initial characters
        body = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = body
        self.bodyChars = set(body)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # fast path: with default length limits and no space among the
        # characters, try to express the word as a compiled regular expression
        defaultLengths = (min==1 and max==0 and exact==0)
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and defaultLengths:
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % (
                    re.escape(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            else:
                self.reString = "[%s][%s]*" % (
                    _escapeRegexRangeChars(self.initCharsOrig),
                    _escapeRegexRangeChars(self.bodyCharsOrig),
                )
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except:
                # if the expression will not compile, parseImpl scans by hand
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc}; return C{(new_loc, matched_text)}.

        Uses the compiled regular expression when one is set on the instance,
        otherwise scans characters one at a time.  Raises C{ParseException}
        when no acceptable word starts at C{loc}.
        """
        if self.re:
            found = self.re.match(instring,loc)
            if not found:
                raise ParseException(instring, loc, self.errmsg, self)
            return found.end(), found.group()

        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = min( start + self.maxLen, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # True when the character just past the match could still extend the word
        bodyCharFollows = loc < instrlen and instring[loc] in bodychars
        rejected = (
            loc - start < self.minLen
            or (self.maxSpecified and bodyCharFollows)
            or (self.asKeyword
                and ((start>0 and instring[start-1] in bodychars) or bodyCharFollows))
        )
        if rejected:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most four characters of each set, followed by "..."
            abbreviate = lambda s: s[:4]+"..." if len(s)>4 else s

            if self.initCharsOrig != self.bodyCharsOrig:
                self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)

        return self.strRepr
1865
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    # type of a compiled regular expression object, used to recognise
    # precompiled patterns passed to the constructor
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled expression object, in
        which case it is used directly and C{flags} is only recorded.
        Raises C{ValueError} for any other type of C{pattern}; a string that
        fails to compile triggers a C{SyntaxWarning} and the compile error is
        re-raised.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern source text, not str() of the compiled
            # object (which is an object repr, not a regular expression)
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled expression at C{loc}.

        Returns C{(end_of_match, ParseResults)} where the results hold the
        whole matched text and one named entry per named group of the
        expression.  Raises C{ParseException} when the expression does not
        match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = ParseResults(result.group())
        # expose named groups as named results
        for k, v in result.groupdict().items():
            ret[k] = v
        return loc,ret

    def __str__( self ):
        try:
            return super(Regex,self).__str__()
        except Exception:
            # no usable name from the base class; fall back to a generated one
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1929
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} (after issuing a C{SyntaxWarning}) when
           C{quoteChar} or C{endQuoteChar} is empty once surrounding
           whitespace has been stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError("quoteChar cannot be the empty string")

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError("endQuoteChar cannot be the empty string")

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Build the regular expression piece by piece.  It starts with the
        # opening quote and a group whose first alternative is "any character
        # that is neither the first end-quote character nor the escape
        # character" (nor a line break, unless multiline).
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            # for a multi-character end quote, also accept any proper prefix
            # of it that is followed by a character which breaks the sequence
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # used by parseImpl to drop the escape char and keep what follows it
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}.

        Returns C{(end_of_match, text)}.  When C{unquoteResults} is set, the
        quote delimiters are stripped, each escape-character sequence is
        replaced by the escaped character, and each C{escQuote} sequence is
        replaced by the end quote string.  Raises C{ParseException} when no
        quoted string starts at C{loc}.
        """
        # cheap first-character test before running the regular expression
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        else:
            result = None
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid escape in a normal literal
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # no usable name from the base class; fall back to a generated one
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2041
class CharsNotIn(Token):
    """Token for matching runs of characters that are *not* in a given set.
       Defined with a string of all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default for C{min} is 1 (a
       minimum < 1 is not valid); the defaults for C{max} and C{exact} are 0,
       meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # leading whitespace is not skipped before matching
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until a disallowed one is found.

        Returns C{(new_loc, matched_text)}; raises C{ParseException} when the
        first character is disallowed or fewer than C{minLen} characters
        were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        stop = min( start+self.maxLen, len(instring) )
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most four of the excluded characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2104
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # display names used to build the element's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        self.name = "".join(White.whiteStrs[c] for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters at C{loc}.

        Returns C{(new_loc, matched_text)}; raises C{ParseException} when the
        first character is not one of them or the run is shorter than
        C{minLen}.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        stop = min( start + self.maxLen, len(instring) )
        while loc < stop and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2152
class _PositionToken(Token):
    """Base class for the position-testing tokens defined below it.

    Names the element after its own class and marks it as possibly
    returning an empty match and as not raising index errors.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2160
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace until the target column.

        Returns the new location; C{loc} is returned unchanged when it is
        already at the target column.
        """
        if col(loc,instring) == self.col:
            return loc
        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        # advance over whitespace only, stopping once the column is reached
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column.

        Raises C{ParseException} when C{loc} is already past the target
        column.
        """
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[ loc: newloc ]
2183
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newline is significant here, so it is removed from the skipped whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc + 1} when the base-class pre-parse stops on a newline, else C{loc}."""
        skipped = super(LineStart,self).preParse(instring,loc)
        return loc + 1 if instring[skipped] == "\n" else loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at the start of a line.

        That is: at position 0, at the position C{preParse} reaches from 0,
        or directly after a newline.  Raises C{ParseException} otherwise.
        """
        if ( loc == 0 or
             loc == self.preParse( instring, 0 ) or
             instring[loc-1] == "\n" ):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)
2203
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newline is significant here, so it is removed from the skipped whitespace
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc}, or the end of the input.

        Returns C{(loc + 1, "\\n")} for a newline and C{(loc + 1, [])} at the
        end of the string; raises C{ParseException} anywhere else.
        """
        instrlen = len(instring)
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"
        if loc == instrlen:
            return loc+1, []
        raise ParseException(instring, loc, self.errmsg, self)
2221
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or at the position that
        C{preParse} reaches from 0 (only whitespace and ignorables before
        C{loc}); raise C{ParseException} otherwise.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2234
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when C{loc} is at or beyond the end of input.

        Returns C{(loc + 1, [])} exactly at the end and C{(loc, [])} beyond
        it; raises C{ParseException} when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc is already past the end of the string
        return loc, []
2250
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0, or where a word character
        follows a non-word character; raise C{ParseException} otherwise.
        """
        if loc == 0:
            return loc, []
        wordchars = self.wordChars
        if instring[loc-1] in wordchars or instring[loc] not in wordchars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2269
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # whitespace must not be skipped, or the word boundary would be missed
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at the end of a word.

        Inside the string, the character at C{loc} must not be a word
        character and the character before C{loc} must be one.  Positions at
        or beyond the end of the string (and any position in an empty
        string) always succeed.  Raises C{ParseException} otherwise.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            if (instring[loc] in self.wordChars or
                # at loc 0 no word precedes; instring[loc-1] would wrap
                # around and test the LAST character of the input instead
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2290
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store C{exprs} as the list C{self.exprs}.

        C{exprs} may be a generator, a single string, a sequence, any other
        iterable, or a single non-iterable object.  A single string, or a
        sequence made up only of strings, is wrapped using
        C{ParserElement.literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement.literalStringClass( exprs ) ]
        # NOTE(review): the collections.Sequence alias was removed in
        # Python 3.10 (it lives in collections.abc) - confirm the Python
        # versions this file must support before changing it.
        elif isinstance( exprs, collections.Sequence ):
            # if sequence of strings provided, wrap each with the literal class
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement.literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # not iterable: treat it as a single expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Append C{other} to the contained expressions and return C{self}."""
        self.exprs.append( other )
        # the cached string representation no longer matches the contents
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies, so the caller's expression objects are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on every contained expression.

        A C{Suppress} instance that is already registered is not added
        again.  Returns C{self}.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for e in self.exprs:
            e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # no usable name from the base class; fall back to a generated one
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten nested
        expressions of the same class.  Returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=None ):
        """Validate every contained expression, then check this expression
        for recursion.

        C{validateTrace} is the list of elements already being validated; it
        is copied, never modified.
        """
        tmp = list(validateTrace or ()) + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2400
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element named C{'-'}.  Once C{And.parseImpl} has passed
        one, a failure in any later expression is raised as a
        C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows that of the first expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order, accumulating tokens.

        Returns C{(new_loc, tokens)}.  After an C{_ErrorStop} marker has been
        passed, parse failures and C{IndexError}s in later expressions are
        raised as C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append C{other} in place; a string is wrapped with
        C{ParserElement.literalStringClass}, as C{Or} and C{MatchFirst} do."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # later expressions are not checked once one cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2465
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression counts as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and parse with the one matching the most text.

        Each alternative is first tried with C{tryParse}; those that succeed
        are then parsed for real, longest match first, and the first one to
        succeed supplies the result.  When none succeeds, the exception that
        got furthest into the input is raised with this expression's message.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        for alt in self.exprs:
            try:
                endLoc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, alt))

        candidates.sort(key=lambda x: -x[0])
        for _, alt in candidates:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
2535
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # with no alternatives at all, the expression counts as possibly empty
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

        When none does, the exception that got furthest into the input is
        raised with this expression's message.
        """
        furthestLoc = -1
        furthestErr = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        trail = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( trail )
2591
2592 2593 -class Each(ParseExpression):
2594 """Requires all given C{ParseExpression}s to be found, but in any order. 2595 Expressions may be separated by whitespace. 2596 May be constructed using the C{'&'} operator. 2597 """
2598 - def __init__( self, exprs, savelist = True ):
2599 super(Each,self).__init__(exprs, savelist) 2600 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 2601 self.skipWhitespace = True 2602 self.initExprGroups = True
2603
def parseImpl( self, instring, loc, doActions=True ):
    """Match all of the contained expressions, in whatever order they occur in the input.

    A trial pass (using C{tryParse}) first works out the order in which the
    expressions match; the expressions are then parsed in that order with
    C{_parse} and their results are merged into a single C{ParseResults}.

    Returns a C{(loc, ParseResults)} tuple.
    Raises C{ParseException} if any required expression was never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions; for Optional,
        # ZeroOrMore and OneOrMore the wrapped expression is tracked, and
        # opt1map maps id(wrapped expr) back to its Optional wrapper
        self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional))
        opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ]
        opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
        self.optionals = opt1 + opt2
        self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
        self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False
    tmpLoc = loc
    tmpReqd = self.required[:]
    tmpOpt = self.optionals[:]
    matchOrder = []

    keepMatching = True
    while keepMatching:
        tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
        failed = []
        for e in tmpExprs:
            try:
                # tryParse returns the location just past the match; carrying it
                # forward lets the next candidate be tested *after* this one.
                # (Testing every candidate at the unchanged start location
                # reports later elements as missing, and never terminates when
                # a repeated expression matches there.)
                tmpLoc = e.tryParse( instring, tmpLoc )
            except ParseException:
                failed.append(e)
            else:
                matchOrder.append(self.opt1map.get(id(e),e))
                if e in tmpReqd:
                    tmpReqd.remove(e)
                elif e in tmpOpt:
                    tmpOpt.remove(e)
        # stop once a complete pass matched nothing
        if len(failed) == len(tmpExprs):
            keepMatching = False

    if tmpReqd:
        missing = ", ".join(_ustr(e) for e in tmpReqd)
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt]

    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults()
    for r in resultlist:
        # keys already present in finalResults get the old and new values
        # combined, instead of being overwritten by the += below
        dups = {}
        for k in r.keys():
            if k in finalResults:
                tmp = ParseResults(finalResults[k])
                tmp += ParseResults(r[k])
                dups[k] = tmp
        finalResults += ParseResults(r)
        for k,v in dups.items():
            finalResults[k] = v
    return loc, finalResults
2660
def __str__( self ):
    """Return the assigned name if there is one, otherwise a cached
    C{"{a & b & ...}"} rendering of the contained expressions."""
    if hasattr(self,"name"):
        return self.name

    cached = self.strRepr
    if cached is None:
        # build the representation once and remember it on the instance
        parts = ( _ustr(e) for e in self.exprs )
        cached = "{" + " & ".join(parts) + "}"
        self.strRepr = cached
    return cached
2669
def checkRecursion( self, parseElementList ):
    """Run C{checkRecursion} on every contained expression, passing a copy
    of C{parseElementList} extended with this element."""
    trail = parseElementList[:] + [ self ]
    for subExpr in self.exprs:
        subExpr.checkRecursion( trail )
2674
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}; a plain string is converted to a C{Literal}.

        When C{expr} is not None, its whitespace, indexing, list-saving,
        pre-parse and ignore settings are copied onto this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate parsing to the wrapped expression; raise C{ParseException}
        if no expression has been set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a private copy
        of the wrapped expression. Returns C{self}."""
        self.skipWhitespace = False
        # only copy when there is an expression to copy; calling copy() on a
        # missing (None) expression would raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as an ignorable expression here and on the wrapped
        expression. A C{Suppress} that is already registered is not added
        again. Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            # pass on the entry the base class just appended
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression. Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
        in C{parseElementList}; otherwise continue the check in the wrapped
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base class string if it can be produced, otherwise a
        cached C{"ClassName:(expr)"} rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # fall through to the generated representation; a bare except
            # would also swallow KeyboardInterrupt and SystemExit
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2746
class FollowedBy(ParseElementEnhance):
    """Positive lookahead. C{FollowedBy} checks that the given parse expression
    matches at the current position, but does *not* advance the parsing
    position within the input string. C{FollowedBy} always returns a null
    token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # the location returned by tryParse is discarded on purpose:
        # the caller gets back the location it passed in
        lookahead = self.expr.tryParse
        lookahead( instring, loc )
        return loc, []
2760
class NotAny(ParseElementEnhance):
    """Negative lookahead. C{NotAny} checks that the given parse expression does
    *not* match at the current position, and does *not* advance the parsing
    position within the input string. Also, C{NotAny} does *not* skip over
    leading whitespace. C{NotAny} always returns a null token list. May be
    constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly: do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        unwantedFound = self.expr.canParseNext(instring, loc)
        if not unwantedFound:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "~{" + _ustr(self.expr) + "}"
            self.strRepr = rendered
        return rendered
2788
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = Literal(sentinel)
        if sentinel is None:
            self.not_ender = None
        else:
            # negated sentinel: its tryParse raises when the sentinel is next
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        parseExpr = self.expr._parse
        skipIgnorables = self._skipIgnorables
        checkEnder = self.not_ender is not None
        if checkEnder:
            tryNotEnder = self.not_ender.tryParse
            # must be at least one (but first see if we are the stopOn sentinel;
            # if so, fail)
            tryNotEnder(instring, loc)
        loc, tokens = parseExpr( instring, loc, doActions, callPreParse=False )
        try:
            hasIgnoreExprs = bool(self.ignoreExprs)
            while True:
                if checkEnder:
                    tryNotEnder(instring, loc)
                preloc = skipIgnorables( instring, loc ) if hasIgnoreExprs else loc
                loc, tmptokens = parseExpr( instring, preloc, doActions )
                if tmptokens or tmptokens.haskeys():
                    tokens += tmptokens
        except (ParseException,IndexError):
            # the first failed repetition (or the sentinel) ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "{" + _ustr(self.expr) + "}..."
            self.strRepr = rendered
        return rendered

    def setResultsName( self, name, listAllMatches=False ):
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2848
class ZeroOrMore(OneOrMore):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=None) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore,self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # zero matches is a success: report the unchanged location and no tokens
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            result = loc, []
        return result

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "[" + _ustr(self.expr) + "]..."
            self.strRepr = rendered
        return rendered
2876
class _NullToken(object):
    """Placeholder type that is always false and renders as an empty string."""
    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__
    def __str__(self):
        return ""

# sentinel used as the default value of Optional's 'default' argument
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero times or one time
     - default (optional) - value to be returned if the optional expression
       is not found.
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional,self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # no match: fall back to the default value, when one was supplied
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif not self.expr.resultsName:
                tokens = [ self.defaultValue ]
            else:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        rendered = self.strRepr
        if rendered is None:
            rendered = "[" + _ustr(self.expr) + "]"
            self.strRepr = rendered
        return rendered
2920
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=False) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=None) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=None) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is converted to a Literal
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Scan forward from C{loc} until the target expression matches.

        Returns C{(loc, ParseResults)} holding the skipped text (plus the
        target's tokens when C{include} was set). Raises C{ParseException}
        if the end of the input is reached without a match.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        # bind the optional helpers once, outside the scanning loop
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # (call the bound helper; the name 'self_failOn' does not exist)
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
2993
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Set the expression this C{Forward} stands for and copy its parsing
        settings onto this element. Returns C{self}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """C{fwd <<= expr} - same as C{fwd << expr}."""
        return self << other

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only; the contained
        expression is left untouched. Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once; the flag is set first so
        that a recursive grammar does not re-enter. Returns C{self}."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """Validate the contained expression unless this element is already in
        C{validateTrace}, then check for recursion."""
        # the default list is only ever copied, never mutated
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or C{"<ClassName>: ..."} when unnamed.

        The contained expression is deliberately not rendered: expanding it
        was stubbed out because it created memory and performance issues.
        """
        if hasattr(self,"name"):
            return self.name
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """Return a copy of this element; an undefined C{Forward} is copied as
        a new C{Forward} that refers back to this one."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
3070
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always C{"..."}."""
    def __str__( self ):
        placeholder = "..."
        return placeholder
3074
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class;
        # saveAsList is switched off here regardless of its value
        super(TokenConverter,self).__init__( expr )
        self.saveAsList = False
3080
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        # when adjacent, register the ignorable on this element only,
        # bypassing the propagating version inherited from ParseElementEnhance
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from a copy with its list contents removed, then add the
        # single joined string
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
3112
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        grouped = [ tokenlist ]
        return grouped
3121
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, expr ):
        super(Dict,self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for i,tok in enumerate(tokenlist):
            if len(tok) == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey,int):
                # integer keys are stored under their string form
                ikey = _ustr(tok[0]).strip()

            # pick the value to store under the key
            if len(tok)==1:
                value = ""
            elif len(tok)==2 and not isinstance(tok[1],ParseResults):
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                if len(remainder)!= 1 or (isinstance(remainder,ParseResults) and remainder.haskeys()):
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value,i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
3154
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # drop whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # already suppressing - return this element itself
        return self
3163
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.callable = _trim_arity(methodCall)
        self.called = False
    def __call__(self,s,l,t):
        # any call after the first successful one is rejected
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        self.called = True
        return outcome
    def reset(self):
        """Allow the wrapped parse action to be called again."""
        self.called = False
3178
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the parse action, and a "leaving" line afterwards showing either
    the return value or the exception raised (which is then re-raised).
    """
    # capture the name before f is wrapped; __name__ exists on both
    # Python 2 and 3 functions, whereas func_name is Python 2 only
    funcName = getattr(f, "__name__", f.__class__.__name__)
    f = _trim_arity(f)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # more than three arguments: the first is treated as the owning
            # instance and its class name is prefixed to the reported name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True}.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    dlName = _ustr(expr)+" ["+_ustr(delim)+" "+_ustr(expr)+"]..."
    if not combine:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
3217
def countedArray( expr, intExpr=None ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

       If C{intExpr} is given, a copy of it is used to parse the leading count;
       otherwise the count is parsed as a C{Word(nums)} converted to C{int}.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array expression now that the count is known
        n = t[0]
        # explicit conditional instead of "n and X or Y", which would silently
        # select Y whenever X happened to evaluate as false
        arrayExpr << (Group(And([expr]*n)) if n else Group(empty))
        return []
    if intExpr is None:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    else:
        intExpr = intExpr.copy()
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a flat list of the items of C{L}, expanding nested lists in order."""
    flat = []
    for item in L:
        # only list instances are expanded; everything else is kept as-is
        flat += _flatten(item) if isinstance(item,list) else [item]
    return flat
3246
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flatTokens = _flatten(t.asList())
            rep << And(Literal(tt) for tt in flatTokens)
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    rep <<= expr.copy()
    def copyTokenToRepeater(s,l,t):
        # remember what expr matched, then make the repeater reject anything else
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            actual = _flatten(t.asList())
            if actual != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """Backslash-escape the characters C{\\ ^ - ]} in C{s}, and spell newline
    and tab as the two-character sequences C{\\n} and C{\\t}."""
    # the backslash is handled first so that the escapes added for the
    # other characters are not themselves escaped
    for special in r"\^-]":
        s = s.replace(special,_bslash+special)
    for raw, spelled in (("\n",r"\n"), ("\t",r"\t")):
        s = s.replace(raw,spelled)
    return _ustr(s)
3308
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       Returns C{NoMatch()} when no symbols are given; an argument of an
       unsupported type additionally triggers a C{SyntaxWarning}.
    """
    # the Sequence ABC lives in collections.abc on Python 3 (the alias in
    # collections was removed in 3.10); fall back for Python 2
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix (longest-first testing)
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all single characters: a character class is sufficient
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are not swallowed
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
3372
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group ( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3382
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. By default, returns a string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # empty markers on either side of expr record the start and end locations
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def textOnly(s,l,t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s,l,t):
        # replace the token list with the original text, then drop the
        # bookkeeping names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    extractText = textOnly if asString else textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty."""
    unwrapped = TokenConverter(expr)
    # the parse action replaces the tokens with their first element
    return unwrapped.setParseAction(lambda t:t[0])
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    # an empty match whose only token is the current location
    locator = Empty().setParseAction(lambda s,l,t: l)
    startMarker = locator("locn_start")
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + expr("value") + endMarker)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the bracketed character-set syntax parsed by srange()
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Strip the escape prefix piecewise (backslash, optional '0', then 'x' or 'X').
# A single lstrip(r'\0x') treats its argument as a character set, so it would
# also eat leading zero digits of the value (r'\x00' -> '') and would leave an
# uppercase 'X' in place (r'\X41' -> 'X41'); both make int(..., 16) fail.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lstrip('\\').lstrip('0').lstrip('xX'),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if the input cannot be parsed.
    """
    # a ParseResults part is a (first, last) range to expand; anything else
    # is already a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best-effort helper: malformed input yields "" rather than an error,
        # but KeyboardInterrupt and SystemExit are no longer swallowed
        return ""
3465
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg,locn,toks):
        atRequiredColumn = col(locn,strg) == n
        if not atRequiredColumn:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    # an endless iterator over the same one-element list; each call of the
    # returned partial simply takes the next item from it
    endless = itertools.repeat([replStr])
    return functools.partial(next, endless)
3483
def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last character of the first token
    return quoted[1:-1]
3490
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    upper = []
    for tok in t:
        upper.append(_ustr(tok).upper())
    return upper
3494
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    lower = []
    for tok in t:
        lower.append(_ustr(tok).lower())
    return lower
3498
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    frames = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameInfo in frames[2:]:
            if frameInfo[3] != "_parseNoCache":
                continue
            # the parser's local variable 'loc' holds the end location
            return frameInfo[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        del frames
3514
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       Parameters:
        - tagStr - the tag name as a string, or an expression matching the tag
            name (in which case its C{name} attribute supplies the results name)
        - xml - if true, attribute values must be double-quoted strings and a
            string tag name is matched case-sensitively; if false, the tag name
            keyword is caseless, attribute names are lower-cased, and attribute
            values are optional and may be quoted or unquoted

       Returns the tuple C{(openTag, closeTag)}.  Both expressions carry a
       C{tag} attribute holding the tag name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    # trailing "/" of a self-closing tag, reported as a boolean under the name "empty"
    emptyMarker = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # unquoted HTML attribute values may contain any printable except '>'
        printablesLessRAbrack = printables.replace(">", "")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = Group( tagAttrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + tagAttrValue ) )
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrEntry)) + emptyMarker + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names look like "startTagName"/"endTagName"; ':' separates the words that get capitalized
    nameStem = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + nameStem).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + nameStem).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3543
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with its
       C{xml} argument set to C{False}."""
    return _makeTags( tagStr, xml=False )
3547
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.
       Returns the C{(openTag, closeTag)} pair built by C{_makeTags} with its
       C{xml} argument set to C{True}."""
    return _makeTags( tagStr, xml=True )
3551
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       C{<TD>} or C{<DIV>}.

       The required attribute names and values may be given as:
        - keyword arguments, as in C{(align="right")}, or
        - an explicit dict with the C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}, or
        - positional name-value tuples, as in C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
       Attribute names containing a namespace prefix cannot be written as plain
       keyword arguments, so use one of the last two forms for them.  When
       positional tuples are given, any keyword arguments are not consulted.

       If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

       To verify that the attribute exists, but without specifying a value, pass
       C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} when a required
       attribute is missing from the tokens or has a different value.
    """
    # unpacking every item up front also rejects anything that is not a 2-element pair
    required = [(name, value) for name, value in (args if args else attrDict.items())]

    def pa(s,l,tokens):
        for attrName, attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))
    return pa
# unique sentinel meaning "attribute must be present, value is not checked"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Simplified version of C{L{withAttribute}} for matching on a tag's C{class}
       attribute - needed because C{class} is a reserved word in Python and so
       cannot be passed as a plain keyword argument.

       Parameters:
        - classname - required value of the class attribute
        - namespace - optional namespace prefix; when given, the attribute
            tested is C{"<namespace>:class"} instead of C{"class"} (default='')
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# Associativity markers for operator definitions passed to infixNotation.
# Each is a distinct object() sentinel, so the two values can never compare equal.
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
           nested expression
        - opList - list of tuples (or lists), one for each operator precedence
           level in the expression grammar; each is of the form
           (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple or list of two expressions,
              for the two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Returns a C{Forward} expression matching the complete operator grammar.

       Raises C{ValueError} if a ternary operator is not given exactly two
       operator expressions, if numTerms is not 1, 2, or 3, or if
       rightLeftAssoc is neither C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so that the trailing parse action may be omitted
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            # validate before building the name, so that a bad opExpr reports
            # this ValueError rather than a string-formatting TypeError
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent operands form the term
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# alternate name bound to the same function as infixNotation
operatorPrecedence = infixNotation

# Quoted-string expressions.  Each pattern matches a string on a single line
# (raw \n and \r are excluded) whose body may contain a doubled quote character,
# a \x hex escape, or any backslash-escaped character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
# either of the two forms above, as one alternation
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string with a leading 'u' prefix, combined into a single token
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.  In that case both delimiters must be strings.

       Use the C{ignoreExpr} argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
       The default is L{quotedString}, but if no expressions are to be ignored,
       then pass C{None} for this argument.

       Raises C{ValueError} if opener and closer are the same, or if no content
       expression is given and opener or closer is not a string.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer)==1
        if singleCharDelims and ignoreExpr is not None:
            content = (Combine(OneOrMore(~ignoreExpr +
                            CharsNotIn(opener+closer+whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        elif singleCharDelims:
            content = (empty.copy()+CharsNotIn(opener+closer+whiteChars
                        ).setParseAction(lambda t:t[0].strip()))
        elif ignoreExpr is not None:
            # multi-character delimiters cannot go into a character set, so
            # exclude them with negative lookahead before each content character
            content = (Combine(OneOrMore(~ignoreExpr +
                            ~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))
        else:
            content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                            CharsNotIn(whiteChars,exact=1))
                        ).setParseAction(lambda t:t[0].strip()))

    ret = Forward()
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content )  + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
3730
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one C{blockStatement}.

       Returns a C{Group} expression named 'indented block'.  Note that this
       function also calls C{ignore} on C{blockStatementExpr} itself, so that
       a backslash followed by a line end is ignored within statements.
    """
    def checkPeerIndent(s,l,t):
        # a statement must start at the column currently on top of the stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the new column must be deeper, and is pushed on the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must fall back to at most the enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] raises IndexError when the stack holds a
        # single entry and curCol < indentStack[-1] - confirm this cannot occur
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more line ends; only tab and space are skipped as whitespace before each
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width markers whose parse actions perform the column checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        # left-most block: no INDENT/UNDENT bracketing, only peer checks
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# character sets expanded by srange from the code point ranges 0xc0-0xff (less 0xd7
# and 0xf7) and 0xa1-0xbf plus 0xd7 and 0xf7, respectively
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# open/close tag expressions for a tag name made of a letter followed by letters, digits, '_' or ':'
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired positionally by zip
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&name;" for the names above, exposing the name as the named group "entity"
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parse action to replace common HTML entities with their special characters.
       Looks up the C{entity} result of the matched tokens in C{_htmlEntityMap}
       and returns the mapped character, or C{None} if the name is not in the map."""
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# Comment expressions are easy to get wrong and very commonly needed, so
# ready-made definitions are provided here.
# "/* ... */" comment
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

# "<!-- ... -->" comment; [\s\S] lets the body span multiple lines
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
# everything up to the end of the current line, with leading whitespace kept
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "// ..." comment; a backslash-newline pair continues it onto the next line
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
# either a "/* ... */" or a "// ..." comment, as a single pattern
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

# same expression object as cppStyleComment
javaStyleComment = cppStyleComment
# "#" through the end of the line
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
# one unquoted list item: runs of non-comma printables, with interior blanks or
# tabs kept only when they are not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# delimited list of quoted strings or unquoted items; an empty item yields ""
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":
    # Demo run when the module is executed as a script: a small
    # "select <columns> from <tables>" grammar exercised on sample statements,
    # several of which are deliberately malformed.

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into one token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")