Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
# Module-level documentation, assigned explicitly to __doc__.
__doc__ = \
"""
pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print (hello, "->", greet.parseString( hello ))

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from C{parseString()} can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 - quoted strings
 - embedded comments
"""

# Release metadata: version string, timestamp of that version, and author contact.
__version__ = "2.0.6"
__versionTime__ = "9 Nov 2015 19:03"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import functools 
  74  import itertools 
  75   
  76  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  77   
# Names exported by ``from pyparsing import *``: classes first, then
# module-level helper functions and predefined expressions/constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
]
  97   
  98  PY_3 = sys.version.startswith('3') 
  99  if PY_3: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104   
 105      # build list of single arg builtins, that can be used as parse actions 
 106      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 107   
 108  else: 
 109      _MAX_INT = sys.maxint 
 110      range = xrange 
111 112 - def _ustr(obj):
113 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 114 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 115 then < returns the unicode object | encodes it with the default encoding | ... >. 116 """ 117 if isinstance(obj,unicode): 118 return obj 119 120 try: 121 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 122 # it won't break any existing code. 123 return str(obj) 124 125 except UnicodeEncodeError: 126 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 127 # state that "The return value must be a string object". However, does a 128 # unicode object (being a subclass of basestring) count as a "string 129 # object"? 130 # If so, then return a unicode object: 131 return unicode(obj)
132 # Else encode it... but how? There are many choices... :) 133 # Replace unprintables with escape codes? 134 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 135 # Replace unprintables with question marks? 136 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 137 # ... 138 139 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 140 singleArgBuiltins = [] 141 import __builtin__ 142 for fname in "sum len sorted reversed list tuple set any all min max".split(): 143 try: 144 singleArgBuiltins.append(getattr(__builtin__,fname)) 145 except AttributeError: 146 continue 147 148 _generatorType = type((y for y in range(1)))
149 150 -def _xml_escape(data):
151 """Escape &, <, >, ", ', etc. in a string of data.""" 152 153 # ampersand must be replaced first 154 from_symbols = '&><"\'' 155 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 156 for from_,to_ in zip(from_symbols, to_symbols): 157 data = data.replace(from_, to_) 158 return data
159
160 -class _Constants(object):
161 pass
162 163 alphas = string.ascii_lowercase + string.ascii_uppercase 164 nums = "0123456789" 165 hexnums = nums + "ABCDEFabcdef" 166 alphanums = alphas + nums 167 _bslash = chr(92) 168 printables = "".join(c for c in string.printable if c not in string.whitespace)
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        """Store the parse location, message, input string and parser element.

        When C{msg} is omitted, the first argument is taken to be the message
        and the stored input string is empty.
        """
        self.loc = loc
        if msg is None:
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

        C{column} is accepted as a synonym for C{col}.  Any other name raises
        C{AttributeError}.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname in ("col", "column"):
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        """Return the message followed by character offset, line and column."""
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        """Same text as C{__str__}, converted through C{_ustr}."""
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           An empty C{markerString} returns the line without a marker.  The
           result is stripped of leading and trailing whitespace.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        """List the attribute names this exception supports."""
        return ["loc", "msg", "pstr", "parserElement", "lineno", "col", "line",
                "markInputline", "__str__", "__repr__"]
217
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text

       Adds nothing to L{ParseBaseException}; it exists as a distinct type.
    """
226
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately

       Adds nothing to L{ParseBaseException}; it exists as a distinct type.
    """
231
class ParseSyntaxException(ParseFatalException):
    """just like C{L{ParseFatalException}}, but thrown internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        """Build this exception from the fields of another parse exception C{pe}."""
        base_init = super(ParseSyntaxException, self).__init__
        base_init(pe.pstr, pe.loc, pe.msg, pe.parserElement)
239
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by C{validate()} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        """Remember the list of parse elements that make up the recursion trace."""
        self.parseElementTrace = parseElementList

    def __str__( self ):
        """Return the class name followed by the stored element trace."""
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
260
261 -class _ParseResultsWithOffset(object):
262 - def __init__(self,p1,p2):
263 self.tup = (p1,p2)
264 - def __getitem__(self,i):
265 return self.tup[i]
266 - def __repr__(self):
267 return repr(self.tup)
268 - def setOffset(self,i):
269 self.tup = (self.tup[0],i)
270
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>})

       Tokens are held in an internal list; named results are held in a
       dictionary mapping each name to a list of (value, offset) pairs.
       """
    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        """Return C{toklist} itself if it is already an instance of C{cls};
        otherwise allocate a new object flagged for initialization."""
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True, isinstance=isinstance ):
        """Initialize the token list and, if C{name} is given, register the
        tokens under that results name.

         - toklist - a list (copied), a generator (consumed), or any other
           object (wrapped in a one-element list)
         - name - optional results name; an int name is converted to a string
         - asList - if true, the named value is stored as a C{ParseResults}
         - modal - if false, the name accumulates all matches instead of
           reporting only the last
         - isinstance - the builtin, bound as a default for faster lookup
        """
        # __new__ may have returned an existing instance; only set up the
        # containers the first time through
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # do not register a name for empty content (None, '' or [])
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Index by int/slice into the token list, or by name into the named
        results (last match, or all matches for accumulating names)."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        """Assign a token by int index, or append a named result under C{k}.
        A C{ParseResults} value gets a weak reference back to this object."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Delete tokens by int/slice (adjusting the stored offsets of named
        results), or delete a named result by name."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        """True if C{k} is a defined results name."""
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )

    def iterkeys( self ):
        """Returns all named result keys."""
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def itervalues( self ):
        """Returns all named result values."""
        return (self[k] for k in self.iterkeys())

    def iteritems( self ):
        """Returns all named result keys and values as (key, value) pairs."""
        return ((k, self[k]) for k in self.iterkeys())

    if PY_3:
        # on Python 3, keys/values/items are the lazy iterator versions
        keys = iterkeys
        values = itervalues
        items = iteritems
    else:
        def keys( self ):
            """Returns all named result keys."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result keys and values as a list of tuples."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """Removes and returns item at specified index (default=last).
           Supports both list and dict semantics for pop(). If passed no
           argument or an integer argument, it will use list semantics
           and pop tokens from the list of parsed tokens. If passed a
           non-integer argument (most likely a string), it will use dict
           semantics and pop the corresponding value from any defined
           results names. A second default return value argument is
           supported, just as in dict.pop().

           The only keyword accepted is C{default}; any other keyword raises
           C{TypeError}."""
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given C{defaultValue} or C{None} if no
           C{defaultValue} is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts new element at location index in the list of parsed tokens."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """Add single element to end of ParseResults list of elements."""
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """Add sequence of elements to end of ParseResults list of elements."""
        if isinstance(itemseq, ParseResults):
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """Clear all elements and results names."""
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        """Attribute-style access to named results; an undefined name yields
        an empty string instead of raising C{AttributeError}."""
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        """Return a new C{ParseResults} combining this one with C{other}."""
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Merge the tokens, named results and accumulating names of C{other}
        into this object, shifting the offsets of the merged names."""
        if other.__tokdict:
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        """Support C{0 + results} (the starting value used by the C{sum()}
        builtin) by returning a copy.  Any other left operand is reported as
        unsupported so that Python raises C{TypeError} rather than silently
        producing C{None}."""
        if isinstance(other,int) and other == 0:
            return self.copy()
        return NotImplemented

    def __repr__( self ):
        """Show the raw token list and the named-results dictionary."""
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        """Render like a list; nested C{ParseResults} use their own string form."""
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        """Flatten all tokens, including nested results, into a list of
        strings, inserting C{sep} between top-level items when it is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
        Nested C{ParseResults} are converted recursively; other tokens are
        returned unchanged."""
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        if PY_3:
            return dict( self.items() )
        else:
            return dict( self.iteritems() )

    def copy( self ):
        """Returns a new copy of a C{ParseResults} object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

         - doctag - tag for the enclosing element; defaults to this object's
           results name, or "ITEM" if it has none
         - namedItemsOnly - if true, tokens without a results name are omitted
         - indent - leading whitespace for this nesting level
         - formatted - if false, newlines and indentation are left out
        """
        nl = "\n"
        out = []
        # map token offset -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        # NOTE(review): indent unit copied as shown in this listing; confirm the
        # literal's width against the upstream source
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        """Return the results name under which the object C{sub} is stored,
        or C{None} if it is not found (identity comparison)."""
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression.

        Tries, in order: this object's own name; the name under which the
        parent holds this object; the sole defined name when there is exactly
        one token and one name at offset 0 or -1.  Returns C{None} otherwise."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views are not subscriptable on Python 3, so take the
               # first (only) entry through an iterator instead
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a C{ParseResults}.
           Accepts an optional C{indent} argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        # NOTE(review): the per-depth indent literals below are copied as shown
        # in this listing; confirm their width against the upstream source
        if self.haskeys():
            items = sorted(self.items())
            for k,v in items:
                if out:
                    out.append(NL)
                out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                if isinstance(v,ParseResults):
                    if v:
                        out.append( v.dump(indent,depth+1) )
                    else:
                        out.append(_ustr(v))
                else:
                    out.append(_ustr(v))
        elif any(isinstance(vv,ParseResults) for vv in self):
            v = self
            for i,vv in enumerate(v):
                if isinstance(vv,ParseResults):
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                else:
                    out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """Pretty-printer for parsed results as a list, using the C{pprint} module.
           Accepts additional positional or keyword args as defined for the
           C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})"""
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        """Return the picklable state; the weakly referenced parent is
        resolved to the object itself (or C{None})."""
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        """Restore state produced by C{__getstate__}, re-wrapping the parent
        in a weak reference."""
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        """Standard attribute names plus all defined results names."""
        return dir(super(ParseResults,self)) + list(self.keys())

# Register ParseResults as a virtual MutableMapping subclass.  The abstract
# base classes live in collections.abc; the aliases in the top-level
# collections module were removed in Python 3.10, so only fall back to them
# on interpreters that lack collections.abc.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)

def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits exactly on a newline character is reported as column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # rfind yields -1 when there is no earlier newline, which makes the
    # first character of the string column 1
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
710
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return 1 + newlines_before
722
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    # rfind gives -1 when no newline precedes loc, so start becomes 0
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        return strg[start:]
    return strg[start:end]
732
def _defaultStartDebugAction( instring, loc, expr ):
    """Print the expression about to be tried, with its location as offset and (line,col)."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
735
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print the expression that matched and the tokens it produced, as a list."""
    matched_tokens = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched_tokens
    print (message)
738
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print the exception raised while trying to match an expression."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
741
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts any positional arguments and ignores them."""
    return None
745 746 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 747 #~ 'decorator to trim function calls to match the arity of the target' 748 #~ def _trim_arity(func, maxargs=3): 749 #~ if func in singleArgBuiltins: 750 #~ return lambda s,l,t: func(t) 751 #~ limit = 0 752 #~ foundArity = False 753 #~ def wrapper(*args): 754 #~ nonlocal limit,foundArity 755 #~ while 1: 756 #~ try: 757 #~ ret = func(*args[limit:]) 758 #~ foundArity = True 759 #~ return ret 760 #~ except TypeError: 761 #~ if limit == maxargs or foundArity: 762 #~ raise 763 #~ limit += 1 764 #~ continue 765 #~ return wrapper 766 767 # this version is Python 2.x-3.x cross-compatible 768 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """Wrap C{func} so it can be called with more positional arguments than it accepts.

    Functions listed in C{singleArgBuiltins} are wrapped to receive only the
    third argument.  For any other function the wrapper first passes every
    argument; each time the call raises C{TypeError} it drops one more leading
    argument and retries.  After a call has succeeded, or once more than
    C{maxargs} leading arguments have been dropped, a C{TypeError} is
    re-raised to the caller.  The number of dropped arguments is remembered
    between calls of the same wrapper.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)

    # mutable cell instead of 'nonlocal', which Python 2 does not have
    state = {"skip": 0, "arity_known": False}

    def wrapper(*args):
        while True:
            try:
                result = func(*args[state["skip"]:])
            except TypeError:
                if state["arity_known"] or state["skip"] > maxargs:
                    raise
                state["skip"] += 1
            else:
                state["arity_known"] = True
                return result

    return wrapper

787 -class ParserElement(object):
788 """Abstract base level parser element class.""" 789 DEFAULT_WHITE_CHARS = " \n\t\r" 790 verbose_stacktrace = False 791 792 @staticmethod
793 - def setDefaultWhitespaceChars( chars ):
794 """Overrides the default whitespace chars 795 """ 796 ParserElement.DEFAULT_WHITE_CHARS = chars
797 798 @staticmethod
799 - def inlineLiteralsUsing(cls):
800 """ 801 Set class to be used for inclusion of string literals into a parser. 802 """ 803 ParserElement.literalStringClass = cls
804
805 - def __init__( self, savelist=False ):
806 self.parseAction = list() 807 self.failAction = None 808 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 809 self.strRepr = None 810 self.resultsName = None 811 self.saveAsList = savelist 812 self.skipWhitespace = True 813 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 814 self.copyDefaultWhiteChars = True 815 self.mayReturnEmpty = False # used when checking for left-recursion 816 self.keepTabs = False 817 self.ignoreExprs = list() 818 self.debug = False 819 self.streamlined = False 820 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 821 self.errmsg = "" 822 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 823 self.debugActions = ( None, None, None ) #custom debug actions 824 self.re = None 825 self.callPreparse = True # used to avoid redundant calls to preParse 826 self.callDuringTry = False
827
828 - def copy( self ):
829 """Make a copy of this C{ParserElement}. Useful for defining different parse actions 830 for the same parsing pattern, using copies of the original parse element.""" 831 cpy = copy.copy( self ) 832 cpy.parseAction = self.parseAction[:] 833 cpy.ignoreExprs = self.ignoreExprs[:] 834 if self.copyDefaultWhiteChars: 835 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 836 return cpy
837
838 - def setName( self, name ):
839 """Define name for this expression, for use in debugging.""" 840 self.name = name 841 self.errmsg = "Expected " + self.name 842 if hasattr(self,"exception"): 843 self.exception.msg = self.errmsg 844 return self
845
846 - def setResultsName( self, name, listAllMatches=False ):
847 """Define name for referencing matching tokens as a nested attribute 848 of the returned parse results. 849 NOTE: this returns a *copy* of the original C{ParserElement} object; 850 this is so that the client can define a basic element, such as an 851 integer, and reference it in multiple places with different names. 852 853 You can also set results names using the abbreviated syntax, 854 C{expr("name")} in place of C{expr.setResultsName("name")} - 855 see L{I{__call__}<__call__>}. 856 """ 857 newself = self.copy() 858 if name.endswith("*"): 859 name = name[:-1] 860 listAllMatches=True 861 newself.resultsName = name 862 newself.modalResults = not listAllMatches 863 return newself
864
865 - def setBreak(self,breakFlag = True):
866 """Method to invoke the Python pdb debugger when this element is 867 about to be parsed. Set C{breakFlag} to True to enable, False to 868 disable. 869 """ 870 if breakFlag: 871 _parseMethod = self._parse 872 def breaker(instring, loc, doActions=True, callPreParse=True): 873 import pdb 874 pdb.set_trace() 875 return _parseMethod( instring, loc, doActions, callPreParse )
876 breaker._originalParseMethod = _parseMethod 877 self._parse = breaker 878 else: 879 if hasattr(self._parse,"_originalParseMethod"): 880 self._parse = self._parse._originalParseMethod 881 return self
882
def setParseAction( self, *fns, **kwargs ):
    """Replace this expression's parse actions with C{fns}.

    A parse action is a callable taking 0-3 arguments, called as
    C{fn(s,loc,toks)}, C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s    = the original string being parsed (see note below)
     - loc  = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If a function modifies the tokens, it can return them as its return
    value, and the modified list of tokens will replace the original.
    Otherwise, it does not need to return any value.

    The optional keyword argument C{callDuringTry} (default C{False}) is
    stored on the expression.  Returns C{self}.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # each callable is normalized through _trim_arity once, at registration
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
903
def addParseAction( self, *fns, **kwargs ):
    """Append C{fns} to this expression's existing parse actions.
    See L{I{setParseAction}<setParseAction>} for the accepted call signatures.

    C{callDuringTry} stays set once it is truthy; otherwise it takes the value
    of the C{callDuringTry} keyword argument (default C{False}).
    Returns C{self}.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
909
def addCondition(self, *fns, **kwargs):
    """Add boolean predicate functions to the expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for the accepted call signatures.

    Each predicate is wrapped in a parse action that raises C{ParseException}
    when the predicate returns a false value, and returns the tokens
    unchanged otherwise.

    Optional keyword arguments:
     - C{message} - custom message for the raised exception
       (default C{"failed user-defined condition"})
     - C{callDuringTry} - OR-ed into the expression's C{callDuringTry} flag

    Returns C{self}.
    """
    msg = kwargs.get("message") or "failed user-defined condition"

    def makeConditionAction(condition):
        # Build the parse action in its own scope so that every predicate gets
        # its own binding.  Defining the closure directly inside the loop made
        # all actions share the loop variable, so only the last predicate
        # passed in was ever evaluated.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise ParseException(s,l,msg)
            return t
        return pa

    for fn in fns:
        # normalize the arity once at registration, as setParseAction does
        self.parseAction.append(makeConditionAction(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
    Fail action fn is a callable function that takes the arguments
    C{fn(s,loc,expr,err)} where:
     - s = string being parsed
     - loc = location where expression match was attempted and failed
     - expr = the parse expression that failed
     - err = the exception thrown
    The function returns no value.  It may throw C{L{ParseFatalException}}
    if it is desired to stop parsing immediately.

    Returns C{self}."""
    self.failAction = fn
    return self
936
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every match of the expressions in C{self.ignoreExprs}.

    Each ignorable expression is applied repeatedly until it fails; the whole
    list is retried until a full pass matches nothing.  Returns the new
    location.
    """
    advanced = True
    while advanced:
        advanced = False
        for ignorable in self.ignoreExprs:
            try:
                # consume back-to-back occurrences; the loop ends when the
                # expression no longer matches and raises ParseException
                while True:
                    loc, _ = ignorable._parse( instring, loc )
                    advanced = True
            except ParseException:
                pass
    return loc
949
def preParse( self, instring, loc ):
    """Return the location at which matching should actually start.

    Ignorable expressions are skipped first (when any are defined); then, if
    C{self.skipWhitespace} is set, characters found in C{self.whiteChars} are
    skipped as well.
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    blanks = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in blanks:
        loc += 1
    return loc
961
def parseImpl( self, instring, loc, doActions=True ):
    """Base matching step: consumes nothing and yields no tokens.

    Returns C{(loc, [])}, i.e. the unchanged location and an empty token list.
    """
    return loc, []
964
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens produced by C{parseImpl}; this base
    version returns C{tokenlist} unchanged."""
    return tokenlist
967 968 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Match this expression at C{loc} without consulting the packrat cache.

    Steps: optional pre-parse (skipping ignorables/whitespace), C{parseImpl},
    C{postParse}, packaging of the tokens into a C{ParseResults}, then the
    parse actions.

    Parameters:
     - instring - the string being parsed
     - loc - location at which to attempt the match
     - doActions - when false, parse actions are skipped unless
       C{self.callDuringTry} is set
     - callPreParse - when false, C{preParse} is not called (the caller has
       already done so)

    Returns a C{(loc, tokens)} tuple: the location just past the match and
    the resulting C{ParseResults}.

    Raises C{ParseException} on a failed match; an C{IndexError} raised by
    C{parseImpl} while running off the end of the input is converted to a
    C{ParseException} located at the end of the string.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug hooks and/or a fail action need to observe failures
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # parseImpl is invoked at preloc, so the end-of-input guard must test
        # preloc: pre-parsing can advance past trailing whitespace to the end
        # of the string even when loc itself is still inside it.
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            # fast path: no try/except overhead when an IndexError is not expected
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning None leaves the tokens as they are
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1040
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} with C{doActions=False} and return only the
    end location of the match.

    A C{ParseFatalException} raised during the attempt is replaced by a plain
    C{ParseException} at C{loc}.
    """
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1046 1047 # this method gets repeatedly called during backtracking with the same arguments - 1048 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end to C{_parseNoCache}.

    Results are stored in C{ParserElement._exprArgCache}, keyed on the
    expression, the input string, the location and both flags.  Successful
    results are stored and returned as copies of the token object; parse
    exceptions are stored too and re-raised on a later hit.
    """
    key = (self,instring,loc,callPreParse,doActions)
    if key in ParserElement._exprArgCache:
        hit = ParserElement._exprArgCache[ key ]
        if isinstance(hit, Exception):
            raise hit
        # hand out a copy so the caller cannot alter the cached tokens
        return (hit[0],hit[1].copy())

    try:
        result = self._parseNoCache( instring, loc, doActions, callPreParse )
        ParserElement._exprArgCache[ key ] = (result[0],result[1].copy())
        return result
    except ParseBaseException as pe:
        # drop the traceback before the exception object is kept in the cache
        pe.__traceback__ = None
        ParserElement._exprArgCache[ key ] = pe
        raise

# Default parse entry point is the uncached implementation; enablePackrat()
# rebinds ParserElement._parse to _parseCache.
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
@staticmethod
def resetCache():
    """Empty the shared parse-result cache in place."""
    ParserElement._exprArgCache.clear()

# set to True by the first call to enablePackrat()
_packratEnabled = False
@staticmethod
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
    Repeated parse attempts at the same string location (which happens
    often in many complex grammars) can immediately return a cached value,
    instead of re-executing parsing/validating code.  Both valid results
    and parsing exceptions are memoized.

    This speedup may break existing programs that use parse actions that
    have side-effects.  For this reason, packrat parsing is disabled when
    you first import pyparsing.  To activate the packrat feature, your
    program must call C{ParserElement.enablePackrat()}.  If your program
    uses C{psyco} to "compile as you go", you must call C{enablePackrat}
    before calling C{psyco.full()}.  If you do not do this, Python will
    crash.  For best results, call C{enablePackrat()} immediately after
    importing pyparsing.

    Calling this more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
1095
def parseString( self, instring, parseAll=False ):
    """Run this parse expression against C{instring} and return the tokens.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - defining your parse action using the full C{(s,loc,toks)} signature, and
       referencing the input string using the parse action's C{s} argument
     - explicitly expanding the tabs in your input string before calling
       C{parseString}
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end, results = self._parse( instring, 0 )
        if parseAll:
            # only ignorables/whitespace may remain before the end of input
            end = self.preParse( instring, end )
            terminator = Empty() + StringEnd()
            terminator._parse( instring, end )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return results
1140
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator yielding C{(tokens, start, end)} tuples.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                # pre-parsing was just done above, so tell _parse not to repeat it
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE: 'nextloc' (lowercase) is a separate variable from
                        # 'nextLoc'; it holds the pre-parsed position of loc
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping in place
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1190
def transformString( self, instring ):
    """Extension to C{L{scanString}} that rewrites the matched text using the
    tokens returned for each match (typically modified by a parse action).
    To use C{transformString}, define a grammar and attach a parse action to
    it that modifies the returned token list.  Invoking C{transformString()}
    on a target string will then scan for matches and replace the matched
    text according to the logic in the parse action.  Unmatched text is
    copied through unchanged.  Returns the resulting transformed string.

    Note: this sets C{self.keepTabs} to True on the expression."""
    pieces = []
    resume_at = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            pieces.append( instring[resume_at:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces += toks.asList()
                elif isinstance(toks,list):
                    pieces += toks
                else:
                    pieces.append(toks)
            resume_at = end
        pieces.append(instring[resume_at:])
        # discard empty fragments before flattening and joining
        kept = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(kept)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1223
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to C{L{scanString}}: collects just the tokens of every
    match (dropping the start/end locations) into a single C{ParseResults}.
    May be called with optional C{maxMatches} argument, to clip searching
    after 'n' matches are found.
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1237
def __add__(self, other ):
    """Implementation of + operator - returns C{L{And}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1247
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1257
def __sub__(self, other):
    """Implementation of - operator, returns C{L{And}} with error stop.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1267
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1277
def __mul__(self,other):
    """Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} when C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} for a negative count, a maximum smaller
    than the minimum, or a multiplier of 0 / (0,0).
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None so (n,) behaves like (n,None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # from here on optElements is the count of optional repetitions
            optElements -= minElements
        else:
            # interpolate the operand types into the message; passing them as
            # extra constructor arguments left the '%s' placeholders unfilled
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(expr + Optional(expr + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret

def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
    C{ParserElement}; delegates to C{__mul__} with the same argument."""
    return self.__mul__(other)
1348
def __or__(self, other ):
    """Implementation of | operator - returns C{L{MatchFirst}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1358
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1368
def __xor__(self, other ):
    """Implementation of ^ operator - returns C{L{Or}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1378
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1388
def __and__(self, other ):
    """Implementation of & operator - returns C{L{Each}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1398
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a C{L{ParserElement}}.

    A string operand is first converted with
    C{ParserElement.literalStringClass}.  Any other non-C{ParserElement}
    operand produces a C{SyntaxWarning} and a C{None} result."""
    if isinstance( other, basestring ):
        other = ParserElement.literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1408
def __invert__( self ):
    """Implementation of ~ operator - returns C{L{NotAny}} wrapping this expression."""
    return NotAny( self )
1412
def __call__(self, name=None):
    """Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}::
      userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
    could be written as::
      userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (or C{None}), same as calling C{L{copy}}.
    """
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1428
def suppress( self ):
    """Suppresses the output of this C{ParserElement}; useful to keep punctuation from
       cluttering up returned output.

       Returns a C{Suppress} expression wrapping this element.
    """
    return Suppress( self )
1434
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       C{ParserElement}'s defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.

       Returns C{self}.
    """
    self.skipWhitespace = False
    return self
1442
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars with C{chars} and turns
       whitespace skipping on for this element.  Returns C{self}.
    """
    self.skipWhitespace = True
    self.whiteChars = chars
    # this element now has its own whitespace set; copy() checks this flag
    # before resetting whiteChars to the defaults
    self.copyDefaultWhiteChars = False
    return self
1450
def parseWithTabs( self ):
    """Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
       Must be called before C{parseString} when the input grammar contains elements that
       match C{<TAB>} characters.  Returns C{self}."""
    self.keepTabs = True
    return self
1457
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.

       A copy of C{other} is stored; expressions that are not already a
       C{Suppress} are wrapped in one.  A C{Suppress} that is already in
       the ignore list is not added again.  Returns C{self}.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other.copy() ) )
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append( other.copy() )
    return self
1469
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
    using the given callbacks.

    Any callback passed as a false value (e.g. C{None}) is replaced by the
    module's default action for that event.  Sets C{self.debug} to True and
    returns C{self}."""
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
1477
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set C{flag} to True to enable (installing the default debug actions),
       False to disable.  Returns C{self}."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1486
def __str__( self ):
    """Return this expression's C{name}."""
    return self.name
1489
def __repr__( self ):
    """Return the result of C{_ustr(self)} as the representation."""
    return _ustr(self)
1492
def streamline( self ):
    """Mark this expression as streamlined and clear its cached C{strRepr}.
    Returns C{self}."""
    self.streamlined = True
    self.strRepr = None
    return self
1497
def checkRecursion( self, parseElementList ):
    """Recursion-check hook; this base version does nothing."""
    pass
1500
def validate( self, validateTrace=[] ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

    This base version calls C{checkRecursion} with an empty list; the
    C{validateTrace} argument is not used here."""
    self.checkRecursion( [] )
1504
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       Parameters:
        - file_or_filename - an object with a C{read()} method, or a path
          that is opened in text mode (C{"r"})
        - parseAll - passed through to C{L{parseString}}

       Returns whatever C{parseString} returns for the file contents.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        # not a file-like object: treat it as a filename; the with-block
        # closes the file even if read() raises
        with open(file_or_filename, "r") as f:
            file_contents = f.read()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1524
def __eq__(self,other):
    """Equality test.

     - against another C{ParserElement}: true for the same object or for
       equal instance dictionaries
     - against a string: true when this expression parses the whole string
       (C{parseAll=True}) without raising a parse exception
     - anything else: falls through to comparing the C{super} proxy with
       C{other}
    """
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1536
def __ne__(self,other):
    """Inequality: the logical negation of C{self == other}."""
    return not (self == other)
1539
def __hash__(self):
    """Hash on object identity (C{id(self)}), independent of C{__eq__}."""
    return hash(id(self))
1542
def __req__(self,other):
    """Reflected-equality helper; returns C{self == other}.

    NOTE(review): C{__req__} is not a special method name in the Python data
    model - confirm whether anything calls it explicitly."""
    return self == other
1545
def __rne__(self,other):
    """Reflected-inequality helper; returns C{not (self == other)}.

    NOTE(review): C{__rne__} is not a special method name in the Python data
    model - confirm whether anything calls it explicitly."""
    return not (self == other)
1548
def runTests(self, tests, parseAll=False):
    """Run this parse expression over a series of test strings, printing for
       each one the test itself followed by either the dumped parse results
       or the failing line, a C{'^'} marker under the failure location, and
       the exception text.  Quick and easy way to try a parse expression
       against a list of sample strings.

       Parameters:
        - tests - a list of separate test strings, or a multiline string of test strings
          (split into lines, each stripped of surrounding whitespace)
        - parseAll - (default=False) - flag to pass to C{L{parseString}} when running tests
    """
    if isinstance(tests, basestring):
        tests = map(str.strip, tests.splitlines())
    for sample in tests:
        report = [sample]
        try:
            report.append(self.parseString(sample, parseAll=parseAll).dump())
        except ParseException as pe:
            if '\n' in sample:
                # multi-line test: show the offending line, marker at its column
                report.append(line(pe.loc, sample))
                marker = ' '*(col(pe.loc,sample)-1) + '^'
            else:
                marker = ' '*pe.loc + '^'
            report.append(marker)
            report.append(str(pe))
        report.append('')
        print('\n'.join(report))
1573


class Token(ParserElement):
    """Abstract C{ParserElement} subclass, for defining atomic matching patterns."""
    def __init__( self ):
        # initialize the base class with savelist=False
        super(Token,self).__init__( savelist=False )
1579


class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # flags read by the parsing machinery: a match may consume nothing,
        # and matching is not expected to raise IndexError
        self.mayReturnEmpty = True
        self.mayIndexError = False
1588


class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: raise C{ParseException} at C{loc} with this token's error message."""
        raise ParseException(instring, loc, self.errmsg, self)
1601


class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, then turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at C{loc}; return C{(end_location, matched_string)}
        or raise C{ParseException}."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
_L = Literal
ParserElement.literalStringClass = Literal


class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with C{L{Literal}}::
         Literal("if") will match the leading C{'if'} in C{'ifAndOnlyIf'}.
         Keyword("if") will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
       Accepts two optional constructor arguments in addition to the keyword string:
       C{identChars} is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; C{caseless} allows case-insensitive
       matching, default is C{False}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        quoted = '"%s"' % self.match
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done on upper-cased text
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at C{loc}, requiring that the characters directly
        before and after it are not identifier characters.  Returns
        C{(end_location, keyword)} or raises C{ParseException}."""
        end = loc + self.matchLen
        idents = self.identChars
        if self.caseless:
            matched = (
                instring[ loc:end ].upper() == self.caselessmatch
                and (loc >= len(instring)-self.matchLen or instring[end].upper() not in idents)
                and (loc == 0 or instring[loc-1].upper() not in idents)
            )
        else:
            matched = (
                instring[loc] == self.firstMatchChar
                and (self.matchLen==1 or instring.startswith(self.match,loc))
                and (loc >= len(instring)-self.matchLen or instring[end] not in idents)
                and (loc == 0 or instring[loc-1] not in idents)
            )
        if matched:
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Copy this keyword, resetting the copy's C{identChars} to the
        current class default."""
        # NOTE(review): this replaces any identChars given to the constructor
        # and stores the raw default value rather than a set - confirm intended
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
1687
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal stores the upper-cased text for comparison
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice with the stored match string;
        on success return C{(end_location, original_match_string)}, otherwise
        raise C{ParseException}."""
        end = loc + self.matchLen
        if instring[ loc:end ].upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
1704
class CaselessKeyword(Keyword):
    """C{L{Keyword}} variant that is always constructed with C{caseless=True}."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Case-insensitive keyword match at C{loc}: the upper-cased input must
        equal the stored upper-cased keyword and must not be followed by an
        identifier character.  Returns C{(end_location, keyword)} or raises
        C{ParseException}."""
        end = loc + self.matchLen
        text_matches = instring[ loc:end ].upper() == self.caselessmatch
        if text_matches and (loc >= len(instring)-self.matchLen or
                             instring[end].upper() not in self.identChars):
            return end, self.match
        raise ParseException(instring, loc, self.errmsg, self)
1714
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction. An optional
       C{exclude} parameter can list characters that might be found in
       the input C{bodyChars} string; useful to define a word of all printables
       except for one or two characters, for instance.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in the character sets, try to
        # build an equivalent regular expression for parseImpl to use
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: if the generated pattern does not compile,
                # parseImpl uses the character-by-character scan instead
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at C{loc} and return C{(endloc, matched_text)}.

        Uses the precompiled regular expression when one is available;
        otherwise scans character by character, applying the min/max length
        and C{asKeyword} constraints.  Raises C{ParseException} on failure.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a further body character right after the
        # scanned text means the word is too long
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class's string form if it yields one, else a
        C{W:(...)} summary of the character sets, cached in C{self.strRepr}."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1837
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression object,
        in which case it is used directly and C{flags} is only recorded.
        Raises C{ValueError} for any other type of C{pattern}; an invalid
        pattern string re-raises the error from C{re.compile()} after
        issuing a C{SyntaxWarning}.
        """
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if len(pattern) == 0:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # record the pattern source text; str(pattern) would give the
            # repr of the compiled object rather than the expression itself
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the regular expression at C{loc}.

        Returns C{(endloc, results)} where C{results} is a C{ParseResults}
        holding the whole match, with each named group of the expression
        also stored under its group name.  Raises C{ParseException} if the
        expression does not match at C{loc}.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """Return the base class's string form if it yields one, else
        C{Re:(<repr of pattern>)}, cached in C{self.strRepr}."""
        try:
            return super(Regex,self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1901
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)

           Raises C{SyntaxError} (after a C{SyntaxWarning}) if C{quoteChar} or
           C{endQuoteChar} is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # characters excluded from the "ordinary content" class: the first
        # end-quote character and, when given, the escape character
        if escChar is not None:
            escapedEscChar = _escapeRegexRangeChars(escChar)
        else:
            escapedEscChar = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        else:
            # single-line strings additionally stop at CR/LF
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escapedEscChar )
        if len(self.endQuoteChar) > 1:
            # allow proper prefixes of a multi-character end quote inside the body
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc} and return C{(endloc, text)}.

        When C{unquoteResults} is set, the delimiters are removed, escape
        sequences introduced by C{escChar} are reduced to the escaped
        character, and each C{escQuote} sequence is replaced by the end
        quote string.  Raises C{ParseException} if no quoted string starts
        at C{loc}.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" is not a valid Python string escape
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base class's string form if it yields one, else a
        description naming the start and end quote strings."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2013
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
       minimum value < 1 is not valid); the default values for C{max} and C{exact}
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        # whitespace may be part of the match, so leading whitespace is not skipped
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from C{loc} until one in C{notChars} or the
        maximum length is reached; return C{(endloc, matched_text)}.

        Raises C{ParseException} if the first character is disallowed or
        fewer than C{minLen} characters were consumed.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class's string form if it yields one, else a
        C{!W:(...)} summary of the excluded characters, cached in C{self.strRepr}."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2076
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
       as defined for the C{L{Word}} class."""
    # display names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters being matched must not be skipped as leading whitespace
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        # an exact length overrides both bounds
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of C{matchWhite} characters starting at C{loc} and
        return C{(endloc, matched_text)}; raise C{ParseException} if the
        first character does not qualify or the run is shorter than C{minLen}."""
        whitespace = self.matchWhite
        if instring[loc] not in whitespace:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        limit = min( start + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in whitespace:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2124
class _PositionToken(Token):
    """Base class for the position-testing tokens defined after it
    (C{GoToColumn}, C{LineStart}, C{LineEnd}, ...).

    The expression is named after its concrete class and is marked as
    possibly matching empty and as not raising C{IndexError}.
    """
    def __init__( self ):
        super(_PositionToken,self).__init__()
        self.name = type(self).__name__
        self.mayIndexError = False
        self.mayReturnEmpty = True
2132
class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions, then whitespace, stopping as soon as
        the target column is reached; return the new location."""
        if col(loc,instring) == self.col:
            return loc
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        end = len(instring)
        while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(newloc, text)} where C{text} is the input between C{loc}
        and the target column; raise C{ParseException} if C{loc} is already
        past that column."""
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        target = loc + self.col - current
        return target, instring[ loc: target ]
2155
class LineStart(_PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return C{loc + 1} if the first character after the base class's
        pre-parse skipping is a newline, otherwise C{loc} unchanged.

        The index is bounds-checked so that pre-parsing at (or skipping to)
        the end of the input returns C{loc} instead of raising C{IndexError}.
        """
        preloc = super(LineStart,self).preParse(instring,loc)
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens if C{loc} is 0, equals the pre-parsed start
        of the string, or directly follows a newline; raise
        C{ParseException} otherwise."""
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2175
class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are what this token matches, so they must not be skipped as whitespace
        non_newline_white = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( non_newline_white )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Return C{(loc+1, "\\n")} at a newline, C{(loc+1, [])} exactly at the
        end of the string; raise C{ParseException} anywhere else."""
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
2193
class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at location 0, or at the location that
        pre-parsing from 0 arrives at (i.e. everything before C{loc} is
        skippable); raise C{ParseException} otherwise."""
        at_start = (loc == 0) or (loc == self.preParse( instring, 0 ))
        if not at_start:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2206
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Raise C{ParseException} if input remains at C{loc}.  Exactly at the
        end of the string return C{(loc+1, [])}; beyond it return C{(loc, [])}."""
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc+1, []
        # loc is already past the end of the string
        return loc, []
2222
class WordStart(_PositionToken):
    """Matches if the current position is at the beginning of a Word, and
       is not preceded by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
       the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at location 0, or where the previous
        character is not a word character and the current one is."""
        if loc == 0:
            return loc, []
        word_chars = self.wordChars
        if instring[loc-1] in word_chars or instring[loc] not in word_chars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2241
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of C{wordChars}
       (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
       use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # the position test must see the character immediately at loc
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at or beyond the end of the string, or where
        the previous character is a word character and the current one is not.

        At location 0 of a non-empty string there is no previous character,
        so the match fails; indexing C{instring[loc-1]} there would wrap
        around and test the last character of the string instead.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2262
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        """Store the contained expressions in C{self.exprs}.

        C{exprs} may be a generator, a single string (wrapped in a
        C{Literal}), a sequence (a sequence made up only of strings has each
        string wrapped in a C{Literal}), any other iterable, or a single
        non-iterable object (stored as a one-element list).
        """
        super(ParseExpression,self).__init__(savelist)
        # collections.Sequence was removed in Python 3.10; the ABCs live in
        # collections.abc on Python 3 and directly in collections on Python 2
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        elif isinstance( exprs, Sequence ):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(Literal, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the C{i}-th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions, discard the cached
        string representation, and return C{self}."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are not modified
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable on this expression and on every
        contained expression; a C{Suppress} that is already registered is
        not added again.  Returns C{self}."""
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseExpression, self).ignore( other )
                for e in self.exprs:
                    e.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base class's string form if it yields one, else
        C{ClassName:(exprs)}, cached in C{self.strRepr}."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # fall through to the generated representation below
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline this expression and its contained expressions, flatten
        directly nested expressions of the same class, and refresh
        C{self.errmsg}.  Returns C{self}."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if ( isinstance( other, self.__class__ ) and
                  not(other.parseAction) and
                  other.resultsName is None and
                  not other.debug ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + str(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base class implementation and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run the recursion check
        on this expression.  The default list is never mutated: a new list
        is built from it before use."""
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy of this expression whose contained expressions are
        themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
2372
class And(ParseExpression):
    """Requires all given C{ParseExpression}s to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the C{'+'} operator.
    """

    class _ErrorStop(Empty):
        """Marker element: once C{And.parseImpl} has passed it, failures of
        the following expressions are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling of the sequence follows that of its first expression
        self.setWhitespaceChars( self.exprs[0].whiteChars )
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, accumulating tokens.

        Returns C{(endloc, tokens)}.  After an C{_ErrorStop} marker has been
        seen, a C{ParseBaseException} or C{IndexError} from a later
        expression is re-raised as C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Implement C{+=}: append C{other} to this sequence.

        A string operand is wrapped with C{ParserElement.literalStringClass},
        the same conversion used by C{Or.__ixor__} and
        C{MatchFirst.__ior__}, instead of a hard-coded C{Literal}.
        """
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on the contained expressions, stopping
        after the first one that cannot match empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return C{self.name} if set, else C{{a b c}} built from the
        contained expressions and cached in C{self.strRepr}."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
2437
class Or(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the C{'^'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse for real with the candidates
        ordered from longest to shortest trial match.

        Returns the result of the first candidate whose real parse succeeds.
        If none succeeds, the exception that got furthest into the input is
        re-raised with this expression's error message; if nothing was even
        attempted, a C{ParseException} reports that there are no alternatives.
        """
        furthestLoc = -1
        furthestExc = None
        candidates = []
        for alt in self.exprs:
            try:
                endloc = alt.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # remember every successful alternative, to retry longest to shortest
                candidates.append((endloc, alt))

        if candidates:
            # stable ordering: alternatives with equal end locations keep their listed order
            candidates.sort(key=lambda pair: pair[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse( instring, loc, doActions )
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestExc = err
                        furthestLoc = err.loc

        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc


    def __ixor__(self, other ):
        """Implement C{^=}: append C{other}, wrapping a string operand with
        C{ParserElement.literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} if set, else C{{a ^ b ^ c}}, cached in C{self.strRepr}."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2507
class MatchFirst(ParseExpression):
    """Requires that at least one C{ParseExpression} is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the C{'|'} operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses.

        If none does, the exception that got furthest into the input is
        re-raised with this expression's error message; with no alternatives
        at all a C{ParseException} says so.
        """
        furthestLoc = -1
        furthestExc = None
        for alt in self.exprs:
            try:
                return alt._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestExc = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestExc = ParseException(instring,len(instring),alt.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestExc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestExc.msg = self.errmsg
        raise furthestExc

    def __ior__(self, other ):
        """Implement C{|=}: append C{other}, wrapping a string operand with
        C{ParserElement.literalStringClass}."""
        if isinstance( other, basestring ):
            other = ParserElement.literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return C{self.name} if set, else C{{a | b | c}}, cached in C{self.strRepr}."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2563
2564 2565 -class Each(ParseExpression):
2566 """Requires all given C{ParseExpression}s to be found, but in any order. 2567 Expressions may be separated by whitespace. 2568 May be constructed using the C{'&'} operator. 2569 """
2570 - def __init__( self, exprs, savelist = True ):
2571 super(Each,self).__init__(exprs, savelist) 2572 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 2573 self.skipWhitespace = True 2574 self.initExprGroups = True
2575
2576 - def parseImpl( self, instring, loc, doActions=True ):
2577 if self.initExprGroups: 2578 self.opt1map = dict((id(e.expr),e) for e in self.exprs if isinstance(e,Optional)) 2579 opt1 = [ e.expr for e in self.exprs if isinstance(e,Optional) ] 2580 opt2 = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)] 2581 self.optionals = opt1 + opt2 2582 self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ] 2583 self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ] 2584 self.required = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ] 2585 self.required += self.multirequired 2586 self.initExprGroups = False 2587 tmpLoc = loc 2588 tmpReqd = self.required[:] 2589 tmpOpt = self.optionals[:] 2590 matchOrder = [] 2591 2592 keepMatching = True 2593 while keepMatching: 2594 tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired 2595 failed = [] 2596 for e in tmpExprs: 2597 try: 2598 tmpLoc = e.tryParse( instring, tmpLoc ) 2599 except ParseException: 2600 failed.append(e) 2601 else: 2602 matchOrder.append(self.opt1map.get(id(e),e)) 2603 if e in tmpReqd: 2604 tmpReqd.remove(e) 2605 elif e in tmpOpt: 2606 tmpOpt.remove(e) 2607 if len(failed) == len(tmpExprs): 2608 keepMatching = False 2609 2610 if tmpReqd: 2611 missing = ", ".join(_ustr(e) for e in tmpReqd) 2612 raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing ) 2613 2614 # add any unmatched Optionals, in case they have default values defined 2615 matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in tmpOpt] 2616 2617 resultlist = [] 2618 for e in matchOrder: 2619 loc,results = e._parse(instring,loc,doActions) 2620 resultlist.append(results) 2621 2622 finalResults = ParseResults([]) 2623 for r in resultlist: 2624 dups = {} 2625 for k in r.keys(): 2626 if k in finalResults: 2627 tmp = ParseResults(finalResults[k]) 2628 tmp += ParseResults(r[k]) 2629 dups[k] = tmp 2630 finalResults += ParseResults(r) 2631 for k,v 
in dups.items(): 2632 finalResults[k] = v 2633 return loc, finalResults
2634
def __str__(self):
    """Return the display string for this expression.

    A user-assigned C{name} takes precedence; otherwise the contained
    expressions are rendered as C{"{a & b & ...}"} and the result is cached
    in C{self.strRepr}.
    """
    if not hasattr(self, "name"):
        if self.strRepr is None:
            rendered = [_ustr(sub) for sub in self.exprs]
            self.strRepr = "{%s}" % " & ".join(rendered)
        return self.strRepr
    return self.name
2643
def checkRecursion(self, parseElementList):
    """Ask every contained expression to check for recursion, passing along
    a copy of C{parseElementList} extended with this element."""
    visited = parseElementList[:]
    visited.append(self)
    for sub in self.exprs:
        sub.checkRecursion(visited)
2648
class ParseElementEnhance(ParserElement):
    """Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.

    Wraps a single contained expression, stored in C{self.expr} (which may be
    C{None}), and forwards parsing, ignore, streamline and validation
    requests to it.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}.  A plain string is converted with
        C{ParserElement.literalStringClass}, the same conversion that
        C{Forward} applies in its C{'<<'} operator."""
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = ParserElement.literalStringClass(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # take over the parsing characteristics of the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} if
        no expression has been set."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element and on a private copy
        of the wrapped expression.  Returns C{self}."""
        self.skipWhitespace = False
        # guard first: calling copy() on a missing expression would raise
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression.
        A C{Suppress} that is already registered is not added again.
        Returns C{self}."""
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element, then the wrapped expression.  Returns C{self}."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element already appears
        in C{parseElementList}; otherwise continue the check in the wrapped
        expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check."""
        # the default list is only ever copied below, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if available, else a cached
        C{"ClassName:(expr)"} rendering."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # base class could not produce a string; fall back to the generated form
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2720
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list."""

    def __init__(self, expr):
        """Wrap C{expr} as a lookahead; the lookahead itself may match empty."""
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the wrapped expression at C{loc}; on success return the
        unchanged location with no tokens.  A failed attempt propagates
        its exception."""
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
2734
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator."""

    def __init__(self, expr):
        """Wrap C{expr} as a negative lookahead."""
        super(NotAny, self).__init__(expr)
        # set the flag directly instead of calling self.leaveWhitespace():
        # the setting must not be propagated to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed, consuming nothing, only when the wrapped expression
        fails to match at C{loc}."""
        try:
            self.expr.tryParse(instring, loc)
        except (ParseException, IndexError):
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        """Return the assigned name, or a cached C{"~{expr}"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "~{%s}" % _ustr(self.expr)
            return self.strRepr
        return self.name
2766
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""

    def __init__(self, expr):
        """Wrap C{expr}; zero repetitions are allowed, so this may match empty."""
        super(ZeroOrMore, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Match the wrapped expression repeatedly until it fails, and
        return the last good location with the accumulated tokens."""
        tokens = []
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
            mustSkipIgnorables = len(self.ignoreExprs) > 0
            while True:
                preloc = loc
                if mustSkipIgnorables:
                    preloc = self._skipIgnorables(instring, loc)
                loc, moreTokens = self.expr._parse(instring, preloc, doActions)
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[%s]..." % _ustr(self.expr)
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always
        saves its results as a list."""
        named = super(ZeroOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2805
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl(self, instring, loc, doActions=True):
        """Require one match of the wrapped expression (its failure
        propagates), then keep matching until it fails."""
        # must be at least one
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        try:
            mustSkipIgnorables = len(self.ignoreExprs) > 0
            while True:
                preloc = loc
                if mustSkipIgnorables:
                    preloc = self._skipIgnorables(instring, loc)
                loc, moreTokens = self.expr._parse(instring, preloc, doActions)
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # a failure after the first match simply ends the repetition
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"{expr}..."} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{%s}..." % _ustr(self.expr)
            return self.strRepr
        return self.name

    def setResultsName(self, name, listAllMatches=False):
        """Same as the inherited method, but the returned element always
        saves its results as a list."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
2840
2841 -class _NullToken(object):
2842 - def __bool__(self):
2843 return False
2844 __nonzero__ = __bool__
2845 - def __str__(self):
2846 return ""
# Module-level sentinel: used as the default for Optional's `default`
# argument, and compared by identity to tell "no default given" apart
# from any real default value.
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """

    def __init__(self, expr, default=_optionalNotMatched):
        """Wrap C{expr}; C{default} is returned as the token when C{expr}
        does not match."""
        super(Optional, self).__init__(expr, savelist=False)
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Return the wrapped expression's match, or the default value (or
        no tokens when no default was given) without advancing."""
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name too
                tokens = ParseResults([self.defaultValue])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [self.defaultValue]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached C{"[expr]"} rendering."""
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[%s]" % _ustr(self.expr)
            return self.strRepr
        return self.name
2881
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If C{include} is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The C{ignore}
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """Set up the skip target.

        @param other: expression to skip forward to
        @param include: if true, also parse and return the target expression
        @param ignore: expression whose matches are stepped over while scanning
        @param failOn: expression (or string) that aborts the skip when it
            matches before the target is found
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            # use the configured literal class, as Forward's '<<' operator does
            self.failOn = ParserElement.literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from C{loc} until the target
        expression matches, and return the skipped text (plus the target's
        tokens when C{include} was set).

        Raises C{ParseException} if C{failOn} matches first or the end of
        the input is passed without finding the target.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        failParse = False   # set only when failOn matched, so the handler below re-raises
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + _ustr(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over every consecutive ignorable match
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # target not found here; try the next position
                    loc += 1
        raise ParseException(instring, loc, self.errmsg, self)
2944
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

       Note: take care when assigning to C{Forward} not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the C{Forward}::
          fwdExpr << (a | b | c)
       Converting to use the '<<=' operator instead will avoid this problem.
    """

    def __init__(self, other=None):
        """Create the placeholder, optionally already bound to C{other}."""
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Bind the placeholder to C{other} (strings are converted with
        C{ParserElement.literalStringClass}) and take over its parsing
        characteristics.  Returns C{self}."""
        if isinstance(other, basestring):
            other = ParserElement.literalStringClass(other)
        bound = other
        self.expr = bound
        self.mayReturnEmpty = bound.mayReturnEmpty
        self.strRepr = None
        self.mayIndexError = bound.mayIndexError
        self.setWhitespaceChars(bound.whiteChars)
        self.skipWhitespace = bound.skipWhitespace
        self.saveAsList = bound.saveAsList
        self.ignoreExprs.extend(bound.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{'<<'}."""
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only.  Returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the bound expression once; the flag is set first so a
        recursive grammar does not re-enter.  Returns C{self}."""
        if self.streamlined:
            return self
        self.streamlined = True
        if self.expr is not None:
            self.expr.streamline()
        return self

    def validate(self, validateTrace=[]):
        """Validate the bound expression unless this element is already in
        C{validateTrace}, then run the recursion check."""
        alreadySeen = self in validateTrace
        if not alreadySeen:
            trace = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(trace)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or C{"ClassName: <bound expression>"}."""
        if hasattr(self, "name"):
            return self.name

        # temporarily switch class so a nested reference to this same
        # element renders as "..." instead of recursing
        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            retString = _ustr(self.expr) if self.expr is not None else "None"
        finally:
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy a bound element normally; for an unbound one, return a new
        C{Forward} that refers back to this element."""
        if self.expr is None:
            placeholder = Forward()
            placeholder <<= self
            return placeholder
        return super(Forward, self).copy()
3020
class _ForwardNoRecurse(Forward):
    """Stand-in class that C{Forward.__str__} switches an instance to while
    it renders the bound expression."""

    def __str__(self):
        """Render as an ellipsis."""
        return "..."
3024
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of C{ParseElementEnhance}, for converting parsed results."""

    def __init__(self, expr, savelist=False):
        """Wrap C{expr}.  C{savelist} is accepted but not forwarded to the
        base class; C{saveAsList} is reset to C{False} after the base
        initializer runs."""
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False
3030
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.

    Deprecated: construction emits a C{DeprecationWarning}; use the
    C{upcaseTokens} parse action instead.
    """
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return the tokens upper-cased, as a list."""
        # go through _ustr like upcaseTokens does: the unbound str.upper
        # raises TypeError for any token that is not exactly a str
        return [ _ustr(tok).upper() for tok in tokenlist ]
3040
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying C{'adjacent=False'} in the constructor.
    """

    def __init__(self, expr, joinString="", adjacent=True):
        """Wrap C{expr}; see the class docstring for C{adjacent}."""
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions,
        # but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression.  When C{adjacent} is set it is
        registered on this element only, not on the contained expression.
        Returns C{self}."""
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string,
        keeping the other contents of the results object."""
        joined = "".join(tokenlist._asStringList(self.joinString))
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([joined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks
3073
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions."""

    def __init__(self, expr):
        """Wrap C{expr}, saving its results as a list."""
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens inside a one-element list."""
        grouped = [tokenlist]
        return grouped
3082
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """

    def __init__(self, expr):
        """Wrap C{expr}, saving its results as a list."""
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Add a keyed entry to C{tokenlist} for every non-empty element,
        using the element's first token as the key."""
        for offset, tok in enumerate(tokenlist):
            size = len(tok)
            if size == 0:
                continue
            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(tok[0]).strip()

            if size == 1:
                # key with no value
                value = ""
            elif size == 2 and not isinstance(tok[1], ParseResults):
                # key with one plain value
                value = tok[1]
            else:
                remainder = tok.copy()
                del remainder[0]
                keepWhole = len(remainder) != 1 or (
                    isinstance(remainder, ParseResults) and remainder.haskeys())
                value = remainder if keepWhole else remainder[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value, offset)

        if self.resultsName:
            return [tokenlist]
        return tokenlist
3115
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""

    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress(self):
        """Already suppressing, so return C{self} unchanged."""
        return self
3124
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""

    def __init__(self, methodCall):
        """Wrap C{methodCall}, after passing it through C{_trim_arity}."""
        self.callable = _trim_arity(methodCall)
        self.called = False

    def __call__(self, s, l, t):
        """Run the wrapped action on first use; raise C{ParseException} on
        every later call until C{reset()} is invoked."""
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # marked only after the action returns
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to be called once more."""
        self.called = False
3139
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling C{f}, and a "leaving" line afterwards showing either the return
    value or the exception raised (which is then re-raised).
    """
    f = _trim_arity(f)
    # func_name exists only on Python 2 functions; __name__ works on both,
    # with the class name as a fallback for callables that lack it
    funcName = getattr(f, "__name__", f.__class__.__name__)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is taken to be the bound instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList(expr, delim=",", combine=False):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing C{combine=True} in the constructor.
       If C{combine} is set to C{True}, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = "%s [%s %s]..." % (exprText, _ustr(delim), exprText)
    if not combine:
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    else:
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    return listExpr.setName(dlName)
3178
def countedArray(expr, intExpr=None):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # rebind the array expression to match exactly the parsed count
        n = t[0]
        if n:
            counted = Group(And([expr] * n))
        else:
            counted = Group(empty)
        arrayExpr << counted
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t: int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return intExpr + arrayExpr
3199 -def _flatten(L):
3200 ret = [] 3201 for i in L: 3202 if isinstance(i,list): 3203 ret.extend(_flatten(i)) 3204 else: 3205 ret.append(i) 3206 return ret
3207
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
       previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
       If this is not desired, use C{matchPreviousExpr}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # rebind the repeater to the literal text that was just matched
        if not t:
            rep << Empty()
            return
        if len(t) == 1:
            rep << t[0]
            return
        # flatten t tokens
        literals = [Literal(tt) for tt in _flatten(t.asList())]
        rep << And(literals)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
       expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
       the expressions are evaluated first, and then compared, so
       C{"1"} is compared with C{"10"}.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    repeatedExpr = expr.copy()
    rep <<= repeatedExpr

    def copyTokenToRepeater(s, l, t):
        # remember what the first expression matched ...
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            # ... and reject the repeat unless it yields the same tokens
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("", 0, "")

        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
def _escapeRegexRangeChars(s):
    """Return C{s} with backslash, C{^}, C{-} and C{]} backslash-escaped
    and newline/tab replaced by their two-character escape sequences."""
    # the backslash must be handled first so later escapes are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    for rawChar, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(rawChar, escaped)
    return _ustr(s)
3267
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a C{L{MatchFirst}} for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

       An argument of an unsupported type produces a C{SyntaxWarning}; an empty
       symbol list yields C{NoMatch()}.
    """
    # the Sequence ABC lives in collections.abc on Python 3 and was removed
    # from the collections namespace in Python 3.10
    try:
        from collections.abc import Sequence as _Sequence
    except ImportError:
        from collections import Sequence as _Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, _Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol ahead of a shorter symbol that
    # is its prefix so the longer alternative is tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) )
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3331
def dictOf(key, value):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the C{Dict} results can include named token
       fields.
    """
    entry = Group(key + value)
    entries = ZeroOrMore(entry)
    return Dict(entries)
3341
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action C{L{keepOriginalText}}, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional C{asString} argument is passed as C{False}, then the return value is a
       C{L{ParseResults}} containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to C{L{originalTextFor}} contains expressions with defined
       results names, you must set C{asString} to C{False} if you want to preserve those
       results name values."""
    # zero-width markers whose parse action records the current location
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def textOnly(s, l, t):
        return s[t._original_start:t._original_end]

    def textKeepingNames(s, l, t):
        # swap the tokens for the raw text, then drop the two marker names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    extractText = textOnly if asString else textKeepingNames
    matchExpr.setParseAction(extractText)
    return matchExpr
def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions, even
       if all but one are non-empty."""
    unwrapped = TokenConverter(expr)
    # the parse action replaces the token list with its first element
    unwrapped.setParseAction(lambda t: t[0])
    return unwrapped
def locatedExpr(expr):
    """Helper to decorate a returned token with its starting and ending locations in the input string.
       This helper adds the following results names:
        - locn_start = location where matched expression begins
        - locn_end = location where matched expression ends
        - value = the actual parsed results

       Be careful if the input text contains C{<TAB>} characters, you may want to call
       C{L{ParserElement.parseWithTabs}}
    """
    locator = Empty().setParseAction(lambda s, l, t: l)
    startMarker = locator("locn_start")
    body = expr("value")
    # the end marker must not skip whitespace, so it reports where the match stopped
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + body + endMarker)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for regex-style bracket expressions such as "[a-z0-9_]"
# a backslash followed by one punctuation character yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# remove exactly the matched "\x" / "\0x" prefix before converting:
# lstrip(r'\0x') strips a character *set*, which also removed leading zero
# digits ("\x00" -> "") and left an upper-case "X" in place ("\X41" -> "X41")
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(re.sub(r"^\\0?[xX]","",t[0]),16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\x' (\x21, which is a '!' character)
            (\0x## is also supported for backwards compatibility)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

       Returns an empty string if C{s} cannot be parsed or expanded.
    """
    # a grouped part is a [low, high] pair to expand; anything else is a single character
    _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # deliberate best-effort: malformed input yields "", but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        return ""
3426
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg, locn, toks):
        # reject the match when it does not start in column n
        actualCol = col(locn, strg)
        if actualCol == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol
def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with C{L{transformString<ParserElement.transformString>}()}.
    """
    # an endless iterator that yields the same one-element list on every
    # call; next() is bound to it so each call returns that list
    endlessReplacement = itertools.repeat([replStr])
    return functools.partial(next, endlessReplacement)
3444
def removeQuotes(s, l, t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last character, i.e. the surrounding quote marks
    return quoted[1:-1]
3451
def upcaseTokens(s, l, t):
    """Helper parse action to convert tokens to upper case."""
    asText = list(map(_ustr, t))
    return [tok.upper() for tok in asText]
3455
def downcaseTokens(s, l, t):
    """Helper parse action to convert tokens to lower case."""
    asText = list(map(_ustr, t))
    return [tok.lower() for tok in asText]
3459
def keepOriginalText(s, startLoc, t):
    """DEPRECATED - use new helper method C{L{originalTextFor}}.
       Helper parse action to preserve original parsed text,
       overriding any nested parse actions."""
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    # empty the token list in place, then refill it with the raw text
    del t[:]
    t += ParseResults(originalText)
    return t
3471
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # that frame's local "loc" is the value reported as the end location
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records
        del fstack
3487
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if not isinstance(tagStr, basestring):
        resname = tagStr.name
    else:
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)

    # optional "/" before ">" marks a self-closing tag; reported as a bool under "empty"
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attribute = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        # HTML: attribute names are lower-cased; values are optional and may be unquoted
        printablesLessRAbrack = "".join(c for c in printables if c not in ">")
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attribute = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )
    attributes = Dict(ZeroOrMore(attribute))

    openTag = Suppress("<") + tagStr("tag") + attributes + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the tag name title-cased with colons removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start" + nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end" + nameSuffix).setName("</%s>" % tagStr)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
3516
def makeHTMLTags(tagStr):
    """Helper to build the opening and closing tag expressions for an HTML tag.

       C{tagStr} is the tag name (or an expression matching it).  The work is delegated
       to C{_makeTags} with XML mode switched off, and its C{(openTag, closeTag)} tuple
       is returned unchanged.
    """
    htmlTagPair = _makeTags( tagStr, False )
    return htmlTagPair
3520
def makeXMLTags(tagStr):
    """Helper to build the opening and closing tag expressions for an XML tag.

       C{tagStr} is the tag name (or an expression matching it).  The work is delegated
       to C{_makeTags} with XML mode switched on, and its C{(openTag, closeTag)} tuple
       is returned unchanged.
    """
    xmlTagPair = _makeTags( tagStr, True )
    return xmlTagPair
3524
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action for start tags, so that a tag is only
       accepted when it carries the required attributes.  This avoids false matches on
       common tags such as C{<TD>} or C{<DIV>}.

       The required attribute names and values may be given as:
        - keyword arguments, as in C{(align="right")}
        - an explicit dict passed with the C{**} operator, for names that are not valid
          Python keyword arguments, as in C{**{"class":"Customer", "align":"right"}}
        - positional name-value tuples, as in C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
       When positional tuples are given, any keyword arguments are ignored.

       Each name is looked up in the parsed tokens exactly as written, and each value is
       compared with C{!=}.  To require only that an attribute is present, whatever its
       value, pass C{withAttribute.ANY_VALUE} as the value.

       The returned parse action raises C{ParseException} if a required attribute is
       missing or has a different value; otherwise it returns nothing, leaving the
       tokens unchanged.
    """
    # unpack eagerly so malformed (non-pair) entries fail here, not during parsing
    required = [(name, value) for name, value in (args or attrDict.items())]

    def pa(s,l,tokens):
        for name, expected in required:
            if name not in tokens:
                raise ParseException(s,l,"no matching attribute " + name)
            if expected != withAttribute.ANY_VALUE:
                actual = tokens[name]
                if actual != expected:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (name, actual, expected))
    return pa
# sentinel meaning "the attribute must be present, with any value"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """Shortcut for C{L{withAttribute}} when filtering on a tag's C{class} attribute,
       which is awkward to pass as a keyword argument because C{class} is a reserved
       word in Python.

       If C{namespace} is non-empty, the attribute tested is C{"<namespace>:class"};
       otherwise it is plain C{"class"}.  Returns the parse action built by
       C{withAttribute}.
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    return withAttribute(**{attrName : classname})
# Associativity markers used in the operator definitions passed to infixNotation.
# Each marker is a distinct object() instance.
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the nested
          expression grammar; a complete expression enclosed in C{lpar}/C{rpar} is
          accepted wherever a C{baseExpr} is
        - opList - list of tuples (or lists), one for each operator precedence level in
          the expression grammar; each level is built on top of the one before it, so
          operators listed earlier bind more tightly; each entry is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms;
              for numTerms 2, opExpr may be None, in which case the terms are
              matched directly one after another
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action member may be omitted)
        - lpar - expression for matching left-parentheses (default=Suppress('('))
        - rpar - expression for matching right-parentheses (default=Suppress(')'))

       Returns a C{Forward} expression matching the complete operator grammar.

       Raises C{ValueError} if numTerms is not 1, 2, or 3, if numTerms is 3 and opExpr
       is not a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT}
       nor C{opAssoc.RIGHT}.
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the parse action may be omitted; tuple() lets a
        # precedence level be supplied as a list as well as a tuple
        opExpr,arity,rightLeftAssoc,pa = (tuple(operDef) + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        # each matchExpr starts with a FollowedBy lookahead, so the Group (and any
        # attached parse action) is only used when an operator is actually present
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous (tighter-binding) level when no operator of this level is present
        thisExpr <<= ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# operatorPrecedence is a second name bound to the infixNotation function
operatorPrecedence = infixNotation

# Quoted-string expressions.  Between the enclosing quote characters each pattern accepts:
# any character other than the quote itself, newline, carriage return or backslash;
# a doubled quote character; "\x" followed by hex digits; or a backslash followed by
# any single character.
dblQuotedString = Regex(
    r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"'
).setName("string enclosed in double quotes")
sglQuotedString = Regex(
    r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'"
).setName("string enclosed in single quotes")
# either of the two forms above, combined in a single pattern
quotedString = Regex(
    r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')'''
).setName("quotedString using single or double quotes")
# a quoted string immediately preceded by the letter u
unicodeString = Combine(_L('u') + quotedString.copy())
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening delimiter of a nested list (default="("); can also be a pyparsing expression
        - closer - closing delimiter of a nested list (default=")"); can also be a pyparsing expression
        - content - expression for the items within the nested lists (default=None)
        - ignoreExpr - expression whose matches may contain delimiter characters that
          must not be treated as nesting delimiters (default=quotedString)

       When no C{content} expression is given, C{opener} and C{closer} must both be
       strings, and the nested expression captures the whitespace-delimited content
       between the delimiters as a list of separate values.

       Typical C{ignoreExpr} values are quotedString or a comment expression; several
       can be combined with an C{L{Or}} or C{L{MatchFirst}}.  Pass C{None} if nothing
       is to be ignored.

       Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is omitted
       and the delimiters are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        if len(opener) == 1 and len(closer) == 1:
            # single-character delimiters can simply be excluded from the content characters
            stopChars = opener + closer + whiteChars
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                            CharsNotIn(stopChars,exact=1)))
                content = content.setParseAction(stripToken)
            else:
                content = empty.copy() + CharsNotIn(stopChars).setParseAction(stripToken)
        else:
            # multi-character delimiters: take one non-white character at a time,
            # first checking that a delimiter does not start at this position
            if ignoreExpr is not None:
                content = Combine(OneOrMore(~ignoreExpr +
                                            ~Literal(opener) + ~Literal(closer) +
                                            CharsNotIn(whiteChars,exact=1)))
            else:
                content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                            CharsNotIn(whiteChars,exact=1)))
            content = content.setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret <<= Group( Suppress(opener) + ZeroOrMore( item ) + Suppress(closer) )
    return ret
3701
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining the syntax of the statement that
          is repeated within the indented block
        - indentStack - list supplied by the caller and used as the stack of
          indentation columns (expressions that belong to the same grammar
          should share a common indentStack)
        - indent - boolean indicating whether the block must be indented beyond
          the current level; set to False for a block of left-most statements
          (default=True)

       A valid block must contain at least one C{blockStatement}.

       Note that C{blockStatementExpr} is modified: a backslash followed by a
       line end is added to its ignore expressions.
    """
    def checkPeerIndent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        expected = indentStack[-1]
        if column > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if column != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( column )

    def checkUnindent(s,l,t):
        if l >= len(s):
            return
        column = col(l,s)
        isUnindent = indentStack and column < indentStack[-1] and column <= indentStack[-2]
        if not isUnindent:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    # one or more statements, each at the current indentation column
    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# character sets built with srange from the bracketed range specifications below
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing HTML tag expressions that accept any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
# gives the replacement character for a matched entity, or None when the name is not in the map
replaceHTMLEntity = lambda t : _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    # small demonstration grammar for "SELECT <columns> FROM <tables>" statements
    selectToken = CaselessLiteral( "select" )
    fromToken   = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) ).setName("columns")
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) ).setName("tables")
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    simpleSQL.runTests("""\
        SELECT * from XYZZY, ABC
        select * from SYS.XYZZY
        Select A from Sys.dual
        Select AA,BB,CC from Sys.dual
        Select A, B, C from Sys.dual
        Select A, B, C from Sys.dual
        Xelect A, B, C from Sys.dual
        Select A, B, C frox Sys.dual
        Select
        Select ^^^ frox Sys.dual
        Select A, B, C from Sys.dual, Table2""")