Module pyparsing
[frames | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.6" 
  61  __versionTime__ = "07 Aug 2016 04:42 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import traceback 
  74  import types 
  75  from datetime import datetime 
  76   
  77  try: 
  78      from _thread import RLock 
  79  except ImportError: 
  80      from threading import RLock 
  81   
  82  try: 
  83      from collections import OrderedDict as _OrderedDict 
  84  except ImportError: 
  85      try: 
  86          from ordereddict import OrderedDict as _OrderedDict 
  87      except ImportError: 
  88          _OrderedDict = None 
  89   
  90  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  91   
  92  __all__ = [ 
  93  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  94  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  95  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  96  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  97  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  98  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  99  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
 100  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
 101  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
 102  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
 103  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
 104  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
 105  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
 106  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
 107  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
 108  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
 109  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
 110  'tokenMap', 'pyparsing_common', 
 111  ] 
 112   
 113  system_version = tuple(sys.version_info)[:3] 
 114  PY_3 = system_version[0] == 3 
 115  if PY_3: 
 116      _MAX_INT = sys.maxsize 
 117      basestring = str 
 118      unichr = chr 
 119      _ustr = str 
 120   
 121      # build list of single arg builtins, that can be used as parse actions 
 122      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 123   
 124  else: 
 125      _MAX_INT = sys.maxint 
 126      range = xrange 
127 128 - def _ustr(obj):
129 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 130 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 131 then < returns the unicode object | encodes it with the default encoding | ... >. 132 """ 133 if isinstance(obj,unicode): 134 return obj 135 136 try: 137 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 138 # it won't break any existing code. 139 return str(obj) 140 141 except UnicodeEncodeError: 142 # Else encode it 143 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 144 xmlcharref = Regex('&#\d+;') 145 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 146 return xmlcharref.transformString(ret)
147 148 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 149 singleArgBuiltins = [] 150 import __builtin__ 151 for fname in "sum len sorted reversed list tuple set any all min max".split(): 152 try: 153 singleArgBuiltins.append(getattr(__builtin__,fname)) 154 except AttributeError: 155 continue 156 157 _generatorType = type((y for y in range(1)))
def _xml_escape(data):
    """Return C{data} with &, >, <, " and ' replaced by their XML entities."""
    # '&' comes first so that the ampersands introduced by the later
    # substitutions are not escaped a second time
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    escaped = data
    for symbol, entity in replacements:
        escaped = escaped.replace(symbol, entity)
    return escaped
168
class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
# character-set constants (the public ones are listed in __all__)
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
alphanums = alphas + nums
hexnums = nums + "ABCDEFabcdef"
_bslash = chr(92)
# every printable character that is not whitespace, in string.printable order
printables = "".join([ch for ch in string.printable if ch not in string.whitespace])
class ParseBaseException(Exception):
    """Common base class for all parsing runtime exceptions."""

    # Performance tuning: a *lot* of these get constructed, so the
    # constructor is kept as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        # when only one string is given it is the message, and no input
        # text is recorded
        if msg is None:
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.loc = loc
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    def __getattr__( self, aname ):
        """Compute position attributes on demand; supported names are:
            - lineno - returns the line number of the exception text
            - col (or column) - returns the column number of the exception text
            - line - returns the line containing the exception text
           Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        details = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Return the line of input holding the exception location, stripped,
           with C{markerString} (if not empty) inserted at that location.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = "".join((text[:split_at], markerString, text[split_at:]))
        return text.strip()

    def __dir__(self):
        # list the computed attributes ahead of the regular class attributes
        return ["lineno", "col", "line"] + dir(type(self))
226
class ParseException(ParseBaseException):
    """
    Exception thrown when parse expressions don't match the input;
    attributes available by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
247
class ParseFatalException(ParseBaseException):
    """User-throwable exception, raised when inconsistent parse content
       is found; stops all parsing immediately."""
252
class ParseSyntaxException(ParseFatalException):
    """Same as C{L{ParseFatalException}}, but raised internally when an
       C{L{ErrorStop<And._ErrorStop>}} ('-' operator) indicates that parsing is to
       stop immediately because an unbacktrackable syntax error has been found."""

    def __init__(self, pe):
        # copy the details of the exception being wrapped
        base = super(ParseSyntaxException, self)
        base.__init__(pe.pstr, pe.loc, pe.msg, pe.parserElement)
260
#~ class ReparseException(ParseBaseException):
#~     """Experimental class - parse actions can raise this exception to cause
#~        pyparsing to reparse the input string:
#~         - with a modified input string, and/or
#~         - with a modified start location
#~        Set the values of the ReparseException in the constructor, and raise the
#~        exception in a parse action to cause pyparsing to use the new string/location.
#~        Setting the values as None causes no change to be made.
#~        """
#~     def __init_( self, newstring, restartLoc ):
#~         self.newParseText = newstring
#~         self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """Exception raised by C{validate()} when the grammar could be improperly recursive."""

    def __init__( self, parseElementList ):
        # keep the list of elements so that __str__ can report it
        self.parseElementTrace = parseElementList

    def __str__( self ):
        template = "RecursiveGrammarException: %s"
        return template % self.parseElementTrace
281
class _ParseResultsWithOffset(object):
    """Pair of two values held in the tuple C{tup}; the second one can be
       replaced through C{setOffset}."""

    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        # indexing is delegated to the underlying tuple
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # tuples are immutable, so build a new pair that keeps the first item
        first = self.tup[0]
        self.tup = (first,i)
291
class ParseResults(object):
    """
    Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (C{len(results)})
       - by list index (C{results[0], results[1]}, etc.)
       - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})

    Example::
        integer = Word(nums)
        date_str = (integer.setResultsName("year") + '/'
                        + integer.setResultsName("month") + '/'
                        + integer.setResultsName("day"))
        # equivalent form:
        # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(list(result))
        print(result[0])
        print(result['month'])
        print(result.day)
        print('month' in result)
        print('minutes' in result)
        print(result.dump())
    prints::
        ['1999', '/', '12', '/', '31']
        1999
        12
        31
        True
        False
        ['1999', '/', '12', '/', '31']
        - day: 31
        - month: 12
        - year: 1999
    """
    def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object; the
        # __doinit flag tells __init__ whether the instance is brand new
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            self.__asList = asList
            self.__modal = modal
            if toklist is None:
                toklist = []
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            elif isinstance(toklist, _generatorType):
                self.__toklist = list(toklist)
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name is not None and name:
            if not modal:
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            # empty token values (None, '' or []) are not stored under the name
            if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # int/slice -> token list; anything else -> results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                # only the most recent value stored under the name
                return self.__tokdict[i][-1][0]
            else:
                # every value stored under the name
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v, isinstance=isinstance ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,(int,slice)):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        # nested results keep a weak reference back to their container
        if isinstance(sub,ParseResults):
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name,occurrences in self.__tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        # membership tests look at results names, not at the token list
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return ( not not self.__toklist )
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( self.__toklist[::-1] )
    def _iterkeys( self ):
        if hasattr(self.__tokdict, "iterkeys"):
            return self.__tokdict.iterkeys()
        else:
            return iter(self.__tokdict)

    def _itervalues( self ):
        return (self[k] for k in self._iterkeys())

    def _iteritems( self ):
        return ((k, self[k]) for k in self._iterkeys())

    if PY_3:
        keys = _iterkeys
        """Returns an iterator of all named result keys (Python 3.x only)."""

        values = _itervalues
        """Returns an iterator of all named result values (Python 3.x only)."""

        items = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 3.x only)."""

    else:
        iterkeys = _iterkeys
        """Returns an iterator of all named result keys (Python 2.x only)."""

        itervalues = _itervalues
        """Returns an iterator of all named result values (Python 2.x only)."""

        iteritems = _iteritems
        """Returns an iterator of all named result key-value tuples (Python 2.x only)."""

        # the list-returning forms belong to this branch only: they rely on
        # iterkeys/itervalues/iteritems, which are not defined for Python 3
        def keys( self ):
            """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iterkeys())

        def values( self ):
            """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.itervalues())

        def items( self ):
            """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
            return list(self.iteritems())

    def haskeys( self ):
        """Since keys() returns an iterator, this method is helpful in bypassing
           code that looks for the existence of any defined results names."""
        return bool(self.__tokdict)

    def pop( self, *args, **kwargs):
        """
        Removes and returns item at specified index (default=C{last}).
        Supports both C{list} and C{dict} semantics for C{pop()}. If passed no
        argument or an integer argument, it will use C{list} semantics
        and pop tokens from the list of parsed tokens. If passed a
        non-integer argument (most likely a string), it will use C{dict}
        semantics and pop the corresponding value from any defined
        results names. A second default return value argument is
        supported, just as in C{dict.pop()}.

        Example::
            def remove_first(tokens):
                tokens.pop(0)
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
            print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + OneOrMore(Word(nums))
            print(patt.parseString("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.addParseAction(remove_LABEL)
            print(patt.parseString("AAB 123 321").dump())
        prints::
            ['AAB', '123', '321']
            - LABEL: AAB

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k,v in kwargs.items():
            if k == 'default':
                args = (args[0], v)
            else:
                raise TypeError("pop() got an unexpected keyword argument '%s'" % k)
        if (isinstance(args[0], int) or
                        len(args) == 1 or
                        args[0] in self):
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            # unknown name and a default was supplied
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, defaultValue=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given C{defaultValue} or C{None} if no
        C{defaultValue} is specified.

        Similar to C{dict.get()}.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to C{list.insert()}.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
        """
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name,occurrences in self.__tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def append( self, item ):
        """
        Add single element to end of ParseResults list of elements.

        Example::
            print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self.__toklist.append(item)

    def extend( self, itemseq ):
        """
        Add sequence of elements to end of ParseResults list of elements.

        Example::
            patt = OneOrMore(Word(alphas))

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            # += also merges the other object's results names
            self += itemseq
        else:
            self.__toklist.extend(itemseq)

    def clear( self ):
        """
        Clear all elements and results names.
        """
        del self.__toklist[:]
        self.__tokdict.clear()

    def __getattr__( self, name ):
        # attribute access maps onto results names; an unknown name gives ""
        # (the lookup that used to follow this try/except could never run,
        # since both paths return, and has been removed)
        try:
            return self[name]
        except KeyError:
            return ""

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            # shift the other object's positions past the end of our tokens
            offset = len(self.__toklist)
            addoffset = lambda a: offset if a<0 else a+offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __radd__(self, other):
        if isinstance(other,int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        else:
            # this may raise a TypeError - so be it
            return other + self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'

    def _asStringList( self, sep='' ):
        # flatten nested results into one list of strings, with sep (if
        # given) between the top-level items
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """
        Returns the parse results as a nested list of matching tokens, all converted to strings.

        Example::
            patt = OneOrMore(Word(alphas))
            result = patt.parseString("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use asList() to create an actual list
            result_list = result.asList()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]

    def asDict( self ):
        """
        Returns the named parse results as a nested dictionary.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.asDict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """
        if PY_3:
            item_fn = self.items
        else:
            item_fn = self.iteritems

        def toItem(obj):
            # nested results become dicts when they have names, lists otherwise
            if isinstance(obj, ParseResults):
                if obj.haskeys():
                    return obj.asDict()
                else:
                    return [toItem(v) for v in obj]
            else:
                return obj

        return dict((k,toItem(v)) for k,v in item_fn())

    def copy( self ):
        """
        Returns a new copy of a C{ParseResults} object.
        """
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """
        (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.
        """
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist)
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        for i,res in enumerate(self.__toklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which this exact object is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::
            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = OneOrMore(user_data)

            result = user_info.parseString("22 111-22-3333 #221B")
            for item in result:
                print(item.getName(), ':', item[0])
        prints::
            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        # dict views cannot be indexed on Python 3, so the single entry is
        # fetched with next(iter(...)), which works on Python 2 as well
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               next(iter(self.__tokdict.values()))[0][1] in (0,-1)):
            return next(iter(self.__tokdict.keys()))
        else:
            return None

    def dump(self, indent='', depth=0, full=True):
        """
        Diagnostic method for listing out the contents of a C{ParseResults}.
        Accepts an optional C{indent} argument so that this string can be embedded
        in a nested display of other data.

        Example::
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parseString('12/31/1999')
            print(result.dump())
        prints::
            ['12', '/', '31', '/', '1999']
            - day: 1999
            - month: 31
            - year: 12
        """
        out = []
        NL = '\n'
        out.append( indent+_ustr(self.asList()) )
        if full:
            if self.haskeys():
                # named results, in sorted order
                items = sorted(self.items())
                for k,v in items:
                    if out:
                        out.append(NL)
                    out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
                    if isinstance(v,ParseResults):
                        if v:
                            out.append( v.dump(indent,depth+1) )
                        else:
                            out.append(_ustr(v))
                    else:
                        out.append(_ustr(v))
            elif any(isinstance(vv,ParseResults) for vv in self):
                # no names, but nested results: list each item by index
                v = self
                for i,vv in enumerate(v):
                    if isinstance(vv,ParseResults):
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) ))
                    else:
                        out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv)))

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the C{pprint} module.
        Accepts additional positional or keyword args as defined for the
        C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

        Example::
            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(delimitedList(term)))
            result = func.parseString("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)
        prints::
            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.asList(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved, since weakrefs cannot be pickled
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        (self.__tokdict,
         par,
         inAccumNames,
         self.__name) = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __getnewargs__(self):
        return self.__toklist, self.__name, self.__asList, self.__modal

    def __dir__(self):
        # include the results names so they show up alongside class attributes
        return (dir(type(self)) + list(self.keys()))
# Register ParseResults as a virtual MutableMapping subclass.  The ABCs live in
# collections.abc; the old aliases on the collections module itself were
# removed in Python 3.10, so fall back to them only where collections.abc
# does not exist.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping

_MutableMapping.register(ParseResults)
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits on a newline character belongs to the line that the
   newline terminates, so its column is that line's length plus one (this keeps
   C{col} consistent with C{lineno}, which reports the same line for it).

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # rfind() returns -1 when no newline precedes loc, which makes the first
    # line come out 1-based without any special casing.
    return loc - strg.rfind("\n", 0, loc)
944
def lineno(loc,strg):
    """Returns the 1-based number of the line containing C{loc}, counting newlines as line separators.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
956
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The terminating newline itself is not included.
       """
    start = strg.rfind("\n", 0, loc) + 1
    newline_at = strg.find("\n", loc)
    # no following newline means loc is on the last line: slice to the end
    stop = newline_at if newline_at >= 0 else None
    return strg[start:stop]
966
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: print the expression,
    the location, and its (line, column) position."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
969
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: print the expression
    and the matched tokens as a list."""
    matched = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched
    print (message)
972
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run when a match attempt raises: print the exception."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
975
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

    Accepts and ignores any positional arguments; always returns C{None}.
    """
    return None
979 980 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 981 #~ 'decorator to trim function calls to match the arity of the target' 982 #~ def _trim_arity(func, maxargs=3): 983 #~ if func in singleArgBuiltins: 984 #~ return lambda s,l,t: func(t) 985 #~ limit = 0 986 #~ foundArity = False 987 #~ def wrapper(*args): 988 #~ nonlocal limit,foundArity 989 #~ while 1: 990 #~ try: 991 #~ ret = func(*args[limit:]) 992 #~ foundArity = True 993 #~ return ret 994 #~ except TypeError: 995 #~ if limit == maxargs or foundArity: 996 #~ raise 997 #~ limit += 1 998 #~ continue 999 #~ return wrapper 1000 1001 # this version is Python 2.x-3.x cross-compatible 1002 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap a parse action so that it can always be invoked as C{wrapper(s,l,t)}.

    The wrapper first calls C{func} with all arguments; each time that call raises
    a C{TypeError} attributed to the call itself, one more leading argument is
    dropped and the call is retried, until at most C{maxargs+1} arguments have been
    dropped. Once a call has succeeded, later C{TypeError}s are re-raised as-is.

    Parameters:
     - func - the user's parse action; members of C{singleArgBuiltins} are simply
       called with the tokens only
     - maxargs - (default=C{2}) upper bound used by the retry check C{limit <= maxargs}

    Returns the wrapping callable, with C{__name__} copied from C{func} where possible.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # one-element lists act as mutable cells shared with wrapper() (no 'nonlocal' on Python 2)
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack():
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack()[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # the innermost traceback entry must be the func(...) call line above;
                        # anything deeper means the TypeError came from inside the parse action
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the traceback reference
                        del tb

                if limit[0] <= maxargs:
                    # drop one more leading argument and retry
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
class ParserElement(object):
    """Abstract base level parser element class."""
    # class-wide settings; the static setters below replace them for the whole class
    verbose_stacktrace = False
    DEFAULT_WHITE_CHARS = " \n\t\r"

    @staticmethod
    def setDefaultWhitespaceChars( chars ):
        r"""
        Replace the class-wide default set of whitespace characters.

        Example::
            # default whitespace chars are space, <TAB> and newline
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

            # change to just treat newline as significant
            ParserElement.setDefaultWhitespaceChars(" \t")
            OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars

    @staticmethod
    def inlineLiteralsUsing(cls):
        """
        Choose the class used to wrap plain string literals that are combined
        with parser elements.

        Example::
            # default literal class used is Literal
            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


            # change to Suppress
            ParserElement.inlineLiteralsUsing(Suppress)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
        """
        ParserElement._literalStringClass = cls
1107
def __init__( self, savelist=False ):
    """Initialize the common parser-element state; C{savelist} seeds C{saveAsList}."""
    # callbacks
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    self.debug = False
    self.debugActions = ( None, None, None ) #custom debug actions

    # naming and results
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
    self.errmsg = ""

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing hints
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None
1130
def copy( self ):
    """
    Return a shallow copy of this C{ParserElement} whose parse-action and
    ignore-expression lists are independent of the original's. Handy for attaching
    different parse actions to the same parsing pattern.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    clone = copy.copy(self)
    clone.parseAction = list(self.parseAction)
    clone.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        # pick up the class-wide default whitespace in effect at copy time
        clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return clone
1153
def setName( self, name ):
    """
    Give this expression a name, which makes exception messages clearer.
    Returns C{self} so calls can be chained.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = message = "Expected " + self.name
    if hasattr(self, "exception"):
        # keep an already-created exception object in step with the new message
        self.exception.msg = message
    return self
1167
def setResultsName( self, name, listAllMatches=False ):
    """
    Define name for referencing matching tokens as a nested attribute
    of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    if name.endswith("*"):
        name, listAllMatches = name[:-1], True
    named.resultsName = name
    named.modalResults = not listAllMatches
    return named
1195
def setBreak(self,breakFlag = True):
    """Method to invoke the Python pdb debugger when this element is
       about to be parsed. Set C{breakFlag} to True to enable, False to
       disable. Returns C{self}.
    """
    if not breakFlag:
        # restore the parse method saved by an earlier setBreak(True), if any
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped( instring, loc, doActions, callPreParse )
    # remember the wrapped method so that setBreak(False) can undo this
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1213
def setParseAction( self, *fns, **kwargs ):
    """
    Define action to perform when successfully matching parse element definition.
    Any previously set parse actions are replaced. Returns C{self}.

    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    self.parseAction = [_trim_arity(action) for action in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1251
def addParseAction( self, *fns, **kwargs ):
    """
    Append parse actions to the expression's existing list of parse actions. See L{I{setParseAction}<setParseAction>}.
    C{callDuringTry} stays enabled once any call has requested it. Returns C{self}.

    See examples in L{I{copy}<copy>}.
    """
    self.parseAction += [_trim_arity(action) for action in fns]
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1261
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.
    Returns C{self}.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = if True, the conditions are also evaluated during lookaheads and alternate testing

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition(predicate):
        # Built through a factory so that each parse action is bound to its own
        # predicate; a closure over the loop variable would make every condition
        # evaluate the last function passed in. The arity wrapper is created once.
        trimmed = _trim_arity(predicate)
        def pa(s,l,t):
            if not bool(trimmed(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        self.parseAction.append(make_condition(fn))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Define action to perform if parsing fails at this expression.
       Fail action fn is a callable function that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}, so the call can be chained."""
    element = self
    element.failAction = fn
    return element
1301
def _skipIgnorables( self, instring, loc ):
    """Advance C{loc} past every run of ignorable expressions starting there and
    return the new location; passes over the ignore list repeat until a full pass
    matches nothing."""
    matched_any = True
    while matched_any:
        matched_any = False
        for ignorable in self.ignoreExprs:
            try:
                # consume as many consecutive matches of this expression as possible
                while True:
                    loc, _unused = ignorable._parse( instring, loc )
                    matched_any = True
            except ParseException:
                pass
    return loc
1314
def preParse( self, instring, loc ):
    """Return the location at which the actual match should start: C{loc} advanced
    past ignorable expressions (when any are defined) and then past leading
    whitespace (when C{skipWhitespace} is set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc
1326
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation of the match step: consume nothing and return the
    unchanged location together with a new, empty token list."""
    no_tokens = []
    return loc, no_tokens
1329
def postParse( self, instring, loc, tokenlist ):
    """Hook applied to the tokens produced by the match step; this base
    implementation hands back C{tokenlist} untouched."""
    result = tokenlist
    return result
1332 1333 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Run one uncached match attempt of this element at C{loc}.

    Steps: optional C{preParse}, C{parseImpl}, C{postParse}, wrapping of the tokens
    in a C{ParseResults}, then the parse actions (only when C{doActions} or
    C{callDuringTry} is set). Debug actions and the fail action are invoked along
    the way when configured.

    Returns C{(loc, retTokens)}: the location after the match and the results.
    An C{IndexError} raised by C{parseImpl} is converted into a C{ParseException}
    located at the end of the input; other parse exceptions propagate.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: debug/fail callbacks must observe the attempt and any failure
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no callbacks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # NOTE(review): the bound check below tests 'loc' (not yet reassigned here),
        # not 'preloc' - confirm that this is intended
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning something other than None replaces the results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action returning something other than None replaces the results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1405
def tryParse( self, instring, loc ):
    """Attempt a match at C{loc} without running parse actions and return the
    location after the match. A C{ParseFatalException} is downgraded to an
    ordinary C{ParseException} at C{loc}."""
    try:
        outcome = self._parse( instring, loc, doActions=False )
        return outcome[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
1411
def canParseNext(self, instring, loc):
    """Return C{True} if this element matches at C{loc}, C{False} if the attempt
    raises C{ParseException} or C{IndexError}."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True
1419
class _UnboundedCache(object):
    """Packrat cache without a size limit.

    The backing dict is private to the instance's closures; C{get} returns the
    C{not_in_cache} sentinel for keys that are not present.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """Packrat cache holding at most C{size} entries; the entry that was
        inserted first is evicted first. C{get} returns the C{not_in_cache}
        sentinel for keys that are not present."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # popitem(False) removes the oldest inserted entry
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

else:
    class _FifoCache(object):
        """Fallback first-in/first-out cache for interpreters without an ordered
        dict: insertion order is tracked in a separate deque of keys."""
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # One entry per cached key, oldest first. Deliberately not bounded with
            # maxlen: a bounded deque silently discards keys (and with size 0 it is
            # always empty, which made popleft() raise IndexError).
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                # evict oldest entries until back within the limit; with size 0
                # the entry just added is dropped again (cache effectively disabled)
                while len(cache) > size and key_fifo:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing front end for C{_parseNoCache}.

    Results and parse exceptions are stored in C{ParserElement.packrat_cache} under
    the key C{(self, instring, loc, callPreParse, doActions)}; the hit/miss counters
    in C{packrat_cache_stats} are updated. Cached results are copied both when they
    are stored and when they are handed out.
    """
    HIT, MISS = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(key)
        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            # a cached failure is replayed by raising the stored exception
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(key, pe.__class__(*pe.args))
            raise
        cache.set(key, (result[0], result[1].copy()))
        return result
# uncached parsing is the default; enablePackrat() swaps in _parseCache
_parse = _parseNoCache

@staticmethod
def resetCache():
    """Empty the packrat cache and zero its hit/miss counters (in place)."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    stats[:] = [0] * len(stats)

_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Calls made after packrat parsing has been enabled have no effect.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    ParserElement._parse = ParserElement._parseCache
1557
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        end_loc, tokens = self._parse( instring, 0 )
        if parseAll:
            # only ignorables/whitespace may remain before the end of the text
            end_loc = self.preParse( instring, end_loc )
            end_of_text = Empty() + StringEnd()
            end_of_text._parse( instring, end_loc )
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1607
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: it yields one C{(tokens, start, end)} tuple per match.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, ahead of the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # preParse is done here explicitly, so the parse call is told to skip it
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match at this position: retry one character further on
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lower-case l) is only used in the comparison,
                        # while the assignment uses 'nextLoc', the end of the match - confirm
                        # that skipping to the end of the match is intended here
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc: step forward so the loop progresses
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1678
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that calling this method sets C{keepTabs} on the expression.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString( instring ):
            # untouched text between the previous match and this one
            pieces.append( instring[prev_end:start] )
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        pieces = [piece for piece in pieces if piece]
        return "".join(map(_ustr,_flatten(pieces)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1721
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression: the tokens of every match are collected into
    a single C{ParseResults}.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = [ toks for toks, _start, _end in self.scanString( instring, maxMatches ) ]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1744
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between consecutive matches (for each match also the first
    token of the separator when C{includeSeparators} is set), and finally the text
    after the last match.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    # end of the previous separator, i.e. where the next piece of text begins
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        last = e
    yield instring[last:]
1766
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}.

    A plain string operand is first wrapped in the literal string class; any other
    operand that is not a C{ParserElement} triggers a C{SyntaxWarning} and the
    result is C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1778
def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.

    A plain string operand is first wrapped in the literal string class; any other
    operand that is not a C{ParserElement} triggers a C{SyntaxWarning} and the
    result is C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1790
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop.

    A plain string operand is first wrapped in the literal string class; any other
    operand that is not a C{ParserElement} triggers a C{SyntaxWarning} and the
    result is C{None}.
    """
    if isinstance( other, basestring ):
        other = ParserElement._literalStringClass( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1802
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1814
1815 - def __mul__(self,other):
1816 """ 1817 Implementation of * operator, allows use of C{expr * 3} in place of 1818 C{expr + expr + expr}. Expressions may also me multiplied by a 2-integer 1819 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples 1820 may also include C{None} as in: 1821 - C{expr*(n,None)} or C{expr*(n,)} is equivalent 1822 to C{expr*n + L{ZeroOrMore}(expr)} 1823 (read as "at least n instances of C{expr}") 1824 - C{expr*(None,n)} is equivalent to C{expr*(0,n)} 1825 (read as "0 to n instances of C{expr}") 1826 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)} 1827 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)} 1828 1829 Note that C{expr*(None,n)} does not raise an exception if 1830 more than n exprs exist in the input stream; that is, 1831 C{expr*(None,n)} does not enforce a maximum number of expr 1832 occurrences. If this behavior is desired, then write 1833 C{expr*(None,n) + ~expr} 1834 """ 1835 if isinstance(other,int): 1836 minElements, optElements = other,0 1837 elif isinstance(other,tuple): 1838 other = (other + (None, None))[:2] 1839 if other[0] is None: 1840 other = (0, other[1]) 1841 if isinstance(other[0],int) and other[1] is None: 1842 if other[0] == 0: 1843 return ZeroOrMore(self) 1844 if other[0] == 1: 1845 return OneOrMore(self) 1846 else: 1847 return self*other[0] + ZeroOrMore(self) 1848 elif isinstance(other[0],int) and isinstance(other[1],int): 1849 minElements, optElements = other 1850 optElements -= minElements 1851 else: 1852 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1])) 1853 else: 1854 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other)) 1855 1856 if minElements < 0: 1857 raise ValueError("cannot multiply ParserElement by negative value") 1858 if optElements < 0: 1859 raise ValueError("second tuple value must be greater or equal to first tuple value") 1860 if minElements == optElements == 0: 1861 raise ValueError("cannot 
multiply ParserElement by 0 or (0,0)") 1862 1863 if (optElements): 1864 def makeOptionalList(n): 1865 if n>1: 1866 return Optional(self + makeOptionalList(n-1)) 1867 else: 1868 return Optional(self)
1869 if minElements: 1870 if minElements == 1: 1871 ret = self + makeOptionalList(optElements) 1872 else: 1873 ret = And([self]*minElements) + makeOptionalList(optElements) 1874 else: 1875 ret = makeOptionalList(optElements) 1876 else: 1877 if minElements == 1: 1878 ret = self 1879 else: 1880 ret = And([self]*minElements) 1881 return ret 1882
1883 - def __rmul__(self, other):
1884 return self.__mul__(other)
1885
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return MatchFirst([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1897
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1909
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Or([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1921
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1933
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    rhs = other
    if isinstance(rhs, basestring):
        rhs = ParserElement._literalStringClass(rhs)
    if isinstance(rhs, ParserElement):
        return Each([self, rhs])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(rhs),
            SyntaxWarning, stacklevel=2)
    return None
1945
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}

    A plain string operand is first converted with
    C{ParserElement._literalStringClass}.  If the operand is still not a
    C{ParserElement}, a C{SyntaxWarning} is issued and C{None} is returned.
    """
    lhs = other
    if isinstance(lhs, basestring):
        lhs = ParserElement._literalStringClass(lhs)
    if isinstance(lhs, ParserElement):
        return lhs & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(lhs),
            SyntaxWarning, stacklevel=2)
    return None
1957
def __invert__(self):
    """
    Implementation of ~ operator - wraps this element in a C{L{NotAny}}
    """
    negated = NotAny(self)
    return negated
1963
1964 - def __call__(self, name=None):
1965 """ 1966 Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}. 1967 1968 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be 1969 passed as C{True}. 1970 1971 If C{name} is omitted, same as calling C{L{copy}}. 1972 1973 Example:: 1974 # these are equivalent 1975 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno") 1976 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno") 1977 """ 1978 if name is not None: 1979 return self.setResultsName(name) 1980 else: 1981 return self.copy()
1982
def suppress(self):
    """
    Returns this C{ParserElement} wrapped in a C{L{Suppress}}, so that its
    output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed
1989
def leaveWhitespace(self):
    """
    Turns off whitespace skipping for this element (sets C{skipWhitespace}
    to C{False}) and returns C{self} so calls can be chained.

    Normally only used internally by the pyparsing module, but may be needed
    in some whitespace-sensitive grammars.
    """
    setattr(self, "skipWhitespace", False)
    return self
1998
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars for this element.

    Stores C{chars} as the element's whitespace characters, enables
    whitespace skipping, clears the C{copyDefaultWhiteChars} flag, and
    returns C{self}.
    """
    self.whiteChars = chars
    self.copyDefaultWhiteChars = False
    self.skipWhitespace = True
    return self
2007
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Sets the C{keepTabs} flag on this element and returns C{self}.
    """
    setattr(self, "keepTabs", True)
    return self
2016
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string argument is wrapped in C{Suppress}.  A C{Suppress} expression is
    added only if it is not already in the ignore list; any other expression
    is copied, wrapped in C{Suppress} and appended.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self
2039
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each of the three actions falls back to the corresponding module default
    when a false value (such as C{None}) is passed.  Returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
2049
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installing the default debug actions),
    False to disable.  Returns C{self}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2060
2061 - def __str__( self ):
2062 return self.name
2063
def __repr__(self):
    """Returns the result of applying C{_ustr} to this element."""
    text = _ustr(self)
    return text
2066
def streamline(self):
    """
    Marks this element as streamlined and clears the cached string
    representation (C{strRepr}).  Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2071
def checkRecursion(self, parseElementList):
    """
    Recursion check hook; this implementation does nothing and returns C{None}.
    """
    return None
2074
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted but not used by this implementation; its
    default is C{None} rather than a shared mutable list.  The check itself is
    delegated to C{checkRecursion}, called with a fresh empty list.
    """
    self.checkRecursion([])
2080
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    The argument is treated as a file object first (its C{read()} is called);
    if that raises C{AttributeError} it is treated as a filename instead.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
2100
def __eq__(self, other):
    """
    Equality test for parser elements.

     - against another C{ParserElement}: true if it is the same object or
       both have equal instance attributes (C{vars()})
     - against a string: true if this expression matches the string
       (see C{L{matches}})
     - against anything else: C{False}
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    elif isinstance(other, basestring):
        return self.matches(other)
    else:
        # The previous fallback compared a super() proxy object with
        # C{other}; operators on a super proxy do not dispatch to the parent
        # class's __eq__, so that comparison was never a real delegation.
        # Report inequality explicitly instead.
        return False
2108
2109 - def __ne__(self,other):
2110 return not (self == other)
2111
2112 - def __hash__(self):
2113 return hash(id(self))
2114
2115 - def __req__(self,other):
2116 return self == other
2117
2118 - def __rne__(self,other):
2119 return not (self == other)
2120
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2139
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and the results contain a list of
    C{(test string, parse results or exception)} pairs, one per test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    if isinstance(tests, basestring):
        tests = [str.strip(rawLine) for rawLine in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    pendingComments = []
    for testStr in tests:
        isComment = comment is not None and comment.matches(testStr, False)
        # a blank line directly after comment lines is kept with those comments
        if isComment or (pendingComments and not testStr):
            pendingComments.append(testStr)
            continue
        if not testStr:
            continue

        out = ['\n'.join(pendingComments), testStr]
        pendingComments = []
        try:
            result = self.parseString(testStr, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' not in testStr:
                out.append(' '*pe.loc + '^' + fatal)
            else:
                # multi-line test: show the failing line with the marker
                # placed under the failing column
                out.append(line(pe.loc, testStr))
                out.append(' '*(col(pe.loc, testStr)-1) + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((testStr, result))

    return success, allResults
2258
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__(self):
        # initialize the base element with savelist disabled
        baseInit = super(Token, self).__init__
        baseInit(savelist=False)
2266
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2277
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fails: raises C{ParseException} carrying this token's C{errmsg}."""
        failure = ParseException(instring, loc, self.errmsg, self)
        raise failure
2292
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string cannot be a Literal; warn and turn this
            # instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        quoted = '"%s"' % _ustr(self.match)
        self.name = quoted
        self.errmsg = "Expected " + quoted
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """
        Returns C{(loc + matchLen, match)} if the match string is found at
        C{loc}; otherwise raises C{ParseException}.
        """
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)
# short module-private alias for Literal
_L = Literal
# register Literal as the class that ParserElement's operator methods use to
# convert plain string operands into parser elements
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to all alphanumerics + "_" and "$" (the value of
          C{Keyword.DEFAULT_KEYWORD_CHARS} at the time the keyword is created)
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            # Resolve the default at call time so that a prior call to
            # setDefaultKeywordChars() is honored; a default bound in the
            # signature would be frozen when the class body is executed.
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Matches the keyword at C{loc}, requiring that the characters directly
        before and after the match (when present) are not in C{identChars}.
        Returns C{(loc + matchLen, match)} or raises C{ParseException}.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Returns a copy of this keyword.  The copy gets its own set of
        identifier characters equal to this instance's, so custom
        C{identChars} are no longer reset to the default on copy.
        """
        c = super(Keyword, self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2397
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the base Literal stores the upper-cased text as the match string
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        quoted = "'%s'" % self.returnString
        self.name = quoted
        self.errmsg = "Expected " + quoted

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compares the upper-cased input slice at C{loc} with the stored match
        string; on success returns the literal as originally given.
        """
        candidate = instring[loc:loc+self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.returnString
2420
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Matching is performed by C{Keyword.parseImpl} in its caseless mode, so the
    keyword must be neither preceded nor followed by an identifier character.
    (This class previously overrode C{parseImpl} without the check on the
    preceding character, making it inconsistent with
    C{Keyword(..., caseless=True)}.)

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        if identChars is None:
            # look up the default at call time so that
            # Keyword.setDefaultKeywordChars() is honored
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
2438
class Word(Token):
    """
    Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars:
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            # no separate body set given: body characters are the initial ones
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both the minimum and the maximum
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in the character sets, the
        # word is expressed as a regular expression, which parseImpl prefers.
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile(self.reString)
            except Exception:
                # best effort: if the pattern cannot be compiled, fall back
                # to the character-by-character matcher in parseImpl
                self.re = None

    def parseImpl(self, instring, loc, doActions=True):
        """
        Matches a word at C{loc} and returns C{(end location, matched text)};
        raises C{ParseException} if no acceptable word starts there.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a word that continues past the limit is rejected
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """
        Returns the inherited string form when that succeeds; otherwise a
        cached C{W:(...)} summary of the character sets.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            # the inherited form is unavailable; build the summary below
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
2596
class Regex(Token):
    """
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        ssn = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__(self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled RE object, which is then used directly.
        Raises C{ValueError} for any other type, and re-raises the C{re} module's
        error (after a C{SyntaxWarning}) for an invalid pattern string."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # re.error is the exception class also exposed as
                # sre_constants.error; the re name avoids the deprecated module
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # store the pattern source text; str(pattern) would yield the
            # "re.compile(...)" representation instead
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Matches the regular expression at C{loc}.  Returns the end location
        and a C{ParseResults} holding the matched text, with any named groups
        of the match set as named results.  Raises C{ParseException} on no match.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__(self):
        """
        Returns the inherited string form when that succeeds; otherwise a
        cached C{Re:(...)} form built from the pattern.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            # the inherited form is unavailable; build the summary below
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2667
2668 2669 -class QuotedString(Token):
2670 r""" 2671 Token for matching strings that are delimited by quoting characters. 2672 2673 Defined with the following parameters: 2674 - quoteChar - string of one or more characters defining the quote delimiting string 2675 - escChar - character to escape quotes, typically backslash (default=C{None}) 2676 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None}) 2677 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False}) 2678 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True}) 2679 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar) 2680 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True}) 2681 2682 Example:: 2683 qs = QuotedString('"') 2684 print(qs.searchString('lsjdf "This is the quote" sldjf')) 2685 complex_qs = QuotedString('{{', endQuoteChar='}}') 2686 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) 2687 sql_qs = QuotedString('"', escQuote='""') 2688 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 2689 prints:: 2690 [['This is the quote']] 2691 [['This is the "quote"']] 2692 [['This is the quote with "embedded" quotes']] 2693 """
2694 - def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
2695 super(QuotedString,self).__init__() 2696 2697 # remove white space from quote chars - wont work anyway 2698 quoteChar = quoteChar.strip() 2699 if not quoteChar: 2700 warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 2701 raise SyntaxError() 2702 2703 if endQuoteChar is None: 2704 endQuoteChar = quoteChar 2705 else: 2706 endQuoteChar = endQuoteChar.strip() 2707 if not endQuoteChar: 2708 warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2) 2709 raise SyntaxError() 2710 2711 self.quoteChar = quoteChar 2712 self.quoteCharLen = len(quoteChar) 2713 self.firstQuoteChar = quoteChar[0] 2714 self.endQuoteChar = endQuoteChar 2715 self.endQuoteCharLen = len(endQuoteChar) 2716 self.escChar = escChar 2717 self.escQuote = escQuote 2718 self.unquoteResults = unquoteResults 2719 self.convertWhitespaceEscapes = convertWhitespaceEscapes 2720 2721 if multiline: 2722 self.flags = re.MULTILINE | re.DOTALL 2723 self.pattern = r'%s(?:[^%s%s]' % \ 2724 ( re.escape(self.quoteChar), 2725 _escapeRegexRangeChars(self.endQuoteChar[0]), 2726 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 2727 else: 2728 self.flags = 0 2729 self.pattern = r'%s(?:[^%s\n\r%s]' % \ 2730 ( re.escape(self.quoteChar), 2731 _escapeRegexRangeChars(self.endQuoteChar[0]), 2732 (escChar is not None and _escapeRegexRangeChars(escChar) or '') ) 2733 if len(self.endQuoteChar) > 1: 2734 self.pattern += ( 2735 '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]), 2736 _escapeRegexRangeChars(self.endQuoteChar[i])) 2737 for i in range(len(self.endQuoteChar)-1,0,-1)) + ')' 2738 ) 2739 if escQuote: 2740 self.pattern += (r'|(?:%s)' % re.escape(escQuote)) 2741 if escChar: 2742 self.pattern += (r'|(?:%s.)' % re.escape(escChar)) 2743 self.escCharReplacePattern = re.escape(self.escChar)+"(.)" 2744 self.pattern += (r')*%s' % re.escape(self.endQuoteChar)) 2745 2746 try: 2747 self.re = re.compile(self.pattern, self.flags) 2748 self.reString 
= self.pattern 2749 except sre_constants.error: 2750 warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern, 2751 SyntaxWarning, stacklevel=2) 2752 raise 2753 2754 self.name = _ustr(self) 2755 self.errmsg = "Expected " + self.name 2756 self.mayIndexError = False 2757 self.mayReturnEmpty = True
2758
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a quoted string at C{loc}.

    Returns a C{(newloc, text)} tuple, where C{text} is the matched string; when
    C{self.unquoteResults} is set, the enclosing quotes are removed and escape
    sequences are resolved.  Raises C{ParseException} if the text at C{loc} does
    not start with the opening quote character or does not match the compiled
    pattern.
    """
    # cheap first-character test before running the regular expression
    result = None
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring, loc)
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:
        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                ws_map = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                )
                for wslit, wschar in ws_map:
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters; raw string so that "\g" is not
            # treated as an (invalid) string-literal escape sequence
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2793
def __str__(self):
    """
    Return a display string for this expression.

    The base-class C{__str__} is tried first; if it raises, a description built
    from the opening and closing quote strings is generated, cached in
    C{self.strRepr}, and returned.
    """
    try:
        return super(QuotedString, self).__str__()
    except Exception:
        # fall through to the generated description; do not trap
        # KeyboardInterrupt/SystemExit as a bare except would
        pass

    if self.strRepr is None:
        self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

    return self.strRepr
2804
class CharsNotIn(Token):
    """
    Token for matching words composed of characters *not* in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        """
        Parameters:
         - notChars - string of characters that terminate a match
         - min - minimum match length; values < 1 raise C{ValueError}
         - max - maximum match length; 0 means no limit
         - exact - if > 0, overrides both C{min} and C{max}
        """
        super(CharsNotIn, self).__init__()
        # whitespace is matchable content for this token, so never skip it
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """
        Consume characters from C{loc} until one of C{notChars}, the maximum
        length, or the end of the input is reached.  Returns C{(newloc, text)};
        raises C{ParseException} if the first character is excluded or fewer
        than C{minLen} characters were matched.
        """
        notchars = self.notChars
        if instring[loc] in notchars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        maxlen = min(start + self.maxLen, len(instring))
        while loc < maxlen and instring[loc] not in notchars:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        """
        Return the base-class string if available, otherwise a cached
        C{"!W:(...)"} description showing up to four of the excluded characters.
        """
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # fall back to the generated description below
            pass

        if self.strRepr is None:
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2876
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # display names used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """
        Parameters:
         - ws - string of whitespace characters to match
         - min - minimum match length
         - max - maximum match length; 0 means no limit
         - exact - if > 0, overrides both C{min} and C{max}
        """
        super(White, self).__init__()
        self.matchWhite = ws
        # characters we are asked to match must not be skipped as leading whitespace
        self.setWhitespaceChars("".join(c for c in self.whiteChars if c not in self.matchWhite))
        #~ self.leaveWhitespace()
        # characters without a display name fall back to their repr instead of
        # raising KeyError (e.g. White("\v"))
        self.name = "".join(White.whiteStrs.get(c, repr(c)) for c in self.matchWhite)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

    def parseImpl(self, instring, loc, doActions=True):
        """
        Consume a run of characters from C{matchWhite} starting at C{loc}.
        Returns C{(newloc, text)}; raises C{ParseException} if the character at
        C{loc} is not in C{matchWhite} or the run is shorter than C{minLen}.
        """
        if instring[loc] not in self.matchWhite:
            raise ParseException(instring, loc, self.errmsg, self)
        start = loc
        loc += 1
        maxloc = min(start + self.maxLen, len(instring))
        while loc < maxloc and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2926
class _PositionToken(Token):
    """
    Common base for the position-testing tokens defined below; it names the
    instance after its concrete class and marks it as able to match empty text.
    """
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.name = type(self).__name__
        self.mayReturnEmpty = True
        self.mayIndexError = False
2934
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """
        Skip ignorable expressions and whitespace until the target column, a
        non-whitespace character, or the end of the input is reached.
        """
        target = self.col
        if col(loc, instring) == target:
            return loc

        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        end = len(instring)
        while loc < end and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return the text between C{loc} and the target column, together with the
        location of that column.  Raises C{ParseException} if C{loc} is already
        past the target column.
        """
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        newloc = loc + self.col - current
        return newloc, instring[loc:newloc]
2959
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string
    """
    def __init__(self):
        super(LineStart, self).__init__()
        # newlines are significant here, so they must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected start of line"

    def preParse(self, instring, loc):
        """
        Advance C{loc} by one if, after the base-class pre-parse, the next
        character is a newline; otherwise return C{loc} unchanged.
        """
        preloc = super(LineStart, self).preParse(instring, loc)
        # bounds check: at end of input there is no character to inspect, and
        # indexing would raise IndexError instead of letting parseImpl decide
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed (returning C{(loc, [])}) when C{loc} is 0, equals the pre-parsed
        start of the string, or follows a newline; otherwise raise
        C{ParseException}.
        """
        if not (loc == 0 or
                (loc == self.preParse(instring, 0)) or
                (instring[loc-1] == "\n")):  # col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2981
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n", "")
        self.setWhitespaceChars(nonNewlineWhite)
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match a newline at C{loc} (returning it as the token) or the end of the
        input (returning no tokens); anything else raises C{ParseException}.
        """
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3001
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed at location 0, or at the location reached by pre-parsing from 0
        (i.e. everything before C{loc} is whitespace and ignorables).
        """
        atStart = (loc == 0) or (loc == self.preParse(instring, 0))
        if not atStart:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3016
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed when C{loc} is at or beyond the end of C{instring}; raise
        C{ParseException} when input remains.

        Exactly at the end the returned location is C{loc+1}; beyond the end it
        is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so no further
        # fallback branch is needed
        return loc, []
3034
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart, self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed (returning C{(loc, [])}) at location 0, or when the previous
        character is not a word character and the current one is; otherwise
        raise C{ParseException}.
        """
        if loc != 0:
            if (instring[loc-1] in self.wordChars or
                    instring[loc] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3054
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd, self).__init__()
        # a set gives constant-time membership tests in parseImpl
        self.wordChars = set(wordChars)
        # skipping whitespace would move past the word boundary being tested
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """
        Succeed (returning C{(loc, [])}) at or past the end of the input, or when
        the current character is not a word character and the previous one is;
        otherwise raise C{ParseException}.
        """
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (instring[loc] in self.wordChars or
                    instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3076
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__(self, exprs, savelist = False):
        """
        Parameters:
         - exprs - a string, a sequence or generator of expressions/strings, or a
           single expression; strings are wrapped with
           C{ParserElement._literalStringClass}
         - savelist - passed through to the C{ParserElement} constructor
        """
        super(ParseExpression, self).__init__(savelist)

        # the collections.Sequence alias was removed in Python 3.10; prefer the
        # collections.abc location and fall back for interpreters without it
        try:
            from collections.abc import Sequence
        except ImportError:
            from collections import Sequence

        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [ParserElement._literalStringClass(exprs)]
        elif isinstance(exprs, Sequence):
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = map(ParserElement._literalStringClass, exprs)
            self.exprs = list(exprs)
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable: treat as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def __getitem__(self, i):
        """Return the contained expression at index C{i}."""
        return self.exprs[i]

    def append(self, other):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append(other)
        # cached string representation is now stale
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are unaffected
        self.exprs = [e.copy() for e in self.exprs]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as an ignorable expression on this element and on every
        contained expression.  A C{Suppress} that is already registered is not
        added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseExpression, self).ignore(other)
        for e in self.exprs:
            # propagate the entry the base class just appended
            e.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        """
        Return the base-class string if available, otherwise a cached
        C{"ClassName:(exprs)"} description.
        """
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            # fall back to the generated description below
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        """
        Streamline all contained expressions and flatten directly nested
        expressions of the same class; returns C{self}.
        """
        super(ParseExpression, self).streamline()

        for e in self.exprs:
            e.streamline()

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            other = self.exprs[0]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = other.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

            other = self.exprs[-1]
            if (isinstance(other, self.__class__) and
                    not other.parseAction and
                    other.resultsName is None and
                    not other.debug):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName(self, name, listAllMatches=False):
        """Delegate to the base-class C{setResultsName} and return its result."""
        ret = super(ParseExpression, self).setResultsName(name, listAllMatches)
        return ret

    def validate(self, validateTrace=None):
        """
        Validate every contained expression, then check this expression for
        recursion.  C{validateTrace} is the list of expressions already being
        validated (default: empty); it is copied, never modified.
        """
        tmp = (validateTrace or [])[:] + [self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion([])

    def copy(self):
        """Return a copy of this expression whose contained expressions are also copies."""
        ret = super(ParseExpression, self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3188
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: expressions after it in an C{And} raise C{ParseSyntaxException} on failure."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist = True):
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the first expression; guard against an
        # empty expression list so that And([]) can be constructed, as Or([])
        # and MatchFirst([]) can
        if self.exprs:
            self.setWhitespaceChars(self.exprs[0].whiteChars)
            self.skipWhitespace = self.exprs[0].skipWhitespace
        self.callPreparse = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse each contained expression in order, accumulating their tokens.
        Returns C{(newloc, tokens)}.  Once an C{_ErrorStop} marker has been
        passed, any failure is re-raised as C{ParseSyntaxException}.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    # drop the traceback reference before wrapping
                    pe.__traceback__ = None
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException(ParseException(instring, len(instring), self.errmsg, self))
            else:
                loc, exprtokens = e._parse(instring, loc, doActions)
            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        """Append C{other} (strings are wrapped as literals) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  #And( [ self, other ] )

    def checkRecursion(self, parseElementList):
        """
        Check contained expressions for recursion, stopping after the first one
        that cannot match empty text.
        """
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"{a b c}"} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"

        return self.strRepr
3263
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Try every alternative, then re-parse the successful ones from longest to
        shortest match and return the first that succeeds.  If none succeeds,
        raise the exception that got furthest into the input, with its message
        replaced by this expression's C{errmsg}.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        inputLen = len(instring)

        for alt in self.exprs:
            try:
                endLoc = alt.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if inputLen > furthestLoc:
                    furthestErr = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestLoc = inputLen
            else:
                # remember every match so they can be retried longest to shortest
                candidates.append((endLoc, alt))

        if candidates:
            # stable sort: equal-length matches keep their listed order
            candidates.sort(key=lambda match: match[0], reverse=True)
            for _, alt in candidates:
                try:
                    return alt._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthestLoc:
                        furthestErr = err
                        furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ixor__(self, other):
        """Append C{other} (strings are wrapped as literals) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  #Or( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"{a ^ b}"} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3342
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all the expression is treated as possibly empty
        self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs) if self.exprs else True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Return the result of the first alternative that parses.  If none does,
        raise the exception that got furthest into the input, with its message
        replaced by this expression's C{errmsg}.
        """
        furthestLoc = -1
        furthestErr = None
        inputLen = len(instring)

        for alt in self.exprs:
            try:
                return alt._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if inputLen > furthestLoc:
                    furthestErr = ParseException(instring, inputLen, alt.errmsg, self)
                    furthestLoc = inputLen

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)

        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other):
        """Append C{other} (strings are wrapped as literals) and return C{self}."""
        if isinstance(other, basestring):
            other = ParserElement._literalStringClass(other)
        return self.append(other)  #MatchFirst( [ self, other ] )

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"{a | b}"} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every alternative for recursion."""
        trail = parseElementList[:] + [self]
        for alt in self.exprs:
            alt.checkRecursion(trail)
3410
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__(self, exprs, savelist=True):
        super(Each, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # expression groups are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        First discover, with trial parses, an order in which the contained
        expressions match; then parse them for real in that order and merge the
        results.  Raises C{ParseException} if a required expression never matched.
        """
        if self.initExprGroups:
            members = self.exprs
            # map from the id of an Optional's inner expression back to the Optional
            self.opt1map = dict((id(e.expr), e) for e in members if isinstance(e, Optional))
            innerOptionals = [e.expr for e in members if isinstance(e, Optional)]
            emptyable = [e for e in members if e.mayReturnEmpty and not isinstance(e, Optional)]
            self.optionals = innerOptionals + emptyable
            self.multioptionals = [e.expr for e in members if isinstance(e, ZeroOrMore)]
            self.multirequired = [e.expr for e in members if isinstance(e, OneOrMore)]
            self.required = [e for e in members if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
            self.required += self.multirequired
            self.initExprGroups = False

        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the remaining expressions until a full sweep matches nothing
        while True:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failCount = 0
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse(instring, tmpLoc)
                except ParseException:
                    failCount += 1
                else:
                    matchOrder.append(self.opt1map.get(id(e), e))
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if failCount == len(tmpExprs):
                break

        if tmpReqd:
            missing = ", ".join(_ustr(e) for e in tmpReqd)
            raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in tmpOpt]

        # real parse, in the order discovered above
        resultlist = []
        for e in matchOrder:
            loc, results = e._parse(instring, loc, doActions)
            resultlist.append(results)

        finalResults = ParseResults()
        for r in resultlist:
            # names already present are combined rather than overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults:
                    combined = ParseResults(finalResults[k])
                    combined += ParseResults(r[k])
                    dups[k] = combined
            finalResults += ParseResults(r)
            for k, v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"{a & b}"} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " & ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        """Check every contained expression for recursion."""
        trail = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(trail)
3544
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
    """
    def __init__(self, expr, savelist=False):
        """
        Parameters:
         - expr - the wrapped expression; a string is wrapped with
           C{ParserElement._literalStringClass}; may be C{None}
         - savelist - passed through to the C{ParserElement} constructor
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            expr = ParserElement._literalStringClass(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # inherit parsing characteristics from the wrapped expression
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Delegate parsing to the wrapped expression; raise C{ParseException} if
        no expression has been set.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """
        Disable whitespace skipping on this element and on a copy of the wrapped
        expression; returns C{self}.
        """
        self.skipWhitespace = False
        # check for None *before* copying: calling copy() on a missing
        # expression would raise AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """
        Register C{other} as an ignorable expression on this element and on the
        wrapped expression.  A C{Suppress} that is already registered is not
        added again.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            # propagate the entry the base class just appended
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the wrapped expression; returns C{self}."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """
        Raise C{RecursiveGrammarException} if this element already appears in
        C{parseElementList}; otherwise continue the check in the wrapped expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """
        Validate the wrapped expression, then check this element for recursion.
        C{validateTrace} is the list of expressions already being validated
        (default: empty); it is copied, never modified.
        """
        tmp = (validateTrace or [])[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """
        Return the base-class string if available, otherwise a cached
        C{"ClassName:(expr)"} description (C{None} when no expression is set and
        nothing has been cached).
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            # fall back to the generated description below
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr
3618
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        # a lookahead consumes nothing, so it can always "match empty"
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """
        Trial-parse the wrapped expression at C{loc}; any exception it raises
        propagates.  On success the location is returned unchanged with no tokens.
        """
        self.expr.tryParse(instring, loc)
        noTokens = []
        return loc, noTokens
3644
class NotAny(ParseElementEnhance):
    """
    Lookahead to disallow matching with the given parse expression.  C{NotAny}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
    always returns a null token list.  May be constructed using the '~' operator.
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        #~ self.leaveWhitespace()
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Raise C{ParseException} if the wrapped expression can parse at C{loc};
        otherwise return the location unchanged with no tokens.
        """
        unwanted = self.expr.canParseNext(instring, loc)
        if unwanted:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"~{expr}"} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
3677
class OneOrMore(ParseElementEnhance):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """
    def __init__(self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        if isinstance(stopOn, basestring):
            stopOn = ParserElement._literalStringClass(stopOn)
        # negative lookahead for the sentinel, or None when no sentinel was given
        self.not_ender = None if stopOn is None else ~stopOn

    def parseImpl(self, instring, loc, doActions=True):
        """
        Parse the wrapped expression once (failure propagates), then keep
        parsing it until it fails or the stop sentinel is seen.  Returns
        C{(newloc, tokens)} with the accumulated tokens.
        """
        parseOnce = self.expr._parse
        skipIgnorables = self._skipIgnorables
        senderCheck = None
        if self.not_ender is not None:
            senderCheck = self.not_ender.tryParse

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if senderCheck is not None:
            senderCheck(instring, loc)
        loc, tokens = parseOnce(instring, loc, doActions, callPreParse=False)

        try:
            mustSkip = bool(self.ignoreExprs)
            while True:
                if senderCheck is not None:
                    senderCheck(instring, loc)
                preloc = skipIgnorables(instring, loc) if mustSkip else loc
                loc, moreTokens = parseOnce(instring, preloc, doActions)
                if moreTokens or moreTokens.haskeys():
                    tokens += moreTokens
        except (ParseException, IndexError):
            # a failed repetition simply ends the loop
            pass

        return loc, tokens

    def __str__(self):
        """Return the assigned name if any, otherwise a cached C{"{expr}..."} description."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName(self, name, listAllMatches=False):
        """Set the results name via the base class and force list-style results on the returned element."""
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
3753
class ZeroOrMore(OneOrMore):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # Delegate to OneOrMore; if not even one repetition matches, succeed
        # with no tokens at the original location.
        try:
            result = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            result = loc, []
        return result

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
3784
class _NullToken(object):
    """
    Placeholder type whose instances are falsy and render as an empty string.
    """
    def __str__(self):
        return ""

    def __bool__(self):
        return False
    # Python 2 spelling of the truth-value hook
    __nonzero__ = __bool__


# module-level instance used as the "no default supplied" marker for Optional
_optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one times
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional, self).__init__( expr, savelist=False )
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException, IndexError):
            # No match: substitute the default (if one was given), also
            # filing it under the wrapped expression's results name.
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
3855
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to the configured literal class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Scan forward from C{loc} one character at a time until the target
        expression parses, and return the text skipped over (followed by the
        target's tokens when C{include} was set).

        Raises C{ParseException} if the end of C{instring} is reached without
        the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        # bind the callables used inside the scan loop to local names
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else failure
                # branch below, so the text skipped so far is returned rather
                # than an exception being raised here - confirm this is intended.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run during the scan
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # re-parse the target for real, honoring doActions this time
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3970
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Install C{other} as the wrapped expression and copy its parsing
        attributes onto this element.  Returns C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string form
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        # '<<=' behaves exactly like '<<'
        return self << other

    def leaveWhitespace( self ):
        # only this element's own flag is changed here
        self.skipWhitespace = False
        return self

    def streamline( self ):
        if not self.streamlined:
            # mark as done before descending into the wrapped expression
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # The shared default list is never mutated: a copy extended with
        # self is what gets passed down.
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name
        # The contained expression is deliberately not expanded: doing so
        # was stubbed out because it created severe memory and performance
        # problems.  The unreachable expansion code that used to follow this
        # return has been removed.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            # nothing assigned yet: return a new Forward that wraps this one
            ret = Forward()
            ret <<= self
            return ret
4051
class _ForwardNoRecurse(Forward):
    """
    C{Forward} variant whose string form is always the fixed text C{"..."}.
    """
    def __str__( self ):
        return "..."
4055
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__( expr )
        self.saveAsList = False
4063
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine, self).__init__( expr )
        # suppress whitespace-stripping in contained parse expressions, but
        # re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.skipWhitespace = True
        self.adjacent = adjacent
        self.joinString = joinString
        self.callPreparse = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self ).ignore( other )
            return self
        # adjacent mode: call the ParserElement implementation directly
        # rather than the inherited override
        ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from an emptied copy of the incoming results, then add the
        # single joined token
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
4107
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        grouped = [ tokenlist ]
        return grouped
4128
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as a item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for i, tok in enumerate(tokenlist):
            ntoks = len(tok)
            if ntoks == 0:
                continue

            # the first token is the key; int keys are converted to text
            ikey = tok[0]
            if isinstance(ikey, int):
                ikey = _ustr(tok[0]).strip()

            if ntoks == 1:
                # key with no value
                value = ""
            elif ntoks == 2 and not isinstance(tok[1], ParseResults):
                # key with a single plain value
                value = tok[1]
            else:
                # everything after the key; a lone remaining item is stored
                # directly unless the remainder carries named results
                rest = tok.copy()
                del rest[0]
                if len(rest) != 1 or (isinstance(rest, ParseResults) and rest.haskeys()):
                    value = rest
                else:
                    value = rest[0]
            tokenlist[ikey] = _ParseResultsWithOffset(value, i)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
4192
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        nothing = []
        return nothing

    def suppress( self ):
        # already suppressing; return this same element
        return self
4218
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self,s,l,t):
        # After the first successful call, every later call raises a
        # ParseException until reset() is used.
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only marked as called once the wrapped action has returned
        self.called = True
        return results

    def reset(self):
        # allow the wrapped action to run once more
        self.called = False
4235
def traceParseAction(f):
    """
    Decorator for debugging parse actions.

    The returned wrapper writes an "entering" line to C{sys.stderr} before
    calling the action, and a "leaving" line afterwards showing either the
    return value or the exception (which is re-raised).

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        thisFunc = f.__name__
        # the last three positional arguments are (s, l, t)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception as exc:
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        else:
            sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
            return ret

    # give the wrapper the wrapped function's name when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    dlName = _ustr(expr) + " [" + _ustr(delim) + " " + _ustr(expr) + "]..."
    if not combine:
        # delimiters are matched but dropped from the results
        plain_list = expr + ZeroOrMore( Suppress( delim ) + expr )
        return plain_list.setName(dlName)
    # delimiters are kept, and the whole match is merged into one token
    merged_list = Combine( expr + ZeroOrMore( delim + expr ) )
    return merged_list.setName(dlName)
4295
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # Redefine the array part according to the count just parsed, and
        # return no tokens so the count itself is suppressed.
        n = t[0]
        if n:
            counted = Group(And([expr]*n))
        else:
            counted = Group(empty)
        arrayExpr << counted
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left untouched
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    combined = intExpr + arrayExpr
    return combined.setName('(len) ' + _ustr(expr) + '...')
def _flatten(L):
    """Return a flat list of the items of C{L}, expanding nested lists recursively."""
    flat = []
    for item in L:
        if not isinstance(item, list):
            flat.append(item)
            continue
        flat += _flatten(item)
    return flat
4328
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # Each time expr matches, redefine rep from the tokens it produced.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeat parses with a copy of expr
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # Remember what expr matched, then make rep reject anything whose
        # flattened tokens differ from it.
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens == matchTokens:
                return
            raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """
    Prefix each backslash, caret, dash and right bracket in C{s} with a
    backslash, and spell newline and tab characters as two-character escapes.
    """
    # the backslash comes first in this sequence, so the backslashes added
    # for the later characters are not escaped again
    for special in r"\^-]":
        s = s.replace(special, _bslash + special)
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
4392
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level collections module are gone in recent
    # Python 3 releases; take Sequence from collections.abc when available.
    try:
        from collections.abc import Sequence
    except ImportError:
        from collections import Sequence

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Sequence):
        symbols = list(strs[:])
    elif isinstance(strs, _generatorType):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for position i; move on
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: use a character class
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # Best-effort optimization only: fall through to MatchFirst.
            # (Exception rather than a bare except, so KeyboardInterrupt and
            # SystemExit are not swallowed.)
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4467
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped (key, value) entry, repeated zero or more times
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
4502
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    def report_location(s, loc, t):
        return loc

    # empty markers that record the parse location before and after expr
    locMarker = Empty().setParseAction(report_location)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if asString:
        def extractText(s, l, t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s, l, t):
            # replace the token list in place, consuming the two marker names
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.
    """
    def first_token(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.setParseAction(first_token)
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    def current_location(s, l, t):
        return l

    locator = Empty().setParseAction(current_location)
    start_marker = locator("locn_start")
    value = expr("value")
    # the end marker does not skip whitespace
    end_marker = locator.copy().leaveWhitespace()("locn_end")
    return Group(start_marker + value + end_marker)
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar used by srange() to read regex-style '[...]' character sets
# a backslash followed by one punctuation character yields that character
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Hex escape: take the digits after the x/X marker.  (str.lstrip(r'\0x')
# strips a *set* of characters, so it also removed leading '0' digits -
# leaving '' for an all-zero value - and did not remove an upper-case 'X';
# int() raised in both cases.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split("x",1)[1],16)))
# octal escape: everything after the backslash is the octal number
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    Returns an empty string if C{s} cannot be parsed or expanded.
    """
    def _expanded(p):
        # a plain item is a single character; a ParseResults item is a
        # (first, last) pair to be expanded inclusively
        if not isinstance(p, ParseResults):
            return p
        return ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1])+1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # Any failure yields "" as before; Exception (not a bare except)
        # lets KeyboardInterrupt and SystemExit propagate.
        return ""
4608
def matchOnlyAtCol(n):
    """
    Build a parse action that accepts a match only when it starts at column C{n}
    of the input text.

    The returned parse action raises C{ParseException} when the match is located
    at any other column.
    """
    def verifyCol(strg, locn, toks):
        actual = col(locn, strg)
        if actual == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """
    Build a parse action that ignores the matched tokens and returns the single
    literal value C{replStr} instead.  Especially useful when used with
    C{L{transformString<ParserElement.transformString>}()}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    def _replace(s, l, t):
        return [replStr]

    return _replace
4632
def removeQuotes(s,l,t):
    """
    Parse action that drops the first and last character (the quotation marks)
    of the first matched token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4646
def tokenMap(func, *args):
    """
    Build a parse action that applies C{func} to every element of the matched
    ParseResults list.  Any additional C{args} are forwarded to C{func} after the
    token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which converts the parsed data to an integer using base 16.

    The returned parse action is named after C{func} (its C{__name__}, else the
    name of its class, else C{str(func)}).

    Example (compare the last example to the one in L{ParserElement.transformString})::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    try:
        fallback_name = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', fallback_name)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""Helper parse action to convert tokens to upper case."""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""Helper parse action to convert tokens to lower case."""
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} is either a tag name string (wrapped in a C{Keyword}, caseless unless
    C{xml} is true) or an expression, whose C{name} is then used for the results
    names.  Returns the 2-tuple C{(openTag, closeTag)}.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    attrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: every attribute has a double-quoted value
        attrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        attrPair = Group(attrName + Suppress("=") + attrValue)
    else:
        # HTML: names are lower-cased, values may be quoted, unquoted or absent
        unquotedChars = printables.replace(">", "")
        attrValue = quotedString.copy().setParseAction(removeQuotes) | Word(unquotedChars)
        attrPair = Group(attrName.setParseAction(downcaseTokens) +
                         Optional(Suppress("=") + attrValue))

    # "empty" is True for a self-closing tag such as <br/>
    emptyFlag = (Optional("/", default=[False])
                 .setResultsName("empty")
                 .setParseAction(lambda s,l,t:t[0]=='/'))
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrPair)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag" -> "NsTag", used to build the "startNsTag"/"endNsTag" results names
    camelName = "".join(resname.replace(":", " ").title().split())
    openTag = openTag.setResultsName("start" + camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end" + camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4725
def makeHTMLTags(tagStr):
    """
    Construct the opening and closing tag expressions for an HTML tag name.
    Matches tags in either upper or lower case, attributes with namespaces and
    with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4744
def makeXMLTags(tagStr):
    """
    Construct the opening and closing tag expressions for an XML tag name.
    Matches tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4753
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Specify the required attribute names and values as one of:
     - keyword arguments, as in C{(align="right")}
     - an explicit dict with the C{**} operator, when an attribute name is also a Python
       reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - positional name-value tuples, as in C{( ("ns1:class", "Customer"), ("ns2:align","right") )}
    When positional tuples are given, any keyword arguments are ignored.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    The returned parse action raises C{ParseException} when a required attribute is
    missing or has a different value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    pairs = args[:] if args else attrDict.items()
    attrs = [(k,v) for k,v in pairs]

    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))

    return pa
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} for matching on a tag's C{class}
    attribute - made difficult because C{class} is a reserved word in Python.
    When C{namespace} is given, the attribute tested is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# associativity markers for the operator definitions passed to infixNotation
opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions.

    Parameters:
     - baseExpr - expression representing the most basic operand of the nested
       expression
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
           may also be a string, which will be converted to a Literal;
           if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
           two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
           be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
           right or left associative, using the pyparsing-defined
           constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
           expressions matching this operator expression (the
           parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if numTerms is not 1, 2 or 3, if a ternary opExpr is not
    a pair of expressions, or if rightLeftAssoc is neither C{opAssoc.LEFT} nor
    C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signedInteger
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # the trailing parse action is optional, so pad the definition with None
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate before building the name: "%s%s" % opExpr would itself raise an
            # unrelated TypeError for None or for a two-element list
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form or falls through to the tighter-binding level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  The body may not contain a bare newline; a doubled
# quote character or a backslash escape is accepted inside the string.
dblQuotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
).setName("string enclosed in double quotes")

sglQuotedString = Combine(
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("string enclosed in single quotes")

quotedString = Combine(
    Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"' |
    Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'"
).setName("quotedString using single or double quotes")

# a quoted string with a leading 'u' prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.  In that case C{opener} and C{closer} must both
    be strings.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if no content
    expression is given and the delimiters are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener,basestring) and isinstance(closer,basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        stripToken = lambda t:t[0].strip()
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is not None:
            content = Combine(OneOrMore(~ignoreExpr +
                                        CharsNotIn(opener+closer+whiteChars,exact=1))
                              ).setParseAction(stripToken)
        elif singleCharDelims:
            content = (empty.copy() +
                       CharsNotIn(opener+closer+whiteChars).setParseAction(stripToken))
        elif ignoreExpr is not None:
            content = Combine(OneOrMore(~ignoreExpr +
                                        ~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(stripToken)
        else:
            content = Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                        CharsNotIn(whiteChars,exact=1))
                              ).setParseAction(stripToken)

    ret = Forward()
    if ignoreExpr is None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5058
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single grammar
       should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
       the current level; set to False for block of left-most statements
       (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: a backslash followed by
    a line end is added to its ignore list.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a peer statement must start at exactly the current indentation column
        if l >= len(s):
            return
        here = col(l,s)
        expected = indentStack[-1]
        if here > expected:
            raise ParseFatalException(s,l,"illegal nesting")
        if here != expected:
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current one; push it
        here = col(l,s)
        if here <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( here )

    def checkUnindent(s,l,t):
        # leaving a block: the column must be back at (or left of) the enclosing level; pop it
        if l >= len(s):
            return
        here = col(l,s)
        dedented = indentStack and here < indentStack[-1] and here <= indentStack[-2]
        if not dedented:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')

    statements = OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )
    if indent:
        smExpr = Group( Optional(NL) + INDENT + statements + UNDENT )
    else:
        smExpr = Group( Optional(NL) + statements )
    # let statements continue across a backslash-newline
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# 8-bit (Latin-1 range) alphabetic and punctuation characters
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# expressions matching any HTML opening / closing tag
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parse action to replace a common HTML entity with its special character.

    Looks up the C{entity} named result in the entity map; returns C{None} for
    an entity that is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'
).setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to (not including) the end of the current line
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment
).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one item of a comma-separated list: printable words, possibly with embedded
# spaces/tabs, but not running into a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList(
    Optional( quotedString.copy() | _commasepitem, default="")
).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
# some other useful expressions - using lower-case class name since we are really using this as a namespace
class pyparsing_common:
    """
    Namespace of common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sciReal | real | signedInteger).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # the short ("::") form must contain fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters, used consistently)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        def _to_date(s, l, t):
            parsed = datetime.strptime(t[0], fmt)
            return parsed.date()
        return _to_date

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def _to_datetime(s, l, t):
            return datetime.strptime(t[0], fmt)
        return _to_datetime

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    # matches (and suppresses) any opening or closing HTML tag
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source

        Example::
            # strip HTML links from normal text
            text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
            td,td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

            print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
        """
        markup = tokens[0]
        return pyparsing_common._html_stripper.transformString(markup)
if __name__ == "__main__":

    # Self-test / demo: a tiny SQL SELECT grammar followed by a few of the
    # pyparsing_common expressions, each exercised through runTests().

    selectToken = CaselessLiteral("select")
    fromToken = CaselessLiteral("from")

    ident = Word(alphas, alphanums + "_$")

    def _dotted_upper_name():
        # builds a new '.'-delimited identifier expression (combined into one
        # token) whose parse action upper-cases the match
        dotted = delimitedList(ident, ".", combine=True)
        return dotted.setParseAction(upcaseTokens)

    columnName = _dotted_upper_name()
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec = ('*' | columnNameList)

    tableName = _dotted_upper_name()
    tableNameList = Group(delimitedList(tableName)).setName("tables")

    simpleSQL = (
        selectToken("command")
        + columnSpec("columns")
        + fromToken
        + tableNameList("tables")
    )

    # demo runTests method, including embedded comments in test string
    sql_samples = """
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """
    simpleSQL.runTests(sql_samples)

    # the same inputs go through 'number' (returns the corresponding Python
    # type) and then 'fnumber' (any int or real number, returned as float)
    numeric_samples = """
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """
    for numeric_expr in (pyparsing_common.number, pyparsing_common.fnumber):
        numeric_expr.runTests(numeric_samples)

    hex_samples = """
        100
        FF
        """
    pyparsing_common.hex_integer.runTests(hex_samples)

    import uuid
    # convert the matched text into a uuid.UUID instance
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    uuid_samples = """
        12345678-1234-5678-1234-567812345678
        """
    pyparsing_common.uuid.runTests(uuid_samples)