Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2015  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24   
  25  __doc__ = \ 
  26  """ 
  27  pyparsing module - Classes and methods to define and execute parsing grammars 
  28   
  29  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  30  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  31  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  32  provides a library of classes that you use to construct the grammar directly in Python. 
  33   
  34  Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"}):: 
  35   
  36      from pyparsing import Word, alphas 
  37   
  38      # define grammar of a greeting 
  39      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  40   
  41      hello = "Hello, World!" 
  42      print (hello, "->", greet.parseString( hello )) 
  43   
  44  The program outputs the following:: 
  45   
  46      Hello, World! -> ['Hello', ',', 'World', '!'] 
  47   
  48  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  49  class names, and the use of '+', '|' and '^' operators. 
  50   
  51  The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an 
  52  object with named attributes. 
  53   
  54  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  55   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  56   - quoted strings 
  57   - embedded comments 
  58  """ 
  59   
  60  __version__ = "2.1.7" 
  61  __versionTime__ = "11 Aug 2016 07:29 UTC" 
  62  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  63   
  64  import string 
  65  from weakref import ref as wkref 
  66  import copy 
  67  import sys 
  68  import warnings 
  69  import re 
  70  import sre_constants 
  71  import collections 
  72  import pprint 
  73  import traceback 
  74  import types 
  75  from datetime import datetime 
  76   
  77  try: 
  78      from _thread import RLock 
  79  except ImportError: 
  80      from threading import RLock 
  81   
  82  try: 
  83      from collections import OrderedDict as _OrderedDict 
  84  except ImportError: 
  85      try: 
  86          from ordereddict import OrderedDict as _OrderedDict 
  87      except ImportError: 
  88          _OrderedDict = None 
  89   
  90  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  91   
  92  __all__ = [ 
  93  'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 
  94  'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 
  95  'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 
  96  'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 
  97  'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 
  98  'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',  
  99  'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 
 100  'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 
 101  'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 
 102  'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 
 103  'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 
 104  'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 
 105  'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 
 106  'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',  
 107  'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 
 108  'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 
 109  'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass', 
 110  'tokenMap', 'pyparsing_common', 
 111  ] 
 112   
 113  system_version = tuple(sys.version_info)[:3] 
 114  PY_3 = system_version[0] == 3 
 115  if PY_3: 
 116      _MAX_INT = sys.maxsize 
 117      basestring = str 
 118      unichr = chr 
 119      _ustr = str 
 120   
 121      # build list of single arg builtins, that can be used as parse actions 
 122      singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max] 
 123   
 124  else: 
 125      _MAX_INT = sys.maxint 
 126      range = xrange 
127 128 - def _ustr(obj):
129 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 130 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 131 then < returns the unicode object | encodes it with the default encoding | ... >. 132 """ 133 if isinstance(obj,unicode): 134 return obj 135 136 try: 137 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 138 # it won't break any existing code. 139 return str(obj) 140 141 except UnicodeEncodeError: 142 # Else encode it 143 ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace') 144 xmlcharref = Regex('&#\d+;') 145 xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:]) 146 return xmlcharref.transformString(ret)
147 148 # build list of single arg builtins, tolerant of Python version, that can be used as parse actions 149 singleArgBuiltins = [] 150 import __builtin__ 151 for fname in "sum len sorted reversed list tuple set any all min max".split(): 152 try: 153 singleArgBuiltins.append(getattr(__builtin__,fname)) 154 except AttributeError: 155 continue 156 157 _generatorType = type((y for y in range(1)))
158 159 -def _xml_escape(data):
160 """Escape &, <, >, ", ', etc. in a string of data.""" 161 162 # ampersand must be replaced first 163 from_symbols = '&><"\'' 164 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split()) 165 for from_,to_ in zip(from_symbols, to_symbols): 166 data = data.replace(from_, to_) 167 return data
168
169 -class _Constants(object):
170 pass
# Character-set strings built from the ASCII tables in the string module.
nums = "0123456789"
alphas = string.ascii_uppercase + string.ascii_lowercase
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every character of string.printable except the whitespace characters
printables = "".join(filter(lambda c: c not in string.whitespace, string.printable))
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        # With no explicit msg, the first argument is taken as the message
        # and the parsed-string slot is left empty.
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            self.msg = pstr
            self.pstr = ""
        self.loc = loc
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        text, where, message, element = pe.pstr, pe.loc, pe.msg, pe.parserElement
        return cls(text, where, message, element)

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text
        """
        # Only reached when normal attribute lookup fails; the values are
        # computed on demand from the stored location and string.
        if aname == "lineno":
            return lineno( self.loc, self.pstr )
        if aname == "col" or aname == "column":
            return col( self.loc, self.pstr )
        if aname == "line":
            return line( self.loc, self.pstr )
        raise AttributeError(aname)

    def __str__( self ):
        fields = ( self.msg, self.loc, self.lineno, self.column )
        return "%s (at char %d), (line:%d, col:%d)" % fields

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
        """
        text = self.line
        insert_at = self.column - 1
        if markerString:
            text = "".join((text[:insert_at], markerString, text[insert_at:]))
        return text.strip()

    def __dir__(self):
        computed = ["lineno", "col", "line"]
        return computed + dir(type(self))
234
class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    the following attributes can be read by name:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text

    Example::
        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::
       Expected integer (at char 0), (line:1, col:1)
        column: 1
    """
255
class ParseFatalException(ParseBaseException):
    """Exception intended to be raised by user code when inconsistent parse
       content is found; stops all parsing immediately"""
260
class ParseSyntaxException(ParseFatalException):
    """Same as L{ParseFatalException}, except that it is raised internally when an
       L{ErrorStop<And._ErrorStop>} ('-' operator) signals that parsing must stop
       immediately because an unbacktrackable syntax error has been found"""
266
#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc

class RecursiveGrammarException(Exception):
    """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # sequence of parser elements supplied by the caller, kept for reporting
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Wrap the operand in a 1-tuple: with a bare '%', a tuple-valued trace
        # would be unpacked as separate format arguments and either mis-format
        # or raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
287
288 -class _ParseResultsWithOffset(object):
289 - def __init__(self,p1,p2):
290 self.tup = (p1,p2)
291 - def __getitem__(self,i):
292 return self.tup[i]
293 - def __repr__(self):
294 return repr(self.tup)
295 - def setOffset(self,i):
296 self.tup = (self.tup[0],i)
297
298 -class ParseResults(object):
299 """ 300 Structured parse results, to provide multiple means of access to the parsed data: 301 - as a list (C{len(results)}) 302 - by list index (C{results[0], results[1]}, etc.) 303 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName}) 304 305 Example:: 306 integer = Word(nums) 307 date_str = (integer.setResultsName("year") + '/' 308 + integer.setResultsName("month") + '/' 309 + integer.setResultsName("day")) 310 # equivalent form: 311 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 312 313 result = date_str.parseString("1999/12/31") 314 print(list(result)) 315 print(result[0]) 316 print(result['month']) 317 print(result.day) 318 print('month' in result) 319 print('minutes' in result) 320 print(result.dump()) 321 prints:: 322 ['1999', '/', '12', '/', '31'] 323 1999 324 12 325 31 326 True 327 False 328 ['1999', '/', '12', '/', '31'] 329 - day: 31 330 - month: 12 331 - year: 1999 332 """
333 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
334 if isinstance(toklist, cls): 335 return toklist 336 retobj = object.__new__(cls) 337 retobj.__doinit = True 338 return retobj
339 340 # Performance tuning: we construct a *lot* of these, so keep this 341 # constructor as small and fast as possible
342 - def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
343 if self.__doinit: 344 self.__doinit = False 345 self.__name = None 346 self.__parent = None 347 self.__accumNames = {} 348 self.__asList = asList 349 self.__modal = modal 350 if toklist is None: 351 toklist = [] 352 if isinstance(toklist, list): 353 self.__toklist = toklist[:] 354 elif isinstance(toklist, _generatorType): 355 self.__toklist = list(toklist) 356 else: 357 self.__toklist = [toklist] 358 self.__tokdict = dict() 359 360 if name is not None and name: 361 if not modal: 362 self.__accumNames[name] = 0 363 if isinstance(name,int): 364 name = _ustr(name) # will always return a str, but use _ustr for consistency 365 self.__name = name 366 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])): 367 if isinstance(toklist,basestring): 368 toklist = [ toklist ] 369 if asList: 370 if isinstance(toklist,ParseResults): 371 self[name] = _ParseResultsWithOffset(toklist.copy(),0) 372 else: 373 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0) 374 self[name].__name = name 375 else: 376 try: 377 self[name] = toklist[0] 378 except (KeyError,TypeError,IndexError): 379 self[name] = toklist
380
381 - def __getitem__( self, i ):
382 if isinstance( i, (int,slice) ): 383 return self.__toklist[i] 384 else: 385 if i not in self.__accumNames: 386 return self.__tokdict[i][-1][0] 387 else: 388 return ParseResults([ v[0] for v in self.__tokdict[i] ])
389
390 - def __setitem__( self, k, v, isinstance=isinstance ):
391 if isinstance(v,_ParseResultsWithOffset): 392 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v] 393 sub = v[0] 394 elif isinstance(k,(int,slice)): 395 self.__toklist[k] = v 396 sub = v 397 else: 398 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)] 399 sub = v 400 if isinstance(sub,ParseResults): 401 sub.__parent = wkref(self)
402
403 - def __delitem__( self, i ):
404 if isinstance(i,(int,slice)): 405 mylen = len( self.__toklist ) 406 del self.__toklist[i] 407 408 # convert int to slice 409 if isinstance(i, int): 410 if i < 0: 411 i += mylen 412 i = slice(i, i+1) 413 # get removed indices 414 removed = list(range(*i.indices(mylen))) 415 removed.reverse() 416 # fixup indices in token dictionary 417 for name,occurrences in self.__tokdict.items(): 418 for j in removed: 419 for k, (value, position) in enumerate(occurrences): 420 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 421 else: 422 del self.__tokdict[i]
423
424 - def __contains__( self, k ):
425 return k in self.__tokdict
426
427 - def __len__( self ): return len( self.__toklist )
428 - def __bool__(self): return ( not not self.__toklist )
429 __nonzero__ = __bool__
430 - def __iter__( self ): return iter( self.__toklist )
431 - def __reversed__( self ): return iter( self.__toklist[::-1] )
432 - def _iterkeys( self ):
433 if hasattr(self.__tokdict, "iterkeys"): 434 return self.__tokdict.iterkeys() 435 else: 436 return iter(self.__tokdict)
437
438 - def _itervalues( self ):
439 return (self[k] for k in self._iterkeys())
440
441 - def _iteritems( self ):
442 return ((k, self[k]) for k in self._iterkeys())
443 444 if PY_3: 445 keys = _iterkeys 446 """Returns an iterator of all named result keys (Python 3.x only).""" 447 448 values = _itervalues 449 """Returns an iterator of all named result values (Python 3.x only).""" 450 451 items = _iteritems 452 """Returns an iterator of all named result key-value tuples (Python 3.x only).""" 453 454 else: 455 iterkeys = _iterkeys 456 """Returns an iterator of all named result keys (Python 2.x only).""" 457 458 itervalues = _itervalues 459 """Returns an iterator of all named result values (Python 2.x only).""" 460 461 iteritems = _iteritems 462 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 463
464 - def keys( self ):
465 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 466 return list(self.iterkeys())
467
468 - def values( self ):
469 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 470 return list(self.itervalues())
471
472 - def items( self ):
473 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 474 return list(self.iteritems())
475
476 - def haskeys( self ):
477 """Since keys() returns an iterator, this method is helpful in bypassing 478 code that looks for the existence of any defined results names.""" 479 return bool(self.__tokdict)
480
481 - def pop( self, *args, **kwargs):
482 """ 483 Removes and returns item at specified index (default=C{last}). 484 Supports both C{list} and C{dict} semantics for C{pop()}. If passed no 485 argument or an integer argument, it will use C{list} semantics 486 and pop tokens from the list of parsed tokens. If passed a 487 non-integer argument (most likely a string), it will use C{dict} 488 semantics and pop the corresponding value from any defined 489 results names. A second default return value argument is 490 supported, just as in C{dict.pop()}. 491 492 Example:: 493 def remove_first(tokens): 494 tokens.pop(0) 495 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 496 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 497 498 label = Word(alphas) 499 patt = label("LABEL") + OneOrMore(Word(nums)) 500 print(patt.parseString("AAB 123 321").dump()) 501 502 # Use pop() in a parse action to remove named result (note that corresponding value is not 503 # removed from list form of results) 504 def remove_LABEL(tokens): 505 tokens.pop("LABEL") 506 return tokens 507 patt.addParseAction(remove_LABEL) 508 print(patt.parseString("AAB 123 321").dump()) 509 prints:: 510 ['AAB', '123', '321'] 511 - LABEL: AAB 512 513 ['AAB', '123', '321'] 514 """ 515 if not args: 516 args = [-1] 517 for k,v in kwargs.items(): 518 if k == 'default': 519 args = (args[0], v) 520 else: 521 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 522 if (isinstance(args[0], int) or 523 len(args) == 1 or 524 args[0] in self): 525 index = args[0] 526 ret = self[index] 527 del self[index] 528 return ret 529 else: 530 defaultvalue = args[1] 531 return defaultvalue
532
533 - def get(self, key, defaultValue=None):
534 """ 535 Returns named result matching the given key, or if there is no 536 such name, then returns the given C{defaultValue} or C{None} if no 537 C{defaultValue} is specified. 538 539 Similar to C{dict.get()}. 540 541 Example:: 542 integer = Word(nums) 543 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 544 545 result = date_str.parseString("1999/12/31") 546 print(result.get("year")) # -> '1999' 547 print(result.get("hour", "not specified")) # -> 'not specified' 548 print(result.get("hour")) # -> None 549 """ 550 if key in self: 551 return self[key] 552 else: 553 return defaultValue
554
555 - def insert( self, index, insStr ):
556 """ 557 Inserts new element at location index in the list of parsed tokens. 558 559 Similar to C{list.insert()}. 560 561 Example:: 562 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 563 564 # use a parse action to insert the parse location in the front of the parsed results 565 def insert_locn(locn, tokens): 566 tokens.insert(0, locn) 567 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 568 """ 569 self.__toklist.insert(index, insStr) 570 # fixup indices in token dictionary 571 for name,occurrences in self.__tokdict.items(): 572 for k, (value, position) in enumerate(occurrences): 573 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))
574
575 - def append( self, item ):
576 """ 577 Add single element to end of ParseResults list of elements. 578 579 Example:: 580 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 581 582 # use a parse action to compute the sum of the parsed integers, and add it to the end 583 def append_sum(tokens): 584 tokens.append(sum(map(int, tokens))) 585 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 586 """ 587 self.__toklist.append(item)
588
589 - def extend( self, itemseq ):
590 """ 591 Add sequence of elements to end of ParseResults list of elements. 592 593 Example:: 594 patt = OneOrMore(Word(alphas)) 595 596 # use a parse action to append the reverse of the matched strings, to make a palindrome 597 def make_palindrome(tokens): 598 tokens.extend(reversed([t[::-1] for t in tokens])) 599 return ''.join(tokens) 600 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 601 """ 602 if isinstance(itemseq, ParseResults): 603 self += itemseq 604 else: 605 self.__toklist.extend(itemseq)
606
607 - def clear( self ):
608 """ 609 Clear all elements and results names. 610 """ 611 del self.__toklist[:] 612 self.__tokdict.clear()
613
614 - def __getattr__( self, name ):
615 try: 616 return self[name] 617 except KeyError: 618 return "" 619 620 if name in self.__tokdict: 621 if name not in self.__accumNames: 622 return self.__tokdict[name][-1][0] 623 else: 624 return ParseResults([ v[0] for v in self.__tokdict[name] ]) 625 else: 626 return ""
627
628 - def __add__( self, other ):
629 ret = self.copy() 630 ret += other 631 return ret
632
633 - def __iadd__( self, other ):
634 if other.__tokdict: 635 offset = len(self.__toklist) 636 addoffset = lambda a: offset if a<0 else a+offset 637 otheritems = other.__tokdict.items() 638 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) ) 639 for (k,vlist) in otheritems for v in vlist] 640 for k,v in otherdictitems: 641 self[k] = v 642 if isinstance(v[0],ParseResults): 643 v[0].__parent = wkref(self) 644 645 self.__toklist += other.__toklist 646 self.__accumNames.update( other.__accumNames ) 647 return self
648
649 - def __radd__(self, other):
650 if isinstance(other,int) and other == 0: 651 # useful for merging many ParseResults using sum() builtin 652 return self.copy() 653 else: 654 # this may raise a TypeError - so be it 655 return other + self
656
657 - def __repr__( self ):
658 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
659
660 - def __str__( self ):
661 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
662
663 - def _asStringList( self, sep='' ):
664 out = [] 665 for item in self.__toklist: 666 if out and sep: 667 out.append(sep) 668 if isinstance( item, ParseResults ): 669 out += item._asStringList() 670 else: 671 out.append( _ustr(item) ) 672 return out
673
674 - def asList( self ):
675 """ 676 Returns the parse results as a nested list of matching tokens, all converted to strings. 677 678 Example:: 679 patt = OneOrMore(Word(alphas)) 680 result = patt.parseString("sldkj lsdkj sldkj") 681 # even though the result prints in string-like form, it is actually a pyparsing ParseResults 682 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj'] 683 684 # Use asList() to create an actual list 685 result_list = result.asList() 686 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj'] 687 """ 688 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
689
690 - def asDict( self ):
691 """ 692 Returns the named parse results as a nested dictionary. 693 694 Example:: 695 integer = Word(nums) 696 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 697 698 result = date_str.parseString('12/31/1999') 699 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]}) 700 701 result_dict = result.asDict() 702 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'} 703 704 # even though a ParseResults supports dict-like access, sometime you just need to have a dict 705 import json 706 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable 707 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"} 708 """ 709 if PY_3: 710 item_fn = self.items 711 else: 712 item_fn = self.iteritems 713 714 def toItem(obj): 715 if isinstance(obj, ParseResults): 716 if obj.haskeys(): 717 return obj.asDict() 718 else: 719 return [toItem(v) for v in obj] 720 else: 721 return obj
722 723 return dict((k,toItem(v)) for k,v in item_fn())
724
725 - def copy( self ):
726 """ 727 Returns a new copy of a C{ParseResults} object. 728 """ 729 ret = ParseResults( self.__toklist ) 730 ret.__tokdict = self.__tokdict.copy() 731 ret.__parent = self.__parent 732 ret.__accumNames.update( self.__accumNames ) 733 ret.__name = self.__name 734 return ret
735
736 - def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
737 """ 738 (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names. 739 """ 740 nl = "\n" 741 out = [] 742 namedItems = dict((v[1],k) for (k,vlist) in self.__tokdict.items() 743 for v in vlist) 744 nextLevelIndent = indent + " " 745 746 # collapse out indents if formatting is not desired 747 if not formatted: 748 indent = "" 749 nextLevelIndent = "" 750 nl = "" 751 752 selfTag = None 753 if doctag is not None: 754 selfTag = doctag 755 else: 756 if self.__name: 757 selfTag = self.__name 758 759 if not selfTag: 760 if namedItemsOnly: 761 return "" 762 else: 763 selfTag = "ITEM" 764 765 out += [ nl, indent, "<", selfTag, ">" ] 766 767 for i,res in enumerate(self.__toklist): 768 if isinstance(res,ParseResults): 769 if i in namedItems: 770 out += [ res.asXML(namedItems[i], 771 namedItemsOnly and doctag is None, 772 nextLevelIndent, 773 formatted)] 774 else: 775 out += [ res.asXML(None, 776 namedItemsOnly and doctag is None, 777 nextLevelIndent, 778 formatted)] 779 else: 780 # individual token, see if there is a name for it 781 resTag = None 782 if i in namedItems: 783 resTag = namedItems[i] 784 if not resTag: 785 if namedItemsOnly: 786 continue 787 else: 788 resTag = "ITEM" 789 xmlBodyText = _xml_escape(_ustr(res)) 790 out += [ nl, nextLevelIndent, "<", resTag, ">", 791 xmlBodyText, 792 "</", resTag, ">" ] 793 794 out += [ nl, indent, "</", selfTag, ">" ] 795 return "".join(out)
796
797 - def __lookup(self,sub):
798 for k,vlist in self.__tokdict.items(): 799 for v,loc in vlist: 800 if sub is v: 801 return k 802 return None
803
804 - def getName(self):
805 """ 806 Returns the results name for this token expression. Useful when several 807 different expressions might match at a particular location. 808 809 Example:: 810 integer = Word(nums) 811 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d") 812 house_number_expr = Suppress('#') + Word(nums, alphanums) 813 user_data = (Group(house_number_expr)("house_number") 814 | Group(ssn_expr)("ssn") 815 | Group(integer)("age")) 816 user_info = OneOrMore(user_data) 817 818 result = user_info.parseString("22 111-22-3333 #221B") 819 for item in result: 820 print(item.getName(), ':', item[0]) 821 prints:: 822 age : 22 823 ssn : 111-22-3333 824 house_number : 221B 825 """ 826 if self.__name: 827 return self.__name 828 elif self.__parent: 829 par = self.__parent() 830 if par: 831 return par.__lookup(self) 832 else: 833 return None 834 elif (len(self) == 1 and 835 len(self.__tokdict) == 1 and 836 self.__tokdict.values()[0][0][1] in (0,-1)): 837 return self.__tokdict.keys()[0] 838 else: 839 return None
840
841 - def dump(self, indent='', depth=0, full=True):
842 """ 843 Diagnostic method for listing out the contents of a C{ParseResults}. 844 Accepts an optional C{indent} argument so that this string can be embedded 845 in a nested display of other data. 846 847 Example:: 848 integer = Word(nums) 849 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 850 851 result = date_str.parseString('12/31/1999') 852 print(result.dump()) 853 prints:: 854 ['12', '/', '31', '/', '1999'] 855 - day: 1999 856 - month: 31 857 - year: 12 858 """ 859 out = [] 860 NL = '\n' 861 out.append( indent+_ustr(self.asList()) ) 862 if full: 863 if self.haskeys(): 864 items = sorted(self.items()) 865 for k,v in items: 866 if out: 867 out.append(NL) 868 out.append( "%s%s- %s: " % (indent,(' '*depth), k) ) 869 if isinstance(v,ParseResults): 870 if v: 871 out.append( v.dump(indent,depth+1) ) 872 else: 873 out.append(_ustr(v)) 874 else: 875 out.append(_ustr(v)) 876 elif any(isinstance(vv,ParseResults) for vv in self): 877 v = self 878 for i,vv in enumerate(v): 879 if isinstance(vv,ParseResults): 880 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),vv.dump(indent,depth+1) )) 881 else: 882 out.append("\n%s%s[%d]:\n%s%s%s" % (indent,(' '*(depth)),i,indent,(' '*(depth+1)),_ustr(vv))) 883 884 return "".join(out)
885
886 - def pprint(self, *args, **kwargs):
887 """ 888 Pretty-printer for parsed results as a list, using the C{pprint} module. 889 Accepts additional positional or keyword args as defined for the 890 C{pprint.pprint} method. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint}) 891 892 Example:: 893 ident = Word(alphas, alphanums) 894 num = Word(nums) 895 func = Forward() 896 term = ident | num | Group('(' + func + ')') 897 func <<= ident + Group(Optional(delimitedList(term))) 898 result = func.parseString("fna a,b,(fnb c,d,200),100") 899 result.pprint(width=40) 900 prints:: 901 ['fna', 902 ['a', 903 'b', 904 ['(', 'fnb', ['c', 'd', '200'], ')'], 905 '100']] 906 """ 907 pprint.pprint(self.asList(), *args, **kwargs)
908 909 # add support for pickle protocol
910 - def __getstate__(self):
911 return ( self.__toklist, 912 ( self.__tokdict.copy(), 913 self.__parent is not None and self.__parent() or None, 914 self.__accumNames, 915 self.__name ) )
916
917 - def __setstate__(self,state):
918 self.__toklist = state[0] 919 (self.__tokdict, 920 par, 921 inAccumNames, 922 self.__name) = state[1] 923 self.__accumNames = {} 924 self.__accumNames.update(inAccumNames) 925 if par is not None: 926 self.__parent = wkref(par) 927 else: 928 self.__parent = None
929
def __getnewargs__(self):
    """
    Return the tuple C{(toklist, name, asList, modal)} used as constructor
    arguments by the pickle protocol.
    """
    ctor_args = (self.__toklist, self.__name, self.__asList, self.__modal)
    return ctor_args
932
def __dir__(self):
    """
    List the attribute names of the instance's type followed by the
    results names returned by C{keys()}.
    """
    names = dir(type(self))
    names.extend(self.keys())
    return names
# Register ParseResults as a virtual MutableMapping.  The ABC aliases in the
# top-level ``collections`` module were removed in Python 3.10, so look the
# class up in ``collections.abc`` first and fall back for Python 2.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    A C{loc} that points at a newline character is reported as the column
    just past the last character of that line; a C{loc} that points at the
    first character after a newline is column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    s = strg
    # Look at the character *before* loc: only a location immediately after
    # a newline starts a new line.  Testing s[loc] instead would report
    # column 1 for the newline that terminates a non-empty line.
    if 0 < loc < len(s) and s[loc - 1] == '\n':
        return 1
    return loc - s.rfind("\n", 0, loc)
950
def lineno(loc, strg):
    """Returns the line number of location C{loc} within C{strg}, treating
    newlines as line separators.  The first line is number 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
962
def line(loc, strg):
    """Returns the text of the line that contains location C{loc} within
    C{strg}, treating newlines as line separators.  The separators themselves
    are not part of the returned text.
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    if end < 0:
        # no newline at or after loc: the line runs to the end of the string
        return strg[start:]
    return strg[start:end]
972
def _defaultStartDebugAction(instring, loc, expr):
    """Print the expression about to be matched, with its location shown as
    both a character offset and a (line, column) pair."""
    expr_text = _ustr(expr)
    loc_text = _ustr(loc)
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match " + expr_text + " at loc " + loc_text + position)
975
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Print the matched expression and the list form of its tokens."""
    matched_tokens = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched_tokens)
978
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Print the exception raised while attempting a match."""
    description = _ustr(exc)
    print("Exception raised:" + description)
981
def nullDebugAction(*args):
    """Debug action that accepts any arguments and does nothing; use it to suppress debugging output during parsing."""
    return None
985 986 # Only works on Python 3.x - nonlocal is toxic to Python 2 installs 987 #~ 'decorator to trim function calls to match the arity of the target' 988 #~ def _trim_arity(func, maxargs=3): 989 #~ if func in singleArgBuiltins: 990 #~ return lambda s,l,t: func(t) 991 #~ limit = 0 992 #~ foundArity = False 993 #~ def wrapper(*args): 994 #~ nonlocal limit,foundArity 995 #~ while 1: 996 #~ try: 997 #~ ret = func(*args[limit:]) 998 #~ foundArity = True 999 #~ return ret 1000 #~ except TypeError: 1001 #~ if limit == maxargs or foundArity: 1002 #~ raise 1003 #~ limit += 1 1004 #~ continue 1005 #~ return wrapper 1006 1007 # this version is Python 2.x-3.x cross-compatible 1008 'decorator to trim function calls to match the arity of the target'
def _trim_arity(func, maxargs=2):
    """
    Wrap C{func} so that it can be called with the full parse-action
    argument list even if it accepts fewer arguments.

    The returned wrapper calls C{func(*args[limit:])}, starting with
    C{limit == 0}.  Each time the call raises a C{TypeError} that originates
    at the wrapper's own call line, C{limit} is incremented and the call is
    retried with one fewer leading argument, for as long as C{limit} has not
    exceeded C{maxargs}.  Once a call has succeeded, the discovered C{limit}
    is kept and later C{TypeError}s are re-raised unchanged.

    Members of C{singleArgBuiltins} are instead wrapped so that they are
    called with the tokens argument only.
    """
    if func in singleArgBuiltins:
        return lambda s,l,t: func(t)
    # single-element lists so that wrapper() can rebind the values without 'nonlocal'
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3,5):
        def extract_stack():
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3,5,0) else -2
            frame_summary = traceback.extract_stack()[offset]
            return [(frame_summary.filename, frame_summary.lineno)]
        def extract_tb(tb):
            frames = traceback.extract_tb(tb)
            frame_summary = frames[-1]
            return [(frame_summary.filename, frame_summary.lineno)]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack()[-1]
    pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare (filename, lineno) of the innermost traceback frame
                        # with the synthesized location of the func() call above
                        if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        del tb

                # the TypeError came from the call itself: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper
1073 -class ParserElement(object):
1074 """Abstract base level parser element class.""" 1075 DEFAULT_WHITE_CHARS = " \n\t\r" 1076 verbose_stacktrace = False 1077 1078 @staticmethod
def setDefaultWhitespaceChars( chars ):
    r"""
    Replaces the class-wide default set of whitespace characters
    (C{ParserElement.DEFAULT_WHITE_CHARS}) with C{chars}.

    Example::
        # with the default whitespace chars, newlines are skipped like spaces
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl")  # -> ['abc', 'def']
    """
    setattr(ParserElement, "DEFAULT_WHITE_CHARS", chars)
1092 1093 @staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class that is used to wrap plain string literals when they
    are combined into a parser expression.

    Example::
        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31")  # -> ['1999', '12', '31']
    """
    setattr(ParserElement, "_literalStringClass", cls)
1113
def __init__( self, savelist=False ):
    """Initialize the per-element parsing state to its defaults."""
    # actions and naming
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False
    #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""

    # results handling
    self.resultsName = None
    self.saveAsList = savelist
    # results names are either modal (report only last) or cumulative (list all)
    self.modalResults = True

    # whitespace and ignorable expressions
    self.skipWhitespace = True
    self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    # used to avoid redundant calls to preParse
    self.callPreparse = True

    # parsing hints
    # used when checking for left-recursion
    self.mayReturnEmpty = False
    # used to optimize exception handling for subclasses that don't advance parse index
    self.mayIndexError = True
    self.streamlined = False
    self.re = None

    # debugging: custom debug actions
    self.debug = False
    self.debugActions = (None, None, None)
1136
def copy( self ):
    """
    Return a copy of this C{ParserElement}. This makes it possible to attach
    different parse actions to the same parsing pattern by working on copies
    of the original element.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
    prints::
        [5120, 100, 655360, 268435456]
    Equivalent form of C{expr.copy()} is just C{expr()}::
        integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
    """
    clone = copy.copy(self)
    # give the clone its own lists so later additions do not affect the original
    clone.parseAction = list(self.parseAction)
    clone.ignoreExprs = list(self.ignoreExprs)
    if self.copyDefaultWhiteChars:
        clone.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
    return clone
1159
def setName( self, name ):
    """
    Give this expression a name, which makes exception messages clearer.
    Returns C{self}.

    Example::
        Word(nums).parseString("ABC")  # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC")  # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    expected = "Expected " + self.name
    self.errmsg = expected
    if not hasattr(self, "exception"):
        return self
    # keep an already-created exception object in sync with the new message
    self.exception.msg = self.errmsg
    return self
1173
def setResultsName( self, name, listAllMatches=False ):
    """
    Define the name under which matching tokens can be referenced as a
    nested attribute of the returned parse results.
    NOTE: this returns a *copy* of the original C{ParserElement} object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    A trailing C{"*"} on C{name} is stripped and has the same effect as
    passing C{listAllMatches=True}.

    You can also set results names using the abbreviated syntax,
    C{expr("name")} in place of C{expr.setResultsName("name")} -
    see L{I{__call__}<__call__>}.

    Example::
        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named = self.copy()
    accumulate = listAllMatches
    if name.endswith("*"):
        name, accumulate = name[:-1], True
    named.resultsName = name
    named.modalResults = not accumulate
    return named
1201
def setBreak(self, breakFlag = True):
    """Invoke the Python pdb debugger when this element is about to be
       parsed. Pass C{breakFlag} as True to enable, False to disable.
       Returns C{self}.
    """
    if not breakFlag:
        # restore the parse method saved when the breakpoint was installed, if any
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    wrapped = self._parse
    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        pdb.set_trace()
        return wrapped(instring, loc, doActions, callPreParse)
    breaker._originalParseMethod = wrapped
    self._parse = breaker
    return self
1219
def setParseAction( self, *fns, **kwargs ):
    """
    Set the action(s) to perform when this parse element matches
    successfully, replacing any parse actions set before.
    Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
    C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
     - s   = the original string being parsed (see note below)
     - loc = the location of the matching substring
     - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    Optional keyword arguments:
     - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{parseString}<parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.

    Example::
        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31")  # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31")  # -> [1999, '/', 12, '/', 31]
    """
    wrapped_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction = wrapped_actions
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1257
def addParseAction( self, *fns, **kwargs ):
    """
    Append parse action(s) to this expression's existing list of parse
    actions. See L{I{setParseAction}<setParseAction>}.

    See examples in L{I{copy}<copy>}.
    """
    wrapped_actions = [_trim_arity(fn) for fn in fns]
    self.parseAction += wrapped_actions
    # once enabled, callDuringTry stays enabled
    if not self.callDuringTry:
        self.callDuringTry = kwargs.get("callDuringTry", False)
    return self
1267
def addCondition(self, *fns, **kwargs):
    """Add a boolean predicate function to expression's list of parse actions. See
    L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
    functions passed to C{addCondition} need to return boolean success/fail of the condition.
    Each function in C{fns} is added as its own condition, and all of them are checked.

    Optional keyword arguments:
     - message = define a custom message to be used in the raised exception
     - fatal   = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
     - callDuringTry = (default=C{False}) if True, enables running of this element's parse actions during lookaheads and alternate testing

    Returns C{self}.

    Example::
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31")  # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    msg = kwargs.get("message", "failed user-defined condition")
    exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException

    def make_condition_action(condition):
        # A factory gives every parse action its own 'condition' binding.
        # Defining pa directly in the loop would make all of them share the
        # loop variable, so only the last condition would ever be evaluated.
        def pa(s,l,t):
            if not bool(condition(s,l,t)):
                raise exc_type(s,l,msg)
        return pa

    for fn in fns:
        # adapt the arity once, up front, instead of on every invocation
        self.parseAction.append(make_condition_action(_trim_arity(fn)))
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self
def setFailAction( self, fn ):
    """Set the action to perform if parsing fails at this expression.
       The fail action fn is a callable that takes the arguments
       C{fn(s,loc,expr,err)} where:
        - s = string being parsed
        - loc = location where expression match was attempted and failed
        - expr = the parse expression that failed
        - err = the exception thrown
       The function returns no value.  It may throw C{L{ParseFatalException}}
       if it is desired to stop parsing immediately.

       Returns C{self}."""
    setattr(self, "failAction", fn)
    return self
1307
def _skipIgnorables( self, instring, loc ):
    """
    Advance C{loc} past every match of the expressions in C{self.ignoreExprs},
    repeating full passes over them until a pass matches nothing, and return
    the resulting location.
    """
    while True:
        advanced = False
        for ignorable in self.ignoreExprs:
            try:
                # consume consecutive matches of this ignorable expression
                while True:
                    loc, _ = ignorable._parse(instring, loc)
                    advanced = True
            except ParseException:
                continue
        if not advanced:
            return loc
1320
def preParse( self, instring, loc ):
    """
    Return C{loc} advanced past ignorable expressions (when any are defined)
    and then past leading characters found in C{self.whiteChars} (when
    C{self.skipWhitespace} is set).
    """
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    whitespace = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in whitespace:
        loc += 1
    return loc
1332
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: return C{loc} unchanged together with an empty token list."""
    no_tokens = []
    return loc, no_tokens
1335
def postParse( self, instring, loc, tokenlist ):
    """Base implementation: return C{tokenlist} unchanged."""
    result = tokenlist
    return result
1338 1339 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Parse C{instring} at C{loc} without consulting the packrat cache.

    Runs C{preParse} (when both C{callPreParse} and C{self.callPreparse} are
    set), then C{parseImpl} and C{postParse}, wraps the tokens in a
    C{ParseResults} and applies the parse actions when C{doActions} or
    C{self.callDuringTry} is set.  Debug actions and the fail action are
    invoked along the way when they are configured.

    Returns the tuple C{(loc, ParseResults)}.  An C{IndexError} raised by
    C{parseImpl} is converted to a C{ParseException} located at the end of
    the input.
    """
    debugging = ( self.debug ) #and doActions )

    # slow path: taken only when debug or fail actions may need to be called
    if debugging or self.failAction:
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException as err:
            #~ print ("Exception raised:", err)
            if self.debugActions[2]:
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = preloc
        # only pay for the IndexError guard when an IndexError is possible
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action that returns a value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException as err:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action that returns a value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
1411
def tryParse( self, instring, loc ):
    """
    Attempt a parse at C{loc} with parse actions disabled and return the
    location following the match.  A C{ParseFatalException} raised by the
    attempt is converted to a C{ParseException} at C{loc}.
    """
    try:
        result = self._parse(instring, loc, doActions=False)
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    else:
        return result[0]
1417
def canParseNext(self, instring, loc):
    """
    Return True if C{tryParse} succeeds at C{loc}, or False if it raises
    C{ParseException} or C{IndexError}.
    """
    parsable = True
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        parsable = False
    return parsable
1425
class _UnboundedCache(object):
    """Packrat cache with no size limit, backed by a plain dict.

    C{get} returns the C{not_in_cache} sentinel for a missing key.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        # bind the closures as methods of this instance
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)

if _OrderedDict is not None:
    class _FifoCache(object):
        """Packrat cache holding at most C{size} entries; the oldest entry
        is dropped first.  C{get} returns the C{not_in_cache} sentinel for
        a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                if len(cache) > size:
                    # popitem(False) removes the oldest inserted entry
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

else:
    class _FifoCache(object):
        """Fallback used when no ordered dict is available: a dict plus a
        deque that records insertion order.  Holds at most C{size} entries;
        the oldest entry is dropped first.  C{get} returns the
        C{not_in_cache} sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # Unbounded deque: its contents always mirror the keys of
            # 'cache' in insertion order.  (A deque created with
            # maxlen=size silently discards keys, and with size == 0 it is
            # always empty, so popleft() would raise IndexError.)
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while len(cache) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {} # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
packrat_cache_lock = RLock()
packrat_cache_stats = [0, 0]

# this method gets repeatedly called during backtracking with the same arguments -
# we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """
    Packrat (memoizing) front end to C{_parseNoCache}.

    Results and C{ParseBaseException}s are cached per
    C{(self, instring, loc, callPreParse, doActions)}.  On a cache hit the
    stored exception is re-raised, or C{(loc, copy of the cached results)}
    is returned.  Hits and misses are counted in
    C{ParserElement.packrat_cache_stats}.
    """
    HIT, MISS = 0, 1
    cache_key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(cache_key)
        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(cache_key, pe.__class__(*pe.args))
            raise
        # store a copy of the results; the caller receives the original
        cache.set(cache_key, (result[0], result[1].copy()))
        return result
1520 1521 _parse = _parseNoCache 1522 1523 @staticmethod
def resetCache():
    """Empty the packrat cache and reset its hit/miss counters to zero."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice-assign so the existing list object is updated in place
    stats[:] = [0] * len(stats)
1527 1528 _packratEnabled = False 1529 @staticmethod
def enablePackrat(cache_size_limit=128):
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Both valid results
       and parsing exceptions are memoized.  Calling this method again after
       packrat parsing has been enabled has no effect.

       Parameters:
        - cache_size_limit - (default=C{128}) - if an integer value is provided
          will limit the size of the packrat cache; if None is passed, then
          the cache size will be unbounded; if 0 is passed, the cache will
          be effectively disabled.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call C{ParserElement.enablePackrat()}.  If
       your program uses C{psyco} to "compile as you go", you must call
       C{enablePackrat} before calling C{psyco.full()}.  If you do not do this,
       Python will crash.  For best results, call C{enablePackrat()} immediately
       after importing pyparsing.

       Example::
           import pyparsing
           pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # from now on every element parses through the memoizing front end
    ParserElement._parse = ParserElement._parseCache
1563
def parseString( self, instring, parseAll=False ):
    """
    Execute the parse expression with the given string.
    This is the main interface to the client code, once the complete
    expression has been built.

    If you want the grammar to require that the entire input string be
    successfully parsed, then set C{parseAll} to True (equivalent to ending
    the grammar with C{L{StringEnd()}}).

    Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
    in order to report proper column numbers in parse actions.
    If the input string contains tabs and
    the grammar uses parse actions that use the C{loc} argument to index into the
    string being parsed, you can ensure you have a consistent view of the input
    string by:
     - calling C{parseWithTabs} on your grammar before calling C{parseString}
       (see L{I{parseWithTabs}<parseWithTabs>})
     - define your parse action using the full C{(s,loc,toks)} signature, and
       reference the input string using the parse action's C{s} argument
     - explicitly expand the tabs in your input string before calling
       C{parseString}

    Example::
        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        loc, tokens = self._parse(instring, 0)
        if parseAll:
            # require that only skippable text remains after the match
            loc = self.preParse(instring, loc)
            end_of_input = Empty() + StringEnd()
            end_of_input._parse(instring, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
    return tokens
1613
def scanString( self, instring, maxMatches=_MAX_INT, overlap=False ):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    C{maxMatches} argument, to clip scanning after 'n' matches are found.  If
    C{overlap} is specified, then overlapping matches will be reported.

    This is a generator: each match is yielded as the tuple
    C{(tokens, start, end)}.

    Note that the start and end locations are reported relative to the string
    being parsed.  See L{I{parseString}<parseString>} for more information on parsing
    strings with embedded tabs.

    Example::
        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens,start,end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                preloc = preparseFn( instring, loc )
                nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character past the pre-parsed location
                loc = preloc+1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE(review): 'nextloc' (lowercase) is a separate variable from
                        # 'nextLoc'; it holds the pre-parsed location and is only compared with loc
                        nextloc = preparseFn( instring, loc )
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # the match did not advance past loc: step forward to guarantee progress
                    loc = preloc+1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
1684
def transformString( self, instring ):
    """
    Extension to C{L{scanString}}, to modify matching text with modified tokens that may
    be returned from a parse action.  To use C{transformString}, define a grammar and
    attach a parse action to it that modifies the returned token list.
    Invoking C{transformString()} on a target string will then scan for matches,
    and replace the matched text patterns according to the logic in the parse
    action.  C{transformString()} returns the resulting transformed string.

    Note that this method sets C{keepTabs} to True on this element.

    Example::
        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))
    Prints::
        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for tokens, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one is copied as-is
            pieces.append(instring[prev_end:start])
            if tokens:
                if isinstance(tokens, ParseResults):
                    pieces.extend(tokens.asList())
                elif isinstance(tokens, list):
                    pieces.extend(tokens)
                else:
                    pieces.append(tokens)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1727
def searchString( self, instring, maxMatches=_MAX_INT ):
    """
    Another extension to C{L{scanString}}, simplifying the access to the tokens found
    to match the given parse expression: the tokens of every match are collected
    into a single C{ParseResults}.  May be called with optional
    C{maxMatches} argument, to clip searching after 'n' matches are found.

    Example::
        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
    prints::
        ['More', 'Iron', 'Lead', 'Gold', 'I']
    """
    try:
        found = []
        for tokens, _start, _end in self.scanString(instring, maxMatches):
            found.append(tokens)
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        raise exc
1750
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using the given expression as a separator.
    May be called with optional C{maxsplit} argument, to limit the number of splits;
    and the optional C{includeSeparators} argument (default=C{False}), if the separating
    matching text should be included in the split results.

    Yields the text between consecutive matches and, after the last match,
    the remainder of the string; when C{includeSeparators} is set, the first
    token of each match is yielded after the text that precedes it.

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    last = 0
    for t,s,e in self.scanString(instring, maxMatches=maxsplit):
        yield instring[last:s]
        if includeSeparators:
            yield t[0]
        # the next piece starts where this separator ended
        last = e
    yield instring[last:]
1772
def __add__(self, other ):
    """
    Implementation of + operator - returns C{L{And}}.  A string operand is
    first converted using the class's literal string class; for any other
    non-C{ParserElement} operand a C{SyntaxWarning} is issued and C{None}
    is returned.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return And([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1784
def __radd__(self, other ):
    """
    Implementation of + operator when left operand is not a C{L{ParserElement}}.
    A string operand is first converted using the class's literal string
    class; for any other non-C{ParserElement} operand a C{SyntaxWarning} is
    issued and C{None} is returned.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None
1796
def __sub__(self, other):
    """
    Implementation of - operator, returns C{L{And}} with error stop

    The result is an C{And} of this element, an C{And._ErrorStop()} marker and
    the right operand. A string operand is first wrapped in
    C{ParserElement._literalStringClass}; any other non-C{ParserElement}
    operand triggers a C{SyntaxWarning} and the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return And([self, And._ErrorStop(), operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1808
def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1820
def __mul__(self, other):
    """
    Implementation of * operator, allows use of C{expr * 3} in place of
    C{expr + expr + expr}.  Expressions may also be multiplied by a 2-integer
    tuple, similar to C{{min,max}} multipliers in regular expressions.  Tuples
    may also include C{None} as in:
     - C{expr*(n,None)} or C{expr*(n,)} is equivalent
          to C{expr*n + L{ZeroOrMore}(expr)}
          (read as "at least n instances of C{expr}")
     - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
          (read as "0 to n instances of C{expr}")
     - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
     - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}

    Note that C{expr*(None,n)} does not raise an exception if
    more than n exprs exist in the input stream; that is,
    C{expr*(None,n)} does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    C{expr*(None,n) + ~expr}

    Raises C{TypeError} if C{other} is neither an int nor a tuple of
    ints/C{None}, and C{ValueError} if the requested counts are negative,
    reversed, or both zero.
    """
    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        # pad short tuples with None so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # from here on optElements counts only the optional repetitions
            optElements -= minElements
        else:
            # the message must be %-formatted; passing the types as extra
            # exception args leaves the placeholders unexpanded
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects"
                            % (type(other[0]).__name__, type(other[1]).__name__))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects"
                        % type(other).__name__)

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if optElements:
        def makeOptionalList(n):
            # builds nested Optionals: Optional(self + Optional(self + ...))
            if n > 1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """
    Implementation of * operator when the multiplier is the left operand;
    delegates to C{__mul__} with the same argument.
    """
    product = self.__mul__(other)
    return product
1891
def __or__(self, other):
    """
    Implementation of | operator - returns C{L{MatchFirst}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return MatchFirst([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1903
def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1915
def __xor__(self, other):
    """
    Implementation of ^ operator - returns C{L{Or}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1927
def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1939
def __and__(self, other):
    """
    Implementation of & operator - returns C{L{Each}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1951
def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a C{L{ParserElement}}

    A string operand is first wrapped in C{ParserElement._literalStringClass}.
    Any other non-C{ParserElement} operand triggers a C{SyntaxWarning} and
    the method returns C{None}.
    """
    operand = other
    if isinstance(operand, basestring):
        operand = ParserElement._literalStringClass(operand)
    if isinstance(operand, ParserElement):
        return operand & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
            SyntaxWarning, stacklevel=2)
    return None
1963
def __invert__(self):
    """
    Implementation of ~ operator - returns C{L{NotAny}} wrapping this element
    """
    negated = NotAny(self)
    return negated
1969
def __call__(self, name=None):
    """
    Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.

    If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
    passed as C{True}.

    If C{name} is omitted (C{None}), same as calling C{L{copy}}.

    Example::
        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
    """
    # only None means "no name"; an empty string is still forwarded
    if name is None:
        return self.copy()
    return self.setResultsName(name)
1988
def suppress(self):
    """
    Suppresses the output of this C{ParserElement}; useful to keep punctuation from
    cluttering up returned output.  Returns a C{Suppress} wrapping this element.
    """
    wrapped = Suppress(self)
    return wrapped
1995
def leaveWhitespace(self):
    """
    Disables the skipping of whitespace before matching the characters in the
    C{ParserElement}'s defined pattern.  This is normally only used internally by
    the pyparsing module, but may be needed in some whitespace-sensitive grammars.

    Returns C{self} so calls can be chained.
    """
    setattr(self, "skipWhitespace", False)
    return self
2004
def setWhitespaceChars(self, chars):
    """
    Overrides the default whitespace chars

    Re-enables whitespace skipping, stores C{chars} as the whitespace set for
    this element, clears C{copyDefaultWhiteChars}, and returns C{self}.
    """
    self.copyDefaultWhiteChars = False
    self.whiteChars = chars
    self.skipWhitespace = True
    return self
2013
def parseWithTabs(self):
    """
    Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
    Must be called before C{parseString} when the input grammar contains elements that
    match C{<TAB>} characters.

    Sets the C{keepTabs} flag on this element and returns C{self}.
    """
    setattr(self, "keepTabs", True)
    return self
2022
def ignore(self, other):
    """
    Define expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in C{Suppress}.  A C{Suppress} expression is added only
    if not already present; any other expression is copied and wrapped in
    C{Suppress} before being added.  Returns C{self}.

    Example::
        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    expr = Suppress(other) if isinstance(other, basestring) else other

    if not isinstance(expr, Suppress):
        self.ignoreExprs.append(Suppress(expr.copy()))
    elif expr not in self.ignoreExprs:
        self.ignoreExprs.append(expr)
    return self
2045
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching.

    Each falsy action is replaced by the corresponding module default debug
    action.  Stores the three actions as C{debugActions}, turns C{debug} on,
    and returns C{self}.
    """
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
2055
def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set C{flag} to True to enable (installing the default debug actions),
    False to disable.  Returns C{self}.
    """
    if not flag:
        self.debug = False
        return self
    self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self
2066
def __str__(self):
    """Return this element's C{name} attribute."""
    label = self.name
    return label
2069
def __repr__(self):
    """Return the same text as the string conversion of this element (via C{_ustr})."""
    text = _ustr(self)
    return text
2072
def streamline(self):
    """
    Mark this element as streamlined and clear its cached string
    representation (C{strRepr}).  Returns C{self}.
    """
    self.strRepr = None
    self.streamlined = True
    return self
2077
def checkRecursion(self, parseElementList):
    """
    Recursion check hook; this implementation does nothing and returns C{None}.
    """
    return None
2080
def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    C{validateTrace} is accepted but not used by this implementation; its
    default is C{None} rather than a shared mutable list.  The check itself is
    delegated to C{checkRecursion}, which is always started with a fresh
    empty list.
    """
    self.checkRecursion([])
2086
def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.
    If a filename is specified (instead of a file object),
    the entire file is opened, read, and closed before parsing.

    Returns the result of C{parseString} on the file contents.  A
    C{ParseBaseException} is re-raised from here unless
    C{ParserElement.verbose_stacktrace} is set.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # no read() method: treat the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()
    try:
        parsed = self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # catch and re-raise exception from here, clears out pyparsing internal stack trace
            raise exc
        raise
    return parsed
2106
def __eq__(self, other):
    """
    Equality test.  Against another C{ParserElement}: true for the same object
    or when both have equal instance attributes.  Against a string: true when
    this expression C{matches} the string.  Anything else is compared through
    the C{super} proxy.
    """
    if isinstance(other, ParserElement):
        return self is other or vars(self) == vars(other)
    if isinstance(other, basestring):
        return self.matches(other)
    return super(ParserElement, self) == other
2114
def __ne__(self, other):
    """Inequality test: the negation of C{self == other}."""
    equal = (self == other)
    return not equal
2117
def __hash__(self):
    """Identity-based hash: the hash of this object's C{id}."""
    identity = id(self)
    return hash(identity)
2120
def __req__(self, other):
    """Reflected equality helper; returns the result of C{self == other}."""
    outcome = (self == other)
    return outcome
2123
def __rne__(self, other):
    """Reflected inequality helper; returns the negation of C{self == other}."""
    equal = (self == other)
    return not equal
2126
def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns C{True} if C{parseString} succeeds, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2145
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of
    C{(test string, result)} tuples, where result is the parsed results or the
    exception raised for that test

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # float with scientific notation
            6.02e23
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        Success
    """
    if isinstance(tests, basestring):
        tests = list(map(str.strip, tests.rstrip().splitlines()))
    if isinstance(comment, basestring):
        comment = Literal(comment)

    success = True
    allResults = []
    pendingComments = []
    for test in tests:
        # a comment line, or a blank line that follows collected comments,
        # is held back and printed with the next real test
        isComment = comment is not None and comment.matches(test, False)
        if isComment or (pendingComments and not test):
            pendingComments.append(test)
            continue
        if not test:
            continue

        out = ['\n'.join(pendingComments), test]
        pendingComments = []
        try:
            result = self.parseString(test, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' not in test:
                out.append(' '*pe.loc + '^' + fatal)
            else:
                # multi-line test: show the offending line and point at its column
                out.append(line(pe.loc, test))
                out.append(' '*(col(pe.loc, test)-1) + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((test, result))

    return success, allResults
2264
class Token(ParserElement):
    """
    Abstract C{ParserElement} subclass, for defining atomic matching patterns.
    """
    def __init__(self):
        # atomic tokens are initialized with savelist=False
        super(Token, self).__init__(savelist=False)
2272
class Empty(Token):
    """
    An empty token, will always match.
    """
    def __init__(self):
        super(Empty, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "Empty"
2283
class NoMatch(Token):
    """
    A token that will never match.
    """
    def __init__(self):
        super(NoMatch, self).__init__()
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = "NoMatch"

    def parseImpl(self, instring, loc, doActions=True):
        """Always fail: raise C{ParseException} at C{loc} with this token's error message."""
        raise ParseException(instring, loc, self.errmsg, self)
2298
class Literal(Token):
    """
    Token to exactly match a specified string.

    Example::
        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    For case-insensitive matching, use L{CaselessLiteral}.

    For keyword matching (force word break before and after the matched string),
    use L{Keyword} or L{CaselessKeyword}.
    """
    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the literal at C{loc}; return C{(loc + matchLen, match)} or
        raise C{ParseException}.
        """
        matched = (instring[loc] == self.firstMatchChar and
                   (self.matchLen == 1 or instring.startswith(self.match, loc)))
        if not matched:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc+self.matchLen, self.match
_L = Literal
ParserElement._literalStringClass = Literal
class Keyword(Token):
    """
    Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with C{L{Literal}}:
     - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
     - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
    Accepts two optional constructor arguments in addition to the keyword string:
     - C{identChars} is a string of characters that would be valid identifier characters,
          defaulting to C{Keyword.DEFAULT_KEYWORD_CHARS} at the time of construction
          (initially all alphanumerics + "_" and "$")
     - C{caseless} allows case-insensitive matching, default is C{False}.

    Example::
        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use L{CaselessKeyword}.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        # resolve the default at call time so setDefaultKeywordChars() affects
        # keywords created afterwards; a def-time default would be frozen
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare everything in upper case, including the identifier chars
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """
        Match the keyword at C{loc}, requiring that the characters just before
        and just after it are not in C{identChars}.  Returns
        C{(loc + matchLen, match)} or raises C{ParseException}.
        """
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
                 (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
                (loc == 0 or instring[loc-1] not in self.identChars) ):
                return loc+self.matchLen, self.match
        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """
        Return a copy of this keyword that keeps this instance's own
        identifier characters (rather than resetting them to the class default).
        """
        c = super(Keyword,self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
2403
class CaselessLiteral(Literal):
    """
    Token to match a specified string, ignoring case of letters.
    Note: the matched results will always be in the case of the given
    match string, NOT the case of the input text.

    Example::
        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for L{CaselessKeyword}.)
    """
    def __init__(self, matchString):
        # the parent stores the upper-cased form, used for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        """
        Compare the upper-cased input slice at C{loc} with the stored match;
        return C{(loc + matchLen, returnString)} or raise C{ParseException}.
        """
        end = loc+self.matchLen
        candidate = instring[loc:end].upper()
        if candidate != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return end, self.returnString
2426
class CaselessKeyword(Keyword):
    """
    Caseless version of L{Keyword}.

    Matching is inherited from C{Keyword.parseImpl} (caseless branch), so the
    keyword must be neither preceded nor followed by an identifier character.

    Example::
        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']

    (Contrast with example for L{CaselessLiteral}.)
    """
    def __init__(self, matchString, identChars=None):
        # resolve the default at call time so setDefaultKeywordChars() is honored
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        super(CaselessKeyword, self).__init__(matchString, identChars, caseless=True)
2444
class Word(Token):
    """
    Token for matching words composed of allowed character sets.
    Defined with string containing all allowed initial characters,
    an optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction. An optional
    C{excludeChars} parameter can list characters that might be found in
    the input C{bodyChars} string; useful to define a word of all printables
    except for one or two characters, for instance.

    L{srange} is useful for defining custom character set strings for defining
    C{Word} expressions, using range notation from regular expression character sets.

    A common mistake is to use C{Word} to match a specific literal string, as in
    C{Word("Address")}. Remember that C{Word} uses the string argument to define
    I{sets} of matchable characters. This expression would match "Add", "AAA",
    "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
    To match an exact literal string, use L{Literal} or L{Keyword}.

    pyparsing includes helper strings for building Words:
     - L{alphas}
     - L{nums}
     - L{alphanums}
     - L{hexnums}
     - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
     - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - L{printables} (any non-whitespace character)

    Example::
        # a word composed of digits
        integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums+'-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None ):
        super(Word,self).__init__()
        if excludeChars:
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = set(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = set(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both min and max
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # with default length limits and no space in the character sets, the
        # word is matched with a compiled regular expression in parseImpl
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # best effort: fall back to the character-by-character matcher,
                # but let KeyboardInterrupt/SystemExit propagate
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match a word at C{loc}; return C{(new loc, matched text)} or raise
        C{ParseException}.  Uses the compiled regular expression when one is
        available, otherwise scans character by character.
        """
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                raise ParseException(instring, loc, self.errmsg, self)

            loc = result.end()
            return loc, result.group()

        if not(instring[ loc ] in self.initChars):
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit max, a longer run of body characters is a failure
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """
        Return the inherited string form when available; otherwise build (and
        cache in C{strRepr}) a C{W:(...)} summary of the character sets.
        """
        try:
            return super(Word,self).__str__()
        except Exception:
            # inherited form not available yet; build our own below
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
2602
class Regex(Token):
    r"""
    Token for matching strings that match a given regular expression.
    Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.

    Example::
        realnum = Regex(r"[+-]?\d+\.\d*")
        ssn = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
    """
    compiledREtype = type(re.compile("[A-Z]"))
    def __init__( self, pattern, flags=0):
        """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

        C{pattern} may also be an already compiled regular expression object,
        which is used directly.  Raises C{re.error} for an invalid pattern
        string and C{ValueError} for any other kind of argument."""
        super(Regex,self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                        SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
                self.reString = self.pattern
            except re.error:
                # warn with the offending pattern, then let the error propagate
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                    SyntaxWarning, stacklevel=2)
                raise

        elif isinstance(pattern, Regex.compiledREtype):
            self.re = pattern
            # use the pattern's source text; str() of a compiled pattern is
            # its repr, not a usable regular expression string
            self.pattern = \
            self.reString = pattern.pattern
            self.flags = flags

        else:
            raise ValueError("Regex may only be constructed with a string or a compiled RE object")

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Match the regular expression at C{loc}; return C{(end of match, results)}
        or raise C{ParseException}.  Named groups of the match are stored in
        the results under their group names.
        """
        result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        d = result.groupdict()
        ret = ParseResults(result.group())
        if d:
            for k in d:
                ret[k] = d[k]
        return loc,ret

    def __str__( self ):
        """
        Return the inherited string form when available; otherwise build (and
        cache in C{strRepr}) a C{Re:(...)} summary of the pattern.
        """
        try:
            return super(Regex,self).__str__()
        except Exception:
            # inherited form not available yet; build our own below
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
2673
class QuotedString(Token):
    r"""
    Token for matching strings that are delimited by quoting characters.

    Defined with the following parameters:
     - quoteChar - string of one or more characters defining the quote delimiting string
     - escChar - character to escape quotes, typically backslash (default=C{None})
     - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
     - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
     - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
     - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
     - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})

    Example::
        qs = QuotedString('"')
        print(qs.searchString('lsjdf "This is the quote" sldjf'))
        complex_qs = QuotedString('{{', endQuoteChar='}}')
        print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
        sql_qs = QuotedString('"', escQuote='""')
        print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
    prints::
        [['This is the quote']]
        [['This is the "quote"']]
        [['This is the quote with "embedded" quotes']]
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
        """Build and compile the regular expression that recognizes the quoted string.

        Raises C{SyntaxError} (after emitting a C{SyntaxWarning}) when
        C{quoteChar} or C{endQuoteChar} is empty once surrounding whitespace
        has been stripped, and re-raises the C{re} compile error if the
        generated pattern is invalid.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if not quoteChar:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if not endQuoteChar:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults
        self.convertWhitespaceEscapes = convertWhitespaceEscapes

        # The escape character (if any) is excluded from the "ordinary
        # character" class so that it can only match via the escChar
        # alternative appended below.
        if escChar is not None:
            escCharRange = _escapeRegexRangeChars(escChar)
        else:
            escCharRange = ''

        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharRange )
        else:
            self.flags = 0
            # single-line mode: newlines and carriage returns end the match
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  escCharRange )
        if len(self.endQuoteChar) > 1:
            # allow any proper prefix of a multi-character end quote inside the
            # body, as long as it is not followed by the next end-quote char
            self.pattern += (
                '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                    for i in range(len(self.endQuoteChar)-1,0,-1)) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string at C{loc}.

        Returns C{(new_loc, text)}; C{text} has its quotes stripped and its
        escapes resolved when C{unquoteResults} is set.  Raises
        C{ParseException} when the input at C{loc} is not a quoted string.
        """
        # cheap first-character test before running the full regex
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            raise ParseException(instring, loc, self.errmsg, self)

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped whitespace
                if '\\' in ret and self.convertWhitespaceEscapes:
                    ws_map = {
                        r'\t' : '\t',
                        r'\n' : '\n',
                        r'\f' : '\f',
                        r'\r' : '\r',
                    }
                    for wslit,wschar in ws_map.items():
                        ret = ret.replace(wslit, wschar)

                # replace escaped characters
                if self.escChar:
                    # raw string: "\g" in a plain literal is an invalid escape
                    # sequence and triggers a warning on modern Python
                    ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the assigned name, or a generated (and cached) description."""
        try:
            return super(QuotedString,self).__str__()
        except Exception:
            # narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # propagate; any ordinary failure falls back to the generated text
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
2810
class CharsNotIn(Token):
    """
    Token for matching words composed of characters *not* in a given set (will
    include whitespace in matched characters if not listed in the provided exclusion set - see example).
    Defined with string containing all disallowed characters, and an optional
    minimum, maximum, and/or exact length.  The default value for C{min} is 1 (a
    minimum value < 1 is not valid); the default values for C{max} and C{exact}
    are 0, meaning no maximum or exact length restriction.

    Example::
        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
    prints::
        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

        Raises C{ValueError} if C{min} is less than 1.  A positive C{exact}
        overrides both C{min} and C{max}.
        """
        super(CharsNotIn,self).__init__()
        # whitespace is ordinary content here unless listed in notChars
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume the run of allowed characters starting at C{loc}.

        Returns C{(new_loc, matched_text)}.  Raises C{ParseException} when the
        first character is excluded or the run is shorter than C{minLen}.
        """
        if instring[loc] in self.notChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        notchars = self.notChars
        # never scan past maxLen characters or the end of the input
        maxlen = min( start+self.maxLen, len(instring) )
        while loc < maxlen and \
              (instring[loc] not in notchars):
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the assigned name, or a generated (and cached) C{"!W:(...)"} form."""
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            # narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are not swallowed while falling back to the generated name
            pass

        if self.strRepr is None:
            # abbreviate long exclusion sets to their first four characters
            if len(self.notChars) > 4:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]
            else:
                self.strRepr = "!W:(%s)" % self.notChars

        return self.strRepr
2882
class White(Token):
    """
    Special matching class for matching whitespace.  Normally, whitespace is ignored
    by pyparsing grammars.  This class is included when some whitespace structures
    are significant.  Define with a string containing the whitespace characters to be
    matched; default is C{" \\t\\r\\n"}.  Also takes optional C{min}, C{max}, and C{exact} arguments,
    as defined for the C{L{Word}} class.
    """
    # printable tags used to build the expression name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Configure which whitespace characters are matched and how many."""
        super(White,self).__init__()
        self.matchWhite = ws
        # the characters we want to match must not be skipped as leading whitespace
        stillSkipped = [ c for c in self.whiteChars if c not in self.matchWhite ]
        self.setWhitespaceChars( "".join(stillSkipped) )
        #~ self.leaveWhitespace()
        tags = [ White.whiteStrs[c] for c in self.matchWhite ]
        self.name = "".join(tags)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length pins both bounds
            self.minLen = exact
            self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of the configured whitespace characters at C{loc}.

        Returns C{(new_loc, matched_text)}; raises C{ParseException} when the
        character at C{loc} is not matchable or the run is shorter than C{minLen}.
        """
        wanted = self.matchWhite
        if instring[ loc ] not in wanted:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < limit and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]
2932
class _PositionToken(Token):
    """Shared base class for the position-testing tokens defined below it."""
    def __init__( self ):
        super(_PositionToken,self).__init__()
        # flags shared by every position token
        self.mayIndexError = False
        self.mayReturnEmpty = True
        # the default name is simply the concrete class name
        self.name = type(self).__name__
2940
class GoToColumn(_PositionToken):
    """
    Token to advance to a specific column of input text; useful for tabular report scraping.
    """
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables, then whitespace, stopping once the target column is reached."""
        target = self.col
        if col(loc,instring) == target:
            # already in the right column, nothing to skip
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < end and instring[loc].isspace() and col( loc, instring ) != target:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between C{loc} and the target column.

        Raises C{ParseException} if C{loc} is already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        stop = loc + self.col - current
        return stop, instring[ loc: stop ]
2965
class LineStart(_PositionToken):
    """
    Matches if current position is at the beginning of a line within the parse string
    """
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant to this token, so they must not be skipped as whitespace
        self.setWhitespaceChars( ParserElement.DEFAULT_WHITE_CHARS.replace("\n","") )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Step over a single newline found after the skippable leading text.

        Returns C{loc + 1} when the first character remaining after the
        inherited pre-parse is a newline, otherwise C{loc} unchanged.
        """
        preloc = super(LineStart,self).preParse(instring,loc)
        # Bounds check: for empty input, or when the inherited pre-parse
        # reaches the end of the string, instring[preloc] would raise an
        # IndexError instead of letting parseImpl decide.
        if preloc < len(instring) and instring[preloc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when C{loc} is at the start of a line.

        A position counts as a line start when it is 0, when it is where
        pre-parsing from position 0 ends up, or when the previous character is
        a newline.  Otherwise C{ParseException} is raised.
        """
        if not( loc==0 or
            (loc == self.preParse( instring, 0 )) or
            (instring[loc-1] == "\n") ): #col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
2987
class LineEnd(_PositionToken):
    """
    Matches if current position is at the end of a line within the parse string
    """
    def __init__( self ):
        super(LineEnd,self).__init__()
        # keep newlines out of the skipped whitespace so they can be matched
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at C{loc}, or the very end of the input.

        Returns C{(loc+1, "\\n")} for a newline and C{(loc+1, [])} at the end
        of the string; anything else raises C{ParseException}.
        """
        end = len(instring)
        if loc == end:
            return loc+1, []
        if loc < end and instring[loc] == "\n":
            return loc+1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)
3007
class StringStart(_PositionToken):
    """
    Matches if current position is at the beginning of the parse string
    """
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed when C{loc} is 0 or everything before it is skippable.

        "Skippable" means that pre-parsing from position 0 (whitespace and
        ignorables) lands exactly on C{loc}.
        """
        atStart = ( loc == 0 ) or ( loc == self.preParse( instring, 0 ) )
        if not atStart:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3022
class StringEnd(_PositionToken):
    """
    Matches if current position is at the end of the parse string
    """
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with an empty token list when no input remains.

        Returns C{(loc+1, [])} exactly at the end of the string and
        C{(loc, [])} when C{loc} is already beyond it.  Raises
        C{ParseException} while unparsed input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen: the comparisons above are exhaustive, so the
        # original trailing ``else: raise`` branch could never run and is gone
        return loc, []
3040
class WordStart(_PositionToken):
    """
    Matches if the current position is at the beginning of a Word, and
    is not preceded by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
    the string being parsed, or at the beginning of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = set(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed (with no tokens) when a word begins at C{loc}."""
        if loc == 0:
            # position 0 is accepted without inspecting any character
            return loc, []

        wordChars = self.wordChars
        precededByWordChar = instring[loc-1] in wordChars
        if precededByWordChar or instring[loc] not in wordChars:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3060
class WordEnd(_PositionToken):
    """
    Matches if the current position is at the end of a Word, and
    is not followed by any character in a given set of C{wordChars}
    (default=C{printables}). To emulate the C{\\b} behavior of regular expressions,
    use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
    the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        super(WordEnd,self).__init__()
        self.wordChars = set(wordChars)
        # the character right at the current position matters, so leading
        # whitespace must not be skipped before testing
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed (with no tokens) when a word ends at C{loc}.

        At or past the end of the input the match always succeeds.  Otherwise
        the character at C{loc} must not be a word character and the character
        before it must be one; if not, C{ParseException} is raised.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # ``loc == 0`` is tested explicitly: there is no preceding
            # character, and instring[loc-1] would otherwise wrap around to
            # the *last* character of the input and give a bogus answer.
            if (instring[loc] in self.wordChars or
                loc == 0 or
                instring[loc-1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []
3082
class ParseExpression(ParserElement):
    """
    Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
    """
    def __init__( self, exprs, savelist = False ):
        """Normalize C{exprs} into the list C{self.exprs}.

        C{exprs} may be a generator, a single string, an iterable of
        expressions and/or strings, or a single expression.  A lone string, or
        an iterable made up entirely of strings, is wrapped with
        C{ParserElement._literalStringClass}.
        """
        super(ParseExpression,self).__init__(savelist)

        # ``collections.Iterable`` was removed in Python 3.10; resolve the ABC
        # from ``collections.abc`` and fall back for interpreters without it.
        try:
            from collections.abc import Iterable
        except ImportError:
            from collections import Iterable

        if isinstance( exprs, _generatorType ):
            exprs = list(exprs)

        if isinstance( exprs, basestring ):
            self.exprs = [ ParserElement._literalStringClass( exprs ) ]
        elif isinstance( exprs, Iterable ):
            exprs = list(exprs)
            # if sequence of strings provided, wrap with Literal
            if all(isinstance(expr, basestring) for expr in exprs):
                exprs = [ ParserElement._literalStringClass( expr ) for expr in exprs ]
            self.exprs = exprs
        else:
            try:
                self.exprs = list( exprs )
            except TypeError:
                # a single, non-iterable expression
                self.exprs = [ exprs ]
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the C{i}'th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add C{other} to the contained expressions and return C{self}."""
        self.exprs.append( other )
        # the cached string form no longer reflects the contents
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are unaffected
        self.exprs = [ e.copy() for e in self.exprs ]
        for e in self.exprs:
            e.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on every contained expression.

        A C{Suppress} instance that is already registered is not added again.
        """
        if not isinstance( other, Suppress ) or other not in self.ignoreExprs:
            super( ParseExpression, self).ignore( other )
            for e in self.exprs:
                e.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the assigned name, or a generated (and cached) C{"Class:(exprs)"} form."""
        try:
            return super(ParseExpression,self).__str__()
        except Exception:
            # narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are not swallowed while falling back to the generated name
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline all contained expressions and flatten same-class nesting."""
        super(ParseExpression,self).streamline()

        for e in self.exprs:
            e.streamline()

        def mergeable( other ):
            # only absorb a nested expression of our own class that carries no
            # parse actions, results name or debug flag of its own
            return ( isinstance( other, self.__class__ ) and
                     not(other.parseAction) and
                     other.resultsName is None and
                     not other.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if ( len(self.exprs) == 2 ):
            other = self.exprs[0]
            if mergeable( other ):
                self.exprs = other.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

            other = self.exprs[-1]
            if mergeable( other ):
                self.exprs = self.exprs[:-1] + other.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= other.mayReturnEmpty
                self.mayIndexError  |= other.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the inherited C{setResultsName} and return its result."""
        ret = super(ParseExpression,self).setResultsName(name,listAllMatches)
        return ret

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this one for recursion."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        for e in self.exprs:
            e.validate(tmp)
        self.checkRecursion( [] )

    def copy(self):
        """Return a copy whose contained expressions are themselves copies."""
        ret = super(ParseExpression,self).copy()
        ret.exprs = [e.copy() for e in self.exprs]
        return ret
3195
class And(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'+'} operator.
    May also be constructed using the C{'-'} operator, which will suppress backtracking.

    Example::
        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"),name_expr("name"),integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        """Marker element: failures after it are raised as C{ParseSyntaxException}."""
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop,self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows the leading expression
        leader = self.exprs[0]
        self.setWhitespaceChars( leader.whiteChars )
        self.skipWhitespace = leader.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and accumulate the tokens."""
        # the first element is parsed without pre-parsing, since the string was
        # already pre-parsed as part of this And's own pre-parse step
        loc, collected = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        pastErrorStop = False
        for expr in self.exprs[1:]:
            if isinstance(expr, And._ErrorStop):
                pastErrorStop = True
                continue

            if not pastErrorStop:
                loc, tokens = expr._parse( instring, loc, doActions )
            else:
                # beyond the '-' marker every failure becomes a syntax error
                try:
                    loc, tokens = expr._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if tokens or tokens.haskeys():
                collected += tokens
        return loc, collected

    def __iadd__(self, other ):
        """Append C{other} in place; a plain string is wrapped as a literal first."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions up to the first one that cannot match empty."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
            if not expr.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the explicit name if there is one, else C{"{a b c}"} (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ".join(parts) + "}"

        return self.strRepr
3270
class Or(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the expression that matches the longest string will be used.
    May be constructed using the C{'^'} operator.

    Example::
        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))
    prints::
        [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        if not self.exprs:
            # with no alternatives there is nothing that has to be consumed
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and return the result of the longest match.

        When nothing matches, the exception that got furthest into the input
        is re-raised with this expression's error message.
        """
        furthestLoc = -1
        furthestErr = None
        candidates = []
        # first pass: find out how far each alternative gets
        for expr in self.exprs:
            try:
                endLoc = expr.tryParse( instring, loc )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),expr.errmsg,self)
                    furthestLoc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((endLoc, expr))

        # second pass: parse for real, longest first (the sort is stable, so
        # ties keep their definition order)
        for _, expr in sorted(candidates, key=lambda match: -match[0]):
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc

        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr


    def __ixor__(self, other ):
        """Append C{other} in place; a plain string is wrapped as a literal first."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the explicit name if there is one, else C{"{a ^ b}"} (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
3349
class MatchFirst(ParseExpression):
    """
    Requires that at least one C{ParseExpression} is found.
    If two expressions match, the first one listed is the one that will match.
    May be constructed using the C{'|'} operator.

    Example::
        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        if not self.exprs:
            # with no alternatives there is nothing that has to be consumed
            self.mayReturnEmpty = True
        else:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at C{loc}.

        When nothing matches, the exception that got furthest into the input
        is re-raised with this expression's error message.
        """
        furthestLoc = -1
        furthestErr = None
        for expr in self.exprs:
            try:
                return expr._parse( instring, loc, doActions )
            except ParseException as err:
                if err.loc > furthestLoc:
                    furthestErr = err
                    furthestLoc = err.loc
            except IndexError:
                if len(instring) > furthestLoc:
                    furthestErr = ParseException(instring,len(instring),expr.errmsg,self)
                    furthestLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthestErr is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthestErr.msg = self.errmsg
        raise furthestErr

    def __ior__(self, other ):
        """Append C{other} in place; a plain string is wrapped as a literal first."""
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the explicit name if there is one, else C{"{a | b}"} (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
3417
class Each(ParseExpression):
    """
    Requires all given C{ParseExpression}s to be found, but in any order.
    Expressions may be separated by whitespace.
    May be constructed using the C{'&'} operator.

    Example::
        color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
        shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
        integer = Word(nums)
        shape_attr = "shape:" + shape_type("shape")
        posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
        color_attr = "color:" + color("color")
        size_attr = "size:" + integer("size")

        # use Each (using operator '&') to accept attributes in any order
        # (shape and posn are required, color and size are optional)
        shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)

        shape_spec.runTests('''
            shape: SQUARE color: BLACK posn: 100, 120
            shape: CIRCLE size: 50 color: BLUE posn: 50,80
            color:GREEN size:20 shape:TRIANGLE posn:20,40
            '''
            )
    prints::
        shape: SQUARE color: BLACK posn: 100, 120
        ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
        - color: BLACK
        - posn: ['100', ',', '120']
          - x: 100
          - y: 120
        - shape: SQUARE


        shape: CIRCLE size: 50 color: BLUE posn: 50,80
        ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
        - color: BLUE
        - posn: ['50', ',', '80']
          - x: 50
          - y: 80
        - shape: CIRCLE
        - size: 50


        color: GREEN size: 20 shape: TRIANGLE posn: 20,40
        ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
        - color: GREEN
        - posn: ['20', ',', '40']
          - x: 20
          - y: 40
        - shape: TRIANGLE
        - size: 20
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        self.skipWhitespace = True
        # the expression groups are computed lazily on the first parse
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order the input presents them.

        A trial pass (C{tryParse}) discovers the order in which expressions
        match; the expressions are then parsed for real in that order and
        their results merged.  Raises C{ParseException} when a required
        expression never matched.
        """
        if self.initExprGroups:
            # one-time classification of the contained expressions
            optWrappers = [ e for e in self.exprs if isinstance(e,Optional) ]
            self.opt1map = dict((id(w.expr),w) for w in optWrappers)
            unwrappedOpts = [ w.expr for w in optWrappers ]
            emptyables = [ e for e in self.exprs if e.mayReturnEmpty and not isinstance(e,Optional)]
            self.optionals = unwrappedOpts + emptyables
            self.multioptionals = [ e.expr for e in self.exprs if isinstance(e,ZeroOrMore) ]
            self.multirequired = [ e.expr for e in self.exprs if isinstance(e,OneOrMore) ]
            singles = [ e for e in self.exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required = singles + self.multirequired
            self.initExprGroups = False

        trialLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # keep sweeping over the candidates until a sweep matches nothing
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            failCount = 0
            for expr in candidates:
                try:
                    trialLoc = expr.tryParse( instring, trialLoc )
                except ParseException:
                    failCount += 1
                else:
                    # record the Optional wrapper (if any) rather than its inner expr
                    matchOrder.append(self.opt1map.get(id(expr),expr))
                    if expr in pendingReqd:
                        pendingReqd.remove(expr)
                    elif expr in pendingOpt:
                        pendingOpt.remove(expr)
            if failCount == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join(_ustr(e) for e in pendingReqd)
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOpt]

        # now parse for real, in the order discovered above
        parsed = []
        for expr in matchOrder:
            loc,tokens = expr._parse(instring,loc,doActions)
            parsed.append(tokens)

        merged = ParseResults()
        for res in parsed:
            # names already present in the merged results are combined
            # rather than overwritten
            repeats = {}
            for key in res.keys():
                if key in merged:
                    combined = ParseResults(merged[key])
                    combined += ParseResults(res[key])
                    repeats[key] = combined
            merged += ParseResults(res)
            for key,value in repeats.items():
                merged[key] = value
        return loc, merged

    def __str__( self ):
        """Return the explicit name if there is one, else C{"{a & b}"} (cached)."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            parts = [ _ustr(e) for e in self.exprs ]
            self.strRepr = "{" + " & ".join(parts) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [ self ]
        for expr in self.exprs:
            expr.checkRecursion( trail )
3551
class ParseElementEnhance(ParserElement):
    """
    Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
    """
    def __init__( self, expr, savelist=False ):
        """Wrap C{expr}, inheriting its parsing flags when it is not C{None}.

        A plain string is first converted with
        C{ParserElement._literalStringClass}.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = ParserElement._literalStringClass(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's settings on the wrapper
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise C{ParseException} if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Disable whitespace skipping here and on a private copy of the wrapped expression."""
        self.skipWhitespace = False
        # Test for None *before* copying: the original called
        # self.expr.copy() first and therefore failed with AttributeError
        # whenever no expression had been assigned.
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register C{other} as ignorable here and on the wrapped expression.

        A C{Suppress} instance that is already registered is not added again.
        """
        if not isinstance( other, Suppress ) or other not in self.ignoreExprs:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and then the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise C{RecursiveGrammarException} if this element is already on the path."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then check this element for recursion."""
        # the default list is only ever copied, never mutated
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the assigned name, or a generated (and cached) C{"Class:(expr)"} form."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except Exception:
            # narrowed from a bare ``except:`` so KeyboardInterrupt/SystemExit
            # are not swallowed while falling back to the generated name
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
3625
class FollowedBy(ParseElementEnhance):
    """
    Lookahead matching of the given parse expression.  C{FollowedBy}
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  C{FollowedBy} always returns a null token list.

    Example::
        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
    prints::
        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        # a lookahead consumes no input, so it can always "match empty"
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Verify that the wrapped expression matches at C{loc} without advancing."""
        lookahead = self.expr
        # only the success or failure matters; a failure propagates to the caller
        lookahead.tryParse( instring, loc )
        return loc, []
3651
class NotAny(ParseElementEnhance):
    """
    Negative lookahead on the given parse expression.  C{NotAny} verifies
    that the expression does *not* match at the current position.  It does
    *not* advance the parsing position within the input string and does
    *not* skip over leading whitespace.  C{NotAny} always returns a null
    token list.  May be constructed using the '~' operator.

    Example::
        # a word that does not begin with the literal "end"
        not_end_word = ~Literal("end") + Word(alphas)

        # equivalent explicit form
        not_end_word = NotAny(Literal("end")) + Word(alphas)
    """
    def __init__( self, expr ):
        super(NotAny, self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would propagate the setting to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        unwanted_present = self.expr.canParseNext(instring, loc)
        if unwanted_present:
            raise ParseException(instring, loc, self.errmsg, self)
        return (loc, [])

    def __str__( self ):
        if not hasattr(self, "name"):
            # build the default representation once and cache it
            if self.strRepr is None:
                self.strRepr = "~{" + _ustr(self.expr) + "}"
            return self.strRepr
        return self.name
3684
class OneOrMore(ParseElementEnhance):
    """
    Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """
    def __init__( self, expr, stopOn=None):
        super(OneOrMore, self).__init__(expr)
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = ParserElement._literalStringClass(sentinel)
        # store the negated sentinel: it fails to parse exactly where the sentinel matches
        if sentinel is None:
            self.not_ender = None
        else:
            self.not_ender = ~sentinel

    def parseImpl( self, instring, loc, doActions=True ):
        # bind the callables used in the loop to local names
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        reject_sentinel = None
        if self.not_ender is not None:
            reject_sentinel = self.not_ender.tryParse

        # the first repetition is mandatory, so failures here propagate
        # (including the case where we are sitting on the stopOn sentinel)
        if reject_sentinel is not None:
            reject_sentinel(instring, loc)
        loc, tokens = parse_expr( instring, loc, doActions, callPreParse=False )

        # further repetitions are optional: the first failure ends the loop
        try:
            have_ignorables = bool(self.ignoreExprs)
            while True:
                if reject_sentinel is not None:
                    reject_sentinel(instring, loc)
                preloc = skip_ignorables( instring, loc ) if have_ignorables else loc
                loc, more = parse_expr( instring, preloc, doActions )
                if more or more.haskeys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return (loc, tokens)

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "{" + _ustr(self.expr) + "}..."
            return self.strRepr
        return self.name

    def setResultsName( self, name, listAllMatches=False ):
        named = super(OneOrMore, self).setResultsName(name, listAllMatches)
        named.saveAsList = True
        return named
3760
class ZeroOrMore(OneOrMore):
    """
    Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default=C{None}) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to L{OneOrMore}
    """
    def __init__( self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # a failure of the one-or-more parse simply means "zero repetitions"
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException,IndexError):
            outcome = (loc, [])
        return outcome

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]..."
            return self.strRepr
        return self.name
3791
3792 -class _NullToken(object):
3793 - def __bool__(self):
3794 return False
3795 __nonzero__ = __bool__
3796 - def __str__(self):
3797 return ""
3798 3799 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """
    Optional matching of the given expression.

    Parameters:
     - expr - expression that may be present (matched at most once)
     - default (optional) - value to be returned if the optional expression is not found.

    Example::
        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')
    prints::
        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    def __init__( self, expr, default=_optionalNotMatched ):
        super(Optional, self).__init__( expr, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            # expression absent: substitute the default, if one was supplied
            if self.defaultValue is _optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # keep the default reachable under the expression's results name
                tokens = ParseResults([ self.defaultValue ])
                tokens[self.expr.resultsName] = self.defaultValue
            else:
                tokens = [ self.defaultValue ]
        return (loc, tokens)

    def __str__( self ):
        if not hasattr(self, "name"):
            if self.strRepr is None:
                self.strRepr = "[" + _ustr(self.expr) + "]"
            return self.strRepr
        return self.name
3862
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string failOn is promoted to the configured literal class
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        # Scan forward one character at a time from loc until the target
        # expression parses; return the text skipped over (plus the target's
        # tokens when includeMatch is set).
        startloc = loc
        instrlen = len(instring)
        expr = self.expr
        expr_parse = self.expr._parse
        # bind the optional helpers to locals (None when not configured)
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while's else clause, so
                # control continues to the "build up return values" code below
                # instead of raising - confirm this matches the documented
                # "SkipTo is not a match" behavior for failOn
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are disabled and the result is discarded
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target again, this time honoring doActions, and append its tokens
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3977
class Forward(ParseElementEnhance):
    """
    Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.

    Note: take care when assigning to C{Forward} not to overlook precedence of operators.
    Specifically, '|' has a lower precedence than '<<', so that::
        fwdExpr << a | b | c
    will actually be evaluated as::
        (fwdExpr << a) | b | c
    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the C{Forward}::
        fwdExpr << (a | b | c)
    Converting to use the '<<=' operator instead will avoid this problem.

    See L{ParseResults.pprint} for an example of a recursive parser created using
    C{Forward}.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """
        Bind C{other} as the wrapped expression (a plain string is first
        converted with the configured literal class), copy its parsing
        attributes onto this element, and return C{self}.
        """
        if isinstance( other, basestring ):
            other = ParserElement._literalStringClass(other)
        self.expr = other
        # drop any cached string representation
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        """In-place form of C{<<}; returns C{self}."""
        return self << other

    def leaveWhitespace( self ):
        """Turn off whitespace skipping on this element only; returns C{self}."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the wrapped expression once; returns C{self}."""
        if not self.streamlined:
            # set the flag first so that re-entrant calls on this element return immediately
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        """
        Validate the wrapped expression unless this element is already in
        C{validateTrace}, then run the recursion check on this element.
        """
        # validateTrace is only copied, never modified in place
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """
        Return the assigned name if there is one, otherwise the class name
        followed by C{": ..."}.
        """
        if hasattr(self,"name"):
            return self.name
        # The contained expression is deliberately not rendered: the code that
        # did so was stubbed out for memory and performance reasons and sat,
        # unreachable, after this return.
        return self.__class__.__name__ + ": ..."

    def copy(self):
        """
        Return a copy of this element.  While no expression has been assigned
        yet, the copy is a new C{Forward} that wraps this element.
        """
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret
4058
class _ForwardNoRecurse(Forward):
    """C{Forward} variant whose string form is always the fixed text C{"..."}."""

    def __str__( self ):
        placeholder = "..."
        return placeholder
4062
class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of C{ParseElementEnhance}, for converting parsed results.
    """
    def __init__( self, expr, savelist=False ):
        # savelist is accepted but is not passed on to the base class
        super(TokenConverter, self).__init__( expr )
        self.saveAsList = False
4070
class Combine(TokenConverter):
    """
    Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the input string;
    this can be disabled by specifying C{'adjacent=False'} in the constructor.

    Example::
        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine, self).__init__( expr )
        self.joinString = joinString
        self.adjacent = adjacent
        self.callPreparse = True
        # suppress whitespace-stripping in contained parse expressions ...
        if adjacent:
            self.leaveWhitespace()
        # ... but re-enable it on the Combine itself (must come after leaveWhitespace)
        self.skipWhitespace = True

    def ignore( self, other ):
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            # adjacent mode uses the base ParserElement implementation directly
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        # start from an emptied copy of the incoming results, then add the joined string
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and combined.haskeys():
            return [ combined ]
        return combined
4114
class Group(TokenConverter):
    """
    Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.

    Example::
        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a,b,100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a,b,100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__( self, expr ):
        super(Group, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens one level deeper
        nested = [ tokenlist ]
        return nested
4135
class Dict(TokenConverter):
    """
    Converter to return a repetitive expression as a list, but also as a dictionary.
    Each element can also be referenced using the first token in the expression as its key.
    Useful for tabular report scraping when the first column can be used as an item key.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())
    prints::
        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
    See more examples at L{ParseResults} of accessing fields by results name.
    """
    def __init__( self, expr ):
        super(Dict, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        for position, entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue

            # the first token of each entry is the key; integer keys are stringified
            key = entry[0]
            if isinstance(key, int):
                key = _ustr(key).strip()

            if size == 1:
                # key with no value
                value = ""
            elif size == 2 and not isinstance(entry[1], ParseResults):
                # key with a single plain value
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                lone_plain_value = (
                    len(remainder) == 1
                    and not (isinstance(remainder, ParseResults) and remainder.haskeys())
                )
                value = remainder[0] if lone_plain_value else remainder

            tokenlist[key] = _ParseResultsWithOffset(value, position)

        return [ tokenlist ] if self.resultsName else tokenlist
4199
class Suppress(TokenConverter):
    """
    Converter for ignoring the results of a parsed expression.

    Example::
        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))
    prints::
        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']
    (See also L{delimitedList}.)
    """
    def suppress( self ):
        # already a Suppress, so there is nothing further to wrap
        return self

    def postParse( self, instring, loc, tokenlist ):
        # discard whatever was matched
        return list()
4225
class OnlyOnce(object):
    """
    Wrapper for parse actions, to ensure they are only called once.

    Further calls raise C{ParseException} until C{reset()} is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        outcome = self.callable(s,l,t)
        # mark as used only after the wrapped action completed without raising
        self.called = True
        return outcome

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False
4242
def traceParseAction(f):
    """
    Decorator for debugging parse actions.  Each call of the decorated
    action is reported on C{sys.stderr}: once on entry, and once on exit
    with either the returned value or the raised exception.

    Example::
        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
    prints::
        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        emit = sys.stderr.write
        label = f.__name__
        # the last three arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # a leading extra argument is the bound instance; prefix its class name
            label = paArgs[0].__class__.__name__ + '.' + label
        emit( ">>entering %s(line: '%s', %d, %r)\n" % (label, line(l,s), l, t) )
        try:
            outcome = f(*paArgs)
        except Exception as exc:
            emit( "<<leaving %s (exception: %s)\n" % (label, exc) )
            raise
        emit( "<<leaving %s (ret: %r)\n" % (label, outcome) )
        return outcome

    # present the wrapper under the wrapped action's name when it has one
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to define a delimited list of expressions - the delimiter defaults to ','.
    By default, the list elements and delimiters can have intervening whitespace, and
    comments, but this can be overridden by passing C{combine=True} in the constructor.
    If C{combine} is set to C{True}, the matching tokens are returned as a single token
    string, with the delimiters included; otherwise, the matching tokens are returned
    as a list of tokens, with the delimiters suppressed.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    delimText = _ustr(delim)
    dlName = exprText + " [" + delimText + " " + exprText + "]..."
    if not combine:
        # delimiters are needed for parsing only; keep them out of the results
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    else:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    return listExpr.setName(dlName)
4302
def countedArray( expr, intExpr=None ):
    """
    Helper to define a counted list of expressions.
    This helper defines a pattern of the form::
        integer expr expr expr...
    where the leading integer tells how many expr expressions follow.
    The matched tokens are returned as a list of the expr tokens - the leading count token is suppressed.

    Example::
        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s,l,t):
        # rebuild the array expression for the count that was just parsed
        n = t[0]
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        # returning an empty list removes the count token from the results
        return []

    if intExpr is not None:
        # work on a copy so the caller's expression is left unmodified
        intExpr = intExpr.copy()
    else:
        intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    counted = intExpr + arrayExpr
    return counted.setName('(len) ' + _ustr(expr) + '...')
4327 -def _flatten(L):
4328 ret = [] 4329 for i in L: 4330 if isinstance(i,list): 4331 ret.extend(_flatten(i)) 4332 else: 4333 ret.append(i) 4334 return ret
4335
def matchPreviousLiteral(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches a
    previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
    If this is not desired, use C{matchPreviousExpr}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them as a sequence of literals
            flat_tokens = _flatten(t.asList())
            rep << And(Literal(tok) for tok in flat_tokens)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def matchPreviousExpr(expr):
    """
    Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match C{"1:1"}, but not C{"1:2"}.  Because this matches by
    expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
    the expressions are evaluated first, and then compared, so
    C{"1"} is compared with C{"10"}.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with a copy of the same expression
    rep <<= expr.copy()

    def copyTokenToRepeater(s,l,t):
        # remember what expr matched this time ...
        expected = _flatten(t.asList())

        def mustMatchTheseTokens(s,l,t):
            # ... and reject a repeat whose tokens differ
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")

        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep
def _escapeRegexRangeChars(s):
    """
    Escape the characters in C{s} that are special inside a regex character
    class (backslash, '^', '-' and ']') and spell newline and tab as
    C{\\n} and C{\\t}.
    """
    # the backslash must be handled first so the escapes added afterwards are not doubled
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special, _bslash + special)
    for raw, spelled in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, spelled)
    return _ustr(s)
4399
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12  AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # collections.Iterable was removed in Python 3.10; the ABCs live in
    # collections.abc on Python 3 and in collections on Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # drop duplicates, and move any symbol that is masked by an earlier, shorter
    # prefix ahead of that prefix so the longer alternative is tested first
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing changed for this position; move on to the next symbol
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all symbols are single characters: a character class suffices
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # best effort: fall through to MatchFirst, but do not swallow
            # KeyboardInterrupt/SystemExit as a bare except would
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4472
def dictOf( key, value ):
    """
    Helper to easily and clearly define a dictionary by specifying the respective patterns
    for the key and value.  Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
    in the proper order.  The key pattern can include delimiting markers or punctuation,
    as long as they are suppressed, thereby leaving the significant key text.  The value
    pattern can include named results, so that the C{Dict} results can include named token
    fields.

    Example::
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())
    prints::
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one group per key/value pair, repeated zero or more times, keyed by Dict
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
4507
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized text for a given expression.  Useful to
    restore the parsed fields of an HTML start tag into the raw tag text itself, or to
    revert separate tokens with intervening whitespace back to the original matching
    input text. By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # empty markers that record the parse location before and after expr
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    def sliceToString(s,l,t):
        # replace all tokens with the raw slice of the input
        return s[t._original_start:t._original_end]

    def sliceInPlace(s,l,t):
        # keep the results object (and its names), swapping its tokens for the
        # raw slice and removing the two helper markers
        t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]

    extractText = sliceToString if asString else sliceInPlace
    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr
def ungroup(expr):
    """
    Helper to undo pyparsing's default grouping of And expressions, even
    if all but one are non-empty.  The wrapped expression's results are
    replaced by their first element.
    """
    def firstToken(t):
        return t[0]

    return TokenConverter(expr).setParseAction(firstToken)
def locatedExpr(expr):
    """
    Helper to decorate a returned token with its starting and ending locations in the input string.
    This helper adds the following results names:
     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains C{<TAB>} characters, you may want to call
    C{L{ParserElement.parseWithTabs}}

    Example::
        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)
    prints::
        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    def currentLocation(s,l,t):
        # a zero-width match whose token is the parse location itself
        return l

    startLoc = Empty().setParseAction(currentLocation)
    endLoc = startLoc.copy().leaveWhitespace()
    return Group(startLoc("locn_start") + expr("value") + endLoc("locn_end"))
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# Pieces of the "[...]" mini-grammar that srange() parses.

# _bslash (defined elsewhere in this module) followed by exactly one of the listed
# punctuation characters; the parse action keeps only that second character.
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])

# "\x41", "\X41" or the legacy "\0x41".  The hex digits are everything after the x/X
# marker.  (str.lstrip(r'\0x') must not be used here: it strips a *set* of characters,
# so it also removes leading zero digits - leaving '' for "\x00" - and never removes
# an upper-case 'X', making int() raise in both cases.)
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().split('x',1)[1],16)))

# "\0" followed by octal digits; everything after the backslash is read as base 8.
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))

# one character of the set: an escape form, any single printable other than
# backslash or ']', or a single (unicode) word character
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)

# "a-z" style range, grouped as a [low, high] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)

# the whole bracket expression; the set members are collected under the "body" results name
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
def srange(s):
    r"""
    Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be:
     - a single character
     - an escaped character with a leading backslash (such as C{\-} or C{\]})
     - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
         (C{\0x##} is also supported for backwards compatibility)
     - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
     - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
     - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)

    Returns an empty string if C{s} cannot be parsed or expanded.
    """
    def _expanded(p):
        # a ParseResults part is a [low, high] range pair; anything else is already
        # a single character
        if not isinstance(p,ParseResults):
            return p
        return ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        # best-effort helper: a malformed range yields "" rather than an error, but
        # KeyboardInterrupt/SystemExit are no longer swallowed as a bare except would
        return ""
4613
def matchOnlyAtCol(n):
    """
    Helper method for defining parse actions that require matching at a specific
    column in the input text.  The returned parse action raises C{ParseException}
    when the match location is not at column C{n}.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)

    return verifyCol
def replaceWith(replStr):
    """
    Helper method for common parse actions that simply return a literal value.  Especially
    useful when used with C{L{transformString<ParserElement.transformString>}()}.

    The returned parse action ignores its arguments and returns C{[replStr]}.

    Example::
        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    return lambda strg,locn,toks: [replStr]
4637
def removeQuotes(s,l,t):
    """
    Helper parse action for removing quotation marks from parsed quoted strings.
    Drops the first and last character of the first token.

    Example::
        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
4651
def tokenMap(func, *args):
    """
    Helper to define a parse action by mapping a function to all elements of a ParseResults list.
    If any additional args are passed, they are forwarded to the given function as additional
    arguments after the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))},
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in L{ParserElement.transformString}::
        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')
    prints::
        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s,l,t):
        converted = []
        for tokn in t:
            converted.append(func(tokn, *args))
        return converted

    # name the parse action after the mapped callable; objects without a __name__
    # fall back to their class name, and finally to their str() form
    try:
        fallbackName = getattr(func, '__class__').__name__
        mappedName = getattr(func, '__name__', fallbackName)
    except Exception:
        mappedName = str(func)
    pa.__name__ = mappedName

    return pa

upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""Helper parse action to convert tokens to upper case."""

downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""Helper parse action to convert tokens to lower case."""
def _makeTags(tagStr, xml):
    """
    Internal helper to construct opening and closing tag expressions, given a tag name.

    C{tagStr} may be a string (turned into a C{Keyword}, caseless unless C{xml} is true)
    or an existing expression, in which case its C{name} is used for the results names.
    Returns the C{(openTag, closeTag)} pair; both carry a C{tag} attribute set to the
    tag name.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    attrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute needs a double-quoted value
        attrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrPair = Group( attrName + Suppress("=") + attrValue )
    else:
        # HTML: names are lower-cased, values are optional and may be unquoted
        nonRAbrack = printables.replace(">", "")
        attrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(nonRAbrack)
        attrPair = Group( attrName.setParseAction(downcaseTokens) + Optional( Suppress("=") + attrValue ) )

    # "empty" result is True when the tag ends with "/>", False otherwise
    selfClosing = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr("tag") + Dict(ZeroOrMore(attrPair)) + selfClosing + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # "ns:tag name" -> "NsTagName", used to build the start.../end... results names
    camelName = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+camelName).setName("<%s>" % resname)
    closeTag = closeTag.setResultsName("end"+camelName).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    return openTag, closeTag
4730
def makeHTMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for HTML, given a tag name. Matches
    tags in either upper or lower case, attributes with namespaces and with quoted or unquoted values.

    Example::
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and closing tags as a 2-tuple
        a,a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are also accessible as named results
            print(link.link_text, '->', link.href)
    prints::
        pyparsing -> http://pyparsing.wikispaces.com
    """
    return _makeTags(tagStr, xml=False)
4749
def makeXMLTags(tagStr):
    """
    Helper to construct opening and closing tag expressions for XML, given a tag name. Matches
    tags only in the given upper/lower case.

    Example: similar to L{makeHTMLTags}
    """
    return _makeTags(tagStr, xml=True)
4758
def withAttribute(*args,**attrDict):
    """
    Helper to create a validating parse action to be used with start tags created
    with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    C{<TD>} or C{<DIV>}.

    Call C{withAttribute} with a series of attribute names and values. Specify the list
    of filter attributes names and values as:
     - keyword arguments, as in C{(align="right")}, or
     - as an explicit dict with C{**} operator, when an attribute name is also a Python
          reserved word, as in C{**{"class":"Customer", "align":"right"}}
     - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    For attribute names with a namespace prefix, you must use the second form.  Attribute
    names are matched insensitive to upper/lower case.

    If just testing for C{class} (with or without a namespace), use C{L{withClass}}.

    To verify that the attribute exists, but without specifying a value, pass
    C{withAttribute.ANY_VALUE} as the value.

    Example::
        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) pairs take precedence over keyword arguments
    source = args[:] if args else attrDict.items()
    required = [(k,v) for k,v in source]

    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                if tokens[attrName] != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, tokens[attrName], attrValue))

    return pa

# sentinel meaning "the attribute must be present, whatever its value"
withAttribute.ANY_VALUE = object()
def withClass(classname, namespace=''):
    """
    Simplified version of C{L{withAttribute}} when matching on a div class - made
    difficult because C{class} is a reserved word in Python.  When C{namespace} is
    given, the attribute tested is C{"<namespace>:class"}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    return withAttribute(**{classattr : classname})
# namespace object holding the two associativity markers accepted by infixNotation;
# each marker is a unique sentinel object
opAssoc = _Constants()
opAssoc.LEFT, opAssoc.RIGHT = object(), object()
def infixNotation( baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')') ):
    """
    Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions.

    Parameters:
     - baseExpr - expression representing the most basic element for the nested
       expression grammar
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
           may also be a string, which will be converted to a Literal;
           if numTerms is 3, opExpr is a tuple (or list) of two expressions, for the
           two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
           be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
           right or left associative, using the pyparsing-defined
           constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
        - parseAction is the parse action to be associated with
           expressions matching this operator expression (the
           parse action tuple member may be omitted)
     - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
     - rpar - expression for matching right-parentheses (default=C{Suppress(')')})

    Raises C{ValueError} if a ternary level does not supply exactly two operator
    expressions, if numTerms is not 1, 2, or 3, or if the associativity is neither
    C{opAssoc.LEFT} nor C{opAssoc.RIGHT}.

    Example::
        # simple example of four-function arithmetic with ints and variable names
        integer = pyparsing_common.signedInteger
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)
    prints::
        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    ret = Forward()
    lastExpr = baseExpr | ( lpar + ret + rpar )
    for operDef in opList:
        # pad with None so the optional parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            # validate BEFORE formatting the name: "%s%s term" % opExpr raised TypeError
            # for a list, None or a wrong-length tuple, hiding this ValueError
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: adjacent terms form the binary operation
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # each level matches its own operator form or falls through to the previous level
        thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
# backward-compatible alias: both names refer to the same function object
operatorPrecedence = infixNotation
"""(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""

# Quoted-string expressions.  Inside the quotes each regex accepts: any character other
# than the quote itself, newline, carriage return or backslash; a doubled quote; or a
# backslash escape (either "\x" followed by hex digits, or a backslash plus any single
# character other than "x").  The closing quote is matched separately and Combine'd
# with the body.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
# either quoting style; repeats the two patterns above rather than reusing the objects
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
                       Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
# a quoted string with a leading "u" prefix
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """
    Helper method for defining nested lists enclosed in opening and closing
    delimiters ("(" and ")" are the default).

    Parameters:
     - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
     - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
     - content - expression for items within the nested lists (default=C{None})
     - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})

    If an expression is not provided for the content argument, the nested
    expression will capture all whitespace-delimited content between delimiters
    as a list of separate values.

    Use the C{ignoreExpr} argument to define expressions that may contain
    opening or closing characters that should not be treated as opening
    or closing characters for nesting, such as quotedString or a comment
    expression.  Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
    The default is L{quotedString}, but if no expressions are to be ignored,
    then pass C{None} for this argument.

    Raises C{ValueError} if C{opener} equals C{closer}, or if C{content} is not
    given and C{opener}/C{closer} are not both strings.

    Example::
        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR,RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)

    prints::
        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # a default content expression can only be built from string delimiters
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters: content is a run of characters that are
                # neither delimiter nor default whitespace
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters: take one non-whitespace character at a
                # time, each guarded by ~Literal(opener) and ~Literal(closer)
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    # recursive definition: a nested list contains ignorable text, further nested
    # lists, or content items, between suppressed delimiters
    ret = Forward()
    if ignoreExpr is not None:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    ret.setName('nested %s%s expression' % (opener,closer))
    return ret
5063
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond the
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{indentStack} is modified while parsing, and that
    C{blockStatementExpr.ignore(...)} is called on the expression passed in.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def checkPeerIndent(s,l,t):
        # a statement of the current block must start at the column on top of the stack
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            # deeper than expected is a fatal error; shallower is an ordinary mismatch
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        # entering a block: the column must be deeper than the current level,
        # and becomes the new top of the stack
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # leaving a block: the column must be back at or left of the enclosing level
        if l >= len(s): return
        curCol = col(l,s)
        # NOTE(review): indentStack[-2] needs at least two entries; the
        # "indentStack and" guard only ensures the stack is non-empty
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # one or more suppressed line ends, with only tab and space treated as skippable whitespace
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    # zero-width expressions that exist only to run the indentation checks above
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER   = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group( Optional(NL) +
            #~ FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # presumably allows backslash line continuations inside statements - TODO confirm
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

# character sets built with srange() from 8-bit code point ranges
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# opening/closing tag expressions matching any tag name
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character (nbsp maps to a plain space)
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&name;" for the names above, capturing the name in the "entity" group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """
    Helper parser action to replace common HTML entities with their special characters.
    Looks up the C{entity} named result in the module's entity map and returns the
    mapped character, or C{None} if the name is not in the map.
    """
    entityName = t.entity
    return _htmlEntityMap.get(entityName)
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*", then any run of non-"*" characters or "*" not followed by "/", then the closing "*/"
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

# non-greedy match, so the comment ends at the first "-->"
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to (not including) the next newline, without skipping leading whitespace
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# "//" to end of line; a backslash immediately before the newline continues the comment
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

# alias: the same expression object as cppStyleComment
javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one list item: printable words (no commas), optionally followed by spaces/tabs as
# long as those are not directly followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# each item is optional and defaults to "" when absent
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
5213 5214 # some other useful expressions - using lower-case class name since we are really using this as a namespace 5215 -class pyparsing_common:
5216 """ 5217 Here are some common low-level expressions that may be useful in jump-starting parser development: 5218 - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>}) 5219 - common L{programming identifiers<identifier>} 5220 - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>}) 5221 - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>} 5222 - L{UUID<uuid>} 5223 Parse actions: 5224 - C{L{convertToInteger}} 5225 - C{L{convertToFloat}} 5226 - C{L{convertToDate}} 5227 - C{L{convertToDatetime}} 5228 - C{L{stripHTMLTags}} 5229 5230 Example:: 5231 pyparsing_common.number.runTests(''' 5232 # any int or real number, returned as the appropriate type 5233 100 5234 -100 5235 +100 5236 3.14159 5237 6.02e23 5238 1e-12 5239 ''') 5240 5241 pyparsing_common.fnumber.runTests(''' 5242 # any int or real number, returned as float 5243 100 5244 -100 5245 +100 5246 3.14159 5247 6.02e23 5248 1e-12 5249 ''') 5250 5251 pyparsing_common.hex_integer.runTests(''' 5252 # hex numbers 5253 100 5254 FF 5255 ''') 5256 5257 pyparsing_common.fraction.runTests(''' 5258 # fractions 5259 1/2 5260 -3/4 5261 ''') 5262 5263 pyparsing_common.mixed_integer.runTests(''' 5264 # mixed fractions 5265 1 5266 1/2 5267 -3/4 5268 1-3/4 5269 ''') 5270 5271 import uuid 5272 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5273 pyparsing_common.uuid.runTests(''' 5274 # uuid 5275 12345678-1234-5678-1234-567812345678 5276 ''') 5277 prints:: 5278 # any int or real number, returned as the appropriate type 5279 100 5280 [100] 5281 5282 -100 5283 [-100] 5284 5285 +100 5286 [100] 5287 5288 3.14159 5289 [3.14159] 5290 5291 6.02e23 5292 [6.02e+23] 5293 5294 1e-12 5295 [1e-12] 5296 5297 # any int or real number, returned as float 5298 100 5299 [100.0] 5300 5301 -100 5302 [-100.0] 5303 5304 +100 5305 [100.0] 5306 5307 3.14159 5308 [3.14159] 5309 5310 6.02e23 5311 [6.02e+23] 5312 5313 1e-12 5314 [1e-12] 5315 5316 # hex numbers 5317 100 
5318 [256] 5319 5320 FF 5321 [255] 5322 5323 # fractions 5324 1/2 5325 [0.5] 5326 5327 -3/4 5328 [-0.75] 5329 5330 # mixed fractions 5331 1 5332 [1] 5333 5334 1/2 5335 [0.5] 5336 5337 -3/4 5338 [-0.75] 5339 5340 1-3/4 5341 [1.75] 5342 5343 # uuid 5344 12345678-1234-5678-1234-567812345678 5345 [UUID('12345678-1234-5678-1234-567812345678')] 5346 """ 5347 5348 convertToInteger = tokenMap(int) 5349 """ 5350 Parse action for converting parsed integers to Python int 5351 """ 5352 5353 convertToFloat = tokenMap(float) 5354 """ 5355 Parse action for converting parsed numbers to Python float 5356 """ 5357 5358 integer = Word(nums).setName("integer").setParseAction(convertToInteger) 5359 """expression that parses an unsigned integer, returns an int""" 5360 5361 hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16)) 5362 """expression that parses a hexadecimal integer, returns an int""" 5363 5364 signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger) 5365 """expression that parses an integer with optional leading sign, returns an int""" 5366 5367 fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction") 5368 """fractional expression of an integer divided by an integer, returns a float""" 5369 fraction.addParseAction(lambda t: t[0]/t[-1]) 5370 5371 mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction") 5372 """mixed integer of the form 'integer - fraction', with optional leading integer, returns float""" 5373 mixed_integer.addParseAction(sum) 5374 5375 real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat) 5376 """expression that parses a floating point number and returns a float""" 5377 5378 sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific 
notation").setParseAction(convertToFloat) 5379 """expression that parses a floating point number with optional scientific notation and returns a float""" 5380 5381 # streamlining this expression makes the docs nicer-looking 5382 number = (sciReal | real | signedInteger).streamline() 5383 """any numeric expression, returns the corresponding Python type""" 5384 5385 fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat) 5386 """any int or real number, returned as float""" 5387 5388 identifier = Word(alphas+'_', alphanums+'_').setName("identifier") 5389 """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')""" 5390 5391 ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address") 5392 "IPv4 address (C{0.0.0.0 - 255.255.255.255})" 5393 5394 _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer") 5395 _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address") 5396 _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address") 5397 _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8) 5398 _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address") 5399 ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address") 5400 "IPv6 address (long, short, or mixed form)" 5401 5402 mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address") 5403 "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)" 5404 5405 @staticmethod
def convertToDate(fmt="%Y-%m-%d"):
    """
    Helper to create a parse action for converting parsed date string to Python datetime.date

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

    The returned parse action raises C{ParseException} when the matched text
    cannot be converted with C{fmt} (for instance C{"1999-13-45"}), instead of
    letting the C{ValueError} from C{strptime} escape.

    Example::
        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.setParseAction(pyparsing_common.convertToDate())
        print(date_expr.parseString("1999-12-31"))
    prints::
        [datetime.date(1999, 12, 31)]
    """
    def cvt_fn(s, l, t):
        try:
            return datetime.strptime(t[0], fmt).date()
        except ValueError as ve:
            # text matched the grammar but is not a real date for this format:
            # report it as a parse failure at this location, not a raw ValueError
            raise ParseException(s, l, str(ve))
    return cvt_fn
5421 5422 @staticmethod
def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
    """
    Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

    Params -
     - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

    The returned parse action raises C{ParseException} when the matched text
    cannot be converted with C{fmt}, instead of letting the C{ValueError}
    from C{strptime} escape.

    Example::
        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.setParseAction(pyparsing_common.convertToDatetime())
        print(dt_expr.parseString("1999-12-31T23:59:59.999"))
    prints::
        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """
    def cvt_fn(s, l, t):
        try:
            return datetime.strptime(t[0], fmt)
        except ValueError as ve:
            # text matched the grammar but does not fit the format / is not a
            # real timestamp: report it as a parse failure, not a raw ValueError
            raise ParseException(s, l, str(ve))
    return cvt_fn
# Class-level expressions of pyparsing_common (source lines 5439-5448 of the listing):
#  - iso8601_date: Regex with named groups year, month, day; month and day are optional in the pattern.
#  - iso8601_datetime: Regex with named groups year, month, day, hour, minute, second, tz;
#    seconds and timezone are optional, and either 'T' or a space may separate date and time.
#  - uuid: Regex for hexadecimal text laid out as 8-4-4-4-12 digits.
#  - _html_stripper: suppresses any opening or closing tag; used by stripHTMLTags.
# The trailing @staticmethod belongs to the def that follows.
5438 5439 iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date") 5440 "ISO8601 date (C{yyyy-mm-dd})" 5441 5442 iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime") 5443 "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}" 5444 5445 uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID") 5446 "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})" 5447 5448 _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress() 5449 @staticmethod
def stripHTMLTags(s, l, tokens):
    """
    Parse action that returns the first matched token with HTML tags removed

    Only C{tokens[0]} is used; it is run through the class-level
    C{_html_stripper} expression with C{transformString}.

    Example::
        # strip HTML links from normal text
        text = '<td>More info at the <a href="http://pyparsing.wikispaces.com">pyparsing</a> wiki page</td>'
        td,td_end = makeHTMLTags("TD")
        table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end

        print(table_text.parseString(text).body) # -> 'More info at the pyparsing wiki page'
    """
    # bind the transformer first, then feed it the matched text
    strip_tags = pyparsing_common._html_stripper.transformString
    return strip_tags(tokens[0])
# Script entry point (source lines 5464-5533 of the listing): when the module is run directly it
# builds a small SELECT-statement grammar (simpleSQL) and calls runTests on it, then on
# pyparsing_common.number, fnumber, hex_integer and uuid. The uuid expression is first given a
# tokenMap(uuid.UUID) parse action.
5463 5464 if __name__ == "__main__": 5465 5466 selectToken = CaselessLiteral("select") 5467 fromToken = CaselessLiteral("from") 5468 5469 ident = Word(alphas, alphanums + "_$") 5470 5471 columnName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5472 columnNameList = Group(delimitedList(columnName)).setName("columns") 5473 columnSpec = ('*' | columnNameList) 5474 5475 tableName = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens) 5476 tableNameList = Group(delimitedList(tableName)).setName("tables") 5477 5478 simpleSQL = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables") 5479 5480 # demo runTests method, including embedded comments in test string 5481 simpleSQL.runTests(""" 5482 # '*' as column list and dotted table name 5483 select * from SYS.XYZZY 5484 5485 # caseless match on "SELECT", and casts back to "select" 5486 SELECT * from XYZZY, ABC 5487 5488 # list of column names, and mixed case SELECT keyword 5489 Select AA,BB,CC from Sys.dual 5490 5491 # multiple tables 5492 Select A, B, C from Sys.dual, Table2 5493 5494 # invalid SELECT keyword - should fail 5495 Xelect A, B, C from Sys.dual 5496 5497 # incomplete command - should fail 5498 Select 5499 5500 # invalid column name - should fail 5501 Select ^^^ frox Sys.dual 5502 5503 """) 5504 5505 pyparsing_common.number.runTests(""" 5506 100 5507 -100 5508 +100 5509 3.14159 5510 6.02e23 5511 1e-12 5512 """) 5513 5514 # any int or real number, returned as float 5515 pyparsing_common.fnumber.runTests(""" 5516 100 5517 -100 5518 +100 5519 3.14159 5520 6.02e23 5521 1e-12 5522 """) 5523 5524 pyparsing_common.hex_integer.runTests(""" 5525 100 5526 FF 5527 """) 5528 5529 import uuid 5530 pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID)) 5531 pyparsing_common.uuid.runTests(""" 5532 12345678-1234-5678-1234-567812345678 5533 """) 5534