Package spade :: Module pyparsing
[hide private]
[frames] | no frames]

Source Code for Module spade.pyparsing

   1  # module pyparsing.py
 
   2  #
 
   3  # Copyright (c) 2003,2004,2005  Paul T. McGuire
 
   4  #
 
   5  # Permission is hereby granted, free of charge, to any person obtaining
 
   6  # a copy of this software and associated documentation files (the
 
   7  # "Software"), to deal in the Software without restriction, including
 
   8  # without limitation the rights to use, copy, modify, merge, publish,
 
   9  # distribute, sublicense, and/or sell copies of the Software, and to
 
  10  # permit persons to whom the Software is furnished to do so, subject to
 
  11  # the following conditions:
 
  12  #
 
  13  # The above copyright notice and this permission notice shall be
 
  14  # included in all copies or substantial portions of the Software.
 
  15  #
 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
  23  #
 
  24  #  Todo:
 
  25  #  - add pprint() - pretty-print output of defined BNF
 
  26  #
 
  27  from __future__ import generators 
  28  
 
  29  __doc__ = \
 
  30  """
 
  31  pyparsing module - Classes and methods to define and execute parsing grammars
 
  32  
 
  33  The pyparsing module is an alternative approach to creating and executing simple grammars, 
 
  34  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
 
  35  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
 
  36  provides a library of classes that you use to construct the grammar directly in Python.
 
  37  
 
  38  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
 
  39  
 
  40      from pyparsing import Word, alphas
 
  41      
 
  42      # define grammar of a greeting
 
  43      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
 
  44      
 
  45      hello = "Hello, World!"
 
  46      print hello, "->", greet.parseString( hello )
 
  47  
 
  48  The program outputs the following::
 
  49  
 
  50      Hello, World! -> ['Hello', ',', 'World', '!']
 
  51  
 
  52  The Python representation of the grammar is quite readable, owing to the self-explanatory 
 
  53  class names, and the use of '+', '|' and '^' operators.
 
  54  
 
  55  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
 
  56  object with named attributes.
 
  57  
 
  58  The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 
  59   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.)
 
  60   - quoted strings
 
  61   - embedded comments
 
  62  """ 
  63  __version__ = "1.3.3" 
  64  __versionTime__ = "12 September 2005 22:50" 
  65  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  66  
 
  67  import string 
  68  import copy,sys 
  69  import warnings 
  70  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
 
  71  
 
def _ustr(obj):
    """Drop-in replacement for str(obj) that tries to be Unicode friendly.

    It first tries str(obj).  If that fails with a UnicodeEncodeError, it
    falls back to unicode(obj) and returns the resulting unicode object
    as-is (no re-encoding is attempted).
    """
    try:
        # If this works, then _ustr(obj) has the same behaviour as str(obj),
        # so it won't break any existing code.
        return str(obj)
    except UnicodeEncodeError:
        # The Python docs (http://docs.python.org/ref/customization.html#l2h-182)
        # state that "The return value must be a string object".  Whether a
        # unicode object (being a subclass of basestring) counts as a "string
        # object" is left open there; this implementation assumes it does and
        # returns the unicode object.
        #
        # Alternatives that were considered and left disabled:
        #   - replace unprintables with escape codes:
        #       unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors')
        #   - replace unprintables with question marks:
        #       unicode(obj).encode(sys.getdefaultencoding(), 'replace')
        return unicode(obj)
def _str2dict(strg):
    """Return a dict keyed by each distinct character of strg, every value being 0."""
    return dict.fromkeys(strg, 0)
98 99 alphas = string.lowercase + string.uppercase 100 nums = string.digits 101 hexnums = nums + "ABCDEFabcdef" 102 alphanums = alphas + nums 103
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""
    __slots__ = ("loc", "msg", "pstr", "parserElement")

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc, msg, elem=None):
        self.pstr = pstr
        self.loc = loc
        self.msg = msg
        self.parserElement = elem

    def __getattr__(self, aname):
        """Compute position attributes on demand from loc and pstr.

        Supported attributes by name are:
         - lineno - returns the line number of the exception text
         - col (or column) - returns the column number of the exception text
         - line - returns the line containing the exception text
        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        details = (self.msg, self.loc, self.lineno, self.column)
        return "%s (at char %d), (line:%d, col:%d)" % details

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Extracts the exception line from the input string, and marks
        the location of the exception with a special symbol.

        The returned line has leading and trailing whitespace stripped.
        """
        text = self.line
        if markerString:
            # column is 1-based; convert to a 0-based slice position
            pos = self.column - 1
            text = text[:pos] + markerString + text[pos:]
        return text.strip()
143
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class

    supported attributes by name are:
     - lineno - returns the line number of the exception text
     - col - returns the column number of the exception text
     - line - returns the line containing the exception text
    """
152
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
    is found; stops all parsing immediately
    """
157
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""

    def __init__(self, parseElementList):
        # the list of parse elements handed in by the caller, kept for __str__
        self.parseElementTrace = parseElementList

    def __str__(self):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
165
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
     - as a list (len(results))
     - by list index (results[0], results[1], etc.)
     - by results name, as a key (results["name"]) or attribute (results.<resultsName>)

    Named results are kept in an internal dict mapping each name to a list of
    (value, position) tuples; position is the index of the value in the token
    list, or 0/-1 where no specific index is recorded.
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__modal" )

    def __new__(cls, toklist, name=None, asList=True, modal=True):
        # Wrapping an existing ParseResults returns that very object; its
        # __doinit flag is already False, so __init__ will not reset it.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, toklist, name=None, asList=True, modal=True):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__modal = modal
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not self.__name:
                self.__modal = self.__modal and modal
            if isinstance(name, int):
                name = _ustr(name)  # will always return a str, but use _ustr for consistency
            self.__name = name
            if toklist:
                if isinstance(toklist, basestring):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = (toklist.copy(), -1)
                    else:
                        self[name] = (ParseResults(toklist[0]), -1)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except TypeError:
                        self[name] = toklist

    def __getitem__(self, i):
        """Integer or slice: index into the token list.  Anything else is a
        results name: the last value stored under it (modal), or a
        ParseResults of every value stored under it (non-modal)."""
        if isinstance(i, (int, slice)):
            return self.__toklist[i]
        else:
            if self.__modal:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([v[0] for v in self.__tokdict[i]])

    def __setitem__(self, k, v):
        """Append v to the values stored under name k.  A tuple is stored
        as given, (value, position); any other value is stored with position 0."""
        if isinstance(v, tuple):
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [v]
            sub = v[0]
        else:
            self.__tokdict[k] = self.__tokdict.get(k, list()) + [(v, 0)]
            sub = v
        if isinstance(sub, ParseResults):
            sub.__parent = self

    def __delitem__(self, i):
        # only the token list is touched; named results are left as they are
        del self.__toklist[i]

    def __contains__(self, k):
        return k in self.__tokdict

    def __len__(self): return len(self.__toklist)

    def __iter__(self): return iter(self.__toklist)

    def keys(self):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def items(self):
        """Returns all named result keys and values as a list of tuples."""
        return [(k, v[-1][0]) for k, v in self.__tokdict.items()]

    def values(self):
        """Returns all named result values."""
        return [v[-1][0] for v in self.__tokdict.values()]

    def __getattr__(self, name):
        """Attribute-style access to named results.  An unknown results name
        yields "" rather than raising; a name listed in __slots__ yields None."""
        if name not in self.__slots__:
            if name in self.__tokdict:
                if self.__modal:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([v[0] for v in self.__tokdict[name]])
            else:
                return ""
        return None

    def __iadd__(self, other):
        """Extend this result with other's tokens and named results."""
        if other.__tokdict:
            offset = len(self.__toklist)
            # positions from other are shifted by the current token count;
            # negative positions are replaced using the and/or expression below
            addoffset = ( lambda a: (a<0 and offset) or (a+offset) )
            otherdictitems = [(k, (v[0], addoffset(v[1])))
                              for (k, vlist) in other.__tokdict.items()
                              for v in vlist]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0].__parent = self
        self.__toklist += other.__toklist
        return self

    def __repr__(self):
        return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict))

    def __str__(self):
        parts = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                parts.append(_ustr(i))
            else:
                parts.append(repr(i))
        return "[" + ", ".join(parts) + "]"

    def _asStringList(self, sep=''):
        """Flatten the tokens into a list of strings, placing sep between
        top-level items when sep is non-empty."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(_ustr(item))
        return out

    def asList(self):
        """Returns the parse results as a nested list of matching tokens.
        Nested ParseResults become nested lists; other tokens are returned unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res, ParseResults):
                out.append(res.asList())
            else:
                out.append(res)
        return out

    def asDict(self):
        """Returns the named parse results as dictionary."""
        return dict(self.items())

    def copy(self):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults(self.__toklist)
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__modal = self.__modal
        ret.__name = self.__name
        return ret

    def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token-list position -> results name
        namedItems = dict([(v[1], k) for (k, vlist) in self.__tokdict.items()
                                     for v in vlist])
        # NOTE(review): whitespace in the listing this was taken from is
        # collapsed, so the width of this indent literal should be confirmed.
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [nl, indent, "<", selfTag, ">"]

        worklist = self.__toklist
        for i, res in enumerate(worklist):
            if isinstance(res, ParseResults):
                # nested results render themselves; tag is None when unnamed
                out += [res.asXML(namedItems.get(i),
                                  namedItemsOnly and doctag is None,
                                  nextLevelIndent, formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                # NOTE(review): token text is inserted without XML escaping
                out += [nl, nextLevelIndent, "<", resTag, ">", _ustr(res), "</", resTag, ">"]

        out += [nl, indent, "</", selfTag, ">"]
        return "".join(out)

    def __lookup(self, sub):
        # find the results name under which the object `sub` is stored
        for k, vlist in self.__tokdict.items():
            for v, loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            # truth test uses __len__, so an empty parent is skipped
            return self.__parent.__lookup(self)
        elif (len(self) == 1 and
              len(self.__tokdict) == 1 and
              list(self.__tokdict.values())[0][0][1] in (0, -1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None
401
def col(loc, strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.
    """
    # rfind yields -1 when no newline precedes loc, which makes the first
    # line's columns come out 1-based as well
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
407
def lineno(loc, strg):
    """Returns current line number within a string, counting newlines as line separators.
    The first line is number 1.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
413
def line(loc, strg):
    """Returns the line of text containing loc within a string, counting
    newlines as line separators.  The returned text does not include the
    terminating newline.
    """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "no newline" with -1; index 0 is a real match (loc at a
    # newline that is the very first character), so compare with >= 0
    if nextCR >= 0:
        return strg[lastCR + 1:nextCR]
    return strg[lastCR + 1:]
424
def _defaultStartDebugAction(instring, loc, expr):
    """Default debug hook run before a match attempt: prints the expression
    and the location (plus line and column) where matching starts."""
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print("Match %s at loc %s %s" % (expr, loc, position))
427
def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug hook run after a successful match: prints the expression
    and the matched tokens as a list."""
    matched = toks.asList()
    print("Matched %s -> %s" % (expr, matched))
430
def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug hook run when a match attempt raises: prints the exception."""
    print("Exception raised: %s" % (exc,))
433
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None
437
class ParserElement(object):
    """Abstract base level parser element class."""
    DEFAULT_WHITE_CHARS = " \n\t\r"

    def setDefaultWhitespaceChars(chars):
        """Overrides the default whitespace chars
        """
        ParserElement.DEFAULT_WHITE_CHARS = chars
    setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)

    def __init__(self, savelist=False):
        self.parseAction = None
        #~ self.name = "<unknown>"  # don't define self.name, let subclasses try/except upcall
        self.strRepr = None
        self.resultsName = None
        self.saveAsList = savelist
        self.skipWhitespace = True
        self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        self.mayReturnEmpty = False
        self.keepTabs = False
        self.ignoreExprs = []
        self.debug = False
        self.streamlined = False
        self.mayIndexError = True
        self.errmsg = ""
        self.modalResults = True
        # (start, success, exception) debug callbacks; see setDebugActions
        self.debugActions = (None, None, None)

    def copy(self):
        """Make a copy of this ParseElement.  Useful for defining different parse actions
        for the same parsing pattern, using copies of the original parse element.
        The copy is shallow, and its whitespace chars are reset to the current default."""
        cpy = copy.copy(self)
        cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        return cpy

    def setName(self, name):
        """Define name for this expression, for use in debugging."""
        self.name = name
        self.errmsg = "Expected " + self.name
        return self

    def setResultsName(self, name, listAllMatches=False):
        """Define name for referencing matching tokens as a nested attribute
        of the returned parse results.
        NOTE: this returns a *copy* of the original ParseElement object;
        this is so that the client can define a basic element, such as an
        integer, and reference it in multiple places with different names.
        """
        newself = self.copy()
        newself.resultsName = name
        newself.modalResults = not listAllMatches
        return newself

    def setParseAction(self, fn):
        """Define action to perform when successfully matching parse element definition.
        Parse action fn is a callable method with the arguments (s, loc, toks) where:
         - s    = the original string being parsed
         - loc  = the location of the matching substring
         - toks = a list of the matched tokens, packaged as a ParseResults object
        If the function fn modifies the tokens, it can return them as the return
        value from fn, and the modified list of tokens will replace the original.
        Otherwise, fn does not need to return any value.
        """
        self.parseAction = fn
        return self

    def skipIgnorables(self, instring, loc):
        """Advance loc past every match of the ignore expressions, repeating
        until a full pass over them matches nothing."""
        exprsFound = True
        while exprsFound:
            exprsFound = False
            for e in self.ignoreExprs:
                try:
                    while 1:
                        loc, dummy = e.parse(instring, loc)
                        exprsFound = True
                except ParseException:
                    pass
        return loc

    def preParse(self, instring, loc):
        """Skip ignorable expressions and then leading whitespace; returns the new loc."""
        if self.ignoreExprs:
            loc = self.skipIgnorables(instring, loc)

        if self.skipWhitespace:
            wt = self.whiteChars
            instrlen = len(instring)
            while loc < instrlen and instring[loc] in wt:
                loc += 1

        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Matching hook overridden by subclasses; the base version matches
        nothing and returns loc with an empty token list."""
        return loc, []

    def postParse(self, instring, loc, tokenlist):
        """Post-processing hook; the base version returns tokenlist unchanged."""
        return tokenlist

    #~ @profile
    def parse(self, instring, loc, doActions=True, callPreParse=True):
        """Match this element at loc and return (newloc, ParseResults).

        An IndexError raised by parseImpl is converted into a ParseException
        located at the end of the input.  The parse action, if any, runs only
        when doActions is true.  The debug and non-debug paths are kept
        separate so the common non-debug path stays short.
        """
        debugging = self.debug  #and doActions

        if debugging:
            #~ print "Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
            if self.debugActions[0]:
                self.debugActions[0](instring, loc, self)
            if callPreParse:
                loc = self.preParse(instring, loc)
            tokensStart = loc
            try:
                try:
                    loc, tokens = self.parseImpl(instring, loc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            except ParseException:
                # fetch the active exception without version-specific
                # "except X, name" syntax
                err = sys.exc_info()[1]
                #~ print "Exception raised:", err
                if self.debugActions[2]:
                    self.debugActions[2](instring, tokensStart, self, err)
                raise
        else:
            if callPreParse:
                loc = self.preParse(instring, loc)
            tokensStart = loc
            # the IndexError guard is only set up when it can be needed
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc, tokens = self.parseImpl(instring, loc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, loc, doActions)

        tokens = self.postParse(instring, loc, tokens)

        retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
        if self.parseAction and doActions:
            if debugging:
                try:
                    tokens = self.parseAction(instring, tokensStart, retTokens)
                    if tokens is not None:
                        # a tuple return carries the tokens in element 1
                        if isinstance(tokens, tuple):
                            tokens = tokens[1]
                        retTokens = ParseResults(tokens,
                                                 self.resultsName,
                                                 asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                 modal=self.modalResults)
                except ParseException:
                    err = sys.exc_info()[1]
                    #~ print "Exception raised in user parse action:", err
                    if self.debugActions[2]:
                        self.debugActions[2](instring, tokensStart, self, err)
                    raise
            else:
                tokens = self.parseAction(instring, tokensStart, retTokens)
                if tokens is not None:
                    if isinstance(tokens, tuple):
                        tokens = tokens[1]
                    retTokens = ParseResults(tokens,
                                             self.resultsName,
                                             asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                             modal=self.modalResults)

        if debugging:
            #~ print "Matched",self,"->",retTokens.asList()
            if self.debugActions[1]:
                self.debugActions[1](instring, tokensStart, loc, self, retTokens)

        return loc, retTokens

    def tryParse(self, instring, loc):
        """Attempt a match without running parse actions; returns the end location."""
        return self.parse(instring, loc, doActions=False)[0]

    def parseString(self, instring):
        """Execute the parse expression with the given string.
        This is the main interface to the client code, once the complete
        expression has been built.
        """
        if not self.streamlined:
            self.streamline()
            self.saveAsList = True
        for e in self.ignoreExprs:
            e.streamline()
        if self.keepTabs:
            loc, tokens = self.parse(instring, 0)
        else:
            loc, tokens = self.parse(instring.expandtabs(), 0)
        return tokens

    def scanString(self, instring):
        """Scan the input string for expression matches.  Each match will return the matching tokens, start location, and end location."""
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = instring.expandtabs()
        instrlen = len(instring)
        loc = 0
        preparseFn = self.preParse
        parseFn = self.parse
        while loc < instrlen:
            try:
                loc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, loc, callPreParse=False)
            except ParseException:
                loc += 1
            else:
                yield tokens, loc, nextLoc
                if nextLoc > loc:
                    loc = nextLoc
                else:
                    # zero-width match: step forward so the scan always ends
                    loc += 1

    def transformString(self, instring):
        """Extension to scanString, to modify matching text with modified tokens that may
        be returned from a parse action.  To use transformString, define a grammar and
        attach a parse action to it that modifies the returned token list.
        Invoking transformString() on a target string will then scan for matches,
        and replace the matched text patterns according to the logic in the parse
        action.  transformString() returns the resulting transformed string."""
        out = []
        lastE = 0
        # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
        # keep string locs straight between transformString and scanString
        self.keepTabs = True
        for t, s, e in self.scanString(instring):
            out.append(instring[lastE:s])
            if t:
                if isinstance(t, ParseResults):
                    out += t.asList()
                elif isinstance(t, list):
                    out += t
                else:
                    out.append(t)
            lastE = e
        out.append(instring[lastE:])
        return "".join(out)

    def __add__(self, other):
        """Implementation of + operator - returns And"""
        if isinstance(other, basestring):
            other = Literal(other)
        return And([self, other])

    def __radd__(self, other):
        """Implementation of + operator when the left operand is not a ParserElement (e.g. a string)"""
        if isinstance(other, basestring):
            other = Literal(other)
        elif not isinstance(other, ParserElement):
            # retrying "other + self" would land back here and recurse forever
            return NotImplemented
        return other + self

    def __or__(self, other):
        """Implementation of | operator - returns MatchFirst"""
        if isinstance(other, basestring):
            other = Literal(other)
        return MatchFirst([self, other])

    def __ror__(self, other):
        """Implementation of | operator when the left operand is not a ParserElement (e.g. a string)"""
        if isinstance(other, basestring):
            other = Literal(other)
        elif not isinstance(other, ParserElement):
            return NotImplemented
        return other | self

    def __xor__(self, other):
        """Implementation of ^ operator - returns Or"""
        if isinstance(other, basestring):
            other = Literal(other)
        return Or([self, other])

    def __rxor__(self, other):
        """Implementation of ^ operator when the left operand is not a ParserElement (e.g. a string)"""
        if isinstance(other, basestring):
            other = Literal(other)
        elif not isinstance(other, ParserElement):
            return NotImplemented
        return other ^ self

    def __and__(self, other):
        """Implementation of & operator - returns Each"""
        if isinstance(other, basestring):
            other = Literal(other)
        return Each([self, other])

    def __rand__(self, other):
        """Implementation of right-& operator"""
        if isinstance(other, basestring):
            other = Literal(other)
        elif not isinstance(other, ParserElement):
            return NotImplemented
        return other & self

    def __invert__(self):
        """Implementation of ~ operator - returns NotAny"""
        return NotAny(self)

    def suppress(self):
        """Suppresses the output of this ParseElement; useful to keep punctuation from
        cluttering up returned output.
        """
        return Suppress(self)

    def leaveWhitespace(self):
        """Disables the skipping of whitespace before matching the characters in the
        ParserElement's defined pattern.  This is normally only used internally by
        the pyparsing module, but may be needed in some whitespace-sensitive grammars.
        """
        self.skipWhitespace = False
        return self

    def setWhitespaceChars(self, chars):
        """Overrides the default whitespace chars for this element and
        re-enables whitespace skipping.  Returns self, like the other setters.
        """
        self.skipWhitespace = True
        self.whiteChars = chars
        return self

    def parseWithTabs(self):
        """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
        Must be called before parseString when the input grammar contains elements that
        match <TAB> characters."""
        self.keepTabs = True
        return self

    def ignore(self, other):
        """Define expression to be ignored (e.g., comments) while doing pattern
        matching; may be called repeatedly, to define multiple comment or other
        ignorable patterns.
        """
        if isinstance(other, Suppress):
            if other not in self.ignoreExprs:
                self.ignoreExprs.append(other)
        else:
            self.ignoreExprs.append(Suppress(other))
        return self

    def setDebugActions(self, startAction, successAction, exceptionAction):
        """Enable display of debugging messages while doing pattern matching,
        using the given callbacks; a false value selects the default action."""
        self.debugActions = (startAction or _defaultStartDebugAction,
                             successAction or _defaultSuccessDebugAction,
                             exceptionAction or _defaultExceptionDebugAction)
        self.debug = True
        return self

    def setDebug(self, flag=True):
        """Enable display of debugging messages while doing pattern matching;
        pass a false flag to turn debugging off."""
        if flag:
            self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
        else:
            self.debug = False
        return self

    def __str__(self):
        # name is only set by setName() or by subclasses (see __init__)
        return self.name

    def __repr__(self):
        return _ustr(self)

    def streamline(self):
        """Mark this element as streamlined and clear its cached strRepr."""
        self.streamlined = True
        self.strRepr = None
        return self

    def checkRecursion(self, parseElementList):
        pass

    def validate(self, validateTrace=[]):
        """Check defined expressions for valid structure, check for infinite recursive definitions."""
        # validateTrace is not used (and never mutated) by this base implementation
        self.checkRecursion([])

    def parseFile(self, file_or_filename):
        """Execute the parse expression on the given file or filename.
        If a filename is specified (instead of a file object),
        the entire file is opened, read, and closed before parsing.
        """
        try:
            file_contents = file_or_filename.read()
        except AttributeError:
            f = open(file_or_filename, "rb")
            try:
                file_contents = f.read()
            finally:
                # close the handle even when read() fails
                f.close()
        return self.parseString(file_contents)
808 809
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""

    def __init__(self):
        super(Token, self).__init__(savelist=False)
        # exception instance created once per token; its msg is kept in
        # step with errmsg by setName
        self.myException = ParseException("", 0, "", self)

    def setName(self, name):
        """Set the debugging name and refresh the message on the token's exception."""
        named = super(Token, self).setName(name)
        self.errmsg = "Expected " + self.name
        named.myException.msg = self.errmsg
        return named
821 822
class Empty(Token):
    """An empty token, will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        # parseImpl is inherited unchanged, so no input is indexed
        self.mayIndexError = False
        self.mayReturnEmpty = True
830 831
class NoMatch(Token):
    """A token that will never match."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.errmsg = "Unmatchable token"
        # was "s.myException", an undefined name that made every NoMatch()
        # construction fail with NameError
        self.myException.msg = self.errmsg

    def parseImpl(self, instring, loc, doActions=True):
        """Always fails: raises the token's exception, updated with the
        current location and input string."""
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
847 848
class Literal(Token):
    """Token to exactly match a specified string."""

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # empty match string: warn; firstMatchChar is left unset
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayReturnEmpty = False
        self.mayIndexError = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl(self, instring, loc, doActions=True):
        """Return (end location, match string) when the literal is found at
        loc; otherwise raise the token's exception."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen == 1 or instring.startswith(self.match, loc):
                return loc + self.matchLen, self.match
        #~ raise ParseException, ( instring, loc, self.errmsg )
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
879
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
    immediately followed by a non-keyword character.  Compare with Literal::
      Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
      Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
    Accepts two optional constructor arguments in addition to the keyword string:
    identChars is a string of characters that would be valid identifier characters,
    defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
    matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        # Resolve the default at call time, so that a prior call to
        # setDefaultKeywordChars() is honoured; a default bound in the
        # signature is evaluated once, when the class is defined.
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # compare in upper case on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        """Match the keyword at loc, requiring that it ends the input or is
        followed by a character that is not in identChars."""
        if self.caseless:
            if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
                 (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
                return loc+self.matchLen, self.match
        else:
            if (instring[loc] == self.firstMatchChar and
                (self.matchLen==1 or instring.startswith(self.match,loc)) and
                (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) ):
                return loc+self.matchLen, self.match
        #~ raise ParseException, ( instring, loc, self.errmsg )
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def copy(self):
        """Copy this Keyword; the copy's identChars are reset to the current default."""
        c = super(Keyword,self).copy()
        c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return c

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
938 939
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base class is given the upper-cased text; comparisons in
        # parseImpl are made against upper-cased input
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # keep the literal exactly as the caller wrote it, for the results
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (end location, defining literal) on a case-insensitive
        match at loc; otherwise raise self.myException."""
        endLoc = loc + self.matchLen
        if instring[loc:endLoc].upper() != self.match:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return endLoc, self.returnString
961 962
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0 ):
        super(Word,self).__init__()
        # an omitted (or empty) bodyChars means "same set as initChars"
        if not bodyChars:
            bodyChars = initChars
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        self.bodyCharsOrig = bodyChars
        self.bodyChars = _str2dict(bodyChars)

        self.maxSpecified = max > 0

        # length limits: max <= 0 means unbounded; exact > 0 overrides both
        shortest, longest = min, sys.maxint
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Match one initial character followed by body characters.

        Returns (end location, matched text); raises self.myException when
        the first character is not allowed, the run is shorter than minLen,
        or a maximum was specified and the run continues past it.
        """
        if instring[loc] not in self.initChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        allowed = self.bodyChars
        inputLen = len(instring)
        stop = min( start + self.maxLen, inputLen )
        loc += 1
        while loc < stop and instring[loc] in allowed:
            loc += 1

        tooShort = ( loc - start ) < self.minLen
        # with an explicit maximum, a word that keeps going is not a match
        runsOn = self.maxSpecified and loc < inputLen and instring[loc] in allowed
        if tooShort or runsOn:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(Word,self).__str__()
        except:
            pass

        if self.strRepr is None:

            def abbreviate(chars):
                # show at most the first four characters of a set
                if len(chars) > 4:
                    return chars[:4]+"..."
                return chars

            if self.initCharsOrig == self.bodyCharsOrig:
                self.strRepr = "W:(%s)" % abbreviate(self.initCharsOrig)
            else:
                self.strRepr = "W:(%s,%s)" % ( abbreviate(self.initCharsOrig), abbreviate(self.bodyCharsOrig) )

        return self.strRepr
1050 1051
class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given set.
       Defined with string containing all disallowed characters, and an optional
       minimum, maximum, and/or exact length.
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        # length limits: max <= 0 means unbounded; exact > 0 overrides both
        shortest, longest = min, sys.maxint
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.myException.msg = self.errmsg
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of characters none of which is in notChars.

        Returns (end location, matched text); raises self.myException when
        the first character is disallowed or the run is shorter than minLen.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        stop = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]

    def __str__( self ):
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is None:
            # show at most the first four excluded characters
            if len(self.notChars) <= 4:
                self.strRepr = "!W:(%s)" % self.notChars
            else:
                self.strRepr = "!W:(%s...)" % self.notChars[:4]

        return self.strRepr
1117
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # printable names used to build the expression's name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White,self).__init__()
        self.matchWhite = ws
        # characters this token matches must not be skipped as leading whitespace
        keep = [ ch for ch in self.whiteChars if ch not in self.matchWhite ]
        self.whiteChars = "".join(keep)
        labels = [ White.whiteStrs[ch] for ch in self.matchWhite ]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name
        self.myException.msg = self.errmsg

        # length limits: max <= 0 means unbounded; exact > 0 overrides both
        shortest, longest = min, sys.maxint
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a run of characters from matchWhite.

        Returns (end location, matched text); raises self.myException when
        the first character is not in matchWhite or the run is shorter
        than minLen.
        """
        wanted = self.matchWhite
        if instring[loc] not in wanted:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        start = loc
        stop = min( start + self.maxLen, len(instring) )
        loc += 1
        while loc < stop and instring[loc] in wanted:
            loc += 1

        if loc - start < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[start:loc]
1174 1175
class PositionToken(Token):
    """Base class for the position-testing tokens defined below it; named
       after its concrete class and flagged as possibly returning empty."""
    def __init__( self ):
        super(PositionToken,self).__init__()
        self.mayReturnEmpty = True
        self.name = type(self).__name__
1181
class GoToColumn(PositionToken):
    """Token to advance to a specific column of input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorable expressions and whitespace, stopping at the target column."""
        target = self.col
        if col(loc, instring) == target:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self.skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col(loc, instring) != target:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (location at the target column, text between loc and it);
        raise ParseException if loc is already past the target column."""
        thiscol = col( loc, instring )
        if thiscol > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - thiscol
        return newloc, instring[loc:newloc]
1204
class LineStart(PositionToken):
    """Matches if current position is at the beginning of a line within the parse string"""
    def __init__( self ):
        super(LineStart,self).__init__()
        # newlines are significant here, so only blanks and tabs are skipped
        self.whiteChars = " \t"
        self.errmsg = "Expected start of line"
        self.myException.msg = self.errmsg

    def preParse( self, instring, loc ):
        """Run the inherited pre-parse, then step over one newline if positioned on it."""
        loc = super(LineStart,self).preParse(instring,loc)
        # Guard the index: the inherited pre-parse may leave loc at the end
        # of the input, where instring[loc] would raise IndexError.
        if loc < len(instring) and instring[loc] == "\n":
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is 0 or directly follows a newline
        (and is inside the input); otherwise raise self.myException."""
        if not( loc==0 or ( loc<len(instring) and instring[loc-1] == "\n" ) ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
1227
class LineEnd(PositionToken):
    """Matches if current position is at the end of a line within the parse string"""
    def __init__( self ):
        super(LineEnd,self).__init__()
        # newlines are significant here, so only blanks and tabs are skipped
        self.whiteChars = " \t"
        self.errmsg = "Expected end of line"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc (consuming it) or the end of the input.

        Returns (loc+1, "\\n") on a newline and (loc, []) at end of input;
        raises self.myException for any other character.
        """
        if loc >= len(instring):
            return loc, []
        if instring[loc] == "\n":
            return loc+1, "\n"
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1248
class StringStart(PositionToken):
    """Matches if current position is at the beginning of the parse string"""
    def __init__( self ):
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is 0 or is where pre-parsing from 0 ends up;
        otherwise raise self.myException."""
        # a non-zero loc still counts as the start if everything before it
        # is just whitespace and ignorables
        if loc != 0 and loc != self.preParse( instring, 0 ):
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        return loc, []
1266
class StringEnd(PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"
        self.myException.msg = self.errmsg

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when loc is at or past the end of the input;
        otherwise raise self.myException."""
        if loc >= len(instring):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1282 1283
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, exprs, savelist = False ):
        super(ParseExpression,self).__init__(savelist)
        # normalise the argument to a list of expressions: a list is used
        # as given, a string becomes a Literal, anything else is wrapped
        if isinstance( exprs, list ):
            members = exprs
        elif isinstance( exprs, basestring ):
            members = [ Literal( exprs ) ]
        else:
            members = [ exprs ]
        self.exprs = members

    def __getitem__( self, i ):
        return self.exprs[i]

    def append( self, other ):
        """Add another contained expression and invalidate the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so expressions shared with other grammars are untouched
        copies = [ copy.copy(member) for member in self.exprs ]
        self.exprs = copies
        for member in copies:
            member.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on every contained expression.

        A Suppress that is already registered is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseExpression, self).ignore( other )
        for member in self.exprs:
            member.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        super(ParseExpression,self).streamline()

        for member in self.exprs:
            member.streamline()

        def mergeable(sub):
            # a nested expression of the same class can be absorbed only if
            # it has no parse action, results name or debug flag of its own
            return ( isinstance( sub, self.__class__ ) and
                     sub.parseAction is None and
                     sub.resultsName is None and
                     not sub.debug )

        # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if mergeable(first):
                self.exprs = first.exprs[:] + [ self.exprs[1] ]
                self.strRepr = None

            last = self.exprs[-1]
            if mergeable(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None

        return self

    def setResultsName( self, name, listAllMatches=False ):
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then check this one for recursion."""
        trace = validateTrace[:]+[self]
        for member in self.exprs:
            member.validate(trace)
        self.checkRecursion( [] )
1372 1373
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(And,self).__init__(exprs, savelist)
        # Use self.exprs, the list normalised by ParseExpression.__init__,
        # rather than the raw argument: the raw argument may be a string or
        # a single expression, which cannot be iterated/indexed as a list
        # of expressions.
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # whitespace handling follows the first expression of the sequence
        self.skipWhitespace = self.exprs[0].skipWhitespace
        self.whiteChars = self.exprs[0].whiteChars

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse each contained expression in order, accumulating their tokens.

        Returns (end location, combined results).  A failure of any
        contained expression propagates to the caller.
        """
        loc, resultlist = self.exprs[0].parse( instring, loc, doActions )
        for e in self.exprs[1:]:
            loc, exprtokens = e.parse( instring, loc, doActions )
            # results with no tokens are still added if they carry keys
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            # expressions after the first one that cannot return empty are
            # not checked
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
1417 1418
class Or(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the expression that matches the longest string will be used.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(Or,self).__init__(exprs, savelist)
        # Iterate self.exprs, the list normalised by ParseExpression.__init__;
        # the raw argument may be a string or a single expression.
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative and parse with the one that gets furthest.

        Returns whatever the winning alternative's parse() returns.  If no
        alternative matches, raises the failure that was reported furthest
        into the input; with no alternatives at all, raises a
        ParseException at loc.
        """
        maxExcLoc = -1
        maxMatchLoc = -1
        maxException = None
        maxMatchExp = None
        for e in self.exprs:
            try:
                loc2 = e.tryParse( instring, loc )
            except ParseException:
                # sys.exc_info() is used to fetch the exception so that no
                # version-specific "except ..., name" binding is needed
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end of the input counts as a failure at
                # the end of the input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)
            else:
                if loc2 > maxMatchLoc:
                    maxMatchLoc = loc2
                    maxMatchExp = e

        if maxMatchLoc < 0:
            if maxException is None:
                # nothing was tried (no alternatives): report that as a
                # parse failure instead of failing on an unbound name
                maxException = ParseException(instring, loc, "no defined alternatives to match", self)
            raise maxException

        return maxMatchExp.parse( instring, loc, doActions )

    def __ixor__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ^ ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
1474 1475
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        super(MatchFirst,self).__init__(exprs, savelist)
        # Iterate self.exprs, the list normalised by ParseExpression.__init__;
        # the raw argument may be a string or a single expression.
        self.mayReturnEmpty = False
        for e in self.exprs:
            if e.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the parse() result of the first alternative that matches.

        If none matches, raises the failure that was reported furthest
        into the input; with no alternatives at all, raises a
        ParseException at loc.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                return e.parse( instring, loc, doActions )
            except ParseException:
                # sys.exc_info() is used to fetch the exception so that no
                # version-specific "except ..., name" binding is needed
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                # running off the end of the input counts as a failure at
                # the end of the input
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is None:
            # nothing was tried (no alternatives): report that as a parse
            # failure instead of failing on an unbound name
            maxException = ParseException(instring, loc, "no defined alternatives to match", self)
        raise maxException

    def __ior__(self, other ):
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
1526 1527
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, but in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        super(Each,self).__init__(exprs, savelist)
        # Classify self.exprs, the list normalised by
        # ParseExpression.__init__; the raw argument may be a string or a
        # single expression.
        members = self.exprs
        self.mayReturnEmpty = True
        for e in members:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # Optional/ZeroOrMore/OneOrMore wrappers are unwrapped to their
        # inner expression; everything else is required as given
        self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the contained expressions in whatever order they occur.

        A first phase uses tryParse to discover the order in which the
        expressions match; a second phase re-parses in that order with
        parse() to build the results.  Raises ParseException at loc when a
        required expression never matched.
        """
        tmpLoc = loc
        tmpReqd = self.required[:]
        tmpOpt  = self.optionals[:]
        matchOrder = []

        # phase 1: keep sweeping over the candidates until a whole sweep
        # matches nothing
        keepMatching = True
        while keepMatching:
            tmpExprs = tmpReqd + tmpOpt + self.multioptionals + self.multirequired
            failed = []
            for e in tmpExprs:
                try:
                    tmpLoc = e.tryParse( instring, tmpLoc )
                except ParseException:
                    failed.append(e)
                else:
                    matchOrder.append(e)
                    # single-occurrence expressions are retired once matched
                    if e in tmpReqd:
                        tmpReqd.remove(e)
                    elif e in tmpOpt:
                        tmpOpt.remove(e)
            if len(failed) == len(tmpExprs):
                keepMatching = False

        if tmpReqd:
            missing = ", ".join( [ str(e) for e in tmpReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # phase 2: parse for real, in the discovered order
        resultlist = []
        for e in matchOrder:
            loc,results = e.parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for r in resultlist:
            # a key already present in the accumulated results gets the old
            # and new values combined instead of being overwritten
            dups = {}
            for k in r.keys():
                if k in finalResults.keys():
                    tmp = ParseResults(finalResults[k])
                    tmp += ParseResults(r[k])
                    dups[k] = tmp
            finalResults += ParseResults(r)
            for k,v in dups.items():
                finalResults[k] = v
        return loc, finalResults

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " & ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
1606 1607
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
    def __init__( self, expr, savelist=False ):
        super(ParseElementEnhance,self).__init__(savelist)
        # a plain string stands for a Literal matching that string
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is None:
            return
        # take over index-error and whitespace settings from the wrapped expression
        self.mayIndexError = expr.mayIndexError
        self.skipWhitespace = expr.skipWhitespace
        self.whiteChars = expr.whiteChars

    def parseImpl( self, instring, loc, doActions=True ):
        """Delegate to the wrapped expression; raise ParseException if there is none."""
        if self.expr is None:
            raise ParseException(instring,loc,"",self)
        return self.expr.parse( instring, loc, doActions )

    def leaveWhitespace( self ):
        self.skipWhitespace = False
        # work on a copy so an expression shared with other grammars is untouched
        inner = copy.copy(self.expr)
        self.expr = inner
        if inner is not None:
            inner.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register an ignorable expression here and on the wrapped expression.

        A Suppress that is already registered is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self
        super( ParseElementEnhance, self).ignore( other )
        if self.expr is not None:
            self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        super(ParseElementEnhance,self).streamline()
        inner = self.expr
        if inner is not None:
            inner.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already in the
        list of elements being checked; otherwise continue into the wrapped expression."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        if self.expr is not None:
            self.expr.checkRecursion( parseElementList[:] + [ self ] )

    def validate( self, validateTrace=[] ):
        trace = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(trace)
        self.checkRecursion( [] )

    def __str__( self ):
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
1674 1675
class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.  FollowedBy
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression matches at the current
    position.  FollowedBy always returns a null token list."""
    def __init__( self, expr ):
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        # the trial parse is made only for its failure; its result is discarded
        lookahead = self.expr
        lookahead.tryParse( instring, loc )
        return loc, []
1688 1689
class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.  NotAny
    does *not* advance the parsing position within the input string, it only
    verifies that the specified parse expression does *not* match at the current
    position.  Also, NotAny does *not* skip over leading whitespace. NotAny
    always returns a null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        super(NotAny,self).__init__(expr)
        # set the flag directly: do NOT use self.leaveWhitespace(), which
        # would propagate to the wrapped expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unexpected token, "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) when the wrapped expression fails at loc;
        raise self.myException when it matches."""
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            return loc, []
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr
1725 1726
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition of zero or more of the given expression."""
    def __init__( self, expr ):
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

        Returns (end location, accumulated tokens); the tokens are an
        empty list when not even the first repetition matched.
        """
        tokens = []
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
            skipIgnored = len(self.ignoreExprs) > 0
            while True:
                if skipIgnored:
                    loc = self.skipIgnorables( instring, loc )
                loc, more = self.expr.parse( instring, loc, doActions )
                # results with no tokens are still added if they carry keys
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            # the first failing repetition ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
1762 1763
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""
    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression once, then repeatedly until it fails.

        Returns (end location, accumulated tokens).  A failure of the
        first, mandatory repetition propagates to the caller.
        """
        # must be at least one
        loc, tokens = self.expr.parse( instring, loc, doActions )
        try:
            skipIgnored = len(self.ignoreExprs) > 0
            while True:
                if skipIgnored:
                    loc = self.skipIgnorables( instring, loc )
                loc, more = self.expr.parse( instring, loc, doActions )
                # results with no tokens are still added if they carry keys
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            # the first failing repetition ends the loop
            pass

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."

        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
1795 1796
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return string can also be specified, if the optional expression
       is not found.
    """
    def __init__( self, exprs, default=None ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the wrapped expression's result, or, when it does not
        match, loc unchanged with the default value (if any) as the only token."""
        try:
            loc, tokens = self.expr.parse( instring, loc, doActions )
        except (ParseException,IndexError):
            tokens = []
            if self.defaultValue is not None:
                tokens = [ self.defaultValue ]

        return loc, tokens

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"

        return self.strRepr
1826 1827
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also consumed.  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.
    """
    def __init__( self, other, include=False, ignore=None ):
        super( SkipTo, self ).__init__( other )
        if ignore is not None:
            # attach the ignorable to a copy so the caller's expression is untouched
            self.expr = copy.copy( self.expr )
            self.expr.ignore(ignore)
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.errmsg = "No match found for "+_ustr(self.expr)
        self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time until the target expression matches.

        Returns (location, [skipped text]) positioned at the start of the
        target, or, when include was requested, the location after the
        target with the target's tokens added when non-empty.  Raises
        self.myException when the target is found nowhere up to and
        including the end of the input.
        """
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # "<=" rather than "<": the target must also be tried at the very
        # end of the input, otherwise targets that match there (such as an
        # end-of-text position) can never be found
        while loc <= instrlen:
            try:
                expr.tryParse(instring, loc)
                if self.includeMatch:
                    skipText = instring[startLoc:loc]
                    loc,mat = expr.parse(instring,loc)
                    if mat:
                        return loc, [ skipText, mat ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ instring[startLoc:loc] ]
            except (ParseException,IndexError):
                loc += 1
        exc = self.myException
        # report the failure at the end of the input, not one past it
        exc.loc = min( loc, instrlen )
        exc.pstr = instring
        raise exc
1867
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the expression this Forward stands for; returns self."""
        # accept a plain string the same way the constructor does
        if isinstance( other, basestring ):
            other = Literal( other )
        self.expr = other
        self.mayReturnEmpty = other.mayReturnEmpty
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        # only the flag on this element is changed; the contained
        # expression is not visited
        self.skipWhitespace = False
        return self

    def streamline( self ):
        # the flag is set before descending, so a grammar that refers back
        # to this Forward does not recurse endlessly
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=[] ):
        # a Forward already on the trace has been visited; do not descend again
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        if hasattr(self,"name"):
            return self.name

        # While the contained expression is rendered, this instance poses
        # as _ForwardNoRecurse so that a reference back to it prints "..."
        # instead of recursing.  The original class is remembered and
        # restored in a finally clause, so a subclass instance is not
        # turned into a plain Forward and an error during rendering cannot
        # leave the instance with the wrong class.
        realClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            self.__class__ = realClass
        return "Forward: "+retString
1912
class _ForwardNoRecurse(Forward):
    # Forward.__str__ temporarily assigns this class to an instance while
    # it renders the contained expression, so that a reference back to that
    # instance prints as "..." instead of recursing.
    def __str__( self ):
        return "..."
1916
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: savelist is accepted but not passed on to the base class
        super(TokenConverter,self).__init__( expr )#, savelist )
1921 1922
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        # stacklevel=2 attributes the warning to the code creating the Upcase
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return the tokens with string.upper applied to each one."""
        toUpper = string.upper
        return map( toUpper, tokenlist )
1932 1933
class Combine(TokenConverter):
    """Converter that joins all matched tokens into one string.

    Unless 'adjacent=False' is given to the constructor, the matched pieces
    must also be contiguous in the input string.
    """

    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine, self).__init__( expr )
        if adjacent:
            # stop whitespace-stripping in the contained parse expressions ...
            self.leaveWhitespace()
        self.adjacent = adjacent
        # ... but re-enable it on the Combine itself
        self.skipWhitespace = True
        self.joinString = joinString

    def ignore( self, other ):
        """Register an ignorable expression; returns self.

        In adjacent mode ParserElement.ignore is called directly, otherwise
        the inherited implementation is used.
        """
        if not self.adjacent:
            super(Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens with their single joined string."""
        # start from a copy of the results, then discard its list items
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        # a named Combine that carries keys is returned wrapped in a list
        if self.resultsName and len(combined.keys()) > 0:
            return [ combined ]
        return combined
1964 1965
class Group(TokenConverter):
    """Converter that returns the matched tokens nested in their own list -
    handy for the tokens of ZeroOrMore and OneOrMore expressions."""

    def __init__( self, expr ):
        super(Group, self).__init__( expr )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Wrap the matched tokens in a one-element list."""
        grouped = [ tokenlist ]
        return grouped
1974
class Dict(TokenConverter):
    """Converter that returns a repetitive expression as a list and also as
    a dictionary, keyed by the first token of each element.

    Useful for scraping tabular reports whose first column can serve as an
    item key.
    """

    def __init__( self, exprs ):
        super(Dict, self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add one keyed entry to tokenlist for each element it contains.

        Each entry is stored as a (value, index) tuple under the stripped
        string form of the element's first token.
        """
        for index, entry in enumerate(tokenlist):
            key = _ustr(entry[0]).strip()
            entryLen = len(entry)
            if entryLen == 1:
                # key with nothing after it
                value = ""
            elif entryLen == 2 and not isinstance(entry[1], ParseResults):
                # key followed by one plain token
                value = entry[1]
            else:
                # everything after the key, on a copy of the element
                remainder = entry.copy() #ParseResults(i)
                del remainder[0]
                keepWhole = ( len(remainder) != 1 or
                              (isinstance(remainder, ParseResults) and remainder.keys()) )
                if keepWhole:
                    value = remainder
                else:
                    value = remainder[0]
            tokenlist[key] = (value, index)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist
2003 2004
class Suppress(TokenConverter):
    """Converter that discards the results of the wrapped expression."""

    def postParse( self, instring, loc, tokenlist ):
        """Return an empty token list regardless of what was matched."""
        return list()

    def suppress( self ):
        """Return self unchanged."""
        return self
2012 2013 # 2014 # global helpers 2015 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to build an expression for a delimiter-separated list of expr;
       the delimiter defaults to ','.

       With combine=False (the default) the delimiters are suppressed and the
       list elements are returned as separate tokens; whitespace and comments
       may appear between elements and delimiters.  With combine=True the whole
       match is wrapped in Combine and returned as one token string that still
       contains the delimiters.
    """
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    # the display name is built after the expression, e.g. "item,..."
    listName = _ustr(expr) + _ustr(delim) + "..."
    return listExpr.setName(listName)
2028
def oneOf( strs, caseless=False ):
    """Helper to define a set of alternative literals from a whitespace-separated
       string.  Duplicates are dropped and any symbol that starts with another
       symbol is moved ahead of it, so the longer alternative is always tested
       first; the result is a MatchFirst.
    """
    if caseless:
        parseElementClass = CaselessLiteral
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
    else:
        parseElementClass = Literal
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )

    symbols = strs.split()
    pos = 0
    while pos < len(symbols) - 1:
        current = symbols[pos]
        for offset, candidate in enumerate(symbols[pos+1:]):
            if isequal(candidate, current):
                # duplicate: drop the later copy and rescan this position
                del symbols[pos+offset+1]
                break
            if masks(current, candidate):
                # current is a prefix of candidate: move candidate in front
                # of it and rescan this position
                del symbols[pos+offset+1]
                symbols.insert(pos, candidate)
                break
        else:
            # nothing after this position conflicts with it
            pos += 1

    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
2060
def dictOf( key, value ):
    """Helper to define a dictionary from a key pattern and a value pattern,
       nesting the Dict, ZeroOrMore and Group elements in the proper order.
       The key pattern may include delimiting markers or punctuation as long as
       they are suppressed, leaving only the significant key text.  The value
       pattern may include named results, so the Dict results can carry named
       token fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
# a single backslash character
_bslash = "\\"
# every character of string.printable that is not whitespace
printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
empty = Empty().setName("empty")

# --- grammar used by srange() to parse regexp-style '[...]' sets ---
# backslash + one punctuation character; the parse action keeps only the
# second character of the two-character match
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# printables without the backslash and ']' characters
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
# backslash + "0x" + hex digits, converted to the character with that code
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
# backslash + "0"-led octal digits, converted to the character with that code
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
# 'a-z' style range, grouped as a [first, last] pair
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = "[" + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

# expands a grouped range into the string of every character from first to
# last inclusive; anything that is not a ParseResults is returned unchanged
# NOTE(review): with the and/or idiom an empty expansion (first > last) falls
# through to returning p itself - confirm whether that is intended.
_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the string cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # best-effort helper: a malformed range yields "" rather than raising.
        # Catching Exception instead of using a bare except keeps this from
        # also swallowing exceptions that do not derive from Exception.
        return ""
2105
def replaceWith(replStr):
    """Helper that builds a parse action which ignores its arguments and always
       returns replStr.  Especially useful together with transformString().
    """
    def _replFunc(*_ignored):
        # whatever was matched, substitute the fixed value
        return replStr

    return _replFunc
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character (the
       quotation marks) from the first matched token.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
2120
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case.

       s and l (the input string and match location) are unused; t is the
       sequence of matched tokens.  Returns a list with each token's upper()
       result.
    """
    # Call upper() on each token itself rather than through the unbound
    # str.upper, which rejects unicode tokens with a TypeError.
    return [ tok.upper() for tok in t ]
2124
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case.

       s and l (the input string and match location) are unused; t is the
       sequence of matched tokens.  Returns a list with each token's lower()
       result.
    """
    # Call lower() on each token itself rather than through the unbound
    # str.lower, which rejects unicode tokens with a TypeError.
    return [ tok.lower() for tok in t ]
2128
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

       tagStr is the tag name; xml selects XML rules (case-sensitive tag name,
       double-quoted attribute values only) or HTML rules (caseless tag name,
       attribute names lower-cased, values quoted or unquoted).
       Returns the tuple (openTag, closeTag).  The results are named
       "start"+tagStr.title() and "end"+tagStr.title(); a trailing "/" in the
       opening tag is reported under the results name "empty".
    """
    tagAttrName = Word(alphanums)
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        openTag = ( "<" + Keyword(tagStr) +
                    Dict(ZeroOrMore(Group( tagAttrName + Suppress("=") + tagAttrValue ))) +
                    Optional("/",default="").setResultsName("empty") + ">" )
    else:
        # Unquoted values are built from 'printables' (no whitespace), not
        # string.printable: with whitespace allowed, one unquoted value would
        # run on through all following attributes up to the closing '>'.
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        openTag = ( "<" + Keyword(tagStr,caseless=True) +
                    Dict(ZeroOrMore(Group( tagAttrName.setParseAction(downcaseTokens) + Suppress("=") + tagAttrValue ))) +
                    Optional("/",default="").setResultsName("empty") + ">" )
    # the closing tag is caseless for HTML only
    closeTag = "</" + Keyword(tagStr,caseless=not xml) + ">"

    openTag = openTag.setResultsName("start"+tagStr.title()).setName("<"+tagStr+">")
    closeTag = closeTag.setResultsName("end"+tagStr.title()).setName("</"+tagStr+">")

    return openTag, closeTag
2145
def makeHTMLTags(tagStr):
    """Helper returning the (openTag, closeTag) expressions for an HTML tag name."""
    useXmlRules = False
    return _makeTags( tagStr, useXmlRules )
2149
def makeXMLTags(tagStr):
    """Helper returning the (openTag, closeTag) expressions for an XML tag name."""
    useXmlRules = True
    return _makeTags( tagStr, useXmlRules )
# characters with codes 0xc0-0xd6, 0xd8-0xf6 and 0xf8-0xfe, expanded by srange
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xfe]")

# characters accepted after a backslash inside a quoted string
_escapables = "tnrfbacdeghijklmopqsuvwxyz " + _bslash + "'" + '"'
_octDigits = "01234567"
# backslash + one escapable character, or backslash + one to three octal digits
_escapedChar = ( Word( _bslash, _escapables, exact=2 ) |
                 Word( _bslash, _octDigits, min=2, max=4 ) )
_sglQuote = Literal("'")
_dblQuote = Literal('"')
# quoted strings: no raw newlines; the quote character may appear in the body
# either backslash-escaped or doubled
dblQuotedString = Combine( _dblQuote + ZeroOrMore( CharsNotIn('\\"\n\r') | _escapedChar | '""' ) + _dblQuote ).streamline().setName("string enclosed in double quotes")
sglQuotedString = Combine( _sglQuote + ZeroOrMore( CharsNotIn("\\'\n\r") | _escapedChar | "''" ) + _sglQuote ).streamline().setName("string enclosed in single quotes")
quotedString = ( dblQuotedString | sglQuotedString ).setName("quotedString using single or double quotes")

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Combine( Literal("/*") +
                         ZeroOrMore( CharsNotIn("*") | ( "*" + ~Literal("/") ) ) +
                         Literal("*/") ).streamline().setName("cStyleComment enclosed in /* ... */")
htmlComment = Combine( Literal("<!--") + ZeroOrMore( CharsNotIn("-") |
                                                   (~Literal("-->") + Literal("-").leaveWhitespace() ) ) +
                        Literal("-->") ).streamline().setName("htmlComment enclosed in <!-- ... -->")
# everything up to (not including) the next \n or \r; defaults to "" when nothing is there
restOfLine = Optional( CharsNotIn( "\n\r" ), default="" ).setName("rest of line up to \\n").leaveWhitespace()
dblSlashComment = "//" + restOfLine
# the FollowedBy("/") lookahead is tested before either comment alternative
cppStyleComment = FollowedBy("/") + ( dblSlashComment | cStyleComment )
javaStyleComment = cppStyleComment
pythonStyleComment = "#" + restOfLine
# one unquoted comma-separated item: words of non-comma printables, optionally
# followed by spaces/tabs that are not directly before a comma or end of line
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
# an omitted item yields "" through the Optional default
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")


# Demo, run only when the module is executed as a script: parses a handful of
# simple SQL SELECT statements and prints the resulting tokens.
if __name__ == "__main__":

    def test( teststring ):
        # Parse one statement with simpleSQL and print the tokens, or the
        # failing line with a caret under the error column.
        print teststring,"->",
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print tokenlist
            print "tokens = ", tokens
            print "tokens.columns =", tokens.columns
            print "tokens.tables =", tokens.tables
            print tokens.asXML("SQL",True)
        except ParseException, err:
            print err.line
            print " "*(err.column-1) + "^"
            print err
        print

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    # identifiers may be dotted; Upcase converts the combined name to upper case
    ident = Word( alphas, alphanums + "_$" )
    columnName = Upcase( delimitedList( ident, ".", combine=True ) )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = Upcase( delimitedList( ident, ".", combine=True ) )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )