1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 __doc__ = \
26 """
27 pyparsing module - Classes and methods to define and execute parsing grammars
28
29 The pyparsing module is an alternative approach to creating and executing simple grammars,
30 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
31 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
32 provides a library of classes that you use to construct the grammar directly in Python.
33
34 Here is a program to parse "Hello, World!" (or any greeting of the form C{"<salutation>, <addressee>!"})::
35
36 from pyparsing import Word, alphas
37
38 # define grammar of a greeting
39 greet = Word( alphas ) + "," + Word( alphas ) + "!"
40
41 hello = "Hello, World!"
42 print (hello, "->", greet.parseString( hello ))
43
44 The program outputs the following::
45
46 Hello, World! -> ['Hello', ',', 'World', '!']
47
48 The Python representation of the grammar is quite readable, owing to the self-explanatory
49 class names, and the use of '+', '|' and '^' operators.
50
51 The parsed results returned from L{I{ParserElement.parseString}<ParserElement.parseString>} can be accessed as a nested list, a dictionary, or an
52 object with named attributes.
53
54 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
55 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
56 - quoted strings
57 - embedded comments
58 """
59
60 __version__ = "2.1.7"
61 __versionTime__ = "11 Aug 2016 07:29 UTC"
62 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
63
64 import string
65 from weakref import ref as wkref
66 import copy
67 import sys
68 import warnings
69 import re
70 import sre_constants
71 import collections
72 import pprint
73 import traceback
74 import types
75 from datetime import datetime
76
77 try:
78 from _thread import RLock
79 except ImportError:
80 from threading import RLock
81
82 try:
83 from collections import OrderedDict as _OrderedDict
84 except ImportError:
85 try:
86 from ordereddict import OrderedDict as _OrderedDict
87 except ImportError:
88 _OrderedDict = None
89
90
91
92 __all__ = [
93 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
94 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
95 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
96 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
97 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
98 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter',
99 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
100 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
101 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
102 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums',
103 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno',
104 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
105 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
106 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
107 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
108 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
109 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation','locatedExpr', 'withClass',
110 'tokenMap', 'pyparsing_common',
111 ]
112
113 system_version = tuple(sys.version_info)[:3]
114 PY_3 = system_version[0] == 3
115 if PY_3:
116 _MAX_INT = sys.maxsize
117 basestring = str
118 unichr = chr
119 _ustr = str
120
121
122 singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]
123
124 else:
125 _MAX_INT = sys.maxint
126 range = xrange
def _ustr(obj):
    """Drop-in replacement for str(obj) that tries to be Unicode friendly (Python 2 branch only).

    A C{unicode} object is returned unchanged.  Otherwise C{str(obj)} is tried first;
    if that raises C{UnicodeEncodeError}, C{unicode(obj)} is encoded with the default
    encoding using the C{'xmlcharrefreplace'} error handler, and every resulting
    C{&#NNN;} character reference is rewritten as a backslash-u followed by the
    hexadecimal form of NNN.
    """
    if isinstance(obj,unicode):
        return obj

    try:
        # When this succeeds, _ustr(obj) behaves exactly like str(obj).
        return str(obj)

    except UnicodeEncodeError:
        # Fall back to an encodable representation of the unicode text.
        ret = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
        # Raw string: '\d' is not a valid string escape, so the non-raw spelling
        # triggers invalid-escape warnings on newer interpreters even though the
        # resulting pattern text is the same.
        xmlcharref = Regex(r'&#\d+;')
        xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
        return xmlcharref.transformString(ret)
147
148
149 singleArgBuiltins = []
150 import __builtin__
151 for fname in "sum len sorted reversed list tuple set any all min max".split():
152 try:
153 singleArgBuiltins.append(getattr(__builtin__,fname))
154 except AttributeError:
155 continue
156
157 _generatorType = type((y for y in range(1)))
160 """Escape &, <, >, ", ', etc. in a string of data."""
161
162
163 from_symbols = '&><"\''
164 to_symbols = ('&'+s+';' for s in "amp gt lt quot apos".split())
165 for from_,to_ in zip(from_symbols, to_symbols):
166 data = data.replace(from_, to_)
167 return data
168
171
# ASCII letters: upper case first, then lower case
alphas = string.ascii_uppercase + string.ascii_lowercase
# decimal digits
nums = "0123456789"
# decimal digits followed by the hexadecimal letters in both cases
hexnums = nums + "ABCDEFabcdef"
# letters followed by digits
alphanums = alphas + nums
# the backslash character, built from its code point (92) instead of a literal
_bslash = chr(92)
# every character of string.printable that is not in string.whitespace
printables = "".join(c for c in string.printable if c not in string.whitespace)
180 """base exception class for all parsing runtime exceptions"""
181
182
183 - def __init__( self, pstr, loc=0, msg=None, elem=None ):
184 self.loc = loc
185 if msg is None:
186 self.msg = pstr
187 self.pstr = ""
188 else:
189 self.msg = msg
190 self.pstr = pstr
191 self.parserElement = elem
192 self.args = (pstr, loc, msg)
193
194 @classmethod
196 """
197 internal factory method to simplify creating one type of ParseException
198 from another - avoids having __init__ signature conflicts among subclasses
199 """
200 return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)
201
203 """supported attributes by name are:
204 - lineno - returns the line number of the exception text
205 - col - returns the column number of the exception text
206 - line - returns the line containing the exception text
207 """
208 if( aname == "lineno" ):
209 return lineno( self.loc, self.pstr )
210 elif( aname in ("col", "column") ):
211 return col( self.loc, self.pstr )
212 elif( aname == "line" ):
213 return line( self.loc, self.pstr )
214 else:
215 raise AttributeError(aname)
216
218 return "%s (at char %d), (line:%d, col:%d)" % \
219 ( self.msg, self.loc, self.lineno, self.column )
233 return "lineno col line".split() + dir(type(self))
234
236 """
237 Exception thrown when parse expressions don't match class;
238 supported attributes by name are:
239 - lineno - returns the line number of the exception text
240 - col - returns the column number of the exception text
241 - line - returns the line containing the exception text
242
243 Example::
244 try:
245 Word(nums).setName("integer").parseString("ABC")
246 except ParseException as pe:
247 print(pe)
248 print("column: {}".format(pe.col))
249
250 prints::
251 Expected integer (at char 0), (line:1, col:1)
252 column: 1
253 """
254 pass
255
257 """user-throwable exception thrown when inconsistent parse content
258 is found; stops all parsing immediately"""
259 pass
260
262 """just like L{ParseFatalException}, but thrown internally when an
263 L{ErrorStop<And._ErrorStop>} ('-' operator) indicates that parsing is to stop
264 immediately because an unbacktrackable syntax error has been found"""
265 pass
266
281 """exception thrown by L{ParserElement.validate} if the grammar could be improperly recursive"""
def __init__( self, parseElementList ):
    """Store C{parseElementList} as C{self.parseElementTrace}, the value shown by C{__str__}."""
    self.parseElementTrace = parseElementList
284
286 return "RecursiveGrammarException: %s" % self.parseElementTrace
287
294 return repr(self.tup)
296 self.tup = (self.tup[0],i)
297
299 """
300 Structured parse results, to provide multiple means of access to the parsed data:
301 - as a list (C{len(results)})
302 - by list index (C{results[0], results[1]}, etc.)
303 - by attribute (C{results.<resultsName>} - see L{ParserElement.setResultsName})
304
305 Example::
306 integer = Word(nums)
307 date_str = (integer.setResultsName("year") + '/'
308 + integer.setResultsName("month") + '/'
309 + integer.setResultsName("day"))
310 # equivalent form:
311 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
312
313 result = date_str.parseString("1999/12/31")
314 print(list(result))
315 print(result[0])
316 print(result['month'])
317 print(result.day)
318 print('month' in result)
319 print('minutes' in result)
320 print(result.dump())
321 prints::
322 ['1999', '/', '12', '/', '31']
323 1999
324 12
325 31
326 True
327 False
328 ['1999', '/', '12', '/', '31']
329 - day: 31
330 - month: 12
331 - year: 1999
332 """
333 - def __new__(cls, toklist=None, name=None, asList=True, modal=True ):
334 if isinstance(toklist, cls):
335 return toklist
336 retobj = object.__new__(cls)
337 retobj.__doinit = True
338 return retobj
339
340
341
def __init__( self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance ):
    """
    Initialise the token list and, optionally, register the tokens under a results name.

    Parameters:
     - toklist - a list (copied), a generator (materialised into a list), or any
       other single object (wrapped in a one-element list); C{None} means empty
     - name - optional results name; ignored when C{None} or falsy
     - asList - when true, the named value is stored as a nested C{ParseResults};
       otherwise the first element of C{toklist} (or C{toklist} itself) is stored
     - modal - when false, C{name} is recorded in the accumulating-names table,
       which makes item access by that name return all stored values instead of
       only the last one
     - isinstance - the builtin bound as a default argument
       (presumably a lookup-speed optimisation - TODO confirm)
    """
    # First-time setup only: __new__ sets __doinit on freshly created objects,
    # so an instance that was passed through __new__ unchanged is not reset.
    if self.__doinit:
        self.__doinit = False
        self.__name = None
        self.__parent = None
        self.__accumNames = {}
        self.__asList = asList
        self.__modal = modal
        if toklist is None:
            toklist = []
        if isinstance(toklist, list):
            self.__toklist = toklist[:]
        elif isinstance(toklist, _generatorType):
            self.__toklist = list(toklist)
        else:
            self.__toklist = [toklist]
        self.__tokdict = dict()

    if name is not None and name:
        if not modal:
            self.__accumNames[name] = 0
        # integer names are converted to their string form
        if isinstance(name,int):
            name = _ustr(name)
        self.__name = name
        # skip registration when toklist is None, an empty string or an empty list
        if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None,'',[])):
            if isinstance(toklist,basestring):
                toklist = [ toklist ]
            if asList:
                if isinstance(toklist,ParseResults):
                    self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                else:
                    self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                # the nested results object carries the same name
                self[name].__name = name
            else:
                try:
                    self[name] = toklist[0]
                except (KeyError,TypeError,IndexError):
                    # toklist cannot be indexed at 0 - store it whole
                    self[name] = toklist
380
382 if isinstance( i, (int,slice) ):
383 return self.__toklist[i]
384 else:
385 if i not in self.__accumNames:
386 return self.__tokdict[i][-1][0]
387 else:
388 return ParseResults([ v[0] for v in self.__tokdict[i] ])
389
391 if isinstance(v,_ParseResultsWithOffset):
392 self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
393 sub = v[0]
394 elif isinstance(k,(int,slice)):
395 self.__toklist[k] = v
396 sub = v
397 else:
398 self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
399 sub = v
400 if isinstance(sub,ParseResults):
401 sub.__parent = wkref(self)
402
404 if isinstance(i,(int,slice)):
405 mylen = len( self.__toklist )
406 del self.__toklist[i]
407
408
409 if isinstance(i, int):
410 if i < 0:
411 i += mylen
412 i = slice(i, i+1)
413
414 removed = list(range(*i.indices(mylen)))
415 removed.reverse()
416
417 for name,occurrences in self.__tokdict.items():
418 for j in removed:
419 for k, (value, position) in enumerate(occurrences):
420 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
421 else:
422 del self.__tokdict[i]
423
425 return k in self.__tokdict
426
427 - def __len__( self ): return len( self.__toklist )
428 - def __bool__(self): return ( not not self.__toklist )
429 __nonzero__ = __bool__
430 - def __iter__( self ): return iter( self.__toklist )
431 - def __reversed__( self ): return iter( self.__toklist[::-1] )
433 if hasattr(self.__tokdict, "iterkeys"):
434 return self.__tokdict.iterkeys()
435 else:
436 return iter(self.__tokdict)
437
439 return (self[k] for k in self._iterkeys())
440
442 return ((k, self[k]) for k in self._iterkeys())
443
444 if PY_3:
445 keys = _iterkeys
446 """Returns an iterator of all named result keys (Python 3.x only)."""
447
448 values = _itervalues
449 """Returns an iterator of all named result values (Python 3.x only)."""
450
451 items = _iteritems
452 """Returns an iterator of all named result key-value tuples (Python 3.x only)."""
453
454 else:
455 iterkeys = _iterkeys
456 """Returns an iterator of all named result keys (Python 2.x only)."""
457
458 itervalues = _itervalues
459 """Returns an iterator of all named result values (Python 2.x only)."""
460
461 iteritems = _iteritems
462 """Returns an iterator of all named result key-value tuples (Python 2.x only)."""
463
465 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x)."""
466 return list(self.iterkeys())
467
469 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x)."""
470 return list(self.itervalues())
471
473 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x)."""
474 return list(self.iteritems())
475
477 """Since keys() returns an iterator, this method is helpful in bypassing
478 code that looks for the existence of any defined results names."""
479 return bool(self.__tokdict)
480
def pop( self, *args, **kwargs):
    """
    Remove and return an item, with either C{list} or C{dict} semantics.

    With no argument (the index then defaults to -1, i.e. the last token) or
    with an integer argument, the item is taken from the list of parsed tokens.
    With a non-integer argument (most likely a string) the value is taken from
    the defined results names.  As with C{dict.pop()}, a second argument - or
    the keyword argument C{default} - is returned when a non-integer key is not
    present.  Any other keyword argument raises C{TypeError}.

    Example::
        def remove_first(tokens):
            tokens.pop(0)
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
        print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321']

        label = Word(alphas)
        patt = label("LABEL") + OneOrMore(Word(nums))
        print(patt.parseString("AAB 123 321").dump())

        # Use pop() in a parse action to remove named result (note that corresponding value is not
        # removed from list form of results)
        def remove_LABEL(tokens):
            tokens.pop("LABEL")
            return tokens
        patt.addParseAction(remove_LABEL)
        print(patt.parseString("AAB 123 321").dump())
    prints::
        ['AAB', '123', '321']
        - LABEL: AAB

        ['AAB', '123', '321']
    """
    positional = args if args else [-1]
    for kw, kwval in kwargs.items():
        if kw != 'default':
            raise TypeError("pop() got an unexpected keyword argument '%s'" % kw)
        positional = (positional[0], kwval)

    key = positional[0]
    # The fallback value is used only for a non-integer key that is absent
    # and for which a fallback was actually supplied.
    if not isinstance(key, int) and len(positional) != 1 and key not in self:
        return positional[1]

    removed = self[key]
    del self[key]
    return removed
532
def get(self, key, defaultValue=None):
    """
    Look up a named result, falling back to C{defaultValue} (C{None} unless
    given) when no result is stored under C{key}.

    Similar to C{dict.get()}.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString("1999/12/31")
        print(result.get("year")) # -> '1999'
        print(result.get("hour", "not specified")) # -> 'not specified'
        print(result.get("hour")) # -> None
    """
    return self[key] if key in self else defaultValue
554
def insert( self, index, insStr ):
    """
    Insert C{insStr} at position C{index} in the list of parsed tokens.

    Similar to C{list.insert()}.  After the insertion, every named result whose
    recorded position is strictly greater than C{index} has that position
    increased by one.

    Example::
        print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']

        # use a parse action to insert the parse location in the front of the parsed results
        def insert_locn(locn, tokens):
            tokens.insert(0, locn)
        print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321']
    """
    self.__toklist.insert(index, insStr)

    # NOTE(review): a named result recorded exactly at `index` is left
    # unshifted (strict '>') - confirm this is intended.
    for occurrences in self.__tokdict.values():
        for slot, (value, position) in enumerate(occurrences):
            shift = 1 if position > index else 0
            occurrences[slot] = _ParseResultsWithOffset(value, position + shift)
574
576 """
577 Add single element to end of ParseResults list of elements.
578
579 Example::
580 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321']
581
582 # use a parse action to compute the sum of the parsed integers, and add it to the end
583 def append_sum(tokens):
584 tokens.append(sum(map(int, tokens)))
585 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444]
586 """
587 self.__toklist.append(item)
588
590 """
591 Add sequence of elements to end of ParseResults list of elements.
592
593 Example::
594 patt = OneOrMore(Word(alphas))
595
596 # use a parse action to append the reverse of the matched strings, to make a palindrome
597 def make_palindrome(tokens):
598 tokens.extend(reversed([t[::-1] for t in tokens]))
599 return ''.join(tokens)
600 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
601 """
602 if isinstance(itemseq, ParseResults):
603 self += itemseq
604 else:
605 self.__toklist.extend(itemseq)
606
608 """
609 Clear all elements and results names.
610 """
611 del self.__toklist[:]
612 self.__tokdict.clear()
613
615 try:
616 return self[name]
617 except KeyError:
618 return ""
619
620 if name in self.__tokdict:
621 if name not in self.__accumNames:
622 return self.__tokdict[name][-1][0]
623 else:
624 return ParseResults([ v[0] for v in self.__tokdict[name] ])
625 else:
626 return ""
627
629 ret = self.copy()
630 ret += other
631 return ret
632
634 if other.__tokdict:
635 offset = len(self.__toklist)
636 addoffset = lambda a: offset if a<0 else a+offset
637 otheritems = other.__tokdict.items()
638 otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
639 for (k,vlist) in otheritems for v in vlist]
640 for k,v in otherdictitems:
641 self[k] = v
642 if isinstance(v[0],ParseResults):
643 v[0].__parent = wkref(self)
644
645 self.__toklist += other.__toklist
646 self.__accumNames.update( other.__accumNames )
647 return self
648
650 if isinstance(other,int) and other == 0:
651
652 return self.copy()
653 else:
654
655 return other + self
656
658 return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )
659
661 return '[' + ', '.join(_ustr(i) if isinstance(i, ParseResults) else repr(i) for i in self.__toklist) + ']'
662
664 out = []
665 for item in self.__toklist:
666 if out and sep:
667 out.append(sep)
668 if isinstance( item, ParseResults ):
669 out += item._asStringList()
670 else:
671 out.append( _ustr(item) )
672 return out
673
675 """
676 Returns the parse results as a nested list of matching tokens, all converted to strings.
677
678 Example::
679 patt = OneOrMore(Word(alphas))
680 result = patt.parseString("sldkj lsdkj sldkj")
681 # even though the result prints in string-like form, it is actually a pyparsing ParseResults
682 print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
683
684 # Use asList() to create an actual list
685 result_list = result.asList()
686 print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
687 """
688 return [res.asList() if isinstance(res,ParseResults) else res for res in self.__toklist]
689
691 """
692 Returns the named parse results as a nested dictionary.
693
694 Example::
695 integer = Word(nums)
696 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
697
698 result = date_str.parseString('12/31/1999')
699 print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
700
701 result_dict = result.asDict()
702 print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
703
704 # even though a ParseResults supports dict-like access, sometime you just need to have a dict
705 import json
706 print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
707 print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
708 """
709 if PY_3:
710 item_fn = self.items
711 else:
712 item_fn = self.iteritems
713
714 def toItem(obj):
715 if isinstance(obj, ParseResults):
716 if obj.haskeys():
717 return obj.asDict()
718 else:
719 return [toItem(v) for v in obj]
720 else:
721 return obj
722
723 return dict((k,toItem(v)) for k,v in item_fn())
724
726 """
727 Returns a new copy of a C{ParseResults} object.
728 """
729 ret = ParseResults( self.__toklist )
730 ret.__tokdict = self.__tokdict.copy()
731 ret.__parent = self.__parent
732 ret.__accumNames.update( self.__accumNames )
733 ret.__name = self.__name
734 return ret
735
def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
    """
    (Deprecated) Returns the parse results as XML. Tags are created for tokens and lists that have defined results names.

    Parameters:
     - doctag - tag for the enclosing element; when C{None}, this object's own
       results name is used, and failing that C{"ITEM"} (or an empty string is
       returned when C{namedItemsOnly} is set)
     - namedItemsOnly - when true, tokens without a results name are omitted
     - indent - prefix written before this element's opening and closing tags
     - formatted - when false, newlines and indentation are left out entirely
    """
    # token position -> results name (a later entry for a position wins)
    names_by_pos = {}
    for key, occurrences in self.__tokdict.items():
        for occ in occurrences:
            names_by_pos[occ[1]] = key

    if formatted:
        newline = "\n"
        child_indent = indent + " "
    else:
        # collapse out indents if formatting is not desired
        newline = ""
        indent = ""
        child_indent = ""

    own_tag = doctag if doctag is not None else self.__name
    if not own_tag:
        if namedItemsOnly:
            return ""
        own_tag = "ITEM"

    # nested results are restricted to named items only while no explicit doctag was given
    nested_named_only = namedItemsOnly and doctag is None

    pieces = [ newline, indent, "<", own_tag, ">" ]
    for pos, tok in enumerate(self.__toklist):
        tag = names_by_pos.get(pos)
        if isinstance(tok, ParseResults):
            pieces.append(tok.asXML(tag, nested_named_only, child_indent, formatted))
            continue
        # individual token: unnamed ones are either skipped or tagged ITEM
        if not tag:
            if namedItemsOnly:
                continue
            tag = "ITEM"
        body = _xml_escape(_ustr(tok))
        pieces += [ newline, child_indent, "<", tag, ">", body, "</", tag, ">" ]

    pieces += [ newline, indent, "</", own_tag, ">" ]
    return "".join(pieces)
796
798 for k,vlist in self.__tokdict.items():
799 for v,loc in vlist:
800 if sub is v:
801 return k
802 return None
803
805 """
806 Returns the results name for this token expression. Useful when several
807 different expressions might match at a particular location.
808
809 Example::
810 integer = Word(nums)
811 ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
812 house_number_expr = Suppress('#') + Word(nums, alphanums)
813 user_data = (Group(house_number_expr)("house_number")
814 | Group(ssn_expr)("ssn")
815 | Group(integer)("age"))
816 user_info = OneOrMore(user_data)
817
818 result = user_info.parseString("22 111-22-3333 #221B")
819 for item in result:
820 print(item.getName(), ':', item[0])
821 prints::
822 age : 22
823 ssn : 111-22-3333
824 house_number : 221B
825 """
826 if self.__name:
827 return self.__name
828 elif self.__parent:
829 par = self.__parent()
830 if par:
831 return par.__lookup(self)
832 else:
833 return None
834 elif (len(self) == 1 and
835 len(self.__tokdict) == 1 and
836 self.__tokdict.values()[0][0][1] in (0,-1)):
837 return self.__tokdict.keys()[0]
838 else:
839 return None
840
def dump(self, indent='', depth=0, full=True):
    """
    Diagnostic method for listing out the contents of a C{ParseResults}.

    The first line is the list form of the results, prefixed with C{indent}.
    When C{full} is true it is followed either by one line per results name
    (sorted), or - if there are no names but some tokens are themselves
    C{ParseResults} - by one indexed entry per token.  C{depth} controls how
    far nested entries are padded, so the output can be embedded in a nested
    display of other data.

    Example::
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())
    prints::
        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    parts = [ indent + _ustr(self.asList()) ]
    if not full:
        return "".join(parts)

    if self.haskeys():
        pad = ' ' * depth
        for key, val in sorted(self.items()):
            # the list line is always present, so every entry starts on a new line
            parts.append('\n')
            parts.append( "%s%s- %s: " % (indent, pad, key) )
            if isinstance(val, ParseResults) and val:
                parts.append( val.dump(indent, depth+1) )
            else:
                # plain values and empty nested results are shown in string form
                parts.append( _ustr(val) )
    elif any(isinstance(sub, ParseResults) for sub in self):
        for pos, sub in enumerate(self):
            if isinstance(sub, ParseResults):
                text = sub.dump(indent, depth+1)
            else:
                text = _ustr(sub)
            parts.append("\n%s%s[%d]:\n%s%s%s" % (indent, (' '*(depth)), pos, indent, (' '*(depth+1)), text))

    return "".join(parts)
885
def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results in their nested-list form, using the C{pprint} module.
    Any extra positional or keyword arguments are handed on unchanged to
    C{pprint.pprint}. (U{http://docs.python.org/3/library/pprint.html#pprint.pprint})

    Example::
        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)
    prints::
        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    nested = self.asList()
    pprint.pprint(nested, *args, **kwargs)
908
909
911 return ( self.__toklist,
912 ( self.__tokdict.copy(),
913 self.__parent is not None and self.__parent() or None,
914 self.__accumNames,
915 self.__name ) )
916
918 self.__toklist = state[0]
919 (self.__tokdict,
920 par,
921 inAccumNames,
922 self.__name) = state[1]
923 self.__accumNames = {}
924 self.__accumNames.update(inAccumNames)
925 if par is not None:
926 self.__parent = wkref(par)
927 else:
928 self.__parent = None
929
931 return self.__toklist, self.__name, self.__asList, self.__modal
932
934 return (dir(type(self)) + list(self.keys()))
935
# The abstract base classes live in collections.abc since Python 3.3, and the
# old aliases in the collections module were removed in Python 3.10; fall back
# to the old location only where collections.abc does not exist.
try:
    from collections.abc import MutableMapping as _MutableMapping
except ImportError:
    from collections import MutableMapping as _MutableMapping
_MutableMapping.register(ParseResults)
937
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
    The first column is number 1.

    The column is the distance from C{loc} back to the newline that precedes it
    (or to the start of the string).  A C{loc} that sits on a newline character
    therefore reports that character's real column on its own line, which keeps
    the result consistent with the line number computed by counting the
    newlines before C{loc}; it was previously reported as column 1.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
    on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
    consistent view of the parsed string, the parse location, and line and column
    positions within the parsed string.
    """
    # rfind returns -1 when no newline precedes loc, which yields loc + 1
    return loc - strg.rfind("\n", 0, loc)
950
952 """Returns current line number within a string, counting newlines as line separators.
953 The first line is number 1.
954
955 Note: the default parsing behavior is to expand tabs in the input string
956 before starting the parsing process. See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
957 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
958 consistent view of the parsed string, the parse location, and line and column
959 positions within the parsed string.
960 """
961 return strg.count("\n",0,loc) + 1
962
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

    The returned text runs from just after the last newline before C{loc} up to,
    but not including, the first newline at or after C{loc} (or to the end of
    the string when there is none).
    """
    start = strg.rfind("\n", 0, loc) + 1
    end = strg.find("\n", loc)
    return strg[start:end] if end >= 0 else strg[start:]
972
974 print (("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )))
975
977 print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))
978
980 print ("Exception raised:" + _ustr(exc))
981
983 """'Do-nothing' debug action, to suppress debugging output during parsing."""
984 pass
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008 'decorator to trim function calls to match the arity of the target'
1010 if func in singleArgBuiltins:
1011 return lambda s,l,t: func(t)
1012 limit = [0]
1013 foundArity = [False]
1014
1015
1016 if system_version[:2] >= (3,5):
1017 def extract_stack():
1018
1019 offset = -3 if system_version == (3,5,0) else -2
1020 frame_summary = traceback.extract_stack()[offset]
1021 return [(frame_summary.filename, frame_summary.lineno)]
1022 def extract_tb(tb):
1023 frames = traceback.extract_tb(tb)
1024 frame_summary = frames[-1]
1025 return [(frame_summary.filename, frame_summary.lineno)]
1026 else:
1027 extract_stack = traceback.extract_stack
1028 extract_tb = traceback.extract_tb
1029
1030
1031
1032
1033 LINE_DIFF = 6
1034
1035
1036 this_line = extract_stack()[-1]
1037 pa_call_line_synth = (this_line[0], this_line[1]+LINE_DIFF)
1038
1039 def wrapper(*args):
1040 while 1:
1041 try:
1042 ret = func(*args[limit[0]:])
1043 foundArity[0] = True
1044 return ret
1045 except TypeError:
1046
1047 if foundArity[0]:
1048 raise
1049 else:
1050 try:
1051 tb = sys.exc_info()[-1]
1052 if not extract_tb(tb)[-1][:2] == pa_call_line_synth:
1053 raise
1054 finally:
1055 del tb
1056
1057 if limit[0] <= maxargs:
1058 limit[0] += 1
1059 continue
1060 raise
1061
1062
1063 func_name = "<parse action>"
1064 try:
1065 func_name = getattr(func, '__name__',
1066 getattr(func, '__class__').__name__)
1067 except Exception:
1068 func_name = str(func)
1069 wrapper.__name__ = func_name
1070
1071 return wrapper
1072
1074 """Abstract base level parser element class."""
1075 DEFAULT_WHITE_CHARS = " \n\t\r"
1076 verbose_stacktrace = False
1077
1078 @staticmethod
1080 r"""
1081 Overrides the default whitespace chars
1082
1083 Example::
1084 # default whitespace chars are space, <TAB> and newline
1085 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']
1086
1087 # change to just treat newline as significant
1088 ParserElement.setDefaultWhitespaceChars(" \t")
1089 OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
1090 """
1091 ParserElement.DEFAULT_WHITE_CHARS = chars
1092
1093 @staticmethod
1095 """
1096 Set class to be used for inclusion of string literals into a parser.
1097
1098 Example::
1099 # default literal class used is Literal
1100 integer = Word(nums)
1101 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1102
1103 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1104
1105
1106 # change to Suppress
1107 ParserElement.inlineLiteralsUsing(Suppress)
1108 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1109
1110 date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
1111 """
1112 ParserElement._literalStringClass = cls
1113
1115 self.parseAction = list()
1116 self.failAction = None
1117
1118 self.strRepr = None
1119 self.resultsName = None
1120 self.saveAsList = savelist
1121 self.skipWhitespace = True
1122 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1123 self.copyDefaultWhiteChars = True
1124 self.mayReturnEmpty = False
1125 self.keepTabs = False
1126 self.ignoreExprs = list()
1127 self.debug = False
1128 self.streamlined = False
1129 self.mayIndexError = True
1130 self.errmsg = ""
1131 self.modalResults = True
1132 self.debugActions = ( None, None, None )
1133 self.re = None
1134 self.callPreparse = True
1135 self.callDuringTry = False
1136
1138 """
1139 Make a copy of this C{ParserElement}. Useful for defining different parse actions
1140 for the same parsing pattern, using copies of the original parse element.
1141
1142 Example::
1143 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1144 integerK = integer.copy().addParseAction(lambda toks: toks[0]*1024) + Suppress("K")
1145 integerM = integer.copy().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1146
1147 print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))
1148 prints::
1149 [5120, 100, 655360, 268435456]
1150 Equivalent form of C{expr.copy()} is just C{expr()}::
1151 integerM = integer().addParseAction(lambda toks: toks[0]*1024*1024) + Suppress("M")
1152 """
1153 cpy = copy.copy( self )
1154 cpy.parseAction = self.parseAction[:]
1155 cpy.ignoreExprs = self.ignoreExprs[:]
1156 if self.copyDefaultWhiteChars:
1157 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
1158 return cpy
1159
1161 """
1162 Define name for this expression, makes exception messages clearer.
1163
1164 Example::
1165 Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
1166 Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
1167 """
1168 self.name = name
1169 self.errmsg = "Expected " + self.name
1170 if hasattr(self,"exception"):
1171 self.exception.msg = self.errmsg
1172 return self
1173
1175 """
1176 Define name for referencing matching tokens as a nested attribute
1177 of the returned parse results.
1178 NOTE: this returns a *copy* of the original C{ParserElement} object;
1179 this is so that the client can define a basic element, such as an
1180 integer, and reference it in multiple places with different names.
1181
1182 You can also set results names using the abbreviated syntax,
1183 C{expr("name")} in place of C{expr.setResultsName("name")} -
1184 see L{I{__call__}<__call__>}.
1185
1186 Example::
1187 date_str = (integer.setResultsName("year") + '/'
1188 + integer.setResultsName("month") + '/'
1189 + integer.setResultsName("day"))
1190
1191 # equivalent form:
1192 date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
1193 """
1194 newself = self.copy()
1195 if name.endswith("*"):
1196 name = name[:-1]
1197 listAllMatches=True
1198 newself.resultsName = name
1199 newself.modalResults = not listAllMatches
1200 return newself
1201
1203 """Method to invoke the Python pdb debugger when this element is
1204 about to be parsed. Set C{breakFlag} to True to enable, False to
1205 disable.
1206 """
1207 if breakFlag:
1208 _parseMethod = self._parse
1209 def breaker(instring, loc, doActions=True, callPreParse=True):
1210 import pdb
1211 pdb.set_trace()
1212 return _parseMethod( instring, loc, doActions, callPreParse )
1213 breaker._originalParseMethod = _parseMethod
1214 self._parse = breaker
1215 else:
1216 if hasattr(self._parse,"_originalParseMethod"):
1217 self._parse = self._parse._originalParseMethod
1218 return self
1219
1221 """
1222 Define action to perform when successfully matching parse element definition.
1223 Parse action fn is a callable method with 0-3 arguments, called as C{fn(s,loc,toks)},
1224 C{fn(loc,toks)}, C{fn(toks)}, or just C{fn()}, where:
1225 - s = the original string being parsed (see note below)
1226 - loc = the location of the matching substring
1227 - toks = a list of the matched tokens, packaged as a C{L{ParseResults}} object
1228 If the functions in fns modify the tokens, they can return them as the return
1229 value from fn, and the modified list of tokens will replace the original.
1230 Otherwise, fn does not need to return any value.
1231
1232 Optional keyword arguments:
1233 - callDuringTry = (default=C{False}) indicate if parse action should be run during lookaheads and alternate testing
1234
1235 Note: the default parsing behavior is to expand tabs in the input string
1236 before starting the parsing process. See L{I{parseString}<parseString>} for more information
1237 on parsing strings containing C{<TAB>}s, and suggested methods to maintain a
1238 consistent view of the parsed string, the parse location, and line and column
1239 positions within the parsed string.
1240
1241 Example::
1242 integer = Word(nums)
1243 date_str = integer + '/' + integer + '/' + integer
1244
1245 date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']
1246
1247 # use parse action to convert to ints at parse time
1248 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1249 date_str = integer + '/' + integer + '/' + integer
1250
1251 # note that integer fields are now ints, not strings
1252 date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
1253 """
1254 self.parseAction = list(map(_trim_arity, list(fns)))
1255 self.callDuringTry = kwargs.get("callDuringTry", False)
1256 return self
1257
1259 """
1260 Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.
1261
1262 See examples in L{I{copy}<copy>}.
1263 """
1264 self.parseAction += list(map(_trim_arity, list(fns)))
1265 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1266 return self
1267
1269 """Add a boolean predicate function to expression's list of parse actions. See
1270 L{I{setParseAction}<setParseAction>} for function call signatures. Unlike C{setParseAction},
1271 functions passed to C{addCondition} need to return boolean success/fail of the condition.
1272
1273 Optional keyword arguments:
1274 - message = define a custom message to be used in the raised exception
1275 - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
1276
1277 Example::
1278 integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
1279 year_int = integer.copy()
1280 year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
1281 date_str = year_int + '/' + integer + '/' + integer
1282
1283 result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
1284 """
1285 msg = kwargs.get("message", "failed user-defined condition")
1286 exc_type = ParseFatalException if kwargs.get("fatal", False) else ParseException
1287 for fn in fns:
1288 def pa(s,l,t):
1289 if not bool(_trim_arity(fn)(s,l,t)):
1290 raise exc_type(s,l,msg)
1291 self.parseAction.append(pa)
1292 self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
1293 return self
1294
1296 """Define action to perform if parsing fails at this expression.
1297 Fail action fn is a callable function that takes the arguments
1298 C{fn(s,loc,expr,err)} where:
1299 - s = string being parsed
1300 - loc = location where expression match was attempted and failed
1301 - expr = the parse expression that failed
1302 - err = the exception thrown
1303 The function returns no value. It may throw C{L{ParseFatalException}}
1304 if it is desired to stop parsing immediately."""
1305 self.failAction = fn
1306 return self
1307
1309 exprsFound = True
1310 while exprsFound:
1311 exprsFound = False
1312 for e in self.ignoreExprs:
1313 try:
1314 while 1:
1315 loc,dummy = e._parse( instring, loc )
1316 exprsFound = True
1317 except ParseException:
1318 pass
1319 return loc
1320
1322 if self.ignoreExprs:
1323 loc = self._skipIgnorables( instring, loc )
1324
1325 if self.skipWhitespace:
1326 wt = self.whiteChars
1327 instrlen = len(instring)
1328 while loc < instrlen and instring[loc] in wt:
1329 loc += 1
1330
1331 return loc
1332
1333 - def parseImpl( self, instring, loc, doActions=True ):
1335
1336 - def postParse( self, instring, loc, tokenlist ):
1338
1339
def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Attempt to match this expression at C{loc} without consulting any cache.

    Steps, in order: optional start-debug callback, optional C{preParse},
    C{parseImpl}, C{postParse}, wrapping in C{ParseResults}, parse actions,
    optional success-debug callback.

    Parameters:
     - instring - the string being parsed
     - loc - location at which the match is attempted
     - doActions - (default=C{True}) when false, parse actions run only if
       C{self.callDuringTry} is set
     - callPreParse - (default=C{True}) when true and C{self.callPreparse} is
       set, C{preParse} is called first and its result is used as the start

    Returns: C{(loc, tokens)} - the location returned by C{parseImpl} and the
    resulting C{ParseResults}.

    Raises: C{ParseException} in place of an C{IndexError} coming out of
    C{parseImpl} on the guarded paths; any C{ParseBaseException} is
    re-raised after the exception-debug and fail actions have been called.
    """
    debugging = self.debug
    # The "instrumented" path is taken when debugging or a fail action is set.
    instrumented = debugging or self.failAction

    if instrumented:
        on_start = self.debugActions[0]
        if on_start:
            on_start(instring, loc, self)

    if callPreParse and self.callPreparse:
        preloc = self.preParse(instring, loc)
    else:
        preloc = loc
    tokensStart = preloc

    if instrumented:
        try:
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        except ParseBaseException as err:
            on_error = self.debugActions[2]
            if on_error:
                on_error(instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    elif self.mayIndexError or loc >= len(instring):
        # Note: the bounds test uses the incoming loc, not the pre-parsed one.
        try:
            loc, tokens = self.parseImpl(instring, preloc, doActions)
        except IndexError:
            raise ParseException(instring, len(instring), self.errmsg, self)
    else:
        # IndexError is not translated on this path.
        loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)

    if self.parseAction and (doActions or self.callDuringTry):

        def run_actions(results):
            # Each action sees the current results; a non-None return value
            # replaces them (re-wrapped in a new ParseResults).
            for fn in self.parseAction:
                replacement = fn(instring, tokensStart, results)
                if replacement is not None:
                    results = ParseResults(replacement,
                                           self.resultsName,
                                           asList=self.saveAsList and isinstance(replacement, (ParseResults, list)),
                                           modal=self.modalResults)
            return results

        if debugging:
            try:
                retTokens = run_actions(retTokens)
            except ParseBaseException as err:
                on_error = self.debugActions[2]
                if on_error:
                    on_error(instring, tokensStart, self, err)
                raise
        else:
            retTokens = run_actions(retTokens)

    if debugging:
        on_success = self.debugActions[1]
        if on_success:
            on_success(instring, tokensStart, loc, self, retTokens)

    return loc, retTokens
1411
1417
1419 try:
1420 self.tryParse(instring, loc)
1421 except (ParseException, IndexError):
1422 return False
1423 else:
1424 return True
1425
1428 cache = {}
1429 self.not_in_cache = not_in_cache = object()
1430
1431 def get(self, key):
1432 return cache.get(key, not_in_cache)
1433
1434 def set(self, key, value):
1435 cache[key] = value
1436
1437 def clear(self):
1438 cache.clear()
1439
1440 self.get = types.MethodType(get, self)
1441 self.set = types.MethodType(set, self)
1442 self.clear = types.MethodType(clear, self)
1443
1444 if _OrderedDict is not None:
1447 self.not_in_cache = not_in_cache = object()
1448
1449 cache = _OrderedDict()
1450
1451 def get(self, key):
1452 return cache.get(key, not_in_cache)
1453
1454 def set(self, key, value):
1455 cache[key] = value
1456 if len(cache) > size:
1457 cache.popitem(False)
1458
1459 def clear(self):
1460 cache.clear()
1461
1462 self.get = types.MethodType(get, self)
1463 self.set = types.MethodType(set, self)
1464 self.clear = types.MethodType(clear, self)
1465
1466 else:
1469 self.not_in_cache = not_in_cache = object()
1470
1471 cache = {}
1472 key_fifo = collections.deque([], size)
1473
1474 def get(self, key):
1475 return cache.get(key, not_in_cache)
1476
1477 def set(self, key, value):
1478 cache[key] = value
1479 if len(cache) > size:
1480 cache.pop(key_fifo.popleft(), None)
1481 key_fifo.append(key)
1482
1483 def clear(self):
1484 cache.clear()
1485 key_fifo.clear()
1486
1487 self.get = types.MethodType(get, self)
1488 self.set = types.MethodType(set, self)
1489 self.clear = types.MethodType(clear, self)
1490
1491
1492 packrat_cache = {}
1493 packrat_cache_lock = RLock()
1494 packrat_cache_stats = [0, 0]
1495
1496
1497
def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing front end for C{_parseNoCache}.

    The cache key is C{(self, instring, loc, callPreParse, doActions)}. Both
    successful results and raised C{ParseBaseException}s are stored. All
    cache access happens while holding C{ParserElement.packrat_cache_lock}.

    Returns: C{(loc, tokens)}; on a cache hit the tokens are a copy of the
    stored results.

    Raises: the cached exception on a hit that stored one, or whatever
    C{_parseNoCache} raises on a miss.
    """
    hit_slot, miss_slot = 0, 1
    key = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        memo = ParserElement.packrat_cache
        cached = memo.get(key)

        if cached is not memo.not_in_cache:
            ParserElement.packrat_cache_stats[hit_slot] += 1
            if isinstance(cached, Exception):
                raise cached
            # Hand out a copy so callers cannot mutate the cached results.
            return (cached[0], cached[1].copy())

        ParserElement.packrat_cache_stats[miss_slot] += 1
        try:
            fresh = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # Store a new exception built from the same args, then propagate
            # the original.
            memo.set(key, pe.__class__(*pe.args))
            raise
        memo.set(key, (fresh[0], fresh[1].copy()))
        return fresh
1520
1521 _parse = _parseNoCache
1522
1523 @staticmethod
1527
1528 _packratEnabled = False
1529 @staticmethod
1531 """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1532 Repeated parse attempts at the same string location (which happens
1533 often in many complex grammars) can immediately return a cached value,
1534 instead of re-executing parsing/validating code. Memoizing is done of
1535 both valid results and parsing exceptions.
1536
1537 Parameters:
1538 - cache_size_limit - (default=C{128}) - if an integer value is provided
1539 will limit the size of the packrat cache; if None is passed, then
1540 the cache size will be unbounded; if 0 is passed, the cache will
1541 be effectively disabled.
1542
1543 This speedup may break existing programs that use parse actions that
1544 have side-effects. For this reason, packrat parsing is disabled when
1545 you first import pyparsing. To activate the packrat feature, your
1546 program must call the class method C{ParserElement.enablePackrat()}. If
1547 your program uses C{psyco} to "compile as you go", you must call
1548 C{enablePackrat} before calling C{psyco.full()}. If you do not do this,
1549 Python will crash. For best results, call C{enablePackrat()} immediately
1550 after importing pyparsing.
1551
1552 Example::
1553 import pyparsing
1554 pyparsing.ParserElement.enablePackrat()
1555 """
1556 if not ParserElement._packratEnabled:
1557 ParserElement._packratEnabled = True
1558 if cache_size_limit is None:
1559 ParserElement.packrat_cache = ParserElement._UnboundedCache()
1560 else:
1561 ParserElement.packrat_cache = ParserElement._FifoCache(cache_size_limit)
1562 ParserElement._parse = ParserElement._parseCache
1563
1565 """
1566 Execute the parse expression with the given string.
1567 This is the main interface to the client code, once the complete
1568 expression has been built.
1569
1570 If you want the grammar to require that the entire input string be
1571 successfully parsed, then set C{parseAll} to True (equivalent to ending
1572 the grammar with C{L{StringEnd()}}).
1573
1574 Note: C{parseString} implicitly calls C{expandtabs()} on the input string,
1575 in order to report proper column numbers in parse actions.
1576 If the input string contains tabs and
1577 the grammar uses parse actions that use the C{loc} argument to index into the
1578 string being parsed, you can ensure you have a consistent view of the input
1579 string by:
1580 - calling C{parseWithTabs} on your grammar before calling C{parseString}
1581 (see L{I{parseWithTabs}<parseWithTabs>})
1582 - define your parse action using the full C{(s,loc,toks)} signature, and
1583 reference the input string using the parse action's C{s} argument
1584 - explicitly expand the tabs in your input string before calling
1585 C{parseString}
1586
1587 Example::
1588 Word('a').parseString('aaaaabaaa') # -> ['aaaaa']
1589 Word('a').parseString('aaaaabaaa', parseAll=True) # -> Exception: Expected end of text
1590 """
1591 ParserElement.resetCache()
1592 if not self.streamlined:
1593 self.streamline()
1594
1595 for e in self.ignoreExprs:
1596 e.streamline()
1597 if not self.keepTabs:
1598 instring = instring.expandtabs()
1599 try:
1600 loc, tokens = self._parse( instring, 0 )
1601 if parseAll:
1602 loc = self.preParse( instring, loc )
1603 se = Empty() + StringEnd()
1604 se._parse( instring, loc )
1605 except ParseBaseException as exc:
1606 if ParserElement.verbose_stacktrace:
1607 raise
1608 else:
1609
1610 raise exc
1611 else:
1612 return tokens
1613
1615 """
1616 Scan the input string for expression matches. Each match will return the
1617 matching tokens, start location, and end location. May be called with optional
1618 C{maxMatches} argument, to clip scanning after 'n' matches are found. If
1619 C{overlap} is specified, then overlapping matches will be reported.
1620
1621 Note that the start and end locations are reported relative to the string
1622 being parsed. See L{I{parseString}<parseString>} for more information on parsing
1623 strings with embedded tabs.
1624
1625 Example::
1626 source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
1627 print(source)
1628 for tokens,start,end in Word(alphas).scanString(source):
1629 print(' '*start + '^'*(end-start))
1630 print(' '*start + tokens[0])
1631
1632 prints::
1633
1634 sldjf123lsdjjkf345sldkjf879lkjsfd987
1635 ^^^^^
1636 sldjf
1637 ^^^^^^^
1638 lsdjjkf
1639 ^^^^^^
1640 sldkjf
1641 ^^^^^^
1642 lkjsfd
1643 """
1644 if not self.streamlined:
1645 self.streamline()
1646 for e in self.ignoreExprs:
1647 e.streamline()
1648
1649 if not self.keepTabs:
1650 instring = _ustr(instring).expandtabs()
1651 instrlen = len(instring)
1652 loc = 0
1653 preparseFn = self.preParse
1654 parseFn = self._parse
1655 ParserElement.resetCache()
1656 matches = 0
1657 try:
1658 while loc <= instrlen and matches < maxMatches:
1659 try:
1660 preloc = preparseFn( instring, loc )
1661 nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
1662 except ParseException:
1663 loc = preloc+1
1664 else:
1665 if nextLoc > loc:
1666 matches += 1
1667 yield tokens, preloc, nextLoc
1668 if overlap:
1669 nextloc = preparseFn( instring, loc )
1670 if nextloc > loc:
1671 loc = nextLoc
1672 else:
1673 loc += 1
1674 else:
1675 loc = nextLoc
1676 else:
1677 loc = preloc+1
1678 except ParseBaseException as exc:
1679 if ParserElement.verbose_stacktrace:
1680 raise
1681 else:
1682
1683 raise exc
1684
1727
1729 """
1730 Another extension to C{L{scanString}}, simplifying the access to the tokens found
1731 to match the given parse expression. May be called with optional
1732 C{maxMatches} argument, to clip searching after 'n' matches are found.
1733
1734 Example::
1735 # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
1736 cap_word = Word(alphas.upper(), alphas.lower())
1737
1738 print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))
1739 prints::
1740 ['More', 'Iron', 'Lead', 'Gold', 'I']
1741 """
1742 try:
1743 return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1744 except ParseBaseException as exc:
1745 if ParserElement.verbose_stacktrace:
1746 raise
1747 else:
1748
1749 raise exc
1750
def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator that splits C{instring}, treating every match of this
    expression (as found by C{L{scanString}}) as a separator.

    Parameters:
     - instring - the string to split
     - maxsplit - passed to C{scanString} as C{maxMatches}, limiting the
       number of separators used
     - includeSeparators - (default=C{False}) when true, the first token of
       each separator match is yielded between the surrounding pieces

    Example::
        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))
    prints::
        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    cursor = 0
    for toks, start, end in self.scanString(instring, maxMatches=maxsplit):
        # Text between the previous separator and this one.
        yield instring[cursor:start]
        if includeSeparators:
            yield toks[0]
        cursor = end
    # Whatever follows the final separator (possibly the empty string).
    yield instring[cursor:]
1772
1774 """
1775 Implementation of + operator - returns C{L{And}}
1776 """
1777 if isinstance( other, basestring ):
1778 other = ParserElement._literalStringClass( other )
1779 if not isinstance( other, ParserElement ):
1780 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1781 SyntaxWarning, stacklevel=2)
1782 return None
1783 return And( [ self, other ] )
1784
1786 """
1787 Implementation of + operator when left operand is not a C{L{ParserElement}}
1788 """
1789 if isinstance( other, basestring ):
1790 other = ParserElement._literalStringClass( other )
1791 if not isinstance( other, ParserElement ):
1792 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1793 SyntaxWarning, stacklevel=2)
1794 return None
1795 return other + self
1796
1798 """
1799 Implementation of - operator, returns C{L{And}} with error stop
1800 """
1801 if isinstance( other, basestring ):
1802 other = ParserElement._literalStringClass( other )
1803 if not isinstance( other, ParserElement ):
1804 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1805 SyntaxWarning, stacklevel=2)
1806 return None
1807 return And( [ self, And._ErrorStop(), other ] )
1808
1810 """
1811 Implementation of - operator when left operand is not a C{L{ParserElement}}
1812 """
1813 if isinstance( other, basestring ):
1814 other = ParserElement._literalStringClass( other )
1815 if not isinstance( other, ParserElement ):
1816 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1817 SyntaxWarning, stacklevel=2)
1818 return None
1819 return other - self
1820
1822 """
1823 Implementation of * operator, allows use of C{expr * 3} in place of
1824 C{expr + expr + expr}. Expressions may also be multiplied by a 2-integer
1825 tuple, similar to C{{min,max}} multipliers in regular expressions. Tuples
1826 may also include C{None} as in:
1827 - C{expr*(n,None)} or C{expr*(n,)} is equivalent
1828 to C{expr*n + L{ZeroOrMore}(expr)}
1829 (read as "at least n instances of C{expr}")
1830 - C{expr*(None,n)} is equivalent to C{expr*(0,n)}
1831 (read as "0 to n instances of C{expr}")
1832 - C{expr*(None,None)} is equivalent to C{L{ZeroOrMore}(expr)}
1833 - C{expr*(1,None)} is equivalent to C{L{OneOrMore}(expr)}
1834
1835 Note that C{expr*(None,n)} does not raise an exception if
1836 more than n exprs exist in the input stream; that is,
1837 C{expr*(None,n)} does not enforce a maximum number of expr
1838 occurrences. If this behavior is desired, then write
1839 C{expr*(None,n) + ~expr}
1840 """
1841 if isinstance(other,int):
1842 minElements, optElements = other,0
1843 elif isinstance(other,tuple):
1844 other = (other + (None, None))[:2]
1845 if other[0] is None:
1846 other = (0, other[1])
1847 if isinstance(other[0],int) and other[1] is None:
1848 if other[0] == 0:
1849 return ZeroOrMore(self)
1850 if other[0] == 1:
1851 return OneOrMore(self)
1852 else:
1853 return self*other[0] + ZeroOrMore(self)
1854 elif isinstance(other[0],int) and isinstance(other[1],int):
1855 minElements, optElements = other
1856 optElements -= minElements
1857 else:
1858 raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1859 else:
1860 raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1861
1862 if minElements < 0:
1863 raise ValueError("cannot multiply ParserElement by negative value")
1864 if optElements < 0:
1865 raise ValueError("second tuple value must be greater or equal to first tuple value")
1866 if minElements == optElements == 0:
1867 raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1868
1869 if (optElements):
1870 def makeOptionalList(n):
1871 if n>1:
1872 return Optional(self + makeOptionalList(n-1))
1873 else:
1874 return Optional(self)
1875 if minElements:
1876 if minElements == 1:
1877 ret = self + makeOptionalList(optElements)
1878 else:
1879 ret = And([self]*minElements) + makeOptionalList(optElements)
1880 else:
1881 ret = makeOptionalList(optElements)
1882 else:
1883 if minElements == 1:
1884 ret = self
1885 else:
1886 ret = And([self]*minElements)
1887 return ret
1888
1891
1893 """
1894 Implementation of | operator - returns C{L{MatchFirst}}
1895 """
1896 if isinstance( other, basestring ):
1897 other = ParserElement._literalStringClass( other )
1898 if not isinstance( other, ParserElement ):
1899 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1900 SyntaxWarning, stacklevel=2)
1901 return None
1902 return MatchFirst( [ self, other ] )
1903
1905 """
1906 Implementation of | operator when left operand is not a C{L{ParserElement}}
1907 """
1908 if isinstance( other, basestring ):
1909 other = ParserElement._literalStringClass( other )
1910 if not isinstance( other, ParserElement ):
1911 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1912 SyntaxWarning, stacklevel=2)
1913 return None
1914 return other | self
1915
1917 """
1918 Implementation of ^ operator - returns C{L{Or}}
1919 """
1920 if isinstance( other, basestring ):
1921 other = ParserElement._literalStringClass( other )
1922 if not isinstance( other, ParserElement ):
1923 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1924 SyntaxWarning, stacklevel=2)
1925 return None
1926 return Or( [ self, other ] )
1927
1929 """
1930 Implementation of ^ operator when left operand is not a C{L{ParserElement}}
1931 """
1932 if isinstance( other, basestring ):
1933 other = ParserElement._literalStringClass( other )
1934 if not isinstance( other, ParserElement ):
1935 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1936 SyntaxWarning, stacklevel=2)
1937 return None
1938 return other ^ self
1939
1941 """
1942 Implementation of & operator - returns C{L{Each}}
1943 """
1944 if isinstance( other, basestring ):
1945 other = ParserElement._literalStringClass( other )
1946 if not isinstance( other, ParserElement ):
1947 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1948 SyntaxWarning, stacklevel=2)
1949 return None
1950 return Each( [ self, other ] )
1951
1953 """
1954 Implementation of & operator when left operand is not a C{L{ParserElement}}
1955 """
1956 if isinstance( other, basestring ):
1957 other = ParserElement._literalStringClass( other )
1958 if not isinstance( other, ParserElement ):
1959 warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1960 SyntaxWarning, stacklevel=2)
1961 return None
1962 return other & self
1963
1965 """
1966 Implementation of ~ operator - returns C{L{NotAny}}
1967 """
1968 return NotAny( self )
1969
1971 """
1972 Shortcut for C{L{setResultsName}}, with C{listAllMatches=default}.
1973
1974 If C{name} is given with a trailing C{'*'} character, then C{listAllMatches} will be
1975 passed as C{True}.
1976
1977 If C{name} is omitted, same as calling C{L{copy}}.
1978
1979 Example::
1980 # these are equivalent
1981 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1982 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1983 """
1984 if name is not None:
1985 return self.setResultsName(name)
1986 else:
1987 return self.copy()
1988
1990 """
1991 Suppresses the output of this C{ParserElement}; useful to keep punctuation from
1992 cluttering up returned output.
1993 """
1994 return Suppress( self )
1995
1997 """
1998 Disables the skipping of whitespace before matching the characters in the
1999 C{ParserElement}'s defined pattern. This is normally only used internally by
2000 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
2001 """
2002 self.skipWhitespace = False
2003 return self
2004
2006 """
2007 Overrides the default whitespace chars
2008 """
2009 self.skipWhitespace = True
2010 self.whiteChars = chars
2011 self.copyDefaultWhiteChars = False
2012 return self
2013
2015 """
2016 Overrides default behavior to expand C{<TAB>}s to spaces before parsing the input string.
2017 Must be called before C{parseString} when the input grammar contains elements that
2018 match C{<TAB>} characters.
2019 """
2020 self.keepTabs = True
2021 return self
2022
2024 """
2025 Define expression to be ignored (e.g., comments) while doing pattern
2026 matching; may be called repeatedly, to define multiple comment or other
2027 ignorable patterns.
2028
2029 Example::
2030 patt = OneOrMore(Word(alphas))
2031 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']
2032
2033 patt.ignore(cStyleComment)
2034 patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
2035 """
2036 if isinstance(other, basestring):
2037 other = Suppress(other)
2038
2039 if isinstance( other, Suppress ):
2040 if other not in self.ignoreExprs:
2041 self.ignoreExprs.append(other)
2042 else:
2043 self.ignoreExprs.append( Suppress( other.copy() ) )
2044 return self
2045
def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Install the three debugging callbacks and switch debugging on.

    Any callback passed as a false value is replaced by the corresponding
    module default. Returns C{self} so calls can be chained.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_exception = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_exception)
    self.debug = True
    return self
2055
2057 """
2058 Enable display of debugging messages while doing pattern matching.
2059 Set C{flag} to True to enable, False to disable.
2060 """
2061 if flag:
2062 self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
2063 else:
2064 self.debug = False
2065 return self
2066
2069
2072
2074 self.streamlined = True
2075 self.strRepr = None
2076 return self
2077
2080
def validate( self, validateTrace=[] ):
    """
    Check defined expressions for valid structure and for infinite recursive
    definitions, by calling C{checkRecursion} with an empty element list.

    C{validateTrace} is accepted but not used by this implementation.
    """
    # Always start the recursion check from a fresh, empty list.
    self.checkRecursion( list() )
2086
def parseFile(self, file_or_filename, parseAll=False):
    """
    Run C{L{parseString}} on the contents of a file.

    Parameters:
     - file_or_filename - an object with a C{read()} method, or otherwise a
       name passed to C{open()}; in the latter case the file is opened in
       text mode, read completely and closed before parsing
     - parseAll - (default=C{False}) forwarded to C{parseString}

    Returns: whatever C{parseString} returns.

    Raises: any C{ParseBaseException} raised by C{parseString}.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # No usable read(): treat the argument as a filename.
        with open(file_or_filename, "r") as handle:
            text = handle.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if not ParserElement.verbose_stacktrace:
            # Re-raise via the name rather than a bare raise.
            raise exc
        raise
2106
2108 if isinstance(other, ParserElement):
2109 return self is other or vars(self) == vars(other)
2110 elif isinstance(other, basestring):
2111 return self.matches(other)
2112 else:
2113 return super(ParserElement,self)==other
2114
2116 return not (self == other)
2117
2119 return hash(id(self))
2120
2122 return self == other
2123
2125 return not (self == other)
2126
def matches(self, testString, parseAll=True):
    """
    Quick yes/no test of this parser against a test string. Handy for simple
    inline microtests of sub-expressions while building up a larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests

    Returns: C{True} if C{parseString} completes, C{False} if it raises a
    C{ParseBaseException}.

    Example::
        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    else:
        return True
2145
def runTests(self, tests, parseAll=True, comment='#', fullDump=True, printResults=True, failureTests=False):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default=C{True}) - flag to pass to C{L{parseString}} when running tests
     - comment - (default=C{'#'}) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default=C{True}) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default=C{True}) prints test output to stdout
     - failureTests - (default=C{False}) indicates if these tests are expected to fail parsing

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if C{failureTests} is True), and results is a list of C{(test_string, result)}
    tuples, one per executed test; C{result} is the value returned by C{parseString}, or the
    C{ParseBaseException} that was raised when the parse failed

    Example::
        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")
    prints::
        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success
    """
    if isinstance(tests, basestring):
        # call strip() on each line itself rather than through str.strip, so that
        # any basestring type accepted by the check above (e.g. Python 2 unicode)
        # is handled without a TypeError
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    allResults = []
    comments = []
    success = True
    for t in tests:
        # comment lines, and blank lines inside a run of comments, are collected
        # and emitted as the heading of the next real test
        is_comment = comment is not None and comment.matches(t, False)
        if is_comment or (comments and not t):
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n'.join(comments), t]
        comments = []
        try:
            result = self.parseString(t, parseAll=parseAll)
            out.append(result.dump(full=fullDump))
            success = success and not failureTests
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show the offending line and point at the column
                out.append(line(pe.loc, t))
                out.append(' '*(col(pe.loc, t)-1) + '^' + fatal)
            else:
                out.append(' '*pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            success = success and failureTests
            result = pe

        if printResults:
            if fullDump:
                out.append('')
            print('\n'.join(out))

        allResults.append((t, result))

    return success, allResults
2264
2265
2266 -class Token(ParserElement):
2267 """
2268 Abstract C{ParserElement} subclass, for defining atomic matching patterns.
2269 """
2272
2273
2274 -class Empty(Token):
2275 """
2276 An empty token, will always match.
2277 """
2279 super(Empty,self).__init__()
2280 self.name = "Empty"
2281 self.mayReturnEmpty = True
2282 self.mayIndexError = False
2283
2286 """
2287 A token that will never match.
2288 """
2290 super(NoMatch,self).__init__()
2291 self.name = "NoMatch"
2292 self.mayReturnEmpty = True
2293 self.mayIndexError = False
2294 self.errmsg = "Unmatchable token"
2295
2296 - def parseImpl( self, instring, loc, doActions=True ):
2298
2301 """
2302 Token to exactly match a specified string.
2303
2304 Example::
2305 Literal('blah').parseString('blah') # -> ['blah']
2306 Literal('blah').parseString('blahfooblah') # -> ['blah']
2307 Literal('blah').parseString('bla') # -> Exception: Expected "blah"
2308
2309 For case-insensitive matching, use L{CaselessLiteral}.
2310
2311 For keyword matching (force word break before and after the matched string),
2312 use L{Keyword} or L{CaselessKeyword}.
2313 """
2315 super(Literal,self).__init__()
2316 self.match = matchString
2317 self.matchLen = len(matchString)
2318 try:
2319 self.firstMatchChar = matchString[0]
2320 except IndexError:
2321 warnings.warn("null string passed to Literal; use Empty() instead",
2322 SyntaxWarning, stacklevel=2)
2323 self.__class__ = Empty
2324 self.name = '"%s"' % _ustr(self.match)
2325 self.errmsg = "Expected " + self.name
2326 self.mayReturnEmpty = False
2327 self.mayIndexError = False
2328
2329
2330
2331
2332
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the literal string C{self.match} in C{instring} at position C{loc}.

    Returns a C{(new_loc, matched_string)} tuple, or raises C{ParseException}
    if the text at C{loc} is not the literal. C{doActions} is not used here.
    """
    # cheap single-character comparison first; the full startswith() test is
    # only needed for literals longer than one character
    if instring[loc] == self.firstMatchChar:
        if self.matchLen == 1 or instring.startswith(self.match, loc):
            return loc + self.matchLen, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2338 _L = Literal
2339 ParserElement._literalStringClass = Literal
2342 """
2343 Token to exactly match a specified string as a keyword, that is, it must be
2344 immediately followed by a non-keyword character. Compare with C{L{Literal}}:
2345 - C{Literal("if")} will match the leading C{'if'} in C{'ifAndOnlyIf'}.
2346 - C{Keyword("if")} will not; it will only match the leading C{'if'} in C{'if x=1'}, or C{'if(y==2)'}
2347 Accepts two optional constructor arguments in addition to the keyword string:
2348 - C{identChars} is a string of characters that would be valid identifier characters,
2349 defaulting to all alphanumerics + "_" and "$"
2350 - C{caseless} allows case-insensitive matching, default is C{False}.
2351
2352 Example::
2353 Keyword("start").parseString("start") # -> ['start']
2354 Keyword("start").parseString("starting") # -> Exception
2355
2356 For case-insensitive matching, use L{CaselessKeyword}.
2357 """
2358 DEFAULT_KEYWORD_CHARS = alphanums+"_$"
2359
2361 super(Keyword,self).__init__()
2362 self.match = matchString
2363 self.matchLen = len(matchString)
2364 try:
2365 self.firstMatchChar = matchString[0]
2366 except IndexError:
2367 warnings.warn("null string passed to Keyword; use Empty() instead",
2368 SyntaxWarning, stacklevel=2)
2369 self.name = '"%s"' % self.match
2370 self.errmsg = "Expected " + self.name
2371 self.mayReturnEmpty = False
2372 self.mayIndexError = False
2373 self.caseless = caseless
2374 if caseless:
2375 self.caselessmatch = matchString.upper()
2376 identChars = identChars.upper()
2377 self.identChars = set(identChars)
2378
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc}, requiring that it is neither followed nor
    preceded by a character in C{self.identChars}.

    When C{self.caseless} is set, the candidate text and its neighbouring
    characters are upper-cased before comparison. Returns
    C{(new_loc, self.match)}; raises C{ParseException} if the keyword does
    not match. C{doActions} is not used here.
    """
    matchlen = self.matchLen
    endloc = loc + matchlen
    identchars = self.identChars
    if self.caseless:
        matched = instring[loc:endloc].upper() == self.caselessmatch
        # only look at the following character when there is one
        if matched and loc < len(instring) - matchlen:
            matched = instring[endloc].upper() not in identchars
        # only look at the preceding character when not at the very start
        if matched and loc != 0:
            matched = instring[loc - 1].upper() not in identchars
    else:
        matched = instring[loc] == self.firstMatchChar
        if matched and matchlen != 1:
            matched = instring.startswith(self.match, loc)
        if matched and loc < len(instring) - matchlen:
            matched = instring[endloc] not in identchars
        if matched and loc != 0:
            matched = instring[loc - 1] not in identchars
    if matched:
        return endloc, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2392
2397
2398 @staticmethod
2403
2405 """
2406 Token to match a specified string, ignoring case of letters.
2407 Note: the matched results will always be in the case of the given
2408 match string, NOT the case of the input text.
2409
2410 Example::
2411 OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD', 'CMD']
2412
2413 (Contrast with example for L{CaselessKeyword}.)
2414 """
2416 super(CaselessLiteral,self).__init__( matchString.upper() )
2417
2418 self.returnString = matchString
2419 self.name = "'%s'" % self.returnString
2420 self.errmsg = "Expected " + self.name
2421
def parseImpl(self, instring, loc, doActions=True):
    """
    Compare the upper-cased text at C{loc} against C{self.match}.

    Returns C{(new_loc, self.returnString)} on a match, so the result is the
    stored return string rather than the input text; raises C{ParseException}
    otherwise. C{doActions} is not used here.
    """
    candidate = instring[loc:loc + self.matchLen]
    if candidate.upper() != self.match:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc + self.matchLen, self.returnString
2426
2428 """
2429 Caseless version of L{Keyword}.
2430
2431 Example::
2432 OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10") # -> ['CMD', 'CMD']
2433
2434 (Contrast with example for L{CaselessLiteral}.)
2435 """
2438
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the keyword at C{loc} ignoring case, requiring a keyword boundary
    on both sides.

    The upper-cased text at C{loc} must equal C{self.caselessmatch}, and the
    characters immediately after and immediately before it (when present) must
    not be in C{self.identChars}. The check on the preceding character mirrors
    the caseless branch of C{Keyword.parseImpl}, so the keyword is no longer
    accepted in the middle of a longer identifier.

    Returns C{(new_loc, self.match)}; raises C{ParseException} if the keyword
    does not match. C{doActions} is not used here.
    """
    endloc = loc + self.matchLen
    identchars = self.identChars
    if (instring[loc:endloc].upper() == self.caselessmatch
            and (endloc >= len(instring) or instring[endloc].upper() not in identchars)
            and (loc == 0 or instring[loc - 1].upper() not in identchars)):
        return endloc, self.match
    raise ParseException(instring, loc, self.errmsg, self)
2444
2446 """
2447 Token for matching words composed of allowed character sets.
2448 Defined with string containing all allowed initial characters,
2449 an optional string containing allowed body characters (if omitted,
2450 defaults to the initial character set), and an optional minimum,
2451 maximum, and/or exact length. The default value for C{min} is 1 (a
2452 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2453 are 0, meaning no maximum or exact length restriction. An optional
2454 C{excludeChars} parameter can list characters that might be found in
2455 the input C{bodyChars} string; useful to define a word of all printables
2456 except for one or two characters, for instance.
2457
2458 L{srange} is useful for defining custom character set strings for defining
2459 C{Word} expressions, using range notation from regular expression character sets.
2460
2461 A common mistake is to use C{Word} to match a specific literal string, as in
2462 C{Word("Address")}. Remember that C{Word} uses the string argument to define
2463 I{sets} of matchable characters. This expression would match "Add", "AAA",
2464 "dAred", or any other word made up of the characters 'A', 'd', 'r', 'e', and 's'.
2465 To match an exact literal string, use L{Literal} or L{Keyword}.
2466
2467 pyparsing includes helper strings for building Words:
2468 - L{alphas}
2469 - L{nums}
2470 - L{alphanums}
2471 - L{hexnums}
2472 - L{alphas8bit} (alphabetic characters in ASCII range 128-255 - accented, tilded, umlauted, etc.)
2473 - L{punc8bit} (non-alphabetic characters in ASCII range 128-255 - currency, symbols, superscripts, diacriticals, etc.)
2474 - L{printables} (any non-whitespace character)
2475
2476 Example::
2477 # a word composed of digits
2478 integer = Word(nums) # equivalent to Word("0123456789") or Word(srange("0-9"))
2479
2480 # a word with a leading capital, and zero or more lowercase
2481 capital_word = Word(alphas.upper(), alphas.lower())
2482
2483 # hostnames are alphanumeric, with leading alpha, and '-'
2484 hostname = Word(alphas, alphanums+'-')
2485
2486 # roman numeral (not a strict parser, accepts invalid mix of characters)
2487 roman = Word("IVXLCDM")
2488
2489 # any string of non-whitespace characters, except for ','
2490 csv_value = Word(printables, excludeChars=",")
2491 """
def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
    """
    Parameters:
     - initChars - string of characters allowed as the first character of the word
     - bodyChars - (default=C{None}) string of characters allowed after the first;
          if omitted or empty, C{initChars} is used for the body as well
     - min - (default=1) minimum word length; must be at least 1
     - max - (default=0) maximum word length; 0 means no maximum
     - exact - (default=0) if > 0, sets both the minimum and maximum length
     - asKeyword - (default=C{False}) if True, the word must not be adjacent to
          other body characters
     - excludeChars - (default=C{None}) characters removed from C{initChars} and
          C{bodyChars} before the character sets are built

    Raises C{ValueError} if C{min} is less than 1.
    """
    super(Word, self).__init__()
    if excludeChars:
        initChars = ''.join(c for c in initChars if c not in excludeChars)
        if bodyChars:
            bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)
    self.initCharsOrig = initChars
    self.initChars = set(initChars)
    if bodyChars:
        self.bodyCharsOrig = bodyChars
        self.bodyChars = set(bodyChars)
    else:
        self.bodyCharsOrig = initChars
        self.bodyChars = set(initChars)

    self.maxSpecified = max > 0

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

    self.minLen = min

    if max > 0:
        self.maxLen = max
    else:
        self.maxLen = _MAX_INT

    if exact > 0:
        self.maxLen = exact
        self.minLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.asKeyword = asKeyword

    # with default length limits and no space in the character sets, the word
    # can be matched by an equivalent regular expression instead of the
    # character-by-character scan in parseImpl
    if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
        if self.bodyCharsOrig == self.initCharsOrig:
            self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
        elif len(self.initCharsOrig) == 1:
            self.reString = "%s[%s]*" % \
                                  (re.escape(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        else:
            self.reString = "[%s][%s]*" % \
                                  (_escapeRegexRangeChars(self.initCharsOrig),
                                  _escapeRegexRangeChars(self.bodyCharsOrig),)
        if self.asKeyword:
            self.reString = r"\b"+self.reString+r"\b"
        try:
            self.re = re.compile(self.reString)
        except Exception:
            # best effort: if the generated pattern cannot be compiled, fall back
            # to the non-regex matching path (but let KeyboardInterrupt and
            # SystemExit propagate, which a bare except would have swallowed)
            self.re = None
2545
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a word at C{loc}: one character from C{self.initChars} followed by
    characters from C{self.bodyChars}, subject to the configured length limits.

    If a compiled regular expression is available in C{self.re}, it is used
    for the whole match. Returns C{(new_loc, matched_text)}; raises
    C{ParseException} when no acceptable word is found. C{doActions} is not
    used here.
    """
    if self.re:
        found = self.re.match(instring, loc)
        if found:
            return found.end(), found.group()
        raise ParseException(instring, loc, self.errmsg, self)

    if instring[loc] not in self.initChars:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    instrlen = len(instring)
    bodychars = self.bodyChars
    stop = min(start + self.maxLen, instrlen)
    loc += 1
    while loc < stop and instring[loc] in bodychars:
        loc += 1

    # True when the character right after the scanned word is itself a body character
    more_body_follows = loc < instrlen and instring[loc] in bodychars
    too_short = loc - start < self.minLen
    # an explicit maximum length is violated if the word would have continued
    overlong = self.maxSpecified and more_body_follows
    # keyword mode: the word may not touch body characters on either side
    not_isolated = self.asKeyword and (
        (start > 0 and instring[start - 1] in bodychars) or more_body_follows)

    if too_short or overlong or not_isolated:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2580
2582 try:
2583 return super(Word,self).__str__()
2584 except:
2585 pass
2586
2587
2588 if self.strRepr is None:
2589
2590 def charsAsStr(s):
2591 if len(s)>4:
2592 return s[:4]+"..."
2593 else:
2594 return s
2595
2596 if ( self.initCharsOrig != self.bodyCharsOrig ):
2597 self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
2598 else:
2599 self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
2600
2601 return self.strRepr
2602
2603
2604 -class Regex(Token):
2605 """
2606 Token for matching strings that match a given regular expression.
2607 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
2608
2609 Example::
2610 realnum = Regex(r"[+-]?\d+\.\d*")
2611 ssn = Regex(r"\d\d\d-\d\d-\d\d\d\d")
2612 # ref: http://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
2613 roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")
2614 """
2615 compiledREtype = type(re.compile("[A-Z]"))
def __init__(self, pattern, flags=0):
    """The parameters C{pattern} and C{flags} are passed to the C{re.compile()} function as-is. See the Python C{re} module for an explanation of the acceptable patterns and flags.

    C{pattern} may also be an already-compiled regular expression object; in that
    case it is used directly, C{flags} is only stored on the instance, and
    C{self.pattern}/C{self.reString} are set to the compiled object's source pattern.

    Raises C{ValueError} if C{pattern} is neither a string nor a compiled RE object.
    If a string pattern fails to compile, a C{SyntaxWarning} is issued and the
    C{re.error} is re-raised.
    """
    super(Regex, self).__init__()

    if isinstance(pattern, basestring):
        if not pattern:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except re.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

    elif isinstance(pattern, Regex.compiledREtype):
        self.re = pattern
        # use the regex source text; str(pattern) would yield the
        # "re.compile(...)" repr rather than the pattern itself
        self.pattern = self.reString = pattern.pattern
        self.flags = flags

    else:
        raise ValueError("Regex may only be constructed with a string or a compiled RE object")

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2649
def parseImpl(self, instring, loc, doActions=True):
    """
    Apply the compiled regular expression at C{loc}.

    Returns C{(new_loc, tokens)} where C{tokens} is a C{ParseResults} holding
    the full matched text, with every named group of the match also stored
    under its group name. Raises C{ParseException} if the expression does not
    match at C{loc}. C{doActions} is not used here.
    """
    found = self.re.match(instring, loc)
    if not found:
        raise ParseException(instring, loc, self.errmsg, self)

    tokens = ParseResults(found.group())
    for group_name, group_value in found.groupdict().items():
        tokens[group_name] = group_value
    return found.end(), tokens
2662
2664 try:
2665 return super(Regex,self).__str__()
2666 except:
2667 pass
2668
2669 if self.strRepr is None:
2670 self.strRepr = "Re:(%s)" % repr(self.pattern)
2671
2672 return self.strRepr
2673
2676 r"""
2677 Token for matching strings that are delimited by quoting characters.
2678
2679 Defined with the following parameters:
2680 - quoteChar - string of one or more characters defining the quote delimiting string
2681 - escChar - character to escape quotes, typically backslash (default=C{None})
2682 - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=C{None})
2683 - multiline - boolean indicating whether quotes can span multiple lines (default=C{False})
2684 - unquoteResults - boolean indicating whether the matched text should be unquoted (default=C{True})
2685 - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=C{None} => same as quoteChar)
2686 - convertWhitespaceEscapes - convert escaped whitespace (C{'\t'}, C{'\n'}, etc.) to actual whitespace (default=C{True})
2687
2688 Example::
2689 qs = QuotedString('"')
2690 print(qs.searchString('lsjdf "This is the quote" sldjf'))
2691 complex_qs = QuotedString('{{', endQuoteChar='}}')
2692 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf'))
2693 sql_qs = QuotedString('"', escQuote='""')
2694 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf'))
2695 prints::
2696 [['This is the quote']]
2697 [['This is the "quote"']]
2698 [['This is the quote with "embedded" quotes']]
2699 """
def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """
    Build the regular expression used to recognize the quoted string and
    record the quoting options on the instance.

    Leading and trailing whitespace is stripped from C{quoteChar} and
    C{endQuoteChar}. If either is empty after stripping, a C{SyntaxWarning}
    is issued and C{SyntaxError} is raised. If the generated pattern fails to
    compile, a C{SyntaxWarning} is issued and the error is re-raised.
    """
    super(QuotedString, self).__init__()

    # whitespace around the quote characters is discarded
    quoteChar = quoteChar.strip()
    if not quoteChar:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is not None:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()
    else:
        endQuoteChar = quoteChar

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # the body's basic character class excludes the first end-quote character and
    # the escape character; single-line strings additionally exclude line breaks
    excluded_esc = _escapeRegexRangeChars(escChar) if escChar is not None else ''
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        opening = r'%s(?:[^%s%s]'
    else:
        self.flags = 0
        opening = r'%s(?:[^%s\n\r%s]'
    pieces = [opening % (re.escape(self.quoteChar),
                         _escapeRegexRangeChars(self.endQuoteChar[0]),
                         excluded_esc)]

    # for a multi-character end quote, also accept a proper prefix of the
    # end quote when it is followed by a character that breaks the sequence
    if len(self.endQuoteChar) > 1:
        partials = ["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                 _escapeRegexRangeChars(self.endQuoteChar[i]))
                    for i in range(len(self.endQuoteChar)-1, 0, -1)]
        pieces.append('|(?:' + ')|(?:'.join(partials) + ')')
    if escQuote:
        pieces.append(r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        pieces.append(r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    pieces.append(r')*%s' % re.escape(self.endQuoteChar))
    self.pattern = ''.join(pieces)

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except re.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
2764
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a quoted string at C{loc} using the compiled pattern in C{self.re}.

    Returns C{(new_loc, text)}. When C{self.unquoteResults} is set, the
    quote delimiters are stripped from C{text} and, in this order, escaped
    whitespace literals are converted (if C{self.convertWhitespaceEscapes}),
    escape characters are removed (if C{self.escChar}), and escaped quote
    sequences are replaced by the end quote string (if C{self.escQuote}).
    Otherwise the matched text is returned with its quotes.

    Raises C{ParseException} if no quoted string starts at C{loc}.
    C{doActions} is not used here.
    """
    # cheap first-character test before running the regular expression
    result = self.re.match(instring, loc) if instring[loc] == self.firstQuoteChar else None
    if not result:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret, basestring):
            # replace escaped whitespace
            if '\\' in ret and self.convertWhitespaceEscapes:
                ws_map = (
                    (r'\t', '\t'),
                    (r'\n', '\n'),
                    (r'\f', '\f'),
                    (r'\r', '\r'),
                )
                for wslit, wschar in ws_map:
                    ret = ret.replace(wslit, wschar)

            # replace escaped characters; the replacement must be a raw string,
            # since "\g" is not a valid escape sequence in a normal string literal
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern, r"\g<1>", ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
2799
2801 try:
2802 return super(QuotedString,self).__str__()
2803 except:
2804 pass
2805
2806 if self.strRepr is None:
2807 self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
2808
2809 return self.strRepr
2810
2813 """
2814 Token for matching words composed of characters *not* in a given set (will
2815 include whitespace in matched characters if not listed in the provided exclusion set - see example).
2816 Defined with string containing all disallowed characters, and an optional
2817 minimum, maximum, and/or exact length. The default value for C{min} is 1 (a
2818 minimum value < 1 is not valid); the default values for C{max} and C{exact}
2819 are 0, meaning no maximum or exact length restriction.
2820
2821 Example::
2822 # define a comma-separated-value as anything that is not a ','
2823 csv_value = CharsNotIn(',')
2824 print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))
2825 prints::
2826 ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
2827 """
def __init__(self, notChars, min=1, max=0, exact=0):
    """
    Parameters:
     - notChars - string containing all disallowed characters
     - min - (default=1) minimum match length; must be at least 1
     - max - (default=0) maximum match length; 0 means no maximum
     - exact - (default=0) if > 0, sets both the minimum and maximum length

    Whitespace skipping is turned off for this expression.
    Raises C{ValueError} if C{min} is less than 1.
    """
    super(CharsNotIn, self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    # an exact length overrides both bounds
    if exact > 0:
        self.minLen = self.maxLen = exact

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = (self.minLen == 0)
    self.mayIndexError = False
2851
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a run of characters at C{loc}, none of which is in C{self.notChars}.

    The run is cut off at C{self.maxLen} characters. Returns
    C{(new_loc, matched_text)}; raises C{ParseException} if the first
    character is excluded or the run is shorter than C{self.minLen}.
    C{doActions} is not used here.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    limit = min(start + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2868
2870 try:
2871 return super(CharsNotIn, self).__str__()
2872 except:
2873 pass
2874
2875 if self.strRepr is None:
2876 if len(self.notChars) > 4:
2877 self.strRepr = "!W:(%s...)" % self.notChars[:4]
2878 else:
2879 self.strRepr = "!W:(%s)" % self.notChars
2880
2881 return self.strRepr
2882
2884 """
2885 Special matching class for matching whitespace. Normally, whitespace is ignored
2886 by pyparsing grammars. This class is included when some whitespace structures
2887 are significant. Define with a string containing the whitespace characters to be
2888 matched; default is C{" \\t\\r\\n"}. Also takes optional C{min}, C{max}, and C{exact} arguments,
2889 as defined for the C{L{Word}} class.
2890 """
2891 whiteStrs = {
2892 " " : "<SPC>",
2893 "\t": "<TAB>",
2894 "\n": "<LF>",
2895 "\r": "<CR>",
2896 "\f": "<FF>",
2897 }
def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
    """
    Parameters:
     - ws - (default=C{" \\t\\r\\n"}) string of the whitespace characters to match
     - min - (default=1) minimum match length
     - max - (default=0) maximum match length; 0 means no maximum
     - exact - (default=0) if > 0, sets both the minimum and maximum length

    The characters in C{ws} are removed from the set of whitespace characters
    this expression skips, and the expression name is built from the
    C{White.whiteStrs} label of each character in C{ws}.
    """
    super(White, self).__init__()
    self.matchWhite = ws
    # the characters being matched must not be skipped as leading whitespace
    skippable = [c for c in self.whiteChars if c not in ws]
    self.setWhitespaceChars("".join(skippable))

    self.name = "".join(White.whiteStrs[c] for c in ws)
    self.mayReturnEmpty = True
    self.errmsg = "Expected " + self.name

    self.minLen = min
    self.maxLen = max if max > 0 else _MAX_INT

    # an exact length overrides both bounds
    if exact > 0:
        self.minLen = self.maxLen = exact
2917
def parseImpl(self, instring, loc, doActions=True):
    """
    Match a run of characters from C{self.matchWhite} starting at C{loc}.

    The run is cut off at C{self.maxLen} characters. Returns
    C{(new_loc, matched_text)}; raises C{ParseException} if the character at
    C{loc} is not in C{self.matchWhite} or the run is shorter than
    C{self.minLen}. C{doActions} is not used here.
    """
    white = self.matchWhite
    if instring[loc] not in white:
        raise ParseException(instring, loc, self.errmsg, self)

    start = loc
    limit = min(start + self.maxLen, len(instring))
    loc += 1
    while loc < limit and instring[loc] in white:
        loc += 1

    if loc - start < self.minLen:
        raise ParseException(instring, loc, self.errmsg, self)

    return loc, instring[start:loc]
2932
2936 super(_PositionToken,self).__init__()
2937 self.name=self.__class__.__name__
2938 self.mayReturnEmpty = True
2939 self.mayIndexError = False
2940
2942 """
2943 Token to advance to a specific column of input text; useful for tabular report scraping.
2944 """
2948
2950 if col(loc,instring) != self.col:
2951 instrlen = len(instring)
2952 if self.ignoreExprs:
2953 loc = self._skipIgnorables( instring, loc )
2954 while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != self.col :
2955 loc += 1
2956 return loc
2957
def parseImpl(self, instring, loc, doActions=True):
    """
    Advance from C{loc} to the target column C{self.col}.

    Returns C{(new_loc, skipped_text)}, where C{skipped_text} is the slice of
    C{instring} between C{loc} and the new location. Raises C{ParseException}
    if the column of C{loc} is already past C{self.col}. C{doActions} is not
    used here.
    """
    current = col(loc, instring)
    if current > self.col:
        raise ParseException(instring, loc, "Text not in expected column", self)
    target = loc + self.col - current
    return target, instring[loc:target]
2965
2967 """
2968 Matches if current position is at the beginning of a line within the parse string
2969 """
2974
2976 preloc = super(LineStart,self).preParse(instring,loc)
2977 if instring[preloc] == "\n":
2978 loc += 1
2979 return loc
2980
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the beginning of a line.

    C{loc} qualifies if it is 0, if it equals the position returned by
    C{self.preParse(instring, 0)}, or if the preceding character is a
    newline. Returns C{(loc, [])}; raises C{ParseException} otherwise.
    C{doActions} is not used here.
    """
    at_line_start = (loc == 0
                     or loc == self.preParse(instring, 0)
                     or instring[loc - 1] == "\n")
    if at_line_start:
        return loc, []
    raise ParseException(instring, loc, self.errmsg, self)
2987
2989 """
2990 Matches if current position is at the end of a line within the parse string
2991 """
2996
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the end of a line at C{loc}.

    Returns C{(loc+1, "\\n")} when the character at C{loc} is a newline, and
    C{(loc+1, [])} when C{loc} is exactly the end of the string. Raises
    C{ParseException} for any other position. C{doActions} is not used here.
    """
    instrlen = len(instring)
    if loc == instrlen:
        return loc + 1, []
    if loc < instrlen and instring[loc] == "\n":
        return loc + 1, "\n"
    raise ParseException(instring, loc, self.errmsg, self)
3007
3009 """
3010 Matches if current position is at the beginning of the parse string
3011 """
3015
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the beginning of the string.

    A non-zero C{loc} is still accepted if it equals the position returned by
    C{self.preParse(instring, 0)}. Returns C{(loc, [])}; raises
    C{ParseException} otherwise. C{doActions} is not used here.
    """
    if loc != 0 and loc != self.preParse(instring, 0):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3022
3024 """
3025 Matches if current position is at the end of the parse string
3026 """
3030
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed only when C{loc} is at or beyond the end of C{instring}.

    Returns C{(loc+1, [])} when C{loc} is exactly the end of the string, and
    C{(loc, [])} when C{loc} is already past it. Raises C{ParseException} if
    any input remains at C{loc}. C{doActions} is not used here.
    """
    instrlen = len(instring)
    if loc < instrlen:
        raise ParseException(instring, loc, self.errmsg, self)
    if loc == instrlen:
        return loc + 1, []
    # loc > instrlen: the three comparisons are exhaustive, so no further
    # fallback branch is needed
    return loc, []
3040
3042 """
3043 Matches if the current position is at the beginning of a Word, and
3044 is not preceded by any character in a given set of C{wordChars}
3045 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3046 use C{WordStart(alphanums)}. C{WordStart} will also match at the beginning of
3047 the string being parsed, or at the beginning of a line.
3048 """
3050 super(WordStart,self).__init__()
3051 self.wordChars = set(wordChars)
3052 self.errmsg = "Not at the start of a word"
3053
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the start of a word.

    Position 0 always qualifies. Elsewhere, the preceding character must not
    be in C{self.wordChars} and the character at C{loc} must be. Returns
    C{(loc, [])}; raises C{ParseException} otherwise. C{doActions} is not
    used here.
    """
    wordchars = self.wordChars
    if loc != 0 and (instring[loc - 1] in wordchars
                     or instring[loc] not in wordchars):
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3060
3062 """
3063 Matches if the current position is at the end of a Word, and
3064 is not followed by any character in a given set of C{wordChars}
3065 (default=C{printables}). To emulate the C{\b} behavior of regular expressions,
3066 use C{WordEnd(alphanums)}. C{WordEnd} will also match at the end of
3067 the string being parsed, or at the end of a line.
3068 """
3070 super(WordEnd,self).__init__()
3071 self.wordChars = set(wordChars)
3072 self.skipWhitespace = False
3073 self.errmsg = "Not at the end of a word"
3074
def parseImpl(self, instring, loc, doActions=True):
    """
    Succeed, consuming nothing, when C{loc} is at the end of a word.

    A position at or past the end of the string always qualifies. Elsewhere,
    the character at C{loc} must not be in C{self.wordChars} and the
    preceding character must be. Returns C{(loc, [])}; raises
    C{ParseException} otherwise. C{doActions} is not used here.
    """
    instrlen = len(instring)
    if not (instrlen > 0 and loc < instrlen):
        return loc, []
    wordchars = self.wordChars
    if instring[loc] in wordchars or instring[loc - 1] not in wordchars:
        raise ParseException(instring, loc, self.errmsg, self)
    return loc, []
3082
3085 """
3086 Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
3087 """
def __init__(self, exprs, savelist=False):
    """
    Store the contained expressions in C{self.exprs}.

    Parameters:
     - exprs - a single string, a generator, an iterable of expressions or
          strings, or a single expression object. A string is wrapped with
          C{ParserElement._literalStringClass}; an iterable consisting only of
          strings has each element wrapped the same way; a non-iterable
          object that cannot be converted to a list is stored as a
          one-element list.
     - savelist - (default=C{False}) passed through to the base class initializer
    """
    # collections.Iterable was removed in Python 3.10; the ABC lives in
    # collections.abc on Python 3 and directly in collections on Python 2
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    super(ParseExpression, self).__init__(savelist)
    if isinstance(exprs, _generatorType):
        exprs = list(exprs)

    if isinstance(exprs, basestring):
        self.exprs = [ParserElement._literalStringClass(exprs)]
    elif isinstance(exprs, Iterable):
        exprs = list(exprs)
        # if sequence of strings provided, wrap each one with the literal class
        if all(isinstance(expr, basestring) for expr in exprs):
            exprs = [ParserElement._literalStringClass(expr) for expr in exprs]
        self.exprs = list(exprs)
    else:
        try:
            self.exprs = list(exprs)
        except TypeError:
            self.exprs = [exprs]
    self.callPreparse = False
3107
3109 return self.exprs[i]
3110
3112 self.exprs.append( other )
3113 self.strRepr = None
3114 return self
3115
3117 """Extends C{leaveWhitespace} defined in base class, and also invokes C{leaveWhitespace} on
3118 all contained expressions."""
3119 self.skipWhitespace = False
3120 self.exprs = [ e.copy() for e in self.exprs ]
3121 for e in self.exprs:
3122 e.leaveWhitespace()
3123 return self
3124
3126 if isinstance( other, Suppress ):
3127 if other not in self.ignoreExprs:
3128 super( ParseExpression, self).ignore( other )
3129 for e in self.exprs:
3130 e.ignore( self.ignoreExprs[-1] )
3131 else:
3132 super( ParseExpression, self).ignore( other )
3133 for e in self.exprs:
3134 e.ignore( self.ignoreExprs[-1] )
3135 return self
3136
3138 try:
3139 return super(ParseExpression,self).__str__()
3140 except:
3141 pass
3142
3143 if self.strRepr is None:
3144 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
3145 return self.strRepr
3146
3148 super(ParseExpression,self).streamline()
3149
3150 for e in self.exprs:
3151 e.streamline()
3152
3153
3154
3155
3156 if ( len(self.exprs) == 2 ):
3157 other = self.exprs[0]
3158 if ( isinstance( other, self.__class__ ) and
3159 not(other.parseAction) and
3160 other.resultsName is None and
3161 not other.debug ):
3162 self.exprs = other.exprs[:] + [ self.exprs[1] ]
3163 self.strRepr = None
3164 self.mayReturnEmpty |= other.mayReturnEmpty
3165 self.mayIndexError |= other.mayIndexError
3166
3167 other = self.exprs[-1]
3168 if ( isinstance( other, self.__class__ ) and
3169 not(other.parseAction) and
3170 other.resultsName is None and
3171 not other.debug ):
3172 self.exprs = self.exprs[:-1] + other.exprs[:]
3173 self.strRepr = None
3174 self.mayReturnEmpty |= other.mayReturnEmpty
3175 self.mayIndexError |= other.mayIndexError
3176
3177 self.errmsg = "Expected " + _ustr(self)
3178
3179 return self
3180
3184
def validate(self, validateTrace=[]):
    """
    Validate each contained expression, then run C{checkRecursion} on
    this expression.

    C{validateTrace} is the list of expressions already on the validation
    path.  It is copied (never modified in place) before this expression
    is added to it, so the shared default list is left untouched.
    """
    path = validateTrace[:] + [self]
    for contained in self.exprs:
        contained.validate(path)
    # the recursion check always starts from a fresh, empty list
    self.checkRecursion([])
3190
3195
3196 -class And(ParseExpression):
3197 """
3198 Requires all given C{ParseExpression}s to be found in the given order.
3199 Expressions may be separated by whitespace.
3200 May be constructed using the C{'+'} operator.
3201 May also be constructed using the C{'-'} operator, which will suppress backtracking.
3202
3203 Example::
3204 integer = Word(nums)
3205 name_expr = OneOrMore(Word(alphas))
3206
3207 expr = And([integer("id"),name_expr("name"),integer("age")])
3208 # more easily written as:
3209 expr = integer("id") + name_expr("name") + integer("age")
3210 """
3211
3217
def __init__(self, exprs, savelist=True):
    """
    Build a sequence expression from C{exprs}.

    Parameters:
     - exprs - the expressions that must all match, in order
     - savelist - passed through unchanged to the base-class constructor

    C{mayReturnEmpty} is true only when every contained expression may
    return empty.  The whitespace settings are copied from the first
    contained expression.
    """
    super(And, self).__init__(exprs, savelist)
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
    # An empty expression list has no leading expression to copy the
    # whitespace settings from; keep the base-class settings instead of
    # failing with IndexError on self.exprs[0].
    if self.exprs:
        leading = self.exprs[0]
        self.setWhitespaceChars(leading.whiteChars)
        self.skipWhitespace = leading.skipWhitespace
    self.callPreparse = True
3224
def parseImpl(self, instring, loc, doActions=True):
    """
    Match every contained expression in order, starting at C{loc}.

    Returns the location after the last match and the accumulated tokens.
    Once an C{And._ErrorStop} marker has been passed, a failure of any
    later expression is raised as a C{ParseSyntaxException}.
    """
    # The first element is parsed with callPreParse=False; the remaining
    # elements use the default pre-parse behaviour of _parse.
    loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
    past_error_stop = False
    for expr in self.exprs[1:]:
        if isinstance(expr, And._ErrorStop):
            # marker only: nothing to parse, but it changes error handling
            past_error_stop = True
            continue
        if not past_error_stop:
            loc, exprtokens = expr._parse(instring, loc, doActions)
        else:
            try:
                loc, exprtokens = expr._parse(instring, loc, doActions)
            except ParseSyntaxException:
                raise
            except ParseBaseException as failure:
                failure.__traceback__ = None
                raise ParseSyntaxException._from_exception(failure)
            except IndexError:
                raise ParseSyntaxException(instring, len(instring), self.errmsg, self)
        if exprtokens or exprtokens.haskeys():
            resultlist += exprtokens
    return loc, resultlist
3249
3251 if isinstance( other, basestring ):
3252 other = ParserElement._literalStringClass( other )
3253 return self.append( other )
3254
3256 subRecCheckList = parseElementList[:] + [ self ]
3257 for e in self.exprs:
3258 e.checkRecursion( subRecCheckList )
3259 if not e.mayReturnEmpty:
3260 break
3261
3263 if hasattr(self,"name"):
3264 return self.name
3265
3266 if self.strRepr is None:
3267 self.strRepr = "{" + " ".join(_ustr(e) for e in self.exprs) + "}"
3268
3269 return self.strRepr
3270
3271
3272 -class Or(ParseExpression):
3273 """
3274 Requires that at least one C{ParseExpression} is found.
3275 If two expressions match, the expression that matches the longest string will be used.
3276 May be constructed using the C{'^'} operator.
3277
3278 Example::
3279 # construct Or using '^' operator
3280
3281 number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
3282 print(number.searchString("123 3.1416 789"))
3283 prints::
3284 [['123'], ['3.1416'], ['789']]
3285 """
def __init__(self, exprs, savelist=False):
    """
    Build a longest-match alternation from C{exprs}.

    C{mayReturnEmpty} is true when any alternative may return empty, and
    also when there are no alternatives at all.
    """
    super(Or, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)
3292
def parseImpl(self, instring, loc, doActions=True):
    """
    Try every alternative at C{loc} and return the result of the one that
    consumes the most input.

    Each alternative is first probed with C{tryParse}; the successful ones
    are then re-parsed from longest to shortest and the first that
    succeeds is returned.  If none succeeds, the failure that reached the
    furthest location is re-raised with this expression's error message.
    """
    furthest_loc = -1
    furthest_exc = None
    candidates = []
    for alt in self.exprs:
        try:
            end_loc = alt.tryParse(instring, loc)
        except ParseException as exc:
            exc.__traceback__ = None
            if exc.loc > furthest_loc:
                furthest_exc = exc
                furthest_loc = exc.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                furthest_loc = len(instring)
        else:
            # remember every successful probe so they can be retried
            # from longest to shortest
            candidates.append((end_loc, alt))

    # stable sort: equally long matches keep their declaration order
    candidates.sort(key=lambda pair: -pair[0])
    for _, alt in candidates:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as exc:
            exc.__traceback__ = None
            if exc.loc > furthest_loc:
                furthest_exc = exc
                furthest_loc = exc.loc

    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3329
3330
3332 if isinstance( other, basestring ):
3333 other = ParserElement._literalStringClass( other )
3334 return self.append( other )
3335
3337 if hasattr(self,"name"):
3338 return self.name
3339
3340 if self.strRepr is None:
3341 self.strRepr = "{" + " ^ ".join(_ustr(e) for e in self.exprs) + "}"
3342
3343 return self.strRepr
3344
3346 subRecCheckList = parseElementList[:] + [ self ]
3347 for e in self.exprs:
3348 e.checkRecursion( subRecCheckList )
3349
3352 """
3353 Requires that at least one C{ParseExpression} is found.
3354 If two expressions match, the first one listed is the one that will match.
3355 May be constructed using the C{'|'} operator.
3356
3357 Example::
3358 # construct MatchFirst using '|' operator
3359
3360 # watch the order of expressions to match
3361 number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
3362 print(number.searchString("123 3.1416 789")) # Fail! -> [['123'], ['3'], ['1416'], ['789']]
3363
3364 # put more selective expression first
3365 number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
3366 print(number.searchString("123 3.1416 789")) # Better -> [['123'], ['3.1416'], ['789']]
3367 """
def __init__(self, exprs, savelist=False):
    """
    Build a first-match alternation from C{exprs}.

    C{mayReturnEmpty} is true when any alternative may return empty, and
    also when there are no alternatives at all.
    """
    super(MatchFirst, self).__init__(exprs, savelist)
    self.mayReturnEmpty = (not self.exprs) or any(e.mayReturnEmpty for e in self.exprs)
3374
def parseImpl(self, instring, loc, doActions=True):
    """
    Return the result of the first alternative that parses at C{loc}.

    If every alternative fails, the failure that reached the furthest
    location is re-raised with this expression's error message; with no
    alternatives at all a C{ParseException} is raised directly.
    """
    furthest_loc = -1
    furthest_exc = None
    for alt in self.exprs:
        try:
            return alt._parse(instring, loc, doActions)
        except ParseException as exc:
            if exc.loc > furthest_loc:
                furthest_exc = exc
                furthest_loc = exc.loc
        except IndexError:
            if len(instring) > furthest_loc:
                furthest_exc = ParseException(instring, len(instring), alt.errmsg, self)
                furthest_loc = len(instring)

    # only reached when no alternative matched
    if furthest_exc is None:
        raise ParseException(instring, loc, "no defined alternatives to match", self)
    furthest_exc.msg = self.errmsg
    raise furthest_exc
3398
3400 if isinstance( other, basestring ):
3401 other = ParserElement._literalStringClass( other )
3402 return self.append( other )
3403
3405 if hasattr(self,"name"):
3406 return self.name
3407
3408 if self.strRepr is None:
3409 self.strRepr = "{" + " | ".join(_ustr(e) for e in self.exprs) + "}"
3410
3411 return self.strRepr
3412
3414 subRecCheckList = parseElementList[:] + [ self ]
3415 for e in self.exprs:
3416 e.checkRecursion( subRecCheckList )
3417
3418
3419 -class Each(ParseExpression):
3420 """
3421 Requires all given C{ParseExpression}s to be found, but in any order.
3422 Expressions may be separated by whitespace.
3423 May be constructed using the C{'&'} operator.
3424
3425 Example::
3426 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN")
3427 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON")
3428 integer = Word(nums)
3429 shape_attr = "shape:" + shape_type("shape")
3430 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn")
3431 color_attr = "color:" + color("color")
3432 size_attr = "size:" + integer("size")
3433
3434 # use Each (using operator '&') to accept attributes in any order
3435 # (shape and posn are required, color and size are optional)
3436 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr)
3437
3438 shape_spec.runTests('''
3439 shape: SQUARE color: BLACK posn: 100, 120
3440 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3441 color:GREEN size:20 shape:TRIANGLE posn:20,40
3442 '''
3443 )
3444 prints::
3445 shape: SQUARE color: BLACK posn: 100, 120
3446 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']]
3447 - color: BLACK
3448 - posn: ['100', ',', '120']
3449 - x: 100
3450 - y: 120
3451 - shape: SQUARE
3452
3453
3454 shape: CIRCLE size: 50 color: BLUE posn: 50,80
3455 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']]
3456 - color: BLUE
3457 - posn: ['50', ',', '80']
3458 - x: 50
3459 - y: 80
3460 - shape: CIRCLE
3461 - size: 50
3462
3463
3464 color: GREEN size: 20 shape: TRIANGLE posn: 20,40
3465 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']]
3466 - color: GREEN
3467 - posn: ['20', ',', '40']
3468 - x: 20
3469 - y: 40
3470 - shape: TRIANGLE
3471 - size: 20
3472 """
def __init__(self, exprs, savelist=True):
    """
    Build an any-order expression from C{exprs}.

    C{mayReturnEmpty} is true only when every contained expression may
    return empty.  The expression groups used by C{parseImpl} are built
    lazily, on the first parse, as signalled by C{initExprGroups}.
    """
    super(Each, self).__init__(exprs, savelist)
    # grouping of the sub-expressions is deferred until parseImpl runs
    self.initExprGroups = True
    self.skipWhitespace = True
    self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
3478
def parseImpl(self, instring, loc, doActions=True):
    """
    Match all contained expressions at C{loc}, in whatever order they
    occur in the input.

    The input is first scanned with C{tryParse} to discover the order in
    which the expressions match; the expressions are then parsed for real
    in that order and their results merged into one C{ParseResults}.
    Raises C{ParseException} when a required expression never matched.
    """
    if self.initExprGroups:
        # first parse only: sort the sub-expressions into working groups
        optional_wrappers = [e for e in self.exprs if isinstance(e, Optional)]
        # maps id(inner expression) -> its Optional wrapper
        self.opt1map = dict((id(w.expr), w) for w in optional_wrappers)
        unwrapped = [w.expr for w in optional_wrappers]
        may_be_empty = [e for e in self.exprs
                        if e.mayReturnEmpty and not isinstance(e, Optional)]
        self.optionals = unwrapped + may_be_empty
        self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
        plain_required = [e for e in self.exprs
                          if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required = plain_required + self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the remaining expressions until a whole sweep
    # produces no match at all
    while True:
        attempts = (pending_required + pending_optional
                    + self.multioptionals + self.multirequired)
        failures = 0
        for candidate in attempts:
            try:
                scan_loc = candidate.tryParse(instring, scan_loc)
            except ParseException:
                failures += 1
                continue
            # record the Optional wrapper when there is one, else the
            # expression itself
            matchOrder.append(self.opt1map.get(id(candidate), candidate))
            if candidate in pending_required:
                pending_required.remove(candidate)
            elif candidate in pending_optional:
                pending_optional.remove(candidate)
        if failures == len(attempts):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs
                   if isinstance(e, Optional) and e.expr in pending_optional]

    # second pass: parse for real, in the order discovered above
    parsed = []
    for matched in matchOrder:
        loc, piece = matched._parse(instring, loc, doActions)
        parsed.append(piece)

    finalResults = ParseResults()
    for piece in parsed:
        # names already present in the merged results are accumulated
        # rather than overwritten
        repeated = {}
        for key in piece.keys():
            if key in finalResults:
                combined = ParseResults(finalResults[key])
                combined += ParseResults(piece[key])
                repeated[key] = combined
        finalResults += ParseResults(piece)
        for key, combined in repeated.items():
            finalResults[key] = combined
    return loc, finalResults
3537
3539 if hasattr(self,"name"):
3540 return self.name
3541
3542 if self.strRepr is None:
3543 self.strRepr = "{" + " & ".join(_ustr(e) for e in self.exprs) + "}"
3544
3545 return self.strRepr
3546
3548 subRecCheckList = parseElementList[:] + [ self ]
3549 for e in self.exprs:
3550 e.checkRecursion( subRecCheckList )
3551
3554 """
3555 Abstract subclass of C{ParserElement}, for combining and post-processing parsed tokens.
3556 """
def __init__(self, expr, savelist=False):
    """
    Wrap a single expression.

    Parameters:
     - expr - the expression to wrap; a string is converted with
          C{ParserElement._literalStringClass}, and C{None} is accepted
     - savelist - passed through unchanged to the base-class constructor

    When C{expr} is not C{None}, its parsing flags, whitespace settings
    and ignore expressions are copied onto this element.
    """
    super(ParseElementEnhance, self).__init__(savelist)
    if isinstance(expr, basestring):
        expr = ParserElement._literalStringClass(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        # nothing to inherit from yet
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    # whitespace characters are set before skipWhitespace is copied, as in
    # the original statement order
    self.setWhitespaceChars(expr.whiteChars)
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
3571
def parseImpl(self, instring, loc, doActions=True):
    """
    Delegate parsing to the wrapped expression, without a second
    pre-parse step.

    Raises C{ParseException} when no expression has been assigned.
    """
    if self.expr is None:
        raise ParseException("", loc, self.errmsg, self)
    return self.expr._parse(instring, loc, doActions, callPreParse=False)
3577
3579 self.skipWhitespace = False
3580 self.expr = self.expr.copy()
3581 if self.expr is not None:
3582 self.expr.leaveWhitespace()
3583 return self
3584
3586 if isinstance( other, Suppress ):
3587 if other not in self.ignoreExprs:
3588 super( ParseElementEnhance, self).ignore( other )
3589 if self.expr is not None:
3590 self.expr.ignore( self.ignoreExprs[-1] )
3591 else:
3592 super( ParseElementEnhance, self).ignore( other )
3593 if self.expr is not None:
3594 self.expr.ignore( self.ignoreExprs[-1] )
3595 return self
3596
3602
3604 if self in parseElementList:
3605 raise RecursiveGrammarException( parseElementList+[self] )
3606 subRecCheckList = parseElementList[:] + [ self ]
3607 if self.expr is not None:
3608 self.expr.checkRecursion( subRecCheckList )
3609
def validate(self, validateTrace=[]):
    """
    Validate the wrapped expression (if one is set), then run
    C{checkRecursion} on this expression.

    C{validateTrace} is copied before this expression is added to it, so
    the caller's list and the shared default are never modified.
    """
    path = validateTrace[:] + [self]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(path)
    # the recursion check always starts from a fresh, empty list
    self.checkRecursion([])
3615
3617 try:
3618 return super(ParseElementEnhance,self).__str__()
3619 except:
3620 pass
3621
3622 if self.strRepr is None and self.expr is not None:
3623 self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
3624 return self.strRepr
3625
3628 """
3629 Lookahead matching of the given parse expression. C{FollowedBy}
3630 does *not* advance the parsing position within the input string, it only
3631 verifies that the specified parse expression matches at the current
3632 position. C{FollowedBy} always returns a null token list.
3633
3634 Example::
3635 # use FollowedBy to match a label only if it is followed by a ':'
3636 data_word = Word(alphas)
3637 label = data_word + FollowedBy(':')
3638 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3639
3640 OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()
3641 prints::
3642 [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
3643 """
3647
def parseImpl(self, instring, loc, doActions=True):
    """
    Check that the wrapped expression matches at C{loc} without consuming
    any input.

    Returns the unchanged location and an empty token list; any exception
    raised by the lookahead propagates to the caller.
    """
    lookahead = self.expr.tryParse
    lookahead(instring, loc)
    return loc, []
3651
3652
3653 -class NotAny(ParseElementEnhance):
3654 """
3655 Lookahead to disallow matching with the given parse expression. C{NotAny}
3656 does *not* advance the parsing position within the input string, it only
3657 verifies that the specified parse expression does *not* match at the current
3658 position. Also, C{NotAny} does *not* skip over leading whitespace. C{NotAny}
3659 always returns a null token list. May be constructed using the '~' operator.
3660
3661 Example::
3662
3663 """
3665 super(NotAny,self).__init__(expr)
3666
3667 self.skipWhitespace = False
3668 self.mayReturnEmpty = True
3669 self.errmsg = "Found unwanted token, "+_ustr(self.expr)
3670
3671 - def parseImpl( self, instring, loc, doActions=True ):
3675
3677 if hasattr(self,"name"):
3678 return self.name
3679
3680 if self.strRepr is None:
3681 self.strRepr = "~{" + _ustr(self.expr) + "}"
3682
3683 return self.strRepr
3684
3687 """
3688 Repetition of one or more of the given expression.
3689
3690 Parameters:
3691 - expr - expression that must match one or more times
3692 - stopOn - (default=C{None}) - expression for a terminating sentinel
3693 (only required if the sentinel would ordinarily match the repetition
3694 expression)
3695
3696 Example::
3697 data_word = Word(alphas)
3698 label = data_word + FollowedBy(':')
3699 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
3700
3701 text = "shape: SQUARE posn: upper left color: BLACK"
3702 OneOrMore(attr_expr).parseString(text).pprint() # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]
3703
3704 # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
3705 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
3706 OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]
3707
3708 # could also be written as
3709 (attr_expr * (1,)).parseString(text).pprint()
3710 """
def __init__(self, expr, stopOn=None):
    """
    Build a repetition of C{expr}.

    Parameters:
     - expr - the expression to repeat
     - stopOn - (default=C{None}) optional terminating sentinel; a string
          is converted with C{ParserElement._literalStringClass}

    The negated sentinel (C{~stopOn}) is stored as C{not_ender}, or
    C{None} when no sentinel was given.
    """
    super(OneOrMore, self).__init__(expr)
    sentinel = stopOn
    if isinstance(sentinel, basestring):
        sentinel = ParserElement._literalStringClass(sentinel)
    if sentinel is None:
        self.not_ender = None
    else:
        self.not_ender = ~sentinel
3717
def parseImpl(self, instring, loc, doActions=True):
    """
    Match the wrapped expression once, then as many more times as it
    keeps matching.

    The first match is mandatory, so its failure propagates to the caller.
    A C{ParseException} or C{IndexError} raised during the later
    repetitions (including by the stop-on check) simply ends the loop.
    """
    parse_once = self.expr._parse
    skip_ignorables = self._skipIgnorables
    stop_check = self.not_ender.tryParse if self.not_ender is not None else None

    # must be at least one (but first see if we are at the stopOn
    # sentinel; if so, fail)
    if stop_check is not None:
        stop_check(instring, loc)
    loc, tokens = parse_once(instring, loc, doActions, callPreParse=False)
    try:
        has_ignorables = bool(self.ignoreExprs)
        while True:
            if stop_check is not None:
                stop_check(instring, loc)
            start = skip_ignorables(instring, loc) if has_ignorables else loc
            loc, more = parse_once(instring, start, doActions)
            if more or more.haskeys():
                tokens += more
    except (ParseException, IndexError):
        # no further repetition matched; keep what has been collected
        pass

    return loc, tokens
3746
3748 if hasattr(self,"name"):
3749 return self.name
3750
3751 if self.strRepr is None:
3752 self.strRepr = "{" + _ustr(self.expr) + "}..."
3753
3754 return self.strRepr
3755
3760
3762 """
3763 Optional repetition of zero or more of the given expression.
3764
3765 Parameters:
3766 - expr - expression that must match zero or more times
3767 - stopOn - (default=C{None}) - expression for a terminating sentinel
3768 (only required if the sentinel would ordinarily match the repetition
3769 expression)
3770
3771 Example: similar to L{OneOrMore}
3772 """
3773 - def __init__( self, expr, stopOn=None):
3776
3777 - def parseImpl( self, instring, loc, doActions=True ):
3782
3784 if hasattr(self,"name"):
3785 return self.name
3786
3787 if self.strRepr is None:
3788 self.strRepr = "[" + _ustr(self.expr) + "]..."
3789
3790 return self.strRepr
3791
3798
3799 _optionalNotMatched = _NullToken()
3801 """
3802 Optional matching of the given expression.
3803
3804 Parameters:
3805 - expr - expression that may match zero or one time
3806 - default (optional) - value to be returned if the optional expression is not found.
3807
3808 Example::
3809 # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
3810 zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
3811 zip.runTests('''
3812 # traditional ZIP code
3813 12345
3814
3815 # ZIP+4 form
3816 12101-0001
3817
3818 # invalid ZIP
3819 98765-
3820 ''')
3821 prints::
3822 # traditional ZIP code
3823 12345
3824 ['12345']
3825
3826 # ZIP+4 form
3827 12101-0001
3828 ['12101-0001']
3829
3830 # invalid ZIP
3831 98765-
3832 ^
3833 FAIL: Expected end of text (at char 5), (line:1, col:6)
3834 """
3836 super(Optional,self).__init__( expr, savelist=False )
3837 self.defaultValue = default
3838 self.mayReturnEmpty = True
3839
def parseImpl(self, instring, loc, doActions=True):
    """
    Try the wrapped expression at C{loc}; on failure, succeed anyway
    without advancing.

    When the wrapped expression fails, the returned tokens are empty
    unless a default value was supplied, in which case the default is
    returned (also stored under the wrapped expression's results name,
    when it has one).
    """
    try:
        loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
    except (ParseException, IndexError):
        fallback = self.defaultValue
        if fallback is _optionalNotMatched:
            # no default configured: contribute nothing
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([fallback])
            tokens[self.expr.resultsName] = fallback
        else:
            tokens = [fallback]
    return loc, tokens
3853
3855 if hasattr(self,"name"):
3856 return self.name
3857
3858 if self.strRepr is None:
3859 self.strRepr = "[" + _ustr(self.expr) + "]"
3860
3861 return self.strRepr
3862
class SkipTo(ParseElementEnhance):
    """
    Token for skipping over all undefined text until the matched expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default=C{False}) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default=C{None}) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default=C{None}) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::
        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())
    prints::
        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        """
        Store the target expression C{other} and the skipping options.

        A string C{failOn} is converted with
        C{ParserElement._literalStringClass}; C{ignore} is stored as given.
        """
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        if isinstance(failOn, basestring):
            self.failOn = ParserElement._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """
        Advance from C{loc} one character at a time until the target
        expression matches, and return the text that was skipped.

        Returns the new location and a C{ParseResults} holding the skipped
        text, followed by the target's own tokens when C{include} was set.
        Raises C{ParseException} when the end of the input is reached
        without the target expression matching.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # stop scanning if the failOn expression matches here
                # NOTE(review): this break bypasses the loop's else clause,
                # so no exception is raised at this point; the text skipped
                # so far is returned unless the include re-parse below
                # fails.  The class docstring says failOn makes SkipTo
                # "not a match" - confirm this is the intended behaviour.
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past any run of ignore expressions
                while True:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                # probe only: parse actions are not run while searching
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance one character and try again
                tmploc += 1
            else:
                # matched the target expression, done
                break

        else:
            # ran off the end of the input string without matching, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real so its tokens (and actions) apply
            loc, mat = expr_parse(instring,loc,doActions,callPreParse=False)
            skipresult += mat

        return loc, skipresult
3977
3978 -class Forward(ParseElementEnhance):
3979 """
3980 Forward declaration of an expression to be defined later -
3981 used for recursive grammars, such as algebraic infix notation.
3982 When the expression is known, it is assigned to the C{Forward} variable using the '<<' operator.
3983
3984 Note: take care when assigning to C{Forward} not to overlook precedence of operators.
3985 Specifically, '|' has a lower precedence than '<<', so that::
3986 fwdExpr << a | b | c
3987 will actually be evaluated as::
3988 (fwdExpr << a) | b | c
3989 thereby leaving b and c out as parseable alternatives. It is recommended that you
3990 explicitly group the values inserted into the C{Forward}::
3991 fwdExpr << (a | b | c)
3992 Converting to use the '<<=' operator instead will avoid this problem.
3993
3994 See L{ParseResults.pprint} for an example of a recursive parser created using
3995 C{Forward}.
3996 """
3999
4001 if isinstance( other, basestring ):
4002 other = ParserElement._literalStringClass(other)
4003 self.expr = other
4004 self.strRepr = None
4005 self.mayIndexError = self.expr.mayIndexError
4006 self.mayReturnEmpty = self.expr.mayReturnEmpty
4007 self.setWhitespaceChars( self.expr.whiteChars )
4008 self.skipWhitespace = self.expr.skipWhitespace
4009 self.saveAsList = self.expr.saveAsList
4010 self.ignoreExprs.extend(self.expr.ignoreExprs)
4011 return self
4012
4014 return self << other
4015
4017 self.skipWhitespace = False
4018 return self
4019
4021 if not self.streamlined:
4022 self.streamlined = True
4023 if self.expr is not None:
4024 self.expr.streamline()
4025 return self
4026
def validate(self, validateTrace=[]):
    """
    Validate the assigned expression unless this element is already on
    the validation path, then run C{checkRecursion}.

    Skipping elements already present in C{validateTrace} keeps a
    self-referencing grammar from being validated endlessly.
    """
    already_on_path = self in validateTrace
    if not already_on_path and self.expr is not None:
        self.expr.validate(validateTrace[:] + [self])
    # the recursion check runs in every case, from a fresh empty list
    self.checkRecursion([])
4033
4035 if hasattr(self,"name"):
4036 return self.name
4037 return self.__class__.__name__ + ": ..."
4038
4039
4040 self._revertClass = self.__class__
4041 self.__class__ = _ForwardNoRecurse
4042 try:
4043 if self.expr is not None:
4044 retString = _ustr(self.expr)
4045 else:
4046 retString = "None"
4047 finally:
4048 self.__class__ = self._revertClass
4049 return self.__class__.__name__ + ": " + retString
4050
4052 if self.expr is not None:
4053 return super(Forward,self).copy()
4054 else:
4055 ret = Forward()
4056 ret <<= self
4057 return ret
4058
4062
4064 """
4065 Abstract subclass of C{ParseExpression}, for converting parsed results.
4066 """
4067 - def __init__( self, expr, savelist=False ):
4070
4072 """
4073 Converter to concatenate all matching tokens to a single string.
4074 By default, the matching patterns must also be contiguous in the input string;
4075 this can be disabled by specifying C{'adjacent=False'} in the constructor.
4076
4077 Example::
4078 real = Word(nums) + '.' + Word(nums)
4079 print(real.parseString('3.1416')) # -> ['3', '.', '1416']
4080 # will also erroneously match the following
4081 print(real.parseString('3. 1416')) # -> ['3', '.', '1416']
4082
4083 real = Combine(Word(nums) + '.' + Word(nums))
4084 print(real.parseString('3.1416')) # -> ['3.1416']
4085 # no match when there are internal spaces
4086 print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
4087 """
def __init__(self, expr, joinString="", adjacent=True):
    """
    Wrap C{expr} so that its matched tokens are joined into one string.

    Parameters:
     - expr - the expression whose tokens are combined
     - joinString - (default=C{""}) stored as C{self.joinString} for use
          when the tokens are joined
     - adjacent - (default=C{True}) when true, C{leaveWhitespace} is
          invoked on this element
    """
    super(Combine, self).__init__(expr)
    if adjacent:
        # suppress whitespace skipping for the combined expression ...
        self.leaveWhitespace()
    # ... but re-enable it on the Combine element itself
    self.skipWhitespace = True
    self.callPreparse = True
    self.adjacent = adjacent
    self.joinString = joinString
4097
4104
def postParse(self, instring, loc, tokenlist):
    """
    Replace the matched tokens with a single joined string.

    The result starts as a copy of C{tokenlist} emptied of its list
    items, to which the joined string is added.  It is wrapped in a list
    when this element has a results name and the result has named keys.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    retToks = tokenlist.copy()
    del retToks[:]
    retToks += ParseResults([joined], modal=self.modalResults)

    if self.resultsName and retToks.haskeys():
        return [retToks]
    return retToks
4114
4115 -class Group(TokenConverter):
4116 """
4117 Converter to return the matched tokens as a list - useful for returning tokens of C{L{ZeroOrMore}} and C{L{OneOrMore}} expressions.
4118
4119 Example::
4120 ident = Word(alphas)
4121 num = Word(nums)
4122 term = ident | num
4123 func = ident + Optional(delimitedList(term))
4124 print(func.parseString("fn a,b,100")) # -> ['fn', 'a', 'b', '100']
4125
4126 func = ident + Group(Optional(delimitedList(term)))
4127 print(func.parseString("fn a,b,100")) # -> ['fn', ['a', 'b', '100']]
4128 """
4130 super(Group,self).__init__( expr )
4131 self.saveAsList = True
4132
def postParse(self, instring, loc, tokenlist):
    """Return the matched tokens nested inside a one-element list."""
    grouped = [tokenlist]
    return grouped
4135
4136 -class Dict(TokenConverter):
4137 """
4138 Converter to return a repetitive expression as a list, but also as a dictionary.
4139 Each element can also be referenced using the first token in the expression as its key.
4140 Useful for tabular report scraping when the first column can be used as an item key.
4141
4142 Example::
4143 data_word = Word(alphas)
4144 label = data_word + FollowedBy(':')
4145 attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))
4146
4147 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4148 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4149
4150 # print attributes as plain groups
4151 print(OneOrMore(attr_expr).parseString(text).dump())
4152
4153 # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
4154 result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
4155 print(result.dump())
4156
4157 # access named fields as dict entries, or output as dict
4158 print(result['shape'])
4159 print(result.asDict())
4160 prints::
4161 ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
4162
4163 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4164 - color: light blue
4165 - posn: upper left
4166 - shape: SQUARE
4167 - texture: burlap
4168 SQUARE
4169 {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}
4170 See more examples at L{ParseResults} of accessing fields by results name.
4171 """
4173 super(Dict,self).__init__( expr )
4174 self.saveAsList = True
4175
def postParse(self, instring, loc, tokenlist):
    """
    Add a named entry to C{tokenlist} for each non-empty token group,
    keyed by the group's first item.

    The stored value is an empty string for a one-item group, the second
    item for a plain two-item group, and otherwise the rest of the group
    (unwrapped when it is a single item without named keys).  The token
    list is wrapped in a list when this element has a results name.
    """
    for position, entry in enumerate(tokenlist):
        if len(entry) == 0:
            continue
        key = entry[0]
        if isinstance(key, int):
            # integer keys are stored under their stripped string form
            key = _ustr(entry[0]).strip()

        if len(entry) == 1:
            value = ""
        elif len(entry) == 2 and not isinstance(entry[1], ParseResults):
            value = entry[1]
        else:
            remainder = entry.copy()
            del remainder[0]
            if len(remainder) != 1 or (isinstance(remainder, ParseResults) and remainder.haskeys()):
                value = remainder
            else:
                value = remainder[0]
        tokenlist[key] = _ParseResultsWithOffset(value, position)

    if self.resultsName:
        return [tokenlist]
    return tokenlist
4199
4202 """
4203 Converter for ignoring the results of a parsed expression.
4204
4205 Example::
4206 source = "a, b, c,d"
4207 wd = Word(alphas)
4208 wd_list1 = wd + ZeroOrMore(',' + wd)
4209 print(wd_list1.parseString(source))
4210
4211 # often, delimiters that are useful during parsing are just in the
4212 # way afterward - use Suppress to keep them out of the parsed output
4213 wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
4214 print(wd_list2.parseString(source))
4215 prints::
4216 ['a', ',', 'b', ',', 'c', ',', 'd']
4217 ['a', 'b', 'c', 'd']
4218 (See also L{delimitedList}.)
4219 """
4220 - def postParse( self, instring, loc, tokenlist ):
4222
4225
4228 """
4229 Wrapper for parse actions, to ensure they are only called once.
4230 """
4232 self.callable = _trim_arity(methodCall)
4233 self.called = False
4235 if not self.called:
4236 results = self.callable(s,l,t)
4237 self.called = True
4238 return results
4239 raise ParseException(s,l,"")
4242
4244 """
4245 Decorator for debugging parse actions.
4246
4247 Example::
4248 wd = Word(alphas)
4249
4250 @traceParseAction
4251 def remove_duplicate_chars(tokens):
4252 return ''.join(sorted(set(''.join(tokens)))
4253
4254 wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
4255 print(wds.parseString("slkdjs sld sldd sdlf sdljf"))
4256 prints::
4257 >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
4258 <<leaving remove_duplicate_chars (ret: 'dfjkls')
4259 ['dfjkls']
4260 """
4261 f = _trim_arity(f)
4262 def z(*paArgs):
4263 thisFunc = f.__name__
4264 s,l,t = paArgs[-3:]
4265 if len(paArgs)>3:
4266 thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
4267 sys.stderr.write( ">>entering %s(line: '%s', %d, %r)\n" % (thisFunc,line(l,s),l,t) )
4268 try:
4269 ret = f(*paArgs)
4270 except Exception as exc:
4271 sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
4272 raise
4273 sys.stderr.write( "<<leaving %s (ret: %r)\n" % (thisFunc,ret) )
4274 return ret
4275 try:
4276 z.__name__ = f.__name__
4277 except AttributeError:
4278 pass
4279 return z
4280
4281
4282
4283
def delimitedList( expr, delim=",", combine=False ):
    """
    Helper to build an expression matching C{expr} repeated one or more times,
    with C{delim} (a comma by default) between consecutive items.

    Normally whitespace and comments may appear between items and delimiters,
    the delimiters are suppressed, and the items come back as a list of tokens.
    Pass C{combine=True} to require the pieces to be adjacent and to get the
    whole match back as a single string token, delimiters included.

    Example::
        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    itemLabel = _ustr(expr)
    listLabel = itemLabel+" ["+_ustr(delim)+" "+itemLabel+"]..."

    # the delimiter is kept in the output only when the result is combined
    separator = delim if combine else Suppress( delim )
    sequence = expr + ZeroOrMore( separator + expr )
    if combine:
        sequence = Combine( sequence )
    return sequence.setName(listLabel)
4302
4304 """
4305 Helper to define a counted list of expressions.
4306 This helper defines a pattern of the form::
4307 integer expr expr expr...
4308 where the leading integer tells how many expr expressions follow.
4309 The matched tokens are returned as a list of expr tokens - the leading count token is suppressed.
4310
4311 Example::
4312 countedArray(Word(alphas)).parseString('2 ab cd ef') # -> ['ab', 'cd']
4313 """
4314 arrayExpr = Forward()
4315 def countFieldParseAction(s,l,t):
4316 n = t[0]
4317 arrayExpr << (n and Group(And([expr]*n)) or Group(empty))
4318 return []
4319 if intExpr is None:
4320 intExpr = Word(nums).setParseAction(lambda t:int(t[0]))
4321 else:
4322 intExpr = intExpr.copy()
4323 intExpr.setName("arrayLen")
4324 intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
4325 return ( intExpr + arrayExpr ).setName('(len) ' + _ustr(expr) + '...')
4326
4328 ret = []
4329 for i in L:
4330 if isinstance(i,list):
4331 ret.extend(_flatten(i))
4332 else:
4333 ret.append(i)
4334 return ret
4335
4337 """
4338 Helper to define an expression that is indirectly defined from
4339 the tokens matched in a previous expression, that is, it looks
4340 for a 'repeat' of a previous expression. For example::
4341 first = Word(nums)
4342 second = matchPreviousLiteral(first)
4343 matchExpr = first + ":" + second
4344 will match C{"1:1"}, but not C{"1:2"}. Because this matches a
4345 previous literal, will also match the leading C{"1:1"} in C{"1:10"}.
4346 If this is not desired, use C{matchPreviousExpr}.
4347 Do *not* use with packrat parsing enabled.
4348 """
4349 rep = Forward()
4350 def copyTokenToRepeater(s,l,t):
4351 if t:
4352 if len(t) == 1:
4353 rep << t[0]
4354 else:
4355
4356 tflat = _flatten(t.asList())
4357 rep << And(Literal(tt) for tt in tflat)
4358 else:
4359 rep << Empty()
4360 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4361 rep.setName('(prev) ' + _ustr(expr))
4362 return rep
4363
4365 """
4366 Helper to define an expression that is indirectly defined from
4367 the tokens matched in a previous expression, that is, it looks
4368 for a 'repeat' of a previous expression. For example::
4369 first = Word(nums)
4370 second = matchPreviousExpr(first)
4371 matchExpr = first + ":" + second
4372 will match C{"1:1"}, but not C{"1:2"}. Because this matches by
4373 expressions, will *not* match the leading C{"1:1"} in C{"1:10"};
4374 the expressions are evaluated first, and then compared, so
4375 C{"1"} is compared with C{"10"}.
4376 Do *not* use with packrat parsing enabled.
4377 """
4378 rep = Forward()
4379 e2 = expr.copy()
4380 rep <<= e2
4381 def copyTokenToRepeater(s,l,t):
4382 matchTokens = _flatten(t.asList())
4383 def mustMatchTheseTokens(s,l,t):
4384 theseTokens = _flatten(t.asList())
4385 if theseTokens != matchTokens:
4386 raise ParseException("",0,"")
4387 rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
4388 expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
4389 rep.setName('(prev) ' + _ustr(expr))
4390 return rep
4391
4393
4394 for c in r"\^-]":
4395 s = s.replace(c,_bslash+c)
4396 s = s.replace("\n",r"\n")
4397 s = s.replace("\t",r"\t")
4398 return _ustr(s)
4399
def oneOf( strs, caseless=False, useRegex=True ):
    """
    Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a C{L{MatchFirst}} for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a collection of string literals
     - caseless - (default=C{False}) - treat all literals as caseless
     - useRegex - (default=C{True}) - as an optimization, will generate a Regex
          object; otherwise, will generate a C{MatchFirst} object (if C{caseless=True}, or
          if creating a C{Regex} raises an exception)

    Returns a C{NoMatch} expression if no symbols are given; an argument that is
    neither a string nor an iterable additionally triggers a C{SyntaxWarning}.

    Example::
        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))
    prints::
        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # The ABC aliases in the top-level ``collections`` namespace were removed in
    # Python 3.10; fall back to the old location only for Python 2.
    try:
        from collections.abc import Iterable
    except ImportError:
        from collections import Iterable

    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    symbols = []
    if isinstance(strs,basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    # Remove duplicates, and move any symbol that is hidden behind one of its own
    # prefixes in front of that prefix, so the longer alternative is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing was changed for position i, so it is settled
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: a character class is enough
                return Regex( "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols) ).setName(' | '.join(symbols))
            else:
                return Regex( "|".join(re.escape(sym) for sym in symbols) ).setName(' | '.join(symbols))
        except Exception:
            # fall back to MatchFirst, but let KeyboardInterrupt/SystemExit propagate
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))
4472
4474 """
4475 Helper to easily and clearly define a dictionary by specifying the respective patterns
4476 for the key and value. Takes care of defining the C{L{Dict}}, C{L{ZeroOrMore}}, and C{L{Group}} tokens
4477 in the proper order. The key pattern can include delimiting markers or punctuation,
4478 as long as they are suppressed, thereby leaving the significant key text. The value
4479 pattern can include named results, so that the C{Dict} results can include named token
4480 fields.
4481
4482 Example::
4483 text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
4484 attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
4485 print(OneOrMore(attr_expr).parseString(text).dump())
4486
4487 attr_label = label
4488 attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)
4489
4490 # similar to Dict, but simpler call format
4491 result = dictOf(attr_label, attr_value).parseString(text)
4492 print(result.dump())
4493 print(result['shape'])
4494 print(result.shape) # object attribute access works too
4495 print(result.asDict())
4496 prints::
4497 [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
4498 - color: light blue
4499 - posn: upper left
4500 - shape: SQUARE
4501 - texture: burlap
4502 SQUARE
4503 SQUARE
4504 {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
4505 """
4506 return Dict( ZeroOrMore( Group ( key + value ) ) )
4507
def originalTextFor(expr, asString=True):
    """
    Helper to return the original, untokenized input text matched by an expression.
    Useful to turn the parsed fields of an HTML start tag back into the raw tag text,
    or to recover separate tokens together with the whitespace that was between them.
    By default, returns a string containing the original parsed text.

    If the optional C{asString} argument is passed as C{False}, then the return value is a
    C{L{ParseResults}} containing any results names that were originally matched, and a
    single token containing the original matched text from the input string.  So if
    the expression passed to C{L{originalTextFor}} contains expressions with defined
    results names, you must set C{asString} to C{False} if you want to preserve those
    results name values.

    Example::
        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b","i"):
            opener,closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])
    prints::
        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers that record the parse location on either side of expr
    startMarker = Empty().setParseAction(lambda s,loc,t: loc)
    endMarker = startMarker.copy()
    endMarker.callPreparse = False
    wrapped = startMarker("_original_start") + expr + endMarker("_original_end")

    if not asString:
        def extractText(s,l,t):
            # drop the two marker names, then replace all tokens with the raw slice
            begin = t.pop('_original_start')
            end = t.pop('_original_end')
            t[:] = [s[begin:end]]
    else:
        extractText = lambda s,l,t: s[t._original_start:t._original_end]

    wrapped.setParseAction(extractText)
    wrapped.ignoreExprs = expr.ignoreExprs
    return wrapped
4544
4546 """
4547 Helper to undo pyparsing's default grouping of And expressions, even
4548 if all but one are non-empty.
4549 """
4550 return TokenConverter(expr).setParseAction(lambda t:t[0])
4551
4553 """
4554 Helper to decorate a returned token with its starting and ending locations in the input string.
4555 This helper adds the following results names:
4556 - locn_start = location where matched expression begins
4557 - locn_end = location where matched expression ends
4558 - value = the actual parsed results
4559
4560 Be careful if the input text contains C{<TAB>} characters, you may want to call
4561 C{L{ParserElement.parseWithTabs}}
4562
4563 Example::
4564 wd = Word(alphas)
4565 for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
4566 print(match)
4567 prints::
4568 [[0, 'ljsdf', 5]]
4569 [[8, 'lksdjjf', 15]]
4570 [[18, 'lkkjj', 23]]
4571 """
4572 locator = Empty().setParseAction(lambda s,l,t: l)
4573 return Group(locator("locn_start") + expr("value") + locator.copy().leaveWhitespace()("locn_end"))
4574
4575
4576
# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

# grammar for the regex-style "[...]" bracket expressions
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
# Take the digits after the 'x'/'X' marker.  The former str.lstrip(r'\0x') stripped a
# *set* of characters, so it also removed leading zero digits of the value itself
# (r"\x00" became "") and left an upper-case 'X' in place (r"\X41" became "X41");
# in both cases int() raised ValueError.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(lambda s,l,t:unichr(int(t[0].lower().partition('x')[2],16)))
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s,l,t:unichr(int(t[0][1:],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(printables, excludeChars=r'\]', exact=1) | Regex(r"\w", re.UNICODE)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
4591 r"""
4592 Helper to easily define string ranges for use in Word construction. Borrows
4593 syntax from regexp '[]' string range definitions::
4594 srange("[0-9]") -> "0123456789"
4595 srange("[a-z]") -> "abcdefghijklmnopqrstuvwxyz"
4596 srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
4597 The input string must be enclosed in []'s, and the returned string is the expanded
4598 character set joined into a single string.
4599 The values enclosed in the []'s may be:
4600 - a single character
4601 - an escaped character with a leading backslash (such as C{\-} or C{\]})
4602 - an escaped hex character with a leading C{'\x'} (C{\x21}, which is a C{'!'} character)
4603 (C{\0x##} is also supported for backwards compatibility)
4604 - an escaped octal character with a leading C{'\0'} (C{\041}, which is a C{'!'} character)
4605 - a range of any of the above, separated by a dash (C{'a-z'}, etc.)
4606 - any combination of the above (C{'aeiouy'}, C{'a-zA-Z0-9_$'}, etc.)
4607 """
4608 _expanded = lambda p: p if not isinstance(p,ParseResults) else ''.join(unichr(c) for c in range(ord(p[0]),ord(p[1])+1))
4609 try:
4610 return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
4611 except:
4612 return ""
4613
4615 """
4616 Helper method for defining parse actions that require matching at a specific
4617 column in the input text.
4618 """
4619 def verifyCol(strg,locn,toks):
4620 if col(locn,strg) != n:
4621 raise ParseException(strg,locn,"matched token not at column %d" % n)
4622 return verifyCol
4623
4625 """
4626 Helper method for common parse actions that simply return a literal value. Especially
4627 useful when used with C{L{transformString<ParserElement.transformString>}()}.
4628
4629 Example::
4630 num = Word(nums).setParseAction(lambda toks: int(toks[0]))
4631 na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
4632 term = na | num
4633
4634 OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
4635 """
4636 return lambda s,l,t: [replStr]
4637
4639 """
4640 Helper parse action for removing quotation marks from parsed quoted strings.
4641
4642 Example::
4643 # by default, quotation marks are included in parsed results
4644 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
4645
4646 # use removeQuotes to strip quotation marks from parsed results
4647 quotedString.setParseAction(removeQuotes)
4648 quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
4649 """
4650 return t[0][1:-1]
4651
4653 """
4654 Helper to define a parse action by mapping a function to all elements of a ParseResults list. If any additional
4655 args are passed, they are forwarded to the given function as additional arguments after
4656 the token, as in C{hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))}, which will convert the
4657 parsed data to an integer using base 16.
4658
4659 Example (compare the last example to the one in L{ParserElement.transformString})::
4660 hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
4661 hex_ints.runTests('''
4662 00 11 22 aa FF 0a 0d 1a
4663 ''')
4664
4665 upperword = Word(alphas).setParseAction(tokenMap(str.upper))
4666 OneOrMore(upperword).runTests('''
4667 my kingdom for a horse
4668 ''')
4669
4670 wd = Word(alphas).setParseAction(tokenMap(str.title))
4671 OneOrMore(wd).setParseAction(' '.join).runTests('''
4672 now is the winter of our discontent made glorious summer by this sun of york
4673 ''')
4674 prints::
4675 00 11 22 aa FF 0a 0d 1a
4676 [0, 17, 34, 170, 255, 10, 13, 26]
4677
4678 my kingdom for a horse
4679 ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']
4680
4681 now is the winter of our discontent made glorious summer by this sun of york
4682 ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
4683 """
4684 def pa(s,l,t):
4685 return [func(tokn, *args) for tokn in t]
4686
4687 try:
4688 func_name = getattr(func, '__name__',
4689 getattr(func, '__class__').__name__)
4690 except Exception:
4691 func_name = str(func)
4692 pa.__name__ = func_name
4693
4694 return pa
4695
4696 upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
4697 """Helper parse action to convert tokens to upper case."""
4698
4699 downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
4700 """Helper parse action to convert tokens to lower case."""
4730
4749
4758
4760 """
4761 Helper to create a validating parse action to be used with start tags created
4762 with C{L{makeXMLTags}} or C{L{makeHTMLTags}}. Use C{withAttribute} to qualify a starting tag
4763 with a required attribute value, to avoid false matches on common tags such as
4764 C{<TD>} or C{<DIV>}.
4765
4766 Call C{withAttribute} with a series of attribute names and values. Specify the list
4767 of filter attributes names and values as:
4768 - keyword arguments, as in C{(align="right")}, or
4769 - as an explicit dict with C{**} operator, when an attribute name is also a Python
4770 reserved word, as in C{**{"class":"Customer", "align":"right"}}
4771 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
4772 For attribute names with a namespace prefix, you must use the second form. Attribute
4773 names are matched insensitive to upper/lower case.
4774
4775 If just testing for C{class} (with or without a namespace), use C{L{withClass}}.
4776
4777 To verify that the attribute exists, but without specifying a value, pass
4778 C{withAttribute.ANY_VALUE} as the value.
4779
4780 Example::
4781 html = '''
4782 <div>
4783 Some text
4784 <div type="grid">1 4 0 1 0</div>
4785 <div type="graph">1,3 2,3 1,1</div>
4786 <div>this has no type</div>
4787 </div>
4788
4789 '''
4790 div,div_end = makeHTMLTags("div")
4791
4792 # only match div tag having a type attribute with value "grid"
4793 div_grid = div().setParseAction(withAttribute(type="grid"))
4794 grid_expr = div_grid + SkipTo(div | div_end)("body")
4795 for grid_header in grid_expr.searchString(html):
4796 print(grid_header.body)
4797
4798 # construct a match with any div tag having a type attribute, regardless of the value
4799 div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
4800 div_expr = div_any_type + SkipTo(div | div_end)("body")
4801 for div_header in div_expr.searchString(html):
4802 print(div_header.body)
4803 prints::
4804 1 4 0 1 0
4805
4806 1 4 0 1 0
4807 1,3 2,3 1,1
4808 """
4809 if args:
4810 attrs = args[:]
4811 else:
4812 attrs = attrDict.items()
4813 attrs = [(k,v) for k,v in attrs]
4814 def pa(s,l,tokens):
4815 for attrName,attrValue in attrs:
4816 if attrName not in tokens:
4817 raise ParseException(s,l,"no matching attribute " + attrName)
4818 if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
4819 raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
4820 (attrName, tokens[attrName], attrValue))
4821 return pa
4822 withAttribute.ANY_VALUE = object()
4823
def withClass(classname, namespace=''):
    """
    Shortcut for C{L{withAttribute}} when filtering start tags on their C{class}
    attribute, which is awkward to pass as a keyword because C{class} is a
    reserved word in Python.  If C{namespace} is given, the attribute tested is
    C{<namespace>:class}.

    Example::
        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)
    prints::
        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attrName = "%s:class" % namespace
    else:
        attrName = "class"
    # pass through a dict, since the attribute name is not a valid keyword
    return withAttribute(**{attrName : classname})
4858
4859 opAssoc = _Constants()
4860 opAssoc.LEFT = object()
4861 opAssoc.RIGHT = object()
4864 """
4865 Helper method for constructing grammars of expressions made up of
4866 operators working in a precedence hierarchy. Operators may be unary or
4867 binary, left- or right-associative. Parse actions can also be attached
4868 to operator expressions.
4869
4870 Parameters:
4871 - baseExpr - expression representing the most basic element for the nested expression grammar
4872 - opList - list of tuples, one for each operator precedence level in the
4873 expression grammar; each tuple is of the form
4874 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
4875 - opExpr is the pyparsing expression for the operator;
4876 may also be a string, which will be converted to a Literal;
4877 if numTerms is 3, opExpr is a tuple of two expressions, for the
4878 two operators separating the 3 terms
4879 - numTerms is the number of terms for this operator (must
4880 be 1, 2, or 3)
4881 - rightLeftAssoc is the indicator whether the operator is
4882 right or left associative, using the pyparsing-defined
4883 constants C{opAssoc.RIGHT} and C{opAssoc.LEFT}.
4884 - parseAction is the parse action to be associated with
4885 expressions matching this operator expression (the
4886 parse action tuple member may be omitted)
4887 - lpar - expression for matching left-parentheses (default=C{Suppress('(')})
4888 - rpar - expression for matching right-parentheses (default=C{Suppress(')')})
4889
4890 Example::
4891 # simple example of four-function arithmetic with ints and variable names
4892 integer = pyparsing_common.signedInteger
4893 varname = pyparsing_common.identifier
4894
4895 arith_expr = infixNotation(integer | varname,
4896 [
4897 ('-', 1, opAssoc.RIGHT),
4898 (oneOf('* /'), 2, opAssoc.LEFT),
4899 (oneOf('+ -'), 2, opAssoc.LEFT),
4900 ])
4901
4902 arith_expr.runTests('''
4903 5+3*6
4904 (5+3)*6
4905 -2--11
4906 ''', fullDump=False)
4907 prints::
4908 5+3*6
4909 [[5, '+', [3, '*', 6]]]
4910
4911 (5+3)*6
4912 [[[5, '+', 3], '*', 6]]
4913
4914 -2--11
4915 [[['-', 2], '-', ['-', 11]]]
4916 """
4917 ret = Forward()
4918 lastExpr = baseExpr | ( lpar + ret + rpar )
4919 for i,operDef in enumerate(opList):
4920 opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
4921 termName = "%s term" % opExpr if arity < 3 else "%s%s term" % opExpr
4922 if arity == 3:
4923 if opExpr is None or len(opExpr) != 2:
4924 raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
4925 opExpr1, opExpr2 = opExpr
4926 thisExpr = Forward().setName(termName)
4927 if rightLeftAssoc == opAssoc.LEFT:
4928 if arity == 1:
4929 matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
4930 elif arity == 2:
4931 if opExpr is not None:
4932 matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
4933 else:
4934 matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
4935 elif arity == 3:
4936 matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
4937 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
4938 else:
4939 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4940 elif rightLeftAssoc == opAssoc.RIGHT:
4941 if arity == 1:
4942
4943 if not isinstance(opExpr, Optional):
4944 opExpr = Optional(opExpr)
4945 matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
4946 elif arity == 2:
4947 if opExpr is not None:
4948 matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
4949 else:
4950 matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
4951 elif arity == 3:
4952 matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
4953 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
4954 else:
4955 raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
4956 else:
4957 raise ValueError("operator must indicate right or left associativity")
4958 if pa:
4959 matchExpr.setParseAction( pa )
4960 thisExpr <<= ( matchExpr.setName(termName) | lastExpr )
4961 lastExpr = thisExpr
4962 ret <<= lastExpr
4963 return ret
4964
4965 operatorPrecedence = infixNotation
4966 """(Deprecated) Former name of C{L{infixNotation}}, will be dropped in a future release."""
4967
4968 dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"').setName("string enclosed in double quotes")
4969 sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("string enclosed in single quotes")
4970 quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*')+'"'|
4971 Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*")+"'").setName("quotedString using single or double quotes")
4972 unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")
4975 """
4976 Helper method for defining nested lists enclosed in opening and closing
4977 delimiters ("(" and ")" are the default).
4978
4979 Parameters:
4980 - opener - opening character for a nested list (default=C{"("}); can also be a pyparsing expression
4981 - closer - closing character for a nested list (default=C{")"}); can also be a pyparsing expression
4982 - content - expression for items within the nested lists (default=C{None})
4983 - ignoreExpr - expression for ignoring opening and closing delimiters (default=C{quotedString})
4984
4985 If an expression is not provided for the content argument, the nested
4986 expression will capture all whitespace-delimited content between delimiters
4987 as a list of separate values.
4988
4989 Use the C{ignoreExpr} argument to define expressions that may contain
4990 opening or closing characters that should not be treated as opening
4991 or closing characters for nesting, such as quotedString or a comment
4992 expression. Specify multiple expressions using an C{L{Or}} or C{L{MatchFirst}}.
4993 The default is L{quotedString}, but if no expressions are to be ignored,
4994 then pass C{None} for this argument.
4995
4996 Example::
4997 data_type = oneOf("void int short long char float double")
4998 decl_data_type = Combine(data_type + Optional(Word('*')))
4999 ident = Word(alphas+'_', alphanums+'_')
5000 number = pyparsing_common.number
5001 arg = Group(decl_data_type + ident)
5002 LPAR,RPAR = map(Suppress, "()")
5003
5004 code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))
5005
5006 c_function = (decl_data_type("type")
5007 + ident("name")
5008 + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
5009 + code_body("body"))
5010 c_function.ignore(cStyleComment)
5011
5012 source_code = '''
5013 int is_odd(int x) {
5014 return (x%2);
5015 }
5016
5017 int dec_to_hex(char hchar) {
5018 if (hchar >= '0' && hchar <= '9') {
5019 return (ord(hchar)-ord('0'));
5020 } else {
5021 return (10+ord(hchar)-ord('A'));
5022 }
5023 }
5024 '''
5025 for func in c_function.searchString(source_code):
5026 print("%(name)s (%(type)s) args: %(args)s" % func)
5027
5028 prints::
5029 is_odd (int) args: [['int', 'x']]
5030 dec_to_hex (int) args: [['char', 'hchar']]
5031 """
5032 if opener == closer:
5033 raise ValueError("opening and closing strings cannot be the same")
5034 if content is None:
5035 if isinstance(opener,basestring) and isinstance(closer,basestring):
5036 if len(opener) == 1 and len(closer)==1:
5037 if ignoreExpr is not None:
5038 content = (Combine(OneOrMore(~ignoreExpr +
5039 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5040 ).setParseAction(lambda t:t[0].strip()))
5041 else:
5042 content = (empty.copy()+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
5043 ).setParseAction(lambda t:t[0].strip()))
5044 else:
5045 if ignoreExpr is not None:
5046 content = (Combine(OneOrMore(~ignoreExpr +
5047 ~Literal(opener) + ~Literal(closer) +
5048 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5049 ).setParseAction(lambda t:t[0].strip()))
5050 else:
5051 content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
5052 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
5053 ).setParseAction(lambda t:t[0].strip()))
5054 else:
5055 raise ValueError("opening and closing arguments must be strings if no content expression is given")
5056 ret = Forward()
5057 if ignoreExpr is not None:
5058 ret <<= Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
5059 else:
5060 ret <<= Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
5061 ret.setName('nested %s%s expression' % (opener,closer))
5062 return ret
5063
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """
    Helper method for defining space-delimited indentation blocks, such as
    those used to define block statements in Python source code.

    Parameters:
     - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=C{True})

    A valid block must contain at least one C{blockStatement}.

    Note that C{blockStatementExpr} is modified in place: a backslash followed
    by a line end is added to the expressions it ignores.

    Example::
        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group( funcDecl + func_body )

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << ( funcDef | assignment | identifier )

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()
    prints::
        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    def requirePeerColumn(s,l,t):
        # a statement of the current block must start at the column on top of the stack
        if l >= len(s):
            return
        column = col(l,s)
        if column == indentStack[-1]:
            return
        if column > indentStack[-1]:
            raise ParseFatalException(s,l,"illegal nesting")
        raise ParseException(s,l,"not a peer entry")

    def requireDeeperColumn(s,l,t):
        # entering a block: the column must increase, and becomes the new stack top
        column = col(l,s)
        if column <= indentStack[-1]:
            raise ParseException(s,l,"not a subentry")
        indentStack.append( column )

    def requireShallowerColumn(s,l,t):
        # leaving a block: the column must drop back to an enclosing level
        if l >= len(s):
            return
        column = col(l,s)
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = (Empty() + Empty().setParseAction(requireDeeperColumn)).setName('INDENT')
    PEER   = Empty().setParseAction(requirePeerColumn).setName('')
    UNDENT = Empty().setParseAction(requireShallowerColumn).setName('UNINDENT')

    if indent:
        smExpr = Group( Optional(NL) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )

    # a trailing backslash continues a statement onto the next line
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')
5177
# 8-bit character sets (code points 0xa1-0xff), written in srange notation
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# expressions matching any opening / closing HTML tag
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:").setName('any tag'))
# entity name -> replacement character, paired positionally: gt lt amp nbsp quot apos
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(),'><& "\''))
# matches "&<name>;" for the names above; the name is captured in the 'entity' group
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")
def replaceHTMLEntity(t):
    """Helper parser action to replace common HTML entities with their special characters"""
    # t.entity is the entity name captured by the 'entity' group of commonHTMLEntity;
    # dict.get returns None for a name that is not in the map
    return _htmlEntityMap.get(t.entity)
5187
5188
# predefined comment expressions; the bare string after each assignment is its
# attribute docstring
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form C{/* ... */}"

htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form C{<!-- ... -->}"

# everything up to (not including) the end of the current line, whitespace preserved
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# a backslash-newline inside the comment continues it onto the next line
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form C{// ... (to end of line)}"

cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/'| dblSlashComment).setName("C++ style comment")
"Comment of either form C{L{cStyleComment}} or C{L{dblSlashComment}}"

javaStyleComment = cppStyleComment
"Same as C{L{cppStyleComment}}"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form C{# ... (to end of line)}"

# one unquoted list item: runs of non-comma printables, with embedded blanks/tabs
# kept only when they are not followed by a comma or the end of the line
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',') +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString.copy() | _commasepitem, default="") ).setName("commaSeparatedList")
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
# lower-case class name: the class is used purely as a namespace for expressions
class pyparsing_common:
    """
    Here are some common low-level expressions that may be useful in jump-starting parser development:
     - numeric forms (L{integers<integer>}, L{reals<real>}, L{scientific notation<sciReal>})
     - common L{programming identifiers<identifier>}
     - network addresses (L{MAC<mac_address>}, L{IPv4<ipv4_address>}, L{IPv6<ipv6_address>})
     - ISO8601 L{dates<iso8601_date>} and L{datetime<iso8601_datetime>}
     - L{UUID<uuid>}
    Parse actions:
     - C{L{convertToInteger}}
     - C{L{convertToFloat}}
     - C{L{convertToDate}}
     - C{L{convertToDatetime}}
     - C{L{stripHTMLTags}}

    Example::
        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')
    prints::
        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int,16))
    """expression that parses a hexadecimal integer, returns an int"""

    signedInteger = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signedInteger() makes a copy, so replacing the parse action here leaves
    # signedInteger itself returning ints
    fraction = (signedInteger().setParseAction(convertToFloat) + '/' + signedInteger().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # tokens are [numerator, '/', denominator]
    fraction.addParseAction(lambda t: t[0]/t[-1])

    mixed_integer = (fraction | signedInteger + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # the separating '-' is suppressed, so the remaining tokens are simply added up
    mixed_integer.addParseAction(sum)

    real = Regex(r'[+-]?\d+\.\d*').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    sciReal = Regex(r'[+-]?\d+([eE][+-]?\d+|\.\d*([eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional scientific notation and returns a float"""

    # streamline number.__add__(...) operations
    number = (sciReal | real | signedInteger).streamline()
    """any numeric expression, returns the corresponding Python type"""

    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas+'_', alphanums+'_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (C{0.0.0.0 - 255.255.255.255})"

    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part)*7).setName("full IPv6 address")
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part)*(0,6)) + "::" + Optional(_ipv6_part + (':' + _ipv6_part)*(0,6))).setName("short IPv6 address")
    # the "::" form must carry fewer than 8 hex groups in total
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # the first delimiter is captured and must be repeated (\1) between all six octets
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%d"})

        Example::
            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))
        prints::
            [datetime.date(1999, 12, 31)]
        """
        # strptime raises ValueError when the matched text does not fit fmt
        return lambda s,l,t: datetime.strptime(t[0], fmt).date()

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """
        Helper to create a parse action for converting parsed datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default=C{"%Y-%m-%dT%H:%M:%S.%f"})

        Example::
            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))
        prints::
            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        # strptime raises ValueError when the matched text does not fit fmt
        return lambda s,l,t: datetime.strptime(t[0], fmt)

    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (C{yyyy-mm-dd})"

    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (C{yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)}) - trailing seconds, milliseconds, and timezone optional; accepts separating C{'T'} or C{' '}"

    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (C{xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx})"

    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    # NOTE(review): the listing this file was recovered from dropped the method
    # under this decorator; it is restored to match the pyparsing 2.1.7
    # release - confirm against the upstream source.
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """
        Parse action to remove HTML tags from web page HTML source
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])
5463
if __name__ == "__main__":
    # self-test / demo: runs only when the module is executed as a script

    selectToken    = CaselessLiteral("select")
    fromToken      = CaselessLiteral("from")

    ident          = Word(alphas, alphanums + "_$")

    # dotted names are combined into a single token and upper-cased
    columnName     = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    columnNameList = Group(delimitedList(columnName)).setName("columns")
    columnSpec     = ('*' | columnNameList)

    tableName      = delimitedList(ident, ".", combine=True).setParseAction(upcaseTokens)
    tableNameList  = Group(delimitedList(tableName)).setName("tables")

    simpleSQL      = selectToken("command") + columnSpec("columns") + fromToken + tableNameList("tables")

    # demo runTests method, including embedded comments in test string
    simpleSQL.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    import uuid
    # replaces the parse action on the shared class-level expression, so
    # matched text comes back as uuid.UUID objects
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)
5534