Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# -*- coding: utf-8 -*- 

2# module pyparsing.py 

3# 

4# Copyright (c) 2003-2019 Paul T. McGuire 

5# 

6# Permission is hereby granted, free of charge, to any person obtaining 

7# a copy of this software and associated documentation files (the 

8# "Software"), to deal in the Software without restriction, including 

9# without limitation the rights to use, copy, modify, merge, publish, 

10# distribute, sublicense, and/or sell copies of the Software, and to 

11# permit persons to whom the Software is furnished to do so, subject to 

12# the following conditions: 

13# 

14# The above copyright notice and this permission notice shall be 

15# included in all copies or substantial portions of the Software. 

16# 

17# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 

18# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 

19# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 

20# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 

21# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 

22# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 

23# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 

24# 

25 

26__doc__ = \ 

27""" 

28pyparsing module - Classes and methods to define and execute parsing grammars 

29============================================================================= 

30 

31The pyparsing module is an alternative approach to creating and 

32executing simple grammars, vs. the traditional lex/yacc approach, or the 

33use of regular expressions. With pyparsing, you don't need to learn 

34a new syntax for defining grammars or matching expressions - the parsing 

35module provides a library of classes that you use to construct the 

36grammar directly in Python. 

37 

38Here is a program to parse "Hello, World!" (or any greeting of the form 

39``"<salutation>, <addressee>!"``), built up using :class:`Word`, 

40:class:`Literal`, and :class:`And` elements 

41(the :class:`'+'<ParserElement.__add__>` operators create :class:`And` expressions, 

42and the strings are auto-converted to :class:`Literal` expressions):: 

43 

44 from pyparsing import Word, alphas 

45 

46 # define grammar of a greeting 

47 greet = Word(alphas) + "," + Word(alphas) + "!" 

48 

49 hello = "Hello, World!" 

50 print (hello, "->", greet.parseString(hello)) 

51 

52The program outputs the following:: 

53 

54 Hello, World! -> ['Hello', ',', 'World', '!'] 

55 

56The Python representation of the grammar is quite readable, owing to the 

57self-explanatory class names, and the use of '+', '|' and '^' operators. 

58 

59The :class:`ParseResults` object returned from 

60:class:`ParserElement.parseString` can be 

61accessed as a nested list, a dictionary, or an object with named 

62attributes. 

63 

64The pyparsing module handles some of the problems that are typically 

65vexing when writing text parsers: 

66 

67 - extra or missing whitespace (the above program will also handle 

68 "Hello,World!", "Hello , World !", etc.) 

69 - quoted strings 

70 - embedded comments 

71 

72 

73Getting Started - 

74----------------- 

75Visit the classes :class:`ParserElement` and :class:`ParseResults` to 

76see the base classes that most other pyparsing 

77classes inherit from. Use the docstrings for examples of how to: 

78 

79 - construct literal match expressions from :class:`Literal` and 

80 :class:`CaselessLiteral` classes 

81 - construct character word-group expressions using the :class:`Word` 

82 class 

83 - see how to create repetitive expressions using :class:`ZeroOrMore` 

84 and :class:`OneOrMore` classes 

85 - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`, 

86 and :class:`'&'<Each>` operators to combine simple expressions into 

87 more complex ones 

88 - associate names with your parsed results using 

89 :class:`ParserElement.setResultsName` 

90 - access the parsed data, which is returned as a :class:`ParseResults` 

91 object 

92 - find some helpful expression short-cuts like :class:`delimitedList` 

93 and :class:`oneOf` 

94 - find more useful common expressions in the :class:`pyparsing_common` 

95 namespace class 

96""" 

97 

98__version__ = "2.4.7" 

99__versionTime__ = "30 Mar 2020 00:43 UTC" 

100__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 

101 

102import string 

103from weakref import ref as wkref 

104import copy 

105import sys 

106import warnings 

107import re 

108import sre_constants 

109import collections 

110import pprint 

111import traceback 

112import types 

113from datetime import datetime 

114from operator import itemgetter 

115import itertools 

116from functools import wraps 

117from contextlib import contextmanager 

118 

119try: 

120 # Python 3 

121 from itertools import filterfalse 

122except ImportError: 

123 from itertools import ifilterfalse as filterfalse 

124 

125try: 

126 from _thread import RLock 

127except ImportError: 

128 from threading import RLock 

129 

130try: 

131 # Python 3 

132 from collections.abc import Iterable 

133 from collections.abc import MutableMapping, Mapping 

134except ImportError: 

135 # Python 2.7 

136 from collections import Iterable 

137 from collections import MutableMapping, Mapping 

138 

139try: 

140 from collections import OrderedDict as _OrderedDict 

141except ImportError: 

142 try: 

143 from ordereddict import OrderedDict as _OrderedDict 

144 except ImportError: 

145 _OrderedDict = None 

146 

147try: 

148 from types import SimpleNamespace 

149except ImportError: 

150 class SimpleNamespace: pass 

151 

# version compatibility configuration
# ``__compat__`` holds switches for behaviour changes that can be opted into
# (or out of) ahead of / after the release that made them the default.
__compat__ = SimpleNamespace()
__compat__.__doc__ = """
    A cross-version compatibility configuration for pyparsing features that will be
    released in a future version. By setting values in this configuration to True,
    those features can be enabled in prior versions for compatibility development
    and testing.

     - collect_all_And_tokens - flag to enable fix for Issue #63 that fixes erroneous grouping
       of results names when an And expression is nested within an Or or MatchFirst; set to
       True to enable bugfix released in pyparsing 2.3.0, or False to preserve
       pre-2.3.0 handling of named results
"""
__compat__.collect_all_And_tokens = True

# ``__diag__`` holds optional diagnostic switches; every flag starts out False.
__diag__ = SimpleNamespace()
__diag__.__doc__ = """
Diagnostic configuration (all default to False)
     - warn_multiple_tokens_in_named_alternation - flag to enable warnings when a results
       name is defined on a MatchFirst or Or expression with one or more And subexpressions
       (only warns if __compat__.collect_all_And_tokens is False)
     - warn_ungrouped_named_tokens_in_collection - flag to enable warnings when a results
       name is defined on a containing expression with ungrouped subexpressions that also
       have results names
     - warn_name_set_on_empty_Forward - flag to enable warnings when a Forward is defined
       with a results name, but has no contents defined
     - warn_on_multiple_string_args_to_oneof - flag to enable warnings when oneOf is
       incorrectly called with multiple str arguments
     - enable_debug_on_named_expressions - flag to auto-enable debug on all subsequent
       calls to ParserElement.setName()
"""
__diag__.warn_multiple_tokens_in_named_alternation = False
__diag__.warn_ungrouped_named_tokens_in_collection = False
__diag__.warn_name_set_on_empty_Forward = False
__diag__.warn_on_multiple_string_args_to_oneof = False
__diag__.enable_debug_on_named_expressions = False
# Snapshot of the flag names defined so far (taken before ``enable_all_warnings``
# is attached below, so that helper is not itself listed as a flag).
__diag__._all_names = [nm for nm in vars(__diag__) if nm.startswith(("enable_", "warn_"))]


def _enable_all_warnings():
    """Set every ``warn_*`` flag on ``__diag__`` to True.

    ``enable_*`` flags are deliberately left alone; only warnings are switched on.
    The flag names come from ``__diag__._all_names`` so this helper cannot drift
    out of step with the flags declared above.
    """
    for flag_name in __diag__._all_names:
        if flag_name.startswith("warn_"):
            setattr(__diag__, flag_name, True)


__diag__.enable_all_warnings = _enable_all_warnings

196 

197 

198__all__ = ['__version__', '__versionTime__', '__author__', '__compat__', '__diag__', 

199 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty', 

200 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal', 

201 'PrecededBy', 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or', 

202 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException', 

203 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException', 

204 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 

205 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore', 'Char', 

206 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col', 

207 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString', 

208 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'hexnums', 

209 'htmlComment', 'javaStyleComment', 'line', 'lineEnd', 'lineStart', 'lineno', 

210 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral', 

211 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables', 

212 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity', 

213 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd', 

214 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute', 

215 'indentedBlock', 'originalTextFor', 'ungroup', 'infixNotation', 'locatedExpr', 'withClass', 

216 'CloseMatch', 'tokenMap', 'pyparsing_common', 'pyparsing_unicode', 'unicode_set', 

217 'conditionAsParseAction', 're', 

218 ] 

219 

# Interpreter version as a plain (major, minor, micro) tuple.
system_version = tuple(sys.version_info)[:3]
PY_3 = system_version[0] == 3

if not PY_3:
    # Python 2: provide the names the rest of the module expects.
    _MAX_INT = sys.maxint
    range = xrange

    def _ustr(obj):
        """Unicode-tolerant stand-in for ``str(obj)`` (Python 2 only).

        Unicode objects are returned unchanged.  Otherwise ``str(obj)`` is
        tried first; if that raises ``UnicodeEncodeError`` the object is
        encoded with the default encoding using XML character references,
        and each ``&#NNN;`` reference is then rewritten as a ``\\uXXXX``
        style escape.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            # When this succeeds _ustr(obj) is indistinguishable from
            # str(obj), so existing code keeps working.
            return str(obj)
        except UnicodeEncodeError:
            # Fall back to an encoded form with escaped code points
            encoded = unicode(obj).encode(sys.getdefaultencoding(), 'xmlcharrefreplace')
            xmlcharref = Regex(r'&#\d+;')
            xmlcharref.setParseAction(lambda t: '\\u' + hex(int(t[0][2:-1]))[2:])
            return xmlcharref.transformString(encoded)

    # build list of single arg builtins, tolerant of Python version, that can be used as parse actions
    singleArgBuiltins = []
    import __builtin__

    for fname in "sum len sorted reversed list tuple set any all min max".split():
        try:
            singleArgBuiltins.append(getattr(__builtin__, fname))
        except AttributeError:
            # builtin not available in this interpreter version - skip it
            continue

else:
    # Python 3: alias the Python 2 text-type names onto their modern equivalents.
    _MAX_INT = sys.maxsize
    basestring = str
    unichr = chr
    unicode = str
    _ustr = str

    # build list of single arg builtins, that can be used as parse actions
    singleArgBuiltins = [sum, len, sorted, reversed, list, tuple, set, any, all, min, max]

# Type object of generator expressions, used for isinstance() checks.
_generatorType = type(y for y in range(1))

269 

def _xml_escape(data):
    """Return ``data`` with ``&``, ``>``, ``<``, ``"`` and ``'`` replaced by XML entities."""
    # The ampersand pair must come first: the later replacements introduce
    # ampersands of their own, which must not be escaped a second time.
    replacements = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for raw_char, entity in replacements:
        data = data.replace(raw_char, entity)
    return data

279 

# Character-set constants used when building Word and similar expressions.
alphas = string.ascii_uppercase + string.ascii_lowercase
nums = "0123456789"
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)  # a single backslash character
# every printable ASCII character that is not whitespace, in string.printable order
printables = "".join(ch for ch in string.printable if ch not in string.whitespace)

286 

287 

def conditionAsParseAction(fn, message=None, fatal=False):
    """Wrap the predicate ``fn`` as a parse action that raises when it is falsy.

    Parameters:

     - fn - condition callable; it is passed through ``_trim_arity`` and then
       called as ``fn(s, l, t)``
     - message - text for the raised exception; defaults to
       ``"failed user-defined condition"``
     - fatal - if True raise :class:`ParseFatalException`, otherwise
       :class:`ParseException`

    Returns the wrapping parse action, which returns None when the condition
    holds.
    """
    failure_msg = "failed user-defined condition" if message is None else message
    if fatal:
        exc_type = ParseFatalException
    else:
        exc_type = ParseException
    fn = _trim_arity(fn)

    @wraps(fn)
    def pa(s, l, t):
        if not fn(s, l, t):
            raise exc_type(s, l, failure_msg)

    return pa

299 

class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Instances record the text being parsed (``pstr``), the character offset
    of the failure (``loc``), a message (``msg``) and the element that raised
    (``parserElement``).  ``lineno``, ``col``/``column`` and ``line`` are
    computed on demand from ``loc`` and ``pstr``.
    """

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(self, pstr, loc=0, msg=None, elem=None):
        self.loc = loc
        # with no explicit msg, the first argument is treated as the message
        if msg is None:
            self.msg, self.pstr = pstr, ""
        else:
            self.msg, self.pstr = msg, pstr
        self.parserElement = elem
        self.args = (pstr, loc, msg)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: build an exception of type ``cls`` from the fields of
        another parse exception, sidestepping ``__init__`` signature
        differences among subclasses.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parserElement)

    def __getattr__(self, aname):
        """Compute the location attributes on demand; supported names are:
         - lineno - returns the line number of the exception text
         - col (or column) - returns the column number of the exception text
         - line - returns the line containing the exception text

        Any other name raises AttributeError.
        """
        if aname == "lineno":
            return lineno(self.loc, self.pstr)
        if aname in ("col", "column"):
            return col(self.loc, self.pstr)
        if aname == "line":
            return line(self.loc, self.pstr)
        raise AttributeError(aname)

    def __str__(self):
        if not self.pstr:
            found = ''
        elif self.loc >= len(self.pstr):
            found = ', found end of text'
        else:
            # show the single offending character, undoing repr()'s doubling of backslashes
            offending = self.pstr[self.loc:self.loc + 1]
            found = (', found %r' % offending).replace(r'\\', '\\')
        return ("%s%s (at char %d), (line:%d, col:%d)" %
                (self.msg, found, self.loc, self.lineno, self.column))

    def __repr__(self):
        return _ustr(self)

    def markInputline(self, markerString=">!<"):
        """Return the line containing the error, stripped of surrounding
        whitespace, with ``markerString`` inserted at the error column.
        A falsy ``markerString`` leaves the line unmarked.
        """
        text = self.line
        split_at = self.column - 1
        if markerString:
            text = text[:split_at] + markerString + text[split_at:]
        return text.strip()

    def __dir__(self):
        # advertise the computed attributes in addition to the class's own
        return ["lineno", "col", "line"] + dir(type(self))

362 

class ParseException(ParseBaseException):
    """
    Exception raised when a parse expression does not match the input;
    supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text

    Example::

        try:
            Word(nums).setName("integer").parseString("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.col))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    """

    @staticmethod
    def explain(exc, depth=16):
        """
        Translate an exception's Python traceback into a listing of the
        pyparsing expressions (and other callers) that led to it.

        Parameters:

         - exc - exception raised during parsing (need not be a ParseException, in support
           of Python exceptions that might be raised in a parse action)
         - depth (default=16) - number of levels back in the stack trace to list expression
           and function names; if None, the full stack trace names will be listed; if 0, only
           the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `setName` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        explain() is only supported under Python 3.
        """
        import inspect

        if depth is None:
            depth = sys.getrecursionlimit()

        report = []
        if isinstance(exc, ParseBaseException):
            # failing input line, followed by a caret under the error column
            report.append(exc.line)
            report.append(' ' * (exc.col - 1) + '^')
        report.append("{0}: {1}".format(type(exc).__name__, exc))

        if depth > 0:
            frames = inspect.getinnerframes(exc.__traceback__, context=depth)
            reported_elements = set()
            remaining = depth
            for frame_info in frames[-depth:]:
                frame = frame_info[0]
                func_name = frame.f_code.co_name
                owner = frame.f_locals.get('self', None)

                if isinstance(owner, ParserElement):
                    # list each parser element once, and only from its parse entry points
                    if func_name not in ('parseImpl', '_parseNoCache'):
                        continue
                    if owner in reported_elements:
                        continue
                    reported_elements.add(owner)

                    owner_type = type(owner)
                    report.append("{0}.{1} - {2}".format(owner_type.__module__,
                                                         owner_type.__name__,
                                                         owner))
                elif owner is not None:
                    # a method of some other object: report its class
                    owner_type = type(owner)
                    report.append("{0}.{1}".format(owner_type.__module__,
                                                   owner_type.__name__))
                else:
                    # a plain function: skip wrapper and module-level frames
                    if func_name in ('wrapper', '<module>'):
                        continue
                    report.append("{0}".format(func_name))

                # only frames that produced an entry count against the limit
                remaining -= 1
                if not remaining:
                    break

        return '\n'.join(report)

454 

455 

class ParseFatalException(ParseBaseException):
    """User-throwable exception raised when inconsistent parse content
    is found; stops all parsing immediately.

    Adds no behaviour of its own beyond :class:`ParseBaseException`.
    """

460 

class ParseSyntaxException(ParseFatalException):
    """Just like :class:`ParseFatalException`, but raised internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because an unbacktrackable
    syntax error has been found.

    Adds no behaviour of its own beyond :class:`ParseFatalException`.
    """

468 

469#~ class ReparseException(ParseBaseException): 

470 #~ """Experimental class - parse actions can raise this exception to cause 

471 #~ pyparsing to reparse the input string: 

472 #~ - with a modified input string, and/or 

473 #~ - with a modified start location 

474 #~ Set the values of the ReparseException in the constructor, and raise the 

475 #~ exception in a parse action to cause pyparsing to use the new string/location. 

476 #~ Setting the values as None causes no change to be made. 

477 #~ """ 

478 #~ def __init_( self, newstring, restartLoc ): 

479 #~ self.newParseText = newstring 

480 #~ self.reparseLoc = restartLoc 

481 

class RecursiveGrammarException(Exception):
    """Exception thrown by :class:`ParserElement.validate` if the
    grammar could be improperly recursive.

    The sequence of parse elements passed to the constructor is kept on
    ``parseElementTrace`` and shown by ``str()``.
    """
    def __init__(self, parseElementList):
        self.parseElementTrace = parseElementList

    def __str__(self):
        # Wrap the trace in a 1-tuple: with a bare ``%`` operand, a tuple-valued
        # trace would be unpacked as several format arguments and raise TypeError.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)

491 

class _ParseResultsWithOffset(object):
    """Two-item holder pairing a value (index 0) with an offset (index 1).

    The pair is stored as the tuple ``tup``; indexing and unpacking go
    straight through to it.
    """

    def __init__(self, p1, p2):
        self.tup = (p1, p2)

    def __getitem__(self, i):
        return self.tup[i]

    def __repr__(self):
        # only the value is shown; the offset is left out
        value = self.tup[0]
        return repr(value)

    def setOffset(self, i):
        """Replace the offset with ``i``, keeping the value."""
        value = self.tup[0]
        self.tup = (value, i)

501 

502class ParseResults(object): 

503 """Structured parse results, to provide multiple means of access to 

504 the parsed data: 

505 

506 - as a list (``len(results)``) 

507 - by list index (``results[0], results[1]``, etc.) 

508 - by attribute (``results.<resultsName>`` - see :class:`ParserElement.setResultsName`) 

509 

510 Example:: 

511 

512 integer = Word(nums) 

513 date_str = (integer.setResultsName("year") + '/' 

514 + integer.setResultsName("month") + '/' 

515 + integer.setResultsName("day")) 

516 # equivalent form: 

517 # date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

518 

519 # parseString returns a ParseResults object 

520 result = date_str.parseString("1999/12/31") 

521 

522 def test(s, fn=repr): 

523 print("%s -> %s" % (s, fn(eval(s)))) 

524 test("list(result)") 

525 test("result[0]") 

526 test("result['month']") 

527 test("result.day") 

528 test("'month' in result") 

529 test("'minutes' in result") 

530 test("result.dump()", str) 

531 

532 prints:: 

533 

534 list(result) -> ['1999', '/', '12', '/', '31'] 

535 result[0] -> '1999' 

536 result['month'] -> '12' 

537 result.day -> '31' 

538 'month' in result -> True 

539 'minutes' in result -> False 

540 result.dump() -> ['1999', '/', '12', '/', '31'] 

541 - day: 31 

542 - month: 12 

543 - year: 1999 

544 """ 

545 def __new__(cls, toklist=None, name=None, asList=True, modal=True): 

546 if isinstance(toklist, cls): 

547 return toklist 

548 retobj = object.__new__(cls) 

549 retobj.__doinit = True 

550 return retobj 

551 

552 # Performance tuning: we construct a *lot* of these, so keep this 

553 # constructor as small and fast as possible 

554 def __init__(self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance): 

555 if self.__doinit: 

556 self.__doinit = False 

557 self.__name = None 

558 self.__parent = None 

559 self.__accumNames = {} 

560 self.__asList = asList 

561 self.__modal = modal 

562 if toklist is None: 

563 toklist = [] 

564 if isinstance(toklist, list): 

565 self.__toklist = toklist[:] 

566 elif isinstance(toklist, _generatorType): 

567 self.__toklist = list(toklist) 

568 else: 

569 self.__toklist = [toklist] 

570 self.__tokdict = dict() 

571 

572 if name is not None and name: 

573 if not modal: 

574 self.__accumNames[name] = 0 

575 if isinstance(name, int): 

576 name = _ustr(name) # will always return a str, but use _ustr for consistency 

577 self.__name = name 

578 if not (isinstance(toklist, (type(None), basestring, list)) and toklist in (None, '', [])): 

579 if isinstance(toklist, basestring): 

580 toklist = [toklist] 

581 if asList: 

582 if isinstance(toklist, ParseResults): 

583 self[name] = _ParseResultsWithOffset(ParseResults(toklist.__toklist), 0) 

584 else: 

585 self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]), 0) 

586 self[name].__name = name 

587 else: 

588 try: 

589 self[name] = toklist[0] 

590 except (KeyError, TypeError, IndexError): 

591 self[name] = toklist 

592 

593 def __getitem__(self, i): 

594 if isinstance(i, (int, slice)): 

595 return self.__toklist[i] 

596 else: 

597 if i not in self.__accumNames: 

598 return self.__tokdict[i][-1][0] 

599 else: 

600 return ParseResults([v[0] for v in self.__tokdict[i]]) 

601 

602 def __setitem__(self, k, v, isinstance=isinstance): 

603 if isinstance(v, _ParseResultsWithOffset): 

604 self.__tokdict[k] = self.__tokdict.get(k, list()) + [v] 

605 sub = v[0] 

606 elif isinstance(k, (int, slice)): 

607 self.__toklist[k] = v 

608 sub = v 

609 else: 

610 self.__tokdict[k] = self.__tokdict.get(k, list()) + [_ParseResultsWithOffset(v, 0)] 

611 sub = v 

612 if isinstance(sub, ParseResults): 

613 sub.__parent = wkref(self) 

614 

615 def __delitem__(self, i): 

616 if isinstance(i, (int, slice)): 

617 mylen = len(self.__toklist) 

618 del self.__toklist[i] 

619 

620 # convert int to slice 

621 if isinstance(i, int): 

622 if i < 0: 

623 i += mylen 

624 i = slice(i, i + 1) 

625 # get removed indices 

626 removed = list(range(*i.indices(mylen))) 

627 removed.reverse() 

628 # fixup indices in token dictionary 

629 for name, occurrences in self.__tokdict.items(): 

630 for j in removed: 

631 for k, (value, position) in enumerate(occurrences): 

632 occurrences[k] = _ParseResultsWithOffset(value, position - (position > j)) 

633 else: 

634 del self.__tokdict[i] 

635 

636 def __contains__(self, k): 

637 return k in self.__tokdict 

638 

639 def __len__(self): 

640 return len(self.__toklist) 

641 

642 def __bool__(self): 

643 return (not not self.__toklist) 

644 __nonzero__ = __bool__ 

645 

646 def __iter__(self): 

647 return iter(self.__toklist) 

648 

649 def __reversed__(self): 

650 return iter(self.__toklist[::-1]) 

651 

652 def _iterkeys(self): 

653 if hasattr(self.__tokdict, "iterkeys"): 

654 return self.__tokdict.iterkeys() 

655 else: 

656 return iter(self.__tokdict) 

657 

658 def _itervalues(self): 

659 return (self[k] for k in self._iterkeys()) 

660 

661 def _iteritems(self): 

662 return ((k, self[k]) for k in self._iterkeys()) 

663 

664 if PY_3: 

665 keys = _iterkeys 

666 """Returns an iterator of all named result keys.""" 

667 

668 values = _itervalues 

669 """Returns an iterator of all named result values.""" 

670 

671 items = _iteritems 

672 """Returns an iterator of all named result key-value tuples.""" 

673 

674 else: 

675 iterkeys = _iterkeys 

676 """Returns an iterator of all named result keys (Python 2.x only).""" 

677 

678 itervalues = _itervalues 

679 """Returns an iterator of all named result values (Python 2.x only).""" 

680 

681 iteritems = _iteritems 

682 """Returns an iterator of all named result key-value tuples (Python 2.x only).""" 

683 

684 def keys(self): 

685 """Returns all named result keys (as a list in Python 2.x, as an iterator in Python 3.x).""" 

686 return list(self.iterkeys()) 

687 

688 def values(self): 

689 """Returns all named result values (as a list in Python 2.x, as an iterator in Python 3.x).""" 

690 return list(self.itervalues()) 

691 

692 def items(self): 

693 """Returns all named result key-values (as a list of tuples in Python 2.x, as an iterator in Python 3.x).""" 

694 return list(self.iteritems()) 

695 

696 def haskeys(self): 

697 """Since keys() returns an iterator, this method is helpful in bypassing 

698 code that looks for the existence of any defined results names.""" 

699 return bool(self.__tokdict) 

700 

701 def pop(self, *args, **kwargs): 

702 """ 

703 Removes and returns item at specified index (default= ``last``). 

704 Supports both ``list`` and ``dict`` semantics for ``pop()``. If 

705 passed no argument or an integer argument, it will use ``list`` 

706 semantics and pop tokens from the list of parsed tokens. If passed 

707 a non-integer argument (most likely a string), it will use ``dict`` 

708 semantics and pop the corresponding value from any defined results 

709 names. A second default return value argument is supported, just as in 

710 ``dict.pop()``. 

711 

712 Example:: 

713 

714 def remove_first(tokens): 

715 tokens.pop(0) 

716 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 

717 print(OneOrMore(Word(nums)).addParseAction(remove_first).parseString("0 123 321")) # -> ['123', '321'] 

718 

719 label = Word(alphas) 

720 patt = label("LABEL") + OneOrMore(Word(nums)) 

721 print(patt.parseString("AAB 123 321").dump()) 

722 

723 # Use pop() in a parse action to remove named result (note that corresponding value is not 

724 # removed from list form of results) 

725 def remove_LABEL(tokens): 

726 tokens.pop("LABEL") 

727 return tokens 

728 patt.addParseAction(remove_LABEL) 

729 print(patt.parseString("AAB 123 321").dump()) 

730 

731 prints:: 

732 

733 ['AAB', '123', '321'] 

734 - LABEL: AAB 

735 

736 ['AAB', '123', '321'] 

737 """ 

738 if not args: 

739 args = [-1] 

740 for k, v in kwargs.items(): 

741 if k == 'default': 

742 args = (args[0], v) 

743 else: 

744 raise TypeError("pop() got an unexpected keyword argument '%s'" % k) 

745 if (isinstance(args[0], int) 

746 or len(args) == 1 

747 or args[0] in self): 

748 index = args[0] 

749 ret = self[index] 

750 del self[index] 

751 return ret 

752 else: 

753 defaultvalue = args[1] 

754 return defaultvalue 

755 

756 def get(self, key, defaultValue=None): 

757 """ 

758 Returns named result matching the given key, or if there is no 

759 such name, then returns the given ``defaultValue`` or ``None`` if no 

760 ``defaultValue`` is specified. 

761 

762 Similar to ``dict.get()``. 

763 

764 Example:: 

765 

766 integer = Word(nums) 

767 date_str = integer("year") + '/' + integer("month") + '/' + integer("day") 

768 

769 result = date_str.parseString("1999/12/31") 

770 print(result.get("year")) # -> '1999' 

771 print(result.get("hour", "not specified")) # -> 'not specified' 

772 print(result.get("hour")) # -> None 

773 """ 

774 if key in self: 

775 return self[key] 

776 else: 

777 return defaultValue 

778 

779 def insert(self, index, insStr): 

780 """ 

781 Inserts new element at location index in the list of parsed tokens. 

782 

783 Similar to ``list.insert()``. 

784 

785 Example:: 

786 

787 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 

788 

789 # use a parse action to insert the parse location in the front of the parsed results 

790 def insert_locn(locn, tokens): 

791 tokens.insert(0, locn) 

792 print(OneOrMore(Word(nums)).addParseAction(insert_locn).parseString("0 123 321")) # -> [0, '0', '123', '321'] 

793 """ 

794 self.__toklist.insert(index, insStr) 

795 # fixup indices in token dictionary 

796 for name, occurrences in self.__tokdict.items(): 

797 for k, (value, position) in enumerate(occurrences): 

798 occurrences[k] = _ParseResultsWithOffset(value, position + (position > index)) 

799 

800 def append(self, item): 

801 """ 

802 Add single element to end of ParseResults list of elements. 

803 

804 Example:: 

805 

806 print(OneOrMore(Word(nums)).parseString("0 123 321")) # -> ['0', '123', '321'] 

807 

808 # use a parse action to compute the sum of the parsed integers, and add it to the end 

809 def append_sum(tokens): 

810 tokens.append(sum(map(int, tokens))) 

811 print(OneOrMore(Word(nums)).addParseAction(append_sum).parseString("0 123 321")) # -> ['0', '123', '321', 444] 

812 """ 

813 self.__toklist.append(item) 

814 

815 def extend(self, itemseq): 

816 """ 

817 Add sequence of elements to end of ParseResults list of elements. 

818 

819 Example:: 

820 

821 patt = OneOrMore(Word(alphas)) 

822 

823 # use a parse action to append the reverse of the matched strings, to make a palindrome 

824 def make_palindrome(tokens): 

825 tokens.extend(reversed([t[::-1] for t in tokens])) 

826 return ''.join(tokens) 

827 print(patt.addParseAction(make_palindrome).parseString("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl' 

828 """ 

829 if isinstance(itemseq, ParseResults): 

830 self.__iadd__(itemseq) 

831 else: 

832 self.__toklist.extend(itemseq) 

833 

834 def clear(self): 

835 """ 

836 Clear all elements and results names. 

837 """ 

838 del self.__toklist[:] 

839 self.__tokdict.clear() 

840 

841 def __getattr__(self, name): 

842 try: 

843 return self[name] 

844 except KeyError: 

845 return "" 

846 

847 def __add__(self, other): 

848 ret = self.copy() 

849 ret += other 

850 return ret 

851 

852 def __iadd__(self, other): 

853 if other.__tokdict: 

854 offset = len(self.__toklist) 

855 addoffset = lambda a: offset if a < 0 else a + offset 

856 otheritems = other.__tokdict.items() 

857 otherdictitems = [(k, _ParseResultsWithOffset(v[0], addoffset(v[1]))) 

858 for k, vlist in otheritems for v in vlist] 

859 for k, v in otherdictitems: 

860 self[k] = v 

861 if isinstance(v[0], ParseResults): 

862 v[0].__parent = wkref(self) 

863 

864 self.__toklist += other.__toklist 

865 self.__accumNames.update(other.__accumNames) 

866 return self 

867 

868 def __radd__(self, other): 

869 if isinstance(other, int) and other == 0: 

870 # useful for merging many ParseResults using sum() builtin 

871 return self.copy() 

872 else: 

873 # this may raise a TypeError - so be it 

874 return other + self 

875 

876 def __repr__(self): 

877 return "(%s, %s)" % (repr(self.__toklist), repr(self.__tokdict)) 

878 

def __str__(self):
    """Render the token list in list-like form; nested ParseResults are rendered recursively."""
    rendered = []
    for tok in self.__toklist:
        rendered.append(_ustr(tok) if isinstance(tok, ParseResults) else repr(tok))
    return '[' + ', '.join(rendered) + ']'

881 

def _asStringList(self, sep=''):
    """
    Flatten the tokens into a list of strings, inserting ``sep`` between
    the top-level items when it is non-empty. Nested ParseResults are
    flattened with their own default separator.
    """
    pieces = []
    for tok in self.__toklist:
        if pieces and sep:
            pieces.append(sep)
        if isinstance(tok, ParseResults):
            pieces.extend(tok._asStringList())
        else:
            pieces.append(_ustr(tok))
    return pieces

892 

def asList(self):
    """
    Return the parse results as a plain (possibly nested) Python list.
    Nested ParseResults are converted recursively; other tokens are
    returned unchanged.

    Example::

        patt = OneOrMore(Word(alphas))
        result = patt.parseString("sldkj lsdkj sldkj")
        # even though the result prints in string-like form, it is actually a pyparsing ParseResults
        print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

        # Use asList() to create an actual list
        result_list = result.asList()
        print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
    """
    nested = []
    for tok in self.__toklist:
        nested.append(tok.asList() if isinstance(tok, ParseResults) else tok)
    return nested

909 

def asDict(self):
    """
    Return the named parse results as a (possibly nested) plain ``dict``.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

        result_dict = result.asDict()
        print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

        # even though a ParseResults supports dict-like access, sometime you just need to have a dict
        import json
        print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
        print(json.dumps(result.asDict())) # -> {"month": "31", "day": "1999", "year": "12"}
    """
    def convert(value):
        # plain values pass through; ParseResults become a dict when they
        # carry names, otherwise a list of converted elements
        if not isinstance(value, ParseResults):
            return value
        if value.haskeys():
            return value.asDict()
        return [convert(elem) for elem in value]

    pairs = self.items() if PY_3 else self.iteritems()
    result = {}
    for key, val in pairs:
        result[key] = convert(val)
    return result

945 

def copy(self):
    """
    Return a new :class:`ParseResults` built from this one's token list,
    with a shallow copy of the names dict and the same parent, accumulated
    names and name.
    """
    duplicate = ParseResults(self.__toklist)
    duplicate.__name = self.__name
    duplicate.__parent = self.__parent
    duplicate.__tokdict = dict(self.__tokdict)
    duplicate.__accumNames.update(self.__accumNames)
    return duplicate

956 

def asXML(self, doctag=None, namedItemsOnly=False, indent="", formatted=True):
    """
    (Deprecated) Render the parse results as an XML string. Element tags
    come from results names; unnamed items are tagged ``ITEM`` unless
    ``namedItemsOnly`` is set, in which case they are omitted.
    """
    # map token position -> results name (later entries win on collisions)
    names_by_pos = {}
    for name, occurrences in self.__tokdict.items():
        for occ in occurrences:
            names_by_pos[occ[1]] = name

    if formatted:
        newline = "\n"
        child_indent = indent + " "
    else:
        # collapse out indents if formatting is not desired
        newline = ""
        indent = ""
        child_indent = ""

    # an explicit doctag wins; otherwise fall back to this result's own name
    if doctag is not None:
        own_tag = doctag
    else:
        own_tag = self.__name or None

    if not own_tag:
        if namedItemsOnly:
            return ""
        own_tag = "ITEM"

    out = []
    out.extend([newline, indent, "<", own_tag, ">"])

    for pos, tok in enumerate(self.__toklist):
        tok_tag = names_by_pos.get(pos)

        if isinstance(tok, ParseResults):
            out.append(tok.asXML(tok_tag,
                                 namedItemsOnly and doctag is None,
                                 child_indent,
                                 formatted))
            continue

        # individual token, see if there is a name for it
        if not tok_tag:
            if namedItemsOnly:
                continue
            tok_tag = "ITEM"
        body = _xml_escape(_ustr(tok))
        out.extend([newline, child_indent, "<", tok_tag, ">",
                    body,
                    "</", tok_tag, ">"])

    out.extend([newline, indent, "</", own_tag, ">"])
    return "".join(out)

1017 

1018 def __lookup(self, sub): 

1019 for k, vlist in self.__tokdict.items(): 

1020 for v, loc in vlist: 

1021 if sub is v: 

1022 return k 

1023 return None 

1024 

def getName(self):
    r"""
    Return the results name attached to this token expression, or None.
    Handy when one of several alternative expressions may have matched
    at a given location.

    Example::

        integer = Word(nums)
        ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
        house_number_expr = Suppress('#') + Word(nums, alphanums)
        user_data = (Group(house_number_expr)("house_number")
                    | Group(ssn_expr)("ssn")
                    | Group(integer)("age"))
        user_info = OneOrMore(user_data)

        result = user_info.parseString("22 111-22-3333 #221B")
        for item in result:
            print(item.getName(), ':', item[0])

    prints::

        age : 22
        ssn : 111-22-3333
        house_number : 221B
    """
    own_name = self.__name
    if own_name:
        return own_name

    if self.__parent:
        # the parent is held through a callable reference; it may be gone
        owner = self.__parent()
        return owner.__lookup(self) if owner else None

    # a lone token with exactly one name registered at position 0 or -1
    if len(self) == 1 and len(self.__tokdict) == 1:
        first_occurrence = next(iter(self.__tokdict.values()))[0]
        if first_occurrence[1] in (0, -1):
            return next(iter(self.__tokdict.keys()))
    return None

1064 

def dump(self, indent='', full=True, include_list=True, _depth=0):
    """
    Diagnostic helper that renders the contents of a :class:`ParseResults`
    as a string. ``indent`` is prefixed to each line so the output can be
    embedded in a larger nested display. With ``full`` set, named results
    (or, failing that, nested results by index) are listed as well.

    Example::

        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        result = date_str.parseString('12/31/1999')
        print(result.dump())

    prints::

        ['12', '/', '31', '/', '1999']
        - day: 1999
        - month: 31
        - year: 12
    """
    pieces = [indent + _ustr(self.asList()) if include_list else '']

    if not full:
        return "".join(pieces)

    if self.haskeys():
        # named results, sorted by stringified name
        for key, value in sorted((str(k), v) for k, v in self.items()):
            if pieces:
                pieces.append('\n')
            pieces.append("%s%s- %s: " % (indent, (' ' * _depth), key))
            if not isinstance(value, ParseResults):
                pieces.append(repr(value))
            elif value:
                pieces.append(value.dump(indent=indent, full=full,
                                         include_list=include_list, _depth=_depth + 1))
            else:
                pieces.append(_ustr(value))
    elif any(isinstance(sub, ParseResults) for sub in self):
        # no names at this level: list every item by index
        for idx, sub in enumerate(self):
            if isinstance(sub, ParseResults):
                rendered = sub.dump(indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1)
            else:
                rendered = _ustr(sub)
            pieces.append("\n%s%s[%d]:\n%s%s%s" % (indent,
                                                  (' ' * (_depth)),
                                                  idx,
                                                  indent,
                                                  (' ' * (_depth + 1)),
                                                  rendered))

    return "".join(pieces)

1129 

def pprint(self, *args, **kwargs):
    """
    Pretty-print the parsed results, converted with ``asList()``, using the
    `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
    Any extra positional or keyword arguments are forwarded to
    `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

    Example::

        ident = Word(alphas, alphanums)
        num = Word(nums)
        func = Forward()
        term = ident | num | Group('(' + func + ')')
        func <<= ident + Group(Optional(delimitedList(term)))
        result = func.parseString("fna a,b,(fnb c,d,200),100")
        result.pprint(width=40)

    prints::

        ['fna',
         ['a',
          'b',
          ['(', 'fnb', ['c', 'd', '200'], ')'],
          '100']]
    """
    as_plain_list = self.asList()
    pprint.pprint(as_plain_list, *args, **kwargs)

1156 

1157 # add support for pickle protocol 

1158 def __getstate__(self): 

1159 return (self.__toklist, 

1160 (self.__tokdict.copy(), 

1161 self.__parent is not None and self.__parent() or None, 

1162 self.__accumNames, 

1163 self.__name)) 

1164 

1165 def __setstate__(self, state): 

1166 self.__toklist = state[0] 

1167 self.__tokdict, par, inAccumNames, self.__name = state[1] 

1168 self.__accumNames = {} 

1169 self.__accumNames.update(inAccumNames) 

1170 if par is not None: 

1171 self.__parent = wkref(par) 

1172 else: 

1173 self.__parent = None 

1174 

1175 def __getnewargs__(self): 

1176 return self.__toklist, self.__name, self.__asList, self.__modal 

1177 

1178 def __dir__(self): 

1179 return dir(type(self)) + list(self.keys()) 

1180 

@classmethod
def from_dict(cls, other, name=None):
    """
    Build a ParseResults from a dict, keeping each key/value pair as a
    results name. Mapping values are converted recursively. When ``name``
    is given, the result is wrapped in an outer ParseResults of that name.
    """
    def is_iterable(obj):
        # strings are iterable but are treated as scalar values here
        try:
            iter(obj)
        except Exception:
            return False
        string_types = (str, bytes) if PY_3 else basestring
        return not isinstance(obj, string_types)

    ret = cls([])
    for key, value in other.items():
        if isinstance(value, Mapping):
            piece = cls.from_dict(value, name=key)
        else:
            piece = cls([value], name=key, asList=is_iterable(value))
        ret += piece

    if name is None:
        return ret
    return cls([ret], name=name)

1208 

# Register ParseResults with the MutableMapping ABC (it does not inherit from it).
# NOTE(review): presumably so isinstance/issubclass checks against MutableMapping succeed - confirm.
MutableMapping.register(ParseResults)

1210 

def col (loc, strg):
    """Return the 1-based column of position ``loc`` in ``strg``, where
    lines are separated by newline characters.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    text = strg
    # position right after a newline is the first column of the next line
    if 0 < loc < len(text) and text[loc - 1] == '\n':
        return 1
    return loc - text.rfind("\n", 0, loc)

1224 

def lineno(loc, strg):
    """Return the 1-based line number of position ``loc`` in ``strg``, where
    lines are separated by newline characters.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parseString`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1

1236 

def line(loc, strg):
    """Return the text of the line that contains position ``loc`` in ``strg``,
    without its trailing newline; lines are separated by newline characters.
    """
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        # no further newline: the line runs to the end of the string
        return strg[start:]
    return strg[start:stop]

1246 

def _defaultStartDebugAction(instring, loc, expr):
    """Default debug action printed when ``expr`` is about to be tried at ``loc``."""
    prefix = "Match " + _ustr(expr) + " at loc " + _ustr(loc)
    position = "(%d,%d)" % (lineno(loc, instring), col(loc, instring))
    print((prefix + position))

1249 

def _defaultSuccessDebugAction(instring, startloc, endloc, expr, toks):
    """Default debug action printed after ``expr`` matched, showing the tokens as a list."""
    matched = str(toks.asList())
    print("Matched " + _ustr(expr) + " -> " + matched)

1252 

def _defaultExceptionDebugAction(instring, loc, expr, exc):
    """Default debug action printed when matching ``expr`` raised ``exc``."""
    description = _ustr(exc)
    print("Exception raised:" + description)

1255 

def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing."""
    return None

1259 

1260# Only works on Python 3.x - nonlocal is toxic to Python 2 installs 

1261#~ 'decorator to trim function calls to match the arity of the target' 

1262#~ def _trim_arity(func, maxargs=3): 

1263 #~ if func in singleArgBuiltins: 

1264 #~ return lambda s,l,t: func(t) 

1265 #~ limit = 0 

1266 #~ foundArity = False 

1267 #~ def wrapper(*args): 

1268 #~ nonlocal limit,foundArity 

1269 #~ while 1: 

1270 #~ try: 

1271 #~ ret = func(*args[limit:]) 

1272 #~ foundArity = True 

1273 #~ return ret 

1274 #~ except TypeError: 

1275 #~ if limit == maxargs or foundArity: 

1276 #~ raise 

1277 #~ limit += 1 

1278 #~ continue 

1279 #~ return wrapper 

1280 

1281# this version is Python 2.x-3.x cross-compatible 

1282'decorator to trim function calls to match the arity of the target' 

def _trim_arity(func, maxargs=2):
    """Wrap ``func`` so it can be called with a full argument list even if
    it accepts fewer arguments.

    The returned wrapper calls ``func(*args[limit:])``. While no call has
    succeeded yet, a TypeError raised from the call line itself makes the
    wrapper drop one more leading argument and retry, until ``limit``
    exceeds ``maxargs``. Once a call has succeeded, TypeErrors are
    re-raised unchanged.

    Callables listed in ``singleArgBuiltins`` are wrapped to receive only
    the third argument.
    """
    if func in singleArgBuiltins:
        return lambda s, l, t: func(t)
    # one-element lists serve as mutable cells shared with wrapper()
    limit = [0]
    foundArity = [False]

    # traceback return data structure changed in Py3.5 - normalize back to plain tuples
    if system_version[:2] >= (3, 5):
        def extract_stack(limit=0):
            # special handling for Python 3.5.0 - extra deep call stack by 1
            offset = -3 if system_version == (3, 5, 0) else -2
            frame_summary = traceback.extract_stack(limit=-offset + limit - 1)[offset]
            return [frame_summary[:2]]
        def extract_tb(tb, limit=0):
            frames = traceback.extract_tb(tb, limit=limit)
            frame_summary = frames[-1]
            return [frame_summary[:2]]
    else:
        extract_stack = traceback.extract_stack
        extract_tb = traceback.extract_tb

    # synthesize what would be returned by traceback.extract_stack at the call to
    # user's parse action 'func', so that we don't incur call penalty at parse time

    LINE_DIFF = 6
    # IF ANY CODE CHANGES, EVEN JUST COMMENTS OR BLANK LINES, BETWEEN THE NEXT LINE AND
    # THE CALL TO FUNC INSIDE WRAPPER, LINE_DIFF MUST BE MODIFIED!!!!
    this_line = extract_stack(limit=2)[-1]
    pa_call_line_synth = (this_line[0], this_line[1] + LINE_DIFF)

    def wrapper(*args):
        while 1:
            try:
                ret = func(*args[limit[0]:])
                foundArity[0] = True
                return ret
            except TypeError:
                # re-raise TypeErrors if they did not come from our arity testing
                if foundArity[0]:
                    raise
                else:
                    try:
                        tb = sys.exc_info()[-1]
                        # compare the innermost frame's (file, line) with the synthesized
                        # call site; a mismatch means the error came from inside func
                        if not extract_tb(tb, limit=2)[-1][:2] == pa_call_line_synth:
                            raise
                    finally:
                        # drop the local reference to the traceback object
                        try:
                            del tb
                        except NameError:
                            pass

                # arity mismatch at the call line: drop one more leading argument and retry
                if limit[0] <= maxargs:
                    limit[0] += 1
                    continue
                raise

    # copy func name to wrapper for sensible debug output
    func_name = "<parse action>"
    try:
        func_name = getattr(func, '__name__',
                            getattr(func, '__class__').__name__)
    except Exception:
        func_name = str(func)
    wrapper.__name__ = func_name

    return wrapper

1349 

1350 

1351class ParserElement(object): 

1352 """Abstract base level parser element class.""" 

1353 DEFAULT_WHITE_CHARS = " \n\t\r" 

1354 verbose_stacktrace = False 

1355 

@staticmethod
def setDefaultWhitespaceChars(chars):
    r"""
    Replace the class-wide default set of characters treated as skippable
    whitespace (``ParserElement.DEFAULT_WHITE_CHARS``).

    Example::

        # default whitespace chars are space, <TAB> and newline
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def', 'ghi', 'jkl']

        # change to just treat newline as significant
        ParserElement.setDefaultWhitespaceChars(" \t")
        OneOrMore(Word(alphas)).parseString("abc def\nghi jkl") # -> ['abc', 'def']
    """
    new_default = chars
    ParserElement.DEFAULT_WHITE_CHARS = new_default

1371 

@staticmethod
def inlineLiteralsUsing(cls):
    """
    Choose the class that is used when a plain string literal is included
    in a parser expression (stored as ``ParserElement._literalStringClass``).

    Example::

        # default literal class used is Literal
        integer = Word(nums)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']


        # change to Suppress
        ParserElement.inlineLiteralsUsing(Suppress)
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

        date_str.parseString("1999/12/31") # -> ['1999', '12', '31']
    """
    literal_class = cls
    ParserElement._literalStringClass = literal_class

1393 

1394 @classmethod 

1395 def _trim_traceback(cls, tb): 

1396 while tb.tb_next: 

1397 tb = tb.tb_next 

1398 return tb 

1399 

def __init__(self, savelist=False):
    """Initialise the default state of a parser element; ``savelist`` is
    stored as ``saveAsList``."""
    # actions
    self.parseAction = []
    self.failAction = None
    self.callDuringTry = False

    # naming and result handling
    # ~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
    self.strRepr = None
    self.errmsg = ""
    self.resultsName = None
    self.saveAsList = savelist
    self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)

    # whitespace and ignorable handling
    self.skipWhitespace = True
    self.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    self.copyDefaultWhiteChars = True
    self.keepTabs = False
    self.ignoreExprs = []
    self.callPreparse = True # used to avoid redundant calls to preParse

    # parsing flags
    self.mayReturnEmpty = False # used when checking for left-recursion
    self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
    self.streamlined = False
    self.re = None

    # debugging
    self.debug = False
    self.debugActions = (None, None, None) # custom debug actions

1422 

def copy(self):
    """
    Make a copy of this :class:`ParserElement`. Useful for defining
    different parse actions for the same parsing pattern, using copies of
    the original parse element.

    The copy gets its own ``parseAction`` and ``ignoreExprs`` lists, so
    adding actions or ignorables to the copy does not affect the original.
    When ``copyDefaultWhiteChars`` is set, the copy's ``whiteChars`` is
    reset to the current class-wide default.

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        integerK = integer.copy().addParseAction(lambda toks: toks[0] * 1024) + Suppress("K")
        integerM = integer.copy().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")

        print(OneOrMore(integerK | integerM | integer).parseString("5K 100 640K 256M"))

    prints::

        [5120, 100, 655360, 268435456]

    Equivalent form of ``expr.copy()`` is just ``expr()``::

        integerM = integer().addParseAction(lambda toks: toks[0] * 1024 * 1024) + Suppress("M")
    """
    cpy = copy.copy(self)
    # shallow-copy the lists so the two elements do not share them
    cpy.parseAction = self.parseAction[:]
    cpy.ignoreExprs = self.ignoreExprs[:]
    if self.copyDefaultWhiteChars:
        # store a set, as __init__ does, so whiteChars has the same type on
        # fresh and copied elements
        cpy.whiteChars = set(ParserElement.DEFAULT_WHITE_CHARS)
    return cpy

1451 

def setName(self, name):
    """
    Give this expression a name; the name is used in the "Expected ..."
    error message and makes debugging output clearer. Returns ``self``.

    Example::

        Word(nums).parseString("ABC") # -> Exception: Expected W:(0123...) (at char 0), (line:1, col:1)
        Word(nums).setName("integer").parseString("ABC") # -> Exception: Expected integer (at char 0), (line:1, col:1)
    """
    self.name = name
    self.errmsg = "Expected " + self.name
    debug_named = __diag__.enable_debug_on_named_expressions
    if debug_named:
        self.setDebug()
    return self

1466 

def setResultsName(self, name, listAllMatches=False):
    """
    Attach a results name, under which the matching tokens can be
    referenced as a nested attribute of the returned parse results.
    NOTE: this returns a *copy* of the original :class:`ParserElement` object;
    this is so that the client can define a basic element, such as an
    integer, and reference it in multiple places with different names.

    The abbreviated syntax ``expr("name")`` can be used in place of
    ``expr.setResultsName("name")`` - see :class:`__call__`.

    Example::

        date_str = (integer.setResultsName("year") + '/'
                    + integer.setResultsName("month") + '/'
                    + integer.setResultsName("day"))

        # equivalent form:
        date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
    """
    named_copy = self._setResultsName(name, listAllMatches)
    return named_copy

1489 

1490 def _setResultsName(self, name, listAllMatches=False): 

1491 newself = self.copy() 

1492 if name.endswith("*"): 

1493 name = name[:-1] 

1494 listAllMatches = True 

1495 newself.resultsName = name 

1496 newself.modalResults = not listAllMatches 

1497 return newself 

1498 

def setBreak(self, breakFlag=True):
    """Enable or disable dropping into the Python pdb debugger just before
    this element is parsed. Pass ``breakFlag=True`` to enable and ``False``
    to restore the original parse method. Returns ``self``.
    """
    if not breakFlag:
        # undo a previously installed breaker, if there is one
        if hasattr(self._parse, "_originalParseMethod"):
            self._parse = self._parse._originalParseMethod
        return self

    original_parse = self._parse

    def breaker(instring, loc, doActions=True, callPreParse=True):
        import pdb
        # this call to pdb.set_trace() is intentional, not a checkin error
        pdb.set_trace()
        return original_parse(instring, loc, doActions, callPreParse)

    breaker._originalParseMethod = original_parse
    self._parse = breaker
    return self

1517 

def setParseAction(self, *fns, **kwargs):
    """
    Replace this expression's parse actions with ``fns``; they run when the
    expression matches successfully. Returns ``self``.
    A parse action is a callable taking 0-3 arguments, called as ``fn(s, loc, toks)`` ,
    ``fn(loc, toks)`` , ``fn(toks)`` , or just ``fn()`` , where:

    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a :class:`ParseResults` object

    If the functions in fns modify the tokens, they can return them as the return
    value from fn, and the modified list of tokens will replace the original.
    Otherwise, fn does not need to return any value.

    If None is passed as the parse action, all previously added parse actions for this
    expression are cleared.

    Optional keyword arguments:
    - callDuringTry = (default= ``False``) indicate if parse action should be run during lookaheads and alternate testing

    Raises TypeError if any of ``fns`` is not callable.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parseString` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.

    Example::

        integer = Word(nums)
        date_str = integer + '/' + integer + '/' + integer

        date_str.parseString("1999/12/31") # -> ['1999', '/', '12', '/', '31']

        # use parse action to convert to ints at parse time
        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        date_str = integer + '/' + integer + '/' + integer

        # note that integer fields are now ints, not strings
        date_str.parseString("1999/12/31") # -> [1999, '/', 12, '/', 31]
    """
    if list(fns) == [None,]:
        # a lone None clears the actions; callDuringTry is left as it is
        self.parseAction = []
        return self

    if not all(callable(fn) for fn in fns):
        raise TypeError("parse actions must be callable")
    self.parseAction = [_trim_arity(fn) for fn in fns]
    self.callDuringTry = kwargs.get("callDuringTry", False)
    return self

1566 

def addParseAction(self, *fns, **kwargs):
    """
    Append one or more parse actions to the ones already defined on this
    expression, and return ``self``. See :class:`setParseAction`.
    ``callDuringTry=True`` turns the flag on; it is never turned off here.

    See examples in :class:`copy`.
    """
    wrapped = [_trim_arity(fn) for fn in fns]
    self.parseAction.extend(wrapped)
    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1576 

def addCondition(self, *fns, **kwargs):
    """Append boolean predicate functions to this expression's parse actions
    and return ``self``. See :class:`setParseAction` for function call
    signatures. Unlike ``setParseAction``, functions passed to
    ``addCondition`` need to return boolean success/fail of the condition.

    Optional keyword arguments:
    - message = define a custom message to be used in the raised exception
    - fatal = if True, will raise ParseFatalException to stop parsing immediately; otherwise will raise ParseException
    - callDuringTry = if True, turns on the element's ``callDuringTry`` flag

    Example::

        integer = Word(nums).setParseAction(lambda toks: int(toks[0]))
        year_int = integer.copy()
        year_int.addCondition(lambda toks: toks[0] >= 2000, message="Only support years 2000 and later")
        date_str = year_int + '/' + integer + '/' + integer

        result = date_str.parseString("1999/12/31") # -> Exception: Only support years 2000 and later (at char 0), (line:1, col:1)
    """
    for predicate in fns:
        action = conditionAsParseAction(predicate,
                                        message=kwargs.get('message'),
                                        fatal=kwargs.get('fatal', False))
        self.parseAction.append(action)

    self.callDuringTry = self.callDuringTry or kwargs.get("callDuringTry", False)
    return self

1601 

def setFailAction(self, fn):
    """Set the action to run when parsing fails at this expression, and
    return ``self``.
    The fail action ``fn`` is a callable taking the arguments
    ``fn(s, loc, expr, err)`` where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown
    The function returns no value. It may throw :class:`ParseFatalException`
    if it is desired to stop parsing immediately."""
    fail_handler = fn
    self.failAction = fail_handler
    return self

1614 

1615 def _skipIgnorables(self, instring, loc): 

1616 exprsFound = True 

1617 while exprsFound: 

1618 exprsFound = False 

1619 for e in self.ignoreExprs: 

1620 try: 

1621 while 1: 

1622 loc, dummy = e._parse(instring, loc) 

1623 exprsFound = True 

1624 except ParseException: 

1625 pass 

1626 return loc 

1627 

def preParse(self, instring, loc):
    """Return ``loc`` advanced past ignorable expressions (when any are
    defined) and then past leading whitespace (when ``skipWhitespace`` is
    set)."""
    if self.ignoreExprs:
        loc = self._skipIgnorables(instring, loc)

    if not self.skipWhitespace:
        return loc

    white = self.whiteChars
    end = len(instring)
    while loc < end and instring[loc] in white:
        loc += 1
    return loc

1639 

def parseImpl(self, instring, loc, doActions=True):
    """Base implementation: match nothing and return ``loc`` unchanged with
    an empty token list."""
    no_tokens = []
    return loc, no_tokens

1642 

def postParse(self, instring, loc, tokenlist):
    """Base implementation: return ``tokenlist`` unchanged."""
    result = tokenlist
    return result

1645 

1646 # ~ @profile 

def _parseNoCache(self, instring, loc, doActions=True, callPreParse=True):
    """Match this element against ``instring`` at ``loc`` without consulting any cache.

    Steps: optional ``preParse``, then ``parseImpl``, ``postParse``,
    packaging of the tokens in a ParseResults, and finally the parse
    actions (when ``doActions`` or ``self.callDuringTry`` is set).

    Returns ``(loc, retTokens)``: the location returned by ``parseImpl``
    and the ParseResults. An IndexError raised by ``parseImpl`` inside the
    guarded call, or by a parse action, is converted to a ParseException.
    """
    # indexes into self.debugActions
    TRY, MATCH, FAIL = 0, 1, 2
    debugging = (self.debug) # and doActions)

    if debugging or self.failAction:
        # slow path: same matching logic as the else-branch below, wrapped so
        # that the debug/fail hooks see any exception before it propagates
        # ~ print ("Match", self, "at loc", loc, "(%d, %d)" % (lineno(loc, instring), col(loc, instring)))
        if self.debugActions[TRY]:
            self.debugActions[TRY](instring, loc, self)
        try:
            if callPreParse and self.callPreparse:
                preloc = self.preParse(instring, loc)
            else:
                preloc = loc
            tokensStart = preloc
            if self.mayIndexError or preloc >= len(instring):
                try:
                    loc, tokens = self.parseImpl(instring, preloc, doActions)
                except IndexError:
                    raise ParseException(instring, len(instring), self.errmsg, self)
            else:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
        except Exception as err:
            # NOTE(review): tokensStart is unbound here if preParse itself raised - confirm this cannot happen
            # ~ print ("Exception raised:", err)
            if self.debugActions[FAIL]:
                self.debugActions[FAIL](instring, tokensStart, self, err)
            if self.failAction:
                self.failAction(instring, tokensStart, self, err)
            raise
    else:
        # fast path: no debug or fail hooks to notify
        if callPreParse and self.callPreparse:
            preloc = self.preParse(instring, loc)
        else:
            preloc = loc
        tokensStart = preloc
        if self.mayIndexError or preloc >= len(instring):
            try:
                loc, tokens = self.parseImpl(instring, preloc, doActions)
            except IndexError:
                raise ParseException(instring, len(instring), self.errmsg, self)
        else:
            loc, tokens = self.parseImpl(instring, preloc, doActions)

    tokens = self.postParse(instring, loc, tokens)

    retTokens = ParseResults(tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults)
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            # same loop as the else-branch below, plus FAIL debug notification
            try:
                for fn in self.parseAction:
                    try:
                        tokens = fn(instring, tokensStart, retTokens)
                    except IndexError as parse_action_exc:
                        exc = ParseException("exception raised in parse action")
                        exc.__cause__ = parse_action_exc
                        raise exc

                    # a parse action that returns a new value replaces the results
                    if tokens is not None and tokens is not retTokens:
                        retTokens = ParseResults(tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                                  modal=self.modalResults)
            except Exception as err:
                # ~ print "Exception raised in user parse action:", err
                if self.debugActions[FAIL]:
                    self.debugActions[FAIL](instring, tokensStart, self, err)
                raise
        else:
            for fn in self.parseAction:
                try:
                    tokens = fn(instring, tokensStart, retTokens)
                except IndexError as parse_action_exc:
                    exc = ParseException("exception raised in parse action")
                    exc.__cause__ = parse_action_exc
                    raise exc

                # a parse action that returns a new value replaces the results
                if tokens is not None and tokens is not retTokens:
                    retTokens = ParseResults(tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens, (ParseResults, list)),
                                              modal=self.modalResults)
    if debugging:
        # ~ print ("Matched", self, "->", retTokens.asList())
        if self.debugActions[MATCH]:
            self.debugActions[MATCH](instring, tokensStart, loc, self, retTokens)

    return loc, retTokens

1733 

def tryParse(self, instring, loc):
    """Attempt a match at ``loc`` with parse actions disabled and return the
    end location. A ParseFatalException is converted into a ParseException
    located at ``loc``."""
    try:
        outcome = self._parse(instring, loc, doActions=False)
        return outcome[0]
    except ParseFatalException:
        raise ParseException(instring, loc, self.errmsg, self)

1739 

def canParseNext(self, instring, loc):
    """Return True if this element matches at ``loc``; False if the attempt
    raises ParseException or IndexError."""
    try:
        self.tryParse(instring, loc)
    except (ParseException, IndexError):
        return False
    return True

1747 

class _UnboundedCache(object):
    """
    Packrat cache with no size limit.

    ``get``/``set``/``clear`` are closures over a private dict that are bound
    to the instance in ``__init__``.  ``get`` returns the ``not_in_cache``
    sentinel for a missing key, so that ``None`` or any other value can be
    stored as a legitimate entry.
    """
    def __init__(self):
        cache = {}
        self.not_in_cache = not_in_cache = object()

        def get(self, key):
            return cache.get(key, not_in_cache)

        def set(self, key, value):
            cache[key] = value

        def clear(self):
            cache.clear()

        def cache_len(self):
            return len(cache)

        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set, self)
        self.clear = types.MethodType(clear, self)
        self._cache_len = types.MethodType(cache_len, self)

    def __len__(self):
        # Special methods are looked up on the type, not the instance, so
        # __len__ must live on the class for len(cache) to work; it delegates
        # to the per-instance closure that can see the private dict.
        return self._cache_len()

1769 

if _OrderedDict is not None:
    class _FifoCache(object):
        """
        Packrat cache holding at most ``size`` entries; when full, the oldest
        inserted entry is evicted first.  ``get`` returns the ``not_in_cache``
        sentinel for a missing key.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = _OrderedDict()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                cache[key] = value
                # popitem(False) removes the oldest entry; the emptiness check
                # keeps a negative size from looping forever
                while cache and len(cache) > size:
                    cache.popitem(False)

            def clear(self):
                cache.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self._cache_len = types.MethodType(cache_len, self)

        def __len__(self):
            # defined on the class because len() ignores instance attributes
            return self._cache_len()

else:
    class _FifoCache(object):
        """
        Fallback packrat cache for interpreters without ``OrderedDict``:
        a plain dict plus a deque recording insertion order.  Holds at most
        ``size`` entries, evicting the oldest inserted entry first.
        """
        def __init__(self, size):
            self.not_in_cache = not_in_cache = object()

            cache = {}
            # no maxlen: a bounded deque would silently drop the oldest key
            # without removing its entry from ``cache``
            key_fifo = collections.deque()

            def get(self, key):
                return cache.get(key, not_in_cache)

            def set(self, key, value):
                # queue each key once so re-setting a key keeps its position
                if key not in cache:
                    key_fifo.append(key)
                cache[key] = value
                while key_fifo and len(cache) > size:
                    cache.pop(key_fifo.popleft(), None)

            def clear(self):
                cache.clear()
                key_fifo.clear()

            def cache_len(self):
                return len(cache)

            self.get = types.MethodType(get, self)
            self.set = types.MethodType(set, self)
            self.clear = types.MethodType(clear, self)
            self._cache_len = types.MethodType(cache_len, self)

        def __len__(self):
            # defined on the class because len() ignores instance attributes
            return self._cache_len()

1827 

# argument cache for optimizing repeated calls when backtracking through recursive expressions
packrat_cache = {}  # this is set later by enablePackrat(); this is here so that resetCache() doesn't fail
# re-entrant lock held by _parseCache around each cache lookup/update
packrat_cache_lock = RLock()
# [hits, misses] counters, indexed by the HIT/MISS constants in _parseCache
packrat_cache_stats = [0, 0]

1832 

1833 # this method gets repeatedly called during backtracking with the same arguments - 

1834 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression 

def _parseCache(self, instring, loc, doActions=True, callPreParse=True):
    """
    Memoizing wrapper around ``_parseNoCache``.

    Results are keyed on ``(self, instring, loc, callPreParse, doActions)``.
    A successful parse is cached as ``(loc, copy of tokens)``; a
    ``ParseBaseException`` is cached as a fresh instance built from its
    ``args`` and then re-raised.  On a hit, a cached exception is raised and a
    cached result is returned with the tokens copied again.  The whole
    lookup/parse/store sequence runs under ``packrat_cache_lock``.
    """
    HIT, MISS = 0, 1
    lookup = (self, instring, loc, callPreParse, doActions)
    with ParserElement.packrat_cache_lock:
        cache = ParserElement.packrat_cache
        cached = cache.get(lookup)

        if cached is not cache.not_in_cache:
            ParserElement.packrat_cache_stats[HIT] += 1
            if isinstance(cached, Exception):
                raise cached
            return cached[0], cached[1].copy()

        ParserElement.packrat_cache_stats[MISS] += 1
        try:
            result = self._parseNoCache(instring, loc, doActions, callPreParse)
        except ParseBaseException as pe:
            # cache a copy of the exception, without the traceback
            cache.set(lookup, pe.__class__(*pe.args))
            raise
        cache.set(lookup, (result[0], result[1].copy()))
        return result

1857 

# _parse starts out bound to the uncached implementation; enablePackrat()
# rebinds it to _parseCache
_parse = _parseNoCache

1859 

@staticmethod
def resetCache():
    """Empty the shared packrat cache and zero its statistics counters."""
    ParserElement.packrat_cache.clear()
    stats = ParserElement.packrat_cache_stats
    # slice assignment zeroes the existing list in place
    stats[:] = [0] * len(stats)

1864 

_packratEnabled = False
@staticmethod
def enablePackrat(cache_size_limit=128):
    """
    Turn on "packrat" parsing, i.e. memoization of parse attempts.

    With packrat enabled, a repeated attempt to parse the same expression at
    the same string location (common in complex grammars) is answered from a
    cache instead of being re-executed.  Both successful results and parsing
    exceptions are memoized.

    Parameters:

    - cache_size_limit - (default= ``128``) - if an integer value is provided
      will limit the size of the packrat cache; if None is passed, then
      the cache size will be unbounded; if 0 is passed, the cache will
      be effectively disabled.

    Only the first call has any effect; later calls return without changing
    the cache.

    Memoization may break existing programs whose parse actions have
    side-effects, which is why packrat parsing is off when pyparsing is first
    imported.  To activate it, call :class:`ParserElement.enablePackrat`,
    preferably immediately after importing pyparsing.

    Example::

        import pyparsing
        pyparsing.ParserElement.enablePackrat()
    """
    if ParserElement._packratEnabled:
        return

    ParserElement._packratEnabled = True
    if cache_size_limit is None:
        new_cache = ParserElement._UnboundedCache()
    else:
        new_cache = ParserElement._FifoCache(cache_size_limit)
    ParserElement.packrat_cache = new_cache
    # from now on every expression parses through the memoizing wrapper
    ParserElement._parse = ParserElement._parseCache

1900 

def parseString(self, instring, parseAll=False):
    """
    Run this parse expression against ``instring``, starting at location 0.
    This is the main entry point for client code once the complete
    expression has been built.

    Returns the parsed data as a :class:`ParseResults` object, which may be
    accessed as a list, or as a dict or object with attributes if the parser
    includes results names.

    Set ``parseAll`` to True to require that the entire input string be
    consumed (equivalent to ending the grammar with ``StringEnd()``).

    Note: unless ``keepTabs`` is set, ``parseString`` calls ``expandtabs()``
    on the input string, in order to report proper column numbers in parse
    actions.  If the input string contains tabs and the grammar uses parse
    actions that use the ``loc`` argument to index into the string being
    parsed, you can ensure you have a consistent view of the input string by:

    - calling ``parseWithTabs`` on your grammar before calling ``parseString``
      (see :class:`parseWithTabs`)
    - defining your parse action using the full ``(s, loc, toks)`` signature,
      and referencing the input string using the parse action's ``s`` argument
    - explicitly expanding the tabs in your input string before calling
      ``parseString``

    Example::

        Word('a').parseString('aaaaabaaa')  # -> ['aaaaa']
        Word('a').parseString('aaaaabaaa', parseAll=True)  # -> Exception: Expected end of text
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        # ~ self.saveAsList = True
    for ignored in self.ignoreExprs:
        ignored.streamline()

    source = instring if self.keepTabs else instring.expandtabs()
    try:
        loc, tokens = self._parse(source, 0)
        if parseAll:
            # skip trailing whitespace/ignorables, then require end of input
            loc = self.preParse(source, loc)
            end_check = Empty() + StringEnd()
            end_check._parse(source, loc)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc
    return tokens

1958 

def scanString(self, instring, maxMatches=_MAX_INT, overlap=False):
    """
    Scan the input string for expression matches.  Each match will return the
    matching tokens, start location, and end location.  May be called with optional
    ``maxMatches`` argument, to clip scanning after 'n' matches are found.  If
    ``overlap`` is specified, then overlapping matches will be reported.

    This is a generator: each match is yielded as a
    ``(tokens, start, end)`` tuple.

    Note that the start and end locations are reported relative to the string
    being parsed.  See :class:`parseString` for more information on parsing
    strings with embedded tabs.

    Example::

        source = "sldjf123lsdjjkf345sldkjf879lkjsfd987"
        print(source)
        for tokens, start, end in Word(alphas).scanString(source):
            print(' '*start + '^'*(end-start))
            print(' '*start + tokens[0])

    prints::

        sldjf123lsdjjkf345sldkjf879lkjsfd987
        ^^^^^
        sldjf
                ^^^^^^^
                lsdjjkf
                          ^^^^^^
                          sldkjf
                                   ^^^^^^
                                   lkjsfd
    """
    if not self.streamlined:
        self.streamline()
    for e in self.ignoreExprs:
        e.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    # bind the bound methods to locals once, outside the scanning loop
    preparseFn = self.preParse
    parseFn = self._parse
    ParserElement.resetCache()
    matches = 0
    try:
        while loc <= instrlen and matches < maxMatches:
            try:
                # preloc is where the match attempt actually starts
                preloc = preparseFn(instring, loc)
                nextLoc, tokens = parseFn(instring, preloc, callPreParse=False)
            except ParseException:
                # no match here; retry one character past the attempted start
                loc = preloc + 1
            else:
                if nextLoc > loc:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    if overlap:
                        # NOTE: ``nextloc`` (lowercase) is the preparsed position of
                        # ``loc``; ``nextLoc`` is the end of the match just yielded
                        nextloc = preparseFn(instring, loc)
                        if nextloc > loc:
                            loc = nextLoc
                        else:
                            loc += 1
                    else:
                        loc = nextLoc
                else:
                    # match did not advance past loc; step forward to avoid looping in place
                    loc = preloc + 1
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        else:
            # catch and re-raise exception from here, clearing out pyparsing internal stack trace
            if getattr(exc, '__traceback__', None) is not None:
                exc.__traceback__ = self._trim_traceback(exc.__traceback__)
            raise exc

2032 

def transformString(self, instring):
    """
    Extension to :class:`scanString` that rewrites matched text using the
    tokens returned for each match (typically modified by a parse action).

    To use ``transformString``, define a grammar and attach a parse action to
    it that modifies the returned token list.  ``transformString()`` scans the
    target string for matches, substitutes each match with its tokens, leaves
    the text between matches untouched, and returns the resulting string.

    Note that this sets ``keepTabs`` to True on the expression.

    Example::

        wd = Word(alphas)
        wd.setParseAction(lambda toks: toks[0].title())

        print(wd.transformString("now is the winter of our discontent made glorious summer by this sun of york."))

    prints::

        Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York.
    """
    pieces = []
    prev_end = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks, start, end in self.scanString(instring):
            # unmatched text between the previous match and this one
            pieces.append(instring[prev_end:start])
            if toks:
                if isinstance(toks, ParseResults):
                    pieces.extend(toks.asList())
                elif isinstance(toks, list):
                    pieces.extend(toks)
                else:
                    pieces.append(toks)
            prev_end = end
        pieces.append(instring[prev_end:])
        non_empty = [piece for piece in pieces if piece]
        return "".join(map(_ustr, _flatten(non_empty)))
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc

2080 

def searchString(self, instring, maxMatches=_MAX_INT):
    """
    Another extension to :class:`scanString`, collecting just the tokens of
    every match into a single :class:`ParseResults`.  May be called with
    optional ``maxMatches`` argument, to clip searching after 'n' matches are
    found.

    Example::

        # a capitalized word starts with an uppercase letter, followed by zero or more lowercase letters
        cap_word = Word(alphas.upper(), alphas.lower())

        print(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity"))

        # the sum() builtin can be used to merge results into a single ParseResults object
        print(sum(cap_word.searchString("More than Iron, more than Lead, more than Gold I need Electricity")))

    prints::

        [['More'], ['Iron'], ['Lead'], ['Gold'], ['I'], ['Electricity']]
        ['More', 'Iron', 'Lead', 'Gold', 'I', 'Electricity']
    """
    try:
        found = [tokens for tokens, _start, _end in self.scanString(instring, maxMatches)]
        return ParseResults(found)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc

2112 

def split(self, instring, maxsplit=_MAX_INT, includeSeparators=False):
    """
    Generator method to split a string using this expression as the separator.

    Parameters:
     - instring - the string to split
     - maxsplit - limits the number of splits (passed to :class:`scanString`
       as ``maxMatches``)
     - includeSeparators - (default= ``False``) if True, the first token of
       each separator match is yielded between the surrounding pieces

    The text after the last separator is always yielded, even if empty.

    Example::

        punc = oneOf(list(".,;:/-!?"))
        print(list(punc.split("This, this?, this sentence, is badly punctuated!")))

    prints::

        ['This', ' this', '', ' this sentence', ' is badly punctuated', '']
    """
    cursor = 0
    for tokens, start, end in self.scanString(instring, maxMatches=maxsplit):
        yield instring[cursor:start]
        if includeSeparators:
            yield tokens[0]
        cursor = end
    yield instring[cursor:]

2137 

def __add__(self, other):
    """
    Implementation of + operator - returns :class:`And`.  A string operand is
    converted with ``_literalStringClass`` (:class:`Literal` by default).

    Example::

        greet = Word(alphas) + "," + Word(alphas) + "!"
        hello = "Hello, World!"
        print (hello, "->", greet.parseString(hello))

    prints::

        Hello, World! -> ['Hello', ',', 'World', '!']

    ``...`` may be used as a parse expression as a short form of :class:`SkipTo`.

        Literal('start') + ... + Literal('end')

    is equivalent to:

        Literal('start') + SkipTo('end')("_skipped*") + Literal('end')

    Note that the skipped text is returned with '_skipped' as a results name,
    and to support having multiple skips in the same parser, the value returned is
    a list of all skipped text.

    An operand that is neither a string nor a :class:`ParserElement` triggers
    a ``SyntaxWarning`` and ``None`` is returned.
    """
    if other is Ellipsis:
        return _PendingSkip(self)

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return And([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2175 

def __radd__(self, other):
    """
    Implementation of + operator when left operand is not a :class:`ParserElement`.

    ``... + expr`` builds ``SkipTo(expr)("_skipped*") + expr``; a string
    operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    if other is Ellipsis:
        return SkipTo(self)("_skipped*") + self

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2190 

def __sub__(self, other):
    """
    Implementation of - operator, returns :class:`And` with error stop.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return self + And._ErrorStop() + operand
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2202 

def __rsub__(self, other):
    """
    Implementation of - operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2214 

def __mul__(self, other):
    """
    Implementation of * operator, allows use of ``expr * 3`` in place of
    ``expr + expr + expr``.  Expressions may also be multiplied by a 2-integer
    tuple, similar to ``{min, max}`` multipliers in regular expressions.  Tuples
    may also include ``None`` as in:
     - ``expr*(n, None)`` or ``expr*(n, )`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr*(None, n)`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr*(None, None)`` is equivalent to ``ZeroOrMore(expr)``
     - ``expr*(1, None)`` is equivalent to ``OneOrMore(expr)``

    ``...`` (Ellipsis) may be used in place of ``None`` in a tuple, and
    ``expr * ...`` is equivalent to ``ZeroOrMore(expr)``.

    Note that ``expr*(None, n)`` does not raise an exception if
    more than n exprs exist in the input stream; that is,
    ``expr*(None, n)`` does not enforce a maximum number of expr
    occurrences.  If this behavior is desired, then write
    ``expr*(None, n) + ~expr``

    Raises:
     - TypeError - if ``other`` is not an int or a tuple, or the tuple
       elements are not int/None/Ellipsis
     - ValueError - for a negative count, a maximum below the minimum, or a
       multiplier of 0 or (0, 0)
    """
    if other is Ellipsis:
        other = (0, None)
    elif isinstance(other, tuple) and other[:1] == (Ellipsis,):
        # leading Ellipsis means "from zero"; pad so a bare (...,) becomes (0, None)
        other = ((0, ) + other[1:] + (None,))[:2]

    if isinstance(other, int):
        minElements, optElements = other, 0
    elif isinstance(other, tuple):
        other = tuple(o if o is not Ellipsis else None for o in other)
        # pad short tuples with None and clip to (min, max)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0], int) and other[1] is None:
            # open-ended repetition
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self * other[0] + ZeroOrMore(self)
        elif isinstance(other[0], int) and isinstance(other[1], int):
            minElements, optElements = other
            # optElements becomes the count of optional repetitions beyond the minimum
            optElements -= minElements
        else:
            raise TypeError("cannot multiply 'ParserElement' and ('%s', '%s') objects"
                            % (type(other[0]), type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0, 0)")

    if optElements:
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n > 1:
                return Optional(self + makeOptionalList(n - 1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self] * minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self] * minElements)
    return ret

2288 

def __rmul__(self, other):
    """
    Implementation of * operator when the left operand is not a
    :class:`ParserElement`; delegates to ``__mul__`` with the same operand.
    """
    product = self.__mul__(other)
    return product

2291 

def __or__(self, other):
    """
    Implementation of | operator - returns :class:`MatchFirst`.

    ``expr | ...`` returns a ``_PendingSkip`` created with ``must_skip=True``.
    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    if other is Ellipsis:
        return _PendingSkip(self, must_skip=True)

    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return MatchFirst([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2306 

def __ror__(self, other):
    """
    Implementation of | operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2318 

def __xor__(self, other):
    """
    Implementation of ^ operator - returns :class:`Or`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return Or([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2330 

def __rxor__(self, other):
    """
    Implementation of ^ operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2342 

def __and__(self, other):
    """
    Implementation of & operator - returns :class:`Each`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return Each([self, operand])
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2354 

def __rand__(self, other):
    """
    Implementation of & operator when left operand is not a :class:`ParserElement`.

    A string operand is converted with ``_literalStringClass``; any other
    non-ParserElement operand triggers a ``SyntaxWarning`` and returns ``None``.
    """
    operand = self._literalStringClass(other) if isinstance(other, basestring) else other
    if isinstance(operand, ParserElement):
        return operand & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(operand),
                  SyntaxWarning, stacklevel=2)
    return None

2366 

def __invert__(self):
    """
    Implementation of ~ operator - wraps this expression in :class:`NotAny`.
    """
    negated = NotAny(self)
    return negated

2372 

def __iter__(self):
    """Always raise ``TypeError``: parser elements are not iterable."""
    # must implement __iter__ to override legacy use of sequential access to __getitem__ to
    # iterate over a sequence
    type_name = self.__class__.__name__
    raise TypeError('%r object is not iterable' % type_name)

2377 

def __getitem__(self, key):
    """
    use ``[]`` indexing notation as a short form for expression repetition:
     - ``expr[n]`` is equivalent to ``expr*n``
     - ``expr[m, n]`` is equivalent to ``expr*(m, n)``
     - ``expr[n, ...]`` or ``expr[n,]`` is equivalent
          to ``expr*n + ZeroOrMore(expr)``
          (read as "at least n instances of ``expr``")
     - ``expr[..., n]`` is equivalent to ``expr*(0, n)``
          (read as "0 to n instances of ``expr``")
     - ``expr[...]`` and ``expr[0, ...]`` are equivalent to ``ZeroOrMore(expr)``
     - ``expr[1, ...]`` is equivalent to ``OneOrMore(expr)``
    ``None`` may be used in place of ``...``.

    Note that ``expr[..., n]`` and ``expr[m, n]`` do not raise an exception
    if more than ``n`` ``expr``s exist in the input stream.  If this behavior is
    desired, then write ``expr[..., n] + ~expr``.

    If more than two index arguments are given, a warning is issued and only
    the first two are used.
    """

    # convert single arg keys to tuples
    try:
        if isinstance(key, str):
            key = (key,)
        iter(key)
    except TypeError:
        # a non-iterable key such as expr[3] means "exactly 3", i.e. (3, 3)
        key = (key, key)

    n_args = len(key)
    if n_args > 2:
        overflow = '... [{0}]'.format(n_args) if n_args > 5 else ''
        warnings.warn("only 1 or 2 index arguments supported ({0}{1})".format(key[:5], overflow))

    # clip to 2 elements
    return self * tuple(key[:2])

2413 

def __call__(self, name=None):
    """
    Shortcut for :class:`setResultsName`, with ``listAllMatches=False``.

    If ``name`` is given with a trailing ``'*'`` character, then ``listAllMatches`` will be
    passed as ``True``.

    If ``name`` is omitted, same as calling :class:`copy`.

    Example::

        # these are equivalent
        userdata = Word(alphas).setResultsName("name") + Word(nums + "-").setResultsName("socsecno")
        userdata = Word(alphas)("name") + Word(nums + "-")("socsecno")
    """
    if name is None:
        return self.copy()
    return self._setResultsName(name)

2433 

def suppress(self):
    """
    Return this :class:`ParserElement` wrapped in :class:`Suppress`, so that
    its output is suppressed; useful to keep punctuation from cluttering up
    returned output.
    """
    suppressed = Suppress(self)
    return suppressed

2440 

def leaveWhitespace(self):
    """
    Turn off whitespace skipping for this :class:`ParserElement`, so leading
    whitespace is not skipped before matching its pattern.  This is normally
    only used internally by the pyparsing module, but may be needed in some
    whitespace-sensitive grammars.

    Returns ``self`` so calls can be chained.
    """
    self.skipWhitespace = False
    return self

2449 

def setWhitespaceChars(self, chars):
    """
    Replace the default whitespace characters for this element with ``chars``.

    Also turns whitespace skipping on and clears ``copyDefaultWhiteChars``.
    Returns ``self`` so calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    self.copyDefaultWhiteChars = False
    return self

2458 

def parseWithTabs(self):
    """
    Set ``keepTabs`` so that ``<TAB>``s are no longer expanded to spaces
    before the input string is parsed.  Must be called before ``parseString``
    when the input grammar contains elements that match ``<TAB>`` characters.

    Returns ``self`` so calls can be chained.
    """
    self.keepTabs = True
    return self

2467 

def ignore(self, other):
    """
    Register an expression to be ignored (e.g., comments) while doing pattern
    matching; may be called repeatedly, to define multiple comment or other
    ignorable patterns.

    A string is wrapped in :class:`Suppress`.  A :class:`Suppress` is added
    only if it is not already registered; any other expression is copied,
    wrapped in :class:`Suppress`, and added.  Returns ``self``.

    Example::

        patt = OneOrMore(Word(alphas))
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj']

        patt.ignore(cStyleComment)
        patt.parseString('ablaj /* comment */ lskjd') # -> ['ablaj', 'lskjd']
    """
    if isinstance(other, basestring):
        other = Suppress(other)

    if not isinstance(other, Suppress):
        self.ignoreExprs.append(Suppress(other.copy()))
    elif other not in self.ignoreExprs:
        self.ignoreExprs.append(other)
    return self

2491 

def setDebugActions(self, startAction, successAction, exceptionAction):
    """
    Enable display of debugging messages while doing pattern matching, using
    the given callbacks.

    Any action that is falsy (e.g. ``None``) is replaced by the corresponding
    default debug action.  Sets ``debug`` to True and returns ``self``.
    """
    on_start = startAction or _defaultStartDebugAction
    on_success = successAction or _defaultSuccessDebugAction
    on_failure = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (on_start, on_success, on_failure)
    self.debug = True
    return self

2501 

def setDebug(self, flag=True):
    """
    Enable display of debugging messages while doing pattern matching.
    Set ``flag`` to True to enable (installing the default debug actions),
    False to disable.  Returns ``self``.

    Example::

        wd = Word(alphas).setName("alphaword")
        integer = Word(nums).setName("numword")
        term = wd | integer

        # turn on debugging for wd
        wd.setDebug()

        OneOrMore(term).parseString("abc 123 xyz 890")

    prints::

        Match alphaword at loc 0(1,1)
        Matched alphaword -> ['abc']
        Match alphaword at loc 3(1,4)
        Exception raised:Expected alphaword (at char 4), (line:1, col:5)
        Match alphaword at loc 7(1,8)
        Matched alphaword -> ['xyz']
        Match alphaword at loc 11(1,12)
        Exception raised:Expected alphaword (at char 12), (line:1, col:13)
        Match alphaword at loc 15(1,16)
        Exception raised:Expected alphaword (at char 15), (line:1, col:16)

    The output shown is that produced by the default debug actions - custom debug actions can be
    specified using :class:`setDebugActions`. Prior to attempting
    to match the ``wd`` expression, the debugging message ``"Match <exprname> at loc <n>(<line>,<col>)"``
    is shown. Then if the parse succeeds, a ``"Matched"`` message is shown, or an ``"Exception raised"``
    message is shown. Also note the use of :class:`setName` to assign a human-readable name to the expression,
    which makes debugging and exception messages easier to understand - for instance, the default
    name created for the :class:`Word` expression without calling ``setName`` is ``"W:(ABCD...)"``.
    """
    if not flag:
        self.debug = False
    else:
        self.setDebugActions(_defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction)
    return self

2544 

def __str__(self):
    """Return this expression's ``name``."""
    label = self.name
    return label

2547 

def __repr__(self):
    """Return the same text as the string conversion performed by ``_ustr``."""
    text = _ustr(self)
    return text

2550 

def streamline(self):
    """
    Mark this expression as streamlined and reset ``strRepr`` to None.
    Returns ``self``.
    """
    self.strRepr = None
    self.streamlined = True
    return self

2555 

def checkRecursion(self, parseElementList):
    """No-op in this base implementation; always returns None."""
    return None

2558 

def validate(self, validateTrace=None):
    """
    Check defined expressions for valid structure, check for infinite recursive definitions.

    Runs :class:`checkRecursion` starting from an empty element list;
    ``validateTrace`` is not used by this implementation.
    """
    seen_elements = []
    self.checkRecursion(seen_elements)

2564 

def parseFile(self, file_or_filename, parseAll=False):
    """
    Execute the parse expression on the given file or filename.

    ``file_or_filename`` is first treated as a file object and ``read()``;
    if it has no such attribute it is treated as a filename, and the file is
    opened in text mode, read in full, and closed before parsing.  The
    contents are then passed to :class:`parseString` along with ``parseAll``.
    """
    try:
        text = file_or_filename.read()
    except AttributeError:
        # not a file object - treat the argument as a path
        with open(file_or_filename, "r") as handle:
            text = handle.read()

    try:
        return self.parseString(text, parseAll)
    except ParseBaseException as exc:
        if ParserElement.verbose_stacktrace:
            raise
        # catch and re-raise exception from here, clearing out pyparsing internal stack trace
        if getattr(exc, '__traceback__', None) is not None:
            exc.__traceback__ = self._trim_traceback(exc.__traceback__)
        raise exc

2586 

def __eq__(self, other):
    """
    Compare this expression with ``other``.

    - the same object: True
    - a string: True if this expression matches it (see :class:`matches`)
    - another :class:`ParserElement`: True if both have equal instance attributes
    - anything else: False
    """
    if self is other:
        return True
    if isinstance(other, basestring):
        return self.matches(other)
    if isinstance(other, ParserElement):
        return vars(self) == vars(other)
    return False

2595 

def __ne__(self, other):
    """Return the logical negation of ``self == other``."""
    are_equal = self == other
    return not are_equal

2598 

def __hash__(self):
    """Hash by object identity (``id(self)``)."""
    identity = id(self)
    return identity

2601 

def __req__(self, other):
    """Return the result of ``self == other``."""
    outcome = self == other
    return outcome

2604 

def __rne__(self, other):
    """Return the logical negation of ``self == other``."""
    are_equal = self == other
    return not are_equal

2607 

def matches(self, testString, parseAll=True):
    """
    Method for quick testing of a parser against a test string. Good for simple
    inline microtests of sub expressions while building up larger parser.

    Parameters:
     - testString - to test against this expression for a match
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests

    Returns True if :class:`parseString` succeeds, False if it raises
    ``ParseBaseException``.

    Example::

        expr = Word(nums)
        assert expr.matches("100")
    """
    try:
        self.parseString(_ustr(testString), parseAll=parseAll)
    except ParseBaseException:
        return False
    return True

2627 

def runTests(self, tests, parseAll=True, comment='#',
             fullDump=True, printResults=True, failureTests=False, postParse=None,
             file=None):
    """
    Execute the parse expression on a series of test strings, showing each
    test, the parsed results or where the parse failed. Quick and easy way to
    run a parse expression against a list of sample strings.

    Parameters:
     - tests - a list of separate test strings, or a multiline string of test strings
     - parseAll - (default= ``True``) - flag to pass to :class:`parseString` when running tests
     - comment - (default= ``'#'``) - expression for indicating embedded comments in the test
          string; pass None to disable comment filtering
     - fullDump - (default= ``True``) - dump results as list followed by results names in nested outline;
          if False, only dump nested list
     - printResults - (default= ``True``) prints test output to stdout
     - failureTests - (default= ``False``) indicates if these tests are expected to fail parsing
     - postParse - (default= ``None``) optional callback for successful parse results; called as
          `fn(test_string, parse_results)` and returns a string to be added to the test output
     - file - (default=``None``) optional file-like object to which test output will be written;
          if None, will default to ``sys.stdout``

    Returns: a (success, results) tuple, where success indicates that all tests succeeded
    (or failed if ``failureTests`` is True), and results is a list of
    ``(test_string, result)`` pairs, one per executed test; ``result`` is the parsed
    results on success, or the exception that was raised on failure

    Example::

        number_expr = pyparsing_common.number.copy()

        result = number_expr.runTests('''
            # unsigned integer
            100
            # negative integer
            -100
            # float with scientific notation
            6.02e23
            # integer with scientific notation
            1e-12
            ''')
        print("Success" if result[0] else "Failed!")

        result = number_expr.runTests('''
            # stray character
            100Z
            # missing leading digit before '.'
            -.100
            # too many '.'
            3.14.159
            ''', failureTests=True)
        print("Success" if result[0] else "Failed!")

    prints::

        # unsigned integer
        100
        [100]

        # negative integer
        -100
        [-100]

        # float with scientific notation
        6.02e23
        [6.02e+23]

        # integer with scientific notation
        1e-12
        [1e-12]

        Success

        # stray character
        100Z
           ^
        FAIL: Expected end of text (at char 3), (line:1, col:4)

        # missing leading digit before '.'
        -.100
        ^
        FAIL: Expected {real number with scientific notation | real number | signed integer} (at char 0), (line:1, col:1)

        # too many '.'
        3.14.159
            ^
        FAIL: Expected end of text (at char 4), (line:1, col:5)

        Success

    Each test string must be on a single line. If you want to test a string that spans multiple
    lines, create a test like this::

        expr.runTests(r"this is a test\\n of strings that spans \\n 3 lines")

    (Note that this is a raw string literal, you must include the leading 'r'.)
    """
    if isinstance(tests, basestring):
        # Strip each line through its own bound method: the unbound ``str.strip``
        # rejects ``unicode`` lines under Python 2, even though any basestring
        # is accepted here.
        tests = [test_line.strip() for test_line in tests.rstrip().splitlines()]
    if isinstance(comment, basestring):
        comment = Literal(comment)
    if file is None:
        file = sys.stdout
    print_ = file.write

    allResults = []
    comments = []
    success = True
    # expression that turns a literal backslash-n (outside of quoted strings) into a newline
    NL = Literal(r'\n').addParseAction(replaceWith('\n')).ignore(quotedString)
    BOM = u'\ufeff'
    for t in tests:
        # collect comment lines (and blank lines that follow a comment) so they can be
        # echoed ahead of the next real test
        if comment is not None and comment.matches(t, False) or comments and not t:
            comments.append(t)
            continue
        if not t:
            continue
        out = ['\n' + '\n'.join(comments) if comments else '', t]
        comments = []
        try:
            # convert newline marks to actual newlines, and strip leading BOM if present
            t = NL.transformString(t.lstrip(BOM))
            result = self.parseString(t, parseAll=parseAll)
        except ParseBaseException as pe:
            fatal = "(FATAL)" if isinstance(pe, ParseFatalException) else ""
            if '\n' in t:
                # multi-line test: show the failing line, then a caret under the failing column
                out.append(line(pe.loc, t))
                out.append(' ' * (col(pe.loc, t) - 1) + '^' + fatal)
            else:
                out.append(' ' * pe.loc + '^' + fatal)
            out.append("FAIL: " + str(pe))
            # a parse failure only counts as success when failures are expected
            success = success and failureTests
            result = pe
        except Exception as exc:
            out.append("FAIL-EXCEPTION: " + str(exc))
            success = success and failureTests
            result = exc
        else:
            success = success and not failureTests
            if postParse is not None:
                try:
                    pp_value = postParse(t, result)
                    if pp_value is not None:
                        if isinstance(pp_value, ParseResults):
                            out.append(pp_value.dump())
                        else:
                            out.append(str(pp_value))
                    else:
                        out.append(result.dump())
                except Exception as e:
                    # report the callback failure, but still show the parsed results
                    out.append(result.dump(full=fullDump))
                    out.append("{0} failed: {1}: {2}".format(postParse.__name__, type(e).__name__, e))
            else:
                out.append(result.dump(full=fullDump))

        if printResults:
            if fullDump:
                out.append('')
            print_('\n'.join(out))

        allResults.append((t, result))

    return success, allResults

2789 

2790 

class _PendingSkip(ParserElement):
    # internal placeholder class to hold a place where '...' is added to a parser element;
    # once another ParserElement is added, this placeholder is replaced with a SkipTo
    def __init__(self, expr, must_skip=False):
        super(_PendingSkip, self).__init__()
        # display form: the anchor expression followed by '...'
        self.strRepr = str(expr + Empty()).replace('Empty', '...')
        self.name = self.strRepr
        self.anchor = expr
        self.must_skip = must_skip

    def __add__(self, other):
        """Build the real expression: anchor, then a SkipTo up to ``other``, then ``other``."""
        skipper = SkipTo(other).setName("...")("_skipped*")
        if not self.must_skip:
            return self.anchor + skipper + other

        def must_skip(t):
            # nothing (or only an empty string) was skipped: drop the placeholder token
            if not t._skipped or t._skipped.asList() == ['']:
                del t[0]
                t.pop("_skipped", None)

        def show_skip(t):
            # the anchor was not found: record that in place of the skipped text
            if t._skipped.asList()[-1:] == ['']:
                t.pop('_skipped')
                t['_skipped'] = 'missing <' + repr(self.anchor) + '>'

        anchored = self.anchor + skipper().addParseAction(must_skip)
        unanchored = skipper().addParseAction(show_skip)
        return (anchored | unanchored) + other

    def __repr__(self):
        return self.strRepr

    def parseImpl(self, *args):
        # a bare '...' placeholder must never reach parsing
        raise Exception("use of `...` expression without following SkipTo target expression")

2822 

2823 

class Token(ParserElement):
    """Abstract base for atomic matching patterns; a :class:`ParserElement`
    subclass constructed with ``savelist=False``.
    """

    def __init__(self):
        super(Token, self).__init__(savelist=False)

2830 

2831 

class Empty(Token):
    """An empty token; it will always match."""

    def __init__(self):
        super(Empty, self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True

2840 

2841 

class NoMatch(Token):
    """A token that never matches: parsing it always raises ``ParseException``."""

    def __init__(self):
        super(NoMatch, self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        # unconditional failure at the current location
        raise ParseException(instring, loc, self.errmsg, self)

2854 

2855 

class Literal(Token):
    """Token that matches one specific string exactly.

    Example::

        Literal('blah').parseString('blah')  # -> ['blah']
        Literal('blah').parseString('blahfooblah')  # -> ['blah']
        Literal('blah').parseString('bla')  # -> Exception: Expected "blah"

    Use :class:`CaselessLiteral` for case-insensitive matching.

    Use :class:`Keyword` or :class:`CaselessKeyword` for keyword matching
    (forcing a word break before and after the matched string).
    """

    def __init__(self, matchString):
        super(Literal, self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty literal cannot be matched by first character; behave as Empty instead
            warnings.warn("null string passed to Literal; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False

        # Performance tuning: exact Literal instances (not subclasses) of length 1
        # switch to a class whose parseImpl only compares a single character
        is_plain_literal = type(self) is Literal
        if self.matchLen == 1 and is_plain_literal:
            self.__class__ = _SingleCharLiteral

    def parseImpl(self, instring, loc, doActions=True):
        # cheap first-character comparison before the full startswith() test
        if instring[loc] != self.firstMatchChar or not instring.startswith(self.match, loc):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.match

2894 

class _SingleCharLiteral(Literal):
    # Literal variant whose parseImpl compares only one character of the input
    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] != self.firstMatchChar:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + 1, self.match

2900 

# Short module-level alias for the Literal class.
_L = Literal
# Install Literal as ParserElement's ``_literalStringClass``.
# NOTE(review): presumably the class used to wrap plain strings that are combined
# with expressions -- confirm against ParserElement.
ParserElement._literalStringClass = Literal

2903 

class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is,
    it must not be immediately preceded or followed by a keyword
    (identifier) character. Compare with :class:`Literal`:

     - ``Literal("if")`` will match the leading ``'if'`` in
       ``'ifAndOnlyIf'``.
     - ``Keyword("if")`` will not; it will only match the leading
       ``'if'`` in ``'if x=1'``, or ``'if(y==2)'``

    Accepts two optional constructor arguments in addition to the
    keyword string:

     - ``identChars`` is a string of characters that would be valid
       identifier characters, defaulting to all alphanumerics + "_" and
       "$"
     - ``caseless`` allows case-insensitive matching, default is ``False``.

    Example::

        Keyword("start").parseString("start")  # -> ['start']
        Keyword("start").parseString("starting")  # -> Exception

    For case-insensitive matching, use :class:`CaselessKeyword`.
    """
    DEFAULT_KEYWORD_CHARS = alphanums + "_$"

    def __init__(self, matchString, identChars=None, caseless=False):
        super(Keyword, self).__init__()
        if identChars is None:
            identChars = Keyword.DEFAULT_KEYWORD_CHARS
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                          SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = False
        self.mayIndexError = False
        self.caseless = caseless
        if caseless:
            # caseless matching compares upper-cased text on both sides
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = set(identChars)

    def parseImpl(self, instring, loc, doActions=True):
        # A match requires the keyword text at loc, plus a non-identifier character
        # (or the string boundary) on both sides of it.
        if self.caseless:
            if ((instring[loc:loc + self.matchLen].upper() == self.caselessmatch)
                    and (loc >= len(instring) - self.matchLen
                         or instring[loc + self.matchLen].upper() not in self.identChars)
                    and (loc == 0
                         or instring[loc - 1].upper() not in self.identChars)):
                return loc + self.matchLen, self.match

        else:
            if instring[loc] == self.firstMatchChar:
                if ((self.matchLen == 1 or instring.startswith(self.match, loc))
                        and (loc >= len(instring) - self.matchLen
                             or instring[loc + self.matchLen] not in self.identChars)
                        and (loc == 0 or instring[loc - 1] not in self.identChars)):
                    return loc + self.matchLen, self.match

        raise ParseException(instring, loc, self.errmsg, self)

    def copy(self):
        """Return a copy of this keyword that keeps this instance's ``identChars``.

        The copy must not fall back to ``Keyword.DEFAULT_KEYWORD_CHARS``: doing so
        would discard identifier characters passed to the constructor (and the
        upper-cased set used for caseless matching).
        """
        c = super(Keyword, self).copy()
        c.identChars = set(self.identChars)
        return c

    @staticmethod
    def setDefaultKeywordChars(chars):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars

2981 

class CaselessLiteral(Literal):
    """Token that matches a given string regardless of letter case.
    Note: the matched results are always returned in the case of the
    defining match string, NOT the case of the input text.

    Example::

        OneOrMore(CaselessLiteral("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD', 'CMD']

    (Contrast with example for :class:`CaselessKeyword`.)
    """

    def __init__(self, matchString):
        # the base class stores the upper-cased form for comparison
        super(CaselessLiteral, self).__init__(matchString.upper())
        # keep the literal exactly as it was defined; this is what gets returned
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl(self, instring, loc, doActions=True):
        candidate = instring[loc:loc + self.matchLen]
        if candidate.upper() != self.match:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc + self.matchLen, self.returnString

3004 

class CaselessKeyword(Keyword):
    """
    Case-insensitive variant of :class:`Keyword`; constructs the base
    class with ``caseless=True``.

    Example::

        OneOrMore(CaselessKeyword("CMD")).parseString("cmd CMD Cmd10")  # -> ['CMD', 'CMD']

    (Contrast with example for :class:`CaselessLiteral`.)
    """

    def __init__(self, matchString, identChars=None):
        super(CaselessKeyword, self).__init__(matchString, identChars=identChars, caseless=True)

3017 

class CloseMatch(Token):
    """A variation on :class:`Literal` which matches "close" matches,
    that is, strings with at most 'n' mismatching characters.
    :class:`CloseMatch` takes parameters:

     - ``match_string`` - string to be matched
     - ``maxMismatches`` - (``default=1``) maximum number of
       mismatches allowed to count as a match

    The results from a successful parse will contain the matched text
    from the input string and the following named results:

     - ``mismatches`` - a list of the positions within the
       match_string where mismatches were found
     - ``original`` - the original match_string used to compare
       against the input string

    If ``mismatches`` is an empty list, then the match was an exact
    match.

    Example::

        patt = CloseMatch("ATCATCGAATGGA")
        patt.parseString("ATCATCGAAXGGA")  # -> (['ATCATCGAAXGGA'], {'mismatches': [[9]], 'original': ['ATCATCGAATGGA']})
        patt.parseString("ATCAXCGAAXGGA")  # -> Exception: Expected 'ATCATCGAATGGA' (with up to 1 mismatches) (at char 0), (line:1, col:1)

        # exact match
        patt.parseString("ATCATCGAATGGA")  # -> (['ATCATCGAATGGA'], {'mismatches': [[]], 'original': ['ATCATCGAATGGA']})

        # close match allowing up to 2 mismatches
        patt = CloseMatch("ATCATCGAATGGA", maxMismatches=2)
        patt.parseString("ATCAXCGAAXGGA")  # -> (['ATCAXCGAAXGGA'], {'mismatches': [[4, 9]], 'original': ['ATCATCGAATGGA']})
    """

    def __init__(self, match_string, maxMismatches=1):
        super(CloseMatch, self).__init__()
        self.name = match_string
        self.match_string = match_string
        self.maxMismatches = maxMismatches
        self.errmsg = "Expected %r (with up to %d mismatches)" % (self.match_string, self.maxMismatches)
        self.mayIndexError = False
        self.mayReturnEmpty = False

    def parseImpl(self, instring, loc, doActions=True):
        """Compare ``match_string`` with the same-length slice of ``instring`` at ``loc``.

        Returns ``(end_loc, results)`` when at most ``maxMismatches`` characters
        differ; raises ``ParseException`` otherwise, or when fewer characters
        than ``len(match_string)`` remain in ``instring``.
        """
        start = loc
        match_string = self.match_string
        maxloc = start + len(match_string)

        if maxloc <= len(instring):
            maxMismatches = self.maxMismatches
            mismatches = []

            for match_stringloc, (src, mat) in enumerate(zip(instring[start:maxloc], match_string)):
                if src != mat:
                    mismatches.append(match_stringloc)
                    if len(mismatches) > maxMismatches:
                        break
            else:
                # The whole match string was compared, so the match ends at maxloc.
                # The end must be an absolute position in instring: an offset
                # relative to match_string is wrong whenever start > 0.
                loc = maxloc
                results = ParseResults([instring[start:loc]])
                results['original'] = match_string
                results['mismatches'] = mismatches
                return loc, results

        raise ParseException(instring, loc, self.errmsg, self)

3085 

3086 

class Word(Token):
    """Token for matching words composed of allowed character sets.
    Defined with a string containing all allowed initial characters, an
    optional string containing allowed body characters (if omitted,
    defaults to the initial character set), and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction. An optional ``excludeChars`` parameter can
    list characters to be removed from the given ``initChars`` and
    ``bodyChars`` strings; useful to define a word of all printables
    except for one or two characters, for instance.

    :class:`srange` is useful for defining custom character set strings
    for defining ``Word`` expressions, using range notation from
    regular expression character sets.

    A common mistake is to use :class:`Word` to match a specific literal
    string, as in ``Word("Address")``. Remember that :class:`Word`
    uses the string argument to define *sets* of matchable characters.
    This expression would match "Add", "AAA", "dAred", or any other word
    made up of the characters 'A', 'd', 'r', 'e', and 's'. To match an
    exact literal string, use :class:`Literal` or :class:`Keyword`.

    pyparsing includes helper strings for building Words:

     - :class:`alphas`
     - :class:`nums`
     - :class:`alphanums`
     - :class:`hexnums`
     - :class:`alphas8bit` (alphabetic characters in ASCII range 128-255
       - accented, tilded, umlauted, etc.)
     - :class:`punc8bit` (non-alphabetic characters in ASCII range
       128-255 - currency, symbols, superscripts, diacriticals, etc.)
     - :class:`printables` (any non-whitespace character)

    Example::

        # a word composed of digits
        integer = Word(nums)  # equivalent to Word("0123456789") or Word(srange("0-9"))

        # a word with a leading capital, and zero or more lowercase
        capital_word = Word(alphas.upper(), alphas.lower())

        # hostnames are alphanumeric, with leading alpha, and '-'
        hostname = Word(alphas, alphanums + '-')

        # roman numeral (not a strict parser, accepts invalid mix of characters)
        roman = Word("IVXLCDM")

        # any string of non-whitespace characters, except for ','
        csv_value = Word(printables, excludeChars=",")
    """

    def __init__(self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False, excludeChars=None):
        super(Word, self).__init__()
        if excludeChars:
            # drop the excluded characters from both character strings
            excludeChars = set(excludeChars)
            initChars = ''.join(c for c in initChars if c not in excludeChars)
            if bodyChars:
                bodyChars = ''.join(c for c in bodyChars if c not in excludeChars)

        self.initCharsOrig = initChars
        self.initChars = set(initChars)
        # without explicit body characters, the body uses the initial character set
        body_source = bodyChars if bodyChars else initChars
        self.bodyCharsOrig = body_source
        self.bodyChars = set(body_source)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min
        self.maxLen = max if max > 0 else _MAX_INT

        if exact > 0:
            # an exact length overrides both bounds
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # With default length limits and no space in the character sets, try to build
        # an equivalent regular expression and switch to the regex-based subclass.
        default_lengths = (min == 1 and max == 0 and exact == 0)
        if ' ' not in self.initCharsOrig + self.bodyCharsOrig and default_lengths:
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                self.reString = "%s[%s]*" % (re.escape(self.initCharsOrig),
                                             _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % (_escapeRegexRangeChars(self.initCharsOrig),
                                               _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b" + self.reString + r"\b"

            try:
                self.re = re.compile(self.reString)
            except Exception:
                # regex could not be built; keep using the character-by-character parseImpl
                self.re = None
            else:
                self.re_match = self.re.match
                self.__class__ = _WordRegex

    def parseImpl(self, instring, loc, doActions=True):
        if instring[loc] not in self.initChars:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond maxLen characters or the end of the input
        maxloc = min(start + self.maxLen, instrlen)
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        # too few characters matched
        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)
        # a maximum was given, but more body characters follow the match
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            raise ParseException(instring, loc, self.errmsg, self)
        # keyword mode: the match must not touch body characters on either side
        if self.asKeyword and (start > 0 and instring[start - 1] in bodychars
                               or loc < instrlen and instring[loc] in bodychars):
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(Word, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate character sets longer than 4 characters
                return s[:4] + "..." if len(s) > 4 else s

            if self.initCharsOrig != self.bodyCharsOrig:
                self.strRepr = "W:(%s, %s)" % (charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig))
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr

3245 

class _WordRegex(Word):
    # Word variant that matches through the compiled regex's ``match`` stored in ``self.re_match``
    def parseImpl(self, instring, loc, doActions=True):
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.group()

3254 

3255 

class Char(_WordRegex):
    """Short-cut for ``Word(characters, exact=1)``: matches any single
    character out of a string of characters.
    """

    def __init__(self, charset, asKeyword=False, excludeChars=None):
        super(Char, self).__init__(charset, exact=1, asKeyword=asKeyword, excludeChars=excludeChars)
        # build the single-character regex from the (already filtered) initial character set
        char_class = "[%s]" % _escapeRegexRangeChars(''.join(self.initChars))
        self.reString = char_class
        if asKeyword:
            self.reString = r"\b%s\b" % self.reString
        self.re = re.compile(self.reString)
        self.re_match = self.re.match

3268 

3269 

class Regex(Token):
    r"""Token for matching strings that match a given regular
    expression. Defined with string specifying the regular expression in
    a form recognized by the stdlib Python  `re module <https://docs.python.org/3/library/re.html>`_.
    If the given regex contains named groups (defined using ``(?P<name>...)``),
    these will be preserved as named parse results.

    If instead of the Python stdlib re module you wish to use a different RE module
    (such as the `regex` module), build your Regex object with a compiled RE
    that was compiled using that module.

    Example::

        realnum = Regex(r"[+-]?\d+\.\d*")
        date = Regex(r'(?P<year>\d{4})-(?P<month>\d\d?)-(?P<day>\d\d?)')
        # ref: https://stackoverflow.com/questions/267399/how-do-you-match-only-valid-roman-numerals-with-a-regular-expression
        roman = Regex(r"M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{0,3})")

        # use regex module instead of stdlib re module to construct a Regex using
        # a compiled regular expression
        import regex
        parser = pp.Regex(regex.compile(r'[0-9]'))

    """

    def __init__(self, pattern, flags=0, asGroupList=False, asMatch=False):
        """The parameters ``pattern`` and ``flags`` are passed
        to the ``re.compile()`` function as-is. See the Python
        `re module <https://docs.python.org/3/library/re.html>`_ module for an
        explanation of the acceptable patterns and flags.
        """
        super(Regex, self).__init__()

        if isinstance(pattern, basestring):
            if not pattern:
                warnings.warn("null string passed to Regex; use Empty() instead",
                              SyntaxWarning, stacklevel=2)

            self.pattern = pattern
            self.flags = flags

            try:
                self.re = re.compile(self.pattern, self.flags)
            except sre_constants.error:
                warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                              SyntaxWarning, stacklevel=2)
                raise
            else:
                self.reString = self.pattern

        elif hasattr(pattern, 'pattern') and hasattr(pattern, 'match'):
            # an already-compiled RE object (anything exposing .pattern and .match)
            self.re = pattern
            self.pattern = self.reString = pattern.pattern
            self.flags = flags

        else:
            raise TypeError("Regex may only be constructed with a string or a compiled RE object")

        self.re_match = self.re.match

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        # the expression may return empty if the regex matches the empty string
        self.mayReturnEmpty = self.re_match("") is not None
        self.asGroupList = asGroupList
        self.asMatch = asMatch
        # pick the parseImpl variant on the instance; asMatch wins if both flags are set
        if self.asGroupList:
            self.parseImpl = self.parseImplAsGroupList
        if self.asMatch:
            self.parseImpl = self.parseImplAsMatch

    def parseImpl(self, instring, loc, doActions=True):
        # default form: the matched text, plus any named groups as named results
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        ret = ParseResults(found.group())
        for group_name, group_value in found.groupdict().items():
            ret[group_name] = group_value
        return found.end(), ret

    def parseImplAsGroupList(self, instring, loc, doActions=True):
        # asGroupList form: return the tuple from the match's groups()
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found.groups()

    def parseImplAsMatch(self, instring, loc, doActions=True):
        # asMatch form: return the match object itself
        found = self.re_match(instring, loc)
        if not found:
            raise ParseException(instring, loc, self.errmsg, self)

        return found.end(), found

    def __str__(self):
        try:
            return super(Regex, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr

    def sub(self, repl):
        r"""
        Return Regex with an attached parse action to transform the parsed
        result as if called using `re.sub(expr, repl, string) <https://docs.python.org/3/library/re.html#re.sub>`_.

        Raises ``SyntaxError`` (after issuing a ``SyntaxWarning``) when this Regex
        was built with ``asGroupList=True``, or with ``asMatch=True`` and a
        callable ``repl``.

        Example::

            make_html = Regex(r"(\w+):(.*?):").sub(r"<\1>\2</\1>")
            print(make_html.transformString("h1:main title:"))
            # prints "<h1>main title</h1>"
        """
        if self.asGroupList:
            warnings.warn("cannot use sub() with Regex(asGroupList=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch and callable(repl):
            warnings.warn("cannot use sub() with a callable with Regex(asMatch=True)",
                          SyntaxWarning, stacklevel=2)
            raise SyntaxError()

        if self.asMatch:
            # the token is a match object: expand the replacement template against it
            def pa(tokens):
                return tokens[0].expand(repl)
        else:
            # the token is the matched text: run the substitution over it
            def pa(tokens):
                return self.re.sub(repl, tokens[0])
        return self.addParseAction(pa)

3409 

3410class QuotedString(Token): 

3411 r""" 

3412 Token for matching strings that are delimited by quoting characters. 

3413 

3414 Defined with the following parameters: 

3415 

3416 - quoteChar - string of one or more characters defining the 

3417 quote delimiting string 

3418 - escChar - character to escape quotes, typically backslash 

3419 (default= ``None``) 

3420 - escQuote - special quote sequence to escape an embedded quote 

3421 string (such as SQL's ``""`` to escape an embedded ``"``) 

3422 (default= ``None``) 

3423 - multiline - boolean indicating whether quotes can span 

3424 multiple lines (default= ``False``) 

3425 - unquoteResults - boolean indicating whether the matched text 

3426 should be unquoted (default= ``True``) 

3427 - endQuoteChar - string of one or more characters defining the 

3428 end of the quote delimited string (default= ``None`` => same as 

3429 quoteChar) 

3430 - convertWhitespaceEscapes - convert escaped whitespace 

3431 (``'\t'``, ``'\n'``, etc.) to actual whitespace 

3432 (default= ``True``) 

3433 

3434 Example:: 

3435 

3436 qs = QuotedString('"') 

3437 print(qs.searchString('lsjdf "This is the quote" sldjf')) 

3438 complex_qs = QuotedString('{{', endQuoteChar='}}') 

3439 print(complex_qs.searchString('lsjdf {{This is the "quote"}} sldjf')) 

3440 sql_qs = QuotedString('"', escQuote='""') 

3441 print(sql_qs.searchString('lsjdf "This is the quote with ""embedded"" quotes" sldjf')) 

3442 

3443 prints:: 

3444 

3445 [['This is the quote']] 

3446 [['This is the "quote"']] 

3447 [['This is the quote with "embedded" quotes']] 

3448 """ 

def __init__(self, quoteChar, escChar=None, escQuote=None, multiline=False,
             unquoteResults=True, endQuoteChar=None, convertWhitespaceEscapes=True):
    """Compile the regular expression used to recognize one quoted string.

    Parameters:
     - quoteChar - string of one or more characters that opens the
       quoted string; surrounding whitespace is stripped
     - escChar - character that escapes the character following it
       (default= ``None``)
     - escQuote - literal sequence accepted inside the string as an
       embedded quote (default= ``None``)
     - multiline - if True, the body may contain line breaks
       (default= ``False``)
     - unquoteResults - stored on the instance; parseImpl strips the
       delimiters and unescapes the body when True (default= ``True``)
     - endQuoteChar - closing delimiter; defaults to ``quoteChar``
     - convertWhitespaceEscapes - stored on the instance; parseImpl
       converts ``\\t``, ``\\n``, ``\\f`` and ``\\r`` escapes when True
       (default= ``True``)

    Raises ``SyntaxError`` (after issuing a ``SyntaxWarning``) when either
    delimiter is empty once stripped, and re-raises the ``re`` compile
    error if the generated pattern is invalid.
    """
    super(QuotedString, self).__init__()

    # remove white space from quote chars - wont work anyway
    quoteChar = quoteChar.strip()
    if not quoteChar:
        msg = "quoteChar cannot be the empty string"
        warnings.warn(msg, SyntaxWarning, stacklevel=2)
        # carry the reason on the exception too, so it is not lost when
        # warnings are filtered out
        raise SyntaxError(msg)

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if not endQuoteChar:
            msg = "endQuoteChar cannot be the empty string"
            warnings.warn(msg, SyntaxWarning, stacklevel=2)
            raise SyntaxError(msg)

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults
    self.convertWhitespaceEscapes = convertWhitespaceEscapes

    # single-line strings additionally exclude CR/LF from the body's
    # character class; multiline strings let the body span lines
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        excludedNewlines = ''
    else:
        self.flags = 0
        excludedNewlines = r'\n\r'
    excludedEscChar = _escapeRegexRangeChars(escChar) if escChar is not None else ''

    # opening quote, then a group of alternatives for the body; the first
    # alternative is any char that cannot start the end quote or an escape
    self.pattern = r'%s(?:[^%s%s%s]' % (re.escape(self.quoteChar),
                                        _escapeRegexRangeChars(self.endQuoteChar[0]),
                                        excludedNewlines,
                                        excludedEscChar)

    if len(self.endQuoteChar) > 1:
        # for a multi-character end quote, also accept any proper prefix
        # of it that is followed by a character breaking the end quote
        self.pattern += (
            '|(?:' + ')|(?:'.join("%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                  for i in range(len(self.endQuoteChar) - 1, 0, -1)) + ')')

    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        # used by parseImpl to drop the escape char and keep what follows
        self.escCharReplacePattern = re.escape(self.escChar) + "(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
        self.re_match = self.re.match
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                      SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True

3513 

def parseImpl(self, instring, loc, doActions=True):
    """Match a quoted string at ``loc``.

    Returns ``(new_loc, text)``. When ``unquoteResults`` is set, the
    delimiters are stripped from ``text`` and escapes are resolved;
    otherwise the raw matched text is returned. Raises ``ParseException``
    if no quoted string starts at ``loc``.
    """
    # cheap first-character test before running the compiled regex
    match = None
    if instring[loc] == self.firstQuoteChar:
        match = self.re_match(instring, loc)
    if not match:
        raise ParseException(instring, loc, self.errmsg, self)

    loc = match.end()
    text = match.group()

    if not self.unquoteResults:
        return loc, text

    # strip off quotes
    text = text[self.quoteCharLen: -self.endQuoteCharLen]

    if isinstance(text, basestring):
        # replace escaped whitespace
        # NOTE: this runs before escChar unescaping, over the whole body
        if self.convertWhitespaceEscapes and '\\' in text:
            whitespace_escapes = {
                r'\t': '\t',
                r'\n': '\n',
                r'\f': '\f',
                r'\r': '\r',
            }
            for literal, actual in whitespace_escapes.items():
                text = text.replace(literal, actual)

        # replace escaped characters
        if self.escChar:
            text = re.sub(self.escCharReplacePattern, r"\g<1>", text)

        # replace escaped quotes
        if self.escQuote:
            text = text.replace(self.escQuote, self.endQuoteChar)

    return loc, text

3548 

def __str__(self):
    """Return the inherited string form, or a cached description of the
    delimiters if the inherited ``__str__`` raises."""
    try:
        return super(QuotedString, self).__str__()
    except Exception:
        # fall through and describe the expression ourselves
        pass

    if self.strRepr is None:
        delimiters = (self.quoteChar, self.endQuoteChar)
        self.strRepr = "quoted string, starting with %s ending with %s" % delimiters

    return self.strRepr

3559 

3560 

class CharsNotIn(Token):
    """Token for matching words composed of characters *not* in a given
    set (will include whitespace in matched characters if not listed in
    the provided exclusion set - see example). Defined with string
    containing all disallowed characters, and an optional minimum,
    maximum, and/or exact length.  The default value for ``min`` is
    1 (a minimum value < 1 is not valid); the default values for
    ``max`` and ``exact`` are 0, meaning no maximum or exact
    length restriction.

    Example::

        # define a comma-separated-value as anything that is not a ','
        csv_value = CharsNotIn(',')
        print(delimitedList(csv_value).parseString("dkls,lsdkjf,s12 34,@!#,213"))

    prints::

        ['dkls', 'lsdkjf', 's12 34', '@!#', '213']
    """
    def __init__(self, notChars, min=1, max=0, exact=0):
        super(CharsNotIn, self).__init__()
        # whitespace may be part of the match, so it must not be skipped
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use "
                             "Optional(CharsNotIn()) if zero-length char group is permitted")

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = (self.minLen == 0)
        self.mayIndexError = False

    def parseImpl(self, instring, loc, doActions=True):
        """Consume characters from ``loc`` until an excluded character, the
        maximum length, or the end of input; return ``(new_loc, text)``."""
        excluded = self.notChars
        if instring[loc] in excluded:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        limit = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

    def __str__(self):
        try:
            return super(CharsNotIn, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            shown = self.notChars
            # abbreviate long exclusion sets to their first four characters
            if len(shown) > 4:
                self.strRepr = "!W:(%s...)" % shown[:4]
            else:
                self.strRepr = "!W:(%s)" % shown

        return self.strRepr

3635 

class White(Token):
    """Special matching class for matching whitespace.  Normally,
    whitespace is ignored by pyparsing grammars.  This class is included
    when some whitespace structures are significant.  Define with
    a string containing the whitespace characters to be matched; default
    is ``" \\t\\r\\n"``.  Also takes optional ``min``,
    ``max``, and ``exact`` arguments, as defined for the
    :class:`Word` class.
    """
    # display names used to build the expression's name from its characters
    whiteStrs = {
        ' ' : '<SP>',
        '\t': '<TAB>',
        '\n': '<LF>',
        '\r': '<CR>',
        '\f': '<FF>',
        u'\u00A0': '<NBSP>',
        u'\u1680': '<OGHAM_SPACE_MARK>',
        u'\u180E': '<MONGOLIAN_VOWEL_SEPARATOR>',
        u'\u2000': '<EN_QUAD>',
        u'\u2001': '<EM_QUAD>',
        u'\u2002': '<EN_SPACE>',
        u'\u2003': '<EM_SPACE>',
        u'\u2004': '<THREE-PER-EM_SPACE>',
        u'\u2005': '<FOUR-PER-EM_SPACE>',
        u'\u2006': '<SIX-PER-EM_SPACE>',
        u'\u2007': '<FIGURE_SPACE>',
        u'\u2008': '<PUNCTUATION_SPACE>',
        u'\u2009': '<THIN_SPACE>',
        u'\u200A': '<HAIR_SPACE>',
        u'\u200B': '<ZERO_WIDTH_SPACE>',
        u'\u202F': '<NNBSP>',
        u'\u205F': '<MMSP>',
        u'\u3000': '<IDEOGRAPHIC_SPACE>',
        }

    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        super(White, self).__init__()
        self.matchWhite = ws
        # keep skipping only the whitespace characters we are not matching
        still_skipped = [c for c in self.whiteChars if c not in ws]
        self.setWhitespaceChars("".join(still_skipped))
        self.name = "".join(White.whiteStrs[c] for c in ws)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        if exact > 0:
            # an exact length overrides both bounds
            self.minLen = self.maxLen = exact
        else:
            self.minLen = min
            self.maxLen = max if max > 0 else _MAX_INT

    def parseImpl(self, instring, loc, doActions=True):
        """Consume a run of the configured whitespace characters starting
        at ``loc``; return ``(new_loc, text)``."""
        whites = self.matchWhite
        if instring[loc] not in whites:
            raise ParseException(instring, loc, self.errmsg, self)

        start = loc
        stop = min(start + self.maxLen, len(instring))
        loc += 1
        while loc < stop and instring[loc] in whites:
            loc += 1

        if loc - start < self.minLen:
            raise ParseException(instring, loc, self.errmsg, self)

        return loc, instring[start:loc]

3704 

3705 

class _PositionToken(Token):
    """Base class for the position-testing tokens (e.g. ``LineStart``,
    ``StringEnd``); names the element after its concrete class."""
    def __init__(self):
        super(_PositionToken, self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__

3712 

class GoToColumn(_PositionToken):
    """Token to advance to a specific column of input text; useful for
    tabular report scraping.
    """
    def __init__(self, colno):
        super(GoToColumn, self).__init__()
        self.col = colno

    def preParse(self, instring, loc):
        """Skip ignorable expressions and whitespace until the target
        column is reached or a non-space character is found."""
        if col(loc, instring) == self.col:
            return loc

        end = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables(instring, loc)
        while loc < end and instring[loc].isspace() and col(loc, instring) != self.col:
            loc += 1
        return loc

    def parseImpl(self, instring, loc, doActions=True):
        """Return the text between ``loc`` and the target column, failing
        if ``loc`` is already past that column."""
        current = col(loc, instring)
        if current > self.col:
            raise ParseException(instring, loc, "Text not in expected column", self)
        target = loc + (self.col - current)
        return target, instring[loc:target]

3737 

3738 

class LineStart(_PositionToken):
    r"""Matches if current position is at the beginning of a line within
    the parse string

    Example::

        test = '''\
        AAA this line
        AAA and this line
          AAA but not this one
        B AAA and definitely not this one
        '''

        for t in (LineStart() + 'AAA' + restOfLine).searchString(test):
            print(t)

    prints::

        ['AAA', ' this line']
        ['AAA', ' and this line']

    """
    def __init__(self):
        super(LineStart, self).__init__()
        self.errmsg = "Expected start of line"

    def parseImpl(self, instring, loc, doActions=True):
        # column numbers are 1-based, so column 1 is the start of a line
        if col(loc, instring) != 1:
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3769 

class LineEnd(_PositionToken):
    """Matches if current position is at the end of a line within the
    parse string
    """
    def __init__(self):
        super(LineEnd, self).__init__()
        # a newline is what we match, so it must not be skipped as whitespace
        self.setWhitespaceChars(ParserElement.DEFAULT_WHITE_CHARS.replace("\n", ""))
        self.errmsg = "Expected end of line"

    def parseImpl(self, instring, loc, doActions=True):
        """Match a newline at ``loc`` (returned as the token), or the end
        of the input (no token); anything else fails."""
        end = len(instring)
        if loc == end:
            return loc + 1, []
        if loc < end and instring[loc] == "\n":
            return loc + 1, "\n"
        raise ParseException(instring, loc, self.errmsg, self)

3789 

class StringStart(_PositionToken):
    """Matches if current position is at the beginning of the parse
    string
    """
    def __init__(self):
        super(StringStart, self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl(self, instring, loc, doActions=True):
        # a non-zero loc still counts as the start if everything before it
        # is just whitespace and ignorables
        if loc != 0 and loc != self.preParse(instring, 0):
            raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3804 

class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string
    """
    def __init__(self):
        super(StringEnd, self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens when ``loc`` is at or beyond the end of
        ``instring``; raise ``ParseException`` otherwise.

        At exactly the end of the input the returned location is
        ``loc + 1``; beyond the end, ``loc`` is returned unchanged.
        """
        end = len(instring)
        if loc < end:
            raise ParseException(instring, loc, self.errmsg, self)
        if loc == end:
            return loc + 1, []
        # loc > end: the three comparisons are exhaustive, so the former
        # trailing "else: raise" branch could never execute
        return loc, []

3821 

class WordStart(_PositionToken):
    r"""Matches if the current position is at the beginning of a Word,
    and is not preceded by any character in a given set of
    ``wordChars`` (default= ``printables``). To emulate the
    ``\b`` behavior of regular expressions, use
    ``WordStart(alphanums)``. ``WordStart`` will also match at
    the beginning of the string being parsed, or at the beginning of
    a line.
    """
    # The docstring is a raw string so that ``\b`` stays as backslash-b
    # instead of becoming a backspace character in __doc__.
    def __init__(self, wordChars=printables):
        super(WordStart, self).__init__()
        self.wordChars = set(wordChars)
        self.errmsg = "Not at the start of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at position 0, or where the previous
        character is not a word character and the current one is."""
        if loc != 0:
            if (instring[loc - 1] in self.wordChars
                    or instring[loc] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3842 

class WordEnd(_PositionToken):
    r"""Matches if the current position is at the end of a Word, and is
    not followed by any character in a given set of ``wordChars``
    (default= ``printables``). To emulate the ``\b`` behavior of
    regular expressions, use ``WordEnd(alphanums)``. ``WordEnd``
    will also match at the end of the string being parsed, or at the end
    of a line.
    """
    # The docstring is a raw string so that ``\b`` stays as backslash-b
    # instead of becoming a backspace character in __doc__.
    def __init__(self, wordChars=printables):
        super(WordEnd, self).__init__()
        self.wordChars = set(wordChars)
        # the character right at loc is inspected, so leading whitespace
        # must not be skipped first
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with no tokens at or past the end of the input, or where
        the current character is not a word character and the previous
        one is."""
        instrlen = len(instring)
        if instrlen > 0 and loc < instrlen:
            if (instring[loc] in self.wordChars
                    or instring[loc - 1] not in self.wordChars):
                raise ParseException(instring, loc, self.errmsg, self)
        return loc, []

3864 

3865 

class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement, for combining and
    post-processing parsed tokens.
    """
    def __init__(self, exprs, savelist=False):
        super(ParseExpression, self).__init__(savelist)
        if isinstance(exprs, _generatorType):
            exprs = list(exprs)

        if isinstance(exprs, basestring):
            self.exprs = [self._literalStringClass(exprs)]
        elif isinstance(exprs, ParserElement):
            self.exprs = [exprs]
        elif isinstance(exprs, Iterable):
            # if sequence of strings provided, wrap with Literal
            self.exprs = [self._literalStringClass(item) if isinstance(item, basestring) else item
                          for item in list(exprs)]
        else:
            try:
                self.exprs = list(exprs)
            except TypeError:
                # not iterable at all: treat it as a single expression
                self.exprs = [exprs]
        self.callPreparse = False

    def append(self, other):
        """Add ``other`` to the contained expressions and return ``self``."""
        self.exprs.append(other)
        # the cached string form no longer reflects the contents
        self.strRepr = None
        return self

    def leaveWhitespace(self):
        """Extends ``leaveWhitespace`` defined in base class, and also invokes ``leaveWhitespace`` on
           all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the original sub-expressions are left untouched
        copies = [e.copy() for e in self.exprs]
        self.exprs = copies
        for sub in copies:
            sub.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable here and on every contained
        expression; a ``Suppress`` that is already registered is not
        added again."""
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self

        super(ParseExpression, self).ignore(other)
        for sub in self.exprs:
            sub.ignore(self.ignoreExprs[-1])
        return self

    def __str__(self):
        try:
            return super(ParseExpression, self).__str__()
        except Exception:
            pass

        if self.strRepr is None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.exprs))
        return self.strRepr

    def streamline(self):
        super(ParseExpression, self).streamline()

        for sub in self.exprs:
            sub.streamline()

        def can_merge(candidate):
            # only a plain nested expression of our own class may be
            # flattened: no parse actions, results name or debugging on it
            return (isinstance(candidate, self.__class__)
                    and not candidate.parseAction
                    and candidate.resultsName is None
                    and not candidate.debug)

        # collapse nested And's of the form And(And(And(a, b), c), d) to And(a, b, c, d)
        # but only if there are no parse actions or resultsNames on the nested And's
        # (likewise for Or's and MatchFirst's)
        if len(self.exprs) == 2:
            first = self.exprs[0]
            if can_merge(first):
                self.exprs = first.exprs[:] + [self.exprs[1]]
                self.strRepr = None
                self.mayReturnEmpty |= first.mayReturnEmpty
                self.mayIndexError |= first.mayIndexError

            last = self.exprs[-1]
            if can_merge(last):
                self.exprs = self.exprs[:-1] + last.exprs[:]
                self.strRepr = None
                self.mayReturnEmpty |= last.mayReturnEmpty
                self.mayIndexError |= last.mayIndexError

        self.errmsg = "Expected " + _ustr(self)

        return self

    def validate(self, validateTrace=None):
        """Validate every contained expression, then check this
        expression for recursion."""
        trace = (validateTrace if validateTrace is not None else [])[:] + [self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion([])

    def copy(self):
        """Return a copy whose contained expressions are copies too."""
        duplicate = super(ParseExpression, self).copy()
        duplicate.exprs = [e.copy() for e in self.exprs]
        return duplicate

    def _setResultsName(self, name, listAllMatches=False):
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            # warn once for each contained expression that already has a
            # results name of its own
            for sub in self.exprs:
                if isinstance(sub, ParserElement) and sub.resultsName:
                    warnings.warn("{0}: setting results name {1!r} on {2} expression "
                                  "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
                                                                                       name,
                                                                                       type(self).__name__,
                                                                                       sub.resultsName),
                                  stacklevel=3)

        return super(ParseExpression, self)._setResultsName(name, listAllMatches)

3985 

3986 

class And(ParseExpression):
    """
    Requires all given :class:`ParseExpression` s to be found in the given order.
    Expressions may be separated by whitespace.
    May be constructed using the ``'+'`` operator.
    May also be constructed using the ``'-'`` operator, which will
    suppress backtracking.

    Example::

        integer = Word(nums)
        name_expr = OneOrMore(Word(alphas))

        expr = And([integer("id"), name_expr("name"), integer("age")])
        # more easily written as:
        expr = integer("id") + name_expr("name") + integer("age")
    """

    class _ErrorStop(Empty):
        # marker element: once parseImpl passes it, later failures are
        # raised as ParseSyntaxException
        def __init__(self, *args, **kwargs):
            super(And._ErrorStop, self).__init__(*args, **kwargs)
            self.name = '-'
            self.leaveWhitespace()

    def __init__(self, exprs, savelist=True):
        exprs = list(exprs)
        if exprs and Ellipsis in exprs:
            # replace each "..." with a SkipTo aimed at the expression after it
            last_index = len(exprs) - 1
            expanded = []
            for i, expr in enumerate(exprs):
                if expr is not Ellipsis:
                    expanded.append(expr)
                    continue
                if i >= last_index:
                    raise Exception("cannot construct And with sequence ending in ...")
                skipto_arg = (Empty() + exprs[i + 1]).exprs[-1]
                expanded.append(SkipTo(skipto_arg)("_skipped*"))
            exprs[:] = expanded
        super(And, self).__init__(exprs, savelist)
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        # whitespace handling follows that of the first expression
        leading = self.exprs[0]
        self.setWhitespaceChars(leading.whiteChars)
        self.skipWhitespace = leading.skipWhitespace
        self.callPreparse = True

    def streamline(self):
        def ends_with_pending_skip(candidate):
            return (isinstance(candidate, ParseExpression)
                    and candidate.exprs
                    and isinstance(candidate.exprs[-1], _PendingSkip))

        # collapse any _PendingSkip's
        if self.exprs and any(ends_with_pending_skip(e) for e in self.exprs[:-1]):
            for i, e in enumerate(self.exprs[:-1]):
                if e is None:
                    continue
                if ends_with_pending_skip(e):
                    # fold the following expression into the pending skip
                    # and blank out its own slot
                    e.exprs[-1] = e.exprs[-1] + self.exprs[i + 1]
                    self.exprs[i + 1] = None
            self.exprs = [e for e in self.exprs if e is not None]

        super(And, self).streamline()
        self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Parse each contained expression in order and return the final
        location with the accumulated tokens."""
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse(instring, loc, doActions, callPreParse=False)
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue

            if not errorStop:
                loc, exprtokens = e._parse(instring, loc, doActions)
            else:
                # past an error stop, failures become ParseSyntaxException
                try:
                    loc, exprtokens = e._parse(instring, loc, doActions)
                except ParseSyntaxException:
                    raise
                except ParseBaseException as pe:
                    pe.__traceback__ = None
                    raise ParseSyntaxException._from_exception(pe)
                except IndexError:
                    raise ParseSyntaxException(instring, len(instring), self.errmsg, self)

            if exprtokens or exprtokens.haskeys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # And([self, other])

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)
            # stop at the first expression that cannot match empty
            if not e.mayReturnEmpty:
                break

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ".join(parts) + "}"

        return self.strRepr

4094 

4095 

class Or(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the expression that matches the longest
    string will be used. May be constructed using the ``'^'``
    operator.

    Example::

        # construct Or using '^' operator

        number = Word(nums) ^ Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789"))

    prints::

        [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(Or, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as
        # possibly matching empty
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self):
        super(Or, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Try every alternative at ``loc`` and return the result of the
        one that matches the most input; if none match, raise the
        failure that got furthest into the input."""
        furthest_loc = -1
        furthest_exc = None
        candidates = []
        for e in self.exprs:
            try:
                end_loc = e.tryParse(instring, loc)
            except ParseException as err:
                err.__traceback__ = None
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), e.errmsg, self)
                    furthest_loc = len(instring)
            else:
                # save match among all matches, to retry longest to shortest
                candidates.append((end_loc, e))

        if candidates:
            # re-evaluate all matches in descending order of length of match, in case attached actions
            # might change whether or how much they match of the input.
            candidates.sort(key=itemgetter(0), reverse=True)

            if not doActions:
                # no further conditions or parse actions to change the selection of
                # alternative, so the first match will be the best match
                best_expr = candidates[0][1]
                return best_expr._parse(instring, loc, doActions)

            longest = -1, None
            for trial_loc, expr in candidates:
                if trial_loc <= longest[0]:
                    # already have a longer match than this one will deliver, we are done
                    return longest

                try:
                    actual_loc, toks = expr._parse(instring, loc, doActions)
                except ParseException as err:
                    err.__traceback__ = None
                    if err.loc > furthest_loc:
                        furthest_exc = err
                        furthest_loc = err.loc
                else:
                    if actual_loc >= trial_loc:
                        return actual_loc, toks
                    # didn't match as much as before
                    elif actual_loc > longest[0]:
                        longest = actual_loc, toks

            if longest != (-1, None):
                return longest

        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ixor__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # Or([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " ^ ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic about naming an alternation that contains an And
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            warnings.warn("{0}: setting results name {1!r} on {2} expression "
                          "may only return a single token for an And alternative, "
                          "in future will return the full list of tokens".format(
                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
                          stacklevel=3)

        return super(Or, self)._setResultsName(name, listAllMatches)

4217 

4218 

class MatchFirst(ParseExpression):
    """Requires that at least one :class:`ParseExpression` is found. If
    two expressions match, the first one listed is the one that will
    match. May be constructed using the ``'|'`` operator.

    Example::

        # construct MatchFirst using '|' operator

        # watch the order of expressions to match
        number = Word(nums) | Combine(Word(nums) + '.' + Word(nums))
        print(number.searchString("123 3.1416 789")) #  Fail! -> [['123'], ['3'], ['1416'], ['789']]

        # put more selective expression first
        number = Combine(Word(nums) + '.' + Word(nums)) | Word(nums)
        print(number.searchString("123 3.1416 789")) #  Better -> [['123'], ['3.1416'], ['789']]
    """
    def __init__(self, exprs, savelist=False):
        super(MatchFirst, self).__init__(exprs, savelist)
        # with no alternatives at all, the expression is treated as
        # possibly matching empty
        if self.exprs:
            self.mayReturnEmpty = any(e.mayReturnEmpty for e in self.exprs)
        else:
            self.mayReturnEmpty = True

    def streamline(self):
        super(MatchFirst, self).streamline()
        if __compat__.collect_all_And_tokens:
            self.saveAsList = any(e.saveAsList for e in self.exprs)
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Return the result of the first alternative that parses at
        ``loc``; if none do, raise the failure that got furthest."""
        furthest_loc = -1
        furthest_exc = None
        for e in self.exprs:
            try:
                return e._parse(instring, loc, doActions)
            except ParseException as err:
                if err.loc > furthest_loc:
                    furthest_exc = err
                    furthest_loc = err.loc
            except IndexError:
                if len(instring) > furthest_loc:
                    furthest_exc = ParseException(instring, len(instring), e.errmsg, self)
                    furthest_loc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if furthest_exc is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        furthest_exc.msg = self.errmsg
        raise furthest_exc

    def __ior__(self, other):
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        return self.append(other)  # MatchFirst([self, other])

    def __str__(self):
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            parts = [_ustr(e) for e in self.exprs]
            self.strRepr = "{" + " | ".join(parts) + "}"

        return self.strRepr

    def checkRecursion(self, parseElementList):
        subRecCheckList = parseElementList[:] + [self]
        for e in self.exprs:
            e.checkRecursion(subRecCheckList)

    def _setResultsName(self, name, listAllMatches=False):
        # optional diagnostic about naming an alternation that contains an And
        if (not __compat__.collect_all_And_tokens
                and __diag__.warn_multiple_tokens_in_named_alternation
                and any(isinstance(e, And) for e in self.exprs)):
            warnings.warn("{0}: setting results name {1!r} on {2} expression "
                          "may only return a single token for an And alternative, "
                          "in future will return the full list of tokens".format(
                              "warn_multiple_tokens_in_named_alternation", name, type(self).__name__),
                          stacklevel=3)

        return super(MatchFirst, self)._setResultsName(name, listAllMatches)

4303 

4304 

4305class Each(ParseExpression): 

4306 """Requires all given :class:`ParseExpression` s to be found, but in 

4307 any order. Expressions may be separated by whitespace. 

4308 

4309 May be constructed using the ``'&'`` operator. 

4310 

4311 Example:: 

4312 

4313 color = oneOf("RED ORANGE YELLOW GREEN BLUE PURPLE BLACK WHITE BROWN") 

4314 shape_type = oneOf("SQUARE CIRCLE TRIANGLE STAR HEXAGON OCTAGON") 

4315 integer = Word(nums) 

4316 shape_attr = "shape:" + shape_type("shape") 

4317 posn_attr = "posn:" + Group(integer("x") + ',' + integer("y"))("posn") 

4318 color_attr = "color:" + color("color") 

4319 size_attr = "size:" + integer("size") 

4320 

4321 # use Each (using operator '&') to accept attributes in any order 

4322 # (shape and posn are required, color and size are optional) 

4323 shape_spec = shape_attr & posn_attr & Optional(color_attr) & Optional(size_attr) 

4324 

4325 shape_spec.runTests(''' 

4326 shape: SQUARE color: BLACK posn: 100, 120 

4327 shape: CIRCLE size: 50 color: BLUE posn: 50,80 

4328 color:GREEN size:20 shape:TRIANGLE posn:20,40 

4329 ''' 

4330 ) 

4331 

4332 prints:: 

4333 

4334 shape: SQUARE color: BLACK posn: 100, 120 

4335 ['shape:', 'SQUARE', 'color:', 'BLACK', 'posn:', ['100', ',', '120']] 

4336 - color: BLACK 

4337 - posn: ['100', ',', '120'] 

4338 - x: 100 

4339 - y: 120 

4340 - shape: SQUARE 

4341 

4342 

4343 shape: CIRCLE size: 50 color: BLUE posn: 50,80 

4344 ['shape:', 'CIRCLE', 'size:', '50', 'color:', 'BLUE', 'posn:', ['50', ',', '80']] 

4345 - color: BLUE 

4346 - posn: ['50', ',', '80'] 

4347 - x: 50 

4348 - y: 80 

4349 - shape: CIRCLE 

4350 - size: 50 

4351 

4352 

4353 color: GREEN size: 20 shape: TRIANGLE posn: 20,40 

4354 ['color:', 'GREEN', 'size:', '20', 'shape:', 'TRIANGLE', 'posn:', ['20', ',', '40']] 

4355 - color: GREEN 

4356 - posn: ['20', ',', '40'] 

4357 - x: 20 

4358 - y: 40 

4359 - shape: TRIANGLE 

4360 - size: 20 

4361 """ 

4362 def __init__(self, exprs, savelist=True): 

4363 super(Each, self).__init__(exprs, savelist) 

4364 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 

4365 self.skipWhitespace = True 

4366 self.initExprGroups = True 

4367 self.saveAsList = True 

4368 

4369 def streamline(self): 

4370 super(Each, self).streamline() 

4371 self.mayReturnEmpty = all(e.mayReturnEmpty for e in self.exprs) 

4372 return self 

4373 

def parseImpl(self, instring, loc, doActions=True):
    """Match all contained expressions, in whatever order they occur.

    A first pass uses ``tryParse`` to discover the order in which the
    expressions match; a second pass then parses for real in that order
    (running parse actions if ``doActions`` is set).

    Returns a ``(loc, ParseResults)`` tuple.  Raises ``ParseException``
    if any required expression was never matched.
    """
    if self.initExprGroups:
        # one-time classification of the contained expressions
        optional_wrappers = [e for e in self.exprs if isinstance(e, Optional)]
        # map id(inner expr) -> its Optional wrapper, so the wrapper is what gets parsed
        self.opt1map = dict((id(w.expr), w) for w in optional_wrappers)
        unwrapped_optionals = [w.expr for w in optional_wrappers]
        empty_matchers = [e for e in self.exprs
                          if e.mayReturnEmpty and not isinstance(e, (Optional, Regex))]
        self.optionals = unwrapped_optionals + empty_matchers
        self.multioptionals = [e.expr for e in self.exprs if isinstance(e, ZeroOrMore)]
        self.multirequired = [e.expr for e in self.exprs if isinstance(e, OneOrMore)]
        self.required = [e for e in self.exprs
                         if not isinstance(e, (Optional, ZeroOrMore, OneOrMore))]
        self.required += self.multirequired
        self.initExprGroups = False

    scan_loc = loc
    pending_required = self.required[:]
    pending_optional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full sweep matches nothing
    while True:
        candidates = pending_required + pending_optional + self.multioptionals + self.multirequired
        num_failed = 0
        for cand in candidates:
            try:
                scan_loc = cand.tryParse(instring, scan_loc)
            except ParseException:
                num_failed += 1
                continue
            matchOrder.append(self.opt1map.get(id(cand), cand))
            if cand in pending_required:
                pending_required.remove(cand)
            elif cand in pending_optional:
                pending_optional.remove(cand)
        if num_failed == len(candidates):
            break

    if pending_required:
        missing = ", ".join(_ustr(e) for e in pending_required)
        raise ParseException(instring, loc, "Missing one or more required elements (%s)" % missing)

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [e for e in self.exprs if isinstance(e, Optional) and e.expr in pending_optional]

    collected = []
    for matcher in matchOrder:
        loc, toks = matcher._parse(instring, loc, doActions)
        collected.append(toks)

    return loc, sum(collected, ParseResults([]))

4422 

def __str__(self):
    """Return the assigned name if there is one, otherwise a cached
    ``{a & b & ...}`` rendering of the contained expressions.
    """
    if hasattr(self, "name"):
        return self.name

    if self.strRepr is None:
        joined = " & ".join(_ustr(sub) for sub in self.exprs)
        self.strRepr = "{" + joined + "}"

    return self.strRepr

4431 

def checkRecursion(self, parseElementList):
    """Run the recursion check on every contained expression, passing
    along a copy of ``parseElementList`` extended with this element.
    """
    trail = parseElementList + [self]
    for sub in self.exprs:
        sub.checkRecursion(trail)

4436 

4437 

class ParseElementEnhance(ParserElement):
    """Abstract subclass of :class:`ParserElement`, for combining and
    post-processing parsed tokens.

    Wraps a single contained expression, stored as ``self.expr``, which
    may be ``None``; every method guards against that case.
    """
    def __init__(self, expr, savelist=False):
        """Wrap ``expr``; a plain string is promoted to a literal element
        using ``_literalStringClass``.
        """
        super(ParseElementEnhance, self).__init__(savelist)
        if isinstance(expr, basestring):
            if issubclass(self._literalStringClass, Token):
                expr = self._literalStringClass(expr)
            else:
                expr = self._literalStringClass(Literal(expr))
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            # mirror the wrapped expression's parsing attributes
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars(expr.whiteChars)
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl(self, instring, loc, doActions=True):
        """Delegate to the contained expression; raise ``ParseException``
        if there is no contained expression.
        """
        if self.expr is not None:
            return self.expr._parse(instring, loc, doActions, callPreParse=False)
        else:
            raise ParseException("", loc, self.errmsg, self)

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element and on a private
        copy of the contained expression.  Returns ``self``.
        """
        self.skipWhitespace = False
        # copy only when there is something to copy; the original called
        # copy() before the None check and raised AttributeError
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore(self, other):
        """Register ``other`` as ignorable on this element and on the
        contained expression.  A ``Suppress`` that is already registered
        is not added again.  Returns ``self``.
        """
        if isinstance(other, Suppress) and other in self.ignoreExprs:
            return self
        super(ParseElementEnhance, self).ignore(other)
        if self.expr is not None:
            self.expr.ignore(self.ignoreExprs[-1])
        return self

    def streamline(self):
        """Streamline this element and the contained expression."""
        super(ParseElementEnhance, self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion(self, parseElementList):
        """Raise ``RecursiveGrammarException`` if this element already
        appears in ``parseElementList``; otherwise continue the check in
        the contained expression.
        """
        if self in parseElementList:
            raise RecursiveGrammarException(parseElementList + [self])
        subRecCheckList = parseElementList[:] + [self]
        if self.expr is not None:
            self.expr.checkRecursion(subRecCheckList)

    def validate(self, validateTrace=None):
        """Validate the contained expression, then check this element
        for recursion.
        """
        if validateTrace is None:
            validateTrace = []
        tmp = validateTrace[:] + [self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Prefer the base class rendering; if that fails, fall back to a
        cached ``ClassName:(expr)`` form.
        """
        try:
            return super(ParseElementEnhance, self).__str__()
        except Exception:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % (self.__class__.__name__, _ustr(self.expr))
        return self.strRepr

4515 

4516 

class FollowedBy(ParseElementEnhance):
    """Lookahead matching of the given parse expression.
    ``FollowedBy`` does *not* advance the parsing position within
    the input string, it only verifies that the specified parse
    expression matches at the current position.  ``FollowedBy``
    always returns a null token list. If any results names are defined
    in the lookahead expression, those *will* be returned for access by
    name.

    Example::

        # use FollowedBy to match a label only if it is followed by a ':'
        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        OneOrMore(attr_expr).parseString("shape: SQUARE color: BLACK posn: upper left").pprint()

    prints::

        [['shape', 'SQUARE'], ['color', 'BLACK'], ['posn', 'upper left']]
    """
    def __init__(self, expr):
        super(FollowedBy, self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse the lookahead expression at ``loc`` and return the
        unchanged ``loc`` together with its results, emptied of tokens.
        """
        # parse with self.expr._parse, then delete the token list contents
        # in place so that any named results on the returned ParseResults
        # are kept
        lookahead = self.expr._parse(instring, loc, doActions=doActions)[1]
        del lookahead[:]

        return loc, lookahead

4550 

4551 

class PrecededBy(ParseElementEnhance):
    """Lookbehind matching of the given parse expression.
    ``PrecededBy`` does not advance the parsing position within the
    input string, it only verifies that the specified parse expression
    matches prior to the current position.  ``PrecededBy`` always
    returns a null token list, but if a results name is defined on the
    given expression, it is returned.

    Parameters:

     - expr - expression that must match prior to the current parse
       location
     - retreat - (default= ``None``) - (int) maximum number of characters
       to lookbehind prior to the current parse location

    If the lookbehind expression is a string, Literal, Keyword, or
    a Word or CharsNotIn with a specified exact or maximum length, then
    the retreat parameter is not required. Otherwise, retreat must be
    specified to give a maximum number of characters to look back from
    the current parse position for a lookbehind match.

    Example::

        # VB-style variable names with type prefixes
        int_var = PrecededBy("#") + pyparsing_common.identifier
        str_var = PrecededBy("$") + pyparsing_common.identifier

    """
    def __init__(self, expr, retreat=None):
        super(PrecededBy, self).__init__(expr)
        self.expr = self.expr().leaveWhitespace()
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.exact = False
        # basestring (not str) so the check agrees with the string
        # promotion done by ParseElementEnhance.__init__
        if isinstance(expr, basestring):
            retreat = len(expr)
            self.exact = True
        elif isinstance(expr, (Literal, Keyword)):
            retreat = expr.matchLen
            self.exact = True
        elif isinstance(expr, (Word, CharsNotIn)) and expr.maxLen != _MAX_INT:
            retreat = expr.maxLen
            self.exact = True
        elif isinstance(expr, _PositionToken):
            retreat = 0
            self.exact = True
        self.retreat = retreat
        self.errmsg = "not preceded by " + _ustr(expr)
        self.skipWhitespace = False
        # parse action empties the token list in place
        self.parseAction.append(lambda s, l, t: t.__delitem__(slice(None, None)))

    def parseImpl(self, instring, loc=0, doActions=True):
        """Check that the expression matches immediately before ``loc``.

        Returns ``(loc, results)`` with ``loc`` unchanged; raises a parse
        exception when no lookbehind match is found.
        """
        if self.exact:
            # retreat was derived from the expression: step back that far and parse
            if loc < self.retreat:
                raise ParseException(instring, loc, self.errmsg)
            start = loc - self.retreat
            _, ret = self.expr._parse(instring, start)
        else:
            # retreat specified a maximum lookbehind window, iterate
            test_expr = self.expr + StringEnd()
            instring_slice = instring[max(0, loc - self.retreat):loc]
            last_exc = ParseException(instring, loc, self.errmsg)
            for offset in range(1, min(loc, self.retreat + 1) + 1):
                try:
                    _, ret = test_expr._parse(instring_slice, len(instring_slice) - offset)
                except ParseBaseException as pbe:
                    last_exc = pbe
                else:
                    break
            else:
                # no offset produced a match that ends at loc
                raise last_exc
        return loc, ret

4625 

4626 

class NotAny(ParseElementEnhance):
    """Lookahead to disallow matching with the given parse expression.
    ``NotAny`` does *not* advance the parsing position within the
    input string, it only verifies that the specified parse expression
    does *not* match at the current position.  Also, ``NotAny`` does
    *not* skip over leading whitespace. ``NotAny`` always returns
    a null token list.  May be constructed using the '~' operator.

    Example::

        AND, OR, NOT = map(CaselessKeyword, "AND OR NOT".split())

        # take care not to mistake keywords for identifiers
        ident = ~(AND | OR | NOT) + Word(alphas)
        boolean_term = Optional(NOT) + ident

        # very crude boolean expression - to support parenthesis groups and
        # operation hierarchy, use infixNotation
        boolean_expr = boolean_term + ZeroOrMore((AND | OR) + boolean_term)

        # integers that are followed by "." are actually floats
        integer = Word(nums) + ~Char(".")
    """
    def __init__(self, expr):
        super(NotAny, self).__init__(expr)
        # set the flag directly rather than calling self.leaveWhitespace(),
        # which would propagate to the contained expression
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Succeed with an empty token list only when the contained
        expression cannot parse at ``loc``.
        """
        unwanted_found = self.expr.canParseNext(instring, loc)
        if not unwanted_found:
            return loc, []
        raise ParseException(instring, loc, self.errmsg, self)

    def __str__(self):
        """Return the assigned name, or a cached ``~{expr}`` rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "~{" + _ustr(self.expr) + "}"

        return self.strRepr

4670 

class _MultipleMatch(ParseElementEnhance):
    """Shared implementation of repetition of a contained expression,
    with an optional terminating sentinel (``stopOn``).
    """
    def __init__(self, expr, stopOn=None):
        super(_MultipleMatch, self).__init__(expr)
        self.saveAsList = True
        sentinel = stopOn
        if isinstance(sentinel, basestring):
            sentinel = self._literalStringClass(sentinel)
        self.stopOn(sentinel)

    def stopOn(self, ender):
        """Set (or clear, with ``None``) the sentinel expression at which
        repetition stops.  Returns ``self``.
        """
        if isinstance(ender, basestring):
            ender = self._literalStringClass(ender)
        if ender is not None:
            self.not_ender = ~ender
        else:
            self.not_ender = None
        return self

    def parseImpl(self, instring, loc, doActions=True):
        """Match the contained expression once (mandatory), then as many
        further times as possible.  Returns ``(loc, tokens)``.
        """
        parse_expr = self.expr._parse
        skip_ignorables = self._skipIgnorables
        # bound tryParse of the negated sentinel, or None when no sentinel is set
        guard = self.not_ender.tryParse if self.not_ender is not None else None

        # must be at least one (but first see if we are the stopOn sentinel;
        # if so, fail)
        if guard is not None:
            guard(instring, loc)
        loc, tokens = parse_expr(instring, loc, doActions, callPreParse=False)
        try:
            has_ignores = bool(self.ignoreExprs)
            while True:
                if guard is not None:
                    guard(instring, loc)
                preloc = skip_ignorables(instring, loc) if has_ignores else loc
                loc, more = parse_expr(instring, preloc, doActions)
                if more or more.haskeys():
                    tokens += more
        except (ParseException, IndexError):
            # first failed repetition (or sentinel hit) ends the loop
            pass

        return loc, tokens

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, optionally warning when a contained
        expression already carries a results name of its own.
        """
        if __diag__.warn_ungrouped_named_tokens_in_collection:
            contained = [self.expr] + getattr(self.expr, 'exprs', [])
            for sub in contained:
                if not (isinstance(sub, ParserElement) and sub.resultsName):
                    continue
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "collides with {3!r} on contained expression".format("warn_ungrouped_named_tokens_in_collection",
                                                                                   name,
                                                                                   type(self).__name__,
                                                                                   sub.resultsName),
                              stacklevel=3)

        return super(_MultipleMatch, self)._setResultsName(name, listAllMatches)

4727 

4728 

class OneOrMore(_MultipleMatch):
    """Repetition of one or more of the given expression.

    Parameters:
     - expr - expression that must match one or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word).setParseAction(' '.join))

        text = "shape: SQUARE posn: upper left color: BLACK"
        OneOrMore(attr_expr).parseString(text).pprint()  # Fail! read 'color' as data instead of next label -> [['shape', 'SQUARE color']]

        # use stopOn attribute for OneOrMore to avoid reading label string as part of the data
        attr_expr = Group(label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        OneOrMore(attr_expr).parseString(text).pprint() # Better -> [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'BLACK']]

        # could also be written as
        (attr_expr * (1,)).parseString(text).pprint()
    """

    def __str__(self):
        """Return the assigned name, or a cached ``{expr}...`` rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "{" + inner + "}..."

        return self.strRepr

4763 

class ZeroOrMore(_MultipleMatch):
    """Optional repetition of zero or more of the given expression.

    Parameters:
     - expr - expression that must match zero or more times
     - stopOn - (default= ``None``) - expression for a terminating sentinel
          (only required if the sentinel would ordinarily match the repetition
          expression)

    Example: similar to :class:`OneOrMore`
    """
    def __init__(self, expr, stopOn=None):
        super(ZeroOrMore, self).__init__(expr, stopOn=stopOn)
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Parse as the base class does, but turn a failure to match even
        once into an empty match at the original location.
        """
        try:
            outcome = super(ZeroOrMore, self).parseImpl(instring, loc, doActions)
        except (ParseException, IndexError):
            outcome = loc, []
        return outcome

    def __str__(self):
        """Return the assigned name, or a cached ``[expr]...`` rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]..."

        return self.strRepr

4793 

4794 

4795class _NullToken(object): 

4796 def __bool__(self): 

4797 return False 

4798 __nonzero__ = __bool__ 

4799 def __str__(self): 

4800 return "" 

4801 

class Optional(ParseElementEnhance):
    """Optional matching of the given expression.

    Parameters:
     - expr - expression that may match zero or one time
     - default (optional) - value to be returned if the optional expression is not found.

    Example::

        # US postal code can be a 5-digit zip, plus optional 4-digit qualifier
        zip = Combine(Word(nums, exact=5) + Optional('-' + Word(nums, exact=4)))
        zip.runTests('''
            # traditional ZIP code
            12345

            # ZIP+4 form
            12101-0001

            # invalid ZIP
            98765-
            ''')

    prints::

        # traditional ZIP code
        12345
        ['12345']

        # ZIP+4 form
        12101-0001
        ['12101-0001']

        # invalid ZIP
        98765-
             ^
        FAIL: Expected end of text (at char 5), (line:1, col:6)
    """
    # sentinel meaning "no default supplied"
    __optionalNotMatched = _NullToken()

    def __init__(self, expr, default=__optionalNotMatched):
        super(Optional, self).__init__(expr, savelist=False)
        self.saveAsList = self.expr.saveAsList
        self.defaultValue = default
        self.mayReturnEmpty = True

    def parseImpl(self, instring, loc, doActions=True):
        """Try the contained expression; when it does not match, return
        the default value (if one was given) or an empty token list,
        without advancing ``loc``.
        """
        try:
            loc, tokens = self.expr._parse(instring, loc, doActions, callPreParse=False)
        except (ParseException, IndexError):
            default = self.defaultValue
            if default is self.__optionalNotMatched:
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the expression's results name too
                tokens = ParseResults([default])
                tokens[self.expr.resultsName] = default
            else:
                tokens = [default]
        return loc, tokens

    def __str__(self):
        """Return the assigned name, or a cached ``[expr]`` rendering."""
        if hasattr(self, "name"):
            return self.name

        if self.strRepr is None:
            inner = _ustr(self.expr)
            self.strRepr = "[" + inner + "]"

        return self.strRepr

4869 

class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched
    expression is found.

    Parameters:
     - expr - target expression marking the end of the data to be skipped
     - include - (default= ``False``) if True, the target expression is also parsed
          (the skipped text and target expression are returned as a 2-element list).
     - ignore - (default= ``None``) used to define grammars (typically quoted strings and
          comments) that might contain false matches to the target expression
     - failOn - (default= ``None``) define expressions that are not allowed to be
          included in the skipped text; if found before the target expression is found,
          the SkipTo is not a match

    Example::

        report = '''
            Outstanding Issues Report - 1 Jan 2000

               # | Severity | Description                               |  Days Open
            -----+----------+-------------------------------------------+-----------
             101 | Critical | Intermittent system crash                 |          6
              94 | Cosmetic | Spelling error on Login ('log|n')         |         14
              79 | Minor    | System slow when running too many reports |         47
            '''
        integer = Word(nums)
        SEP = Suppress('|')
        # use SkipTo to simply match everything up until the next SEP
        # - ignore quoted strings, so that a '|' character inside a quoted string does not match
        # - parse action will call token.strip() for each matched token, i.e., the description body
        string_data = SkipTo(SEP, ignore=quotedString)
        string_data.setParseAction(tokenMap(str.strip))
        ticket_expr = (integer("issue_num") + SEP
                      + string_data("sev") + SEP
                      + string_data("desc") + SEP
                      + integer("days_open"))

        for tkt in ticket_expr.searchString(report):
            print(tkt.dump())

    prints::

        ['101', 'Critical', 'Intermittent system crash', '6']
        - days_open: 6
        - desc: Intermittent system crash
        - issue_num: 101
        - sev: Critical
        ['94', 'Cosmetic', "Spelling error on Login ('log|n')", '14']
        - days_open: 14
        - desc: Spelling error on Login ('log|n')
        - issue_num: 94
        - sev: Cosmetic
        ['79', 'Minor', 'System slow when running too many reports', '47']
        - days_open: 47
        - desc: System slow when running too many reports
        - issue_num: 79
        - sev: Minor
    """
    def __init__(self, other, include=False, ignore=None, failOn=None):
        super(SkipTo, self).__init__(other)
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.saveAsList = False
        if isinstance(failOn, basestring):
            self.failOn = self._literalStringClass(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for " + _ustr(self.expr)

    def parseImpl(self, instring, loc, doActions=True):
        """Scan forward from ``loc`` for the target expression.

        Returns ``(loc, results)`` where results holds the skipped text
        and, when ``include`` was set, the tokens of the target as well.
        Raises ``ParseException`` if the end of ``instring`` is reached
        without finding the target.
        """
        startloc = loc
        instrlen = len(instring)
        expr_parse = self.expr._parse
        self_failOn_canParseNext = self.failOn.canParseNext if self.failOn is not None else None
        self_ignoreExpr_tryParse = self.ignoreExpr.tryParse if self.ignoreExpr is not None else None

        tmploc = loc
        while tmploc <= instrlen:
            if self_failOn_canParseNext is not None:
                # break if failOn expression matches
                # NOTE(review): this break bypasses the while/else raise below,
                # so the text up to the failOn location is returned rather than
                # a ParseException being raised here - confirm this is intended
                if self_failOn_canParseNext(instring, tmploc):
                    break

            if self_ignoreExpr_tryParse is not None:
                # advance past ignore expressions
                while 1:
                    try:
                        tmploc = self_ignoreExpr_tryParse(instring, tmploc)
                    except ParseBaseException:
                        break

            try:
                expr_parse(instring, tmploc, doActions=False, callPreParse=False)
            except (ParseException, IndexError):
                # no match, advance loc in string
                tmploc += 1
            else:
                # matched skipto expr, done
                break

        else:
            # ran off the end of the input string without matching skipto expr, fail
            raise ParseException(instring, loc, self.errmsg, self)

        # build up return values
        loc = tmploc
        skiptext = instring[startloc:loc]
        skipresult = ParseResults(skiptext)

        if self.includeMatch:
            # parse the target for real this time, running parse actions if requested
            loc, mat = expr_parse(instring, loc, doActions, callPreParse=False)
            skipresult += mat

        return loc, skipresult

4987 

class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
    used for recursive grammars, such as algebraic infix notation.
    When the expression is known, it is assigned to the ``Forward``
    variable using the '<<' operator.

    Note: take care when assigning to ``Forward`` not to overlook
    precedence of operators.

    Specifically, '|' has a lower precedence than '<<', so that::

        fwdExpr << a | b | c

    will actually be evaluated as::

        (fwdExpr << a) | b | c

    thereby leaving b and c out as parseable alternatives.  It is recommended that you
    explicitly group the values inserted into the ``Forward``::

        fwdExpr << (a | b | c)

    Converting to use the '<<=' operator instead will avoid this problem.

    See :class:`ParseResults.pprint` for an example of a recursive
    parser created using ``Forward``.
    """
    def __init__(self, other=None):
        super(Forward, self).__init__(other, savelist=False)

    def __lshift__(self, other):
        """Assign the contained expression and copy its parsing
        attributes onto this element.  Returns ``self``.
        """
        if isinstance(other, basestring):
            other = self._literalStringClass(other)
        self.expr = other
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars(self.expr.whiteChars)
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return self

    def __ilshift__(self, other):
        return self << other

    def leaveWhitespace(self):
        """Disable whitespace skipping on this element only; the
        contained expression is left untouched.
        """
        self.skipWhitespace = False
        return self

    def streamline(self):
        """Streamline the contained expression once; the flag is set
        first so that a recursive grammar does not loop back here.
        """
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate(self, validateTrace=None):
        """Validate the contained expression unless this element is
        already in ``validateTrace``, then check for recursion.
        """
        if validateTrace is None:
            validateTrace = []

        if self not in validateTrace:
            tmp = validateTrace[:] + [self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__(self):
        """Return the assigned name, or a cached ``Forward: <expr>``
        rendering with the expression text limited to 1000 characters.
        """
        if hasattr(self, "name"):
            return self.name
        if self.strRepr is not None:
            return self.strRepr

        # Avoid infinite recursion by setting a temporary strRepr
        self.strRepr = ": ..."

        # Use the string representation of main expression.
        retString = '...'
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)[:1000]
            else:
                retString = "None"
        finally:
            self.strRepr = self.__class__.__name__ + ": " + retString
        return self.strRepr

    def copy(self):
        """Copy this element; an empty ``Forward`` is copied as a new
        ``Forward`` that refers back to this one.
        """
        if self.expr is not None:
            return super(Forward, self).copy()
        else:
            ret = Forward()
            ret <<= self
            return ret

    def _setResultsName(self, name, listAllMatches=False):
        """Set the results name, optionally warning when no expression
        has been assigned to this ``Forward`` yet.
        """
        if __diag__.warn_name_set_on_empty_Forward:
            if self.expr is None:
                # field indices fixed: {1!r} is the results name and {2} the
                # class name (the original printed the diagnostic name twice)
                warnings.warn("{0}: setting results name {1!r} on {2} expression "
                              "that has no contained expression".format("warn_name_set_on_empty_Forward",
                                                                        name,
                                                                        type(self).__name__),
                              stacklevel=3)

        return super(Forward, self)._setResultsName(name, listAllMatches)

5093 

class TokenConverter(ParseElementEnhance):
    """
    Abstract subclass of :class:`ParseElementEnhance`, for converting parsed results.
    """
    def __init__(self, expr, savelist=False):
        # savelist is accepted but not forwarded to the base class
        super(TokenConverter, self).__init__(expr)
        self.saveAsList = False

5101 

class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
    By default, the matching patterns must also be contiguous in the
    input string; this can be disabled by specifying
    ``'adjacent=False'`` in the constructor.

    Example::

        real = Word(nums) + '.' + Word(nums)
        print(real.parseString('3.1416')) # -> ['3', '.', '1416']
        # will also erroneously match the following
        print(real.parseString('3. 1416')) # -> ['3', '.', '1416']

        real = Combine(Word(nums) + '.' + Word(nums))
        print(real.parseString('3.1416')) # -> ['3.1416']
        # no match when there are internal spaces
        print(real.parseString('3. 1416')) # -> Exception: Expected W:(0123...)
    """
    def __init__(self, expr, joinString="", adjacent=True):
        super(Combine, self).__init__(expr)
        # suppress whitespace-stripping in contained parse expressions, but re-enable it on the Combine itself
        if adjacent:
            self.leaveWhitespace()
        self.adjacent = adjacent
        self.skipWhitespace = True
        self.joinString = joinString
        self.callPreparse = True

    def ignore(self, other):
        """Register an ignorable expression.  When tokens must be
        adjacent it is registered on this element only, not on the
        contained expression.  Returns ``self``.
        """
        if not self.adjacent:
            super(Combine, self).ignore(other)
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse(self, instring, loc, tokenlist):
        """Replace the matched tokens with their single joined string,
        keeping any named results from ``tokenlist``.
        """
        combined = "".join(tokenlist._asStringList(self.joinString))
        retToks = tokenlist.copy()
        del retToks[:]
        retToks += ParseResults([combined], modal=self.modalResults)

        if self.resultsName and retToks.haskeys():
            return [retToks]
        return retToks

5146 

class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for
    returning tokens of :class:`ZeroOrMore` and :class:`OneOrMore` expressions.

    Example::

        ident = Word(alphas)
        num = Word(nums)
        term = ident | num
        func = ident + Optional(delimitedList(term))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', 'a', 'b', '100']

        func = ident + Group(Optional(delimitedList(term)))
        print(func.parseString("fn a, b, 100"))  # -> ['fn', ['a', 'b', '100']]
    """
    def __init__(self, expr):
        super(Group, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Nest the matched tokens inside a one-element list."""
        grouped = [tokenlist]
        return grouped

5168 

class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also
    as a dictionary. Each element can also be referenced using the first
    token in the expression as its key. Useful for tabular report
    scraping when the first column can be used as a item key.

    Example::

        data_word = Word(alphas)
        label = data_word + FollowedBy(':')

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))

        # print attributes as plain groups
        print(OneOrMore(attr_expr).parseString(text).dump())

        # instead of OneOrMore(expr), parse using Dict(OneOrMore(Group(expr))) - Dict will auto-assign names
        result = Dict(OneOrMore(Group(attr_expr))).parseString(text)
        print(result.dump())

        # access named fields as dict entries, or output as dict
        print(result['shape'])
        print(result.asDict())

    prints::

        ['shape', 'SQUARE', 'posn', 'upper left', 'color', 'light blue', 'texture', 'burlap']
        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        {'color': 'light blue', 'posn': 'upper left', 'texture': 'burlap', 'shape': 'SQUARE'}

    See more examples at :class:`ParseResults` of accessing fields by results name.
    """
    def __init__(self, expr):
        super(Dict, self).__init__(expr)
        self.saveAsList = True

    def postParse(self, instring, loc, tokenlist):
        """Register each non-empty token group in ``tokenlist`` under a
        key taken from the group's first item.
        """
        for position, group in enumerate(tokenlist):
            size = len(group)
            if size == 0:
                continue

            key = group[0]
            if isinstance(key, int):
                # integer keys are stored in stripped string form
                key = _ustr(group[0]).strip()

            if size == 1:
                # key with no value
                value = ""
            elif size == 2 and not isinstance(group[1], ParseResults):
                # simple key/value pair
                value = group[1]
            else:
                # everything after the key; unwrap a lone item that has no names
                rest = group.copy()
                del rest[0]
                if len(rest) == 1 and not (isinstance(rest, ParseResults) and rest.haskeys()):
                    value = rest[0]
                else:
                    value = rest
            tokenlist[key] = _ParseResultsWithOffset(value, position)

        if self.resultsName:
            return [tokenlist]
        return tokenlist

5235 

5236 

class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression.

    Example::

        source = "a, b, c,d"
        wd = Word(alphas)
        wd_list1 = wd + ZeroOrMore(',' + wd)
        print(wd_list1.parseString(source))

        # often, delimiters that are useful during parsing are just in the
        # way afterward - use Suppress to keep them out of the parsed output
        wd_list2 = wd + ZeroOrMore(Suppress(',') + wd)
        print(wd_list2.parseString(source))

    prints::

        ['a', ',', 'b', ',', 'c', ',', 'd']
        ['a', 'b', 'c', 'd']

    (See also :class:`delimitedList`.)
    """
    def postParse(self, instring, loc, tokenlist):
        """Discard whatever was matched."""
        nothing = []
        return nothing

    def suppress(self):
        """Already a ``Suppress``, so return this element itself."""
        return self

5264 

5265 

class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    After the first successful call, further calls raise
    ``ParseException`` until :meth:`reset` is called.
    """
    def __init__(self, methodCall):
        self.called = False
        self.callable = _trim_arity(methodCall)

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "")
        results = self.callable(s, l, t)
        # mark as used only after the wrapped action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called again."""
        self.called = False

5280 

def traceParseAction(f):
    """Decorator for debugging parse actions.

    When the parse action is called, this decorator will print
    ``">> entering method-name(line:<current_source_line>, <parse_location>, <matched_tokens>)"``.
    When the parse action completes, the decorator will print
    ``"<<"`` followed by the returned value, or any exception that the parse action raised.

    Example::

        wd = Word(alphas)

        @traceParseAction
        def remove_duplicate_chars(tokens):
            return ''.join(sorted(set(''.join(tokens))))

        wds = OneOrMore(wd).setParseAction(remove_duplicate_chars)
        print(wds.parseString("slkdjs sld sldd sdlf sdljf"))

    prints::

        >>entering remove_duplicate_chars(line: 'slkdjs sld sldd sdlf sdljf', 0, (['slkdjs', 'sld', 'sldd', 'sdlf', 'sdljf'], {}))
        <<leaving remove_duplicate_chars (ret: 'dfjkls')
        ['dfjkls']
    """
    f = _trim_arity(f)

    def z(*paArgs):
        label = f.__name__
        # the last three positional arguments are always (string, location, tokens)
        s, l, t = paArgs[-3:]
        if len(paArgs) > 3:
            # an extra leading argument is taken to be the owning instance
            label = paArgs[0].__class__.__name__ + '.' + label
        trace = sys.stderr.write
        trace(">>entering %s(line: '%s', %d, %r)\n" % (label, line(l, s), l, t))
        try:
            ret = f(*paArgs)
        except Exception as exc:
            trace("<<leaving %s (exception: %s)\n" % (label, exc))
            raise
        trace("<<leaving %s (ret: %r)\n" % (label, ret))
        return ret

    try:
        z.__name__ = f.__name__
    except AttributeError:
        # wrapped callable has no __name__; keep the wrapper's own
        pass
    return z

5325 

5326# 

5327# global helpers 

5328# 

def delimitedList(expr, delim=",", combine=False):
    """Helper that builds an expression for a list of ``expr`` items
    separated by ``delim`` (``','`` by default).

    Whitespace and comments may normally appear between the list
    elements and the delimiters; pass ``combine=True`` to override that.
    With ``combine=True`` the whole match is returned as one token
    string that still contains the delimiters; otherwise the result is
    a list of the element tokens with the delimiters suppressed.

    Example::

        delimitedList(Word(alphas)).parseString("aa,bb,cc") # -> ['aa', 'bb', 'cc']
        delimitedList(Word(hexnums), delim=':', combine=True).parseString("AA:BB:CC:DD:EE") # -> ['AA:BB:CC:DD:EE']
    """
    exprText = _ustr(expr)
    dlName = exprText + " [" + _ustr(delim) + " " + exprText + "]..."
    if combine:
        # keep the delimiters and glue everything into a single token
        listExpr = Combine(expr + ZeroOrMore(delim + expr))
    else:
        # delimiters only steer the parse; drop them from the results
        listExpr = expr + ZeroOrMore(Suppress(delim) + expr)
    return listExpr.setName(dlName)

5349 

def countedArray(expr, intExpr=None):
    """Helper that builds an expression for a length-prefixed list.

    The generated pattern has the form::

        integer expr expr expr...

    where the leading integer states how many ``expr`` items follow.
    The result contains the matched ``expr`` tokens as a list; the
    leading count token itself is suppressed.

    When ``intExpr`` is given, it should be a pyparsing expression that
    produces an integer value.

    Example::

        countedArray(Word(alphas)).parseString('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binaryConstant = Word('01').setParseAction(lambda t: int(t[0], 2))
        countedArray(Word(alphas), intExpr=binaryConstant).parseString('10 ab cd ef')  # -> ['ab', 'cd']
    """
    arrayExpr = Forward()

    def countFieldParseAction(s, l, t):
        # Redefine the array body on the fly from the count just parsed.
        n = t[0]
        if n:
            arrayExpr << Group(And([expr] * n))
        else:
            arrayExpr << Group(empty)
        # the count itself is not reported
        return []

    # work on a private copy so the caller's intExpr is not modified
    intExpr = (Word(nums).setParseAction(lambda t: int(t[0]))
               if intExpr is None
               else intExpr.copy())
    intExpr.setName("arrayLen")
    intExpr.addParseAction(countFieldParseAction, callDuringTry=True)
    return (intExpr + arrayExpr).setName('(len) ' + _ustr(expr) + '...')

5385 

def _flatten(L):
    """Return a new flat list with the items of ``L``, where every nested
    ``list`` (at any depth) is replaced by its own flattened items.
    Only ``list`` instances are expanded; other containers are kept as-is.
    """
    flat = []
    for item in L:
        flat.extend(_flatten(item) if isinstance(item, list) else [item])
    return flat

5394 

def matchPreviousLiteral(expr):
    """Helper that builds an expression matching a literal 'repeat' of
    whatever tokens a previous expression matched. For example::

        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because the repeat is
    matched as a literal, it will also match the leading ``"1:1"`` in
    ``"1:10"``. If this is not desired, use
    :class:`matchPreviousExpr`. Do *not* use with packrat parsing
    enabled.
    """
    rep = Forward()

    def copyTokenToRepeater(s, l, t):
        # Rebind the repeater to whatever was just matched by expr.
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            tflat = _flatten(t.asList())
            rep << And(Literal(tt) for tt in tflat)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

5424 

def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.

    Returns the repeater expression. A :class:`ParseException` carrying
    the input string, the match location and a description of the
    mismatch is raised from the repeater's parse action when its tokens
    differ from those of the previous match.
    """
    rep = Forward()
    # the repeater parses with a copy of expr, so it carries its own parse action
    e2 = expr.copy()
    rep <<= e2

    def copyTokenToRepeater(s, l, t):
        # remember what expr matched; the repeater must match the same tokens
        matchTokens = _flatten(t.asList())

        def mustMatchTheseTokens(s, l, t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                # report the real input and location instead of ('', 0, '')
                raise ParseException(s, l, "Expected %s, found %s"
                                     % (matchTokens, theseTokens))
        rep.setParseAction(mustMatchTheseTokens, callDuringTry=True)

    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    rep.setName('(prev) ' + _ustr(expr))
    return rep

5453 

def _escapeRegexRangeChars(s):
    """Return ``s`` made safe for use inside a regex ``[...]`` set:
    backslash, ``^``, ``-``, ``[`` and ``]`` get a leading backslash, and
    literal newline/tab characters become ``\\n`` / ``\\t`` escapes.
    """
    # backslash comes first so the backslashes added below are not doubled
    for special in r"\^-[]":
        s = s.replace(special, _bslash + special)
    for raw, escaped in (("\n", r"\n"), ("\t", r"\t")):
        s = s.replace(raw, escaped)
    return _ustr(s)

5461 

def oneOf(strs, caseless=False, useRegex=True, asKeyword=False):
    """Helper to quickly define a set of alternative Literals. Takes
    care to test longer alternatives first when one choice is a prefix
    of another, regardless of the input order, yet returns
    a :class:`MatchFirst` for best performance.

    Parameters:

     - strs - a string of space-delimited literals, or a collection of
       string literals
     - caseless - (default= ``False``) - treat all literals as
       caseless
     - useRegex - (default= ``True``) - as an optimization, will
       generate a Regex object; otherwise, will generate
       a :class:`MatchFirst` object (if ``caseless=True`` or ``asKeyword=True``, or if
       creating a :class:`Regex` raises an exception)
     - asKeyword - (default=``False``) - enforce Keyword-style matching on the
       generated expressions

    Example::

        comp_oper = oneOf("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.searchString("B = 12   AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # a string in the second position usually means oneOf("a", "b") was
    # written instead of oneOf("a b") or oneOf(["a", "b"])
    if isinstance(caseless, basestring):
        warnings.warn("More than one string argument passed to oneOf, pass "
                      "choices as a list or space-delimited string", stacklevel=2)

    if caseless:
        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:
        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    symbols = []
    if isinstance(strs, basestring):
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        warnings.warn("Invalid argument to oneOf, expected string or iterable",
                      SyntaxWarning, stacklevel=2)
    if not symbols:
        return NoMatch()

    if not asKeyword:
        # if not producing keywords, need to reorder to take care to avoid masking
        # longer choices with shorter ones
        pos = 0
        while pos < len(symbols) - 1:
            current = symbols[pos]
            # iterate over a copy of the tail, since symbols is edited below
            for offset, candidate in enumerate(symbols[pos + 1:]):
                later = pos + offset + 1
                if isequal(candidate, current):
                    # duplicate entry - drop it and re-examine this position
                    del symbols[later]
                    break
                elif masks(current, candidate):
                    # current is a prefix of candidate - move candidate ahead
                    del symbols[later]
                    symbols.insert(pos, candidate)
                    break
            else:
                # nothing later conflicts with this entry; advance
                pos += 1

    if useRegex and not (caseless or asKeyword):
        try:
            if len(symbols) == len("".join(symbols)):
                # every alternative is a single character: use a character set
                pattern = "[%s]" % "".join(_escapeRegexRangeChars(sym) for sym in symbols)
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)
            return Regex(pattern).setName(' | '.join(symbols))
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                          SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst(parseElementClass(sym) for sym in symbols).setName(' | '.join(symbols))

5548 

def dictOf(key, value):
    """Helper that defines a dictionary from separate patterns for the
    key and the value. It nests the :class:`Dict`, :class:`OneOrMore`
    and :class:`Group` expressions in the proper order. The key pattern
    can include delimiting markers or punctuation, as long as they are
    suppressed, thereby leaving the significant key text. The value
    pattern can include named results, so that the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join))
        print(OneOrMore(attr_expr).parseString(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stopOn=label).setParseAction(' '.join)

        # similar to Dict, but simpler call format
        result = dictOf(attr_label, attr_value).parseString(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.asDict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: light blue
        - posn: upper left
        - shape: SQUARE
        - texture: burlap
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one group per key/value pair, repeated one or more times
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)

5587 

def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given
    expression.  Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, returns a string containing the original parsed text.

    If the optional ``asString`` argument is passed as
    ``False``, then the return value is
    a :class:`ParseResults` containing any results names that
    were originally matched, and a single token containing the original
    matched text from the input string.  So if the expression passed to
    :class:`originalTextFor` contains expressions with defined
    results names, you must set ``asString`` to ``False`` if you
    want to preserve those results name values.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = makeHTMLTags(tag)
            patt = originalTextFor(opener + SkipTo(closer) + closer)
            print(patt.searchString(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    # zero-width markers whose only token is the location where they matched
    locMarker = Empty().setParseAction(lambda s, loc, t: loc)
    endlocMarker = locMarker.copy()
    endlocMarker.callPreparse = False
    matchExpr = locMarker("_original_start") + expr + endlocMarker("_original_end")

    if not asString:
        def extractText(s, l, t):
            # replace the tokens in place; popping removes the marker names
            # while any other results names are left on t
            t[:] = [s[t.pop('_original_start'):t.pop('_original_end')]]
    else:
        extractText = lambda s, l, t: s[t._original_start: t._original_end]

    matchExpr.setParseAction(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    return matchExpr

5629 

def ungroup(expr):
    """Helper to undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.

    Wraps ``expr`` in a :class:`TokenConverter` whose parse action
    returns only the first token of the match.
    """
    converter = TokenConverter(expr)
    return converter.addParseAction(lambda t: t[0])

5635 

def locatedExpr(expr):
    """Helper that wraps the tokens of ``expr`` together with the input
    locations where the match starts and ends.

    This helper adds the following results names:

     - locn_start = location where matched expression begins
     - locn_end = location where matched expression ends
     - value = the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parseWithTabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).searchString("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # zero-width expression whose only token is its own match location
    locator = Empty().setParseAction(lambda s, l, t: l)
    startMarker = locator("locn_start")
    # the end marker is a copy that must not skip whitespace before matching
    endMarker = locator.copy().leaveWhitespace()("locn_end")
    return Group(startMarker + expr("value") + endMarker)

5663 

5664 

# convenience constants for positional expressions
# (module-level instances of the positional classes, each given a display
# name through setName)
empty = Empty().setName("empty")
lineStart = LineStart().setName("lineStart")
lineEnd = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd = StringEnd().setName("stringEnd")

5671 

# Grammar used by srange() to parse a regex-style "[...]" character set.

# backslash followed by one punctuation character -> that character
_escapedPunc = Word(_bslash, r"\[]-*.$+^?()~ ", exact=2).setParseAction(lambda s, l, t: t[0][1])
# "\x##" (or legacy "\0x##") -> the character with that hex code.  The hex
# digits are everything after the first 'x'/'X'; str.lstrip(r'\0x') must not be
# used here because it strips a *set* of characters, which also removed the
# leading zeros of the value itself ("\x00" -> "") and never removed an 'X'.
_escapedHexChar = Regex(r"\\0?[xX][0-9a-fA-F]+").setParseAction(
    lambda s, l, t: unichr(int(t[0].lower().partition('x')[2], 16)))
# "\0##" -> the character with that octal code
_escapedOctChar = Regex(r"\\0[0-7]+").setParseAction(lambda s, l, t: unichr(int(t[0][1:], 8)))
# any one of the escapes above, or a single character other than '\' and ']'
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | CharsNotIn(r'\]', exact=1)
# "a-z" style range, reported as a group of its two end characters
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group(OneOrMore(_charRange | _singleChar)).setResultsName("body") + "]"

5678 

def srange(s):
    r"""Helper that expands a regex-style ``[]`` character set into the
    string of all characters it contains, for use in Word
    construction::

        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"

    The input string must be enclosed in []'s, and the returned string
    is the expanded character set joined into a single string. The
    values enclosed in the []'s may be:

     - a single character
     - an escaped character with a leading backslash (such as ``\-``
       or ``\]``)
     - an escaped hex character with a leading ``'\x'``
       (``\x21``, which is a ``'!'`` character) (``\0x##``
       is also supported for backwards compatibility)
     - an escaped octal character with a leading ``'\0'``
       (``\041``, which is a ``'!'`` character)
     - a range of any of the above, separated by a dash (``'a-z'``,
       etc.)
     - any combination of the above (``'aeiouy'``,
       ``'a-zA-Z0-9_$'``, etc.)

    If the string cannot be parsed or expanded, an empty string is
    returned.
    """
    def _expanded(p):
        # single characters come through as plain strings
        if not isinstance(p, ParseResults):
            return p
        # a range arrives as a two-item group: (first char, last char)
        return ''.join(unichr(c) for c in range(ord(p[0]), ord(p[1]) + 1))

    try:
        return "".join(_expanded(part) for part in _reBracketExpr.parseString(s).body)
    except Exception:
        return ""

5710 

def matchOnlyAtCol(n):
    """Helper that returns a parse action which only accepts matches
    starting at column ``n`` of the input text; any other column makes
    the action raise :class:`ParseException`.
    """
    def verifyCol(strg, locn, toks):
        if col(locn, strg) == n:
            return
        raise ParseException(strg, locn, "matched token not at column %d" % n)
    return verifyCol

5719 

def replaceWith(replStr):
    """Helper that returns a parse action which ignores the matched
    tokens and always yields the single value ``replStr``. Especially
    useful when used with
    :class:`transformString<ParserElement.transformString>` ().

    Example::

        num = Word(nums).setParseAction(lambda toks: int(toks[0]))
        na = oneOf("N/A NA").setParseAction(replaceWith(math.nan))
        term = na | num

        OneOrMore(term).parseString("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # a fresh one-item list is built on every call of the action
    constantAction = lambda s, l, t: [replStr]
    return constantAction

5734 

def removeQuotes(s, l, t):
    """Parse action that strips the first and last character (the
    quotation marks) from the first token of a parsed quoted string.

    Example::

        # by default, quotation marks are included in parsed results
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use removeQuotes to strip quotation marks from parsed results
        quotedString.setParseAction(removeQuotes)
        quotedString.parseString("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]

5749 

def tokenMap(func, *args):
    """Helper that builds a parse action applying ``func`` to every
    element of a ParseResults list. Any additional ``args`` are
    forwarded to ``func`` as extra arguments after the token, as in
    ``hex_integer = Word(hexnums).setParseAction(tokenMap(int, 16))``,
    which will convert the parsed data to an integer using base 16.

    Example (compare the last to example in :class:`ParserElement.transformString`::

        hex_ints = OneOrMore(Word(hexnums)).setParseAction(tokenMap(int, 16))
        hex_ints.runTests('''
            00 11 22 aa FF 0a 0d 1a
            ''')

        upperword = Word(alphas).setParseAction(tokenMap(str.upper))
        OneOrMore(upperword).runTests('''
            my kingdom for a horse
            ''')

        wd = Word(alphas).setParseAction(tokenMap(str.title))
        OneOrMore(wd).setParseAction(' '.join).runTests('''
            now is the winter of our discontent made glorious summer by this sun of york
            ''')

    prints::

        00 11 22 aa FF 0a 0d 1a
        [0, 17, 34, 170, 255, 10, 13, 26]

        my kingdom for a horse
        ['MY', 'KINGDOM', 'FOR', 'A', 'HORSE']

        now is the winter of our discontent made glorious summer by this sun of york
        ['Now Is The Winter Of Our Discontent Made Glorious Summer By This Sun Of York']
    """
    def pa(s, l, t):
        mapped = []
        for tokn in t:
            mapped.append(func(tokn, *args))
        return mapped

    # name the action after func: its __name__ if it has one, otherwise the
    # name of its class, and as a last resort its string form
    try:
        fallback = getattr(func, '__class__').__name__
        func_name = getattr(func, '__name__', fallback)
    except Exception:
        func_name = str(func)
    pa.__name__ = func_name

    return pa

5797 

# Parse action built with tokenMap: converts each token with _ustr and
# upper-cases the result.
upcaseTokens = tokenMap(lambda t: _ustr(t).upper())
"""(Deprecated) Helper parse action to convert tokens to upper case.
Deprecated in favor of :class:`pyparsing_common.upcaseTokens`"""

# Parse action built with tokenMap: converts each token with _ustr and
# lower-cases the result.
downcaseTokens = tokenMap(lambda t: _ustr(t).lower())
"""(Deprecated) Helper parse action to convert tokens to lower case.
Deprecated in favor of :class:`pyparsing_common.downcaseTokens`"""

5805 

def _makeTags(tagStr, xml,
              suppress_LT=Suppress("<"),
              suppress_GT=Suppress(">")):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    ``tagStr`` is either the tag name as a string or an expression whose
    ``name`` attribute is used as the tag name. ``xml`` selects XML
    rules (case-sensitive tag, double-quoted attribute values that are
    always present) over HTML rules (caseless tag, lower-cased attribute
    names, optional quoted or unquoted values). Returns the pair
    ``(openTag, closeTag)``.
    """
    if isinstance(tagStr, basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        tagAttrValue = dblQuotedString.copy().setParseAction(removeQuotes)
        tagAttrs = Dict(ZeroOrMore(Group(tagAttrName + Suppress("=") + tagAttrValue)))
    else:
        tagAttrValue = quotedString.copy().setParseAction(removeQuotes) | Word(printables, excludeChars=">")
        tagAttrs = Dict(ZeroOrMore(Group(tagAttrName.setParseAction(downcaseTokens)
                                         + Optional(Suppress("=") + tagAttrValue))))
    # optional trailing "/" reported as a boolean under the name "empty"
    emptyFlag = Optional("/", default=[False])("empty").setParseAction(lambda s, l, t: t[0] == '/')
    openTag = suppress_LT + tagStr("tag") + tagAttrs + emptyFlag + suppress_GT
    closeTag = Combine(_L("</") + tagStr + ">", adjacent=False)

    # results-name suffix: the tag name with ':' separated parts title-cased
    # and joined together
    nameSuffix = "".join(resname.replace(":", " ").title().split())

    openTag.setName("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.addParseAction(lambda t: t.__setitem__("start" + nameSuffix, t.copy()))
    closeTag = closeTag("end" + nameSuffix).setName("</%s>" % resname)
    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag

5842 

def makeHTMLTags(tagStr):
    """Helper that returns the opening and closing tag expressions for
    an HTML tag name. Matches tags in either upper or lower case,
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # makeHTMLTags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = makeHTMLTags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.searchString(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    # xml=False selects the HTML rules
    tagPair = _makeTags(tagStr, False)
    return tagPair

5866 

def makeXMLTags(tagStr):
    """Helper that returns the opening and closing tag expressions for
    an XML tag name. Matches tags only in the given upper/lower case.

    Example: similar to :class:`makeHTMLTags`
    """
    # xml=True selects the XML rules
    tagPair = _makeTags(tagStr, True)
    return tagPair

5874 

def withAttribute(*args, **attrDict):
    """Helper that creates a validating parse action for start tags
    created with :class:`makeXMLTags` or :class:`makeHTMLTags`. Use
    ``withAttribute`` to qualify a starting tag with a required
    attribute value, to avoid false matches on common tags such as
    ``<TD>`` or ``<DIV>``.

    Call ``withAttribute`` with a series of attribute names and
    values. Specify the list of filter attributes names and values as:

     - keyword arguments, as in ``(align="right")``, or
     - as an explicit dict with ``**`` operator, when an attribute
       name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
     - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    For attribute names with a namespace prefix, you must use the second
    form.  Attribute names are matched insensitive to upper/lower case.

    If just testing for ``class`` (with or without a namespace), use
    :class:`withClass`.

    To verify that the attribute exists, but without specifying a value,
    pass ``withAttribute.ANY_VALUE`` as the value.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().setParseAction(withAttribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().setParseAction(withAttribute(type=withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # positional (name, value) pairs take precedence over keyword arguments
    attrs = [(k, v) for k, v in (args[:] if args else attrDict.items())]

    def pa(s, l, tokens):
        for attrName, attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s, l, "no matching attribute " + attrName)
            # ANY_VALUE only requires the attribute to be present
            valueMatters = attrValue != withAttribute.ANY_VALUE
            if valueMatters and tokens[attrName] != attrValue:
                raise ParseException(s, l, "attribute '%s' has value '%s', must be '%s'" %
                                     (attrName, tokens[attrName], attrValue))
    return pa


# unique sentinel meaning "attribute must exist, any value is accepted"
withAttribute.ANY_VALUE = object()

5945 

def withClass(classname, namespace=''):
    """Shortcut for :class:`withAttribute` when matching on a div
    class - needed because ``class`` is a reserved word in Python and
    cannot be passed as a keyword argument directly. When ``namespace``
    is given, the attribute tested is ``<namespace>:class``.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = makeHTMLTags("div")
        div_grid = div().setParseAction(withClass("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.searchString(html):
            print(grid_header.body)

        div_any_type = div().setParseAction(withClass(withAttribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.searchString(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        classattr = "%s:class" % namespace
    else:
        classattr = "class"
    required = {classattr: classname}
    return withAttribute(**required)

5983 

# Namespace holding the two associativity markers accepted by infixNotation.
# Each marker is a distinct bare object().
opAssoc = SimpleNamespace()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

5987 

def infixNotation(baseExpr, opList, lpar=Suppress('('), rpar=Suppress(')')):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infixNotation. See
    :class:`ParserElement.enablePackrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:
     - baseExpr - expression representing the most basic operand of the
       nested expression
     - opList - list of tuples, one for each operator precedence level
       in the expression grammar; each tuple is of the form ``(opExpr,
       numTerms, rightLeftAssoc, parseAction)``, where:

       - opExpr is the pyparsing expression for the operator; may also
         be a string, which will be converted to a Literal; if numTerms
         is 3, opExpr is a tuple or list of two expressions, for the two
         operators separating the 3 terms
       - numTerms is the number of terms for this operator (must be 1,
         2, or 3)
       - rightLeftAssoc is the indicator whether the operator is right
         or left associative, using the pyparsing-defined constants
         ``opAssoc.RIGHT`` and ``opAssoc.LEFT``.
       - parseAction is the parse action to be associated with
         expressions matching this operator expression (the parse action
         tuple member may be omitted); if the parse action is passed
         a tuple or list of functions, this is equivalent to calling
         ``setParseAction(*fn)``
         (:class:`ParserElement.setParseAction`)
     - lpar - expression for matching left-parentheses
       (default= ``Suppress('(')``)
     - rpar - expression for matching right-parentheses
       (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator definition has numTerms=3
    without a pair of operator expressions, a numTerms other than 1, 2
    or 3, or an associativity that is neither ``opAssoc.LEFT`` nor
    ``opAssoc.RIGHT``.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infixNotation(integer | varname,
            [
            ('-', 1, opAssoc.RIGHT),
            (oneOf('* /'), 2, opAssoc.LEFT),
            (oneOf('+ -'), 2, opAssoc.LEFT),
            ])

        arith_expr.runTests('''
            5+3*6
            (5+3)*6
            -2--11
            ''', fullDump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """
    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.tryParse(instring, loc)
            return loc, []

    ret = Forward()
    lastExpr = baseExpr | (lpar + ret + rpar)
    for operDef in opList:
        # pad with None so the parse action member may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None, ))[:4]
        if arity == 3:
            # validate before formatting the term name: formatting first made
            # a missing/ill-sized opExpr (or a list of two expressions) fail
            # with TypeError instead of the ValueError below
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
            termName = "%s%s term" % (opExpr1, opExpr2)
        else:
            # wrap in a tuple so opExpr is always formatted as a single value
            termName = "%s term" % (opExpr,)
        thisExpr = Forward().setName(termName)
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + OneOrMore(opExpr))
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(lastExpr + OneOrMore(opExpr + lastExpr))
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr + OneOrMore(lastExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                             + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr)))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(lastExpr + OneOrMore(opExpr + thisExpr))
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(lastExpr + OneOrMore(thisExpr))
            elif arity == 3:
                matchExpr = (_FB(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                             + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr))
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.setParseAction(*pa)
            else:
                matchExpr.setParseAction(pa)
        # this level matches its own operator form, or falls through to the
        # expression built for the previous (tighter-binding) level
        thisExpr <<= (matchExpr.setName(termName) | lastExpr)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret

6116 

# Backward-compatible alias: the same callable as infixNotation under its earlier name.
operatorPrecedence = infixNotation
"""(Deprecated) Former name of :class:`infixNotation`, will be
dropped in a future release."""

# Quoted-string expressions.  In every pattern below the body of the string may
# consist of:
#   - any character other than the delimiting quote, newline, carriage return
#     or backslash,
#   - a doubled quote character ("" or ''),
#   - a backslash escape: a backslash followed by any character other than 'x',
#     or by 'x' and one or more hex digits.
# The regex covers the opening quote and the body; the closing quote is added
# as a separate string and the pair is wrapped in Combine.
dblQuotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"').setName("string enclosed in double quotes")
sglQuotedString = Combine(Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("string enclosed in single quotes")
# Either quoting style: the double-quoted pattern is listed first, then the
# single-quoted one (the same two regexes as above, repeated inline).
quotedString = Combine(Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*') + '"'
                       | Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\(?:[^x]|x[0-9a-fA-F]+)))*") + "'").setName("quotedString using single or double quotes")
# A quoted string prefixed by the letter 'u'.  A copy of quotedString is used,
# so this expression does not share the quotedString object itself.
# NOTE(review): _L is defined elsewhere in this module - presumably an alias for Literal; confirm.
unicodeString = Combine(_L('u') + quotedString.copy()).setName("unicode string literal")

6126 

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString.copy()):
    """Build an expression that matches nested lists enclosed in opening and
    closing delimiters (``"("`` and ``")"`` by default).

    Parameters:
     - opener - opening character for a nested list
       (default= ``"("``); can also be a pyparsing expression
     - closer - closing character for a nested list
       (default= ``")"``); can also be a pyparsing expression
     - content - expression for items within the nested lists
       (default= ``None``)
     - ignoreExpr - expression for ignoring opening and closing
       delimiters (default= :class:`quotedString`)

    When no ``content`` expression is supplied, the nested expression
    captures all whitespace-delimited content between the delimiters as a
    list of separate values; in that case both ``opener`` and ``closer``
    must be strings.

    ``ignoreExpr`` names expressions that may themselves contain opening or
    closing characters which must not be treated as nesting delimiters,
    such as quotedString or a comment expression.  Specify multiple
    expressions using an :class:`Or` or :class:`MatchFirst`.  The default is
    :class:`quotedString`; pass ``None`` if nothing is to be ignored.

    Raises ``ValueError`` if ``opener`` equals ``closer``, or if ``content``
    is omitted and the delimiters are not both strings.

    Example::

        data_type = oneOf("void int short long char float double")
        decl_data_type = Combine(data_type + Optional(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nestedExpr('{', '}', ignoreExpr=(quotedString | cStyleComment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Optional(delimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(cStyleComment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.searchString(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be synthesized from string delimiters.
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def _strip_token(t):
            return t[0].strip()

        if len(opener) == 1 and len(closer) == 1:
            # Single-character delimiters can simply be excluded from the
            # character set, together with the default whitespace.
            excluded = opener + closer + ParserElement.DEFAULT_WHITE_CHARS
            if ignoreExpr is None:
                run = CharsNotIn(excluded)
                run.setParseAction(_strip_token)
                content = empty.copy() + run
            else:
                one_char = ~ignoreExpr + CharsNotIn(excluded, exact=1)
                content = Combine(OneOrMore(one_char))
                content.setParseAction(_strip_token)
        else:
            # Multi-character delimiters: advance one non-whitespace character
            # at a time, refusing to step onto either delimiter.
            if ignoreExpr is None:
                one_char = (~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1))
            else:
                one_char = (~ignoreExpr
                            + ~Literal(opener)
                            + ~Literal(closer)
                            + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1))
            content = Combine(OneOrMore(one_char))
            content.setParseAction(_strip_token)

    nested = Forward()
    if ignoreExpr is None:
        item = nested | content
    else:
        item = ignoreExpr | nested | content
    nested <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    nested.setName('nested %s%s expression' % (opener, closer))
    return nested

6230 

def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

     - blockStatementExpr - expression defining syntax of statement that
       is repeated within the indented block
     - indentStack - list created by caller to manage indentation stack
       (multiple statementWithIndentedBlock expressions within a single
       grammar should share a common indentStack)
     - indent - boolean indicating whether block must be indented beyond
       the current level; set to False for block of left-most
       statements (default= ``True``)

    A valid block must contain at least one ``blockStatement``.

    Side effects visible in this function: ``indentStack`` is modified in
    place while parsing (columns are appended and popped, and the list is
    restored to its contents as of this call when the block fails to
    match), and ``blockStatementExpr.ignore()`` is called to skip
    backslash + end-of-line sequences.

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Optional(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = OneOrMore(stmt)

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # Snapshot of the caller's stack as it is when this expression is built;
    # reset_stack() copies it back into the same list object.
    backup_stack = indentStack[:]

    def reset_stack():
        # Slice assignment keeps the caller's list identity intact.
        indentStack[:] = backup_stack

    # The three parse actions below receive (s, l, t) = (input string,
    # location, tokens).  col() is defined elsewhere in this module -
    # presumably it returns the column of location l within s; confirm.

    def checkPeerIndent(s, l, t):
        # At or past the end of the input there is nothing to check.
        if l >= len(s): return
        curCol = col(l, s)
        # A peer statement must sit exactly at the column on top of the stack.
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        curCol = col(l, s)
        # A new block must start deeper than the current level; its column
        # becomes the new top of the stack.
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # At or past the end of the input there is nothing to check.
        if l >= len(s): return
        curCol = col(l, s)
        # The column after the block must be one already recorded on the stack.
        if not(indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        # Only a single level is popped per call.
        if curCol < indentStack[-1]:
            indentStack.pop()

    # One or more suppressed line ends; these LineEnd instances use only tab
    # and space as their whitespace characters, and repetition stops at
    # StringEnd.
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress(), stopOn=StringEnd())
    # Zero-width markers whose only job is to run the column checks above.
    INDENT = (Empty() + Empty().setParseAction(checkSubIndent)).setName('INDENT')
    PEER = Empty().setParseAction(checkPeerIndent).setName('')
    UNDENT = Empty().setParseAction(checkUnindent).setName('UNINDENT')
    if indent:
        smExpr = Group(Optional(NL)
                       + INDENT
                       + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
                       + UNDENT)
    else:
        # Left-most block: same shape, but without the INDENT check.
        smExpr = Group(Optional(NL)
                       + OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL), stopOn=StringEnd())
                       + UNDENT)
    # When the block as a whole fails, put the shared stack back to the snapshot.
    smExpr.setFailAction(lambda a, b, c, d: reset_stack())
    # Note: this modifies the caller's statement expression.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.setName('indented block')

6356 

# 8-bit character sets built with srange.  The range strings list code points
# 0xc0-0xff with 0xd7 and 0xf7 left out (alphas8bit), and 0xa1-0xbf plus
# exactly those two excluded code points, 0xd7 and 0xf7 (punc8bit).
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close tag expressions for any tag whose name starts with a letter and
# continues with letters, digits, '_' or ':'.
anyOpenTag, anyCloseTag = makeHTMLTags(Word(alphas, alphanums + "_:").setName('any tag'))
# Entity name -> replacement character, paired positionally by zip():
# gt '>', lt '<', amp '&', nbsp ' ', quot '"', apos "'".
_htmlEntityMap = dict(zip("gt lt amp nbsp quot apos".split(), '><& "\''))
# Matches '&' + one of the names above + ';', exposing the name as the
# named group 'entity' (read by replaceHTMLEntity).
commonHTMLEntity = Regex('&(?P<entity>' + '|'.join(_htmlEntityMap.keys()) +");").setName("common HTML entity")

def replaceHTMLEntity(t):
    """Parse action that swaps a common HTML entity for its character.

    Looks up the ``entity`` results name of the matched tokens in the
    module's entity table and returns the mapped character, or ``None``
    when the name is not in the table.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)

6366 

# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# The regex consumes '/*' and then any run of non-'*' characters or '*' not
# followed by '/'; the terminating '*/' is added as a separate string.
cStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/').setName("C style comment")
"Comment of the form ``/* ... */``"

# Non-greedy match, so the comment ends at the first '-->'; [\s\S] lets it span lines.
htmlComment = Regex(r"<!--[\s\S]*?-->").setName("HTML comment")
"Comment of the form ``<!-- ... -->``"

# Everything up to (not including) the newline; leaveWhitespace() is applied
# so leading whitespace is not skipped before matching.
restOfLine = Regex(r".*").leaveWhitespace().setName("rest of line")
# A backslash immediately followed by a newline is consumed as part of the
# comment, so such a comment continues onto the next line.
dblSlashComment = Regex(r"//(?:\\\n|[^\n])*").setName("// comment")
"Comment of the form ``// ... (to end of line)``"

# Same /* ... */ pattern as cStyleComment (repeated inline), or a // comment.
cppStyleComment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + '*/' | dblSlashComment).setName("C++ style comment")
"Comment of either form :class:`cStyleComment` or :class:`dblSlashComment`"

# Plain alias: this is the very same expression object as cppStyleComment, not a copy.
javaStyleComment = cppStyleComment
"Same as :class:`cppStyleComment`"

pythonStyleComment = Regex(r"#.*").setName("Python style comment")
"Comment of the form ``# ... (to end of line)``"

# One comma-separated item: one or more comma-free printable words, each
# optionally followed by a run of spaces/tabs that is not itself followed by
# a comma or the end of the line.
_commasepitem = Combine(OneOrMore(Word(printables, excludeChars=',')
                                  + Optional(Word(" \t")
                                             + ~Literal(",") + ~LineEnd()))).streamline().setName("commaItem")
# Each list element is optional and defaults to "" (so empty fields are kept).
commaSeparatedList = delimitedList(Optional(quotedString.copy() | _commasepitem, default="")).setName("commaSeparatedList")
"""(Deprecated) Predefined expression of 1 or more printable words or
quoted strings, separated by commas.

This expression is deprecated in favor of :class:`pyparsing_common.comma_separated_list`.
"""

6396 

6397# some other useful expressions - using lower-case class name since we are really using this as a namespace 

class pyparsing_common:
    """Here are some common low-level expressions that may be useful in
    jump-starting parser development:

     - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
       :class:`scientific notation<sci_real>`)
     - common :class:`programming identifiers<identifier>`
     - network addresses (:class:`MAC<mac_address>`,
       :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
     - ISO8601 :class:`dates<iso8601_date>` and
       :class:`datetime<iso8601_datetime>`
     - :class:`UUID<uuid>`
     - :class:`comma-separated list<comma_separated_list>`

    Parse actions:

     - :class:`convertToInteger`
     - :class:`convertToFloat`
     - :class:`convertToDate`
     - :class:`convertToDatetime`
     - :class:`stripHTMLTags`
     - :class:`upcaseTokens`
     - :class:`downcaseTokens`

    The class is used purely as a namespace: everything is defined as a
    class attribute or static method and accessed as ``pyparsing_common.<name>``.

    Example::

        pyparsing_common.number.runTests('''
            # any int or real number, returned as the appropriate type
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.fnumber.runTests('''
            # any int or real number, returned as float
            100
            -100
            +100
            3.14159
            6.02e23
            1e-12
            ''')

        pyparsing_common.hex_integer.runTests('''
            # hex numbers
            100
            FF
            ''')

        pyparsing_common.fraction.runTests('''
            # fractions
            1/2
            -3/4
            ''')

        pyparsing_common.mixed_integer.runTests('''
            # mixed fractions
            1
            1/2
            -3/4
            1-3/4
            ''')

        import uuid
        pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
        pyparsing_common.uuid.runTests('''
            # uuid
            12345678-1234-5678-1234-567812345678
            ''')

    prints::

        # any int or real number, returned as the appropriate type
        100
        [100]

        -100
        [-100]

        +100
        [100]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # any int or real number, returned as float
        100
        [100.0]

        -100
        [-100.0]

        +100
        [100.0]

        3.14159
        [3.14159]

        6.02e23
        [6.02e+23]

        1e-12
        [1e-12]

        # hex numbers
        100
        [256]

        FF
        [255]

        # fractions
        1/2
        [0.5]

        -3/4
        [-0.75]

        # mixed fractions
        1
        [1]

        1/2
        [0.5]

        -3/4
        [-0.75]

        1-3/4
        [1.75]

        # uuid
        12345678-1234-5678-1234-567812345678
        [UUID('12345678-1234-5678-1234-567812345678')]
    """

    convertToInteger = tokenMap(int)
    """
    Parse action for converting parsed integers to Python int
    """

    convertToFloat = tokenMap(float)
    """
    Parse action for converting parsed numbers to Python float
    """

    integer = Word(nums).setName("integer").setParseAction(convertToInteger)
    """expression that parses an unsigned integer, returns an int"""

    # tokenMap(int, 16): the matched text is converted with base 16.
    hex_integer = Word(hexnums).setName("hex integer").setParseAction(tokenMap(int, 16))
    """expression that parses a hexadecimal integer, returns an int"""

    signed_integer = Regex(r'[+-]?\d+').setName("signed integer").setParseAction(convertToInteger)
    """expression that parses an integer with optional leading sign, returns an int"""

    # signed_integer() is called to obtain the operand expressions before a
    # float-converting parse action is set on each.
    # NOTE(review): presumably calling an expression returns a copy, so
    # signed_integer itself keeps its int conversion - confirm.
    fraction = (signed_integer().setParseAction(convertToFloat) + '/' + signed_integer().setParseAction(convertToFloat)).setName("fraction")
    """fractional expression of an integer divided by an integer, returns a float"""
    # Tokens are [numerator, '/', denominator]; divide the first by the last.
    fraction.addParseAction(lambda t: t[0]/t[-1])

    # Either a bare fraction, or an integer optionally followed by a fraction;
    # a '-' separating the two is optional and suppressed.
    mixed_integer = (fraction | signed_integer + Optional(Optional('-').suppress() + fraction)).setName("fraction or mixed integer-fraction")
    """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
    # The remaining tokens are all numeric, so the result is simply their sum.
    mixed_integer.addParseAction(sum)

    # Requires a decimal point: digits '.' optional digits, or '.' digits.
    real = Regex(r'[+-]?(?:\d+\.\d*|\.\d+)').setName("real number").setParseAction(convertToFloat)
    """expression that parses a floating point number and returns a float"""

    # Either digits with a mandatory exponent, or a decimal-point form with an
    # optional exponent.
    sci_real = Regex(r'[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)').setName("real number with scientific notation").setParseAction(convertToFloat)
    """expression that parses a floating point number with optional
    scientific notation and returns a float"""

    # streamlining this expression makes the docs nicer-looking
    number = (sci_real | real | signed_integer).streamline()
    """any numeric expression, returns the corresponding Python type"""

    # Single regex covering integer, decimal and exponent forms; always converted to float.
    fnumber = Regex(r'[+-]?\d+\.?\d*([eE][+-]?\d+)?').setName("fnumber").setParseAction(convertToFloat)
    """any int or real number, returned as float"""

    identifier = Word(alphas + '_', alphanums + '_').setName("identifier")
    """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""

    # Four dot-separated fields, each limited by the pattern to 0-255.
    ipv4_address = Regex(r'(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}').setName("IPv4 address")
    "IPv4 address (``0.0.0.0 - 255.255.255.255``)"

    # One IPv6 group: 1 to 4 hex digits.
    _ipv6_part = Regex(r'[0-9a-fA-F]{1,4}').setName("hex_integer")
    # Full form: exactly eight groups (one, then seven ':' + group).
    _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address")
    # Abbreviated form: optional groups on either side of a '::'.
    _short_ipv6_address = (Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           + "::"
                           + Optional(_ipv6_part + (':' + _ipv6_part) * (0, 6))
                           ).setName("short IPv6 address")
    # Reject matches in which eight or more of the tokens are hex groups.
    _short_ipv6_address.addCondition(lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8)
    # IPv4-mapped form: the literal prefix '::ffff:' followed by an IPv4 address.
    _mixed_ipv6_address = ("::ffff:" + ipv4_address).setName("mixed IPv6 address")
    ipv6_address = Combine((_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).setName("IPv6 address")).setName("IPv6 address")
    "IPv6 address (long, short, or mixed form)"

    # The first delimiter is captured and reused through the \1 backreference,
    # so all delimiters in one address must be the same character.
    # NOTE(review): the pattern requires six hex pairs (pair, delimiter, pair,
    # then four more delimiter+pair groups), whereas the attribute docstring
    # below shows only five groups.
    mac_address = Regex(r'[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}').setName("MAC address")
    "MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"

    @staticmethod
    def convertToDate(fmt="%Y-%m-%d"):
        """
        Helper to create a parse action for converting parsed date string to Python datetime.date

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

        Returns a parse action taking ``(s, l, t)``.  Only the first token
        is converted; a ``ValueError`` from ``strptime`` is re-raised as a
        :class:`ParseException` at location ``l`` carrying the same message.

        Example::

            date_expr = pyparsing_common.iso8601_date.copy()
            date_expr.setParseAction(pyparsing_common.convertToDate())
            print(date_expr.parseString("1999-12-31"))

        prints::

            [datetime.date(1999, 12, 31)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt).date()
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
        return cvt_fn

    @staticmethod
    def convertToDatetime(fmt="%Y-%m-%dT%H:%M:%S.%f"):
        """Helper to create a parse action for converting parsed
        datetime string to Python datetime.datetime

        Params -
         - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

        Returns a parse action taking ``(s, l, t)``.  Only the first token
        is converted; a ``ValueError`` from ``strptime`` is re-raised as a
        :class:`ParseException` at location ``l`` carrying the same message.

        Example::

            dt_expr = pyparsing_common.iso8601_datetime.copy()
            dt_expr.setParseAction(pyparsing_common.convertToDatetime())
            print(dt_expr.parseString("1999-12-31T23:59:59.999"))

        prints::

            [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
        """
        def cvt_fn(s, l, t):
            try:
                return datetime.strptime(t[0], fmt)
            except ValueError as ve:
                raise ParseException(s, l, str(ve))
        return cvt_fn

    # Named groups year/month/day; per the pattern, the month and the day are
    # optional, so 'yyyy' and 'yyyy-mm' also match.
    iso8601_date = Regex(r'(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?').setName("ISO8601 date")
    "ISO8601 date (``yyyy-mm-dd``)"

    # Named groups year/month/day/hour/minute/second/tz.
    iso8601_datetime = Regex(r'(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?').setName("ISO8601 datetime")
    "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"

    # 8-4-4-4-12 hex digits separated by '-'.
    uuid = Regex(r'[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}').setName("UUID")
    "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"

    # Suppresses any opening or closing tag; used by stripHTMLTags below.
    _html_stripper = anyOpenTag.suppress() | anyCloseTag.suppress()
    @staticmethod
    def stripHTMLTags(s, l, tokens):
        """Parse action to remove HTML tags from web page HTML source

        Only the first token is processed; the return value is that text
        run through ``_html_stripper.transformString``.

        Example::

            # strip HTML links from normal text
            text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
            td, td_end = makeHTMLTags("TD")
            table_text = td + SkipTo(td_end).setParseAction(pyparsing_common.stripHTMLTags)("body") + td_end
            print(table_text.parseString(text).body)

        Prints::

            More info at the pyparsing wiki page
        """
        return pyparsing_common._html_stripper.transformString(tokens[0])

    # One item: one or more comma-free printable words, none starting at a
    # comma or at the end of a line, each optionally followed by spaces/tabs.
    _commasepitem = Combine(OneOrMore(~Literal(",")
                                      + ~LineEnd()
                                      + Word(printables, excludeChars=',')
                                      + Optional(White(" \t")))).streamline().setName("commaItem")
    # Each list element is optional and defaults to '' (so empty fields are kept).
    comma_separated_list = delimitedList(Optional(quotedString.copy()
                                                  | _commasepitem, default='')
                                         ).setName("comma separated list")
    """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""

    # Wrapped in staticmethod since these are plain functions stored on the class.
    upcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).upper()))
    """Parse action to convert tokens to upper case."""

    downcaseTokens = staticmethod(tokenMap(lambda t: _ustr(t).lower()))
    """Parse action to convert tokens to lower case."""

6696 

6697 

6698class _lazyclassproperty(object): 

6699 def __init__(self, fn): 

6700 self.fn = fn 

6701 self.__doc__ = fn.__doc__ 

6702 self.__name__ = fn.__name__ 

6703 

6704 def __get__(self, obj, cls): 

6705 if cls is None: 

6706 cls = type(obj) 

6707 if not hasattr(cls, '_intern') or any(cls._intern is getattr(superclass, '_intern', []) 

6708 for superclass in cls.__mro__[1:]): 

6709 cls._intern = {} 

6710 attrname = self.fn.__name__ 

6711 if attrname not in cls._intern: 

6712 cls._intern[attrname] = self.fn(cls) 

6713 return cls._intern[attrname] 

6714 

6715 

class unicode_set(object):
    """
    Base class describing a set of Unicode characters, from which the
    language-specific strings ``alphas``, ``nums``, ``alphanums`` and
    ``printables`` are derived.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``, such as::

        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    Each range is a tuple whose first and last items are the inclusive
    bounds, so a one-item tuple denotes a single code point.

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """
    _ranges = []

    @classmethod
    def _get_chars_for_ranges(cls):
        # Walk the MRO up to (not including) unicode_set itself and gather
        # every code point named by the classes met on the way.
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in klass._ranges:
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return [unichr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        "all non-whitespace characters in this range"
        chars = cls._get_chars_for_ranges()
        return u''.join(ch for ch in chars if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        "all alphabetic characters in this range"
        chars = cls._get_chars_for_ranges()
        return u''.join(ch for ch in chars if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        "all numeric digit characters in this range"
        chars = cls._get_chars_for_ranges()
        return u''.join(ch for ch in chars if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        "all alphanumeric characters in this range"
        letters = cls.alphas
        digits = cls.nums
        return letters + digits

6761 

6762 

class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class lists its code point ranges in ``_ranges`` as
    ``(first, last)`` tuples with inclusive bounds; a one-item tuple names
    a single code point.  The namespace class is itself a unicode_set
    covering everything from the space character up to ``sys.maxunicode``.
    """
    _ranges = [(32, sys.maxunicode)]

    class Latin1(unicode_set):
        "Unicode set for Latin-1 Unicode Character Range"
        _ranges = [(0x0020, 0x007e), (0x00a0, 0x00ff),]

    class LatinA(unicode_set):
        "Unicode set for Latin-A Unicode Character Range"
        _ranges = [(0x0100, 0x017f),]

    class LatinB(unicode_set):
        "Unicode set for Latin-B Unicode Character Range"
        _ranges = [(0x0180, 0x024f),]

    class Greek(unicode_set):
        "Unicode set for Greek Unicode Character Ranges"
        # The one-item tuples (0x1f59,), (0x1f5b,), (0x1f5d,) are single code points.
        _ranges = [
            (0x0370, 0x03ff), (0x1f00, 0x1f15), (0x1f18, 0x1f1d), (0x1f20, 0x1f45), (0x1f48, 0x1f4d),
            (0x1f50, 0x1f57), (0x1f59,), (0x1f5b,), (0x1f5d,), (0x1f5f, 0x1f7d), (0x1f80, 0x1fb4), (0x1fb6, 0x1fc4),
            (0x1fc6, 0x1fd3), (0x1fd6, 0x1fdb), (0x1fdd, 0x1fef), (0x1ff2, 0x1ff4), (0x1ff6, 0x1ffe),
        ]

    class Cyrillic(unicode_set):
        "Unicode set for Cyrillic Unicode Character Range"
        _ranges = [(0x0400, 0x04ff)]

    class Chinese(unicode_set):
        "Unicode set for Chinese Unicode Character Range"
        _ranges = [(0x4e00, 0x9fff), (0x3000, 0x303f),]

    class Japanese(unicode_set):
        "Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"
        # Left empty here: the combined list is assigned at module level,
        # after the class statement, from the three nested sets below.
        _ranges = []

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges = [(0x4E00, 0x9Fbf), (0x3000, 0x303f),]

        class Hiragana(unicode_set):
            "Unicode set for Hiragana Unicode Character Range"
            _ranges = [(0x3040, 0x309f),]

        class Katakana(unicode_set):
            "Unicode set for Katakana Unicode Character Range"
            _ranges = [(0x30a0, 0x30ff),]

    class Korean(unicode_set):
        "Unicode set for Korean Unicode Character Range"
        _ranges = [(0xac00, 0xd7af), (0x1100, 0x11ff), (0x3130, 0x318f), (0xa960, 0xa97f), (0xd7b0, 0xd7ff), (0x3000, 0x303f),]

    class CJK(Chinese, Japanese, Korean):
        "Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"
        # No ranges of its own: the characters come from the three base classes.
        pass

    class Thai(unicode_set):
        "Unicode set for Thai Unicode Character Range"
        _ranges = [(0x0e01, 0x0e3a), (0x0e3f, 0x0e5b),]

    class Arabic(unicode_set):
        "Unicode set for Arabic Unicode Character Range"
        _ranges = [(0x0600, 0x061b), (0x061e, 0x06ff), (0x0700, 0x077f),]

    class Hebrew(unicode_set):
        "Unicode set for Hebrew Unicode Character Range"
        _ranges = [(0x0590, 0x05ff),]

    class Devanagari(unicode_set):
        "Unicode set for Devanagari Unicode Character Range"
        _ranges = [(0x0900, 0x097f), (0xa8e0, 0xa8ff)]

6836 

# Japanese is declared with an empty range list inside the class body; fill it
# in here as the concatenation of its three nested sets.
pyparsing_unicode.Japanese._ranges = (pyparsing_unicode.Japanese.Kanji._ranges
                                      + pyparsing_unicode.Japanese.Hiragana._ranges
                                      + pyparsing_unicode.Japanese.Katakana._ranges)

# Register native-language attribute names as aliases for the language sets.
# Each alias refers to the same class object as the English name.  This is
# done only when PY_3 is true, and through setattr() rather than in the class
# bodies - presumably because these names are not valid Python 2 identifiers.
if PY_3:
    setattr(pyparsing_unicode, u"العربية", pyparsing_unicode.Arabic)
    setattr(pyparsing_unicode, u"中文", pyparsing_unicode.Chinese)
    setattr(pyparsing_unicode, u"кириллица", pyparsing_unicode.Cyrillic)
    setattr(pyparsing_unicode, u"Ελληνικά", pyparsing_unicode.Greek)
    setattr(pyparsing_unicode, u"עִברִית", pyparsing_unicode.Hebrew)
    setattr(pyparsing_unicode, u"日本語", pyparsing_unicode.Japanese)
    setattr(pyparsing_unicode.Japanese, u"漢字", pyparsing_unicode.Japanese.Kanji)
    setattr(pyparsing_unicode.Japanese, u"カタカナ", pyparsing_unicode.Japanese.Katakana)
    setattr(pyparsing_unicode.Japanese, u"ひらがな", pyparsing_unicode.Japanese.Hiragana)
    setattr(pyparsing_unicode, u"한국어", pyparsing_unicode.Korean)
    setattr(pyparsing_unicode, u"ไทย", pyparsing_unicode.Thai)
    setattr(pyparsing_unicode, u"देवनागरी", pyparsing_unicode.Devanagari)

6855 

6856 

6857class pyparsing_test: 

6858 """ 

6859 namespace class for classes useful in writing unit tests 

6860 """ 

6861 

class reset_pyparsing_context:
    """
    Context manager to be used when writing unit tests that modify pyparsing config values:
     - packrat parsing
     - default whitespace characters.
     - default keyword characters
     - literal string auto-conversion class
     - __diag__ settings
     - __compat__ settings

    ``save()`` records the current values and returns the manager itself;
    ``restore()`` writes the recorded values back.  Entering the ``with``
    block calls ``save()`` and leaving it calls ``restore()``; exceptions
    raised inside the block are not suppressed.

    Example:
        with reset_pyparsing_context():
            # test that literals used to construct a grammar are automatically suppressed
            ParserElement.inlineLiteralsUsing(Suppress)

            term = Word(alphas) | Word(nums)
            group = Group('(' + term[...] + ')')

            # assert that the '()' characters are not included in the parsed tokens
            self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

        # after exiting context manager, literals are converted to Literal expressions again
    """

    def __init__(self):
        # Filled by save(); restore() must not be called before save().
        self._save_context = {}

    def save(self):
        """Record the current pyparsing global settings and return ``self``."""
        saved = self._save_context
        saved["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
        saved["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS
        saved["literal_string_class"] = ParserElement._literalStringClass
        saved["packrat_enabled"] = ParserElement._packratEnabled
        saved["packrat_parse"] = ParserElement._parse
        saved["__diag__"] = {
            name: getattr(__diag__, name) for name in __diag__._all_names
        }
        saved["__compat__"] = {
            "collect_all_And_tokens": __compat__.collect_all_And_tokens
        }
        return self

    def restore(self):
        """Write the settings recorded by ``save()`` back to pyparsing."""
        saved = self._save_context

        # reset pyparsing global state
        if ParserElement.DEFAULT_WHITE_CHARS != saved["default_whitespace"]:
            ParserElement.setDefaultWhitespaceChars(saved["default_whitespace"])
        Keyword.DEFAULT_KEYWORD_CHARS = saved["default_keyword_chars"]
        ParserElement.inlineLiteralsUsing(saved["literal_string_class"])
        for name, value in saved["__diag__"].items():
            setattr(__diag__, name, value)
        ParserElement._packratEnabled = saved["packrat_enabled"]
        ParserElement._parse = saved["packrat_parse"]
        # Restore each saved flag individually.  Assigning the whole saved
        # dict to collect_all_And_tokens would leave a (truthy) dict in place
        # of the original boolean, turning a saved False into True.
        for name, value in saved["__compat__"].items():
            setattr(__compat__, name, value)

    def __enter__(self):
        return self.save()

    def __exit__(self, *args):
        return self.restore()

6928 

6929 class TestParseResultsAsserts: 

6930 """ 

6931 A mixin class to add parse results assertion methods to normal unittest.TestCase classes. 

6932 """ 

def assertParseResultsEquals(
    self, result, expected_list=None, expected_dict=None, msg=None
):
    """
    Unit test assertion comparing a ParseResults object against expectations.

    When ``expected_list`` is given it is compared with ``result.asList()``;
    when ``expected_dict`` is given it is compared with ``result.asDict()``.
    An expectation left as ``None`` is not checked.  ``msg`` is forwarded
    to ``assertEqual``.
    """
    check_list = expected_list is not None
    check_dict = expected_dict is not None

    if check_list:
        actual_list = result.asList()
        self.assertEqual(expected_list, actual_list, msg=msg)

    if check_dict:
        actual_dict = result.asDict()
        self.assertEqual(expected_dict, actual_dict, msg=msg)

6944 

6945 def assertParseAndCheckList( 

6946 self, expr, test_string, expected_list, msg=None, verbose=True 

6947 ): 

6948 """ 

6949 Convenience wrapper assert to test a parser element and input string, and assert that 

6950 the resulting ParseResults.asList() is equal to the expected_list. 

6951 """ 

6952 result = expr.parseString(test_string, parseAll=True) 

6953 if verbose: 

6954 print(result.dump()) 

6955 self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg) 

6956 

6957 def assertParseAndCheckDict( 

6958 self, expr, test_string, expected_dict, msg=None, verbose=True 

6959 ): 

6960 """ 

6961 Convenience wrapper assert to test a parser element and input string, and assert that 

6962 the resulting ParseResults.asDict() is equal to the expected_dict. 

6963 """ 

6964 result = expr.parseString(test_string, parseAll=True) 

6965 if verbose: 

6966 print(result.dump()) 

6967 self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg) 

6968 

6969 def assertRunTestResults( 

6970 self, run_tests_report, expected_parse_results=None, msg=None 

6971 ): 

6972 """ 

6973 Unit test assertion to evaluate output of ParserElement.runTests(). If a list of 

6974 list-dict tuples is given as the expected_parse_results argument, then these are zipped 

6975 with the report tuples returned by runTests and evaluated using assertParseResultsEquals. 

6976 Finally, asserts that the overall runTests() success value is True. 

6977 

6978 :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests 

6979 :param expected_parse_results (optional): [tuple(str, list, dict, Exception)] 

6980 """ 

6981 run_test_success, run_test_results = run_tests_report 

6982 

6983 if expected_parse_results is not None: 

6984 merged = [ 

6985 (rpt[0], rpt[1], expected) 

6986 for rpt, expected in zip(run_test_results, expected_parse_results) 

6987 ] 

6988 for test_string, result, expected in merged: 

6989 # expected should be a tuple containing a list and/or a dict or an exception, 

6990 # and optional failure message string 

6991 # an empty tuple will skip any result validation 

6992 fail_msg = next( 

6993 (exp for exp in expected if isinstance(exp, str)), None 

6994 ) 

6995 expected_exception = next( 

6996 ( 

6997 exp 

6998 for exp in expected 

6999 if isinstance(exp, type) and issubclass(exp, Exception) 

7000 ), 

7001 None, 

7002 ) 

7003 if expected_exception is not None: 

7004 with self.assertRaises( 

7005 expected_exception=expected_exception, msg=fail_msg or msg 

7006 ): 

7007 if isinstance(result, Exception): 

7008 raise result 

7009 else: 

7010 expected_list = next( 

7011 (exp for exp in expected if isinstance(exp, list)), None 

7012 ) 

7013 expected_dict = next( 

7014 (exp for exp in expected if isinstance(exp, dict)), None 

7015 ) 

7016 if (expected_list, expected_dict) != (None, None): 

7017 self.assertParseResultsEquals( 

7018 result, 

7019 expected_list=expected_list, 

7020 expected_dict=expected_dict, 

7021 msg=fail_msg or msg, 

7022 ) 

7023 else: 

7024 # warning here maybe? 

7025 print("no validation for {!r}".format(test_string)) 

7026 

7027 # do this last, in case some specific test results can be reported instead 

7028 self.assertTrue( 

7029 run_test_success, msg=msg if msg is not None else "failed runTests" 

7030 ) 

7031 

7032 @contextmanager 

7033 def assertRaisesParseException(self, exc_type=ParseException, msg=None): 

7034 with self.assertRaises(exc_type, msg=msg): 

7035 yield 

7036 

7037 

if __name__ == "__main__":

    import uuid

    # keywords of the toy "select ... from ..." grammar, matched caselessly
    select_kw = CaselessLiteral("select")
    from_kw = CaselessLiteral("from")

    identifier = Word(alphas, alphanums + "_$")

    # dotted names are combined into one token and passed through upcaseTokens
    column_name = delimitedList(identifier, ".", combine=True).setParseAction(upcaseTokens)
    column_names = Group(delimitedList(column_name)).setName("columns")
    column_spec = "*" | column_names

    table_name = delimitedList(identifier, ".", combine=True).setParseAction(upcaseTokens)
    table_names = Group(delimitedList(table_name)).setName("tables")

    select_clause = select_kw("command") + column_spec("columns")
    simple_sql = select_clause + from_kw + table_names("tables")

    # demo of the runTests method; the test string carries embedded comments
    simple_sql.runTests("""
        # '*' as column list and dotted table name
        select * from SYS.XYZZY

        # caseless match on "SELECT", and casts back to "select"
        SELECT * from XYZZY, ABC

        # list of column names, and mixed case SELECT keyword
        Select AA,BB,CC from Sys.dual

        # multiple tables
        Select A, B, C from Sys.dual, Table2

        # invalid SELECT keyword - should fail
        Xelect A, B, C from Sys.dual

        # incomplete command - should fail
        Select

        # invalid column name - should fail
        Select ^^^ frox Sys.dual

        """)

    pyparsing_common.number.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    # any int or real number, returned as float
    pyparsing_common.fnumber.runTests("""
        100
        -100
        +100
        3.14159
        6.02e23
        1e-12
        """)

    pyparsing_common.hex_integer.runTests("""
        100
        FF
        """)

    # attach a parse action that maps matched tokens through uuid.UUID
    pyparsing_common.uuid.setParseAction(tokenMap(uuid.UUID))
    pyparsing_common.uuid.runTests("""
        12345678-1234-5678-1234-567812345678
        """)