Package pyparsing :: Module pyparsing
[frames] | no frames]

Source Code for Module pyparsing.pyparsing

   1  # module pyparsing.py 
   2  # 
   3  # Copyright (c) 2003-2009  Paul T. McGuire 
   4  # 
   5  # Permission is hereby granted, free of charge, to any person obtaining 
   6  # a copy of this software and associated documentation files (the 
   7  # "Software"), to deal in the Software without restriction, including 
   8  # without limitation the rights to use, copy, modify, merge, publish, 
   9  # distribute, sublicense, and/or sell copies of the Software, and to 
  10  # permit persons to whom the Software is furnished to do so, subject to 
  11  # the following conditions: 
  12  # 
  13  # The above copyright notice and this permission notice shall be 
  14  # included in all copies or substantial portions of the Software. 
  15  # 
  16  # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
  17  # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
  18  # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
  19  # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
  20  # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
  21  # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
  22  # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
  23  # 
  24  #from __future__ import generators 
  25   
  26  __doc__ = \ 
  27  """ 
  28  pyparsing module - Classes and methods to define and execute parsing grammars 
  29   
  30  The pyparsing module is an alternative approach to creating and executing simple grammars, 
  31  vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you 
  32  don't need to learn a new syntax for defining grammars or matching expressions - the parsing module 
  33  provides a library of classes that you use to construct the grammar directly in Python. 
  34   
  35  Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!"):: 
  36   
  37      from pyparsing import Word, alphas 
  38   
  39      # define grammar of a greeting 
  40      greet = Word( alphas ) + "," + Word( alphas ) + "!" 
  41   
  42      hello = "Hello, World!" 
  43      print hello, "->", greet.parseString( hello ) 
  44   
  45  The program outputs the following:: 
  46   
  47      Hello, World! -> ['Hello', ',', 'World', '!'] 
  48   
  49  The Python representation of the grammar is quite readable, owing to the self-explanatory 
  50  class names, and the use of '+', '|' and '^' operators. 
  51   
  52  The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an 
  53  object with named attributes. 
  54   
  55  The pyparsing module handles some of the problems that are typically vexing when writing text parsers: 
  56   - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello  ,  World  !", etc.) 
  57   - quoted strings 
  58   - embedded comments 
  59  """ 
  60   
  61  __version__ = "1.5.2" 
  62  __versionTime__ = "9 April 2009 12:21" 
  63  __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>" 
  64   
  65  import string 
  66  from weakref import ref as wkref 
  67  import copy 
  68  import sys 
  69  import warnings 
  70  import re 
  71  import sre_constants 
  72  #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) ) 
  73   
# Names exported by "from pyparsing import *": the parser element classes
# first, followed by the helper functions and predefined expressions/constants.
__all__ = [
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
]
  93   
  94  """ 
  95  Detect if we are running version 3.X and make appropriate changes 
  96  Robert A. Clark 
  97  """ 
  98  _PY3K = sys.version_info[0] > 2 
  99  if _PY3K: 
 100      _MAX_INT = sys.maxsize 
 101      basestring = str 
 102      unichr = chr 
 103      _ustr = str 
 104      _str2dict = set 
 105      alphas = string.ascii_lowercase + string.ascii_uppercase 
 106  else: 
 107      _MAX_INT = sys.maxint 
 108   
109 - def _ustr(obj):
110 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries 111 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It 112 then < returns the unicode object | encodes it with the default encoding | ... >. 113 """ 114 if isinstance(obj,unicode): 115 return obj 116 117 try: 118 # If this works, then _ustr(obj) has the same behaviour as str(obj), so 119 # it won't break any existing code. 120 return str(obj) 121 122 except UnicodeEncodeError: 123 # The Python docs (http://docs.python.org/ref/customization.html#l2h-182) 124 # state that "The return value must be a string object". However, does a 125 # unicode object (being a subclass of basestring) count as a "string 126 # object"? 127 # If so, then return a unicode object: 128 return unicode(obj)
129 # Else encode it... but how? There are many choices... :) 130 # Replace unprintables with escape codes? 131 #return unicode(obj).encode(sys.getdefaultencoding(), 'backslashreplace_errors') 132 # Replace unprintables with question marks? 133 #return unicode(obj).encode(sys.getdefaultencoding(), 'replace') 134 # ... 135
136 - def _str2dict(strg):
137 return dict( [(c,0) for c in strg] )
138 139 alphas = string.lowercase + string.uppercase 140 141
def _xml_escape(data):
    """Return data with the characters &, >, <, " and ' replaced by the
       entities &amp;, &gt;, &lt;, &quot; and &apos; respectively."""
    # the ampersand has to be handled first, otherwise the '&' that starts
    # every entity inserted afterwards would be escaped a second time
    substitutions = (
        ('&',  '&amp;'),
        ('>',  '&gt;'),
        ('<',  '&lt;'),
        ('"',  '&quot;'),
        ("'",  '&apos;'),
    )
    for plain, entity in substitutions:
        data = data.replace(plain, entity)
    return data
151
class _Constants(object):
    # Deliberately empty: the class defines no attributes or methods of its own.
    # NOTE(review): presumably instantiated elsewhere in the module as a plain
    # attribute holder for named constants - confirm against its users.
    pass
# Character-set constants built from the string module and the "alphas"
# value chosen above for the running interpreter.
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( c for c in string.printable if c not in string.whitespace )
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions

       Instance attributes set by the constructor:
        - loc - character offset at which the problem was detected
        - msg - the error message
        - pstr - the string that was being parsed ("" when no msg was given)
        - parserElement - the element passed as elem (may be None)

       Attributes computed on demand (see __getattr__):
        - lineno - returns the line number of the exception text
        - col / column - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the lone string is the message and
            # there is no input text to report against
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col - returns the column number of the exception text
            - line - returns the line containing the exception text

           Any other name raises AttributeError.
        """
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.

           Returns the stripped line; when markerString is empty the line
           is returned without any marker inserted.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # The method is spelled markInputline (lower-case "l"); the list used
        # to advertise a non-existent "markInputLine".  "column" is listed as
        # well because __getattr__ accepts it as an alias of "col".
        return "loc msg pstr parserElement lineno col column line " \
               "markInputline __str__ __repr__".split()
208
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    # no additional behaviour: everything is inherited from ParseBaseException
217
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""
    # no additional behaviour: everything is inherited from ParseBaseException
222
class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # re-wrap an existing exception: carry over its input string,
        # location, message and originating parser element unchanged
        base = super(ParseSyntaxException, self)
        base.__init__( pe.pstr, pe.loc, pe.msg, pe.parserElement )

#~ class ReparseException(ParseBaseException):
    #~ """Experimental class - parse actions can raise this exception to cause
       #~ pyparsing to reparse the input string:
        #~ - with a modified input string, and/or
        #~ - with a modified start location
       #~ Set the values of the ReparseException in the constructor, and raise the
       #~ exception in a parse action to cause pyparsing to use the new string/location.
       #~ Setting the values as None causes no change to be made.
       #~ """
    #~ def __init_( self, newstring, restartLoc ):
        #~ self.newParseText = newstring
        #~ self.reparseLoc = restartLoc
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive

       The list handed to the constructor is kept on the instance as
       parseElementTrace and is what __str__ reports.
    """
    def __init__( self, parseElementList ):
        self.parseElementTrace = parseElementList

    def __str__( self ):
        trace = self.parseElementTrace
        return "RecursiveGrammarException: %s" % trace
251
class _ParseResultsWithOffset(object):
    """Pairs a value with an offset, stored together as the 2-tuple self.tup.

       Index 0 yields the value and index 1 the offset, so an instance can
       also be unpacked like a pair.
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)

    def __getitem__(self,i):
        pair = self.tup
        return pair[i]

    def __repr__(self):
        return repr(self.tup)

    def setOffset(self,i):
        # tuples are immutable, so rebuild the pair around the same value
        value = self.tup[0]
        self.tup = (value,i)
261
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

       Internally the positional tokens live in a list and the named results
       in a dict that maps each name to a list of (value, offset) entries.
       """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # An existing ParseResults is handed back unchanged; its __doinit flag
        # is already False, so __init__ will not reset its contents.
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            # first-time initialisation of a freshly created object
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of
                # reporting only the last one (see __getitem__)
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        # ints and slices index the token list; anything else is a results name
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index and shifts the stored
           offsets of named results located after that position."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Only reached when normal attribute lookup fails.  Unknown results
        # names yield "" rather than raising; names listed in __slots__
        # yield None.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        if other.__tokdict:
            offset = len(self.__toklist)

            def addoffset(a):
                # A negative offset is mapped to the insertion point itself.
                # (The former "(a<0 and offset) or (a+offset)" expression
                # returned the negative value unchanged when offset was 0.)
                if a < 0:
                    return offset
                return a + offset

            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        out = "["
        sep = ""
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                out += sep + _ustr(i)
            else:
                out += sep + repr(i)
            sep = ", "
        out += "]"
        return out

    def _asStringList( self, sep='' ):
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens.
           Nested ParseResults are converted to lists recursively; every
           other token is placed in the list unchanged."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token offset -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + " "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the object "sub" is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # dict views cannot be indexed on Python 3, so go through a list
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,(' '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the weak parent reference is resolved to the parent object (or None)
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # Class attributes plus the defined results names.  keys() is turned
        # into a list because list + dict view raises TypeError on Python 3,
        # and the class itself is inspected (the super() proxy only lists the
        # proxy's own attributes).
        return dir(type(self)) + list(self.keys())
609
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   A location that sits exactly on a newline character is reported as
   column 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # distance from the last newline before loc; rfind gives -1 when there
    # is none, which makes the first line 1-based as well
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
621
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # one line per newline found before loc, plus the line loc itself is on
    newlines_before = strg.count("\n",0,loc)
    return 1 + newlines_before
633
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.

       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() reports "not found" as -1, so index 0 is a valid hit; testing
    # "> 0" treated a newline at the very start of the string as missing and
    # returned the whole string instead of the (empty) first line
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
643
def _defaultStartDebugAction( instring, loc, expr ):
    """Print a "Match <expr> at loc <loc>(<line>,<col>)" message for a match
       attempt of expr at position loc of instring."""
    position = "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) )
    message = "Match " + _ustr(expr) + " at loc " + _ustr(loc) + position
    print (message)
646
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Print a "Matched <expr> -> <token list>" message; the string and
       location arguments are accepted but not used."""
    matched_tokens = str(toks.asList())
    message = "Matched " + _ustr(expr) + " -> " + matched_tokens
    print (message)
649
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Print an "Exception raised:<exc>" message; only exc is used."""
    message = "Exception raised:" + _ustr(exc)
    print (message)
652
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.

       Accepts any positional arguments, ignores them and returns None.
    """
    return None
656
657 -class ParserElement(object):
658 """Abstract base level parser element class.""" 659 DEFAULT_WHITE_CHARS = " \n\t\r" 660
661 - def setDefaultWhitespaceChars( chars ):
662 """Overrides the default whitespace chars 663 """ 664 ParserElement.DEFAULT_WHITE_CHARS = chars
665 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars) 666
667 - def __init__( self, savelist=False ):
668 self.parseAction = list() 669 self.failAction = None 670 #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall 671 self.strRepr = None 672 self.resultsName = None 673 self.saveAsList = savelist 674 self.skipWhitespace = True 675 self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 676 self.copyDefaultWhiteChars = True 677 self.mayReturnEmpty = False # used when checking for left-recursion 678 self.keepTabs = False 679 self.ignoreExprs = list() 680 self.debug = False 681 self.streamlined = False 682 self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index 683 self.errmsg = "" 684 self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all) 685 self.debugActions = ( None, None, None ) #custom debug actions 686 self.re = None 687 self.callPreparse = True # used to avoid redundant calls to preParse 688 self.callDuringTry = False
689
690 - def copy( self ):
691 """Make a copy of this ParserElement. Useful for defining different parse actions 692 for the same parsing pattern, using copies of the original parse element.""" 693 cpy = copy.copy( self ) 694 cpy.parseAction = self.parseAction[:] 695 cpy.ignoreExprs = self.ignoreExprs[:] 696 if self.copyDefaultWhiteChars: 697 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS 698 return cpy
699
700 - def setName( self, name ):
701 """Define name for this expression, for use in debugging.""" 702 self.name = name 703 self.errmsg = "Expected " + self.name 704 if hasattr(self,"exception"): 705 self.exception.msg = self.errmsg 706 return self
707
708 - def setResultsName( self, name, listAllMatches=False ):
709 """Define name for referencing matching tokens as a nested attribute 710 of the returned parse results. 711 NOTE: this returns a *copy* of the original ParserElement object; 712 this is so that the client can define a basic element, such as an 713 integer, and reference it in multiple places with different names. 714 """ 715 newself = self.copy() 716 newself.resultsName = name 717 newself.modalResults = not listAllMatches 718 return newself
719
720 - def setBreak(self,breakFlag = True):
721 """Method to invoke the Python pdb debugger when this element is 722 about to be parsed. Set breakFlag to True to enable, False to 723 disable. 724 """ 725 if breakFlag: 726 _parseMethod = self._parse 727 def breaker(instring, loc, doActions=True, callPreParse=True): 728 import pdb 729 pdb.set_trace() 730 return _parseMethod( instring, loc, doActions, callPreParse )
731 breaker._originalParseMethod = _parseMethod 732 self._parse = breaker 733 else: 734 if hasattr(self._parse,"_originalParseMethod"): 735 self._parse = self._parse._originalParseMethod 736 return self
737
def _normalizeParseActionArgs( f ):
    """Internal method used to decorate parse actions that take fewer than 3 arguments,
       so that all parse actions can be called as f(s,l,t).

       f is returned unchanged when its code object has the *args flag set
       or when it is found to take exactly 3 arguments.  Otherwise a wrapper
       taking (s,l,t) is returned that forwards only the trailing arguments
       f accepts; the wrapper copies f's __name__, __doc__ and __dict__
       where possible.
       """
    # bit in code.co_flags that is tested to detect a *args parameter
    STAR_ARGS = 4

    # First attempt: treat f as a plain function or method (or, for a class,
    # use its __init__) and read the argument count from its code object.
    try:
        restore = None
        if isinstance(f,type):
            restore = f
            f = f.__init__
        if not _PY3K:
            codeObj = f.func_code
        else:
            # NOTE(review): Python 3 functions expose "__code__", not "code";
            # this lookup presumably raises AttributeError and drops into the
            # handler below, in which case "restore" is never applied - confirm.
            codeObj = f.code
        if codeObj.co_flags & STAR_ARGS:
            return f
        numargs = codeObj.co_argcount
        # a bound method's implicit first argument is not supplied by the caller
        if not _PY3K:
            if hasattr(f,"im_self"):
                numargs -= 1
        else:
            if hasattr(f,"__self__"):
                numargs -= 1
        if restore:
            f = restore
    except AttributeError:
        try:
            if not _PY3K:
                call_im_func_code = f.__call__.im_func.func_code
            else:
                call_im_func_code = f.__code__

            # not a function, must be a callable object, get info from the
            # im_func binding of its bound __call__ method
            if call_im_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_im_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    # NOTE(review): subtracting 0 is a no-op, so the count is
                    # left untouched on this path - confirm that is intended.
                    numargs -= 0
        except AttributeError:
            if not _PY3K:
                call_func_code = f.__call__.func_code
            else:
                call_func_code = f.__call__.__code__
            # not a bound method, get info directly from __call__ method
            if call_func_code.co_flags & STAR_ARGS:
                return f
            numargs = call_func_code.co_argcount
            if not _PY3K:
                if hasattr(f.__call__,"im_self"):
                    numargs -= 1
            else:
                if hasattr(f.__call__,"__self__"):
                    numargs -= 1


    #~ print ("adding function %s with %d args" % (f.func_name,numargs))
    if numargs == 3:
        return f
    else:
        # NOTE(review): the "numargs > 3" wrapper below is followed by an
        # independent if/elif/else chain, so for numargs > 3 the final else
        # branch redefines tmp as the zero-argument form - confirm intent.
        if numargs > 3:
            def tmp(s,l,t):
                return f(f.__call__.__self__, s,l,t)
        if numargs == 2:
            def tmp(s,l,t):
                return f(l,t)
        elif numargs == 1:
            def tmp(s,l,t):
                return f(t)
        else: #~ numargs == 0:
            def tmp(s,l,t):
                return f()
        # make the wrapper look like the wrapped callable (best effort)
        try:
            tmp.__name__ = f.__name__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__doc__ = f.__doc__
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        try:
            tmp.__dict__.update(f.__dict__)
        except (AttributeError,TypeError):
            # no need for special handling if attribute doesnt exist
            pass
        return tmp
_normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
832 - def setParseAction( self, *fns, **kwargs ):
833 """Define action to perform when successfully matching parse element definition. 834 Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks), 835 fn(loc,toks), fn(toks), or just fn(), where: 836 - s = the original string being parsed (see note below) 837 - loc = the location of the matching substring 838 - toks = a list of the matched tokens, packaged as a ParseResults object 839 If the functions in fns modify the tokens, they can return them as the return 840 value from fn, and the modified list of tokens will replace the original. 841 Otherwise, fn does not need to return any value. 842 843 Note: the default parsing behavior is to expand tabs in the input string 844 before starting the parsing process. See L{I{parseString}<parseString>} for more information 845 on parsing strings containing <TAB>s, and suggested methods to maintain a 846 consistent view of the parsed string, the parse location, and line and column 847 positions within the parsed string. 848 """ 849 self.parseAction = list(map(self._normalizeParseActionArgs, list(fns))) 850 self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 851 return self
852
853 - def addParseAction( self, *fns, **kwargs ):
854 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}.""" 855 self.parseAction += list(map(self._normalizeParseActionArgs, list(fns))) 856 self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"]) 857 return self
858
859 - def setFailAction( self, fn ):
860 """Define action to perform if parsing fails at this expression. 861 Fail acton fn is a callable function that takes the arguments 862 fn(s,loc,expr,err) where: 863 - s = string being parsed 864 - loc = location where expression match was attempted and failed 865 - expr = the parse expression that failed 866 - err = the exception thrown 867 The function returns no value. It may throw ParseFatalException 868 if it is desired to stop parsing immediately.""" 869 self.failAction = fn 870 return self
871
872 - def _skipIgnorables( self, instring, loc ):
873 exprsFound = True 874 while exprsFound: 875 exprsFound = False 876 for e in self.ignoreExprs: 877 try: 878 while 1: 879 loc,dummy = e._parse( instring, loc ) 880 exprsFound = True 881 except ParseException: 882 pass 883 return loc
884
885 - def preParse( self, instring, loc ):
886 if self.ignoreExprs: 887 loc = self._skipIgnorables( instring, loc ) 888 889 if self.skipWhitespace: 890 wt = self.whiteChars 891 instrlen = len(instring) 892 while loc < instrlen and instring[loc] in wt: 893 loc += 1 894 895 return loc
896
def parseImpl( self, instring, loc, doActions=True ):
    """Base implementation: consume nothing and produce no tokens.

       Returns the unchanged location together with a new empty list.
    """
    no_tokens = []
    return loc, no_tokens
899
def postParse( self, instring, loc, tokenlist ):
    """Default post-processing step: hand back tokenlist unchanged."""
    return tokenlist
902 903 #~ @profile
def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Attempt to match this expression in instring at loc, without memoization.

       Steps: optional preParse (only when both callPreParse and
       self.callPreparse are true), parseImpl, postParse, wrapping of the
       tokens in a ParseResults, and finally the parse actions (only when
       doActions or self.callDuringTry is true).

       Returns a tuple (loc, retTokens), where loc is the location returned by
       parseImpl and retTokens is the resulting ParseResults.

       When debugging or a fail action is configured, the debug/fail callbacks
       are invoked around the match and a ParseBaseException is re-raised after
       they have run.
    """
    debugging = ( self.debug ) #and doActions )

    if debugging or self.failAction:
        # slow path: hooks must be called, so every failure has to be intercepted
        #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
        if (self.debugActions[0] ):
            self.debugActions[0]( instring, loc, self )
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        # note: the token start reported to actions is the location *before* preParse
        tokensStart = loc
        try:
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                # parseImpl indexed past the end of the string: report as a parse failure
                raise ParseException( instring, len(instring), self.errmsg, self )
        except ParseBaseException:
            #~ print ("Exception raised:", err)
            err = None
            if self.debugActions[2]:
                err = sys.exc_info()[1]
                self.debugActions[2]( instring, tokensStart, self, err )
            if self.failAction:
                if err is None:
                    err = sys.exc_info()[1]
                self.failAction( instring, tokensStart, self, err )
            raise
    else:
        # fast path: no hooks to call
        if callPreParse and self.callPreparse:
            preloc = self.preParse( instring, loc )
        else:
            preloc = loc
        tokensStart = loc
        # only pay for the IndexError guard when this expression may index past
        # the end, or when loc is already at/after the end of the string
        if self.mayIndexError or loc >= len(instring):
            try:
                loc,tokens = self.parseImpl( instring, preloc, doActions )
            except IndexError:
                raise ParseException( instring, len(instring), self.errmsg, self )
        else:
            loc,tokens = self.parseImpl( instring, preloc, doActions )

    tokens = self.postParse( instring, loc, tokens )

    retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
    if self.parseAction and (doActions or self.callDuringTry):
        if debugging:
            try:
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    # a parse action returning a value replaces the current results
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )
            except ParseBaseException:
                #~ print "Exception raised in user parse action:", err
                if (self.debugActions[2] ):
                    err = sys.exc_info()[1]
                    self.debugActions[2]( instring, tokensStart, self, err )
                raise
        else:
            for fn in self.parseAction:
                tokens = fn( instring, tokensStart, retTokens )
                # a parse action returning a value replaces the current results
                if tokens is not None:
                    retTokens = ParseResults( tokens,
                                              self.resultsName,
                                              asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                              modal=self.modalResults )

    if debugging:
        #~ print ("Matched",self,"->",retTokens.asList())
        if (self.debugActions[1] ):
            self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

    return loc, retTokens
980
def tryParse( self, instring, loc ):
    """Test-match this expression at loc with doActions=False and return the
       end location of the match.  A ParseFatalException raised during the
       attempt is converted into an ordinary ParseException at loc.
    """
    try:
        endLoc = self._parse( instring, loc, doActions=False )[0]
    except ParseFatalException:
        raise ParseException( instring, loc, self.errmsg, self)
    return endLoc
986 987 # this method gets repeatedly called during backtracking with the same arguments - 988 # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
    """Memoizing wrapper around _parseNoCache (installed as _parse by enablePackrat).

       Results are keyed on (self, instring, loc, callPreParse, doActions) in
       ParserElement._exprArgCache.  Both successful (loc, ParseResults) values
       and raised ParseBaseExceptions are cached; a cached exception is
       re-raised on a later hit.

       Returns the (loc, ParseResults) tuple for the match.
    """
    lookup = (self,instring,loc,callPreParse,doActions)
    cache = ParserElement._exprArgCache
    if lookup in cache:
        value = cache[ lookup ]
        if isinstance(value,Exception):
            raise value
        # return a copy of the cached results: handing out the cached object
        # itself would let a caller's modifications leak into later cache hits
        return (value[0],value[1].copy())
    else:
        try:
            value = self._parseNoCache( instring, loc, doActions, callPreParse )
            # store a copy, so later changes to the returned results do not alter the cache
            cache[ lookup ] = (value[0],value[1].copy())
            return value
        except ParseBaseException:
            pe = sys.exc_info()[1]
            cache[ lookup ] = pe
            raise
# parsing entry point used by the expressions; the uncached implementation is the default
_parse = _parseNoCache

# argument cache for optimizing repeated calls when backtracking through recursive expressions
_exprArgCache = {}
def resetCache():
    """Discard every entry held in ParserElement._exprArgCache."""
    ParserElement._exprArgCache.clear()
resetCache = staticmethod(resetCache)

# set to True by enablePackrat
_packratEnabled = False
def enablePackrat():
    """Enables "packrat" parsing, which adds memoizing to the parsing logic.
       Repeated parse attempts at the same string location (which happens
       often in many complex grammars) can immediately return a cached value,
       instead of re-executing parsing/validating code.  Memoizing is done of
       both valid results and parsing exceptions.

       This speedup may break existing programs that use parse actions that
       have side-effects.  For this reason, packrat parsing is disabled when
       you first import pyparsing.  To activate the packrat feature, your
       program must call the class method ParserElement.enablePackrat().  If
       your program uses psyco to "compile as you go", you must call
       enablePackrat before calling psyco.full().  If you do not do this,
       Python will crash.  For best results, call enablePackrat() immediately
       after importing pyparsing.

       Calling this method more than once has no further effect.
    """
    if ParserElement._packratEnabled:
        return
    ParserElement._packratEnabled = True
    ParserElement._parse = ParserElement._parseCache
enablePackrat = staticmethod(enablePackrat)
def parseString( self, instring, parseAll=False ):
    """Execute the parse expression with the given string.
       This is the main interface to the client code, once the complete
       expression has been built.

       If you want the grammar to require that the entire input string be
       successfully parsed, then set parseAll to True (equivalent to ending
       the grammar with StringEnd()).

       Note: parseString implicitly calls expandtabs() on the input string,
       in order to report proper column numbers in parse actions.
       If the input string contains tabs and
       the grammar uses parse actions that use the loc argument to index into the
       string being parsed, you can ensure you have a consistent view of the input
       string by:
        - calling parseWithTabs on your grammar before calling parseString
          (see L{I{parseWithTabs}<parseWithTabs>})
        - define your parse action using the full (s,loc,toks) signature, and
          reference the input string using the parse action's s argument
        - explicitly expand the tabs in your input string before calling
          parseString
    """
    ParserElement.resetCache()
    if not self.streamlined:
        self.streamline()
        #~ self.saveAsList = True
    for ignorable in self.ignoreExprs:
        ignorable.streamline()
    if not self.keepTabs:
        instring = instring.expandtabs()
    try:
        endLoc, results = self._parse( instring, 0 )
        if parseAll:
            endLoc = self.preParse( instring, endLoc )
            StringEnd()._parse( instring, endLoc )
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        caught = sys.exc_info()[1]
        raise caught
    return results
1077
def scanString( self, instring, maxMatches=_MAX_INT ):
    """Scan the input string for expression matches.  Each match will return the
       matching tokens, start location, and end location.  May be called with optional
       maxMatches argument, to clip scanning after 'n' matches are found.

       Note that the start and end locations are reported relative to the string
       being parsed.  See L{I{parseString}<parseString>} for more information on parsing
       strings with embedded tabs."""
    if not self.streamlined:
        self.streamline()
    for ignorable in self.ignoreExprs:
        ignorable.streamline()

    if not self.keepTabs:
        instring = _ustr(instring).expandtabs()
    instrlen = len(instring)
    loc = 0
    skipLeading = self.preParse
    matchAt = self._parse
    ParserElement.resetCache()
    found = 0
    try:
        while loc <= instrlen and found < maxMatches:
            try:
                preloc = skipLeading( instring, loc )
                nextLoc,tokens = matchAt( instring, preloc, callPreParse=False )
            except ParseException:
                # no match here: retry one character further on
                loc = preloc+1
                continue
            if nextLoc <= loc:
                # the match made no progress past loc; step forward instead of reporting it
                loc = preloc+1
                continue
            found += 1
            yield tokens, preloc, nextLoc
            loc = nextLoc
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1116
def transformString( self, instring ):
    """Extension to scanString, to modify matching text with modified tokens that may
       be returned from a parse action.  To use transformString, define a grammar and
       attach a parse action to it that modifies the returned token list.
       Invoking transformString() on a target string will then scan for matches,
       and replace the matched text patterns according to the logic in the parse
       action.  transformString() returns the resulting transformed string."""
    pieces = []
    prevEnd = 0
    # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
    # keep string locs straight between transformString and scanString
    self.keepTabs = True
    try:
        for toks,start,end in self.scanString( instring ):
            # unmatched text between the previous match and this one is copied verbatim
            pieces.append( instring[prevEnd:start] )
            if toks:
                if isinstance(toks,ParseResults):
                    pieces.extend( toks.asList() )
                elif isinstance(toks,list):
                    pieces.extend( toks )
                else:
                    pieces.append(toks)
            prevEnd = end
        pieces.append(instring[prevEnd:])
        return "".join([ _ustr(piece) for piece in pieces ])
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1145
def searchString( self, instring, maxMatches=_MAX_INT ):
    """Another extension to scanString, simplifying the access to the tokens found
       to match the given parse expression.  May be called with optional
       maxMatches argument, to clip searching after 'n' matches are found.
       Returns a ParseResults built from the tokens of each match.
    """
    try:
        allTokens = []
        for toks,start,end in self.scanString( instring, maxMatches ):
            allTokens.append( toks )
        return ParseResults( allTokens )
    except ParseBaseException:
        pe = sys.exc_info()[1]
        raise pe
1156
def __add__(self, other ):
    """Implementation of + operator - returns And.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1166
def __radd__(self, other ):
    """Implementation of + operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other + self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1176
def __sub__(self, other):
    """Implementation of - operator, returns And with error stop.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return And( [ self, And._ErrorStop(), other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1186
def __rsub__(self, other ):
    """Implementation of - operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other - self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1196
def __mul__(self,other):
    """Implementation of * operator - repetition of this expression.

       other may be:
        - an int n: exactly n copies of this expression
        - a tuple (min, max) of ints: at least min and at most max copies
        - a tuple (min, None) or (min,): min or more copies; a min of 0 gives
          ZeroOrMore, a min of 1 gives OneOrMore
        - a tuple (None, max): treated as (0, max)

       Raises TypeError for any other operand, and ValueError for a negative
       count, a max smaller than min, or a total of zero repetitions.
    """
    if isinstance(other,int):
        minElements, optElements = other,0
    elif isinstance(other,tuple):
        # pad short tuples with None, so (n,) behaves like (n, None)
        other = (other + (None, None))[:2]
        if other[0] is None:
            other = (0, other[1])
        if isinstance(other[0],int) and other[1] is None:
            if other[0] == 0:
                return ZeroOrMore(self)
            if other[0] == 1:
                return OneOrMore(self)
            else:
                return self*other[0] + ZeroOrMore(self)
        elif isinstance(other[0],int) and isinstance(other[1],int):
            minElements, optElements = other
            # optElements becomes the number of optional copies beyond the minimum
            optElements -= minElements
        else:
            # format the message with %; passing the values as extra arguments
            # to TypeError would leave the placeholders unfilled
            raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects" % (type(other[0]),type(other[1])))
    else:
        raise TypeError("cannot multiply 'ParserElement' and '%s' objects" % (type(other),))

    if minElements < 0:
        raise ValueError("cannot multiply ParserElement by negative value")
    if optElements < 0:
        raise ValueError("second tuple value must be greater or equal to first tuple value")
    if minElements == optElements == 0:
        raise ValueError("cannot multiply ParserElement by 0 or (0,0)")

    if (optElements):
        def makeOptionalList(n):
            # nested Optionals: Optional(self + Optional(self + ...)), n levels deep
            if n>1:
                return Optional(self + makeOptionalList(n-1))
            else:
                return Optional(self)
        if minElements:
            if minElements == 1:
                ret = self + makeOptionalList(optElements)
            else:
                ret = And([self]*minElements) + makeOptionalList(optElements)
        else:
            ret = makeOptionalList(optElements)
    else:
        if minElements == 1:
            ret = self
        else:
            ret = And([self]*minElements)
    return ret
def __rmul__(self, other):
    """Implementation of * operator when the left operand is not a
       ParserElement; delegates to __mul__."""
    product = self.__mul__(other)
    return product
1248
def __or__(self, other ):
    """Implementation of | operator - returns MatchFirst.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return MatchFirst( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1258
def __ror__(self, other ):
    """Implementation of | operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other | self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1268
def __xor__(self, other ):
    """Implementation of ^ operator - returns Or.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Or( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1278
def __rxor__(self, other ):
    """Implementation of ^ operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other ^ self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1288
def __and__(self, other ):
    """Implementation of & operator - returns Each.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return Each( [ self, other ] )
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1298
def __rand__(self, other ):
    """Implementation of & operator when left operand is not a ParserElement.
       A string operand is converted to a Literal; any other non-ParserElement
       operand produces a SyntaxWarning and a None result."""
    if isinstance( other, basestring ):
        other = Literal( other )
    if isinstance( other, ParserElement ):
        return other & self
    warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
            SyntaxWarning, stacklevel=2)
    return None
1308
def __invert__( self ):
    """Implementation of ~ operator - returns NotAny wrapping this expression"""
    negated = NotAny( self )
    return negated
1312
def __call__(self, name):
    """Shortcut for setResultsName, with listAllMatches=default::
         userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
       could be written as::
         userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
       Returns whatever setResultsName returns.
    """
    named = self.setResultsName(name)
    return named
1320
def suppress( self ):
    """Suppresses the output of this ParserElement; useful to keep punctuation from
       cluttering up returned output.  Returns this expression wrapped in a Suppress.
    """
    wrapped = Suppress( self )
    return wrapped
1326
def leaveWhitespace( self ):
    """Disables the skipping of whitespace before matching the characters in the
       ParserElement's defined pattern.  This is normally only used internally by
       the pyparsing module, but may be needed in some whitespace-sensitive grammars.
       Returns self, so that calls can be chained.
    """
    self.skipWhitespace = False
    return self
1334
def setWhitespaceChars( self, chars ):
    """Overrides the default whitespace chars with chars, and turns whitespace
       skipping on for this expression.  Returns self, so that calls can be chained.
    """
    self.whiteChars = chars
    self.skipWhitespace = True
    # the whitespace set was chosen explicitly, so stop tracking the default
    self.copyDefaultWhiteChars = False
    return self
1342
def parseWithTabs( self ):
    """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
       Must be called before parseString when the input grammar contains elements that
       match <TAB> characters.  Returns self, so that calls can be chained."""
    self.keepTabs = True
    return self
1349
def ignore( self, other ):
    """Define expression to be ignored (e.g., comments) while doing pattern
       matching; may be called repeatedly, to define multiple comment or other
       ignorable patterns.  Returns self, so that calls can be chained.
    """
    if not isinstance( other, Suppress ):
        self.ignoreExprs.append( Suppress( other ) )
    elif other not in self.ignoreExprs:
        # an expression that is already a Suppress is only registered once
        self.ignoreExprs.append( other )
    return self
1361
def setDebugActions( self, startAction, successAction, exceptionAction ):
    """Enable display of debugging messages while doing pattern matching,
       using the given callbacks.  Any callback passed as a false value is
       replaced by the corresponding module default.  Returns self."""
    onStart = startAction or _defaultStartDebugAction
    onSuccess = successAction or _defaultSuccessDebugAction
    onException = exceptionAction or _defaultExceptionDebugAction
    self.debugActions = (onStart, onSuccess, onException)
    self.debug = True
    return self
1369
def setDebug( self, flag=True ):
    """Enable display of debugging messages while doing pattern matching.
       Set flag to True to enable (using the default debug actions), False to
       disable.  Returns self."""
    if not flag:
        self.debug = False
        return self
    self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
    return self
1378
def __str__( self ):
    """Return the name of this expression."""
    return self.name
1381
def __repr__( self ):
    """Return the string form of this expression, as converted by _ustr."""
    text = _ustr(self)
    return text
1384
def streamline( self ):
    """Mark this expression as streamlined and clear its cached string
       representation.  Returns self."""
    self.strRepr = None
    self.streamlined = True
    return self
1389
def checkRecursion( self, parseElementList ):
    """Recursion check hook; this implementation does nothing and returns None."""
    return None
1392
def validate( self, validateTrace=None ):
    """Check defined expressions for valid structure, check for infinite recursive definitions.

       validateTrace is accepted for signature compatibility and is not used by
       this implementation; its default is None rather than a shared mutable
       list.  The check itself is delegated to checkRecursion, starting from an
       empty list.
    """
    self.checkRecursion( [] )
1396
def parseFile( self, file_or_filename, parseAll=False ):
    """Execute the parse expression on the given file or filename.
       If a filename is specified (instead of a file object),
       the entire file is opened, read, and closed before parsing.

       file_or_filename is first treated as a file object (its read() method is
       called); if it has no read attribute it is opened as a filename in "rb"
       mode.  parseAll is passed through to parseString, whose result is returned.
    """
    try:
        file_contents = file_or_filename.read()
    except AttributeError:
        f = open(file_or_filename, "rb")
        try:
            file_contents = f.read()
        finally:
            # always release the handle, even if the read fails
            f.close()
    try:
        return self.parseString(file_contents, parseAll)
    except ParseBaseException:
        # catch and re-raise exception from here, clears out pyparsing internal stack trace
        exc = sys.exc_info()[1]
        raise exc
1414
def getException(self):
    """Build a ParseException for this expression, carrying self.errmsg, with
       an empty string and location 0 as placeholders."""
    placeholder = ParseException("",0,self.errmsg,self)
    return placeholder
1417
def __getattr__(self,aname):
    """Fallback attribute lookup.  The attribute myException is created on first
       access from getException() and stored on the instance; any other name
       raises AttributeError."""
    if aname != "myException":
        raise AttributeError("no such attribute " + aname)
    self.myException = created = self.getException()
    return created
1424
def __eq__(self,other):
    """Equality test.  Against another ParserElement: identity, or equal
       instance dictionaries.  Against a string: True if this expression parses
       the entire string.  Anything else is compared with the super object."""
    if isinstance(other, ParserElement):
        return self is other or self.__dict__ == other.__dict__
    if isinstance(other, basestring):
        try:
            self.parseString(_ustr(other), parseAll=True)
        except ParseBaseException:
            return False
        return True
    return super(ParserElement,self)==other
1436
def __ne__(self,other):
    """Inequality test: the negation of self == other."""
    areEqual = (self == other)
    return not areEqual
1439
def __hash__(self):
    """Hash on object identity."""
    identity = id(self)
    return hash(identity)
1442
def __req__(self,other):
    """Reflected equality helper: returns the result of self == other."""
    outcome = (self == other)
    return outcome
1445
def __rne__(self,other):
    """Reflected inequality helper: the negation of self == other."""
    areEqual = (self == other)
    return not areEqual
1448 1449
class Token(ParserElement):
    """Abstract ParserElement subclass, for defining atomic matching patterns."""
    def __init__( self ):
        super(Token,self).__init__( savelist=False )

    def setName(self, name):
        """Set the name through the base class, then rebuild errmsg from the
           new name.  Returns the base class's result."""
        renamed = super(Token,self).setName(name)
        self.errmsg = "Expected " + self.name
        return renamed
1461 1462
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        super(Empty,self).__init__()
        self.name = "Empty"
        # matching consumes no input
        self.mayIndexError = False
        self.mayReturnEmpty = True
1470 1471
class NoMatch(Token):
    """A token that will never match."""
    def __init__( self ):
        super(NoMatch,self).__init__()
        self.name = "NoMatch"
        self.errmsg = "Unmatchable token"
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Always fail: update this expression's myException with the current
           position and string, and raise it."""
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1487 1488
class Literal(Token):
    """Token to exactly match a specified string."""
    def __init__( self, matchString ):
        super(Literal,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            # an empty match string: warn, and turn this instance into an Empty
            warnings.warn("null string passed to Literal; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
            self.__class__ = Empty
        self.name = '"%s"' % _ustr(self.match)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False

    # Performance tuning: this routine gets called a *lot*
    # if this is a single character match string  and the first character matches,
    # short-circuit as quickly as possible, and avoid calling startswith
    #~ @profile
    def parseImpl( self, instring, loc, doActions=True ):
        """Match the literal at loc.  Returns (end location, match string);
           otherwise raises this expression's myException updated with loc
           and instring."""
        if instring[loc] == self.firstMatchChar:
            if self.matchLen==1 or instring.startswith(self.match,loc):
                return loc+self.matchLen, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
_L = Literal
class Keyword(Token):
    """Token to exactly match a specified string as a keyword, that is, it must be
       immediately followed by a non-keyword character.  Compare with Literal::
         Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
         Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
       Accepts two optional constructor arguments in addition to the keyword string:
       identChars is a string of characters that would be valid identifier characters,
       defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
       matching, default is False.
    """
    DEFAULT_KEYWORD_CHARS = alphanums+"_$"

    def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
        super(Keyword,self).__init__()
        self.match = matchString
        self.matchLen = len(matchString)
        try:
            self.firstMatchChar = matchString[0]
        except IndexError:
            warnings.warn("null string passed to Keyword; use Empty() instead",
                            SyntaxWarning, stacklevel=2)
        self.name = '"%s"' % self.match
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = False
        self.caseless = caseless
        if caseless:
            # caseless comparisons are done in upper case
            self.caselessmatch = matchString.upper()
            identChars = identChars.upper()
        self.identChars = _str2dict(identChars)

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc, requiring that the characters immediately
           before and after it (if any) are not in identChars.  Returns
           (end location, match string); otherwise raises this expression's
           myException updated with loc and instring."""
        end = loc+self.matchLen
        if self.caseless:
            matched = ( (instring[ loc:end ].upper() == self.caselessmatch) and
                        (end >= len(instring) or instring[end].upper() not in self.identChars) and
                        (loc == 0 or instring[loc-1].upper() not in self.identChars) )
        else:
            matched = ( instring[loc] == self.firstMatchChar and
                        (self.matchLen==1 or instring.startswith(self.match,loc)) and
                        (end >= len(instring) or instring[end] not in self.identChars) and
                        (loc == 0 or instring[loc-1] not in self.identChars) )
        if matched:
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    def copy(self):
        """Copy this expression through the base class; the copy's identChars is
           set to Keyword.DEFAULT_KEYWORD_CHARS."""
        # NOTE(review): this replaces any custom identChars on the copy with the
        # class default string - confirm that this is intended
        duplicate = super(Keyword,self).copy()
        duplicate.identChars = Keyword.DEFAULT_KEYWORD_CHARS
        return duplicate

    def setDefaultKeywordChars( chars ):
        """Overrides the default Keyword chars
        """
        Keyword.DEFAULT_KEYWORD_CHARS = chars
    setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1582
class CaselessLiteral(Literal):
    """Token to match a specified string, ignoring case of letters.
       Note: the matched results will always be in the case of the given
       match string, NOT the case of the input text.
    """
    def __init__( self, matchString ):
        # the base Literal holds the upper-cased form used for comparisons
        super(CaselessLiteral,self).__init__( matchString.upper() )
        # Preserve the defining literal.
        self.returnString = matchString
        self.name = "'%s'" % self.returnString
        self.errmsg = "Expected " + self.name

    def parseImpl( self, instring, loc, doActions=True ):
        """Compare the upper-cased input slice at loc with the stored match.
           Returns (end location, defining literal); otherwise raises this
           expression's myException updated with loc and instring."""
        end = loc+self.matchLen
        if instring[ loc:end ].upper() == self.match:
            return end, self.returnString
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1604
class CaselessKeyword(Keyword):
    """Keyword that is always constructed with caseless=True."""
    def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
        super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the keyword at loc ignoring case, requiring that the character
           following it (if any) is not in identChars.  Returns
           (end location, match string); otherwise raises this expression's
           myException updated with loc and instring."""
        end = loc+self.matchLen
        if ( (instring[ loc:end ].upper() == self.caselessmatch) and
             (end >= len(instring) or instring[end].upper() not in self.identChars) ):
            return end, self.match
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
1618
class Word(Token):
    """Token for matching words composed of allowed character sets.
       Defined with string containing all allowed initial characters,
       an optional string containing allowed body characters (if omitted,
       defaults to the initial character set), and an optional minimum,
       maximum, and/or exact length.  The default value for min is 1 (a
       minimum value < 1 is not valid); the default values for max and exact
       are 0, meaning no maximum or exact length restriction.
    """
    def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
        """Raises ValueError if min is less than 1."""
        super(Word,self).__init__()
        self.initCharsOrig = initChars
        self.initChars = _str2dict(initChars)
        if bodyChars :
            self.bodyCharsOrig = bodyChars
            self.bodyChars = _str2dict(bodyChars)
        else:
            self.bodyCharsOrig = initChars
            self.bodyChars = _str2dict(initChars)

        self.maxSpecified = max > 0

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")

        self.minLen = min

        if max > 0:
            self.maxLen = max
        else:
            self.maxLen = _MAX_INT

        # an exact length overrides both the minimum and the maximum
        if exact > 0:
            self.maxLen = exact
            self.minLen = exact

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.asKeyword = asKeyword

        # fast path: with default length limits and no space in the character
        # sets, the word can be matched by a single compiled regular expression
        if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
            if self.bodyCharsOrig == self.initCharsOrig:
                self.reString = "[%s]+" % _escapeRegexRangeChars(self.initCharsOrig)
            elif len(self.initCharsOrig) == 1:
                # the initial character may only be written as an escaped literal
                # when there is exactly one of them; a longer initChars string
                # would otherwise be matched as a fixed character sequence
                self.reString = "%s[%s]*" % \
                                      (re.escape(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            else:
                self.reString = "[%s][%s]*" % \
                                      (_escapeRegexRangeChars(self.initCharsOrig),
                                      _escapeRegexRangeChars(self.bodyCharsOrig),)
            if self.asKeyword:
                self.reString = r"\b"+self.reString+r"\b"
            try:
                self.re = re.compile( self.reString )
            except Exception:
                # the pattern could not be compiled: fall back to the
                # character-by-character matching in parseImpl
                self.re = None

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a word at loc.  Returns (end location, matched text); otherwise
           raises this expression's myException updated with loc and instring."""
        if self.re:
            result = self.re.match(instring,loc)
            if not result:
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc

            loc = result.end()
            return loc,result.group()

        if not(instring[ loc ] in self.initChars):
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        start = loc
        loc += 1
        instrlen = len(instring)
        bodychars = self.bodyChars
        # never scan beyond the maximum word length or the end of the string
        maxloc = start + self.maxLen
        maxloc = min( maxloc, instrlen )
        while loc < maxloc and instring[loc] in bodychars:
            loc += 1

        throwException = False
        if loc - start < self.minLen:
            throwException = True
        # with an explicit maximum, a word that continues past it is not a match
        if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
            throwException = True
        if self.asKeyword:
            # as a keyword, the word must not be adjacent to other body characters
            if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
                throwException = True

        if throwException:
            #~ raise ParseException( instring, loc, self.errmsg )
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base class's string form if that succeeds; otherwise a
           "W:(...)" summary of the character sets, cached in strRepr."""
        try:
            return super(Word,self).__str__()
        except Exception:
            # the base class lookup failed; build the summary form below
            pass


        if self.strRepr is None:

            def charsAsStr(s):
                # abbreviate long character sets to their first four characters
                if len(s)>4:
                    return s[:4]+"..."
                else:
                    return s

            if ( self.initCharsOrig != self.bodyCharsOrig ):
                self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
            else:
                self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)

        return self.strRepr
1745 1746
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.flags = flags
        self.pattern = pattern

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            # warn about the bad pattern, then let the original error propagate
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = True
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the compiled pattern at loc.  Returns (end location, results),
           where results holds the whole match plus one named entry per named
           group; otherwise raises this expression's myException updated with
           loc and instring."""
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        results = ParseResults(found.group())
        for groupName, groupValue in found.groupdict().items():
            results[groupName] = groupValue
        return found.end(),results

    def __str__( self ):
        """Return the base class's string form if that succeeds; otherwise a
           "Re:(...)" summary of the pattern, cached in strRepr."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1802 1803
class QuotedString(Token):
    """Token for matching strings that are delimited by quoting characters.
    """
    def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
        """
           Defined with the following parameters:
            - quoteChar - string of one or more characters defining the quote delimiting string
            - escChar - character to escape quotes, typically backslash (default=None)
            - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
            - multiline - boolean indicating whether quotes can span multiple lines (default=False)
            - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
            - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

           Raises SyntaxError (after a SyntaxWarning) if quoteChar or
           endQuoteChar is empty once surrounding whitespace is stripped.
        """
        super(QuotedString,self).__init__()

        # remove white space from quote chars - wont work anyway
        quoteChar = quoteChar.strip()
        if len(quoteChar) == 0:
            warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

        if endQuoteChar is None:
            endQuoteChar = quoteChar
        else:
            endQuoteChar = endQuoteChar.strip()
            if len(endQuoteChar) == 0:
                warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
                raise SyntaxError()

        self.quoteChar = quoteChar
        self.quoteCharLen = len(quoteChar)
        self.firstQuoteChar = quoteChar[0]
        self.endQuoteChar = endQuoteChar
        self.endQuoteCharLen = len(endQuoteChar)
        self.escChar = escChar
        self.escQuote = escQuote
        self.unquoteResults = unquoteResults

        # Opening quote followed by a group of alternatives for the body.
        # The first alternative is any character that is not the first
        # end-quote character, not the escape character and (single-line
        # mode only) not a line break.
        if multiline:
            self.flags = re.MULTILINE | re.DOTALL
            self.pattern = r'%s(?:[^%s%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        else:
            self.flags = 0
            self.pattern = r'%s(?:[^%s\n\r%s]' % \
                ( re.escape(self.quoteChar),
                  _escapeRegexRangeChars(self.endQuoteChar[0]),
                  (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
        if len(self.endQuoteChar) > 1:
            # a proper prefix of a multi-character end quote followed by a
            # character that breaks the end quote is still body text
            self.pattern += (
                '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                                   _escapeRegexRangeChars(self.endQuoteChar[i]))
                                        for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
                )
        if escQuote:
            self.pattern += (r'|(?:%s)' % re.escape(escQuote))
        if escChar:
            self.pattern += (r'|(?:%s.)' % re.escape(escChar))
            # only defined when escChar is given; parseImpl guards on escChar
            self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
        self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

        try:
            self.re = re.compile(self.pattern, self.flags)
            self.reString = self.pattern
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
                SyntaxWarning, stacklevel=2)
            raise

        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a quoted string starting at loc.

        Returns (end of match, text).  When unquoteResults is set, the
        delimiters are stripped, escChar-prefixed characters are replaced
        by the character itself, and escQuote sequences are replaced by
        endQuoteChar.  On no match, self.myException is raised with
        loc/pstr updated.
        """
        # cheap first-character test before running the regular expression
        result = None
        if instring[loc] == self.firstQuoteChar:
            result = self.re.match(instring,loc)
        if not result:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        loc = result.end()
        ret = result.group()

        if self.unquoteResults:

            # strip off quotes
            ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

            if isinstance(ret,basestring):
                # replace escaped characters; the template is a raw string
                # so that \g is not treated as a (bogus) string escape
                if self.escChar:
                    ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

                # replace escaped quotes
                if self.escQuote:
                    ret = ret.replace(self.escQuote, self.endQuoteChar)

        return loc, ret

    def __str__( self ):
        """Return the base-class string if available, else a cached description of the delimiters."""
        try:
            return super(QuotedString,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)

        return self.strRepr
1918 1919
class CharsNotIn(Token):
    """Token matching a run of characters that are *not* in a given set.
       Built from a string of the disallowed characters plus optional
       min, max and exact lengths.  min defaults to 1 and may not be
       smaller; max and exact default to 0, meaning "no restriction".
    """
    def __init__( self, notChars, min=1, max=0, exact=0 ):
        """Store the excluded characters and the length limits.

        Raises ValueError when min < 1.  A positive exact overrides both
        min and max.
        """
        super(CharsNotIn,self).__init__()
        self.skipWhitespace = False
        self.notChars = notChars

        if min < 1:
            raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

        shortest = min
        longest = _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = exact
            longest = exact
        self.minLen = shortest
        self.maxLen = longest

        # _ustr(self) goes through __str__, which reads self.notChars
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        self.mayReturnEmpty = ( self.minLen == 0 )
        self.mayIndexError = False

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume characters from loc up to the first excluded character.

        Returns (new location, matched text).  Raises self.myException
        (with loc/pstr updated) if the first character is excluded or
        fewer than minLen characters were consumed.
        """
        excluded = self.notChars
        if instring[loc] in excluded:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        limit = min( start+self.maxLen, len(instring) )
        loc = start + 1
        while loc < limit and instring[loc] not in excluded:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]

    def __str__( self ):
        """Return the base-class string if available, else a cached "!W:(...)" form."""
        try:
            return super(CharsNotIn, self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr

        # show at most the first four excluded characters
        if len(self.notChars) <= 4:
            self.strRepr = "!W:(%s)" % self.notChars
        else:
            self.strRepr = "!W:(%s...)" % self.notChars[:4]
        return self.strRepr
1990
class White(Token):
    """Matching class for significant whitespace.  Whitespace is normally
       skipped by pyparsing grammars; use this class when particular
       whitespace structure matters.  Constructed with a string of the
       whitespace characters to match (default " \\t\\r\\n") and optional
       min, max and exact arguments, as defined for the Word class."""
    # display names used to build the element name
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Record the characters to match and the length limits.

        The characters in ws are removed from this element's own
        skippable whitespace.  A positive exact overrides min and max.
        """
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [ch for ch in self.whiteChars if ch not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[ch] for ch in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        shortest = min
        longest = _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = exact
            longest = exact
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of the configured whitespace characters at loc.

        Returns (new location, matched text).  Raises self.myException
        (with loc/pstr updated) if the first character does not match or
        the run is shorter than minLen.
        """
        if instring[ loc ] not in self.matchWhite:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        start = loc
        limit = min( start + self.maxLen, len(instring) )
        loc = start + 1
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - start < self.minLen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc

        return loc, instring[start:loc]
2047 2048
class _PositionToken(Token):
    """Base class for the position-testing tokens defined after it."""
    def __init__( self ):
        """Name the element after its class and set the empty/index flags."""
        super(_PositionToken,self).__init__()
        self.mayIndexError = False
        self.mayReturnEmpty = True
        self.name = self.__class__.__name__
2055
class GoToColumn(_PositionToken):
    """Token that advances to a given column of the input text; useful for tabular report scraping."""
    def __init__( self, colno ):
        """Remember the target column number."""
        super(GoToColumn,self).__init__()
        self.col = colno

    def preParse( self, instring, loc ):
        """Skip ignorables and whitespace until the target column is reached.

        Returns loc unchanged when it is already at the target column.
        """
        target = self.col
        if col(loc,instring) == target:
            return loc

        instrlen = len(instring)
        if self.ignoreExprs:
            loc = self._skipIgnorables( instring, loc )
        while loc < instrlen and instring[loc].isspace() and col( loc, instring ) != target:
            loc += 1
        return loc

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the text between loc and the target column.

        Raises ParseException if loc is already past the target column.
        """
        current = col( loc, instring )
        if current > self.col:
            raise ParseException( instring, loc, "Text not in expected column", self )
        newloc = loc + self.col - current
        return newloc, instring[ loc: newloc ]
2078
class LineStart(_PositionToken):
    """Matches when the current position is at the start of a line of the parse string"""
    def __init__( self ):
        """Exclude newline from this element's skippable whitespace."""
        super(LineStart,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected start of line"

    def preParse( self, instring, loc ):
        """Return loc, advanced by one if the inherited preParse stops on a newline.

        Note that the result is based on the incoming loc, not on the
        location returned by the inherited preParse.
        """
        skipped = super(LineStart,self).preParse(instring,loc)
        if instring[skipped] != "\n":
            return loc
        return loc + 1

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when loc is at a line start.

        loc qualifies if it is 0, if it equals the result of pre-parsing
        from position 0, or if the preceding character is a newline.
        Otherwise self.myException is raised with loc/pstr updated.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ) and instring[loc-1] != "\n":
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2103
class LineEnd(_PositionToken):
    """Matches when the current position is at the end of a line of the parse string"""
    def __init__( self ):
        """Exclude newline from this element's skippable whitespace."""
        super(LineEnd,self).__init__()
        nonNewlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
        self.setWhitespaceChars( nonNewlineWhite )
        self.errmsg = "Expected end of line"

    def parseImpl( self, instring, loc, doActions=True ):
        """Match a newline at loc, or the very end of the string.

        Returns (loc+1, "\\n") on a newline and (loc+1, []) when loc is
        exactly len(instring).  Anything else raises self.myException
        with loc/pstr updated.
        """
        instrlen = len(instring)
        if loc == instrlen:
            return loc+1, []
        if loc < instrlen and instring[loc] == "\n":
            return loc+1, "\n"

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2129
class StringStart(_PositionToken):
    """Matches when the current position is at the start of the parse string"""
    def __init__( self ):
        """Set the error message for this element."""
        super(StringStart,self).__init__()
        self.errmsg = "Expected start of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens at position 0.

        A non-zero loc is also accepted when it equals the result of
        pre-parsing from position 0, i.e. everything before it is
        skippable.  Otherwise self.myException is raised.
        """
        if loc != 0 and loc != self.preParse( instring, 0 ):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        return loc, []
2147
class StringEnd(_PositionToken):
    """Matches if current position is at the end of the parse string"""
    def __init__( self ):
        """Set the error message for this element."""
        super(StringEnd,self).__init__()
        self.errmsg = "Expected end of text"

    def parseImpl( self, instring, loc, doActions=True ):
        """Succeed with no tokens when loc is at or beyond the end of instring.

        Returns (loc+1, []) when loc == len(instring) and (loc, []) when
        loc is already past the end.  Raises self.myException (with
        loc/pstr updated) when input remains.
        """
        instrlen = len(instring)
        if loc < instrlen:
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
        if loc == instrlen:
            return loc+1, []
        # loc > instrlen; the original code carried a further else branch
        # after <, == and > which could never be reached
        return loc, []
2171
class WordStart(_PositionToken):
    """Matches when the current position is at the start of a word, i.e.
       the character at the position is in wordChars and the preceding
       character is not (default wordChars=printables).  To emulate the
       \\b behavior of regular expressions, use WordStart(alphanums).
       Position 0 of the string always matches.
    """
    def __init__(self, wordChars = printables):
        """Store the word characters as a lookup table."""
        super(WordStart,self).__init__()
        self.errmsg = "Not at the start of a word"
        self.wordChars = _str2dict(wordChars)

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at a word start, else raise self.myException."""
        if loc == 0:
            return loc, []

        wordchars = self.wordChars
        if instring[loc-1] not in wordchars and instring[loc] in wordchars:
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2193
class WordEnd(_PositionToken):
    """Matches if the current position is at the end of a Word, and
       is not followed by any character in a given set of wordChars
       (default=printables).  To emulate the \\b behavior of regular expressions,
       use WordEnd(alphanums). WordEnd will also match at the end of
       the string being parsed, or at the end of a line.
    """
    def __init__(self, wordChars = printables):
        """Store the word characters as a lookup table and disable whitespace skipping."""
        super(WordEnd,self).__init__()
        self.wordChars = _str2dict(wordChars)
        self.skipWhitespace = False
        self.errmsg = "Not at the end of a word"

    def parseImpl(self, instring, loc, doActions=True ):
        """Succeed with no tokens at a word end, else raise self.myException.

        Inside the string, the position qualifies when the preceding
        character is a word character and the current one is not.  A
        position at or past the end of the string always qualifies.
        """
        instrlen = len(instring)
        if instrlen>0 and loc<instrlen:
            # loc == 0 has no preceding character; without this test
            # instring[loc-1] would wrap around to the last character
            if (loc == 0 or
                instring[loc] in self.wordChars or
                instring[loc-1] not in self.wordChars):
                exc = self.myException
                exc.loc = loc
                exc.pstr = instring
                raise exc
        return loc, []
2218 2219
class ParseExpression(ParserElement):
    """Abstract subclass of ParserElement that holds a list of sub-expressions (self.exprs)."""
    def __init__( self, exprs, savelist = False ):
        """Normalise exprs into the list self.exprs.

        A list is stored as-is, a string becomes a single Literal, any
        other iterable is copied into a list, and a non-iterable is
        wrapped in a one-element list.
        """
        super(ParseExpression,self).__init__(savelist)
        if isinstance( exprs, list ):
            self.exprs = exprs
        elif isinstance( exprs, basestring ):
            self.exprs = [ Literal( exprs ) ]
        else:
            try:
                members = list( exprs )
            except TypeError:
                members = [ exprs ]
            self.exprs = members
        self.callPreparse = False

    def __getitem__( self, i ):
        """Return the i'th contained expression."""
        return self.exprs[i]

    def append( self, other ):
        """Add other to the contained expressions and reset the cached string form."""
        self.exprs.append( other )
        self.strRepr = None
        return self

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on copies of all contained expressions."""
        self.skipWhitespace = False
        # work on copies so the originals passed in are not modified
        self.exprs = [ sub.copy() for sub in self.exprs ]
        for sub in self.exprs:
            sub.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on every contained expression.

        A Suppress instance that is already registered is not added again.
        """
        if isinstance( other, Suppress ) and other in self.ignoreExprs:
            return self

        super( ParseExpression, self).ignore( other )
        for sub in self.exprs:
            sub.ignore( self.ignoreExprs[-1] )
        return self

    def __str__( self ):
        """Return the base-class string if available, else a cached "ClassName:(exprs)" form."""
        try:
            return super(ParseExpression,self).__str__()
        except:
            pass

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
        return self.strRepr

    def streamline( self ):
        """Streamline the contained expressions and flatten simple nesting.

        For a two-element expression, a first or last element of the
        same class is merged into this one, e.g. And( And( And( a,b), c), d)
        becomes And( a,b,c,d ) - but only if the nested element has no
        parse actions, no results name and debugging is off.
        """
        super(ParseExpression,self).streamline()

        for sub in self.exprs:
            sub.streamline()

        if len(self.exprs) != 2:
            return self

        ownClass = self.__class__

        def absorbable( candidate ):
            # nested element of our own class with nothing attached to it
            return ( isinstance( candidate, ownClass ) and
                     not candidate.parseAction and
                     candidate.resultsName is None and
                     not candidate.debug )

        head = self.exprs[0]
        if absorbable( head ):
            self.exprs = head.exprs[:] + [ self.exprs[1] ]
            self.strRepr = None
            self.mayReturnEmpty |= head.mayReturnEmpty
            self.mayIndexError |= head.mayIndexError

        # re-read the last element: the list may just have been rebuilt
        tail = self.exprs[-1]
        if absorbable( tail ):
            self.exprs = self.exprs[:-1] + tail.exprs[:]
            self.strRepr = None
            self.mayReturnEmpty |= tail.mayReturnEmpty
            self.mayIndexError |= tail.mayIndexError

        return self

    def setResultsName( self, name, listAllMatches=False ):
        """Delegate to the base-class setResultsName and return its result."""
        return super(ParseExpression,self).setResultsName(name,listAllMatches)

    def validate( self, validateTrace=[] ):
        """Validate every contained expression, then run the recursion check.

        validateTrace is only read (it is copied before being extended).
        """
        trace = validateTrace[:]+[self]
        for sub in self.exprs:
            sub.validate(trace)
        self.checkRecursion( [] )
2315
class And(ParseExpression):
    """Requires all given ParseExpressions to be found in the given order.
       Expressions may be separated by whitespace.
       May be constructed using the '+' operator.
    """

    class _ErrorStop(Empty):
        """Marker element; once parseImpl passes it, later failures become ParseSyntaxException."""
        def __init__(self, *args, **kwargs):
            # NOTE(review): super(Empty, self) starts the lookup *after*
            # Empty, so Empty.__init__ itself is not run - confirm intended.
            super(Empty,self).__init__(*args, **kwargs)
            self.leaveWhitespace()

    def __init__( self, exprs, savelist = True ):
        """Build the sequence and take whitespace settings from its first element.

        mayReturnEmpty is True only if every contained expression may
        return empty.
        """
        super(And,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for e in self.exprs:
            if not e.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        # Use the normalised list: the raw exprs argument may be a string
        # or a generator/iterator, neither of which can be indexed here.
        first = self.exprs[0]
        self.setWhitespaceChars( first.whiteChars )
        self.skipWhitespace = first.skipWhitespace
        self.callPreparse = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse every contained expression in order and return (loc, combined tokens).

        After an _ErrorStop marker has been passed, a ParseBaseException
        or IndexError from a later expression is re-raised as
        ParseSyntaxException.
        """
        # pass False as last arg to _parse for first element, since we already
        # pre-parsed the string as part of our And pre-parsing
        loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
        errorStop = False
        for e in self.exprs[1:]:
            if isinstance(e, And._ErrorStop):
                errorStop = True
                continue
            if errorStop:
                try:
                    loc, exprtokens = e._parse( instring, loc, doActions )
                except ParseSyntaxException:
                    raise
                except ParseBaseException:
                    pe = sys.exc_info()[1]
                    raise ParseSyntaxException(pe)
                except IndexError:
                    raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
            else:
                loc, exprtokens = e._parse( instring, loc, doActions )
            # keep results that carry either tokens or named entries
            if exprtokens or exprtokens.keys():
                resultlist += exprtokens
        return loc, resultlist

    def __iadd__(self, other ):
        """Append other (a string becomes a Literal) in place and return self."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #And( [ self, other ] )

    def checkRecursion( self, parseElementList ):
        """Check contained expressions for recursion, stopping after the first that cannot return empty."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
            if not e.mayReturnEmpty:
                break

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a b c}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr
2383 2384
class Or(ParseExpression):
    """Requires at least one of the given ParseExpressions to match.
       When several match, the one that matches the longest string wins.
       May be constructed using the '^' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """mayReturnEmpty is True if any contained expression may return empty."""
        super(Or,self).__init__(exprs, savelist)
        self.mayReturnEmpty = False
        for alt in self.exprs:
            if alt.mayReturnEmpty:
                self.mayReturnEmpty = True
                break

    def parseImpl( self, instring, loc, doActions=True ):
        """Try every alternative, then parse with the one that got furthest.

        If nothing matches, the exception with the greatest location is
        raised; with no alternatives at all a ParseException is raised.
        """
        furthestFail = -1
        furthestMatch = -1
        worstException = None
        for alt in self.exprs:
            try:
                endloc = alt.tryParse( instring, loc )
            except ParseException:
                err = sys.exc_info()[1]
                if err.loc > furthestFail:
                    worstException = err
                    furthestFail = err.loc
            except IndexError:
                instrlen = len(instring)
                if instrlen > furthestFail:
                    worstException = ParseException(instring,instrlen,alt.errmsg,self)
                    furthestFail = instrlen
            else:
                # strict comparison: the earliest of equally long matches is kept
                if endloc > furthestMatch:
                    furthestMatch = endloc
                    winner = alt

        if furthestMatch >= 0:
            return winner._parse( instring, loc, doActions )

        if worstException is None:
            raise ParseException(instring, loc, "no defined alternatives to match", self)
        raise worstException

    def __ixor__(self, other ):
        """Append other (a string becomes a Literal) in place and return self."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #Or( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a ^ b}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        parts = [ _ustr(alt) for alt in self.exprs ]
        self.strRepr = "{" + " ^ ".join( parts ) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        trail = parseElementList[:] + [ self ]
        for alt in self.exprs:
            alt.checkRecursion( trail )
2445 2446
class MatchFirst(ParseExpression):
    """Requires that at least one ParseExpression is found.
       If two expressions match, the first one listed is the one that will match.
       May be constructed using the '|' operator.
    """
    def __init__( self, exprs, savelist = False ):
        """mayReturnEmpty is True if any alternative may return empty, or if exprs is empty."""
        super(MatchFirst,self).__init__(exprs, savelist)
        if exprs:
            self.mayReturnEmpty = False
            for e in self.exprs:
                if e.mayReturnEmpty:
                    self.mayReturnEmpty = True
                    break
        else:
            self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Return the result of the first alternative that parses at loc.

        If none matches, the exception with the greatest location is
        raised; with no alternatives at all a ParseException is raised.
        """
        maxExcLoc = -1
        maxException = None
        for e in self.exprs:
            try:
                ret = e._parse( instring, loc, doActions )
                return ret
            except ParseException:
                # fetch the exception via sys.exc_info(), as Or and And do,
                # instead of the Python-2-only "except X, err" form
                err = sys.exc_info()[1]
                if err.loc > maxExcLoc:
                    maxException = err
                    maxExcLoc = err.loc
            except IndexError:
                if len(instring) > maxExcLoc:
                    maxException = ParseException(instring,len(instring),e.errmsg,self)
                    maxExcLoc = len(instring)

        # only got here if no expression matched, raise exception for match that made it the furthest
        if maxException is not None:
            raise maxException
        raise ParseException(instring, loc, "no defined alternatives to match", self)

    def __ior__(self, other ):
        """Append other (a string becomes a Literal) in place and return self."""
        if isinstance( other, basestring ):
            other = Literal( other )
        return self.append( other ) #MatchFirst( [ self, other ] )

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a | b}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is None:
            self.strRepr = "{" + " | ".join( [ _ustr(e) for e in self.exprs ] ) + "}"

        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every alternative."""
        subRecCheckList = parseElementList[:] + [ self ]
        for e in self.exprs:
            e.checkRecursion( subRecCheckList )
2504 2505
class Each(ParseExpression):
    """Requires all given ParseExpressions to be found, in any order.
       Expressions may be separated by whitespace.
       May be constructed using the '&' operator.
    """
    def __init__( self, exprs, savelist = True ):
        """mayReturnEmpty is True only if every contained expression may return empty."""
        super(Each,self).__init__(exprs, savelist)
        self.mayReturnEmpty = True
        for member in self.exprs:
            if not member.mayReturnEmpty:
                self.mayReturnEmpty = False
                break
        self.skipWhitespace = True
        # the expression groups are built on the first call to parseImpl
        self.initExprGroups = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Find an order in which the expressions match, then parse in that order.

        Raises ParseException naming the required expressions that were
        never matched.  Results sharing a name are accumulated under it.
        """
        if self.initExprGroups:
            members = self.exprs
            self.optionals = [ m.expr for m in members if isinstance(m,Optional) ]
            self.multioptionals = [ m.expr for m in members if isinstance(m,ZeroOrMore) ]
            self.multirequired = [ m.expr for m in members if isinstance(m,OneOrMore) ]
            self.required = [ m for m in members if not isinstance(m,(Optional,ZeroOrMore,OneOrMore)) ]
            self.required += self.multirequired
            self.initExprGroups = False

        probeLoc = loc
        pendingReqd = self.required[:]
        pendingOpt = self.optionals[:]
        matchOrder = []

        # trial passes (tryParse only); stop after a pass with no match
        while True:
            candidates = pendingReqd + pendingOpt + self.multioptionals + self.multirequired
            misses = 0
            for cand in candidates:
                try:
                    probeLoc = cand.tryParse( instring, probeLoc )
                except ParseException:
                    misses += 1
                else:
                    matchOrder.append(cand)
                    if cand in pendingReqd:
                        pendingReqd.remove(cand)
                    elif cand in pendingOpt:
                        pendingOpt.remove(cand)
            if misses == len(candidates):
                break

        if pendingReqd:
            missing = ", ".join( [ _ustr(m) for m in pendingReqd ] )
            raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

        # add any unmatched Optionals, in case they have default values defined
        matchOrder += [ m for m in self.exprs if isinstance(m,Optional) and m.expr in pendingOpt ]

        # real parse, in the order found above
        resultlist = []
        for cand in matchOrder:
            loc,results = cand._parse(instring,loc,doActions)
            resultlist.append(results)

        finalResults = ParseResults([])
        for res in resultlist:
            repeated = {}
            for key in res.keys():
                if key in finalResults.keys():
                    merged = ParseResults(finalResults[key])
                    merged += ParseResults(res[key])
                    repeated[key] = merged
            finalResults += ParseResults(res)
            # put back the accumulated values for names seen before
            for key,value in repeated.items():
                finalResults[key] = value
        return loc, finalResults

    def __str__( self ):
        """Return the assigned name if any, else a cached "{a & b}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        parts = [ _ustr(m) for m in self.exprs ]
        self.strRepr = "{" + " & ".join( parts ) + "}"
        return self.strRepr

    def checkRecursion( self, parseElementList ):
        """Run the recursion check on every contained expression."""
        trail = parseElementList[:] + [ self ]
        for member in self.exprs:
            member.checkRecursion( trail )
2590 2591
class ParseElementEnhance(ParserElement):
    """Abstract subclass of ParserElement, for combining and post-processing parsed tokens.
       Wraps a single expression, self.expr, which may be None."""
    def __init__( self, expr, savelist=False ):
        """Wrap expr (a string becomes a Literal) and copy its parsing flags.

        When expr is not None, its index/empty flags, whitespace
        settings, saveAsList, callPreparse and ignore expressions are
        taken over by this element.
        """
        super(ParseElementEnhance,self).__init__(savelist)
        if isinstance( expr, basestring ):
            expr = Literal(expr)
        self.expr = expr
        self.strRepr = None
        if expr is not None:
            self.mayIndexError = expr.mayIndexError
            self.mayReturnEmpty = expr.mayReturnEmpty
            self.setWhitespaceChars( expr.whiteChars )
            self.skipWhitespace = expr.skipWhitespace
            self.saveAsList = expr.saveAsList
            self.callPreparse = expr.callPreparse
            self.ignoreExprs.extend(expr.ignoreExprs)

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse with the wrapped expression; raise ParseException if there is none."""
        if self.expr is not None:
            return self.expr._parse( instring, loc, doActions, callPreParse=False )
        else:
            raise ParseException("",loc,self.errmsg,self)

    def leaveWhitespace( self ):
        """Turn off whitespace skipping here and on a copy of the wrapped expression."""
        self.skipWhitespace = False
        # guard before copying: the original copied first and so failed
        # with AttributeError when no expression was set
        if self.expr is not None:
            self.expr = self.expr.copy()
            self.expr.leaveWhitespace()
        return self

    def ignore( self, other ):
        """Register other as ignorable here and on the wrapped expression.

        A Suppress instance that is already registered is not added again.
        """
        if isinstance( other, Suppress ):
            if other not in self.ignoreExprs:
                super( ParseElementEnhance, self).ignore( other )
                if self.expr is not None:
                    self.expr.ignore( self.ignoreExprs[-1] )
        else:
            super( ParseElementEnhance, self).ignore( other )
            if self.expr is not None:
                self.expr.ignore( self.ignoreExprs[-1] )
        return self

    def streamline( self ):
        """Streamline this element and the wrapped expression."""
        super(ParseElementEnhance,self).streamline()
        if self.expr is not None:
            self.expr.streamline()
        return self

    def checkRecursion( self, parseElementList ):
        """Raise RecursiveGrammarException if this element is already in parseElementList."""
        if self in parseElementList:
            raise RecursiveGrammarException( parseElementList+[self] )
        subRecCheckList = parseElementList[:] + [ self ]
        if self.expr is not None:
            self.expr.checkRecursion( subRecCheckList )

    def validate( self, validateTrace=[] ):
        """Validate the wrapped expression, then run the recursion check.

        validateTrace is only read (it is copied before being extended).
        """
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
        self.checkRecursion( [] )

    def __str__( self ):
        """Return the base-class string if available, else a cached "ClassName:(expr)" form."""
        try:
            return super(ParseElementEnhance,self).__str__()
        except:
            pass

        if self.strRepr is None and self.expr is not None:
            self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
        return self.strRepr
2662 2663
class FollowedBy(ParseElementEnhance):
    """Lookahead for the given parse expression.  FollowedBy does *not*
    advance the parsing position in the input string; it only checks
    that the expression matches at the current position, and always
    returns a null token list."""
    def __init__( self, expr ):
        """Wrap expr; a lookahead may always return empty."""
        super(FollowedBy,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression at loc and return (loc, []) unchanged.

        Any exception raised by tryParse propagates to the caller.
        """
        self.expr.tryParse( instring, loc )
        nothing = []
        return loc, nothing
2676 2677
class NotAny(ParseElementEnhance):
    """Negative lookahead for the given parse expression.  NotAny does
    *not* advance the parsing position in the input string; it only
    checks that the expression does *not* match at the current position.
    NotAny also does *not* skip leading whitespace, and always returns a
    null token list.  May be constructed using the '~' operator."""
    def __init__( self, expr ):
        """Wrap expr and disable whitespace skipping on this element only."""
        super(NotAny,self).__init__(expr)
        # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
        self.skipWhitespace = False
        self.mayReturnEmpty = True
        self.errmsg = "Found unwanted token, "+_ustr(self.expr)

    def parseImpl( self, instring, loc, doActions=True ):
        """Return (loc, []) if the wrapped expression fails to match at loc.

        If it does match, self.myException is raised with loc/pstr updated.
        """
        try:
            self.expr.tryParse( instring, loc )
        except (ParseException,IndexError):
            # the unwanted expression is absent: success
            return loc, []

        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    def __str__( self ):
        """Return the assigned name if any, else a cached "~{expr}" form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "~{" + _ustr(self.expr) + "}"
        return self.strRepr
2713 2714
class ZeroOrMore(ParseElementEnhance):
    """Optional repetition: zero or more occurrences of the given expression."""
    def __init__( self, expr ):
        """Wrap expr; zero repetitions are allowed, so the element may return empty."""
        super(ZeroOrMore,self).__init__(expr)
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Parse the wrapped expression repeatedly until it fails.

        Returns (location after the last success, accumulated tokens);
        with no match at all that is (loc, []).
        """
        collected = []
        try:
            loc, collected = self.expr._parse( instring, loc, doActions, callPreParse=False )
            skipIgnorables = ( len(self.ignoreExprs) > 0 )
            while True:
                nextloc = loc
                if skipIgnorables:
                    nextloc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, nextloc, doActions )
                # keep results that carry either tokens or named entries
                if more or more.keys():
                    collected += more
        except (ParseException,IndexError):
            # the first failure simply ends the repetition
            pass

        return loc, collected

    def __str__( self ):
        """Return the assigned name if any, else a cached "[expr]..." form."""
        if hasattr(self,"name"):
            return self.name

        if self.strRepr is not None:
            return self.strRepr
        self.strRepr = "[" + _ustr(self.expr) + "]..."
        return self.strRepr

    def setResultsName( self, name, listAllMatches=False ):
        """Set the results name via the base class and mark the result as saved as a list."""
        named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named
2752 2753
class OneOrMore(ParseElementEnhance):
    """Repetition of one or more of the given expression."""

    def parseImpl( self, instring, loc, doActions=True ):
        """Match the wrapped expression once (a failure here propagates to
           the caller), then as many further times as possible."""
        # the mandatory first match is outside the try block on purpose
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        try:
            mustSkip = len(self.ignoreExprs) > 0
            while True:
                preloc = loc
                if mustSkip:
                    preloc = self._skipIgnorables( instring, loc )
                loc, more = self.expr._parse( instring, preloc, doActions )
                # keep results that carry tokens or only named values
                if more or more.keys():
                    tokens += more
        except (ParseException,IndexError):
            pass

        return loc, tokens

    def __str__( self ):
        """Return the assigned name, else the cached "{expr}..." form."""
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "{" + _ustr(self.expr) + "}..."
        return cached

    def setResultsName( self, name, listAllMatches=False ):
        """Name the results as the base class does, and mark the returned
           element so that its results are saved as a list."""
        named = super(OneOrMore,self).setResultsName(name,listAllMatches)
        named.saveAsList = True
        return named

2787
2788 -class _NullToken(object):
2789 - def __bool__(self):
2790 return False
2791 __nonzero__ = __bool__
2792 - def __str__(self):
2793 return ""
2794 2795 _optionalNotMatched = _NullToken()
class Optional(ParseElementEnhance):
    """Optional matching of the given expression.
       A default return value can also be specified, to be returned when the
       optional expression is not found.
    """
    def __init__( self, exprs, default=_optionalNotMatched ):
        super(Optional,self).__init__( exprs, savelist=False )
        self.mayReturnEmpty = True
        self.defaultValue = default

    def parseImpl( self, instring, loc, doActions=True ):
        """Try the wrapped expression; when it does not match, return loc
           unchanged with either the default value or no tokens at all."""
        try:
            loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        except (ParseException,IndexError):
            fallback = self.defaultValue
            if fallback is _optionalNotMatched:
                # no default was supplied
                tokens = []
            elif self.expr.resultsName:
                # expose the default under the wrapped expression's results name
                tokens = ParseResults([ fallback ])
                tokens[self.expr.resultsName] = fallback
            else:
                tokens = [ fallback ]
        return loc, tokens

    def __str__( self ):
        """Return the assigned name, else the cached "[expr]" form."""
        if hasattr(self,"name"):
            return self.name

        cached = self.strRepr
        if cached is None:
            cached = self.strRepr = "[" + _ustr(self.expr) + "]"
        return cached

2828 2829
class SkipTo(ParseElementEnhance):
    """Token for skipping over all undefined text until the matched expression is found.
       If include is set to true, the matched expression is also parsed (the skipped text
       and matched expression are returned as a 2-element list).  The ignore
       argument is used to define grammars (typically quoted strings and comments) that
       might contain false matches.  The failOn argument (an expression, or a string
       that is converted to a Literal) aborts the skip with a ParseException if it
       matches at any position examined before the target expression is found.
    """
    def __init__( self, other, include=False, ignore=None, failOn=None ):
        super( SkipTo, self ).__init__( other )
        self.ignoreExpr = ignore
        self.mayReturnEmpty = True
        self.mayIndexError = False
        self.includeMatch = include
        self.asList = False
        # a plain string for failOn is promoted to a Literal expression
        if failOn is not None and isinstance(failOn, basestring):
            self.failOn = Literal(failOn)
        else:
            self.failOn = failOn
        self.errmsg = "No match found for "+_ustr(self.expr)
        #self.myException = ParseException("",0,self.errmsg,self)

    def parseImpl( self, instring, loc, doActions=True ):
        """Advance one character at a time from loc until the target
           expression matches, and return the skipped text (plus the match
           itself when includeMatch is set).  Raises this element's exception
           if the end of the string is passed without a match."""
        startLoc = loc
        instrlen = len(instring)
        expr = self.expr
        # set while the failOn exception is in flight, so that the handler
        # below re-raises it instead of treating it as "no match here"
        failParse = False
        while loc <= instrlen:
            try:
                if self.failOn:
                    try:
                        self.failOn.tryParse(instring, loc)
                    except ParseBaseException:
                        pass
                    else:
                        failParse = True
                        raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                    failParse = False
                if self.ignoreExpr is not None:
                    # step over every consecutive ignorable match at this position
                    while 1:
                        try:
                            loc = self.ignoreExpr.tryParse(instring,loc)
                            # print "found ignoreExpr, advance to", loc
                        except ParseBaseException:
                            break
                # probe for the target without running parse actions
                expr._parse( instring, loc, doActions=False, callPreParse=False )
                skipText = instring[startLoc:loc]
                if self.includeMatch:
                    # parse the target for real so its tokens can be returned
                    loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                    if mat:
                        skipRes = ParseResults( skipText )
                        skipRes += mat
                        return loc, [ skipRes ]
                    else:
                        return loc, [ skipText ]
                else:
                    return loc, [ skipText ]
            except (ParseException,IndexError):
                if failParse:
                    raise
                else:
                    # target not found here; try the next character
                    loc += 1
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
2895
class Forward(ParseElementEnhance):
    """Forward declaration of an expression to be defined later -
       used for recursive grammars, such as algebraic infix notation.
       When the expression is known, it is assigned to the Forward variable using the '<<' operator.

       Note: take care when assigning to Forward not to overlook precedence of operators.
       Specifically, '|' has a lower precedence than '<<', so that::
          fwdExpr << a | b | c
       will actually be evaluated as::
          (fwdExpr << a) | b | c
       thereby leaving b and c out as parseable alternatives.  It is recommended that you
       explicitly group the values inserted into the Forward::
          fwdExpr << (a | b | c)
    """
    def __init__( self, other=None ):
        super(Forward,self).__init__( other, savelist=False )

    def __lshift__( self, other ):
        """Define the forwarded expression.  A string is converted to a
           Literal.  The parsing attributes of the new expression are copied
           onto this element, and its ignore expressions are appended to
           this element's list.  Returns None."""
        if isinstance( other, basestring ):
            other = Literal(other)
        self.expr = other
        # drop any cached string form built for the previous expression
        self.strRepr = None
        self.mayIndexError = self.expr.mayIndexError
        self.mayReturnEmpty = self.expr.mayReturnEmpty
        self.setWhitespaceChars( self.expr.whiteChars )
        self.skipWhitespace = self.expr.skipWhitespace
        self.saveAsList = self.expr.saveAsList
        self.ignoreExprs.extend(self.expr.ignoreExprs)
        return None

    def leaveWhitespace( self ):
        """Disable whitespace skipping on this element only (the setting
           is not pushed down into the contained expression)."""
        self.skipWhitespace = False
        return self

    def streamline( self ):
        """Streamline the contained expression once.  The flag is set before
           descending, so a recursive grammar does not loop forever."""
        if not self.streamlined:
            self.streamlined = True
            if self.expr is not None:
                self.expr.streamline()
        return self

    def validate( self, validateTrace=None ):
        """Validate the contained expression unless this element is already
           on validateTrace, then run the recursion check.

           validateTrace is the list of elements already being validated;
           it is copied, never modified in place."""
        if validateTrace is None:
            validateTrace = []
        if self not in validateTrace:
            tmp = validateTrace[:]+[self]
            if self.expr is not None:
                self.expr.validate(tmp)
        self.checkRecursion([])

    def __str__( self ):
        """Return the assigned name, or "Forward: <expr>".  While the
           contained expression is being rendered this object's class is
           temporarily switched to _ForwardNoRecurse, so that a recursive
           reference back to it prints as "..." instead of recursing."""
        if hasattr(self,"name"):
            return self.name

        self._revertClass = self.__class__
        self.__class__ = _ForwardNoRecurse
        try:
            if self.expr is not None:
                retString = _ustr(self.expr)
            else:
                retString = "None"
        finally:
            # always restore the real class, even if rendering failed
            self.__class__ = self._revertClass
        return self.__class__.__name__ + ": " + retString

    def copy(self):
        """Copy this element.  If no expression has been assigned yet,
           return a new Forward that refers to this one, so that a later
           '<<' on the original is still seen through the copy."""
        if self.expr is not None:
            return super(Forward,self).copy()
        else:
            ret = Forward()
            ret << self
            return ret

2967
class _ForwardNoRecurse(Forward):
    """Stand-in class that Forward.__str__ temporarily assigns to an instance
       while rendering its contained expression, so that a recursive
       reference back to that instance renders as "..."."""
    def __str__( self ):
        return "..."
2971
class TokenConverter(ParseElementEnhance):
    """Abstract subclass of ParseElementEnhance, for converting parsed results."""
    def __init__( self, expr, savelist=False ):
        # NOTE: savelist is accepted but not passed on to the base class;
        # saveAsList is always reset to False here (subclasses that need a
        # list set it to True themselves after calling this constructor).
        super(TokenConverter,self).__init__( expr )#, savelist )
        self.saveAsList = False
2977
class Upcase(TokenConverter):
    """Converter to upper case all matching tokens.
       Deprecated - use the upcaseTokens parse action instead."""
    def __init__(self, *args):
        super(Upcase,self).__init__(*args)
        warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                       DeprecationWarning,stacklevel=2)

    def postParse( self, instring, loc, tokenlist ):
        """Return a list holding the upper-cased string form of every token."""
        # str.upper via _ustr works on both Python 2 and 3 (string.upper was
        # removed in Python 3) and matches what upcaseTokens does
        return [ _ustr(tok).upper() for tok in tokenlist ]

2987 2988
class Combine(TokenConverter):
    """Converter to concatenate all matching tokens to a single string.
       By default, the matching patterns must also be contiguous in the input string;
       this can be disabled by specifying 'adjacent=False' in the constructor.
    """
    def __init__( self, expr, joinString="", adjacent=True ):
        super(Combine,self).__init__( expr )
        self.joinString = joinString
        self.adjacent = adjacent
        if adjacent:
            # suppress whitespace-stripping in the contained parse expressions...
            self.leaveWhitespace()
        # ...but (re-)enable it on the Combine itself
        self.skipWhitespace = True

    def ignore( self, other ):
        """Register an ignorable expression.  When adjacent matching is
           required it is registered through ParserElement.ignore directly;
           otherwise the inherited implementation is used."""
        if not self.adjacent:
            super( Combine, self).ignore( other )
        else:
            ParserElement.ignore(self, other)
        return self

    def postParse( self, instring, loc, tokenlist ):
        """Replace the matched tokens by one joined string, working on a copy
           of tokenlist so that the copy keeps the original named results."""
        combined = tokenlist.copy()
        del combined[:]
        joined = "".join(tokenlist._asStringList(self.joinString))
        combined += ParseResults([ joined ], modal=self.modalResults)

        if self.resultsName and len(combined.keys())>0:
            return [ combined ]
        return combined

3019
class Group(TokenConverter):
    """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
    def __init__( self, expr ):
        super(Group,self).__init__( expr )
        # grouped results are always saved as a list
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        # nest the matched tokens as a single element of the result
        return [ tokenlist ]
3028
class Dict(TokenConverter):
    """Converter to return a repetitive expression as a list, but also as a dictionary.
       Each element can also be referenced using the first token in the expression as its key.
       Useful for tabular report scraping when the first column can be used as a item key.
    """
    def __init__( self, exprs ):
        super(Dict,self).__init__( exprs )
        self.saveAsList = True

    def postParse( self, instring, loc, tokenlist ):
        """Add a named entry to tokenlist for every non-empty element, keyed
           by that element's first token.  The value is "" for a one-token
           element, the single remaining token when exactly one plain token
           remains, and otherwise the remaining tokens as a ParseResults."""
        for position,entry in enumerate(tokenlist):
            size = len(entry)
            if size == 0:
                continue
            key = entry[0]
            if isinstance(key,int):
                # integer keys are stored under their stripped string form
                key = _ustr(key).strip()
            if size == 1:
                value = ""
            elif size == 2 and not isinstance(entry[1],ParseResults):
                value = entry[1]
            else:
                remainder = entry.copy()
                del remainder[0]
                if len(remainder) == 1 and not (isinstance(remainder,ParseResults) and remainder.keys()):
                    value = remainder[0]
                else:
                    value = remainder
            tokenlist[key] = _ParseResultsWithOffset(value,position)

        if self.resultsName:
            return [ tokenlist ]
        return tokenlist

3061 3062
class Suppress(TokenConverter):
    """Converter for ignoring the results of a parsed expression."""
    def postParse( self, instring, loc, tokenlist ):
        # the expression must still match, but contributes no tokens
        return []

    def suppress( self ):
        # already suppressed - return self rather than wrapping again
        return self
3070 3071
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once."""
    def __init__(self, methodCall):
        self.called = False
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)

    def __call__(self,s,l,t):
        """Run the wrapped parse action the first time; on any later call
           (until reset() is used) raise a ParseException instead."""
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # only mark as called once the action has returned normally
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped parse action to be called once more."""
        self.called = False

3085
def traceParseAction(f):
    """Decorator for debugging parse actions.

       The returned wrapper writes a ">>entering" line before calling f and
       a "<<leaving" line afterwards (showing the return value, or the
       exception, which is then re-raised) to sys.stderr.
    """
    f = ParserElement._normalizeParseActionArgs(f)
    # __name__ exists on functions in both Python 2 and 3 (func_name is
    # Python 2 only); fall back to the class name for other callables
    funcName = getattr(f, "__name__", f.__class__.__name__)
    def z(*paArgs):
        thisFunc = funcName
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # a leading extra argument is the bound instance - qualify the name
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z

#
# global helpers
#
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions - the delimiter defaults to ','.
       By default, the list elements and delimiters can have intervening whitespace, and
       comments, but this can be overridden by passing 'combine=True' in the constructor.
       If combine is set to True, the matching tokens are returned as a single token
       string, with the delimiters included; otherwise, the matching tokens are returned
       as a list of tokens, with the delimiters suppressed.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if not combine:
        return ( expr + ZeroOrMore( Suppress( delim ) + expr ) ).setName(dlName)
    return Combine( expr + ZeroOrMore( delim + expr ) ).setName(dlName)

3125
def countedArray( expr ):
    """Helper to define a counted list of expressions.
       This helper defines a pattern of the form::
           integer expr expr expr...
       where the leading integer tells how many expr expressions follow.
       The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # redefine the array part on the fly, now that the count is known
        n = int(t[0])
        if n:
            arrayExpr << Group(And([expr]*n))
        else:
            arrayExpr << Group(empty)
        # returning no tokens suppresses the count itself
        return []
    lengthExpr = Word(nums).setName("arrayLen")
    lengthExpr = lengthExpr.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( lengthExpr + arrayExpr )

3140 -def _flatten(L):
3141 if type(L) is not list: return [L] 3142 if L == []: return L 3143 return _flatten(L[0]) + _flatten(L[1:])
3144
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousLiteral(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches a
       previous literal, will also match the leading "1:1" in "1:10".
       If this is not desired, use matchPreviousExpr.
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # redefine the repeater from whatever expr has just matched
        if not t:
            rep << Empty()
        elif len(t) == 1:
            rep << t[0]
        else:
            # flatten t tokens
            literals = [ Literal(tt) for tt in _flatten(t.asList()) ]
            rep << And( literals )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
       the tokens matched in a previous expression, that is, it looks
       for a 'repeat' of a previous expression.  For example::
           first = Word(nums)
           second = matchPreviousExpr(first)
           matchExpr = first + ":" + second
       will match "1:1", but not "1:2".  Because this matches by
       expressions, will *not* match the leading "1:1" in "1:10";
       the expressions are evaluated first, and then compared, so
       "1" is compared with "10".
       Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeater parses with its own copy of expr, then compares tokens
    rep << expr.copy()
    def copyTokenToRepeater(s,l,t):
        expected = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            found = _flatten(t.asList())
            if found != expected:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep

def _escapeRegexRangeChars(s):
    """Backslash-escape the characters \\ ^ - ] in s, and spell newline and
       tab as the two-character sequences \\n and \\t."""
    # the backslash must be handled first, so that the backslashes added
    # for the other characters are not escaped again
    for special in ("\\", "^", "-", "]"):
        s = s.replace(special,_bslash+special)
    s = s.replace("\n",r"\n").replace("\t",r"\t")
    return _ustr(s)

3204
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.

       Parameters:
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)

       If strs is neither a string nor a list/tuple, a SyntaxWarning is issued
       and the result is built from an empty set of alternatives.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        # carry on with no alternatives instead of failing below on an
        # unbound 'symbols' name
        symbols = []

    # remove duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix (longest-first testing)
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # all single characters - a character class is enough
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3263
def dictOf( key, value ):
    """Helper to easily and clearly define a dictionary by specifying the respective patterns
       for the key and value.  Takes care of defining the Dict, ZeroOrMore, and Group tokens
       in the proper order.  The key pattern can include delimiting markers or punctuation,
       as long as they are suppressed, thereby leaving the significant key text.  The value
       pattern can include named results, so that the Dict results can include named token
       fields.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3273
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.  Useful to
       restore the parsed fields of an HTML start tag into the raw tag text itself, or to
       revert separate tokens with intervening whitespace back to the original matching
       input text. Simpler to use than the parse action keepOriginalText, and does not
       require the inspect module to chase up the call stack.  By default, returns a
       string containing the original parsed text.

       If the optional asString argument is passed as False, then the return value is a
       ParseResults containing any results names that were originally matched, and a
       single token containing the original matched text from the input string.  So if
       the expression passed to originalTextFor contains expressions with defined
       results names, you must set asString to False if you want to preserve those
       results name values."""
    # an empty match whose only token is the current parse location
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    startMarker = locMarker("_original_start")
    endMarker = locMarker("_original_end")
    matchExpr = startMarker + expr + endMarker
    if asString:
        def extractText(s,l,t):
            return s[t._original_start:t._original_end]
    else:
        def extractText(s,l,t):
            # replace the tokens in place, then drop the two marker names
            del t[:]
            t.insert(0, s[t._original_start:t._original_end])
            del t["_original_start"]
            del t["_original_end"]
    matchExpr.setParseAction(extractText)
    return matchExpr

# convenience constants for positional expressions
empty       = Empty().setName("empty")
lineStart   = LineStart().setName("lineStart")
lineEnd     = LineEnd().setName("lineEnd")
stringStart = StringStart().setName("stringStart")
stringEnd   = StringEnd().setName("stringEnd")

_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
       If the input cannot be parsed or expanded, an empty string is returned.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # deliberately best-effort: bad input yields "" - but do not
        # swallow KeyboardInterrupt / SystemExit as a bare except would
        return ""

3338
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
       column in the input text.
    """
    def verifyCol(strg,locn,toks):
        actualCol = col(locn,strg)
        if actualCol != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol

def replaceWith(replStr):
    """Helper method for common parse actions that simply return a literal value.  Especially
       useful when used with transformString().
    """
    def _replFunc(*ignoredArgs):
        # the parse arguments are irrelevant; always yield the fixed value
        replacement = [replStr]
        return replacement
    return _replFunc

def removeQuotes(s,l,t):
    """Helper parse action for removing quotation marks from parsed quoted strings.
       To use, add this parse action to quoted string using::
         quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    # drop the first and last character (the enclosing quotes)
    return quoted[1:-1]
3362
def upcaseTokens(s,l,t):
    """Helper parse action to convert tokens to upper case."""
    return [ _ustr(tok).upper() for tok in t ]
3366
def downcaseTokens(s,l,t):
    """Helper parse action to convert tokens to lower case."""
    return [ _ustr(tok).lower() for tok in t ]
3370
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
       overriding any nested parse actions.

       The tokens in t are replaced in place by the text of s from startLoc
       up to the end location reported by getTokensEndLoc, and t is
       returned.  Raises ParseFatalException when not called as a parse
       action."""
    try:
        endloc = getTokensEndLoc()
    except ParseBaseException:
        # getTokensEndLoc reports misuse with a ParseFatalException, which
        # "except ParseException" did not catch; report it under this
        # function's own name
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    del t[:]
    t += ParseResults(s[startLoc:endloc])
    return t
3381
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
       location of the parsed tokens."""
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for frameRecord in fstack[2:]:
            if frameRecord[3] != "_parseNoCache":
                continue
            # the 'loc' local of that frame is the end location
            return frameRecord[0].f_locals["loc"]
        raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # drop the reference to the frame records in every case
        del fstack
3397
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name"""
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if xml:
        # XML: every attribute is name="value", with a double-quoted value
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrEntry = tagAttrName + Suppress("=") + tagAttrValue
    else:
        # HTML: names are lower-cased, and the value is optional and may be
        # unquoted (any run of printables other than '>')
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrEntry = tagAttrName.setParseAction(downcaseTokens) + \
                Optional( Suppress("=") + tagAttrValue )

    # "empty" results name: True when the tag is closed with "/>"
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + \
            Dict(ZeroOrMore(Group( attrEntry ))) + \
            emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    # results names are "start"/"end" + the capitalized tag name with any ':' removed
    nameSuffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+nameSuffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+nameSuffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3425
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
    htmlTags = _makeTags( tagStr, xml=False )
    return htmlTags
3429
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name"""
    xmlTags = _makeTags( tagStr, xml=True )
    return xmlTags
3433
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
       with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
       with a required attribute value, to avoid false matches on common tags such as
       <TD> or <DIV>.

       Call withAttribute with a series of attribute names and values. Specify the list
       of filter attributes names and values as:
        - keyword arguments, as in (class="Customer",align="right"), or
        - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
       For attribute names with a namespace prefix, you must use the second form.  Attribute
       names are matched insensitive to upper/lower case.

       To verify that the attribute exists, but without specifying a value, pass
       withAttribute.ANY_VALUE as the value.
       """
    # positional name-value pairs take precedence over keyword arguments
    requested = args or attrDict.items()
    attrs = [(k,v) for k,v in requested]
    def pa(s,l,tokens):
        for attrName,attrValue in attrs:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE:
                actual = tokens[attrName]
                if actual != attrValue:
                    raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                                (attrName, actual, attrValue))
    return pa
withAttribute.ANY_VALUE = object()

opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()

def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
       operators working in a precedence hierarchy.  Operators may be unary or
       binary, left- or right-associative.  Parse actions can also be attached
       to operator expressions.

       Parameters:
        - baseExpr - expression representing the most basic operand of the
          nested expression grammar
        - opList - list of tuples, one for each operator precedence level in the
          expression grammar; each tuple is of the form
          (opExpr, numTerms, rightLeftAssoc, parseAction), where:
           - opExpr is the pyparsing expression for the operator;
              may also be a string, which will be converted to a Literal;
              if numTerms is 3, opExpr is a tuple of two expressions, for the
              two operators separating the 3 terms
           - numTerms is the number of terms for this operator (must
              be 1, 2, or 3)
           - rightLeftAssoc is the indicator whether the operator is
              right or left associative, using the pyparsing-defined
              constants opAssoc.RIGHT and opAssoc.LEFT.
           - parseAction is the parse action to be associated with
              expressions matching this operator expression (the
              parse action tuple member may be omitted)

       Raises ValueError for an invalid numTerms, an invalid associativity
       constant, or a ternary opExpr that is not a pair of expressions.
    """
    ret = Forward()
    # the innermost level: an operand, or a full expression in parentheses
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for i,operDef in enumerate(opList):
        # pad with None so that the parse action member may be omitted
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()#.setName("expr%d" % i)
        # each matchExpr starts with a FollowedBy lookahead of the shortest
        # possible match, ahead of the Group that does the real matching
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                matchExpr = FollowedBy(lastExpr + opExpr) + Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + lastExpr) + Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no operator expression: terms are simply adjacent
                    matchExpr = FollowedBy(lastExpr+lastExpr) + Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
                            Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                matchExpr = FollowedBy(opExpr.expr + thisExpr) + Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = FollowedBy(lastExpr + opExpr + thisExpr) + Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    matchExpr = FollowedBy(lastExpr + thisExpr) + Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                matchExpr = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
                            Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        if pa:
            matchExpr.setParseAction( pa )
        # this level matches its own operator form, or falls through to the
        # previous level
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret

# Quoted-string expressions.  Between the quotes the patterns accept any
# character other than the quote itself, newline, carriage return or
# backslash, plus a doubled quote character, a \x hex escape, or a backslash
# followed by any one character.
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
# a quoted string immediately preceded by 'u'
unicodeString = Combine(_L('u') + quotedString.copy())

def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method for defining nested lists enclosed in opening and closing
       delimiters ("(" and ")" are the default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       If an expression is not provided for the content argument, the nested
       expression will capture all whitespace-delimited content between delimiters
       as a list of separate values.

       Use the ignoreExpr argument to define expressions that may contain
       opening or closing characters that should not be treated as opening
       or closing characters for nesting, such as quotedString or a comment
       expression.  Specify multiple expressions using an Or or MatchFirst.
       The default is quotedString, but if no expressions are to be ignored,
       then pass None for this argument.

       Raises ValueError if opener and closer are equal, or if no content
       expression is given and opener/closer are not both strings.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")
    if content is None:
        # build a default content expression; this needs the delimiters as
        # plain strings so that they can be excluded from the content
        if isinstance(opener,basestring) and isinstance(closer,basestring):
            if len(opener) == 1 and len(closer)==1:
                # single-character delimiters can be excluded by character set
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (empty+CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
                                ).setParseAction(lambda t:t[0].strip()))
            else:
                # multi-character delimiters need negative lookaheads instead
                if ignoreExpr is not None:
                    content = (Combine(OneOrMore(~ignoreExpr +
                                    ~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
                else:
                    content = (Combine(OneOrMore(~Literal(opener) + ~Literal(closer) +
                                    CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
                                ).setParseAction(lambda t:t[0].strip()))
        else:
            raise ValueError("opening and closing arguments must be strings if no content expression is given")
    # the result refers to itself, so that lists can nest to any depth
    ret = Forward()
    if ignoreExpr is not None:
        ret << Group( Suppress(opener) + ZeroOrMore( ignoreExpr | ret | content ) + Suppress(closer) )
    else:
        ret << Group( Suppress(opener) + ZeroOrMore( ret | content ) + Suppress(closer) )
    return ret
3596
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Side effects: indentStack is appended to and popped from while parsing,
       and blockStatementExpr.ignore() is called on the caller's expression.

       Returns:
        a Group expression matching the block; each statement in it is
        wrapped in its own Group.
    """
    def checkPeerIndent(s,l,t):
        # Nothing to check at (or past) the end of the input.
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                # Deeper than the current level where a peer was expected.
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            # Entering a deeper block: remember its column.
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        # Nothing to check at (or past) the end of the input.
        if l >= len(s): return
        curCol = col(l,s)
        # Two entries are needed to compare against indentStack[-2]; a shorter
        # stack is reported as a parse failure rather than an IndexError.
        if not(len(indentStack) >= 2 and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    # Line ends are matched with only tab and space treated as skippable
    # whitespace, and are suppressed from the results.
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        # FollowedBy makes sure a statement is actually present before the
        # INDENT action pushes a new level onto the stack.
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    # NOTE(review): _bslash is defined elsewhere in this file; presumably this
    # makes a backslash at end of line act as a line continuation - confirm.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr

# Character sets built from the 0xa1-0xff range.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))

def replaceHTMLEntity(t):
    """Return the character mapped to t.entity in _htmlEntityMap, or None
       if the entity is not in the map."""
    return _htmlEntityMap.get(t.entity)

# it's easy to get these comment structures wrong - they're very common, so may as well make them available
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")

htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")

javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                                  Optional( Word(" \t") +
                                            ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")


if __name__ == "__main__":

    def test( teststring ):
        """Parse teststring with simpleSQL and print the results, or print
           the failing line with a caret under the error column."""
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException:
            # sys.exc_info() instead of an "except ... as" / "except ..., err"
            # clause, whose spelling differs between Python 2 and 3.
            err = sys.exc_info()[1]
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        print()

    # Minimal "select <columns> from <tables>" grammar used by the demo.
    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken +
                  ( '*' | columnNameList ).setResultsName( "columns" ) +
                  fromToken +
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )
