Gaudi Framework, version v24r2

Home   Generated: Wed Dec 4 2013
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
pyparsing.py
Go to the documentation of this file.
1 # module pyparsing.py
2 #
3 # Copyright (c) 2003-2009 Paul T. McGuire
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining
6 # a copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish,
9 # distribute, sublicense, and/or sell copies of the Software, and to
10 # permit persons to whom the Software is furnished to do so, subject to
11 # the following conditions:
12 #
13 # The above copyright notice and this permission notice shall be
14 # included in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 #
24 #from __future__ import generators
25 
26 __doc__ = \
27 """
28 pyparsing module - Classes and methods to define and execute parsing grammars
29 
30 The pyparsing module is an alternative approach to creating and executing simple grammars,
31 vs. the traditional lex/yacc approach, or the use of regular expressions. With pyparsing, you
32 don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
33 provides a library of classes that you use to construct the grammar directly in Python.
34 
35 Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::
36 
37  from pyparsing import Word, alphas
38 
39  # define grammar of a greeting
40  greet = Word( alphas ) + "," + Word( alphas ) + "!"
41 
42  hello = "Hello, World!"
43  print hello, "->", greet.parseString( hello )
44 
45 The program outputs the following::
46 
47  Hello, World! -> ['Hello', ',', 'World', '!']
48 
49 The Python representation of the grammar is quite readable, owing to the self-explanatory
50 class names, and the use of '+', '|' and '^' operators.
51 
52 The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
53 object with named attributes.
54 
55 The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
56  - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
57  - quoted strings
58  - embedded comments
59 """
60 
61 __version__ = "1.5.2"
62 __versionTime__ = "17 February 2009 19:45"
63 __author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"
64 
65 import string
66 from weakref import ref as wkref
67 import copy
68 import sys
69 import warnings
70 import re
71 import sre_constants
72 #~ sys.stderr.write( "testing pyparsing module, version %s, %s\n" % (__version__,__versionTime__ ) )
73 
74 __all__ = [
75 'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
76 'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
77 'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
78 'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
79 'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
80 'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
81 'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
82 'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
83 'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
84 'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
85 'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
86 'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
87 'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
88 'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
89 'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
90 'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
91 'indentedBlock', 'originalTextFor',
92 ]
93 
94 
95 """
96 Detect if we are running version 3.X and make appropriate changes
97 Robert A. Clark
98 """
# Work out once, at import time, which major Python version is running and
# define the compatibility names the rest of the module relies on.
_PY3K = sys.version_info[0] > 2
if _PY3K:
    # Python 3 has neither sys.maxint nor basestring
    _MAX_INT = sys.maxsize
    basestring = str
else:
    _MAX_INT = sys.maxint
106 
if _PY3K:
    # str is already Unicode-aware on Python 3
    _ustr = str
    unichr = chr
else:
    def _ustr(obj):
        """Unicode-friendly drop-in replacement for str(obj).

        A unicode object is returned unchanged.  Otherwise str(obj) is
        tried first, so existing callers see exactly what str() would give
        them; only if that fails with a UnicodeEncodeError is unicode(obj)
        returned instead.
        """
        if isinstance(obj, unicode):
            return obj

        try:
            return str(obj)
        except UnicodeEncodeError:
            # The text cannot be expressed in the default encoding.  Rather
            # than choosing one of the lossy encodings (escape codes,
            # question marks, ...), hand back the unicode object itself.
            return unicode(obj)
137 
# _str2dict turns a string into a container supporting fast "char in x"
# membership tests: a set on Python 3, a dict keyed by character (every
# value 0) on Python 2.
if _PY3K:
    _str2dict = set
else:
    def _str2dict(strg):
        return dict.fromkeys(strg, 0)
143 
def _xml_escape(data):
    """Return data with &, >, <, " and ' replaced by their XML entities."""
    # The ampersand has to come first: the other replacements introduce
    # ampersands of their own, which must not be escaped a second time.
    substitutions = (
        ('&', '&amp;'),
        ('>', '&gt;'),
        ('<', '&lt;'),
        ('"', '&quot;'),
        ("'", '&apos;'),
    )
    for symbol, entity in substitutions:
        data = data.replace(symbol, entity)
    return data
153 
class _Constants(object):
    """Empty class; it defines no attributes or methods of its own."""
156 
# Character sets offered to client code for building grammars.
if _PY3K:
    alphas = string.ascii_lowercase + string.ascii_uppercase
else:
    alphas = string.lowercase + string.uppercase
nums = string.digits
hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums
_bslash = chr(92)
# every printable character that is not whitespace
printables = "".join( [ ch for ch in string.printable
                        if ch not in string.whitespace ] )
166 
class ParseBaseException(Exception):
    """Base exception class for all parsing runtime exceptions.

    Instance attributes:
     - pstr - the string that was being parsed ("" when only a message was given)
     - loc - the location in pstr the exception refers to
     - msg - the error message
     - parserElement - the element passed as elem, or None
    """
    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, pstr, loc=0, msg=None, elem=None ):
        self.loc = loc
        if msg is None:
            # single-argument form: the lone argument is the message itself
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.parserElement = elem

    def __getattr__( self, aname ):
        """supported attributes by name are:
            - lineno - returns the line number of the exception text
            - col, column - returns the column number of the exception text
            - line - returns the line containing the exception text
        Any other name raises AttributeError.
        """
        # computed lazily from (loc, pstr), so that constructing the
        # exception stays cheap
        if( aname == "lineno" ):
            return lineno( self.loc, self.pstr )
        elif( aname in ("col", "column") ):
            return col( self.loc, self.pstr )
        elif( aname == "line" ):
            return line( self.loc, self.pstr )
        else:
            raise AttributeError(aname)

    def __str__( self ):
        return "%s (at char %d), (line:%d, col:%d)" % \
                ( self.msg, self.loc, self.lineno, self.column )

    def __repr__( self ):
        return _ustr(self)

    def markInputline( self, markerString = ">!<" ):
        """Extracts the exception line from the input string, and marks
           the location of the exception with a special symbol.
           An empty markerString returns the line unmarked; either way the
           result is stripped of surrounding whitespace.
        """
        line_str = self.line
        line_column = self.column - 1
        if markerString:
            line_str = "".join( [line_str[:line_column],
                                markerString, line_str[line_column:]])
        return line_str.strip()

    def __dir__(self):
        # the method is spelled markInputline (lower-case "l"); the name
        # listed here has to match it for introspection to be useful
        return "loc msg pstr parserElement lineno col line " \
               "markInputline __str__ __repr__".split()
214 
class ParseException(ParseBaseException):
    """exception thrown when parse expressions don't match class;
       supported attributes by name are:
        - lineno - returns the line number of the exception text
        - col - returns the column number of the exception text
        - line - returns the line containing the exception text
    """
    pass
223 
class ParseFatalException(ParseBaseException):
    """User-throwable exception, to be raised when inconsistent parse
       content is found; stops all parsing immediately."""
228 
class ParseSyntaxException(ParseFatalException):
    """just like ParseFatalException, but thrown internally when an
       ErrorStop indicates that parsing is to stop immediately because
       an unbacktrackable syntax error has been found"""
    def __init__(self, pe):
        # rebuild the exception from the fields of the one that triggered
        # it (pe), so string, location, message and element are preserved
        super(ParseSyntaxException, self).__init__(
                                    pe.pstr, pe.loc, pe.msg, pe.parserElement)
236 
237 #~ class ReparseException(ParseBaseException):
238  #~ """Experimental class - parse actions can raise this exception to cause
239  #~ pyparsing to reparse the input string:
240  #~ - with a modified input string, and/or
241  #~ - with a modified start location
242  #~ Set the values of the ReparseException in the constructor, and raise the
243  #~ exception in a parse action to cause pyparsing to use the new string/location.
244  #~ Setting the values as None causes no change to be made.
245  #~ """
246  #~ def __init_( self, newstring, restartLoc ):
247  #~ self.newParseText = newstring
248  #~ self.reparseLoc = restartLoc
249 
class RecursiveGrammarException(Exception):
    """exception thrown by validate() if the grammar could be improperly recursive"""
    def __init__( self, parseElementList ):
        # the elements that were being examined, kept for the error message
        self.parseElementTrace = parseElementList

    def __str__( self ):
        # Format through a 1-tuple: applying "%" directly to the trace would
        # raise TypeError if the trace itself happened to be a tuple.
        return "RecursiveGrammarException: %s" % (self.parseElementTrace,)
257 
class _ParseResultsWithOffset(object):
    """Pair of (value, offset) stored in a ParseResults name dictionary.

    Item 0 is the value and item 1 the position it is recorded at; the
    object can be indexed and unpacked like a 2-tuple.
    """
    def __init__(self,p1,p2):
        self.tup = (p1,p2)
    def __getitem__(self,i):
        return self.tup[i]
    def __repr__(self):
        return repr(self.tup)
    def setOffset(self,i):
        """Replace the stored offset, keeping the value."""
        self.tup = (self.tup[0],i)
267 
class ParseResults(object):
    """Structured parse results, to provide multiple means of access to the parsed data:
       - as a list (len(results))
       - by list index (results[0], results[1], etc.)
       - by attribute (results.<resultsName>)

    Internally the tokens are kept in a list, and the named results in a
    dictionary mapping each name to a list of (value, position) pairs.
    """
    __slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )

    def __new__(cls, toklist, name=None, asList=True, modal=True ):
        # wrapping an existing ParseResults returns that same object;
        # __doinit tells __init__ whether there is anything to set up
        if isinstance(toklist, cls):
            return toklist
        retobj = object.__new__(cls)
        retobj.__doinit = True
        return retobj

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__( self, toklist, name=None, asList=True, modal=True ):
        if self.__doinit:
            self.__doinit = False
            self.__name = None
            self.__parent = None
            self.__accumNames = {}
            if isinstance(toklist, list):
                self.__toklist = toklist[:]
            else:
                self.__toklist = [toklist]
            self.__tokdict = dict()

        if name:
            if not modal:
                # non-modal names accumulate every match instead of
                # reporting only the last one
                self.__accumNames[name] = 0
            if isinstance(name,int):
                name = _ustr(name) # will always return a str, but use _ustr for consistency
            self.__name = name
            if not toklist in (None,'',[]):
                if isinstance(toklist,basestring):
                    toklist = [ toklist ]
                if asList:
                    if isinstance(toklist,ParseResults):
                        self[name] = _ParseResultsWithOffset(toklist.copy(),0)
                    else:
                        self[name] = _ParseResultsWithOffset(ParseResults(toklist[0]),0)
                    self[name].__name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError,TypeError,IndexError):
                        self[name] = toklist

    def __getitem__( self, i ):
        """Integer or slice: index into the token list.  Anything else: look
           up a results name, returning the last value recorded for it, or
           all of them as a ParseResults if the name accumulates matches."""
        if isinstance( i, (int,slice) ):
            return self.__toklist[i]
        else:
            if i not in self.__accumNames:
                return self.__tokdict[i][-1][0]
            else:
                return ParseResults([ v[0] for v in self.__tokdict[i] ])

    def __setitem__( self, k, v ):
        """Integer key: replace a token.  Any other key: append v to the
           values recorded under that results name."""
        if isinstance(v,_ParseResultsWithOffset):
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [v]
            sub = v[0]
        elif isinstance(k,int):
            self.__toklist[k] = v
            sub = v
        else:
            self.__tokdict[k] = self.__tokdict.get(k,list()) + [_ParseResultsWithOffset(v,0)]
            sub = v
        if isinstance(sub,ParseResults):
            # weak reference, so a child does not keep its parent alive
            sub.__parent = wkref(self)

    def __delitem__( self, i ):
        """Integer or slice: delete tokens and shift the recorded positions
           of named results that followed them.  Anything else: delete a
           results name."""
        if isinstance(i,(int,slice)):
            mylen = len( self.__toklist )
            del self.__toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i+1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name in self.__tokdict:
                occurrences = self.__tokdict[name]
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(value, position - (position > j))
        else:
            del self.__tokdict[i]

    def __contains__( self, k ):
        return k in self.__tokdict

    def __len__( self ): return len( self.__toklist )
    def __bool__(self): return len( self.__toklist ) > 0
    __nonzero__ = __bool__
    def __iter__( self ): return iter( self.__toklist )
    def __reversed__( self ): return iter( reversed(self.__toklist) )

    def keys( self ):
        """Returns all named result keys."""
        return self.__tokdict.keys()

    def pop( self, index=-1 ):
        """Removes and returns item at specified index (default=last).
           Will work with either numeric indices or dict-key indices."""
        ret = self[index]
        del self[index]
        return ret

    def get(self, key, defaultValue=None):
        """Returns named result matching the given key, or if there is no
           such name, then returns the given defaultValue or None if no
           defaultValue is specified."""
        if key in self:
            return self[key]
        else:
            return defaultValue

    def insert( self, index, insStr ):
        """Inserts insStr into the token list at index, shifting the
           recorded positions of named results located after it."""
        self.__toklist.insert(index, insStr)
        # fixup indices in token dictionary
        for name in self.__tokdict:
            occurrences = self.__tokdict[name]
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(value, position + (position > index))

    def items( self ):
        """Returns all named result keys and values as a list of tuples."""
        return [(k,self[k]) for k in self.__tokdict]

    def values( self ):
        """Returns all named result values."""
        return [ v[-1][0] for v in self.__tokdict.values() ]

    def __getattr__( self, name ):
        # Only reached when normal attribute lookup fails: a results name
        # yields its value(s), any other non-slot name the empty string.
        if name not in self.__slots__:
            if name in self.__tokdict:
                if name not in self.__accumNames:
                    return self.__tokdict[name][-1][0]
                else:
                    return ParseResults([ v[0] for v in self.__tokdict[name] ])
            else:
                return ""
        return None

    def __add__( self, other ):
        ret = self.copy()
        ret += other
        return ret

    def __iadd__( self, other ):
        """Appends the tokens and named results of other to this object."""
        if other.__tokdict:
            offset = len(self.__toklist)
            def addoffset(a):
                # Negative positions map to the start of the appended tokens.
                # Written as an explicit branch: the "a<0 and offset or
                # a+offset" idiom returns the negative value when offset is 0.
                if a < 0:
                    return offset
                return a + offset
            otheritems = other.__tokdict.items()
            otherdictitems = [(k, _ParseResultsWithOffset(v[0],addoffset(v[1])) )
                                for (k,vlist) in otheritems for v in vlist]
            for k,v in otherdictitems:
                self[k] = v
                if isinstance(v[0],ParseResults):
                    v[0].__parent = wkref(self)

        self.__toklist += other.__toklist
        self.__accumNames.update( other.__accumNames )
        return self

    def __repr__( self ):
        return "(%s, %s)" % ( repr( self.__toklist ), repr( self.__tokdict ) )

    def __str__( self ):
        pieces = []
        for i in self.__toklist:
            if isinstance(i, ParseResults):
                pieces.append( _ustr(i) )
            else:
                pieces.append( repr(i) )
        return "[" + ", ".join(pieces) + "]"

    def _asStringList( self, sep='' ):
        """Returns the tokens flattened into a list of strings, with sep (if
           given) inserted between top-level items."""
        out = []
        for item in self.__toklist:
            if out and sep:
                out.append(sep)
            if isinstance( item, ParseResults ):
                out += item._asStringList()
            else:
                out.append( _ustr(item) )
        return out

    def asList( self ):
        """Returns the parse results as a nested list of matching tokens;
           nested ParseResults become nested lists, other tokens are
           returned as they are."""
        out = []
        for res in self.__toklist:
            if isinstance(res,ParseResults):
                out.append( res.asList() )
            else:
                out.append( res )
        return out

    def asDict( self ):
        """Returns the named parse results as dictionary."""
        return dict( self.items() )

    def copy( self ):
        """Returns a new copy of a ParseResults object."""
        ret = ParseResults( self.__toklist )
        ret.__tokdict = self.__tokdict.copy()
        ret.__parent = self.__parent
        ret.__accumNames.update( self.__accumNames )
        ret.__name = self.__name
        return ret

    def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
        """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
        nl = "\n"
        out = []
        # map token position -> results name
        namedItems = dict( [ (v[1],k) for (k,vlist) in self.__tokdict.items()
                                                            for v in vlist ] )
        nextLevelIndent = indent + "  "

        # collapse out indents if formatting is not desired
        if not formatted:
            indent = ""
            nextLevelIndent = ""
            nl = ""

        selfTag = None
        if doctag is not None:
            selfTag = doctag
        else:
            if self.__name:
                selfTag = self.__name

        if not selfTag:
            if namedItemsOnly:
                return ""
            else:
                selfTag = "ITEM"

        out += [ nl, indent, "<", selfTag, ">" ]

        worklist = self.__toklist
        for i,res in enumerate(worklist):
            if isinstance(res,ParseResults):
                if i in namedItems:
                    out += [ res.asXML(namedItems[i],
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
                else:
                    out += [ res.asXML(None,
                                        namedItemsOnly and doctag is None,
                                        nextLevelIndent,
                                        formatted)]
            else:
                # individual token, see if there is a name for it
                resTag = None
                if i in namedItems:
                    resTag = namedItems[i]
                if not resTag:
                    if namedItemsOnly:
                        continue
                    else:
                        resTag = "ITEM"
                xmlBodyText = _xml_escape(_ustr(res))
                out += [ nl, nextLevelIndent, "<", resTag, ">",
                                                xmlBodyText,
                                                "</", resTag, ">" ]

        out += [ nl, indent, "</", selfTag, ">" ]
        return "".join(out)

    def __lookup(self,sub):
        # find the results name under which the very object sub is stored
        for k,vlist in self.__tokdict.items():
            for v,loc in vlist:
                if sub is v:
                    return k
        return None

    def getName(self):
        """Returns the results name for this token expression."""
        if self.__name:
            return self.__name
        elif self.__parent:
            par = self.__parent()
            if par:
                return par.__lookup(self)
            else:
                return None
        elif (len(self) == 1 and
               len(self.__tokdict) == 1 and
               # list(...) because Python 3 dict views cannot be indexed
               list(self.__tokdict.values())[0][0][1] in (0,-1)):
            return list(self.__tokdict.keys())[0]
        else:
            return None

    def dump(self,indent='',depth=0):
        """Diagnostic method for listing out the contents of a ParseResults.
           Accepts an optional indent argument so that this string can be embedded
           in a nested display of other data."""
        out = []
        out.append( indent+_ustr(self.asList()) )
        keys = self.items()
        keys.sort()
        for k,v in keys:
            if out:
                out.append('\n')
            out.append( "%s%s- %s: " % (indent,('  '*depth), k) )
            if isinstance(v,ParseResults):
                if v.keys():
                    out.append( v.dump(indent,depth+1) )
                else:
                    out.append(_ustr(v))
            else:
                out.append(_ustr(v))
        return "".join(out)

    # add support for pickle protocol
    def __getstate__(self):
        # the parent is pickled as the object itself (or None), because
        # weak references cannot be pickled
        return ( self.__toklist,
                 ( self.__tokdict.copy(),
                   self.__parent is not None and self.__parent() or None,
                   self.__accumNames,
                   self.__name ) )

    def __setstate__(self,state):
        self.__toklist = state[0]
        self.__tokdict, \
        par, \
        inAccumNames, \
        self.__name = state[1]
        self.__accumNames = {}
        self.__accumNames.update(inAccumNames)
        if par is not None:
            self.__parent = wkref(par)
        else:
            self.__parent = None

    def __dir__(self):
        # list(...) because on Python 3 keys() is a view, which cannot be
        # concatenated to a list
        return dir(super(ParseResults,self)) + list(self.keys())
618 
def col (loc,strg):
    """Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    # a location sitting on a newline character is reported as column 1
    if loc < len(strg) and strg[loc] == '\n':
        return 1
    # otherwise: distance from the closest preceding newline (rfind gives
    # -1 on the first line, which makes the first column 1)
    return loc - strg.rfind("\n", 0, loc)
630 
def lineno(loc,strg):
    """Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
   """
    newlinesBefore = strg.count("\n", 0, loc)
    return newlinesBefore + 1
642 
def line( loc, strg ):
    """Returns the line of text containing loc within a string, counting newlines as line separators.
       The returned text does not include the terminating newline.
       """
    lastCR = strg.rfind("\n", 0, loc)
    nextCR = strg.find("\n", loc)
    # find() signals "not found" with -1; index 0 is a genuine hit (the
    # string starts with a newline), so the test has to be >= 0
    if nextCR >= 0:
        return strg[lastCR+1:nextCR]
    else:
        return strg[lastCR+1:]
652 
def _defaultStartDebugAction( instring, loc, expr ):
    """Default debug action run before a match attempt: prints the
       expression and the location (with line and column) being tried."""
    report = "".join( [ "Match ", _ustr(expr), " at loc ", _ustr(loc),
                        "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ) ] )
    print (report)
655 
def _defaultSuccessDebugAction( instring, startloc, endloc, expr, toks ):
    """Default debug action run after a successful match: prints the
       expression and the tokens it produced."""
    report = "".join( [ "Matched ", _ustr(expr), " -> ", str(toks.asList()) ] )
    print (report)
658 
def _defaultExceptionDebugAction( instring, loc, expr, exc ):
    """Default debug action run after a failed match: prints the exception."""
    report = "".join( [ "Exception raised:", _ustr(exc) ] )
    print (report)
661 
def nullDebugAction(*args):
    """'Do-nothing' debug action, to suppress debugging output during parsing.
       Accepts any positional arguments and ignores them."""
    return None
665 
666 class ParserElement(object):
667  """Abstract base level parser element class."""
668  DEFAULT_WHITE_CHARS = " \n\t\r"
669 
671  """Overrides the default whitespace chars
672  """
673  ParserElement.DEFAULT_WHITE_CHARS = chars
674  setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
675 
676  def __init__( self, savelist=False ):
677  self.parseAction = list()
678  self.failAction = None
679  #~ self.name = "<unknown>" # don't define self.name, let subclasses try/except upcall
680  self.strRepr = None
681  self.resultsName = None
682  self.saveAsList = savelist
683  self.skipWhitespace = True
684  self.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
686  self.mayReturnEmpty = False # used when checking for left-recursion
687  self.keepTabs = False
688  self.ignoreExprs = list()
689  self.debug = False
690  self.streamlined = False
691  self.mayIndexError = True # used to optimize exception handling for subclasses that don't advance parse index
692  self.errmsg = ""
693  self.modalResults = True # used to mark results names as modal (report only last) or cumulative (list all)
694  self.debugActions = ( None, None, None ) #custom debug actions
695  self.re = None
696  self.callPreparse = True # used to avoid redundant calls to preParse
697  self.callDuringTry = False
698 
699  def copy( self ):
700  """Make a copy of this ParserElement. Useful for defining different parse actions
701  for the same parsing pattern, using copies of the original parse element."""
702  cpy = copy.copy( self )
703  cpy.parseAction = self.parseAction[:]
704  cpy.ignoreExprs = self.ignoreExprs[:]
705  if self.copyDefaultWhiteChars:
706  cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
707  return cpy
708 
709  def setName( self, name ):
710  """Define name for this expression, for use in debugging."""
711  self.name = name
712  self.errmsg = "Expected " + self.name
713  if hasattr(self,"exception"):
714  self.exception.msg = self.errmsg
715  return self
716 
717  def setResultsName( self, name, listAllMatches=False ):
718  """Define name for referencing matching tokens as a nested attribute
719  of the returned parse results.
720  NOTE: this returns a *copy* of the original ParserElement object;
721  this is so that the client can define a basic element, such as an
722  integer, and reference it in multiple places with different names.
723  """
724  newself = self.copy()
725  newself.resultsName = name
726  newself.modalResults = not listAllMatches
727  return newself
728 
729  def setBreak(self,breakFlag = True):
730  """Method to invoke the Python pdb debugger when this element is
731  about to be parsed. Set breakFlag to True to enable, False to
732  disable.
733  """
734  if breakFlag:
735  _parseMethod = self._parse
736  def breaker(instring, loc, doActions=True, callPreParse=True):
737  import pdb
738  pdb.set_trace()
739  return _parseMethod( instring, loc, doActions, callPreParse )
740  breaker._originalParseMethod = _parseMethod
741  self._parse = breaker
742  else:
743  if hasattr(self._parse,"_originalParseMethod"):
744  self._parse = self._parse._originalParseMethod
745  return self
746 
748  """Internal method used to decorate parse actions that take fewer than 3 arguments,
749  so that all parse actions can be called as f(s,l,t)."""
750  STAR_ARGS = 4
751 
752  try:
753  restore = None
754  if isinstance(f,type):
755  restore = f
756  f = f.__init__
757  if not _PY3K:
758  codeObj = f.func_code
759  else:
760  codeObj = f.code
761  if codeObj.co_flags & STAR_ARGS:
762  return f
763  numargs = codeObj.co_argcount
764  if not _PY3K:
765  if hasattr(f,"im_self"):
766  numargs -= 1
767  else:
768  if hasattr(f,"__self__"):
769  numargs -= 1
770  if restore:
771  f = restore
772  except AttributeError:
773  try:
774  if not _PY3K:
775  call_im_func_code = f.__call__.im_func.func_code
776  else:
777  call_im_func_code = f.__code__
778 
779  # not a function, must be a callable object, get info from the
780  # im_func binding of its bound __call__ method
781  if call_im_func_code.co_flags & STAR_ARGS:
782  return f
783  numargs = call_im_func_code.co_argcount
784  if not _PY3K:
785  if hasattr(f.__call__,"im_self"):
786  numargs -= 1
787  else:
788  if hasattr(f.__call__,"__self__"):
789  numargs -= 0
790  except AttributeError:
791  if not _PY3K:
792  call_func_code = f.__call__.func_code
793  else:
794  call_func_code = f.__call__.__code__
795  # not a bound method, get info directly from __call__ method
796  if call_func_code.co_flags & STAR_ARGS:
797  return f
798  numargs = call_func_code.co_argcount
799  if not _PY3K:
800  if hasattr(f.__call__,"im_self"):
801  numargs -= 1
802  else:
803  if hasattr(f.__call__,"__self__"):
804  numargs -= 1
805 
806 
807  #~ print ("adding function %s with %d args" % (f.func_name,numargs))
808  if numargs == 3:
809  return f
810  else:
811  if numargs > 3:
812  def tmp(s,l,t):
813  return f(f.__call__.__self__, s,l,t)
814  if numargs == 2:
815  def tmp(s,l,t):
816  return f(l,t)
817  elif numargs == 1:
818  def tmp(s,l,t):
819  return f(t)
820  else: #~ numargs == 0:
821  def tmp(s,l,t):
822  return f()
823  try:
824  tmp.__name__ = f.__name__
825  except (AttributeError,TypeError):
826  # no need for special handling if attribute doesnt exist
827  pass
828  try:
829  tmp.__doc__ = f.__doc__
830  except (AttributeError,TypeError):
831  # no need for special handling if attribute doesnt exist
832  pass
833  try:
834  tmp.__dict__.update(f.__dict__)
835  except (AttributeError,TypeError):
836  # no need for special handling if attribute doesnt exist
837  pass
838  return tmp
839  _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
840 
841  def setParseAction( self, *fns, **kwargs ):
842  """Define action to perform when successfully matching parse element definition.
843  Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
844  fn(loc,toks), fn(toks), or just fn(), where:
845  - s = the original string being parsed (see note below)
846  - loc = the location of the matching substring
847  - toks = a list of the matched tokens, packaged as a ParseResults object
848  If the functions in fns modify the tokens, they can return them as the return
849  value from fn, and the modified list of tokens will replace the original.
850  Otherwise, fn does not need to return any value.
851 
852  Note: the default parsing behavior is to expand tabs in the input string
853  before starting the parsing process. See L{I{parseString}<parseString>} for more information
854  on parsing strings containing <TAB>s, and suggested methods to maintain a
855  consistent view of the parsed string, the parse location, and line and column
856  positions within the parsed string.
857  """
858  self.parseAction = list(map(self._normalizeParseActionArgs, list(fns)))
859  self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
860  return self
861 
862  def addParseAction( self, *fns, **kwargs ):
863  """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
864  self.parseAction += list(map(self._normalizeParseActionArgs, list(fns)))
865  self.callDuringTry = self.callDuringTry or ("callDuringTry" in kwargs and kwargs["callDuringTry"])
866  return self
867 
868  def setFailAction( self, fn ):
869  """Define action to perform if parsing fails at this expression.
870  Fail acton fn is a callable function that takes the arguments
871  fn(s,loc,expr,err) where:
872  - s = string being parsed
873  - loc = location where expression match was attempted and failed
874  - expr = the parse expression that failed
875  - err = the exception thrown
876  The function returns no value. It may throw ParseFatalException
877  if it is desired to stop parsing immediately."""
878  self.failAction = fn
879  return self
880 
881  def _skipIgnorables( self, instring, loc ):
882  exprsFound = True
883  while exprsFound:
884  exprsFound = False
885  for e in self.ignoreExprs:
886  try:
887  while 1:
888  loc,dummy = e._parse( instring, loc )
889  exprsFound = True
890  except ParseException:
891  pass
892  return loc
893 
894  def preParse( self, instring, loc ):
895  if self.ignoreExprs:
896  loc = self._skipIgnorables( instring, loc )
897 
898  if self.skipWhitespace:
899  wt = self.whiteChars
900  instrlen = len(instring)
901  while loc < instrlen and instring[loc] in wt:
902  loc += 1
903 
904  return loc
905 
906  def parseImpl( self, instring, loc, doActions=True ):
907  return loc, []
908 
909  def postParse( self, instring, loc, tokenlist ):
910  return tokenlist
911 
    #~ @profile
    def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
        """Match this expression against instring at loc, without memoizing.

        Steps: optional preParse, parseImpl, postParse, wrap the tokens in a
        ParseResults, then run the parse actions (only when doActions or
        self.callDuringTry is true).  Debug actions and the fail action are
        invoked along the way when configured.

        Returns a (loc, ParseResults) tuple.  An IndexError escaping parseImpl
        is converted into a ParseException located at len(instring) (in the
        non-debug path only when the IndexError guard is in effect).
        """
        debugging = ( self.debug ) #and doActions )

        # slow path: taken when debugging or a fail action needs to observe exceptions
        if debugging or self.failAction:
            #~ print ("Match",self,"at loc",loc,"(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))
            if (self.debugActions[0] ):
                self.debugActions[0]( instring, loc, self )
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            # note: records the location before preParse skipping, not preloc
            tokensStart = loc
            try:
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            except ParseBaseException, err:
                #~ print ("Exception raised:", err)
                if self.debugActions[2]:
                    self.debugActions[2]( instring, tokensStart, self, err )
                if self.failAction:
                    self.failAction( instring, tokensStart, self, err )
                raise
        else:
            # fast path: same steps, without the debug/fail callbacks
            if callPreParse and self.callPreparse:
                preloc = self.preParse( instring, loc )
            else:
                preloc = loc
            tokensStart = loc
            # the IndexError guard is only installed when the expression declares it
            # may index past the end, or when loc is already at/after end of input
            if self.mayIndexError or loc >= len(instring):
                try:
                    loc,tokens = self.parseImpl( instring, preloc, doActions )
                except IndexError:
                    raise ParseException( instring, len(instring), self.errmsg, self )
            else:
                loc,tokens = self.parseImpl( instring, preloc, doActions )

        tokens = self.postParse( instring, loc, tokens )

        retTokens = ParseResults( tokens, self.resultsName, asList=self.saveAsList, modal=self.modalResults )
        if self.parseAction and (doActions or self.callDuringTry):
            if debugging:
                try:
                    for fn in self.parseAction:
                        tokens = fn( instring, tokensStart, retTokens )
                        # a parse action returning None leaves retTokens as they are
                        if tokens is not None:
                            retTokens = ParseResults( tokens,
                                                      self.resultsName,
                                                      asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                      modal=self.modalResults )
                except ParseBaseException, err:
                    #~ print "Exception raised in user parse action:", err
                    if (self.debugActions[2] ):
                        self.debugActions[2]( instring, tokensStart, self, err )
                    raise
            else:
                # same loop as above, without the exception debug hook
                for fn in self.parseAction:
                    tokens = fn( instring, tokensStart, retTokens )
                    if tokens is not None:
                        retTokens = ParseResults( tokens,
                                                  self.resultsName,
                                                  asList=self.saveAsList and isinstance(tokens,(ParseResults,list)),
                                                  modal=self.modalResults )

        if debugging:
            #~ print ("Matched",self,"->",retTokens.asList())
            if (self.debugActions[1] ):
                self.debugActions[1]( instring, tokensStart, loc, self, retTokens )

        return loc, retTokens
984 
985  def tryParse( self, instring, loc ):
986  try:
987  return self._parse( instring, loc, doActions=False )[0]
988  except ParseFatalException:
989  raise ParseException( instring, loc, self.errmsg, self)
990 
991  # this method gets repeatedly called during backtracking with the same arguments -
992  # we can cache these arguments and save ourselves the trouble of re-parsing the contained expression
993  def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
994  lookup = (self,instring,loc,callPreParse,doActions)
995  if lookup in ParserElement._exprArgCache:
996  value = ParserElement._exprArgCache[ lookup ]
997  if isinstance(value,Exception):
998  raise value
999  return value
1000  else:
1001  try:
1002  value = self._parseNoCache( instring, loc, doActions, callPreParse )
1003  ParserElement._exprArgCache[ lookup ] = (value[0],value[1].copy())
1004  return value
1005  except ParseBaseException, pe:
1006  ParserElement._exprArgCache[ lookup ] = pe
1007  raise
1008 
    # parsing entry point used by the rest of the class; bound to the
    # non-memoizing implementation by default
    _parse = _parseNoCache

    # argument cache for optimizing repeated calls when backtracking through recursive expressions
    # (class-level dict; _parseCache reads and writes it, resetCache empties it)
    _exprArgCache = {}
    def resetCache():
        """Empty ParserElement._exprArgCache."""
        ParserElement._exprArgCache.clear()
    # rebinding form of @staticmethod
    resetCache = staticmethod(resetCache)
1016 
1017  _packratEnabled = False
1019  """Enables "packrat" parsing, which adds memoizing to the parsing logic.
1020  Repeated parse attempts at the same string location (which happens
1021  often in many complex grammars) can immediately return a cached value,
1022  instead of re-executing parsing/validating code. Memoizing is done of
1023  both valid results and parsing exceptions.
1024 
1025  This speedup may break existing programs that use parse actions that
1026  have side-effects. For this reason, packrat parsing is disabled when
1027  you first import pyparsing. To activate the packrat feature, your
1028  program must call the class method ParserElement.enablePackrat(). If
1029  your program uses psyco to "compile as you go", you must call
1030  enablePackrat before calling psyco.full(). If you do not do this,
1031  Python will crash. For best results, call enablePackrat() immediately
1032  after importing pyparsing.
1033  """
1034  if not ParserElement._packratEnabled:
1035  ParserElement._packratEnabled = True
1036  ParserElement._parse = ParserElement._parseCache
1037  enablePackrat = staticmethod(enablePackrat)
1038 
1039  def parseString( self, instring, parseAll=False ):
1040  """Execute the parse expression with the given string.
1041  This is the main interface to the client code, once the complete
1042  expression has been built.
1043 
1044  If you want the grammar to require that the entire input string be
1045  successfully parsed, then set parseAll to True (equivalent to ending
1046  the grammar with StringEnd()).
1047 
1048  Note: parseString implicitly calls expandtabs() on the input string,
1049  in order to report proper column numbers in parse actions.
1050  If the input string contains tabs and
1051  the grammar uses parse actions that use the loc argument to index into the
1052  string being parsed, you can ensure you have a consistent view of the input
1053  string by:
1054  - calling parseWithTabs on your grammar before calling parseString
1055  (see L{I{parseWithTabs}<parseWithTabs>})
1056  - define your parse action using the full (s,loc,toks) signature, and
1057  reference the input string using the parse action's s argument
1058  - explictly expand the tabs in your input string before calling
1059  parseString
1060  """
1061  ParserElement.resetCache()
1062  if not self.streamlined:
1063  self.streamline()
1064  #~ self.saveAsList = True
1065  for e in self.ignoreExprs:
1066  e.streamline()
1067  if not self.keepTabs:
1068  instring = instring.expandtabs()
1069  try:
1070  loc, tokens = self._parse( instring, 0 )
1071  if parseAll:
1072  loc = self.preParse( instring, loc )
1073  StringEnd()._parse( instring, loc )
1074  except ParseBaseException, exc:
1075  # catch and re-raise exception from here, clears out pyparsing internal stack trace
1076  raise exc
1077  else:
1078  return tokens
1079 
    def scanString( self, instring, maxMatches=_MAX_INT ):
        """Scan the input string for expression matches.  Each match will return the
           matching tokens, start location, and end location.  May be called with optional
           maxMatches argument, to clip scanning after 'n' matches are found.

           Note that the start and end locations are reported relative to the string
           being parsed.  See L{I{parseString}<parseString>} for more information on parsing
           strings with embedded tabs.

           This is a generator: it yields (tokens, start, end) tuples."""
        if not self.streamlined:
            self.streamline()
        for e in self.ignoreExprs:
            e.streamline()

        if not self.keepTabs:
            instring = _ustr(instring).expandtabs()
        instrlen = len(instring)
        loc = 0
        # bind the bound methods once, outside the scanning loop
        preparseFn = self.preParse
        parseFn = self._parse
        ParserElement.resetCache()
        matches = 0
        try:
            while loc <= instrlen and matches < maxMatches:
                try:
                    # preParse is done here, so the parse call is told to skip it
                    preloc = preparseFn( instring, loc )
                    nextLoc,tokens = parseFn( instring, preloc, callPreParse=False )
                except ParseException:
                    # no match at this position: retry one character further on
                    loc = preloc+1
                else:
                    matches += 1
                    yield tokens, preloc, nextLoc
                    loc = nextLoc
        except ParseBaseException, pe:
            # re-raise from here (same idiom as in parseString)
            raise pe
1114 
1115  def transformString( self, instring ):
1116  """Extension to scanString, to modify matching text with modified tokens that may
1117  be returned from a parse action. To use transformString, define a grammar and
1118  attach a parse action to it that modifies the returned token list.
1119  Invoking transformString() on a target string will then scan for matches,
1120  and replace the matched text patterns according to the logic in the parse
1121  action. transformString() returns the resulting transformed string."""
1122  out = []
1123  lastE = 0
1124  # force preservation of <TAB>s, to minimize unwanted transformation of string, and to
1125  # keep string locs straight between transformString and scanString
1126  self.keepTabs = True
1127  try:
1128  for t,s,e in self.scanString( instring ):
1129  out.append( instring[lastE:s] )
1130  if t:
1131  if isinstance(t,ParseResults):
1132  out += t.asList()
1133  elif isinstance(t,list):
1134  out += t
1135  else:
1136  out.append(t)
1137  lastE = e
1138  out.append(instring[lastE:])
1139  return "".join(map(_ustr,out))
1140  except ParseBaseException, pe:
1141  raise pe
1142 
1143  def searchString( self, instring, maxMatches=_MAX_INT ):
1144  """Another extension to scanString, simplifying the access to the tokens found
1145  to match the given parse expression. May be called with optional
1146  maxMatches argument, to clip searching after 'n' matches are found.
1147  """
1148  try:
1149  return ParseResults([ t for t,s,e in self.scanString( instring, maxMatches ) ])
1150  except ParseBaseException, pe:
1151  raise pe
1152 
1153  def __add__(self, other ):
1154  """Implementation of + operator - returns And"""
1155  if isinstance( other, basestring ):
1156  other = Literal( other )
1157  if not isinstance( other, ParserElement ):
1158  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1159  SyntaxWarning, stacklevel=2)
1160  return None
1161  return And( [ self, other ] )
1162 
1163  def __radd__(self, other ):
1164  """Implementation of + operator when left operand is not a ParserElement"""
1165  if isinstance( other, basestring ):
1166  other = Literal( other )
1167  if not isinstance( other, ParserElement ):
1168  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1169  SyntaxWarning, stacklevel=2)
1170  return None
1171  return other + self
1172 
1173  def __sub__(self, other):
1174  """Implementation of - operator, returns And with error stop"""
1175  if isinstance( other, basestring ):
1176  other = Literal( other )
1177  if not isinstance( other, ParserElement ):
1178  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1179  SyntaxWarning, stacklevel=2)
1180  return None
1181  return And( [ self, And._ErrorStop(), other ] )
1182 
1183  def __rsub__(self, other ):
1184  """Implementation of - operator when left operand is not a ParserElement"""
1185  if isinstance( other, basestring ):
1186  other = Literal( other )
1187  if not isinstance( other, ParserElement ):
1188  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1189  SyntaxWarning, stacklevel=2)
1190  return None
1191  return other - self
1192 
1193  def __mul__(self,other):
1194  if isinstance(other,int):
1195  minElements, optElements = other,0
1196  elif isinstance(other,tuple):
1197  other = (other + (None, None))[:2]
1198  if other[0] is None:
1199  other = (0, other[1])
1200  if isinstance(other[0],int) and other[1] is None:
1201  if other[0] == 0:
1202  return ZeroOrMore(self)
1203  if other[0] == 1:
1204  return OneOrMore(self)
1205  else:
1206  return self*other[0] + ZeroOrMore(self)
1207  elif isinstance(other[0],int) and isinstance(other[1],int):
1208  minElements, optElements = other
1209  optElements -= minElements
1210  else:
1211  raise TypeError("cannot multiply 'ParserElement' and ('%s','%s') objects", type(other[0]),type(other[1]))
1212  else:
1213  raise TypeError("cannot multiply 'ParserElement' and '%s' objects", type(other))
1214 
1215  if minElements < 0:
1216  raise ValueError("cannot multiply ParserElement by negative value")
1217  if optElements < 0:
1218  raise ValueError("second tuple value must be greater or equal to first tuple value")
1219  if minElements == optElements == 0:
1220  raise ValueError("cannot multiply ParserElement by 0 or (0,0)")
1221 
1222  if (optElements):
1223  def makeOptionalList(n):
1224  if n>1:
1225  return Optional(self + makeOptionalList(n-1))
1226  else:
1227  return Optional(self)
1228  if minElements:
1229  if minElements == 1:
1230  ret = self + makeOptionalList(optElements)
1231  else:
1232  ret = And([self]*minElements) + makeOptionalList(optElements)
1233  else:
1234  ret = makeOptionalList(optElements)
1235  else:
1236  if minElements == 1:
1237  ret = self
1238  else:
1239  ret = And([self]*minElements)
1240  return ret
1241 
1242  def __rmul__(self, other):
1243  return self.__mul__(other)
1244 
1245  def __or__(self, other ):
1246  """Implementation of | operator - returns MatchFirst"""
1247  if isinstance( other, basestring ):
1248  other = Literal( other )
1249  if not isinstance( other, ParserElement ):
1250  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1251  SyntaxWarning, stacklevel=2)
1252  return None
1253  return MatchFirst( [ self, other ] )
1254 
1255  def __ror__(self, other ):
1256  """Implementation of | operator when left operand is not a ParserElement"""
1257  if isinstance( other, basestring ):
1258  other = Literal( other )
1259  if not isinstance( other, ParserElement ):
1260  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1261  SyntaxWarning, stacklevel=2)
1262  return None
1263  return other | self
1264 
1265  def __xor__(self, other ):
1266  """Implementation of ^ operator - returns Or"""
1267  if isinstance( other, basestring ):
1268  other = Literal( other )
1269  if not isinstance( other, ParserElement ):
1270  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1271  SyntaxWarning, stacklevel=2)
1272  return None
1273  return Or( [ self, other ] )
1274 
1275  def __rxor__(self, other ):
1276  """Implementation of ^ operator when left operand is not a ParserElement"""
1277  if isinstance( other, basestring ):
1278  other = Literal( other )
1279  if not isinstance( other, ParserElement ):
1280  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1281  SyntaxWarning, stacklevel=2)
1282  return None
1283  return other ^ self
1284 
1285  def __and__(self, other ):
1286  """Implementation of & operator - returns Each"""
1287  if isinstance( other, basestring ):
1288  other = Literal( other )
1289  if not isinstance( other, ParserElement ):
1290  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1291  SyntaxWarning, stacklevel=2)
1292  return None
1293  return Each( [ self, other ] )
1294 
1295  def __rand__(self, other ):
1296  """Implementation of & operator when left operand is not a ParserElement"""
1297  if isinstance( other, basestring ):
1298  other = Literal( other )
1299  if not isinstance( other, ParserElement ):
1300  warnings.warn("Cannot combine element of type %s with ParserElement" % type(other),
1301  SyntaxWarning, stacklevel=2)
1302  return None
1303  return other & self
1304 
1305  def __invert__( self ):
1306  """Implementation of ~ operator - returns NotAny"""
1307  return NotAny( self )
1308 
1309  def __call__(self, name):
1310  """Shortcut for setResultsName, with listAllMatches=default::
1311  userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1312  could be written as::
1313  userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1314  """
1315  return self.setResultsName(name)
1316 
1317  def suppress( self ):
1318  """Suppresses the output of this ParserElement; useful to keep punctuation from
1319  cluttering up returned output.
1320  """
1321  return Suppress( self )
1322 
1323  def leaveWhitespace( self ):
1324  """Disables the skipping of whitespace before matching the characters in the
1325  ParserElement's defined pattern. This is normally only used internally by
1326  the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1327  """
1328  self.skipWhitespace = False
1329  return self
1330 
1331  def setWhitespaceChars( self, chars ):
1332  """Overrides the default whitespace chars
1333  """
1334  self.skipWhitespace = True
1335  self.whiteChars = chars
1336  self.copyDefaultWhiteChars = False
1337  return self
1338 
1339  def parseWithTabs( self ):
1340  """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1341  Must be called before parseString when the input grammar contains elements that
1342  match <TAB> characters."""
1343  self.keepTabs = True
1344  return self
1345 
1346  def ignore( self, other ):
1347  """Define expression to be ignored (e.g., comments) while doing pattern
1348  matching; may be called repeatedly, to define multiple comment or other
1349  ignorable patterns.
1350  """
1351  if isinstance( other, Suppress ):
1352  if other not in self.ignoreExprs:
1353  self.ignoreExprs.append( other )
1354  else:
1355  self.ignoreExprs.append( Suppress( other ) )
1356  return self
1357 
1358  def setDebugActions( self, startAction, successAction, exceptionAction ):
1359  """Enable display of debugging messages while doing pattern matching."""
1360  self.debugActions = (startAction or _defaultStartDebugAction,
1361  successAction or _defaultSuccessDebugAction,
1362  exceptionAction or _defaultExceptionDebugAction)
1363  self.debug = True
1364  return self
1365 
1366  def setDebug( self, flag=True ):
1367  """Enable display of debugging messages while doing pattern matching.
1368  Set flag to True to enable, False to disable."""
1369  if flag:
1370  self.setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1371  else:
1372  self.debug = False
1373  return self
1374 
1375  def __str__( self ):
1376  return self.name
1377 
1378  def __repr__( self ):
1379  return _ustr(self)
1380 
1381  def streamline( self ):
1382  self.streamlined = True
1383  self.strRepr = None
1384  return self
1385 
1386  def checkRecursion( self, parseElementList ):
1387  pass
1388 
1389  def validate( self, validateTrace=[] ):
1390  """Check defined expressions for valid structure, check for infinite recursive definitions."""
1391  self.checkRecursion( [] )
1392 
1393  def parseFile( self, file_or_filename, parseAll=False ):
1394  """Execute the parse expression on the given file or filename.
1395  If a filename is specified (instead of a file object),
1396  the entire file is opened, read, and closed before parsing.
1397  """
1398  try:
1399  file_contents = file_or_filename.read()
1400  except AttributeError:
1401  f = open(file_or_filename, "rb")
1402  file_contents = f.read()
1403  f.close()
1404  try:
1405  return self.parseString(file_contents, parseAll)
1406  except ParseBaseException, exc:
1407  # catch and re-raise exception from here, clears out pyparsing internal stack trace
1408  raise exc
1409 
1410  def getException(self):
1411  return ParseException("",0,self.errmsg,self)
1412 
1413  def __getattr__(self,aname):
1414  if aname == "myException":
1415  self.myException = ret = self.getException();
1416  return ret;
1417  else:
1418  raise AttributeError("no such attribute " + aname)
1419 
1420  def __eq__(self,other):
1421  if isinstance(other, ParserElement):
1422  return self is other or self.__dict__ == other.__dict__
1423  elif isinstance(other, basestring):
1424  try:
1425  self.parseString(_ustr(other), parseAll=True)
1426  return True
1427  except ParseBaseException:
1428  return False
1429  else:
1430  return super(ParserElement,self)==other
1431 
1432  def __ne__(self,other):
1433  return not (self == other)
1434 
1435  def __hash__(self):
1436  return hash(id(self))
1437 
1438  def __req__(self,other):
1439  return self == other
1440 
1441  def __rne__(self,other):
1442  return not (self == other)
1443 
1444 
1446  """Abstract ParserElement subclass, for defining atomic matching patterns."""
1447  def __init__( self ):
1448  super(Token,self).__init__( savelist=False )
1449  #self.myException = ParseException("",0,"",self)
1450 
1451  def setName(self, name):
1452  s = super(Token,self).setName(name)
1453  self.errmsg = "Expected " + self.name
1454  #s.myException.msg = self.errmsg
1455  return s
1456 
1457 
class Empty(Token):
    """An empty token, will always match."""
    def __init__( self ):
        """Name the token "Empty" and flag it as possibly returning no tokens
           and as never raising IndexError."""
        super(Empty,self).__init__()
        self.name = "Empty"
        self.mayIndexError = False
        self.mayReturnEmpty = True
1465 
1466 
1468  """A token that will never match."""
1469  def __init__( self ):
1470  super(NoMatch,self).__init__()
1471  self.name = "NoMatch"
1472  self.mayReturnEmpty = True
1473  self.mayIndexError = False
1474  self.errmsg = "Unmatchable token"
1475  #self.myException.msg = self.errmsg
1476 
1477  def parseImpl( self, instring, loc, doActions=True ):
1478  exc = self.myException
1479  exc.loc = loc
1480  exc.pstr = instring
1481  raise exc
1482 
1483 
1485  """Token to exactly match a specified string."""
1486  def __init__( self, matchString ):
1487  super(Literal,self).__init__()
1488  self.match = matchString
1489  self.matchLen = len(matchString)
1490  try:
1491  self.firstMatchChar = matchString[0]
1492  except IndexError:
1493  warnings.warn("null string passed to Literal; use Empty() instead",
1494  SyntaxWarning, stacklevel=2)
1495  self.__class__ = Empty
1496  self.name = '"%s"' % _ustr(self.match)
1497  self.errmsg = "Expected " + self.name
1498  self.mayReturnEmpty = False
1499  #self.myException.msg = self.errmsg
1500  self.mayIndexError = False
1501 
1502  # Performance tuning: this routine gets called a *lot*
1503  # if this is a single character match string and the first character matches,
1504  # short-circuit as quickly as possible, and avoid calling startswith
1505  #~ @profile
1506  def parseImpl( self, instring, loc, doActions=True ):
1507  if (instring[loc] == self.firstMatchChar and
1508  (self.matchLen==1 or instring.startswith(self.match,loc)) ):
1509  return loc+self.matchLen, self.match
1510  #~ raise ParseException( instring, loc, self.errmsg )
1511  exc = self.myException
1512  exc.loc = loc
1513  exc.pstr = instring
1514  raise exc
1515 _L = Literal
1516 
1518  """Token to exactly match a specified string as a keyword, that is, it must be
1519  immediately followed by a non-keyword character. Compare with Literal::
1520  Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
1521  Keyword("if") will not; it will only match the leading 'if in 'if x=1', or 'if(y==2)'
1522  Accepts two optional constructor arguments in addition to the keyword string:
1523  identChars is a string of characters that would be valid identifier characters,
1524  defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
1525  matching, default is False.
1526  """
1527  DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1528 
1529  def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
1530  super(Keyword,self).__init__()
1531  self.match = matchString
1532  self.matchLen = len(matchString)
1533  try:
1534  self.firstMatchChar = matchString[0]
1535  except IndexError:
1536  warnings.warn("null string passed to Keyword; use Empty() instead",
1537  SyntaxWarning, stacklevel=2)
1538  self.name = '"%s"' % self.match
1539  self.errmsg = "Expected " + self.name
1540  self.mayReturnEmpty = False
1541  #self.myException.msg = self.errmsg
1542  self.mayIndexError = False
1543  self.caseless = caseless
1544  if caseless:
1545  self.caselessmatch = matchString.upper()
1546  identChars = identChars.upper()
1547  self.identChars = _str2dict(identChars)
1548 
1549  def parseImpl( self, instring, loc, doActions=True ):
1550  if self.caseless:
1551  if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
1552  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) and
1553  (loc == 0 or instring[loc-1].upper() not in self.identChars) ):
1554  return loc+self.matchLen, self.match
1555  else:
1556  if (instring[loc] == self.firstMatchChar and
1557  (self.matchLen==1 or instring.startswith(self.match,loc)) and
1558  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen] not in self.identChars) and
1559  (loc == 0 or instring[loc-1] not in self.identChars) ):
1560  return loc+self.matchLen, self.match
1561  #~ raise ParseException( instring, loc, self.errmsg )
1562  exc = self.myException
1563  exc.loc = loc
1564  exc.pstr = instring
1565  raise exc
1566 
1567  def copy(self):
1568  c = super(Keyword,self).copy()
1569  c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
1570  return c
1571 
1573  """Overrides the default Keyword chars
1574  """
1575  Keyword.DEFAULT_KEYWORD_CHARS = chars
1576  setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1577 
1579  """Token to match a specified string, ignoring case of letters.
1580  Note: the matched results will always be in the case of the given
1581  match string, NOT the case of the input text.
1582  """
1583  def __init__( self, matchString ):
1584  super(CaselessLiteral,self).__init__( matchString.upper() )
1585  # Preserve the defining literal.
1586  self.returnString = matchString
1587  self.name = "'%s'" % self.returnString
1588  self.errmsg = "Expected " + self.name
1589  #self.myException.msg = self.errmsg
1590 
1591  def parseImpl( self, instring, loc, doActions=True ):
1592  if instring[ loc:loc+self.matchLen ].upper() == self.match:
1593  return loc+self.matchLen, self.returnString
1594  #~ raise ParseException( instring, loc, self.errmsg )
1595  exc = self.myException
1596  exc.loc = loc
1597  exc.pstr = instring
1598  raise exc
1599 
1601  def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
1602  super(CaselessKeyword,self).__init__( matchString, identChars, caseless=True )
1603 
1604  def parseImpl( self, instring, loc, doActions=True ):
1605  if ( (instring[ loc:loc+self.matchLen ].upper() == self.caselessmatch) and
1606  (loc >= len(instring)-self.matchLen or instring[loc+self.matchLen].upper() not in self.identChars) ):
1607  return loc+self.matchLen, self.match
1608  #~ raise ParseException( instring, loc, self.errmsg )
1609  exc = self.myException
1610  exc.loc = loc
1611  exc.pstr = instring
1612  raise exc
1613 
1614 class Word(Token):
1615  """Token for matching words composed of allowed character sets.
1616  Defined with string containing all allowed initial characters,
1617  an optional string containing allowed body characters (if omitted,
1618  defaults to the initial character set), and an optional minimum,
1619  maximum, and/or exact length. The default value for min is 1 (a
1620  minimum value < 1 is not valid); the default values for max and exact
1621  are 0, meaning no maximum or exact length restriction.
1622  """
1623  def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
1624  super(Word,self).__init__()
1625  self.initCharsOrig = initChars
1626  self.initChars = _str2dict(initChars)
1627  if bodyChars :
1628  self.bodyCharsOrig = bodyChars
1629  self.bodyChars = _str2dict(bodyChars)
1630  else:
1631  self.bodyCharsOrig = initChars
1632  self.bodyChars = _str2dict(initChars)
1633 
1634  self.maxSpecified = max > 0
1635 
1636  if min < 1:
1637  raise ValueError("cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
1638 
1639  self.minLen = min
1640 
1641  if max > 0:
1642  self.maxLen = max
1643  else:
1644  self.maxLen = _MAX_INT
1645 
1646  if exact > 0:
1647  self.maxLen = exact
1648  self.minLen = exact
1649 
1650  self.name = _ustr(self)
1651  self.errmsg = "Expected " + self.name
1652  #self.myException.msg = self.errmsg
1653  self.mayIndexError = False
1654  self.asKeyword = asKeyword
1655 
1656  if ' ' not in self.initCharsOrig+self.bodyCharsOrig and (min==1 and max==0 and exact==0):
1657  if self.bodyCharsOrig == self.initCharsOrig:
1659  elif len(self.bodyCharsOrig) == 1:
1660  self.reString = "%s[%s]*" % \
1661  (re.escape(self.initCharsOrig),
1663  else:
1664  self.reString = "[%s][%s]*" % \
1667  if self.asKeyword:
1668  self.reString = r"\b"+self.reString+r"\b"
1669  try:
1670  self.re = re.compile( self.reString )
1671  except:
1672  self.re = None
1673 
1674  def parseImpl( self, instring, loc, doActions=True ):
1675  if self.re:
1676  result = self.re.match(instring,loc)
1677  if not result:
1678  exc = self.myException
1679  exc.loc = loc
1680  exc.pstr = instring
1681  raise exc
1682 
1683  loc = result.end()
1684  return loc,result.group()
1685 
1686  if not(instring[ loc ] in self.initChars):
1687  #~ raise ParseException( instring, loc, self.errmsg )
1688  exc = self.myException
1689  exc.loc = loc
1690  exc.pstr = instring
1691  raise exc
1692  start = loc
1693  loc += 1
1694  instrlen = len(instring)
1695  bodychars = self.bodyChars
1696  maxloc = start + self.maxLen
1697  maxloc = min( maxloc, instrlen )
1698  while loc < maxloc and instring[loc] in bodychars:
1699  loc += 1
1700 
1701  throwException = False
1702  if loc - start < self.minLen:
1703  throwException = True
1704  if self.maxSpecified and loc < instrlen and instring[loc] in bodychars:
1705  throwException = True
1706  if self.asKeyword:
1707  if (start>0 and instring[start-1] in bodychars) or (loc<instrlen and instring[loc] in bodychars):
1708  throwException = True
1709 
1710  if throwException:
1711  #~ raise ParseException( instring, loc, self.errmsg )
1712  exc = self.myException
1713  exc.loc = loc
1714  exc.pstr = instring
1715  raise exc
1716 
1717  return loc, instring[start:loc]
1718 
1719  def __str__( self ):
1720  try:
1721  return super(Word,self).__str__()
1722  except:
1723  pass
1724 
1725 
1726  if self.strRepr is None:
1727 
1728  def charsAsStr(s):
1729  if len(s)>4:
1730  return s[:4]+"..."
1731  else:
1732  return s
1733 
1734  if ( self.initCharsOrig != self.bodyCharsOrig ):
1735  self.strRepr = "W:(%s,%s)" % ( charsAsStr(self.initCharsOrig), charsAsStr(self.bodyCharsOrig) )
1736  else:
1737  self.strRepr = "W:(%s)" % charsAsStr(self.initCharsOrig)
1738 
1739  return self.strRepr
1740 
1741 
class Regex(Token):
    """Token for matching strings that match a given regular expression.
       Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
    """
    def __init__( self, pattern, flags=0):
        """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags.

        An empty pattern only triggers a SyntaxWarning.  A pattern that
        re.compile rejects triggers a SyntaxWarning and the compile error is
        re-raised."""
        super(Regex,self).__init__()

        if len(pattern) == 0:
            warnings.warn("null string passed to Regex; use Empty() instead",
                    SyntaxWarning, stacklevel=2)

        self.pattern = pattern
        self.flags = flags

        try:
            self.re = re.compile(self.pattern, self.flags)
        except sre_constants.error:
            warnings.warn("invalid pattern (%s) passed to Regex" % pattern,
                SyntaxWarning, stacklevel=2)
            raise
        self.reString = self.pattern

        # name is derived from the string form, so it is set after pattern
        self.name = _ustr(self)
        self.errmsg = "Expected " + self.name
        #self.myException.msg = self.errmsg
        self.mayIndexError = False
        self.mayReturnEmpty = True

    def parseImpl( self, instring, loc, doActions=True ):
        """Apply the compiled pattern at loc.

        Returns (end of match, ParseResults) where the ParseResults holds the
        matched text and one named entry per named group of the pattern.  On
        failure raises this expression's reusable exception updated with loc
        and instring.
        """
        found = self.re.match(instring,loc)
        if not found:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        tokens = ParseResults(found.group())
        named = found.groupdict()
        for key in named:
            tokens[key] = named[key]
        return found.end(), tokens

    def __str__( self ):
        """Return the base-class string form when that succeeds; otherwise
           build, cache in self.strRepr and return "Re:(<repr of pattern>)"."""
        try:
            return super(Regex,self).__str__()
        except:
            pass

        if self.strRepr is None:
            self.strRepr = "Re:(%s)" % repr(self.pattern)

        return self.strRepr
1797 
1798 
1800  """Token for matching strings that are delimited by quoting characters.
1801  """
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)

       Raises SyntaxError (after a SyntaxWarning) when quoteChar or
       endQuoteChar is empty once stripped, and re-raises the re error when
       the generated pattern does not compile.
    """
    super(QuotedString,self).__init__()

    # remove white space from quote chars - won't work anyway
    quoteChar = quoteChar.strip()
    if len(quoteChar) == 0:
        warnings.warn("quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
        raise SyntaxError()

    if endQuoteChar is None:
        endQuoteChar = quoteChar
    else:
        endQuoteChar = endQuoteChar.strip()
        if len(endQuoteChar) == 0:
            warnings.warn("endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
            raise SyntaxError()

    self.quoteChar = quoteChar
    self.quoteCharLen = len(quoteChar)
    self.firstQuoteChar = quoteChar[0]
    self.endQuoteChar = endQuoteChar
    self.endQuoteCharLen = len(endQuoteChar)
    self.escChar = escChar
    self.escQuote = escQuote
    self.unquoteResults = unquoteResults

    # The body starts as a negated character class excluding the first
    # character of the end quote and the escape character (plus newlines
    # when the string may not span lines). Each '%s' below needs its own
    # argument: opening quote, end-quote first char, escape char.
    if multiline:
        self.flags = re.MULTILINE | re.DOTALL
        self.pattern = r'%s(?:[^%s%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    else:
        self.flags = 0
        self.pattern = r'%s(?:[^%s\n\r%s]' % \
            ( re.escape(self.quoteChar),
              _escapeRegexRangeChars(self.endQuoteChar[0]),
              (escChar is not None and _escapeRegexRangeChars(escChar) or '') )
    if len(self.endQuoteChar) > 1:
        # allow any proper prefix of a multi-character end quote, provided
        # the character that follows does not continue the end quote
        self.pattern += (
            '|(?:' + ')|(?:'.join(["%s[^%s]" % (re.escape(self.endQuoteChar[:i]),
                                               _escapeRegexRangeChars(self.endQuoteChar[i]))
                                for i in range(len(self.endQuoteChar)-1,0,-1)]) + ')'
            )
    if escQuote:
        self.pattern += (r'|(?:%s)' % re.escape(escQuote))
    if escChar:
        self.pattern += (r'|(?:%s.)' % re.escape(escChar))
        self.escCharReplacePattern = re.escape(self.escChar)+"(.)"
    self.pattern += (r')*%s' % re.escape(self.endQuoteChar))

    try:
        self.re = re.compile(self.pattern, self.flags)
        self.reString = self.pattern
    except sre_constants.error:
        warnings.warn("invalid pattern (%s) passed to Regex" % self.pattern,
            SyntaxWarning, stacklevel=2)
        raise

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayIndexError = False
    self.mayReturnEmpty = True
1875 
def parseImpl( self, instring, loc, doActions=True ):
    """Match a quoted string starting at loc.

    Returns (end position, text). When self.unquoteResults is true the
    surrounding quotes are removed, escape-character sequences are reduced
    to the escaped character, and escQuote sequences are replaced by the
    end quote. Raises self.myException when no quoted string starts at loc.
    """
    result = None
    # cheap first-character test before running the full regular expression
    if instring[loc] == self.firstQuoteChar:
        result = self.re.match(instring,loc)
    if not result:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc

    loc = result.end()
    ret = result.group()

    if self.unquoteResults:

        # strip off quotes
        ret = ret[self.quoteCharLen:-self.endQuoteCharLen]

        if isinstance(ret,basestring):
            # replace escaped characters; the template must be a raw
            # string so that \g is a group reference, not a string escape
            if self.escChar:
                ret = re.sub(self.escCharReplacePattern,r"\g<1>",ret)

            # replace escaped quotes
            if self.escQuote:
                ret = ret.replace(self.escQuote, self.endQuoteChar)

    return loc, ret
1902 
def __str__( self ):
    """Return the base-class name when available, else a cached description
    naming the opening and closing quote strings."""
    try:
        return super(QuotedString,self).__str__()
    except:
        pass

    if self.strRepr is not None:
        return self.strRepr

    description = "quoted string, starting with %s ending with %s" % (self.quoteChar, self.endQuoteChar)
    self.strRepr = description
    return self.strRepr
1913 
1914 
1916  """Token for matching words composed of characters *not* in a given set.
1917  Defined with string containing all disallowed characters, and an optional
1918  minimum, maximum, and/or exact length. The default value for min is 1 (a
1919  minimum value < 1 is not valid); the default values for max and exact
1920  are 0, meaning no maximum or exact length restriction.
1921  """
def __init__( self, notChars, min=1, max=0, exact=0 ):
    """Store the disallowed characters and the length limits.

    min must be at least 1 (ValueError otherwise). max <= 0 means no upper
    limit (_MAX_INT is used). exact > 0 overrides both min and max.
    """
    super(CharsNotIn,self).__init__()
    self.skipWhitespace = False
    self.notChars = notChars

    if min < 1:
        raise ValueError("cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")

    shortest, longest = min, _MAX_INT
    if max > 0:
        longest = max
    if exact > 0:
        shortest = longest = exact
    self.minLen = shortest
    self.maxLen = longest

    self.name = _ustr(self)
    self.errmsg = "Expected " + self.name
    self.mayReturnEmpty = ( self.minLen == 0 )
    self.mayIndexError = False
1946 
def parseImpl( self, instring, loc, doActions=True ):
    """Consume characters from loc until one in self.notChars is met, the
    end of instring is reached, or self.maxLen characters have been taken.

    Returns (end position, matched text). Raises self.myException when the
    first character is disallowed or fewer than self.minLen were matched.
    """
    excluded = self.notChars
    if instring[loc] in excluded:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    begin = loc
    limit = min( begin+self.maxLen, len(instring) )
    loc += 1
    while loc < limit and instring[loc] not in excluded:
        loc += 1

    if loc - begin < self.minLen:
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure

    return loc, instring[begin:loc]
1971 
def __str__( self ):
    """Return the base-class name when available, else a cached "!W:(...)"
    summary showing at most the first four disallowed characters."""
    try:
        return super(CharsNotIn, self).__str__()
    except:
        pass

    if self.strRepr is not None:
        return self.strRepr

    if len(self.notChars) <= 4:
        self.strRepr = "!W:(%s)" % self.notChars
    else:
        self.strRepr = "!W:(%s...)" % self.notChars[:4]

    return self.strRepr
1985 
class White(Token):
    """Special matching class for matching whitespace.  Normally, whitespace is ignored
       by pyparsing grammars.  This class is included when some whitespace structures
       are significant.  Define with a string containing the whitespace characters to be
       matched; default is " \\t\\r\\n".  Also takes optional min, max, and exact arguments,
       as defined for the Word class."""
    # display names used to build the element name from the matched characters
    whiteStrs = {
        " " : "<SPC>",
        "\t": "<TAB>",
        "\n": "<LF>",
        "\r": "<CR>",
        "\f": "<FF>",
        }
    def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
        """Match runs of the characters in ws. The characters being matched
        are removed from this element's skippable whitespace. max <= 0
        means no upper limit; exact > 0 overrides both min and max."""
        super(White,self).__init__()
        self.matchWhite = ws
        skippable = [c for c in self.whiteChars if c not in self.matchWhite]
        self.setWhitespaceChars( "".join(skippable) )
        labels = [White.whiteStrs[c] for c in self.matchWhite]
        self.name = "".join(labels)
        self.mayReturnEmpty = True
        self.errmsg = "Expected " + self.name

        shortest, longest = min, _MAX_INT
        if max > 0:
            longest = max
        if exact > 0:
            shortest = longest = exact
        self.minLen = shortest
        self.maxLen = longest

    def parseImpl( self, instring, loc, doActions=True ):
        """Consume a run of self.matchWhite characters starting at loc, up
        to self.maxLen characters. Returns (end position, matched text).
        Raises self.myException when the first character is not matchable
        or the run is shorter than self.minLen."""
        if instring[ loc ] not in self.matchWhite:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
        begin = loc
        loc += 1
        limit = min( begin + self.maxLen, len(instring) )
        while loc < limit and instring[loc] in self.matchWhite:
            loc += 1

        if loc - begin < self.minLen:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure

        return loc, instring[begin:loc]
2042 
2043 
def __init__( self ):
    """Initialise the base class, name the element after its own class, and
    mark it as able to match empty input without index errors."""
    super(_PositionToken,self).__init__()
    self.mayIndexError = False
    self.mayReturnEmpty = True
    self.name = self.__class__.__name__
2050 
2052  """Token to advance to a specific column of input text; useful for tabular report scraping."""
def __init__( self, colno ):
    """Record colno as the target column for this token."""
    super(GoToColumn,self).__init__()
    self.col = colno
2056 
def preParse( self, instring, loc ):
    """Advance loc toward the target column: skip ignorable expressions (if
    any are registered), then whitespace, stopping at the target column,
    at a non-space character, or at the end of instring."""
    if col(loc,instring) == self.col:
        return loc

    end = len(instring)
    if self.ignoreExprs:
        loc = self._skipIgnorables( instring, loc )
    while loc < end and instring[loc].isspace() and col( loc, instring ) != self.col :
        loc += 1
    return loc
2065 
def parseImpl( self, instring, loc, doActions=True ):
    """Return the text between loc and the target column, together with the
    position at that column. Raises ParseException when loc is already
    past the target column."""
    current = col( loc, instring )
    if current > self.col:
        raise ParseException( instring, loc, "Text not in expected column", self )
    target = loc + self.col - current
    return target, instring[ loc: target ]
2073 
2075  """Matches if current position is at the beginning of a line within the parse string"""
def __init__( self ):
    """Set up the token so that newlines are not skipped as whitespace."""
    super(LineStart,self).__init__()
    # every default whitespace character except the newline
    inlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
    self.setWhitespaceChars( inlineWhite )
    self.errmsg = "Expected start of line"
2080  #self.myException.msg = self.errmsg
2081 
def preParse( self, instring, loc ):
    """Return loc advanced by one when the base-class preParse position
    lands on a newline, otherwise loc unchanged."""
    skipped = super(LineStart,self).preParse(instring,loc)
    if instring[skipped] != "\n":
        return loc
    return loc + 1
2087 
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when loc is 0, equals the pre-parsed start of
    the string, or directly follows a newline; otherwise raise
    self.myException."""
    atLineStart = ( loc == 0 or
                    loc == self.preParse( instring, 0 ) or
                    instring[loc-1] == "\n" )
    if atLineStart:
        return loc, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2098 
2100  """Matches if current position is at the end of a line within the parse string"""
def __init__( self ):
    """Set up the token so that newlines are not skipped as whitespace."""
    super(LineEnd,self).__init__()
    # every default whitespace character except the newline
    inlineWhite = ParserElement.DEFAULT_WHITE_CHARS.replace("\n","")
    self.setWhitespaceChars( inlineWhite )
    self.errmsg = "Expected end of line"
2105  #self.myException.msg = self.errmsg
2106 
def parseImpl( self, instring, loc, doActions=True ):
    """Match a newline at loc (returning it as the token) or the exact end
    of instring (returning no tokens); in both cases the returned position
    is loc+1. Anything else raises self.myException."""
    end = len(instring)
    if loc < end and instring[loc] == "\n":
        return loc+1, "\n"
    if loc == end:
        return loc+1, []

    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2124 
2126  """Matches if current position is at the beginning of the parse string"""
def __init__( self ):
    """Initialise the base class and set the failure message."""
    super(StringStart,self).__init__()
    self.errmsg = "Expected start of text"
2130  #self.myException.msg = self.errmsg
2131 
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed with no tokens when loc is 0 or when everything before loc
    is skipped by preParse from position 0; otherwise raise
    self.myException."""
    # see if entire string up to here is just whitespace and ignoreables
    if loc != 0 and loc != self.preParse( instring, 0 ):
        failure = self.myException
        failure.loc = loc
        failure.pstr = instring
        raise failure
    return loc, []
2142 
2144  """Matches if current position is at the end of the parse string"""
def __init__( self ):
    """Initialise the base class and set the failure message."""
    super(StringEnd,self).__init__()
    self.errmsg = "Expected end of text"
2148  #self.myException.msg = self.errmsg
2149 
def parseImpl( self, instring, loc, doActions=True ):
    """Match only at or beyond the end of instring.

    Returns (loc+1, []) when loc is exactly the end of the string and
    (loc, []) when loc is already past it. Raises self.myException when
    unparsed text remains at loc.
    """
    end = len(instring)
    if loc < end:
        exc = self.myException
        exc.loc = loc
        exc.pstr = instring
        raise exc
    if loc == end:
        return loc+1, []
    # loc > end: the three comparisons are exhaustive, so no further
    # fallback branch is needed
    return loc, []
2166 
2168  """Matches if the current position is at the beginning of a Word, and
2169  is not preceded by any character in a given set of wordChars
2170  (default=printables). To emulate the \\b behavior of regular expressions,
2171  use WordStart(alphanums). WordStart will also match at the beginning of
2172  the string being parsed, or at the beginning of a line.
2173  """
def __init__(self, wordChars = printables):
    """Store wordChars (converted with _str2dict) as the set of characters
    that count as part of a word, and set the failure message."""
    super(WordStart,self).__init__()
    self.errmsg = "Not at the start of a word"
    self.wordChars = _str2dict(wordChars)
2178 
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with no tokens at position 0, or where the previous character
    is not a word character and the current one is; otherwise raise
    self.myException."""
    if loc != 0:
        wordChars = self.wordChars
        if instring[loc-1] in wordChars or instring[loc] not in wordChars:
            failure = self.myException
            failure.loc = loc
            failure.pstr = instring
            raise failure
    return loc, []
2188 
2190  """Matches if the current position is at the end of a Word, and
2191  is not followed by any character in a given set of wordChars
2192  (default=printables). To emulate the \\b behavior of regular expressions,
2193  use WordEnd(alphanums). WordEnd will also match at the end of
2194  the string being parsed, or at the end of a line.
2195  """
def __init__(self, wordChars = printables):
    """Store wordChars (converted with _str2dict) as the set of characters
    that count as part of a word, disable whitespace skipping for this
    element, and set the failure message."""
    super(WordEnd,self).__init__()
    self.skipWhitespace = False
    self.errmsg = "Not at the end of a word"
    self.wordChars = _str2dict(wordChars)
2201 
def parseImpl(self, instring, loc, doActions=True ):
    """Succeed with no tokens when loc is at or past the end of instring, or
    when the previous character is a word character and the current one is
    not. Otherwise raise self.myException.

    Position 0 of a non-empty string is never the end of a word: there is
    no preceding character, and indexing instring[loc-1] there would wrap
    around to the last character of the string.
    """
    instrlen = len(instring)
    if instrlen>0 and loc<instrlen:
        if (loc == 0 or
            instring[loc] in self.wordChars or
            instring[loc-1] not in self.wordChars):
            exc = self.myException
            exc.loc = loc
            exc.pstr = instring
            raise exc
    return loc, []
2213 
2214 
2216  """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, exprs, savelist = False ):
    """Store the contained expressions in self.exprs as a list.

    A list is kept as given, a string becomes a single Literal, any other
    iterable is converted with list(), and a non-iterable is wrapped in a
    one-element list.
    """
    super(ParseExpression,self).__init__(savelist)
    if isinstance( exprs, list ):
        members = exprs
    elif isinstance( exprs, basestring ):
        members = [ Literal( exprs ) ]
    else:
        try:
            members = list( exprs )
        except TypeError:
            members = [ exprs ]
    self.exprs = members
    self.callPreparse = False
2229 
2230  def __getitem__( self, i ):
2231  return self.exprs[i]
2232 
def append( self, other ):
    """Add other to the contained expressions, discard the cached string
    representation, and return self."""
    self.strRepr = None
    self.exprs.append( other )
    return self
2237 
def leaveWhitespace( self ):
    """Turn off whitespace skipping on this expression, replace every
    contained expression with a copy, call leaveWhitespace on each copy,
    and return self."""
    self.skipWhitespace = False
    copies = []
    for member in self.exprs:
        copies.append( member.copy() )
    self.exprs = copies
    for duplicate in copies:
        duplicate.leaveWhitespace()
    return self
2246 
def ignore( self, other ):
    """Register other as ignorable on this expression and pass the newly
    registered entry on to every contained expression. A Suppress that is
    already registered is not added again. Returns self."""
    if isinstance( other, Suppress ) and other in self.ignoreExprs:
        return self

    super( ParseExpression, self).ignore( other )
    for member in self.exprs:
        member.ignore( self.ignoreExprs[-1] )
    return self
2258 
def __str__( self ):
    """Return the base-class name when available, else a cached
    "<ClassName>:(<exprs>)" description."""
    try:
        return super(ParseExpression,self).__str__()
    except:
        pass

    if self.strRepr is not None:
        return self.strRepr

    self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.exprs) )
    return self.strRepr
2268 
def streamline( self ):
    """Streamline this expression and its members, then flatten one level of
    nesting: when there are exactly two members and the first and/or last
    is an instance of this same class with no parse actions, no results
    name and debugging off, its members are spliced into self.exprs.
    Returns self."""
    super(ParseExpression,self).streamline()

    for member in self.exprs:
        member.streamline()

    # collapse nested And's of the form And( And( And( a,b), c), d) to And( a,b,c,d )
    # but only if there are no parse actions or resultsNames on the nested And's
    # (likewise for Or's and MatchFirst's)
    if len(self.exprs) != 2:
        return self

    def canAbsorb( nested ):
        return ( isinstance( nested, self.__class__ ) and
                 not(nested.parseAction) and
                 nested.resultsName is None and
                 not nested.debug )

    head = self.exprs[0]
    if canAbsorb( head ):
        self.exprs = head.exprs[:] + [ self.exprs[1] ]
        self.strRepr = None
        self.mayReturnEmpty |= head.mayReturnEmpty
        self.mayIndexError |= head.mayIndexError

    tail = self.exprs[-1]
    if canAbsorb( tail ):
        self.exprs = self.exprs[:-1] + tail.exprs[:]
        self.strRepr = None
        self.mayReturnEmpty |= tail.mayReturnEmpty
        self.mayIndexError |= tail.mayIndexError

    return self
2300 
def setResultsName( self, name, listAllMatches=False ):
    """Delegate to the base-class setResultsName and return its result."""
    return super(ParseExpression,self).setResultsName(name,listAllMatches)
2304 
def validate( self, validateTrace=[] ):
    """Validate every contained expression, passing each a copy of
    validateTrace extended with self, then run checkRecursion from an
    empty list. validateTrace itself is never modified."""
    trace = validateTrace[:]+[self]
    for member in self.exprs:
        member.validate(trace)
    self.checkRecursion( [] )
2310 
2312  """Requires all given ParseExpressions to be found in the given order.
2313  Expressions may be separated by whitespace.
2314  May be constructed using the '+' operator.
2315  """
2316 
def __init__(self, *positional, **named):
    """Forward all arguments to the initializer found after Empty in the
    method resolution order, then turn off whitespace skipping."""
    # NOTE(review): super(Empty, ...) starts the lookup *after* Empty, so
    # Empty's own __init__ is not run here - confirm this is intended.
    super(Empty,self).__init__(*positional, **named)
    self.leaveWhitespace()
2321 
def __init__( self, exprs, savelist = True ):
    """Build a sequence of expressions that must all match in order.

    mayReturnEmpty is true only when every contained expression may return
    empty. Whitespace handling (whiteChars, skipWhitespace) is taken from
    the first contained expression. Raises IndexError when there are no
    contained expressions.
    """
    super(And,self).__init__(exprs, savelist)
    self.mayReturnEmpty = True
    for e in self.exprs:
        if not e.mayReturnEmpty:
            self.mayReturnEmpty = False
            break
    # Use the normalized self.exprs rather than the raw argument: exprs may
    # be a string or a non-indexable iterable, whereas self.exprs is always
    # a list of expressions.
    first = self.exprs[0]
    self.setWhitespaceChars( first.whiteChars )
    self.skipWhitespace = first.skipWhitespace
    self.callPreparse = True
2332 
2333  def parseImpl( self, instring, loc, doActions=True ):
2334  # pass False as last arg to _parse for first element, since we already
2335  # pre-parsed the string as part of our And pre-parsing
2336  loc, resultlist = self.exprs[0]._parse( instring, loc, doActions, callPreParse=False )
2337  errorStop = False
2338  for e in self.exprs[1:]:
2339  if isinstance(e, And._ErrorStop):
2340  errorStop = True
2341  continue
2342  if errorStop:
2343  try:
2344  loc, exprtokens = e._parse( instring, loc, doActions )
2345  except ParseSyntaxException:
2346  raise
2347  except ParseBaseException, pe:
2348  raise ParseSyntaxException(pe)
2349  except IndexError, ie:
2350  raise ParseSyntaxException( ParseException(instring, len(instring), self.errmsg, self) )
2351  else:
2352  loc, exprtokens = e._parse( instring, loc, doActions )
2353  if exprtokens or exprtokens.keys():
2354  resultlist += exprtokens
2355  return loc, resultlist
2356 
def __iadd__(self, other ):
    """Implement "+=": append other (a string is first wrapped in a Literal)
    to this And's members and return self."""
    addition = other
    if isinstance( addition, basestring ):
        addition = Literal( addition )
    return self.append( addition ) #And( [ self, other ] )
2361 
def checkRecursion( self, parseElementList ):
    """Run checkRecursion on the contained expressions in order, passing the
    given list extended with self, and stop after the first expression
    that cannot return empty."""
    trail = parseElementList[:] + [ self ]
    for member in self.exprs:
        member.checkRecursion( trail )
        if not member.mayReturnEmpty:
            return
2368 
def __str__( self ):
    """Return self.name when set, else a cached "{a b c}" rendering of the
    contained expressions."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    rendered = [ _ustr(e) for e in self.exprs ]
    self.strRepr = "{" + " ".join( rendered ) + "}"
    return self.strRepr
2377 
2378 
2380  """Requires that at least one ParseExpression is found.
2381  If two expressions match, the expression that matches the longest string will be used.
2382  May be constructed using the '^' operator.
2383  """
def __init__( self, exprs, savelist = False ):
    """Build a set of alternatives; mayReturnEmpty is true when any
    alternative may return empty."""
    super(Or,self).__init__(exprs, savelist)
    mayBeEmpty = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            mayBeEmpty = True
            break
    self.mayReturnEmpty = mayBeEmpty
2391 
2392  def parseImpl( self, instring, loc, doActions=True ):
2393  maxExcLoc = -1
2394  maxMatchLoc = -1
2395  maxException = None
2396  for e in self.exprs:
2397  try:
2398  loc2 = e.tryParse( instring, loc )
2399  except ParseException, err:
2400  if err.loc > maxExcLoc:
2401  maxException = err
2402  maxExcLoc = err.loc
2403  except IndexError:
2404  if len(instring) > maxExcLoc:
2405  maxException = ParseException(instring,len(instring),e.errmsg,self)
2406  maxExcLoc = len(instring)
2407  else:
2408  if loc2 > maxMatchLoc:
2409  maxMatchLoc = loc2
2410  maxMatchExp = e
2411 
2412  if maxMatchLoc < 0:
2413  if maxException is not None:
2414  raise maxException
2415  else:
2416  raise ParseException(instring, loc, "no defined alternatives to match", self)
2417 
2418  return maxMatchExp._parse( instring, loc, doActions )
2419 
def __ixor__(self, other ):
    """Implement "^=": append other (a string is first wrapped in a Literal)
    to this Or's alternatives and return self."""
    addition = other
    if isinstance( addition, basestring ):
        addition = Literal( addition )
    return self.append( addition ) #Or( [ self, other ] )
2424 
def __str__( self ):
    """Return self.name when set, else a cached "{a ^ b ^ c}" rendering of
    the alternatives."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    rendered = [ _ustr(e) for e in self.exprs ]
    self.strRepr = "{" + " ^ ".join( rendered ) + "}"
    return self.strRepr
2433 
def checkRecursion( self, parseElementList ):
    """Run checkRecursion on every alternative, passing the given list
    extended with self."""
    trail = parseElementList[:] + [ self ]
    for alternative in self.exprs:
        alternative.checkRecursion( trail )
2438 
2439 
2441  """Requires that at least one ParseExpression is found.
2442  If two expressions match, the first one listed is the one that will match.
2443  May be constructed using the '|' operator.
2444  """
def __init__( self, exprs, savelist = False ):
    """Build an ordered set of alternatives. mayReturnEmpty is true when
    exprs is empty (falsy) or when any alternative may return empty."""
    super(MatchFirst,self).__init__(exprs, savelist)
    if not exprs:
        self.mayReturnEmpty = True
        return

    self.mayReturnEmpty = False
    for alternative in self.exprs:
        if alternative.mayReturnEmpty:
            self.mayReturnEmpty = True
            break
2455 
2456  def parseImpl( self, instring, loc, doActions=True ):
2457  maxExcLoc = -1
2458  maxException = None
2459  for e in self.exprs:
2460  try:
2461  ret = e._parse( instring, loc, doActions )
2462  return ret
2463  except ParseException, err:
2464  if err.loc > maxExcLoc:
2465  maxException = err
2466  maxExcLoc = err.loc
2467  except IndexError:
2468  if len(instring) > maxExcLoc:
2469  maxException = ParseException(instring,len(instring),e.errmsg,self)
2470  maxExcLoc = len(instring)
2471 
2472  # only got here if no expression matched, raise exception for match that made it the furthest
2473  else:
2474  if maxException is not None:
2475  raise maxException
2476  else:
2477  raise ParseException(instring, loc, "no defined alternatives to match", self)
2478 
def __ior__(self, other ):
    """Implement "|=": append other (a string is first wrapped in a Literal)
    to this MatchFirst's alternatives and return self."""
    addition = other
    if isinstance( addition, basestring ):
        addition = Literal( addition )
    return self.append( addition ) #MatchFirst( [ self, other ] )
2483 
def __str__( self ):
    """Return self.name when set, else a cached "{a | b | c}" rendering of
    the alternatives."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    rendered = [ _ustr(e) for e in self.exprs ]
    self.strRepr = "{" + " | ".join( rendered ) + "}"
    return self.strRepr
2492 
def checkRecursion( self, parseElementList ):
    """Run checkRecursion on every alternative, passing the given list
    extended with self."""
    trail = parseElementList[:] + [ self ]
    for alternative in self.exprs:
        alternative.checkRecursion( trail )
2497 
2498 
2500  """Requires all given ParseExpressions to be found, but in any order.
2501  Expressions may be separated by whitespace.
2502  May be constructed using the '&' operator.
2503  """
def __init__( self, exprs, savelist = True ):
    """Build an any-order group. mayReturnEmpty is true only when every
    contained expression may return empty; classification of the members
    is deferred to the first parse (initExprGroups)."""
    super(Each,self).__init__(exprs, savelist)
    allMayBeEmpty = True
    for member in self.exprs:
        if not member.mayReturnEmpty:
            allMayBeEmpty = False
            break
    self.mayReturnEmpty = allMayBeEmpty
    self.skipWhitespace = True
    self.initExprGroups = True
2513 
def parseImpl( self, instring, loc, doActions=True ):
    """Match all contained expressions in whatever order they occur.

    A first pass uses tryParse repeatedly to discover the order in which
    the members match; required and Optional members are each used once,
    ZeroOrMore/OneOrMore members are retried on every round. The members
    are then parsed for real in that order and their results merged, with
    values under a repeated results name combined. Raises ParseException
    when a required member never matched.
    """
    if self.initExprGroups:
        # classify the members once, on first use
        members = self.exprs
        self.optionals = [ e.expr for e in members if isinstance(e,Optional) ]
        self.multioptionals = [ e.expr for e in members if isinstance(e,ZeroOrMore) ]
        self.multirequired = [ e.expr for e in members if isinstance(e,OneOrMore) ]
        self.required = [ e for e in members if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
        self.required += self.multirequired
        self.initExprGroups = False

    scanLoc = loc
    pendingRequired = self.required[:]
    pendingOptional = self.optionals[:]
    matchOrder = []

    # keep sweeping over the candidates until a full round matches nothing
    while True:
        candidates = pendingRequired + pendingOptional + self.multioptionals + self.multirequired
        failures = 0
        for e in candidates:
            try:
                scanLoc = e.tryParse( instring, scanLoc )
            except ParseException:
                failures += 1
                continue
            matchOrder.append(e)
            if e in pendingRequired:
                pendingRequired.remove(e)
            elif e in pendingOptional:
                pendingOptional.remove(e)
        if failures == len(candidates):
            break

    if pendingRequired:
        missing = ", ".join( [ _ustr(e) for e in pendingRequired ] )
        raise ParseException(instring,loc,"Missing one or more required elements (%s)" % missing )

    # add any unmatched Optionals, in case they have default values defined
    matchOrder += [ e for e in self.exprs if isinstance(e,Optional) and e.expr in pendingOptional ]

    resultlist = []
    for e in matchOrder:
        loc,results = e._parse(instring,loc,doActions)
        resultlist.append(results)

    finalResults = ParseResults([])
    for r in resultlist:
        # values for names already present are combined and re-stored
        # after the plain merge
        combinedByName = {}
        for k in r.keys():
            if k in finalResults.keys():
                combined = ParseResults(finalResults[k])
                combined += ParseResults(r[k])
                combinedByName[k] = combined
        finalResults += ParseResults(r)
        for k,v in combinedByName.items():
            finalResults[k] = v
    return loc, finalResults
2569 
def __str__( self ):
    """Return self.name when set, else a cached "{a & b & c}" rendering of
    the contained expressions."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    rendered = [ _ustr(e) for e in self.exprs ]
    self.strRepr = "{" + " & ".join( rendered ) + "}"
    return self.strRepr
2578 
def checkRecursion( self, parseElementList ):
    """Run checkRecursion on every contained expression, passing the given
    list extended with self."""
    trail = parseElementList[:] + [ self ]
    for member in self.exprs:
        member.checkRecursion( trail )
2583 
2584 
2586  """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
def __init__( self, expr, savelist=False ):
    """Wrap a single expression (a string is first converted to a Literal).
    When expr is not None, its parsing flags, whitespace settings and
    ignore expressions are copied onto this element."""
    super(ParseElementEnhance,self).__init__(savelist)
    if isinstance( expr, basestring ):
        expr = Literal(expr)
    self.expr = expr
    self.strRepr = None
    if expr is None:
        return

    self.mayIndexError = expr.mayIndexError
    self.mayReturnEmpty = expr.mayReturnEmpty
    # set the whitespace characters before copying skipWhitespace, as in
    # the original statement order
    self.setWhitespaceChars( expr.whiteChars )
    self.skipWhitespace = expr.skipWhitespace
    self.saveAsList = expr.saveAsList
    self.callPreparse = expr.callPreparse
    self.ignoreExprs.extend(expr.ignoreExprs)
2601 
def parseImpl( self, instring, loc, doActions=True ):
    """Delegate parsing to the wrapped expression (without pre-parsing);
    raise ParseException when no expression has been set."""
    if self.expr is None:
        raise ParseException("",loc,self.errmsg,self)
    return self.expr._parse( instring, loc, doActions, callPreParse=False )
2607 
def leaveWhitespace( self ):
    """Turn off whitespace skipping on this element and on a private copy
    of the wrapped expression. Returns self.

    The wrapped expression is copied only when one is set, so calling this
    on an element whose expr is None no longer fails.
    """
    self.skipWhitespace = False
    if self.expr is not None:
        # work on a copy so other users of the same expression are unaffected
        self.expr = self.expr.copy()
        self.expr.leaveWhitespace()
    return self
2614 
def ignore( self, other ):
    """Register other as ignorable on this element and pass the newly
    registered entry on to the wrapped expression, if any. A Suppress that
    is already registered is not added again. Returns self."""
    if isinstance( other, Suppress ) and other in self.ignoreExprs:
        return self

    super( ParseElementEnhance, self).ignore( other )
    if self.expr is not None:
        self.expr.ignore( self.ignoreExprs[-1] )
    return self
2626 
def streamline( self ):
    """Streamline this element and then the wrapped expression, if any.
    Returns self."""
    super(ParseElementEnhance,self).streamline()
    wrapped = self.expr
    if wrapped is not None:
        wrapped.streamline()
    return self
2632 
def checkRecursion( self, parseElementList ):
    """Raise RecursiveGrammarException when self already appears in
    parseElementList; otherwise continue the check in the wrapped
    expression with the list extended by self."""
    if self in parseElementList:
        raise RecursiveGrammarException( parseElementList+[self] )
    if self.expr is None:
        return
    trail = parseElementList[:] + [ self ]
    self.expr.checkRecursion( trail )
2639 
def validate( self, validateTrace=[] ):
    """Validate the wrapped expression (if any), passing a copy of
    validateTrace extended with self, then run checkRecursion from an
    empty list. validateTrace itself is never modified."""
    trace = validateTrace[:]+[self]
    wrapped = self.expr
    if wrapped is not None:
        wrapped.validate(trace)
    self.checkRecursion( [] )
2645 
def __str__( self ):
    """Return the base-class name when available, else the cached
    "<ClassName>:(<expr>)" description (built on first use, and left as
    None while no expression is set)."""
    try:
        return super(ParseElementEnhance,self).__str__()
    except:
        pass

    needsBuilding = self.strRepr is None and self.expr is not None
    if needsBuilding:
        self.strRepr = "%s:(%s)" % ( self.__class__.__name__, _ustr(self.expr) )
    return self.strRepr
2655 
2656 
2658  """Lookahead matching of the given parse expression. FollowedBy
2659  does *not* advance the parsing position within the input string, it only
2660  verifies that the specified parse expression matches at the current
2661  position. FollowedBy always returns a null token list."""
def __init__( self, expr ):
    """Wrap expr as a lookahead; the element itself may return empty."""
    super(FollowedBy,self).__init__(expr)
    self.mayReturnEmpty = True
2665 
def parseImpl( self, instring, loc, doActions=True ):
    """Check with tryParse that the wrapped expression matches at loc (any
    exception it raises propagates) and return loc unchanged with no
    tokens."""
    lookahead = self.expr
    lookahead.tryParse( instring, loc )
    return loc, []
2669 
2670 
2672  """Lookahead to disallow matching with the given parse expression. NotAny
2673  does *not* advance the parsing position within the input string, it only
2674  verifies that the specified parse expression does *not* match at the current
2675  position. Also, NotAny does *not* skip over leading whitespace. NotAny
2676  always returns a null token list. May be constructed using the '~' operator."""
def __init__( self, expr ):
    """Wrap expr as a negative lookahead that does not skip leading
    whitespace and may return empty."""
    super(NotAny,self).__init__(expr)
    # do NOT use self.leaveWhitespace(), don't want to propagate to exprs
    self.skipWhitespace = False
    self.mayReturnEmpty = True
    self.errmsg = "Found unwanted token, "+_ustr(self.expr)
2683  #self.myException = ParseException("",0,self.errmsg,self)
2684 
def parseImpl( self, instring, loc, doActions=True ):
    """Succeed (loc unchanged, no tokens) when the wrapped expression fails
    to match at loc with ParseException or IndexError; raise
    self.myException when it does match."""
    try:
        self.expr.tryParse( instring, loc )
    except (ParseException,IndexError):
        return loc, []

    # the unwanted expression matched here
    failure = self.myException
    failure.loc = loc
    failure.pstr = instring
    raise failure
2697 
def __str__( self ):
    """Return self.name when set, else a cached "~{expr}" rendering."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    self.strRepr = "~{" + _ustr(self.expr) + "}"
    return self.strRepr
2706 
2707 
2709  """Optional repetition of zero or more of the given expression."""
def __init__( self, expr ):
    """Wrap expr for optional repetition; the element may return empty."""
    super(ZeroOrMore,self).__init__(expr)
    self.mayReturnEmpty = True
2713 
def parseImpl( self, instring, loc, doActions=True ):
    """Parse the wrapped expression as many times as it matches, skipping
    ignorable expressions between repetitions. The loop ends on the first
    ParseException or IndexError. Returns the position after the last
    match and the accumulated tokens (an empty list when nothing
    matched)."""
    tokens = []
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
        skipIgnorables = ( len(self.ignoreExprs) > 0 )
        while True:
            nextLoc = loc
            if skipIgnorables:
                nextLoc = self._skipIgnorables( instring, loc )
            loc, moreTokens = self.expr._parse( instring, nextLoc, doActions )
            if moreTokens or moreTokens.keys():
                tokens += moreTokens
    except (ParseException,IndexError):
        pass

    return loc, tokens
2731 
def __str__( self ):
    """Return self.name when set, else a cached "[expr]..." rendering."""
    if hasattr(self,"name"):
        return self.name

    if self.strRepr is not None:
        return self.strRepr

    self.strRepr = "[" + _ustr(self.expr) + "]..."
    return self.strRepr
2740 
def setResultsName( self, name, listAllMatches=False ):
    """Delegate to the base class, then mark the returned element as saving its results as a list."""
    named = super(ZeroOrMore,self).setResultsName(name,listAllMatches)
    named.saveAsList = True
    return named
2745 
2746 
2748  """Repetition of one or more of the given expression."""
def parseImpl( self, instring, loc, doActions=True ):
    """Match the wrapped expression once, then as many more times as it will match.

    The first match is mandatory: any exception it raises propagates to the
    caller.  Later failures (ParseException or IndexError) just end the loop.
    Returns ``(loc, tokens)``.
    """
    # mandatory first occurrence, parsed outside the try block on purpose
    loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    try:
        skipIgnorables = bool(self.ignoreExprs)
        while True:
            preloc = loc
            if skipIgnorables:
                preloc = self._skipIgnorables( instring, loc )
            loc, more = self.expr._parse( instring, preloc, doActions )
            if more or more.keys():
                tokens += more
    except (ParseException,IndexError):
        pass

    return loc, tokens
2766 
def __str__( self ):
    """Return the explicit name when one is set, else a cached ``{expr}...`` rendering."""
    if not hasattr(self,"name"):
        if self.strRepr is None:
            self.strRepr = "{" + _ustr(self.expr) + "}..."
        return self.strRepr
    return self.name
2775 
def setResultsName( self, name, listAllMatches=False ):
    """Delegate to the base class, then mark the returned element as saving its results as a list."""
    named = super(OneOrMore,self).setResultsName(name,listAllMatches)
    named.saveAsList = True
    return named
2780 
class _NullToken(object):
    """Placeholder object that is falsy and renders as an empty string."""
    def __str__(self):
        return ""
    def __bool__(self):
        return False
    # Python 2 looks up the truth-value hook under this name
    __nonzero__ = __bool__

# shared "no default supplied" marker
_optionalNotMatched = _NullToken()
2790  """Optional matching of the given expression.
2791  A default return string can also be specified, if the optional expression
2792  is not found.
2793  """
def __init__( self, exprs, default=_optionalNotMatched ):
    """Wrap ``exprs`` as optional.

    ``default`` is stored as ``self.defaultValue``; the ``_optionalNotMatched``
    sentinel means "no default was given".
    """
    super(Optional,self).__init__( exprs, savelist=False )
    self.mayReturnEmpty = True
    self.defaultValue = default
2798 
def parseImpl( self, instring, loc, doActions=True ):
    """Try the wrapped expression; on failure fall back to the default value.

    On ParseException/IndexError the location is left unchanged and the
    tokens are: an empty list when no default was configured, otherwise the
    default value (also stored under the wrapped expression's results name
    when it has one).
    """
    try:
        loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=False )
    except (ParseException,IndexError):
        if self.defaultValue is _optionalNotMatched:
            tokens = []
        elif self.expr.resultsName:
            tokens = ParseResults([ self.defaultValue ])
            tokens[self.expr.resultsName] = self.defaultValue
        else:
            tokens = [ self.defaultValue ]
    return loc, tokens
2812 
def __str__( self ):
    """Return the explicit name when one is set, else a cached ``[expr]`` rendering."""
    if not hasattr(self,"name"):
        if self.strRepr is None:
            self.strRepr = "[" + _ustr(self.expr) + "]"
        return self.strRepr
    return self.name
2821 
2822 
2824  """Token for skipping over all undefined text until the matched expression is found.
2825  If include is set to true, the matched expression is also parsed (the skipped text
2826  and matched expression are returned as a 2-element list). The ignore
2827  argument is used to define grammars (typically quoted strings and comments) that
2828  might contain false matches.
2829  """
def __init__( self, other, include=False, ignore=None, failOn=None ):
    """Configure a skip-to-target element.

    other   -- target expression to skip ahead to
    include -- stored as ``includeMatch``
    ignore  -- stored as ``ignoreExpr``
    failOn  -- expression stored as ``failOn``; a plain string is wrapped in a Literal
    """
    super( SkipTo, self ).__init__( other )
    self.ignoreExpr = ignore
    self.mayReturnEmpty = True
    self.mayIndexError = False
    self.includeMatch = include
    self.asList = False
    # None is not a string, so it passes through unchanged
    if isinstance(failOn, basestring):
        failOn = Literal(failOn)
    self.failOn = failOn
    self.errmsg = "No match found for "+_ustr(self.expr)
2843 
def parseImpl( self, instring, loc, doActions=True ):
    """Scan forward from ``loc`` until the target expression matches.

    At each candidate position, in order:
      1. if ``failOn`` is set and matches here, a ParseException is raised
         and propagated (the scan is abandoned);
      2. if ``ignoreExpr`` is set, every consecutive match of it is stepped over;
      3. the target expression is tried with parse actions disabled.
    When step 3 fails, the position advances by one character and the
    sequence repeats.

    Returns ``(loc, [skippedText])``.  With ``includeMatch`` set, the target
    is parsed again for real and, if it produced tokens, the result is a
    single ParseResults holding the skipped text followed by those tokens.

    Raises ``self.myException`` when the end of the input is reached without
    a match.
    """
    startLoc = loc
    instrlen = len(instring)
    expr = self.expr
    failParse = False
    while loc <= instrlen:
        try:
            if self.failOn:
                try:
                    self.failOn.tryParse(instring, loc)
                except ParseBaseException:
                    pass
                else:
                    # flag the failure so the handler below re-raises it
                    # instead of advancing to the next character
                    failParse = True
                    raise ParseException(instring, loc, "Found expression " + str(self.failOn))
                failParse = False
            if self.ignoreExpr is not None:
                # step over ignorable text silently (the stray debug print
                # that used to sit here has been removed)
                while 1:
                    try:
                        loc = self.ignoreExpr.tryParse(instring,loc)
                    except ParseBaseException:
                        break
            # probe only: no parse actions, no pre-parse
            expr._parse( instring, loc, doActions=False, callPreParse=False )
            skipText = instring[startLoc:loc]
            if self.includeMatch:
                loc,mat = expr._parse(instring,loc,doActions,callPreParse=False)
                if mat:
                    skipRes = ParseResults( skipText )
                    skipRes += mat
                    return loc, [ skipRes ]
                else:
                    return loc, [ skipText ]
            else:
                return loc, [ skipText ]
        except (ParseException,IndexError):
            if failParse:
                raise
            else:
                loc += 1
    exc = self.myException
    exc.loc = loc
    exc.pstr = instring
    raise exc
2888 
2890  """Forward declaration of an expression to be defined later -
2891  used for recursive grammars, such as algebraic infix notation.
2892  When the expression is known, it is assigned to the Forward variable using the '<<' operator.
2893 
2894  Note: take care when assigning to Forward not to overlook precedence of operators.
2895  Specifically, '|' has a lower precedence than '<<', so that::
2896  fwdExpr << a | b | c
2897  will actually be evaluated as::
2898  (fwdExpr << a) | b | c
2899  thereby leaving b and c out as parseable alternatives. It is recommended that you
2900  explicitly group the values inserted into the Forward::
2901  fwdExpr << (a | b | c)
2902  """
def __init__( self, other=None ):
    """Create the placeholder; ``other`` may be omitted and assigned later."""
    super(Forward,self).__init__( other, savelist=False )
2905 
def __lshift__( self, other ):
    """Assign the real expression to this placeholder (``fwd << expr``).

    A plain string is wrapped in a Literal.  The placeholder then copies the
    assigned expression's parsing flags, whitespace settings and ignore
    expressions.  Always returns None.
    """
    if isinstance( other, basestring ):
        other = Literal(other)
    self.expr = other
    # invalidate any cached string form
    self.strRepr = None
    target = self.expr
    self.mayReturnEmpty = target.mayReturnEmpty
    self.mayIndexError = target.mayIndexError
    self.setWhitespaceChars( target.whiteChars )
    self.skipWhitespace = target.skipWhitespace
    self.saveAsList = target.saveAsList
    self.ignoreExprs.extend(target.ignoreExprs)
    return None
2919 
def leaveWhitespace( self ):
    """Turn off whitespace skipping on this element only and return ``self``."""
    self.skipWhitespace = False
    return self
2923 
def streamline( self ):
    """Streamline the assigned expression once; later calls do nothing.  Returns ``self``."""
    if self.streamlined:
        return self
    # Set the flag before descending, so a grammar that refers back to this
    # element finds it already marked.
    self.streamlined = True
    if self.expr is not None:
        self.expr.streamline()
    return self
2930 
def validate( self, validateTrace=None ):
    """Validate the assigned expression, then check this element for recursion.

    validateTrace -- list of elements already visited on this validation
                     path; omitted (None) means an empty trace.  The list
                     passed in is never modified: a copy extended with
                     ``self`` is handed to the nested expression.
    """
    # None replaces the former shared mutable default ``[]``
    if validateTrace is None:
        validateTrace = []
    if self not in validateTrace:
        tmp = validateTrace[:]+[self]
        if self.expr is not None:
            self.expr.validate(tmp)
    self.checkRecursion([])
2937 
def __str__( self ):
    """Return the explicit name when set, else ``<ClassName>: <expr>``.

    While the assigned expression is being rendered, this instance's class
    is temporarily switched to ``_ForwardNoRecurse`` (presumably so that a
    grammar referring back to this element does not recurse without end --
    confirm against that class's ``__str__``).  The original class is
    restored even if rendering raises.
    """
    if hasattr(self,"name"):
        return self.name

    # Remember the real class before swapping it; the ``finally`` clause
    # below reads this attribute to restore it.
    self._revertClass = self.__class__
    self.__class__ = _ForwardNoRecurse
    try:
        if self.expr is not None:
            retString = _ustr(self.expr)
        else:
            retString = "None"
    finally:
        self.__class__ = self._revertClass
    return self.__class__.__name__ + ": " + retString
2952 
def copy(self):
    """Copy this element.

    If an expression has been assigned, the base-class copy is used.
    Otherwise a new Forward is returned that has this (still undefined)
    Forward assigned to it.
    """
    if self.expr is None:
        ret = Forward()
        ret << self
        return ret
    return super(Forward,self).copy()
2960 
def __str__( self ):
    """Render as a fixed ellipsis."""
    return "..."
2964 
2966  """Abstract subclass of ParseExpression, for converting parsed results."""
def __init__( self, expr, savelist=False ):
    """Wrap ``expr``.

    ``savelist`` is accepted but not forwarded to the base class;
    ``saveAsList`` is always set to False.
    """
    super(TokenConverter,self).__init__( expr )
    self.saveAsList = False
2970 
2972  """Converter to upper case all matching tokens."""
def __init__(self, *args):
    """Forward all arguments to the base class and emit a DeprecationWarning."""
    super(Upcase,self).__init__(*args)
    # stacklevel=2 attributes the warning to the code that built the Upcase
    warnings.warn("Upcase class is deprecated, use upcaseTokens parse action instead",
                  DeprecationWarning,stacklevel=2)
2977 
def postParse( self, instring, loc, tokenlist ):
    """Return a new list with every token upper-cased.

    Calls each token's own ``upper()`` method rather than the
    ``string.upper`` module function, which does not exist on Python 3.
    """
    return [ tok.upper() for tok in tokenlist ]
2980 
2981 
2983  """Converter to concatenate all matching tokens to a single string.
2984  By default, the matching patterns must also be contiguous in the input string;
2985  this can be disabled by specifying 'adjacent=False' in the constructor.
2986  """
def __init__( self, expr, joinString="", adjacent=True ):
    """Wrap ``expr`` so that its tokens are joined into one string.

    joinString -- stored for use when the tokens are joined
    adjacent   -- when true, ``leaveWhitespace()`` is applied so the
                  contained expressions stop skipping whitespace
    """
    super(Combine,self).__init__( expr )
    if adjacent:
        self.leaveWhitespace()
    # whitespace skipping is re-enabled on the Combine itself
    self.skipWhitespace = True
    self.adjacent = adjacent
    self.joinString = joinString
2995 
def ignore( self, other ):
    """Register ``other`` as an ignorable expression and return ``self``.

    In adjacent mode ``ParserElement.ignore`` is called directly; otherwise
    the call goes to the next class in the inheritance chain.
    """
    if not self.adjacent:
        super( Combine, self).ignore( other )
    else:
        ParserElement.ignore(self, other)
    return self
3002 
def postParse( self, instring, loc, tokenlist ):
    """Collapse the matched tokens into a single joined string token.

    The result starts as a copy of ``tokenlist`` with its list items
    removed, to which the joined string is added.  It is wrapped in a list
    when this element has a results name and the result carries any keys.
    """
    joined = "".join(tokenlist._asStringList(self.joinString))
    retToks = tokenlist.copy()
    del retToks[:]
    retToks += ParseResults([ joined ], modal=self.modalResults)

    if self.resultsName and len(retToks.keys())>0:
        return [ retToks ]
    return retToks
3012 
3014  """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
def __init__( self, expr ):
    """Wrap ``expr``; results are flagged to be saved as a list."""
    super(Group,self).__init__( expr )
    self.saveAsList = True
3018 
def postParse( self, instring, loc, tokenlist ):
    """Nest the matched tokens inside a one-element list."""
    grouped = [ tokenlist ]
    return grouped
3021 
3023  """Converter to return a repetitive expression as a list, but also as a dictionary.
3024  Each element can also be referenced using the first token in the expression as its key.
3025  Useful for tabular report scraping when the first column can be used as a item key.
3026  """
def __init__( self, exprs ):
    """Wrap ``exprs``; results are flagged to be saved as a list."""
    super(Dict,self).__init__( exprs )
    self.saveAsList = True
3030 
def postParse( self, instring, loc, tokenlist ):
    """Add a keyed entry to ``tokenlist`` for every non-empty sub-group.

    The key is the sub-group's first token (an int key is converted to a
    stripped string).  The stored value is:
      * ``""`` for a sub-group holding only the key;
      * the second token, when there are exactly two tokens and the second
        is not itself a ParseResults;
      * otherwise the sub-group minus its key -- unwrapped to its only
        element when exactly one remains and the remainder has no keys.
    Each value is stored together with the sub-group's index.  Returns
    ``tokenlist``, wrapped in a list when this element has a results name.
    """
    for i,tok in enumerate(tokenlist):
        if len(tok) == 0:
            continue
        ikey = tok[0]
        if isinstance(ikey,int):
            ikey = _ustr(tok[0]).strip()
        if len(tok)==1:
            value = ""
        elif len(tok)==2 and not isinstance(tok[1],ParseResults):
            value = tok[1]
        else:
            dictvalue = tok.copy()
            del dictvalue[0]
            if len(dictvalue)!= 1 or (isinstance(dictvalue,ParseResults) and dictvalue.keys()):
                value = dictvalue
            else:
                value = dictvalue[0]
        tokenlist[ikey] = _ParseResultsWithOffset(value,i)

    if self.resultsName:
        return [ tokenlist ]
    return tokenlist
3054 
3055 
3057  """Converter for ignoring the results of a parsed expression."""
def postParse( self, instring, loc, tokenlist ):
    """Discard whatever was matched: always returns a new empty list."""
    return list()
3060 
def suppress( self ):
    """Return ``self`` unchanged (no additional wrapper is created)."""
    return self
3063 
3064 
class OnlyOnce(object):
    """Wrapper for parse actions, to ensure they are only called once.

    The first call delegates to the wrapped action.  Every later call raises
    ParseException until ``reset()`` is called.
    """
    def __init__(self, methodCall):
        self.callable = ParserElement._normalizeParseActionArgs(methodCall)
        self.called = False

    def __call__(self,s,l,t):
        if self.called:
            raise ParseException(s,l,"")
        results = self.callable(s,l,t)
        # marked only after the wrapped action returned without raising
        self.called = True
        return results

    def reset(self):
        """Allow the wrapped action to run once more."""
        self.called = False
3078 
def traceParseAction(f):
    """Decorator for debugging parse actions.

    The returned wrapper writes a ``>>entering`` line to stderr before the
    action runs and a ``<<leaving`` line afterwards, showing either the
    return value or the exception (which is then re-raised).
    """
    f = ParserElement._normalizeParseActionArgs(f)
    def z(*paArgs):
        # ``__name__`` exists on both Python 2 and 3 functions, unlike ``func_name``
        thisFunc = getattr(f, "__name__", repr(f))
        s,l,t = paArgs[-3:]
        if len(paArgs)>3:
            # an extra leading argument is treated as the owning instance
            thisFunc = paArgs[0].__class__.__name__ + '.' + thisFunc
        sys.stderr.write( ">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,line(l,s),l,t) )
        try:
            ret = f(*paArgs)
        except Exception:
            # sys.exc_info() is valid on every Python version, unlike the
            # ``except Exception, exc`` spelling
            exc = sys.exc_info()[1]
            sys.stderr.write( "<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
            raise
        sys.stderr.write( "<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
        return ret
    try:
        z.__name__ = f.__name__
    except AttributeError:
        pass
    return z
3100 
3101 #
3102 # global helpers
3103 #
def delimitedList( expr, delim=",", combine=False ):
    """Helper to define a delimited list of expressions.

    expr    -- expression for one list element
    delim   -- delimiter between elements (default ',')
    combine -- if true, the whole list, delimiters included, is returned as
               one Combine'd token string; otherwise the elements are
               returned as separate tokens with the delimiters suppressed

    The returned expression is named ``<expr> [<delim> <expr>]...``.
    """
    exprText = _ustr(expr)
    dlName = exprText+" ["+_ustr(delim)+" "+exprText+"]..."
    if combine:
        listExpr = Combine( expr + ZeroOrMore( delim + expr ) )
    else:
        listExpr = expr + ZeroOrMore( Suppress( delim ) + expr )
    return listExpr.setName(dlName)
3117 
def countedArray( expr ):
    """Helper to define a counted list of expressions.

    Matches input of the form::
        integer expr expr expr...
    where the leading integer says how many ``expr`` items follow.  The
    items are returned as one grouped list; the count token is dropped.
    """
    arrayExpr = Forward()
    def countFieldParseAction(s,l,t):
        # Redefine the array part now that the count is known.
        n = int(t[0])
        if n:
            body = Group(And([expr]*n))
        else:
            body = Group(empty)
        arrayExpr << body
        # returning an empty list removes the count from the results
        return []
    lengthField = Word(nums).setName("arrayLen")
    lengthField = lengthField.setParseAction(countFieldParseAction, callDuringTry=True)
    return ( lengthField + arrayExpr )
3131 
def _flatten(L):
    """Return a flat list of the non-list items in ``L``, in depth-first order.

    A value that is not exactly a ``list`` is treated as a single item, so
    ``_flatten(x)`` returns ``[x]``; this also applies to tuples, strings
    and list subclasses nested inside ``L``.

    Implemented with an explicit stack of iterators, so neither the length
    nor the nesting depth of the input is limited by the recursion limit.
    """
    if type(L) is not list:
        return [L]
    flat = []
    pending = [iter(L)]
    while pending:
        for item in pending[-1]:
            if type(item) is list:
                # descend; the outer iterator resumes where it left off
                pending.append(iter(item))
                break
            flat.append(item)
        else:
            # innermost iterator exhausted
            pending.pop()
    return flat
3136 
def matchPreviousLiteral(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousLiteral(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches a
    previous literal, will also match the leading "1:1" in "1:10".
    If this is not desired, use matchPreviousExpr.
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    def copyTokenToRepeater(s,l,t):
        # Each time ``expr`` matches, redefine ``rep`` from what it matched.
        if t:
            if len(t) == 1:
                rep << t[0]
            else:
                # several tokens: flatten them and require them in sequence
                tflat = _flatten(t.asList())
                rep << And( [ Literal(tt) for tt in tflat ] )
        else:
            rep << Empty()
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3162 
def matchPreviousExpr(expr):
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks
    for a 'repeat' of a previous expression.  For example::
        first = Word(nums)
        second = matchPreviousExpr(first)
        matchExpr = first + ":" + second
    will match "1:1", but not "1:2".  Because this matches by
    expressions, will *not* match the leading "1:1" in "1:10";
    the expressions are evaluated first, and then compared, so
    "1" is compared with "10".
    Do *not* use with packrat parsing enabled.
    """
    rep = Forward()
    # the repeat parses with a copy of ``expr``
    e2 = expr.copy()
    rep << e2
    def copyTokenToRepeater(s,l,t):
        # Remember what ``expr`` matched, then install a check on ``rep``
        # that rejects any match whose flattened tokens differ.
        matchTokens = _flatten(t.asList())
        def mustMatchTheseTokens(s,l,t):
            theseTokens = _flatten(t.asList())
            if theseTokens != matchTokens:
                raise ParseException("",0,"")
        rep.setParseAction( mustMatchTheseTokens, callDuringTry=True )
    expr.addParseAction(copyTokenToRepeater, callDuringTry=True)
    return rep
3188 
def _escapeRegexRangeChars(s):
    """Escape ``s`` for use inside a regex ``[...]`` character class.

    Each of the characters ``\\``, ``^``, ``-`` and ``]`` is prefixed with
    ``_bslash`` (presumably a single backslash; it is defined elsewhere in
    this module), and literal newline and tab characters are rewritten as
    the two-character sequences ``\\n`` and ``\\t``.
    """
    # the backslash comes first so the escapes added below are not re-escaped
    for c in r"\^-]":
        s = s.replace(c,_bslash+c)
    s = s.replace("\n",r"\n")
    s = s.replace("\t",r"\t")
    return _ustr(s)
3196 
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
    longest-first testing when there is a conflict, regardless of the input order,
    but returns a MatchFirst for best performance.

    Parameters:
     - strs - a string of space-delimited literals, or a list of string literals
     - caseless - (default=False) - treat all literals as caseless
     - useRegex - (default=True) - as an optimization, will generate a Regex
       object; otherwise, will generate a MatchFirst object (if caseless=True, or
       if creating a Regex raises an exception)

    An argument of any other type produces a SyntaxWarning and is then
    treated as an empty set of symbols (the same path an empty string
    takes), instead of failing on an unbound local variable.
    """
    if caseless:
        isequal = ( lambda a,b: a.upper() == b.upper() )
        masks = ( lambda a,b: b.upper().startswith(a.upper()) )
        parseElementClass = CaselessLiteral
    else:
        isequal = ( lambda a,b: a == b )
        masks = ( lambda a,b: b.startswith(a) )
        parseElementClass = Literal

    if isinstance(strs,(list,tuple)):
        symbols = list(strs[:])
    elif isinstance(strs,basestring):
        symbols = strs.split()
    else:
        warnings.warn("Invalid argument to oneOf, expected string or list",
                SyntaxWarning, stacklevel=2)
        symbols = []

    # Drop duplicates, and move any symbol that is masked by an earlier,
    # shorter prefix in front of that prefix so the longer one is tried first.
    i = 0
    while i < len(symbols)-1:
        cur = symbols[i]
        for j,other in enumerate(symbols[i+1:]):
            if ( isequal(other, cur) ):
                del symbols[i+j+1]
                break
            elif ( masks(cur, other) ):
                del symbols[i+j+1]
                symbols.insert(i,other)
                cur = other
                break
        else:
            # nothing removed or moved: position i is settled
            i += 1

    if not caseless and useRegex:
        try:
            if len(symbols)==len("".join(symbols)):
                # every symbol is a single character: use a character class
                return Regex( "[%s]" % "".join( [ _escapeRegexRangeChars(sym) for sym in symbols] ) )
            else:
                return Regex( "|".join( [ re.escape(sym) for sym in symbols] ) )
        except Exception:
            # deliberate best effort: fall through to MatchFirst below
            warnings.warn("Exception creating Regex for oneOf, building MatchFirst",
                    SyntaxWarning, stacklevel=2)

    # last resort, just use MatchFirst
    return MatchFirst( [ parseElementClass(sym) for sym in symbols ] )
3255 
def dictOf( key, value ):
    """Helper to define a dictionary from a key pattern and a value pattern.

    Builds ``Dict(ZeroOrMore(Group(key + value)))``, i.e. the Dict,
    ZeroOrMore and Group wrappers in the proper order.  The key pattern may
    include delimiters or punctuation as long as they are suppressed, and
    the value pattern may include named results.
    """
    entry = Group( key + value )
    entries = ZeroOrMore( entry )
    return Dict( entries )
3265 
def originalTextFor(expr, asString=True):
    """Helper to return the original, untokenized text for a given expression.

    Useful to restore the parsed fields of an HTML start tag into the raw
    tag text, or to turn separate tokens with intervening whitespace back
    into the original matching input text.

    asString -- if true (default), the result is the matched text as a
                string.  If false, the result is a ParseResults that keeps
                any results names matched inside ``expr`` and holds one
                token: the original matched text.  Pass False when ``expr``
                defines results names you want to preserve.
    """
    # An empty expression whose parse action yields the current location;
    # one copy is placed on each side of ``expr``.
    locMarker = Empty().setParseAction(lambda s,loc,t: loc)
    startMarker = locMarker("_original_start")
    matchExpr = startMarker + expr + locMarker("_original_end")

    def sliceOriginal(s,l,t):
        return s[t._original_start:t._original_end]

    def replaceWithOriginal(s,l,t):
        # keep named results, swap the token list for the raw text, then
        # drop the two bookkeeping names
        del t[:]
        t.insert(0, s[t._original_start:t._original_end])
        del t["_original_start"]
        del t["_original_end"]

    if asString:
        extractText = sliceOriginal
    else:
        extractText = replaceWithOriginal
    matchExpr.setParseAction(extractText)
    return matchExpr
3292 
3293 # convenience constants for positional expressions
3294 empty = Empty().setName("empty")
3295 lineStart = LineStart().setName("lineStart")
3296 lineEnd = LineEnd().setName("lineEnd")
3297 stringStart = StringStart().setName("stringStart")
3298 stringEnd = StringEnd().setName("stringEnd")
3299 
3300 _escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
3301 _printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
3302 _escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
3303 _escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
3304 _singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
3305 _charRange = Group(_singleChar + Suppress("-") + _singleChar)
3306 _reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"
3307 
3308 _expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)
3309 
def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
    syntax from regexp '[]' string range definitions::
        srange("[0-9]")   -> "0123456789"
        srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
        srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
    The input string must be enclosed in []'s, and the returned string is the expanded
    character set joined into a single string.
    The values enclosed in the []'s may be::
        a single character
        an escaped character with a leading backslash (such as \- or \])
        an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
        an escaped octal character with a leading '\0' (\041, which is a '!' character)
        a range of any of the above, separated by a dash ('a-z', etc.)
        any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)

    Returns an empty string if the input cannot be parsed or expanded.
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except Exception:
        # Best effort by design; ``Exception`` (not a bare ``except``) lets
        # KeyboardInterrupt and SystemExit through.
        return ""
3330 
def matchOnlyAtCol(n):
    """Helper method for defining parse actions that require matching at a specific
    column in the input text.

    Returns a parse action that raises ParseException unless ``col(locn, strg)``
    equals ``n``.
    """
    def verifyCol(strg,locn,toks):
        if col(locn,strg) != n:
            raise ParseException(strg,locn,"matched token not at column %d" % n)
    return verifyCol
3339 
def replaceWith(replStr):
    """Helper for parse actions that always return one fixed value.

    The returned callable ignores its arguments and returns ``[replStr]``.
    Especially useful together with transformString().
    """
    def _replFunc(*args):
        replacement = [replStr]
        return replacement
    return _replFunc
3347 
def removeQuotes(s,l,t):
    """Helper parse action that strips the first and last character of the first token.

    Intended for quoted strings::
        quotedString.setParseAction( removeQuotes )
    """
    quoted = t[0]
    return quoted[1:-1]
3354 
def upcaseTokens(s,l,t):
    """Helper parse action: convert every token to a string via ``_ustr`` and upper-case it."""
    return [ _ustr(tt).upper() for tt in t ]
3358 
def downcaseTokens(s,l,t):
    """Helper parse action: convert every token to a string via ``_ustr`` and lower-case it."""
    return [ _ustr(tt).lower() for tt in t ]
3362 
def keepOriginalText(s,startLoc,t):
    """Helper parse action to preserve original parsed text,
    overriding any nested parse actions.

    Replaces the contents of ``t`` with the slice of ``s`` from ``startLoc``
    to the end location reported by getTokensEndLoc(), and returns ``t``.
    """
    try:
        endloc = getTokensEndLoc()
    except ParseException:
        raise ParseFatalException("incorrect usage of keepOriginalText - may only be called as a parse action")
    originalText = s[startLoc:endloc]
    del t[:]
    t += ParseResults(originalText)
    return t
3373 
def getTokensEndLoc():
    """Method to be called from within a parse action to determine the end
    location of the parsed tokens.

    Walks up the call stack, starting two frames above this function, to
    the nearest frame of a function named ``_parseNoCache`` and returns that
    frame's local variable ``loc``.

    Raises ParseFatalException when no such frame is on the stack.
    """
    import inspect
    fstack = inspect.stack()
    try:
        # search up the stack (through intervening argument normalizers) for correct calling routine
        for f in fstack[2:]:
            if f[3] == "_parseNoCache":
                endloc = f[0].f_locals["loc"]
                return endloc
        else:
            raise ParseFatalException("incorrect usage of getTokensEndLoc - may only be called from within a parse action")
    finally:
        # release the frame references held by the stack snapshot
        del fstack
3389 
def _makeTags(tagStr, xml):
    """Internal helper to construct opening and closing tag expressions, given a tag name.

    tagStr -- tag name as a string (converted to a Keyword, caseless unless
              ``xml`` is true) or an expression that has a ``name``
    xml    -- true for XML rules (attributes need ``=`` and a double-quoted
              value); false for HTML rules (attribute names are
              lower-cased, values are optional and may be unquoted)

    Returns ``(openTag, closeTag)``.  Their results names are "start"/"end"
    followed by the title-cased tag name with any ':' removed.
    """
    if isinstance(tagStr,basestring):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas,alphanums+"_-:")
    if (xml):
        tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
        attrGroup = Group( tagAttrName + Suppress("=") + tagAttrValue )
    else:
        printablesLessRAbrack = "".join( [ c for c in printables if c not in ">" ] )
        tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) | Word(printablesLessRAbrack)
        attrGroup = Group( tagAttrName.setParseAction(downcaseTokens) +
                           Optional( Suppress("=") + tagAttrValue ) )

    # "empty" is True when the tag closes itself with a trailing '/'
    emptyFlag = Optional("/",default=[False]).setResultsName("empty").setParseAction(lambda s,l,t:t[0]=='/')
    openTag = Suppress("<") + tagStr + Dict(ZeroOrMore(attrGroup)) + emptyFlag + Suppress(">")
    closeTag = Combine(_L("</") + tagStr + ">")

    suffix = "".join(resname.replace(":"," ").title().split())
    openTag = openTag.setResultsName("start"+suffix).setName("<%s>" % tagStr)
    closeTag = closeTag.setResultsName("end"+suffix).setName("</%s>" % tagStr)

    return openTag, closeTag
3417 
def makeHTMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for HTML, given a tag name.

    Returns the ``(openTag, closeTag)`` pair built by ``_makeTags`` with ``xml=False``.
    """
    useXmlRules = False
    return _makeTags( tagStr, useXmlRules )
3421 
def makeXMLTags(tagStr):
    """Helper to construct opening and closing tag expressions for XML, given a tag name.

    Returns the ``(openTag, closeTag)`` pair built by ``_makeTags`` with ``xml=True``.
    """
    useXmlRules = True
    return _makeTags( tagStr, useXmlRules )
3425 
def withAttribute(*args,**attrDict):
    """Helper to create a validating parse action to be used with start tags created
    with makeXMLTags or makeHTMLTags.  Use withAttribute to qualify a starting tag
    with a required attribute value, to avoid false matches on common tags such as
    <TD> or <DIV>.

    Specify the required attribute names and values as either:
     - keyword arguments, as in (class="Customer",align="right"), or
     - name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
    Attribute names with a namespace prefix need the second form.  When
    positional tuples are given, keyword arguments are ignored.

    To require only that an attribute exists, whatever its value, pass
    withAttribute.ANY_VALUE as the value.
    """
    if args:
        pairs = args[:]
    else:
        pairs = attrDict.items()
    # snapshot as a list of (name, value) tuples
    required = [(k,v) for k,v in pairs]
    def pa(s,l,tokens):
        for attrName,attrValue in required:
            if attrName not in tokens:
                raise ParseException(s,l,"no matching attribute " + attrName)
            if attrValue != withAttribute.ANY_VALUE and tokens[attrName] != attrValue:
                raise ParseException(s,l,"attribute '%s' has value '%s', must be '%s'" %
                                            (attrName, tokens[attrName], attrValue))
    return pa
# unique marker meaning "any value is acceptable"
withAttribute.ANY_VALUE = object()
3456 
3457 opAssoc = _Constants()
3458 opAssoc.LEFT = object()
3459 opAssoc.RIGHT = object()
3460 
def operatorPrecedence( baseExpr, opList ):
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary or
    binary, left- or right-associative.  Parse actions can also be attached
    to operator expressions.

    Parameters:
     - baseExpr - expression representing the most basic element (operand)
       of the nested expression grammar
     - opList - list of tuples, one for each operator precedence level in the
       expression grammar; each tuple is of the form
       (opExpr, numTerms, rightLeftAssoc, parseAction), where:
        - opExpr is the pyparsing expression for the operator;
          may also be a string, which will be converted to a Literal;
          if numTerms is 3, opExpr is a tuple of two expressions, for the
          two operators separating the 3 terms
        - numTerms is the number of terms for this operator (must
          be 1, 2, or 3)
        - rightLeftAssoc is the indicator whether the operator is
          right or left associative, using the pyparsing-defined
          constants opAssoc.RIGHT and opAssoc.LEFT.
        - parseAction is the parse action to be associated with
          expressions matching this operator expression (the
          parse action tuple member may be omitted)

    Raises ValueError for a malformed ternary opExpr, an unsupported
    numTerms, or an unknown associativity.
    """
    ret = Forward()
    # operand of the first level: the base element or a parenthesized
    # complete expression
    lastExpr = baseExpr | ( Suppress('(') + ret + Suppress(')') )
    for operDef in opList:
        # pad with None so the parse action member may be left out
        opExpr,arity,rightLeftAssoc,pa = (operDef + (None,))[:4]
        if arity == 3:
            if opExpr is None or len(opExpr) != 2:
                raise ValueError("if numterms=3, opExpr must be a tuple or list of two expressions")
            opExpr1, opExpr2 = opExpr
        thisExpr = Forward()
        # Each level is a lookahead (FollowedBy) guarding a Group of the
        # operand/operator sequence.
        if rightLeftAssoc == opAssoc.LEFT:
            if arity == 1:
                lookahead = FollowedBy(lastExpr + opExpr)
                grouped = Group( lastExpr + OneOrMore( opExpr ) )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(lastExpr + opExpr + lastExpr)
                    grouped = Group( lastExpr + OneOrMore( opExpr + lastExpr ) )
                else:
                    # no explicit operator: operands simply follow one another
                    lookahead = FollowedBy(lastExpr+lastExpr)
                    grouped = Group( lastExpr + OneOrMore(lastExpr) )
            elif arity == 3:
                lookahead = FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr)
                grouped = Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        elif rightLeftAssoc == opAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Optional):
                    opExpr = Optional(opExpr)
                lookahead = FollowedBy(opExpr.expr + thisExpr)
                grouped = Group( opExpr + thisExpr )
            elif arity == 2:
                if opExpr is not None:
                    lookahead = FollowedBy(lastExpr + opExpr + thisExpr)
                    grouped = Group( lastExpr + OneOrMore( opExpr + thisExpr ) )
                else:
                    lookahead = FollowedBy(lastExpr + thisExpr)
                    grouped = Group( lastExpr + OneOrMore( thisExpr ) )
            elif arity == 3:
                lookahead = FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
                grouped = Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
            else:
                raise ValueError("operator must be unary (1), binary (2), or ternary (3)")
        else:
            raise ValueError("operator must indicate right or left associativity")
        matchExpr = lookahead + grouped
        if pa:
            matchExpr.setParseAction( pa )
        # fall back to the previous level when this operator is not present
        thisExpr << ( matchExpr | lastExpr )
        lastExpr = thisExpr
    ret << lastExpr
    return ret
3531 
3532 dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
3533 sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
3534 quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
3535 unicodeString = Combine(_L('u') + quotedString.copy())
3536 
def nestedExpr(opener="(", closer=")", content=None, ignoreExpr=quotedString):
    """Helper method that builds an expression for nested lists enclosed in
       opening and closing delimiters ("(" and ")" by default).

       Parameters:
        - opener - opening character for a nested list (default="("); can also be a pyparsing expression
        - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
        - content - expression for items within the nested lists (default=None)
        - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)

       When no content expression is given, one is derived from the
       delimiters so that all whitespace-delimited content between them is
       captured as a list of separate values.  This is only possible when
       both delimiters are plain strings; otherwise a ValueError is raised.
       A ValueError is also raised when opener and closer compare equal.

       The ignoreExpr argument names expressions that may themselves contain
       opening or closing characters which must not be treated as nesting
       delimiters, such as quotedString or a comment expression.  Combine
       several of them with an Or or MatchFirst.  quotedString is the
       default; pass None if nothing should be ignored.
    """
    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not (isinstance(opener, basestring) and isinstance(closer, basestring)):
            raise ValueError("opening and closing arguments must be strings if no content expression is given")

        def stripToken(t):
            # parse action: trim surrounding whitespace from the matched text
            return t[0].strip()

        whiteChars = ParserElement.DEFAULT_WHITE_CHARS
        singleCharDelims = len(opener) == 1 and len(closer) == 1

        if singleCharDelims and ignoreExpr is not None:
            # one character at a time, stopping at delimiters, whitespace or
            # anything ignoreExpr would match
            content = Combine(
                OneOrMore(~ignoreExpr +
                          CharsNotIn(opener + closer + whiteChars, exact=1))
            ).setParseAction(stripToken)
        elif singleCharDelims:
            # nothing to ignore: a run of non-delimiter, non-whitespace characters
            content = (empty +
                       CharsNotIn(opener + closer + whiteChars).setParseAction(stripToken))
        elif ignoreExpr is not None:
            # multi-character delimiters have to be excluded with negative lookahead
            content = Combine(
                OneOrMore(~ignoreExpr +
                          ~Literal(opener) + ~Literal(closer) +
                          CharsNotIn(whiteChars, exact=1))
            ).setParseAction(stripToken)
        else:
            content = Combine(
                OneOrMore(~Literal(opener) + ~Literal(closer) +
                          CharsNotIn(whiteChars, exact=1))
            ).setParseAction(stripToken)

    # Forward declaration lets the grouped expression refer to itself.
    nested = Forward()
    if ignoreExpr is not None:
        nested << Group(Suppress(opener) + ZeroOrMore(ignoreExpr | nested | content) + Suppress(closer))
    else:
        nested << Group(Suppress(opener) + ZeroOrMore(nested | content) + Suppress(closer))
    return nested
3588 
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.

       Note that blockStatementExpr is modified in place: it is told to
       ignore _bslash + LineEnd() (presumably a backslash line continuation).
    """
    def checkPeerIndent(strg, locn, toks):
        # At end of input there is nothing left to compare.
        if locn >= len(strg):
            return
        column = col(locn, strg)
        expected = indentStack[-1]
        if column > expected:
            # deeper than the current block: fatal, not an ordinary mismatch
            raise ParseFatalException(strg, locn, "illegal nesting")
        if column < expected:
            raise ParseException(strg, locn, "not a peer entry")

    def checkSubIndent(strg, locn, toks):
        column = col(locn, strg)
        if column <= indentStack[-1]:
            raise ParseException(strg, locn, "not a subentry")
        # entering a deeper block: remember its column
        indentStack.append(column)

    def checkUnindent(strg, locn, toks):
        if locn >= len(strg):
            return
        column = col(locn, strg)
        # must be left of the current block and no deeper than the enclosing one
        if not indentStack or column >= indentStack[-1] or column > indentStack[-2]:
            raise ParseException(strg, locn, "not an unindent")
        indentStack.pop()

    # One or more suppressed line ends; only tab and space count as
    # skippable whitespace in front of them.
    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)

    # One or more statements at the same column, each grouped separately.
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Optional(NL))
    if indent:
        smExpr = Group(Optional(NL) +
                       FollowedBy(blockStatementExpr) +
                       INDENT + statements + UNDENT)
    else:
        smExpr = Group(Optional(NL) + statements)

    # Deliberately done after the block expression has been assembled.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
3640 
# Character sets expanded by srange from bracketed range specifications
# covering code points in the 0xa1-0xff range.
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")

# Open/close tag expressions for any tag whose name starts with a letter and
# continues with letters, digits, "_" or ":".
anyOpenTag,anyCloseTag = makeHTMLTags(Word(alphas,alphanums+"_:"))

# "&name;" for a handful of common entities; the name is exposed as the
# "entity" results name so a parse action can look it up.
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") +";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))

def replaceHTMLEntity(t):
    """Helper parse action that maps a matched common HTML entity to its
       replacement character.

       Looks up t.entity in _htmlEntityMap and returns the mapped character,
       or None when the entity name is not in the map.  A plain dictionary
       lookup is used instead of the "x and y or z" idiom, which would
       wrongly yield None for any mapped value that happens to be falsy.
    """
    return _htmlEntityMap.get(t.entity)
3648 
# Comment expressions are very common and easy to get subtly wrong, so
# ready-made versions are provided here.

# /* ... */ ending at the first closing "*/"
cStyleComment = Regex(
    r"/\*(?:[^*]*\*+)+?/"
).setName("C style comment")

# <!-- ... --> spanning lines, ending at the first "-->"
htmlComment = Regex(
    r"<!--[\s\S]*?-->"
)

# everything up to the end of the current line, leading whitespace included
restOfLine = Regex(
    r".*"
).leaveWhitespace()

# // comment; a backslash-newline pair continues it onto the next line
dblSlashComment = Regex(
    r"\/\/(\\\n|.)*"
).setName("// comment")

# either a /* ... */ comment or a // comment
cppStyleComment = Regex(
    r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))"
).setName("C++ style comment")

javaStyleComment = cppStyleComment

# "#" through the end of the line
pythonStyleComment = Regex(
    r"#.*"
).setName("Python style comment")

# Every printable character except the comma itself.
_noncomma = "".join(
    [ c for c in printables if c != "," ]
)

# One item of a comma separated list: words of non-comma characters, where a
# run of blanks/tabs is kept only if it is not followed by a comma or a line end.
_commasepitem = Combine(
    OneOrMore(
        Word(_noncomma) +
        Optional( Word(" \t") + ~Literal(",") + ~LineEnd() )
    )
).streamline().setName("commaItem")

# Items may be quoted strings or bare items; an empty item yields "".
commaSeparatedList = delimitedList(
    Optional( quotedString | _commasepitem, default="" )
).setName("commaSeparatedList")
3664 
3665 
if __name__ == "__main__":
    # Self-test: parse a few simple SQL SELECT statements, some of them
    # deliberately malformed, and print either the results or the error.

    def test( teststring ):
        """Parse teststring with simpleSQL and print the tokens, or, on a
           parse error, the offending line with a caret under the error column.
        """
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->" + str(tokenlist))
            print ("tokens = " + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = " + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        # "except X as err" is accepted by Python 2.6+ and Python 3; the old
        # "except X,err" spelling is a syntax error on Python 3.
        except ParseBaseException as err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)
        # Blank separator line; print ("") behaves the same whether print is
        # a statement or a function (a bare print() shows "()" on Python 2).
        print ("")

    selectToken = CaselessLiteral( "select" )
    fromToken = CaselessLiteral( "from" )

    ident = Word( alphas, alphanums + "_$" )
    # dotted names are combined into a single token and upper-cased
    columnName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )#.setName("columns")
    tableName = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList = Group( delimitedList( tableName ) )#.setName("tables")
    simpleSQL = ( selectToken + \
                  ( '*' | columnNameList ).setResultsName( "columns" ) + \
                  fromToken + \
                  tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    # the remaining inputs include deliberately malformed statements
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )

Generated at Wed Dec 4 2013 14:33:06 for Gaudi Framework, version v24r2 by Doxygen version 1.8.2 written by Dimitri van Heesch, © 1997-2004