pyparsing module - Classes and methods to define and execute parsing grammars

The pyparsing module is an alternative approach to creating and executing simple grammars,
vs. the traditional lex/yacc approach, or the use of regular expressions.  With pyparsing, you
don't need to learn a new syntax for defining grammars or matching expressions - the parsing module
provides a library of classes that you use to construct the grammar directly in Python.

Here is a program to parse "Hello, World!" (or any greeting of the form "<salutation>, <addressee>!")::

    from pyparsing import Word, alphas

    # define grammar of a greeting
    greet = Word( alphas ) + "," + Word( alphas ) + "!"

    hello = "Hello, World!"
    print hello, "->", greet.parseString( hello )

The program outputs the following::

    Hello, World! -> ['Hello', ',', 'World', '!']

The Python representation of the grammar is quite readable, owing to the self-explanatory
class names, and the use of '+', '|' and '^' operators.

The parsed results returned from parseString() can be accessed as a nested list, a dictionary, or an
object with named attributes.

The pyparsing module handles some of the problems that are typically vexing when writing text parsers:
 - extra or missing whitespace (the above program will also handle "Hello,World!", "Hello , World !", etc.)
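For example, the same greeting grammar can attach results names so that the parsed fields can be
retrieved by attribute or key as well as by position (a minimal sketch of the access styles
described above; the names "salutation" and "addressee" are illustrative)::

    from pyparsing import Word, alphas

    greet = Word( alphas ).setResultsName("salutation") + "," + \
            Word( alphas ).setResultsName("addressee") + "!"

    result = greet.parseString( "Hello , World !" )   # extra whitespace is ignored
    print result.asList()         # ['Hello', ',', 'World', '!']
    print result.salutation       # Hello
    print result["addressee"]     # World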
__versionTime__ = "17 February 2009 19:45"
__author__ = "Paul McGuire <ptmcg@users.sourceforge.net>"

from weakref import ref as wkref
'And', 'CaselessKeyword', 'CaselessLiteral', 'CharsNotIn', 'Combine', 'Dict', 'Each', 'Empty',
'FollowedBy', 'Forward', 'GoToColumn', 'Group', 'Keyword', 'LineEnd', 'LineStart', 'Literal',
'MatchFirst', 'NoMatch', 'NotAny', 'OneOrMore', 'OnlyOnce', 'Optional', 'Or',
'ParseBaseException', 'ParseElementEnhance', 'ParseException', 'ParseExpression', 'ParseFatalException',
'ParseResults', 'ParseSyntaxException', 'ParserElement', 'QuotedString', 'RecursiveGrammarException',
'Regex', 'SkipTo', 'StringEnd', 'StringStart', 'Suppress', 'Token', 'TokenConverter', 'Upcase',
'White', 'Word', 'WordEnd', 'WordStart', 'ZeroOrMore',
'alphanums', 'alphas', 'alphas8bit', 'anyCloseTag', 'anyOpenTag', 'cStyleComment', 'col',
'commaSeparatedList', 'commonHTMLEntity', 'countedArray', 'cppStyleComment', 'dblQuotedString',
'dblSlashComment', 'delimitedList', 'dictOf', 'downcaseTokens', 'empty', 'getTokensEndLoc', 'hexnums',
'htmlComment', 'javaStyleComment', 'keepOriginalText', 'line', 'lineEnd', 'lineStart', 'lineno',
'makeHTMLTags', 'makeXMLTags', 'matchOnlyAtCol', 'matchPreviousExpr', 'matchPreviousLiteral',
'nestedExpr', 'nullDebugAction', 'nums', 'oneOf', 'opAssoc', 'operatorPrecedence', 'printables',
'punc8bit', 'pythonStyleComment', 'quotedString', 'removeQuotes', 'replaceHTMLEntity',
'replaceWith', 'restOfLine', 'sglQuotedString', 'srange', 'stringEnd',
'stringStart', 'traceParseAction', 'unicodeString', 'upcaseTokens', 'withAttribute',
'indentedBlock', 'originalTextFor',
# Detect if we are running version 3.X and make appropriate changes

if sys.version_info[0] > 2:
    _MAX_INT = sys.maxsize
else:
    _MAX_INT = sys.maxint
109 """Drop-in replacement for str(obj) that tries to be Unicode friendly. It first tries
110 str(obj). If that fails with a UnicodeEncodeError, then it tries unicode(obj). It
111 then < returns the unicode object | encodes it with the default encoding | ... >.
113 if isinstance(obj,unicode):
121 except UnicodeEncodeError:
return dict( [(c,0) for c in strg] )

"""Escape &, <, >, ", ', etc. in a string of data."""
from_symbols = '&><"\''
to_symbols = ['&'+s+';' for s in "amp gt lt quot apos".split()]
for from_,to_ in zip(from_symbols, to_symbols):
    data = data.replace(from_, to_)
alphas = string.lowercase + string.uppercase              # Python 2 spelling
alphas = string.ascii_lowercase + string.ascii_uppercase  # Python 3 spelling

hexnums = nums + "ABCDEFabcdef"
alphanums = alphas + nums

printables = "".join( [ c for c in string.printable if c not in string.whitespace ] )
"""base exception class for all parsing runtime exceptions"""

def __init__( self, pstr, loc=0, msg=None, elem=None ):

"""supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text
"""
if( aname == "lineno" ):
elif( aname in ("col","column") ):
elif( aname == "line" ):
raise AttributeError(aname)

return "%s (at char %d), (line:%d, col:%d)" % \
       ( self.msg, self.loc, self.lineno, self.column )
"""Extracts the exception line from the input string, and marks
   the location of the exception with a special symbol.
"""
line_column = self.column - 1
line_str = "".join( [line_str[:line_column],
                     markerString, line_str[line_column:]])
return line_str.strip()

return "loc msg pstr parserElement lineno col line " \
       "markInputLine __str__ __repr__".split()
"""exception thrown when a parse expression does not match the input;
   supported attributes by name are:
    - lineno - returns the line number of the exception text
    - col - returns the column number of the exception text
    - line - returns the line containing the exception text
"""
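For example, a failed parse can be reported using these attributes (a minimal sketch, reusing the
greeting grammar from the module documentation)::

    from pyparsing import Word, alphas, ParseException

    greet = Word( alphas ) + "," + Word( alphas ) + "!"
    try:
        greet.parseString( "Hello, 42!" )
    except ParseException, pe:
        print "failed at line %d, column %d:" % (pe.lineno, pe.col)
        print pe.line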
class ParseFatalException(ParseBaseException):
    """user-throwable exception thrown when inconsistent parse content
       is found; stops all parsing immediately"""

"""just like ParseFatalException, but thrown internally when an
   ErrorStop indicates that parsing is to stop immediately because
   an unbacktrackable syntax error has been found"""
super(ParseSyntaxException, self).__init__(
    pe.pstr, pe.loc, pe.msg, pe.parserElement)
251 """exception thrown by validate() if the grammar could be improperly recursive"""
264 return repr(self.
tup)
266 self.
tup = (self.
tup[0],i)
"""Structured parse results, to provide multiple means of access to the parsed data:
   - as a list (len(results))
   - by list index (results[0], results[1], etc.)
   - by attribute (results.<resultsName>)
"""
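For example, a small grammar with results names shows all three access styles (a minimal sketch;
the date grammar and its field names are illustrative)::

    from pyparsing import Word, nums

    date_expr = Word(nums)("year") + "/" + Word(nums)("month") + "/" + Word(nums)("day")
    result = date_expr.parseString("2009/02/17")

    print result[0]         # 2009  - by list index
    print result.month      # 02    - by attribute
    print result.asDict()   # e.g. {'year': '2009', 'month': '02', 'day': '17'}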
__slots__ = ( "__toklist", "__tokdict", "__doinit", "__name", "__parent", "__accumNames", "__weakref__" )
275 def __new__(cls, toklist, name=None, asList=True, modal=True ):
276 if isinstance(toklist, cls):
278 retobj = object.__new__(cls)
279 retobj.__doinit =
True
284 def __init__( self, toklist, name=None, asList=True, modal=True ):
290 if isinstance(toklist, list):
299 if isinstance(name,int):
302 if not toklist
in (
None,
'',[]):
303 if isinstance(toklist,basestring):
304 toklist = [ toklist ]
306 if isinstance(toklist,ParseResults):
310 self[name].__name = name
313 self[name] = toklist[0]
314 except (KeyError,TypeError,IndexError):
318 if isinstance( i, (int,slice) ):
327 if isinstance(v,_ParseResultsWithOffset):
328 self.
__tokdict[k] = self.__tokdict.get(k,list()) + [v]
330 elif isinstance(k,int):
336 if isinstance(sub,ParseResults):
337 sub.__parent = wkref(self)
340 if isinstance(i,(int,slice)):
345 if isinstance(i, int):
350 removed = list(
range(*i.indices(mylen)))
356 for k, (value, position)
in enumerate(occurrences):
366 __nonzero__ = __bool__
370 """Returns all named result keys."""
371 return self.__tokdict.keys()
373 def pop( self, index=-1 ):
374 """Removes and returns item at specified index (default=last).
375 Will work with either numeric indices or dict-key indicies."""
380 def get(self, key, defaultValue=None):
381 """Returns named result matching the given key, or if there is no
382 such name, then returns the given defaultValue or None if no
383 defaultValue is specified."""
390 self.__toklist.insert(index, insStr)
394 for k, (value, position)
in enumerate(occurrences):
398 """Returns all named result keys and values as a list of tuples."""
399 return [(k,self[k])
for k
in self.
__tokdict]
402 """Returns all named result values."""
403 return [ v[-1][0]
for v
in self.__tokdict.values() ]
424 addoffset = (
lambda a: (a<0
and offset)
or (a+offset) )
425 otheritems = other.__tokdict.items()
427 for (k,vlist)
in otheritems
for v
in vlist]
428 for k,v
in otherdictitems:
430 if isinstance(v[0],ParseResults):
431 v[0].__parent = wkref(self)
434 self.__accumNames.update( other.__accumNames )
445 if isinstance(i, ParseResults):
446 out += sep +
_ustr(i)
458 if isinstance( item, ParseResults ):
459 out += item._asStringList()
461 out.append(
_ustr(item) )
465 """Returns the parse results as a nested list of matching tokens, all converted to strings."""
468 if isinstance(res,ParseResults):
469 out.append( res.asList() )
475 """Returns the named parse results as dictionary."""
476 return dict( self.
items() )
479 """Returns a new copy of a ParseResults object."""
481 ret.__tokdict = self.__tokdict.copy()
487 def asXML( self, doctag=None, namedItemsOnly=False, indent="", formatted=True ):
488 """Returns the parse results as XML. Tags are created for tokens and lists that have defined results names."""
491 namedItems = dict( [ (v[1],k)
for (k,vlist)
in self.__tokdict.items()
493 nextLevelIndent = indent +
" "
502 if doctag
is not None:
514 out += [ nl, indent,
"<", selfTag,
">" ]
517 for i,res
in enumerate(worklist):
518 if isinstance(res,ParseResults):
520 out += [ res.asXML(namedItems[i],
521 namedItemsOnly
and doctag
is None,
525 out += [ res.asXML(
None,
526 namedItemsOnly
and doctag
is None,
533 resTag = namedItems[i]
540 out += [ nl, nextLevelIndent,
"<", resTag,
">",
544 out += [ nl, indent,
"</", selfTag,
">" ]
548 for k,vlist
in self.__tokdict.items():
555 """Returns the results name for this token expression."""
561 return par.__lookup(self)
564 elif (len(self) == 1
and
566 self.__tokdict.values()[0][0][1]
in (0,-1)):
567 return self.__tokdict.keys()[0]
571 def dump(self,indent='',depth=0):
572 """Diagnostic method for listing out the contents of a ParseResults.
573 Accepts an optional indent argument so that this string can be embedded
574 in a nested display of other data."""
582 out.append(
"%s%s- %s: " % (indent,(
' '*depth), k) )
583 if isinstance(v,ParseResults):
586 out.append( v.dump(indent,depth+1) )
598 ( self.__tokdict.copy(),
610 self.__accumNames.update(inAccumNames)
617 return dir(super(ParseResults,self)) + self.
keys()
"""Returns current column within a string, counting newlines as line separators.
   The first column is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
"""
return (loc<len(strg) and strg[loc] == '\n') and 1 or loc - strg.rfind("\n", 0, loc)
"""Returns current line number within a string, counting newlines as line separators.
   The first line is number 1.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{ParserElement.parseString}<ParserElement.parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
"""
return strg.count("\n",0,loc) + 1
"""Returns the line of text containing loc within a string, counting newlines as line separators.
"""
lastCR = strg.rfind("\n", 0, loc)
nextCR = strg.find("\n", loc)
if nextCR >= 0:
    return strg[lastCR+1:nextCR]
else:
    return strg[lastCR+1:]
print ("Match " + _ustr(expr) + " at loc " + _ustr(loc) + "(%d,%d)" % ( lineno(loc,instring), col(loc,instring) ))

print ("Matched " + _ustr(expr) + " -> " + str(toks.asList()))

print ("Exception raised:" + _ustr(exc))
663 """'Do-nothing' debug action, to suppress debugging output during parsing."""
666 class ParserElement(object):
667 """Abstract base level parser element class."""
668 DEFAULT_WHITE_CHARS =
" \n\t\r"
671 """Overrides the default whitespace chars
673 ParserElement.DEFAULT_WHITE_CHARS = chars
674 setDefaultWhitespaceChars = staticmethod(setDefaultWhitespaceChars)
700 """Make a copy of this ParserElement. Useful for defining different parse actions
701 for the same parsing pattern, using copies of the original parse element."""
702 cpy = copy.copy( self )
706 cpy.whiteChars = ParserElement.DEFAULT_WHITE_CHARS
710 """Define name for this expression, for use in debugging."""
713 if hasattr(self,
"exception"):
714 self.exception.msg = self.
errmsg
718 """Define name for referencing matching tokens as a nested attribute
719 of the returned parse results.
720 NOTE: this returns a *copy* of the original ParserElement object;
721 this is so that the client can define a basic element, such as an
722 integer, and reference it in multiple places with different names.
724 newself = self.
copy()
725 newself.resultsName = name
726 newself.modalResults =
not listAllMatches
730 """Method to invoke the Python pdb debugger when this element is
731 about to be parsed. Set breakFlag to True to enable, False to
735 _parseMethod = self.
_parse
736 def breaker(instring, loc, doActions=True, callPreParse=True):
739 return _parseMethod( instring, loc, doActions, callPreParse )
740 breaker._originalParseMethod = _parseMethod
743 if hasattr(self.
_parse,
"_originalParseMethod"):
744 self.
_parse = self._parse._originalParseMethod
748 """Internal method used to decorate parse actions that take fewer than 3 arguments,
749 so that all parse actions can be called as f(s,l,t)."""
754 if isinstance(f,type):
758 codeObj = f.func_code
761 if codeObj.co_flags & STAR_ARGS:
763 numargs = codeObj.co_argcount
765 if hasattr(f,
"im_self"):
768 if hasattr(f,
"__self__"):
772 except AttributeError:
775 call_im_func_code = f.__call__.im_func.func_code
777 call_im_func_code = f.__code__
781 if call_im_func_code.co_flags & STAR_ARGS:
783 numargs = call_im_func_code.co_argcount
785 if hasattr(f.__call__,
"im_self"):
788 if hasattr(f.__call__,
"__self__"):
790 except AttributeError:
792 call_func_code = f.__call__.func_code
794 call_func_code = f.__call__.__code__
796 if call_func_code.co_flags & STAR_ARGS:
798 numargs = call_func_code.co_argcount
800 if hasattr(f.__call__,
"im_self"):
803 if hasattr(f.__call__,
"__self__"):
813 return f(f.__call__.__self__, s,l,t)
824 tmp.__name__ = f.__name__
825 except (AttributeError,TypeError):
829 tmp.__doc__ = f.__doc__
830 except (AttributeError,TypeError):
834 tmp.__dict__.update(f.__dict__)
835 except (AttributeError,TypeError):
839 _normalizeParseActionArgs = staticmethod(_normalizeParseActionArgs)
"""Define action to perform when successfully matching parse element definition.
   Parse action fn is a callable method with 0-3 arguments, called as fn(s,loc,toks),
   fn(loc,toks), fn(toks), or just fn(), where:
    - s = the original string being parsed (see note below)
    - loc = the location of the matching substring
    - toks = a list of the matched tokens, packaged as a ParseResults object
   If the functions in fns modify the tokens, they can return them as the return
   value from fn, and the modified list of tokens will replace the original.
   Otherwise, fn does not need to return any value.

   Note: the default parsing behavior is to expand tabs in the input string
   before starting the parsing process.  See L{I{parseString}<parseString>} for more information
   on parsing strings containing <TAB>s, and suggested methods to maintain a
   consistent view of the parsed string, the parse location, and line and column
   positions within the parsed string.
"""
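For example, a parse action can convert matched numeric strings to Python ints as they are parsed
(a minimal sketch; the helper name convertToInt is illustrative)::

    from pyparsing import Word, nums

    def convertToInt(s, loc, toks):
        # return a replacement token list - here, the matched digits as an int
        return [ int(toks[0]) ]

    integer = Word(nums).setParseAction(convertToInt)
    print integer.parseString("1234")[0] + 1    # 1235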
self.callDuringTry = ("callDuringTry" in kwargs and kwargs["callDuringTry"])
863 """Add parse action to expression's list of parse actions. See L{I{setParseAction}<setParseAction>}."""
"""Define action to perform if parsing fails at this expression.
   Fail action fn is a callable function that takes the arguments
   fn(s,loc,expr,err) where:
    - s = string being parsed
    - loc = location where expression match was attempted and failed
    - expr = the parse expression that failed
    - err = the exception thrown
   The function returns no value.  It may throw ParseFatalException
   if it is desired to stop parsing immediately."""
888 loc,dummy = e._parse( instring, loc )
890 except ParseException:
900 instrlen = len(instring)
901 while loc < instrlen
and instring[loc]
in wt:
913 def _parseNoCache( self, instring, loc, doActions=True, callPreParse=True ):
914 debugging = ( self.
debug )
921 preloc = self.
preParse( instring, loc )
927 loc,tokens = self.
parseImpl( instring, preloc, doActions )
930 except ParseBaseException, err:
933 self.
debugActions[2]( instring, tokensStart, self, err )
935 self.
failAction( instring, tokensStart, self, err )
939 preloc = self.
preParse( instring, loc )
945 loc,tokens = self.
parseImpl( instring, preloc, doActions )
949 loc,tokens = self.
parseImpl( instring, preloc, doActions )
951 tokens = self.
postParse( instring, loc, tokens )
958 tokens = fn( instring, tokensStart, retTokens )
959 if tokens
is not None:
962 asList=self.
saveAsList and isinstance(tokens,(ParseResults,list)),
964 except ParseBaseException, err:
967 self.
debugActions[2]( instring, tokensStart, self, err )
971 tokens = fn( instring, tokensStart, retTokens )
972 if tokens
is not None:
975 asList=self.
saveAsList and isinstance(tokens,(ParseResults,list)),
981 self.
debugActions[1]( instring, tokensStart, loc, self, retTokens )
983 return loc, retTokens
987 return self.
_parse( instring, loc, doActions=
False )[0]
988 except ParseFatalException:
993 def _parseCache( self, instring, loc, doActions=True, callPreParse=True ):
994 lookup = (self,instring,loc,callPreParse,doActions)
995 if lookup
in ParserElement._exprArgCache:
996 value = ParserElement._exprArgCache[ lookup ]
997 if isinstance(value,Exception):
1002 value = self.
_parseNoCache( instring, loc, doActions, callPreParse )
1003 ParserElement._exprArgCache[ lookup ] = (value[0],value[1].
copy())
1005 except ParseBaseException, pe:
1006 ParserElement._exprArgCache[ lookup ] = pe
1009 _parse = _parseNoCache
1014 ParserElement._exprArgCache.clear()
1015 resetCache = staticmethod(resetCache)
_packratEnabled = False

"""Enables "packrat" parsing, which adds memoizing to the parsing logic.
   Repeated parse attempts at the same string location (which happens
   often in many complex grammars) can immediately return a cached value,
   instead of re-executing parsing/validating code.  Memoizing is done of
   both valid results and parsing exceptions.

   This speedup may break existing programs that use parse actions that
   have side-effects.  For this reason, packrat parsing is disabled when
   you first import pyparsing.  To activate the packrat feature, your
   program must call the class method ParserElement.enablePackrat().  If
   your program uses psyco to "compile as you go", you must call
   enablePackrat before calling psyco.full().  If you do not do this,
   Python will crash.  For best results, call enablePackrat() immediately
   after importing pyparsing.
"""
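For example (a minimal sketch; enable memoizing before any grammars are defined or exercised)::

    import pyparsing
    pyparsing.ParserElement.enablePackrat()

    # ... then define and use grammars as usual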
1034 if not ParserElement._packratEnabled:
1035 ParserElement._packratEnabled =
True
1036 ParserElement._parse = ParserElement._parseCache
1037 enablePackrat = staticmethod(enablePackrat)
"""Execute the parse expression with the given string.
   This is the main interface to the client code, once the complete
   expression has been built.

   If you want the grammar to require that the entire input string be
   successfully parsed, then set parseAll to True (equivalent to ending
   the grammar with StringEnd()).

   Note: parseString implicitly calls expandtabs() on the input string,
   in order to report proper column numbers in parse actions.
   If the input string contains tabs and
   the grammar uses parse actions that use the loc argument to index into the
   string being parsed, you can ensure you have a consistent view of the input
   string by:
    - calling parseWithTabs on your grammar before calling parseString
      (see L{I{parseWithTabs}<parseWithTabs>})
    - define your parse action using the full (s,loc,toks) signature, and
      reference the input string using the parse action's s argument
    - explicitly expand the tabs in your input string before calling
      parseString
"""
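For example, requiring the whole input to match (a minimal sketch)::

    from pyparsing import Word, nums, ParseException

    integer = Word(nums)
    print integer.parseString("123")                    # ['123']
    try:
        integer.parseString("123 abc", parseAll=True)   # trailing text -> ParseException
    except ParseException, pe:
        print "parse failed:", pe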
1061 ParserElement.resetCache()
1068 instring = instring.expandtabs()
1070 loc, tokens = self.
_parse( instring, 0 )
1072 loc = self.
preParse( instring, loc )
1074 except ParseBaseException, exc:
"""Scan the input string for expression matches.  Each match will return the
   matching tokens, start location, and end location.  May be called with optional
   maxMatches argument, to clip scanning after 'n' matches are found.

   Note that the start and end locations are reported relative to the string
   being parsed.  See L{I{parseString}<parseString>} for more information on parsing
   strings with embedded tabs."""
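For example, finding all integers in a string together with their locations (a minimal sketch)::

    from pyparsing import Word, nums

    integer = Word(nums)
    for tokens, start, end in integer.scanString("12 drummers, 11 pipers, 10 lords"):
        print tokens[0], "found at", start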
1094 instring =
_ustr(instring).expandtabs()
1095 instrlen = len(instring)
1099 ParserElement.resetCache()
1102 while loc <= instrlen
and matches < maxMatches:
1104 preloc = preparseFn( instring, loc )
1105 nextLoc,tokens = parseFn( instring, preloc, callPreParse=
False )
1106 except ParseException:
1110 yield tokens, preloc, nextLoc
1112 except ParseBaseException, pe:
"""Extension to scanString, to modify matching text with modified tokens that may
   be returned from a parse action.  To use transformString, define a grammar and
   attach a parse action to it that modifies the returned token list.
   Invoking transformString() on a target string will then scan for matches,
   and replace the matched text patterns according to the logic in the parse
   action.  transformString() returns the resulting transformed string."""
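For example, upper-casing every word that begins with a capital letter (a minimal sketch)::

    from pyparsing import Word, alphas, upcaseTokens

    # words whose first character is uppercase
    title_word = Word( alphas.upper(), alphas )
    title_word.setParseAction( upcaseTokens )
    print title_word.transformString("Now is the Winter of our Discontent")
    # NOW is the WINTER of our DISCONTENT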
1129 out.append( instring[lastE:s] )
1131 if isinstance(t,ParseResults):
1133 elif isinstance(t,list):
1138 out.append(instring[lastE:])
1139 return "".join(
map(_ustr,out))
1140 except ParseBaseException, pe:
1144 """Another extension to scanString, simplifying the access to the tokens found
1145 to match the given parse expression. May be called with optional
1146 maxMatches argument, to clip searching after 'n' matches are found.
1150 except ParseBaseException, pe:
1154 """Implementation of + operator - returns And"""
1155 if isinstance( other, basestring ):
1157 if not isinstance( other, ParserElement ):
1158 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1159 SyntaxWarning, stacklevel=2)
1161 return And( [ self, other ] )
1164 """Implementation of + operator when left operand is not a ParserElement"""
1165 if isinstance( other, basestring ):
1167 if not isinstance( other, ParserElement ):
1168 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1169 SyntaxWarning, stacklevel=2)
1174 """Implementation of - operator, returns And with error stop"""
1175 if isinstance( other, basestring ):
1177 if not isinstance( other, ParserElement ):
1178 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1179 SyntaxWarning, stacklevel=2)
1184 """Implementation of - operator when left operand is not a ParserElement"""
1185 if isinstance( other, basestring ):
1187 if not isinstance( other, ParserElement ):
1188 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1189 SyntaxWarning, stacklevel=2)
1194 if isinstance(other,int):
1195 minElements, optElements = other,0
1196 elif isinstance(other,tuple):
1197 other = (other + (
None,
None))[:2]
1198 if other[0]
is None:
1199 other = (0, other[1])
1200 if isinstance(other[0],int)
and other[1]
is None:
1207 elif isinstance(other[0],int)
and isinstance(other[1],int):
1208 minElements, optElements = other
1209 optElements -= minElements
1211 raise TypeError(
"cannot multiply 'ParserElement' and ('%s','%s') objects",
type(other[0]),
type(other[1]))
1213 raise TypeError(
"cannot multiply 'ParserElement' and '%s' objects",
type(other))
1216 raise ValueError(
"cannot multiply ParserElement by negative value")
1218 raise ValueError(
"second tuple value must be greater or equal to first tuple value")
1219 if minElements == optElements == 0:
1220 raise ValueError(
"cannot multiply ParserElement by 0 or (0,0)")
1223 def makeOptionalList(n):
1225 return Optional(self + makeOptionalList(n-1))
1229 if minElements == 1:
1230 ret = self + makeOptionalList(optElements)
1232 ret =
And([self]*minElements) + makeOptionalList(optElements)
1234 ret = makeOptionalList(optElements)
1236 if minElements == 1:
1239 ret =
And([self]*minElements)
1246 """Implementation of | operator - returns MatchFirst"""
1247 if isinstance( other, basestring ):
1249 if not isinstance( other, ParserElement ):
1250 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1251 SyntaxWarning, stacklevel=2)
1256 """Implementation of | operator when left operand is not a ParserElement"""
1257 if isinstance( other, basestring ):
1259 if not isinstance( other, ParserElement ):
1260 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1261 SyntaxWarning, stacklevel=2)
1266 """Implementation of ^ operator - returns Or"""
1267 if isinstance( other, basestring ):
1269 if not isinstance( other, ParserElement ):
1270 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1271 SyntaxWarning, stacklevel=2)
1273 return Or( [ self, other ] )
1276 """Implementation of ^ operator when left operand is not a ParserElement"""
1277 if isinstance( other, basestring ):
1279 if not isinstance( other, ParserElement ):
1280 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1281 SyntaxWarning, stacklevel=2)
1286 """Implementation of & operator - returns Each"""
1287 if isinstance( other, basestring ):
1289 if not isinstance( other, ParserElement ):
1290 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1291 SyntaxWarning, stacklevel=2)
1293 return Each( [ self, other ] )
1296 """Implementation of & operator when left operand is not a ParserElement"""
1297 if isinstance( other, basestring ):
1299 if not isinstance( other, ParserElement ):
1300 warnings.warn(
"Cannot combine element of type %s with ParserElement" %
type(other),
1301 SyntaxWarning, stacklevel=2)
1306 """Implementation of ~ operator - returns NotAny"""
1310 """Shortcut for setResultsName, with listAllMatches=default::
1311 userdata = Word(alphas).setResultsName("name") + Word(nums+"-").setResultsName("socsecno")
1312 could be written as::
1313 userdata = Word(alphas)("name") + Word(nums+"-")("socsecno")
1318 """Suppresses the output of this ParserElement; useful to keep punctuation from
1319 cluttering up returned output.
1324 """Disables the skipping of whitespace before matching the characters in the
1325 ParserElement's defined pattern. This is normally only used internally by
1326 the pyparsing module, but may be needed in some whitespace-sensitive grammars.
1332 """Overrides the default whitespace chars
1340 """Overrides default behavior to expand <TAB>s to spaces before parsing the input string.
1341 Must be called before parseString when the input grammar contains elements that
1342 match <TAB> characters."""
1347 """Define expression to be ignored (e.g., comments) while doing pattern
1348 matching; may be called repeatedly, to define multiple comment or other
1351 if isinstance( other, Suppress ):
1353 self.ignoreExprs.append( other )
1355 self.ignoreExprs.append(
Suppress( other ) )
1359 """Enable display of debugging messages while doing pattern matching."""
1360 self.
debugActions = (startAction
or _defaultStartDebugAction,
1361 successAction
or _defaultSuccessDebugAction,
1362 exceptionAction
or _defaultExceptionDebugAction)
1367 """Enable display of debugging messages while doing pattern matching.
1368 Set flag to True to enable, False to disable."""
1370 self.
setDebugActions( _defaultStartDebugAction, _defaultSuccessDebugAction, _defaultExceptionDebugAction )
1390 """Check defined expressions for valid structure, check for infinite recursive definitions."""
1391 self.checkRecursion( [] )
1394 """Execute the parse expression on the given file or filename.
1395 If a filename is specified (instead of a file object),
1396 the entire file is opened, read, and closed before parsing.
1399 file_contents = file_or_filename.read()
1400 except AttributeError:
1401 f = open(file_or_filename,
"rb")
1402 file_contents = f.read()
1406 except ParseBaseException, exc:
1414 if aname ==
"myException":
1418 raise AttributeError(
"no such attribute " + aname)
1421 if isinstance(other, ParserElement):
1422 return self
is other
or self.
__dict__ == other.__dict__
1423 elif isinstance(other, basestring):
1427 except ParseBaseException:
1430 return super(ParserElement,self)==other
1433 return not (self == other)
1436 return hash(id(self))
1439 return self == other
1442 return not (self == other)
1446 """Abstract ParserElement subclass, for defining atomic matching patterns."""
1448 super(Token,self).
__init__( savelist=
False )
1452 s = super(Token,self).
setName(name)
1459 """An empty token, will always match."""
1468 """A token that will never match."""
1485 """Token to exactly match a specified string."""
1493 warnings.warn(
"null string passed to Literal; use Empty() instead",
1494 SyntaxWarning, stacklevel=2)
1508 (self.
matchLen==1
or instring.startswith(self.
match,loc)) ):
"""Token to exactly match a specified string as a keyword, that is, it must be
   immediately followed by a non-keyword character.  Compare with Literal::
     Literal("if") will match the leading 'if' in 'ifAndOnlyIf'.
     Keyword("if") will not; it will only match the leading 'if' in 'if x=1', or 'if(y==2)'
   Accepts two optional constructor arguments in addition to the keyword string:
   identChars is a string of characters that would be valid identifier characters,
   defaulting to all alphanumerics + "_" and "$"; caseless allows case-insensitive
   matching, default is False.
"""
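For example (a minimal sketch contrasting Literal and Keyword)::

    from pyparsing import Literal, Keyword, ParseException

    print Literal("if").parseString("ifAndOnlyIf")    # ['if'] - matches the prefix
    try:
        Keyword("if").parseString("ifAndOnlyIf")      # next char is a keyword char
    except ParseException:
        print "Keyword('if') does not match 'ifAndOnlyIf'"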
DEFAULT_KEYWORD_CHARS = alphanums+"_$"
1529 def __init__( self, matchString, identChars=DEFAULT_KEYWORD_CHARS, caseless=False ):
1536 warnings.warn(
"null string passed to Keyword; use Empty() instead",
1537 SyntaxWarning, stacklevel=2)
1546 identChars = identChars.upper()
1553 (loc == 0
or instring[loc-1].upper()
not in self.
identChars) ):
1557 (self.
matchLen==1
or instring.startswith(self.
match,loc))
and
1559 (loc == 0
or instring[loc-1]
not in self.
identChars) ):
1568 c = super(Keyword,self).
copy()
1569 c.identChars = Keyword.DEFAULT_KEYWORD_CHARS
1573 """Overrides the default Keyword chars
1575 Keyword.DEFAULT_KEYWORD_CHARS = chars
1576 setDefaultKeywordChars = staticmethod(setDefaultKeywordChars)
1579 """Token to match a specified string, ignoring case of letters.
1580 Note: the matched results will always be in the case of the given
1581 match string, NOT the case of the input text.
1584 super(CaselessLiteral,self).
__init__( matchString.upper() )
1601 def __init__( self, matchString, identChars=Keyword.DEFAULT_KEYWORD_CHARS ):
1602 super(CaselessKeyword,self).
__init__( matchString, identChars, caseless=
True )
"""Token for matching words composed of allowed character sets.
   Defined with string containing all allowed initial characters,
   an optional string containing allowed body characters (if omitted,
   defaults to the initial character set), and an optional minimum,
   maximum, and/or exact length.  The default value for min is 1 (a
   minimum value < 1 is not valid); the default values for max and exact
   are 0, meaning no maximum or exact length restriction.
"""
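For example, a typical programming-language identifier (a minimal sketch)::

    from pyparsing import Word, alphas, alphanums

    identifier = Word( alphas+"_", alphanums+"_" )
    print identifier.parseString("error_count2")    # ['error_count2']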
1623 def __init__( self, initChars, bodyChars=None, min=1, max=0, exact=0, asKeyword=False ):
1637 raise ValueError(
"cannot specify a minimum length < 1; use Optional(Word()) if zero-length word is permitted")
1676 result = self.re.match(instring,loc)
1684 return loc,result.group()
1686 if not(instring[ loc ]
in self.
initChars):
1694 instrlen = len(instring)
1696 maxloc = start + self.
maxLen
1697 maxloc =
min( maxloc, instrlen )
1698 while loc < maxloc
and instring[loc]
in bodychars:
1701 throwException =
False
1702 if loc - start < self.
minLen:
1703 throwException =
True
1704 if self.
maxSpecified and loc < instrlen
and instring[loc]
in bodychars:
1705 throwException =
True
1707 if (start>0
and instring[start-1]
in bodychars)
or (loc<instrlen
and instring[loc]
in bodychars):
1708 throwException =
True
1717 return loc, instring[start:loc]
1721 return super(Word,self).
__str__()
1743 """Token for matching strings that match a given regular expression.
1744 Defined with string specifying the regular expression in a form recognized by the inbuilt Python re module.
1747 """The parameters pattern and flags are passed to the re.compile() function as-is. See the Python re module for an explanation of the acceptable patterns and flags."""
1750 if len(pattern) == 0:
1751 warnings.warn(
"null string passed to Regex; use Empty() instead",
1752 SyntaxWarning, stacklevel=2)
1760 except sre_constants.error:
1761 warnings.warn(
"invalid pattern (%s) passed to Regex" % pattern,
1762 SyntaxWarning, stacklevel=2)
1772 result = self.re.match(instring,loc)
1780 d = result.groupdict()
1789 return super(Regex,self).
__str__()
"""Token for matching strings that are delimited by quoting characters.
"""
def __init__( self, quoteChar, escChar=None, escQuote=None, multiline=False, unquoteResults=True, endQuoteChar=None):
    """
       Defined with the following parameters:
        - quoteChar - string of one or more characters defining the quote delimiting string
        - escChar - character to escape quotes, typically backslash (default=None)
        - escQuote - special quote sequence to escape an embedded quote string (such as SQL's "" to escape an embedded ") (default=None)
        - multiline - boolean indicating whether quotes can span multiple lines (default=False)
        - unquoteResults - boolean indicating whether the matched text should be unquoted (default=True)
        - endQuoteChar - string of one or more characters defining the end of the quote delimited string (default=None => same as quoteChar)
    """
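For example, a SQL-style single-quoted string with '' as an embedded escape (a minimal sketch)::

    from pyparsing import QuotedString

    sql_string = QuotedString("'", escQuote="''")
    print sql_string.parseString("'O''Reilly'")    # ["O'Reilly"]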
1812 super(QuotedString,self).
__init__()
1815 quoteChar = quoteChar.strip()
1816 if len(quoteChar) == 0:
1817 warnings.warn(
"quoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
1820 if endQuoteChar
is None:
1821 endQuoteChar = quoteChar
1823 endQuoteChar = endQuoteChar.strip()
1824 if len(endQuoteChar) == 0:
1825 warnings.warn(
"endQuoteChar cannot be the empty string",SyntaxWarning,stacklevel=2)
1845 self.
pattern =
r'%s(?:[^%s\n\r%s]' % \
1851 '|(?:' +
')|(?:'.join([
"%s[^%s]" % (re.escape(self.
endQuoteChar[:i]),
1856 self.
pattern += (
r'|(?:%s)' % re.escape(escQuote))
1858 self.
pattern += (
r'|(?:%s.)' % re.escape(escChar))
1865 except sre_constants.error:
1866 warnings.warn(
"invalid pattern (%s) passed to Regex" % self.
pattern,
1867 SyntaxWarning, stacklevel=2)
1877 result = instring[loc] == self.
firstQuoteChar and self.re.match(instring,loc)
or None
1885 ret = result.group()
1892 if isinstance(ret,basestring):
1905 return super(QuotedString,self).
__str__()
1916 """Token for matching words composed of characters *not* in a given set.
1917 Defined with string containing all disallowed characters, and an optional
1918 minimum, maximum, and/or exact length. The default value for min is 1 (a
1919 minimum value < 1 is not valid); the default values for max and exact
1920 are 0, meaning no maximum or exact length restriction.
1928 raise ValueError(
"cannot specify a minimum length < 1; use Optional(CharsNotIn()) if zero-length char group is permitted")
1958 maxlen =
min( start+self.
maxLen, len(instring) )
1959 while loc < maxlen
and \
1960 (instring[loc]
not in notchars):
1963 if loc - start < self.
minLen:
1970 return loc, instring[start:loc]
1974 return super(CharsNotIn, self).
__str__()
1987 """Special matching class for matching whitespace. Normally, whitespace is ignored
1988 by pyparsing grammars. This class is included when some whitespace structures
1989 are significant. Define with a string containing the whitespace characters to be
1990 matched; default is " \\t\\r\\n". Also takes optional min, max, and exact arguments,
1991 as defined for the Word class."""
1999 def __init__(self, ws=" \t\r\n", min=1, max=0, exact=0):
2029 maxloc = start + self.
maxLen
2030 maxloc =
min( maxloc, len(instring) )
2031 while loc < maxloc
and instring[loc]
in self.
matchWhite:
2034 if loc - start < self.
minLen:
2041 return loc, instring[start:loc]
2046 super(_PositionToken,self).
__init__()
2052 """Token to advance to a specific column of input text; useful for tabular report scraping."""
2058 if col(loc,instring) != self.
col:
2059 instrlen = len(instring)
2062 while loc < instrlen
and instring[loc].isspace()
and col( loc, instring ) != self.
col :
2067 thiscol =
col( loc, instring )
2068 if thiscol > self.
col:
2069 raise ParseException( instring, loc,
"Text not in expected column", self )
2070 newloc = loc + self.
col - thiscol
2071 ret = instring[ loc: newloc ]
2075 """Matches if current position is at the beginning of a line within the parse string"""
2083 preloc = super(LineStart,self).
preParse(instring,loc)
2084 if instring[preloc] ==
"\n":
2090 (loc == self.
preParse( instring, 0 ))
or
2091 (instring[loc-1] ==
"\n") ):
2100 """Matches if current position is at the end of a line within the parse string"""
2108 if loc<len(instring):
2109 if instring[loc] ==
"\n":
2117 elif loc == len(instring):
2126 """Matches if current position is at the beginning of the parse string"""
2135 if loc != self.
preParse( instring, 0 ):
2144 """Matches if current position is at the end of the parse string"""
2151 if loc < len(instring):
2157 elif loc == len(instring):
2159 elif loc > len(instring):
2168 """Matches if the current position is at the beginning of a Word, and
2169 is not preceded by any character in a given set of wordChars
2170 (default=printables). To emulate the \b behavior of regular expressions,
2171 use WordStart(alphanums). WordStart will also match at the beginning of
2172 the string being parsed, or at the beginning of a line.
2181 if (instring[loc-1]
in self.
wordChars or
2190 """Matches if the current position is at the end of a Word, and
2191 is not followed by any character in a given set of wordChars
2192 (default=printables). To emulate the \b behavior of regular expressions,
2193 use WordEnd(alphanums). WordEnd will also match at the end of
2194 the string being parsed, or at the end of a line.
2203 instrlen = len(instring)
2204 if instrlen>0
and loc<instrlen:
2216 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
2218 super(ParseExpression,self).
__init__(savelist)
2219 if isinstance( exprs, list ):
2221 elif isinstance( exprs, basestring ):
2225 self.
exprs = list( exprs )
2227 self.
exprs = [ exprs ]
2231 return self.
exprs[i]
2234 self.exprs.append( other )
2239 """Extends leaveWhitespace defined in base class, and also invokes leaveWhitespace on
2240 all contained expressions."""
2242 self.
exprs = [ e.copy()
for e
in self.
exprs ]
2243 for e
in self.
exprs:
2248 if isinstance( other, Suppress ):
2250 super( ParseExpression, self).
ignore( other )
2251 for e
in self.
exprs:
2254 super( ParseExpression, self).
ignore( other )
2255 for e
in self.
exprs:
2261 return super(ParseExpression,self).
__str__()
2272 for e
in self.
exprs:
2278 if ( len(self.
exprs) == 2 ):
2279 other = self.
exprs[0]
2280 if ( isinstance( other, self.__class__ )
and
2281 not(other.parseAction)
and
2282 other.resultsName
is None and
2284 self.
exprs = other.exprs[:] + [ self.
exprs[1] ]
2289 other = self.
exprs[-1]
2290 if ( isinstance( other, self.__class__ )
and
2291 not(other.parseAction)
and
2292 other.resultsName
is None and
2294 self.
exprs = self.
exprs[:-1] + other.exprs[:]
2302 ret = super(ParseExpression,self).
setResultsName(name,listAllMatches)
2306 tmp = validateTrace[:]+[self]
2307 for e
in self.
exprs:
2312 """Requires all given ParseExpressions to be found in the given order.
2313 Expressions may be separated by whitespace.
2314 May be constructed using the '+' operator.
2319 super(Empty,self).
__init__(*args, **kwargs)
2323 super(And,self).
__init__(exprs, savelist)
2325 for e
in self.
exprs:
2326 if not e.mayReturnEmpty:
2336 loc, resultlist = self.
exprs[0].
_parse( instring, loc, doActions, callPreParse=
False )
2338 for e
in self.
exprs[1:]:
2344 loc, exprtokens = e._parse( instring, loc, doActions )
2345 except ParseSyntaxException:
2347 except ParseBaseException, pe:
2349 except IndexError, ie:
2352 loc, exprtokens = e._parse( instring, loc, doActions )
2353 if exprtokens
or exprtokens.keys():
2354 resultlist += exprtokens
2355 return loc, resultlist
2358 if isinstance( other, basestring ):
2360 return self.
append( other )
2363 subRecCheckList = parseElementList[:] + [ self ]
2364 for e
in self.
exprs:
2365 e.checkRecursion( subRecCheckList )
2366 if not e.mayReturnEmpty:
2370 if hasattr(self,
"name"):
2380 """Requires that at least one ParseExpression is found.
2381 If two expressions match, the expression that matches the longest string will be used.
2382 May be constructed using the '^' operator.
2385 super(Or,self).
__init__(exprs, savelist)
2387 for e
in self.
exprs:
2388 if e.mayReturnEmpty:
2396 for e
in self.
exprs:
2398 loc2 = e.tryParse( instring, loc )
2399 except ParseException, err:
2400 if err.loc > maxExcLoc:
2404 if len(instring) > maxExcLoc:
2405 maxException =
ParseException(instring,len(instring),e.errmsg,self)
2406 maxExcLoc = len(instring)
2408 if loc2 > maxMatchLoc:
2413 if maxException
is not None:
2416 raise ParseException(instring, loc,
"no defined alternatives to match", self)
2418 return maxMatchExp._parse( instring, loc, doActions )
2421 if isinstance( other, basestring ):
2423 return self.
append( other )
2426 if hasattr(self,
"name"):
2435 subRecCheckList = parseElementList[:] + [ self ]
2436 for e
in self.
exprs:
2437 e.checkRecursion( subRecCheckList )
2441 """Requires that at least one ParseExpression is found.
2442 If two expressions match, the first one listed is the one that will match.
2443 May be constructed using the '|' operator.
2446 super(MatchFirst,self).
__init__(exprs, savelist)
2449 for e
in self.
exprs:
2450 if e.mayReturnEmpty:
2459 for e
in self.
exprs:
2461 ret = e._parse( instring, loc, doActions )
2463 except ParseException, err:
2464 if err.loc > maxExcLoc:
2468 if len(instring) > maxExcLoc:
2469 maxException =
ParseException(instring,len(instring),e.errmsg,self)
2470 maxExcLoc = len(instring)
2474 if maxException
is not None:
2477 raise ParseException(instring, loc,
"no defined alternatives to match", self)
2480 if isinstance( other, basestring ):
2482 return self.
append( other )
2485 if hasattr(self,
"name"):
2494 subRecCheckList = parseElementList[:] + [ self ]
2495 for e
in self.
exprs:
2496 e.checkRecursion( subRecCheckList )
2500 """Requires all given ParseExpressions to be found, but in any order.
2501 Expressions may be separated by whitespace.
2502 May be constructed using the '&' operator.
2505 super(Each,self).
__init__(exprs, savelist)
2507 for e
in self.
exprs:
2508 if not e.mayReturnEmpty:
2519 self.
required = [ e
for e
in self.
exprs if not isinstance(e,(Optional,ZeroOrMore,OneOrMore)) ]
2533 tmpLoc = e.tryParse( instring, tmpLoc )
2534 except ParseException:
2537 matchOrder.append(e)
2542 if len(failed) == len(tmpExprs):
2543 keepMatching =
False
2546 missing =
", ".join( [
_ustr(e)
for e
in tmpReqd ] )
2547 raise ParseException(instring,loc,
"Missing one or more required elements (%s)" % missing )
2550 matchOrder += list(e
for e
in self.
exprs if isinstance(e,Optional)
and e.expr
in tmpOpt)
2553 for e
in matchOrder:
2554 loc,results = e._parse(instring,loc,doActions)
2555 resultlist.append(results)
2558 for r
in resultlist:
2561 if k
in finalResults.keys():
2566 for k,v
in dups.items():
2568 return loc, finalResults
2571 if hasattr(self,
"name"):
2580 subRecCheckList = parseElementList[:] + [ self ]
2581 for e
in self.
exprs:
2582 e.checkRecursion( subRecCheckList )
2586 """Abstract subclass of ParserElement, for combining and post-processing parsed tokens."""
2588 super(ParseElementEnhance,self).
__init__(savelist)
2589 if isinstance( expr, basestring ):
2593 if expr
is not None:
2600 self.ignoreExprs.extend(expr.ignoreExprs)
2603 if self.
expr is not None:
2604 return self.expr._parse( instring, loc, doActions, callPreParse=
False )
2610 self.
expr = self.expr.copy()
2611 if self.
expr is not None:
2612 self.expr.leaveWhitespace()
2616 if isinstance( other, Suppress ):
2618 super( ParseElementEnhance, self).
ignore( other )
2619 if self.
expr is not None:
2622 super( ParseElementEnhance, self).
ignore( other )
2623 if self.
expr is not None:
2629 if self.
expr is not None:
2630 self.expr.streamline()
2634 if self
in parseElementList:
2636 subRecCheckList = parseElementList[:] + [ self ]
2637 if self.
expr is not None:
2638 self.expr.checkRecursion( subRecCheckList )
2641 tmp = validateTrace[:]+[self]
2642 if self.
expr is not None:
2643 self.expr.validate(tmp)
2648 return super(ParseElementEnhance,self).
__str__()
2652 if self.
strRepr is None and self.
expr is not None:
2658 """Lookahead matching of the given parse expression. FollowedBy
2659 does *not* advance the parsing position within the input string, it only
2660 verifies that the specified parse expression matches at the current
2661 position. FollowedBy always returns a null token list."""
2663 super(FollowedBy,self).
__init__(expr)
2667 self.expr.tryParse( instring, loc )
2672 """Lookahead to disallow matching with the given parse expression. NotAny
2673 does *not* advance the parsing position within the input string, it only
2674 verifies that the specified parse expression does *not* match at the current
2675 position. Also, NotAny does *not* skip over leading whitespace. NotAny
2676 always returns a null token list. May be constructed using the '~' operator."""
2687 self.expr.tryParse( instring, loc )
2688 except (ParseException,IndexError):
2699 if hasattr(self,
"name"):
2709 """Optional repetition of zero or more of the given expression."""
2711 super(ZeroOrMore,self).
__init__(expr)
2717 loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=
False )
2724 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
2725 if tmptokens
or tmptokens.keys():
2727 except (ParseException,IndexError):
2733 if hasattr(self,
"name"):
2743 ret.saveAsList =
True
2748 """Repetition of one or more of the given expression."""
2751 loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=
False )
2759 loc, tmptokens = self.expr._parse( instring, preloc, doActions )
2760 if tmptokens
or tmptokens.keys():
2762 except (ParseException,IndexError):
2768 if hasattr(self,
"name"):
2778 ret.saveAsList =
True
2784 __nonzero__ = __bool__
2790 """Optional matching of the given expression.
2791 A default return string can also be specified, if the optional expression
2794 def __init__( self, exprs, default=_optionalNotMatched ):
2795 super(Optional,self).
__init__( exprs, savelist=
False )
2801 loc, tokens = self.expr._parse( instring, loc, doActions, callPreParse=
False )
2802 except (ParseException,IndexError):
2804 if self.expr.resultsName:
2814 if hasattr(self,
"name"):
2824 """Token for skipping over all undefined text until the matched expression is found.
2825 If include is set to true, the matched expression is also parsed (the skipped text
2826 and matched expression are returned as a 2-element list). The ignore
2827 argument is used to define grammars (typically quoted strings and comments) that
2828 might contain false matches.
2830 def __init__( self, other, include=False, ignore=None, failOn=None ):
2831 super( SkipTo, self ).
__init__( other )
2837 if failOn
is not None and isinstance(failOn, basestring):
2846 instrlen = len(instring)
2849 while loc <= instrlen:
2853 self.failOn.tryParse(instring, loc)
2854 except ParseBaseException:
2863 loc = self.ignoreExpr.tryParse(instring,loc)
2864 print "found ignoreExpr, advance to", loc
2865 except ParseBaseException:
2867 expr._parse( instring, loc, doActions=
False, callPreParse=
False )
2868 skipText = instring[startLoc:loc]
2870 loc,mat = expr._parse(instring,loc,doActions,callPreParse=
False)
2874 return loc, [ skipRes ]
2876 return loc, [ skipText ]
2878 return loc, [ skipText ]
2879 except (ParseException,IndexError):
"""Forward declaration of an expression to be defined later -
   used for recursive grammars, such as algebraic infix notation.
   When the expression is known, it is assigned to the Forward variable using the '<<' operator.

   Note: take care when assigning to Forward not to overlook precedence of operators.
   Specifically, '|' has a lower precedence than '<<', so that::
      fwdExpr << a | b | c
   will actually be evaluated as::
      (fwdExpr << a) | b | c
   thereby leaving b and c out as parseable alternatives.  It is recommended that you
   explicitly group the values inserted into the Forward::
      fwdExpr << (a | b | c)
"""
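For example, a recursive grammar for nested parenthesized integer lists (a minimal sketch; note
the parenthesized right-hand side of '<<', as recommended above)::

    from pyparsing import Forward, Word, nums, Suppress, Group, delimitedList

    item = Forward()
    nested = Group( Suppress("(") + delimitedList(item) + Suppress(")") )
    item << ( Word(nums) | nested )

    print item.parseString("(1,(2,3),4)").asList()
    # [['1', ['2', '3'], '4']]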
2904 super(Forward,self).
__init__( other, savelist=
False )
2907 if isinstance( other, basestring ):
2917 self.ignoreExprs.extend(self.expr.ignoreExprs)
2927 if self.
expr is not None:
2928 self.expr.streamline()
2932 if self
not in validateTrace:
2933 tmp = validateTrace[:]+[self]
2934 if self.
expr is not None:
2935 self.expr.validate(tmp)
2939 if hasattr(self,
"name"):
2945 if self.
expr is not None:
2951 return self.__class__.__name__ +
": " + retString
2954 if self.
expr is not None:
2955 return super(Forward,self).
copy()
2966 """Abstract subclass of ParseExpression, for converting parsed results."""
2968 super(TokenConverter,self).
__init__( expr )
2972 """Converter to upper case all matching tokens."""
2975 warnings.warn(
"Upcase class is deprecated, use upcaseTokens parse action instead",
2976 DeprecationWarning,stacklevel=2)
2979 return list(
map( string.upper, tokenlist ))
2983 """Converter to concatenate all matching tokens to a single string.
2984 By default, the matching patterns must also be contiguous in the input string;
2985 this can be disabled by specifying 'adjacent=False' in the constructor.
2987 def __init__( self, expr, joinString="", adjacent=True ):
2988 super(Combine,self).
__init__( expr )
2998 ParserElement.ignore(self, other)
3000 super( Combine, self).
ignore( other )
3004 retToks = tokenlist.copy()
3014 """Converter to return the matched tokens as a list - useful for returning tokens of ZeroOrMore and OneOrMore expressions."""
3020 return [ tokenlist ]
3023 """Converter to return a repetitive expression as a list, but also as a dictionary.
3024 Each element can also be referenced using the first token in the expression as its key.
3025 Useful for tabular report scraping when the first column can be used as a item key.
3032 for i,tok
in enumerate(tokenlist):
3036 if isinstance(ikey,int):
3037 ikey =
_ustr(tok[0]).strip()
3040 elif len(tok)==2
and not isinstance(tok[1],ParseResults):
3043 dictvalue = tok.copy()
3045 if len(dictvalue)!= 1
or (isinstance(dictvalue,ParseResults)
and dictvalue.keys()):
3051 return [ tokenlist ]
3057 """Converter for ignoring the results of a parsed expression."""
3066 """Wrapper for parse actions, to ensure they are only called once."""
3068 self.
callable = ParserElement._normalizeParseActionArgs(methodCall)
3080 """Decorator for debugging parse actions."""
3081 f = ParserElement._normalizeParseActionArgs(f)
3083 thisFunc = f.func_name
3086 thisFunc = paArgs[0].__class__.__name__ +
'.' + thisFunc
3087 sys.stderr.write(
">>entering %s(line: '%s', %d, %s)\n" % (thisFunc,
line(l,s),l,t) )
3090 except Exception, exc:
3091 sys.stderr.write(
"<<leaving %s (exception: %s)\n" % (thisFunc,exc) )
3093 sys.stderr.write(
"<<leaving %s (ret: %s)\n" % (thisFunc,ret) )
3096 z.__name__ = f.__name__
3097 except AttributeError:
"""Helper to define a delimited list of expressions - the delimiter defaults to ','.
   By default, the list elements and delimiters can have intervening whitespace, and
   comments, but this can be overridden by passing 'combine=True' in the constructor.
   If combine is set to True, the matching tokens are returned as a single token
   string, with the delimiters included; otherwise, the matching tokens are returned
   as a list of tokens, with the delimiters suppressed.
"""
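For example (a minimal sketch)::

    from pyparsing import Word, alphas, delimitedList

    name_list = delimitedList( Word(alphas) )
    print name_list.parseString("aaa, bbb , ccc").asList()   # ['aaa', 'bbb', 'ccc']

    dotted = delimitedList( Word(alphas), delim=".", combine=True )
    print dotted.parseString("www.example.com")              # ['www.example.com']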
3119 """Helper to define a counted list of expressions.
3120 This helper defines a pattern of the form::
3121 integer expr expr expr...
3122 where the leading integer tells how many expr expressions follow.
3123 The matched tokens returns the array of expr tokens as a list - the leading count token is suppressed.
3126 def countFieldParseAction(s,l,t):
3130 return (
Word(nums).setName(
"arrayLen").setParseAction(countFieldParseAction, callDuringTry=
True) + arrayExpr )
3133 if type(L)
is not list:
return [L]
3134 if L == []:
return L
3138 """Helper to define an expression that is indirectly defined from
3139 the tokens matched in a previous expression, that is, it looks
3140 for a 'repeat' of a previous expression. For example::
3142 second = matchPreviousLiteral(first)
3143 matchExpr = first + ":" + second
3144 will match "1:1", but not "1:2". Because this matches a
3145 previous literal, will also match the leading "1:1" in "1:10".
3146 If this is not desired, use matchPreviousExpr.
3147 Do *not* use with packrat parsing enabled.
3150 def copyTokenToRepeater(s,l,t):
3157 rep <<
And( [
Literal(tt)
for tt
in tflat ] )
3160 expr.addParseAction(copyTokenToRepeater, callDuringTry=
True)
3164 """Helper to define an expression that is indirectly defined from
3165 the tokens matched in a previous expression, that is, it looks
3166 for a 'repeat' of a previous expression. For example::
3168 second = matchPreviousExpr(first)
3169 matchExpr = first + ":" + second
3170 will match "1:1", but not "1:2". Because this matches by
3171 expressions, will *not* match the leading "1:1" in "1:10";
3172 the expressions are evaluated first, and then compared, so
3173 "1" is compared with "10".
3174 Do *not* use with packrat parsing enabled.
3179 def copyTokenToRepeater(s,l,t):
3181 def mustMatchTheseTokens(s,l,t):
3183 if theseTokens != matchTokens:
3185 rep.setParseAction( mustMatchTheseTokens, callDuringTry=
True )
3186 expr.addParseAction(copyTokenToRepeater, callDuringTry=
True)
3192 s = s.replace(c,_bslash+c)
3193 s = s.replace(
"\n",
r"\n")
3194 s = s.replace(
"\t",
r"\t")
def oneOf( strs, caseless=False, useRegex=True ):
    """Helper to quickly define a set of alternative Literals, and makes sure to do
       longest-first testing when there is a conflict, regardless of the input order,
       but returns a MatchFirst for best performance.
        - strs - a string of space-delimited literals, or a list of string literals
        - caseless - (default=False) - treat all literals as caseless
        - useRegex - (default=True) - as an optimization, will generate a Regex
          object; otherwise, will generate a MatchFirst object (if caseless=True, or
          if creating a Regex raises an exception)
    """
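For example, comparison operators where longest-first matching matters (a minimal sketch)::

    from pyparsing import oneOf

    comparison_op = oneOf("< <= > >= = !=")
    print comparison_op.parseString("<=")    # ['<='], not ['<']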
3210 isequal = (
lambda a,b: a.upper() == b.upper() )
3211 masks = (
lambda a,b: b.upper().startswith(a.upper()) )
3212 parseElementClass = CaselessLiteral
3214 isequal = (
lambda a,b: a == b )
3215 masks = (
lambda a,b: b.startswith(a) )
3216 parseElementClass = Literal
3218 if isinstance(strs,(list,tuple)):
3219 symbols = list(strs[:])
3220 elif isinstance(strs,basestring):
3221 symbols = strs.split()
3223 warnings.warn(
"Invalid argument to oneOf, expected string or list",
3224 SyntaxWarning, stacklevel=2)
3227 while i < len(symbols)-1:
3229 for j,other
in enumerate(symbols[i+1:]):
3230 if ( isequal(other, cur) ):
3233 elif ( masks(cur, other) ):
3235 symbols.insert(i,other)
3241 if not caseless
and useRegex:
3244 if len(symbols)==len(
"".join(symbols)):
3247 return Regex(
"|".join( [ re.escape(sym)
for sym
in symbols] ) )
3249 warnings.warn(
"Exception creating Regex for oneOf, building MatchFirst",
3250 SyntaxWarning, stacklevel=2)
3254 return MatchFirst( [ parseElementClass(sym)
for sym
in symbols ] )
3257 """Helper to easily and clearly define a dictionary by specifying the respective patterns
3258 for the key and value. Takes care of defining the Dict, ZeroOrMore, and Group tokens
3259 in the proper order. The key pattern can include delimiting markers or punctuation,
3260 as long as they are suppressed, thereby leaving the significant key text. The value
3261 pattern can include named results, so that the Dict results can include named token
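# Illustrative usage sketch for dictOf; the _demo_* function and the sample
# attribute strings are additions for documentation only, not part of the module API.
def _demo_dictOf():
    attrLabel = Word(alphas)
    attrValue = Suppress("=") + Word(alphanums)
    attrDict = dictOf(attrLabel, attrValue)
    result = attrDict.parseString("shape=circle color=red size=9")
    return result["color"]   # -> 'red'; result["shape"] -> 'circle'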
3267 """Helper to return the original, untokenized text for a given expression. Useful to
3268 restore the parsed fields of an HTML start tag into the raw tag text itself, or to
3269 revert separate tokens with intervening whitespace back to the original matching
3270 input text. Simpler to use than the parse action keepOriginalText, and does not
3271 require the inspect module to chase up the call stack. By default, returns a
3272 string containing the original parsed text.
3274 If the optional asString argument is passed as False, then the return value is a
3275 ParseResults containing any results names that were originally matched, and a
3276 single token containing the original matched text from the input string. So if
3277 the expression passed to originalTextFor contains expressions with defined
3278 results names, you must set asString to False if you want to preserve those
3279 results name values."""
3280 locMarker =
Empty().setParseAction(
lambda s,loc,t: loc)
3281 matchExpr = locMarker(
"_original_start") + expr + locMarker(
"_original_end")
3283 extractText =
lambda s,l,t: s[t._original_start:t._original_end]
3285 def extractText(s,l,t):
3287 t.insert(0, s[t._original_start:t._original_end])
3288 del t[
"_original_start"]
3289 del t[
"_original_end"]
3290 matchExpr.setParseAction(extractText)
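# Illustrative usage sketch for originalTextFor; the _demo_* function is an
# addition for documentation only, not part of the module API.
def _demo_originalTextFor():
    # recover the matched words *with* their original spacing, instead of the
    # whitespace-normalized token list
    sentence = originalTextFor(OneOrMore(Word(alphas)))
    return sentence.parseString("Hello   World")   # -> ['Hello   World']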
_escapedPunc = Word( _bslash, r"\[]-*.$+^?()~ ", exact=2 ).setParseAction(lambda s,l,t:t[0][1])
_printables_less_backslash = "".join([ c for c in printables if c not in r"\]" ])
_escapedHexChar = Combine( Suppress(_bslash + "0x") + Word(hexnums) ).setParseAction(lambda s,l,t:unichr(int(t[0],16)))
_escapedOctChar = Combine( Suppress(_bslash) + Word("0","01234567") ).setParseAction(lambda s,l,t:unichr(int(t[0],8)))
_singleChar = _escapedPunc | _escapedHexChar | _escapedOctChar | Word(_printables_less_backslash,exact=1)
_charRange = Group(_singleChar + Suppress("-") + _singleChar)
_reBracketExpr = Literal("[") + Optional("^").setResultsName("negate") + Group( OneOrMore( _charRange | _singleChar ) ).setResultsName("body") + "]"

_expanded = lambda p: (isinstance(p,ParseResults) and ''.join([ unichr(c) for c in range(ord(p[0]),ord(p[1])+1) ]) or p)

def srange(s):
    r"""Helper to easily define string ranges for use in Word construction.  Borrows
       syntax from regexp '[]' string range definitions::
          srange("[0-9]")   -> "0123456789"
          srange("[a-z]")   -> "abcdefghijklmnopqrstuvwxyz"
          srange("[a-z$_]") -> "abcdefghijklmnopqrstuvwxyz$_"
       The input string must be enclosed in []'s, and the returned string is the expanded
       character set joined into a single string.
       The values enclosed in the []'s may be::
          a single character
          an escaped character with a leading backslash (such as \- or \])
          an escaped hex character with a leading '\0x' (\0x21, which is a '!' character)
          an escaped octal character with a leading '\0' (\041, which is a '!' character)
          a range of any of the above, separated by a dash ('a-z', etc.)
          any combination of the above ('aeiouy', 'a-zA-Z0-9_$', etc.)
    """
    try:
        return "".join([_expanded(part) for part in _reBracketExpr.parseString(s).body])
    except:
        return ""
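# Illustrative usage sketch for srange; the _demo_* function is an addition for
# documentation only, not part of the module API.
def _demo_srange():
    # build a Word of hex digits from a regex-style character-range spec
    hexChars = srange("[0-9a-fA-F]")             # -> "0123456789abcdefABCDEF"
    hexNumber = Word(hexChars)
    return hexNumber.parseString("DEADbeef42")   # -> ['DEADbeef42']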
3332 """Helper method for defining parse actions that require matching at a specific
3333 column in the input text.
3335 def verifyCol(strg,locn,toks):
3336 if col(locn,strg) != n:
3337 raise ParseException(strg,locn,
"matched token not at column %d" % n)
3341 """Helper method for common parse actions that simply return a literal value. Especially
3342 useful when used with transformString().
3344 def _replFunc(*args):
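# Illustrative usage sketch for replaceWith; the _demo_* function is an addition
# for documentation only, not part of the module API.
def _demo_replaceWith():
    # collapse every run of digits to a fixed placeholder during transformString
    number = Word(nums).setParseAction(replaceWith("<NUM>"))
    return number.transformString("call 555 0123")   # -> 'call <NUM> <NUM>'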
3349 """Helper parse action for removing quotation marks from parsed quoted strings.
3350 To use, add this parse action to quoted string using::
3351 quotedString.setParseAction( removeQuotes )
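# Illustrative usage sketch for removeQuotes; the _demo_* function is an addition
# for documentation only (quotedString itself is defined further down in this module).
def _demo_removeQuotes():
    quoted = quotedString.copy().setParseAction(removeQuotes)
    return quoted.parseString('"Hello, World!"')   # -> ['Hello, World!']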
3356 """Helper parse action to convert tokens to upper case."""
3357 return [ tt.upper()
for tt
in map(_ustr,t) ]
3360 """Helper parse action to convert tokens to lower case."""
3361 return [ tt.lower()
for tt
in map(_ustr,t) ]
3364 """Helper parse action to preserve original parsed text,
3365 overriding any nested parse actions."""
3368 except ParseException:
3369 raise ParseFatalException(
"incorrect usage of keepOriginalText - may only be called as a parse action")
3375 """Method to be called from within a parse action to determine the end
3376 location of the parsed tokens."""
3378 fstack = inspect.stack()
3381 for f
in fstack[2:]:
3382 if f[3] ==
"_parseNoCache":
3383 endloc = f[0].f_locals[
"loc"]
3386 raise ParseFatalException(
"incorrect usage of getTokensEndLoc - may only be called from within a parse action")
3391 """Internal helper to construct opening and closing tag expressions, given a tag name"""
3392 if isinstance(tagStr,basestring):
3394 tagStr =
Keyword(tagStr, caseless=
not xml)
3396 resname = tagStr.name
3398 tagAttrName =
Word(alphas,alphanums+
"_-:")
3400 tagAttrValue = dblQuotedString.copy().setParseAction( removeQuotes )
3401 openTag =
Suppress(
"<") + tagStr + \
3403 Optional(
"/",default=[
False]).setResultsName(
"empty").setParseAction(
lambda s,l,t:t[0]==
'/') +
Suppress(
">")
3405 printablesLessRAbrack =
"".join( [ c
for c
in printables
if c
not in ">" ] )
3406 tagAttrValue = quotedString.copy().setParseAction( removeQuotes ) |
Word(printablesLessRAbrack)
3407 openTag =
Suppress(
"<") + tagStr + \
3410 Optional(
"/",default=[
False]).setResultsName(
"empty").setParseAction(
lambda s,l,t:t[0]==
'/') +
Suppress(
">")
3411 closeTag =
Combine(
_L(
"</") + tagStr +
">")
3413 openTag = openTag.setResultsName(
"start"+
"".join(resname.replace(
":",
" ").title().split())).setName(
"<%s>" % tagStr)
3414 closeTag = closeTag.setResultsName(
"end"+
"".join(resname.replace(
":",
" ").title().split())).setName(
"</%s>" % tagStr)
3416 return openTag, closeTag
3419 """Helper to construct opening and closing tag expressions for HTML, given a tag name"""
3423 """Helper to construct opening and closing tag expressions for XML, given a tag name"""
3427 """Helper to create a validating parse action to be used with start tags created
3428 with makeXMLTags or makeHTMLTags. Use withAttribute to qualify a starting tag
3429 with a required attribute value, to avoid false matches on common tags such as
3432 Call withAttribute with a series of attribute names and values. Specify the list
3433 of filter attributes names and values as:
3434 - keyword arguments, as in (class="Customer",align="right"), or
3435 - a list of name-value tuples, as in ( ("ns1:class", "Customer"), ("ns2:align","right") )
3436 For attribute names with a namespace prefix, you must use the second form. Attribute
3437 names are matched insensitive to upper/lower case.
3439 To verify that the attribute exists, but without specifying a value, pass
3440 withAttribute.ANY_VALUE as the value.
3445 attrs = attrDict.items()
3446 attrs = [(k,v)
for k,v
in attrs]
3448 for attrName,attrValue
in attrs:
3449 if attrName
not in tokens:
3451 if attrValue != withAttribute.ANY_VALUE
and tokens[attrName] != attrValue:
3452 raise ParseException(s,l,
"attribute '%s' has value '%s', must be '%s'" %
3453 (attrName, tokens[attrName], attrValue))
3455 withAttribute.ANY_VALUE = object()
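# Illustrative usage sketch for withAttribute; the _demo_* function and the sample
# markup are additions for documentation only, not part of the module API.
def _demo_withAttribute():
    # only <td> start tags carrying align="right" survive the parse action;
    # the second cell below is skipped by searchString
    tdStart,tdEnd = makeHTMLTags("td")
    rightTd = tdStart.copy().setParseAction(withAttribute(align="right"))
    return rightTd.searchString('<td align="right">42</td> <td>skip me</td>')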
class _Constants(object):
    # simple namespace object for the operator-associativity constants below
    pass

opAssoc = _Constants()
opAssoc.LEFT = object()
opAssoc.RIGHT = object()
3462 """Helper method for constructing grammars of expressions made up of
3463 operators working in a precedence hierarchy. Operators may be unary or
3464 binary, left- or right-associative. Parse actions can also be attached
3465 to operator expressions.
3468 - baseExpr - expression representing the most basic element for the nested
3469 - opList - list of tuples, one for each operator precedence level in the
3470 expression grammar; each tuple is of the form
3471 (opExpr, numTerms, rightLeftAssoc, parseAction), where:
3472 - opExpr is the pyparsing expression for the operator;
3473 may also be a string, which will be converted to a Literal;
3474 if numTerms is 3, opExpr is a tuple of two expressions, for the
3475 two operators separating the 3 terms
3476 - numTerms is the number of terms for this operator (must
3478 - rightLeftAssoc is the indicator whether the operator is
3479 right or left associative, using the pyparsing-defined
3480 constants opAssoc.RIGHT and opAssoc.LEFT.
3481 - parseAction is the parse action to be associated with
3482 expressions matching this operator expression (the
3483 parse action tuple member may be omitted)
3487 for i,operDef
in enumerate(opList):
3488 opExpr,arity,rightLeftAssoc,pa = (operDef + (
None,))[:4]
3490 if opExpr
is None or len(opExpr) != 2:
3491 raise ValueError(
"if numterms=3, opExpr must be a tuple or list of two expressions")
3492 opExpr1, opExpr2 = opExpr
3494 if rightLeftAssoc == opAssoc.LEFT:
3498 if opExpr
is not None:
3503 matchExpr =
FollowedBy(lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr) + \
3504 Group( lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr )
3506 raise ValueError(
"operator must be unary (1), binary (2), or ternary (3)")
3507 elif rightLeftAssoc == opAssoc.RIGHT:
3510 if not isinstance(opExpr, Optional):
3512 matchExpr =
FollowedBy(opExpr.expr + thisExpr) +
Group( opExpr + thisExpr )
3514 if opExpr
is not None:
3519 matchExpr =
FollowedBy(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr) + \
3520 Group( lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr )
3522 raise ValueError(
"operator must be unary (1), binary (2), or ternary (3)")
3524 raise ValueError(
"operator must indicate right or left associativity")
3526 matchExpr.setParseAction( pa )
3527 thisExpr << ( matchExpr | lastExpr )
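# Illustrative usage sketch for operatorPrecedence; the _demo_* function is an
# addition for documentation only, not part of the module API.
def _demo_operatorPrecedence():
    # four-function arithmetic, with '*'/'/' binding more tightly than '+'/'-'
    integer = Word(nums).setParseAction(lambda s,l,t: int(t[0]))
    arithExpr = operatorPrecedence( integer,
        [ (oneOf("* /"), 2, opAssoc.LEFT),
          (oneOf("+ -"), 2, opAssoc.LEFT), ] )
    return arithExpr.parseString("1+2*3")   # -> [[1, '+', [2, '*', 3]]]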
dblQuotedString = Regex(r'"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*"').setName("string enclosed in double quotes")
sglQuotedString = Regex(r"'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*'").setName("string enclosed in single quotes")
quotedString = Regex(r'''(?:"(?:[^"\n\r\\]|(?:"")|(?:\\x[0-9a-fA-F]+)|(?:\\.))*")|(?:'(?:[^'\n\r\\]|(?:'')|(?:\\x[0-9a-fA-F]+)|(?:\\.))*')''').setName("quotedString using single or double quotes")
unicodeString = Combine(_L('u') + quotedString.copy())
3537 def nestedExpr(opener="(
", closer=")
", content=None, ignoreExpr=quotedString):
3538 """Helper method for defining nested lists enclosed in opening and closing
3539 delimiters ("(" and ")" are the default).
3542 - opener - opening character for a nested list (default="("); can also be a pyparsing expression
3543 - closer - closing character for a nested list (default=")"); can also be a pyparsing expression
3544 - content - expression for items within the nested lists (default=None)
3545 - ignoreExpr - expression for ignoring opening and closing delimiters (default=quotedString)
3547 If an expression is not provided for the content argument, the nested
3548 expression will capture all whitespace-delimited content between delimiters
3549 as a list of separate values.
3551 Use the ignoreExpr argument to define expressions that may contain
3552 opening or closing characters that should not be treated as opening
3553 or closing characters for nesting, such as quotedString or a comment
3554 expression. Specify multiple expressions using an Or or MatchFirst.
3555 The default is quotedString, but if no expressions are to be ignored,
3556 then pass None for this argument.
3558 if opener == closer:
3559 raise ValueError(
"opening and closing strings cannot be the same")
3561 if isinstance(opener,basestring)
and isinstance(closer,basestring):
3562 if len(opener) == 1
and len(closer)==1:
3563 if ignoreExpr
is not None:
3565 CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3566 ).setParseAction(
lambda t:t[0].strip()))
3568 content = (empty+
CharsNotIn(opener+closer+ParserElement.DEFAULT_WHITE_CHARS
3569 ).setParseAction(
lambda t:t[0].strip()))
3571 if ignoreExpr
is not None:
3574 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3575 ).setParseAction(
lambda t:t[0].strip()))
3578 CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS,exact=1))
3579 ).setParseAction(
lambda t:t[0].strip()))
3581 raise ValueError(
"opening and closing arguments must be strings if no content expression is given")
3583 if ignoreExpr
is not None:
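# Illustrative usage sketch for nestedExpr; the _demo_* function is an addition
# for documentation only, not part of the module API.
def _demo_nestedExpr():
    # parenthesized groups become nested Python lists; the quoted string is left
    # intact because quotedString is the default ignoreExpr
    return nestedExpr().parseString('(a (b c) "(not a group)" d)')
    # -> [['a', ['b', 'c'], '"(not a group)"', 'd']]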
def indentedBlock(blockStatementExpr, indentStack, indent=True):
    """Helper method for defining space-delimited indentation blocks, such as
       those used to define block statements in Python source code.

       Parameters:
        - blockStatementExpr - expression defining syntax of statement that
            is repeated within the indented block
        - indentStack - list created by caller to manage indentation stack
            (multiple statementWithIndentedBlock expressions within a single grammar
            should share a common indentStack)
        - indent - boolean indicating whether block must be indented beyond
            the current level; set to False for block of left-most statements
            (default=True)

       A valid block must contain at least one blockStatement.
    """
    def checkPeerIndent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseFatalException(s,l,"illegal nesting")
            raise ParseException(s,l,"not a peer entry")

    def checkSubIndent(s,l,t):
        curCol = col(l,s)
        if curCol > indentStack[-1]:
            indentStack.append( curCol )
        else:
            raise ParseException(s,l,"not a subentry")

    def checkUnindent(s,l,t):
        if l >= len(s): return
        curCol = col(l,s)
        if not(indentStack and curCol < indentStack[-1] and curCol <= indentStack[-2]):
            raise ParseException(s,l,"not an unindent")
        indentStack.pop()

    NL = OneOrMore(LineEnd().setWhitespaceChars("\t ").suppress())
    INDENT = Empty() + Empty().setParseAction(checkSubIndent)
    PEER   = Empty().setParseAction(checkPeerIndent)
    UNDENT = Empty().setParseAction(checkUnindent)
    if indent:
        smExpr = Group( Optional(NL) +
            FollowedBy(blockStatementExpr) +
            INDENT + (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) + UNDENT)
    else:
        smExpr = Group( Optional(NL) +
            (OneOrMore( PEER + Group(blockStatementExpr) + Optional(NL) )) )
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr
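# Illustrative usage sketch for indentedBlock (a rough sketch only; the grammar
# names and sample text are additions for documentation, not part of the module API).
def _demo_indentedBlock():
    # a tiny outline grammar: a "heading:" line followed by an indented block of
    # items; every indentedBlock in a grammar should share one indentStack list
    indentStack = [1]
    stmt = Forward()
    item = Word(alphas)
    heading = Group(item + Suppress(":") + indentedBlock(stmt, indentStack))
    stmt << (heading | item)
    outline = OneOrMore(stmt)
    # parses a heading followed by its indented members into nested groups
    return outline.parseString("fruits:\n    apple\n    pear\n")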
alphas8bit = srange(r"[\0xc0-\0xd6\0xd8-\0xf6\0xf8-\0xff]")
punc8bit = srange(r"[\0xa1-\0xbf\0xd7\0xf7]")
commonHTMLEntity = Combine(_L("&") + oneOf("gt lt amp nbsp quot").setResultsName("entity") + ";").streamline()
_htmlEntityMap = dict(zip("gt lt amp nbsp quot".split(),'><& "'))
replaceHTMLEntity = lambda t : t.entity in _htmlEntityMap and _htmlEntityMap[t.entity] or None
cStyleComment = Regex(r"/\*(?:[^*]*\*+)+?/").setName("C style comment")
htmlComment = Regex(r"<!--[\s\S]*?-->")
restOfLine = Regex(r".*").leaveWhitespace()
dblSlashComment = Regex(r"\/\/(\\\n|.)*").setName("// comment")
cppStyleComment = Regex(r"/(?:\*(?:[^*]*\*+)+?/|/[^\n]*(?:\n[^\n]*)*?(?:(?<!\\)|\Z))").setName("C++ style comment")
javaStyleComment = cppStyleComment
pythonStyleComment = Regex(r"#.*").setName("Python style comment")
_noncomma = "".join( [ c for c in printables if c != "," ] )
_commasepitem = Combine(OneOrMore(Word(_noncomma) +
                        Optional( Word(" \t") +
                                  ~Literal(",") + ~LineEnd() ) ) ).streamline().setName("commaItem")
commaSeparatedList = delimitedList( Optional( quotedString | _commasepitem, default="") ).setName("commaSeparatedList")
if __name__ == "__main__":

    def test( teststring ):
        try:
            tokens = simpleSQL.parseString( teststring )
            tokenlist = tokens.asList()
            print (teststring + "->"   + str(tokenlist))
            print ("tokens = "         + str(tokens))
            print ("tokens.columns = " + str(tokens.columns))
            print ("tokens.tables = "  + str(tokens.tables))
            print (tokens.asXML("SQL",True))
        except ParseBaseException,err:
            print (teststring + "->")
            print (err.line)
            print (" "*(err.column-1) + "^")
            print (err)

    # a simplified SELECT statement grammar, used only for this self-test
    selectToken    = CaselessLiteral( "select" )
    fromToken      = CaselessLiteral( "from" )

    ident          = Word( alphas, alphanums + "_$" )
    columnName     = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    columnNameList = Group( delimitedList( columnName ) )
    tableName      = delimitedList( ident, ".", combine=True ).setParseAction( upcaseTokens )
    tableNameList  = Group( delimitedList( tableName ) )
    simpleSQL      = ( selectToken + \
                     ( '*' | columnNameList ).setResultsName( "columns" ) + \
                     fromToken + \
                     tableNameList.setResultsName( "tables" ) )

    test( "SELECT * from XYZZY, ABC" )
    test( "select * from SYS.XYZZY" )
    test( "Select A from Sys.dual" )
    test( "Select AA,BB,CC from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Select A, B, C from Sys.dual" )
    test( "Xelect A, B, C from Sys.dual" )
    test( "Select A, B, C frox Sys.dual" )
    test( "Select ^^^ frox Sys.dual" )
    test( "Select A, B, C from Sys.dual, Table2 " )