Gaudi Framework, version v21r9

Home   Generated: 3 May 2010

decoder.py

Go to the documentation of this file.
00001 """Implementation of JSONDecoder
00002 """
00003 import re
00004 import sys
00005 import struct
00006 
00007 from simplejson.scanner import make_scanner
00008 try:
00009     from simplejson._speedups import scanstring as c_scanstring
00010 except ImportError:
00011     c_scanstring = None
00012 
# Only the decoder class is exported by ``from <module> import *``.
__all__ = ['JSONDecoder']

# Regex flags shared by every pattern compiled in this module:
# VERBOSE ignores unescaped whitespace outside character classes,
# MULTILINE makes ^/$ match at line boundaries, and DOTALL lets '.'
# match newlines as well.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
00016 
00017 def _floatconstants():
00018     _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
00019     if sys.byteorder != 'big':
00020         _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
00021     nan, inf = struct.unpack('dd', _BYTES)
00022     return nan, inf, -inf
00023 
00024 NaN, PosInf, NegInf = _floatconstants()
00025 
00026 
def linecol(doc, pos):
    """Translate a character offset into a ``(line, column)`` pair.

    ``doc`` is the text being decoded and ``pos`` a 0-based index into it.
    Both the returned line and column are 1-based.

    The column is computed the same way on every line.  Previously the
    first line reported a 0-based column (``pos``) while later lines
    reported a 1-based one, so error messages were off by one depending
    on which line the error occurred on.
    """
    lineno = doc.count('\n', 0, pos) + 1
    if lineno == 1:
        # No preceding newline: the column is the offset itself, made 1-based.
        colno = pos + 1
    else:
        # Distance from the last newline before pos; already 1-based.
        colno = pos - doc.rindex('\n', 0, pos)
    return lineno, colno
00034 
00035 
def errmsg(msg, doc, pos, end=None):
    """Format a decoding error message with its location in ``doc``.

    ``pos`` is the character offset where the problem starts.  When
    ``end`` is given, the message describes the range ``pos``..``end``
    instead of a single position.  Line and column numbers come from
    ``linecol``.
    """
    # Note that this function is called from _speedups
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    return '%s: line %d column %d (char %d)' % (
        msg, start_line, start_col, pos)
00049 
00050 
# Non-standard JSON literals mapped to their float values; used as the
# default ``parse_constant`` lookup in JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Matches a (possibly empty) run of ordinary string characters followed by
# one "terminator": a double quote, a backslash, or a control character
# in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes (the character after a backslash) and the text
# each one decodes to.  ``\u`` escapes are handled separately in
# py_scanstring.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring falls back to when it is called with encoding=None.
DEFAULT_ENCODING = "utf-8"
00064 
# Exactly four hexadecimal digits, as required after ``\u``.
_HEXDIGITS = re.compile(r'[0-9A-Fa-f]{4}')


def _decode_uXXXX(s, pos, _hex=_HEXDIGITS.match):
    """Return the integer value of the 4 hex digits following s[pos] ('u').

    Returns None when the four characters after ``pos`` are missing or are
    not all hexadecimal digits.  A plain ``int(text, 16)`` is not enough,
    because it also accepts signs, surrounding whitespace and a ``0x``
    prefix.
    """
    found = _hex(s, pos + 1)
    if found is None:
        return None
    return int(found.group(), 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode non-unicode chunks and defaults to
    DEFAULT_ENCODING.  ``_b`` and ``_m`` are default-argument bindings of
    the escape table and the chunk matcher.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape sequence: 'u' followed by exactly 4 hex digits
            uni = _decode_uXXXX(s, end)
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                # end + 6 is the 'u' of the second escape
                uni2 = _decode_uXXXX(s, end + 6)
                # The second half must be a low surrogate, otherwise the
                # combination below would produce a meaningless code point.
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
00140 
00141 
# Use speedup if available: c_scanstring is None when the C extension
# could not be imported, in which case the pure-Python scanner is used.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of JSON whitespace.  The literal space is
# inside a character class, so re.VERBOSE does not strip it.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same whitespace characters as a plain string, for cheap ``in`` tests.
WHITESPACE_STR = ' \t\n\r'
00147 
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object and return ``(result, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document text and the index
    of the character just after the opening ``{``.  It is a single named
    positional parameter unpacked in the body (rather than a tuple
    parameter in the signature), so the function is valid on interpreters
    without tuple-parameter unpacking; positional callers are unaffected.

    ``encoding`` and ``strict`` are forwarded to ``scanstring`` for the
    keys.  ``scan_once(s, idx)`` parses one value and returns
    ``(value, end)``, raising StopIteration when no value is found.
    ``object_hook``, when not None, is called with the finished dict and
    its return value is used instead.  This now also applies to an empty
    object, which previously bypassed the hook.

    Raises ValueError for a missing property name, ``:`` or ``,``
    delimiter, or value.  ``end`` in the result is the index just after
    the closing ``}``.
    """
    s, end = s_and_end
    pairs = {}
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            # Apply the hook here too, so it sees every decoded object.
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace after ':'; running off the end of the input is
        # left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value

        # Find the ',' or '}' that follows the value.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # After ',' the next non-whitespace character must open a key.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
00222 
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array and return ``(values, end)``.

    ``s_and_end`` is a ``(s, end)`` pair: the document text and the index
    of the character just after the opening ``[``.  It is a single named
    positional parameter unpacked in the body (rather than a tuple
    parameter in the signature); positional callers are unaffected.

    ``scan_once(s, idx)`` parses one value and returns ``(value, end)``,
    raising StopIteration when no value is found.

    Raises ValueError when a value or the ``,`` delimiter is missing.
    ``end`` in the result is the index just after the closing ``]``.
    """
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1 (the same convention JSONObject uses).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after ','; running off the end of the input is
        # left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
00258 
class JSONDecoder(object):
    """Decoder that turns JSON text (see <http://json.org>) into Python
    objects.

    Unless customised through the constructor, JSON values map to Python
    values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    The literals ``NaN``, ``Infinity`` and ``-Infinity`` are accepted too
    and become the matching ``float`` values, although they are not part
    of the JSON specification.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` names the encoding used to interpret ``str`` input
        (utf-8 when left as None); it has no effect on ``unicode`` input.
        Only ASCII-superset encodings currently work; text in any other
        encoding should be passed in as ``unicode``.

        ``object_hook``, when given, receives each decoded JSON object
        and its return value replaces the ``dict``.  Useful for custom
        deserialisation such as JSON-RPC class hinting.

        ``parse_float``, when given, is called with the string of each
        JSON float; the default behaves like ``float(num_str)``.  Can be
        used to plug in another type or parser (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the string of each JSON
        int; the default behaves like ``int(num_str)``.  Can be used to
        plug in another type or parser (e.g. float).

        ``parse_constant``, when given, is called with one of the strings
        -Infinity, Infinity, NaN.  Can be used to raise an exception when
        such invalid JSON numbers are encountered.

        ``strict`` is stored on the instance unchanged; py_scanstring
        rejects literal control characters inside strings when its
        ``strict`` argument is true.

        """
        # Substitute the built-in parsers for any that were not supplied.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last, after every attribute above has been assigned.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Decode ``s`` (a ``str`` or ``unicode`` holding one JSON
        document) and return the resulting Python object.

        Leading and trailing whitespace is ignored; anything else after
        the document raises ValueError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at ``s[idx]`` and return
        a 2-tuple of the Python object and the index in ``s`` at which
        the document ended.

        Unlike ``decode``, trailing data after the document is allowed,
        so this can be used on a string with extraneous data at the end.
        Raises ValueError when no document can be decoded.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return obj, end

Generated at Mon May 3 12:14:37 2010 for Gaudi Framework, version v21r9 by Doxygen version 1.5.6 written by Dimitri van Heesch, © 1997-2004