00001 """Implementation of JSONDecoder
00002 """
00003 import re
00004 import sys
00005 import struct
00006
00007 from simplejson.scanner import make_scanner
00008 try:
00009 from simplejson._speedups import scanstring as c_scanstring
00010 except ImportError:
00011 c_scanstring = None
00012
# Names exported by ``from ... import *``.
__all__ = ['JSONDecoder']

# Flags shared by the regular expressions compiled in this module
# (STRINGCHUNK and WHITESPACE).  DOTALL lets ``.`` match newlines too.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
00016
00017 def _floatconstants():
00018 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
00019 if sys.byteorder != 'big':
00020 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
00021 nan, inf = struct.unpack('dd', _BYTES)
00022 return nan, inf, -inf
00023
00024 NaN, PosInf, NegInf = _floatconstants()
00025
00026
def linecol(doc, pos):
    """Return the ``(lineno, colno)`` of character offset ``pos`` in ``doc``.

    Both numbers are 1-based: offset 0 is line 1, column 1, and the first
    character after a newline is column 1 of the following line.
    """
    lineno = doc.count('\n', 0, pos) + 1
    # rfind() returns -1 when no newline precedes ``pos``, which makes the
    # first line come out as ``pos + 1`` -- the same 1-based numbering that
    # every later line gets from the distance to its preceding newline.
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
00034
00035
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with its location inside ``doc``.

    ``pos`` is the character offset the message refers to.  When ``end`` is
    also given, the message describes the span from ``pos`` to ``end``.
    Line and column numbers are obtained from ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        # Span form: report both endpoints.
        stop_line, stop_col = linecol(doc, end)
        template = '%s: line %d column %d - line %d column %d (char %d - %d)'
        return template % (
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    # Single-position form.
    template = '%s: line %d column %d (char %d)'
    return template % (msg, start_line, start_col, pos)
00049
00050
# Non-standard numeric literals mapped to their float values.  Its
# ``__getitem__`` is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: the shortest run of characters up to the next terminator.
# Group 2: the terminator itself -- a double quote, a backslash, or a
# control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Character following a backslash -> the character it stands for.
# ``\u`` escapes are not listed here; py_scanstring handles them itself.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding py_scanstring uses when it is called with ``encoding=None``.
DEFAULT_ENCODING = "utf-8"
00064
# Characters allowed inside the XXXX part of a \uXXXX escape.
_HEXDIGITS = '0123456789abcdefABCDEF'


def _decode_uXXXX(s, start, msg, errpos):
    """Return the integer encoded by the four hex digits at ``s[start:start + 4]``.

    Raises ``ValueError(errmsg(msg, s, errpos))`` when fewer than four
    characters remain or any of them is not a hexadecimal digit.
    """
    esc = s[start:start + 4]
    if len(esc) == 4:
        # int(esc, 16) alone is too lenient: it also accepts forms such as
        # '0x41', '+123' or digits padded with whitespace.
        for c in esc:
            if c not in _HEXDIGITS:
                break
        else:
            return int(esc, 16)
    raise ValueError(errmsg(msg, s, errpos))


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` is used to decode chunks that are not already ``unicode``
    (DEFAULT_ENCODING when None).  ``_b`` and ``_m`` are the escape table
    and chunk matcher, bound as defaults so they are local names.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote.

    Raises ValueError for an unterminated string, a literal control
    character in strict mode, an unknown backslash escape, a malformed
    ``\\uXXXX`` escape, or (on wide unicode builds) a high surrogate that is
    not followed by a valid low surrogate escape."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated" error messages.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content holds zero or more unescaped string characters.
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of the string, a literal control character,
        # or a backslash announcing an escape sequence.
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            # Not a unicode escape, so it must be in the lookup table.
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # Unicode escape; ``end`` is the index of the 'u'.
            uni = _decode_uXXXX(s, end + 1, "Invalid \\uXXXX escape", end)
            next_end = end + 5
            # A high surrogate must be combined with the low surrogate that
            # follows it; only done on builds with a wide unicode type.
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _decode_uXXXX(s, end + 7, msg, end)
                # Without this range check a non-surrogate second escape
                # would be folded into an unrelated code point.
                if not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character.
        _append(char)
    return u''.join(chunks), end
00140
00141
00142
# Use the C implementation when the optional speedups module was imported
# (c_scanstring is None otherwise), else the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# Matches a possibly empty run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for quick ``in`` tests.
WHITESPACE_STR = ' \t\n\r'
00147
def JSONObject(s_and_end, encoding, strict, scan_once, object_hook, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object into a dict.

    ``s_and_end`` is an ``(s, end)`` tuple: the document and the index at
    which scanning starts (presumably just past the opening ``{`` -- the
    caller is not visible here).  ``encoding`` and ``strict`` are passed on
    to ``scanstring`` for the keys.  ``scan_once(s, idx)`` must return
    ``(value, next_idx)`` or raise StopIteration when no value starts at
    ``idx``.  ``object_hook``, when not None, is called with the finished
    dict -- including an empty one -- and its result is returned instead.

    Returns ``(result, end)`` where ``end`` is the index just past the
    closing ``}``.  Raises ValueError on a missing property name, ``:`` or
    ``,`` delimiter, or value.
    """
    # Unpacked here rather than in the signature so the definition does not
    # rely on tuple parameters; positional callers are unaffected.
    s, end = s_and_end
    pairs = {}
    # A slice avoids IndexError at end of input; the checks below then
    # raise a more specific ValueError.
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'.
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object.
        if nextchar == '}':
            # The hook must see empty objects as well.
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)

        # Fast path: the separator is usually ":" or ": ", so the regex is
        # only consulted when the next character is not the colon.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise ValueError(errmsg("Expecting : delimiter", s, end))

        end += 1

        # Skip whitespace after the colon; a single character is handled
        # without calling the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # End of input: scan_once below reports the missing value.
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after the comma and fetch the next key's quote.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))

    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
00222
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a list.

    ``s_and_end`` is an ``(s, end)`` tuple: the document and the index at
    which scanning starts (presumably just past the opening ``[`` -- the
    caller is not visible here).  ``scan_once(s, idx)`` must return
    ``(value, next_idx)`` or raise StopIteration when no value starts at
    ``idx``.

    Returns ``(values, end)`` where ``end`` is the index just past the
    closing ``]``.  Raises ValueError when a value or a ``,`` delimiter is
    missing.
    """
    # Unpacked here rather than in the signature so the definition does not
    # rely on tuple parameters; positional callers are unaffected.
    s, end = s_and_end
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for the trivial empty array.
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # step back one to report its position, as JSONObject does.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))

        # Skip whitespace after the comma; a single character is handled
        # without calling the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # End of input: scan_once reports the missing value next round.
            pass

    return values, end
00258
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also accepted and decoded to
    the corresponding ``float`` values, although they are not part of the
    JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True):
        """Configure the decoder.

        ``encoding`` is the encoding used to interpret ``str`` objects
        decoded by this instance (utf-8 by default).  It has no effect on
        ``unicode`` input.  Only encodings that are a superset of ASCII
        currently work; text in any other encoding should be passed in as
        ``unicode``.

        ``object_hook``, if given, is called with the result of every JSON
        object decoded, and its return value replaces the ``dict``.  This
        allows custom deserializations (e.g. JSON-RPC class hinting).

        ``parse_float``, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to float(num_str).
        Use it to plug in another datatype or parser (e.g.
        decimal.Decimal).

        ``parse_int``, if given, is called with the string of every JSON
        int to be decoded.  The default is equivalent to int(num_str).  Use
        it to plug in another datatype or parser (e.g. float).

        ``parse_constant``, if given, is called with one of the strings
        -Infinity, Infinity, NaN.  It can be used to raise an exception
        when such invalid JSON numbers are encountered.

        ``strict`` is stored on the instance as given.

        """
        # A missing (or otherwise false) converter falls back to the
        # built-in one.
        if not parse_float:
            parse_float = float
        if not parse_int:
            parse_int = int
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__

        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_float = parse_float
        self.parse_int = parse_int
        self.parse_constant = parse_constant
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        # Built last: the scanner factory receives the configured instance.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Whitespace before and after the document is skipped.  Anything else
        after the document raises ValueError.

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, end, len(s)))

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document), starting at index ``idx``.

        Returns a 2-tuple of the Python representation and the index in
        ``s`` where the document ended, so trailing extraneous data is
        tolerated.  Raises ValueError when no document can be decoded.

        """
        try:
            result, stop = self.scan_once(s, idx)
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return result, stop