00001 """Implementation of JSONEncoder
00002 """
00003 import re
00004
00005 try:
00006 from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
00007 except ImportError:
00008 c_encode_basestring_ascii = None
00009 try:
00010 from simplejson._speedups import make_encoder as c_make_encoder
00011 except ImportError:
00012 c_make_encoder = None
00013
# Characters that must be escaped inside any JSON string literal:
# the control range 0x00-0x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same as above, plus every character outside the printable ASCII range
# (space through tilde); used when the output must be pure ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects a character/byte in the range 0x80-0xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a single character to its JSON escape sequence.  The short,
# named escapes are listed explicitly ...
ESCAPE_DCT = dict([
    ('\\', '\\\\'),
    ('"', '\\"'),
    ('\b', '\\b'),
    ('\f', '\\f'),
    ('\n', '\\n'),
    ('\r', '\\r'),
    ('\t', '\\t'),
])
# ... and every remaining control character falls back to \u00XX.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Relies on the oversized literal overflowing to positive infinity.
INFINITY = float('1e66666')
# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
00033
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Each character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.  All
    other characters are passed through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
00041
00042
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A byte string containing bytes in the 0x80-0xff range is decoded as
    UTF-8 first.  Characters matched by ``ESCAPE_ASCII`` are then replaced
    by their ``ESCAPE_DCT`` entry when one exists, and by a ``\\uXXXX``
    escape (or a surrogate pair of two such escapes for code points of
    0x10000 and above) otherwise.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (codepoint,)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
00066
00067
# Use the C implementation when the speedups module provided one,
# otherwise fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
00069
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the supplied callable
            # on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for a bare string: escape it directly instead of going
        # through the chunk generator.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # Byte strings in a non-UTF-8 encoding are decoded here;
                # encoding=None means "leave byte strings alone".
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        chunks = self.iterencode(o, _one_shot=True)
        # Materialise the chunks (unless already a sequence) before joining.
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is a private flag passed by ``encode()``; when it is
        true and the C encoder is available (and neither indentation nor
        key sorting was requested) the C encoder is used instead of the
        pure-Python generator.

        Raises ``ValueError`` for NaN/Infinity floats when ``allow_nan``
        is false.

        """
        # ``markers`` maps id(container) -> container for every container
        # currently being encoded; None disables the circularity check.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Wrap the string encoder so byte strings are decoded with the
        # configured encoding first.  encoding=None means "do not decode"
        # (encode() treats it the same way), so no wrapper is installed
        # in that case -- ``o.decode(None)`` would fail.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons: NaN is the
            # only float that compares unequal to itself.
            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # indent=0 is a genuine pretty-printing request (newlines only),
        # so compare against None instead of relying on truthiness;
        # otherwise indent=0 would be handled like "no indentation" when
        # picking the encoder.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
00261
00262 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
00263
00264 False=False,
00265 True=True,
00266 ValueError=ValueError,
00267 basestring=basestring,
00268 dict=dict,
00269 float=float,
00270 id=id,
00271 int=int,
00272 isinstance=isinstance,
00273 list=list,
00274 long=long,
00275 str=str,
00276 tuple=tuple,
00277 ):
00278
00279 def _iterencode_list(lst, _current_indent_level):
00280 if not lst:
00281 yield '[]'
00282 return
00283 if markers is not None:
00284 markerid = id(lst)
00285 if markerid in markers:
00286 raise ValueError("Circular reference detected")
00287 markers[markerid] = lst
00288 buf = '['
00289 if _indent is not None:
00290 _current_indent_level += 1
00291 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
00292 separator = _item_separator + newline_indent
00293 buf += newline_indent
00294 else:
00295 newline_indent = None
00296 separator = _item_separator
00297 first = True
00298 for value in lst:
00299 if first:
00300 first = False
00301 else:
00302 buf = separator
00303 if isinstance(value, basestring):
00304 yield buf + _encoder(value)
00305 elif value is None:
00306 yield buf + 'null'
00307 elif value is True:
00308 yield buf + 'true'
00309 elif value is False:
00310 yield buf + 'false'
00311 elif isinstance(value, (int, long)):
00312 yield buf + str(value)
00313 elif isinstance(value, float):
00314 yield buf + _floatstr(value)
00315 else:
00316 yield buf
00317 if isinstance(value, (list, tuple)):
00318 chunks = _iterencode_list(value, _current_indent_level)
00319 elif isinstance(value, dict):
00320 chunks = _iterencode_dict(value, _current_indent_level)
00321 else:
00322 chunks = _iterencode(value, _current_indent_level)
00323 for chunk in chunks:
00324 yield chunk
00325 if newline_indent is not None:
00326 _current_indent_level -= 1
00327 yield '\n' + (' ' * (_indent * _current_indent_level))
00328 yield ']'
00329 if markers is not None:
00330 del markers[markerid]
00331
00332 def _iterencode_dict(dct, _current_indent_level):
00333 if not dct:
00334 yield '{}'
00335 return
00336 if markers is not None:
00337 markerid = id(dct)
00338 if markerid in markers:
00339 raise ValueError("Circular reference detected")
00340 markers[markerid] = dct
00341 yield '{'
00342 if _indent is not None:
00343 _current_indent_level += 1
00344 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
00345 item_separator = _item_separator + newline_indent
00346 yield newline_indent
00347 else:
00348 newline_indent = None
00349 item_separator = _item_separator
00350 first = True
00351 if _sort_keys:
00352 items = dct.items()
00353 items.sort(key=lambda kv: kv[0])
00354 else:
00355 items = dct.iteritems()
00356 for key, value in items:
00357 if isinstance(key, basestring):
00358 pass
00359
00360
00361 elif isinstance(key, float):
00362 key = _floatstr(key)
00363 elif key is True:
00364 key = 'true'
00365 elif key is False:
00366 key = 'false'
00367 elif key is None:
00368 key = 'null'
00369 elif isinstance(key, (int, long)):
00370 key = str(key)
00371 elif _skipkeys:
00372 continue
00373 else:
00374 raise TypeError("key " + repr(key) + " is not a string")
00375 if first:
00376 first = False
00377 else:
00378 yield item_separator
00379 yield _encoder(key)
00380 yield _key_separator
00381 if isinstance(value, basestring):
00382 yield _encoder(value)
00383 elif value is None:
00384 yield 'null'
00385 elif value is True:
00386 yield 'true'
00387 elif value is False:
00388 yield 'false'
00389 elif isinstance(value, (int, long)):
00390 yield str(value)
00391 elif isinstance(value, float):
00392 yield _floatstr(value)
00393 else:
00394 if isinstance(value, (list, tuple)):
00395 chunks = _iterencode_list(value, _current_indent_level)
00396 elif isinstance(value, dict):
00397 chunks = _iterencode_dict(value, _current_indent_level)
00398 else:
00399 chunks = _iterencode(value, _current_indent_level)
00400 for chunk in chunks:
00401 yield chunk
00402 if newline_indent is not None:
00403 _current_indent_level -= 1
00404 yield '\n' + (' ' * (_indent * _current_indent_level))
00405 yield '}'
00406 if markers is not None:
00407 del markers[markerid]
00408
00409 def _iterencode(o, _current_indent_level):
00410 if isinstance(o, basestring):
00411 yield _encoder(o)
00412 elif o is None:
00413 yield 'null'
00414 elif o is True:
00415 yield 'true'
00416 elif o is False:
00417 yield 'false'
00418 elif isinstance(o, (int, long)):
00419 yield str(o)
00420 elif isinstance(o, float):
00421 yield _floatstr(o)
00422 elif isinstance(o, (list, tuple)):
00423 for chunk in _iterencode_list(o, _current_indent_level):
00424 yield chunk
00425 elif isinstance(o, dict):
00426 for chunk in _iterencode_dict(o, _current_indent_level):
00427 yield chunk
00428 else:
00429 if markers is not None:
00430 markerid = id(o)
00431 if markerid in markers:
00432 raise ValueError("Circular reference detected")
00433 markers[markerid] = o
00434 o = _default(o)
00435 for chunk in _iterencode(o, _current_indent_level):
00436 yield chunk
00437 if markers is not None:
00438 del markers[markerid]
00439
00440 return _iterencode