encoder.py

Go to the documentation of this file.
00001 """Implementation of JSONEncoder
00002 """
00003 import re
00004 
00005 try:
00006     from simplejson._speedups import encode_basestring_ascii as c_encode_basestring_ascii
00007 except ImportError:
00008     c_encode_basestring_ascii = None
00009 try:
00010     from simplejson._speedups import make_encoder as c_make_encoder
00011 except ImportError:
00012     c_make_encoder = None
00013 
# Characters that always need escaping inside a JSON string: the control
# range 0x00-0x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Backslash, double quote, or anything outside the printable ASCII range
# (space through tilde); used when the output must be ASCII-only.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Any character in the 0x80-0xff range.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a character to its JSON escape sequence.  The short escapes are
# registered first; the loop below then fills in a \uXXXX escape for every
# remaining control character without overriding a short escape.
ESCAPE_DCT = dict([
    ('\\', '\\\\'),
    ('"', '\\"'),
    ('\b', '\\b'),
    ('\f', '\\f'),
    ('\n', '\\n'),
    ('\r', '\\r'),
    ('\t', '\\t'),
])
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)

# Assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')
# Function used to turn a finite float into its textual form.
FLOAT_REPR = repr
00033 
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
00041 
00042 
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A ``str`` that contains characters in the 0x80-0xff range is decoded as
    UTF-8 first.  Characters with an entry in ``ESCAPE_DCT`` use that escape;
    every other character matched by ``ESCAPE_ASCII`` becomes a ``\\uXXXX``
    escape, or a surrogate pair of two such escapes for code points of
    0x10000 and above.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # surrogate pair
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u%04x\\u%04x' % (high, low)
        return '\\u%04x' % (codepoint,)

    escaped = ESCAPE_ASCII.sub(replace, s)
    return '"' + str(escaped) + '"'
00066 
00067 
# Use the C implementation when the speedups import succeeded, otherwise
# fall back to the pure-Python one.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
00069 
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; ``__init__`` overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion.  Otherwise, no such check takes
        place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                # Byte strings are only re-decoded here for a non-default,
                # non-None encoding.
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when there is an actual non-default
        # encoding to decode with.  ``encoding=None`` means "do not
        # transform", matching the guard used in ``encode``; without the
        # None check every byte string would hit ``o.decode(None)``.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan, _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor- and/or
            # platform-specific, so do tests which don't depend on the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for compact, unsorted one-shot output.
        # ``indent`` is compared against None rather than tested for
        # truthiness so that ``indent=0`` (newlines only) still takes the
        # pure-Python pretty-printing path.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
00261 
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        False=False,
        True=True,
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode`` generator function.

    The returned callable takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` piece by piece.

    markers -- dict used to detect circular references (keyed by ``id()``
        of the container/object being encoded), or None to skip the check.
    _default -- called for objects of no directly supported type; its
        result is encoded in place of the object.
    _encoder -- turns a string into its quoted JSON form.
    _indent -- None for compact output, otherwise the number of spaces per
        nesting level.
    _floatstr -- turns a float into its JSON text.
    _key_separator, _item_separator -- text placed between a key and its
        value, and between consecutive items.
    _sort_keys -- if true, dict items are emitted sorted by key.
    _skipkeys -- if true, dict keys of unsupported type are skipped instead
        of raising TypeError.
    _one_shot -- accepted but not referenced in this function's body.

    The trailing keyword parameters only rebind builtins as locals (see the
    HACK note in the signature).
    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the JSON array text for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next element (the
        # opening bracket at first, the separator afterwards) so that it can
        # be yielded together with a scalar element as one chunk.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, basestring):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, (int, long)):
                yield buf + str(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                # Containers and other objects: emit the pending prefix on
                # its own, then delegate to the matching generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing bracket goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            # Done with this container; it may legitimately appear again
            # elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the JSON object text for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = dct.items()
            items.sort(key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        for key, value in items:
            # Normalize the key to a string; non-string keys of the types
            # below are converted to their JSON text first.
            if isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            # True/False are tested by identity before the int check below,
            # so they become 'true'/'false' rather than going through str().
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            # Closing brace goes on its own line at the outer indent level.
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Entry point: dispatch on the type of ``o``.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unsupported type: register the original object in ``markers``
            # (so a ``_default`` that keeps returning it is detected as a
            # cycle), convert it, and encode the converted value.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]

    return _iterencode
Gaudi Framework, version v21r8

encoder.py