Revert JSON canonicalization code

Undo our Python 2 compatibility fixes
pull/1/head
Chris Lenk 2021-02-18 12:21:52 -05:00
parent f9ca68458a
commit 490251dd85
3 changed files with 58 additions and 65 deletions

View File

@ -23,3 +23,4 @@ repos:
args: ["-c", "--diff"]
- id: isort
name: Sort python imports (fixes files)
exclude: ^stix2/canonicalization/

View File

@ -20,12 +20,8 @@
# JCS compatible JSON serializer for Python 3.x #
#################################################
# This file has been modified to be compatible with Python 2.x as well
import re
import six
from stix2.canonicalization.NumberToJson import convert2Es6Format
try:
@ -55,10 +51,10 @@ ESCAPE_DCT = {
}
for i in range(0x20):
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
INFINITY = float('inf')
def py_encode_basestring(s):
"""Return a JSON representation of a Python string
@ -70,7 +66,6 @@ def py_encode_basestring(s):
encode_basestring = (c_encode_basestring or py_encode_basestring)
def py_encode_basestring_ascii(s):
"""Return an ASCII-only JSON representation of a Python string
@ -83,6 +78,7 @@ def py_encode_basestring_ascii(s):
n = ord(s)
if n < 0x10000:
return '\\u{0:04x}'.format(n)
#return '\\u%04x' % (n,)
else:
# surrogate pair
n -= 0x10000
@ -96,7 +92,6 @@ encode_basestring_ascii = (
c_encode_basestring_ascii or py_encode_basestring_ascii
)
class JSONEncoder(object):
"""Extensible JSON <http://json.org> encoder for Python data structures.
@ -128,11 +123,10 @@ class JSONEncoder(object):
"""
item_separator = ', '
key_separator = ': '
def __init__(
self, skipkeys=False, ensure_ascii=False,
self, *, skipkeys=False, ensure_ascii=False,
check_circular=True, allow_nan=True, sort_keys=True,
indent=None, separators=(',', ':'), default=None,
indent=None, separators=(',', ':'), default=None
):
"""Constructor for JSONEncoder, with sensible defaults.
@ -277,6 +271,7 @@ class JSONEncoder(object):
return text
if (
_one_shot and c_make_encoder is not None
and self.indent is None
@ -294,11 +289,10 @@ class JSONEncoder(object):
)
return _iterencode(o, 0)
def _make_iterencode(
markers, _default, _encoder, _indent, _floatstr,
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
# HACK: hand-optimized bytecode; turn globals into locals
## HACK: hand-optimized bytecode; turn globals into locals
ValueError=ValueError,
dict=dict,
float=float,
@ -362,10 +356,7 @@ def _make_iterencode(
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
# Below line commented-out for python2 compatibility
# yield from chunks
for chunk in chunks:
yield chunk
yield from chunks
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
@ -397,8 +388,7 @@ def _make_iterencode(
else:
items = dct.items()
for key, value in items:
# Replaced isinstance(key, str) with below to enable simultaneous python 2 & 3 compatibility
if isinstance(key, six.string_types) or isinstance(key, six.binary_type):
if isinstance(key, str):
pass
# JavaScript is weakly typed for these, so it makes sense to
# also allow them. Many encoders seem to do something like this.
@ -445,10 +435,7 @@ def _make_iterencode(
chunks = _iterencode_dict(value, _current_indent_level)
else:
chunks = _iterencode(value, _current_indent_level)
# Below line commented-out for python2 compatibility
# yield from chunks
for chunk in chunks:
yield chunk
yield from chunks
if newline_indent is not None:
_current_indent_level -= 1
yield '\n' + _indent * _current_indent_level
@ -457,8 +444,7 @@ def _make_iterencode(
del markers[markerid]
def _iterencode(o, _current_indent_level):
# Replaced isinstance(o, str) with below to enable simultaneous python 2 & 3 compatibility
if isinstance(o, six.string_types) or isinstance(o, six.binary_type):
if isinstance(o, str):
yield _encoder(o)
elif o is None:
yield 'null'
@ -473,15 +459,9 @@ def _make_iterencode(
# see comment for int/float in _make_iterencode
yield convert2Es6Format(o)
elif isinstance(o, (list, tuple)):
# Below line commented-out for python2 compatibility
# yield from _iterencode_list(o, _current_indent_level)
for thing in _iterencode_list(o, _current_indent_level):
yield thing
yield from _iterencode_list(o, _current_indent_level)
elif isinstance(o, dict):
# Below line commented-out for python2 compatibility
# yield from _iterencode_dict(o, _current_indent_level)
for thing in _iterencode_dict(o, _current_indent_level):
yield thing
yield from _iterencode_dict(o, _current_indent_level)
else:
if markers is not None:
markerid = id(o)
@ -489,23 +469,18 @@ def _make_iterencode(
raise ValueError("Circular reference detected")
markers[markerid] = o
o = _default(o)
# Below line commented-out for python2 compatibility
# yield from _iterencode(o, _current_indent_level)
for thing in _iterencode(o, _current_indent_level):
yield thing
yield from _iterencode(o, _current_indent_level)
if markers is not None:
del markers[markerid]
return _iterencode
def canonicalize(obj, utf8=True):
def canonicalize(obj,utf8=True):
textVal = JSONEncoder(sort_keys=True).encode(obj)
if utf8:
return textVal.encode()
return textVal
def serialize(obj, utf8=True):
def serialize(obj,utf8=True):
textVal = JSONEncoder(sort_keys=False).encode(obj)
if utf8:
return textVal.encode()

View File

@ -21,40 +21,50 @@
# Convert a Python double/float into an ES6/V8 compatible string #
##################################################################
def convert2Es6Format(value):
# Convert double/float to str using the native Python formatter
# Convert double/float to str using the native Python formatter
fvalue = float(value)
# Zero is a special case. The following line takes "-0" case as well
#
# Zero is a special case. The following line takes "-0" case as well
#
if fvalue == 0:
return '0'
# The rest of the algorithm works on the textual representation only
#
# The rest of the algorithm works on the textual representation only
#
pyDouble = str(fvalue)
# The following line catches the "inf" and "nan" values returned by str(fvalue)
#
# The following line catches the "inf" and "nan" values returned by str(fvalue)
#
if pyDouble.find('n') >= 0:
raise ValueError("Invalid JSON number: " + pyDouble)
# Save sign separately, it doesn't have any role in the algorithm
#
# Save sign separately, it doesn't have any role in the algorithm
#
pySign = ''
if pyDouble.find('-') == 0:
pySign = '-'
pyDouble = pyDouble[1:]
# Now we should only have valid non-zero values
#
# Now we should only have valid non-zero values
#
pyExpStr = ''
pyExpVal = 0
q = pyDouble.find('e')
if q > 0:
# Grab the exponent and remove it from the number
#
# Grab the exponent and remove it from the number
#
pyExpStr = pyDouble[q:]
if pyExpStr[2:3] == '0':
# Suppress leading zero on exponents
#
# Suppress leading zero on exponents
#
pyExpStr = pyExpStr[:2] + pyExpStr[3:]
pyDouble = pyDouble[0:q]
pyExpVal = int(pyExpStr[1:])
# Split number in pyFirst + pyDot + pyLast
#
# Split number in pyFirst + pyDot + pyLast
#
pyFirst = pyDouble
pyDot = ''
pyLast = ''
@ -63,33 +73,40 @@ def convert2Es6Format(value):
pyDot = '.'
pyFirst = pyDouble[:q]
pyLast = pyDouble[q + 1:]
# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
#
# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
#
if pyLast == '0':
# Always remove trailing .0
#
# Always remove trailing .0
#
pyDot = ''
pyLast = ''
if pyExpVal > 0 and pyExpVal < 21:
# Integers are shown as is with up to 21 digits
#
# Integers are shown as is with up to 21 digits
#
pyFirst += pyLast
pyLast = ''
pyDot = ''
pyExpStr = ''
q = pyExpVal - len(pyFirst)
while q >= 0:
q -= 1
q -= 1;
pyFirst += '0'
elif pyExpVal < 0 and pyExpVal > -7:
# Small numbers are shown as 0.etc with e-6 as lower limit
#
# Small numbers are shown as 0.etc with e-6 as lower limit
#
pyLast = pyFirst + pyLast
pyFirst = '0'
pyDot = '.'
pyExpStr = ''
q = pyExpVal
while q < -1:
q += 1
q += 1;
pyLast = '0' + pyLast
# The resulting sub-strings are concatenated
#
# The resulting sub-strings are concatenated
#
return pySign + pyFirst + pyDot + pyLast + pyExpStr