Revert JSON canonicalization code

Undo our Python 2 compatibility fixes
2021-02-18 12:21:52 -05:00 · 2021-02-18 12:21:52 -05:00 · 490251dd85
parent f9ca68458a
commit 490251dd85
3 changed files with 58 additions and 65 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -23,3 +23,4 @@ repos:
        args: ["-c", "--diff"]
    -   id: isort
        name: Sort python imports (fixes files)
+exclude: ^stix2/canonicalization/
--- a/stix2/canonicalization/Canonicalize.py
+++ b/stix2/canonicalization/Canonicalize.py
@@ -20,12 +20,8 @@
 # JCS compatible JSON serializer for Python 3.x #
 #################################################

-# This file has been modified to be compatible with Python 2.x as well
-
 import re

-import six
-
 from stix2.canonicalization.NumberToJson import convert2Es6Format

 try:
@@ -55,10 +51,10 @@ ESCAPE_DCT = {
 }
 for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
+    #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

 INFINITY = float('inf')

-
 def py_encode_basestring(s):
    """Return a JSON representation of a Python string

@@ -70,7 +66,6 @@ def py_encode_basestring(s):

 encode_basestring = (c_encode_basestring or py_encode_basestring)

-
 def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

@@ -83,6 +78,7 @@ def py_encode_basestring_ascii(s):
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
+                #return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
@@ -96,7 +92,6 @@ encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii
 )

-
 class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

@@ -128,11 +123,10 @@ class JSONEncoder(object):
    """
    item_separator = ', '
    key_separator = ': '
-
    def __init__(
-        self, skipkeys=False, ensure_ascii=False,
+        self, *, skipkeys=False, ensure_ascii=False,
        check_circular=True, allow_nan=True, sort_keys=True,
-        indent=None, separators=(',', ':'), default=None,
+        indent=None, separators=(',', ':'), default=None
    ):
        """Constructor for JSONEncoder, with sensible defaults.

@@ -277,6 +271,7 @@ class JSONEncoder(object):

            return text

+
        if (
            _one_shot and c_make_encoder is not None
            and self.indent is None
@@ -294,11 +289,10 @@ class JSONEncoder(object):
            )
        return _iterencode(o, 0)

-
 def _make_iterencode(
    markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
-        # HACK: hand-optimized bytecode; turn globals into locals
+        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
@@ -362,10 +356,7 @@ def _make_iterencode(
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
-                # Below line commented-out for python2 compatibility
-                # yield from chunks
-                for chunk in chunks:
-                    yield chunk
+                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
@@ -397,8 +388,7 @@ def _make_iterencode(
        else:
            items = dct.items()
        for key, value in items:
-            # Replaced isinstance(key, str) with below to enable simultaneous python 2 & 3 compatibility
-            if isinstance(key, six.string_types) or isinstance(key, six.binary_type):
+            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
@@ -445,10 +435,7 @@ def _make_iterencode(
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
-                # Below line commented-out for python2 compatibility
-                # yield from chunks
-                for chunk in chunks:
-                    yield chunk
+                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
@@ -457,8 +444,7 @@ def _make_iterencode(
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
-        # Replaced isinstance(o, str) with below to enable simultaneous python 2 & 3 compatibility
-        if isinstance(o, six.string_types) or isinstance(o, six.binary_type):
+        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
@@ -473,15 +459,9 @@ def _make_iterencode(
            # see comment for int/float in _make_iterencode
            yield convert2Es6Format(o)
        elif isinstance(o, (list, tuple)):
-            # Below line commented-out for python2 compatibility
-            # yield from _iterencode_list(o, _current_indent_level)
-            for thing in _iterencode_list(o, _current_indent_level):
-                yield thing
+            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
-            # Below line commented-out for python2 compatibility
-            # yield from _iterencode_dict(o, _current_indent_level)
-            for thing in _iterencode_dict(o, _current_indent_level):
-                yield thing
+            yield from _iterencode_dict(o, _current_indent_level)
        else:
            if markers is not None:
                markerid = id(o)
@@ -489,23 +469,18 @@ def _make_iterencode(
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
-            # Below line commented-out for python2 compatibility
-            # yield from _iterencode(o, _current_indent_level)
-            for thing in _iterencode(o, _current_indent_level):
-                yield thing
+            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode

-
-def canonicalize(obj, utf8=True):
+def canonicalize(obj,utf8=True):
    textVal = JSONEncoder(sort_keys=True).encode(obj)
    if utf8:
        return textVal.encode()
    return textVal

-
-def serialize(obj, utf8=True):
+def serialize(obj,utf8=True):
    textVal = JSONEncoder(sort_keys=False).encode(obj)
    if utf8:
        return textVal.encode()
--- a/stix2/canonicalization/NumberToJson.py
+++ b/stix2/canonicalization/NumberToJson.py
@@ -21,40 +21,50 @@
 # Convert a Python double/float into an ES6/V8 compatible string #
 ##################################################################
 def convert2Es6Format(value):
-    # Convert double/float to str using the native Python formatter
+# Convert double/float to str using the native Python formatter
    fvalue = float(value)
-
-    # Zero is a special case.  The following line takes "-0" case as well
+#
+# Zero is a special case.  The following line takes "-0" case as well
+#
    if fvalue == 0:
        return '0'
-
-    # The rest of the algorithm works on the textual representation only
+#
+# The rest of the algorithm works on the textual representation only
+#
    pyDouble = str(fvalue)
-
-    # The following line catches the "inf" and "nan" values returned by str(fvalue)
+#
+# The following line catches the "inf" and "nan" values returned by str(fvalue)
+#
    if pyDouble.find('n') >= 0:
        raise ValueError("Invalid JSON number: " + pyDouble)
-
-    # Save sign separately, it doesn't have any role in the algorithm
+#
+# Save sign separately, it doesn't have any role in the algorithm
+#
    pySign = ''
    if pyDouble.find('-') == 0:
        pySign = '-'
        pyDouble = pyDouble[1:]
-
-    # Now we should only have valid non-zero values
+#
+# Now we should only have valid non-zero values
+#
    pyExpStr = ''
    pyExpVal = 0
    q = pyDouble.find('e')
    if q > 0:
-        # Grab the exponent and remove it from the number
+#
+# Grab the exponent and remove it from the number
+#
        pyExpStr = pyDouble[q:]
        if pyExpStr[2:3] == '0':
-            # Supress leading zero on exponents
+#
+# Supress leading zero on exponents
+#
            pyExpStr = pyExpStr[:2] + pyExpStr[3:]
        pyDouble = pyDouble[0:q]
        pyExpVal = int(pyExpStr[1:])
-
-    # Split number in pyFirst + pyDot + pyLast
+#
+# Split number in pyFirst + pyDot + pyLast
+#
    pyFirst = pyDouble
    pyDot = ''
    pyLast = ''
@@ -63,33 +73,40 @@ def convert2Es6Format(value):
        pyDot = '.'
        pyFirst = pyDouble[:q]
        pyLast = pyDouble[q + 1:]
-
-    # Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
+#
+# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
+#
    if pyLast == '0':
-        # Always remove trailing .0
+#
+# Always remove trailing .0
+#
        pyDot = ''
        pyLast = ''
-
    if pyExpVal > 0 and pyExpVal < 21:
-        # Integers are shown as is with up to 21 digits
+#
+# Integers are shown as is with up to 21 digits
+#
        pyFirst += pyLast
        pyLast = ''
        pyDot = ''
        pyExpStr = ''
        q = pyExpVal - len(pyFirst)
        while q >= 0:
-            q -= 1
+            q -= 1;
            pyFirst += '0'
    elif pyExpVal < 0 and pyExpVal > -7:
-        # Small numbers are shown as 0.etc with e-6 as lower limit
+#
+# Small numbers are shown as 0.etc with e-6 as lower limit
+#
        pyLast = pyFirst + pyLast
        pyFirst = '0'
        pyDot = '.'
        pyExpStr = ''
        q = pyExpVal
        while q < -1:
-            q += 1
+            q += 1;
            pyLast = '0' + pyLast
-
-    # The resulting sub-strings are concatenated
+#
+# The resulting sub-strings are concatenated
+#
    return pySign + pyFirst + pyDot + pyLast + pyExpStr