From 490251dd85e1f79095b0f7b53053bd05632c3775 Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Thu, 18 Feb 2021 12:21:52 -0500 Subject: [PATCH] Revert JSON canonicalization code Undo our Python 2 compatibility fixes --- .pre-commit-config.yaml | 1 + stix2/canonicalization/Canonicalize.py | 55 ++++++--------------- stix2/canonicalization/NumberToJson.py | 67 ++++++++++++++++---------- 3 files changed, 58 insertions(+), 65 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d127dd6..434eb95 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,3 +23,4 @@ repos: args: ["-c", "--diff"] - id: isort name: Sort python imports (fixes files) +exclude: ^stix2/canonicalization/ diff --git a/stix2/canonicalization/Canonicalize.py b/stix2/canonicalization/Canonicalize.py index 78145be..72910ab 100644 --- a/stix2/canonicalization/Canonicalize.py +++ b/stix2/canonicalization/Canonicalize.py @@ -20,12 +20,8 @@ # JCS compatible JSON serializer for Python 3.x # ################################################# -# This file has been modified to be compatible with Python 2.x as well - import re -import six - from stix2.canonicalization.NumberToJson import convert2Es6Format try: @@ -55,10 +51,10 @@ ESCAPE_DCT = { } for i in range(0x20): ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) + #ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) INFINITY = float('inf') - def py_encode_basestring(s): """Return a JSON representation of a Python string @@ -70,7 +66,6 @@ def py_encode_basestring(s): encode_basestring = (c_encode_basestring or py_encode_basestring) - def py_encode_basestring_ascii(s): """Return an ASCII-only JSON representation of a Python string @@ -83,6 +78,7 @@ def py_encode_basestring_ascii(s): n = ord(s) if n < 0x10000: return '\\u{0:04x}'.format(n) + #return '\\u%04x' % (n,) else: # surrogate pair n -= 0x10000 @@ -96,7 +92,6 @@ encode_basestring_ascii = ( c_encode_basestring_ascii or py_encode_basestring_ascii ) - class JSONEncoder(object): """Extensible JSON encoder for Python data structures. @@ -128,11 +123,10 @@ class JSONEncoder(object): """ item_separator = ', ' key_separator = ': ' - def __init__( - self, skipkeys=False, ensure_ascii=False, + self, *, skipkeys=False, ensure_ascii=False, check_circular=True, allow_nan=True, sort_keys=True, - indent=None, separators=(',', ':'), default=None, + indent=None, separators=(',', ':'), default=None ): """Constructor for JSONEncoder, with sensible defaults. @@ -277,6 +271,7 @@ class JSONEncoder(object): return text + if ( _one_shot and c_make_encoder is not None and self.indent is None @@ -294,11 +289,10 @@ class JSONEncoder(object): ) return _iterencode(o, 0) - def _make_iterencode( markers, _default, _encoder, _indent, _floatstr, _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, - # HACK: hand-optimized bytecode; turn globals into locals + ## HACK: hand-optimized bytecode; turn globals into locals ValueError=ValueError, dict=dict, float=float, @@ -362,10 +356,7 @@ def _make_iterencode( chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - # Below line commented-out for python2 compatibility - # yield from chunks - for chunk in chunks: - yield chunk + yield from chunks if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level @@ -397,8 +388,7 @@ def _make_iterencode( else: items = dct.items() for key, value in items: - # Replaced isinstance(key, str) with below to enable simultaneous python 2 & 3 compatibility - if isinstance(key, six.string_types) or isinstance(key, six.binary_type): + if isinstance(key, str): pass # JavaScript is weakly typed for these, so it makes sense to # also allow them. Many encoders seem to do something like this. @@ -445,10 +435,7 @@ def _make_iterencode( chunks = _iterencode_dict(value, _current_indent_level) else: chunks = _iterencode(value, _current_indent_level) - # Below line commented-out for python2 compatibility - # yield from chunks - for chunk in chunks: - yield chunk + yield from chunks if newline_indent is not None: _current_indent_level -= 1 yield '\n' + _indent * _current_indent_level @@ -457,8 +444,7 @@ def _make_iterencode( del markers[markerid] def _iterencode(o, _current_indent_level): - # Replaced isinstance(o, str) with below to enable simultaneous python 2 & 3 compatibility - if isinstance(o, six.string_types) or isinstance(o, six.binary_type): + if isinstance(o, str): yield _encoder(o) elif o is None: yield 'null' @@ -473,15 +459,9 @@ def _make_iterencode( # see comment for int/float in _make_iterencode yield convert2Es6Format(o) elif isinstance(o, (list, tuple)): - # Below line commented-out for python2 compatibility - # yield from _iterencode_list(o, _current_indent_level) - for thing in _iterencode_list(o, _current_indent_level): - yield thing + yield from _iterencode_list(o, _current_indent_level) elif isinstance(o, dict): - # Below line commented-out for python2 compatibility - # yield from _iterencode_dict(o, _current_indent_level) - for thing in _iterencode_dict(o, _current_indent_level): - yield thing + yield from _iterencode_dict(o, _current_indent_level) else: if markers is not None: markerid = id(o) @@ -489,23 +469,18 @@ def _make_iterencode( raise ValueError("Circular reference detected") markers[markerid] = o o = _default(o) - # Below line commented-out for python2 compatibility - # yield from _iterencode(o, _current_indent_level) - for thing in _iterencode(o, _current_indent_level): - yield thing + yield from _iterencode(o, _current_indent_level) if markers is not None: del markers[markerid] return _iterencode - -def canonicalize(obj, utf8=True): +def canonicalize(obj,utf8=True): textVal = JSONEncoder(sort_keys=True).encode(obj) if utf8: return textVal.encode() return textVal - -def serialize(obj, utf8=True): +def serialize(obj,utf8=True): textVal = JSONEncoder(sort_keys=False).encode(obj) if utf8: return textVal.encode() diff --git a/stix2/canonicalization/NumberToJson.py b/stix2/canonicalization/NumberToJson.py index cea54d0..132af3f 100644 --- a/stix2/canonicalization/NumberToJson.py +++ b/stix2/canonicalization/NumberToJson.py @@ -21,40 +21,50 @@ # Convert a Python double/float into an ES6/V8 compatible string # ################################################################## def convert2Es6Format(value): - # Convert double/float to str using the native Python formatter +# Convert double/float to str using the native Python formatter fvalue = float(value) - - # Zero is a special case. The following line takes "-0" case as well +# +# Zero is a special case. The following line takes "-0" case as well +# if fvalue == 0: return '0' - - # The rest of the algorithm works on the textual representation only +# +# The rest of the algorithm works on the textual representation only +# pyDouble = str(fvalue) - - # The following line catches the "inf" and "nan" values returned by str(fvalue) +# +# The following line catches the "inf" and "nan" values returned by str(fvalue) +# if pyDouble.find('n') >= 0: raise ValueError("Invalid JSON number: " + pyDouble) - - # Save sign separately, it doesn't have any role in the algorithm +# +# Save sign separately, it doesn't have any role in the algorithm +# pySign = '' if pyDouble.find('-') == 0: pySign = '-' pyDouble = pyDouble[1:] - - # Now we should only have valid non-zero values +# +# Now we should only have valid non-zero values +# pyExpStr = '' pyExpVal = 0 q = pyDouble.find('e') if q > 0: - # Grab the exponent and remove it from the number +# +# Grab the exponent and remove it from the number +# pyExpStr = pyDouble[q:] if pyExpStr[2:3] == '0': - # Supress leading zero on exponents +# +# Supress leading zero on exponents +# pyExpStr = pyExpStr[:2] + pyExpStr[3:] pyDouble = pyDouble[0:q] pyExpVal = int(pyExpStr[1:]) - - # Split number in pyFirst + pyDot + pyLast +# +# Split number in pyFirst + pyDot + pyLast +# pyFirst = pyDouble pyDot = '' pyLast = '' @@ -63,33 +73,40 @@ def convert2Es6Format(value): pyDot = '.' pyFirst = pyDouble[:q] pyLast = pyDouble[q + 1:] - - # Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr +# +# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr +# if pyLast == '0': - # Always remove trailing .0 +# +# Always remove trailing .0 +# pyDot = '' pyLast = '' - if pyExpVal > 0 and pyExpVal < 21: - # Integers are shown as is with up to 21 digits +# +# Integers are shown as is with up to 21 digits +# pyFirst += pyLast pyLast = '' pyDot = '' pyExpStr = '' q = pyExpVal - len(pyFirst) while q >= 0: - q -= 1 + q -= 1; pyFirst += '0' elif pyExpVal < 0 and pyExpVal > -7: - # Small numbers are shown as 0.etc with e-6 as lower limit +# +# Small numbers are shown as 0.etc with e-6 as lower limit +# pyLast = pyFirst + pyLast pyFirst = '0' pyDot = '.' pyExpStr = '' q = pyExpVal while q < -1: - q += 1 + q += 1; pyLast = '0' + pyLast - - # The resulting sub-strings are concatenated +# +# The resulting sub-strings are concatenated +# return pySign + pyFirst + pyDot + pyLast + pyExpStr