From 216b43d49ef6eb8874b26dba53ab638fd45aa901 Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Fri, 11 Oct 2019 17:12:44 -0400 Subject: [PATCH 1/2] Fix deterministic UUID handling when there are high-codepoint unicode characters. Make compatible with both python 2 and 3. --- stix2/base.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/stix2/base.py b/stix2/base.py index a13cb98..b2e20de 100644 --- a/stix2/base.py +++ b/stix2/base.py @@ -394,11 +394,14 @@ class _Observable(_STIXBase): if streamlined_obj_vals: data = canonicalize(streamlined_obj_vals, utf8=False) - # try/except here to enable python 2 compatibility - try: + # The situation is complicated w.r.t. python 2/3 behavior, so + # I'd rather not rely on particular exceptions being raised to + # determine what to do. Better to just check the python version + # directly. + if six.PY3: return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data)) - except UnicodeDecodeError: - return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, six.binary_type(data))) + else: + return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8"))) # We return None if there are no values specified for any of the id-contributing-properties return None From edf465bd80b20f151064ac39ba4d0c1cd9643e1d Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Fri, 11 Oct 2019 18:15:47 -0400 Subject: [PATCH 2/2] Add a unit test for deterministic ID, with unicode --- stix2/test/v21/test_base.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/stix2/test/v21/test_base.py b/stix2/test/v21/test_base.py index 18d3a50..d753ab1 100644 --- a/stix2/test/v21/test_base.py +++ b/stix2/test/v21/test_base.py @@ -1,9 +1,11 @@ import datetime as dt import json +import uuid import pytest import pytz +import stix2 from stix2.base import STIXJSONEncoder @@ -23,3 +25,14 @@ def test_encode_json_object(): json.dumps(test_dict, cls=STIXJSONEncoder)
assert " is not JSON serializable" in str(excinfo.value) + + +def test_deterministic_id_unicode(): + mutex = {'name': u'D*Fl#Ed*\u00a3\u00a8', 'type': 'mutex'} + obs = stix2.parse_observable(mutex, version="2.1") + + dd_idx = obs.id.index("--") + id_uuid = uuid.UUID(obs.id[dd_idx+2:]) + + assert id_uuid.variant == uuid.RFC_4122 + assert id_uuid.version == 5