Merge pull request #300 from chisholm/deterministic_id_unicode_fix

Fix deterministic ID handling with unicode
master
Chris Lenk 2019-10-14 11:03:53 -04:00 committed by GitHub
commit 08e8b88410
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 20 additions and 4 deletions

View File

@ -394,11 +394,14 @@ class _Observable(_STIXBase):
if streamlined_obj_vals:
data = canonicalize(streamlined_obj_vals, utf8=False)
# try/except here to enable python 2 compatibility
try:
# The situation is complicated w.r.t. python 2/3 behavior, so
# I'd rather not rely on particular exceptions being raised to
# determine what to do. Better to just check the python version
# directly.
if six.PY3:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data))
except UnicodeDecodeError:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, six.binary_type(data)))
else:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8")))
# We return None if there are no values specified for any of the id-contributing-properties
return None

View File

@ -1,9 +1,11 @@
import datetime as dt
import json
import uuid
import pytest
import pytz
import stix2
from stix2.base import STIXJSONEncoder
@ -23,3 +25,14 @@ def test_encode_json_object():
json.dumps(test_dict, cls=STIXJSONEncoder)
assert " is not JSON serializable" in str(excinfo.value)
def test_deterministic_id_unicode():
    """Check that an observable with non-ASCII property values gets a UUIDv5 ID.

    Parses a mutex whose name contains the non-ASCII characters U+00A3 and
    U+00A8, then verifies that the UUID portion of the resulting ID is an
    RFC 4122, version 5 UUID.
    """
    mutex = {'name': u'D*Fl#Ed*\u00a3\u00a8', 'type': 'mutex'}
    obs = stix2.parse_observable(mutex, version="2.1")

    # Everything after the first "--" separator is expected to parse as a UUID.
    _, _, uuid_text = obs.id.partition("--")
    parsed_uuid = uuid.UUID(uuid_text)

    assert parsed_uuid.variant == uuid.RFC_4122
    assert parsed_uuid.version == 5