Revamp deterministic ID generation code to fix bugs.
parent
41525f9be0
commit
6c2c4781e7
186
stix2/base.py
186
stix2/base.py
|
@ -334,24 +334,21 @@ class _Observable(_STIXBase):
|
|||
def __init__(self, **kwargs):
    """
    Initialize the observable and, for 2.1+ observables created without
    an explicit ``id``, replace the default ID with a deterministic one.

    :param kwargs: Property values for the object.  The private
        ``_valid_refs`` entry, if present, is removed and stored on the
        instance; ``allow_custom`` is read but left in place for the base
        class constructor.
    """
    # the constructor might be called independently of an observed data object
    self._STIXBase__valid_refs = kwargs.pop('_valid_refs', [])

    allow_custom = kwargs.get('allow_custom', False)
    self._allow_custom = allow_custom
    self._properties['extensions'].allow_custom = allow_custom

    # Let the base class process the properties first, so the ID is derived
    # from the values actually stored on the object rather than from the
    # raw keyword arguments.
    super(_Observable, self).__init__(**kwargs)

    if 'id' not in kwargs and not isinstance(self, stix2.v20._Observable):
        # Specific to 2.1+ observables: generate a deterministic ID
        id_ = self._generate_id()

        # Spec says fall back to UUIDv4 if no contributing properties were
        # given.  That's what already happened (the following is actually
        # overwriting the default uuidv4), so nothing to do here.
        if id_ is not None:
            # Can't assign to self (we're immutable), so slip the ID in
            # more sneakily.
            self._inner["id"] = id_
|
||||
|
||||
def _check_ref(self, ref, prop, prop_name):
|
||||
"""
|
||||
Only for checking `*_ref` or `*_refs` properties in spec_version 2.0
|
||||
|
@ -396,42 +393,50 @@ class _Observable(_STIXBase):
|
|||
for ref in kwargs[prop_name]:
|
||||
self._check_ref(ref, prop, prop_name)
|
||||
|
||||
def _generate_id(self):
    """
    Generate a UUIDv5 for this observable, using its "ID contributing
    properties".

    :return: The ID, or None if no ID contributing properties are set
    """
    json_serializable_object = {}

    for key in self._id_contributing_properties:
        if key not in self:
            continue

        obj_value = self[key]

        if key == "hashes":
            serializable_value = _choose_one_hash(obj_value)
            if not serializable_value:
                # No hash could be chosen.  Skip the property instead of
                # storing an unbound name or the value left over from the
                # previous key.
                continue
        else:
            serializable_value = _make_json_serializable(obj_value)

        json_serializable_object[key] = serializable_value

    if not json_serializable_object:
        # None of the ID contributing properties contributed a value.
        return None

    data = canonicalize(json_serializable_object, utf8=False)

    # The situation is complicated w.r.t. python 2/3 behavior, so
    # I'd rather not rely on particular exceptions being raised to
    # determine what to do.  Better to just check the python version
    # directly.
    if six.PY3:
        uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, data)
    else:
        uuid_ = uuid.uuid5(
            SCO_DET_ID_NAMESPACE, data.encode("utf-8")
        )

    return "{}--{}".format(self._type, six.text_type(uuid_))
|
||||
|
||||
|
||||
class _Extension(_STIXBase):
|
||||
|
@ -455,35 +460,90 @@ def _choose_one_hash(hash_dict):
|
|||
if k is not None:
|
||||
return {k: hash_dict[k]}
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _cls_init(cls, obj, kwargs):
|
||||
if getattr(cls, '__init__', object.__init__) is not object.__init__:
|
||||
cls.__init__(obj, **kwargs)
|
||||
|
||||
|
||||
def _recursive_stix_to_dict(input_dict):
    """
    Convert, in place, every _STIXBase value found in the given dict (and
    in dicts nested inside it) to a plain dict.  List values are handed to
    _recursive_stix_list_to_dict.

    :param input_dict: The dict to convert; it is modified in place.
    """
    for key in input_dict:
        if isinstance(input_dict[key], dict):
            _recursive_stix_to_dict(input_dict[key])
        elif isinstance(input_dict[key], _STIXBase):
            input_dict[key] = dict(input_dict[key])

            # There may still be nested _STIXBase objects
            _recursive_stix_to_dict(input_dict[key])
        elif isinstance(input_dict[key], list):
            _recursive_stix_list_to_dict(input_dict[key])
        else:
            pass


def _make_json_serializable(value):
    """
    Make the given value JSON-serializable; required for the JSON canonicalizer
    to work.  This recurses into lists/dicts, converts stix objects to dicts,
    etc.  "Convenience" types this library uses as property values are
    JSON-serialized to produce a JSON-serializable value.  (So you will always
    get strings for those.)

    The conversion will not affect the passed in value.

    :param value: The value to make JSON-serializable.
    :return: The JSON-serializable value.
    :raises ValueError: If value is None (since nulls are not allowed in STIX
        objects).
    """
    if value is None:
        raise ValueError("Illegal null value found in a STIX object")

    json_value = value  # default assumption

    if isinstance(value, Mapping):
        json_value = {
            k: _make_json_serializable(v)
            for k, v in value.items()
        }

    elif isinstance(value, list):
        json_value = [
            _make_json_serializable(v)
            for v in value
        ]

    elif not isinstance(value, (int, float, six.string_types, bool)):
        # If a "simple" value which is not already JSON-serializable,
        # JSON-serialize to a string and use that as our JSON-serializable
        # value.  This applies to our datetime objects currently (timestamp
        # properties), and could apply to any other "convenience" types this
        # library uses for property values in the future.
        json_value = json.dumps(value, ensure_ascii=False, cls=STIXJSONEncoder)

        # If it looks like a string literal was output, strip off the quotes.
        # Otherwise, a second pair will be added when it's canonicalized.  Also
        # to be extra safe, we need to unescape.
        if len(json_value) >= 2 and \
                json_value[0] == '"' and json_value[-1] == '"':
            json_value = _un_json_escape(json_value[1:-1])

    return json_value
|
||||
|
||||
|
||||
def _recursive_stix_list_to_dict(input_list):
    """
    Convert, in place, every _STIXBase element of the given list (and of
    lists nested inside it) to a plain dict, then let
    _recursive_stix_to_dict process each dict element.

    :param input_list: The list to convert; it is modified in place.
    """
    for i, item in enumerate(input_list):
        if isinstance(item, _STIXBase):
            input_list[i] = dict(item)
        elif isinstance(item, dict):
            pass
        elif isinstance(item, list):
            # Nested lists are fully handled by the list helper.  They must
            # not fall through to _recursive_stix_to_dict, which indexes its
            # argument by the items it iterates over and so would misbehave
            # on a list.
            _recursive_stix_list_to_dict(item)
            continue
        else:
            continue

        _recursive_stix_to_dict(input_list[i])
|
||||
def _un_json_escape(json_string):
|
||||
"""
|
||||
Removes JSON string literal escapes. We should undo these things Python's
|
||||
serializer does, so we can ensure they're done canonically. The
|
||||
canonicalizer should be in charge of everything, as much as is feasible.
|
||||
|
||||
:param json_string: String literal output of Python's JSON serializer,
|
||||
minus the surrounding quotes.
|
||||
:return: The unescaped string
|
||||
"""
|
||||
|
||||
# I don't think I should need to worry about the unicode escapes (\uXXXX)
|
||||
# since I use ensure_ascii=False when generating it. I will just fix all
|
||||
# the other escapes, e.g. \n, \r, etc.
|
||||
#
|
||||
# This list is taken from RFC7159 section 7:
|
||||
# https://tools.ietf.org/html/rfc7159.html#section-7
|
||||
|
||||
result = json_string\
|
||||
.replace(r"\"", "\"")\
|
||||
.replace(r"\/", "/")\
|
||||
.replace(r"\b", "\b")\
|
||||
.replace(r"\f", "\f")\
|
||||
.replace(r"\n", "\n")\
|
||||
.replace(r"\r", "\r")\
|
||||
.replace(r"\t", "\t")\
|
||||
.replace(r"\\", "\\") # Must do this one last!
|
||||
|
||||
return result
|
||||
|
|
Loading…
Reference in New Issue