Revamp deterministic ID generation code to fix bugs.
							parent
							
								
									41525f9be0
								
							
						
					
					
						commit
						6c2c4781e7
					
				
							
								
								
									
										200
									
								
								stix2/base.py
								
								
								
								
							
							
						
						
									
										200
									
								
								stix2/base.py
								
								
								
								
							|  | @ -334,24 +334,21 @@ class _Observable(_STIXBase): | |||
|     def __init__(self, **kwargs): | ||||
|         # the constructor might be called independently of an observed data object | ||||
|         self._STIXBase__valid_refs = kwargs.pop('_valid_refs', []) | ||||
| 
 | ||||
|         self._allow_custom = kwargs.get('allow_custom', False) | ||||
|         self._properties['extensions'].allow_custom = kwargs.get('allow_custom', False) | ||||
| 
 | ||||
|         try: | ||||
|             # Since `spec_version` is optional, this is how we check for a 2.1 SCO | ||||
|             self._id_contributing_properties | ||||
| 
 | ||||
|             if 'id' not in kwargs: | ||||
|                 possible_id = self._generate_id(kwargs) | ||||
|                 if possible_id is not None: | ||||
|                     kwargs['id'] = possible_id | ||||
|         except AttributeError: | ||||
|             # End up here if handling a 2.0 SCO, and don't need to do anything further | ||||
|             pass | ||||
| 
 | ||||
|         super(_Observable, self).__init__(**kwargs) | ||||
| 
 | ||||
|         if 'id' not in kwargs and not isinstance(self, stix2.v20._Observable): | ||||
|             # Specific to 2.1+ observables: generate a deterministic ID | ||||
|             id_ = self._generate_id() | ||||
| 
 | ||||
|             # Spec says fall back to UUIDv4 if no contributing properties were | ||||
|             # given.  That's what already happened (the following is actually | ||||
|             # overwriting the default uuidv4), so nothing to do here. | ||||
|             if id_ is not None: | ||||
|                 # Can't assign to self (we're immutable), so slip the ID in | ||||
|                 # more sneakily. | ||||
|                 self._inner["id"] = id_ | ||||
| 
 | ||||
|     def _check_ref(self, ref, prop, prop_name): | ||||
|         """ | ||||
|         Only for checking `*_ref` or `*_refs` properties in spec_version 2.0 | ||||
|  | @ -396,42 +393,50 @@ class _Observable(_STIXBase): | |||
|                 for ref in kwargs[prop_name]: | ||||
|                     self._check_ref(ref, prop, prop_name) | ||||
| 
 | ||||
|     def _generate_id(self, kwargs): | ||||
|         required_prefix = self._type + "--" | ||||
|     def _generate_id(self): | ||||
|         """ | ||||
|         Generate a UUIDv5 for this observable, using its "ID contributing | ||||
|         properties". | ||||
| 
 | ||||
|         properties_to_use = self._id_contributing_properties | ||||
|         if properties_to_use: | ||||
|             streamlined_object = {} | ||||
|             if "hashes" in kwargs and "hashes" in properties_to_use: | ||||
|                 possible_hash = _choose_one_hash(kwargs["hashes"]) | ||||
|                 if possible_hash: | ||||
|                     streamlined_object["hashes"] = possible_hash | ||||
|             for key in properties_to_use: | ||||
|                 if key != "hashes" and key in kwargs: | ||||
|                     if isinstance(kwargs[key], dict) or isinstance(kwargs[key], _STIXBase): | ||||
|                         temp_deep_copy = copy.deepcopy(dict(kwargs[key])) | ||||
|                         _recursive_stix_to_dict(temp_deep_copy) | ||||
|                         streamlined_object[key] = temp_deep_copy | ||||
|                     elif isinstance(kwargs[key], list): | ||||
|                         temp_deep_copy = copy.deepcopy(kwargs[key]) | ||||
|                         _recursive_stix_list_to_dict(temp_deep_copy) | ||||
|                         streamlined_object[key] = temp_deep_copy | ||||
|                     else: | ||||
|                         streamlined_object[key] = kwargs[key] | ||||
|             if streamlined_object: | ||||
|                 data = canonicalize(streamlined_object, utf8=False) | ||||
|         :return: The ID, or None if no ID contributing properties are set | ||||
|         """ | ||||
| 
 | ||||
|         id_ = None | ||||
|         json_serializable_object = {} | ||||
| 
 | ||||
|         for key in self._id_contributing_properties: | ||||
| 
 | ||||
|             if key in self: | ||||
|                 obj_value = self[key] | ||||
| 
 | ||||
|                 if key == "hashes": | ||||
|                     possible_hash = _choose_one_hash(obj_value) | ||||
|                     if possible_hash: | ||||
|                         serializable_value = possible_hash | ||||
| 
 | ||||
|                 # The situation is complicated w.r.t. python 2/3 behavior, so | ||||
|                 # I'd rather not rely on particular exceptions being raised to | ||||
|                 # determine what to do.  Better to just check the python version | ||||
|                 # directly. | ||||
|                 if six.PY3: | ||||
|                     return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data)) | ||||
|                 else: | ||||
|                     return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8"))) | ||||
|                     serializable_value = _make_json_serializable(obj_value) | ||||
| 
 | ||||
|         # We return None if there are no values specified for any of the id-contributing-properties | ||||
|         return None | ||||
|                 json_serializable_object[key] = serializable_value | ||||
| 
 | ||||
|         if json_serializable_object: | ||||
| 
 | ||||
|             data = canonicalize(json_serializable_object, utf8=False) | ||||
| 
 | ||||
|             # The situation is complicated w.r.t. python 2/3 behavior, so | ||||
|             # I'd rather not rely on particular exceptions being raised to | ||||
|             # determine what to do.  Better to just check the python version | ||||
|             # directly. | ||||
|             if six.PY3: | ||||
|                 uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, data) | ||||
|             else: | ||||
|                 uuid_ = uuid.uuid5( | ||||
|                     SCO_DET_ID_NAMESPACE, data.encode("utf-8") | ||||
|                 ) | ||||
| 
 | ||||
|             id_ = "{}--{}".format(self._type, six.text_type(uuid_)) | ||||
| 
 | ||||
|         return id_ | ||||
| 
 | ||||
| 
 | ||||
| class _Extension(_STIXBase): | ||||
|  | @ -455,35 +460,90 @@ def _choose_one_hash(hash_dict): | |||
|         if k is not None: | ||||
|             return {k: hash_dict[k]} | ||||
| 
 | ||||
|     return None | ||||
| 
 | ||||
| 
 | ||||
| def _cls_init(cls, obj, kwargs): | ||||
|     if getattr(cls, '__init__', object.__init__) is not object.__init__: | ||||
|         cls.__init__(obj, **kwargs) | ||||
| 
 | ||||
| 
 | ||||
| def _recursive_stix_to_dict(input_dict): | ||||
|     for key in input_dict: | ||||
|         if isinstance(input_dict[key], dict): | ||||
|             _recursive_stix_to_dict(input_dict[key]) | ||||
|         elif isinstance(input_dict[key], _STIXBase): | ||||
|             input_dict[key] = dict(input_dict[key]) | ||||
| def _make_json_serializable(value): | ||||
|     """ | ||||
|     Make the given value JSON-serializable; required for the JSON canonicalizer | ||||
|     to work.  This recurses into lists/dicts, converts stix objects to dicts, | ||||
|     etc.  "Convenience" types this library uses as property values are | ||||
|     JSON-serialized to produce a JSON-serializable value.  (So you will always | ||||
|     get strings for those.) | ||||
| 
 | ||||
|             # There may stil be nested _STIXBase objects | ||||
|             _recursive_stix_to_dict(input_dict[key]) | ||||
|         elif isinstance(input_dict[key], list): | ||||
|             _recursive_stix_list_to_dict(input_dict[key]) | ||||
|         else: | ||||
|             pass | ||||
|     The conversion will not affect the passed in value. | ||||
| 
 | ||||
|     :param value: The value to make JSON-serializable. | ||||
|     :return: The JSON-serializable value. | ||||
|     :raises ValueError: If value is None (since nulls are not allowed in STIX | ||||
|         objects). | ||||
|     """ | ||||
|     if value is None: | ||||
|         raise ValueError("Illegal null value found in a STIX object") | ||||
| 
 | ||||
|     json_value = value  # default assumption | ||||
| 
 | ||||
|     if isinstance(value, Mapping): | ||||
|         json_value = { | ||||
|             k: _make_json_serializable(v) | ||||
|             for k, v in value.items() | ||||
|         } | ||||
| 
 | ||||
|     elif isinstance(value, list): | ||||
|         json_value = [ | ||||
|             _make_json_serializable(v) | ||||
|             for v in value | ||||
|         ] | ||||
| 
 | ||||
|     elif not isinstance(value, (int, float, six.string_types, bool)): | ||||
|         # If a "simple" value which is not already JSON-serializable, | ||||
|         # JSON-serialize to a string and use that as our JSON-serializable | ||||
|         # value.  This applies to our datetime objects currently (timestamp | ||||
|         # properties), and could apply to any other "convenience" types this | ||||
|         # library uses for property values in the future. | ||||
|         json_value = json.dumps(value, ensure_ascii=False, cls=STIXJSONEncoder) | ||||
| 
 | ||||
|         # If it looks like a string literal was output, strip off the quotes. | ||||
|         # Otherwise, a second pair will be added when it's canonicalized.  Also | ||||
|         # to be extra safe, we need to unescape. | ||||
|         if len(json_value) >= 2 and \ | ||||
|                 json_value[0] == '"' and json_value[-1] == '"': | ||||
|             json_value = _un_json_escape(json_value[1:-1]) | ||||
| 
 | ||||
|     return json_value | ||||
| 
 | ||||
| 
 | ||||
| def _recursive_stix_list_to_dict(input_list): | ||||
|     for i in range(len(input_list)): | ||||
|         if isinstance(input_list[i], _STIXBase): | ||||
|             input_list[i] = dict(input_list[i]) | ||||
|         elif isinstance(input_list[i], dict): | ||||
|             pass | ||||
|         elif isinstance(input_list[i], list): | ||||
|             _recursive_stix_list_to_dict(input_list[i]) | ||||
|         else: | ||||
|             continue | ||||
|         _recursive_stix_to_dict(input_list[i]) | ||||
| def _un_json_escape(json_string): | ||||
|     """ | ||||
|     Removes JSON string literal escapes.  We should undo these things Python's | ||||
|     serializer does, so we can ensure they're done canonically.  The | ||||
|     canonicalizer should be in charge of everything, as much as is feasible. | ||||
| 
 | ||||
|     :param json_string: String literal output of Python's JSON serializer, | ||||
|         minus the surrounding quotes. | ||||
|     :return: The unescaped string | ||||
|     """ | ||||
| 
 | ||||
|     # I don't think I should need to worry about the unicode escapes (\uXXXX) | ||||
|     # since I use ensure_ascii=False when generating it.  I will just fix all | ||||
|     # the other escapes, e.g. \n, \r, etc. | ||||
|     # | ||||
|     # This list is taken from RFC7159 section 7: | ||||
|     # https://tools.ietf.org/html/rfc7159.html#section-7 | ||||
| 
 | ||||
|     result = json_string\ | ||||
|         .replace(r"\"", "\"")\ | ||||
|         .replace(r"\/", "/")\ | ||||
|         .replace(r"\b", "\b")\ | ||||
|         .replace(r"\f", "\f")\ | ||||
|         .replace(r"\n", "\n")\ | ||||
|         .replace(r"\r", "\r")\ | ||||
|         .replace(r"\t", "\t")\ | ||||
|         .replace(r"\\", "\\")  # Must do this one last! | ||||
| 
 | ||||
|     return result | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Michael Chisholm
						Michael Chisholm