cti-python-stix2/stix2/base.py

453 lines
18 KiB
Python
Raw Normal View History

2018-11-28 22:51:00 +01:00
"""Base classes for type definitions in the STIX2 library."""
2017-02-10 22:35:02 +01:00
2017-05-02 20:06:42 +02:00
import copy
2017-02-10 22:35:02 +01:00
import datetime as dt
2019-08-19 15:39:13 +02:00
import uuid
2017-08-15 19:40:47 +02:00
import simplejson as json
Improved the exception class hierarchy: - Removed all plain python base classes (e.g. ValueError, TypeError) - Renamed InvalidPropertyConfigurationError -> PropertyPresenceError, since incorrect values could be considered a property config error, and I really just wanted this class to apply to presence (co-)constraint violations. - Added ObjectConfigurationError as a superclass of InvalidValueError, PropertyPresenceError, and any other exception that could be raised during _STIXBase object init, which is when the spec compliance checks happen. This class is intended to represent general spec violations. - Did some class reordering in exceptions.py, so all the ObjectConfigurationError subclasses were together. Changed how property "cleaning" errors were handled: - Previous docs said they should all be ValueErrors, but that would require extra exception check-and-replace complexity in the property implementations, so that requirement is removed. Doc is changed to just say that cleaning problems should cause exceptions to be raised. _STIXBase._check_property() now handles most exception types, not just ValueError. - Decided to try chaining the original clean error to the InvalidValueError, in case the extra diagnostics would be helpful in the future. This is done via 'six' adapter function and only works on python3. - A small amount of testing was removed, since it was looking at custom exception properties which became unavailable once the exception was replaced with InvalidValueError. Did another pass through unit tests to fix breakage caused by the changed exception class hierarchy. Removed unnecessary observable extension handling code from parse_observable(), since it was all duplicated in ExtensionsProperty. The redundant code in parse_observable() had different exception behavior than ExtensionsProperty, which makes the API inconsistent and unit tests more complicated. 
(Problems in ExtensionsProperty get replaced with InvalidValueError, but extensions problems handled directly in parse_observable() don't get the same replacement, and so the exception type is different.) Redid the workbench monkeypatching. The old way was impossible to make work, and had caused ugly ripple effect hackage in other parts of the codebase. Now, it replaces the global object maps with factory functions which behave the same way when called, as real classes. Had to fix up a few unit tests to get them all passing with this monkeypatching in place. Also remove all the xfail markings in the workbench test suite, since all tests now pass. Since workbench monkeypatching isn't currently affecting any unit tests, tox.ini was simplified to remove the special-casing for running the workbench tests. Removed the v20 workbench test suite, since the workbench currently only works with the latest stix object version.
2019-07-19 20:50:11 +02:00
import six
2017-02-10 22:35:02 +01:00
2019-08-19 19:35:17 +02:00
from stix2.canonicalization.Canonicalize import canonicalize
2019-08-19 15:39:13 +02:00
from .exceptions import (
AtLeastOnePropertyError, DependentPropertiesError, ExtraPropertiesError,
ImmutableError, InvalidObjRefError, InvalidValueError,
MissingPropertiesError, MutuallyExclusivePropertiesError,
)
from .markings.utils import validate
2017-08-31 22:36:59 +02:00
from .utils import NOW, find_property_index, format_datetime, get_timestamp
from .utils import new_version as _new_version
from .utils import revoke as _revoke
2017-02-10 22:35:02 +01:00
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
2017-02-10 22:35:02 +01:00
# Names exported by a star-import of this module.
__all__ = ['STIXJSONEncoder', '_STIXBase']
# Message template with ``type``, ``property`` and ``expected`` placeholders.
DEFAULT_ERROR = "{type} must have {property}='{expected}'."
# Namespace UUID handed to ``uuid.uuid5`` by ``_Observable._generate_id``.
SCO_DET_ID_NAMESPACE = uuid.UUID("00abedb4-aa42-466c-9c01-fed23315a9b7")
2017-02-10 22:35:02 +01:00
class STIXJSONEncoder(json.JSONEncoder):
    """JSON encoder for Python ``stix2`` objects that omits defaulted values.

    Optional properties recorded by the object as being equal to their
    default value are dropped from the serialized output. The ``revoked``
    common property is an example of such a property.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Dates and datetimes are rendered with ``format_datetime``. STIX
        objects become plain dicts with every name listed in
        ``obj._defaulted_optional_properties`` removed. Anything else is
        delegated to the parent encoder.
        """
        if isinstance(obj, (dt.date, dt.datetime)):
            return format_datetime(obj)

        if not isinstance(obj, _STIXBase):
            return super(STIXJSONEncoder, self).default(obj)

        # Work on a deep copy so the original object is never altered.
        serializable = dict(copy.deepcopy(obj))
        for defaulted_name in obj._defaulted_optional_properties:
            serializable.pop(defaulted_name)
        return serializable
class STIXJSONIncludeOptionalDefaultsEncoder(json.JSONEncoder):
    """JSON encoder for Python ``stix2`` objects that keeps defaulted values.

    Unlike ``STIXJSONEncoder``, optional properties that are set to their
    default value are included in the serialized output.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Dates and datetimes are rendered with ``format_datetime``. STIX
        objects become plain dicts of all their properties. Anything else is
        delegated to the parent encoder.
        """
        if isinstance(obj, (dt.date, dt.datetime)):
            return format_datetime(obj)

        if isinstance(obj, _STIXBase):
            return dict(obj)

        return super(STIXJSONIncludeOptionalDefaultsEncoder, self).default(obj)
2017-02-10 22:35:02 +01:00
2017-02-24 16:28:53 +01:00
def get_required_properties(properties):
    """Return a generator over the names of properties flagged as required.

    Args:
        properties: Mapping of property name to a property object exposing a
            ``required`` attribute.
    """
    named_specs = properties.items()
    return (name for name, spec in named_specs if spec.required)
2017-02-24 16:28:53 +01:00
class _STIXBase(Mapping):
2017-02-10 22:35:02 +01:00
"""Base class for STIX object types"""
2017-08-29 21:08:26 +02:00
def object_properties(self):
props = set(self._properties.keys())
custom_props = list(set(self._inner.keys()) - props)
custom_props.sort()
all_properties = list(self._properties.keys())
all_properties.extend(custom_props) # Any custom properties to the bottom
return all_properties
def _check_property(self, prop_name, prop, kwargs):
2017-02-24 17:20:24 +01:00
if prop_name not in kwargs:
if hasattr(prop, 'default'):
value = prop.default()
if value == NOW:
value = self.__now
kwargs[prop_name] = value
if prop_name in kwargs:
try:
2017-04-17 21:13:11 +02:00
kwargs[prop_name] = prop.clean(kwargs[prop_name])
Improved the exception class hierarchy: - Removed all plain python base classes (e.g. ValueError, TypeError) - Renamed InvalidPropertyConfigurationError -> PropertyPresenceError, since incorrect values could be considered a property config error, and I really just wanted this class to apply to presence (co-)constraint violations. - Added ObjectConfigurationError as a superclass of InvalidValueError, PropertyPresenceError, and any other exception that could be raised during _STIXBase object init, which is when the spec compliance checks happen. This class is intended to represent general spec violations. - Did some class reordering in exceptions.py, so all the ObjectConfigurationError subclasses were together. Changed how property "cleaning" errors were handled: - Previous docs said they should all be ValueErrors, but that would require extra exception check-and-replace complexity in the property implementations, so that requirement is removed. Doc is changed to just say that cleaning problems should cause exceptions to be raised. _STIXBase._check_property() now handles most exception types, not just ValueError. - Decided to try chaining the original clean error to the InvalidValueError, in case the extra diagnostics would be helpful in the future. This is done via 'six' adapter function and only works on python3. - A small amount of testing was removed, since it was looking at custom exception properties which became unavailable once the exception was replaced with InvalidValueError. Did another pass through unit tests to fix breakage caused by the changed exception class hierarchy. Removed unnecessary observable extension handling code from parse_observable(), since it was all duplicated in ExtensionsProperty. The redundant code in parse_observable() had different exception behavior than ExtensionsProperty, which makes the API inconsistent and unit tests more complicated. 
(Problems in ExtensionsProperty get replaced with InvalidValueError, but extensions problems handled directly in parse_observable() don't get the same replacement, and so the exception type is different.) Redid the workbench monkeypatching. The old way was impossible to make work, and had caused ugly ripple effect hackage in other parts of the codebase. Now, it replaces the global object maps with factory functions which behave the same way when called, as real classes. Had to fix up a few unit tests to get them all passing with this monkeypatching in place. Also remove all the xfail markings in the workbench test suite, since all tests now pass. Since workbench monkeypatching isn't currently affecting any unit tests, tox.ini was simplified to remove the special-casing for running the workbench tests. Removed the v20 workbench test suite, since the workbench currently only works with the latest stix object version.
2019-07-19 20:50:11 +02:00
except InvalidValueError:
# No point in wrapping InvalidValueError in another
# InvalidValueError... so let those propagate.
raise
except Exception as exc:
six.raise_from(
InvalidValueError(
self.__class__, prop_name, reason=str(exc),
),
exc,
)
2017-02-24 17:20:24 +01:00
# interproperty constraint methods
def _check_mutually_exclusive_properties(self, list_of_properties, at_least_one=True):
current_properties = self.properties_populated()
count = len(set(list_of_properties).intersection(current_properties))
# at_least_one allows for xor to be checked
if count > 1 or (at_least_one and count == 0):
raise MutuallyExclusivePropertiesError(self.__class__, list_of_properties)
def _check_at_least_one_property(self, list_of_properties=None):
if not list_of_properties:
list_of_properties = sorted(list(self.__class__._properties.keys()))
2019-09-05 01:08:34 +02:00
if isinstance(self, _Observable):
props_to_remove = ["type", "id", "defanged", "spec_version"]
else:
props_to_remove = ["type"]
list_of_properties = [prop for prop in list_of_properties if prop not in props_to_remove]
current_properties = self.properties_populated()
list_of_properties_populated = set(list_of_properties).intersection(current_properties)
if list_of_properties and (not list_of_properties_populated or list_of_properties_populated == set(['extensions'])):
raise AtLeastOnePropertyError(self.__class__, list_of_properties)
def _check_properties_dependency(self, list_of_properties, list_of_dependent_properties):
failed_dependency_pairs = []
for p in list_of_properties:
for dp in list_of_dependent_properties:
2017-06-08 14:42:32 +02:00
if not self.get(p) and self.get(dp):
failed_dependency_pairs.append((p, dp))
if failed_dependency_pairs:
raise DependentPropertiesError(self.__class__, failed_dependency_pairs)
def _check_object_constraints(self):
for m in self.get('granular_markings', []):
validate(self, m.get('selectors'))
def __init__(self, allow_custom=False, **kwargs):
2017-02-10 22:35:02 +01:00
cls = self.__class__
2019-12-17 17:57:55 +01:00
self._allow_custom = allow_custom
2017-02-10 22:35:02 +01:00
# Use the same timestamp for any auto-generated datetimes
2017-02-24 16:28:53 +01:00
self.__now = get_timestamp()
2017-02-10 22:35:02 +01:00
# Detect any keyword arguments not allowed for a specific type
2017-06-09 18:20:40 +02:00
custom_props = kwargs.pop('custom_properties', {})
if custom_props and not isinstance(custom_props, dict):
raise ValueError("'custom_properties' must be a dictionary")
2019-12-17 17:57:55 +01:00
if not self._allow_custom:
extra_kwargs = list(set(kwargs) - set(self._properties))
if extra_kwargs:
raise ExtraPropertiesError(cls, extra_kwargs)
if custom_props:
2019-12-17 17:57:55 +01:00
self._allow_custom = True
2017-02-10 22:35:02 +01:00
2019-04-17 16:08:34 +02:00
# Remove any keyword arguments whose value is None or [] (i.e. empty list)
setting_kwargs = {}
2017-06-09 18:20:40 +02:00
props = kwargs.copy()
props.update(custom_props)
for prop_name, prop_value in props.items():
2019-04-17 16:08:34 +02:00
if prop_value is not None and prop_value != []:
setting_kwargs[prop_name] = prop_value
# Detect any missing required properties
required_properties = set(get_required_properties(self._properties))
missing_kwargs = required_properties - set(setting_kwargs)
2017-02-10 22:35:02 +01:00
if missing_kwargs:
raise MissingPropertiesError(cls, missing_kwargs)
2017-02-10 22:35:02 +01:00
for prop_name, prop_metadata in self._properties.items():
self._check_property(prop_name, prop_metadata, setting_kwargs)
2017-02-10 22:35:02 +01:00
# Cache defaulted optional properties for serialization
defaulted = []
for name, prop in self._properties.items():
try:
if (not prop.required and not hasattr(prop, '_fixed_value') and
prop.default() == setting_kwargs[name]):
defaulted.append(name)
except (AttributeError, KeyError):
continue
self._defaulted_optional_properties = defaulted
self._inner = setting_kwargs
2017-02-10 22:35:02 +01:00
self._check_object_constraints()
2017-02-10 22:35:02 +01:00
def __getitem__(self, key):
return self._inner[key]
def __iter__(self):
return iter(self._inner)
def __len__(self):
return len(self._inner)
# Handle attribute access just like key access
def __getattr__(self, name):
2018-06-06 21:30:45 +02:00
# Pickle-proofing: pickle invokes this on uninitialized instances (i.e.
# __init__ has not run). So no "self" attributes are set yet. The
# usual behavior of this method reads an __init__-assigned attribute,
# which would cause infinite recursion. So this check disables all
# attribute reads until the instance has been properly initialized.
unpickling = '_inner' not in self.__dict__
2018-06-06 21:30:45 +02:00
if not unpickling and name in self:
return self.__getitem__(name)
2017-06-08 14:42:32 +02:00
raise AttributeError("'%s' object has no attribute '%s'" %
(self.__class__.__name__, name))
2017-02-10 22:35:02 +01:00
def __setattr__(self, name, value):
if not name.startswith("_"):
raise ImmutableError(self.__class__, name)
2017-02-10 22:35:02 +01:00
super(_STIXBase, self).__setattr__(name, value)
def __str__(self):
return self.serialize(pretty=True)
2017-02-10 22:58:17 +01:00
def __repr__(self):
2017-08-29 21:08:26 +02:00
props = [(k, self[k]) for k in self.object_properties() if self.get(k)]
return '{0}({1})'.format(
self.__class__.__name__,
', '.join(['{0!s}={1!r}'.format(k, v) for k, v in props]),
)
def __deepcopy__(self, memo):
# Assume: we can ignore the memo argument, because no object will ever contain the same sub-object multiple times.
new_inner = copy.deepcopy(self._inner, memo)
cls = type(self)
if isinstance(self, _Observable):
# Assume: valid references in the original object are still valid in the new version
new_inner['_valid_refs'] = {'*': '*'}
2019-12-17 17:57:55 +01:00
new_inner['allow_custom'] = self._allow_custom
return cls(**new_inner)
def properties_populated(self):
return list(self._inner.keys())
2017-05-02 20:06:42 +02:00
# Versioning API
def new_version(self, **kwargs):
return _new_version(self, **kwargs)
2017-05-02 20:06:42 +02:00
def revoke(self):
return _revoke(self)
def serialize(self, pretty=False, include_optional_defaults=False, **kwargs):
"""
Serialize a STIX object.
Args:
pretty (bool): If True, output properties following the STIX specs
formatting. This includes indentation. Refer to notes for more
2018-03-13 21:01:42 +01:00
details. (Default: ``False``)
include_optional_defaults (bool): Determines whether to include
optional properties set to the default value defined in the spec.
**kwargs: The arguments for a json.dumps() call.
Examples:
>>> import stix2
>>> identity = stix2.Identity(name='Example Corp.', identity_class='organization')
>>> print(identity.serialize(sort_keys=True))
{"created": "2018-06-08T19:03:54.066Z", ... "name": "Example Corp.", "type": "identity"}
>>> print(identity.serialize(sort_keys=True, indent=4))
{
"created": "2018-06-08T19:03:54.066Z",
"id": "identity--d7f3e25a-ba1c-447a-ab71-6434b092b05e",
"identity_class": "organization",
"modified": "2018-06-08T19:03:54.066Z",
"name": "Example Corp.",
"type": "identity"
}
Returns:
2018-03-16 19:26:41 +01:00
str: The serialized JSON object.
Note:
The argument ``pretty=True`` will output the STIX object following
spec order. Using this argument greatly impacts object serialization
performance. If your use case is centered across machine-to-machine
operation it is recommended to set ``pretty=False``.
When ``pretty=True`` the following key-value pairs will be added or
overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by.
"""
if pretty:
def sort_by(element):
return find_property_index(self, *element)
kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by})
if include_optional_defaults:
return json.dumps(self, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs)
else:
return json.dumps(self, cls=STIXJSONEncoder, **kwargs)
class _Observable(_STIXBase):
    """Base class for STIX Cyber Observable (SCO) types.

    On top of ``_STIXBase`` this class checks object references against the
    ``_valid_refs`` passed to the constructor and, for classes that define
    ``_id_contributing_properties``, derives a deterministic ``id`` when the
    caller did not supply one.
    """

    def __init__(self, **kwargs):
        """Create the observable.

        Args:
            **kwargs: Property values. ``_valid_refs`` (optional) is removed
                and kept for reference checking; ``allow_custom`` (optional)
                is also applied to the class's ``extensions`` property.
        """
        # the constructor might be called independently of an observed data object
        self._STIXBase__valid_refs = kwargs.pop('_valid_refs', [])

        allow_custom = kwargs.get('allow_custom', False)
        self._allow_custom = allow_custom
        self._properties['extensions'].allow_custom = allow_custom

        try:
            # Since `spec_version` is optional, this is how we check for a 2.1 SCO
            self._id_contributing_properties

            if 'id' not in kwargs:
                possible_id = self._generate_id(kwargs)
                if possible_id is not None:
                    kwargs['id'] = possible_id
        except AttributeError:
            # End up here if handling a 2.0 SCO, and don't need to do anything further
            pass

        super(_Observable, self).__init__(**kwargs)

    def _check_ref(self, ref, prop, prop_name):
        """
        Only for checking `*_ref` or `*_refs` properties in spec_version 2.0
        STIX Cyber Observables (SCOs)

        Raises:
            InvalidObjRefError: ``ref`` is not among the valid references, or
                it refers to an object whose type the property does not allow.
            ValueError: the valid references were supplied as a list, so the
                referenced object's type cannot be looked up.
        """
        if '*' in self._STIXBase__valid_refs:
            return  # don't check if refs are valid

        if ref not in self._STIXBase__valid_refs:
            raise InvalidObjRefError(self.__class__, prop_name, "'%s' is not a valid object in local scope" % ref)

        # List properties expose the allowed types on their contained property.
        try:
            allowed_types = prop.contained.valid_types
        except AttributeError:
            allowed_types = prop.valid_types

        try:
            # The mapping's values may be objects with a ``type`` attribute
            # or the type names themselves.
            try:
                ref_type = self._STIXBase__valid_refs[ref].type
            except AttributeError:
                ref_type = self._STIXBase__valid_refs[ref]
        except TypeError:
            raise ValueError("'%s' must be created with _valid_refs as a dict, not a list." % self.__class__.__name__)

        if allowed_types:
            if ref_type not in allowed_types:
                raise InvalidObjRefError(self.__class__, prop_name, "object reference '%s' is of an invalid type '%s'" % (ref, ref_type))

    def _check_property(self, prop_name, prop, kwargs):
        """Run the base property check, then validate object references."""
        super(_Observable, self)._check_property(prop_name, prop, kwargs)
        if prop_name not in kwargs:
            return

        # Imported here rather than at module level, as in the original code.
        from .properties import ObjectReferenceProperty
        if prop_name.endswith('_ref'):
            if isinstance(prop, ObjectReferenceProperty):
                ref = kwargs[prop_name]
                self._check_ref(ref, prop, prop_name)
        elif prop_name.endswith('_refs'):
            if isinstance(prop.contained, ObjectReferenceProperty):
                for ref in kwargs[prop_name]:
                    self._check_ref(ref, prop, prop_name)

    def _generate_id(self, kwargs):
        """Derive a deterministic ID from the id-contributing properties.

        Args:
            kwargs (dict): The keyword arguments the object is being created
                with.

        Returns:
            The ID string (``<type>--<uuid5>``), or None if no
            id-contributing property has a usable value in ``kwargs``.
        """
        required_prefix = self._type + "--"

        properties_to_use = self._id_contributing_properties
        if properties_to_use:
            streamlined_obj_vals = []

            # A missing, None or empty ``hashes`` value contributes nothing;
            # previously None made _choose_one_hash() raise TypeError.
            hashes = kwargs.get("hashes")
            if hashes and "hashes" in properties_to_use:
                possible_hash = _choose_one_hash(hashes)
                if possible_hash:
                    streamlined_obj_vals.append(possible_hash)

            for key in properties_to_use:
                if key == "hashes" or key not in kwargs:
                    continue

                value = kwargs[key]
                if isinstance(value, list) and not value:
                    # _STIXBase.__init__ drops empty lists as "not set", so an
                    # empty list must not contribute to the ID either.
                    # Indexing element 0 of it used to raise IndexError.
                    continue

                if isinstance(value, dict) or isinstance(value, _STIXBase):
                    temp_deep_copy = copy.deepcopy(dict(value))
                    _recursive_stix_to_dict(temp_deep_copy)
                    streamlined_obj_vals.append(temp_deep_copy)
                elif isinstance(value, list) and isinstance(value[0], _STIXBase):
                    for obj in value:
                        temp_deep_copy = copy.deepcopy(dict(obj))
                        _recursive_stix_to_dict(temp_deep_copy)
                        streamlined_obj_vals.append(temp_deep_copy)
                else:
                    streamlined_obj_vals.append(value)

            if streamlined_obj_vals:
                data = canonicalize(streamlined_obj_vals, utf8=False)

                # The situation is complicated w.r.t. python 2/3 behavior, so
                # I'd rather not rely on particular exceptions being raised to
                # determine what to do. Better to just check the python version
                # directly.
                if six.PY3:
                    return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data))
                else:
                    return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8")))

        # We return None if there are no values specified for any of the id-contributing-properties
        return None
class _Extension(_STIXBase):
    """Base class for extension objects, which must populate at least one property."""

    def _check_object_constraints(self):
        """Run the inherited checks, then require at least one populated property."""
        super(_Extension, self)._check_object_constraints()
        self._check_at_least_one_property()
2019-08-19 15:39:13 +02:00
def _choose_one_hash(hash_dict):
2019-09-11 16:49:11 +02:00
if "MD5" in hash_dict:
return {"MD5": hash_dict["MD5"]}
elif "SHA-1" in hash_dict:
return {"SHA-1": hash_dict["SHA-1"]}
elif "SHA-256" in hash_dict:
return {"SHA-256": hash_dict["SHA-256"]}
elif "SHA-512" in hash_dict:
return {"SHA-512": hash_dict["SHA-512"]}
else:
k = next(iter(hash_dict), None)
if k is not None:
return {k: hash_dict[k]}
2018-10-03 17:57:31 +02:00
def _cls_init(cls, obj, kwargs):
if getattr(cls, '__init__', object.__init__) is not object.__init__:
cls.__init__(obj, **kwargs)
def _recursive_stix_to_dict(input_dict):
    """Replace nested ``_STIXBase`` values with plain dicts, in place.

    Every value of ``input_dict`` is visited. Dict values are descended
    into; ``_STIXBase`` values are replaced by ``dict(value)`` and then
    descended into as well. All other values are left untouched.

    Args:
        input_dict (dict): Dictionary to convert; it is modified in place.
    """
    for key in input_dict:
        value = input_dict[key]
        if isinstance(value, dict):
            _recursive_stix_to_dict(value)
        elif isinstance(value, _STIXBase):
            # Re-assigning an existing key does not change the dict's size,
            # so it is safe while iterating.
            converted = dict(value)
            input_dict[key] = converted
            # There may still be nested _STIXBase objects
            _recursive_stix_to_dict(converted)
        # Any other value needs no conversion. Keep looping: returning here
        # (as the code once did) left every later key unconverted.