2017-02-10 22:35:02 +01:00
|
|
|
"""Utility functions and classes for the stix2 library."""
|
2017-08-31 18:28:07 +02:00
|
|
|
import copy
import datetime as dt
import json

try:
    from collections.abc import Mapping
except ImportError:  # Python 2: the ABCs still live directly in ``collections``
    from collections import Mapping

from dateutil import parser
import pytz

from .exceptions import (InvalidValueError, RevokeError,
                         UnmodifiablePropertyError)
|
|
|
|
|
2017-05-19 19:51:59 +02:00
|
|
|
# Sentinel value for properties that should be set to the current time.
# We can't use the standard 'default' approach, since if there are multiple
# timestamps in a single object, the timestamps will vary by a few microseconds.
NOW = object()

# STIX object properties that cannot be modified. new_version() rejects any
# attempt to change one of these, and remove_custom_stix() leaves them out of
# the properties it hands to new_version().
STIX_UNMOD_PROPERTIES = ["created", "created_by_ref", "id", "type"]

# Matches one or more groups of lowercase ASCII letters/digits joined by single
# hyphens, optionally with one leading and/or one trailing hyphen.
# NOTE(review): presumably used to validate STIX type names -- confirm against
# the callers, which are not visible in this module.
TYPE_REGEX = r'^\-?[a-z0-9]+(-[a-z0-9]+)*\-?$'
|
|
|
|
|
2017-02-10 22:35:02 +01:00
|
|
|
|
2017-06-23 00:47:35 +02:00
|
|
|
class STIXdatetime(dt.datetime):
    """A ``datetime.datetime`` subclass that carries a ``precision`` attribute.

    The instance can be built either from the usual ``datetime`` constructor
    arguments (positional or keyword) or from an existing ``datetime`` object
    passed as the first positional argument. The optional ``precision``
    keyword is stored on the instance and is read back by
    ``format_datetime()`` when the value is rendered as a string.
    """

    def __new__(cls, *args, **kwargs):
        """Create the instance and attach ``precision`` to it.

        Args:
            *args: either a single ``datetime`` object to copy, or the
                positional arguments accepted by ``datetime.datetime``.
            **kwargs: keyword arguments accepted by ``datetime.datetime``,
                plus an optional ``precision`` value (default ``None``).

        Returns:
            A new ``STIXdatetime`` instance.
        """
        precision = kwargs.pop('precision', None)
        # Guard on ``args`` so keyword-only construction, e.g.
        # STIXdatetime(year=2017, month=1, day=1), does not raise IndexError.
        if args and isinstance(args[0], dt.datetime):  # Allow passing in a datetime object
            dttm = args[0]
            args = (dttm.year, dttm.month, dttm.day, dttm.hour, dttm.minute,
                    dttm.second, dttm.microsecond, dttm.tzinfo)
        # self will be an instance of STIXdatetime, not dt.datetime
        self = dt.datetime.__new__(cls, *args, **kwargs)
        self.precision = precision
        return self

    def __repr__(self):
        """Return the ``format_datetime()`` rendering wrapped in single quotes."""
        return "'%s'" % format_datetime(self)
|
|
|
|
|
2017-06-23 00:47:35 +02:00
|
|
|
|
2017-09-29 17:24:19 +02:00
|
|
|
def deduplicate(stix_obj_list):
    """Collapse a list of STIX objects so each object version appears once.

    An object version is identified by its ``id`` together with its
    ``modified`` value. Objects that have no ``modified`` property
    (e.g. marking-definition) are keyed on ``created`` instead.

    Note: when several entries in ``stix_obj_list`` share the same version
    key, the entry that appears last in the list is the one that is kept.

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list holding one object per distinct version key.

    """
    latest_by_version = {}

    for stix_obj in stix_obj_list:
        obj_id = stix_obj['id']
        try:
            version_stamp = stix_obj['modified']
        except KeyError:
            # No `modified` property on this object; fall back to `created`
            version_stamp = stix_obj['created']
        latest_by_version[(obj_id, version_stamp)] = stix_obj

    return list(latest_by_version.values())
|
|
|
|
|
|
|
|
|
2017-02-10 22:35:02 +01:00
|
|
|
def get_timestamp():
    """Build a ``STIXdatetime`` for the current date and time in UTC."""
    utc_now = STIXdatetime.now(tz=pytz.UTC)
    return utc_now
|
2017-02-10 22:35:02 +01:00
|
|
|
|
|
|
|
|
|
|
|
def format_datetime(dttm):
    """Render a datetime object as a STIX timestamp string.

    The value is interpreted as UTC when it is timezone-naive and converted
    to UTC otherwise, then written as ``YYYY-MM-DDTHH:MM:SS`` followed by an
    optional fractional part and a trailing ``Z``.

    The fractional part depends on the ``precision`` attribute of ``dttm``
    (treated as ``None`` when the attribute is absent):

    * ``'second'``: no fractional part.
    * ``'millisecond'``: always exactly three fractional digits.
    * anything else: the microseconds with trailing zeros removed, and only
      when the microsecond value is non-zero.

    """
    tz = dttm.tzinfo
    is_naive = tz is None or tz.utcoffset(dttm) is None
    # Timezone-naive values are assumed to already be in UTC
    utc_dttm = pytz.utc.localize(dttm) if is_naive else dttm.astimezone(pytz.utc)

    stamp = utc_dttm.strftime("%Y-%m-%dT%H:%M:%S")
    micros = utc_dttm.strftime("%f")
    precision = getattr(dttm, "precision", None)

    if precision == "millisecond":
        fraction = micros[:3]
    elif precision != 'second' and utc_dttm.microsecond > 0:
        fraction = micros.rstrip("0")
    else:
        # Either already precise to the second, or nothing sub-second to show
        fraction = ""

    if fraction:
        stamp = stamp + '.' + fraction
    return stamp + "Z"
|
2017-04-19 20:32:56 +02:00
|
|
|
|
|
|
|
|
2017-06-23 00:47:35 +02:00
|
|
|
def parse_into_datetime(value, precision=None):
    """Parse a value into a valid STIX timestamp object.

    Args:
        value: a ``datetime`` object (used as-is), a ``date`` object
            (midnight UTC is added as the time component), or anything
            ``dateutil.parser.parse`` accepts. Parsed values carrying
            timezone info are converted to UTC; parsed values without
            timezone info are assumed to be UTC.
        precision: ``'second'`` drops the microseconds, ``'millisecond'``
            truncates them to whole milliseconds, and any other value
            (including ``None``) leaves the timestamp untouched.

    Returns:
        A ``STIXdatetime`` carrying the given ``precision``.

    Raises:
        ValueError: if ``value`` is not a date/datetime object and cannot be
            parsed as a timestamp.
    """
    if isinstance(value, dt.date):
        if hasattr(value, 'hour'):
            ts = value
        else:
            # Add a time component
            ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc))
    else:
        # value isn't a date or datetime object so assume it's a string
        try:
            parsed = parser.parse(value)
        except (TypeError, ValueError):
            # Unknown format
            raise ValueError("must be a datetime object, date object, or "
                             "timestamp string in a recognizable format.")
        if parsed.tzinfo:
            ts = parsed.astimezone(pytz.utc)
        else:
            # Doesn't have timezone info in the string; assume UTC
            ts = pytz.utc.localize(parsed)

    # Ensure correct precision
    if precision == 'second':
        ts = ts.replace(microsecond=0)
    elif precision == 'millisecond':
        # Truncate arithmetically. Deriving the factor from the number of
        # decimal digits in the microsecond value breaks whenever that value
        # has leading zeros (e.g. 12345 us must become 12000 us, not 12300).
        ts = ts.replace(microsecond=(ts.microsecond // 1000) * 1000)

    return STIXdatetime(ts, precision=precision)
|
2017-05-04 22:34:08 +02:00
|
|
|
|
|
|
|
|
2018-04-13 17:08:03 +02:00
|
|
|
def _get_dict(data):
    """Coerce ``data`` into a dictionary.

    An object whose exact type is ``dict`` is handed back unchanged.
    Anything else is tried, in order, as a JSON string, as a file-like
    object containing JSON, and finally as an argument to ``dict()``.

    Raises:
        ValueError: if none of the conversions applies.
    """
    if type(data) is dict:
        return data

    # Each loader signals "wrong kind of input" with a different exception;
    # only that exception moves us on to the next strategy.
    json_strategies = (
        (json.loads, TypeError),
        (json.load, AttributeError),
    )
    for loader, not_applicable in json_strategies:
        try:
            return loader(data)
        except not_applicable:
            continue

    try:
        return dict(data)
    except (ValueError, TypeError):
        raise ValueError("Cannot convert '%s' to dictionary." % str(data))
|
2017-08-31 18:28:07 +02:00
|
|
|
|
|
|
|
|
2018-06-15 03:29:50 +02:00
|
|
|
def _find(seq, val):
    """
    Locate 'val' inside sequence 'seq'. Mirrors str.find(): a missing value
    yields -1 rather than an exception.

    :param seq: The sequence to search
    :param val: The value to search for
    :return: The index of the value if found, or -1 if not found
    """
    try:
        position = seq.index(val)
    except ValueError:
        position = -1
    return position
|
|
|
|
|
|
|
|
|
|
|
|
def _find_property_in_seq(seq, search_key, search_value):
    """
    Helper for find_property_index(): run the search against each element of
    the given sequence in turn, stopping at the first element that yields a
    non-negative index.

    :param seq: The sequence
    :param search_key: Property name to find
    :param search_value: Property value to find
    :return: A property index, or -1 if the property was not found
    """
    position = -1
    for element in seq:
        position = find_property_index(element, search_key, search_value)
        if position >= 0:
            return position

    return position
|
|
|
|
|
|
|
|
|
|
|
|
def find_property_index(obj, search_key, search_value):
    """
    Recursively look for a key/value pair inside the given object and report
    the key's index relative to the object that directly contains it.

    For a stix object the index is the key's position in
    ``obj.object_properties()``; for a dict it is the key's position among
    the sorted keys. Lists, and containers that don't hold the pair
    directly, are searched element by element.

    :param obj: The object to search (list, dict, or stix object)
    :param search_key: A search key
    :param search_value: A search value
    :return: An index; -1 if the key and value aren't found
    """
    from .base import _STIXBase

    # Special-case keys which are numbers-as-strings, e.g. for cyber-observable
    # mappings. Use the int value of the key as the index.
    if search_key.isdigit():
        return int(search_key)

    is_stix_obj = isinstance(obj, _STIXBase)
    if is_stix_obj or isinstance(obj, dict):
        if search_key in obj and obj[search_key] == search_value:
            if is_stix_obj:
                ordered_names = obj.object_properties()
            else:
                ordered_names = sorted(obj)
            return _find(ordered_names, search_key)
        # Not held directly by this container: descend into its values
        return _find_property_in_seq(obj.values(), search_key, search_value)

    if isinstance(obj, list):
        return _find_property_in_seq(obj, search_key, search_value)

    # Don't know how to search this type
    return -1
|
2017-08-31 22:36:59 +02:00
|
|
|
|
|
|
|
|
2017-08-31 18:28:07 +02:00
|
|
|
def new_version(data, **kwargs):
    """Create a new version of a STIX object, by modifying properties and
    updating the ``modified`` property.

    Args:
        data: a mapping (dictionary or SDO/SRO instance) to create a new
            version of. It is not modified; a deep copy is updated instead.
        **kwargs: properties to set on the new version. A value of ``None``
            drops the property from the result. If ``modified`` is not
            supplied, the current timestamp is used.

    Returns:
        A new object of the same type as ``data`` with the changes applied.

    Raises:
        ValueError: if ``data`` is not a mapping.
        RevokeError: if ``data`` is already revoked.
        UnmodifiablePropertyError: if ``kwargs`` names any property listed in
            ``STIX_UNMOD_PROPERTIES``.
        InvalidValueError: if a supplied ``modified`` is not later than the
            existing one (compared at millisecond precision).
    """

    if not isinstance(data, Mapping):
        raise ValueError('cannot create new version of object of this type! '
                         'Try a dictionary or instance of an SDO or SRO class.')

    if data.get("revoked"):
        raise RevokeError("new_version")
    try:
        new_obj_inner = copy.deepcopy(data._inner)
    except AttributeError:
        # No ``_inner`` attribute (e.g. a plain dict): copy the mapping itself
        new_obj_inner = copy.deepcopy(data)

    # Make sure certain properties aren't trying to change
    unchangable_properties = [prop for prop in STIX_UNMOD_PROPERTIES
                              if prop in kwargs]
    if unchangable_properties:
        raise UnmodifiablePropertyError(unchangable_properties)

    cls = type(data)
    if 'modified' not in kwargs:
        kwargs['modified'] = get_timestamp()
    elif 'modified' in data:
        old_modified_property = parse_into_datetime(data.get('modified'), precision='millisecond')
        new_modified_property = parse_into_datetime(kwargs['modified'], precision='millisecond')
        if new_modified_property <= old_modified_property:
            # Note the trailing space after the first sentence: without it the
            # concatenated message reads "...datetime.It cannot...".
            raise InvalidValueError(cls, 'modified',
                                    "The new modified datetime cannot be before than or equal to the current modified datetime. "
                                    "It cannot be equal, as according to STIX 2 specification, objects that are different "
                                    "but have the same id and modified timestamp do not have defined consumer behavior.")
    new_obj_inner.update(kwargs)
    # Exclude properties with a value of 'None' in case data is not an instance of a _STIXBase subclass
    return cls(**{k: v for k, v in new_obj_inner.items() if v is not None})
|
2017-08-31 18:28:07 +02:00
|
|
|
|
|
|
|
|
|
|
|
def revoke(data):
    """Mark a STIX object as revoked.

    Returns:
        A new version of the object with ``revoked`` set to ``True``.

    Raises:
        ValueError: if ``data`` is not a mapping.
        RevokeError: if ``data`` is already revoked.
    """
    if isinstance(data, Mapping):
        already_revoked = data.get("revoked")
        if already_revoked:
            raise RevokeError("revoke")
        return new_version(data, revoked=True, allow_custom=True)

    raise ValueError('cannot revoke object of this type! Try a dictionary '
                     'or instance of an SDO or SRO class.')
|
2017-11-01 19:17:41 +01:00
|
|
|
|
|
|
|
|
|
|
|
def get_class_hierarchy_names(obj):
    """Return the class names found along the MRO of the object's class."""
    return [klass.__name__ for klass in obj.__class__.__mro__]
|
2017-11-21 20:39:50 +01:00
|
|
|
|
|
|
|
|
|
|
|
def remove_custom_stix(stix_obj):
    """Remove any custom STIX objects or properties.

    Warning: This function is a best effort utility, in that
    it will remove custom objects and properties based on the
    type names; i.e. if "x-" prefixes object types, and "x\\_"
    prefixes property types. According to the STIX2 spec,
    those naming conventions are a SHOULDs not MUSTs, meaning
    that valid custom STIX content may ignore those conventions
    and in effect render this utility function invalid when used
    on that STIX content.

    Args:
        stix_obj (dict OR python-stix obj): a single python-stix object
                                             or dict of a STIX object

    Returns:
        ``None`` if the whole object is custom, the original object itself if
        it has no custom properties, and otherwise a new version of the
        object with the custom properties removed.
    """

    if stix_obj["type"].startswith("x-"):
        # if entire object is custom, discard
        return None

    custom_props = [prop for prop in stix_obj if prop.startswith("x_")]
    if not custom_props:
        return stix_obj

    # Properties that are not supplied to new_version(): the unmodifiable
    # properties (new_version() re-uses those from the existing object) and
    # "modified" (left out so new_version() generates a fresh timestamp).
    non_supplied_props = STIX_UNMOD_PROPERTIES + ["modified"]

    # Everything else is passed through unchanged ...
    updates = {prop: stix_obj[prop] for prop in stix_obj
               if prop not in non_supplied_props}
    # ... except the custom properties, which are set to None so that
    # new_version() drops them.
    for prop in custom_props:
        updates[prop] = None

    new_obj = new_version(stix_obj, **updates)

    # Without an existing "modified" value there is no previous version
    # timestamp that the new one could collide with.
    if "modified" in stix_obj:
        old_modified = parse_into_datetime(stix_obj["modified"])
        while parse_into_datetime(new_obj["modified"]) == old_modified:
            # Prevents bug when fast computation allows multiple STIX object
            # versions to be created in single unit of time
            new_obj = new_version(stix_obj, **updates)

    return new_obj
|
2017-11-29 23:13:16 +01:00
|
|
|
|
|
|
|
|
2017-11-20 04:11:44 +01:00
|
|
|
def get_type_from_id(stix_id):
    """Return the part of ``stix_id`` that precedes the first ``--``.

    When the identifier contains no ``--`` it is returned whole.
    """
    type_part, _separator, _rest = stix_id.partition('--')
    return type_part
|