369 lines
14 KiB
Python
369 lines
14 KiB
Python
"""
Python STIX 2.0 Memory Source/Sink

TODO:
    Run through the tests again; there have been a lot of changes.

TODO:
    Use deduplicate() calls only when the memory corpus is dirty (has been
    added to); this can save a lot of time for successive queries.

Note:
    STIX versioning is not handled. The in-memory STIX data will, at any
    time, hold only one version of a STIX object. As such, when
    save_to_file() is called, the single versions of all the STIX objects
    are what is written to file.

"""
|
|
|
|
import json
|
|
import os
|
|
|
|
from stix2.base import _STIXBase
|
|
from stix2.core import Bundle, parse
|
|
from stix2.sources import DataSink, DataSource, DataStore
|
|
from stix2.sources.filters import Filter, apply_common_filters
|
|
|
|
|
|
def _add(store, stix_data=None, allow_custom=False, version=None):
    """Add STIX objects to MemoryStore/Sink.

    Adds STIX objects to an in-memory dictionary for fast lookup.
    Recursive function, breaks down STIX Bundles and lists.

    Bundles are always unpacked, whether they arrive as a JSON string, a
    python dictionary or a python STIX Bundle object; only the objects they
    contain are stored, never the bundle itself.

    Args:
        store: object whose ``_data`` dict receives the STIX objects, keyed
            by their "id".
        stix_data (list OR dict OR str OR STIX object): STIX objects to be
            added
        allow_custom (bool): whether to allow custom objects/properties or
            not. Default: False.
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Raises:
        TypeError: if ``stix_data`` is not a STIX object, dict, JSON string
            or list.

    """
    if isinstance(stix_data, str):
        # JSON encoded string of STIX content: parse it first, then treat
        # the result exactly like content that was handed in already parsed.
        stix_data = parse(stix_data, allow_custom=allow_custom, version=version)

    if isinstance(stix_data, list):
        # STIX objects are in a list - recurse on each object
        for stix_obj in stix_data:
            _add(store, stix_obj, allow_custom=allow_custom, version=version)

    elif isinstance(stix_data, (_STIXBase, dict)):
        if stix_data["type"] == "bundle":
            # A bundle is only a container (python Bundle object or JSON
            # dict alike): store its members, not the bundle itself.
            for stix_obj in stix_data.get("objects", []):
                _add(store, stix_obj, allow_custom=allow_custom, version=version)
        else:
            # a single python STIX object or JSON STIX object; it is stored
            # in the form in which it was supplied
            store._data[stix_data["id"]] = stix_data

    else:
        raise TypeError("stix_data must be a STIX object (or list of), JSON formatted STIX (or list of), or a JSON formatted STIX bundle")
|
|
|
|
|
|
class MemoryStore(DataStore):
    """In-memory STIX object store backed by a plain dictionary.

    A MemoryStore pairs one MemorySource with one MemorySink, both of which
    operate on the very same dictionary that the store owns.

    Note: Building a MemoryStore out of an already existing MemorySource
        and MemorySink is deliberately not supported, because the two could
        disagree about the data they hold. Creating a fresh MemoryStore is
        just as easy.

    Args:
        stix_data (list OR dict OR STIX object): STIX content to be added
        allow_custom (bool): whether to allow custom objects/properties or
            not. Default: False.
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Attributes:
        _data (dict): the in-memory dict that holds STIX objects
        source (MemorySource): MemorySource
        sink (MemorySink): MemorySink

    """
    def __init__(self, stix_data=None, allow_custom=False, version=None):
        super(MemoryStore, self).__init__()
        self._data = {}

        if stix_data:
            _add(self, stix_data, allow_custom=allow_custom, version=version)

        # Source and sink are handed the store's own dict (_store=True), so
        # all three objects see the same data.
        shared_kwargs = dict(
            stix_data=self._data,
            allow_custom=allow_custom,
            version=version,
            _store=True,
        )
        self.source = MemorySource(**shared_kwargs)
        self.sink = MemorySink(**shared_kwargs)

    def save_to_file(self, file_path, allow_custom=False):
        """Write STIX objects from in-memory dictionary to JSON file, as a STIX
        Bundle.

        Args:
            file_path (str): file path to write STIX data to
            allow_custom (bool): whether to allow custom objects/properties or
                not. Default: False.

        """
        sink = self.sink
        return sink.save_to_file(file_path=file_path, allow_custom=allow_custom)

    def load_from_file(self, file_path, allow_custom=False, version=None):
        """Load STIX data from JSON file.

        The file is expected to contain either a single JSON STIX object or
        a JSON STIX bundle.

        Args:
            file_path (str): file path to load STIX data from
            allow_custom (bool): whether to allow custom objects/properties or
                not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        """
        source = self.source
        return source.load_from_file(file_path=file_path, allow_custom=allow_custom, version=version)

    def get(self, stix_id, _composite_filters=None):
        """Retrieve the most recent version of a single STIX object by ID.

        Delegates to the get() of the attached MemorySource.

        Args:
            stix_id (str): the id of the STIX object to retrieve.
            _composite_filters (set): set of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            stix_obj: the single most recent version of the STIX
                object specified by the "id".

        """
        source = self.source
        return source.get(stix_id, _composite_filters=_composite_filters)

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve all versions of a single STIX object by ID.

        Delegates to the all_versions() of the attached MemorySource.

        Args:
            stix_id (str): the id of the STIX object to retrieve.
            _composite_filters (set): set of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            stix_objs (list): a list of STIX objects

        """
        source = self.source
        return source.all_versions(stix_id, _composite_filters=_composite_filters)

    def query(self, query=None, _composite_filters=None):
        """Retrieve STIX objects matching a set of filters.

        Delegates to the query() of the attached MemorySource.

        Args:
            query (list): a list of filters (which collectively are the query)
                to conduct search on.
            _composite_filters (set): set of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            stix_objs (list): a list of STIX objects

        """
        source = self.source
        return source.query(query=query, _composite_filters=_composite_filters)

    def add(self, stix_objs, allow_custom=False, version=None):
        """Store STIX objects.

        Delegates to the add() of the attached MemorySink.

        Args:
            stix_objs (list): a list of STIX objects
            allow_custom (bool): whether to allow custom objects/properties or
                not. Default: False.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        """
        sink = self.sink
        return sink.add(stix_objs, allow_custom=allow_custom, version=version)
|
|
|
|
|
|
class MemorySink(DataSink):
    """Interface for adding/pushing STIX objects to an in-memory dictionary.

    Designed to be paired with a MemorySource, together as the two
    components of a MemoryStore.

    Args:
        stix_data (dict OR list): valid STIX 2.0 content in
            bundle or a list.
        _store (bool): if the MemorySink is a part of a DataStore,
            in which case "stix_data" is a direct reference to
            shared memory with DataSource. Not user supplied
        allow_custom (bool): whether to allow custom objects/properties or
            not. Default: False.
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Attributes:
        _data (dict): the in-memory dict that holds STIX objects.
            If a part of a MemoryStore, the dict is shared with
            a MemorySource

    """
    def __init__(self, stix_data=None, allow_custom=False, version=None, _store=False):
        super(MemorySink, self).__init__()
        self._data = {}

        if _store:
            # Part of a MemoryStore: adopt the store's dict by reference
            # instead of copying, so the paired source sees every addition.
            self._data = stix_data
        elif stix_data:
            _add(self, stix_data, allow_custom=allow_custom, version=version)

    def add(self, stix_data, allow_custom=False, version=None):
        _add(self, stix_data, allow_custom=allow_custom, version=version)
    # add() is a thin wrapper, so it reuses the documentation of _add()
    add.__doc__ = _add.__doc__

    def save_to_file(self, file_path, allow_custom=False):
        file_path = os.path.abspath(file_path)
        parent_dir = os.path.dirname(file_path)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        # On Python 3 dict.values() is a view, not a list; hand Bundle a real
        # list so the stored objects are taken as the bundle's objects
        # (NOTE(review): Bundle appears to special-case list arguments only -
        # confirm against stix2.core.Bundle).
        # The bundle is serialized *before* the file is opened so that a
        # serialization error cannot leave a truncated/empty file behind.
        serialized = str(Bundle(list(self._data.values()), allow_custom=allow_custom))

        with open(file_path, "w") as f:
            f.write(serialized)
    # same contract as MemoryStore.save_to_file(), so share its docstring
    save_to_file.__doc__ = MemoryStore.save_to_file.__doc__
|
|
|
|
|
|
class MemorySource(DataSource):
    """Interface for searching/retrieving STIX objects from an in-memory
    dictionary.

    Designed to be paired with a MemorySink, together as the two
    components of a MemoryStore.

    Args:
        stix_data (dict OR list OR STIX object): valid STIX 2.0 content in
            bundle or list.
        _store (bool): if the MemorySource is a part of a DataStore,
            in which case "stix_data" is a direct reference to shared
            memory with DataSink. Not user supplied
        allow_custom (bool): whether to allow custom objects/properties or
            not. Default: False.
        version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
            None, use latest version.

    Attributes:
        _data (dict): the in-memory dict that holds STIX objects.
            If a part of a MemoryStore, the dict is shared with
            a MemorySink

    """
    def __init__(self, stix_data=None, allow_custom=False, version=None, _store=False):
        super(MemorySource, self).__init__()
        self._data = {}

        if _store:
            # Part of a MemoryStore: adopt the store's dict by reference
            # instead of copying, so additions made through the paired sink
            # are visible here.
            self._data = stix_data
        elif stix_data:
            _add(self, stix_data, allow_custom=allow_custom, version=version)

    def get(self, stix_id, _composite_filters=None):
        """Retrieve STIX object from in-memory dict via STIX ID.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            _composite_filters (set): set of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (dict OR STIX object): STIX object that has the supplied
                ID, or None if there is no match. As the MemoryStore (i.e.
                MemorySink) adds STIX objects to memory as they are supplied
                (either as python dictionary or STIX object), it is returned
                in the same form as it was added

        """
        if _composite_filters is None:
            # if get call is only based on 'id', no need to search, just
            # retrieve from dict
            return self._data.get(stix_id)

        # if there are filters from the composite level, process full query
        query = [Filter("id", "=", stix_id)]
        all_data = self.query(query=query, _composite_filters=_composite_filters)

        if not all_data:
            return None

        if len(all_data) == 1:
            # Nothing to compare against; also avoids requiring a "modified"
            # property, which not every STIX object carries (e.g. marking
            # definitions in the STIX 2.0 specification).
            return all_data[0]

        # reduce to most recent version, i.e. the latest "modified" value
        return max(all_data, key=lambda obj: obj["modified"])

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve STIX objects from in-memory dict via STIX ID, all versions of it

        Note: Since Memory sources/sinks don't handle multiple versions of a
            STIX object, this operation is unnecessary. Translate call to get().

        Args:
            stix_id (str): The STIX ID of the STIX 2 object to retrieve.
            _composite_filters (set): set of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that have the supplied ID; empty
                if there is no match. As the MemoryStore (i.e. MemorySink)
                adds STIX objects to memory as they are supplied (either as
                python dictionary or STIX object), they are returned in the
                same form as they were added

        """
        stix_obj = self.get(stix_id=stix_id, _composite_filters=_composite_filters)
        # a miss must yield an empty list, not a list holding None
        return [] if stix_obj is None else [stix_obj]

    def query(self, query=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this MemorySource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (set): set of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. As the MemoryStore (i.e. MemorySink) adds STIX objects
                to memory as they are supplied (either as python dictionary
                or STIX object), they are returned in the same form as they
                were added.

        """
        if query is None:
            query = set()
        else:
            if not isinstance(query, list):
                # make sure don't make set from a Filter object,
                # need to make a set from a list of Filter objects (even if just one Filter)
                query = [query]
            query = set(query)

        # combine all query filters
        if self.filters:
            query.update(self.filters)
        if _composite_filters:
            query.update(_composite_filters)

        # Apply STIX common property filters.
        all_data = list(apply_common_filters(self._data.values(), query))

        return all_data

    def load_from_file(self, file_path, allow_custom=False, version=None):
        file_path = os.path.abspath(file_path)
        # context manager guarantees the file handle is closed, even if the
        # JSON cannot be decoded
        with open(file_path, "r") as f:
            stix_data = json.load(f)
        _add(self, stix_data, allow_custom=allow_custom, version=version)
    # same contract as MemoryStore.load_from_file(), so share its docstring
    load_from_file.__doc__ = MemoryStore.load_from_file.__doc__
|