""" Python STIX 2.0 Memory Source/Sink TODO: Run through tests again, lot of changes. TODO: Use deduplicate() calls only when memory corpus is dirty (been added to) can save a lot of time for successive queries Note: Not worrying about STIX versioning. The in memory STIX data at anytime will only hold one version of a STIX object. As such, when save() is called, the single versions of all the STIX objects are what is written to file. """ import json import os from stix2.base import _STIXBase from stix2.core import Bundle, parse from stix2.sources import DataSink, DataSource, DataStore from stix2.sources.filters import Filter, apply_common_filters def _add(store, stix_data=None, allow_custom=False, version=None): """Add STIX objects to MemoryStore/Sink. Adds STIX objects to an in-memory dictionary for fast lookup. Recursive function, breaks down STIX Bundles and lists. Args: stix_data (list OR dict OR STIX object): STIX objects to be added allow_custom (bool): whether to allow custom objects/properties or not. Default: False. version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If None, use latest version. """ if isinstance(stix_data, _STIXBase): # adding a python STIX object store._data[stix_data["id"]] = stix_data elif isinstance(stix_data, dict): if stix_data["type"] == "bundle": # adding a json bundle - so just grab STIX objects for stix_obj in stix_data.get("objects", []): _add(store, stix_obj, allow_custom=allow_custom, version=version) else: # adding a json STIX object store._data[stix_data["id"]] = stix_data elif isinstance(stix_data, str): # adding json encoded string of STIX content stix_data = parse(stix_data, allow_custom=allow_custom, version=version) if stix_data["type"] == "bundle": # recurse on each STIX object in bundle for stix_obj in stix_data.get("objects", []): _add(store, stix_obj, allow_custom=allow_custom, version=version) else: _add(store, stix_data, allow_custom=allow_custom, version=version) elif isinstance(stix_data, list): # STIX objects are in a list- recurse on each object for stix_obj in stix_data: _add(store, stix_obj, allow_custom=allow_custom, version=version) else: raise TypeError("stix_data must be a STIX object (or list of), JSON formatted STIX (or list of), or a JSON formatted STIX bundle") class MemoryStore(DataStore): """Interface to an in-memory dictionary of STIX objects. MemoryStore is a wrapper around a paired MemorySink and MemorySource. Note: It doesn't make sense to create a MemoryStore by passing in existing MemorySource and MemorySink because there could be data concurrency issues. As well, just as easy to create new MemoryStore. Args: stix_data (list OR dict OR STIX object): STIX content to be added allow_custom (bool): whether to allow custom objects/properties or not. Default: False. version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If None, use latest version. Attributes: _data (dict): the in-memory dict that holds STIX objects source (MemorySource): MemorySource sink (MemorySink): MemorySink """ def __init__(self, stix_data=None, allow_custom=False, version=None): super(MemoryStore, self).__init__() self._data = {} if stix_data: _add(self, stix_data, allow_custom=allow_custom, version=version) self.source = MemorySource(stix_data=self._data, allow_custom=allow_custom, version=version, _store=True) self.sink = MemorySink(stix_data=self._data, allow_custom=allow_custom, version=version, _store=True) def save_to_file(self, file_path, allow_custom=False): """Write SITX objects from in-memory dictionary to JSON file, as a STIX Bundle. Args: file_path (str): file path to write STIX data to allow_custom (bool): whether to allow custom objects/properties or not. Default: False. """ return self.sink.save_to_file(file_path=file_path, allow_custom=allow_custom) def load_from_file(self, file_path, allow_custom=False, version=None): """Load STIX data from JSON file. File format is expected to be a single JSON STIX object or JSON STIX bundle. Args: file_path (str): file path to load STIX data from allow_custom (bool): whether to allow custom objects/properties or not. Default: False. version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If None, use latest version. """ return self.source.load_from_file(file_path=file_path, allow_custom=allow_custom, version=version) def get(self, stix_id, _composite_filters=None): """Retrieve the most recent version of a single STIX object by ID. Translate get() call to the appropriate DataSource call. Args: stix_id (str): the id of the STIX object to retrieve. _composite_filters (set): set of filters passed from the parent CompositeDataSource, not user supplied Returns: stix_obj: the single most recent version of the STIX object specified by the "id". """ return self.source.get(stix_id, _composite_filters=_composite_filters) def all_versions(self, stix_id, _composite_filters=None): """Retrieve all versions of a single STIX object by ID. Translate all_versions() call to the appropriate DataSource call. Args: stix_id (str): the id of the STIX object to retrieve. _composite_filters (set): set of filters passed from the parent CompositeDataSource, not user supplied Returns: stix_objs (list): a list of STIX objects """ return self.source.all_versions(stix_id, _composite_filters=_composite_filters) def query(self, query=None, _composite_filters=None): """Retrieve STIX objects matching a set of filters. Translates query() to appropriate DataStore call. Args: query (list): a list of filters (which collectively are the query) to conduct search on. _composite_filters (set): set of filters passed from the parent CompositeDataSource, not user supplied Returns: stix_objs (list): a list of STIX objects """ return self.source.query(query=query, _composite_filters=_composite_filters) def add(self, stix_objs, allow_custom=False, version=None): """Store STIX objects. Translates add() to the appropriate DataSink call. Args: stix_objs (list): a list of STIX objects """ return self.sink.add(stix_objs, allow_custom=allow_custom, version=version) class MemorySink(DataSink): """Interface for adding/pushing STIX objects to an in-memory dictionary. Designed to be paired with a MemorySource, together as the two components of a MemoryStore. Args: stix_data (dict OR list): valid STIX 2.0 content in bundle or a list. _store (bool): if the MemorySink is a part of a DataStore, in which case "stix_data" is a direct reference to shared memory with DataSource. Not user supplied allow_custom (bool): whether to allow custom objects/properties or not. Default: False. Attributes: _data (dict): the in-memory dict that holds STIX objects. If apart of a MemoryStore, dict is shared between with a MemorySource """ def __init__(self, stix_data=None, allow_custom=False, version=None, _store=False): super(MemorySink, self).__init__() self._data = {} if _store: self._data = stix_data elif stix_data: _add(self, stix_data, allow_custom=allow_custom, version=version) def add(self, stix_data, allow_custom=False, version=None): _add(self, stix_data, allow_custom=allow_custom, version=version) add.__doc__ = _add.__doc__ def save_to_file(self, file_path, allow_custom=False): file_path = os.path.abspath(file_path) if not os.path.exists(os.path.dirname(file_path)): os.makedirs(os.path.dirname(file_path)) with open(file_path, "w") as f: f.write(str(Bundle(self._data.values(), allow_custom=allow_custom))) save_to_file.__doc__ = MemoryStore.save_to_file.__doc__ class MemorySource(DataSource): """Interface for searching/retrieving STIX objects from an in-memory dictionary. Designed to be paired with a MemorySink, together as the two components of a MemoryStore. Args: stix_data (dict OR list OR STIX object): valid STIX 2.0 content in bundle or list. _store (bool): if the MemorySource is a part of a DataStore, in which case "stix_data" is a direct reference to shared memory with DataSink. Not user supplied allow_custom (bool): whether to allow custom objects/properties or not. Default: False. Attributes: _data (dict): the in-memory dict that holds STIX objects. If apart of a MemoryStore, dict is shared between with a MemorySink """ def __init__(self, stix_data=None, allow_custom=False, version=None, _store=False): super(MemorySource, self).__init__() self._data = {} if _store: self._data = stix_data elif stix_data: _add(self, stix_data, allow_custom=allow_custom, version=version) def get(self, stix_id, _composite_filters=None): """Retrieve STIX object from in-memory dict via STIX ID. Args: stix_id (str): The STIX ID of the STIX object to be retrieved. _composite_filters (set): set of filters passed from the parent CompositeDataSource, not user supplied Returns: (dict OR STIX object): STIX object that has the supplied ID. As the MemoryStore(i.e. MemorySink) adds STIX objects to memory as they are supplied (either as python dictionary or STIX object), it is returned in the same form as it as added """ if _composite_filters is None: # if get call is only based on 'id', no need to search, just retrieve from dict try: stix_obj = self._data[stix_id] except KeyError: stix_obj = None return stix_obj # if there are filters from the composite level, process full query query = [Filter("id", "=", stix_id)] all_data = self.query(query=query, _composite_filters=_composite_filters) if all_data: # reduce to most recent version stix_obj = sorted(all_data, key=lambda k: k['modified'])[0] return stix_obj else: return None def all_versions(self, stix_id, _composite_filters=None): """Retrieve STIX objects from in-memory dict via STIX ID, all versions of it Note: Since Memory sources/sinks don't handle multiple versions of a STIX object, this operation is unnecessary. Translate call to get(). Args: stix_id (str): The STIX ID of the STIX 2 object to retrieve. _composite_filters (set): set of filters passed from the parent CompositeDataSource, not user supplied Returns: (list): list of STIX objects that has the supplied ID. As the MemoryStore(i.e. MemorySink) adds STIX objects to memory as they are supplied (either as python dictionary or STIX object), it is returned in the same form as it as added """ return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)] def query(self, query=None, _composite_filters=None): """Search and retrieve STIX objects based on the complete query. A "complete query" includes the filters from the query, the filters attached to this MemorySource, and any filters passed from a CompositeDataSource (i.e. _composite_filters). Args: query (list): list of filters to search on _composite_filters (set): set of filters passed from the CompositeDataSource, not user supplied Returns: (list): list of STIX objects that matches the supplied query. As the MemoryStore(i.e. MemorySink) adds STIX objects to memory as they are supplied (either as python dictionary or STIX object), it is returned in the same form as it as added. """ if query is None: query = set() else: if not isinstance(query, list): # make sure don't make set from a Filter object, # need to make a set from a list of Filter objects (even if just one Filter) query = [query] query = set(query) # combine all query filters if self.filters: query.update(self.filters) if _composite_filters: query.update(_composite_filters) # Apply STIX common property filters. all_data = list(apply_common_filters(self._data.values(), query)) return all_data def load_from_file(self, file_path, allow_custom=False, version=None): file_path = os.path.abspath(file_path) stix_data = json.load(open(file_path, "r")) _add(self, stix_data, allow_custom=allow_custom, version=version) load_from_file.__doc__ = MemoryStore.load_from_file.__doc__