cti-python-stix2/stix2/sources/__init__.py

427 lines
13 KiB
Python

"""Python STIX 2.0 Sources
.. autosummary::
:toctree: sources
filesystem
filters
memory
taxii
|
"""
from abc import ABCMeta, abstractmethod
import uuid
from six import with_metaclass
from stix2.utils import deduplicate
def make_id():
return str(uuid.uuid4())
class DataStore(with_metaclass(ABCMeta)):
"""An implementer will create a concrete subclass from
this class for the specific DataStore.
Args:
source (DataSource): An existing DataSource to use
as this DataStore's DataSource component
sink (DataSink): An existing DataSink to use
as this DataStore's DataSink component
Attributes:
id (str): A unique UUIDv4 to identify this DataStore.
source (DataSource): An object that implements DataSource class.
sink (DataSink): An object that implements DataSink class.
"""
def __init__(self, source=None, sink=None):
super(DataStore, self).__init__()
self.id = make_id()
self.source = source
self.sink = sink
@abstractmethod
def get(self, stix_id): # pragma: no cover
"""Retrieve the most recent version of a single STIX object by ID.
Translate get() call to the appropriate DataSource call.
Args:
stix_id (str): the id of the STIX object to retrieve.
Returns:
stix_obj: the single most recent version of the STIX
object specified by the "id".
"""
return NotImplementedError()
@abstractmethod
def all_versions(self, stix_id): # pragma: no cover
"""Retrieve all versions of a single STIX object by ID.
Implement: Define a function that performs any custom behavior before
calling the associated DataSource all_versions() method.
Args:
stix_id (str): the id of the STIX object to retrieve.
Returns:
stix_objs (list): a list of STIX objects
"""
return NotImplementedError()
@abstractmethod
def query(self, query=None): # pragma: no cover
"""Retrieve STIX objects matching a set of filters.
Implement: Specific data source API calls, processing,
functionality required for retrieving query from the data source.
Define custom behavior before calling the associated DataSource query()
Args:
query (list): a list of filters (which collectively are the query)
to conduct search on.
Returns:
stix_objs (list): a list of STIX objects
"""
return NotImplementedError()
@abstractmethod
def add(self, stix_objs): # pragma: no cover
"""Method for storing STIX objects.
Define custom behavior before storing STIX objects using the associated
DataSink. Translates add() to the appropriate DataSink call.
Args:
stix_objs (list): a list of STIX objects
"""
return NotImplementedError()
class DataSink(with_metaclass(ABCMeta)):
"""An implementer will create a concrete subclass from
this class for the specific DataSink.
Attributes:
id (str): A unique UUIDv4 to identify this DataSink.
"""
def __init__(self):
super(DataSink, self).__init__()
self.id = make_id()
@abstractmethod
def add(self, stix_objs): # pragma: no cover
"""Method for storing STIX objects.
Implement: Specific data sink API calls, processing,
functionality required for adding data to the sink
Args:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
"""
raise NotImplementedError()
class DataSource(with_metaclass(ABCMeta)):
"""An implementer will create a concrete subclass from
this class for the specific DataSource.
Attributes:
id (str): A unique UUIDv4 to identify this DataSource.
filters (set): A collection of filters attached to this DataSource.
"""
def __init__(self):
super(DataSource, self).__init__()
self.id = make_id()
self.filters = set()
@abstractmethod
def get(self, stix_id): # pragma: no cover
"""
Implement: Specific data source API calls, processing,
functionality required for retrieving data from the data source
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
return a single object, the most recent version of the object
specified by the "id".
Returns:
stix_obj: the STIX object
"""
raise NotImplementedError()
@abstractmethod
def all_versions(self, stix_id): # pragma: no cover
"""
Implement: Similar to get() except returns list of all object versions
of the specified "id". In addition, implement the specific data
source API calls, processing, functionality required for retrieving
data from the data source.
Args:
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
return a list of objects, all the versions of the object
specified by the "id".
Returns:
stix_objs (list): a list of STIX objects
"""
raise NotImplementedError()
@abstractmethod
def query(self, query=None): # pragma: no cover
"""
Implement: The specific data source API calls, processing,
functionality required for retrieving query from the data source
Args:
query (list): a list of filters (which collectively are the query)
to conduct search on.
Returns:
stix_objs (list): a list of STIX objects
"""
raise NotImplementedError()
class CompositeDataSource(DataSource):
"""Controller for all the attached DataSources.
A user can have a single CompositeDataSource as an interface
the a set of DataSources. When an API call is made to the
CompositeDataSource, it is delegated to each of the (real)
DataSources that are attached to it.
DataSources can be attached to CompositeDataSource for a variety
of reasons, e.g. common filters, organization, less API calls.
Attributes:
data_sources (list): A dictionary of DataSource objects; to be
controlled and used by the Data Source Controller object.
"""
def __init__(self):
"""Create a new STIX Data Source.
Args:
"""
super(CompositeDataSource, self).__init__()
self.data_sources = []
def get(self, stix_id, _composite_filters=None):
"""Retrieve STIX object by STIX ID
Federated retrieve method, iterates through all DataSources
defined in the "data_sources" parameter. Each data source has a
specific API retrieve-like function and associated parameters. This
function does a federated retrieval and consolidation of the data
returned from all the STIX data sources.
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle.
Args:
stix_id (str): the id of the STIX object to retrieve.
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is attached
to another parent CompositeDataSource), not user supplied.
Returns:
stix_obj: the STIX object to be returned.
"""
if not self.has_data_sources():
raise AttributeError('CompositeDataSource has no data sources')
all_data = []
all_filters = set()
all_filters.update(self.filters)
if _composite_filters:
all_filters.update(_composite_filters)
# for every configured Data Source, call its retrieve handler
for ds in self.data_sources:
data = ds.get(stix_id=stix_id, _composite_filters=all_filters)
if data:
all_data.append(data)
# remove duplicate versions
if len(all_data) > 0:
all_data = deduplicate(all_data)
else:
return None
# reduce to most recent version
stix_obj = sorted(all_data, key=lambda k: k['modified'], reverse=True)[0]
return stix_obj
def all_versions(self, stix_id, _composite_filters=None):
"""Retrieve all versions of a STIX object by STIX ID.
Federated all_versions retrieve method - iterates through all
DataSources defined in "data_sources".
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle.
Args:
stix_id (str): id of the STIX objects to retrieve.
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is
attached to a parent CompositeDataSource), not user supplied.
Returns:
all_data (list): list of STIX objects that have the specified id
"""
if not self.has_data_sources():
raise AttributeError('CompositeDataSource has no data sources')
all_data = []
all_filters = set()
all_filters.update(self.filters)
if _composite_filters:
all_filters.update(_composite_filters)
# retrieve STIX objects from all configured data sources
for ds in self.data_sources:
data = ds.all_versions(stix_id=stix_id, _composite_filters=all_filters)
all_data.extend(data)
# remove exact duplicates (where duplicates are STIX 2.0 objects
# with the same 'id' and 'modified' values)
if len(all_data) > 0:
all_data = deduplicate(all_data)
return all_data
def query(self, query=None, _composite_filters=None):
"""Retrieve STIX objects that match a query.
Federate the query to all DataSources attached to the
Composite Data Source.
Args:
query (list): list of filters to search on.
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is
attached to a parent CompositeDataSource), not user supplied.
Returns:
all_data (list): list of STIX objects to be returned
"""
if not self.has_data_sources():
raise AttributeError('CompositeDataSource has no data sources')
if not query:
# don't mess with the query (i.e. convert to a set, as that's done
# within the specific DataSources that are called)
query = []
all_data = []
all_filters = set()
all_filters.update(self.filters)
if _composite_filters:
all_filters.update(_composite_filters)
# federate query to all attached data sources,
# pass composite filters to id
for ds in self.data_sources:
data = ds.query(query=query, _composite_filters=all_filters)
all_data.extend(data)
# remove exact duplicates (where duplicates are STIX 2.0
# objects with the same 'id' and 'modified' values)
if len(all_data) > 0:
all_data = deduplicate(all_data)
return all_data
def add_data_source(self, data_source):
"""Attach a DataSource to CompositeDataSource instance
Args:
data_source (DataSource): a stix2.DataSource to attach
to the CompositeDataSource
"""
if issubclass(data_source.__class__, DataSource):
if data_source.id not in [ds_.id for ds_ in self.data_sources]:
# check DataSource not already attached CompositeDataSource
self.data_sources.append(data_source)
else:
raise TypeError("DataSource (to be added) is not of type stix2.DataSource. DataSource type is '%s'" % type(data_source))
return
def add_data_sources(self, data_sources):
"""Attach list of DataSources to CompositeDataSource instance
Args:
data_sources (list): stix2.DataSources to attach to
CompositeDataSource
"""
for ds in data_sources:
self.add_data_source(ds)
return
def remove_data_source(self, data_source_id):
"""Remove DataSource from the CompositeDataSource instance
Args:
data_source_id (str): DataSource IDs.
"""
def _match(ds_id, candidate_ds_id):
return ds_id == candidate_ds_id
self.data_sources[:] = [ds for ds in self.data_sources if not _match(ds.id, data_source_id)]
return
def remove_data_sources(self, data_source_ids):
"""Remove DataSources from the CompositeDataSource instance
Args:
data_source_ids (list): DataSource IDs
"""
for ds_id in data_source_ids:
self.remove_data_source(ds_id)
return
def has_data_sources(self):
return len(self.data_sources)
def get_all_data_sources(self):
return self.data_sources