cti-python-stix2/stix2/sources/__init__.py

510 lines
16 KiB
Python
Raw Normal View History

"""
Python STIX 2.0 Sources

Classes:
    DataStore
    DataSink
    DataSource
    CompositeDataSource

TODO: Test everything

Notes:
    add_filter(), remove_filter(), deduplicate() - if these functions remain
    the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
    make those functions an interface to inherit?

"""
2017-05-24 17:25:40 +02:00
import uuid
2017-05-26 21:24:33 +02:00
from six import iteritems
2017-08-31 20:03:12 +02:00
from stix2.sources.filters import (FILTER_OPS, FILTER_VALUE_TYPES,
STIX_COMMON_FIELDS, STIX_COMMON_FILTERS_MAP)
2017-08-09 20:49:06 +02:00
2017-05-24 17:25:40 +02:00
def make_id():
    """Return a newly generated random (version 4) UUID as a string."""
    fresh_uuid = uuid.uuid4()
    return str(fresh_uuid)
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataStore(object):
    """Pair a data source with a data sink behind one object.

    An implementer will create a concrete subclass from this abstract class
    for the specific data store. Read calls (get(), all_versions(), query())
    are forwarded to ``source``; add() is forwarded to ``sink``.

    Attributes:
        id (str): A unique UUIDv4 to identify this DataStore.
        source (DataSource): An object that implements DataSource class.
        sink (DataSink): An object that implements DataSink class.

    """
    def __init__(self, source=None, sink=None):
        self.id = make_id()
        self.source = source
        self.sink = sink

    def _require_source(self):
        """Return the attached source, or raise if none was configured."""
        source = getattr(self, "source", None)
        if source is None:
            # Same exception type the bare attribute access on None used to
            # raise, but with a message that names the actual problem.
            raise AttributeError(
                "{0} has no data source to query".format(type(self).__name__)
            )
        return source

    def _require_sink(self):
        """Return the attached sink, or raise if none was configured."""
        sink = getattr(self, "sink", None)
        if sink is None:
            raise AttributeError(
                "{0} has no data sink to put objects in".format(type(self).__name__)
            )
        return sink

    def get(self, stix_id):
        """Retrieve the most recent version of a single STIX object by ID.

        Notes:
            Translate API get() call to the appropriate DataSource call.

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve.

        Returns:
            Whatever ``source.get(stix_id)`` returns; intended to be the
            single most recent version of the STIX object specified by
            the "id".

        Raises:
            AttributeError: if no source is attached to this DataStore.

        """
        return self._require_source().get(stix_id)

    def all_versions(self, stix_id):
        """Retrieve all versions of a single STIX object by ID.

        Notes:
            Translate all_versions() call to the appropriate DataSource call.

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve.

        Returns:
            Whatever ``source.all_versions(stix_id)`` returns; intended to
            be a list of STIX objects.

        Raises:
            AttributeError: if no source is attached to this DataStore.

        """
        return self._require_source().all_versions(stix_id)

    def query(self, query):
        """Retrieve STIX objects matching a set of filters.

        Notes:
            The query is handed to the source as the ``query`` keyword
            argument.

        Args:
            query (list): a list of filters (which collectively are the query)
                to conduct search on.

        Returns:
            Whatever ``source.query(query=query)`` returns; intended to be
            a list of STIX objects.

        Raises:
            AttributeError: if no source is attached to this DataStore.

        """
        return self._require_source().query(query=query)

    def add(self, stix_objs):
        """Store STIX objects.

        Notes:
            Translate add() to the appropriate DataSink call.

        Args:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Returns:
            Whatever ``sink.add(stix_objs)`` returns.

        Raises:
            AttributeError: if no sink is attached to this DataStore.

        """
        return self._require_sink().add(stix_objs)
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataSink(object):
    """Base class that every concrete data sink derives from.

    The base class only assigns an identifier; storing objects is left
    entirely to subclasses.

    Attributes:
        id (str): A unique UUIDv4 to identify this DataSink.

    """
    def __init__(self):
        self.id = make_id()

    def add(self, stix_objs):
        """Store STIX objects.

        Notes:
            Subclasses override this with the sink-specific API calls and
            processing needed to persist the objects. The base
            implementation always raises.

        Args:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError()
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataSource(object):
    """Abstract class for defining a data source.

    Intended for subclassing into different source components. Subclasses
    implement get(), all_versions() and query(); filter bookkeeping,
    common-property filtering and de-duplication are provided here.

    Attributes:
        id (str): A unique UUIDv4 to identify this DataSource.
        filters (set): A collection of filters present in this DataSource.

    """
    def __init__(self):
        self.id = make_id()
        self.filters = set()

    def get(self, stix_id, _composite_filters=None):
        """Retrieve the most recent version of a single STIX object by ID.

        Notes:
            Subclasses implement the specific data source API calls,
            processing and functionality required for retrieving data
            from the data source. The base implementation always raises.

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve. Should
                return a single object, the most recent version of the object
                specified by the "id".

            _composite_filters (list): list of filters passed along from
                the Composite Data Source.

        Returns:
            stix_obj (dictionary): the STIX object to be returned

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError()

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve all versions of a single STIX object by ID.

        Notes:
            Similar to get() except returns list of all object versions of
            the specified "id". Subclasses implement the specific data
            source API calls, processing and functionality required for
            retrieving data from the data source. The base implementation
            always raises.

        Args:
            stix_id (str): The id of the STIX 2.0 object to retrieve. Should
                return a list of objects, all the versions of the object
                specified by the "id".

            _composite_filters (list): list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError()

    def query(self, query, _composite_filters=None):
        """Retrieve STIX objects matching a set of filters.

        Notes:
            Subclasses implement the specific data source API calls,
            processing and functionality required for retrieving query
            results from the data source. The base implementation always
            raises.

        Args:
            query (list): a list of filters (which collectively are the query)
                to conduct search on

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Raises:
            NotImplementedError: always, in this base class.

        """
        raise NotImplementedError()

    def add_filters(self, filters):
        """Add multiple filters to be applied to all queries for STIX objects.

        Each filter is validated individually by add_filter(); the first
        invalid filter raises and the filters after it are not added.

        Args:
            filters (list): list of filter objects to add to the Data Source.
                Each must expose ``field``, ``op`` and ``value`` attributes.

        Raises:
            ValueError: if any filter fails the add_filter() checks.

        """
        for filter_ in filters:
            self.add_filter(filter_)

    def add_filter(self, filter):
        """Add a filter to be applied to all queries for STIX objects.

        Args:
            filter: filter to add to the Data Source. Must expose ``field``,
                ``op`` and ``value`` attributes and be hashable, since it is
                stored in the ``filters`` set.

        Raises:
            ValueError: if the filter's field is not in STIX_COMMON_FIELDS,
                its operator is not in FILTER_OPS, or the exact type of its
                value is not in FILTER_VALUE_TYPES.

        """
        # check filter field is a supported STIX 2.0 common field
        if filter.field not in STIX_COMMON_FIELDS:
            raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")

        # check filter operator is supported
        if filter.op not in FILTER_OPS:
            raise ValueError("Filter operation (from 'op' field) not supported")

        # check filter value type is supported (exact type match on purpose:
        # subclasses of the listed types are rejected, as before)
        if type(filter.value) not in FILTER_VALUE_TYPES:
            raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")

        self.filters.add(filter)

    def apply_common_filters(self, stix_objs, query):
        """Evaluate filters against a set of STIX 2.0 objects.

        Supports only STIX 2.0 common property fields. An object is kept
        only if every filter in the query matches it.

        Args:
            stix_objs (list): list of STIX objects to apply the query to
            query (list): list of filters (combined form complete query)

        Returns:
            (list): list of STIX objects that successfully evaluate against
                the query, in their original order.

        Raises:
            ValueError: if a filter's field is not in STIX_COMMON_FIELDS, or
                the field's match function reports (by returning -1) that
                the filter operator is not supported for that field.

        """
        filtered_stix_objs = []

        # evaluate objects against filter
        for stix_obj in stix_objs:
            clean = True
            for filter_ in query:
                # only common fields can be filtered on; anything else is
                # an error rather than a silently ignored filter
                if filter_.field not in STIX_COMMON_FIELDS:
                    raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))

                # For properties like granular_markings and
                # external_references the filter field is a dotted path;
                # only its first segment names the top-level property
                # (and the entry in STIX_COMMON_FILTERS_MAP).
                field = filter_.field.split(".")[0]

                # check filter "field" is in STIX object - if cant be
                # applied due to STIX object, STIX object is discarded
                # (i.e. did not make it through the filter)
                if field not in stix_obj:
                    clean = False
                    break

                match = STIX_COMMON_FILTERS_MAP[field](filter_, stix_obj)
                if match == -1:
                    # -1 is the match functions' "operator unsupported" flag
                    raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))
                if not match:
                    clean = False
                    break

            # if object unmarked after all filters, add it
            if clean:
                filtered_stix_objs.append(stix_obj)

        return filtered_stix_objs

    def deduplicate(self, stix_obj_list):
        """Deduplicate a list of STIX objects to a unique set

        Reduces a set of STIX objects to unique set by looking
        at 'id' and 'modified' fields - as a unique object version
        is determined by the combination of those fields. Objects that
        have no 'modified' property (e.g. STIX marking-definition objects)
        are keyed on 'id' and 'created' instead. When several objects share
        a key, the last one in the input wins.

        Args:
            stix_obj_list (list): list of STIX objects (dicts)

        Returns:
            A list with a unique set of the passed list of STIX objects.

        Raises:
            KeyError: if an object lacks 'id', or lacks both 'modified'
                and 'created'.

        """
        unique_objs = {}

        for obj in stix_obj_list:
            try:
                version = obj['modified']
            except KeyError:
                # no 'modified' property on this object: fall back to
                # 'created' so it can still be told apart by version
                version = obj['created']
            unique_objs[(obj['id'], version)] = obj

        return list(unique_objs.values())
2017-05-24 17:25:40 +02:00
class CompositeDataSource(DataSource):
    """Controller for all the defined/configured STIX Data Sources.

    E.g. a user can define n Data Sources - creating Data Source (objects)
    for each. There is only one instance of this for any Python STIX 2.0
    application.

    Attributes:
        data_sources (dict): A dictionary of DataSource objects, keyed by
            each Data Source's ``id``; to be controlled and used by the
            Data Source Controller object.

    """
    def __init__(self):
        """Create a new, empty Composite Data Source (no attached sources)."""
        super(CompositeDataSource, self).__init__()
        self.data_sources = {}

    def _merged_filters(self, _composite_filters):
        """Return a new set holding this object's filters plus any passed in."""
        merged = set(self.filters)
        if _composite_filters:
            # set.update() mutates in place and returns None, so its result
            # must never be assigned; update first, return the set after.
            merged.update(_composite_filters)
        return merged

    @staticmethod
    def _version_stamp(stix_obj):
        """Return the value used to order versions of one STIX object."""
        try:
            return stix_obj['modified']
        except KeyError:
            # objects without a 'modified' property are ordered by 'created'
            return stix_obj['created']

    def get(self, stix_id, _composite_filters=None):
        """Retrieve STIX object by 'id'

        Federated retrieve method - iterates through all STIX data sources
        defined in the "data_sources" parameter. Each data source has a
        specific API retrieve-like function and associated parameters. This
        function does a federated retrieval and consolidation of the data
        returned from all the STIX data sources.

        Notes:
            A composite data source will pass its attached filters (merged
            with any filters handed in by a parent composite) to each
            configured data source, pushing filtering to them to handle.

        Args:
            stix_id (str): the id of the STIX object to retrieve.

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            stix_obj (dict): the most recent version found across all
                attached data sources, or None if no data source returned
                an object.

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.get_all_data_sources():
            raise AttributeError('CompositeDataSource has no data sources')

        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # for every configured Data Source, call its retrieve handler
        for ds in self.data_sources.values():
            data = ds.get(stix_id=stix_id, _composite_filters=list(all_filters))
            # a source that does not hold the object contributes nothing
            if data:
                all_data.append(data)

        if not all_data:
            return None

        # remove duplicate versions
        all_data = self.deduplicate(all_data)

        # reduce to most recent version
        return max(all_data, key=self._version_stamp)

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve STIX objects by 'id'

        Federated all_versions retrieve method - iterates through all STIX
        data sources defined in "data_sources"

        Notes:
            A composite data source will pass its attached filters (merged
            with any filters handed in by a parent composite) to each
            configured data source, pushing filtering to them to handle

        Args:
            stix_id (str): id of the STIX objects to retrieve

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            all_data (list): list of STIX objects that have the specified id

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.get_all_data_sources():
            raise AttributeError('CompositeDataSource has no data sources')

        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # retrieve STIX objects from all configured data sources
        for ds in self.data_sources.values():
            data = ds.all_versions(stix_id=stix_id, _composite_filters=list(all_filters))
            all_data.extend(data)

        # remove exact duplicates (where duplicates are STIX 2.0 objects
        # with the same 'id' and 'modified' values)
        if len(all_data) > 0:
            all_data = self.deduplicate(all_data)

        return all_data

    def query(self, query=None, _composite_filters=None):
        """Federate the query to all Data Sources attached to the
        Composite Data Source.

        Args:
            query (list): list of filters to search on. A missing or empty
                query is sent to the data sources as an empty list.

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            all_data (list): list of STIX objects to be returned

        Raises:
            AttributeError: if no data sources are attached.

        """
        if not self.get_all_data_sources():
            raise AttributeError('CompositeDataSource has no data sources')

        if not query:
            query = []

        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # federate query to all attached data sources,
        # pass composite filters to each
        for ds in self.data_sources.values():
            data = ds.query(query=query, _composite_filters=list(all_filters))
            all_data.extend(data)

        # remove exact duplicates (where duplicates are STIX 2.0
        # objects with the same 'id' and 'modified' values)
        if len(all_data) > 0:
            all_data = self.deduplicate(all_data)

        return all_data

    def add_data_source(self, data_sources):
        """Add/attach Data Source to the Composite Data Source instance

        Objects that are not DataSource instances, and Data Sources whose
        id is already attached, are silently skipped.

        Args:
            data_sources (list): a list of Data Source objects to attach
                to the Composite Data Source. A single object that is not
                a list is treated as a one-element list.

        """
        if not isinstance(data_sources, list):
            data_sources = [data_sources]

        for ds in data_sources:
            if not isinstance(ds, DataSource):
                # the Data Source object is not a proper subclass
                # of DataSource Abstract Class
                # TODO: maybe log error?
                continue

            if ds.id in self.data_sources:
                # data source already attached to Composite Data Source
                continue

            # add data source to Composite Data Source
            # (its id will be its key identifier)
            self.data_sources[ds.id] = ds

        return

    def remove_data_source(self, data_source_ids):
        """Remove/detach Data Source from the Composite Data Source instance

        Ids are processed in order; ids removed before an unknown id is
        encountered stay removed.

        Args:
            data_source_ids (list): a list of Data Source identifiers.

        Raises:
            ValueError: if an id is not attached to this Composite Data
                Source.

        """
        for ds_id in data_source_ids:
            if ds_id in self.data_sources:
                del self.data_sources[ds_id]
            else:
                raise ValueError("DataSource 'id' not found in CompositeDataSource collection.")

        return

    def get_all_data_sources(self):
        """Return all attached Data Sources

        Returns:
            The values of the ``data_sources`` dictionary.

        """
        return self.data_sources.values()