cti-python-stix2/stix2/sources/__init__.py

498 lines
16 KiB
Python
Raw Normal View History

"""
Python STIX 2.0 Sources

Classes:
    DataStore
    DataSink
    DataSource
    CompositeDataSource
    STIXCommonPropertyFilters

TODO: Test everything

Notes:
    add_filter(), remove_filter(), deduplicate() - if these functions remain
    the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
    make those functions an interface to inherit?

"""
2017-05-24 17:25:40 +02:00
import uuid
2017-05-26 21:24:33 +02:00
from six import iteritems
2017-08-31 20:03:12 +02:00
from stix2.sources.filters import (FILTER_OPS, FILTER_VALUE_TYPES,
STIX_COMMON_FIELDS, STIX_COMMON_FILTERS_MAP)
2017-08-09 20:49:06 +02:00
2017-05-24 17:25:40 +02:00
def make_id():
    """Return a freshly generated random (version 4) UUID as a string."""
    return str(uuid.uuid4())
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataStore(object):
    """Abstract pairing of a data source and a data sink.

    An implementer will create a concrete subclass from this abstract
    class for the specific data store. Every retrieval call is forwarded to
    ``source`` and every add call to ``sink``. Both default to None, so they
    must be supplied before the forwarding methods can be used.

    Attributes:
        name (str): The descriptive name that identifies this DataStore.
        id_ (str): Identifier produced by make_id() when the instance is
            created.
        source: Object that get(), all_versions() and query() are
            forwarded to.
        sink: Object that add() is forwarded to.

    """

    def __init__(self, name="DataStore", source=None, sink=None):
        self.name = name
        self.id_ = make_id()
        self.source = source
        self.sink = sink

    def get(self, stix_id):
        """Translate the API get() call to the source's get() call.

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve. Should
                return a single object, the most recent version of the
                object specified by the "id".

        Returns:
            stix_obj (dictionary): the STIX object to be returned

        """
        return self.source.get(stix_id=stix_id)

    def all_versions(self, stix_id):
        """Translate the all_versions() call to the source's all_versions().

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve. Should
                return all the versions of the object specified by the "id".

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        """
        return self.source.all_versions(stix_id=stix_id)

    def query(self, query):
        """Translate the query() call to the source's query() call.

        Args:
            query (list): a list of filters (which collectively are the
                query) to conduct search on.

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        """
        return self.source.query(query=query)

    def add(self, stix_objs):
        """Translate the add() call to the sink's add() call.

        Args:
            stix_objs: the STIX content to add; handed to the sink
                unchanged.

        Returns:
            Whatever the sink's add() returns.

        """
        return self.sink.add(stix_objs=stix_objs)
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataSink(object):
    """Abstract class for defining a data sink.

    Intended for subclassing into different sink components.

    Attributes:
        id_ (str): A unique UUIDv4 to identify this DataSink.
        name (str): The descriptive name that identifies this DataSink.

    """

    def __init__(self, name="DataSink"):
        self.name = name
        self.id_ = make_id()

    def add(self, stix_objs):
        """Add STIX content to the sink; must be provided by a subclass.

        A subclass implements the specific data sink API calls, processing
        and functionality required for adding data to the sink.

        Args:
            stix_objs: the STIX content to add.

        Raises:
            NotImplementedError: always, in this abstract base class.

        """
        raise NotImplementedError()
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class DataSource(object):
    """Abstract class for defining a data source.

    Intended for subclassing into different source components.

    Attributes:
        id_ (str): A unique UUIDv4 to identify this DataSource.
        name (str): The descriptive name that identifies this DataSource.
        filters (set): A collection of filters present in this DataSource.

    """

    def __init__(self, name="DataSource"):
        self.name = name
        self.id_ = make_id()
        self.filters = set()

    def get(self, stix_id, _composite_filters=None):
        """Retrieve a single STIX object; must be provided by a subclass.

        A subclass implements the specific data source API calls,
        processing and functionality required for retrieving data from the
        data source.

        Args:
            stix_id (str): the id of the STIX 2.0 object to retrieve. Should
                return a single object, the most recent version of the
                object specified by the "id".

            _composite_filters (list): list of filters passed along from
                the Composite Data Source.

        Returns:
            stix_obj (dictionary): the STIX object to be returned

        Raises:
            NotImplementedError: always, in this abstract base class.

        """
        raise NotImplementedError()

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve every version of an object; provided by a subclass.

        Similar to get() except that it returns a list of all object
        versions of the specified "id". A subclass implements the specific
        data source API calls, processing and functionality required for
        retrieving data from the data source.

        Args:
            stix_id (str): The id of the STIX 2.0 object to retrieve. Should
                return a list of objects, all the versions of the object
                specified by the "id".

            _composite_filters (list): list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Raises:
            NotImplementedError: always, in this abstract base class.

        """
        raise NotImplementedError()

    def query(self, query, _composite_filters=None):
        """Run a query against the data source; provided by a subclass.

        A subclass implements the specific data source API calls,
        processing and functionality required for retrieving the query
        results from the data source.

        Args:
            query (list): a list of filters (which collectively are the
                query) to conduct search on

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object)

        Raises:
            NotImplementedError: always, in this abstract base class.

        """
        raise NotImplementedError()

    def add_filters(self, filters):
        """Add multiple filters to the DataSource.

        Args:
            filters (list): list of filter objects to add to the Data
                Source. Each one is validated by add_filter().

        Raises:
            ValueError: as soon as one filter fails validation; filters
                earlier in the list have already been added at that point.

        """
        for filter_ in filters:
            self.add_filter(filter_)

    def add_filter(self, filter_):
        """Validate a single filter and add it to the DataSource.

        Args:
            filter_: filter object exposing ``field``, ``op`` and ``value``
                attributes.

        Raises:
            ValueError: if the field is not in STIX_COMMON_FIELDS, the
                operator is not in FILTER_OPS, or the exact type of the
                value is not in FILTER_VALUE_TYPES.

        """
        # check filter field is a supported STIX 2.0 common field
        if filter_.field not in STIX_COMMON_FIELDS:
            raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")

        # check filter operator is supported
        if filter_.op not in FILTER_OPS:
            raise ValueError("Filter operation(from 'op' field) not supported")

        # check filter value type is supported; the exact type is compared
        # on purpose, so subclasses of a supported type are rejected
        if type(filter_.value) not in FILTER_VALUE_TYPES:
            raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")

        self.filters.add(filter_)

    # TODO: Do we need a remove_filter function?

    def apply_common_filters(self, stix_objs, query):
        """Evaluate filters against a set of STIX 2.0 objects.

        Supports only STIX 2.0 common property fields. An object is kept
        only when every filter in the query matches it; filters are tried
        in order and evaluation of an object stops at its first failing
        filter.

        Args:
            stix_objs (list): list of STIX objects to apply the query to
            query (list): list of filters (combined form complete query)

        Returns:
            (list): list of STIX objects that successfully evaluate against
                the query.

        Raises:
            ValueError: if a filter that gets evaluated targets an
                unsupported field, uses an operator unsupported for its
                field, or fails for any other reason.

        """
        return [
            stix_obj for stix_obj in stix_objs
            if all(self._filter_matches(filter_, stix_obj) for filter_ in query)
        ]

    def _filter_matches(self, filter_, stix_obj):
        """Tell whether one filter accepts one STIX object."""
        try:
            if filter_.field not in STIX_COMMON_FIELDS:
                raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))

            # For properties like granular_markings and external_references
            # only the first path component names the STIX property.
            field = filter_.field.split(".")[0]

            # A filter that cannot be applied because the object lacks the
            # property rejects the object.
            if field not in stix_obj.keys():
                return False

            match = STIX_COMMON_FILTERS_MAP[field](filter_, stix_obj)
            # -1 is how the filter function signals an unsupported operator
            if match == -1:
                raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))
            return bool(match)
        except ValueError:
            raise
        except Exception as e:
            # callers only ever see ValueError from filter evaluation
            raise ValueError(e)

    def deduplicate(self, stix_obj_list):
        """Deduplicate a list of STIX objects to a unique set.

        Reduces a set of STIX objects to unique set by looking at 'id' and
        'modified' fields - as a unique object version is determined by the
        combination of those fields. When several objects share the same
        pair, the last one in the list is kept.

        Args:
            stix_obj_list (list): list of STIX objects (dicts)

        Returns:
            A list with a unique set of the passed list of STIX objects.

        """
        unique_objs = {}
        for obj in stix_obj_list:
            unique_objs[(obj['id'], obj['modified'])] = obj
        return list(unique_objs.values())
2017-05-24 17:25:40 +02:00
class CompositeDataSource(DataSource):
    """Composite Data Source

    Acts as a controller for all the defined/configured STIX Data Sources
    e.g. a user can define n Data Sources - creating Data Source (objects)
    for each. There is only one instance of this for any python STIX 2.0
    application.

    Attributes:
        name (str): The name that identifies this CompositeDataSource.
        data_sources (dict): A dictionary of DataSource objects, keyed by
            each Data Source's ``id_``; to be controlled and used by the
            Data Source Controller object.

    """

    def __init__(self, name="CompositeDataSource"):
        """Create a new, empty Composite Data Source.

        Args:
            name (str): A string containing the name to attach in the
                CompositeDataSource instance.

        """
        super(CompositeDataSource, self).__init__(name=name)
        self.data_sources = {}

    def _merged_filters(self, _composite_filters=None):
        """Return a new set of own filters plus any filters passed in."""
        merged = set(self.filters)
        if _composite_filters:
            # set.update() works in place and returns None, so its result
            # must never be used as the merged set
            merged.update(_composite_filters)
        return merged

    def get(self, stix_id, _composite_filters=None):
        """Retrieve STIX object by 'id'

        Federated retrieve method - iterates through all STIX data sources
        defined in the "data_sources" parameter. Each data source has a
        specific API retrieve-like function and associated parameters. This
        function does a federated retrieval and consolidation of the data
        returned from all the STIX data sources.

        Notes:
            A composite data source will pass its attached filters to
            each configured data source, pushing filtering to them to
            handle.

        Args:
            stix_id (str): the id of the STIX object to retrieve.

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            stix_obj (dict): the most recently modified STIX object found,
                or None when no attached data source returned an object.

        """
        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # for every configured Data Source, call its retrieve handler
        for _, ds in iteritems(self.data_sources):
            data = ds.get(stix_id=stix_id, _composite_filters=list(all_filters))
            # a source that does not hold the object contributes nothing
            if data:
                all_data.append(data)

        if not all_data:
            return None

        # remove duplicate versions, then reduce to most recent version
        all_data = self.deduplicate(all_data)
        return max(all_data, key=lambda obj: obj['modified'])

    def all_versions(self, stix_id, _composite_filters=None):
        """Retrieve STIX objects by 'id'

        Federated all_versions retrieve method - iterates through all STIX
        data sources defined in "data_sources"

        Notes:
            A composite data source will pass its attached filters to
            each configured data source, pushing filtering to them to handle

        Args:
            stix_id (str): id of the STIX objects to retrieve

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            all_data (list): list of STIX objects that have the specified id

        """
        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # retrieve STIX objects from all configured data sources
        for _, ds in iteritems(self.data_sources):
            data = ds.all_versions(stix_id=stix_id, _composite_filters=list(all_filters))
            all_data.extend(data)

        # remove exact duplicates (where duplicates are STIX 2.0 objects
        # with the same 'id' and 'modified' values)
        if all_data:
            all_data = self.deduplicate(all_data)

        return all_data

    def query(self, query=None, _composite_filters=None):
        """Composite data source query

        Federate the query to all Data Sources attached to the
        Composite Data Source.

        Args:
            query (list): list of filters to search on.

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            all_data (list): list of STIX objects to be returned

        """
        if not query:
            query = []

        all_filters = self._merged_filters(_composite_filters)
        all_data = []

        # federate query to all attached data sources,
        # pass composite filters to them
        for _, ds in iteritems(self.data_sources):
            data = ds.query(query=query, _composite_filters=list(all_filters))
            all_data.extend(data)

        # remove exact duplicates (where duplicates are STIX 2.0
        # objects with the same 'id' and 'modified' values)
        if all_data:
            all_data = self.deduplicate(all_data)

        return all_data

    def add_data_source(self, data_sources):
        """Add/attach Data Source to the Composite Data Source instance

        Entries that are not DataSource instances, and Data Sources whose
        ``id_`` is already attached, are silently skipped.

        Args:
            data_sources (list): a list of Data Source objects to attach
                to the Composite Data Source

        """
        for ds in data_sources:
            if not isinstance(ds, DataSource):
                # the Data Source object is not a proper subclass
                # of DataSource Abstract Class
                # TODO: maybe log error?
                continue

            if ds.id_ in self.data_sources:
                # data source already attached to Composite Data Source
                continue

            # add data source to Composite Data Source
            # (its id will be its key identifier)
            self.data_sources[ds.id_] = ds

    def remove_data_source(self, data_source_ids):
        """Remove/detach Data Source from the Composite Data Source instance

        Args:
            data_source_ids (list): a list of Data Source identifiers.

        Raises:
            ValueError: if an identifier is not attached. Identifiers that
                precede it in the list have already been removed.

        """
        for id_ in data_source_ids:
            if id_ not in self.data_sources:
                raise ValueError("DataSource 'id' not found in CompositeDataSource collection.")
            del self.data_sources[id_]

    def get_all_data_sources(self):
        """Return all attached Data Sources
        """
        return self.data_sources.values()