docstrings redone; code changes for adding/returning STIX objects to FileSystem, Memory, TAXII; code changes for query sets and filters
parent
463d1e6b28
commit
94df10bf8d
|
@ -137,13 +137,6 @@ class Environment(object):
|
|||
raise AttributeError('Environment has no data source')
|
||||
add_filters.__doc__ = DataSource.add_filters.__doc__
|
||||
|
||||
def add_filter(self, *args, **kwargs):
|
||||
try:
|
||||
return self.source.add_filter(*args, **kwargs)
|
||||
except AttributeError:
|
||||
raise AttributeError('Environment has no data source')
|
||||
add_filter.__doc__ = DataSource.add_filter.__doc__
|
||||
|
||||
def add(self, *args, **kwargs):
|
||||
try:
|
||||
return self.sink.add(*args, **kwargs)
|
||||
|
|
|
@ -7,12 +7,8 @@ Classes:
|
|||
DataSource
|
||||
CompositeDataSource
|
||||
|
||||
TODO:Test everything
|
||||
|
||||
Notes:
|
||||
add_filter(), remove_filter(), deduplicate() - if these functions remain
|
||||
the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
|
||||
make those functions an interface to inherit?
|
||||
Q: We have add_filters() but no remove_filter()
|
||||
|
||||
"""
|
||||
|
||||
|
@ -28,14 +24,100 @@ def make_id():
|
|||
return str(uuid.uuid4())
|
||||
|
||||
|
||||
class DataStore(object):
|
||||
def apply_common_filters(stix_objs, query):
    """Evaluate filters against a set of STIX 2.0 objects.

    Supports only STIX 2.0 common property fields. An object is kept only
    if every filter in the query matches it.

    Args:
        stix_objs (list): list of STIX objects to apply the query to

        query (set): set of filters (combined, they form the complete
            query). Each filter must expose ``field`` and ``op``.

    Returns:
        (list): list of STIX objects that successfully evaluate against
            the query, in their original order.

    Raises:
        ValueError: if a filter's field is not in STIX_COMMON_FIELDS, or
            if the field's filter function reports (by returning -1) that
            the filter's operator is not supported for that field.

    """
    filtered_stix_objs = []

    for stix_obj in stix_objs:
        for filter_ in query:
            # only common properties can be filtered on; anything else
            # is rejected outright rather than silently skipped
            if filter_.field not in STIX_COMMON_FIELDS:
                raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))

            # For properties like granular_markings and external_references
            # only the first path segment names the top-level property
            # ("a.b" -> "a"; a field without "." is returned unchanged).
            field = filter_.field.split(".")[0]

            # if the filter can't be applied because the STIX object lacks
            # the property, the object is discarded
            # (i.e. did not make it through the filter)
            if field not in stix_obj:
                break

            match = STIX_COMMON_FILTERS_MAP[field](filter_, stix_obj)

            # -1 is the "operator not supported" sentinel; test it before
            # the truthiness check so the logic does not rely on -1
            # happening to be truthy
            if match == -1:
                raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))

            if not match:
                break
        else:
            # no filter rejected the object (loop finished without break)
            filtered_stix_objs.append(stix_obj)

    return filtered_stix_objs
|
||||
|
||||
|
||||
def deduplicate(stix_obj_list):
    """Deduplicate a list of STIX objects to a unique set.

    Reduces a list of STIX objects to a unique set by looking at the
    'id' and 'modified' fields - a unique object version is determined
    by the combination of those fields. Objects that have no 'modified'
    property (e.g. marking-definition objects) are keyed on 'id' and
    'created' instead. When several objects share a key, the last one
    in the input wins.

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list with a unique set of the passed list of STIX objects.

    Raises:
        KeyError: if an object has no 'id', or has neither 'modified'
            nor 'created'.

    """
    unique_objs = {}

    for obj in stix_obj_list:
        try:
            version = obj['modified']
        except KeyError:
            # not every STIX object is versioned with 'modified';
            # fall back to the creation timestamp
            version = obj['created']
        unique_objs[(obj['id'], version)] = obj

    return list(unique_objs.values())
|
||||
|
||||
|
||||
class DataStore(object):
|
||||
"""DataStore
|
||||
|
||||
An implementer will create a concrete subclass from
|
||||
this abstract class for the specific data store.
|
||||
this class for the specific DataStore.
|
||||
|
||||
Args:
|
||||
source (DataSource): An existing DataSource to use
|
||||
as this DataStore's DataSource component
|
||||
|
||||
sink (DataSink): An existing DataSink to use
|
||||
as this DataStore's DataSink component
|
||||
|
||||
Attributes:
|
||||
id (str): A unique UUIDv4 to identify this DataStore.
|
||||
source (DataStore): An object that implements DataStore class.
|
||||
|
||||
source (DataSource): An object that implements DataSource class.
|
||||
|
||||
sink (DataSink): An object that implements DataSink class.
|
||||
|
||||
"""
|
||||
|
@ -47,14 +129,13 @@ class DataStore(object):
|
|||
def get(self, stix_id):
|
||||
"""Retrieve the most recent version of a single STIX object by ID.
|
||||
|
||||
Notes:
|
||||
Translate API get() call to the appropriate DataSource call.
|
||||
Translate get() call to the appropriate DataSource call.
|
||||
|
||||
Args:
|
||||
stix_id (str): the id of the STIX 2.0 object to retrieve.
|
||||
stix_id (str): the id of the STIX object to retrieve.
|
||||
|
||||
Returns:
|
||||
stix_obj (dictionary): the single most recent version of the STIX
|
||||
stix_obj: the single most recent version of the STIX
|
||||
object specified by the "id".
|
||||
|
||||
"""
|
||||
|
@ -63,15 +144,13 @@ class DataStore(object):
|
|||
def all_versions(self, stix_id):
|
||||
"""Retrieve all versions of a single STIX object by ID.
|
||||
|
||||
Implement:
|
||||
Translate all_versions() call to the appropriate DataSource call
|
||||
Implement: Translate all_versions() call to the appropriate DataSource call
|
||||
|
||||
Args:
|
||||
stix_id (str): the id of the STIX 2.0 object to retrieve.
|
||||
stix_id (str): the id of the STIX object to retrieve.
|
||||
|
||||
Returns:
|
||||
stix_objs (list): a list of STIX objects (where each object is a
|
||||
STIX object)
|
||||
stix_objs (list): a list of STIX objects
|
||||
|
||||
"""
|
||||
return self.source.all_versions(stix_id)
|
||||
|
@ -79,17 +158,15 @@ class DataStore(object):
|
|||
def query(self, query):
|
||||
"""Retrieve STIX objects matching a set of filters.
|
||||
|
||||
Notes:
|
||||
Implement the specific data source API calls, processing,
|
||||
functionality required for retrieving query from the data source.
|
||||
Implement: Specific data source API calls, processing,
|
||||
functionality required for retrieving query from the data source.
|
||||
|
||||
Args:
|
||||
query (list): a list of filters (which collectively are the query)
|
||||
to conduct search on.
|
||||
|
||||
Returns:
|
||||
stix_objs (list): a list of STIX objects (where each object is a
|
||||
STIX object)
|
||||
stix_objs (list): a list of STIX objects
|
||||
|
||||
"""
|
||||
return self.source.query(query=query)
|
||||
|
@ -97,21 +174,30 @@ class DataStore(object):
|
|||
def add(self, stix_objs):
|
||||
"""Store STIX objects.
|
||||
|
||||
Notes:
|
||||
Translate add() to the appropriate DataSink call().
|
||||
Translates add() to the appropriate DataSink call.
|
||||
|
||||
Args:
|
||||
stix_objs (list): a list of STIX objects (where each object is a
|
||||
STIX object)
|
||||
|
||||
stix_objs (list): a list of STIX objects
|
||||
"""
|
||||
return self.sink.add(stix_objs)
|
||||
|
||||
def add_filters(self, filters):
|
||||
"""add query filters (to DataSource component)
|
||||
|
||||
Translates add_filters() to appropriate DataSource call.
|
||||
|
||||
Args:
|
||||
filters (list or Filter obj): Filters to be added to DataStore
|
||||
|
||||
"""
|
||||
return self.source.add_filters(filters)
|
||||
|
||||
|
||||
class DataSink(object):
|
||||
"""
|
||||
Abstract class for defining a data sink. Intended for subclassing into
|
||||
different sink components.
|
||||
"""DataSink
|
||||
|
||||
An implementer will create a concrete subclass from
|
||||
this class for the specific DataSink.
|
||||
|
||||
Attributes:
|
||||
id (str): A unique UUIDv4 to identify this DataSink.
|
||||
|
@ -123,9 +209,8 @@ class DataSink(object):
|
|||
def add(self, stix_objs):
|
||||
"""Store STIX objects.
|
||||
|
||||
Notes:
|
||||
Implement the specific data sink API calls, processing,
|
||||
functionality required for adding data to the sink
|
||||
Implement: Specific data sink API calls, processing,
|
||||
functionality required for adding data to the sink
|
||||
|
||||
Args:
|
||||
stix_objs (list): a list of STIX objects (where each object is a
|
||||
|
@ -136,194 +221,122 @@ class DataSink(object):
|
|||
|
||||
|
||||
class DataSource(object):
|
||||
"""
|
||||
Abstract class for defining a data source. Intended for subclassing into
|
||||
different source components.
|
||||
"""DataSource
|
||||
|
||||
An implementer will create a concrete subclass from
|
||||
this class for the specific DataSource.
|
||||
|
||||
Attributes:
|
||||
id (str): A unique UUIDv4 to identify this DataSource.
|
||||
filters (set): A collection of filters present in this DataSource.
|
||||
|
||||
_filters (set): A collection of filters attached to this DataSource.
|
||||
|
||||
"""
|
||||
def __init__(self):
|
||||
self.id = make_id()
|
||||
self.filters = set()
|
||||
self._filters = set()
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
Fill:
|
||||
Implement the specific data source API calls, processing,
|
||||
functionality required for retrieving data from the data source
|
||||
Implement: Specific data source API calls, processing,
|
||||
functionality required for retrieving data from the data source
|
||||
|
||||
Args:
|
||||
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
|
||||
return a single object, the most recent version of the object
|
||||
specified by the "id".
|
||||
|
||||
_composite_filters (list): list of filters passed along from
|
||||
the Composite Data Filter.
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
the CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
stix_obj (dictionary): the STIX object to be returned
|
||||
stix_obj: the STIX object
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
Notes:
|
||||
Similar to get() except returns list of all object versions of
|
||||
the specified "id". In addition, implement the specific data
|
||||
source API calls, processing, functionality required for retrieving
|
||||
data from the data source.
|
||||
Implement: Similar to get() except returns list of all object versions of
|
||||
the specified "id". In addition, implement the specific data
|
||||
source API calls, processing, functionality required for retrieving
|
||||
data from the data source.
|
||||
|
||||
Args:
|
||||
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
|
||||
return a list of objects, all the versions of the object
|
||||
specified by the "id".
|
||||
|
||||
_composite_filters (list): list of filters passed from the
|
||||
Composite Data Source
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
stix_objs (list): a list of STIX objects (where each object is a
|
||||
STIX object)
|
||||
stix_objs (list): a list of STIX objects
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def query(self, query, _composite_filters=None):
|
||||
"""
|
||||
Fill:
|
||||
-implement the specific data source API calls, processing,
|
||||
functionality required for retrieving query from the data source
|
||||
Implement:Implement the specific data source API calls, processing,
|
||||
functionality required for retrieving query from the data source
|
||||
|
||||
Args:
|
||||
query (list): a list of filters (which collectively are the query)
|
||||
to conduct search on
|
||||
|
||||
_composite_filters (list): a list of filters passed from the
|
||||
Composite Data Source
|
||||
_composite_filters (set): a set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
stix_objs (list): a list of STIX objects
|
||||
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def add_filters(self, filters):
|
||||
"""Add multiple filters to be applied to all queries for STIX objects.
|
||||
|
||||
Args:
|
||||
filters (list): list of filters (dict) to add to the Data Source.
|
||||
|
||||
"""
|
||||
for filter in filters:
|
||||
self.add_filter(filter)
|
||||
|
||||
def add_filter(self, filter):
|
||||
"""Add a filter to be applied to all queries for STIX objects.
|
||||
|
||||
Args:
|
||||
filter: filter to add to the Data Source.
|
||||
filters (list or Filter obj): filter(s) to add to the Data Source.
|
||||
|
||||
"""
|
||||
# check filter field is a supported STIX 2.0 common field
|
||||
if filter.field not in STIX_COMMON_FIELDS:
|
||||
raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")
|
||||
if isinstance(filters, list) or isinstance(filters, set):
|
||||
for filter_ in filters:
|
||||
self.add_filters(filter_)
|
||||
else:
|
||||
filter_ = filters
|
||||
# check filter field is a supported STIX 2.0 common field
|
||||
if filter_.field not in STIX_COMMON_FIELDS:
|
||||
raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")
|
||||
|
||||
# check filter operator is supported
|
||||
if filter.op not in FILTER_OPS:
|
||||
raise ValueError("Filter operation (from 'op' field) not supported")
|
||||
# check filter operator is supported
|
||||
if filter_.op not in FILTER_OPS:
|
||||
raise ValueError("Filter operation (from 'op' field) not supported")
|
||||
|
||||
# check filter value type is supported
|
||||
if type(filter.value) not in FILTER_VALUE_TYPES:
|
||||
raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")
|
||||
# check filter value type is supported
|
||||
if type(filter_.value) not in FILTER_VALUE_TYPES:
|
||||
raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")
|
||||
|
||||
self.filters.add(filter)
|
||||
|
||||
def apply_common_filters(self, stix_objs, query):
|
||||
"""Evaluate filters against a set of STIX 2.0 objects.
|
||||
|
||||
Supports only STIX 2.0 common property fields
|
||||
|
||||
Args:
|
||||
stix_objs (list): list of STIX objects to apply the query to
|
||||
query (list): list of filters (combined form complete query)
|
||||
|
||||
Returns:
|
||||
(list): list of STIX objects that successfully evaluate against
|
||||
the query.
|
||||
|
||||
"""
|
||||
filtered_stix_objs = []
|
||||
|
||||
# evaluate objects against filter
|
||||
for stix_obj in stix_objs:
|
||||
clean = True
|
||||
for filter_ in query:
|
||||
# skip filter as filter was identified (when added) as
|
||||
# not a common filter
|
||||
if filter_.field not in STIX_COMMON_FIELDS:
|
||||
raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))
|
||||
|
||||
# For properties like granular_markings and external_references
|
||||
# need to break the first property from the string.
|
||||
if "." in filter_.field:
|
||||
field = filter_.field.split(".")[0]
|
||||
else:
|
||||
field = filter_.field
|
||||
|
||||
# check filter "field" is in STIX object - if cant be
|
||||
# applied due to STIX object, STIX object is discarded
|
||||
# (i.e. did not make it through the filter)
|
||||
if field not in stix_obj.keys():
|
||||
clean = False
|
||||
break
|
||||
|
||||
match = STIX_COMMON_FILTERS_MAP[filter_.field.split('.')[0]](filter_, stix_obj)
|
||||
if not match:
|
||||
clean = False
|
||||
break
|
||||
elif match == -1:
|
||||
raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))
|
||||
|
||||
# if object unmarked after all filters, add it
|
||||
if clean:
|
||||
filtered_stix_objs.append(stix_obj)
|
||||
|
||||
return filtered_stix_objs
|
||||
|
||||
def deduplicate(self, stix_obj_list):
|
||||
"""Deduplicate a list of STIX objects to a unique set
|
||||
|
||||
Reduces a set of STIX objects to unique set by looking
|
||||
at 'id' and 'modified' fields - as a unique object version
|
||||
is determined by the combination of those fields
|
||||
|
||||
Args:
|
||||
stix_obj_list (list): list of STIX objects (dicts)
|
||||
|
||||
Returns:
|
||||
A list with a unique set of the passed list of STIX objects.
|
||||
|
||||
"""
|
||||
unique_objs = {}
|
||||
|
||||
for obj in stix_obj_list:
|
||||
unique_objs[(obj['id'], obj['modified'])] = obj
|
||||
|
||||
return list(unique_objs.values())
|
||||
self._filters.add(filter_)
|
||||
|
||||
|
||||
class CompositeDataSource(DataSource):
|
||||
"""Controller for all the defined/configured STIX Data Sources.
|
||||
"""CompositeDataSource
|
||||
|
||||
E.g. a user can define n Data Sources - creating Data Source (objects)
|
||||
for each. There is only one instance of this for any Python STIX 2.0
|
||||
application.
|
||||
Controller for all the attached DataSources.
|
||||
|
||||
A user can have a single CompositeDataSource as an interface
|
||||
to a set of DataSources. When an API call is made to the
|
||||
CompositeDataSource, it is delegated to each of the (real)
|
||||
DataSources that are attached to it.
|
||||
|
||||
DataSources can be attached to CompositeDataSource for a variety
|
||||
of reasons, e.g. common filters, organization, less API calls.
|
||||
|
||||
Attributes:
|
||||
name (str): The name that identifies this CompositeDataSource.
|
||||
|
||||
data_sources (dict): A dictionary of DataSource objects; to be
|
||||
controlled and used by the Data Source Controller object.
|
||||
|
||||
|
@ -340,41 +353,46 @@ class CompositeDataSource(DataSource):
|
|||
self.data_sources = {}
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
|
||||
"""Retrieve STIX object by 'id'
|
||||
"""Retrieve STIX object by STIX ID
|
||||
|
||||
Federated retrieve method-iterates through all STIX data sources
|
||||
Federated retrieve method, iterates through all DataSources
|
||||
defined in the "data_sources" parameter. Each data source has a
|
||||
specific API retrieve-like function and associated parameters. This
|
||||
function does a federated retrieval and consolidation of the data
|
||||
returned from all the STIX data sources.
|
||||
|
||||
Notes:
|
||||
A composite data source will pass its attached filters to
|
||||
each configured data source, pushing filtering to them to handle.
|
||||
A composite data source will pass its attached filters to
|
||||
each configured data source, pushing filtering to them to handle.
|
||||
|
||||
Args:
|
||||
stix_id (str): the id of the STIX object to retrieve.
|
||||
|
||||
_composite_filters (list): a list of filters passed from the
|
||||
Composite Data Source
|
||||
_composite_filters (list): a list of filters passed from a
|
||||
CompositeDataSource (i.e. if this CompositeDataSource is attached
|
||||
to another parent CompositeDataSource), not user supplied
|
||||
|
||||
Returns:
|
||||
stix_obj (dict): the STIX object to be returned.
|
||||
stix_obj: the STIX object to be returned.
|
||||
|
||||
"""
|
||||
if not self.get_all_data_sources():
|
||||
raise AttributeError('CompositeDataSource has no data sources')
|
||||
|
||||
all_data = []
|
||||
all_filters = set()
|
||||
all_filters.update(self._filters)
|
||||
|
||||
if _composite_filters:
|
||||
all_filters.update(_composite_filters)
|
||||
|
||||
# for every configured Data Source, call its retrieve handler
|
||||
for ds_id, ds in iteritems(self.data_sources):
|
||||
data = ds.get(stix_id=stix_id, _composite_filters=list(self.filters))
|
||||
data = ds.get(stix_id=stix_id, _composite_filters=all_filters)
|
||||
all_data.append(data)
|
||||
|
||||
# remove duplicate versions
|
||||
if len(all_data) > 0:
|
||||
all_data = self.deduplicate(all_data)
|
||||
all_data = deduplicate(all_data)
|
||||
|
||||
# reduce to most recent version
|
||||
stix_obj = sorted(all_data, key=lambda k: k['modified'], reverse=True)[0]
|
||||
|
@ -382,20 +400,20 @@ class CompositeDataSource(DataSource):
|
|||
return stix_obj
|
||||
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""Retrieve STIX objects by 'id'
|
||||
"""Retrieve STIX objects by STIX ID
|
||||
|
||||
Federated all_versions retrieve method - iterates through all STIX data
|
||||
sources defined in "data_sources"
|
||||
Federated all_versions retrieve method - iterates through all DataSources
|
||||
defined in "data_sources"
|
||||
|
||||
Notes:
|
||||
A composite data source will pass its attached filters to
|
||||
each configured data source, pushing filtering to them to handle
|
||||
A composite data source will pass its attached filters to
|
||||
each configured data source, pushing filtering to them to handle
|
||||
|
||||
Args:
|
||||
stix_id (str): id of the STIX objects to retrieve
|
||||
|
||||
_composite_filters (list): a list of filters passed from the
|
||||
Composite Data Source
|
||||
_composite_filters (list): a list of filters passed from a
|
||||
CompositeDataSource (i.e. if this CompositeDataSource is attached
|
||||
to a parent CompositeDataSource), not user supplied
|
||||
|
||||
Returns:
|
||||
all_data (list): list of STIX objects that have the specified id
|
||||
|
@ -405,32 +423,37 @@ class CompositeDataSource(DataSource):
|
|||
raise AttributeError('CompositeDataSource has no data sources')
|
||||
|
||||
all_data = []
|
||||
all_filters = self.filters
|
||||
all_filters = set()
|
||||
|
||||
all_filters.update(self._filters)
|
||||
|
||||
if _composite_filters:
|
||||
all_filters = set(self.filters).update(_composite_filters)
|
||||
all_filters.update(_composite_filters)
|
||||
|
||||
# retrieve STIX objects from all configured data sources
|
||||
for ds_id, ds in iteritems(self.data_sources):
|
||||
data = ds.all_versions(stix_id=stix_id, _composite_filters=list(all_filters))
|
||||
data = ds.all_versions(stix_id=stix_id, _composite_filters=all_filters)
|
||||
all_data.extend(data)
|
||||
|
||||
# remove exact duplicates (where duplicates are STIX 2.0 objects
|
||||
# with the same 'id' and 'modified' values)
|
||||
if len(all_data) > 0:
|
||||
all_data = self.deduplicate(all_data)
|
||||
all_data = deduplicate(all_data)
|
||||
|
||||
return all_data
|
||||
|
||||
def query(self, query=None, _composite_filters=None):
|
||||
"""Federate the query to all Data Sources attached to the
|
||||
"""Retrieve STIX objects that match query
|
||||
|
||||
Federate the query to all DataSources attached to the
|
||||
Composite Data Source.
|
||||
|
||||
Args:
|
||||
query (list): list of filters to search on.
|
||||
query (list): list of filters to search on
|
||||
|
||||
_composite_filters (list): a list of filters passed from the
|
||||
Composite Data Source
|
||||
_composite_filters (list): a list of filters passed from a
|
||||
CompositeDataSource (i.e. if this CompositeDataSource is attached
|
||||
to a parent CompositeDataSource), not user supplied
|
||||
|
||||
Returns:
|
||||
all_data (list): list of STIX objects to be returned
|
||||
|
@ -440,33 +463,37 @@ class CompositeDataSource(DataSource):
|
|||
raise AttributeError('CompositeDataSource has no data sources')
|
||||
|
||||
if not query:
|
||||
# dont mess with the query (i.e. convert to a set, as thats done
|
||||
# within the specific DataSources that are called)
|
||||
query = []
|
||||
|
||||
all_data = []
|
||||
all_filters = self.filters
|
||||
|
||||
all_filters = set()
|
||||
all_filters.update(self._filters)
|
||||
|
||||
if _composite_filters:
|
||||
all_filters = set(self.filters).update(_composite_filters)
|
||||
all_filters.update(_composite_filters)
|
||||
|
||||
# federate query to all attached data sources,
|
||||
# pass composite filters to id
|
||||
for ds_id, ds in iteritems(self.data_sources):
|
||||
data = ds.query(query=query, _composite_filters=list(all_filters))
|
||||
data = ds.query(query=query, _composite_filters=all_filters)
|
||||
all_data.extend(data)
|
||||
|
||||
# remove exact duplicates (where duplicates are STIX 2.0
|
||||
# objects with the same 'id' and 'modified' values)
|
||||
if len(all_data) > 0:
|
||||
all_data = self.deduplicate(all_data)
|
||||
all_data = deduplicate(all_data)
|
||||
|
||||
return all_data
|
||||
|
||||
def add_data_source(self, data_sources):
|
||||
"""Add/attach Data Source to the Composite Data Source instance
|
||||
"""Attach a DataSource to the CompositeDataSource instance
|
||||
|
||||
Args:
|
||||
data_sources (list): a list of Data Source objects to attach
|
||||
to the Composite Data Source
|
||||
data_sources (list): a list of DataSource(s) to attach
|
||||
to the CompositeDataSource
|
||||
|
||||
"""
|
||||
if not isinstance(data_sources, list):
|
||||
|
@ -474,36 +501,32 @@ class CompositeDataSource(DataSource):
|
|||
for ds in data_sources:
|
||||
if issubclass(ds.__class__, DataSource):
|
||||
if ds.id in self.data_sources:
|
||||
# data source already attached to Composite Data Source
|
||||
# DataSource already attached to CompositeDataSource
|
||||
continue
|
||||
|
||||
# add data source to Composite Data Source
|
||||
# (its id will be its key identifier)
|
||||
# add DataSource to CompositeDataSource, its ID is used as key
|
||||
self.data_sources[ds.id] = ds
|
||||
else:
|
||||
# the Data Source object is not a proper subclass
|
||||
# of DataSource Abstract Class
|
||||
# the Data Source object not a subclass of DataSource
|
||||
# TODO: maybe log error?
|
||||
continue
|
||||
|
||||
return
|
||||
|
||||
def remove_data_source(self, data_source_ids):
|
||||
"""Remove/detach Data Source from the Composite Data Source instance
|
||||
"""Remove DataSource from the CompositeDataSource instance
|
||||
|
||||
Args:
|
||||
data_source_ids (list): a list of Data Source identifiers.
|
||||
data_source_ids (list): a list of Data Source id(s).
|
||||
|
||||
"""
|
||||
for id in data_source_ids:
|
||||
if id in self.data_sources:
|
||||
del self.data_sources[id]
|
||||
else:
|
||||
raise ValueError("DataSource 'id' not found in CompositeDataSource collection.")
|
||||
raise ValueError("DataSource 'id' not found in CompositeDataSource.data_sources ")
|
||||
return
|
||||
|
||||
def get_all_data_sources(self):
|
||||
"""Return all attached Data Sources
|
||||
|
||||
"""
|
||||
"""Return all attached DataSource(s)"""
|
||||
return self.data_sources.values()
|
||||
|
|
|
@ -12,13 +12,28 @@ TODO: Test everything
|
|||
import json
|
||||
import os
|
||||
|
||||
from stix2 import Bundle
|
||||
from stix2.sources import DataSink, DataSource, DataStore
|
||||
from stix2.base import _STIXBase
|
||||
from stix2.core import Bundle, parse
|
||||
from stix2.sources import (DataSink, DataSource, DataStore,
|
||||
apply_common_filters, deduplicate)
|
||||
from stix2.sources.filters import Filter
|
||||
|
||||
|
||||
class FileSystemStore(DataStore):
|
||||
"""
|
||||
"""FileSystemStore
|
||||
|
||||
Provides an interface to a file directory of STIX objects.
|
||||
FileSystemStore is a wrapper around a paired FileSystemSink
|
||||
and FileSystemSource.
|
||||
|
||||
Args:
|
||||
stix_dir (str): path to directory of STIX objects
|
||||
|
||||
Attributes:
|
||||
source (FileSystemSource): FileSystemSource
|
||||
|
||||
sink (FileSystemSink): FileSystemSink
|
||||
|
||||
"""
|
||||
def __init__(self, stix_dir="stix_data"):
|
||||
super(FileSystemStore, self).__init__()
|
||||
|
@ -27,56 +42,117 @@ class FileSystemStore(DataStore):
|
|||
|
||||
|
||||
class FileSystemSink(DataSink):
|
||||
"""
|
||||
"""FileSystemSink
|
||||
|
||||
Provides an interface for adding/pushing STIX objects
|
||||
to file directory of STIX objects.
|
||||
|
||||
Can be paired with a FileSystemSource, together as the two
|
||||
components of a FileSystemStore.
|
||||
|
||||
Args:
|
||||
stix_dir (str): path to directory of STIX objects
|
||||
|
||||
"""
|
||||
def __init__(self, stix_dir="stix_data"):
|
||||
super(FileSystemSink, self).__init__()
|
||||
self.stix_dir = os.path.abspath(stix_dir)
|
||||
self._stix_dir = os.path.abspath(stix_dir)
|
||||
|
||||
# check directory path exists
|
||||
if not os.path.exists(self.stix_dir):
|
||||
if not os.path.exists(self._stix_dir):
|
||||
print("Error: directory path for STIX data does not exist")
|
||||
|
||||
@property
|
||||
def stix_dir(self):
|
||||
return self.stix_dir
|
||||
return self._stix_dir
|
||||
|
||||
@stix_dir.setter
|
||||
def stix_dir(self, dir):
|
||||
self.stix_dir = dir
|
||||
def add(self, stix_data=None):
    """Add STIX objects to the file directory.

    Each STIX object is written, wrapped in a Bundle, to
    ``<stix_dir>/<type>/<id>.json``. Bundles are unpacked and their
    member objects are stored individually.

    Args:
        stix_data (STIX object OR dict OR str OR list): valid STIX 2.0
            content in a STIX object (or list of), dict (or list of),
            or a STIX 2.0 json encoded string

    Raises:
        ValueError: if stix_data is none of the supported types
            (including the default of None).

    TODO: Bundlify STIX content or no? When dumping to disk.
    """
    def _check_path_and_write(stix_dir, stix_obj):
        # one sub-directory per STIX type, one file per STIX id
        path = os.path.join(stix_dir, stix_obj["type"], stix_obj["id"] + ".json")

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        with open(path, "w") as f:
            # Bundle() can take dict or STIX obj as argument
            f.write(str(Bundle(stix_obj)))

    if isinstance(stix_data, _STIXBase):
        # adding python STIX object
        _check_path_and_write(self._stix_dir, stix_data)

    elif isinstance(stix_data, dict):
        if stix_data["type"] == "bundle":
            # adding json-formatted Bundle - extracting STIX objects
            for stix_obj in stix_data["objects"]:
                self.add(stix_obj)
        else:
            # adding json-formatted STIX
            _check_path_and_write(self._stix_dir, stix_data)

    elif isinstance(stix_data, str):
        # adding json encoded string of STIX content
        stix_data = parse(stix_data)
        if stix_data["type"] == "bundle":
            # iterate the bundle's member objects; iterating the parsed
            # bundle itself would not yield the STIX objects it contains
            for stix_obj in stix_data["objects"]:
                self.add(stix_obj)
        else:
            self.add(stix_data)

    elif isinstance(stix_data, list):
        # if list, recurse call on individual STIX objects
        for stix_obj in stix_data:
            self.add(stix_obj)

    else:
        raise ValueError("stix_data must be a STIX object(or list of, json formatted STIX(or list of) or a json formatted STIX bundle")
|
||||
|
||||
|
||||
class FileSystemSource(DataSource):
|
||||
"""
|
||||
"""FileSystemSource
|
||||
|
||||
Provides an interface for searching/retrieving
|
||||
STIX objects from a STIX object file directory.
|
||||
|
||||
Can be paired with a FileSystemSink, together as the two
|
||||
components of a FileSystemStore.
|
||||
|
||||
Args:
|
||||
stix_dir (str): path to directory of STIX objects
|
||||
|
||||
"""
|
||||
def __init__(self, stix_dir="stix_data"):
|
||||
super(FileSystemSource, self).__init__()
|
||||
self.stix_dir = os.path.abspath(stix_dir)
|
||||
self._stix_dir = os.path.abspath(stix_dir)
|
||||
|
||||
# check directory path exists
|
||||
if not os.path.exists(self.stix_dir):
|
||||
if not os.path.exists(self._stix_dir):
|
||||
print("Error: directory path for STIX data does not exist")
|
||||
|
||||
@property
|
||||
def stix_dir(self):
|
||||
return self.stix_dir
|
||||
|
||||
@stix_dir.setter
|
||||
def stix_dir(self, dir):
|
||||
self.stix_dir = dir
|
||||
return self._stix_dir
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
"""retrieve STIX object from file directory via STIX ID
|
||||
|
||||
Args:
|
||||
stix_id (str): The STIX ID of the STIX object to be retrieved.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(STIX object): STIX object that has the supplied STIX ID.
|
||||
The STIX object is loaded from its json file, parsed into
|
||||
a python STIX object and then returned
|
||||
|
||||
"""
|
||||
query = [Filter("id", "=", stix_id)]
|
||||
|
||||
|
@ -84,30 +160,63 @@ class FileSystemSource(DataSource):
|
|||
|
||||
stix_obj = sorted(all_data, key=lambda k: k['modified'])[0]
|
||||
|
||||
return stix_obj
|
||||
return parse(stix_obj)
|
||||
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
Notes:
|
||||
Since FileSystem sources/sinks don't handle multiple versions
|
||||
of a STIX object, this operation is unnecessary. Pass call to get().
|
||||
"""retrieve STIX object from file directory via STIX ID, all versions
|
||||
|
||||
Note: Since FileSystem sources/sinks don't handle multiple versions
|
||||
of a STIX object, this operation is unnecessary. Pass call to get().
|
||||
|
||||
Args:
|
||||
stix_id (str): The STIX ID of the STIX objects to be retrieved.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(list): list of STIX objects that have the supplied STIX ID.
|
||||
The STIX objects are loaded from their json files, parsed into
|
||||
python STIX objects and then returned
|
||||
|
||||
"""
|
||||
return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]
|
||||
|
||||
def query(self, query=None, _composite_filters=None):
|
||||
"""
|
||||
"""search and retrieve STIX objects based on the complete query
|
||||
|
||||
A "complete query" includes the filters from the query, the filters
|
||||
attached to the FileSystemSource, and any filters passed from a
|
||||
CompositeDataSource (i.e. _composite_filters)
|
||||
|
||||
Args:
|
||||
query (list): list of filters to search on
|
||||
|
||||
_composite_filters (set): set of filters passed from the
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(list): list of STIX objects that matches the supplied
|
||||
query. The STIX objects are loaded from their json files,
|
||||
parsed into python STIX objects and then returned.
|
||||
|
||||
"""
|
||||
all_data = []
|
||||
|
||||
if query is None:
|
||||
query = []
|
||||
query = set()
|
||||
else:
|
||||
if not isinstance(query, list):
|
||||
# make sure we don't build the set from a bare Filter object,
|
||||
# need to make a set from a list of Filter objects (even if just one Filter)
|
||||
query = list(query)
|
||||
query = set(query)
|
||||
|
||||
# combine all query filters
|
||||
if self.filters:
|
||||
query.extend(self.filters.values())
|
||||
if self._filters:
|
||||
query.update(self._filters)
|
||||
if _composite_filters:
|
||||
query.extend(_composite_filters)
|
||||
query.update(_composite_filters)
|
||||
|
||||
# extract any filters that are for "type" or "id" , as we can then do
|
||||
# filtering before reading in the STIX objects. A STIX 'type' filter
|
||||
|
@ -125,12 +234,12 @@ class FileSystemSource(DataSource):
|
|||
for filter in file_filters:
|
||||
if filter.field == "type":
|
||||
if filter.op == "=":
|
||||
include_paths.append(os.path.join(self.stix_dir, filter.value))
|
||||
include_paths.append(os.path.join(self._stix_dir, filter.value))
|
||||
elif filter.op == "!=":
|
||||
declude_paths.append(os.path.join(self.stix_dir, filter.value))
|
||||
declude_paths.append(os.path.join(self._stix_dir, filter.value))
|
||||
else:
|
||||
# have to walk entire STIX directory
|
||||
include_paths.append(self.stix_dir)
|
||||
include_paths.append(self._stix_dir)
|
||||
|
||||
# if a user specifies a "type" filter like "type = <stix-object_type>",
|
||||
# the filter is reducing the search space to single stix object types
|
||||
|
@ -144,7 +253,7 @@ class FileSystemSource(DataSource):
|
|||
# user has specified types that are not wanted (i.e. "!=")
|
||||
# so query will look in all STIX directories that are not
|
||||
# the specified type. Compile correct dir paths
|
||||
for dir in os.listdir(self.stix_dir):
|
||||
for dir in os.listdir(self._stix_dir):
|
||||
if os.path.abspath(dir) not in declude_paths:
|
||||
include_paths.append(os.path.abspath(dir))
|
||||
|
||||
|
@ -153,36 +262,48 @@ class FileSystemSource(DataSource):
|
|||
if "id" in [filter.field for filter in file_filters]:
|
||||
for filter in file_filters:
|
||||
if filter.field == "id" and filter.op == "=":
|
||||
id = filter.value
|
||||
id_ = filter.value
|
||||
break
|
||||
else:
|
||||
id = None
|
||||
id_ = None
|
||||
else:
|
||||
id = None
|
||||
id_ = None
|
||||
|
||||
# now iterate through all STIX objs
|
||||
for path in include_paths:
|
||||
for root, dirs, files in os.walk(path):
|
||||
for file in files:
|
||||
if id:
|
||||
if id == file.split(".")[0]:
|
||||
for file_ in files:
|
||||
if id_:
|
||||
if id_ == file_.split(".")[0]:
|
||||
# since ID is specified in one of filters, can evaluate against filename first without loading
|
||||
stix_obj = json.load(file)["objects"]
|
||||
stix_obj = json.load(open(os.path.join(root, file_)))["objects"][0]
|
||||
# check against other filters, add if match
|
||||
all_data.extend(self.apply_common_filters([stix_obj], query))
|
||||
all_data.extend(apply_common_filters([stix_obj], query))
|
||||
else:
|
||||
# have to load into memory regardless to evaluate other filters
|
||||
stix_obj = json.load(file)["objects"]
|
||||
all_data.extend(self.apply_common_filters([stix_obj], query))
|
||||
stix_obj = json.load(open(os.path.join(root, file_)))["objects"][0]
|
||||
all_data.extend(apply_common_filters([stix_obj], query))
|
||||
|
||||
all_data = self.deduplicate(all_data)
|
||||
return all_data
|
||||
all_data = deduplicate(all_data)
|
||||
|
||||
# parse python STIX objects from the STIX object dicts
|
||||
stix_objs = [parse(stix_obj_dict) for stix_obj_dict in all_data]
|
||||
|
||||
return stix_objs
|
||||
|
||||
def _parse_file_filters(self, query):
|
||||
"""utility method to extract STIX common filters
|
||||
that can be used to possibly speed up querying STIX objects
|
||||
from the file system
|
||||
|
||||
Extracts filters that are for the "id" and "type" field of
|
||||
a STIX object. As the file directory is organized by STIX
|
||||
object type with filenames that are equivalent to the STIX
|
||||
object ID, these filters can be used first to reduce the
|
||||
search space of a FileSystemStore (or FileSystemSource)
|
||||
"""
|
||||
"""
|
||||
file_filters = []
|
||||
for filter in query:
|
||||
if filter.field == "id" or filter.field == "type":
|
||||
file_filters.append(filter)
|
||||
file_filters = set()
|
||||
for filter_ in query:
|
||||
if filter_.field == "id" or filter_.field == "type":
|
||||
file_filters.add(filter_)
|
||||
return file_filters
|
||||
|
|
|
@ -4,10 +4,6 @@ Filters for Python STIX 2.0 DataSources, DataSinks, DataStores
|
|||
Classes:
|
||||
Filter
|
||||
|
||||
TODO: The script at the bottom of the module works (to capture
|
||||
all the callable filter methods), however it causes this module
|
||||
to be imported by itself twice. Not sure how big of deal that is,
|
||||
or if cleaner solution possible.
|
||||
"""
|
||||
|
||||
import collections
|
||||
|
@ -15,6 +11,8 @@ import types
|
|||
|
||||
# Currently, only STIX 2.0 common SDO fields (that are not complex objects)
|
||||
# are supported for filtering on
|
||||
|
||||
"""Supported STIX properties"""
|
||||
STIX_COMMON_FIELDS = [
|
||||
"created",
|
||||
"created_by_ref",
|
||||
|
@ -30,14 +28,13 @@ STIX_COMMON_FIELDS = [
|
|||
"modified",
|
||||
"object_marking_refs",
|
||||
"revoked",
|
||||
"type",
|
||||
"granular_markings"
|
||||
"type"
|
||||
]
|
||||
|
||||
# Supported filter operations
|
||||
"""Supported filter operations"""
|
||||
FILTER_OPS = ['=', '!=', 'in', '>', '<', '>=', '<=']
|
||||
|
||||
# Supported filter value types
|
||||
"""Supported filter value types"""
|
||||
FILTER_VALUE_TYPES = [bool, dict, float, int, list, str, tuple]
|
||||
|
||||
# filter lookup map - STIX 2 common fields -> filter method
|
||||
|
@ -45,6 +42,24 @@ STIX_COMMON_FILTERS_MAP = {}
|
|||
|
||||
|
||||
class Filter(collections.namedtuple("Filter", ['field', 'op', 'value'])):
|
||||
"""Filter
|
||||
|
||||
STIX 2 filters that support the querying functionality of STIX 2
|
||||
DataStores and DataSources.
|
||||
|
||||
Initialized in the manner of python tuples
|
||||
|
||||
Args:
|
||||
field (str): filter field name, corresponds to STIX 2 object property
|
||||
|
||||
op (str): operator of the filter
|
||||
|
||||
value (str): filter field value
|
||||
|
||||
Example:
|
||||
Filter("id", "=", "malware--0f862b01-99da-47cc-9bdb-db4a86a95bb1")
|
||||
|
||||
"""
|
||||
__slots__ = ()
|
||||
|
||||
def __new__(cls, field, op, value):
|
||||
|
@ -55,7 +70,8 @@ class Filter(collections.namedtuple("Filter", ['field', 'op', 'value'])):
|
|||
return self
|
||||
|
||||
|
||||
# primitive type filters
|
||||
"""Base type filters"""
|
||||
|
||||
|
||||
def _all_filter(filter_, stix_obj_field):
|
||||
"""all filter operations (for filters whose value type can be applied to any operation type)"""
|
||||
|
@ -78,7 +94,7 @@ def _all_filter(filter_, stix_obj_field):
|
|||
|
||||
|
||||
def _id_filter(filter_, stix_obj_id):
|
||||
"""base filter types"""
|
||||
"""base STIX id filter"""
|
||||
if filter_.op == "=":
|
||||
return stix_obj_id == filter_.value
|
||||
elif filter_.op == "!=":
|
||||
|
@ -88,6 +104,7 @@ def _id_filter(filter_, stix_obj_id):
|
|||
|
||||
|
||||
def _boolean_filter(filter_, stix_obj_field):
|
||||
"""base boolean filter"""
|
||||
if filter_.op == "=":
|
||||
return stix_obj_field == filter_.value
|
||||
elif filter_.op == "!=":
|
||||
|
@ -97,19 +114,25 @@ def _boolean_filter(filter_, stix_obj_field):
|
|||
|
||||
|
||||
def _string_filter(filter_, stix_obj_field):
|
||||
"""base string filter"""
|
||||
return _all_filter(filter_, stix_obj_field)
|
||||
|
||||
|
||||
def _timestamp_filter(filter_, stix_obj_timestamp):
|
||||
"""base STIX 2 timestamp filter"""
|
||||
return _all_filter(filter_, stix_obj_timestamp)
|
||||
|
||||
# STIX 2.0 Common Property filters
|
||||
# The naming of these functions is important as
|
||||
# they are used to index a mapping dictionary from
|
||||
# STIX common field names to these filter functions.
|
||||
#
|
||||
# REQUIRED naming scheme:
|
||||
# "check_<STIX field name>_filter"
|
||||
|
||||
"""STIX 2.0 Common Property Filters
|
||||
|
||||
The naming of these functions is important as
|
||||
they are used to index a mapping dictionary from
|
||||
STIX common field names to these filter functions.
|
||||
|
||||
REQUIRED naming scheme:
|
||||
"check_<STIX field name>_filter"
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def check_created_filter(filter_, stix_obj):
|
||||
|
@ -124,13 +147,15 @@ def check_external_references_filter(filter_, stix_obj):
|
|||
"""
|
||||
STIX objects can have a list of external references
|
||||
|
||||
external_references properties:
|
||||
external_references properties supported:
|
||||
external_references.source_name (string)
|
||||
external_references.description (string)
|
||||
external_references.url (string)
|
||||
external_references.hashes (hash, but for filtering purposes, a string)
|
||||
external_references.external_id (string)
|
||||
|
||||
external_references properties not supported:
|
||||
external_references.hashes
|
||||
|
||||
"""
|
||||
for er in stix_obj["external_references"]:
|
||||
# grab er property name from filter field
|
||||
|
|
|
@ -6,7 +6,8 @@ Classes:
|
|||
MemorySink
|
||||
MemorySource
|
||||
|
||||
TODO: Test everything.
|
||||
|
||||
TODO: Run through tests again, lot of changes.
|
||||
|
||||
TODO: Use deduplicate() calls only when memory corpus is dirty (been added to)
|
||||
can save a lot of time for successive queries
|
||||
|
@ -18,49 +19,88 @@ Notes:
|
|||
|
||||
"""
|
||||
|
||||
import collections
|
||||
import json
|
||||
import os
|
||||
|
||||
from stix2 import Bundle
|
||||
from stix2.sources import DataSink, DataSource, DataStore
|
||||
from stix2.base import _STIXBase
|
||||
from stix2.core import Bundle, parse
|
||||
from stix2.sources import DataSink, DataSource, DataStore, apply_common_filters
|
||||
from stix2.sources.filters import Filter
|
||||
|
||||
|
||||
def _add(store, stix_data=None):
|
||||
"""Adds stix objects to MemoryStore/Source/Sink."""
|
||||
if isinstance(stix_data, collections.Mapping):
|
||||
# stix objects are in a bundle
|
||||
# make dictionary of the objects for easy lookup
|
||||
for stix_obj in stix_data["objects"]:
|
||||
store.data[stix_obj["id"]] = stix_obj
|
||||
"""Adds STIX objects to MemoryStore/Sink.
|
||||
|
||||
Adds STIX objects to an in-memory dictionary for fast lookup.
|
||||
Recursive function, breaks down STIX Bundles and lists.
|
||||
|
||||
Args:
|
||||
stix_data (list OR dict OR STIX object): STIX objects to be added
|
||||
"""
|
||||
|
||||
if isinstance(stix_data, _STIXBase):
|
||||
# adding a python STIX object
|
||||
store._data[stix_data["id"]] = stix_data
|
||||
|
||||
elif isinstance(stix_data, dict):
|
||||
if stix_data["type"] == "bundle":
|
||||
# adding a json bundle - so just grab STIX objects
|
||||
for stix_obj in stix_data["objects"]:
|
||||
_add(store, stix_obj)
|
||||
else:
|
||||
# adding a json STIX object
|
||||
store._data[stix_data["id"]] = stix_data
|
||||
|
||||
elif isinstance(stix_data, str):
|
||||
# adding json encoded string of STIX content
|
||||
stix_data = parse(stix_data)
|
||||
if stix_data["type"] == "bundle":
|
||||
# recurse on each STIX object in bundle
|
||||
for stix_obj in stix_data:
|
||||
_add(store, stix_obj)
|
||||
else:
|
||||
_add(store, stix_data)
|
||||
|
||||
elif isinstance(stix_data, list):
|
||||
# stix objects are in a list
|
||||
# STIX objects are in a list- recurse on each object
|
||||
for stix_obj in stix_data:
|
||||
store.data[stix_obj["id"]] = stix_obj
|
||||
_add(store, stix_obj)
|
||||
else:
|
||||
raise ValueError("stix_data must be in bundle format or raw list")
|
||||
raise ValueError("stix_data must be as STIX object(or list of),json formatted STIX (or list of), or a json formatted STIX bundle")
|
||||
|
||||
|
||||
class MemoryStore(DataStore):
|
||||
"""
|
||||
"""
|
||||
def __init__(self, stix_data=None):
|
||||
"""
|
||||
Notes:
|
||||
It doesn't make sense to create a MemoryStore by passing
|
||||
in existing MemorySource and MemorySink because there could
|
||||
be data concurrency issues. Just as easy to create new MemoryStore.
|
||||
"""MemoryStore
|
||||
|
||||
"""
|
||||
Provides an interface to an in-memory dictionary
|
||||
of STIX objects. MemoryStore is a wrapper around a paired
|
||||
MemorySink and MemorySource
|
||||
|
||||
Note: It doesn't make sense to create a MemoryStore by passing
|
||||
in existing MemorySource and MemorySink because there could
|
||||
be data concurrency issues. It is also just as easy to create a new MemoryStore.
|
||||
|
||||
Args:
|
||||
stix_data (list OR dict OR STIX object): STIX content to be added
|
||||
|
||||
Attributes:
|
||||
_data (dict): the in-memory dict that holds STIX objects
|
||||
|
||||
source (MemorySource): MemorySource
|
||||
|
||||
sink (MemorySink): MemorySink
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, stix_data=None):
|
||||
super(MemoryStore, self).__init__()
|
||||
self.data = {}
|
||||
self._data = {}
|
||||
|
||||
if stix_data:
|
||||
_add(self, stix_data)
|
||||
|
||||
self.source = MemorySource(stix_data=self.data, _store=True)
|
||||
self.sink = MemorySink(stix_data=self.data, _store=True)
|
||||
self.source = MemorySource(stix_data=self._data, _store=True)
|
||||
self.sink = MemorySink(stix_data=self._data, _store=True)
|
||||
|
||||
def save_to_file(self, file_path):
|
||||
return self.sink.save_to_file(file_path=file_path)
|
||||
|
@ -70,64 +110,111 @@ class MemoryStore(DataStore):
|
|||
|
||||
|
||||
class MemorySink(DataSink):
|
||||
"""
|
||||
"""
|
||||
def __init__(self, stix_data=None, _store=False):
|
||||
"""
|
||||
Args:
|
||||
stix_data (dictionary OR list): valid STIX 2.0 content in
|
||||
bundle or a list.
|
||||
_store (bool): if the MemorySink is a part of a DataStore,
|
||||
in which case "stix_data" is a direct reference to
|
||||
shared memory with DataSource.
|
||||
"""MemorySink
|
||||
|
||||
"""
|
||||
Provides an interface for adding/pushing STIX objects
|
||||
to an in-memory dictionary.
|
||||
|
||||
Designed to be paired with a MemorySource, together as the two
|
||||
components of a MemoryStore.
|
||||
|
||||
Args:
|
||||
stix_data (dict OR list): valid STIX 2.0 content in
|
||||
bundle or a list.
|
||||
|
||||
_store (bool): if the MemorySink is a part of a DataStore,
|
||||
in which case "stix_data" is a direct reference to
|
||||
shared memory with DataSource. Not user supplied
|
||||
|
||||
Attributes:
|
||||
_data (dict): the in-memory dict that holds STIX objects.
|
||||
If a part of a MemoryStore, dict is shared with
|
||||
a MemorySource
|
||||
"""
|
||||
|
||||
def __init__(self, stix_data=None, _store=False):
|
||||
super(MemorySink, self).__init__()
|
||||
self.data = {}
|
||||
self._data = {}
|
||||
|
||||
if _store:
|
||||
self.data = stix_data
|
||||
self._data = stix_data
|
||||
elif stix_data:
|
||||
self.add(stix_data)
|
||||
_add(self, stix_data)
|
||||
|
||||
def add(self, stix_data):
|
||||
"""
|
||||
"""add STIX objects to in-memory dictionary maintained by
|
||||
the MemorySink (MemoryStore)
|
||||
|
||||
see "_add()" for args documentation
|
||||
"""
|
||||
_add(self, stix_data)
|
||||
|
||||
def save_to_file(self, file_path):
|
||||
"""write STIX objects in in-memory dictionary to json file, as a STIX Bundle
|
||||
|
||||
Args:
|
||||
file_path (str): file path to write STIX data to
|
||||
|
||||
"""
|
||||
"""
|
||||
json.dump(Bundle(self.data.values()), file_path, indent=4)
|
||||
file_path = os.path.abspath(file_path)
|
||||
if not os.path.exists(os.path.dirname(file_path)):
|
||||
os.makedirs(os.path.dirname(file_path))
|
||||
with open(file_path, "w") as f:
|
||||
f.write(str(Bundle(self._data.values())))
|
||||
|
||||
|
||||
class MemorySource(DataSource):
|
||||
"""MemorySource
|
||||
|
||||
Provides an interface for searching/retrieving
|
||||
STIX objects from an in-memory dictionary.
|
||||
|
||||
Designed to be paired with a MemorySink, together as the two
|
||||
components of a MemoryStore.
|
||||
|
||||
Args:
|
||||
stix_data (dict OR list OR STIX object): valid STIX 2.0 content in
|
||||
bundle or list.
|
||||
|
||||
_store (bool): if the MemorySource is a part of a DataStore,
|
||||
in which case "stix_data" is a direct reference to shared
|
||||
memory with DataSink. Not user supplied
|
||||
|
||||
Attributes:
|
||||
_data (dict): the in-memory dict that holds STIX objects.
|
||||
If a part of a MemoryStore, dict is shared with
|
||||
a MemorySink
|
||||
"""
|
||||
|
||||
def __init__(self, stix_data=None, _store=False):
|
||||
"""
|
||||
Args:
|
||||
stix_data (dictionary OR list): valid STIX 2.0 content in
|
||||
bundle or list.
|
||||
_store (bool): if the MemorySource is a part of a DataStore,
|
||||
in which case "stix_data" is a direct reference to shared
|
||||
memory with DataSink.
|
||||
|
||||
"""
|
||||
super(MemorySource, self).__init__()
|
||||
self.data = {}
|
||||
self._data = {}
|
||||
|
||||
if _store:
|
||||
self.data = stix_data
|
||||
self._data = stix_data
|
||||
elif stix_data:
|
||||
_add(self, stix_data)
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
|
||||
"""retrieve STIX object from in-memory dict via STIX ID
|
||||
|
||||
Args:
|
||||
stix_id (str): The STIX ID of the STIX object to be retrieved.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(dict OR STIX object): STIX object that has the supplied
|
||||
ID. As the MemoryStore(i.e. MemorySink) adds STIX objects to memory
|
||||
as they are supplied (either as python dictionary or STIX object), it
|
||||
is returned in the same form as it was added
|
||||
"""
|
||||
"""
|
||||
|
||||
if _composite_filters is None:
|
||||
# if get call is only based on 'id', no need to search, just retrieve from dict
|
||||
try:
|
||||
stix_obj = self.data[stix_id]
|
||||
stix_obj = self._data[stix_id]
|
||||
except KeyError:
|
||||
stix_obj = None
|
||||
return stix_obj
|
||||
|
@ -143,44 +230,75 @@ class MemorySource(DataSource):
|
|||
return stix_obj
|
||||
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
Notes:
|
||||
Since Memory sources/sinks don't handle multiple versions of a
|
||||
STIX object, this operation is unnecessary. Translate call to get().
|
||||
"""retrieve STIX objects from in-memory dict via STIX ID, all versions of it
|
||||
|
||||
Note: Since Memory sources/sinks don't handle multiple versions of a
|
||||
STIX object, this operation is unnecessary. Translate call to get().
|
||||
|
||||
Args:
|
||||
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
|
||||
return a list of objects, all the versions of the object
|
||||
specified by the "id".
|
||||
stix_id (str): The STIX ID of the STIX 2 object to retrieve.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(list): STIX object that matched ``stix_id``.
|
||||
(list): list of STIX objects that have the supplied ID. As the
|
||||
MemoryStore(i.e. MemorySink) adds STIX objects to memory as they
|
||||
are supplied (either as python dictionary or STIX object), it
|
||||
is returned in the same form as it was added
|
||||
|
||||
"""
|
||||
return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]
|
||||
|
||||
def query(self, query=None, _composite_filters=None):
|
||||
"""
|
||||
"""search and retrieve STIX objects based on the complete query
|
||||
|
||||
A "complete query" includes the filters from the query, the filters
|
||||
attached to MemorySource, and any filters passed from a
|
||||
CompositeDataSource (i.e. _composite_filters)
|
||||
|
||||
Args:
|
||||
query (list): list of filters to search on
|
||||
|
||||
_composite_filters (set): set of filters passed from the
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(list): list of STIX objects that matches the supplied
|
||||
query. As the MemoryStore(i.e. MemorySink) adds STIX objects to memory
|
||||
as they are supplied (either as python dictionary or STIX object), it
|
||||
is returned in the same form as it was added
|
||||
|
||||
"""
|
||||
if query is None:
|
||||
query = []
|
||||
query = set()
|
||||
else:
|
||||
if not isinstance(query, list):
|
||||
# make sure we don't build the set from a bare Filter object,
|
||||
# need to make a set from a list of Filter objects (even if just one Filter)
|
||||
query = list(query)
|
||||
query = set(query)
|
||||
|
||||
# combine all query filters
|
||||
if self.filters:
|
||||
query.extend(list(self.filters))
|
||||
if self._filters:
|
||||
query.update(self._filters)
|
||||
if _composite_filters:
|
||||
query.extend(_composite_filters)
|
||||
query.update(_composite_filters)
|
||||
|
||||
# Apply STIX common property filters.
|
||||
all_data = self.apply_common_filters(self.data.values(), query)
|
||||
all_data = apply_common_filters(self._data.values(), query)
|
||||
|
||||
return all_data
|
||||
|
||||
def load_from_file(self, file_path):
|
||||
"""
|
||||
"""load STIX data from json file
|
||||
|
||||
File format is expected to be a single json
|
||||
STIX object or json STIX bundle
|
||||
|
||||
Args:
|
||||
file_path (str): file path to load STIX data from
|
||||
"""
|
||||
file_path = os.path.abspath(file_path)
|
||||
stix_data = json.load(open(file_path, "r"))
|
||||
|
||||
for stix_obj in stix_data["objects"]:
|
||||
self.data[stix_obj["id"]] = stix_obj
|
||||
_add(self, stix_data)
|
||||
|
|
|
@ -10,8 +10,8 @@ TODO: Test everything
|
|||
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from stix2.base import _STIXBase
|
||||
from stix2.core import Bundle, parse
|
||||
from stix2.sources import DataSink, DataSource, DataStore, make_id
|
||||
from stix2.sources.filters import Filter
|
||||
|
||||
|
@ -19,35 +19,73 @@ TAXII_FILTERS = ['added_after', 'id', 'type', 'version']
|
|||
|
||||
|
||||
class TAXIICollectionStore(DataStore):
|
||||
"""
|
||||
"""TAXIICollectionStore
|
||||
|
||||
Provides an interface to a local/remote TAXII Collection
|
||||
of STIX data. TAXIICollectionStore is a wrapper
|
||||
around a paired TAXIICollectionSink and TAXIICollectionSource.
|
||||
|
||||
Args:
|
||||
collection (taxii2.Collection): TAXII Collection instance
|
||||
"""
|
||||
def __init__(self, collection):
|
||||
"""
|
||||
Create a new TAXII Collection Data store
|
||||
|
||||
Args:
|
||||
collection (taxii2.Collection): Collection instance
|
||||
|
||||
"""
|
||||
super(TAXIICollectionStore, self).__init__()
|
||||
self.source = TAXIICollectionSource(collection)
|
||||
self.sink = TAXIICollectionSink(collection)
|
||||
|
||||
|
||||
class TAXIICollectionSink(DataSink):
|
||||
"""
|
||||
"""TAXIICollectionSink
|
||||
|
||||
Provides an interface for pushing STIX objects to a local/remote
|
||||
TAXII Collection endpoint.
|
||||
|
||||
Args:
|
||||
collection (taxii2.Collection): TAXII2 Collection instance
|
||||
|
||||
"""
|
||||
def __init__(self, collection):
|
||||
super(TAXIICollectionSink, self).__init__()
|
||||
self.collection = collection
|
||||
|
||||
def add(self, stix_obj):
|
||||
def add(self, stix_data):
|
||||
"""add/push STIX content to TAXII Collection endpoint
|
||||
|
||||
Args:
|
||||
stix_data (STIX object OR dict OR str OR list): valid STIX 2.0 content
|
||||
in a STIX object (or Bundle), STIX object dict (or Bundle dict), or a STIX 2.0
|
||||
json encoded string, or a list of any of the preceding
|
||||
|
||||
"""
|
||||
"""
|
||||
self.collection.add_objects(self.create_bundle([json.loads(str(stix_obj))]))
|
||||
|
||||
if isinstance(stix_data, _STIXBase):
|
||||
# adding python STIX object
|
||||
bundle = dict(Bundle(stix_data))
|
||||
|
||||
elif isinstance(stix_data, dict):
|
||||
# adding python dict (of either Bundle or STIX obj)
|
||||
if stix_data["type"] == "bundle":
|
||||
bundle = stix_data
|
||||
else:
|
||||
bundle = dict(Bundle(stix_data))
|
||||
|
||||
elif isinstance(stix_data, list):
|
||||
# adding list of something - recurse on each
|
||||
for obj in stix_data:
|
||||
self.add(obj)
|
||||
elif isinstance(stix_data, str):
|
||||
# adding json encoded string of STIX content
|
||||
stix_data = parse(stix_data)
|
||||
if stix_data["type"] == "bundle":
|
||||
bundle = dict(stix_data)
|
||||
else:
|
||||
bundle = dict(Bundle(stix_data))
|
||||
|
||||
self.collection.add_objects(bundle)
|
||||
|
||||
@staticmethod
|
||||
def create_bundle(objects):
|
||||
"""TODO: Remove?"""
|
||||
return dict(id="bundle--%s" % make_id(),
|
||||
objects=objects,
|
||||
spec_version="2.0",
|
||||
|
@ -55,21 +93,42 @@ class TAXIICollectionSink(DataSink):
|
|||
|
||||
|
||||
class TAXIICollectionSource(DataSource):
|
||||
"""
|
||||
"""TAXIICollectionSource
|
||||
|
||||
Provides an interface for searching/retrieving STIX objects
|
||||
from a local/remote TAXII Collection endpoint.
|
||||
|
||||
Args:
|
||||
collection (taxii2.Collection): TAXII Collection instance
|
||||
|
||||
"""
|
||||
def __init__(self, collection):
|
||||
super(TAXIICollectionSource, self).__init__()
|
||||
self.collection = collection
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
"""retrieve STIX object from local/remote TAXII Collection
|
||||
endpoint.
|
||||
|
||||
Args:
|
||||
stix_id (str): The STIX ID of the STIX object to be retrieved.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(STIX object): STIX object that has the supplied STIX ID.
|
||||
The STIX object is received from TAXII as a dict, parsed into
|
||||
a python STIX object and then returned
|
||||
|
||||
|
||||
"""
|
||||
# combine all query filters
|
||||
query = []
|
||||
if self.filters:
|
||||
query.extend(self.filters.values())
|
||||
query = set()
|
||||
if self._filters:
|
||||
query.update(self._filters)
|
||||
if _composite_filters:
|
||||
query.extend(_composite_filters)
|
||||
query.update(_composite_filters)
|
||||
|
||||
# separate taxii query terms (can be done remotely)
|
||||
taxii_filters = self._parse_taxii_filters(query)
|
||||
|
@ -83,10 +142,21 @@ class TAXIICollectionSource(DataSource):
|
|||
else:
|
||||
stix_obj = None
|
||||
|
||||
return stix_obj
|
||||
return parse(stix_obj)
|
||||
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
"""retrieve STIX object from local/remote TAXII Collection
|
||||
endpoint, all versions of it
|
||||
|
||||
Args:
|
||||
stix_id (str): The STIX ID of the STIX objects to be retrieved.
|
||||
|
||||
_composite_filters (set): set of filters passed from the parent
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(see query() as all_versions() is just a wrapper)
|
||||
|
||||
"""
|
||||
# make query in TAXII query format since 'id' is TAXII field
|
||||
query = [
|
||||
|
@ -99,16 +169,39 @@ class TAXIICollectionSource(DataSource):
|
|||
return all_data
|
||||
|
||||
def query(self, query=None, _composite_filters=None):
|
||||
"""search and retrieve STIX objects based on the complete query
|
||||
|
||||
A "complete query" includes the filters from the query, the filters
|
||||
attached to the TAXIICollectionSource, and any filters passed from a
|
||||
CompositeDataSource (i.e. _composite_filters)
|
||||
|
||||
Args:
|
||||
query (list): list of filters to search on
|
||||
|
||||
_composite_filters (set): set of filters passed from the
|
||||
CompositeDataSource, not user supplied
|
||||
|
||||
Returns:
|
||||
(list): list of STIX objects that matches the supplied
|
||||
query. The STIX objects are received from TAXII as dicts,
|
||||
parsed into python STIX objects and then returned.
|
||||
|
||||
"""
|
||||
"""
|
||||
|
||||
if query is None:
|
||||
query = []
|
||||
query = set()
|
||||
else:
|
||||
if not isinstance(query, list):
|
||||
# make sure we don't build the set from a bare Filter object,
|
||||
# need to make a set from a list of Filter objects (even if just one Filter)
|
||||
query = list(query)
|
||||
query = set(query)
|
||||
|
||||
# combine all query filters
|
||||
if self.filters:
|
||||
query.extend(self.filters.values())
|
||||
if self._filters:
|
||||
query.update(self.filters.values())
|
||||
if _composite_filters:
|
||||
query.extend(_composite_filters)
|
||||
query.update(_composite_filters)
|
||||
|
||||
# separate taxii query terms (can be done remotely)
|
||||
taxii_filters = self._parse_taxii_filters(query)
|
||||
|
@ -119,10 +212,13 @@ class TAXIICollectionSource(DataSource):
|
|||
# deduplicate data (before filtering as reduces wasted filtering)
|
||||
all_data = self.deduplicate(all_data)
|
||||
|
||||
# apply local (composite and data source filters)
|
||||
# apply local (CompositeDataSource, TAXIICollectionSource and query filters)
|
||||
all_data = self.apply_common_filters(all_data, query)
|
||||
|
||||
return all_data
|
||||
# parse python STIX objects from the STIX object dicts
|
||||
stix_objs = [parse(stix_obj_dict) for stix_obj_dict in all_data]
|
||||
|
||||
return stix_objs
|
||||
|
||||
def _parse_taxii_filters(self, query):
|
||||
"""Parse out TAXII filters that the TAXII server can filter on.
|
||||
|
@ -142,6 +238,7 @@ class TAXIICollectionSource(DataSource):
|
|||
for 'requests.get()'.
|
||||
|
||||
"""
|
||||
|
||||
params = {}
|
||||
|
||||
for filter_ in query:
|
||||
|
|
Loading…
Reference in New Issue