docstrings redone; code changes for adding/returning STIX objects to FileSystem, Memory, TAXII; code changes for query sets and filters

stix2.1
= 2017-09-22 11:29:17 -04:00
parent 463d1e6b28
commit 94df10bf8d
6 changed files with 765 additions and 388 deletions

View File

@ -137,13 +137,6 @@ class Environment(object):
raise AttributeError('Environment has no data source')
add_filters.__doc__ = DataSource.add_filters.__doc__
def add_filter(self, *args, **kwargs):
    # Delegates to the data source's add_filter(), passing every positional
    # and keyword argument through unchanged and returning its result.
    #
    # Only the attribute lookup is guarded: the bound method is resolved
    # inside the try and invoked outside it, so an AttributeError raised
    # *inside* the data source's add_filter() propagates as-is instead of
    # being misreported as "Environment has no data source".
    try:
        source_add_filter = self.source.add_filter
    except AttributeError:
        raise AttributeError('Environment has no data source')
    return source_add_filter(*args, **kwargs)
# The public docstring is copied from the method being delegated to.
add_filter.__doc__ = DataSource.add_filter.__doc__
def add(self, *args, **kwargs):
try:
return self.sink.add(*args, **kwargs)

View File

@ -7,12 +7,8 @@ Classes:
DataSource
CompositeDataSource
TODO:Test everything
Notes:
add_filter(), remove_filter(), deduplicate() - if these functions remain
the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
make those functions an interface to inherit?
Q: We have add_filters() but no remove_filter()
"""
@ -28,14 +24,100 @@ def make_id():
return str(uuid.uuid4())
class DataStore(object):
def apply_common_filters(stix_objs, query):
    """Evaluate filters against a set of STIX 2.0 objects.

    Supports only STIX 2.0 common property fields. An object is kept only
    if every filter in the query matches it.

    Args:
        stix_objs (list): list of STIX objects to apply the query to
        query (set): set of filters (combined, they form the complete query)

    Returns:
        (list): list of STIX objects that successfully evaluate against
            the query.

    Raises:
        ValueError: if a filter's field is not in STIX_COMMON_FIELDS, or
            if the filter function registered for the field returns -1
            (operator not supported for that field).

    """
    filtered_stix_objs = []

    # evaluate objects against filter
    for stix_obj in stix_objs:
        clean = True
        for filter_ in query:
            # only STIX common fields can be filtered on; anything else
            # is an error rather than something to silently skip
            if filter_.field not in STIX_COMMON_FIELDS:
                raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))

            # For properties like granular_markings and external_references
            # the first path component names the top-level property (and
            # the filter function to use). split() on a field without a
            # "." simply returns the whole field.
            field = filter_.field.split(".")[0]

            # check filter "field" is in STIX object - if cant be
            # applied due to STIX object, STIX object is discarded
            # (i.e. did not make it through the filter)
            if field not in stix_obj:
                clean = False
                break

            match = STIX_COMMON_FILTERS_MAP[field](filter_, stix_obj)
            if not match:
                clean = False
                break
            elif match == -1:
                # -1 is truthy, so it reaches this branch: the filter
                # function signals an operator it cannot handle
                raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))

        # if object unmarked after all filters, add it
        if clean:
            filtered_stix_objs.append(stix_obj)

    return filtered_stix_objs
def deduplicate(stix_obj_list):
    """Deduplicate a list of STIX objects to a unique set

    Reduces a set of STIX objects to unique set by looking
    at 'id' and 'modified' fields - as a unique object version
    is determined by the combination of those fields

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list with a unique set of the passed list of STIX objects.
        Objects appear in the order their ('id', 'modified') pair was
        first seen; when a pair occurs more than once, the last object
        carrying it is the one returned.

    """
    # Imported locally so this function does not rely on a module-level
    # import being present.
    from collections import OrderedDict

    # An OrderedDict keeps the result order stable on every interpreter;
    # a plain dict iterates in arbitrary order on older Pythons.
    unique_objs = OrderedDict()

    for obj in stix_obj_list:
        unique_objs[(obj['id'], obj['modified'])] = obj

    return list(unique_objs.values())
class DataStore(object):
"""DataStore
An implementer will create a concrete subclass from
this abstract class for the specific data store.
this class for the specific DataStore.
Args:
source (DataSource): An existing DataSource to use
as this DataStore's DataSource component
sink (DataSink): An existing DataSink to use
as this DataStore's DataSink component
Attributes:
id (str): A unique UUIDv4 to identify this DataStore.
source (DataStore): An object that implements DataStore class.
source (DataSource): An object that implements DataSource class.
sink (DataSink): An object that implements DataSink class.
"""
@ -47,14 +129,13 @@ class DataStore(object):
def get(self, stix_id):
"""Retrieve the most recent version of a single STIX object by ID.
Notes:
Translate API get() call to the appropriate DataSource call.
Translate get() call to the appropriate DataSource call.
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve.
stix_id (str): the id of the STIX object to retrieve.
Returns:
stix_obj (dictionary): the single most recent version of the STIX
stix_obj: the single most recent version of the STIX
object specified by the "id".
"""
@ -63,15 +144,13 @@ class DataStore(object):
def all_versions(self, stix_id):
"""Retrieve all versions of a single STIX object by ID.
Implement:
Translate all_versions() call to the appropriate DataSource call
Implement: Translate all_versions() call to the appropriate DataSource call
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve.
stix_id (str): the id of the STIX object to retrieve.
Returns:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
stix_objs (list): a list of STIX objects
"""
return self.source.all_versions(stix_id)
@ -79,17 +158,15 @@ class DataStore(object):
def query(self, query):
"""Retrieve STIX objects matching a set of filters.
Notes:
Implement the specific data source API calls, processing,
functionality required for retrieving query from the data source.
Implement: Specific data source API calls, processing,
functionality required for retrieving query from the data source.
Args:
query (list): a list of filters (which collectively are the query)
to conduct search on.
Returns:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
stix_objs (list): a list of STIX objects
"""
return self.source.query(query=query)
@ -97,21 +174,30 @@ class DataStore(object):
def add(self, stix_objs):
"""Store STIX objects.
Notes:
Translate add() to the appropriate DataSink call().
Translates add() to the appropriate DataSink call.
Args:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
stix_objs (list): a list of STIX objects
"""
return self.sink.add(stix_objs)
def add_filters(self, filters):
"""add query filters (to DataSource component)
Translates add_filters() to appropriate DataSource call.
Args:
filters (list or Filter obj): Filters to be added to DataStore
"""
return self.source.add_filters(filters)
class DataSink(object):
"""
Abstract class for defining a data sink. Intended for subclassing into
different sink components.
"""DataSink
An implementer will create a concrete subclass from
this class for the specific DataSink.
Attributes:
id (str): A unique UUIDv4 to identify this DataSink.
@ -123,9 +209,8 @@ class DataSink(object):
def add(self, stix_objs):
"""Store STIX objects.
Notes:
Implement the specific data sink API calls, processing,
functionality required for adding data to the sink
Implement: Specific data sink API calls, processing,
functionality required for adding data to the sink
Args:
stix_objs (list): a list of STIX objects (where each object is a
@ -136,194 +221,122 @@ class DataSink(object):
class DataSource(object):
"""
Abstract class for defining a data source. Intended for subclassing into
different source components.
"""DataSource
An implementer will create a concrete subclass from
this class for the specific DataSource.
Attributes:
id (str): A unique UUIDv4 to identify this DataSource.
filters (set): A collection of filters present in this DataSource.
_filters (set): A collection of filters attached to this DataSource.
"""
def __init__(self):
self.id = make_id()
self.filters = set()
self._filters = set()
def get(self, stix_id, _composite_filters=None):
"""
Fill:
Implement the specific data source API calls, processing,
functionality required for retrieving data from the data source
Implement: Specific data source API calls, processing,
functionality required for retrieving data from the data source
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
return a single object, the most recent version of the object
specified by the "id".
_composite_filters (list): list of filters passed along from
the Composite Data Filter.
_composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
stix_obj (dictionary): the STIX object to be returned
stix_obj: the STIX object
"""
raise NotImplementedError()
def all_versions(self, stix_id, _composite_filters=None):
"""
Notes:
Similar to get() except returns list of all object versions of
the specified "id". In addition, implement the specific data
source API calls, processing, functionality required for retrieving
data from the data source.
Implement: Similar to get() except returns list of all object versions of
the specified "id". In addition, implement the specific data
source API calls, processing, functionality required for retrieving
data from the data source.
Args:
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
return a list of objects, all the versions of the object
specified by the "id".
_composite_filters (list): list of filters passed from the
Composite Data Source
_composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
stix_objs (list): a list of STIX objects
"""
raise NotImplementedError()
def query(self, query, _composite_filters=None):
"""
Fill:
-implement the specific data source API calls, processing,
functionality required for retrieving query from the data source
Implement: Specific data source API calls, processing,
functionality required for retrieving query from the data source
Args:
query (list): a list of filters (which collectively are the query)
to conduct search on
_composite_filters (list): a list of filters passed from the
Composite Data Source
_composite_filters (set): a set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
stix_objs (list): a list of STIX objects
"""
raise NotImplementedError()
def add_filters(self, filters):
"""Add multiple filters to be applied to all queries for STIX objects.
Args:
filters (list): list of filters (dict) to add to the Data Source.
"""
for filter in filters:
self.add_filter(filter)
def add_filter(self, filter):
"""Add a filter to be applied to all queries for STIX objects.
Args:
filter: filter to add to the Data Source.
filters (list or Filter obj): filter(s) to add to the Data Source.
"""
# check filter field is a supported STIX 2.0 common field
if filter.field not in STIX_COMMON_FIELDS:
raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")
if isinstance(filters, list) or isinstance(filters, set):
for filter_ in filters:
self.add_filters(filter_)
else:
filter_ = filters
# check filter field is a supported STIX 2.0 common field
if filter_.field not in STIX_COMMON_FIELDS:
raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")
# check filter operator is supported
if filter.op not in FILTER_OPS:
raise ValueError("Filter operation (from 'op' field) not supported")
# check filter operator is supported
if filter_.op not in FILTER_OPS:
raise ValueError("Filter operation (from 'op' field) not supported")
# check filter value type is supported
if type(filter.value) not in FILTER_VALUE_TYPES:
raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")
# check filter value type is supported
if type(filter_.value) not in FILTER_VALUE_TYPES:
raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")
self.filters.add(filter)
def apply_common_filters(self, stix_objs, query):
    """Evaluate filters against a set of STIX 2.0 objects.

    Supports only STIX 2.0 common property fields. An object is kept only
    if every filter in the query matches it.

    Args:
        stix_objs (list): list of STIX objects to apply the query to
        query (list): list of filters (combined, they form the complete
            query)

    Returns:
        (list): list of STIX objects that successfully evaluate against
            the query.

    Raises:
        ValueError: if a filter's field is not in STIX_COMMON_FIELDS, or
            if the filter function registered for the field returns -1
            (operator not supported for that field).

    """
    filtered_stix_objs = []

    # evaluate objects against filter
    for stix_obj in stix_objs:
        clean = True
        for filter_ in query:
            # only STIX common fields can be filtered on; anything else
            # is an error rather than something to silently skip
            if filter_.field not in STIX_COMMON_FIELDS:
                raise ValueError("Error, field: {0} is not supported for filtering on.".format(filter_.field))

            # For properties like granular_markings and external_references
            # the first path component names the top-level property (and
            # the filter function to use). split() on a field without a
            # "." simply returns the whole field.
            field = filter_.field.split(".")[0]

            # check filter "field" is in STIX object - if cant be
            # applied due to STIX object, STIX object is discarded
            # (i.e. did not make it through the filter)
            if field not in stix_obj:
                clean = False
                break

            match = STIX_COMMON_FILTERS_MAP[field](filter_, stix_obj)
            if not match:
                clean = False
                break
            elif match == -1:
                # -1 is truthy, so it reaches this branch: the filter
                # function signals an operator it cannot handle
                raise ValueError("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field))

        # if object unmarked after all filters, add it
        if clean:
            filtered_stix_objs.append(stix_obj)

    return filtered_stix_objs
def deduplicate(self, stix_obj_list):
    """Deduplicate a list of STIX objects to a unique set

    Reduces a set of STIX objects to unique set by looking
    at 'id' and 'modified' fields - as a unique object version
    is determined by the combination of those fields

    Args:
        stix_obj_list (list): list of STIX objects (dicts)

    Returns:
        A list with a unique set of the passed list of STIX objects.
        Objects appear in the order their ('id', 'modified') pair was
        first seen; when a pair occurs more than once, the last object
        carrying it is the one returned.

    """
    # Imported locally so this method does not rely on a module-level
    # import being present.
    from collections import OrderedDict

    # An OrderedDict keeps the result order stable on every interpreter;
    # a plain dict iterates in arbitrary order on older Pythons.
    unique_objs = OrderedDict()

    for obj in stix_obj_list:
        unique_objs[(obj['id'], obj['modified'])] = obj

    return list(unique_objs.values())
self._filters.add(filter_)
class CompositeDataSource(DataSource):
"""Controller for all the defined/configured STIX Data Sources.
"""CompositeDataSource
E.g. a user can define n Data Sources - creating Data Source (objects)
for each. There is only one instance of this for any Python STIX 2.0
application.
Controller for all the attached DataSources.
A user can have a single CompositeDataSource as an interface
to a set of DataSources. When an API call is made to the
CompositeDataSource, it is delegated to each of the (real)
DataSources that are attached to it.
DataSources can be attached to CompositeDataSource for a variety
of reasons, e.g. common filters, organization, less API calls.
Attributes:
name (str): The name that identifies this CompositeDataSource.
data_sources (dict): A dictionary of DataSource objects; to be
controlled and used by the Data Source Controller object.
@ -340,41 +353,46 @@ class CompositeDataSource(DataSource):
self.data_sources = {}
def get(self, stix_id, _composite_filters=None):
"""Retrieve STIX object by 'id'
"""Retrieve STIX object by STIX ID
Federated retrieve method-iterates through all STIX data sources
Federated retrieve method, iterates through all DataSources
defined in the "data_sources" parameter. Each data source has a
specific API retrieve-like function and associated parameters. This
function does a federated retrieval and consolidation of the data
returned from all the STIX data sources.
Notes:
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle.
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle.
Args:
stix_id (str): the id of the STIX object to retrieve.
_composite_filters (list): a list of filters passed from the
Composite Data Source
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is attached
to another parent CompositeDataSource), not user supplied
Returns:
stix_obj (dict): the STIX object to be returned.
stix_obj: the STIX object to be returned.
"""
if not self.get_all_data_sources():
raise AttributeError('CompositeDataSource has no data sources')
all_data = []
all_filters = set()
all_filters.update(self._filters)
if _composite_filters:
all_filters.update(_composite_filters)
# for every configured Data Source, call its retrieve handler
for ds_id, ds in iteritems(self.data_sources):
data = ds.get(stix_id=stix_id, _composite_filters=list(self.filters))
data = ds.get(stix_id=stix_id, _composite_filters=all_filters)
all_data.append(data)
# remove duplicate versions
if len(all_data) > 0:
all_data = self.deduplicate(all_data)
all_data = deduplicate(all_data)
# reduce to most recent version
stix_obj = sorted(all_data, key=lambda k: k['modified'], reverse=True)[0]
@ -382,20 +400,20 @@ class CompositeDataSource(DataSource):
return stix_obj
def all_versions(self, stix_id, _composite_filters=None):
"""Retrieve STIX objects by 'id'
"""Retrieve STIX objects by STIX ID
Federated all_versions retrieve method - iterates through all STIX data
sources defined in "data_sources"
Federated all_versions retrieve method - iterates through all DataSources
defined in "data_sources"
Notes:
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle
Args:
stix_id (str): id of the STIX objects to retrieve
_composite_filters (list): a list of filters passed from the
Composite Data Source
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is attached
to a parent CompositeDataSource), not user supplied
Returns:
all_data (list): list of STIX objects that have the specified id
@ -405,32 +423,37 @@ class CompositeDataSource(DataSource):
raise AttributeError('CompositeDataSource has no data sources')
all_data = []
all_filters = self.filters
all_filters = set()
all_filters.update(self._filters)
if _composite_filters:
all_filters = set(self.filters).update(_composite_filters)
all_filters.update(_composite_filters)
# retrieve STIX objects from all configured data sources
for ds_id, ds in iteritems(self.data_sources):
data = ds.all_versions(stix_id=stix_id, _composite_filters=list(all_filters))
data = ds.all_versions(stix_id=stix_id, _composite_filters=all_filters)
all_data.extend(data)
# remove exact duplicates (where duplicates are STIX 2.0 objects
# with the same 'id' and 'modified' values)
if len(all_data) > 0:
all_data = self.deduplicate(all_data)
all_data = deduplicate(all_data)
return all_data
def query(self, query=None, _composite_filters=None):
"""Federate the query to all Data Sources attached to the
"""Retrieve STIX objects that match query
Federate the query to all DataSources attached to the
Composite Data Source.
Args:
query (list): list of filters to search on.
query (list): list of filters to search on
_composite_filters (list): a list of filters passed from the
Composite Data Source
_composite_filters (list): a list of filters passed from a
CompositeDataSource (i.e. if this CompositeDataSource is attached
to a parent CompositeDataSource), not user supplied
Returns:
all_data (list): list of STIX objects to be returned
@ -440,33 +463,37 @@ class CompositeDataSource(DataSource):
raise AttributeError('CompositeDataSource has no data sources')
if not query:
# dont mess with the query (i.e. convert to a set, as thats done
# within the specific DataSources that are called)
query = []
all_data = []
all_filters = self.filters
all_filters = set()
all_filters.update(self._filters)
if _composite_filters:
all_filters = set(self.filters).update(_composite_filters)
all_filters.update(_composite_filters)
# federate query to all attached data sources,
# pass composite filters to id
for ds_id, ds in iteritems(self.data_sources):
data = ds.query(query=query, _composite_filters=list(all_filters))
data = ds.query(query=query, _composite_filters=all_filters)
all_data.extend(data)
# remove exact duplicates (where duplicates are STIX 2.0
# objects with the same 'id' and 'modified' values)
if len(all_data) > 0:
all_data = self.deduplicate(all_data)
all_data = deduplicate(all_data)
return all_data
def add_data_source(self, data_sources):
"""Add/attach Data Source to the Composite Data Source instance
"""Attach a DataSource to the CompositeDataSource instance
Args:
data_sources (list): a list of Data Source objects to attach
to the Composite Data Source
data_sources (list): a list of DataSource(s) to attach
to the CompositeDataSource
"""
if not isinstance(data_sources, list):
@ -474,36 +501,32 @@ class CompositeDataSource(DataSource):
for ds in data_sources:
if issubclass(ds.__class__, DataSource):
if ds.id in self.data_sources:
# data source already attached to Composite Data Source
# DataSource already attached to CompositeDataSource
continue
# add data source to Composite Data Source
# (its id will be its key identifier)
# add DataSource to CompositeDataSource, its ID is used as key
self.data_sources[ds.id] = ds
else:
# the Data Source object is not a proper subclass
# of DataSource Abstract Class
# the Data Source object not a subclass of DataSource
# TODO: maybe log error?
continue
return
def remove_data_source(self, data_source_ids):
"""Remove/detach Data Source from the Composite Data Source instance
"""Remove DataSource from the CompositeDataSource instance
Args:
data_source_ids (list): a list of Data Source identifiers.
data_source_ids (list): a list of Data Source id(s).
"""
for id in data_source_ids:
if id in self.data_sources:
del self.data_sources[id]
else:
raise ValueError("DataSource 'id' not found in CompositeDataSource collection.")
raise ValueError("DataSource 'id' not found in CompositeDataSource.data_sources ")
return
def get_all_data_sources(self):
"""Return all attached Data Sources
"""
"""Return all attached DataSource(s)"""
return self.data_sources.values()

View File

@ -12,13 +12,28 @@ TODO: Test everything
import json
import os
from stix2 import Bundle
from stix2.sources import DataSink, DataSource, DataStore
from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.sources import (DataSink, DataSource, DataStore,
apply_common_filters, deduplicate)
from stix2.sources.filters import Filter
class FileSystemStore(DataStore):
"""
"""FileSystemStore
Provides an interface to a file directory of STIX objects.
FileSystemStore is a wrapper around a paired FileSystemSink
and FileSystemSource.
Args:
stix_dir (str): path to directory of STIX objects
Attributes:
source (FileSystemSource): FileSystemSource
sink (FileSystemSink): FileSystemSink
"""
def __init__(self, stix_dir="stix_data"):
super(FileSystemStore, self).__init__()
@ -27,56 +42,117 @@ class FileSystemStore(DataStore):
class FileSystemSink(DataSink):
"""
"""FileSystemSink
Provides an interface for adding/pushing STIX objects
to file directory of STIX objects.
Can be paired with a FileSystemSource, together as the two
components of a FileSystemStore.
Args:
stix_dir (str): path to directory of STIX objects
"""
def __init__(self, stix_dir="stix_data"):
super(FileSystemSink, self).__init__()
self.stix_dir = os.path.abspath(stix_dir)
self._stix_dir = os.path.abspath(stix_dir)
# check directory path exists
if not os.path.exists(self.stix_dir):
if not os.path.exists(self._stix_dir):
print("Error: directory path for STIX data does not exist")
@property
def stix_dir(self):
return self.stix_dir
return self._stix_dir
@stix_dir.setter
def stix_dir(self, dir):
self.stix_dir = dir
def add(self, stix_data=None):
"""add STIX objects to file directory
def add(self, stix_objs=None):
Args:
stix_data (STIX object OR dict OR str OR list): valid STIX 2.0 content
in a STIX object(or list of), dict (or list of), or a STIX 2.0
json encoded string
TODO: Bundlify STIX content or no? When dumping to disk.
"""
Q: bundlify or no?
"""
if not stix_objs:
stix_objs = []
for stix_obj in stix_objs:
path = os.path.join(self.stix_dir, stix_obj["type"], stix_obj["id"])
json.dump(Bundle([stix_obj]), open(path, 'w+'), indent=4)
def _check_path_and_write(stix_dir, stix_obj):
path = os.path.join(stix_dir, stix_obj["type"], stix_obj["id"] + ".json")
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
with open(path, "w") as f:
# Bundle() can take dict or STIX obj as argument
f.write(str(Bundle(stix_obj)))
if isinstance(stix_data, _STIXBase):
# adding python STIX object
_check_path_and_write(self._stix_dir, stix_data)
elif isinstance(stix_data, dict):
if stix_data["type"] == "bundle":
# adding json-formatted Bundle - extracting STIX objects
for stix_obj in stix_data["objects"]:
self.add(stix_obj)
else:
# adding json-formatted STIX
_check_path_and_write(self._stix_dir, stix_data)
elif isinstance(stix_data, str):
# adding json encoded string of STIX content
stix_data = parse(stix_data)
if stix_data["type"] == "bundle":
for stix_obj in stix_data:
self.add(stix_obj)
else:
self.add(stix_data)
elif isinstance(stix_data, list):
# if list, recurse call on individual STIX objects
for stix_obj in stix_data:
self.add(stix_obj)
else:
raise ValueError("stix_data must be a STIX object(or list of, json formatted STIX(or list of) or a json formatted STIX bundle")
class FileSystemSource(DataSource):
"""
"""FileSystemSource
Provides an interface for searching/retrieving
STIX objects from a STIX object file directory.
Can be paired with a FileSystemSink, together as the two
components of a FileSystemStore.
Args:
stix_dir (str): path to directory of STIX objects
"""
def __init__(self, stix_dir="stix_data"):
super(FileSystemSource, self).__init__()
self.stix_dir = os.path.abspath(stix_dir)
self._stix_dir = os.path.abspath(stix_dir)
# check directory path exists
if not os.path.exists(self.stix_dir):
if not os.path.exists(self._stix_dir):
print("Error: directory path for STIX data does not exist")
@property
def stix_dir(self):
return self.stix_dir
@stix_dir.setter
def stix_dir(self, dir):
self.stix_dir = dir
return self._stix_dir
def get(self, stix_id, _composite_filters=None):
"""
"""retrieve STIX object from file directory via STIX ID
Args:
stix_id (str): The STIX ID of the STIX object to be retrieved.
_composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(STIX object): STIX object that has the supplied STIX ID.
The STIX object is loaded from its json file, parsed into
a python STIX object and then returned
"""
query = [Filter("id", "=", stix_id)]
@ -84,30 +160,63 @@ class FileSystemSource(DataSource):
stix_obj = sorted(all_data, key=lambda k: k['modified'])[0]
return stix_obj
return parse(stix_obj)
def all_versions(self, stix_id, _composite_filters=None):
"""
Notes:
Since FileSystem sources/sinks don't handle multiple versions
of a STIX object, this operation is unnecessary. Pass call to get().
"""retrieve STIX object from file directory via STIX ID, all versions
Note: Since FileSystem sources/sinks don't handle multiple versions
of a STIX object, this operation is unnecessary. Pass call to get().
Args:
stix_id (str): The STIX ID of the STIX objects to be retrieved.
_composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(list): of STIX objects that has the supplied STIX ID.
The STIX objects are loaded from their json files, parsed into
a python STIX objects and then returned
"""
return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]
def query(self, query=None, _composite_filters=None):
"""
"""search and retrieve STIX objects based on the complete query
A "complete query" includes the filters from the query, the filters
attached to this FileSystemSource, and any filters passed from a
CompositeDataSource (i.e. _composite_filters)
Args:
query (list): list of filters to search on
_composite_filters (set): set of filters passed from the
CompositeDataSource, not user supplied
Returns:
(list): list of STIX objects that matches the supplied
query. The STIX objects are loaded from their json files,
parsed into a python STIX objects and then returned.
"""
all_data = []
if query is None:
query = []
query = set()
else:
if not isinstance(query, list):
# make sure dont make set from a Filter object,
# need to make a set from a list of Filter objects (even if just one Filter)
query = list(query)
query = set(query)
# combine all query filters
if self.filters:
query.extend(self.filters.values())
if self._filters:
query.update(self._filters)
if _composite_filters:
query.extend(_composite_filters)
query.update(_composite_filters)
# extract any filters that are for "type" or "id" , as we can then do
# filtering before reading in the STIX objects. A STIX 'type' filter
@ -125,12 +234,12 @@ class FileSystemSource(DataSource):
for filter in file_filters:
if filter.field == "type":
if filter.op == "=":
include_paths.append(os.path.join(self.stix_dir, filter.value))
include_paths.append(os.path.join(self._stix_dir, filter.value))
elif filter.op == "!=":
declude_paths.append(os.path.join(self.stix_dir, filter.value))
declude_paths.append(os.path.join(self._stix_dir, filter.value))
else:
# have to walk entire STIX directory
include_paths.append(self.stix_dir)
include_paths.append(self._stix_dir)
# if a user specifies a "type" filter like "type = <stix-object_type>",
# the filter is reducing the search space to single stix object types
@ -144,7 +253,7 @@ class FileSystemSource(DataSource):
# user has specified types that are not wanted (i.e. "!=")
# so query will look in all STIX directories that are not
# the specified type. Compile correct dir paths
for dir in os.listdir(self.stix_dir):
for dir in os.listdir(self._stix_dir):
if os.path.abspath(dir) not in declude_paths:
include_paths.append(os.path.abspath(dir))
@ -153,36 +262,48 @@ class FileSystemSource(DataSource):
if "id" in [filter.field for filter in file_filters]:
for filter in file_filters:
if filter.field == "id" and filter.op == "=":
id = filter.value
id_ = filter.value
break
else:
id = None
id_ = None
else:
id = None
id_ = None
# now iterate through all STIX objs
for path in include_paths:
for root, dirs, files in os.walk(path):
for file in files:
if id:
if id == file.split(".")[0]:
for file_ in files:
if id_:
if id_ == file_.split(".")[0]:
# since ID is specified in one of filters, can evaluate against filename first without loading
stix_obj = json.load(file)["objects"]
stix_obj = json.load(open(os.path.join(root, file_)))["objects"][0]
# check against other filters, add if match
all_data.extend(self.apply_common_filters([stix_obj], query))
all_data.extend(apply_common_filters([stix_obj], query))
else:
# have to load into memory regardless to evaluate other filters
stix_obj = json.load(file)["objects"]
all_data.extend(self.apply_common_filters([stix_obj], query))
stix_obj = json.load(open(os.path.join(root, file_)))["objects"][0]
all_data.extend(apply_common_filters([stix_obj], query))
all_data = self.deduplicate(all_data)
return all_data
all_data = deduplicate(all_data)
# parse python STIX objects from the STIX object dicts
stix_objs = [parse(stix_obj_dict) for stix_obj_dict in all_data]
return stix_objs
def _parse_file_filters(self, query):
"""utility method to extract STIX common filters
that can be used to possibly speed up querying STIX objects
from the file system
Extracts filters that are for the "id" and "type" field of
a STIX object. As the file directory is organized by STIX
object type with filenames that are equivalent to the STIX
object ID, these filters can be used first to reduce the
search space of a FileSystemStore(or FileSystemSink)
"""
"""
file_filters = []
for filter in query:
if filter.field == "id" or filter.field == "type":
file_filters.append(filter)
file_filters = set()
for filter_ in query:
if filter_.field == "id" or filter_.field == "type":
file_filters.add(filter_)
return file_filters

View File

@ -4,10 +4,6 @@ Filters for Python STIX 2.0 DataSources, DataSinks, DataStores
Classes:
Filter
TODO: The script at the bottom of the module works (to capture
all the callable filter methods), however it causes this module
to be imported by itself twice. Not sure how big of deal that is,
or if cleaner solution possible.
"""
import collections
@ -15,6 +11,8 @@ import types
# Currently, only STIX 2.0 common SDO fields (that are not complex objects)
# are supported for filtering on
"""Supported STIX properties"""
STIX_COMMON_FIELDS = [
"created",
"created_by_ref",
@ -30,14 +28,13 @@ STIX_COMMON_FIELDS = [
"modified",
"object_marking_refs",
"revoked",
"type",
"granular_markings"
"type"
]
# Supported filter operations
"""Supported filter operations"""
FILTER_OPS = ['=', '!=', 'in', '>', '<', '>=', '<=']
# Supported filter value types
"""Supported filter value types"""
FILTER_VALUE_TYPES = [bool, dict, float, int, list, str, tuple]
# filter lookup map - STIX 2 common fields -> filter method
@ -45,6 +42,24 @@ STIX_COMMON_FILTERS_MAP = {}
class Filter(collections.namedtuple("Filter", ['field', 'op', 'value'])):
"""Filter
STIX 2 filters that support the querying functionality of STIX 2
DataStores and DataSources.
Initialized in the manner of python tuples
Args:
field (str): filter field name, corresponds to STIX 2 object property
op (str): operator of the filter
value (str): filter field value
Example:
Filter("id", "=", "malware--0f862b01-99da-47cc-9bdb-db4a86a95bb1")
"""
__slots__ = ()
def __new__(cls, field, op, value):
@ -55,7 +70,8 @@ class Filter(collections.namedtuple("Filter", ['field', 'op', 'value'])):
return self
# primitive type filters
"""Base type filters"""
def _all_filter(filter_, stix_obj_field):
"""all filter operations (for filters whose value type can be applied to any operation type)"""
@ -78,7 +94,7 @@ def _all_filter(filter_, stix_obj_field):
def _id_filter(filter_, stix_obj_id):
"""base filter types"""
"""base STIX id filter"""
if filter_.op == "=":
return stix_obj_id == filter_.value
elif filter_.op == "!=":
@ -88,6 +104,7 @@ def _id_filter(filter_, stix_obj_id):
def _boolean_filter(filter_, stix_obj_field):
"""base boolean filter"""
if filter_.op == "=":
return stix_obj_field == filter_.value
elif filter_.op == "!=":
@ -97,19 +114,25 @@ def _boolean_filter(filter_, stix_obj_field):
def _string_filter(filter_, stix_obj_field):
"""base string filter"""
return _all_filter(filter_, stix_obj_field)
def _timestamp_filter(filter_, stix_obj_timestamp):
"""base STIX 2 timestamp filter"""
return _all_filter(filter_, stix_obj_timestamp)
# STIX 2.0 Common Property filters
# The naming of these functions is important as
# they are used to index a mapping dictionary from
# STIX common field names to these filter functions.
#
# REQUIRED naming scheme:
# "check_<STIX field name>_filter"
"""STIX 2.0 Common Property Filters
The naming of these functions is important as
they are used to index a mapping dictionary from
STIX common field names to these filter functions.
REQUIRED naming scheme:
"check_<STIX field name>_filter"
"""
def check_created_filter(filter_, stix_obj):
@ -124,13 +147,15 @@ def check_external_references_filter(filter_, stix_obj):
"""
STIX objects can have a list of external references
external_references properties:
external_references properties supported:
external_references.source_name (string)
external_references.description (string)
external_references.url (string)
external_references.hashes (hash, but for filtering purposes, a string)
external_references.external_id (string)
external_references properties not supported:
external_references.hashes
"""
for er in stix_obj["external_references"]:
# grab er property name from filter field

View File

@ -6,7 +6,8 @@ Classes:
MemorySink
MemorySource
TODO: Test everything.
TODO: Run through tests again, lot of changes.
TODO: Use deduplicate() calls only when memory corpus is dirty (been added to)
can save a lot of time for successive queries
@ -18,49 +19,88 @@ Notes:
"""
import collections
import json
import os
from stix2 import Bundle
from stix2.sources import DataSink, DataSource, DataStore
from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.sources import DataSink, DataSource, DataStore, apply_common_filters
from stix2.sources.filters import Filter
def _add(store, stix_data=None):
"""Adds stix objects to MemoryStore/Source/Sink."""
if isinstance(stix_data, collections.Mapping):
# stix objects are in a bundle
# make dictionary of the objects for easy lookup
for stix_obj in stix_data["objects"]:
store.data[stix_obj["id"]] = stix_obj
"""Adds STIX objects to MemoryStore/Sink.
Adds STIX objects to an in-memory dictionary for fast lookup.
Recursive function, breaks down STIX Bundles and lists.
Args:
stix_data (list OR dict OR STIX object): STIX objects to be added
"""
if isinstance(stix_data, _STIXBase):
# adding a python STIX object
store._data[stix_data["id"]] = stix_data
elif isinstance(stix_data, dict):
if stix_data["type"] == "bundle":
# adding a json bundle - so just grab STIX objects
for stix_obj in stix_data["objects"]:
_add(store, stix_obj)
else:
# adding a json STIX object
store._data[stix_data["id"]] = stix_data
elif isinstance(stix_data, str):
# adding json encoded string of STIX content
stix_data = parse(stix_data)
if stix_data["type"] == "bundle":
# recurse on each STIX object in bundle
for stix_obj in stix_data:
_add(store, stix_obj)
else:
_add(store, stix_data)
elif isinstance(stix_data, list):
# stix objects are in a list
# STIX objects are in a list- recurse on each object
for stix_obj in stix_data:
store.data[stix_obj["id"]] = stix_obj
_add(store, stix_obj)
else:
raise ValueError("stix_data must be in bundle format or raw list")
raise ValueError("stix_data must be as STIX object(or list of),json formatted STIX (or list of), or a json formatted STIX bundle")
class MemoryStore(DataStore):
"""
"""
def __init__(self, stix_data=None):
"""
Notes:
It doesn't make sense to create a MemoryStore by passing
in existing MemorySource and MemorySink because there could
be data concurrency issues. Just as easy to create new MemoryStore.
"""MemoryStore
"""
Provides an interface to an in-memory dictionary
of STIX objects. MemoryStore is a wrapper around a paired
MemorySink and MemorySource
Note: It doesn't make sense to create a MemoryStore by passing
in existing MemorySource and MemorySink because there could
be data concurrency issues. As well, just as easy to create new MemoryStore.
Args:
stix_data (list OR dict OR STIX object): STIX content to be added
Attributes:
_data (dict): the in-memory dict that holds STIX objects
source (MemorySource): MemorySource
sink (MemorySink): MemorySink
"""
def __init__(self, stix_data=None):
super(MemoryStore, self).__init__()
self.data = {}
self._data = {}
if stix_data:
_add(self, stix_data)
self.source = MemorySource(stix_data=self.data, _store=True)
self.sink = MemorySink(stix_data=self.data, _store=True)
self.source = MemorySource(stix_data=self._data, _store=True)
self.sink = MemorySink(stix_data=self._data, _store=True)
def save_to_file(self, file_path):
return self.sink.save_to_file(file_path=file_path)
@ -70,64 +110,111 @@ class MemoryStore(DataStore):
class MemorySink(DataSink):
"""
"""
def __init__(self, stix_data=None, _store=False):
"""
Args:
stix_data (dictionary OR list): valid STIX 2.0 content in
bundle or a list.
_store (bool): if the MemorySink is a part of a DataStore,
in which case "stix_data" is a direct reference to
shared memory with DataSource.
"""MemorySink
"""
Provides an interface for adding/pushing STIX objects
to an in-memory dictionary.
Designed to be paired with a MemorySource, together as the two
components of a MemoryStore.
Args:
stix_data (dict OR list): valid STIX 2.0 content in
bundle or a list.
_store (bool): if the MemorySink is a part of a DataStore,
in which case "stix_data" is a direct reference to
shared memory with DataSource. Not user supplied
Attributes:
_data (dict): the in-memory dict that holds STIX objects.
If a part of a MemoryStore, the dict is shared with
a MemorySource
"""
def __init__(self, stix_data=None, _store=False):
super(MemorySink, self).__init__()
self.data = {}
self._data = {}
if _store:
self.data = stix_data
self._data = stix_data
elif stix_data:
self.add(stix_data)
_add(self, stix_data)
def add(self, stix_data):
"""
"""add STIX objects to in-memory dictionary maintained by
the MemorySink (MemoryStore)
see "_add()" for args documentation
"""
_add(self, stix_data)
def save_to_file(self, file_path):
"""write STIX objects in the in-memory dictionary to a json file, as a STIX Bundle
Args:
file_path (str): file path to write STIX data to
"""
"""
json.dump(Bundle(self.data.values()), file_path, indent=4)
file_path = os.path.abspath(file_path)
if not os.path.exists(os.path.dirname(file_path)):
os.makedirs(os.path.dirname(file_path))
with open(file_path, "w") as f:
f.write(str(Bundle(self._data.values())))
class MemorySource(DataSource):
"""MemorySource
Provides an interface for searching/retrieving
STIX objects from an in-memory dictionary.
Designed to be paired with a MemorySink, together as the two
components of a MemoryStore.
Args:
stix_data (dict OR list OR STIX object): valid STIX 2.0 content in
bundle or list.
_store (bool): if the MemorySource is a part of a DataStore,
in which case "stix_data" is a direct reference to shared
memory with DataSink. Not user supplied
Attributes:
_data (dict): the in-memory dict that holds STIX objects.
If a part of a MemoryStore, the dict is shared with
a MemorySink
"""
def __init__(self, stix_data=None, _store=False):
"""
Args:
stix_data (dictionary OR list): valid STIX 2.0 content in
bundle or list.
_store (bool): if the MemorySource is a part of a DataStore,
in which case "stix_data" is a direct reference to shared
memory with DataSink.
"""
super(MemorySource, self).__init__()
self.data = {}
self._data = {}
if _store:
self.data = stix_data
self._data = stix_data
elif stix_data:
_add(self, stix_data)
def get(self, stix_id, _composite_filters=None):
"""retrieve STIX object from in-memory dict via STIX ID
Args:
stix_id (str): The STIX ID of the STIX object to be retrieved.
composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(dict OR STIX object): STIX object that has the supplied
ID. As the MemoryStore (i.e. MemorySink) adds STIX objects to memory
as they are supplied (either as python dictionary or STIX object), it
is returned in the same form as it was added
"""
"""
if _composite_filters is None:
# if get call is only based on 'id', no need to search, just retrieve from dict
try:
stix_obj = self.data[stix_id]
stix_obj = self._data[stix_id]
except KeyError:
stix_obj = None
return stix_obj
@ -143,44 +230,75 @@ class MemorySource(DataSource):
return stix_obj
def all_versions(self, stix_id, _composite_filters=None):
"""
Notes:
Since Memory sources/sinks don't handle multiple versions of a
STIX object, this operation is unnecessary. Translate call to get().
"""retrieve STIX objects from in-memory dict via STIX ID, all versions of it
Note: Since Memory sources/sinks don't handle multiple versions of a
STIX object, this operation is unnecessary. Translate call to get().
Args:
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
return a list of objects, all the versions of the object
specified by the "id".
stix_id (str): The STIX ID of the STIX 2 object to retrieve.
composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(list): STIX object that matched ``stix_id``.
(list): list of STIX objects that have the supplied ID. As the
MemoryStore (i.e. MemorySink) adds STIX objects to memory as they
are supplied (either as python dictionary or STIX object), each
is returned in the same form as it was added
"""
return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]
def query(self, query=None, _composite_filters=None):
"""
"""search and retrieve STIX objects based on the complete query
A "complete query" includes the filters from the query, the filters
attached to MemorySource, and any filters passed from a
CompositeDataSource (i.e. _composite_filters)
Args:
query (list): list of filters to search on
composite_filters (set): set of filters passed from the
CompositeDataSource, not user supplied
Returns:
(list): list of STIX objects that match the supplied
query. As the MemoryStore (i.e. MemorySink) adds STIX objects to memory
as they are supplied (either as python dictionary or STIX object), each
is returned in the same form as it was added
"""
if query is None:
query = []
query = set()
else:
if not isinstance(query, list):
# make sure we don't build the set from a single Filter object;
# the set must be built from a list of Filter objects (even if just one Filter)
query = list(query)
query = set(query)
# combine all query filters
if self.filters:
query.extend(list(self.filters))
if self._filters:
query.update(self._filters)
if _composite_filters:
query.extend(_composite_filters)
query.update(_composite_filters)
# Apply STIX common property filters.
all_data = self.apply_common_filters(self.data.values(), query)
all_data = apply_common_filters(self._data.values(), query)
return all_data
def load_from_file(self, file_path):
"""
"""load STIX data from json file
File format is expected to be a single json
STIX object or json STIX bundle
Args:
file_path (str): file path to load STIX data from
"""
file_path = os.path.abspath(file_path)
stix_data = json.load(open(file_path, "r"))
for stix_obj in stix_data["objects"]:
self.data[stix_obj["id"]] = stix_obj
_add(self, stix_data)

View File

@ -10,8 +10,8 @@ TODO: Test everything
"""
import json
from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.sources import DataSink, DataSource, DataStore, make_id
from stix2.sources.filters import Filter
@ -19,35 +19,73 @@ TAXII_FILTERS = ['added_after', 'id', 'type', 'version']
class TAXIICollectionStore(DataStore):
"""
"""TAXIICollectionStore
Provides an interface to a local/remote TAXII Collection
of STIX data. TAXIICollectionStore is a wrapper
around a paired TAXIICollectionSink and TAXIICollectionSource.
Args:
collection (taxii2.Collection): TAXII Collection instance
"""
def __init__(self, collection):
"""
Create a new TAXII Collection Data store
Args:
collection (taxii2.Collection): Collection instance
"""
super(TAXIICollectionStore, self).__init__()
self.source = TAXIICollectionSource(collection)
self.sink = TAXIICollectionSink(collection)
class TAXIICollectionSink(DataSink):
"""
"""TAXIICollectionSink
Provides an interface for pushing STIX objects to a local/remote
TAXII Collection endpoint.
Args:
collection (taxii2.Collection): TAXII2 Collection instance
"""
def __init__(self, collection):
super(TAXIICollectionSink, self).__init__()
self.collection = collection
def add(self, stix_obj):
def add(self, stix_data):
"""add/push STIX content to TAXII Collection endpoint
Args:
stix_data (STIX object OR dict OR str OR list): valid STIX 2.0 content
in a STIX object (or Bundle), STIX object dict (or Bundle dict), or a STIX 2.0
json encoded string, or list of any of the preceding
"""
"""
self.collection.add_objects(self.create_bundle([json.loads(str(stix_obj))]))
if isinstance(stix_data, _STIXBase):
# adding python STIX object
bundle = dict(Bundle(stix_data))
elif isinstance(stix_data, dict):
# adding python dict (of either Bundle or STIX obj)
if stix_data["type"] == "bundle":
bundle = stix_data
else:
bundle = dict(Bundle(stix_data))
elif isinstance(stix_data, list):
# adding list of something - recurse on each
for obj in stix_data:
self.add(obj)
elif isinstance(stix_data, str):
# adding json encoded string of STIX content
stix_data = parse(stix_data)
if stix_data["type"] == "bundle":
bundle = dict(stix_data)
else:
bundle = dict(Bundle(stix_data))
self.collection.add_objects(bundle)
@staticmethod
def create_bundle(objects):
"""TODO: Remove?"""
return dict(id="bundle--%s" % make_id(),
objects=objects,
spec_version="2.0",
@ -55,21 +93,42 @@ class TAXIICollectionSink(DataSink):
class TAXIICollectionSource(DataSource):
"""
"""TAXIICollectionSource
Provides an interface for searching/retrieving STIX objects
from a local/remote TAXII Collection endpoint.
Args:
collection (taxii2.Collection): TAXII Collection instance
"""
def __init__(self, collection):
super(TAXIICollectionSource, self).__init__()
self.collection = collection
def get(self, stix_id, _composite_filters=None):
"""
"""retrieve STIX object from local/remote TAXII Collection
endpoint.
Args:
stix_id (str): The STIX ID of the STIX object to be retrieved.
composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(STIX object): STIX object that has the supplied STIX ID.
The STIX object is received from TAXII as a dict, parsed into
a python STIX object and then returned
"""
# combine all query filters
query = []
if self.filters:
query.extend(self.filters.values())
query = set()
if self._filters:
query.update(self._filters)
if _composite_filters:
query.extend(_composite_filters)
query.update(_composite_filters)
# separate taxii query terms (can be done remotely)
taxii_filters = self._parse_taxii_filters(query)
@ -83,10 +142,21 @@ class TAXIICollectionSource(DataSource):
else:
stix_obj = None
return stix_obj
return parse(stix_obj)
def all_versions(self, stix_id, _composite_filters=None):
"""
"""retrieve STIX object from local/remote TAXII Collection
endpoint, all versions of it
Args:
stix_id (str): The STIX ID of the STIX objects to be retrieved.
composite_filters (set): set of filters passed from the parent
CompositeDataSource, not user supplied
Returns:
(see query() as all_versions() is just a wrapper)
"""
# make query in TAXII query format since 'id' is TAXII field
query = [
@ -99,16 +169,39 @@ class TAXIICollectionSource(DataSource):
return all_data
def query(self, query=None, _composite_filters=None):
"""search and retrieve STIX objects based on the complete query
A "complete query" includes the filters from the query, the filters
attached to TAXIICollectionSource, and any filters passed from a
CompositeDataSource (i.e. _composite_filters)
Args:
query (list): list of filters to search on
composite_filters (set): set of filters passed from the
CompositeDataSource, not user supplied
Returns:
(list): list of STIX objects that matches the supplied
query. The STIX objects are received from TAXII as dicts,
parsed into python STIX objects and then returned.
"""
"""
if query is None:
query = []
query = set()
else:
if not isinstance(query, list):
# make sure we don't build the set from a single Filter object;
# the set must be built from a list of Filter objects (even if just one Filter)
query = list(query)
query = set(query)
# combine all query filters
if self.filters:
query.extend(self.filters.values())
if self._filters:
query.update(self.filters.values())
if _composite_filters:
query.extend(_composite_filters)
query.update(_composite_filters)
# separate taxii query terms (can be done remotely)
taxii_filters = self._parse_taxii_filters(query)
@ -119,10 +212,13 @@ class TAXIICollectionSource(DataSource):
# deduplicate data (before filtering as reduces wasted filtering)
all_data = self.deduplicate(all_data)
# apply local (composite and data source filters)
# apply local (CompositeDataSource, TAXIICollectionSource and query filters)
all_data = self.apply_common_filters(all_data, query)
return all_data
# parse python STIX objects from the STIX object dicts
stix_objs = [parse(stix_obj_dict) for stix_obj_dict in all_data]
return stix_objs
def _parse_taxii_filters(self, query):
"""Parse out TAXII filters that the TAXII server can filter on.
@ -142,6 +238,7 @@ class TAXIICollectionSource(DataSource):
for 'requests.get()'.
"""
params = {}
for filter_ in query: