Formatting changes; replace deduplicate() code in DataSource; add missing super() calls to initialize objects.

stix2.1
Emmanuelle Vargas-Gonzalez 2017-08-11 08:10:20 -04:00
parent a4ead4f6e7
commit 86fd3778f5
5 changed files with 162 additions and 156 deletions

View File

@ -9,9 +9,11 @@ Classes:
TODO:Test everything
NOTE: add_filter(), remove_filter(), deduplicate() - if these functions remain
the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
make those functions an interface to inherit?
Notes:
add_filter(), remove_filter(), deduplicate() - if these functions remain
the exact same for DataSource, DataSink, CompositeDataSource etc... -> just
make those functions an interface to inherit?
"""
import copy
@ -23,7 +25,8 @@ from six import iteritems
def make_id():
return str(uuid.uuid4())
# Currently, only STIX 2.0 common SDO fields (that are not compex objects)
# Currently, only STIX 2.0 common SDO fields (that are not complex objects)
# are supported for filtering on
STIX_COMMON_FIELDS = [
"created",
@ -59,6 +62,7 @@ class DataStore(object):
"""
An implementer will create a concrete subclass from
this abstract class for the specific data store.
"""
def __init__(self, name="DataStore"):
self.name = name
@ -69,29 +73,25 @@ class DataStore(object):
def get(self, stix_id):
"""
Implement:
-translate API get() call to the appropriate DataSource call
Translate API get() call to the appropriate DataSource call
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
return a single object, the most recent version of the object
specified by the "id".
_composite_filters (list): list of filters passed along from
the Composite Data Filter.
Returns:
stix_obj (dictionary): the STIX object to be returned
"""
return self.source.get(stix_id=stix_id)
def all_versions(self, stix_id):
"""
Implement:
-translate all_versions() call to the appropriate DataSource call
Translate all_versions() call to the appropriate DataSource call
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
return a single object, the most recent version of the object
specified by the "id".
@ -102,21 +102,19 @@ class DataStore(object):
Returns:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
"""
return self.source.all_versions(stix_id=stix_id)
def query(self, query):
"""
Fill:
-implement the specific data source API calls, processing,
Implement the specific data source API calls, processing,
functionality required for retrieving query from the data source
Args:
query (list): a list of filters (which collectively are the query)
to conduct search on
_composite_filters (list): a list of filters passed from the
Composite Data Source
to conduct search on.
Returns:
stix_objs (list): a list of STIX objects (where each object is a
@ -136,8 +134,13 @@ class DataStore(object):
class DataSink(object):
"""
An implementer will create a concrete subclass from this
abstract class for the specific data sink.
Abstract class for defining a data sink. Intended for subclassing into
different sink components.
Attributes:
id (str): A unique UUIDv4 to identify this DataSink.
name (str): The descriptive name that identifies this DataSink.
"""
def __init__(self, name="DataSink"):
@ -147,16 +150,25 @@ class DataSink(object):
def add(self, stix_objs):
"""
Fill:
-implement the specific data sink API calls, processing,
Implement the specific data sink API calls, processing,
functionality required for adding data to the sink
"""
raise NotImplementedError()
class DataSource(object):
"""
An implementer will create a concrete subclass from
this abstract class for the specific data source.
Abstract class for defining a data source. Intended for subclassing into
different source components.
Attributes:
id (str): A unique UUIDv4 to identify this DataSource.
name (str): The descriptive name that identifies this DataSource.
filters (dict): A collection of filters present in this DataSource.
filter_allowed (dict): A collection of the allowed filters in this
DataSource.
"""
def __init__(self, name="DataSource"):
@ -168,11 +180,10 @@ class DataSource(object):
def get(self, stix_id, _composite_filters=None):
"""
Fill:
-implement the specific data source API calls, processing,
Implement the specific data source API calls, processing,
functionality required for retrieving data from the data source
Args:
stix_id (str): the id of the STIX 2.0 object to retrieve. Should
return a single object, the most recent version of the object
specified by the "id".
@ -196,7 +207,7 @@ class DataSource(object):
functionality required for retrieving data from the data source
Args:
id (str): The id of the STIX 2.0 object to retrieve. Should
stix_id (str): The id of the STIX 2.0 object to retrieve. Should
return a list of objects, all the versions of the object
specified by the "id".
@ -206,8 +217,8 @@ class DataSource(object):
Returns:
stix_objs (list): a list of STIX objects (where each object is a
STIX object)
"""
"""
raise NotImplementedError()
def query(self, query, _composite_filters=None):
@ -225,13 +236,11 @@ class DataSource(object):
Returns:
"""
raise NotImplementedError()
def add_filter(self, filters):
"""add/attach a filter to the Data Source instance
"""Add/attach a filter to the Data Source instance
Args:
filters (list): list of filters (dict) to add to the Data Source
@ -240,14 +249,13 @@ class DataSource(object):
status (list): list of status/error messages
"""
status = []
errors = []
ids = []
allowed = True
for filter_ in filters:
# check required filter components ("field", "op", "value") exist
# check required filter components ('field', 'op', 'value') exist
for field in FILTER_FIELDS:
if field not in filter_.keys():
allowed = False
@ -306,14 +314,11 @@ class DataSource(object):
return ids, status
def remove_filter(self, filter_ids):
"""remove/detach a filter from the Data Source instance
"""Remove/detach a filter from the Data Source instance
Args:
filter_ids (list): list of filter ids to dettach/remove
from Data Source
Returns:
filter_ids (list): list of filter ids to detach/remove
from Data Source.
"""
for filter_id in filter_ids:
@ -328,7 +333,7 @@ class DataSource(object):
return
def get_filters(self):
"""return copy of all filters currently attached to Data Source
"""Return copy of all filters currently attached to Data Source
TODO: make this a property?
@ -340,7 +345,7 @@ class DataSource(object):
return copy.deepcopy(list(self.filters.values()))
def apply_common_filters(self, stix_objs, query):
"""evaluates filters against a set of STIX 2.0 objects
"""Evaluates filters against a set of STIX 2.0 objects
Supports only STIX 2.0 common property fields
@ -350,10 +355,9 @@ class DataSource(object):
Returns:
(list): list of STIX objects that successfully evaluate against
the query
the query.
"""
filtered_stix_objs = []
# evaluate objects against filter
@ -390,9 +394,9 @@ class DataSource(object):
return filtered_stix_objs
def deduplicate(self, stix_obj_list):
"""deduplicate a list of STIX objects into a unique set
"""Deduplicate a list of STIX objects to a unique set
reduces a set of STIX objects to unique set by looking
Reduces a set of STIX objects to unique set by looking
at 'id' and 'modified' fields - as a unique object version
is determined by the combination of those fields
@ -400,30 +404,34 @@ class DataSource(object):
stix_obj_list (list): list of STIX objects (dicts)
Returns:
(list): a unique set of the passed STIX object list
A list with a unique set of the passed list of STIX objects.
"""
unique = []
have = False
for i in stix_obj_list:
for j in unique:
if i['id'] == j['id'] and i['modified'] == j['modified']:
have = True
break
if not have:
unique.append(i)
have = False
return unique
unique_objs = {}
for obj in stix_obj_list:
unique_objs[(obj['id'], obj['modified'])] = obj
return list(unique_objs.values())
class CompositeDataSource(object):
"""Composite Data Source
Acts as a controller for all the defined/configured STIX Data Sources
e.g. a user can defined n Data Sources - creating Data Source (objects)
e.g. a user can define n Data Sources - creating Data Source (objects)
for each. There is only one instance of this for any python STIX 2.0
application
application.
Attributes:
id (str): A UUIDv4 to identify this CompositeDataSource.
name (str): The name that identifies this CompositeDataSource.
data_sources (dict): A dictionary of DataSource objects; to be
controlled and used by the Data Source Controller object.
filters (dict): A collection of filters present in this
CompositeDataSource.
filter_allowed (dict): A collection of the allowed filters in this
CompositeDataSource.
"""
def __init__(self, name="CompositeDataSource"):
@ -431,11 +439,9 @@ class CompositeDataSource(object):
Creates a new STIX Data Source.
Args:
'data_sources' (dict): a dict of DataSource objects; to be
controlled and used by the Data Source Controller object
name (str): A string containing the name to attach to the
CompositeDataSource instance.
filters :
name :
"""
self.id = make_id()
self.name = name
@ -446,23 +452,23 @@ class CompositeDataSource(object):
def get(self, stix_id):
"""Retrieve STIX object by 'id'
federated retrieve method-iterates through all STIX data sources
Federated retrieve method - iterates through all STIX data sources
defined in the "data_sources" parameter. Each data source has a
specific API retrieve-like function and associated parameters. This
function does a federated retrieval and consolidation of the data
returned from all the STIX data sources.
note: a composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle
Notes:
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle.
Args:
id (str): the id of the STIX object to retrieve
stix_id (str): the id of the STIX object to retrieve.
Returns:
stix_obj (dict): the STIX object to be returned
stix_obj (dict): the STIX object to be returned.
"""
all_data = []
# for every configured Data Source, call its retrieve handler
@ -485,14 +491,16 @@ class CompositeDataSource(object):
Federated all_versions retrieve method - iterates through all STIX data
sources defined in "data_sources"
note: a composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle
Notes:
A composite data source will pass its attached filters to
each configured data source, pushing filtering to them to handle
Args:
id_ (str): id of the STIX objects to retrieve
stix_id (str): id of the STIX objects to retrieve
Returns:
all_data (list): list of STIX objects that have the specified id
"""
all_data = []
@ -509,10 +517,10 @@ class CompositeDataSource(object):
return all_data
def query(self, query=None):
"""composite data source query
"""Composite data source query
Federate the query to all Data Sources attached
to the Composite Data Source
Federate the query to all Data Sources attached to the
Composite Data Source.
Args:
query (list): list of filters to search on
@ -540,16 +548,13 @@ class CompositeDataSource(object):
return all_data
def add_data_source(self, data_sources):
"""add/attach Data Source to the Composite Data Source instance
"""Add/attach Data Source to the Composite Data Source instance
Args:
data_sources (list): a list of Data Source objects to attach
to the Composite Data Source
Returns:
"""
for ds in data_sources:
if issubclass(ds, DataSource):
if self.data_sources[ds['id']] in self.data_sources.keys():
@ -568,7 +573,7 @@ class CompositeDataSource(object):
return
def remove_data_source(self, data_source_ids):
"""remove/detach Data Source from the Composite Data Source instance
"""Remove/detach Data Source from the Composite Data Source instance
Args:
data_source_ids (list): a list of Data Source
@ -590,17 +595,13 @@ class CompositeDataSource(object):
@property
def data_sources(self):
"""return all attached Data Sources
Args:
Returns:
"""Return all attached Data Sources
"""
return copy.deepcopy(self.data_sources.values())
def add_filter(self, filters):
"""add/attach a filter to the Composite Data Source instance
"""Add/attach a filter to the Composite Data Source instance
Args:
filters (list): list of filters (dict) to add to the Data Source
@ -609,7 +610,6 @@ class CompositeDataSource(object):
status (list): list of status/error messages
"""
status = []
errors = []
ids = []
@ -679,12 +679,9 @@ class CompositeDataSource(object):
Args:
filter_ids (list): list of filter id's (which are strings)
dettach from the Composite Data Source
Returns:
to detach from the Composite Data Source.
"""
for filter_id in filter_ids:
try:
if filter_id in self.filters:
@ -699,7 +696,7 @@ class CompositeDataSource(object):
@property
def filters(self):
"""return filters attached to Composite Data Source
"""Return filters attached to Composite Data Source
Returns:
(list): the list of filters currently attached to the Data Source
@ -708,7 +705,7 @@ class CompositeDataSource(object):
return copy.deepcopy(list(self.filters.values()))
def deduplicate(self, stix_obj_list):
"""deduplicate a list fo STIX objects to a unique set
"""Deduplicate a list of STIX objects to a unique set
Reduces a set of STIX objects to unique set by looking
at 'id' and 'modified' fields - as a unique object version
@ -718,9 +715,9 @@ class CompositeDataSource(object):
stix_obj_list (list): list of STIX objects (dicts)
Returns:
(list): unique set of the passed list of STIX objects
"""
A list with a unique set of the passed list of STIX objects.
"""
unique_objs = {}
for obj in stix_obj_list:
@ -729,13 +726,13 @@ class CompositeDataSource(object):
return list(unique_objs.values())
class STIXCommonPropertyFilters():
class STIXCommonPropertyFilters(object):
"""
"""
@classmethod
def _all(cls, filter_, stix_obj_field):
"""all filter operations (for filters whose value type can be applied to any operation type)"""
if filter_["op"] == '=':
if filter_["op"] == "=":
return stix_obj_field == filter_["value"]
elif filter_["op"] == "!=":
return stix_obj_field != filter_["value"]
@ -791,14 +788,15 @@ class STIXCommonPropertyFilters():
@classmethod
def external_references(cls, filter_, stix_obj):
"""
stix object's can have a list of external references
STIX objects can have a list of external references
external_references properties:
external_references.source_name (string)
external_references.description (string)
external_references.url (string)
external_references.hashes (hash, but for filtering purposes, a string)
external_references.external_id (string)
external-reference properties:
external_reference.source_name (string)
external_reference.description (string)
external_reference.url (string)
external_reference.hashes (hash, but for filtering purposes, a string)
external_reference.external_id (string)
"""
for er in stix_obj["external_references"]:
# grab er property name from filter field
@ -811,11 +809,12 @@ class STIXCommonPropertyFilters():
@classmethod
def granular_markings(cls, filter_, stix_obj):
"""
stix object's can have a list of granular marking references
STIX objects can have a list of granular marking references
granular_markings properties:
granular_markings.marking_ref (id)
granular_markings.selectors (string)
granular-marking properties:
granular-marking.marking_ref (id)
granular-marking.selectors (string)
"""
for gm in stix_obj["granular_markings"]:
# grab gm property name from filter field

View File

@ -12,16 +12,15 @@ TODO: Test everything
import json
import os
from stix2.sources import DataSink, DataSource, DataStore, make_id
from stix2 import Bundle
from stix2.sources import DataSink, DataSource, DataStore
class FileSystemStore(DataStore):
"""
"""
def __init__(self, stix_dir="stix_data", name="FileSystemStore"):
self.name = name
self.id = make_id()
super(FileSystemStore, self).__init__(name=name)
self.source = FileSystemSource(stix_dir=stix_dir)
self.sink = FileSystemSink(stix_dir=stix_dir)
@ -94,8 +93,11 @@ class FileSystemSource(DataSource):
def all_versions(self, stix_id, _composite_filters=None):
"""
NOTE: since FileSystem sources/sinks dont handle mutliple verions of a STIX object,
this operation is futile. Pass call to get(). (Appoved by G.B.)
Notes:
Since FileSystem sources/sinks don't handle multiple versions
of a STIX object, this operation is futile. Pass call to get().
(Approved by G.B.)
"""
# query = [
@ -139,7 +141,7 @@ class FileSystemSource(DataSource):
if "type" in [filter_["field"] for filter_ in file_filters]:
for filter_ in file_filters:
if filter_["field"] == "type":
if filter_["op"] == '=':
if filter_["op"] == "=":
include_paths.append(os.path.join(self.stix_dir, filter_["value"]))
elif filter_["op"] == "!=":
declude_paths.append(os.path.join(self.stix_dir, filter_["value"]))
@ -167,8 +169,11 @@ class FileSystemSource(DataSource):
# may forgo the loading of STIX content into memory
if "id" in [filter_["field"] for filter_ in file_filters]:
for filter_ in file_filters:
if filter_["field"] == "id" and filter_["field"] == '=':
if filter_["field"] == "id" and filter_["op"] == "=":
id_ = filter_["value"]
break
else:
id_ = None
else:
id_ = None
@ -188,7 +193,6 @@ class FileSystemSource(DataSource):
all_data.extend(self.apply_common_filters([stix_obj], query))
all_data = self.deduplicate(all_data)
return all_data
def _parse_file_filters(self, query):

View File

@ -11,9 +11,10 @@ TODO: Test everything.
TODO: Use deduplicate() calls only when memory corpus is dirty (been added to)
can save a lot of time for successive queries
NOTE: Not worrying about STIX versioning. The in memory STIX data at anytime
will only hold one version of a STIX object. As such, when save() is called,
the single versions of all the STIX objects are what is written to file.
Notes:
Not worrying about STIX versioning. The in-memory STIX data at any time
will only hold one version of a STIX object. As such, when save() is called,
the single versions of all the STIX objects are what is written to file.
"""
@ -21,7 +22,7 @@ import json
import os
from stix2 import Bundle
from stix2.sources import DataSink, DataSource, DataStore, make_id
from stix2.sources import DataSink, DataSource, DataStore
from stix2validator import validate_string
@ -30,12 +31,13 @@ class MemoryStore(DataStore):
"""
def __init__(self, stix_data=None, name="MemoryStore"):
"""
Note: It doesnt make sense to create a MemoryStore by passing
in existing MemorySource and MemorySink because there could
be data concurrency issues. Just as easy to create new MemoryStore.
Notes:
It doesn't make sense to create a MemoryStore by passing
in existing MemorySource and MemorySink because there could
be data concurrency issues. Just as easy to create new MemoryStore.
"""
self.name = name
self.id = make_id()
super(MemoryStore, self).__init__(name=name)
self.data = {}
if stix_data:
@ -46,7 +48,6 @@ class MemoryStore(DataStore):
# make dictionary of the objects for easy lookup
if r.is_valid:
for stix_obj in stix_data["objects"]:
self.data[stix_obj["id"]] = stix_obj
else:
print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator")
@ -73,16 +74,16 @@ class MemoryStore(DataStore):
class MemorySink(DataSink):
"""
"""
def __init__(self, stix_data=None, name="MemorySink", _store=False):
"""
Args:
data (dictionary OR list): valid STIX 2.0 content in bundle or a list
stix_data (dictionary OR list): valid STIX 2.0 content in
bundle or a list.
name (string): optional name tag of the data source
_store (bool): if the MemorySink is a part of a DataStore, in which case
"stix_data" is a direct reference to shared memory with DataSource
_store (bool): if the MemorySink is a part of a DataStore,
in which case "stix_data" is a direct reference to
shared memory with DataSource.
"""
super(MemorySink, self).__init__(name=name)
@ -152,11 +153,12 @@ class MemorySource(DataSource):
def __init__(self, stix_data=None, name="MemorySource", _store=False):
"""
Args:
data (dictionary OR list): valid STIX 2.0 content in bundle or list
name (string): optional name tag of the data source
_store (bool): if the MemorySource is a part of a DataStore, in which case
"stix_data" is a direct reference to shared memory with DataSink
stix_data (dictionary OR list): valid STIX 2.0 content in
bundle or list.
name (string): optional name tag of the data source.
_store (bool): if the MemorySource is a part of a DataStore,
in which case "stix_data" is a direct reference to shared
memory with DataSink.
"""
super(MemorySource, self).__init__(name=name)
@ -167,7 +169,7 @@ class MemorySource(DataSource):
self.data = {}
if stix_data:
if type(stix_data) == dict:
# stix objects are in a bundle
# STIX objects are in a bundle
# verify STIX json data
r = validate_string(json.dumps(stix_data))
# make dictionary of the objects for easy lookup
@ -179,7 +181,7 @@ class MemorySource(DataSource):
print(r)
self.data = {}
elif type(stix_data) == list:
# stix objects are in a list
# STIX objects are in a list
for stix_obj in stix_data:
r = validate_string(json.dumps(stix_obj))
if r.is_valid:
@ -219,8 +221,11 @@ class MemorySource(DataSource):
def all_versions(self, stix_id, _composite_filters=None):
"""
NOTE: since Memory sources/sinks dont handle mutliple verions of a STIX object,
this operation is futile. Translate call to get(). (Appoved by G.B.)
Notes:
Since Memory sources/sinks don't handle multiple versions of a
STIX object, this operation is futile. Translate call to get().
(Approved by G.B.)
"""
# query = [
@ -237,9 +242,7 @@ class MemorySource(DataSource):
def query(self, query=None, _composite_filters=None):
"""
"""
if query is None:
query = []
@ -250,7 +253,7 @@ class MemorySource(DataSource):
query.extend(_composite_filters)
# deduplicate data before filtering -> Deduplication is not required as Memory only ever holds one version of an object
# all_data = self.depuplicate(all_data)
# all_data = self.deduplicate(all_data)
# apply STIX common property filters
all_data = self.apply_common_filters(self.data.values(), query)

View File

@ -11,7 +11,6 @@ TODO: Test everything
"""
import json
import uuid
from stix2.sources import DataSink, DataSource, DataStore, make_id
@ -27,8 +26,8 @@ class TAXIICollectionStore(DataStore):
Args:
collection (taxii2.Collection): Collection instance
"""
"""
self.name = name
self.id = make_id()
self.source = TAXIICollectionSource(collection)
@ -38,7 +37,6 @@ class TAXIICollectionStore(DataStore):
class TAXIICollectionSink(DataSink):
"""
"""
def __init__(self, collection, name="TAXIICollectionSink"):
super(TAXIICollectionSink, self).__init__(name=name)
@ -51,7 +49,7 @@ class TAXIICollectionSink(DataSink):
@staticmethod
def create_bundle(objects):
return dict(id="bundle--" + str(uuid.uuid4()),
return dict(id="bundle--%s" % make_id(),
objects=objects,
spec_version="2.0",
type="bundle")
@ -137,15 +135,17 @@ class TAXIICollectionSource(DataSource):
return all_data
def _parse_taxii_filters(self, query):
"""Parse out TAXII filters that the TAXII server can filter on
"""Parse out TAXII filters that the TAXII server can filter on.
For instance
"?match[type]=indicator,sighting" should be in a query dict as follows
{
"field": "type"
"op": "=",
"value": "indicator,sighting"
}
Notes:
For instance - "?match[type]=indicator,sighting" should be in a
query dict as follows:
{
"field": "type",
"op": "=",
"value": "indicator,sighting"
}
Args:
query (list): list of filters to extract which ones are TAXII
@ -154,8 +154,8 @@ class TAXIICollectionSource(DataSource):
Returns:
params (dict): dict of the TAXII filters but in format required
for 'requests.get()'.
"""
"""
params = {}
for filter_ in query:
@ -163,6 +163,6 @@ class TAXIICollectionSource(DataSource):
if filter_["field"] == "added_after":
params[filter_["field"]] = filter_["value"]
else:
taxii_field = "match[" + filter_["field"] + ']'
taxii_field = "match[%s]" % filter_["field"]
params[taxii_field] = filter_["value"]
return params

View File

@ -74,7 +74,7 @@ def test_parse_taxii_filters():
assert taxii_filters == expected_params
@pytest.skip
@pytest.mark.skip(reason="test_add_get_remove_filter() - Determine what are we testing.")
def test_add_get_remove_filter():
# First 3 filters are valid, remaining fields are erroneous in some way