cti-python-stix2/stix2/sources/__init__.py

687 lines
22 KiB
Python
Raw Normal View History

2017-05-24 17:25:40 +02:00
'''
Python STIX 2.0 Composite Data Source and Data Source (classes)
---TODO/Questions---
-Test everything
-add_filter(), remove_filter(), deduplicate() - if these functions remain exactly the same for
both CompositeDataSource and DataSource, they could be inherited from a shared base class or moved to module level
'''
import abc
import copy
import uuid
def make_id():
    '''generate a new random identifier

    Used to identify Data Sources and filters within this module.

    Returns:
        (str): string form of a freshly generated random (version 4) UUID

    '''
    # the value must be returned - without it every caller received None
    return str(uuid.uuid4())
# Fields whose combined values identify one specific version of a STIX 2.0 object
STIX_VERSION_FIELDS = [
    'id',
    'modified',
]

# Fields that filters may target. Currently only STIX 2.0 common SDO fields
# that are not complex objects are supported for filtering on.
STIX_COMMON_FIELDS = [
    'type',
    'id',
    'created_by_ref',
    'created',
    'modified',
    'revoked',
    'labels',
    # 'external_references' - list of external-reference objects, not supported for filtering
    'object_references',
    'object_marking_refs',
    'granular_marking_refs',
    # 'granular_markings' - list of granular-marking objects, not supported for filtering
]

# Keys every filter (dict) is required to carry
FILTER_FIELDS = [
    'field',
    'op',
    'value',
]

# Operators a filter may name in its 'op' key
FILTER_OPS = [
    '=',
    '!=',
    'in',
    '>',
    '<',
    '>=',
    '<=',
]

# Python types accepted for a filter's 'value'
FILTER_VALUE_TYPES = [
    bool,
    dict,
    float,
    int,
    list,
    str,
    tuple,
]
class CompositeDataSource(object):
    '''Composite Data Source

    Acts as a controller for all the defined/configured STIX Data Sources,
    e.g. a user can define n Data Sources - creating a Data Source object
    for each - and attach them to a Composite Data Source, which federates
    every retrieval across them. There is only one instance of this for any
    python STIX 2.0 application.

    Attributes:
        id_ (str): identifier generated for this instance by make_id()
        name (str): name of the instance, echoed in add_filter() statuses
        data_sources (dict): attached Data Sources, keyed by their 'id_'
        filters (dict): attached filters (dicts), keyed by generated filter id
        filter_allowed (dict): filter id -> bool, True when the filter passed
            the common filter checks performed by add_filter()

    '''

    def __init__(self, name="CompositeDataSource"):
        '''create a new, empty Composite Data Source

        Data Sources and filters are attached afterwards through
        add_data_source() and add_filter().

        Args:
            name (str): name for this Composite Data Source

        '''
        self.id_ = make_id()
        self.name = name
        self.data_sources = {}
        self.filters = {}
        self.filter_allowed = {}

    def _gather(self, fetch):
        '''run 'fetch' against every attached Data Source and merge results

        Args:
            fetch (callable): called as fetch(data_source, composite_filters);
                may return a single STIX object, a list of them, or nothing

        Returns:
            (list): deduplicated STIX objects collected from all Data Sources

        '''
        all_data = []

        for ds in self.data_sources.values():
            # every Data Source gets its own list of the composite filters
            data = fetch(ds, list(self.filters.values()))
            if not data:
                continue
            if isinstance(data, (list, tuple)):
                all_data.extend(data)
            else:
                # a single object must be appended: 'list += dict' would
                # add the object's keys instead of the object itself
                all_data.append(data)

        return self.deduplicate(all_data)

    def get(self, id_):
        '''retrieve STIX object by 'id'

        Federated retrieve method - iterates through all STIX Data Sources
        attached in "data_sources", calls get() on each of them and
        consolidates what they return.

        note: a composite data source will pass its attached filters to
        each configured data source, pushing filtering to them to handle

        Args:
            id_ (str): the id of the STIX object to retrieve

        Returns:
            stix_obj (dict): the most recent version (greatest 'modified'
                value) among the objects returned by the Data Sources, or
                None when no Data Source returned anything

        '''
        all_data = self._gather(
            lambda ds, filters: ds.get(id_=id_, _composite_filters=filters)
        )

        if not all_data:
            return None

        # reduce to most recent version
        return max(all_data, key=lambda obj: obj['modified'])

    def all_versions(self, id_):
        '''retrieve STIX objects by 'id'

        Federated all_versions retrieve method - iterates through all STIX
        Data Sources attached in "data_sources"

        note: a composite data source will pass its attached filters to
        each configured data source, pushing filtering to them to handle

        Args:
            id_ (str): id of the STIX objects to retrieve

        Returns:
            all_data (list): list of STIX objects that have the specified id,
                without exact duplicates (same 'id' and 'modified' values)

        '''
        return self._gather(
            lambda ds, filters: ds.all_versions(id_=id_, _composite_filters=filters)
        )

    def query(self, query=None):
        '''composite data source query

        Federate the query to all Data Sources attached
        to the Composite Data Source

        Args:
            query (list): list of filters to search on; None or empty means
                an empty query is passed to the Data Sources

        Returns:
            all_data (list): list of STIX objects returned by the Data
                Sources, without exact duplicates (same 'id' and 'modified'
                values)

        '''
        if not query:
            query = []

        return self._gather(
            lambda ds, filters: ds.query(query=query, _composite_filters=filters)
        )

    def add_data_source(self, data_sources):
        '''add/attach Data Source to the Composite Data Source instance

        Args:
            data_sources (list): a list of Data Source objects to attach to
                the Composite Data Source (a single Data Source object is
                accepted as well)

        '''
        if isinstance(data_sources, DataSource):
            data_sources = [data_sources]

        for ds in data_sources:
            if not isinstance(ds, DataSource):
                # the object is not an instance of a DataSource subclass
                # TODO: maybe log error?
                continue
            if ds.id_ in self.data_sources:
                # data source already attached to Composite Data Source
                continue
            # add data source to Composite Data Source (its id is its key)
            self.data_sources[ds.id_] = ds

    def remove_data_source(self, data_source_ids):
        '''remove/detach Data Source from the Composite Data Source instance

        Ids that are not attached are ignored.

        Args:
            data_source_ids (list): a list of Data Source ids (strings)

        '''
        for id_ in data_source_ids:
            self.data_sources.pop(id_, None)

    def get_data_sources(self):
        '''return all attached Data Sources

        TODO: Make this a property?

        Returns:
            (list): deep copies of the attached Data Source objects

        '''
        # materialise the values first: a dict view cannot be deep copied
        return copy.deepcopy(list(self.data_sources.values()))

    @staticmethod
    def _check_common_filter(filter_):
        '''check whether a filter qualifies as a common filter

        Args:
            filter_ (dict): the filter to check

        Returns:
            (list): error messages; empty when the filter is a common filter

        '''
        # check required filter components ("field", "op", "value") exist;
        # no need for further checks if the filter is missing one of them
        for field in FILTER_FIELDS:
            if field not in filter_:
                return ["Filter was missing a required field(key). Each filter requires 'field', 'op', 'value' keys."]

        errors = []

        # check filter field is a supported STIX 2.0 common field
        if filter_['field'] not in STIX_COMMON_FIELDS:
            errors.append("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")

        # check filter operator is supported
        if filter_['op'] not in FILTER_OPS:
            errors.append("Filter operation(from 'op' field) not supported")

        # check filter value type is supported
        if not isinstance(filter_['value'], tuple(FILTER_VALUE_TYPES)):
            errors.append("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")

        return errors

    def add_filter(self, filters):
        '''add/attach a filter to the Composite Data Source instance

        A filter is added regardless of whether it fits the requirements
        to be a common filter. This is done because some filters may be
        added and used by third party Data Sources, where the filtering
        may be conducted within those plugins, just not here.

        Each filter dict is given a generated id, stored in place under
        its 'id' key.

        Args:
            filters (list): list of filters (dict) to add to the Data Source

        Returns:
            ids (list): the generated filter ids, in the order of 'filters'
            status (list): one status (dict) per filter; it carries an
                "errors" list when the filter is not a common filter

        '''
        ids = []
        status = []

        for filter_ in filters:
            # a fresh list per filter, so a stored status is never altered
            errors = self._check_common_filter(filter_)
            allowed = not errors

            id_ = make_id()
            filter_['id'] = id_
            # key by the generated id so filters do not overwrite each other
            self.filters[id_] = filter_
            self.filter_allowed[id_] = allowed
            ids.append(id_)

            report = {
                "status": "added as a common filter" if allowed else "added but is not a common filter",
                "filter": filter_,
                "data_source_name": self.name,
                "data_source_id": self.id_,
            }
            if not allowed:
                report["errors"] = errors
            status.append(report)

        return ids, status

    def remove_filter(self, filter_ids):
        '''remove/detach a filter from the Composite Data Source instance

        Ids that are not attached are ignored.

        Args:
            filter_ids (list): list of filter ids (strings) to detach from
                the Composite Data Source

        '''
        for filter_id in filter_ids:
            self.filters.pop(filter_id, None)
            self.filter_allowed.pop(filter_id, None)

    def get_filters(self):
        '''return filters attached to Composite Data Source

        Returns:
            (list): deep copies of the filters currently attached

        '''
        return copy.deepcopy(list(self.filters.values()))

    def deduplicate(self, stix_obj_list):
        '''deduplicate a list of STIX objects to a unique set

        Reduces a set of STIX objects to unique set by looking
        at 'id' and 'modified' fields - as a unique object version is
        determined by the combination of those fields

        Args:
            stix_obj_list (list): list of STIX objects (dicts)

        Returns:
            (list): the first occurrence of every ('id', 'modified')
                combination, in the order of the passed list

        '''
        unique = []
        seen = set()

        for stix_obj in stix_obj_list:
            version = (stix_obj['id'], stix_obj['modified'])
            if version not in seen:
                seen.add(version)
                unique.append(stix_obj)

        return unique
# Base class created by calling ABCMeta directly: this applies the metaclass
# on both Python 2 and Python 3, whereas a '__metaclass__' attribute in the
# class body is ignored by Python 3 (abstract methods would not be enforced).
_AbstractBase = abc.ABCMeta('_AbstractBase', (object,), {})


class DataSource(_AbstractBase):
    '''
    Abstract Data Source class for STIX 2.0

    An implementer will create a concrete subclass from
    this abstract class for the specific data source.

    The purpose of the concrete subclasses is to then
    supply them to a Composite Data Source which calls
    the subclass methods when conducting STIX 2.0
    data retrievals.

    A concrete subclass has to implement get(), all_versions(), query()
    and close(); the class cannot be instantiated otherwise.

    Attributes:
        name (str): name of the Data Source, echoed in add_filter() statuses
        id_ (str): identifier generated for this instance by make_id()
        filters (dict): attached filters (dicts), keyed by generated filter id
        filter_allowed (dict): filter id -> bool, True when the filter passed
            the common filter checks performed by add_filter()

    '''

    def __init__(self, name="DataSource"):
        '''create a new Data Source without any filters attached

        Args:
            name (str): name for this Data Source

        '''
        self.name = name
        self.id_ = make_id()
        self.filters = {}
        self.filter_allowed = {}

    @abc.abstractmethod
    def get(self, id_, _composite_filters=None):
        '''
        Fill:
            -implement the specific data source API calls, processing,
            functionality required for retrieving data from the data source

        Args:
            id_ (str): the id of the STIX 2.0 object to retrieve. Should
                return a single object, the most recent version of the
                object specified by the "id".

            _composite_filters (list): list of filters passed along from
                the Composite Data Source

        Returns:
            stix_obj (dictionary): the STIX object to be returned

        Raises:
            NotImplementedError: always, in this abstract implementation

        '''
        raise NotImplementedError

    @abc.abstractmethod
    def all_versions(self, id_, _composite_filters=None):
        '''
        Fill:
            -Similar to get() except returns list of all object versions of
            the specified "id".

            -implement the specific data source API calls, processing,
            functionality required for retrieving data from the data source

        Args:
            id_ (str): The id of the STIX 2.0 object to retrieve. Should
                return a list of objects, all the versions of the object
                specified by the "id".

            _composite_filters (list): list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): a list of STIX objects (where each object is a
                STIX object); this abstract implementation returns an empty
                list

        '''
        stix_objs = []
        return stix_objs

    @abc.abstractmethod
    def query(self, query, _composite_filters=None):
        '''
        Fill:
            -implement the specific data source API calls, processing,
            functionality required for retrieving query from the data source

        Args:
            query (list): a list of filters (which collectively are the
                query) to conduct search on

            _composite_filters (list): a list of filters passed from the
                Composite Data Source

        Returns:
            stix_objs (list): list of STIX objects; this abstract
                implementation returns an empty list

        '''
        stix_objs = []
        return stix_objs

    @abc.abstractmethod
    def close(self):
        '''
        Fill:
            Close, release, shutdown any objects, contexts, variables

        Returns:
            (list): list of status/error messages; this abstract
                implementation returns an empty list

        '''
        status = []
        return status

    @staticmethod
    def _check_common_filter(filter_):
        '''check whether a filter qualifies as a common filter

        Args:
            filter_ (dict): the filter to check

        Returns:
            (list): error messages; empty when the filter is a common filter

        '''
        # check required filter components ("field", "op", "value") exist;
        # no reason for further checks if the filter is missing one of them
        for field in FILTER_FIELDS:
            if field not in filter_:
                return ["Filter was missing a required field(key). Each filter requires 'field', 'op', 'value' keys."]

        errors = []

        # check filter field is a supported STIX 2.0 common field
        if filter_['field'] not in STIX_COMMON_FIELDS:
            errors.append("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported")

        # check filter operator is supported
        if filter_['op'] not in FILTER_OPS:
            errors.append("Filter operation(from 'op' field) not supported")

        # check filter value type is supported
        if not isinstance(filter_['value'], tuple(FILTER_VALUE_TYPES)):
            errors.append("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary")

        return errors

    def add_filter(self, filters):
        '''add/attach a filter to the Data Source instance

        A filter is added regardless of whether it fits the requirements
        to be a common filter. This is done because some filters may be
        added and used by third party Data Sources, where the filtering
        may be conducted within those plugins, just not here.

        Each filter dict is given a generated id, stored in place under
        its 'id' key.

        Args:
            filters (list): list of filters (dict) to add to the Data Source

        Returns:
            ids (list): the generated filter ids, in the order of 'filters'
            status (list): one status (dict) per filter; it carries an
                "errors" list when the filter is not a common filter

        '''
        ids = []
        status = []

        for filter_ in filters:
            # a fresh list per filter, so a stored status is never altered
            errors = self._check_common_filter(filter_)
            allowed = not errors

            id_ = make_id()
            filter_['id'] = id_
            self.filters[id_] = filter_
            self.filter_allowed[id_] = allowed
            ids.append(id_)

            report = {
                "status": "added as a common filter" if allowed else "added but is not a common filter",
                "filter": filter_,
                "data_source_name": self.name,
                "data_source_id": self.id_,
            }
            if not allowed:
                report["errors"] = errors
            status.append(report)

        return ids, status

    def remove_filter(self, filter_ids):
        '''remove/detach a filter from the Data Source instance

        Ids that are not attached are ignored.

        Args:
            filter_ids (list): list of filter ids to detach/remove from
                Data Source

        '''
        for filter_id in filter_ids:
            self.filters.pop(filter_id, None)
            self.filter_allowed.pop(filter_id, None)

    def get_filters(self):
        '''return copy of all filters currently attached to Data Source

        TODO: make this a property?

        Returns:
            (list): a copy of all the filters(dict) which are attached to
                Data Source

        '''
        return copy.deepcopy(list(self.filters.values()))

    def _passes_common_filters(self, stix_obj, query):
        '''tell whether one STIX object satisfies every applicable filter

        Args:
            stix_obj (dict): the STIX object to evaluate
            query (list): list of filters (dicts)

        Returns:
            (bool): False as soon as one filter rejects the object

        '''
        for filter_ in query:
            # skip filter as filter was identified (when added) as not a
            # common filter; a filter id unknown to this Data Source (e.g.
            # one registered on a Composite Data Source) is still applied
            if 'id' in filter_ and self.filter_allowed.get(filter_['id'], True) is False:
                continue

            field = filter_['field']

            # filter "field" is not in the STIX object - the filter cannot
            # be applied, so the object does not make it through the filter
            if field not in stix_obj:
                return False

            try:
                op = filter_['op']
                if op == '=':
                    matched = stix_obj[field] == filter_['value']
                elif op == '!=':
                    matched = stix_obj[field] != filter_['value']
                elif op == 'in':
                    matched = stix_obj[field] in filter_['value']
                else:
                    # filter operation not supported here - filter ignored
                    # TODO: '>', '<', '>=' and '<=' presumably only apply to
                    # timestamps, for which plain operator usage may not be
                    # sufficient; they are left unapplied until that is
                    # settled
                    continue
            except TypeError:
                # type mismatch of comparison operands - ignore filter,
                # no error raised for now
                continue

            if not matched:
                return False

        return True

    def apply_common_filters(self, stix_objs, query):
        '''evaluates filters against a set of STIX 2.0 objects

        Supports only STIX 2.0 common property fields and the '=', '!='
        and 'in' operations; filters with any other operation, and filters
        flagged as not common when they were added, are ignored.

        Args:
            stix_objs (list): list of STIX objects to apply the query to
            query (list): list of filters (combined form complete query)

        Returns:
            (list): list of STIX objects that successfully evaluate against
                the query

        '''
        return [
            stix_obj for stix_obj in stix_objs
            if self._passes_common_filters(stix_obj, query)
        ]

    def deduplicate(self, stix_obj_list):
        '''deduplicate a list of STIX objects into a unique set

        reduces a set of STIX objects to unique set by looking
        at 'id' and 'modified' fields - as a unique object version
        is determined by the combination of those fields

        Args:
            stix_obj_list (list): list of STIX objects (dicts)

        Returns:
            (list): the first occurrence of every ('id', 'modified')
                combination, in the order of the passed list

        '''
        unique = []
        seen = set()

        for stix_obj in stix_obj_list:
            version = (stix_obj['id'], stix_obj['modified'])
            if version not in seen:
                seen.add(version)
                unique.append(stix_obj)

        return unique