cti-python-stix2/stix2/datastore/taxii.py

327 lines
13 KiB
Python
Raw Normal View History

2017-07-12 16:58:31 +02:00
"""
Python STIX 2.x TAXIICollectionStore
2017-07-12 16:58:31 +02:00
"""
from requests.exceptions import HTTPError
2017-05-24 17:25:40 +02:00
from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.datastore import DataSink, DataSource, DataStoreMixin
2018-04-11 19:36:52 +02:00
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
from stix2.utils import deduplicate
2017-07-12 16:58:31 +02:00
TAXII_FILTERS = ['added_after', 'id', 'type', 'version']
2017-05-24 17:25:40 +02:00
class TAXIICollectionStore(DataStoreMixin):
    """Paired read/write interface to a local/remote TAXII Collection.

    The store wires together a TAXIICollectionSource (retrieving STIX data)
    and a TAXIICollectionSink (pushing STIX data) built from the same
    TAXII Collection object.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): whether to allow custom STIX content to be
            pushed/retrieved. When left as None, the source side
            (retrieving data) is created with True and the sink side
            (pushing data) with False. When supplied, the value is
            applied to both the source and the sink.

    """

    def __init__(self, collection, allow_custom=None):
        # None means "let each side use its own default"
        unset = allow_custom is None
        source_custom = True if unset else allow_custom
        sink_custom = False if unset else allow_custom

        # the source is built before the sink, as in the keyword-argument
        # evaluation order of a single combined call
        source = TAXIICollectionSource(collection, allow_custom=source_custom)
        sink = TAXIICollectionSink(collection, allow_custom=sink_custom)

        super(TAXIICollectionStore, self).__init__(source=source, sink=sink)
2017-07-12 16:58:31 +02:00
class TAXIICollectionSink(DataSink):
    """Provides an interface for pushing STIX objects to a local/remote
    TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII2 Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            added to the TAXIICollectionSink. Default: False

    Raises:
        HTTPError, ValueError: re-raised unchanged in type when reading
            ``collection.can_write`` fails; an explanatory ``message``
            attribute is attached to the exception first.
        ValueError: if the collection reports that it is not writeable.

    """

    def __init__(self, collection, allow_custom=False):
        super(TAXIICollectionSink, self).__init__()
        try:
            # we have to execute .can_write first in isolation because the
            # attribute access could trigger a taxii2client.ValidationError which
            # we catch here as a ValueError (its parent class). Later, we need to
            # have the ability to also raise a different ValueError based on the
            # value of .can_write
            writeable = collection.can_write
        except (HTTPError, ValueError) as e:
            # Python 3 exceptions have no ``message`` attribute, so fall back
            # to str(e); otherwise this handler itself would fail with an
            # AttributeError and hide the real TAXII error.
            e.message = ("The underlying TAXII Collection resource defined in the supplied TAXII"
                         " Collection object provided could not be reached. TAXII Collection Error: "
                         + getattr(e, "message", str(e)))
            raise

        # now past taxii2client possible exceptions, check value for local exceptions
        if not writeable:
            raise ValueError("The TAXII Collection object provided does not have write access"
                             " to the underlying linked Collection resource")

        self.collection = collection
        self.allow_custom = allow_custom

    def add(self, stix_data, version=None):
        """Add/push STIX content to TAXII Collection endpoint

        Args:
            stix_data (STIX object OR dict OR str OR list): valid STIX 2.0 content
                in a STIX object (or Bundle), STIX object dict (or Bundle dict), or a STIX 2.0
                json encoded string, or list of any of the following
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.

        Raises:
            TypeError: if ``stix_data`` is none of the supported kinds.

        """
        if isinstance(stix_data, _STIXBase):
            # adding python STIX object
            stix_obj = stix_data
        elif isinstance(stix_data, dict):
            # adding python dict (of either Bundle or STIX obj); only a
            # Bundle dict is parsed here, a plain object dict is handed to
            # Bundle() below as-is
            if stix_data["type"] == "bundle":
                stix_obj = parse(stix_data, allow_custom=self.allow_custom, version=version)
            else:
                stix_obj = stix_data
        elif isinstance(stix_data, list):
            # adding list of something - recurse on each, one push per item
            for obj in stix_data:
                self.add(obj, version=version)
            return
        elif isinstance(stix_data, str):
            # adding json encoded string of STIX content
            stix_obj = parse(stix_data, allow_custom=self.allow_custom, version=version)
        else:
            raise TypeError("stix_data must be as STIX object(or list of),json formatted STIX (or list of), or a json formatted STIX bundle")

        # the endpoint is always sent a serialized Bundle: content that is
        # already a Bundle is sent directly, anything else is wrapped in one
        if stix_obj["type"] == "bundle":
            bundle = stix_obj.serialize(encoding="utf-8")
        else:
            bundle = Bundle(stix_obj, allow_custom=self.allow_custom).serialize(encoding="utf-8")

        self.collection.add_objects(bundle)
2017-05-24 17:25:40 +02:00
2017-07-12 16:58:31 +02:00
class TAXIICollectionSource(DataSource):
    """Provides an interface for searching/retrieving STIX objects
    from a local/remote TAXII Collection endpoint.

    Args:
        collection (taxii2.Collection): TAXII Collection instance
        allow_custom (bool): Whether to allow custom STIX content to be
            retrieved from the TAXIICollectionSource. Default: True

    Raises:
        HTTPError, ValueError: re-raised unchanged in type when reading
            ``collection.can_read`` fails; an explanatory ``message``
            attribute is attached to the exception first.
        ValueError: if the collection reports that it is not readable.

    """

    def __init__(self, collection, allow_custom=True):
        super(TAXIICollectionSource, self).__init__()
        try:
            # we have to execute .can_read first in isolation because the
            # attribute access could trigger a taxii2client.ValidationError which
            # we catch here as a ValueError (its parent class). Later, we need to
            # have the ability to also raise a different ValueError based on the
            # value of .can_read
            readable = collection.can_read
        except (HTTPError, ValueError) as e:
            # Python 3 exceptions have no ``message`` attribute, so fall back
            # to str(e); otherwise this handler itself would fail with an
            # AttributeError and hide the real TAXII error.
            e.message = ("The underlying TAXII Collection resource defined in the supplied TAXII"
                         " Collection object provided could not be reached. TAXII Collection Error: "
                         + getattr(e, "message", str(e)))
            raise

        # now past taxii2client possible exceptions, check value for local exceptions
        if not readable:
            raise ValueError("The TAXII Collection object provided does not have read access"
                             " to the underlying linked Collection resource")

        self.collection = collection
        self.allow_custom = allow_custom

    def get(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote STIX Collection
        endpoint.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (STIX object): STIX object that has the supplied STIX ID, or
                None when the server answers 404, when no returned object
                passes the filters, or when the returned object carries a
                different ID. The STIX object is received from TAXII as a
                dict, parsed into a python STIX object and then returned.

        Raises:
            HTTPError: for any HTTP error other than a 404 response.

        """
        # combine all query filters
        query = FilterSet()
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # don't extract TAXII filters from query (to send to TAXII endpoint)
        # as we are directly retrieving a STIX object by ID
        try:
            stix_objs = self.collection.get_object(stix_id)["objects"]
            matches = list(apply_common_filters(stix_objs, query))
        except HTTPError as err:
            # an HTTPError may carry no response; in that case let the
            # HTTPError itself propagate rather than an AttributeError
            if err.response is not None and err.response.status_code == 404:
                # resource not found or access denied on the TAXII server
                matches = []
            else:
                raise

        if not matches:
            return None

        stix_obj = parse(matches[0], allow_custom=self.allow_custom, version=version)
        if stix_obj.id != stix_id:
            # check - was added to handle erroneous TAXII servers
            return None

        return stix_obj

    def all_versions(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from local/remote TAXII Collection
        endpoint, all versions of it

        Args:
            stix_id (str): The STIX ID of the STIX objects to be retrieved.
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the parent
                CompositeDataSource, not user supplied

        Returns:
            (see query() as all_versions() is just a wrapper)

        """
        # make query in TAXII query format since 'id' is TAXII field
        query = [
            Filter("id", "=", stix_id),
            Filter("version", "=", "all"),
        ]

        # query() already parses the returned dicts into python STIX objects;
        # forward ``version`` so that parse honours the requested spec version
        # instead of parsing once with the default and then a second time here
        all_data = self.query(query=query, version=version, _composite_filters=_composite_filters)

        # check - was added to handle erroneous TAXII servers
        return [stix_obj for stix_obj in all_data if stix_obj.id == stix_id]

    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query

        A "complete query" includes the filters from the query, the filters
        attached to this TAXIICollectionSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters)

        Args:
            query (list): list of filters to search on
            version (str): Which STIX2 version to use. (e.g. "2.0", "2.1"). If
                None, use latest version.
            _composite_filters (FilterSet): collection of filters passed from the
                CompositeDataSource, not user supplied

        Returns:
            (list): list of STIX objects that matches the supplied
                query. The STIX objects are received from TAXII as dicts,
                parsed into python STIX objects and then returned.

        Raises:
            HTTPError: any HTTP error from the TAXII endpoint is re-raised;
                for a 404 an explanatory ``message`` attribute is attached
                to the exception first.

        """
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        # parse taxii query params (that can be applied remotely)
        taxii_filters = self._parse_taxii_filters(query)

        # taxii2client requires query params as keywords
        taxii_filters_dict = {f.property: f.value for f in taxii_filters}

        # query TAXII collection
        try:
            all_data = self.collection.get_objects(**taxii_filters_dict)["objects"]

            # deduplicate data (before filtering as reduces wasted filtering)
            all_data = deduplicate(all_data)

            # apply local (CompositeDataSource, TAXIICollectionSource and query) filters;
            # the TAXII filters were already applied by the server
            query.remove(taxii_filters)
            all_data = list(apply_common_filters(all_data, query))

        except HTTPError as err:
            # if resources not found or access is denied from TAXII server,
            # annotate the error with a clearer message; every HTTP error
            # is re-raised to the caller
            if err.response is not None and err.response.status_code == 404:
                # Python 3 exceptions have no ``message`` attribute, so fall
                # back to str(err) instead of failing with AttributeError
                err.message = ("The requested STIX objects for the TAXII Collection resource defined in"
                               " the supplied TAXII Collection object is either not found or access is"
                               " denied. Received error: " + getattr(err, "message", str(err)))
            raise

        # parse python STIX objects from the STIX object dicts
        return [
            parse(stix_obj_dict, allow_custom=self.allow_custom, version=version)
            for stix_obj_dict in all_data
        ]

    def _parse_taxii_filters(self, query):
        """Parse out TAXII filters that the TAXII server can filter on

        Does not put in TAXII spec format as the TAXII2Client (that we use)
        does this for us.

        Notes:
            Currently, the TAXII2Client can handle TAXII filters where the
            filter value is list, as both a comma-separated string or python list

            For instance - "?match[type]=indicator,sighting" can be in a
            filter in any of these formats:

            Filter("type", "<any op>", "indicator,sighting")
            Filter("type", "<any op>", ["indicator", "sighting"])

        Args:
            query (list): list of filters to extract which ones are TAXII
                specific.

        Returns:
            A list of TAXII filters that meet the TAXII filtering parameters,
            i.e. those whose property is listed in TAXII_FILTERS.

        """
        return [filter_ for filter_ in query if filter_.property in TAXII_FILTERS]