Data* suite reorg, fixing bugs
parent
20958b908a
commit
9d72d60706
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,256 @@
|
|||
"""
|
||||
Python STIX 2.0 FileSystem Source/Sink
|
||||
|
||||
Classes:
|
||||
FileSystemStore
|
||||
FileSystemSink
|
||||
FileSystemSource
|
||||
|
||||
TODO: Test everything
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from sources import DataSink, DataSource, DataStore, make_id
|
||||
from stix2 import Bundle
|
||||
|
||||
|
||||
class FileSystemStore(DataStore):
    """Interface to a file-system-backed STIX 2.0 data store.

    Pairs a FileSystemSource (reads) with a FileSystemSink (writes) over the
    same directory and exposes the combined DataStore API.

    Args:
        stix_dir (str): path to the directory of STIX content
        source (FileSystemSource): optional pre-built source to attach
        sink (FileSystemSink): optional pre-built sink to attach
        name (str): optional name tag for this store
    """
    def __init__(self, stix_dir="stix_data", source=None, sink=None, name="FileSystemStore"):
        self.name = name
        self.id = make_id()

        # fall back to building a source/sink over stix_dir when not supplied
        self.source = source if source else FileSystemSource(stix_dir=stix_dir)
        self.sink = sink if sink else FileSystemSink(stix_dir=stix_dir)

    @property
    def source(self):
        return self._source

    @source.setter
    def source(self, source):
        # BUG FIX: the original setter assigned to self.source, re-invoking
        # itself -> infinite recursion on the first assignment in __init__.
        # Store on a private backing field instead.
        self._source = source

    @property
    def sink(self):
        return self._sink

    @sink.setter
    def sink(self, sink):
        # BUG FIX: same infinite-recursion defect as the source setter.
        self._sink = sink

    # file system sink API calls

    def add(self, stix_objs):
        """Persist STIX objects via the attached sink."""
        return self.sink.add(stix_objs=stix_objs)

    def remove(self, stix_ids):
        """Delete STIX objects (by id) via the attached sink."""
        return self.sink.remove(stix_ids=stix_ids)

    # file system source API calls

    def get(self, stix_id):
        """Retrieve a single STIX object by id via the attached source."""
        return self.source.get(stix_id=stix_id)

    def all_versions(self, stix_id):
        """Retrieve all versions of a STIX object via the attached source."""
        return self.source.all_versions(stix_id=stix_id)

    def query(self, query):
        """Run a filter query via the attached source."""
        return self.source.query(query=query)
|
||||
|
||||
|
||||
class FileSystemSink(DataSink):
    """STIX 2.0 data sink that writes objects to the file system.

    Each object is written as a single-object Bundle to
    <stix_dir>/<object type>/<object id>.
    """
    def __init__(self, stix_dir="stix_data", name="FileSystemSink"):
        super(FileSystemSink, self).__init__(name=name)
        self.stix_dir = os.path.abspath(stix_dir)

        # check directory path exists
        if not os.path.exists(self.stix_dir):
            print("Error: directory path for STIX data does not exist")

    @property
    def stix_dir(self):
        return self._stix_dir

    @stix_dir.setter
    def stix_dir(self, dir):
        # BUG FIX: the original setter assigned to self.stix_dir, re-invoking
        # itself -> infinite recursion; use a private backing field.
        self._stix_dir = dir

    def add(self, stix_objs=None):
        """Write each STIX object out as a one-object Bundle.

        Args:
            stix_objs (list): STIX object dicts to persist; no-op when empty.

        Q: bundlify or no?
        """
        if not stix_objs:
            stix_objs = []
        for stix_obj in stix_objs:
            path = os.path.join(self.stix_dir, stix_obj["type"], stix_obj["id"])
            # BUG FIX: 'indent' was passed to open() (a TypeError) instead of
            # json.dump(), and the file handle was never closed; use a context
            # manager and give indent to json.dump.
            with open(path, "w") as out_file:
                json.dump(Bundle([stix_obj]), out_file, indent=4)

    def remove(self, stix_ids=None):
        """Delete the files backing the given STIX ids; missing ids are skipped."""
        if not stix_ids:
            stix_ids = []
        for stix_id in stix_ids:
            # the object type (and thus its subdirectory) is the id prefix
            stix_type = stix_id.split("--")[0]
            try:
                os.remove(os.path.join(self.stix_dir, stix_type, stix_id))
            except OSError:
                # log error? nonexistent object in data with directory
                continue
|
||||
|
||||
|
||||
class FileSystemSource(DataSource):
    """STIX 2.0 data source that reads objects from the file system.

    Objects are expected to be stored as Bundles at
    <stix_dir>/<object type>/<object id>, mirroring FileSystemSink's layout.
    """
    def __init__(self, stix_dir="stix_data", name="FileSystemSource"):
        super(FileSystemSource, self).__init__(name=name)
        self.stix_dir = os.path.abspath(stix_dir)

        # check directory path exists
        if not os.path.exists(self.stix_dir):
            print("Error: directory path for STIX data does not exist")

    @property
    def stix_dir(self):
        return self._stix_dir

    @stix_dir.setter
    def stix_dir(self, dir):
        # BUG FIX: the original setter assigned to self.stix_dir, re-invoking
        # itself -> infinite recursion; use a private backing field.
        self._stix_dir = dir

    def get(self, stix_id, _composite_filters=None):
        """Return the most recent version of the STIX object with stix_id,
        or None when no matching object is found.
        """
        query = [
            {
                "field": "id",
                "op": "=",
                "value": stix_id
            }
        ]

        all_data = self.query(query=query, _composite_filters=_composite_filters)

        if not all_data:
            # BUG FIX: the original raised IndexError on an empty result set
            return None

        # BUG FIX: ascending sort + [0] returned the OLDEST version; take the
        # last element to get the most recent 'modified' timestamp.
        stix_obj = sorted(all_data, key=lambda k: k['modified'])[-1]

        return stix_obj

    def all_versions(self, stix_id, _composite_filters=None):
        """
        NOTE: since FileSystem sources/sinks don't handle multiple versions of
        a STIX object, this operation is futile. Pass call to get().
        (Approved by G.B.)
        """
        return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]

    def query(self, query=None, _composite_filters=None):
        """Walk the STIX directory tree and return objects matching the
        combined set of filters (call-level, attached, and composite)."""
        all_data = []

        if query is None:
            query = []

        # combine all query filters
        if self.filters:
            query.extend(self.filters.values())
        if _composite_filters:
            query.extend(_composite_filters)

        # extract any filters that are for "type" or "id", as we can then do
        # filtering before reading in the STIX objects. A STIX 'type' filter
        # can reduce the query to a single sub-directory. A STIX 'id' filter
        # allows for fast checking of the file names versus loading them.
        file_filters = self._parse_file_filters(query)

        # establish which subdirectories can be avoided in the query by
        # excluding as many as possible: a "type" filter rules STIX object
        # types (and thus subdirectories) in or out
        include_paths = []
        declude_paths = []
        if "type" in [filter_['field'] for filter_ in file_filters]:
            for filter_ in file_filters:
                if filter_['field'] == "type":
                    if filter_['op'] == '=':
                        include_paths.append(os.path.join(self.stix_dir, filter_['value']))
                    elif filter_['op'] == "!=":
                        declude_paths.append(os.path.join(self.stix_dir, filter_['value']))
        else:
            # have to walk entire STIX directory
            include_paths.append(self.stix_dir)

        # "type = X" filters reduce the search to single directories and are
        # therefore given preference over "type != X" filters, which each
        # subtract only one directory; when both exist, the latter is ignored
        if not include_paths:
            # user has specified only unwanted types (i.e. "!="), so look in
            # every STIX subdirectory that is not a declared type
            for dir_ in os.listdir(self.stix_dir):
                # BUG FIX: os.path.abspath(dir_) resolved the bare directory
                # name against the CWD, not the STIX directory; join against
                # self.stix_dir so the comparison and walk are correct.
                dir_path = os.path.join(self.stix_dir, dir_)
                if dir_path not in declude_paths:
                    include_paths.append(dir_path)

        # grab a STIX object ID if present in the filters, as it lets us check
        # file names and forgo loading non-matching STIX content into memory.
        # BUG FIX: the original tested filter_['field'] == '=' (always false,
        # 'op' was intended), reset id_ inside the loop for every non-id
        # filter, and left id_ unbound (NameError) when no id filter existed.
        id_ = None
        for filter_ in file_filters:
            if filter_['field'] == 'id' and filter_['op'] == '=':
                id_ = filter_['value']
                break

        # now iterate through all STIX objs
        for path in include_paths:
            for root, dirs, files in os.walk(path):
                for file_ in files:
                    if id_ and id_ != file_.split(".")[0]:
                        # ID specified in filters: skip by filename without loading
                        continue
                    # BUG FIX: the original called json.load() on the bare
                    # filename string; open the file joined to its directory.
                    with open(os.path.join(root, file_)) as stix_file:
                        stix_objs = json.load(stix_file)['objects']
                    # check against the remaining filters, add any matches
                    all_data.extend(self.apply_common_filters(stix_objs, query))

        all_data = self.deduplicate(all_data)

        return all_data

    def _parse_file_filters(self, query):
        """Return the subset of filters acting on the 'id' or 'type' field."""
        file_filters = []
        for filter_ in query:
            if filter_['field'] == "id" or filter_['field'] == "type":
                file_filters.append(filter_)
        return file_filters
|
|
@ -0,0 +1,268 @@
|
|||
"""
|
||||
Python STIX 2.0 Memory Source/Sink
|
||||
|
||||
Classes:
|
||||
MemoryStore
|
||||
MemorySink
|
||||
MemorySource
|
||||
|
||||
TODO: Test everything.
|
||||
|
||||
TODO: Use deduplicate() calls only when memory corpus is dirty (been added to)
|
||||
can save a lot of time for successive queries
|
||||
|
||||
NOTE: Not worrying about STIX versioning. The in memory STIX data at anytime
|
||||
will only hold one version of a STIX object. As such, when save() is called,
|
||||
the single versions of all the STIX objects are what is written to file.
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
|
||||
from stix2 import Bundle
|
||||
from stix2.sources import DataSink, DataSource, DataStore, make_id
|
||||
from stix2validator import validate_string
|
||||
|
||||
|
||||
class MemoryStore(DataStore):
    """Interface to an in-memory STIX 2.0 data store.

    Pairs a MemorySource (reads) with a MemorySink (writes) and exposes the
    combined DataStore API.

    NOTE(review): when built from stix_data, the source and sink each load an
    independent copy of the content, so objects written via add() are not
    visible to get()/query() — confirm this is intended.
    """
    def __init__(self, stix_data=None, source=None, sink=None, name="MemoryStore"):
        self.name = name
        self.id = make_id()

        # fall back to building a source/sink over stix_data when not supplied
        self.source = source if source else MemorySource(stix_data=stix_data)
        self.sink = sink if sink else MemorySink(stix_data=stix_data)

    @property
    def source(self):
        return self._source

    @source.setter
    def source(self, source):
        # BUG FIX: the original setter assigned to self.source, re-invoking
        # itself -> infinite recursion on the first assignment in __init__.
        self._source = source

    @property
    def sink(self):
        return self._sink

    @sink.setter
    def sink(self, sink):
        # BUG FIX: same infinite-recursion defect as the source setter.
        self._sink = sink

    # memory sink API calls

    def add(self, stix_data):
        """Add STIX content (bundle dict or list) via the attached sink."""
        return self.sink.add(stix_data=stix_data)

    def remove(self, stix_ids):
        """Remove STIX objects (by id) via the attached sink."""
        return self.sink.remove(stix_ids=stix_ids)

    def save(self):
        """Persist the sink's contents to disk as a Bundle."""
        return self.sink.save()

    # memory source API calls

    def get(self, stix_id):
        """Retrieve a single STIX object by id via the attached source."""
        return self.source.get(stix_id=stix_id)

    def all_versions(self, stix_id):
        """Retrieve all versions of a STIX object via the attached source."""
        return self.source.all_versions(stix_id=stix_id)

    def query(self, query):
        """Run a filter query via the attached source."""
        return self.source.query(query=query)
|
||||
|
||||
|
||||
class MemorySink(DataSink):
    """STIX 2.0 data sink that holds validated STIX objects in memory.

    Objects are kept in a dict keyed by STIX id; save() serializes the current
    contents to disk as a single Bundle.
    """
    def __init__(self, stix_data=None, name="MemorySink"):
        """
        Args:
            stix_data (dict OR list): valid STIX 2.0 content in a bundle
                (dict) or as a raw list of objects
            name (string): optional name tag of the data source

        Raises:
            ValueError: if stix_data is neither a bundle dict nor a list
        """
        super(MemorySink, self).__init__(name=name)
        self.data = {}
        if stix_data:
            if isinstance(stix_data, dict):
                # stix objects are in a bundle; verify the STIX json data
                r = validate_string(json.dumps(stix_data))
                # make a dictionary of the objects for easy lookup
                if r.is_valid:
                    for stix_obj in stix_data["objects"]:
                        self.data[stix_obj["id"]] = stix_obj
                else:
                    print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator")
                    print(r)
                    self.data = {}
            elif isinstance(stix_data, list):
                # stix objects are in a list; validate each individually
                for stix_obj in stix_data:
                    r = validate_string(json.dumps(stix_obj))
                    if r.is_valid:
                        self.data[stix_obj["id"]] = stix_obj
                    else:
                        # BUG FIX: the original applied '%' to print()'s return
                        # value (None) -> TypeError; format the string itself.
                        print("Error: STIX object %s is not valid under STIX 2 validator." % stix_obj["id"])
                        print(r)
            else:
                raise ValueError("stix_data must be in bundle format or raw list")

    def add(self, stix_data):
        """Validate and add STIX content (bundle dict or list) to the store.

        Raises:
            ValueError: if stix_data is neither a bundle dict nor a list
        """
        if isinstance(stix_data, dict):
            # stix data is in bundle
            r = validate_string(json.dumps(stix_data))
            if r.is_valid:
                for stix_obj in stix_data["objects"]:
                    self.data[stix_obj["id"]] = stix_obj
            else:
                print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator")
                print(r)
        elif isinstance(stix_data, list):
            # stix data is in list
            for stix_obj in stix_data:
                r = validate_string(json.dumps(stix_obj))
                if r.is_valid:
                    self.data[stix_obj["id"]] = stix_obj
                else:
                    # BUG FIX: same print()-then-'%' defect as in __init__
                    print("Error: STIX object %s is not valid under STIX 2 validator." % stix_obj["id"])
                    print(r)
        else:
            raise ValueError("stix_data must be in bundle format or raw list")

    def remove(self, stix_ids):
        """Remove the given STIX ids from the store; unknown ids are ignored."""
        for stix_id in stix_ids:
            try:
                del self.data[stix_id]
            except KeyError:
                pass

    def save(self, file_path=None):
        """Write all held STIX objects to file_path as a single Bundle.

        Args:
            file_path (str): destination file; defaults to a bundle file next
                to this module when not given.
        """
        if not file_path:
            # NOTE(review): the original default was this module's directory
            # itself, which cannot be opened as a file; a filename is appended.
            file_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                     "memory_sink_bundle.json")
        # BUG FIX: json.dump() requires a writable file object, not a path
        # string; open the file and close it deterministically.
        with open(file_path, "w") as out_file:
            json.dump(Bundle(list(self.data.values())), out_file, indent=4)
|
||||
|
||||
|
||||
class MemorySource(DataSource):
    """STIX 2.0 data source that serves validated STIX objects from memory.

    Objects are kept in a dict keyed by STIX id, holding at most one version
    of each object.
    """
    def __init__(self, stix_data=None, name="MemorySource"):
        """
        Args:
            stix_data (dict OR list): valid STIX 2.0 content in a bundle
                (dict) or as a raw list of objects
            name (string): optional name tag of the data source

        Raises:
            ValueError: if stix_data is neither a bundle dict nor a list
        """
        super(MemorySource, self).__init__(name=name)
        self.data = {}

        if stix_data:
            if isinstance(stix_data, dict):
                # stix objects are in a bundle; verify the STIX json data
                r = validate_string(json.dumps(stix_data))
                # make a dictionary of the objects for easy lookup
                if r.is_valid:
                    for stix_obj in stix_data["objects"]:
                        self.data[stix_obj["id"]] = stix_obj
                else:
                    print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator")
                    print(r)
                    self.data = {}
            elif isinstance(stix_data, list):
                # stix objects are in a list; validate each individually
                for stix_obj in stix_data:
                    r = validate_string(json.dumps(stix_obj))
                    if r.is_valid:
                        self.data[stix_obj["id"]] = stix_obj
                    else:
                        # BUG FIX: the original applied '%' to print()'s return
                        # value (None) -> TypeError; format the string itself.
                        print("Error: STIX object %s is not valid under STIX 2 validator." % stix_obj["id"])
                        print(r)
            else:
                raise ValueError("stix_data must be in bundle format or raw list")

    def get(self, stix_id, _composite_filters=None):
        """Return the STIX object with stix_id, or None when not found."""
        if _composite_filters is None:
            # if the get call is based only on 'id', no need to search —
            # just retrieve from the dict
            try:
                stix_obj = self.data[stix_id]
            except KeyError:
                stix_obj = None
            return stix_obj

        # if there are filters from the composite level, process full query
        query = [
            {
                "field": "id",
                "op": "=",
                "value": stix_id
            }
        ]

        all_data = self.query(query=query, _composite_filters=_composite_filters)

        if not all_data:
            # BUG FIX: the original raised IndexError when nothing matched
            return None

        # reduce to most recent version
        # BUG FIX: ascending sort + [0] picked the OLDEST version; use [-1]
        stix_obj = sorted(all_data, key=lambda k: k['modified'])[-1]

        return stix_obj

    def all_versions(self, stix_id, _composite_filters=None):
        """
        NOTE: since Memory sources/sinks don't handle multiple versions of a
        STIX object, this operation is futile. Translate call to get().
        (Approved by G.B.)
        """
        return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)]

    def query(self, query=None, _composite_filters=None):
        """Apply the combined filters to all in-memory objects and return
        the matching STIX objects."""
        if query is None:
            query = []

        # combine all query filters
        if self.filters:
            query.extend(self.filters.values())
        if _composite_filters:
            query.extend(_composite_filters)

        # deduplication is not required, as Memory only ever holds one
        # version of an object

        # apply STIX common property filters
        all_data = self.apply_common_filters(self.data.values(), query)

        return all_data
|
|
@ -1,131 +1,213 @@
|
|||
import requests
|
||||
from requests.auth import HTTPBasicAuth
|
||||
"""
|
||||
Python STIX 2.0 TAXII Source/Sink
|
||||
|
||||
from stix2.sources import DataSource
|
||||
Classes:
|
||||
TAXIICollectionStore
|
||||
TAXIICollectionSink
|
||||
TAXIICollectionSource
|
||||
|
||||
# TODO: -Should we make properties for the TAXIIDataSource address and other
|
||||
# possible variables that are found in "self.taxii_info"
|
||||
TODO: Test everything
|
||||
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
|
||||
from stix2.sources import DataSink, DataSource, DataStore, make_id
|
||||
from taxii2_client import TAXII2Client
|
||||
|
||||
TAXII_FILTERS = ['added_after', 'id', 'type', 'version']
|
||||
|
||||
test = True
|
||||
|
||||
class TAXIICollectionStore(DataStore):
    """Interface to a TAXII 2.0 collection as a combined STIX data store.

    Pairs a TAXIICollectionSource (reads) with a TAXIICollectionSink (writes)
    bound to the same TAXII server collection.
    """
    def __init__(self,
                 source=None,
                 sink=None,
                 server_uri=None,
                 api_root_name=None,
                 collection_id=None,
                 user=None,
                 password=None,
                 name="TAXIICollectionStore"):

        self.name = name
        self.id = make_id()

        # fall back to building a source/sink from the TAXII connection
        # parameters when not supplied
        self.source = source if source else TAXIICollectionSource(server_uri, api_root_name, collection_id, user, password)
        # BUG FIX: the original read 'self.TAXIICollectionSink(...)' — an
        # attribute access with no assignment — so no sink was ever created
        # (AttributeError at construction time).
        self.sink = sink if sink else TAXIICollectionSink(server_uri, api_root_name, collection_id, user, password)

    @property
    def source(self):
        return self._source

    @source.setter
    def source(self, source):
        # BUG FIX: the original setter assigned to self.source, re-invoking
        # itself -> infinite recursion; use a private backing field.
        self._source = source

    @property
    def sink(self):
        return self._sink

    @sink.setter
    def sink(self, sink):
        # BUG FIX: same infinite-recursion defect as the source setter.
        self._sink = sink

    # TAXII collection sink API calls

    def add(self, stix_objs):
        """Push STIX objects to the TAXII collection via the sink."""
        return self.sink.add(stix_objs=stix_objs)

    # TAXII collection source API calls

    def get(self, stix_id):
        """Retrieve a single STIX object by id via the source."""
        return self.source.get(stix_id=stix_id)

    def all_versions(self, stix_id):
        """Retrieve all versions of a STIX object via the source."""
        return self.source.all_versions(stix_id=stix_id)

    def query(self, query):
        """Run a filter query via the source."""
        return self.source.query(query=query)
|
||||
|
||||
|
||||
class TAXIIDataSource(DataSource):
|
||||
"""STIX 2.0 Data Source - TAXII 2.0 module"""
|
||||
class TAXIICollectionSink(DataSink):
|
||||
"""
|
||||
"""
|
||||
|
||||
def __init__(self, api_root=None, auth=None, name="TAXII"):
|
||||
super(TAXIIDataSource, self).__init__(name=name)
|
||||
def __init__(self, server_uri=None, api_root_name=None, collection_id=None, user=None, password=None, name="TAXIICollectionSink"):
|
||||
super(TAXIICollectionSink, self).__init__(name=name)
|
||||
|
||||
if not api_root:
|
||||
api_root = "http://localhost:5000"
|
||||
if not auth:
|
||||
auth = {"user": "admin", "pass": "taxii"}
|
||||
self.taxii_client = TAXII2Client(server_uri, user, password)
|
||||
self.taxii_client.populate_available_information()
|
||||
|
||||
self.taxii_info = {
|
||||
"api_root": {
|
||||
"url": api_root
|
||||
},
|
||||
"auth": auth
|
||||
}
|
||||
|
||||
if test:
|
||||
return
|
||||
|
||||
try:
|
||||
# check api-root is reachable/exists and grab api collections
|
||||
coll_url = self.taxii_info['api_root']['url'] + "/collections/"
|
||||
headers = {}
|
||||
|
||||
resp = requests.get(coll_url,
|
||||
headers=headers,
|
||||
auth=HTTPBasicAuth(self.taxii_info['auth']['user'],
|
||||
self.taxii_info['auth']['pass']))
|
||||
# TESTING
|
||||
# print("\n-------__init__() ----\n")
|
||||
# print(resp.text)
|
||||
# print("\n")
|
||||
# print(resp.status_code)
|
||||
# END TESTING
|
||||
|
||||
# raise http error if request returned error code
|
||||
resp.raise_for_status()
|
||||
|
||||
resp_json = resp.json()
|
||||
|
||||
try:
|
||||
self.taxii_info['api_root']['collections'] = resp_json['collections']
|
||||
except KeyError as e:
|
||||
if e == "collections":
|
||||
raise
|
||||
# raise type(e), type(e)(e.message +
|
||||
# "To connect to the TAXII collections, the API root
|
||||
# resource must contain a collection endpoint URL.
|
||||
# This was not found in the API root resource received
|
||||
# from the API root" ), sys.exc_info()[2]
|
||||
|
||||
except requests.ConnectionError as e:
|
||||
raise
|
||||
# raise type(e), type(e)(e.message +
|
||||
# "Attempting to connect to %s" % coll_url)
|
||||
|
||||
def get(self, id_, _composite_filters=None):
|
||||
"""Get STIX 2.0 object from TAXII source by specified 'id'
|
||||
|
||||
Notes:
|
||||
Just pass _composite_filters to the query() as they are applied
|
||||
there. de-duplication of results is also done within query()
|
||||
|
||||
Args:
|
||||
id_ (str): id of STIX object to retrieve
|
||||
|
||||
_composite_filters (list): filters passed from a Composite Data
|
||||
Source (if this data source is attached to one)
|
||||
|
||||
Returns:
|
||||
if not api_root_name:
|
||||
raise ValueError("No api_root specified.")
|
||||
else:
|
||||
self.api_root = None
|
||||
for a_r in self.taxii_client.api_roots:
|
||||
if api_root_name == a_r.name:
|
||||
self.api_root = a_r
|
||||
break
|
||||
if not self.api_root:
|
||||
raise ValueError("The api_root %s is not found on this taxii server" % api_root_name)
|
||||
if not collection_id:
|
||||
raise ValueError("No collection specified.")
|
||||
else:
|
||||
self.collection = None
|
||||
for c in self.api_root.collections:
|
||||
if c.id_ == collection_id:
|
||||
self.collection = c
|
||||
break
|
||||
if not self.collection:
|
||||
raise ValueError("The collection %s is not found on the api_root %s of this taxii server" %
|
||||
(collection_id, api_root_name))
|
||||
|
||||
def save(self, stix_obj):
|
||||
"""
|
||||
"""
|
||||
self.collection.add_objects(self.create_bundle([json.loads(str(stix_obj))]))
|
||||
|
||||
# make query in TAXII query format since 'id' is TAXii field
|
||||
query = [
|
||||
{
|
||||
"field": "match[id]",
|
||||
"op": "=",
|
||||
"value": id_
|
||||
}
|
||||
]
|
||||
@staticmethod
|
||||
def create_bundle(objects):
|
||||
return dict(id="bundle--" + str(uuid.uuid4()),
|
||||
objects=objects,
|
||||
spec_version="2.0",
|
||||
type="bundle")
|
||||
|
||||
all_data = self.query(query=query, _composite_filters=_composite_filters)
|
||||
# utility functions for the current set collection and api root
|
||||
def get_api_root_info(self):
|
||||
"""
|
||||
"""
|
||||
return self.api_root.get_information()
|
||||
|
||||
# reduce to most recent version
|
||||
stix_obj = sorted(all_data, key=lambda k: k['modified'])[0]
|
||||
def get_api_root_collections(self):
|
||||
"""
|
||||
"""
|
||||
return self.api_root.get_collections()
|
||||
|
||||
def get_collection_manifest(self):
|
||||
"""
|
||||
"""
|
||||
return self.collection.get_collection_manifest()
|
||||
|
||||
|
||||
class TAXIICollectionSource(DataSource):
|
||||
"""
|
||||
"""
|
||||
def __init__(self, server_uri=None, api_root_name=None, collection_id=None, user=None, password=None, name="TAXIICollectionSource"):
    """Bind this source to one collection on one api root of a TAXII server.

    Args:
        server_uri (str): address of the TAXII 2.0 server
        api_root_name (str): name of the api root holding the collection
        collection_id (str): id of the collection to read from
        user (str): TAXII server username
        password (str): TAXII server password
        name (str): optional name tag of the data source

    Raises:
        ValueError: when api_root_name/collection_id is missing or not
            present on the server.
    """
    # BUG FIX: the original default name was the typo "TAXIICollectionSourc"
    super(TAXIICollectionSource, self).__init__(name=name)

    self.taxii_client = TAXII2Client(server_uri, user, password)
    self.taxii_client.populate_available_information()

    if not api_root_name:
        raise ValueError("No api_root specified.")
    self.api_root = None
    for a_r in self.taxii_client.api_roots:
        if api_root_name == a_r.name:
            self.api_root = a_r
            break
    if not self.api_root:
        raise ValueError("The api_root %s is not found on this taxii server" % api_root_name)

    if not collection_id:
        raise ValueError("No collection specified.")
    self.collection = None
    for c in self.api_root.collections:
        if c.id_ == collection_id:
            self.collection = c
            break
    if not self.collection:
        raise ValueError("The collection %s is not found on the api_root %s of this taxii server" %
                         (collection_id, api_root_name))
|
||||
|
||||
def get(self, stix_id, _composite_filters=None):
    """Fetch the STIX object with stix_id from the TAXII collection.

    Combines the source's attached filters with any composite-level
    filters, pushes the TAXII-expressible ones to the server, applies the
    rest locally, and returns the first match (or None).
    """
    # merge attached filters with any passed in from a composite source
    query = []
    if self.filters:
        query.extend(self.filters.values())
    if _composite_filters:
        query.extend(_composite_filters)

    # separate taxii query terms (can be done remotely)
    taxii_filters = self._parse_taxii_filters(query)

    # ask the collection for the object, then filter locally
    candidates = self.collection.get_object(stix_id, taxii_filters)["objects"]
    matches = self.apply_common_filters(candidates, query)

    return matches[0] if len(matches) > 0 else None
|
||||
|
||||
def all_versions(self, id_, _composite_filters=None):
|
||||
"""Get all versions of STIX 2.0 object from TAXII source by
|
||||
specified 'id'
|
||||
|
||||
Notes:
|
||||
Just passes _composite_filters to the query() as they are applied
|
||||
there. de-duplication of results is also done within query()
|
||||
|
||||
Args:
|
||||
id_ (str): id of STIX objects to retrieve
|
||||
_composite_filters (list): filters passed from a Composite Data
|
||||
Source (if this data source is attached to one)
|
||||
|
||||
Returns:
|
||||
The query results with filters applied.
|
||||
def all_versions(self, stix_id, _composite_filters=None):
|
||||
"""
|
||||
"""
|
||||
|
||||
# make query in TAXII query format since 'id' is TAXII field
|
||||
query = [
|
||||
{
|
||||
"field": "match[id]",
|
||||
"op": "=",
|
||||
"value": id_
|
||||
"value": stix_id
|
||||
},
|
||||
{
|
||||
"field": "match[version]",
|
||||
"op": "=",
|
||||
"value": "all"
|
||||
}
|
||||
]
|
||||
|
||||
|
@ -134,84 +216,22 @@ class TAXIIDataSource(DataSource):
|
|||
return all_data
|
||||
|
||||
def query(self, query=None, _composite_filters=None):
|
||||
"""Query the TAXII data source for STIX objects matching the query
|
||||
|
||||
The final full query could contain filters from:
|
||||
-the current API call
|
||||
-Composite Data source filters (that are passed in via
|
||||
'_composite_filters')
|
||||
-TAXII data source filters that are attached
|
||||
|
||||
TAXII filters ['added_after', 'match[<>]'] are extracted and sent
|
||||
to TAXII if they are present
|
||||
|
||||
TODO: Authentication for TAXII
|
||||
|
||||
Args:
|
||||
|
||||
query(list): list of filters (dicts) to search on
|
||||
|
||||
_composite_filters (list): filters passed from a
|
||||
Composite Data Source (if this data source is attached to one)
|
||||
|
||||
Returns:
|
||||
|
||||
|
||||
"""
|
||||
|
||||
all_data = []
|
||||
|
||||
"""
|
||||
if query is None:
|
||||
query = []
|
||||
|
||||
# combine all query filters
|
||||
if self.filters:
|
||||
query += self.filters.values()
|
||||
query.extend(self.filters.values())
|
||||
if _composite_filters:
|
||||
query += _composite_filters
|
||||
query.extend(_composite_filters)
|
||||
|
||||
# separate taxii query terms (can be done remotely)
|
||||
taxii_filters = self._parse_taxii_filters(query)
|
||||
|
||||
# for each collection endpoint - send query request
|
||||
for collection in self.taxii_info['api_root']['collections']:
|
||||
|
||||
coll_obj_url = "/".join([self.taxii_info['api_root']['url'],
|
||||
"collections", str(collection['id']),
|
||||
"objects"])
|
||||
headers = {}
|
||||
try:
|
||||
resp = requests.get(coll_obj_url,
|
||||
params=taxii_filters,
|
||||
headers=headers,
|
||||
auth=HTTPBasicAuth(self.taxii_info['auth']['user'],
|
||||
self.taxii_info['auth']['pass']))
|
||||
# TESTING
|
||||
# print("\n-------query() ----\n")
|
||||
# print("Request that was sent: \n")
|
||||
# print(resp.url)
|
||||
# print("Response: \n")
|
||||
# print(json.dumps(resp.json(),indent=4))
|
||||
# print("\n")
|
||||
# print(resp.status_code)
|
||||
# print("------------------")
|
||||
# END TESTING
|
||||
|
||||
# raise http error if request returned error code
|
||||
resp.raise_for_status()
|
||||
resp_json = resp.json()
|
||||
|
||||
# grab all STIX 2.0 objects in json response
|
||||
for stix_obj in resp_json['objects']:
|
||||
all_data.append(stix_obj)
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise e
|
||||
# raise type(e), type(e)(e.message +
|
||||
# "Attempting to connect to %s" % coll_url)
|
||||
|
||||
# TODO: Is there a way to collect exceptions while carrying
|
||||
# on then raise all of them at the end?
|
||||
# query TAXII collection
|
||||
all_data = self.collection.get_objects(filters=taxii_filters)["objects"]
|
||||
|
||||
# deduplicate data (before filtering as reduces wasted filtering)
|
||||
all_data = self.deduplicate(all_data)
|
||||
|
@ -224,11 +244,10 @@ class TAXIIDataSource(DataSource):
|
|||
def _parse_taxii_filters(self, query):
|
||||
"""Parse out TAXII filters that the TAXII server can filter on
|
||||
|
||||
TAXII filters should be analgous to how they are supplied
|
||||
in the url to the TAXII endpoint. For instance
|
||||
For instance
|
||||
"?match[type]=indicator,sighting" should be in a query dict as follows
|
||||
{
|
||||
"field": "match[type]"
|
||||
"field": "type"
|
||||
"op": "=",
|
||||
"value": "indicator,sighting"
|
||||
}
|
||||
|
@ -244,19 +263,62 @@ class TAXIIDataSource(DataSource):
|
|||
|
||||
params = {}
|
||||
|
||||
for q in query:
|
||||
if q['field'] in TAXII_FILTERS:
|
||||
if q['field'] == 'added_after':
|
||||
params[q['field']] = q['value']
|
||||
for filter_ in query:
|
||||
if filter_["field"] in TAXII_FILTERS:
|
||||
if filter_["field"] == "added_after":
|
||||
params[filter_["field"]] = filter_["value"]
|
||||
else:
|
||||
taxii_field = 'match[' + q['field'] + ']'
|
||||
params[taxii_field] = q['value']
|
||||
taxii_field = "match[" + filter_["field"] + ']'
|
||||
params[taxii_field] = filter_["value"]
|
||||
return params
|
||||
|
||||
def close(self):
|
||||
"""Close down the Data Source - if any clean up is required.
|
||||
|
||||
# utility functions for the current attached collection and api root
|
||||
def get_api_root_info(self):
|
||||
"""
|
||||
pass
|
||||
"""
|
||||
return self.api_root.get_information()
|
||||
|
||||
# TODO: - getters/setters (properties) for TAXII config info
|
||||
def get_api_root_collections(self):
|
||||
"""
|
||||
"""
|
||||
return self.api_root.get_collections()
|
||||
|
||||
def get_collection_manifest(self):
|
||||
"""
|
||||
"""
|
||||
return self.collection.get_collection_manifest()
|
||||
|
||||
|
||||
def get_server_api_roots(taxii_client):
    """Return the information resource of every api root on the TAXII server.

    Args:
        taxii_client: connected TAXII2Client whose api roots to enumerate.
    """
    api_root_info = []
    taxii_client.populate_available_information()

    for api_root in taxii_client.api_roots:
        # BUG FIX (consistency): every other call site in this module uses
        # get_information(); 'information()' was a one-off misspelling.
        api_root_info.append(api_root.get_information())

    return api_root_info
|
||||
|
||||
|
||||
def get_server_collections(taxii_client):
    """Return every collection from every api root on the TAXII server.

    Args:
        taxii_client: connected TAXII2Client whose collections to gather.
    """
    taxii_client.populate_available_information()

    # flatten the per-api-root collection lists into one result list
    collections = []
    for root in taxii_client.api_roots:
        collections.extend(root.get_collections())

    return collections
|
||||
|
||||
|
||||
def get_api_root_collections(taxii_client, api_root_name):
    """Return the collections of the named api root, or None if not found.

    Args:
        taxii_client: connected TAXII2Client to search.
        api_root_name (str): name of the api root to look up.
    """
    taxii_client.populate_available_information()

    for api_root in taxii_client.api_roots:
        # BUG FIX: the original compared the api-root OBJECT to the name
        # string ('api_root == api_root_name'), which never matched; compare
        # names, mirroring the lookup pattern used in the Source/Sink __init__.
        if api_root.name == api_root_name:
            return api_root.get_collections()

    return None
|
||||
|
|
Loading…
Reference in New Issue