diff --git a/.gitignore b/.gitignore index 3b9971a..3b953e1 100644 --- a/.gitignore +++ b/.gitignore @@ -57,9 +57,12 @@ docs/_build/ # PyBuilder target/ +# External data cache +cache.sqlite + # Vim *.swp -# + # PyCharm .idea/ diff --git a/stix2/sources/__init__.py b/stix2/sources/__init__.py index 6360fde..529725e 100644 --- a/stix2/sources/__init__.py +++ b/stix2/sources/__init__.py @@ -70,17 +70,21 @@ class DataStore(object): An implementer will create a concrete subclass from this abstract class for the specific data store. + Attributes: + id (str): A unique UUIDv4 to identify this DataStore. + source (DataStore): An object that implements DataStore class. + sink (DataSink): An object that implements DataSink class. + """ - def __init__(self, name="DataStore", source=None, sink=None): - self.name = name - self.id_ = make_id() + def __init__(self, source=None, sink=None): + self.id = make_id() self.source = source self.sink = sink def get(self, stix_id): """ - Implement: - Translate API get() call to the appropriate DataSource call + Notes: + Translate API get() call to the appropriate DataSource call. Args: stix_id (str): the id of the STIX 2.0 object to retrieve. Should @@ -103,9 +107,6 @@ class DataStore(object): return a single object, the most recent version of the object specified by the "id". - _composite_filters (list): list of filters passed along from - the Composite Data Filter. - Returns: stix_objs (list): a list of STIX objects (where each object is a STIX object) @@ -115,9 +116,9 @@ class DataStore(object): def query(self, query): """ - Fill: + Notes: Implement the specific data source API calls, processing, - functionality required for retrieving query from the data source + functionality required for retrieving query from the data source. 
Args: query (list): a list of filters (which collectively are the query) @@ -132,8 +133,8 @@ class DataStore(object): def add(self, stix_objs): """ - Fill: - -translate add() to the appropriate DataSink call() + Notes: + Translate add() to the appropriate DataSink call(). """ return self.sink.add(stix_objs=stix_objs) @@ -145,18 +146,15 @@ class DataSink(object): different sink components. Attributes: - id_ (str): A unique UUIDv4 to identify this DataSink. - name (str): The descriptive name that identifies this DataSink. + id (str): A unique UUIDv4 to identify this DataSink. """ - - def __init__(self, name="DataSink"): - self.name = name - self.id_ = make_id() + def __init__(self): + self.id = make_id() def add(self, stix_objs): """ - Fill: + Notes: Implement the specific data sink API calls, processing, functionality required for adding data to the sink @@ -170,15 +168,12 @@ class DataSource(object): different source components. Attributes: - id_ (str): A unique UUIDv4 to identify this DataSource. - name (str): The descriptive name that identifies this DataSource. + id (str): A unique UUIDv4 to identify this DataSource. filters (set): A collection of filters present in this DataSource. """ - - def __init__(self, name="DataSource"): - self.name = name - self.id_ = make_id() + def __init__(self): + self.id = make_id() self.filters = set() def get(self, stix_id, _composite_filters=None): @@ -203,12 +198,11 @@ class DataSource(object): def all_versions(self, stix_id, _composite_filters=None): """ - Fill: - -Similar to get() except returns list of all object versions of - the specified "id". - - -implement the specific data source API calls, processing, - functionality required for retrieving data from the data source + Notes: + Similar to get() except returns list of all object versions of + the specified "id". In addition, implement the specific data + source API calls, processing, functionality required for retrieving + data from the data source. 
Args: stix_id (str): The id of the STIX 2.0 object to retrieve. Should @@ -249,26 +243,24 @@ class DataSource(object): Args: filters (list): list of filters (dict) to add to the Data Source. """ - for filter_ in filters: - self.add_filter(filter_) + for filter in filters: + self.add_filter(filter) - def add_filter(self, filter_): + def add_filter(self, filter): """Add a filter.""" # check filter field is a supported STIX 2.0 common field - if filter_.field not in STIX_COMMON_FIELDS: + if filter.field not in STIX_COMMON_FIELDS: raise ValueError("Filter 'field' is not a STIX 2.0 common property. Currently only STIX object common properties supported") # check filter operator is supported - if filter_.op not in FILTER_OPS: - raise ValueError("Filter operation(from 'op' field) not supported") + if filter.op not in FILTER_OPS: + raise ValueError("Filter operation (from 'op' field) not supported") # check filter value type is supported - if type(filter_.value) not in FILTER_VALUE_TYPES: + if type(filter.value) not in FILTER_VALUE_TYPES: raise ValueError("Filter 'value' type is not supported. The type(value) must be python immutable type or dictionary") - self.filters.add(filter_) - - # TODO: Do we need a remove_filter function? 
+ self.filters.add(filter) def apply_common_filters(self, stix_objs, query): """Evaluates filters against a set of STIX 2.0 objects @@ -289,19 +281,19 @@ class DataSource(object): # evaluate objects against filter for stix_obj in stix_objs: clean = True - for filter_ in query: + for filter in query: try: # skip filter as filter was identified (when added) as # not a common filter - if filter_.field not in STIX_COMMON_FIELDS: - raise Exception("Error, field: {0} is not supported for filtering on.".format(filter_.field)) + if filter.field not in STIX_COMMON_FIELDS: + raise Exception("Error, field: {0} is not supported for filtering on.".format(filter.field)) # For properties like granular_markings and external_references # need to break the first property from the string. - if "." in filter_.field: - field = filter_.field.split(".")[0] + if "." in filter.field: + field = filter.field.split(".")[0] else: - field = filter_.field + field = filter.field # check filter "field" is in STIX object - if cant be # applied due to STIX object, STIX object is discarded @@ -310,12 +302,12 @@ class DataSource(object): clean = False break - match = getattr(STIXCommonPropertyFilters, field)(filter_, stix_obj) + match = getattr(STIXCommonPropertyFilters, field)(filter, stix_obj) if not match: clean = False break elif match == -1: - raise Exception("Error, filter operator: {0} not supported for specified field: {1}".format(filter_.op, filter_.field)) + raise Exception("Error, filter operator: {0} not supported for specified field: {1}".format(filter.op, filter.field)) except Exception as e: raise ValueError(e) @@ -361,7 +353,7 @@ class CompositeDataSource(DataSource): controlled and used by the Data Source Controller object. """ - def __init__(self, name="CompositeDataSource"): + def __init__(self): """ Creates a new STIX Data Source. @@ -370,7 +362,7 @@ class CompositeDataSource(DataSource): CompositeDataSource instance. 
""" - super(CompositeDataSource, self).__init__(name=name) + super(CompositeDataSource, self).__init__() self.data_sources = {} def get(self, stix_id, _composite_filters=None): @@ -498,13 +490,13 @@ class CompositeDataSource(DataSource): """ for ds in data_sources: if issubclass(ds.__class__, DataSource): - if ds.id_ in self.data_sources: + if ds.id in self.data_sources: # data source already attached to Composite Data Source continue # add data source to Composite Data Source # (its id will be its key identifier) - self.data_sources[ds.id_] = ds + self.data_sources[ds.id] = ds else: # the Data Source object is not a proper subclass # of DataSource Abstract Class @@ -520,9 +512,9 @@ class CompositeDataSource(DataSource): data_source_ids (list): a list of Data Source identifiers. """ - for id_ in data_source_ids: - if id_ in self.data_sources: - del self.data_sources[id_] + for id in data_source_ids: + if id in self.data_sources: + del self.data_sources[id] else: raise ValueError("DataSource 'id' not found in CompositeDataSource collection.") return @@ -538,63 +530,63 @@ class STIXCommonPropertyFilters(object): """ """ @classmethod - def _all(cls, filter_, stix_obj_field): + def _all(cls, filter, stix_obj_field): """all filter operations (for filters whose value type can be applied to any operation type)""" - if filter_.op == "=": - return stix_obj_field == filter_.value - elif filter_.op == "!=": - return stix_obj_field != filter_.value - elif filter_.op == "in": - return stix_obj_field in filter_.value - elif filter_.op == ">": - return stix_obj_field > filter_.value - elif filter_.op == "<": - return stix_obj_field < filter_.value - elif filter_.op == ">=": - return stix_obj_field >= filter_.value - elif filter_.op == "<=": - return stix_obj_field <= filter_.value + if filter.op == "=": + return stix_obj_field == filter.value + elif filter.op == "!=": + return stix_obj_field != filter.value + elif filter.op == "in": + return stix_obj_field in filter.value + elif 
filter.op == ">": + return stix_obj_field > filter.value + elif filter.op == "<": + return stix_obj_field < filter.value + elif filter.op == ">=": + return stix_obj_field >= filter.value + elif filter.op == "<=": + return stix_obj_field <= filter.value else: return -1 @classmethod - def _id(cls, filter_, stix_obj_id): + def _id(cls, filter, stix_obj_id): """base filter types""" - if filter_.op == "=": - return stix_obj_id == filter_.value - elif filter_.op == "!=": - return stix_obj_id != filter_.value + if filter.op == "=": + return stix_obj_id == filter.value + elif filter.op == "!=": + return stix_obj_id != filter.value else: return -1 @classmethod - def _boolean(cls, filter_, stix_obj_field): - if filter_.op == "=": - return stix_obj_field == filter_.value - elif filter_.op == "!=": - return stix_obj_field != filter_.value + def _boolean(cls, filter, stix_obj_field): + if filter.op == "=": + return stix_obj_field == filter.value + elif filter.op == "!=": + return stix_obj_field != filter.value else: return -1 @classmethod - def _string(cls, filter_, stix_obj_field): - return cls._all(filter_, stix_obj_field) + def _string(cls, filter, stix_obj_field): + return cls._all(filter, stix_obj_field) @classmethod - def _timestamp(cls, filter_, stix_obj_timestamp): - return cls._all(filter_, stix_obj_timestamp) + def _timestamp(cls, filter, stix_obj_timestamp): + return cls._all(filter, stix_obj_timestamp) # STIX 2.0 Common Property filters @classmethod - def created(cls, filter_, stix_obj): - return cls._timestamp(filter_, stix_obj["created"]) + def created(cls, filter, stix_obj): + return cls._timestamp(filter, stix_obj["created"]) @classmethod - def created_by_ref(cls, filter_, stix_obj): - return cls._id(filter_, stix_obj["created_by_ref"]) + def created_by_ref(cls, filter, stix_obj): + return cls._id(filter, stix_obj["created_by_ref"]) @classmethod - def external_references(cls, filter_, stix_obj): + def external_references(cls, filter, stix_obj): """ STIX object's 
can have a list of external references @@ -608,14 +600,14 @@ class STIXCommonPropertyFilters(object): """ for er in stix_obj["external_references"]: # grab er property name from filter field - filter_field = filter_.field.split(".")[1] - r = cls._string(filter_, er[filter_field]) + filter_field = filter.field.split(".")[1] + r = cls._string(filter, er[filter_field]) if r: return r return False @classmethod - def granular_markings(cls, filter_, stix_obj): + def granular_markings(cls, filter, stix_obj): """ STIX object's can have a list of granular marking references @@ -626,46 +618,46 @@ class STIXCommonPropertyFilters(object): """ for gm in stix_obj["granular_markings"]: # grab gm property name from filter field - filter_field = filter_.field.split(".")[1] + filter_field = filter.field.split(".")[1] if filter_field == "marking_ref": - return cls._id(filter_, gm[filter_field]) + return cls._id(filter, gm[filter_field]) elif filter_field == "selectors": for selector in gm[filter_field]: - r = cls._string(filter_, selector) + r = cls._string(filter, selector) if r: return r return False @classmethod - def id(cls, filter_, stix_obj): - return cls._id(filter_, stix_obj["id"]) + def id(cls, filter, stix_obj): + return cls._id(filter, stix_obj["id"]) @classmethod - def labels(cls, filter_, stix_obj): + def labels(cls, filter, stix_obj): for label in stix_obj["labels"]: - r = cls._string(filter_, label) + r = cls._string(filter, label) if r: return r return False @classmethod - def modified(cls, filter_, stix_obj): - return cls._timestamp(filter_, stix_obj["modified"]) + def modified(cls, filter, stix_obj): + return cls._timestamp(filter, stix_obj["modified"]) @classmethod - def object_marking_refs(cls, filter_, stix_obj): + def object_marking_refs(cls, filter, stix_obj): for marking_id in stix_obj["object_marking_refs"]: - r = cls._id(filter_, marking_id) + r = cls._id(filter, marking_id) if r: return r return False @classmethod - def revoked(cls, filter_, stix_obj): - 
return cls._boolean(filter_, stix_obj["revoked"]) + def revoked(cls, filter, stix_obj): + return cls._boolean(filter, stix_obj["revoked"]) @classmethod - def type(cls, filter_, stix_obj): - return cls._string(filter_, stix_obj["type"]) + def type(cls, filter, stix_obj): + return cls._string(filter, stix_obj["type"]) diff --git a/stix2/sources/filesystem.py b/stix2/sources/filesystem.py index 61e7c88..cf69675 100644 --- a/stix2/sources/filesystem.py +++ b/stix2/sources/filesystem.py @@ -19,8 +19,8 @@ from stix2.sources import DataSink, DataSource, DataStore, Filter class FileSystemStore(DataStore): """ """ - def __init__(self, name="FileSystemStore", stix_dir="stix_data"): - super(FileSystemStore, self).__init__(name=name) + def __init__(self, stix_dir="stix_data"): + super(FileSystemStore, self).__init__() self.source = FileSystemSource(stix_dir=stix_dir) self.sink = FileSystemSink(stix_dir=stix_dir) @@ -28,8 +28,8 @@ class FileSystemStore(DataStore): class FileSystemSink(DataSink): """ """ - def __init__(self, name="FileSystemSink", stix_dir="stix_data"): - super(FileSystemSink, self).__init__(name=name) + def __init__(self, stix_dir="stix_data"): + super(FileSystemSink, self).__init__() self.stix_dir = os.path.abspath(stix_dir) # check directory path exists @@ -58,8 +58,8 @@ class FileSystemSink(DataSink): class FileSystemSource(DataSource): """ """ - def __init__(self, name="FileSystemSource", stix_dir="stix_data"): - super(FileSystemSource, self).__init__(name=name) + def __init__(self, stix_dir="stix_data"): + super(FileSystemSource, self).__init__() self.stix_dir = os.path.abspath(stix_dir) # check directory path exists @@ -71,15 +71,13 @@ class FileSystemSource(DataSource): return self.stix_dir @stix_dir.setter - def stix_dir(self, dir_): - self.stix_dir = dir_ + def stix_dir(self, dir): + self.stix_dir = dir def get(self, stix_id, _composite_filters=None): """ """ - query = [ - Filter("id", "=", stix_id) - ] + query = [Filter("id", "=", stix_id)] all_data = 
self.query(query=query, _composite_filters=_composite_filters) @@ -95,17 +93,6 @@ class FileSystemSource(DataSource): (Approved by G.B.) """ - - # query = [ - # { - # "field": "id", - # "op": "=", - # "value": stix_id - # } - # ] - - # all_data = self.query(query=query, _composite_filters=_composite_filters) - return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)] def query(self, query=None, _composite_filters=None): @@ -134,13 +121,13 @@ class FileSystemSource(DataSource): # the corresponding subdirectories as well include_paths = [] declude_paths = [] - if "type" in [filter_.field for filter_ in file_filters]: - for filter_ in file_filters: - if filter_.field == "type": - if filter_.op == "=": - include_paths.append(os.path.join(self.stix_dir, filter_.value)) - elif filter_.op == "!=": - declude_paths.append(os.path.join(self.stix_dir, filter_.value)) + if "type" in [filter.field for filter in file_filters]: + for filter in file_filters: + if filter.field == "type": + if filter.op == "=": + include_paths.append(os.path.join(self.stix_dir, filter.value)) + elif filter.op == "!=": + declude_paths.append(os.path.join(self.stix_dir, filter.value)) else: # have to walk entire STIX directory include_paths.append(self.stix_dir) @@ -157,35 +144,35 @@ class FileSystemSource(DataSource): # user has specified types that are not wanted (i.e. "!=") # so query will look in all STIX directories that are not # the specified type. 
Compile correct dir paths - for dir_ in os.listdir(self.stix_dir): - if os.path.abspath(dir_) not in declude_paths: - include_paths.append(os.path.abspath(dir_)) + for dir in os.listdir(self.stix_dir): + if os.path.abspath(dir) not in declude_paths: + include_paths.append(os.path.abspath(dir)) # grab stix object ID as well - if present in filters, as # may forgo the loading of STIX content into memory - if "id" in [filter_.field for filter_ in file_filters]: - for filter_ in file_filters: - if filter_.field == "id" and filter_.op == "=": - id_ = filter_.value + if "id" in [filter.field for filter in file_filters]: + for filter in file_filters: + if filter.field == "id" and filter.op == "=": + id = filter.value break else: - id_ = None + id = None else: - id_ = None + id = None # now iterate through all STIX objs for path in include_paths: for root, dirs, files in os.walk(path): - for file_ in files: - if id_: - if id_ == file_.split(".")[0]: + for file in files: + if id: + if id == file.split(".")[0]: # since ID is specified in one of filters, can evaluate against filename first without loading - stix_obj = json.load(file_)["objects"] + stix_obj = json.load(file)["objects"] # check against other filters, add if match all_data.extend(self.apply_common_filters([stix_obj], query)) else: # have to load into memory regardless to evaluate other filters - stix_obj = json.load(file_)["objects"] + stix_obj = json.load(file)["objects"] all_data.extend(self.apply_common_filters([stix_obj], query)) all_data = self.deduplicate(all_data) @@ -195,7 +182,7 @@ class FileSystemSource(DataSource): """ """ file_filters = [] - for filter_ in query: - if filter_.field == "id" or filter_.field == "type": - file_filters.append(filter_) + for filter in query: + if filter.field == "id" or filter.field == "type": + file_filters.append(filter) return file_filters diff --git a/stix2/sources/memory.py b/stix2/sources/memory.py index 28d929d..696bdd1 100644 --- a/stix2/sources/memory.py +++ 
b/stix2/sources/memory.py @@ -18,19 +18,44 @@ Notes: """ +import collections import json import os -from stix2validator import validate_string +from stix2validator import validate_instance from stix2 import Bundle from stix2.sources import DataSink, DataSource, DataStore, Filter +def _add(store, stix_data): + """Adds stix objects to MemoryStore/Source/Sink.""" + if isinstance(stix_data, collections.Mapping): + # stix objects are in a bundle + # verify STIX json data + r = validate_instance(stix_data) + # make dictionary of the objects for easy lookup + if r.is_valid: + for stix_obj in stix_data["objects"]: + store.data[stix_obj["id"]] = stix_obj + else: + raise ValueError("Error: data passed was found to not be valid by the STIX 2 Validator: \n%s", r.as_dict()) + elif isinstance(stix_data, list): + # stix objects are in a list + for stix_obj in stix_data: + r = validate_instance(stix_obj) + if r.is_valid: + store.data[stix_obj["id"]] = stix_obj + else: + raise ValueError("Error: STIX object %s is not valid under STIX 2 validator.\n%s", stix_obj["id"], r) + else: + raise ValueError("stix_data must be in bundle format or raw list") + + class MemoryStore(DataStore): """ """ - def __init__(self, name="MemoryStore", stix_data=None): + def __init__(self, stix_data): """ Notes: It doesn't make sense to create a MemoryStore by passing @@ -38,30 +63,11 @@ class MemoryStore(DataStore): be data concurrency issues. Just as easy to create new MemoryStore. 
""" - super(MemoryStore, self).__init__(name=name) + super(MemoryStore, self).__init__() self.data = {} if stix_data: - if type(stix_data) == dict: - # stix objects are in a bundle - # verify STIX json data - r = validate_string(json.dumps(stix_data)) - # make dictionary of the objects for easy lookup - if r.is_valid: - for stix_obj in stix_data["objects"]: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator") - print(r) - elif type(stix_data) == list: - # stix objects are in a list - for stix_obj in stix_data: - r = validate_string(json.dumps(stix_obj)) - if r.is_valid: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: STIX object %s is not valid under STIX 2 validator." % stix_obj["id"]) - print(r) + _add(self, stix_data) self.source = MemorySource(stix_data=self.data, _store=True) self.sink = MemorySink(stix_data=self.data, _store=True) @@ -76,72 +82,28 @@ class MemoryStore(DataStore): class MemorySink(DataSink): """ """ - def __init__(self, name="MemorySink", stix_data=None, _store=False): + def __init__(self, stix_data, _store=False): """ Args: stix_data (dictionary OR list): valid STIX 2.0 content in bundle or a list. - name (string): optional name tag of the data source _store (bool): if the MemorySink is a part of a DataStore, in which case "stix_data" is a direct reference to shared memory with DataSource. 
""" - super(MemorySink, self).__init__(name=name) + super(MemorySink, self).__init__() + self.data = {} if _store: self.data = stix_data - else: - self.data = {} - if stix_data: - if type(stix_data) == dict: - # stix objects are in a bundle - # verify STIX json data - r = validate_string(json.dumps(stix_data)) - # make dictionary of the objects for easy lookup - if r.is_valid: - for stix_obj in stix_data["objects"]: - - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator") - print(r) - self.data = {} - elif type(stix_data) == list: - # stix objects are in a list - for stix_obj in stix_data: - r = validate_string(json.dumps(stix_obj)) - if r.is_valid: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: STIX object %s is not valid under STIX 2 validator." % stix_obj["id"]) - print(r) - else: - raise ValueError("stix_data must be in bundle format or raw list") + elif stix_data: + self.add(stix_data) def add(self, stix_data): """ """ - if type(stix_data) == dict: - # stix data is in bundle - r = validate_string(json.dumps(stix_data)) - if r.is_valid: - for stix_obj in stix_data["objects"]: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: json data passed to MemorySink() was found to not be validated by STIX 2 Validator") - print(r) - elif type(stix_data) == list: - # stix data is in list - for stix_obj in stix_data: - r = validate_string(json.dumps(stix_obj)) - if r.is_valid: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: STIX object %s is not valid under STIX 2 validator." 
% stix_obj["id"]) - print(r) - else: - raise ValueError("stix_data must be in bundle format or raw list") + _add(self, stix_data) def save_to_file(self, file_path): """ @@ -151,47 +113,23 @@ class MemorySink(DataSink): class MemorySource(DataSource): - def __init__(self, name="MemorySource", stix_data=None, _store=False): + def __init__(self, stix_data, _store=False): """ Args: stix_data (dictionary OR list): valid STIX 2.0 content in bundle or list. - name (string): optional name tag of the data source. _store (bool): if the MemorySource is a part of a DataStore, in which case "stix_data" is a direct reference to shared memory with DataSink. """ - super(MemorySource, self).__init__(name=name) + super(MemorySource, self).__init__() + self.data = {} if _store: self.data = stix_data - else: - self.data = {} - if stix_data: - if type(stix_data) == dict: - # STIX objects are in a bundle - # verify STIX json data - r = validate_string(json.dumps(stix_data)) - # make dictionary of the objects for easy lookup - if r.is_valid: - for stix_obj in stix_data["objects"]: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: json data passed to MemorySource() was found to not be validated by STIX 2 Validator") - print(r.as_dict()) - self.data = {} - elif type(stix_data) == list: - # STIX objects are in a list - for stix_obj in stix_data: - r = validate_string(json.dumps(stix_obj)) - if r.is_valid: - self.data[stix_obj["id"]] = stix_obj - else: - print("Error: STIX object %s is not valid under STIX 2 validator." 
% stix_obj["id"]) - print(r) - else: - raise ValueError("stix_data must be in bundle format or raw list") + elif stix_data: + _add(self, stix_data) def get(self, stix_id, _composite_filters=None): """ @@ -205,9 +143,7 @@ class MemorySource(DataSource): return stix_obj # if there are filters from the composite level, process full query - query = [ - Filter("id", "=", stix_id) - ] + query = [Filter("id", "=", stix_id)] all_data = self.query(query=query, _composite_filters=_composite_filters) @@ -219,22 +155,21 @@ class MemorySource(DataSource): def all_versions(self, stix_id, _composite_filters=None): """ Notes: - Since Memory sources/sinks don't handle multiple versions of a - STIX object, this operation is futile. Translate call to get(). - (Approved by G.B.) + Similar to get() except returns list of all object versions of + the specified "id". + + Args: + stix_id (str): The id of the STIX 2.0 object to retrieve. Should + return a list of objects, all the versions of the object + specified by the "id". + + _composite_filters (list): list of filters passed from the + Composite Data Source. + + Returns: + stix_objs (list): STIX objects that matched ``stix_id``. """ - - # query = [ - # { - # "field": "id", - # "op": "=", - # "value": stix_id - # } - # ] - - # all_data = self.query(query=query, _composite_filters=_composite_filters) - return [self.get(stix_id=stix_id, _composite_filters=_composite_filters)] def query(self, query=None, _composite_filters=None): @@ -245,14 +180,11 @@ class MemorySource(DataSource): # combine all query filters if self.filters: - query.extend(self.filters.values()) + query.extend(list(self.filters)) if _composite_filters: query.extend(_composite_filters) - # deduplicate data before filtering -> Deduplication is not required as Memory only ever holds one version of an object - # all_data = self.deduplicate(all_data) - - # apply STIX common property filters + # Apply STIX common property filters. 
all_data = self.apply_common_filters(self.data.values(), query) return all_data @@ -263,11 +195,10 @@ class MemorySource(DataSource): file_path = os.path.abspath(file_path) stix_data = json.load(open(file_path, "r")) - r = validate_string(json.dumps(stix_data)) + r = validate_instance(stix_data) if r.is_valid: for stix_obj in stix_data["objects"]: self.data[stix_obj["id"]] = stix_obj - else: - print("Error: STIX data loaded from file (%s) was found to not be validated by STIX 2 Validator" % file_path) - print(r) + + raise ValueError("Error: STIX data loaded from file (%s) was found to not be validated by STIX 2 Validator.\n%s", file_path, r) diff --git a/stix2/sources/taxii.py b/stix2/sources/taxii.py index 4edeeed..eec5296 100644 --- a/stix2/sources/taxii.py +++ b/stix2/sources/taxii.py @@ -20,7 +20,7 @@ TAXII_FILTERS = ['added_after', 'id', 'type', 'version'] class TAXIICollectionStore(DataStore): """ """ - def __init__(self, collection, name="TAXIICollectionStore"): + def __init__(self, collection): """ Create a new TAXII Collection Data store @@ -28,7 +28,7 @@ class TAXIICollectionStore(DataStore): collection (taxii2.Collection): Collection instance """ - super(TAXIICollectionStore, self).__init__(name=name) + super(TAXIICollectionStore, self).__init__() self.source = TAXIICollectionSource(collection) self.sink = TAXIICollectionSink(collection) @@ -36,8 +36,8 @@ class TAXIICollectionStore(DataStore): class TAXIICollectionSink(DataSink): """ """ - def __init__(self, collection, name="TAXIICollectionSink"): - super(TAXIICollectionSink, self).__init__(name=name) + def __init__(self, collection): + super(TAXIICollectionSink, self).__init__() self.collection = collection def add(self, stix_obj): @@ -56,8 +56,8 @@ class TAXIICollectionSink(DataSink): class TAXIICollectionSource(DataSource): """ """ - def __init__(self, collection, name="TAXIICollectionSource"): - super(TAXIICollectionSource, self).__init__(name=name) + def __init__(self, collection): + 
super(TAXIICollectionSource, self).__init__() self.collection = collection def get(self, stix_id, _composite_filters=None): diff --git a/stix2/test/test_data_sources.py b/stix2/test/test_data_sources.py index ee37825..a11e02d 100644 --- a/stix2/test/test_data_sources.py +++ b/stix2/test/test_data_sources.py @@ -1,9 +1,10 @@ import pytest from taxii2client import Collection +import stix2 from stix2.sources import (CompositeDataSource, DataSink, DataSource, DataStore, Filter, make_id, taxii) -from stix2.sources.memory import MemorySource +from stix2.sources.memory import MemorySource, MemoryStore COLLECTION_URL = 'https://example.com/api1/collections/91a7b528-80eb-42ed-a74d-c6fbd5a26116/' @@ -121,7 +122,7 @@ STIX_OBJS2 = [ ] -def test_ds_smoke(): +def test_ds_abstract_class_smoke(): ds1 = DataSource() ds2 = DataSink() ds3 = DataStore(source=ds1, sink=ds2) @@ -139,14 +140,36 @@ def test_ds_smoke(): ds3.query([Filter("id", "=", "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111")]) +def test_memory_store_smoke(): + # Initialize MemoryStore with dict + ms = MemoryStore(STIX_OBJS1) + + # Add item to sink + ms.add(dict(id="bundle--%s" % make_id(), + objects=STIX_OBJS2, + spec_version="2.0", + type="bundle")) + + resp = ms.all_versions("indicator--d81f86b9-975b-bc0b-775e-810c5ad45a4f") + assert len(resp) == 1 + + resp = ms.get("indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f") + assert resp["id"] == "indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f" + + query = [Filter('type', '=', 'malware')] + + resp = ms.query(query) + assert len(resp) == 0 + + def test_ds_taxii(collection): ds = taxii.TAXIICollectionSource(collection) - assert ds.name == 'TAXIICollectionSource' + assert ds.collection is not None def test_ds_taxii_name(collection): - ds = taxii.TAXIICollectionSource(collection, name='My Data Source Name') - assert ds.name == "My Data Source Name" + ds = taxii.TAXIICollectionSource(collection) + assert ds.collection is not None def test_parse_taxii_filters(): @@ -209,7 +232,7 
@@ def test_add_get_remove_filter(): with pytest.raises(ValueError) as excinfo: ds.add_filter(invalid_filters[1]) - assert str(excinfo.value) == "Filter operation(from 'op' field) not supported" + assert str(excinfo.value) == "Filter operation (from 'op' field) not supported" with pytest.raises(ValueError) as excinfo: ds.add_filter(invalid_filters[2]) @@ -270,6 +293,22 @@ def test_apply_common_filters(): "source_ref": "indicator--a932fcc6-e032-176c-126f-cb970a5a1ade", "target_ref": "malware--fdd60b30-b67c-11e3-b0b9-f01faf20d111", "type": "relationship" + }, + { + "id": "vulnerability--ee916c28-c7a4-4d0d-ad56-a8d357f89fef", + "created": "2016-02-14T00:00:00.000Z", + "created_by_ref": "identity--00000000-0000-0000-0000-b8e91df99dc9", + "modified": "2016-02-14T00:00:00.000Z", + "type": "vulnerability", + "name": "CVE-2014-0160", + "description": "The (1) TLS...", + "external_references": [ + { + "source_name": "cve", + "external_id": "CVE-2014-0160" + } + ], + "labels": ["heartbleed", "has-logo"] } ] @@ -284,50 +323,90 @@ def test_apply_common_filters(): Filter("object_marking_refs", "=", "marking-definition--613f2e26-407d-48c7-9eca-b8e91df99dc9"), Filter("granular_markings.selectors", "in", "relationship_type"), Filter("granular_markings.marking_ref", "=", "marking-definition--5e57c739-391a-4eb3-b6be-7d15ca92d5ed"), + Filter("external_references.external_id", "in", "CVE-2014-0160,CVE-2017-6608"), + Filter("created_by_ref", "=", "identity--00000000-0000-0000-0000-b8e91df99dc9"), + Filter("object_marking_refs", "=", "marking-definition--613f2e26-0000-0000-0000-b8e91df99dc9"), + Filter("granular_markings.selectors", "in", "description"), + Filter("external_references.source_name", "=", "CVE"), ] ds = DataSource() + # "Return any object whose type is not relationship" resp = ds.apply_common_filters(stix_objs, [filters[0]]) ids = [r['id'] for r in resp] assert stix_objs[0]['id'] in ids assert stix_objs[1]['id'] in ids + assert stix_objs[3]['id'] in ids + assert len(ids) 
== 3 + # "Return any object that matched id relationship--2f9a9aa9-108a-4333-83e2-4fb25add0463" resp = ds.apply_common_filters(stix_objs, [filters[1]]) assert resp[0]['id'] == stix_objs[2]['id'] + assert len(resp) == 1 + # "Return any object that contains remote-access-trojan in labels" resp = ds.apply_common_filters(stix_objs, [filters[2]]) assert resp[0]['id'] == stix_objs[0]['id'] - - resp = ds.apply_common_filters(stix_objs, [filters[3]]) - assert resp[0]['id'] == stix_objs[0]['id'] assert len(resp) == 1 + # "Return any object created after 2015-01-01T01:00:00.000Z" + resp = ds.apply_common_filters(stix_objs, [filters[3]]) + assert resp[0]['id'] == stix_objs[0]['id'] + assert len(resp) == 2 + + # "Return any revoked object" resp = ds.apply_common_filters(stix_objs, [filters[4]]) assert resp[0]['id'] == stix_objs[2]['id'] assert len(resp) == 1 + # "Return any object that is not revoked" # Note that if 'revoked' property is not present in object. - # Currently we can't use such an expression to filter for... + # Currently we can't use such an expression to filter for... :( resp = ds.apply_common_filters(stix_objs, [filters[5]]) assert len(resp) == 0 + # Assert unknown operator for _boolean() raises exception.
with pytest.raises(ValueError) as excinfo: ds.apply_common_filters(stix_objs, [filters[6]]) assert str(excinfo.value) == ("Error, filter operator: {0} not supported " - "for specified field: {1}").format(filters[6].op, - filters[6].field) + "for specified field: {1}" + .format(filters[6].op, filters[6].field)) + # "Return any object that matches marking-definition--613f2e26-407d-48c7-9eca-b8e91df99dc9 in object_marking_refs" resp = ds.apply_common_filters(stix_objs, [filters[7]]) assert resp[0]['id'] == stix_objs[2]['id'] assert len(resp) == 1 + # "Return any object that contains relationship_type in their selectors AND + # also has marking-definition--5e57c739-391a-4eb3-b6be-7d15ca92d5ed in marking_ref" resp = ds.apply_common_filters(stix_objs, [filters[8], filters[9]]) assert resp[0]['id'] == stix_objs[2]['id'] assert len(resp) == 1 - # These are used with STIX_OBJS2 + # "Return any object that contains CVE-2014-0160,CVE-2017-6608 in their external_id" + resp = ds.apply_common_filters(stix_objs, [filters[10]]) + assert resp[0]['id'] == stix_objs[3]['id'] + assert len(resp) == 1 + + # "Return any object that matches created_by_ref identity--00000000-0000-0000-0000-b8e91df99dc9" + resp = ds.apply_common_filters(stix_objs, [filters[11]]) + assert len(resp) == 1 + + # "Return any object that matches marking-definition--613f2e26-0000-0000-0000-b8e91df99dc9 in object_marking_refs" (None) + resp = ds.apply_common_filters(stix_objs, [filters[12]]) + assert len(resp) == 0 + + # "Return any object that contains description in its selectors" (None) + resp = ds.apply_common_filters(stix_objs, [filters[13]]) + assert len(resp) == 0 + + # "Return any object that matches CVE in source_name" (None, case sensitive) + resp = ds.apply_common_filters(stix_objs, [filters[14]]) + assert len(resp) == 0 + + # These filters are used with STIX_OBJS2 object collection.
more_filters = [ Filter("modified", "<", "2017-01-28T13:49:53.935Z"), Filter("modified", ">", "2017-01-28T13:49:53.935Z"), @@ -339,45 +418,56 @@ def test_apply_common_filters(): Filter("notacommonproperty", "=", "bar"), ] + # "Return any object modified before 2017-01-28T13:49:53.935Z" resp = ds.apply_common_filters(STIX_OBJS2, [more_filters[0]]) assert resp[0]['id'] == STIX_OBJS2[1]['id'] assert len(resp) == 2 + # "Return any object modified after 2017-01-28T13:49:53.935Z" resp = ds.apply_common_filters(STIX_OBJS2, [more_filters[1]]) assert resp[0]['id'] == STIX_OBJS2[0]['id'] assert len(resp) == 1 + # "Return any object modified after or on 2017-01-28T13:49:53.935Z" resp = ds.apply_common_filters(STIX_OBJS2, [more_filters[2]]) assert resp[0]['id'] == STIX_OBJS2[0]['id'] assert len(resp) == 3 + # "Return any object modified before or on 2017-01-28T13:49:53.935Z" resp = ds.apply_common_filters(STIX_OBJS2, [more_filters[3]]) assert resp[0]['id'] == STIX_OBJS2[1]['id'] assert len(resp) == 2 + # Assert unknown operator for _all() raises exception. with pytest.raises(ValueError) as excinfo: ds.apply_common_filters(STIX_OBJS2, [more_filters[4]]) assert str(excinfo.value) == ("Error, filter operator: {0} not supported " - "for specified field: {1}").format(more_filters[4].op, - more_filters[4].field) + "for specified field: {1}" + .format(more_filters[4].op, + more_filters[4].field)) + # "Return any object whose id is not indicator--d81f86b8-975b-bc0b-775e-810c5ad45a4f" resp = ds.apply_common_filters(STIX_OBJS2, [more_filters[5]]) assert resp[0]['id'] == STIX_OBJS2[0]['id'] assert len(resp) == 1 + # Assert unknown operator for _id() raises exception. 
with pytest.raises(ValueError) as excinfo: ds.apply_common_filters(STIX_OBJS2, [more_filters[6]]) assert str(excinfo.value) == ("Error, filter operator: {0} not supported " - "for specified field: {1}").format(more_filters[6].op, - more_filters[6].field) + "for specified field: {1}" + .format(more_filters[6].op, + more_filters[6].field)) + # Assert unknown field raises exception. with pytest.raises(ValueError) as excinfo: ds.apply_common_filters(STIX_OBJS2, [more_filters[7]]) assert str(excinfo.value) == ("Error, field: {0} is not supported for " - "filtering on.".format(more_filters[7].field)) + "filtering on." + .format(more_filters[7].field)) def test_deduplicate(): @@ -409,12 +499,12 @@ def test_add_remove_composite_datasource(): assert len(cds.get_all_data_sources()) == 2 - cds.remove_data_source([ds1.id_, ds2.id_]) + cds.remove_data_source([ds1.id, ds2.id]) assert len(cds.get_all_data_sources()) == 0 with pytest.raises(ValueError): - cds.remove_data_source([ds3.id_]) + cds.remove_data_source([ds3.id]) def test_composite_datasource_operations(): @@ -448,25 +538,3 @@ def test_composite_datasource_operations(): # STIX_OBJS2 has indicator with later time, one with different id, one with # original time in STIX_OBJS1 assert len(results) == 3 - -# def test_data_source_file(): -# ds = file.FileDataSource() -# -# assert ds.name == "DataSource" -# -# -# def test_data_source_name(): -# ds = file.FileDataSource(name="My File Data Source") -# -# assert ds.name == "My File Data Source" -# -# -# def test_data_source_get(): -# ds = file.FileDataSource(name="My File Data Source") -# -# with pytest.raises(NotImplementedError): -# ds.get("foo") -# -# #filter testing -# def test_add_filter(): -# ds = file.FileDataSource()