From f9a52eeed3236c9721d889d4b32e6f48cce1c120 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 26 Feb 2021 19:19:33 -0500 Subject: [PATCH 01/10] WIP: changes to both similarity functions, expose settings --- stix2/environment.py | 18 ++-- stix2/equivalence/graph/__init__.py | 62 ++++--------- stix2/equivalence/object/__init__.py | 126 ++++++++++++++++++++------- 3 files changed, 123 insertions(+), 83 deletions(-) diff --git a/stix2/environment.py b/stix2/environment.py index d0f694e..75e5fa5 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -189,7 +189,8 @@ class Environment(DataStoreMixin): return None @staticmethod - def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): + def object_similarity(obj1, obj2, prop_scores={}, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict): """This method returns a measure of how similar the two objects are. Args: @@ -220,10 +221,12 @@ class Environment(DataStoreMixin): see `the Committee Note `__. """ - return object_similarity(obj1, obj2, prop_scores, **weight_dict) + return object_similarity(obj1, obj2, prop_scores, ignore_spec_version, + versioning_checks, max_depth, **weight_dict) @staticmethod - def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict): + def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict): """This method returns a true/false value if two objects are semantically equivalent. Internally, it calls the object_similarity function and compares it against the given threshold value. @@ -263,7 +266,8 @@ class Environment(DataStoreMixin): return object_equivalence(obj1, obj2, prop_scores, threshold, **weight_dict) @staticmethod - def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): + def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict): """This method returns a similarity score for two given graphs. Each DataStore can contain a connected or disconnected graph and the final result is weighted over the amount of objects we managed to compare. @@ -298,10 +302,12 @@ class Environment(DataStoreMixin): see `the Committee Note `__. """ - return graph_similarity(ds1, ds2, prop_scores, **weight_dict) + return graph_similarity(ds1, ds2, prop_scores, ignore_spec_version, + versioning_checks, max_depth, **weight_dict) @staticmethod - def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict): + def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict): """This method returns a true/false value if two graphs are semantically equivalent. Internally, it calls the graph_similarity function and compares it against the given threshold value. diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py index e78624e..1dcccf1 100644 --- a/stix2/equivalence/graph/__init__.py +++ b/stix2/equivalence/graph/__init__.py @@ -53,7 +53,8 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict): return False -def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): +def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict): """This method returns a similarity score for two given graphs. Each DataStore can contain a connected or disconnected graph and the final result is weighted over the amount of objects we managed to compare. @@ -65,6 +66,9 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): ds2: A DataStore object instance representing your graph prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. + ignore_spec_version: As + versioning_checks: As + max_depth: As weight_dict: A dictionary that can be used to override settings in the similarity process @@ -90,13 +94,21 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): """ results = {} similarity_score = 0 - weights = GRAPH_WEIGHTS.copy() + weights = WEIGHTS.copy() if weight_dict: weights.update(weight_dict) + weights["_internal"] = { + "ignore_spec_version": ignore_spec_version, + "versioning_checks": versioning_checks, + "ds1": ds1, + "ds2": ds2, + "max_depth": max_depth, + } + if weights["_internal"]["max_depth"] <= 0: - raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0") + raise ValueError("'max_depth' must be greater than 0") pairs = _object_pairs( _bucket_per_type(ds1.query([])), @@ -104,16 +116,15 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): weights, ) - weights["_internal"]["ds1"] = ds1 - weights["_internal"]["ds2"] = ds2 - logger.debug("Starting graph similarity process between DataStores: '%s' and '%s'", ds1.id, ds2.id) for object1, object2 in pairs: iprop_score = {} object1_id = object1["id"] object2_id = object2["id"] - result = object_similarity(object1, object2, iprop_score, **weights) + result = object_similarity(object1, object2, iprop_score, ds1, ds2, + ignore_spec_version, versioning_checks, + max_depth, **weights) if object1_id not in results: results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result} @@ -141,40 +152,3 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): similarity_score, ) return similarity_score - - -# default weights used for the graph similarity process -GRAPH_WEIGHTS = WEIGHTS.copy() -GRAPH_WEIGHTS.update({ - "grouping": { - "name": (20, partial_string_based), - "context": (20, partial_string_based), - "object_refs": (60, list_reference_check), - }, - "relationship": { - "relationship_type": (20, exact_match), - "source_ref": (40, reference_check), - "target_ref": (40, reference_check), - }, - "report": { - "name": (30, partial_string_based), - "published": (10, partial_timestamp_based), - "object_refs": (60, list_reference_check), - "tdelta": 1, # One day interval - }, - "sighting": { - "first_seen": (5, partial_timestamp_based), - "last_seen": (5, partial_timestamp_based), - "sighting_of_ref": (40, reference_check), - "observed_data_refs": (20, list_reference_check), - "where_sighted_refs": (20, list_reference_check), - "summary": (10, exact_match), - }, - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "ds1": None, - "ds2": None, - "max_depth": 1, - }, -}) # :autodoc-skip: diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index e175938..8bae111 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -4,7 +4,7 @@ import itertools import logging import time -from ...datastore import Filter +from ...datastore import Filter, DataStoreMixin, DataSink, DataSource from ...utils import STIXdatetime, parse_into_datetime from ..pattern import equivalent_patterns @@ -54,7 +54,9 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict): return False -def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): +def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict): """This method returns a measure of similarity depending on how similar the two objects are. @@ -63,6 +65,11 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): obj2: A stix2 object instance prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. + ds1: As + ds2: As + ignore_spec_version: As + versioning_checks: As + max_depth: As weight_dict: A dictionary that can be used to override settings in the similarity process @@ -91,6 +98,14 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): if weight_dict: weights.update(weight_dict) + weights["_internal"] = { + "ignore_spec_version": ignore_spec_version, + "versioning_checks": versioning_checks, + "ds1": ds1, + "ds2": ds2, + "max_depth": max_depth, + } + type1, type2 = obj1["type"], obj2["type"] ignore_spec_version = weights["_internal"]["ignore_spec_version"] @@ -117,6 +132,7 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): if check_property_present(prop, obj1, obj2): w = weights[type1][prop][0] comp_funct = weights[type1][prop][1] + prop_scores[prop] = {} if comp_funct == partial_timestamp_based: contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"]) @@ -124,24 +140,30 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): threshold = weights[type1]["threshold"] contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) elif comp_funct == reference_check or comp_funct == list_reference_check: - max_depth = weights["_internal"]["max_depth"] - if max_depth > 0: - weights["_internal"]["max_depth"] = max_depth - 1 + max_depth_i = weights["_internal"]["max_depth"] + if max_depth_i > 0: + weights["_internal"]["max_depth"] = max_depth_i - 1 ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] - contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) + if _datastore_check(ds1, ds2): + contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) + elif comp_funct == reference_check: + comp_funct = exact_match + contributing_score = w * comp_funct(obj1[prop], obj2[prop]) + elif comp_funct == list_reference_check: + comp_funct = partial_list_based + contributing_score = w * comp_funct(obj1[prop], obj2[prop]) + prop_scores[prop]["method"] = comp_funct.__name__ else: continue # prevent excessive recursion - weights["_internal"]["max_depth"] = max_depth + weights["_internal"]["max_depth"] = max_depth_i else: contributing_score = w * comp_funct(obj1[prop], obj2[prop]) sum_weights += w matching_score += contributing_score - prop_scores[prop] = { - "weight": w, - "contributing_score": contributing_score, - } + prop_scores[prop]["weight"] = w + prop_scores[prop]["contributing_score"] = contributing_score logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score) prop_scores["matching_score"] = matching_score @@ -196,7 +218,9 @@ def partial_timestamp_based(t1, t2, tdelta): def partial_list_based(l1, l2): - """Performs a partial list matching via finding the intersection between common values. + """Performs a partial list matching via finding the intersection between + common values. Repeated values are counted only once. This method can be + used for *_refs equality checks when de-reference is not possible. Args: l1: A list of values. @@ -213,7 +237,8 @@ def partial_list_based(l1, l2): def exact_match(val1, val2): - """Performs an exact value match based on two values + """Performs an exact value match based on two values. This method can be + used for *_ref equality check when de-reference is not possible. Args: val1: A value suitable for an equality test. @@ -275,15 +300,8 @@ def partial_external_reference_based(refs1, refs2): allowed = {"veris", "cve", "capec", "mitre-attack"} matches = 0 - if len(refs1) >= len(refs2): - l1 = refs1 - l2 = refs2 - else: - l1 = refs2 - l2 = refs1 - - for ext_ref1 in l1: - for ext_ref2 in l2: + for ext_ref1 in refs1: + for ext_ref2 in refs2: sn_match = False ei_match = False url_match = False @@ -352,17 +370,21 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights): """Checks multiple object versions if present in graph. Maximizes for the similarity score of a particular version.""" results = {} - objects1 = ds1.query([Filter("id", "=", ref1)]) - objects2 = ds2.query([Filter("id", "=", ref2)]) pairs = _object_pairs( - _bucket_per_type(objects1), - _bucket_per_type(objects2), + _bucket_per_type(ds1.query([Filter("id", "=", ref1)])), + _bucket_per_type(ds2.query([Filter("id", "=", ref2)])), weights, ) + ignore_spec_version = weights["_internal"]["ignore_spec_version"] + versioning_checks = weights["_internal"]["versioning_checks"] + max_depth = weights["_internal"]["max_depth"] for object1, object2 in pairs: - result = object_similarity(object1, object2, **weights) + result = object_similarity(object1, object2, ds1=ds1, ds2=ds2, + ignore_spec_version=ignore_spec_version, + versioning_checks=versioning_checks, + max_depth=max_depth, **weights) if ref1 not in results: results[ref1] = {"matched": ref2, "value": result} elif result > results[ref1]["value"]: @@ -383,12 +405,18 @@ def reference_check(ref1, ref2, ds1, ds2, **weights): result = 0.0 if type1 == type2 and type1 in weights: - if weights["_internal"]["versioning_checks"]: + ignore_spec_version = weights["_internal"]["ignore_spec_version"] + versioning_checks = weights["_internal"]["versioning_checks"] + max_depth = weights["_internal"]["max_depth"] + if versioning_checks: result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0 else: o1, o2 = ds1.get(ref1), ds2.get(ref2) if o1 and o2: - result = object_similarity(o1, o2, **weights) / 100.0 + result = object_similarity(o1, o2, ds1=ds1, ds2=ds2, + ignore_spec_version=ignore_spec_version, + versioning_checks=versioning_checks, + max_depth=max_depth, **weights) / 100.0 logger.debug( "--\t\treference_check '%s' '%s'\tresult: '%s'", @@ -439,6 +467,13 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights): return result +def _datastore_check(ds1, ds2): + if (issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or + issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))): + return True + return False + + def _bucket_per_type(graph, mode="type"): """Given a list of objects or references, bucket them by type. Depending on the list type: extract from 'type' property or using @@ -480,11 +515,20 @@ WEIGHTS = { "name": (60, partial_string_based), "external_references": (40, partial_external_reference_based), }, + "grouping": { + "name": (20, partial_string_based), + "context": (20, partial_string_based), + "object_refs": (60, list_reference_check), + }, "identity": { "name": (60, partial_string_based), "identity_class": (20, exact_match), "sectors": (20, partial_list_based), }, + "incident": { + "name": (60, partial_string_based), + "external_references": (40, partial_external_reference_based), + }, "indicator": { "indicator_types": (15, partial_list_based), "pattern": (80, custom_pattern_based), @@ -511,6 +555,25 @@ WEIGHTS = { "definition": (60, exact_match), "definition_type": (20, exact_match), }, + "relationship": { + "relationship_type": (20, exact_match), + "source_ref": (40, reference_check), + "target_ref": (40, reference_check), + }, + "report": { + "name": (30, partial_string_based), + "published": (10, partial_timestamp_based), + "object_refs": (60, list_reference_check), + "tdelta": 1, # One day interval + }, + "sighting": { + "first_seen": (5, partial_timestamp_based), + "last_seen": (5, partial_timestamp_based), + "sighting_of_ref": (40, reference_check), + "observed_data_refs": (20, list_reference_check), + "where_sighted_refs": (20, list_reference_check), + "summary": (10, exact_match), + }, "threat-actor": { "name": (60, partial_string_based), "threat_actor_types": (20, partial_list_based), @@ -523,8 +586,5 @@ WEIGHTS = { "vulnerability": { "name": (30, partial_string_based), "external_references": (70, partial_external_reference_based), - }, - "_internal": { - "ignore_spec_version": False, - }, + } } # :autodoc-skip: From ff5014c606858053ad6eee6a13438a67dffe388f Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 12:27:52 -0500 Subject: [PATCH 02/10] expose configuration options, combine weight dictionary, update tests --- docs/conf.py | 9 +- stix2/environment.py | 109 +++++++++++++---- stix2/equivalence/graph/__init__.py | 60 +++++++--- stix2/equivalence/object/__init__.py | 169 +++++++++++++++----------- stix2/test/v20/test_environment.py | 143 +++------------------- stix2/test/v21/test_environment.py | 172 ++++++--------------------- 6 files changed, 284 insertions(+), 378 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 5d12af3..62e829d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -66,16 +66,9 @@ object_default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n ') object_default_sem_eq_weights = object_default_sem_eq_weights.replace(' "', ' ') object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n') -with open('object_default_sem_eq_weights.rst', 'w') as f: +with open('similarity_weights.rst', 'w') as f: f.write(".. code-block:: python\n\n {}\n\n".format(object_default_sem_eq_weights)) -graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__) -graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n ') -graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace(' "', ' ') -graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n') -with open('graph_default_sem_eq_weights.rst', 'w') as f: - f.write(".. code-block:: python\n\n {}\n\n".format(graph_default_sem_eq_weights)) - def get_property_type(prop): """Convert property classname into pretty string name of property. diff --git a/stix2/environment.py b/stix2/environment.py index 75e5fa5..b37b485 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -189,8 +189,11 @@ class Environment(DataStoreMixin): return None @staticmethod - def object_similarity(obj1, obj2, prop_scores={}, ignore_spec_version=False, - versioning_checks=False, max_depth=1, **weight_dict): + def object_similarity( + obj1, obj2, prop_scores={}, ds1=None, ds2=None, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict + ): """This method returns a measure of how similar the two objects are. Args: @@ -198,8 +201,19 @@ class Environment(DataStoreMixin): obj2: A stix2 object instance prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: float: A number between 0.0 and 100.0 as a measurement of similarity. @@ -221,12 +235,17 @@ class Environment(DataStoreMixin): see `the Committee Note `__. """ - return object_similarity(obj1, obj2, prop_scores, ignore_spec_version, - versioning_checks, max_depth, **weight_dict) + return object_similarity( + obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version, + versioning_checks, max_depth, **weight_dict + ) @staticmethod - def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, ignore_spec_version=False, - versioning_checks=False, max_depth=1, **weight_dict): + def object_equivalence( + obj1, obj2, prop_scores={}, threshold=70, ds1=None, ds2=None, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict + ): """This method returns a true/false value if two objects are semantically equivalent. Internally, it calls the object_similarity function and compares it against the given threshold value. @@ -239,8 +258,19 @@ class Environment(DataStoreMixin): threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both objects equivalent. This value can be tuned. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: bool: True if the result of the object similarity is greater than or equal to @@ -263,11 +293,16 @@ class Environment(DataStoreMixin): see `the Committee Note `__. """ - return object_equivalence(obj1, obj2, prop_scores, threshold, **weight_dict) + return object_equivalence( + obj1, obj2, prop_scores, threshold, ds1, ds2, + ignore_spec_version, versioning_checks, max_depth, **weight_dict + ) @staticmethod - def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, - versioning_checks=False, max_depth=1, **weight_dict): + def graph_similarity( + ds1, ds2, prop_scores={}, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict + ): """This method returns a similarity score for two given graphs. Each DataStore can contain a connected or disconnected graph and the final result is weighted over the amount of objects we managed to compare. @@ -279,8 +314,17 @@ class Environment(DataStoreMixin): ds2: A DataStore object instance representing your graph prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: float: A number between 0.0 and 100.0 as a measurement of similarity. @@ -295,19 +339,24 @@ class Environment(DataStoreMixin): Note: Default weight_dict: - .. include:: ../graph_default_sem_eq_weights.rst + .. include:: ../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. see `the Committee Note `__. """ - return graph_similarity(ds1, ds2, prop_scores, ignore_spec_version, - versioning_checks, max_depth, **weight_dict) + return graph_similarity( + ds1, ds2, prop_scores, ignore_spec_version, + versioning_checks, max_depth, **weight_dict + ) @staticmethod - def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, ignore_spec_version=False, - versioning_checks=False, max_depth=1, **weight_dict): + def graph_equivalence( + ds1, ds2, prop_scores={}, threshold=70, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict + ): """This method returns a true/false value if two graphs are semantically equivalent. Internally, it calls the graph_similarity function and compares it against the given threshold value. @@ -320,8 +369,17 @@ class Environment(DataStoreMixin): threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both graphs equivalent. This value can be tuned. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: bool: True if the result of the graph similarity is greater than or equal to @@ -337,11 +395,14 @@ class Environment(DataStoreMixin): Note: Default weight_dict: - .. include:: ../graph_default_sem_eq_weights.rst + .. include:: ../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. see `the Committee Note `__. """ - return graph_equivalence(ds1, ds2, prop_scores, threshold, **weight_dict) + return graph_equivalence( + ds1, ds2, prop_scores, threshold, ignore_spec_version, + versioning_checks, max_depth, **weight_dict + ) diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py index 1dcccf1..1d43219 100644 --- a/stix2/equivalence/graph/__init__.py +++ b/stix2/equivalence/graph/__init__.py @@ -10,7 +10,11 @@ from ..object import ( logger = logging.getLogger(__name__) -def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict): +def graph_equivalence( + ds1, ds2, prop_scores={}, threshold=70, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict +): """This method returns a true/false value if two graphs are semantically equivalent. Internally, it calls the graph_similarity function and compares it against the given threshold value. @@ -23,8 +27,17 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict): threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both graphs equivalent. This value can be tuned. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: bool: True if the result of the graph similarity is greater than or equal to @@ -40,21 +53,26 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict): Note: Default weight_dict: - .. include:: ../../graph_default_sem_eq_weights.rst + .. include:: ../../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. see `the Committee Note `__. """ - similarity_result = graph_similarity(ds1, ds2, prop_scores, **weight_dict) + similarity_result = graph_similarity( + ds1, ds2, prop_scores, ignore_spec_version, + versioning_checks, max_depth, **weight_dict + ) if similarity_result >= threshold: return True return False -def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, - versioning_checks=False, max_depth=1, **weight_dict): +def graph_similarity( + ds1, ds2, prop_scores={}, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict +): """This method returns a similarity score for two given graphs. Each DataStore can contain a connected or disconnected graph and the final result is weighted over the amount of objects we managed to compare. @@ -66,11 +84,17 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, ds2: A DataStore object instance representing your graph prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - ignore_spec_version: As - versioning_checks: As - max_depth: As - weight_dict: A dictionary that can be used to override settings - in the similarity process + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: float: A number between 0.0 and 100.0 as a measurement of similarity. @@ -85,7 +109,7 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, Note: Default weight_dict: - .. include:: ../../graph_default_sem_eq_weights.rst + .. include:: ../../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -107,7 +131,7 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, "max_depth": max_depth, } - if weights["_internal"]["max_depth"] <= 0: + if max_depth <= 0: raise ValueError("'max_depth' must be greater than 0") pairs = _object_pairs( @@ -122,9 +146,11 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False, object1_id = object1["id"] object2_id = object2["id"] - result = object_similarity(object1, object2, iprop_score, ds1, ds2, - ignore_spec_version, versioning_checks, - max_depth, **weights) + result = object_similarity( + object1, object2, iprop_score, ds1, ds2, + ignore_spec_version, versioning_checks, + max_depth, **weights + ) if object1_id not in results: results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result} diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index 8bae111..71a263c 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -4,14 +4,18 @@ import itertools import logging import time -from ...datastore import Filter, DataStoreMixin, DataSink, DataSource +from ...datastore import DataSink, DataSource, DataStoreMixin, Filter from ...utils import STIXdatetime, parse_into_datetime from ..pattern import equivalent_patterns logger = logging.getLogger(__name__) -def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict): +def object_equivalence( + obj1, obj2, prop_scores={}, threshold=70, ds1=None, + ds2=None, ignore_spec_version=False, + versioning_checks=False, max_depth=1, **weight_dict +): """This method returns a true/false value if two objects are semantically equivalent. Internally, it calls the object_similarity function and compares it against the given threshold value. @@ -24,8 +28,19 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict): threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both objects equivalent. This value can be tuned. - weight_dict: A dictionary that can be used to override settings - in the similarity process + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: bool: True if the result of the object similarity is greater than or equal to @@ -41,22 +56,27 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict): Note: Default weight_dict: - .. include:: ../../object_default_sem_eq_weights.rst + .. include:: ../../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. see `the Committee Note `__. """ - similarity_result = object_similarity(obj1, obj2, prop_scores, **weight_dict) + similarity_result = object_similarity( + obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version, + versioning_checks, max_depth, **weight_dict + ) if similarity_result >= threshold: return True return False -def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, - ignore_spec_version=False, versioning_checks=False, - max_depth=1, **weight_dict): +def object_similarity( + obj1, obj2, prop_scores={}, ds1=None, ds2=None, + ignore_spec_version=False, versioning_checks=False, + max_depth=1, **weight_dict +): """This method returns a measure of similarity depending on how similar the two objects are. @@ -65,13 +85,19 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, obj2: A stix2 object instance prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - ds1: As - ds2: As - ignore_spec_version: As - versioning_checks: As - max_depth: As - weight_dict: A dictionary that can be used to override settings - in the similarity process + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + ignore_spec_version: A boolean indicating whether to test object types + that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). + If set to True this check will be skipped. + versioning_checks: A boolean indicating whether to test multiple revisions + of the same object (when present) to maximize similarity against a + particular version. If set to True the algorithm will perform this step. + max_depth: A positive integer indicating the maximum recursion depth the + algorithm can reach when de-referencing objects and performing the + object_similarity algorithm. + weight_dict: A dictionary that can be used to override what checks are done + to objects in the similarity process. Returns: float: A number between 0.0 and 100.0 as a measurement of similarity. @@ -86,7 +112,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, Note: Default weight_dict: - .. include:: ../../object_default_sem_eq_weights.rst + .. include:: ../../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -107,7 +133,6 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, } type1, type2 = obj1["type"], obj2["type"] - ignore_spec_version = weights["_internal"]["ignore_spec_version"] if type1 != type2: raise ValueError('The objects to compare must be of the same type!') @@ -140,9 +165,8 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, threshold = weights[type1]["threshold"] contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) elif comp_funct == reference_check or comp_funct == list_reference_check: - max_depth_i = weights["_internal"]["max_depth"] - if max_depth_i > 0: - weights["_internal"]["max_depth"] = max_depth_i - 1 + if max_depth > 0: + weights["_internal"]["max_depth"] = max_depth - 1 ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] if _datastore_check(ds1, ds2): contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) @@ -155,7 +179,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, prop_scores[prop]["method"] = comp_funct.__name__ else: continue # prevent excessive recursion - weights["_internal"]["max_depth"] = max_depth_i + weights["_internal"]["max_depth"] = max_depth else: contributing_score = w * comp_funct(obj1[prop], obj2[prop]) @@ -187,7 +211,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None, def check_property_present(prop, obj1, obj2): """Helper method checks if a property is present on both objects.""" if prop == "longitude_latitude": - if all(x in obj1 and x in obj2 for x in ['latitude', 'longitude']): + if all(x in obj1 and x in obj2 for x in ('latitude', 'longitude')): return True elif prop in obj1 and prop in obj2: return True @@ -286,12 +310,12 @@ def custom_pattern_based(pattern1, pattern2): return equivalent_patterns(pattern1, pattern2) -def partial_external_reference_based(refs1, refs2): +def partial_external_reference_based(ext_refs1, ext_refs2): """Performs a matching on External References. Args: - refs1: A list of external references. - refs2: A list of external references. + ext_refs1: A list of external references. + ext_refs2: A list of external references. Returns: float: Number between 0.0 and 1.0 depending on matches. @@ -300,44 +324,47 @@ def partial_external_reference_based(refs1, refs2): allowed = {"veris", "cve", "capec", "mitre-attack"} matches = 0 - for ext_ref1 in refs1: - for ext_ref2 in refs2: - sn_match = False - ei_match = False - url_match = False - source_name = None + ref_pairs = itertools.chain( + itertools.product(ext_refs1, ext_refs2), + ) - if check_property_present("source_name", ext_ref1, ext_ref2): - if ext_ref1["source_name"] == ext_ref2["source_name"]: - source_name = ext_ref1["source_name"] - sn_match = True - if check_property_present("external_id", ext_ref1, ext_ref2): - if ext_ref1["external_id"] == ext_ref2["external_id"]: - ei_match = True - if check_property_present("url", ext_ref1, ext_ref2): - if ext_ref1["url"] == ext_ref2["url"]: - url_match = True + for ext_ref1, ext_ref2 in ref_pairs: + sn_match = False + ei_match = False + url_match = False + source_name = None - # Special case: if source_name is a STIX defined name and either - # external_id or url match then its a perfect match and other entries - # can be ignored. - if sn_match and (ei_match or url_match) and source_name in allowed: - result = 1.0 - logger.debug( - "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", - refs1, refs2, result, - ) - return result + if check_property_present("source_name", ext_ref1, ext_ref2): + if ext_ref1["source_name"] == ext_ref2["source_name"]: + source_name = ext_ref1["source_name"] + sn_match = True + if check_property_present("external_id", ext_ref1, ext_ref2): + if ext_ref1["external_id"] == ext_ref2["external_id"]: + ei_match = True + if check_property_present("url", ext_ref1, ext_ref2): + if ext_ref1["url"] == ext_ref2["url"]: + url_match = True - # Regular check. If the source_name (not STIX-defined) or external_id or - # url matches then we consider the entry a match. - if (sn_match or ei_match or url_match) and source_name not in allowed: - matches += 1 + # Special case: if source_name is a STIX defined name and either + # external_id or url match then its a perfect match and other entries + # can be ignored. + if sn_match and (ei_match or url_match) and source_name in allowed: + result = 1.0 + logger.debug( + "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", + ext_refs1, ext_refs2, result, + ) + return result - result = matches / max(len(refs1), len(refs2)) + # Regular check. If the source_name (not STIX-defined) or external_id or + # url matches then we consider the entry a match. + if (sn_match or ei_match or url_match) and source_name not in allowed: + matches += 1 + + result = matches / max(len(ext_refs1), len(ext_refs2)) logger.debug( "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", - refs1, refs2, result, + ext_refs1, ext_refs2, result, ) return result @@ -381,10 +408,11 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights): max_depth = weights["_internal"]["max_depth"] for object1, object2 in pairs: - result = object_similarity(object1, object2, ds1=ds1, ds2=ds2, - ignore_spec_version=ignore_spec_version, - versioning_checks=versioning_checks, - max_depth=max_depth, **weights) + result = object_similarity( + object1, object2, ds1, ds2, + ignore_spec_version, versioning_checks, + max_depth, **weights + ) if ref1 not in results: results[ref1] = {"matched": ref2, "value": result} elif result > results[ref1]["value"]: @@ -413,10 +441,11 @@ def reference_check(ref1, ref2, ds1, ds2, **weights): else: o1, o2 = ds1.get(ref1), ds2.get(ref2) if o1 and o2: - result = object_similarity(o1, o2, ds1=ds1, ds2=ds2, - ignore_spec_version=ignore_spec_version, - versioning_checks=versioning_checks, - max_depth=max_depth, **weights) / 100.0 + result = object_similarity( + o1, o2, ds1, ds2, + ignore_spec_version, versioning_checks, + max_depth, **weights + ) / 100.0 logger.debug( "--\t\treference_check '%s' '%s'\tresult: '%s'", @@ -468,8 +497,10 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights): def _datastore_check(ds1, ds2): - if (issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or - issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))): + if ( + issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or + issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource)) + ): return True return False @@ -586,5 +617,5 @@ WEIGHTS = { "vulnerability": { "name": (30, partial_string_based), "external_references": (70, partial_external_reference_based), - } + }, } # :autodoc-skip: diff --git a/stix2/test/v20/test_environment.py b/stix2/test/v20/test_environment.py index 33e0985..c8867b0 100644 --- a/stix2/test/v20/test_environment.py +++ b/stix2/test/v20/test_environment.py @@ -424,7 +424,7 @@ def test_related_to_by_target(ds): def test_versioned_checks(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": True, @@ -437,7 +437,7 @@ def test_versioned_checks(ds, ds2): def test_semantic_check_with_versioning(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": False, @@ -467,13 +467,11 @@ def test_semantic_check_with_versioning(ds, ds2): def test_list_semantic_check(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": False, "versioning_checks": False, - "ds1": ds, - "ds2": ds2, "max_depth": 1, }, }) @@ -504,39 +502,18 @@ def test_list_semantic_check(ds, ds2): def test_graph_similarity_raises_value_error(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": -1, - }, - } with pytest.raises(ValueError): prop_scores1 = {} - stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1) def test_graph_similarity_with_filesystem_source(ds, fs): - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, ignore_spec_version=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, ignore_spec_version=True) assert round(env1) == 25 assert round(prop_scores1["matching_score"]) == 451 @@ -552,41 +529,20 @@ def test_graph_similarity_with_filesystem_source(ds, fs): def test_graph_similarity_with_duplicate_graph(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores = {} - env = stix2.Environment().graph_similarity(ds, ds, prop_scores, **weights) + env = stix2.Environment().graph_similarity(ds, ds, prop_scores) assert round(env) == 100 assert round(prop_scores["matching_score"]) == 800 assert round(prop_scores["len_pairs"]) == 8 def test_graph_similarity_with_versioning_check_on(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True) assert round(env1) == 88 assert round(prop_scores1["matching_score"]) == 789 @@ -602,26 +558,12 @@ def test_graph_similarity_with_versioning_check_on(ds2, ds): def test_graph_similarity_with_versioning_check_off(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2) assert round(env1) == 88 assert round(prop_scores1["matching_score"]) == 789 @@ -637,26 +579,12 @@ def test_graph_similarity_with_versioning_check_off(ds2, ds): def test_graph_equivalence_with_filesystem_source(ds, fs): - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True) assert env1 is False assert round(prop_scores1["matching_score"]) == 451 @@ -672,41 +600,20 @@ def test_graph_equivalence_with_filesystem_source(ds, fs): def test_graph_equivalence_with_duplicate_graph(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores = {} - env = stix2.Environment().graph_equivalence(ds, ds, prop_scores, **weights) + env = stix2.Environment().graph_equivalence(ds, ds, prop_scores) assert env is True assert round(prop_scores["matching_score"]) == 800 assert round(prop_scores["len_pairs"]) == 8 def test_graph_equivalence_with_versioning_check_on(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True) assert env1 is True assert round(prop_scores1["matching_score"]) == 789 @@ -722,26 +629,12 @@ def test_graph_equivalence_with_versioning_check_on(ds2, ds): def test_graph_equivalence_with_versioning_check_off(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2) assert env1 is True assert round(prop_scores1["matching_score"]) == 789 diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py index e7bf4da..6a14bf3 100644 --- a/stix2/test/v21/test_environment.py +++ b/stix2/test/v21/test_environment.py @@ -760,16 +760,13 @@ def test_object_similarity_different_spec_version(): "valid_from": (5, stix2.equivalence.object.partial_timestamp_based), "tdelta": 1, # One day interval }, - "_internal": { - "ignore_spec_version": True, # Disables spec_version check. - }, } ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS) ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **IND_KWARGS) - env = stix2.Environment().object_similarity(ind1, ind2, **weights) + env = stix2.Environment().object_similarity(ind1, ind2, ignore_spec_version=True, **weights) assert round(env) == 0 - env = stix2.Environment().object_similarity(ind2, ind1, **weights) + env = stix2.Environment().object_similarity(ind2, ind1, ignore_spec_version=True, **weights) assert round(env) == 0 @@ -861,7 +858,9 @@ def test_object_similarity_exact_match(): def test_non_existent_config_for_object(): r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) - assert stix2.Environment().object_similarity(r1, r2) == 0.0 + prop_scores = {} + assert stix2.Environment().object_similarity(r1, r2, prop_scores) == 100.0 + assert prop_scores["object_refs"]["method"] == "partial_list_based" def custom_semantic_equivalence_method(obj1, obj2, **weights): @@ -937,7 +936,8 @@ def test_object_similarity_prop_scores_method_provided(): def test_versioned_checks(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + # Testing internal method + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": True, @@ -950,7 +950,7 @@ def test_versioned_checks(ds, ds2): def test_semantic_check_with_versioning(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": False, @@ -981,7 +981,7 @@ def test_semantic_check_with_versioning(ds, ds2): def test_list_semantic_check(ds, ds2): - weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights = stix2.equivalence.graph.WEIGHTS.copy() weights.update({ "_internal": { "ignore_spec_version": False, @@ -1027,39 +1027,28 @@ def test_list_semantic_check(ds, ds2): def test_graph_similarity_raises_value_error(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": -1, - }, - } with pytest.raises(ValueError): prop_scores1 = {} - stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1) def test_graph_similarity_with_filesystem_source(ds, fs): - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity( + fs, ds, prop_scores1, + ignore_spec_version=True, + versioning_checks=False, + max_depth=1, + ) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity( + ds, fs, prop_scores2, + ignore_spec_version=True, + versioning_checks=False, + max_depth=1, + ) assert round(env1) == 23 assert round(prop_scores1["matching_score"]) == 411 @@ -1154,14 +1143,11 @@ def test_depth_limiting(): "some2_ref": (33, stix2.equivalence.object.reference_check), "name": (34, stix2.equivalence.object.partial_string_based), }, - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, } prop_scores1 = {} - env1 = stix2.equivalence.graph.graph_similarity(mem_store1, mem_store2, prop_scores1, **custom_weights) + env1 = stix2.equivalence.graph.graph_similarity( + mem_store1, mem_store2, prop_scores1, **custom_weights + ) assert round(env1) == 38 assert round(prop_scores1["matching_score"]) == 300 @@ -1185,44 +1171,23 @@ def test_depth_limiting(): def test_graph_similarity_with_duplicate_graph(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores = {} - env = stix2.Environment().graph_similarity(ds, ds, prop_scores, **weights) + env = stix2.Environment().graph_similarity(ds, ds, prop_scores) assert round(env) == 100 assert round(prop_scores["matching_score"]) == 800 assert round(prop_scores["len_pairs"]) == 8 def test_graph_similarity_with_versioning_check_on(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True) assert round(env1) == 88 assert round(prop_scores1["matching_score"]) == 789 assert round(prop_scores1["len_pairs"]) == 9 # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True) assert round(env2) == 88 assert round(prop_scores2["matching_score"]) == 789 assert round(prop_scores2["len_pairs"]) == 9 @@ -1233,29 +1198,15 @@ def test_graph_similarity_with_versioning_check_on(ds2, ds): def test_graph_similarity_with_versioning_check_off(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1) assert round(env1) == 88 assert round(prop_scores1["matching_score"]) == 789 assert round(prop_scores1["len_pairs"]) == 9 # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2) assert round(env2) == 88 assert round(prop_scores2["matching_score"]) == 789 assert round(prop_scores2["len_pairs"]) == 9 @@ -1266,26 +1217,12 @@ def test_graph_similarity_with_versioning_check_off(ds2, ds): def test_graph_equivalence_with_filesystem_source(ds, fs): - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": True, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True) assert env1 is False assert round(prop_scores1["matching_score"]) == 411 @@ -1301,41 +1238,20 @@ def test_graph_equivalence_with_filesystem_source(ds, fs): def test_graph_equivalence_with_duplicate_graph(ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores = {} - env = stix2.Environment().graph_equivalence(ds, ds, prop_scores, **weights) + env = stix2.Environment().graph_equivalence(ds, ds, prop_scores) assert env is True assert round(prop_scores["matching_score"]) == 800 assert round(prop_scores["len_pairs"]) == 8 def test_graph_equivalence_with_versioning_check_on(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": True, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True) assert env1 is True assert round(prop_scores1["matching_score"]) == 789 @@ -1351,26 +1267,12 @@ def test_graph_equivalence_with_versioning_check_on(ds2, ds): def test_graph_equivalence_with_versioning_check_off(ds2, ds): - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores1 = {} - env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights) + env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1) # Switching parameters - weights = { - "_internal": { - "ignore_spec_version": False, - "versioning_checks": False, - "max_depth": 1, - }, - } prop_scores2 = {} - env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights) + env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2) assert env1 is True assert round(prop_scores1["matching_score"]) == 789 From d2e867b52ead1793a32e021f7583ec37a6232bc3 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 12:29:33 -0500 Subject: [PATCH 03/10] docstring corrections --- .gitignore | 3 +-- stix2/environment.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 72b31cd..4d16202 100644 --- a/.gitignore +++ b/.gitignore @@ -55,8 +55,7 @@ coverage.xml # Sphinx documentation docs/_build/ .ipynb_checkpoints -graph_default_sem_eq_weights.rst -object_default_sem_eq_weights.rst +similarity_weights.rst # PyBuilder target/ diff --git a/stix2/environment.py b/stix2/environment.py index b37b485..2905b9e 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -228,7 +228,7 @@ class Environment(DataStoreMixin): Note: Default weight_dict: - .. include:: ../object_default_sem_eq_weights.rst + .. include:: ../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -286,7 +286,7 @@ class Environment(DataStoreMixin): Note: Default weight_dict: - .. include:: ../object_default_sem_eq_weights.rst + .. include:: ../similarity_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. From 3efa4c1ce980afd54f21d5a77471b3d8eae3c464 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 12:44:35 -0500 Subject: [PATCH 04/10] revert part changes --- stix2/equivalence/object/__init__.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index 71a263c..81bf23c 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -176,7 +176,7 @@ def object_similarity( elif comp_funct == list_reference_check: comp_funct = partial_list_based contributing_score = w * comp_funct(obj1[prop], obj2[prop]) - prop_scores[prop]["method"] = comp_funct.__name__ + prop_scores[prop]["check_type"] = comp_funct.__name__ else: continue # prevent excessive recursion weights["_internal"]["max_depth"] = max_depth @@ -409,10 +409,11 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights): for object1, object2 in pairs: result = object_similarity( - object1, object2, ds1, ds2, - ignore_spec_version, versioning_checks, - max_depth, **weights - ) + object1, object2, ds1=ds1, ds2=ds2, + ignore_spec_version=ignore_spec_version, + versioning_checks=versioning_checks, + max_depth=max_depth, **weights + ) if ref1 not in results: results[ref1] = {"matched": ref2, "value": result} elif result > results[ref1]["value"]: @@ -442,9 +443,10 @@ def reference_check(ref1, ref2, ds1, ds2, **weights): o1, o2 = ds1.get(ref1), ds2.get(ref2) if o1 and o2: result = object_similarity( - o1, o2, ds1, ds2, - ignore_spec_version, versioning_checks, - max_depth, **weights + o1, o2, ds1=ds1, ds2=ds2, + ignore_spec_version=ignore_spec_version, + versioning_checks=versioning_checks, + max_depth=max_depth, **weights ) / 100.0 logger.debug( From e4e6f46089db5c30698435a0e5b692faaf33b47a Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 12:54:01 -0500 Subject: [PATCH 05/10] change key name for _refs check --- stix2/test/v21/test_environment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py index 6a14bf3..2b781f4 100644 --- a/stix2/test/v21/test_environment.py +++ b/stix2/test/v21/test_environment.py @@ -860,7 +860,7 @@ def test_non_existent_config_for_object(): r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) prop_scores = {} assert stix2.Environment().object_similarity(r1, r2, prop_scores) == 100.0 - assert prop_scores["object_refs"]["method"] == "partial_list_based" + assert prop_scores["object_refs"]["check_type"] == "partial_list_based" def custom_semantic_equivalence_method(obj1, obj2, **weights): From 72a12e96ba23f4da68d1cc2af5fd7b60481b5e0f Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 13:14:03 -0500 Subject: [PATCH 06/10] update guide example, conf.py and remove some unused imports --- docs/conf.py | 1 - docs/guide/equivalence.ipynb | 11 +---------- stix2/equivalence/graph/__init__.py | 4 +--- stix2/equivalence/object/__init__.py | 6 +++--- 4 files changed, 5 insertions(+), 17 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 62e829d..b6dd6ea 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -7,7 +7,6 @@ import sys from sphinx.ext.autodoc import ClassDocumenter from stix2.base import _STIXBase -from stix2.equivalence.graph import GRAPH_WEIGHTS from stix2.equivalence.object import WEIGHTS from stix2.version import __version__ diff --git a/docs/guide/equivalence.ipynb b/docs/guide/equivalence.ipynb index e61e9ed..9e9c679 100644 --- a/docs/guide/equivalence.ipynb +++ b/docs/guide/equivalence.ipynb @@ -4607,20 +4607,11 @@ " ),\n", "]\n", "\n", - "\n", - "weights = {\n", - " \"_internal\": {\n", - " \"ignore_spec_version\": False,\n", - " \"versioning_checks\": False,\n", - " \"max_depth\": 1,\n", - " },\n", - "}\n", - "\n", "memstore1 = MemoryStore(g1)\n", "memstore2 = MemoryStore(g2)\n", "prop_scores = {}\n", "\n", - "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores, **weights)\n", + "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores)\n", "equivalence_result = env.graph_equivalence(memstore1, memstore2, threshold=60)\n", "\n", "print(similarity_result)\n", diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py index 1d43219..1f46fd3 100644 --- a/stix2/equivalence/graph/__init__.py +++ b/stix2/equivalence/graph/__init__.py @@ -2,9 +2,7 @@ import logging from ..object import ( - WEIGHTS, _bucket_per_type, _object_pairs, exact_match, - list_reference_check, object_similarity, partial_string_based, - partial_timestamp_based, reference_check, + WEIGHTS, _bucket_per_type, _object_pairs, object_similarity, ) logger = logging.getLogger(__name__) diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index 81bf23c..c9bfb34 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -412,8 +412,8 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights): object1, object2, ds1=ds1, ds2=ds2, ignore_spec_version=ignore_spec_version, versioning_checks=versioning_checks, - max_depth=max_depth, **weights - ) + max_depth=max_depth, **weights, + ) if ref1 not in results: results[ref1] = {"matched": ref2, "value": result} elif result > results[ref1]["value"]: @@ -446,7 +446,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights): o1, o2, ds1=ds1, ds2=ds2, ignore_spec_version=ignore_spec_version, versioning_checks=versioning_checks, - max_depth=max_depth, **weights + max_depth=max_depth, **weights, ) / 100.0 logger.debug( From bd996b8750eb6ba6612a5ba47e03c7ee9a45adc3 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Mon, 1 Mar 2021 14:40:05 -0500 Subject: [PATCH 07/10] Update __init__.py --- stix2/equivalence/object/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index c9bfb34..8dcafb6 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -409,10 +409,10 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights): for object1, object2 in pairs: result = object_similarity( - object1, object2, ds1=ds1, ds2=ds2, - ignore_spec_version=ignore_spec_version, - versioning_checks=versioning_checks, - max_depth=max_depth, **weights, + object1, object2, ds1=ds1, ds2=ds2, + ignore_spec_version=ignore_spec_version, + versioning_checks=versioning_checks, + max_depth=max_depth, **weights, ) if ref1 not in results: results[ref1] = {"matched": ref2, "value": result} From 262284444ef6aafe7bba7a38568254d1489fffae Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 10 Mar 2021 09:52:15 -0500 Subject: [PATCH 08/10] Update stix2/environment.py Co-authored-by: Chris Lenk --- stix2/environment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stix2/environment.py b/stix2/environment.py index 2905b9e..f8624c7 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -201,8 +201,8 @@ class Environment(DataStoreMixin): obj2: A stix2 object instance prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - ds1: A DataStore object instance representing your graph - ds2: A DataStore object instance representing your graph + ds1 (optional): A DataStore object instance from which to pull related objects + ds2 (optional): A DataStore object instance from which to pull related objects ignore_spec_version: A boolean indicating whether to test object types that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). If set to True this check will be skipped. From 7d7c56c64b73ce02ec73be819ab8dad87b9c973a Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 10 Mar 2021 09:52:24 -0500 Subject: [PATCH 09/10] Update stix2/environment.py --- stix2/environment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stix2/environment.py b/stix2/environment.py index f8624c7..f7c13ee 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -258,8 +258,8 @@ class Environment(DataStoreMixin): threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both objects equivalent. This value can be tuned. - ds1: A DataStore object instance representing your graph - ds2: A DataStore object instance representing your graph + ds1 (optional): A DataStore object instance from which to pull related objects + ds2 (optional): A DataStore object instance from which to pull related objects ignore_spec_version: A boolean indicating whether to test object types that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). If set to True this check will be skipped. From c9e66def60c2791b3d3b5cde038de4ef8abcc7de Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 10 Mar 2021 13:32:02 -0500 Subject: [PATCH 10/10] rename test, update the rest of the docstrings for object_similarity() and object_equivalence() --- stix2/equivalence/object/__init__.py | 14 +++++++------- stix2/test/v21/test_environment.py | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index 8dcafb6..da2097e 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -4,7 +4,7 @@ import itertools import logging import time -from ...datastore import DataSink, DataSource, DataStoreMixin, Filter +from ...datastore import DataSource, DataStoreMixin, Filter from ...utils import STIXdatetime, parse_into_datetime from ..pattern import equivalent_patterns @@ -28,8 +28,8 @@ def object_equivalence( threshold: A numerical value between 0 and 100 to determine the minimum score to result in successfully calling both objects equivalent. This value can be tuned. - ds1: A DataStore object instance representing your graph - ds2: A DataStore object instance representing your graph + ds1 (optional): A DataStore object instance from which to pull related objects + ds2 (optional): A DataStore object instance from which to pull related objects ignore_spec_version: A boolean indicating whether to test object types that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). If set to True this check will be skipped. @@ -85,8 +85,8 @@ def object_similarity( obj2: A stix2 object instance prop_scores: A dictionary that can hold individual property scores, weights, contributing score, matching score and sum of weights. - ds1: A DataStore object instance representing your graph - ds2: A DataStore object instance representing your graph + ds1 (optional): A DataStore object instance from which to pull related objects + ds2 (optional): A DataStore object instance from which to pull related objects ignore_spec_version: A boolean indicating whether to test object types that belong to different spec versions (STIX 2.0 and STIX 2.1 for example). If set to True this check will be skipped. @@ -500,8 +500,8 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights): def _datastore_check(ds1, ds2): if ( - issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or - issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource)) + issubclass(ds1.__class__, (DataStoreMixin, DataSource)) or + issubclass(ds2.__class__, (DataStoreMixin, DataSource)) ): return True return False diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py index 2b781f4..7f6b71c 100644 --- a/stix2/test/v21/test_environment.py +++ b/stix2/test/v21/test_environment.py @@ -855,7 +855,7 @@ def test_object_similarity_exact_match(): assert stix2.equivalence.object.exact_match(t1, t2) == 0.0 -def test_non_existent_config_for_object(): +def test_no_datastore_fallsback_list_based_check_for_refs_check(): r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) prop_scores = {}