diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py index 3d892f4..402bcb2 100644 --- a/stix2/equivalence/graph/__init__.py +++ b/stix2/equivalence/graph/__init__.py @@ -105,31 +105,26 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict): weights, ) + weights["_internal"]["ds1"] = ds1 + weights["_internal"]["ds2"] = ds2 + for object1, object2 in pairs: - iprop_score1 = {} - iprop_score2 = {} + iprop_score = {} object1_id = object1["id"] object2_id = object2["id"] + result = object_similarity(object1, object2, iprop_score, **weights) weights["_internal"]["max_depth"] = depth - weights["_internal"]["ds1"] = ds1 - weights["_internal"]["ds2"] = ds2 - result1 = object_similarity(object1, object2, iprop_score1, **weights) - - weights["_internal"]["max_depth"] = depth - weights["_internal"]["ds1"] = ds2 - weights["_internal"]["ds2"] = ds1 - result2 = object_similarity(object2, object1, iprop_score2, **weights) if object1_id not in results: - results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score1, "value": result1} - elif result1 > results[object1_id]["value"]: - results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score1, "value": result1} + results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result} + elif result > results[object1_id]["value"]: + results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result} if object2_id not in results: - results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score2, "value": result2} - elif result2 > results[object2_id]["value"]: - results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score2, "value": result2} + results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result} + elif result > results[object2_id]["value"]: + results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result} matching_score = sum(x["value"] for x in results.values()) len_pairs = len(results) diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index 39eb99a..7f348b6 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -125,12 +125,13 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict): contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) elif comp_funct == reference_check or comp_funct == list_reference_check: max_depth = weights["_internal"]["max_depth"] - if max_depth < 0: - continue # prevent excessive recursion + if max_depth > 0: + weights["_internal"]["max_depth"] = max_depth - 1 + ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] + contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) + weights["_internal"]["max_depth"] = max_depth + 1 else: - weights["_internal"]["max_depth"] -= 1 - ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] - contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) + continue # prevent excessive recursion else: contributing_score = w * comp_funct(obj1[prop], obj2[prop]) @@ -376,7 +377,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights): type1, type2 = ref1.split("--")[0], ref2.split("--")[0] result = 0.0 - if type1 == type2: + if type1 == type2 and type1 in weights: if weights["_internal"]["versioning_checks"]: result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0 else: diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py index 80c4ba8..fb651af 100644 --- a/stix2/test/v21/test_environment.py +++ b/stix2/test/v21/test_environment.py @@ -723,10 +723,11 @@ def test_object_similarity_different_spec_version_raises(): def test_object_similarity_zero_match(): IND_KWARGS = dict( - indicator_types=["APTX"], + indicator_types=["malicious-activity", "bar"], pattern="[ipv4-addr:value = '192.168.1.1']", pattern_type="stix", valid_from="2019-01-01T12:34:56Z", + labels=["APTX", "foo"], ) weights = { "indicator": { @@ -742,7 +743,9 @@ def test_object_similarity_zero_match(): ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS) ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS) env = stix2.Environment().object_similarity(ind1, ind2, **weights) - assert round(env) == 0 + assert round(env) == 8 + env = stix2.Environment().object_similarity(ind2, ind1, **weights) + assert round(env) == 8 def test_object_similarity_different_spec_version(): @@ -766,6 +769,9 @@ def test_object_similarity_different_spec_version(): env = stix2.Environment().object_similarity(ind1, ind2, **weights) assert round(env) == 0 + env = stix2.Environment().object_similarity(ind2, ind1, **weights) + assert round(env) == 0 + @pytest.mark.parametrize( "refs1,refs2,ret_val", [ @@ -1068,6 +1074,116 @@ def test_graph_similarity_with_filesystem_source(ds, fs): assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4) +def test_depth_limiting(): + g1 = [ + { + "type": "foo", + "id": "foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd", + "spec_version": "2.1", + "created": "1986-02-08T00:20:17Z", + "modified": "1989-12-11T06:54:29Z", + "some1_ref": "foo--700a8a3c-9936-412f-b4eb-ede466476180", + "some2_ref": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee", + }, + { + "type": "foo", + "id": "foo--700a8a3c-9936-412f-b4eb-ede466476180", + "spec_version": "2.1", + "created": "1989-01-06T10:31:54Z", + "modified": "1995-06-18T10:25:01Z", + "some1_ref": "foo--705afd45-eb56-43fc-a214-313d63d199a3", + }, + { + "type": "foo", + "id": "foo--705afd45-eb56-43fc-a214-313d63d199a3", + "spec_version": "2.1", + "created": "1977-11-06T21:19:29Z", + "modified": "1997-12-02T20:33:34Z", + }, + { + "type": "foo", + "id": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee", + "spec_version": "2.1", + "created": "1991-09-17T00:40:52Z", + "modified": "1992-12-06T11:02:47Z", + "name": "alice", + }, + ] + + g2 = [ + { + "type": "foo", + "id": "foo--71570479-3e6e-48d2-81fb-897454dec55d", + "spec_version": "2.1", + "created": "1975-12-22T05:20:38Z", + "modified": "1980-11-11T01:09:03Z", + "some1_ref": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579", + "some2_ref": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2", + }, + { + "type": "foo", + "id": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579", + "spec_version": "2.1", + "created": "1976-01-05T08:32:03Z", + "modified": "1980-11-09T05:41:02Z", + "some1_ref": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768", + }, + { + "type": "foo", + "id": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768", + "spec_version": "2.1", + "created": "1974-09-11T18:56:30Z", + "modified": "1976-10-31T11:59:43Z", + }, + { + "type": "foo", + "id": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2", + "spec_version": "2.1", + "created": "1985-01-03T01:07:03Z", + "modified": "1992-07-20T21:32:31Z", + "name": "alice", + } + ] + + mem_store1 = stix2.MemorySource(g1) + mem_store2 = stix2.MemorySource(g2) + + custom_weights = { + "foo": { + "some1_ref": (33, stix2.equivalence.object.reference_check), + "some2_ref": (33, stix2.equivalence.object.reference_check), + "name": (34, stix2.equivalence.object.partial_string_based), + }, + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores1 = {} + env1 = stix2.equivalence.graph.graph_similarity(mem_store1, mem_store2, prop_scores1, **custom_weights) + + assert round(env1) == 38 + assert round(prop_scores1["matching_score"]) == 300 + assert round(prop_scores1["len_pairs"]) == 8 + # from 'alice' check in de-reference + assert prop_scores1['summary']['foo--71570479-3e6e-48d2-81fb-897454dec55d']['prop_score']['some2_ref']['weight'] == 33 + assert prop_scores1['summary']['foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd']['prop_score']['some2_ref']['weight'] == 33 + + # Switching parameters + prop_scores2 = {} + env2 = stix2.equivalence.graph.graph_similarity( + mem_store2, mem_store1, prop_scores2, **custom_weights + ) + + assert round(env2) == 38 + assert round(prop_scores2["matching_score"]) == 300 + assert round(prop_scores2["len_pairs"]) == 8 + # from 'alice' check in de-reference + assert prop_scores2['summary']['foo--71570479-3e6e-48d2-81fb-897454dec55d']['prop_score']['some2_ref']['weight'] == 33 + assert prop_scores2['summary']['foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd']['prop_score']['some2_ref']['weight'] == 33 + + def test_graph_similarity_with_duplicate_graph(ds): weights = { "_internal": {