resolve issue regarding reference_check or list_reference_check, remove redundant object_similarity call

update test suite
pull/1/head
Emmanuelle Vargas-Gonzalez 2021-02-17 21:30:14 -05:00
parent c656d35da5
commit ee63e9faf4
3 changed files with 136 additions and 24 deletions

View File

@ -105,31 +105,26 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
weights, weights,
) )
weights["_internal"]["ds1"] = ds1
weights["_internal"]["ds2"] = ds2
for object1, object2 in pairs: for object1, object2 in pairs:
iprop_score1 = {} iprop_score = {}
iprop_score2 = {}
object1_id = object1["id"] object1_id = object1["id"]
object2_id = object2["id"] object2_id = object2["id"]
result = object_similarity(object1, object2, iprop_score, **weights)
weights["_internal"]["max_depth"] = depth weights["_internal"]["max_depth"] = depth
weights["_internal"]["ds1"] = ds1
weights["_internal"]["ds2"] = ds2
result1 = object_similarity(object1, object2, iprop_score1, **weights)
weights["_internal"]["max_depth"] = depth
weights["_internal"]["ds1"] = ds2
weights["_internal"]["ds2"] = ds1
result2 = object_similarity(object2, object1, iprop_score2, **weights)
if object1_id not in results: if object1_id not in results:
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score1, "value": result1} results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
elif result1 > results[object1_id]["value"]: elif result > results[object1_id]["value"]:
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score1, "value": result1} results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
if object2_id not in results: if object2_id not in results:
results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score2, "value": result2} results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result}
elif result2 > results[object2_id]["value"]: elif result > results[object2_id]["value"]:
results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score2, "value": result2} results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result}
matching_score = sum(x["value"] for x in results.values()) matching_score = sum(x["value"] for x in results.values())
len_pairs = len(results) len_pairs = len(results)

View File

@ -125,12 +125,13 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
elif comp_funct == reference_check or comp_funct == list_reference_check: elif comp_funct == reference_check or comp_funct == list_reference_check:
max_depth = weights["_internal"]["max_depth"] max_depth = weights["_internal"]["max_depth"]
if max_depth < 0: if max_depth > 0:
continue # prevent excessive recursion weights["_internal"]["max_depth"] = max_depth - 1
else:
weights["_internal"]["max_depth"] -= 1
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
weights["_internal"]["max_depth"] = max_depth + 1
else:
continue # prevent excessive recursion
else: else:
contributing_score = w * comp_funct(obj1[prop], obj2[prop]) contributing_score = w * comp_funct(obj1[prop], obj2[prop])
@ -376,7 +377,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
type1, type2 = ref1.split("--")[0], ref2.split("--")[0] type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
result = 0.0 result = 0.0
if type1 == type2: if type1 == type2 and type1 in weights:
if weights["_internal"]["versioning_checks"]: if weights["_internal"]["versioning_checks"]:
result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0 result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0
else: else:

View File

@ -723,10 +723,11 @@ def test_object_similarity_different_spec_version_raises():
def test_object_similarity_zero_match(): def test_object_similarity_zero_match():
IND_KWARGS = dict( IND_KWARGS = dict(
indicator_types=["APTX"], indicator_types=["malicious-activity", "bar"],
pattern="[ipv4-addr:value = '192.168.1.1']", pattern="[ipv4-addr:value = '192.168.1.1']",
pattern_type="stix", pattern_type="stix",
valid_from="2019-01-01T12:34:56Z", valid_from="2019-01-01T12:34:56Z",
labels=["APTX", "foo"],
) )
weights = { weights = {
"indicator": { "indicator": {
@ -742,7 +743,9 @@ def test_object_similarity_zero_match():
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS) ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS) ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS)
env = stix2.Environment().object_similarity(ind1, ind2, **weights) env = stix2.Environment().object_similarity(ind1, ind2, **weights)
assert round(env) == 0 assert round(env) == 8
env = stix2.Environment().object_similarity(ind2, ind1, **weights)
assert round(env) == 8
def test_object_similarity_different_spec_version(): def test_object_similarity_different_spec_version():
@ -766,6 +769,9 @@ def test_object_similarity_different_spec_version():
env = stix2.Environment().object_similarity(ind1, ind2, **weights) env = stix2.Environment().object_similarity(ind1, ind2, **weights)
assert round(env) == 0 assert round(env) == 0
env = stix2.Environment().object_similarity(ind2, ind1, **weights)
assert round(env) == 0
@pytest.mark.parametrize( @pytest.mark.parametrize(
"refs1,refs2,ret_val", [ "refs1,refs2,ret_val", [
@ -1068,6 +1074,116 @@ def test_graph_similarity_with_filesystem_source(ds, fs):
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4) assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
def test_depth_limiting():
    """Compare two custom ``foo`` graphs with ``max_depth`` set to 1.

    Each graph has a root object with two references (``some1_ref`` and
    ``some2_ref``), a chain of two further objects behind ``some1_ref``,
    and a leaf named "alice" behind ``some2_ref``.  The comparison is run
    in both argument orders and must yield the same scores either way.
    """
    graph_a = [
        {
            "type": "foo",
            "id": "foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd",
            "spec_version": "2.1",
            "created": "1986-02-08T00:20:17Z",
            "modified": "1989-12-11T06:54:29Z",
            "some1_ref": "foo--700a8a3c-9936-412f-b4eb-ede466476180",
            "some2_ref": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee",
        },
        {
            "type": "foo",
            "id": "foo--700a8a3c-9936-412f-b4eb-ede466476180",
            "spec_version": "2.1",
            "created": "1989-01-06T10:31:54Z",
            "modified": "1995-06-18T10:25:01Z",
            "some1_ref": "foo--705afd45-eb56-43fc-a214-313d63d199a3",
        },
        {
            "type": "foo",
            "id": "foo--705afd45-eb56-43fc-a214-313d63d199a3",
            "spec_version": "2.1",
            "created": "1977-11-06T21:19:29Z",
            "modified": "1997-12-02T20:33:34Z",
        },
        {
            "type": "foo",
            "id": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee",
            "spec_version": "2.1",
            "created": "1991-09-17T00:40:52Z",
            "modified": "1992-12-06T11:02:47Z",
            "name": "alice",
        },
    ]

    graph_b = [
        {
            "type": "foo",
            "id": "foo--71570479-3e6e-48d2-81fb-897454dec55d",
            "spec_version": "2.1",
            "created": "1975-12-22T05:20:38Z",
            "modified": "1980-11-11T01:09:03Z",
            "some1_ref": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579",
            "some2_ref": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2",
        },
        {
            "type": "foo",
            "id": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579",
            "spec_version": "2.1",
            "created": "1976-01-05T08:32:03Z",
            "modified": "1980-11-09T05:41:02Z",
            "some1_ref": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768",
        },
        {
            "type": "foo",
            "id": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768",
            "spec_version": "2.1",
            "created": "1974-09-11T18:56:30Z",
            "modified": "1976-10-31T11:59:43Z",
        },
        {
            "type": "foo",
            "id": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2",
            "spec_version": "2.1",
            "created": "1985-01-03T01:07:03Z",
            "modified": "1992-07-20T21:32:31Z",
            "name": "alice",
        },
    ]

    # Both stores are built up front, before either comparison runs.
    store_a = stix2.MemorySource(graph_a)
    store_b = stix2.MemorySource(graph_b)

    custom_weights = {
        "foo": {
            "some1_ref": (33, stix2.equivalence.object.reference_check),
            "some2_ref": (33, stix2.equivalence.object.reference_check),
            "name": (34, stix2.equivalence.object.partial_string_based),
        },
        "_internal": {
            "ignore_spec_version": False,
            "versioning_checks": False,
            "max_depth": 1,
        },
    }

    # Root objects of graph_b and graph_a respectively; their some2_ref
    # targets are the objects named "alice".
    root_ids = (
        "foo--71570479-3e6e-48d2-81fb-897454dec55d",
        "foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd",
    )

    # First pass compares a -> b, second pass swaps the parameters.
    for lhs_store, rhs_store in ((store_a, store_b), (store_b, store_a)):
        prop_scores = {}
        similarity = stix2.equivalence.graph.graph_similarity(
            lhs_store, rhs_store, prop_scores, **custom_weights
        )

        assert round(similarity) == 38
        assert round(prop_scores["matching_score"]) == 300
        assert round(prop_scores["len_pairs"]) == 8

        # The de-referenced 'alice' comparison must be recorded for both roots.
        summary = prop_scores["summary"]
        for root_id in root_ids:
            assert summary[root_id]["prop_score"]["some2_ref"]["weight"] == 33
def test_graph_similarity_with_duplicate_graph(ds): def test_graph_similarity_with_duplicate_graph(ds):
weights = { weights = {
"_internal": { "_internal": {