docstring changes, _versioned_checks changes

pull/1/head
Emmanuelle Vargas-Gonzalez 2021-02-18 10:37:34 -05:00
parent fa6978969b
commit 99453770cf
3 changed files with 23 additions and 19 deletions

View File

@@ -1,4 +1,4 @@
- """Python APIs for STIX 2 Semantic Equivalence.
+ """Python APIs for STIX 2 Semantic Equivalence and Similarity.
.. autosummary::
:toctree: equivalence

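For orientation, a minimal sketch (not part of this commit) of the object-level API this module exposes. Malware carries a default weights entry in this era of the library, so only the properties both objects share (here, "name") contribute to the score:

from stix2.v21 import Malware
from stix2.equivalence.object import object_similarity

malware1 = Malware(name="Cryptolocker", is_family=False)
malware2 = Malware(name="CryptoLocker", is_family=False)

prop_scores = {}  # filled in place with per-property contributions
score = object_similarity(malware1, malware2, prop_scores)
print(score, prop_scores)  # a 0-100 score plus the breakdown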
View File

@@ -97,7 +97,6 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
if weights["_internal"]["max_depth"] <= 0:
raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
- depth = weights["_internal"]["max_depth"]
pairs = _object_pairs(
_bucket_per_type(ds1.query([])),
@@ -108,13 +107,13 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
weights["_internal"]["ds1"] = ds1
weights["_internal"]["ds2"] = ds2
logger.debug("Starting graph similarity process between DataStores: '%s' and '%s'", ds1.id, ds2.id)
for object1, object2 in pairs:
iprop_score = {}
object1_id = object1["id"]
object2_id = object2["id"]
result = object_similarity(object1, object2, iprop_score, **weights)
- weights["_internal"]["max_depth"] = depth
if object1_id not in results:
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}

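A hedged sketch of the graph-level entry point changed above, reusing the two Malware objects from the earlier sketch. With no weight_dict override, the library defaults apply, including the "_internal"/"max_depth" setting that this commit stops re-saving per comparison pair:

from stix2 import MemoryStore
from stix2.equivalence.graph import graph_similarity

ds1 = MemoryStore([malware1])  # one producer's object graph
ds2 = MemoryStore([malware2])  # the other producer's object graph

prop_scores = {}
score = graph_similarity(ds1, ds2, prop_scores)  # buckets objects per type,
print(score)                                     # pairs them, scores each pair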
View File

@@ -103,13 +103,13 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
try:
weights[type1]
except KeyError:
- logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1)
+ logger.warning("'%s' type has no 'weights' dict specified & thus no object similarity method to call!", type1)
sum_weights = matching_score = 0
else:
try:
method = weights[type1]["method"]
except KeyError:
- logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
+ logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
matching_score = 0.0
sum_weights = 0.0
@@ -129,9 +129,9 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
weights["_internal"]["max_depth"] = max_depth - 1
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
- weights["_internal"]["max_depth"] = max_depth + 1
else:
continue # prevent excessive recursion
+ weights["_internal"]["max_depth"] = max_depth
else:
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
@@ -148,7 +148,7 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
prop_scores["sum_weights"] = sum_weights
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
else:
- logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
+ logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
try:
matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
except TypeError:
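The else-branch above dispatches to a per-type "method" callable when one is present in the weights entry. A hedged sketch with a hypothetical scorer; note the callable must swallow extra keyword arguments, since object_similarity passes **weights[type1] (which still contains "method") back into the call:

def exact_name_method(obj1, obj2, prop_scores, **weights):
    # Hypothetical all-or-nothing scorer; returns (matching_score, sum_weights).
    matching_score = 100.0 if obj1["name"] == obj2["name"] else 0.0
    prop_scores["name"] = matching_score
    return matching_score, 100.0

score = object_similarity(
    malware1, malware2, {},
    malware={"method": exact_name_method},  # replaces the default malware entry
)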
@@ -350,19 +350,24 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
"""Checks multiple object versions if present in graph.
- Maximizes for the semantic equivalence score of a particular version."""
+ Maximizes for the similarity score of a particular version."""
results = {}
objects1 = ds1.query([Filter("id", "=", ref1)])
objects2 = ds2.query([Filter("id", "=", ref2)])
- if len(objects1) > 0 and len(objects2) > 0:
- for o1 in objects1:
- for o2 in objects2:
- result = object_similarity(o1, o2, **weights)
- if ref1 not in results:
- results[ref1] = {"matched": ref2, "value": result}
- elif result > results[ref1]["value"]:
- results[ref1] = {"matched": ref2, "value": result}
+ pairs = _object_pairs(
+ _bucket_per_type(objects1),
+ _bucket_per_type(objects2),
+ weights,
+ )
+ for object1, object2 in pairs:
+ result = object_similarity(object1, object2, **weights)
+ if ref1 not in results:
+ results[ref1] = {"matched": ref2, "value": result}
+ elif result > results[ref1]["value"]:
+ results[ref1] = {"matched": ref2, "value": result}
result = results.get(ref1, {}).get("value", 0.0)
logger.debug(
"--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
@@ -372,8 +377,8 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
def reference_check(ref1, ref2, ds1, ds2, **weights):
"""For two references, de-reference the object and perform object-based
semantic equivalence. The score influences the result of an edge check."""
"""For two references, de-reference the object and perform object_similarity.
The score influences the result of an edge check."""
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
result = 0.0
@@ -394,7 +399,7 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
"""For objects that contain multiple references (i.e., object_refs) perform
- the same de-reference procedure and perform object-based semantic equivalence.
+ the same de-reference procedure and perform object_similarity.
The score influences the objects containing these references. The result is
weighted on the amount of unique objects that could 1) be de-referenced 2) """
results = {}
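An end-to-end way to exercise list_reference_check, as a hedged sketch assuming the default graph weights of this era wire report's "object_refs" property to it: comparing two reports then de-references and scores the objects each report cites. This reuses MemoryStore, graph_similarity, and the Malware objects from the earlier sketches.

from stix2.v21 import Report

report1 = Report(name="Campaign X", published="2021-02-18T00:00:00Z",
                 object_refs=[malware1.id])
report2 = Report(name="Campaign X", published="2021-02-18T00:00:00Z",
                 object_refs=[malware2.id])

score = graph_similarity(
    MemoryStore([report1, malware1]),
    MemoryStore([report2, malware2]),
    {},
)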