docstring changes, _versioned_checks changes
parent
fa6978969b
commit
99453770cf
|
@ -1,4 +1,4 @@
|
|||
"""Python APIs for STIX 2 Semantic Equivalence.
|
||||
"""Python APIs for STIX 2 Semantic Equivalence and Similarity.
|
||||
|
||||
.. autosummary::
|
||||
:toctree: equivalence
|
||||
|
|
|
@ -97,7 +97,6 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
|
|||
|
||||
if weights["_internal"]["max_depth"] <= 0:
|
||||
raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
|
||||
depth = weights["_internal"]["max_depth"]
|
||||
|
||||
pairs = _object_pairs(
|
||||
_bucket_per_type(ds1.query([])),
|
||||
|
@ -108,13 +107,13 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
|
|||
weights["_internal"]["ds1"] = ds1
|
||||
weights["_internal"]["ds2"] = ds2
|
||||
|
||||
logger.debug("Starting graph similarity process between DataStores: '%s' and '%s'", ds1.id, ds2.id)
|
||||
for object1, object2 in pairs:
|
||||
iprop_score = {}
|
||||
object1_id = object1["id"]
|
||||
object2_id = object2["id"]
|
||||
|
||||
result = object_similarity(object1, object2, iprop_score, **weights)
|
||||
weights["_internal"]["max_depth"] = depth
|
||||
|
||||
if object1_id not in results:
|
||||
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
|
||||
|
|
|
@ -103,13 +103,13 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
|
|||
try:
|
||||
weights[type1]
|
||||
except KeyError:
|
||||
logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1)
|
||||
logger.warning("'%s' type has no 'weights' dict specified & thus no object similarity method to call!", type1)
|
||||
sum_weights = matching_score = 0
|
||||
else:
|
||||
try:
|
||||
method = weights[type1]["method"]
|
||||
except KeyError:
|
||||
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||
matching_score = 0.0
|
||||
sum_weights = 0.0
|
||||
|
||||
|
@ -129,9 +129,9 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
|
|||
weights["_internal"]["max_depth"] = max_depth - 1
|
||||
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
|
||||
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
|
||||
weights["_internal"]["max_depth"] = max_depth + 1
|
||||
else:
|
||||
continue # prevent excessive recursion
|
||||
weights["_internal"]["max_depth"] = max_depth
|
||||
else:
|
||||
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
||||
|
||||
|
@ -148,7 +148,7 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
|
|||
prop_scores["sum_weights"] = sum_weights
|
||||
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
|
||||
else:
|
||||
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||
try:
|
||||
matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
|
||||
except TypeError:
|
||||
|
@ -350,19 +350,24 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
|
|||
|
||||
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
|
||||
"""Checks multiple object versions if present in graph.
|
||||
Maximizes for the semantic equivalence score of a particular version."""
|
||||
Maximizes for the similarity score of a particular version."""
|
||||
results = {}
|
||||
objects1 = ds1.query([Filter("id", "=", ref1)])
|
||||
objects2 = ds2.query([Filter("id", "=", ref2)])
|
||||
|
||||
if len(objects1) > 0 and len(objects2) > 0:
|
||||
for o1 in objects1:
|
||||
for o2 in objects2:
|
||||
result = object_similarity(o1, o2, **weights)
|
||||
if ref1 not in results:
|
||||
results[ref1] = {"matched": ref2, "value": result}
|
||||
elif result > results[ref1]["value"]:
|
||||
results[ref1] = {"matched": ref2, "value": result}
|
||||
pairs = _object_pairs(
|
||||
_bucket_per_type(objects1),
|
||||
_bucket_per_type(objects2),
|
||||
weights,
|
||||
)
|
||||
|
||||
for object1, object2 in pairs:
|
||||
result = object_similarity(object1, object2, **weights)
|
||||
if ref1 not in results:
|
||||
results[ref1] = {"matched": ref2, "value": result}
|
||||
elif result > results[ref1]["value"]:
|
||||
results[ref1] = {"matched": ref2, "value": result}
|
||||
|
||||
result = results.get(ref1, {}).get("value", 0.0)
|
||||
logger.debug(
|
||||
"--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
|
||||
|
@ -372,8 +377,8 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
|
|||
|
||||
|
||||
def reference_check(ref1, ref2, ds1, ds2, **weights):
|
||||
"""For two references, de-reference the object and perform object-based
|
||||
semantic equivalence. The score influences the result of an edge check."""
|
||||
"""For two references, de-reference the object and perform object_similarity.
|
||||
The score influences the result of an edge check."""
|
||||
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
|
||||
result = 0.0
|
||||
|
||||
|
@ -394,7 +399,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
|
|||
|
||||
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
||||
"""For objects that contain multiple references (i.e., object_refs) perform
|
||||
the same de-reference procedure and perform object-based semantic equivalence.
|
||||
the same de-reference procedure and perform object_similarity.
|
||||
The score influences the objects containing these references. The result is
|
||||
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
||||
results = {}
|
||||
|
|
Loading…
Reference in New Issue