make some functions internal, add some docs for them
parent
09fd8c060b
commit
d2d85badb2
|
@ -2,9 +2,9 @@
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from ..object import (
|
from ..object import (
|
||||||
WEIGHTS, bucket_per_type, exact_match, list_reference_check, object_pairs,
|
WEIGHTS, _bucket_per_type, _object_pairs, exact_match,
|
||||||
object_similarity, partial_string_based, partial_timestamp_based,
|
list_reference_check, object_similarity, partial_string_based,
|
||||||
reference_check,
|
partial_timestamp_based, reference_check,
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
@ -99,9 +99,11 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
|
||||||
raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
|
raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
|
||||||
depth = weights["_internal"]["max_depth"]
|
depth = weights["_internal"]["max_depth"]
|
||||||
|
|
||||||
graph1 = bucket_per_type(ds1.query([]))
|
pairs = _object_pairs(
|
||||||
graph2 = bucket_per_type(ds2.query([]))
|
_bucket_per_type(ds1.query([])),
|
||||||
pairs = object_pairs(graph1, graph2, weights)
|
_bucket_per_type(ds2.query([])),
|
||||||
|
weights,
|
||||||
|
)
|
||||||
|
|
||||||
for object1, object2 in pairs:
|
for object1, object2 in pairs:
|
||||||
iprop_score1 = {}
|
iprop_score1 = {}
|
||||||
|
|
|
@ -398,9 +398,9 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
||||||
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
||||||
results = {}
|
results = {}
|
||||||
|
|
||||||
pairs = object_pairs(
|
pairs = _object_pairs(
|
||||||
bucket_per_type(refs1, "id-split"),
|
_bucket_per_type(refs1, "id-split"),
|
||||||
bucket_per_type(refs2, "id-split"),
|
_bucket_per_type(refs2, "id-split"),
|
||||||
weights,
|
weights,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -433,7 +433,10 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
def bucket_per_type(g, mode="type"):
|
def _bucket_per_type(g, mode="type"):
|
||||||
|
"""Given a list of objects or references, bucket them by type.
|
||||||
|
Depending on the list type: extract from 'type' property or using
|
||||||
|
the 'id'"""
|
||||||
buckets = collections.defaultdict(list)
|
buckets = collections.defaultdict(list)
|
||||||
if mode == "type":
|
if mode == "type":
|
||||||
[buckets[obj["type"]].append(obj) for obj in g]
|
[buckets[obj["type"]].append(obj) for obj in g]
|
||||||
|
@ -442,7 +445,10 @@ def bucket_per_type(g, mode="type"):
|
||||||
return buckets
|
return buckets
|
||||||
|
|
||||||
|
|
||||||
def object_pairs(g1, g2, w):
|
def _object_pairs(g1, g2, w):
|
||||||
|
"""Returns a generator with the product of the comparable
|
||||||
|
objects for the graph similarity process. It determines
|
||||||
|
objects in common between graphs and objects with weights."""
|
||||||
types_in_common = set(g1.keys()).intersection(g2.keys())
|
types_in_common = set(g1.keys()).intersection(g2.keys())
|
||||||
testable_types = types_in_common.intersection(w.keys())
|
testable_types = types_in_common.intersection(w.keys())
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue