Make some functions internal and add docstrings for them
parent
09fd8c060b
commit
d2d85badb2
|
@ -2,9 +2,9 @@
|
|||
import logging
|
||||
|
||||
from ..object import (
|
||||
WEIGHTS, bucket_per_type, exact_match, list_reference_check, object_pairs,
|
||||
object_similarity, partial_string_based, partial_timestamp_based,
|
||||
reference_check,
|
||||
WEIGHTS, _bucket_per_type, _object_pairs, exact_match,
|
||||
list_reference_check, object_similarity, partial_string_based,
|
||||
partial_timestamp_based, reference_check,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
@ -99,9 +99,11 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
|
|||
raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
|
||||
depth = weights["_internal"]["max_depth"]
|
||||
|
||||
graph1 = bucket_per_type(ds1.query([]))
|
||||
graph2 = bucket_per_type(ds2.query([]))
|
||||
pairs = object_pairs(graph1, graph2, weights)
|
||||
pairs = _object_pairs(
|
||||
_bucket_per_type(ds1.query([])),
|
||||
_bucket_per_type(ds2.query([])),
|
||||
weights,
|
||||
)
|
||||
|
||||
for object1, object2 in pairs:
|
||||
iprop_score1 = {}
|
||||
|
|
|
@ -398,9 +398,9 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
|||
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
||||
results = {}
|
||||
|
||||
pairs = object_pairs(
|
||||
bucket_per_type(refs1, "id-split"),
|
||||
bucket_per_type(refs2, "id-split"),
|
||||
pairs = _object_pairs(
|
||||
_bucket_per_type(refs1, "id-split"),
|
||||
_bucket_per_type(refs2, "id-split"),
|
||||
weights,
|
||||
)
|
||||
|
||||
|
@ -433,7 +433,10 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
|||
return result
|
||||
|
||||
|
||||
def bucket_per_type(g, mode="type"):
|
||||
def _bucket_per_type(g, mode="type"):
|
||||
"""Given a list of objects or references, bucket them by type.
|
||||
Depending on the list type: extract from 'type' property or using
|
||||
the 'id'"""
|
||||
buckets = collections.defaultdict(list)
|
||||
if mode == "type":
|
||||
[buckets[obj["type"]].append(obj) for obj in g]
|
||||
|
@ -442,7 +445,10 @@ def bucket_per_type(g, mode="type"):
|
|||
return buckets
|
||||
|
||||
|
||||
def object_pairs(g1, g2, w):
|
||||
def _object_pairs(g1, g2, w):
|
||||
"""Returns a generator with the product of the comparable
|
||||
objects for the graph similarity process. It determines
|
||||
objects in common between graphs and objects with weights."""
|
||||
types_in_common = set(g1.keys()).intersection(g2.keys())
|
||||
testable_types = types_in_common.intersection(w.keys())
|
||||
|
||||
|
|
Loading…
Reference in New Issue