From d2d85badb2c297ea1abdfc1612d4ad847fd8a2a6 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez
Date: Tue, 16 Feb 2021 11:01:07 -0500
Subject: [PATCH] make some functions internal, add some docs for them

---
 stix2/equivalence/graph/__init__.py  | 14 ++++++++------
 stix2/equivalence/object/__init__.py | 16 +++++++++++-----
 2 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index 1a25484..3d892f4 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -2,9 +2,9 @@
 import logging
 
 from ..object import (
-    WEIGHTS, bucket_per_type, exact_match, list_reference_check, object_pairs,
-    object_similarity, partial_string_based, partial_timestamp_based,
-    reference_check,
+    WEIGHTS, _bucket_per_type, _object_pairs, exact_match,
+    list_reference_check, object_similarity, partial_string_based,
+    partial_timestamp_based, reference_check,
 )
 
 logger = logging.getLogger(__name__)
@@ -99,9 +99,11 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
 
     depth = weights["_internal"]["max_depth"]
-    graph1 = bucket_per_type(ds1.query([]))
-    graph2 = bucket_per_type(ds2.query([]))
-    pairs = object_pairs(graph1, graph2, weights)
+    pairs = _object_pairs(
+        _bucket_per_type(ds1.query([])),
+        _bucket_per_type(ds2.query([])),
+        weights,
+    )
 
     for object1, object2 in pairs:
         iprop_score1 = {}
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 29e3c4f..39eb99a 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -398,9 +398,9 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
     weighted on the amount of unique objects that could 1) be de-referenced 2)
     """
     results = {}
-    pairs = object_pairs(
-        bucket_per_type(refs1, "id-split"),
-        bucket_per_type(refs2, "id-split"),
+    pairs = _object_pairs(
+        _bucket_per_type(refs1, "id-split"),
+        _bucket_per_type(refs2, "id-split"),
         weights,
     )
 
@@ -433,7 +433,10 @@
     return result
 
 
-def bucket_per_type(g, mode="type"):
+def _bucket_per_type(g, mode="type"):
+    """Given a list of objects or references, bucket them by type.
+    Depending on the list type: extract from 'type' property or using
+    the 'id'"""
     buckets = collections.defaultdict(list)
     if mode == "type":
         [buckets[obj["type"]].append(obj) for obj in g]
@@ -442,7 +445,10 @@
     return buckets
 
 
-def object_pairs(g1, g2, w):
+def _object_pairs(g1, g2, w):
+    """Returns a generator with the product of the comparable
+    objects for the graph similarity process. It determines
+    objects in common between graphs and objects with weights."""
     types_in_common = set(g1.keys()).intersection(g2.keys())
     testable_types = types_in_common.intersection(w.keys())
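
Reviewer note: the sketch below is a simplified, standalone illustration of the behaviour the new docstrings describe for _bucket_per_type and _object_pairs; it is not the library's code. The sample graphs and the weights dict are invented for the example, and the real helper additionally supports an "id-split" mode that buckets references by the type prefix of their "id".

# Simplified sketch (not the library's exact code): bucket objects by their
# STIX "type", then pair up only objects whose type appears in both graphs
# *and* in the weights configuration. Sample data below is made up.
import collections
import itertools


def bucket_per_type(objects):
    """Group objects into lists keyed by their 'type' property."""
    buckets = collections.defaultdict(list)
    for obj in objects:
        buckets[obj["type"]].append(obj)
    return buckets


def object_pairs(buckets1, buckets2, weights):
    """Return an iterator of (obj1, obj2) pairs for every type present in
    both graphs and listed in the weights dict."""
    comparable = set(buckets1) & set(buckets2) & set(weights)
    return itertools.chain.from_iterable(
        itertools.product(buckets1[t], buckets2[t]) for t in comparable
    )


graph1 = [
    {"type": "campaign", "id": "campaign--1", "name": "Alpha"},
    {"type": "malware", "id": "malware--1", "name": "Beta"},
]
graph2 = [
    {"type": "campaign", "id": "campaign--2", "name": "Alpha Prime"},
    {"type": "identity", "id": "identity--1", "name": "ACME"},
]
weights = {"campaign": {"name": (60, "partial_string_based")}}  # hypothetical weights

for obj1, obj2 in object_pairs(bucket_per_type(graph1), bucket_per_type(graph2), weights):
    print(obj1["id"], "<->", obj2["id"])  # only the campaign objects get paired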