From 489970718f038a67011f468c3649225971689415 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Feb 2021 22:35:37 -0500
Subject: [PATCH] WIP: changes to graph_similarity

busted main loop, symmetrical properties not present
---
 stix2/environment.py                 | 12 ++---
 stix2/equivalence/graph/__init__.py  | 73 +++++++++++++++----------
 stix2/equivalence/object/__init__.py |  6 +--
 stix2/test/v21/test_environment.py   | 79 +++++++++++++++-------------
 4 files changed, 97 insertions(+), 73 deletions(-)
diff --git a/stix2/environment.py b/stix2/environment.py
index 4dc6ff0..bc7fcaf 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -2,12 +2,12 @@
 import copy
 
 from .datastore import CompositeDataSource, DataStoreMixin
-from .equivalence.graph import graphically_equivalent
+from .equivalence.graph import graph_similarity
 from .equivalence.object import (  # noqa: F401
     WEIGHTS, check_property_present, custom_pattern_based, exact_match,
     list_reference_check, partial_external_reference_based, partial_list_based,
     partial_location_distance, partial_string_based, partial_timestamp_based,
-    reference_check, semantically_equivalent,
+    reference_check, object_similarity,
 )
 from .parsing import parse as _parse
 
@@ -197,7 +197,7 @@ class Environment(DataStoreMixin):
             return None
 
     @staticmethod
-    def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
+    def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
         """This method verifies if two objects of the same type are
         semantically equivalent.
 
@@ -229,10 +229,10 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return semantically_equivalent(obj1, obj2, prop_scores, **weight_dict)
+        return object_similarity(obj1, obj2, prop_scores, **weight_dict)
 
     @staticmethod
-    def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
+    def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         """This method verifies if two graphs are semantically equivalent.
         Each DataStore can contain a connected or disconnected graph and the
         final result is weighted over the amount of objects we managed to compare.
@@ -267,4 +267,4 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return graphically_equivalent(ds1, ds2, prop_scores, **weight_dict)
+        return graph_similarity(ds1, ds2, prop_scores, **weight_dict)
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index 680f42f..cff99d0 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -1,15 +1,17 @@
 """Python APIs for STIX 2 Graph-based Semantic Equivalence."""
+import collections
+import itertools
 import logging
 
 from ..object import (
     WEIGHTS, exact_match, list_reference_check, partial_string_based,
-    partial_timestamp_based, reference_check, semantically_equivalent,
+    partial_timestamp_based, reference_check, object_similarity,
 )
 
 logger = logging.getLogger(__name__)
 
 
-def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
+def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
     """This method verifies if two graphs are semantically equivalent.
     Each DataStore can contain a connected or disconnected graph and the
     final result is weighted over the amount of objects we managed to compare.
@@ -44,49 +46,48 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
         see `the Committee Note <link here>`__.
 
     """
+    results = {}
+    equivalence_score = 0
     weights = GRAPH_WEIGHTS.copy()
 
     if weight_dict:
         weights.update(weight_dict)
 
-    results = {}
     depth = weights["_internal"]["max_depth"]
 
-    graph1 = ds1.query([])
-    graph2 = ds2.query([])
+    graph1 = bucket_per_type(ds1.query([]))
+    graph2 = bucket_per_type(ds2.query([]))
+    pairs = object_pairs(graph1, graph2, weights)
 
-    graph1.sort(key=lambda x: x["type"])
-    graph2.sort(key=lambda x: x["type"])
-
-    if len(graph1) < len(graph2):
+    for object1, object2 in pairs:
+        iprop_score1 = {}
+        iprop_score2 = {}
+        object1_id = object1["id"]
+        object2_id = object2["id"]
+        weights["_internal"]["max_depth"] = depth
         weights["_internal"]["ds1"] = ds1
         weights["_internal"]["ds2"] = ds2
-        g1 = graph1
-        g2 = graph2
-    else:
+        result1 = object_similarity(object1, object2, iprop_score1, **weights)
+
         weights["_internal"]["ds1"] = ds2
         weights["_internal"]["ds2"] = ds1
-        g1 = graph2
-        g2 = graph1
+        result2 = object_similarity(object2, object1, iprop_score2, **weights)
 
-    for object1 in g1:
-        for object2 in g2:
-            if object1["type"] == object2["type"] and object1["type"] in weights:
-                iprop_score = {}
-                result = semantically_equivalent(object1, object2, iprop_score, **weights)
-                objects1_id = object1["id"]
-                weights["_internal"]["max_depth"] = depth
+        if object1_id not in results:
+            results[object1_id] = {"lhs": object1["id"], "rhs": object2["id"], "prop_score": iprop_score1, "value": result1}
+        elif result1 > results[object1_id]["value"]:
+            results[object1_id] = {"lhs": object1["id"], "rhs": object2["id"], "prop_score": iprop_score1, "value": result1}
 
-                if objects1_id not in results:
-                    results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
-                elif result > results[objects1_id]["value"]:
-                    results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
+        if object2_id not in results:
+            results[object2_id] = {"lhs": object2["id"], "rhs": object1["id"], "prop_score": iprop_score2, "value": result2}
+        elif result2 > results[object2_id]["value"]:  # compare the reverse-direction score that is actually stored
+            results[object2_id] = {"lhs": object2["id"], "rhs": object1["id"], "prop_score": iprop_score2, "value": result2}
 
-    equivalence_score = 0
     matching_score = sum(x["value"] for x in results.values())
-    sum_weights = len(results) * 100.0
+    sum_weights = len(results)
     if sum_weights > 0:
-        equivalence_score = (matching_score / sum_weights) * 100
+        equivalence_score = matching_score / sum_weights
+
     prop_scores["matching_score"] = matching_score
     prop_scores["sum_weights"] = sum_weights
     prop_scores["summary"] = results
@@ -100,6 +101,45 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
     return equivalence_score
 
 
+def bucket_per_type(g):
+    """Group the objects of a graph by their ``type`` value.
+
+    Args:
+        g: Iterable of objects, each subscriptable by ``"type"``.
+
+    Returns:
+        collections.defaultdict: Maps each type to the list of objects of
+        that type, in the order they were encountered.
+
+    """
+    buckets = collections.defaultdict(list)
+    # Plain loop: the appends are side effects, not values worth collecting.
+    for obj in g:
+        buckets[obj["type"]].append(obj)
+    return buckets
+
+
+def object_pairs(g1, g2, w):
+    """Pair up same-type objects across two bucketed graphs.
+
+    Args:
+        g1: Mapping of type to list of objects for the first graph.
+        g2: Mapping of type to list of objects for the second graph.
+        w: Mapping whose keys restrict which types are paired.
+
+    Returns:
+        A lazy iterator of ``(object_from_g1, object_from_g2)`` tuples.
+
+    """
+    types_in_common = set(g1.keys()).intersection(g2.keys())
+    testable_types = types_in_common.intersection(w.keys())
+
+    return itertools.chain.from_iterable(
+        itertools.product(g1[stix_type], g2[stix_type])
+        for stix_type in testable_types
+    )
+
+
 # default weights used for the graph semantic equivalence process
 GRAPH_WEIGHTS = WEIGHTS.copy()
 GRAPH_WEIGHTS.update({
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 0225788..8b1ceaa 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -9,7 +9,7 @@ from ..pattern import equivalent_patterns
 logger = logging.getLogger(__name__)
 
 
-def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
+def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
     """This method verifies if two objects of the same type are
     semantically equivalent.
 
@@ -312,7 +312,7 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
     if len(objects1) > 0 and len(objects2) > 0:
         for o1 in objects1:
             for o2 in objects2:
-                result = semantically_equivalent(o1, o2, **weights)
+                result = object_similarity(o1, o2, **weights)
                 if ref1 not in results:
                     results[ref1] = {"matched": ref2, "value": result}
                 elif result > results[ref1]["value"]:
@@ -337,7 +337,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
         else:
             o1, o2 = ds1.get(ref1), ds2.get(ref2)
             if o1 and o2:
-                result = semantically_equivalent(o1, o2, **weights) / 100.0
+                result = object_similarity(o1, o2, **weights) / 100.0
 
     logger.debug(
         "--\t\treference_check '%s' '%s'\tresult: '%s'",
diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py
index 0da01d1..5682ad1 100644
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@@ -429,7 +429,7 @@ def test_related_to_by_target(ds):
 def test_semantic_equivalence_on_same_attack_pattern1():
     ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
     ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ap1, ap2)
+    env = stix2.Environment().object_similarity(ap1, ap2)
     assert round(env) == 100
 
 
@@ -445,14 +445,14 @@ def test_semantic_equivalence_on_same_attack_pattern2():
     )
     ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
     ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ap1, ap2)
+    env = stix2.Environment().object_similarity(ap1, ap2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_campaign1():
     camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
     camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
-    env = stix2.Environment().semantically_equivalent(camp1, camp2)
+    env = stix2.Environment().object_similarity(camp1, camp2)
     assert round(env) == 100
 
 
@@ -464,14 +464,14 @@ def test_semantic_equivalence_on_same_campaign2():
     )
     camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
     camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
-    env = stix2.Environment().semantically_equivalent(camp1, camp2)
+    env = stix2.Environment().object_similarity(camp1, camp2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_identity1():
     iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
     iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
-    env = stix2.Environment().semantically_equivalent(iden1, iden2)
+    env = stix2.Environment().object_similarity(iden1, iden2)
     assert round(env) == 100
 
 
@@ -483,14 +483,14 @@ def test_semantic_equivalence_on_same_identity2():
     )
     iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
     iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
-    env = stix2.Environment().semantically_equivalent(iden1, iden2)
+    env = stix2.Environment().object_similarity(iden1, iden2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_indicator():
     ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
     ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ind1, ind2)
+    env = stix2.Environment().object_similarity(ind1, ind2)
     assert round(env) == 100
 
 
@@ -498,7 +498,7 @@ def test_semantic_equivalence_on_same_location1():
     location_kwargs = dict(latitude=45, longitude=179)
     loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
     loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
-    env = stix2.Environment().semantically_equivalent(loc1, loc2)
+    env = stix2.Environment().object_similarity(loc1, loc2)
     assert round(env) == 100
 
 
@@ -511,7 +511,7 @@ def test_semantic_equivalence_on_same_location2():
     )
     loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
     loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
-    env = stix2.Environment().semantically_equivalent(loc1, loc2)
+    env = stix2.Environment().object_similarity(loc1, loc2)
     assert round(env) == 100
 
 
@@ -519,21 +519,21 @@ def test_semantic_equivalence_location_with_no_latlong():
     loc_kwargs = dict(country="US", administrative_area="US-DC")
     loc1 = stix2.v21.Location(id=LOCATION_ID, **LOCATION_KWARGS)
     loc2 = stix2.v21.Location(id=LOCATION_ID, **loc_kwargs)
-    env = stix2.Environment().semantically_equivalent(loc1, loc2)
+    env = stix2.Environment().object_similarity(loc1, loc2)
     assert round(env) != 100
 
 
 def test_semantic_equivalence_on_same_malware():
     malw1 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
     malw2 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
-    env = stix2.Environment().semantically_equivalent(malw1, malw2)
+    env = stix2.Environment().object_similarity(malw1, malw2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_threat_actor1():
     ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
     ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ta1, ta2)
+    env = stix2.Environment().object_similarity(ta1, ta2)
     assert round(env) == 100
 
 
@@ -545,21 +545,21 @@ def test_semantic_equivalence_on_same_threat_actor2():
     )
     ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
     ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ta1, ta2)
+    env = stix2.Environment().object_similarity(ta1, ta2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_tool():
     tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
     tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
-    env = stix2.Environment().semantically_equivalent(tool1, tool2)
+    env = stix2.Environment().object_similarity(tool1, tool2)
     assert round(env) == 100
 
 
 def test_semantic_equivalence_on_same_vulnerability1():
     vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
     vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
-    env = stix2.Environment().semantically_equivalent(vul1, vul2)
+    env = stix2.Environment().object_similarity(vul1, vul2)
     assert round(env) == 100
 
 
@@ -584,7 +584,7 @@ def test_semantic_equivalence_on_same_vulnerability2():
     )
     vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS1)
     vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS2)
-    env = stix2.Environment().semantically_equivalent(vul1, vul2)
+    env = stix2.Environment().object_similarity(vul1, vul2)
     assert round(env) == 0.0
 
 
@@ -640,7 +640,7 @@ def test_semantic_equivalence_on_unknown_object():
     }
     cust1 = stix2.parse(CUSTOM_KWARGS1, allow_custom=True)
     cust2 = stix2.parse(CUSTOM_KWARGS2, allow_custom=True)
-    env = stix2.Environment().semantically_equivalent(cust1, cust2, **weights)
+    env = stix2.Environment().object_similarity(cust1, cust2, **weights)
     assert round(env) == 0
 
 
@@ -648,7 +648,7 @@ def test_semantic_equivalence_different_type_raises():
     with pytest.raises(ValueError) as excinfo:
         vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
         ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
-        stix2.Environment().semantically_equivalent(vul1, ind1)
+        stix2.Environment().object_similarity(vul1, ind1)
 
     assert str(excinfo.value) == "The objects to compare must be of the same type!"
 
@@ -661,7 +661,7 @@ def test_semantic_equivalence_different_spec_version_raises():
         )
         ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
         ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **V20_KWARGS)
-        stix2.Environment().semantically_equivalent(ind1, ind2)
+        stix2.Environment().object_similarity(ind1, ind2)
 
     assert str(excinfo.value) == "The objects to compare must be of the same spec version!"
 
@@ -686,7 +686,7 @@ def test_semantic_equivalence_zero_match():
     }
     ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
     ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ind1, ind2, **weights)
+    env = stix2.Environment().object_similarity(ind1, ind2, **weights)
     assert round(env) == 0
 
 
@@ -708,7 +708,7 @@ def test_semantic_equivalence_different_spec_version():
     }
     ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
     ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **IND_KWARGS)
-    env = stix2.Environment().semantically_equivalent(ind1, ind2, **weights)
+    env = stix2.Environment().object_similarity(ind1, ind2, **weights)
     assert round(env) == 0
 
 
@@ -800,7 +800,7 @@ def test_semantic_equivalence_exact_match():
 def test_non_existent_config_for_object():
     r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
     r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
-    assert stix2.Environment().semantically_equivalent(r1, r2) == 0.0
+    assert stix2.Environment().object_similarity(r1, r2) == 0.0
 
 
 def custom_semantic_equivalence_method(obj1, obj2, **weights):
@@ -824,7 +824,7 @@ def test_semantic_equivalence_method_provided():
 
     tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
     tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
-    env = stix2.Environment().semantically_equivalent(tool1, tool2, **weights)
+    env = stix2.Environment().object_similarity(tool1, tool2, **weights)
     assert round(env) == 96
 
 
@@ -838,7 +838,7 @@ def test_semantic_equivalence_prop_scores():
 
     tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
     tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
-    stix2.Environment().semantically_equivalent(tool1, tool2, prop_scores)
+    stix2.Environment().object_similarity(tool1, tool2, prop_scores)
     assert len(prop_scores) == 4
     assert round(prop_scores["matching_score"], 1) == 8.9
     assert round(prop_scores["sum_weights"], 1) == 100.0
@@ -868,7 +868,7 @@ def test_semantic_equivalence_prop_scores_method_provided():
 
     tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
     tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
-    env = stix2.Environment().semantically_equivalent(tool1, tool2, prop_scores, **weights)
+    env = stix2.Environment().object_similarity(tool1, tool2, prop_scores, **weights)
     assert round(env) == 96
     assert len(prop_scores) == 2
     assert prop_scores["matching_score"] == 96.0
@@ -964,12 +964,19 @@ def test_graph_equivalence_with_filesystem_source(ds):
             "max_depth": 1,
         },
     }
-    prop_scores = {}
+    prop_scores1 = {}
+    prop_scores2 = {}
     fs = stix2.FileSystemSource(FS_PATH)
-    env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights)
-    assert round(env) == 24
-    assert round(prop_scores["matching_score"]) == 122
-    assert round(prop_scores["sum_weights"]) == 500
+    env = stix2.Environment().graph_similarity(fs, ds, prop_scores1, **weights)
+    assert round(env) == 26
+    assert round(prop_scores1["matching_score"]) == 460
+    assert round(prop_scores1["sum_weights"]) == 18
+
+    env = stix2.Environment().graph_similarity(ds, fs, prop_scores2, **weights)
+    assert round(env) == 47
+    assert round(prop_scores2["matching_score"]) == 852
+    assert round(prop_scores2["sum_weights"]) == 18
+    assert prop_scores1 == prop_scores2
 
 
 def test_graph_equivalence_with_duplicate_graph(ds):
@@ -981,10 +988,10 @@ def test_graph_equivalence_with_duplicate_graph(ds):
         },
     }
     prop_scores = {}
-    env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights)
+    env = stix2.Environment().graph_similarity(ds, ds, prop_scores, **weights)
     assert round(env) == 100
     assert round(prop_scores["matching_score"]) == 800
-    assert round(prop_scores["sum_weights"]) == 800
+    assert round(prop_scores["sum_weights"]) == 8
 
 
 def test_graph_equivalence_with_versioning_check_on(ds2, ds):
@@ -996,10 +1003,10 @@ def test_graph_equivalence_with_versioning_check_on(ds2, ds):
         },
     }
     prop_scores = {}
-    env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+    env = stix2.Environment().graph_similarity(ds, ds2, prop_scores, **weights)
     assert round(env) == 93
     assert round(prop_scores["matching_score"]) == 745
-    assert round(prop_scores["sum_weights"]) == 800
+    assert round(prop_scores["sum_weights"]) == 8
 
 
 def test_graph_equivalence_with_versioning_check_off(ds2, ds):
@@ -1011,7 +1018,7 @@ def test_graph_equivalence_with_versioning_check_off(ds2, ds):
         },
     }
     prop_scores = {}
-    env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+    env = stix2.Environment().graph_similarity(ds, ds2, prop_scores, **weights)
     assert round(env) == 93
     assert round(prop_scores["matching_score"]) == 745
-    assert round(prop_scores["sum_weights"]) == 800
+    assert round(prop_scores["sum_weights"]) == 8