From ace64c4042984439536ce4fcbbe500ca2215dfa7 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 21 Dec 2020 17:53:53 -0500
Subject: [PATCH 01/23] provide pagination support for requests in the
 TAXIICollectionSource

---
 stix2/datastore/taxii.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index 41d1e54..1600253 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -12,6 +12,8 @@ from stix2.parsing import parse
 from stix2.utils import deduplicate
 
 try:
+    from taxii2client import v20 as tcv20
+    from taxii2client import v21 as tcv21
     from taxii2client.exceptions import ValidationError
     _taxii2_client = True
 except ImportError:
@@ -144,9 +146,12 @@ class TAXIICollectionSource(DataSource):
         collection (taxii2.Collection): TAXII Collection instance
         allow_custom (bool): Whether to allow custom STIX content to be
             added to the FileSystemSink. Default: True
+        items_per_page (int): How many STIX objects to request per call
+            to TAXII Server. This value is tunable, but servers may override
+            if their internal limit is surpassed.
 
     """
-    def __init__(self, collection, allow_custom=True):
+    def __init__(self, collection, allow_custom=True, items_per_page=5000):
         super(TAXIICollectionSource, self).__init__()
         if not _taxii2_client:
             raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
@@ -167,6 +172,7 @@ class TAXIICollectionSource(DataSource):
             )
 
         self.allow_custom = allow_custom
+        self.items_per_page = items_per_page
 
     def get(self, stix_id, version=None, _composite_filters=None):
         """Retrieve STIX object from local/remote STIX Collection
@@ -286,8 +292,19 @@ class TAXIICollectionSource(DataSource):
         taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
 
         # query TAXII collection
+        all_data = []
         try:
-            all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
+            if isinstance(self.collection, tcv21.Collection):
+                envelope = self.collection.get_objects(**taxii_filters_dict)
+                all_data.extend(envelope.get("objects", []))
+
+                # The while loop will not be executed if the response is received in full.
+                while envelope.get("more", False):
+                    envelope = self.collection.get_objects(limit=self.items_per_page, next=envelope.get("next", ""))
+                    all_data.extend(envelope.get("objects", []))
+            else:
+                for bundle in tcv20.as_pages(self.collection.get_objects, per_request=self.items_per_page):
+                    all_data.extend(bundle.get("objects", []))
 
             # deduplicate data (before filtering as reduces wasted filtering)
             all_data = deduplicate(all_data)

From 76eebeb549bb8bfbedee944bae356eebbe236abe Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Tue, 22 Dec 2020 16:52:27 -0500
Subject: [PATCH 02/23] expose **taxii_filters_dict on requests

---
 stix2/datastore/taxii.py               |  4 ++--
 stix2/test/v20/test_datastore_taxii.py | 11 +++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index 1600253..61d2366 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -300,10 +300,10 @@ class TAXIICollectionSource(DataSource):
 
                 # The while loop will not be executed if the response is received in full.
                 while envelope.get("more", False):
-                    envelope = self.collection.get_objects(limit=self.items_per_page, next=envelope.get("next", ""))
+                    envelope = self.collection.get_objects(limit=self.items_per_page, next=envelope.get("next", ""), **taxii_filters_dict)
                     all_data.extend(envelope.get("objects", []))
             else:
-                for bundle in tcv20.as_pages(self.collection.get_objects, per_request=self.items_per_page):
+                for bundle in tcv20.as_pages(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
                     all_data.extend(bundle.get("objects", []))
 
             # deduplicate data (before filtering as reduces wasted filtering)
diff --git a/stix2/test/v20/test_datastore_taxii.py b/stix2/test/v20/test_datastore_taxii.py
index 0b21981..cd051f1 100644
--- a/stix2/test/v20/test_datastore_taxii.py
+++ b/stix2/test/v20/test_datastore_taxii.py
@@ -5,7 +5,7 @@ import pytest
 from requests.models import Response
 import six
 from taxii2client.common import _filter_kwargs_to_query_params
-from taxii2client.v20 import Collection
+from taxii2client.v20 import MEDIA_TYPE_STIX_V20, Collection
 
 import stix2
 from stix2.datastore import DataSourceError
@@ -35,12 +35,12 @@ class MockTAXIICollectionEndpoint(Collection):
                 {
                     "date_added": get_timestamp(),
                     "id": object["id"],
-                    "media_type": "application/stix+json;version=2.1",
+                    "media_type": "application/stix+json;version=2.0",
                     "version": object.get("modified", object.get("created", get_timestamp())),
                 },
             )
 
-    def get_objects(self, **filter_kwargs):
+    def get_objects(self, accept=MEDIA_TYPE_STIX_V20, start=0, per_request=0, **filter_kwargs):
         self._verify_can_read()
         query_params = _filter_kwargs_to_query_params(filter_kwargs)
         assert isinstance(query_params, dict)
@@ -52,7 +52,10 @@ class MockTAXIICollectionEndpoint(Collection):
             100,
         )[0]
         if objs:
-            return stix2.v20.Bundle(objects=objs)
+            resp = Response()
+            resp.encoding = "utf-8"
+            resp._content = six.ensure_binary(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False))
+            return resp
         else:
             resp = Response()
             resp.status_code = 404

From a7eb4113deb931e8cf744d0711d66fa695d6405d Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Tue, 2 Feb 2021 00:04:04 -0500
Subject: [PATCH 03/23] add items_per_page to TAXIICollectionStore, use
 tcv21.as_pages to align API

---
 stix2/datastore/taxii.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index 61d2366..a0e2706 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -35,9 +35,12 @@ class TAXIICollectionStore(DataStoreMixin):
             side(retrieving data) and False for TAXIICollectionSink
             side(pushing data). However, when parameter is supplied, it will
             be applied to both TAXIICollectionSource/Sink.
+        items_per_page (int): How many STIX objects to request per call
+            to TAXII Server. The value can be tuned, but servers may override
+            if their internal limit is surpassed. Used by TAXIICollectionSource
 
     """
-    def __init__(self, collection, allow_custom=None):
+    def __init__(self, collection, allow_custom=None, items_per_page=5000):
         if allow_custom is None:
             allow_custom_source = True
             allow_custom_sink = False
@@ -45,7 +48,7 @@ class TAXIICollectionStore(DataStoreMixin):
             allow_custom_sink = allow_custom_source = allow_custom
 
         super(TAXIICollectionStore, self).__init__(
-            source=TAXIICollectionSource(collection, allow_custom=allow_custom_source),
+            source=TAXIICollectionSource(collection, allow_custom=allow_custom_source, items_per_page=items_per_page),
             sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
         )
 
@@ -147,7 +150,7 @@ class TAXIICollectionSource(DataSource):
         allow_custom (bool): Whether to allow custom STIX content to be
             added to the FileSystemSink. Default: True
         items_per_page (int): How many STIX objects to request per call
-            to TAXII Server. This value is tunable, but servers may override
+            to TAXII Server. The value can be tuned, but servers may override
             if their internal limit is surpassed.
 
     """
@@ -295,12 +298,7 @@ class TAXIICollectionSource(DataSource):
         all_data = []
         try:
             if isinstance(self.collection, tcv21.Collection):
-                envelope = self.collection.get_objects(**taxii_filters_dict)
-                all_data.extend(envelope.get("objects", []))
-
-                # The while loop will not be executed if the response is received in full.
-                while envelope.get("more", False):
-                    envelope = self.collection.get_objects(limit=self.items_per_page, next=envelope.get("next", ""), **taxii_filters_dict)
+                for envelope in tcv21.as_pages(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
                     all_data.extend(envelope.get("objects", []))
             else:
                 for bundle in tcv20.as_pages(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):

From 30fd8c3464dd58895ad87d42dc584b2fee6ceb1c Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Tue, 2 Feb 2021 00:08:11 -0500
Subject: [PATCH 04/23] compact TAXII 2.0/2.1 paged requests into one loop

---
 stix2/datastore/taxii.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index a0e2706..9ad6df9 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -297,12 +297,10 @@ class TAXIICollectionSource(DataSource):
         # query TAXII collection
         all_data = []
         try:
-            if isinstance(self.collection, tcv21.Collection):
-                for envelope in tcv21.as_pages(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
-                    all_data.extend(envelope.get("objects", []))
-            else:
-                for bundle in tcv20.as_pages(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
-                    all_data.extend(bundle.get("objects", []))
+            paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
+
+            for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
+                all_data.extend(resource.get("objects", []))
 
             # deduplicate data (before filtering as reduces wasted filtering)
             all_data = deduplicate(all_data)

From f9a52eeed3236c9721d889d4b32e6f48cce1c120 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Fri, 26 Feb 2021 19:19:33 -0500
Subject: [PATCH 05/23] WIP: changes to both similarity functions, expose
 settings

---
 stix2/environment.py                 |  18 ++--
 stix2/equivalence/graph/__init__.py  |  62 ++++---------
 stix2/equivalence/object/__init__.py | 126 ++++++++++++++++++++-------
 3 files changed, 123 insertions(+), 83 deletions(-)

diff --git a/stix2/environment.py b/stix2/environment.py
index d0f694e..75e5fa5 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -189,7 +189,8 @@ class Environment(DataStoreMixin):
             return None
 
     @staticmethod
-    def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
+    def object_similarity(obj1, obj2, prop_scores={}, ignore_spec_version=False,
+                     versioning_checks=False, max_depth=1, **weight_dict):
         """This method returns a measure of how similar the two objects are.
 
         Args:
@@ -220,10 +221,12 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return object_similarity(obj1, obj2, prop_scores, **weight_dict)
+        return object_similarity(obj1, obj2, prop_scores, ignore_spec_version,
+                                versioning_checks, max_depth, **weight_dict)
 
     @staticmethod
-    def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
+    def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, ignore_spec_version=False,
+                     versioning_checks=False, max_depth=1, **weight_dict):
         """This method returns a true/false value if two objects are semantically equivalent.
         Internally, it calls the object_similarity function and compares it against the given
         threshold value.
@@ -263,7 +266,8 @@ class Environment(DataStoreMixin):
         return object_equivalence(obj1, obj2, prop_scores, threshold, **weight_dict)
 
     @staticmethod
-    def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
+    def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
+                     versioning_checks=False, max_depth=1, **weight_dict):
         """This method returns a similarity score for two given graphs.
         Each DataStore can contain a connected or disconnected graph and the
         final result is weighted over the amount of objects we managed to compare.
@@ -298,10 +302,12 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return graph_similarity(ds1, ds2, prop_scores, **weight_dict)
+        return graph_similarity(ds1, ds2, prop_scores, ignore_spec_version,
+                                versioning_checks, max_depth, **weight_dict)
 
     @staticmethod
-    def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
+    def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, ignore_spec_version=False,
+                     versioning_checks=False, max_depth=1, **weight_dict):
         """This method returns a true/false value if two graphs are semantically equivalent.
         Internally, it calls the graph_similarity function and compares it against the given
         threshold value.
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index e78624e..1dcccf1 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -53,7 +53,8 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
     return False
 
 
-def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
+def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
+                     versioning_checks=False, max_depth=1, **weight_dict):
     """This method returns a similarity score for two given graphs.
     Each DataStore can contain a connected or disconnected graph and the
     final result is weighted over the amount of objects we managed to compare.
@@ -65,6 +66,9 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         ds2: A DataStore object instance representing your graph
         prop_scores: A dictionary that can hold individual property scores,
             weights, contributing score, matching score and sum of weights.
+        ignore_spec_version: As
+        versioning_checks: As
+        max_depth: As
         weight_dict: A dictionary that can be used to override settings
             in the similarity process
 
@@ -90,13 +94,21 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
     """
     results = {}
     similarity_score = 0
-    weights = GRAPH_WEIGHTS.copy()
+    weights = WEIGHTS.copy()
 
     if weight_dict:
         weights.update(weight_dict)
 
+    weights["_internal"] = {
+        "ignore_spec_version": ignore_spec_version,
+        "versioning_checks": versioning_checks,
+        "ds1": ds1,
+        "ds2": ds2,
+        "max_depth": max_depth,
+    }
+
     if weights["_internal"]["max_depth"] <= 0:
-        raise ValueError("weight_dict['_internal']['max_depth'] must be greater than 0")
+        raise ValueError("'max_depth' must be greater than 0")
 
     pairs = _object_pairs(
         _bucket_per_type(ds1.query([])),
@@ -104,16 +116,15 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         weights,
     )
 
-    weights["_internal"]["ds1"] = ds1
-    weights["_internal"]["ds2"] = ds2
-
     logger.debug("Starting graph similarity process between DataStores: '%s' and '%s'", ds1.id, ds2.id)
     for object1, object2 in pairs:
         iprop_score = {}
         object1_id = object1["id"]
         object2_id = object2["id"]
 
-        result = object_similarity(object1, object2, iprop_score, **weights)
+        result = object_similarity(object1, object2, iprop_score, ds1, ds2,
+                                   ignore_spec_version, versioning_checks,
+                                   max_depth, **weights)
 
         if object1_id not in results:
             results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
@@ -141,40 +152,3 @@ def graph_similarity(ds1, ds2, prop_scores={}, **weight_dict):
         similarity_score,
     )
     return similarity_score
-
-
-# default weights used for the graph similarity process
-GRAPH_WEIGHTS = WEIGHTS.copy()
-GRAPH_WEIGHTS.update({
-    "grouping": {
-        "name": (20, partial_string_based),
-        "context": (20, partial_string_based),
-        "object_refs": (60, list_reference_check),
-    },
-    "relationship": {
-        "relationship_type": (20, exact_match),
-        "source_ref": (40, reference_check),
-        "target_ref": (40, reference_check),
-    },
-    "report": {
-        "name": (30, partial_string_based),
-        "published": (10, partial_timestamp_based),
-        "object_refs": (60, list_reference_check),
-        "tdelta": 1,  # One day interval
-    },
-    "sighting": {
-        "first_seen": (5, partial_timestamp_based),
-        "last_seen": (5, partial_timestamp_based),
-        "sighting_of_ref": (40, reference_check),
-        "observed_data_refs": (20, list_reference_check),
-        "where_sighted_refs": (20, list_reference_check),
-        "summary": (10, exact_match),
-    },
-    "_internal": {
-        "ignore_spec_version": False,
-        "versioning_checks": False,
-        "ds1": None,
-        "ds2": None,
-        "max_depth": 1,
-    },
-})  # :autodoc-skip:
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index e175938..8bae111 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -4,7 +4,7 @@ import itertools
 import logging
 import time
 
-from ...datastore import Filter
+from ...datastore import Filter, DataStoreMixin, DataSink, DataSource
 from ...utils import STIXdatetime, parse_into_datetime
 from ..pattern import equivalent_patterns
 
@@ -54,7 +54,9 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
     return False
 
 
-def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
+def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
+                      ignore_spec_version=False, versioning_checks=False,
+                      max_depth=1, **weight_dict):
     """This method returns a measure of similarity depending on how
     similar the two objects are.
 
@@ -63,6 +65,11 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
         obj2: A stix2 object instance
         prop_scores: A dictionary that can hold individual property scores,
             weights, contributing score, matching score and sum of weights.
+        ds1: As
+        ds2: As
+        ignore_spec_version: As
+        versioning_checks: As
+        max_depth: As
         weight_dict: A dictionary that can be used to override settings
             in the similarity process
 
@@ -91,6 +98,14 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
     if weight_dict:
         weights.update(weight_dict)
 
+    weights["_internal"] = {
+        "ignore_spec_version": ignore_spec_version,
+        "versioning_checks": versioning_checks,
+        "ds1": ds1,
+        "ds2": ds2,
+        "max_depth": max_depth,
+    }
+
     type1, type2 = obj1["type"], obj2["type"]
     ignore_spec_version = weights["_internal"]["ignore_spec_version"]
 
@@ -117,6 +132,7 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
                 if check_property_present(prop, obj1, obj2):
                     w = weights[type1][prop][0]
                     comp_funct = weights[type1][prop][1]
+                    prop_scores[prop] = {}
 
                     if comp_funct == partial_timestamp_based:
                         contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
@@ -124,24 +140,30 @@ def object_similarity(obj1, obj2, prop_scores={}, **weight_dict):
                         threshold = weights[type1]["threshold"]
                         contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
                     elif comp_funct == reference_check or comp_funct == list_reference_check:
-                        max_depth = weights["_internal"]["max_depth"]
-                        if max_depth > 0:
-                            weights["_internal"]["max_depth"] = max_depth - 1
+                        max_depth_i = weights["_internal"]["max_depth"]
+                        if max_depth_i > 0:
+                            weights["_internal"]["max_depth"] = max_depth_i - 1
                             ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
-                            contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
+                            if _datastore_check(ds1, ds2):
+                                contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
+                            elif comp_funct == reference_check:
+                                comp_funct = exact_match
+                                contributing_score = w * comp_funct(obj1[prop], obj2[prop])
+                            elif comp_funct == list_reference_check:
+                                comp_funct = partial_list_based
+                                contributing_score = w * comp_funct(obj1[prop], obj2[prop])
+                            prop_scores[prop]["method"] = comp_funct.__name__
                         else:
                             continue  # prevent excessive recursion
-                        weights["_internal"]["max_depth"] = max_depth
+                        weights["_internal"]["max_depth"] = max_depth_i
                     else:
                         contributing_score = w * comp_funct(obj1[prop], obj2[prop])
 
                     sum_weights += w
                     matching_score += contributing_score
 
-                    prop_scores[prop] = {
-                        "weight": w,
-                        "contributing_score": contributing_score,
-                    }
+                    prop_scores[prop]["weight"] = w
+                    prop_scores[prop]["contributing_score"] = contributing_score
                     logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
 
             prop_scores["matching_score"] = matching_score
@@ -196,7 +218,9 @@ def partial_timestamp_based(t1, t2, tdelta):
 
 
 def partial_list_based(l1, l2):
-    """Performs a partial list matching via finding the intersection between common values.
+    """Performs a partial list matching via finding the intersection between
+    common values. Repeated values are counted only once. This method can be
+    used for *_refs equality checks when de-reference is not possible.
 
     Args:
         l1: A list of values.
@@ -213,7 +237,8 @@ def partial_list_based(l1, l2):
 
 
 def exact_match(val1, val2):
-    """Performs an exact value match based on two values
+    """Performs an exact value match based on two values. This method can be
+    used for *_ref equality check when de-reference is not possible.
 
     Args:
         val1: A value suitable for an equality test.
@@ -275,15 +300,8 @@ def partial_external_reference_based(refs1, refs2):
     allowed = {"veris", "cve", "capec", "mitre-attack"}
     matches = 0
 
-    if len(refs1) >= len(refs2):
-        l1 = refs1
-        l2 = refs2
-    else:
-        l1 = refs2
-        l2 = refs1
-
-    for ext_ref1 in l1:
-        for ext_ref2 in l2:
+    for ext_ref1 in refs1:
+        for ext_ref2 in refs2:
             sn_match = False
             ei_match = False
             url_match = False
@@ -352,17 +370,21 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
     """Checks multiple object versions if present in graph.
     Maximizes for the similarity score of a particular version."""
     results = {}
-    objects1 = ds1.query([Filter("id", "=", ref1)])
-    objects2 = ds2.query([Filter("id", "=", ref2)])
 
     pairs = _object_pairs(
-        _bucket_per_type(objects1),
-        _bucket_per_type(objects2),
+        _bucket_per_type(ds1.query([Filter("id", "=", ref1)])),
+        _bucket_per_type(ds2.query([Filter("id", "=", ref2)])),
         weights,
     )
+    ignore_spec_version = weights["_internal"]["ignore_spec_version"]
+    versioning_checks = weights["_internal"]["versioning_checks"]
+    max_depth = weights["_internal"]["max_depth"]
 
     for object1, object2 in pairs:
-        result = object_similarity(object1, object2, **weights)
+        result = object_similarity(object1, object2, ds1=ds1, ds2=ds2,
+                                   ignore_spec_version=ignore_spec_version,
+                                   versioning_checks=versioning_checks,
+                                   max_depth=max_depth, **weights)
         if ref1 not in results:
             results[ref1] = {"matched": ref2, "value": result}
         elif result > results[ref1]["value"]:
@@ -383,12 +405,18 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
     result = 0.0
 
     if type1 == type2 and type1 in weights:
-        if weights["_internal"]["versioning_checks"]:
+        ignore_spec_version = weights["_internal"]["ignore_spec_version"]
+        versioning_checks = weights["_internal"]["versioning_checks"]
+        max_depth = weights["_internal"]["max_depth"]
+        if versioning_checks:
             result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0
         else:
             o1, o2 = ds1.get(ref1), ds2.get(ref2)
             if o1 and o2:
-                result = object_similarity(o1, o2, **weights) / 100.0
+                result = object_similarity(o1, o2, ds1=ds1, ds2=ds2,
+                                           ignore_spec_version=ignore_spec_version,
+                                           versioning_checks=versioning_checks,
+                                           max_depth=max_depth, **weights) / 100.0
 
     logger.debug(
         "--\t\treference_check '%s' '%s'\tresult: '%s'",
@@ -439,6 +467,13 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
     return result
 
 
+def _datastore_check(ds1, ds2):
+    if (issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or
+            issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))):
+        return True
+    return False
+
+
 def _bucket_per_type(graph, mode="type"):
     """Given a list of objects or references, bucket them by type.
     Depending on the list type: extract from 'type' property or using
@@ -480,11 +515,20 @@ WEIGHTS = {
         "name": (60, partial_string_based),
         "external_references": (40, partial_external_reference_based),
     },
+    "grouping": {
+        "name": (20, partial_string_based),
+        "context": (20, partial_string_based),
+        "object_refs": (60, list_reference_check),
+    },
     "identity": {
         "name": (60, partial_string_based),
         "identity_class": (20, exact_match),
         "sectors": (20, partial_list_based),
     },
+    "incident": {
+        "name": (60, partial_string_based),
+        "external_references": (40, partial_external_reference_based),
+    },
     "indicator": {
         "indicator_types": (15, partial_list_based),
         "pattern": (80, custom_pattern_based),
@@ -511,6 +555,25 @@ WEIGHTS = {
         "definition": (60, exact_match),
         "definition_type": (20, exact_match),
     },
+    "relationship": {
+        "relationship_type": (20, exact_match),
+        "source_ref": (40, reference_check),
+        "target_ref": (40, reference_check),
+    },
+    "report": {
+        "name": (30, partial_string_based),
+        "published": (10, partial_timestamp_based),
+        "object_refs": (60, list_reference_check),
+        "tdelta": 1,  # One day interval
+    },
+    "sighting": {
+        "first_seen": (5, partial_timestamp_based),
+        "last_seen": (5, partial_timestamp_based),
+        "sighting_of_ref": (40, reference_check),
+        "observed_data_refs": (20, list_reference_check),
+        "where_sighted_refs": (20, list_reference_check),
+        "summary": (10, exact_match),
+    },
     "threat-actor": {
         "name": (60, partial_string_based),
         "threat_actor_types": (20, partial_list_based),
@@ -523,8 +586,5 @@ WEIGHTS = {
     "vulnerability": {
         "name": (30, partial_string_based),
         "external_references": (70, partial_external_reference_based),
-    },
-    "_internal": {
-        "ignore_spec_version": False,
-    },
+    }
 }  # :autodoc-skip:

From ff5014c606858053ad6eee6a13438a67dffe388f Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 12:27:52 -0500
Subject: [PATCH 06/23] expose configuration options, combine weight
 dictionary, update tests

---
 docs/conf.py                         |   9 +-
 stix2/environment.py                 | 109 +++++++++++++----
 stix2/equivalence/graph/__init__.py  |  60 +++++++---
 stix2/equivalence/object/__init__.py | 169 +++++++++++++++-----------
 stix2/test/v20/test_environment.py   | 143 +++-------------------
 stix2/test/v21/test_environment.py   | 172 ++++++---------------------
 6 files changed, 284 insertions(+), 378 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 5d12af3..62e829d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -66,16 +66,9 @@ object_default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o:
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n    ')
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('               "', '               ')
 object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n')
-with open('object_default_sem_eq_weights.rst', 'w') as f:
+with open('similarity_weights.rst', 'w') as f:
     f.write(".. code-block:: python\n\n   {}\n\n".format(object_default_sem_eq_weights))
 
-graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__)
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n    ')
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('               "', '               ')
-graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n')
-with open('graph_default_sem_eq_weights.rst', 'w') as f:
-    f.write(".. code-block:: python\n\n   {}\n\n".format(graph_default_sem_eq_weights))
-
 
 def get_property_type(prop):
     """Convert property classname into pretty string name of property.
diff --git a/stix2/environment.py b/stix2/environment.py
index 75e5fa5..b37b485 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -189,8 +189,11 @@ class Environment(DataStoreMixin):
             return None
 
     @staticmethod
-    def object_similarity(obj1, obj2, prop_scores={}, ignore_spec_version=False,
-                     versioning_checks=False, max_depth=1, **weight_dict):
+    def object_similarity(
+        obj1, obj2, prop_scores={}, ds1=None, ds2=None,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a measure of how similar the two objects are.
 
         Args:
@@ -198,8 +201,19 @@ class Environment(DataStoreMixin):
             obj2: A stix2 object instance
             prop_scores: A dictionary that can hold individual property scores,
                 weights, contributing score, matching score and sum of weights.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ds1: An optional DataStore object instance used to resolve the objects referenced by obj1
+            ds2: An optional DataStore object instance used to resolve the objects referenced by obj2
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -221,12 +235,17 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return object_similarity(obj1, obj2, prop_scores, ignore_spec_version,
-                                versioning_checks, max_depth, **weight_dict)
+        return object_similarity(
+            obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, ignore_spec_version=False,
-                     versioning_checks=False, max_depth=1, **weight_dict):
+    def object_equivalence(
+        obj1, obj2, prop_scores={}, threshold=70, ds1=None, ds2=None,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a true/false value if two objects are semantically equivalent.
         Internally, it calls the object_similarity function and compares it against the given
         threshold value.
@@ -239,8 +258,19 @@ class Environment(DataStoreMixin):
             threshold: A numerical value between 0 and 100 to determine the minimum
                 score to result in successfully calling both objects equivalent. This
                 value can be tuned.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ds1: An optional DataStore object instance used to resolve the objects referenced by obj1
+            ds2: An optional DataStore object instance used to resolve the objects referenced by obj2
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             bool: True if the result of the object similarity is greater than or equal to
@@ -263,11 +293,16 @@ class Environment(DataStoreMixin):
             see `the Committee Note <link here>`__.
 
         """
-        return object_equivalence(obj1, obj2, prop_scores, threshold, **weight_dict)
+        return object_equivalence(
+            obj1, obj2, prop_scores, threshold, ds1, ds2,
+            ignore_spec_version, versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
-                     versioning_checks=False, max_depth=1, **weight_dict):
+    def graph_similarity(
+        ds1, ds2, prop_scores={}, ignore_spec_version=False,
+        versioning_checks=False, max_depth=1, **weight_dict
+    ):
         """This method returns a similarity score for two given graphs.
         Each DataStore can contain a connected or disconnected graph and the
         final result is weighted over the amount of objects we managed to compare.
@@ -279,8 +314,17 @@ class Environment(DataStoreMixin):
             ds2: A DataStore object instance representing your graph
             prop_scores: A dictionary that can hold individual property scores,
                 weights, contributing score, matching score and sum of weights.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -295,19 +339,24 @@ class Environment(DataStoreMixin):
         Note:
             Default weight_dict:
 
-            .. include:: ../graph_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return graph_similarity(ds1, ds2, prop_scores, ignore_spec_version,
-                                versioning_checks, max_depth, **weight_dict)
+        return graph_similarity(
+            ds1, ds2, prop_scores, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )
 
     @staticmethod
-    def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, ignore_spec_version=False,
-                     versioning_checks=False, max_depth=1, **weight_dict):
+    def graph_equivalence(
+        ds1, ds2, prop_scores={}, threshold=70,
+        ignore_spec_version=False, versioning_checks=False,
+        max_depth=1, **weight_dict
+    ):
         """This method returns a true/false value if two graphs are semantically equivalent.
         Internally, it calls the graph_similarity function and compares it against the given
         threshold value.
@@ -320,8 +369,17 @@ class Environment(DataStoreMixin):
             threshold: A numerical value between 0 and 100 to determine the minimum
                 score to result in successfully calling both graphs equivalent. This
                 value can be tuned.
-            weight_dict: A dictionary that can be used to override settings
-                in the similarity process
+            ignore_spec_version: A boolean indicating whether to test object types
+                that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+                If set to True this check will be skipped.
+            versioning_checks: A boolean indicating whether to test multiple revisions
+                of the same object (when present) to maximize similarity against a
+                particular version. If set to True the algorithm will perform this step.
+            max_depth: A positive integer indicating the maximum recursion depth the
+                algorithm can reach when de-referencing objects and performing the
+                object_similarity algorithm.
+            weight_dict: A dictionary that can be used to override what checks are done
+                to objects in the similarity process.
 
         Returns:
             bool: True if the result of the graph similarity is greater than or equal to
@@ -337,11 +395,14 @@ class Environment(DataStoreMixin):
         Note:
             Default weight_dict:
 
-            .. include:: ../graph_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
             see `the Committee Note <link here>`__.
 
         """
-        return graph_equivalence(ds1, ds2, prop_scores, threshold, **weight_dict)
+        return graph_equivalence(
+            ds1, ds2, prop_scores, threshold, ignore_spec_version,
+            versioning_checks, max_depth, **weight_dict
+        )
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index 1dcccf1..1d43219 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -10,7 +10,11 @@ from ..object import (
 logger = logging.getLogger(__name__)
 
 
-def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
+def graph_equivalence(
+    ds1, ds2, prop_scores={}, threshold=70,
+    ignore_spec_version=False, versioning_checks=False,
+    max_depth=1, **weight_dict
+):
     """This method returns a true/false value if two graphs are semantically equivalent.
     Internally, it calls the graph_similarity function and compares it against the given
     threshold value.
@@ -23,8 +27,17 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
         threshold: A numerical value between 0 and 100 to determine the minimum
             score to result in successfully calling both graphs equivalent. This
             value can be tuned.
-        weight_dict: A dictionary that can be used to override settings
-            in the similarity process
+        ignore_spec_version: A boolean indicating whether to test object types
+            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+            If set to True this check will be skipped.
+        versioning_checks: A boolean indicating whether to test multiple revisions
+            of the same object (when present) to maximize similarity against a
+            particular version. If set to True the algorithm will perform this step.
+        max_depth: A positive integer indicating the maximum recursion depth the
+            algorithm can reach when de-referencing objects and performing the
+            object_similarity algorithm.
+        weight_dict: A dictionary that can be used to override what checks are done
+            to objects in the similarity process.
 
     Returns:
         bool: True if the result of the graph similarity is greater than or equal to
@@ -40,21 +53,26 @@ def graph_equivalence(ds1, ds2, prop_scores={}, threshold=70, **weight_dict):
     Note:
         Default weight_dict:
 
-        .. include:: ../../graph_default_sem_eq_weights.rst
+        .. include:: ../../similarity_weights.rst
 
     Note:
         This implementation follows the Semantic Equivalence Committee Note.
         see `the Committee Note <link here>`__.
 
     """
-    similarity_result = graph_similarity(ds1, ds2, prop_scores, **weight_dict)
+    similarity_result = graph_similarity(
+        ds1, ds2, prop_scores, ignore_spec_version,
+        versioning_checks, max_depth, **weight_dict
+    )
     if similarity_result >= threshold:
         return True
     return False
 
 
-def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
-                     versioning_checks=False, max_depth=1, **weight_dict):
+def graph_similarity(
+    ds1, ds2, prop_scores={}, ignore_spec_version=False,
+    versioning_checks=False, max_depth=1, **weight_dict
+):
     """This method returns a similarity score for two given graphs.
     Each DataStore can contain a connected or disconnected graph and the
     final result is weighted over the amount of objects we managed to compare.
@@ -66,11 +84,17 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
         ds2: A DataStore object instance representing your graph
         prop_scores: A dictionary that can hold individual property scores,
             weights, contributing score, matching score and sum of weights.
-        ignore_spec_version: As
-        versioning_checks: As
-        max_depth: As
-        weight_dict: A dictionary that can be used to override settings
-            in the similarity process
+        ignore_spec_version: A boolean indicating whether to test object types
+            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+            If set to True this check will be skipped.
+        versioning_checks: A boolean indicating whether to test multiple revisions
+            of the same object (when present) to maximize similarity against a
+            particular version. If set to True the algorithm will perform this step.
+        max_depth: A positive integer indicating the maximum recursion depth the
+            algorithm can reach when de-referencing objects and performing the
+            object_similarity algorithm.
+        weight_dict: A dictionary that can be used to override what checks are done
+            to objects in the similarity process.
 
     Returns:
         float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -85,7 +109,7 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
     Note:
         Default weight_dict:
 
-        .. include:: ../../graph_default_sem_eq_weights.rst
+        .. include:: ../../similarity_weights.rst
 
     Note:
         This implementation follows the Semantic Equivalence Committee Note.
@@ -107,7 +131,7 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
         "max_depth": max_depth,
     }
 
-    if weights["_internal"]["max_depth"] <= 0:
+    if max_depth <= 0:
         raise ValueError("'max_depth' must be greater than 0")
 
     pairs = _object_pairs(
@@ -122,9 +146,11 @@ def graph_similarity(ds1, ds2, prop_scores={}, ignore_spec_version=False,
         object1_id = object1["id"]
         object2_id = object2["id"]
 
-        result = object_similarity(object1, object2, iprop_score, ds1, ds2,
-                                   ignore_spec_version, versioning_checks,
-                                   max_depth, **weights)
+        result = object_similarity(
+            object1, object2, iprop_score, ds1, ds2,
+            ignore_spec_version, versioning_checks,
+            max_depth, **weights
+        )
 
         if object1_id not in results:
             results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 8bae111..71a263c 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -4,14 +4,18 @@ import itertools
 import logging
 import time
 
-from ...datastore import Filter, DataStoreMixin, DataSink, DataSource
+from ...datastore import DataSink, DataSource, DataStoreMixin, Filter
 from ...utils import STIXdatetime, parse_into_datetime
 from ..pattern import equivalent_patterns
 
 logger = logging.getLogger(__name__)
 
 
-def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
+def object_equivalence(
+    obj1, obj2, prop_scores={}, threshold=70, ds1=None,
+    ds2=None, ignore_spec_version=False,
+    versioning_checks=False, max_depth=1, **weight_dict
+):
     """This method returns a true/false value if two objects are semantically equivalent.
     Internally, it calls the object_similarity function and compares it against the given
     threshold value.
@@ -24,8 +28,19 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
         threshold: A numerical value between 0 and 100 to determine the minimum
             score to result in successfully calling both objects equivalent. This
             value can be tuned.
-        weight_dict: A dictionary that can be used to override settings
-            in the similarity process
+        ds1: An optional DataStore object instance used to resolve the objects referenced by obj1
+        ds2: An optional DataStore object instance used to resolve the objects referenced by obj2
+        ignore_spec_version: A boolean indicating whether to test object types
+            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+            If set to True this check will be skipped.
+        versioning_checks: A boolean indicating whether to test multiple revisions
+            of the same object (when present) to maximize similarity against a
+            particular version. If set to True the algorithm will perform this step.
+        max_depth: A positive integer indicating the maximum recursion depth the
+            algorithm can reach when de-referencing objects and performing the
+            object_similarity algorithm.
+        weight_dict: A dictionary that can be used to override what checks are done
+            to objects in the similarity process.
 
     Returns:
         bool: True if the result of the object similarity is greater than or equal to
@@ -41,22 +56,27 @@ def object_equivalence(obj1, obj2, prop_scores={}, threshold=70, **weight_dict):
     Note:
         Default weight_dict:
 
-        .. include:: ../../object_default_sem_eq_weights.rst
+        .. include:: ../../similarity_weights.rst
 
     Note:
         This implementation follows the Semantic Equivalence Committee Note.
         see `the Committee Note <link here>`__.
 
     """
-    similarity_result = object_similarity(obj1, obj2, prop_scores, **weight_dict)
+    similarity_result = object_similarity(
+        obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
+        versioning_checks, max_depth, **weight_dict
+    )
     if similarity_result >= threshold:
         return True
     return False
 
 
-def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
-                      ignore_spec_version=False, versioning_checks=False,
-                      max_depth=1, **weight_dict):
+def object_similarity(
+    obj1, obj2, prop_scores={}, ds1=None, ds2=None,
+    ignore_spec_version=False, versioning_checks=False,
+    max_depth=1, **weight_dict
+):
     """This method returns a measure of similarity depending on how
     similar the two objects are.
 
@@ -65,13 +85,19 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
         obj2: A stix2 object instance
         prop_scores: A dictionary that can hold individual property scores,
             weights, contributing score, matching score and sum of weights.
-        ds1: As
-        ds2: As
-        ignore_spec_version: As
-        versioning_checks: As
-        max_depth: As
-        weight_dict: A dictionary that can be used to override settings
-            in the similarity process
+        ds1: An optional DataStore object instance used to resolve the objects referenced by obj1
+        ds2: An optional DataStore object instance used to resolve the objects referenced by obj2
+        ignore_spec_version: A boolean indicating whether to test object types
+            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
+            If set to True this check will be skipped.
+        versioning_checks: A boolean indicating whether to test multiple revisions
+            of the same object (when present) to maximize similarity against a
+            particular version. If set to True the algorithm will perform this step.
+        max_depth: A positive integer indicating the maximum recursion depth the
+            algorithm can reach when de-referencing objects and performing the
+            object_similarity algorithm.
+        weight_dict: A dictionary that can be used to override what checks are done
+            to objects in the similarity process.
 
     Returns:
         float: A number between 0.0 and 100.0 as a measurement of similarity.
@@ -86,7 +112,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
     Note:
         Default weight_dict:
 
-        .. include:: ../../object_default_sem_eq_weights.rst
+        .. include:: ../../similarity_weights.rst
 
     Note:
         This implementation follows the Semantic Equivalence Committee Note.
@@ -107,7 +133,6 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
     }
 
     type1, type2 = obj1["type"], obj2["type"]
-    ignore_spec_version = weights["_internal"]["ignore_spec_version"]
 
     if type1 != type2:
         raise ValueError('The objects to compare must be of the same type!')
@@ -140,9 +165,8 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
                         threshold = weights[type1]["threshold"]
                         contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
                     elif comp_funct == reference_check or comp_funct == list_reference_check:
-                        max_depth_i = weights["_internal"]["max_depth"]
-                        if max_depth_i > 0:
-                            weights["_internal"]["max_depth"] = max_depth_i - 1
+                        if max_depth > 0:
+                            weights["_internal"]["max_depth"] = max_depth - 1
                             ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
                             if _datastore_check(ds1, ds2):
                                 contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
@@ -155,7 +179,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
                             prop_scores[prop]["method"] = comp_funct.__name__
                         else:
                             continue  # prevent excessive recursion
-                        weights["_internal"]["max_depth"] = max_depth_i
+                        weights["_internal"]["max_depth"] = max_depth
                     else:
                         contributing_score = w * comp_funct(obj1[prop], obj2[prop])
 
@@ -187,7 +211,7 @@ def object_similarity(obj1, obj2, prop_scores={}, ds1=None, ds2=None,
 def check_property_present(prop, obj1, obj2):
     """Helper method checks if a property is present on both objects."""
     if prop == "longitude_latitude":
-        if all(x in obj1 and x in obj2 for x in ['latitude', 'longitude']):
+        if all(x in obj1 and x in obj2 for x in ('latitude', 'longitude')):
             return True
     elif prop in obj1 and prop in obj2:
         return True
@@ -286,12 +310,12 @@ def custom_pattern_based(pattern1, pattern2):
     return equivalent_patterns(pattern1, pattern2)
 
 
-def partial_external_reference_based(refs1, refs2):
+def partial_external_reference_based(ext_refs1, ext_refs2):
     """Performs a matching on External References.
 
     Args:
-        refs1: A list of external references.
-        refs2: A list of external references.
+        ext_refs1: A list of external references.
+        ext_refs2: A list of external references.
 
     Returns:
         float: Number between 0.0 and 1.0 depending on matches.
@@ -300,44 +324,47 @@ def partial_external_reference_based(refs1, refs2):
     allowed = {"veris", "cve", "capec", "mitre-attack"}
     matches = 0
 
-    for ext_ref1 in refs1:
-        for ext_ref2 in refs2:
-            sn_match = False
-            ei_match = False
-            url_match = False
-            source_name = None
+    ref_pairs = itertools.chain(
+        itertools.product(ext_refs1, ext_refs2),
+    )
 
-            if check_property_present("source_name", ext_ref1, ext_ref2):
-                if ext_ref1["source_name"] == ext_ref2["source_name"]:
-                    source_name = ext_ref1["source_name"]
-                    sn_match = True
-            if check_property_present("external_id", ext_ref1, ext_ref2):
-                if ext_ref1["external_id"] == ext_ref2["external_id"]:
-                    ei_match = True
-            if check_property_present("url", ext_ref1, ext_ref2):
-                if ext_ref1["url"] == ext_ref2["url"]:
-                    url_match = True
+    for ext_ref1, ext_ref2 in ref_pairs:
+        sn_match = False
+        ei_match = False
+        url_match = False
+        source_name = None
 
-            # Special case: if source_name is a STIX defined name and either
-            # external_id or url match then its a perfect match and other entries
-            # can be ignored.
-            if sn_match and (ei_match or url_match) and source_name in allowed:
-                result = 1.0
-                logger.debug(
-                    "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
-                    refs1, refs2, result,
-                )
-                return result
+        if check_property_present("source_name", ext_ref1, ext_ref2):
+            if ext_ref1["source_name"] == ext_ref2["source_name"]:
+                source_name = ext_ref1["source_name"]
+                sn_match = True
+        if check_property_present("external_id", ext_ref1, ext_ref2):
+            if ext_ref1["external_id"] == ext_ref2["external_id"]:
+                ei_match = True
+        if check_property_present("url", ext_ref1, ext_ref2):
+            if ext_ref1["url"] == ext_ref2["url"]:
+                url_match = True
 
-            # Regular check. If the source_name (not STIX-defined) or external_id or
-            # url matches then we consider the entry a match.
-            if (sn_match or ei_match or url_match) and source_name not in allowed:
-                matches += 1
+        # Special case: if source_name is a STIX defined name and either
+        # external_id or url match then its a perfect match and other entries
+        # can be ignored.
+        if sn_match and (ei_match or url_match) and source_name in allowed:
+            result = 1.0
+            logger.debug(
+                "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
+                ext_refs1, ext_refs2, result,
+            )
+            return result
 
-    result = matches / max(len(refs1), len(refs2))
+        # Regular check. If the source_name (not STIX-defined) or external_id or
+        # url matches then we consider the entry a match.
+        if (sn_match or ei_match or url_match) and source_name not in allowed:
+            matches += 1
+
+    result = matches / max(len(ext_refs1), len(ext_refs2))
     logger.debug(
         "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
-        refs1, refs2, result,
+        ext_refs1, ext_refs2, result,
     )
     return result
 
@@ -381,10 +408,11 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
     max_depth = weights["_internal"]["max_depth"]
 
     for object1, object2 in pairs:
-        result = object_similarity(object1, object2, ds1=ds1, ds2=ds2,
-                                   ignore_spec_version=ignore_spec_version,
-                                   versioning_checks=versioning_checks,
-                                   max_depth=max_depth, **weights)
+        # Keyword arguments are required: the third positional slot is prop_scores, not ds1.
+        result = object_similarity(
+            object1, object2, ds1=ds1, ds2=ds2, ignore_spec_version=ignore_spec_version,
+            versioning_checks=versioning_checks, max_depth=max_depth, **weights
+        )
         if ref1 not in results:
             results[ref1] = {"matched": ref2, "value": result}
         elif result > results[ref1]["value"]:
@@ -413,10 +441,11 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
         else:
             o1, o2 = ds1.get(ref1), ds2.get(ref2)
             if o1 and o2:
-                result = object_similarity(o1, o2, ds1=ds1, ds2=ds2,
-                                           ignore_spec_version=ignore_spec_version,
-                                           versioning_checks=versioning_checks,
-                                           max_depth=max_depth, **weights) / 100.0
+                # Keyword arguments are required: the third positional slot is prop_scores, not ds1.
+                result = object_similarity(
+                    o1, o2, ds1=ds1, ds2=ds2, ignore_spec_version=ignore_spec_version,
+                    versioning_checks=versioning_checks, max_depth=max_depth, **weights
+                ) / 100.0
 
     logger.debug(
         "--\t\treference_check '%s' '%s'\tresult: '%s'",
@@ -468,8 +497,10 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
 
 
 def _datastore_check(ds1, ds2):
-    if (issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or
-            issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))):
+    if (
+        issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or
+        issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))
+    ):
         return True
     return False
 
@@ -586,5 +617,5 @@ WEIGHTS = {
     "vulnerability": {
         "name": (30, partial_string_based),
         "external_references": (70, partial_external_reference_based),
-    }
+    },
 }  # :autodoc-skip:
diff --git a/stix2/test/v20/test_environment.py b/stix2/test/v20/test_environment.py
index 33e0985..c8867b0 100644
--- a/stix2/test/v20/test_environment.py
+++ b/stix2/test/v20/test_environment.py
@@ -424,7 +424,7 @@ def test_related_to_by_target(ds):
 
 
 def test_versioned_checks(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": True,
@@ -437,7 +437,7 @@ def test_versioned_checks(ds, ds2):
 
 
 def test_semantic_check_with_versioning(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": False,
@@ -467,13 +467,11 @@ def test_semantic_check_with_versioning(ds, ds2):
 
 
 def test_list_semantic_check(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": False,
             "versioning_checks": False,
-            "ds1": ds,
-            "ds2": ds2,
             "max_depth": 1,
         },
     })
@@ -504,39 +502,18 @@ def test_list_semantic_check(ds, ds2):
 
 
 def test_graph_similarity_raises_value_error(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": -1,
-        },
-    }
     with pytest.raises(ValueError):
         prop_scores1 = {}
-        stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+        stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1)
 
 
 def test_graph_similarity_with_filesystem_source(ds, fs):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, ignore_spec_version=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, ignore_spec_version=True)
 
     assert round(env1) == 25
     assert round(prop_scores1["matching_score"]) == 451
@@ -552,41 +529,20 @@ def test_graph_similarity_with_filesystem_source(ds, fs):
 
 
 def test_graph_similarity_with_duplicate_graph(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores = {}
-    env = stix2.Environment().graph_similarity(ds, ds, prop_scores, **weights)
+    env = stix2.Environment().graph_similarity(ds, ds, prop_scores)
     assert round(env) == 100
     assert round(prop_scores["matching_score"]) == 800
     assert round(prop_scores["len_pairs"]) == 8
 
 
 def test_graph_similarity_with_versioning_check_on(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True)
 
     assert round(env1) == 88
     assert round(prop_scores1["matching_score"]) == 789
@@ -602,26 +558,12 @@ def test_graph_similarity_with_versioning_check_on(ds2, ds):
 
 
 def test_graph_similarity_with_versioning_check_off(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2)
 
     assert round(env1) == 88
     assert round(prop_scores1["matching_score"]) == 789
@@ -637,26 +579,12 @@ def test_graph_similarity_with_versioning_check_off(ds2, ds):
 
 
 def test_graph_equivalence_with_filesystem_source(ds, fs):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True)
 
     assert env1 is False
     assert round(prop_scores1["matching_score"]) == 451
@@ -672,41 +600,20 @@ def test_graph_equivalence_with_filesystem_source(ds, fs):
 
 
 def test_graph_equivalence_with_duplicate_graph(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores = {}
-    env = stix2.Environment().graph_equivalence(ds, ds, prop_scores, **weights)
+    env = stix2.Environment().graph_equivalence(ds, ds, prop_scores)
     assert env is True
     assert round(prop_scores["matching_score"]) == 800
     assert round(prop_scores["len_pairs"]) == 8
 
 
 def test_graph_equivalence_with_versioning_check_on(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True)
 
     assert env1 is True
     assert round(prop_scores1["matching_score"]) == 789
@@ -722,26 +629,12 @@ def test_graph_equivalence_with_versioning_check_on(ds2, ds):
 
 
 def test_graph_equivalence_with_versioning_check_off(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2)
 
     assert env1 is True
     assert round(prop_scores1["matching_score"]) == 789
diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py
index e7bf4da..6a14bf3 100644
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@@ -760,16 +760,13 @@ def test_object_similarity_different_spec_version():
             "valid_from": (5, stix2.equivalence.object.partial_timestamp_based),
             "tdelta": 1,  # One day interval
         },
-        "_internal": {
-            "ignore_spec_version": True,  # Disables spec_version check.
-        },
     }
     ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
     ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **IND_KWARGS)
-    env = stix2.Environment().object_similarity(ind1, ind2, **weights)
+    env = stix2.Environment().object_similarity(ind1, ind2, ignore_spec_version=True, **weights)
     assert round(env) == 0
 
-    env = stix2.Environment().object_similarity(ind2, ind1, **weights)
+    env = stix2.Environment().object_similarity(ind2, ind1, ignore_spec_version=True, **weights)
     assert round(env) == 0
 
 
@@ -861,7 +858,9 @@ def test_object_similarity_exact_match():
 def test_non_existent_config_for_object():
     r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
     r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
-    assert stix2.Environment().object_similarity(r1, r2) == 0.0
+    prop_scores = {}
+    assert stix2.Environment().object_similarity(r1, r2, prop_scores) == 100.0
+    assert prop_scores["object_refs"]["method"] == "partial_list_based"
 
 
 def custom_semantic_equivalence_method(obj1, obj2, **weights):
@@ -937,7 +936,8 @@ def test_object_similarity_prop_scores_method_provided():
 
 
 def test_versioned_checks(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    # Testing internal method
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": True,
@@ -950,7 +950,7 @@ def test_versioned_checks(ds, ds2):
 
 
 def test_semantic_check_with_versioning(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": False,
@@ -981,7 +981,7 @@ def test_semantic_check_with_versioning(ds, ds2):
 
 
 def test_list_semantic_check(ds, ds2):
-    weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+    weights = stix2.equivalence.graph.WEIGHTS.copy()
     weights.update({
         "_internal": {
             "ignore_spec_version": False,
@@ -1027,39 +1027,28 @@ def test_list_semantic_check(ds, ds2):
 
 
 def test_graph_similarity_raises_value_error(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": -1,
-        },
-    }
     with pytest.raises(ValueError):
         prop_scores1 = {}
-        stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+        stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1)
 
 
 def test_graph_similarity_with_filesystem_source(ds, fs):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(
+        fs, ds, prop_scores1,
+        ignore_spec_version=True,
+        versioning_checks=False,
+        max_depth=1,
+    )
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(
+        ds, fs, prop_scores2,
+        ignore_spec_version=True,
+        versioning_checks=False,
+        max_depth=1,
+    )
 
     assert round(env1) == 23
     assert round(prop_scores1["matching_score"]) == 411
@@ -1154,14 +1143,11 @@ def test_depth_limiting():
             "some2_ref": (33, stix2.equivalence.object.reference_check),
             "name": (34, stix2.equivalence.object.partial_string_based),
         },
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
     }
     prop_scores1 = {}
-    env1 = stix2.equivalence.graph.graph_similarity(mem_store1, mem_store2, prop_scores1, **custom_weights)
+    env1 = stix2.equivalence.graph.graph_similarity(
+        mem_store1, mem_store2, prop_scores1, **custom_weights
+    )
 
     assert round(env1) == 38
     assert round(prop_scores1["matching_score"]) == 300
@@ -1185,44 +1171,23 @@ def test_depth_limiting():
 
 
 def test_graph_similarity_with_duplicate_graph(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores = {}
-    env = stix2.Environment().graph_similarity(ds, ds, prop_scores, **weights)
+    env = stix2.Environment().graph_similarity(ds, ds, prop_scores)
     assert round(env) == 100
     assert round(prop_scores["matching_score"]) == 800
     assert round(prop_scores["len_pairs"]) == 8
 
 
 def test_graph_similarity_with_versioning_check_on(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True)
     assert round(env1) == 88
     assert round(prop_scores1["matching_score"]) == 789
     assert round(prop_scores1["len_pairs"]) == 9
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True)
     assert round(env2) == 88
     assert round(prop_scores2["matching_score"]) == 789
     assert round(prop_scores2["len_pairs"]) == 9
@@ -1233,29 +1198,15 @@ def test_graph_similarity_with_versioning_check_on(ds2, ds):
 
 
 def test_graph_similarity_with_versioning_check_off(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1)
     assert round(env1) == 88
     assert round(prop_scores1["matching_score"]) == 789
     assert round(prop_scores1["len_pairs"]) == 9
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2)
     assert round(env2) == 88
     assert round(prop_scores2["matching_score"]) == 789
     assert round(prop_scores2["len_pairs"]) == 9
@@ -1266,26 +1217,12 @@ def test_graph_similarity_with_versioning_check_off(ds2, ds):
 
 
 def test_graph_equivalence_with_filesystem_source(ds, fs):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": True,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True)
 
     assert env1 is False
     assert round(prop_scores1["matching_score"]) == 411
@@ -1301,41 +1238,20 @@ def test_graph_equivalence_with_filesystem_source(ds, fs):
 
 
 def test_graph_equivalence_with_duplicate_graph(ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores = {}
-    env = stix2.Environment().graph_equivalence(ds, ds, prop_scores, **weights)
+    env = stix2.Environment().graph_equivalence(ds, ds, prop_scores)
     assert env is True
     assert round(prop_scores["matching_score"]) == 800
     assert round(prop_scores["len_pairs"]) == 8
 
 
 def test_graph_equivalence_with_versioning_check_on(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": True,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True)
 
     assert env1 is True
     assert round(prop_scores1["matching_score"]) == 789
@@ -1351,26 +1267,12 @@ def test_graph_equivalence_with_versioning_check_on(ds2, ds):
 
 
 def test_graph_equivalence_with_versioning_check_off(ds2, ds):
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores1 = {}
-    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, **weights)
+    env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1)
 
     # Switching parameters
-    weights = {
-        "_internal": {
-            "ignore_spec_version": False,
-            "versioning_checks": False,
-            "max_depth": 1,
-        },
-    }
     prop_scores2 = {}
-    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, **weights)
+    env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2)
 
     assert env1 is True
     assert round(prop_scores1["matching_score"]) == 789

From d2e867b52ead1793a32e021f7583ec37a6232bc3 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 12:29:33 -0500
Subject: [PATCH 07/23] docstring corrections

---
 .gitignore           | 3 +--
 stix2/environment.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index 72b31cd..4d16202 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,8 +55,7 @@ coverage.xml
 # Sphinx documentation
 docs/_build/
 .ipynb_checkpoints
-graph_default_sem_eq_weights.rst
-object_default_sem_eq_weights.rst
+similarity_weights.rst
 
 # PyBuilder
 target/
diff --git a/stix2/environment.py b/stix2/environment.py
index b37b485..2905b9e 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -228,7 +228,7 @@ class Environment(DataStoreMixin):
         Note:
             Default weight_dict:
 
-            .. include:: ../object_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.
@@ -286,7 +286,7 @@ class Environment(DataStoreMixin):
         Note:
             Default weight_dict:
 
-            .. include:: ../object_default_sem_eq_weights.rst
+            .. include:: ../similarity_weights.rst
 
         Note:
             This implementation follows the Semantic Equivalence Committee Note.

From 3efa4c1ce980afd54f21d5a77471b3d8eae3c464 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 12:44:35 -0500
Subject: [PATCH 08/23] revert part of the changes

---
 stix2/equivalence/object/__init__.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 71a263c..81bf23c 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -176,7 +176,7 @@ def object_similarity(
                             elif comp_funct == list_reference_check:
                                 comp_funct = partial_list_based
                                 contributing_score = w * comp_funct(obj1[prop], obj2[prop])
-                            prop_scores[prop]["method"] = comp_funct.__name__
+                            prop_scores[prop]["check_type"] = comp_funct.__name__
                         else:
                             continue  # prevent excessive recursion
                         weights["_internal"]["max_depth"] = max_depth
@@ -409,10 +409,11 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
 
     for object1, object2 in pairs:
         result = object_similarity(
-            object1, object2, ds1, ds2,
-            ignore_spec_version, versioning_checks,
-            max_depth, **weights
-        )
+                object1, object2, ds1=ds1, ds2=ds2,
+                ignore_spec_version=ignore_spec_version,
+                versioning_checks=versioning_checks,
+                max_depth=max_depth, **weights
+            )
         if ref1 not in results:
             results[ref1] = {"matched": ref2, "value": result}
         elif result > results[ref1]["value"]:
@@ -442,9 +443,10 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
             o1, o2 = ds1.get(ref1), ds2.get(ref2)
             if o1 and o2:
                 result = object_similarity(
-                    o1, o2, ds1, ds2,
-                    ignore_spec_version, versioning_checks,
-                    max_depth, **weights
+                    o1, o2, ds1=ds1, ds2=ds2,
+                    ignore_spec_version=ignore_spec_version,
+                    versioning_checks=versioning_checks,
+                    max_depth=max_depth, **weights
                 ) / 100.0
 
     logger.debug(

From e4e6f46089db5c30698435a0e5b692faaf33b47a Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 12:54:01 -0500
Subject: [PATCH 09/23] change key name for _refs check

---
 stix2/test/v21/test_environment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py
index 6a14bf3..2b781f4 100644
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@@ -860,7 +860,7 @@ def test_non_existent_config_for_object():
     r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
     prop_scores = {}
     assert stix2.Environment().object_similarity(r1, r2, prop_scores) == 100.0
-    assert prop_scores["object_refs"]["method"] == "partial_list_based"
+    assert prop_scores["object_refs"]["check_type"] == "partial_list_based"
 
 
 def custom_semantic_equivalence_method(obj1, obj2, **weights):

From 72a12e96ba23f4da68d1cc2af5fd7b60481b5e0f Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 13:14:03 -0500
Subject: [PATCH 10/23] update guide example, conf.py and remove some unused
 imports

---
 docs/conf.py                         |  1 -
 docs/guide/equivalence.ipynb         | 11 +----------
 stix2/equivalence/graph/__init__.py  |  4 +---
 stix2/equivalence/object/__init__.py |  6 +++---
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/docs/conf.py b/docs/conf.py
index 62e829d..b6dd6ea 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -7,7 +7,6 @@ import sys
 from sphinx.ext.autodoc import ClassDocumenter
 
 from stix2.base import _STIXBase
-from stix2.equivalence.graph import GRAPH_WEIGHTS
 from stix2.equivalence.object import WEIGHTS
 from stix2.version import __version__
 
diff --git a/docs/guide/equivalence.ipynb b/docs/guide/equivalence.ipynb
index e61e9ed..9e9c679 100644
--- a/docs/guide/equivalence.ipynb
+++ b/docs/guide/equivalence.ipynb
@@ -4607,20 +4607,11 @@
     "    ),\n",
     "]\n",
     "\n",
-    "\n",
-    "weights = {\n",
-    "    \"_internal\": {\n",
-    "        \"ignore_spec_version\": False,\n",
-    "        \"versioning_checks\": False,\n",
-    "        \"max_depth\": 1,\n",
-    "    },\n",
-    "}\n",
-    "\n",
     "memstore1 = MemoryStore(g1)\n",
     "memstore2 = MemoryStore(g2)\n",
     "prop_scores = {}\n",
     "\n",
-    "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores, **weights)\n",
+    "similarity_result = env.graph_similarity(memstore1, memstore2, prop_scores)\n",
     "equivalence_result = env.graph_equivalence(memstore1, memstore2, threshold=60)\n",
     "\n",
     "print(similarity_result)\n",
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
index 1d43219..1f46fd3 100644
--- a/stix2/equivalence/graph/__init__.py
+++ b/stix2/equivalence/graph/__init__.py
@@ -2,9 +2,7 @@
 import logging
 
 from ..object import (
-    WEIGHTS, _bucket_per_type, _object_pairs, exact_match,
-    list_reference_check, object_similarity, partial_string_based,
-    partial_timestamp_based, reference_check,
+    WEIGHTS, _bucket_per_type, _object_pairs, object_similarity,
 )
 
 logger = logging.getLogger(__name__)
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 81bf23c..c9bfb34 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -412,8 +412,8 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
                 object1, object2, ds1=ds1, ds2=ds2,
                 ignore_spec_version=ignore_spec_version,
                 versioning_checks=versioning_checks,
-                max_depth=max_depth, **weights
-            )
+                max_depth=max_depth, **weights,
+        )
         if ref1 not in results:
             results[ref1] = {"matched": ref2, "value": result}
         elif result > results[ref1]["value"]:
@@ -446,7 +446,7 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
                     o1, o2, ds1=ds1, ds2=ds2,
                     ignore_spec_version=ignore_spec_version,
                     versioning_checks=versioning_checks,
-                    max_depth=max_depth, **weights
+                    max_depth=max_depth, **weights,
                 ) / 100.0
 
     logger.debug(

From bd996b8750eb6ba6612a5ba47e03c7ee9a45adc3 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanvg@users.noreply.github.com>
Date: Mon, 1 Mar 2021 14:40:05 -0500
Subject: [PATCH 11/23] Update __init__.py

---
 stix2/equivalence/object/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index c9bfb34..8dcafb6 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -409,10 +409,10 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
 
     for object1, object2 in pairs:
         result = object_similarity(
-                object1, object2, ds1=ds1, ds2=ds2,
-                ignore_spec_version=ignore_spec_version,
-                versioning_checks=versioning_checks,
-                max_depth=max_depth, **weights,
+            object1, object2, ds1=ds1, ds2=ds2,
+            ignore_spec_version=ignore_spec_version,
+            versioning_checks=versioning_checks,
+            max_depth=max_depth, **weights,
         )
         if ref1 not in results:
             results[ref1] = {"matched": ref2, "value": result}

From fb6852b38f6a7ff47a2161a8ca0a06dfae0e18f0 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Mon, 1 Mar 2021 20:47:30 -0500
Subject: [PATCH 12/23] update test suite

---
 stix2/test/v20/test_datastore_taxii.py |  2 ++
 stix2/test/v21/test_datastore_taxii.py | 18 ++++++++++++------
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/stix2/test/v20/test_datastore_taxii.py b/stix2/test/v20/test_datastore_taxii.py
index cd051f1..32aba92 100644
--- a/stix2/test/v20/test_datastore_taxii.py
+++ b/stix2/test/v20/test_datastore_taxii.py
@@ -53,6 +53,8 @@ class MockTAXIICollectionEndpoint(Collection):
         )[0]
         if objs:
             resp = Response()
+            resp.status_code = 200
+            resp.headers["Content-Range"] = f"items 0-{len(objs)}/{len(objs)}"
             resp.encoding = "utf-8"
             resp._content = six.ensure_binary(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False))
             return resp
diff --git a/stix2/test/v21/test_datastore_taxii.py b/stix2/test/v21/test_datastore_taxii.py
index 92ae6dc..7a91f6f 100644
--- a/stix2/test/v21/test_datastore_taxii.py
+++ b/stix2/test/v21/test_datastore_taxii.py
@@ -29,14 +29,14 @@ class MockTAXIICollectionEndpoint(Collection):
         self._verify_can_write()
         if isinstance(bundle, six.string_types):
             bundle = json.loads(bundle)
-        for object in bundle.get("objects", []):
-            self.objects.append(object)
+        for obj in bundle.get("objects", []):
+            self.objects.append(obj)
             self.manifests.append(
                 {
                     "date_added": get_timestamp(),
-                    "id": object["id"],
+                    "id": obj["id"],
                     "media_type": "application/stix+json;version=2.1",
-                    "version": object.get("modified", object.get("created", get_timestamp())),
+                    "version": obj.get("modified", obj.get("created", get_timestamp())),
                 },
             )
 
@@ -52,7 +52,10 @@ class MockTAXIICollectionEndpoint(Collection):
             100,
         )[0]
         if objs:
-            return stix2.v21.Bundle(objects=objs)
+            return {
+                "objects": objs,
+                "more": False,
+            }
         else:
             resp = Response()
             resp.status_code = 404
@@ -76,7 +79,10 @@ class MockTAXIICollectionEndpoint(Collection):
         else:
             filtered_objects = []
         if filtered_objects:
-            return stix2.v21.Bundle(objects=filtered_objects)
+            return {
+                "objects": filtered_objects,
+                "more": False,
+            }
         else:
             resp = Response()
             resp.status_code = 404

From 262284444ef6aafe7bba7a38568254d1489fffae Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanvg@users.noreply.github.com>
Date: Wed, 10 Mar 2021 09:52:15 -0500
Subject: [PATCH 13/23] Update stix2/environment.py

Co-authored-by: Chris Lenk <clenk@users.noreply.github.com>
---
 stix2/environment.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stix2/environment.py b/stix2/environment.py
index 2905b9e..f8624c7 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -201,8 +201,8 @@ class Environment(DataStoreMixin):
             obj2: A stix2 object instance
             prop_scores: A dictionary that can hold individual property scores,
                 weights, contributing score, matching score and sum of weights.
-            ds1: A DataStore object instance representing your graph
-            ds2: A DataStore object instance representing your graph
+            ds1 (optional): A DataStore object instance from which to pull related objects
+            ds2 (optional): A DataStore object instance from which to pull related objects
             ignore_spec_version: A boolean indicating whether to test object types
                 that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
                 If set to True this check will be skipped.

From 7d7c56c64b73ce02ec73be819ab8dad87b9c973a Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanvg@users.noreply.github.com>
Date: Wed, 10 Mar 2021 09:52:24 -0500
Subject: [PATCH 14/23] Update stix2/environment.py

---
 stix2/environment.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stix2/environment.py b/stix2/environment.py
index f8624c7..f7c13ee 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -258,8 +258,8 @@ class Environment(DataStoreMixin):
             threshold: A numerical value between 0 and 100 to determine the minimum
                 score to result in successfully calling both objects equivalent. This
                 value can be tuned.
-            ds1: A DataStore object instance representing your graph
-            ds2: A DataStore object instance representing your graph
+            ds1 (optional): A DataStore object instance from which to pull related objects
+            ds2 (optional): A DataStore object instance from which to pull related objects
             ignore_spec_version: A boolean indicating whether to test object types
                 that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
                 If set to True this check will be skipped.

From c9e66def60c2791b3d3b5cde038de4ef8abcc7de Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Wed, 10 Mar 2021 13:32:02 -0500
Subject: [PATCH 15/23] rename test, update the rest of the docstrings for
 object_similarity() and object_equivalence()

---
 stix2/equivalence/object/__init__.py | 14 +++++++-------
 stix2/test/v21/test_environment.py   |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index 8dcafb6..da2097e 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -4,7 +4,7 @@ import itertools
 import logging
 import time
 
-from ...datastore import DataSink, DataSource, DataStoreMixin, Filter
+from ...datastore import DataSource, DataStoreMixin, Filter
 from ...utils import STIXdatetime, parse_into_datetime
 from ..pattern import equivalent_patterns
 
@@ -28,8 +28,8 @@ def object_equivalence(
         threshold: A numerical value between 0 and 100 to determine the minimum
             score to result in successfully calling both objects equivalent. This
             value can be tuned.
-        ds1: A DataStore object instance representing your graph
-        ds2: A DataStore object instance representing your graph
+        ds1 (optional): A DataStore object instance from which to pull related objects
+        ds2 (optional): A DataStore object instance from which to pull related objects
         ignore_spec_version: A boolean indicating whether to test object types
             that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
             If set to True this check will be skipped.
@@ -85,8 +85,8 @@ def object_similarity(
         obj2: A stix2 object instance
         prop_scores: A dictionary that can hold individual property scores,
             weights, contributing score, matching score and sum of weights.
-        ds1: A DataStore object instance representing your graph
-        ds2: A DataStore object instance representing your graph
+        ds1 (optional): A DataStore object instance from which to pull related objects
+        ds2 (optional): A DataStore object instance from which to pull related objects
         ignore_spec_version: A boolean indicating whether to test object types
             that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
             If set to True this check will be skipped.
@@ -500,8 +500,8 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
 
 def _datastore_check(ds1, ds2):
     if (
-        issubclass(ds1.__class__, (DataStoreMixin, DataSink, DataSource)) or
-        issubclass(ds2.__class__, (DataStoreMixin, DataSink, DataSource))
+        issubclass(ds1.__class__, (DataStoreMixin, DataSource)) or
+        issubclass(ds2.__class__, (DataStoreMixin, DataSource))
     ):
         return True
     return False
diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py
index 2b781f4..7f6b71c 100644
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@@ -855,7 +855,7 @@ def test_object_similarity_exact_match():
     assert stix2.equivalence.object.exact_match(t1, t2) == 0.0
 
 
-def test_non_existent_config_for_object():
+def test_no_datastore_fallsback_list_based_check_for_refs_check():
     r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
     r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
     prop_scores = {}

From 34e9da805fe849de38bb25bb2b6fae50406a903a Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Fri, 12 Mar 2021 16:31:28 -0500
Subject: [PATCH 16/23] move taxii2client dependency to 2.3.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 2fc5d70..397b98f 100644
--- a/setup.py
+++ b/setup.py
@@ -60,7 +60,7 @@ setup(
         'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
     },
     extras_require={
-        'taxii': ['taxii2-client>=2.2.1'],
+        'taxii': ['taxii2-client>=2.3.0'],
         'semantic': ['haversine', 'rapidfuzz'],
     },
 )

From 7e5d31742c020c6c6007683a133a0c0c26178fb3 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Fri, 12 Mar 2021 16:46:34 -0500
Subject: [PATCH 17/23] remove `six` from tests

---
 stix2/test/v20/test_datastore_taxii.py | 5 ++---
 stix2/test/v21/test_datastore_taxii.py | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/stix2/test/v20/test_datastore_taxii.py b/stix2/test/v20/test_datastore_taxii.py
index 32aba92..075f0a3 100644
--- a/stix2/test/v20/test_datastore_taxii.py
+++ b/stix2/test/v20/test_datastore_taxii.py
@@ -3,7 +3,6 @@ import json
 from medallion.filters.basic_filter import BasicFilter
 import pytest
 from requests.models import Response
-import six
 from taxii2client.common import _filter_kwargs_to_query_params
 from taxii2client.v20 import MEDIA_TYPE_STIX_V20, Collection
 
@@ -27,7 +26,7 @@ class MockTAXIICollectionEndpoint(Collection):
 
     def add_objects(self, bundle):
         self._verify_can_write()
-        if isinstance(bundle, six.string_types):
+        if isinstance(bundle, str):
             bundle = json.loads(bundle)
         for object in bundle.get("objects", []):
             self.objects.append(object)
@@ -56,7 +55,7 @@ class MockTAXIICollectionEndpoint(Collection):
             resp.status_code = 200
             resp.headers["Content-Range"] = f"items 0-{len(objs)}/{len(objs)}"
             resp.encoding = "utf-8"
-            resp._content = six.ensure_binary(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False))
+            resp._content = bytes(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False), resp.encoding)
             return resp
         else:
             resp = Response()
diff --git a/stix2/test/v21/test_datastore_taxii.py b/stix2/test/v21/test_datastore_taxii.py
index 7a91f6f..62ddadc 100644
--- a/stix2/test/v21/test_datastore_taxii.py
+++ b/stix2/test/v21/test_datastore_taxii.py
@@ -3,7 +3,6 @@ import json
 from medallion.filters.basic_filter import BasicFilter
 import pytest
 from requests.models import Response
-import six
 from taxii2client.common import _filter_kwargs_to_query_params
 from taxii2client.v21 import Collection
 
@@ -27,7 +26,7 @@ class MockTAXIICollectionEndpoint(Collection):
 
     def add_objects(self, bundle):
         self._verify_can_write()
-        if isinstance(bundle, six.string_types):
+        if isinstance(bundle, str):
             bundle = json.loads(bundle)
         for obj in bundle.get("objects", []):
             self.objects.append(obj)

From f7ebd34c8c9c2331dea5fe80389e6bfbff9fe124 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanvg@users.noreply.github.com>
Date: Fri, 12 Mar 2021 16:49:23 -0500
Subject: [PATCH 18/23] Update __init__.py

fix incident entry
---
 stix2/equivalence/object/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
index da2097e..dde52ec 100644
--- a/stix2/equivalence/object/__init__.py
+++ b/stix2/equivalence/object/__init__.py
@@ -559,8 +559,8 @@ WEIGHTS = {
         "sectors": (20, partial_list_based),
     },
     "incident": {
-        "name": (60, partial_string_based),
-        "external_references": (40, partial_external_reference_based),
+        "name": (30, partial_string_based),
+        "external_references": (70, partial_external_reference_based),
     },
     "indicator": {
         "indicator_types": (15, partial_list_based),

From 827f622c045d7350d9dbbbc36f9899f692c51e43 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Wed, 17 Mar 2021 15:01:49 -0400
Subject: [PATCH 19/23] provide ability to stream STIX output to fp
 ``.write()`` file-like object

---
 stix2/base.py                 | 32 ++++++++++++++++++++++++++++-
 stix2/serialization.py        | 38 +++++++++++++++++++++++++++++++++++
 stix2/test/v20/test_bundle.py | 22 ++++++++++++++++++++
 stix2/test/v21/test_bundle.py | 22 ++++++++++++++++++++
 4 files changed, 113 insertions(+), 1 deletion(-)

diff --git a/stix2/base.py b/stix2/base.py
index b158f06..03d8ea4 100644
--- a/stix2/base.py
+++ b/stix2/base.py
@@ -17,7 +17,8 @@ from .exceptions import (
 from .markings import _MarkingsMixin
 from .markings.utils import validate
 from .serialization import (
-    STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, serialize,
+    STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, fp_serialize,
+    serialize,
 )
 from .utils import NOW, PREFIX_21_REGEX, get_timestamp
 from .versioning import new_version as _new_version
@@ -260,6 +261,35 @@ class _STIXBase(Mapping):
         """
         return serialize(self, *args, **kwargs)
 
+    def fp_serialize(self, *args, **kwargs):
+        """
+        Serialize a STIX object to a file-like supporting object.
+
+        Examples:
+            >>> import stix2
+            >>> identity = stix2.Identity(name='Example Corp.', identity_class='organization')
+            >>> print(identity.serialize(sort_keys=True))
+            {"created": "2018-06-08T19:03:54.066Z", ... "name": "Example Corp.", "type": "identity"}
+            >>> print(identity.serialize(sort_keys=True, indent=4))
+            {
+                "created": "2018-06-08T19:03:54.066Z",
+                "id": "identity--d7f3e25a-ba1c-447a-ab71-6434b092b05e",
+                "identity_class": "organization",
+                "modified": "2018-06-08T19:03:54.066Z",
+                "name": "Example Corp.",
+                "type": "identity"
+            }
+            >>> with open("example.json", mode="w", encoding="utf-8") as f:
+            >>>     identity.fp_serialize(f, pretty=True)
+
+        Returns:
+            None
+
+        See Also:
+            ``stix2.serialization.fp_serialize`` for options.
+        """
+        fp_serialize(self, *args, **kwargs)
+
 
 class _DomainObject(_STIXBase, _MarkingsMixin):
     pass
diff --git a/stix2/serialization.py b/stix2/serialization.py
index 7488eb5..8822f33 100644
--- a/stix2/serialization.py
+++ b/stix2/serialization.py
@@ -85,6 +85,44 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs):
         return json.dumps(obj, cls=STIXJSONEncoder, **kwargs)
 
 
+def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs):
+    """
+    Serialize a STIX object as a stream to file-like supporting object.
+
+    Args:
+        obj: The STIX object to be serialized.
+        fp: A ``.write()``-supporting file-like object.
+        pretty (bool): If True, output properties following the STIX specs
+            formatting. This includes indentation. Refer to notes for more
+            details. (Default: ``False``)
+        include_optional_defaults (bool): Determines whether to include
+            optional properties set to the default value defined in the spec.
+        **kwargs: The arguments for a json.dump() call.
+
+    Returns:
+        None
+
+    Note:
+        The argument ``pretty=True`` will output the STIX object following
+        spec order. Using this argument greatly impacts object serialization
+        performance. If your use case is centered on machine-to-machine
+        operation, it is recommended to set ``pretty=False``.
+
+        When ``pretty=True`` the following key-value pairs will be added or
+        overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by.
+    """
+    if pretty:
+        def sort_by(element):
+            return find_property_index(obj, *element)
+
+        kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by})
+
+    if include_optional_defaults:
+        json.dump(obj, fp, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs)
+    else:
+        json.dump(obj, fp, cls=STIXJSONEncoder, **kwargs)
+
+
 def _find(seq, val):
     """
     Search sequence 'seq' for val.  This behaves like str.find(): if not found,
diff --git a/stix2/test/v20/test_bundle.py b/stix2/test/v20/test_bundle.py
index f53d0cb..fed91e1 100644
--- a/stix2/test/v20/test_bundle.py
+++ b/stix2/test/v20/test_bundle.py
@@ -1,3 +1,4 @@
+import io
 import json
 
 import pytest
@@ -113,6 +114,27 @@ def test_bundle_id_must_start_with_bundle():
     assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
 
 
+def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
+    bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
+    buffer = io.StringIO()
+
+    bundle.fp_serialize(buffer, pretty=True)
+
+    assert str(bundle) == EXPECTED_BUNDLE
+    assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE
+    assert buffer.getvalue() == EXPECTED_BUNDLE
+
+
+def test_create_bundle_fp_serialize_false(indicator, malware, relationship):
+    bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
+    buffer = io.StringIO()
+
+    bundle.fp_serialize(buffer, sort_keys=True)
+
+    assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
+    assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
+
+
 def test_create_bundle1(indicator, malware, relationship):
     bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
 
diff --git a/stix2/test/v21/test_bundle.py b/stix2/test/v21/test_bundle.py
index 4e30c84..07014c6 100644
--- a/stix2/test/v21/test_bundle.py
+++ b/stix2/test/v21/test_bundle.py
@@ -1,3 +1,4 @@
+import io
 import json
 
 import pytest
@@ -123,6 +124,27 @@ def test_bundle_id_must_start_with_bundle():
     assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
 
 
+def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
+    bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
+    buffer = io.StringIO()
+
+    bundle.fp_serialize(buffer, pretty=True)
+
+    assert str(bundle) == EXPECTED_BUNDLE
+    assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE
+    assert buffer.getvalue() == EXPECTED_BUNDLE
+
+
+def test_create_bundle_fp_serialize_false(indicator, malware, relationship):
+    bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
+    buffer = io.StringIO()
+
+    bundle.fp_serialize(buffer, sort_keys=True)
+
+    assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
+    assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
+
+
 def test_create_bundle1(indicator, malware, relationship):
     bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
 

From 2ea9c0c63c1e40e9ecbf78dddf296e2bec28992a Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Wed, 17 Mar 2021 15:15:26 -0400
Subject: [PATCH 20/23] use it on filesystem.py data sink

---
 stix2/datastore/filesystem.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py
index d844115..2209116 100644
--- a/stix2/datastore/filesystem.py
+++ b/stix2/datastore/filesystem.py
@@ -13,7 +13,7 @@ from stix2.datastore import (
 )
 from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
 from stix2.parsing import parse
-from stix2.serialization import serialize
+from stix2.serialization import fp_serialize
 from stix2.utils import format_datetime, get_type_from_id, parse_into_datetime
 
 
@@ -584,9 +584,8 @@ class FileSystemSink(DataSink):
         if os.path.isfile(file_path):
             raise DataSourceError("Attempted to overwrite file (!) at: {}".format(file_path))
 
-        with io.open(file_path, 'w', encoding=encoding) as f:
-            stix_obj = serialize(stix_obj, pretty=True, encoding=encoding, ensure_ascii=False)
-            f.write(stix_obj)
+        with io.open(file_path, mode='w', encoding=encoding) as f:
+            fp_serialize(stix_obj, f, pretty=True, encoding=encoding, ensure_ascii=False)
 
     def add(self, stix_data=None, version=None):
         """Add STIX objects to file directory.

From 922de111ed965bb0f1b24c78d77a4e9dccacc7d8 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Thu, 18 Mar 2021 10:14:36 -0400
Subject: [PATCH 21/23] minor tweaks to docstrings

---
 stix2/base.py          | 2 +-
 stix2/serialization.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/stix2/base.py b/stix2/base.py
index 03d8ea4..2c48ef6 100644
--- a/stix2/base.py
+++ b/stix2/base.py
@@ -263,7 +263,7 @@ class _STIXBase(Mapping):
 
     def fp_serialize(self, *args, **kwargs):
         """
-        Serialize a STIX object to a file-like supporting object.
+        Serialize a STIX object to ``fp`` (a text stream file-like supporting object).
 
         Examples:
             >>> import stix2
diff --git a/stix2/serialization.py b/stix2/serialization.py
index 8822f33..660bba6 100644
--- a/stix2/serialization.py
+++ b/stix2/serialization.py
@@ -87,11 +87,11 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs):
 
 def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs):
     """
-    Serialize a STIX object as a stream to file-like supporting object.
+    Serialize a STIX object to ``fp`` (a text stream file-like supporting object).
 
     Args:
         obj: The STIX object to be serialized.
-        fp: A ``.write()``-supporting file-like object.
+        fp: A text stream file-like object supporting ``.write()``.
         pretty (bool): If True, output properties following the STIX specs
             formatting. This includes indentation. Refer to notes for more
             details. (Default: ``False``)

From c2d360d22345475ee39ca642735c4a2e338f8b50 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanuelle@mitre.org>
Date: Thu, 18 Mar 2021 18:08:31 -0400
Subject: [PATCH 22/23] apply fp_serialize() changes on main serialize() method

---
 stix2/serialization.py | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/stix2/serialization.py b/stix2/serialization.py
index 660bba6..2784d39 100644
--- a/stix2/serialization.py
+++ b/stix2/serialization.py
@@ -2,6 +2,7 @@
 
 import copy
 import datetime as dt
+import io
 
 import simplejson as json
 
@@ -73,16 +74,9 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs):
         When ``pretty=True`` the following key-value pairs will be added or
         overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by.
     """
-    if pretty:
-        def sort_by(element):
-            return find_property_index(obj, *element)
-
-        kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by})
-
-    if include_optional_defaults:
-        return json.dumps(obj, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs)
-    else:
-        return json.dumps(obj, cls=STIXJSONEncoder, **kwargs)
+    with io.StringIO() as fp:
+        fp_serialize(obj, fp, pretty, include_optional_defaults, **kwargs)
+        return fp.getvalue()
 
 
 def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs):

From 19196654c5137def1dada0b3b4a8c95d5842b322 Mon Sep 17 00:00:00 2001
From: Emmanuelle Vargas-Gonzalez <emmanvg@users.noreply.github.com>
Date: Fri, 19 Mar 2021 15:31:01 -0400
Subject: [PATCH 23/23] Apply suggestions from code review

Co-authored-by: Chris Lenk <clenk@users.noreply.github.com>
---
 stix2/test/v20/test_bundle.py | 4 ++--
 stix2/test/v21/test_bundle.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/stix2/test/v20/test_bundle.py b/stix2/test/v20/test_bundle.py
index fed91e1..ac5d239 100644
--- a/stix2/test/v20/test_bundle.py
+++ b/stix2/test/v20/test_bundle.py
@@ -114,7 +114,7 @@ def test_bundle_id_must_start_with_bundle():
     assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
 
 
-def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
+def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship):
     bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
     buffer = io.StringIO()
 
@@ -125,7 +125,7 @@ def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
     assert buffer.getvalue() == EXPECTED_BUNDLE
 
 
-def test_create_bundle_fp_serialize_false(indicator, malware, relationship):
+def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship):
     bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
     buffer = io.StringIO()
 
diff --git a/stix2/test/v21/test_bundle.py b/stix2/test/v21/test_bundle.py
index 07014c6..1cf30d0 100644
--- a/stix2/test/v21/test_bundle.py
+++ b/stix2/test/v21/test_bundle.py
@@ -124,7 +124,7 @@ def test_bundle_id_must_start_with_bundle():
     assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
 
 
-def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
+def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship):
     bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
     buffer = io.StringIO()
 
@@ -135,7 +135,7 @@ def test_create_bundle_fp_serialize_true(indicator, malware, relationship):
     assert buffer.getvalue() == EXPECTED_BUNDLE
 
 
-def test_create_bundle_fp_serialize_false(indicator, malware, relationship):
+def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship):
     bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
     buffer = io.StringIO()