From 77eda29471991170e418ad5fddfeb9564dea114c Mon Sep 17 00:00:00 2001
From: Chris Lenk <clenk@mitre.org>
Date: Wed, 11 Dec 2019 13:13:36 -0500
Subject: [PATCH] Add default weight_dict to documentation for semantic
 equivalence

---
 .gitignore           |   1 +
 docs/conf.py         |  10 +++++
 stix2/environment.py | 105 +++++++++++++++++++++++--------------------
 3 files changed, 68 insertions(+), 48 deletions(-)
diff --git a/.gitignore b/.gitignore
index 5534a28..9758937 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ coverage.xml
 # Sphinx documentation
 docs/_build/
 .ipynb_checkpoints
+default_sem_eq_weights.rst
 
 # PyBuilder
 target/
diff --git a/docs/conf.py b/docs/conf.py
index 2a10fbd..8b372d5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,5 @@
 import datetime
+import json
 import os
 import re
 import sys
@@ -7,6 +8,7 @@ from six import class_types
 from sphinx.ext.autodoc import ClassDocumenter
 
 from stix2.base import _STIXBase
+from stix2.environment import WEIGHTS
 from stix2.version import __version__
 
 sys.path.insert(0, os.path.abspath('..'))
@@ -59,6 +61,14 @@ latex_documents = [
     (master_doc, 'stix2.tex', 'stix2 Documentation', 'OASIS', 'manual'),
 ]
 
+# Write a formatted version of environment.WEIGHTS to an .rst file for inclusion in the docs
+default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__)
+default_sem_eq_weights = default_sem_eq_weights.replace('\n', '\n    ')
+default_sem_eq_weights = default_sem_eq_weights.replace('               "', '               ')
+default_sem_eq_weights = default_sem_eq_weights.replace('"\n', '\n')
+with open('default_sem_eq_weights.rst', 'w') as f:
+    f.write(".. code-block:: py\n\n   {}\n\n".format(default_sem_eq_weights))
+
 
 def get_property_type(prop):
     """Convert property classname into pretty string name of property.
diff --git a/stix2/environment.py b/stix2/environment.py
index 85381b2..1d99567 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -210,59 +210,17 @@ class Environment(DataStoreMixin):
             Course of Action, Intrusion-Set, Observed-Data, Report are not supported
             by this implementation. Indicator pattern check is also limited.
 
+        Note:
+            Default weight_dict:
+
+            .. include:: ../default_sem_eq_weights.rst
+
         Note:
             This implementation follows the Committee Note on semantic equivalence.
             see `the Committee Note <link here>`__.
 
         """
-        # default weights used for the semantic equivalence process
-        weights = {
-            "attack-pattern": {
-                "name": (30, partial_string_based),
-                "external_references": (70, partial_external_reference_based),
-            },
-            "campaign": {
-                "name": (60, partial_string_based),
-                "aliases": (40, partial_list_based),
-            },
-            "identity": {
-                "name": (60, partial_string_based),
-                "identity_class": (20, exact_match),
-                "sectors": (20, partial_list_based),
-            },
-            "indicator": {
-                "indicator_types": (15, partial_list_based),
-                "pattern": (80, custom_pattern_based),
-                "valid_from": (5, partial_timestamp_based),
-                "tdelta": 1,  # One day interval
-            },
-            "location": {
-                "longitude_latitude": (34, partial_location_distance),
-                "region": (33, exact_match),
-                "country": (33, exact_match),
-                "threshold": 1000.0,
-            },
-            "malware": {
-                "malware_types": (20, partial_list_based),
-                "name": (80, partial_string_based),
-            },
-            "threat-actor": {
-                "name": (60, partial_string_based),
-                "threat_actor_types": (20, partial_list_based),
-                "aliases": (20, partial_list_based),
-            },
-            "tool": {
-                "tool_types": (20, partial_list_based),
-                "name": (80, partial_string_based),
-            },
-            "vulnerability": {
-                "name": (30, partial_string_based),
-                "external_references": (70, partial_external_reference_based),
-            },
-            "_internal": {
-                "ignore_spec_version": False,
-            },
-        }
+        weights = WEIGHTS.copy()
 
         if weight_dict:
             weights.update(weight_dict)
@@ -316,6 +274,7 @@ class Environment(DataStoreMixin):
                 try:
                     matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
                 except TypeError:
+                    # method doesn't support detailed output with prop_scores
                     matching_score, sum_weights = method(obj1, obj2, **weights[type1])
                 logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
 
@@ -507,3 +466,53 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
         (lat1, long1), (lat2, long2), threshold, result,
     )
     return result
+
+
+# default weights used for the semantic equivalence process
+WEIGHTS = {
+    "attack-pattern": {
+        "name": (30, partial_string_based),
+        "external_references": (70, partial_external_reference_based),
+    },
+    "campaign": {
+        "name": (60, partial_string_based),
+        "aliases": (40, partial_list_based),
+    },
+    "identity": {
+        "name": (60, partial_string_based),
+        "identity_class": (20, exact_match),
+        "sectors": (20, partial_list_based),
+    },
+    "indicator": {
+        "indicator_types": (15, partial_list_based),
+        "pattern": (80, custom_pattern_based),
+        "valid_from": (5, partial_timestamp_based),
+        "tdelta": 1,  # One day interval
+    },
+    "location": {
+        "longitude_latitude": (34, partial_location_distance),
+        "region": (33, exact_match),
+        "country": (33, exact_match),
+        "threshold": 1000.0,
+    },
+    "malware": {
+        "malware_types": (20, partial_list_based),
+        "name": (80, partial_string_based),
+    },
+    "threat-actor": {
+        "name": (60, partial_string_based),
+        "threat_actor_types": (20, partial_list_based),
+        "aliases": (20, partial_list_based),
+    },
+    "tool": {
+        "tool_types": (20, partial_list_based),
+        "name": (80, partial_string_based),
+    },
+    "vulnerability": {
+        "name": (30, partial_string_based),
+        "external_references": (70, partial_external_reference_based),
+    },
+    "_internal": {
+        "ignore_spec_version": False,
+    },
+}  #: :autodoc-skip: