Merge branch 'master' of github.com:oasis-open/cti-python-stix2

2019-10-22 01:30:57 +02:00 · 2019-10-22 01:30:57 +02:00 · e2a4129ad3
parent 4f1d68065a d4c0115735
commit e2a4129ad3
9 changed files with 445 additions and 156 deletions
--- a/6
+++ b/6
@ -1,6 +1,12 @@
 CHANGELOG
 =========
 1.2.1 - 2019-10-16
 * #301 Adds more detailed debugging semantic equivalence output
 * #301 Updates semantic equivalence errors
 * #300 Fixes bug with deterministic IDs for SCOs containing unicode
 1.2.0 - 2019-09-25
 * #268, #271, #273, #275, #283, #285, #290 Changes support of STIX 2.1 to WD05 (CSD02), for all object types
--- a/docs/guide/equivalence.ipynb
+++ b/docs/guide/equivalence.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 1,
   "metadata": {
    "nbsphinx": "hidden"
   },
@ -22,7 +22,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 2,
   "metadata": {
    "nbsphinx": "hidden"
   },
@ -58,7 +58,7 @@
   "source": [
    "## Checking Semantic Equivalence\n",
    "\n",
-    "The [Environment](../api/stix2.environment.rst#stix2.environment.Environment) has a function for checking if two STIX Objects are semantically equivalent. For each supported objct type, the algorithm checks if the values for a specific set of properties match. Then each matching property is weighted since every property doesn't represent the same level of importance for semantic equivalence. The result will be the sum of these weighted values, in the range of 0 to 100. A result of 0 means that the the two objects are not equivalent, and a result of 100 means that they are equivalent.\n",
+    "The [Environment](../api/stix2.environment.rst#stix2.environment.Environment) has a function for checking if two STIX Objects are semantically equivalent. For each supported object type, the algorithm checks if the values for a specific set of properties match. Then each matching property is weighted, since not every property carries the same level of importance for semantic equivalence. The result will be the sum of these weighted values, in the range of 0 to 100. A result of 0 means that the two objects are not equivalent, and a result of 100 means that they are equivalent.\n",
    "\n",
    "TODO: Add a link to the committee note when it is released.\n",
    "\n",
@ -71,7 +71,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
@ -152,7 +152,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 16,
+     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -191,12 +191,12 @@
   "source": [
    "### Campaign Example\n",
    "\n",
-    "For Campaigns, the only properties that contribute to semantic equivalence are `name` and `aliases`, with weights of 60 and 40, respectively. In this example, the two campaigns have completely different names, but slightly similar descriptions."
+    "For Campaigns, the only properties that contribute to semantic equivalence are `name` and `aliases`, with weights of 60 and 40, respectively. In this example, the two campaigns have completely different names, but slightly similar descriptions. The result may be higher than expected because the Jaro-Winkler algorithm used to compare string properties looks at the edit distance of the two strings rather than just the words in them."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
@ -277,7 +277,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 17,
+     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -286,12 +286,10 @@
    "from stix2.v21 import Campaign\n",
    "\n",
    "c1 = Campaign(\n",
-    "    name=\"Someone Attacks Somebody\",\n",
+    "    name=\"Someone Attacks Somebody\",)\n",
    "    description=\"A campaign targeting....\",)\n",
    "\n",
    "c2 = Campaign(\n",
-    "    name=\"Another Campaign\",\n",
+    "    name=\"Another Campaign\",)\n",
    "    description=\"A campaign that targets....\",)\n",
    "print(env.semantically_equivalent(c1, c2))"
   ]
  },
@ -306,7 +304,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
@ -387,7 +385,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 18,
+     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -419,8 +417,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 6,
-   "metadata": {},
+   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stderr",
@ -507,7 +507,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 19,
+     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -530,6 +530,13 @@
    "print(env.semantically_equivalent(ind1, ind2))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the patterns were identical the result would have been 100."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
@ -541,7 +548,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
@ -624,7 +631,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 20,
+     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -654,7 +661,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 8,
   "metadata": {
    "scrolled": true
   },
@ -737,7 +744,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 21,
+     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -771,7 +778,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 9,
   "metadata": {
    "scrolled": true
   },
@ -854,7 +861,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 22,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -888,7 +895,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 10,
   "metadata": {
    "scrolled": true
   },
@ -971,7 +978,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 23,
+     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -1002,7 +1009,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
@ -1083,7 +1090,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 24,
+     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -1117,7 +1124,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
@ -1137,21 +1144,102 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Some object types do not have a defined method for calculating semantic equivalence and by default will raise an error."
+    "Some object types do not have a defined method for calculating semantic equivalence and by default will give a warning and a result of zero."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
-     "ename": "SemanticEquivalenceUnsupportedTypeError",
+     "name": "stderr",
-     "evalue": "report type has no semantic equivalence implementation!",
+     "output_type": "stream",
-     "output_type": "error",
+     "text": [
-     "traceback": [
+      "'report' type has no semantic equivalence method to call!\n"
      "\u001b[0;31mSemanticEquivalenceUnsupportedTypeError\u001b[0m\u001b[0;31m:\u001b[0m report type has no semantic equivalence implementation!\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">.highlight .hll { background-color: #ffffcc }\n",
       ".highlight  { background: #f8f8f8; }\n",
       ".highlight .c { color: #408080; font-style: italic } /* Comment */\n",
       ".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
       ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
       ".highlight .o { color: #666666 } /* Operator */\n",
       ".highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
       ".highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
       ".highlight .cp { color: #BC7A00 } /* Comment.Preproc */\n",
       ".highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
       ".highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
       ".highlight .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
       ".highlight .gd { color: #A00000 } /* Generic.Deleted */\n",
       ".highlight .ge { font-style: italic } /* Generic.Emph */\n",
       ".highlight .gr { color: #FF0000 } /* Generic.Error */\n",
       ".highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       ".highlight .gi { color: #00A000 } /* Generic.Inserted */\n",
       ".highlight .go { color: #888888 } /* Generic.Output */\n",
       ".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       ".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
       ".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       ".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
       ".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       ".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       ".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       ".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
       ".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       ".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
       ".highlight .m { color: #666666 } /* Literal.Number */\n",
       ".highlight .s { color: #BA2121 } /* Literal.String */\n",
       ".highlight .na { color: #7D9029 } /* Name.Attribute */\n",
       ".highlight .nb { color: #008000 } /* Name.Builtin */\n",
       ".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       ".highlight .no { color: #880000 } /* Name.Constant */\n",
       ".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
       ".highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
       ".highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
       ".highlight .nf { color: #0000FF } /* Name.Function */\n",
       ".highlight .nl { color: #A0A000 } /* Name.Label */\n",
       ".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       ".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       ".highlight .nv { color: #19177C } /* Name.Variable */\n",
       ".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       ".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
       ".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
       ".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
       ".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
       ".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
       ".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
       ".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       ".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       ".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
       ".highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       ".highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       ".highlight .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       ".highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
       ".highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       ".highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
       ".highlight .sx { color: #008000 } /* Literal.String.Other */\n",
       ".highlight .sr { color: #BB6688 } /* Literal.String.Regex */\n",
       ".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       ".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
       ".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       ".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
       ".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
       ".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
       ".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
       ".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
       ".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span>0\n",
       "</pre></div>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
@ -1176,12 +1264,43 @@
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "By default, comparing objects of different spec versions will result in an error. You can optionally allow this by providing a configuration dictionary like in the next example:"
+    "By default, comparing objects of different spec versions will result in a `ValueError`."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "ename": "ValueError",
     "evalue": "The objects to compare must be of the same spec version!",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31mValueError\u001b[0m\u001b[0;31m:\u001b[0m The objects to compare must be of the same spec version!\n"
     ]
    }
   ],
   "source": [
    "from stix2.v20 import Identity as Identity20\n",
    "\n",
    "id20 = Identity20(\n",
    "    name=\"John Smith\",\n",
    "    identity_class=\"individual\",\n",
    ")\n",
    "print(env.semantically_equivalent(id2, id20))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "You can optionally allow comparing across spec versions by providing a configuration dictionary like in the next example:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
@ -1262,7 +1381,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 27,
+     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -1286,7 +1405,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
@ -1367,7 +1486,7 @@
       "<IPython.core.display.HTML object>"
      ]
     },
-     "execution_count": 28,
+     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -1410,6 +1529,136 @@
    "}\n",
    "print(env.semantically_equivalent(foo1, foo2, **weights))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Detailed Results\n",
    "\n",
    "If your logging level is set to `DEBUG` (or a more verbose level), the function will log more detailed results. These show the semantic equivalence and weighting for each property that is checked, to show how the final result was arrived at."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Starting semantic equivalence process between: 'threat-actor--54dc2aac-6fde-4a68-ae2a-0c0bc575ed70' and 'threat-actor--c51bce3b-a067-4692-ab77-fcdefdd3f157'\n",
      "--\t\tpartial_string_based 'Evil Org' 'James Bond'\tresult: '0.56'\n",
      "'name' check -- weight: 60, contributing score: 33.6\n",
      "--\t\tpartial_list_based '['crime-syndicate']' '['spy']'\tresult: '0.0'\n",
      "'threat_actor_types' check -- weight: 20, contributing score: 0.0\n",
      "--\t\tpartial_list_based '['super-evil']' '['007']'\tresult: '0.0'\n",
      "'aliases' check -- weight: 20, contributing score: 0.0\n",
      "Matching Score: 33.6, Sum of Weights: 100.0\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">.highlight .hll { background-color: #ffffcc }\n",
       ".highlight  { background: #f8f8f8; }\n",
       ".highlight .c { color: #408080; font-style: italic } /* Comment */\n",
       ".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
       ".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
       ".highlight .o { color: #666666 } /* Operator */\n",
       ".highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
       ".highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
       ".highlight .cp { color: #BC7A00 } /* Comment.Preproc */\n",
       ".highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
       ".highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
       ".highlight .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
       ".highlight .gd { color: #A00000 } /* Generic.Deleted */\n",
       ".highlight .ge { font-style: italic } /* Generic.Emph */\n",
       ".highlight .gr { color: #FF0000 } /* Generic.Error */\n",
       ".highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
       ".highlight .gi { color: #00A000 } /* Generic.Inserted */\n",
       ".highlight .go { color: #888888 } /* Generic.Output */\n",
       ".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
       ".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
       ".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
       ".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
       ".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
       ".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
       ".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
       ".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
       ".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
       ".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
       ".highlight .m { color: #666666 } /* Literal.Number */\n",
       ".highlight .s { color: #BA2121 } /* Literal.String */\n",
       ".highlight .na { color: #7D9029 } /* Name.Attribute */\n",
       ".highlight .nb { color: #008000 } /* Name.Builtin */\n",
       ".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
       ".highlight .no { color: #880000 } /* Name.Constant */\n",
       ".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
       ".highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
       ".highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
       ".highlight .nf { color: #0000FF } /* Name.Function */\n",
       ".highlight .nl { color: #A0A000 } /* Name.Label */\n",
       ".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
       ".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
       ".highlight .nv { color: #19177C } /* Name.Variable */\n",
       ".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
       ".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
       ".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
       ".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
       ".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
       ".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
       ".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
       ".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
       ".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
       ".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
       ".highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
       ".highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
       ".highlight .s2 { color: #BA2121 } /* Literal.String.Double */\n",
       ".highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
       ".highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
       ".highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
       ".highlight .sx { color: #008000 } /* Literal.String.Other */\n",
       ".highlight .sr { color: #BB6688 } /* Literal.String.Regex */\n",
       ".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
       ".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
       ".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
       ".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
       ".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
       ".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
       ".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
       ".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
       ".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span>33.6\n",
       "</pre></div>\n"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import logging\n",
    "logging.basicConfig(format='%(message)s')\n",
    "logger = logging.getLogger()\n",
    "logger.setLevel(logging.DEBUG)\n",
    "\n",
    "ta3 = ThreatActor(\n",
    "    threat_actor_types=[\"crime-syndicate\"],\n",
    "    name=\"Evil Org\",\n",
    "    aliases=[\"super-evil\"],\n",
    ")\n",
    "ta4 = ThreatActor(\n",
    "    threat_actor_types=[\"spy\"],\n",
    "    name=\"James Bond\",\n",
    "    aliases=[\"007\"],\n",
    ")\n",
    "print(env.semantically_equivalent(ta3, ta4))"
   ]
  }
 ],
 "metadata": {
--- a/setup.cfg
+++ b/setup.cfg
@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.0
+current_version = 1.2.1
 commit = True
 tag = True
--- a/stix2/base.py
+++ b/stix2/base.py
@ -396,11 +396,14 @@ class _Observable(_STIXBase):
            if streamlined_obj_vals:
                data = canonicalize(streamlined_obj_vals, utf8=False)
-                # try/except here to enable python 2 compatibility
+                # The situation is complicated w.r.t. python 2/3 behavior, so
-                try:
+                # I'd rather not rely on particular exceptions being raised to
                # determine what to do.  Better to just check the python version
                # directly.
                if six.PY3:
                    return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data))
-                except UnicodeDecodeError:
+                else:
-                    return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, six.binary_type(data)))
+                    return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8")))
        # We return None if there are no values specified for any of the id-contributing-properties
        return None
--- a/stix2/environment.py
+++ b/stix2/environment.py
@ -6,7 +6,6 @@ import time
 from .core import parse as _parse
 from .datastore import CompositeDataSource, DataStoreMixin
 from .exceptions import SemanticEquivalenceUnsupportedTypeError
 from .utils import STIXdatetime, parse_into_datetime
 logger = logging.getLogger(__name__)
@ -228,9 +227,6 @@ class Environment(DataStoreMixin):
                "aliases": 40,
                "method": _campaign_checks,
            },
            "course-of-action": {
                "method": _course_of_action_checks,
            },
            "identity": {
                "name": 60,
                "identity_class": 20,
@ -244,9 +240,6 @@ class Environment(DataStoreMixin):
                "tdelta": 1,  # One day interval
                "method": _indicator_checks,
            },
            "intrusion-set": {
                "method": _intrusion_set_checks,
            },
            "location": {
                "longitude_latitude": 34,
                "region": 33,
@ -259,12 +252,6 @@ class Environment(DataStoreMixin):
                "name": 80,
                "method": _malware_checks,
            },
            "observed-data": {
                "method": _observed_data_checks,
            },
            "report": {
                "method": _report_checks,
            },
            "threat-actor": {
                "name": 60,
                "threat_actor_types": 20,
@ -298,7 +285,13 @@ class Environment(DataStoreMixin):
        if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"):
            raise ValueError('The objects to compare must be of the same spec version!')
        try:
            method = weights[type1]["method"]
        except KeyError:
            logger.warning("'%s' type has no semantic equivalence method to call!", type1)
            sum_weights = matching_score = 0
        else:
            logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
            matching_score, sum_weights = method(obj1, obj2, **weights[type1])
        if sum_weights <= 0:
@ -333,7 +326,9 @@ def partial_timestamp_based(t1, t2, tdelta):
    if not isinstance(t2, STIXdatetime):
        t2 = parse_into_datetime(t2)
    t1, t2 = time.mktime(t1.timetuple()), time.mktime(t2.timetuple())
-    return 1 - min(abs(t1 - t2) / (86400 * tdelta), 1)
+    result = 1 - min(abs(t1 - t2) / (86400 * tdelta), 1)
    logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", t1, t2, tdelta, result)
    return result
 def partial_list_based(l1, l2):
@ -348,7 +343,9 @@ def partial_list_based(l1, l2):
    """
    l1_set, l2_set = set(l1), set(l2)
-    return len(l1_set.intersection(l2_set)) / max(len(l1), len(l2))
+    result = len(l1_set.intersection(l2_set)) / max(len(l1), len(l2))
    logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result)
    return result
 def exact_match(val1, val2):
@ -362,9 +359,11 @@ def exact_match(val1, val2):
        float: 1.0 if the value matches exactly, 0.0 otherwise.
    """
    result = 0.0
    if val1 == val2:
-        return 1.0
+        result = 1.0
-    return 0.0
+    logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result)
    return result
 def partial_string_based(str1, str2):
@ -379,7 +378,9 @@ def partial_string_based(str1, str2):
    """
    from pyjarowinkler import distance
-    return distance.get_jaro_distance(str1, str2)
+    result = distance.get_jaro_distance(str1, str2)
    logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, result)
    return result
 def custom_pattern_based(pattern1, pattern2):
@ -440,14 +441,24 @@ def partial_external_reference_based(refs1, refs2):
            # external_id or url match then its a perfect match and other entries
            # can be ignored.
            if sn_match and (ei_match or url_match) and source_name in allowed:
-                return 1.0
+                result = 1.0
                logger.debug(
                    "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
                    refs1, refs2, result,
                )
                return result
            # Regular check. If the source_name (not STIX-defined) or external_id or
            # url matches then we consider the entry a match.
            if (sn_match or ei_match or url_match) and source_name not in allowed:
                matches += 1
-    return matches / max(len(refs1), len(refs2))
+    result = matches / max(len(refs1), len(refs2))
    logger.debug(
        "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
        refs1, refs2, result,
    )
    return result
 def partial_location_distance(lat1, long1, lat2, long2, threshold):
@ -466,7 +477,12 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
    """
    from haversine import haversine, Unit
    distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS)
-    return 1 - (distance / threshold)
+    result = 1 - (distance / threshold)
    logger.debug(
        "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'",
        (lat1, long1), (lat2, long2), threshold, result,
    )
    return result
 def _attack_pattern_checks(obj1, obj2, **weights):
@ -474,15 +490,19 @@ def _attack_pattern_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("external_references", obj1, obj2):
        w = weights["external_references"]
-        sum_weights += w
+        contributing_score = (
-        matching_score += (
+                w * partial_external_reference_based(obj1["external_references"], obj2["external_references"])
                w *
                partial_external_reference_based(obj1["external_references"], obj2["external_references"])
        )
        sum_weights += w
        matching_score += contributing_score
        logger.debug("'external_references' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -491,12 +511,17 @@ def _campaign_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("aliases", obj1, obj2):
        w = weights["aliases"]
        contributing_score = w * partial_list_based(obj1["aliases"], obj2["aliases"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["aliases"], obj2["aliases"])
+        matching_score += contributing_score
        logger.debug("'aliases' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -505,16 +530,23 @@ def _identity_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * exact_match(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * exact_match(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("identity_class", obj1, obj2):
        w = weights["identity_class"]
        contributing_score = w * exact_match(obj1["identity_class"], obj2["identity_class"])
        sum_weights += w
-        matching_score += w * exact_match(obj1["identity_class"], obj2["identity_class"])
+        matching_score += contributing_score
        logger.debug("'identity_class' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("sectors", obj1, obj2):
        w = weights["sectors"]
        contributing_score = w * partial_list_based(obj1["sectors"], obj2["sectors"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["sectors"], obj2["sectors"])
+        matching_score += contributing_score
        logger.debug("'sectors' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -523,19 +555,26 @@ def _indicator_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("indicator_types", obj1, obj2):
        w = weights["indicator_types"]
        contributing_score = w * partial_list_based(obj1["indicator_types"], obj2["indicator_types"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["indicator_types"], obj2["indicator_types"])
+        matching_score += contributing_score
        logger.debug("'indicator_types' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("pattern", obj1, obj2):
        w = weights["pattern"]
        contributing_score = w * custom_pattern_based(obj1["pattern"], obj2["pattern"])
        sum_weights += w
-        matching_score += w * custom_pattern_based(obj1["pattern"], obj2["pattern"])
+        matching_score += contributing_score
        logger.debug("'pattern' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("valid_from", obj1, obj2):
        w = weights["valid_from"]
-        sum_weights += w
+        contributing_score = (
        matching_score += (
                w *
                partial_timestamp_based(obj1["valid_from"], obj2["valid_from"], weights["tdelta"])
        )
        sum_weights += w
        matching_score += contributing_score
        logger.debug("'valid_from' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -544,19 +583,26 @@ def _location_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("latitude", obj1, obj2) and check_property_present("longitude", obj1, obj2):
        w = weights["longitude_latitude"]
-        sum_weights += w
+        contributing_score = (
        matching_score += (
                w *
                partial_location_distance(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], weights["threshold"])
        )
        sum_weights += w
        matching_score += contributing_score
        logger.debug("'longitude_latitude' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("region", obj1, obj2):
        w = weights["region"]
        contributing_score = w * exact_match(obj1["region"], obj2["region"])
        sum_weights += w
-        matching_score += w * exact_match(obj1["region"], obj2["region"])
+        matching_score += contributing_score
        logger.debug("'region' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("country", obj1, obj2):
        w = weights["country"]
        contributing_score = w * exact_match(obj1["country"], obj2["country"])
        sum_weights += w
-        matching_score += w * exact_match(obj1["country"], obj2["country"])
+        matching_score += contributing_score
        logger.debug("'country' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -565,12 +611,17 @@ def _malware_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("malware_types", obj1, obj2):
        w = weights["malware_types"]
        contributing_score = w * partial_list_based(obj1["malware_types"], obj2["malware_types"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["malware_types"], obj2["malware_types"])
+        matching_score += contributing_score
        logger.debug("'malware_types' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -579,16 +630,23 @@ def _threat_actor_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("threat_actor_types", obj1, obj2):
        w = weights["threat_actor_types"]
        contributing_score = w * partial_list_based(obj1["threat_actor_types"], obj2["threat_actor_types"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["threat_actor_types"], obj2["threat_actor_types"])
+        matching_score += contributing_score
        logger.debug("'threat_actor_types' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("aliases", obj1, obj2):
        w = weights["aliases"]
        contributing_score = w * partial_list_based(obj1["aliases"], obj2["aliases"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["aliases"], obj2["aliases"])
+        matching_score += contributing_score
        logger.debug("'aliases' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -597,12 +655,17 @@ def _tool_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("tool_types", obj1, obj2):
        w = weights["tool_types"]
        contributing_score = w * partial_list_based(obj1["tool_types"], obj2["tool_types"])
        sum_weights += w
-        matching_score += w * partial_list_based(obj1["tool_types"], obj2["tool_types"])
+        matching_score += contributing_score
        logger.debug("'tool_types' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
@ -611,29 +674,18 @@ def _vulnerability_checks(obj1, obj2, **weights):
    sum_weights = 0.0
    if check_property_present("name", obj1, obj2):
        w = weights["name"]
        contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
        sum_weights += w
-        matching_score += w * partial_string_based(obj1["name"], obj2["name"])
+        matching_score += contributing_score
        logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
    if check_property_present("external_references", obj1, obj2):
        w = weights["external_references"]
-        sum_weights += w
+        contributing_score = w * partial_external_reference_based(
        matching_score += w * partial_external_reference_based(
            obj1["external_references"],
            obj2["external_references"],
        )
        sum_weights += w
        matching_score += contributing_score
        logger.debug("'external_references' check -- weight: %s, contributing score: %s", w, contributing_score)
    logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
    return matching_score, sum_weights
 def _course_of_action_checks(obj1, obj2, **weights):
    """Refuse to compare course-of-action objects.

    The arguments are accepted but never read; every call raises
    SemanticEquivalenceUnsupportedTypeError.
    """
    message = "course-of-action type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
 def _intrusion_set_checks(obj1, obj2, **weights):
    """Refuse to compare intrusion-set objects.

    The arguments are accepted but never read; every call raises
    SemanticEquivalenceUnsupportedTypeError.
    """
    message = "intrusion-set type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
 def _observed_data_checks(obj1, obj2, **weights):
    """Refuse to compare observed-data objects.

    The arguments are accepted but never read; every call raises
    SemanticEquivalenceUnsupportedTypeError.
    """
    message = "observed-data type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
 def _report_checks(obj1, obj2, **weights):
    """Refuse to compare report objects.

    The arguments are accepted but never read; every call raises
    SemanticEquivalenceUnsupportedTypeError.
    """
    message = "report type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
--- a/stix2/exceptions.py
+++ b/stix2/exceptions.py
@ -233,10 +233,3 @@ class STIXDeprecationWarning(DeprecationWarning):
    Represents usage of a deprecated component of a STIX specification.
    """
    pass
 class SemanticEquivalenceUnsupportedTypeError(STIXError, TypeError):
    """STIX object type not supported by the semantic equivalence approach.

    Inherits from both ``STIXError`` and the built-in ``TypeError``, so an
    ``except`` clause written for either base class also catches it.
    """
    def __init__(self, msg):
        """Create the error with a single required message argument.

        Args:
            msg: Text describing the unsupported type; it is passed on
                unchanged to the next ``__init__`` in the method
                resolution order.
        """
        # Explicit two-argument super() form.
        # NOTE(review): presumably kept for Python 2 compatibility - confirm.
        super(SemanticEquivalenceUnsupportedTypeError, self).__init__(msg)
--- a/stix2/test/v21/test_base.py
+++ b/stix2/test/v21/test_base.py
@ -1,9 +1,11 @@
 import datetime as dt
 import json
 import uuid
 import pytest
 import pytz
 import stix2
 from stix2.base import STIXJSONEncoder
@ -23,3 +25,14 @@ def test_encode_json_object():
        json.dumps(test_dict, cls=STIXJSONEncoder)
    assert " is not JSON serializable" in str(excinfo.value)
 def test_deterministic_id_unicode():
    """A mutex observable with a non-ASCII name must get a UUIDv5-based ID."""
    separator = "--"
    mutex_props = {'name': u'D*Fl#Ed*\u00a3\u00a8', 'type': 'mutex'}
    observable = stix2.parse_observable(mutex_props, version="2.1")

    # Keep only the part of the ID that follows the first "--"; that part
    # has to parse as a UUID for the checks below to run.
    uuid_start = observable.id.index(separator) + 2
    parsed_uuid = uuid.UUID(observable.id[uuid_start:])

    assert parsed_uuid.variant == uuid.RFC_4122
    assert parsed_uuid.version == 5
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@ -6,10 +6,8 @@ import stix2.exceptions
 from .constants import (
    ATTACK_PATTERN_ID, ATTACK_PATTERN_KWARGS, CAMPAIGN_ID, CAMPAIGN_KWARGS,
-    COURSE_OF_ACTION_ID, COURSE_OF_ACTION_KWARGS, FAKE_TIME, IDENTITY_ID,
+    FAKE_TIME, IDENTITY_ID, IDENTITY_KWARGS, INDICATOR_ID, INDICATOR_KWARGS,
-    IDENTITY_KWARGS, INDICATOR_ID, INDICATOR_KWARGS, INTRUSION_SET_ID,
+    LOCATION_ID, MALWARE_ID, MALWARE_KWARGS, RELATIONSHIP_IDS, REPORT_ID,
    INTRUSION_SET_KWARGS, LOCATION_ID, MALWARE_ID, MALWARE_KWARGS,
    OBSERVED_DATA_ID, OBSERVED_DATA_KWARGS, RELATIONSHIP_IDS, REPORT_ID,
    REPORT_KWARGS, THREAT_ACTOR_ID, THREAT_ACTOR_KWARGS, TOOL_ID, TOOL_KWARGS,
    VULNERABILITY_ID, VULNERABILITY_KWARGS,
 )
@ -615,37 +613,6 @@ def test_semantic_equivalence_different_spec_version_raises():
    assert str(excinfo.value) == "The objects to compare must be of the same spec version!"
# Each case supplies two objects of one type built from the same ID and
# kwargs, plus the exact error message expected for that type.
@pytest.mark.parametrize(
    "obj1,obj2,ret_val",
    [
        (
             stix2.v21.CourseOfAction(id=COURSE_OF_ACTION_ID, **COURSE_OF_ACTION_KWARGS),
             stix2.v21.CourseOfAction(id=COURSE_OF_ACTION_ID, **COURSE_OF_ACTION_KWARGS),
             "course-of-action type has no semantic equivalence implementation!",
        ),
        (
             stix2.v21.IntrusionSet(id=INTRUSION_SET_ID, **INTRUSION_SET_KWARGS),
             stix2.v21.IntrusionSet(id=INTRUSION_SET_ID, **INTRUSION_SET_KWARGS),
             "intrusion-set type has no semantic equivalence implementation!",
        ),
        (
             stix2.v21.ObservedData(id=OBSERVED_DATA_ID, **OBSERVED_DATA_KWARGS),
             stix2.v21.ObservedData(id=OBSERVED_DATA_ID, **OBSERVED_DATA_KWARGS),
             "observed-data type has no semantic equivalence implementation!",
        ),
        (
             stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS),
             stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS),
             "report type has no semantic equivalence implementation!",
        ),
    ],
 )
 def test_semantic_equivalence_on_unsupported_types(obj1, obj2, ret_val):
    """Comparing two objects of an unsupported type must raise.

    ``semantically_equivalent`` is expected to raise
    ``SemanticEquivalenceUnsupportedTypeError``, and the string form of that
    exception must equal ``ret_val``.
    """
    with pytest.raises(stix2.exceptions.SemanticEquivalenceUnsupportedTypeError) as excinfo:
        stix2.Environment().semantically_equivalent(obj1, obj2)
    assert ret_val == str(excinfo.value)
 def test_semantic_equivalence_zero_match():
    IND_KWARGS = dict(
        indicator_types=["APTX"],
@ -767,7 +734,7 @@ def test_semantic_equivalence_external_references(refs1, refs2, ret_val):
    assert value == ret_val
-def test_semantic_equivalence_timetamp():
+def test_semantic_equivalence_timestamp():
    t1 = "2018-10-17T00:14:20.652Z"
    t2 = "2018-10-17T12:14:20.652Z"
    assert stix2.environment.partial_timestamp_based(t1, t2, 1) == 0.5
@ -777,3 +744,9 @@ def test_semantic_equivalence_exact_match():
    t1 = "2018-10-17T00:14:20.652Z"
    t2 = "2018-10-17T12:14:20.652Z"
    assert stix2.environment.exact_match(t1, t2) == 0.0
 def test_non_existent_config_for_object():
    """Two identically-built Report objects are expected to score 0.0.

    NOTE(review): going by the test name, the zero score comes from Report
    having no semantic-equivalence configuration - confirm against
    stix2.environment.
    """
    first_report = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
    second_report = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)

    score = stix2.Environment().semantically_equivalent(first_report, second_report)

    assert score == 0.0
--- a/stix2/version.py
+++ b/stix2/version.py
@ -1 +1 @@
-__version__ = "1.2.0"
+__version__ = "1.2.1"