Merge branch 'master' of github.com:oasis-open/cti-python-stix2

master
chrisr3d 2019-10-22 01:30:57 +02:00
commit e2a4129ad3
9 changed files with 445 additions and 156 deletions

View File

@ -1,6 +1,12 @@
CHANGELOG
=========
1.2.1 - 2019-10-16
* #301 Adds more detailed debugging semantic equivalence output
* #301 Updates semantic equivalence errors
* #300 Fixes bug with deterministic IDs for SCOs containing unicode
1.2.0 - 2019-09-25
* #268, #271, #273, #275, #283, #285, #290 Changes support of STIX 2.1 to WD05 (CSD02), for all object types

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"metadata": {
"nbsphinx": "hidden"
},
@ -22,7 +22,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"metadata": {
"nbsphinx": "hidden"
},
@ -58,7 +58,7 @@
"source": [
"## Checking Semantic Equivalence\n",
"\n",
"The [Environment](../api/stix2.environment.rst#stix2.environment.Environment) has a function for checking if two STIX Objects are semantically equivalent. For each supported objct type, the algorithm checks if the values for a specific set of properties match. Then each matching property is weighted since every property doesn't represent the same level of importance for semantic equivalence. The result will be the sum of these weighted values, in the range of 0 to 100. A result of 0 means that the the two objects are not equivalent, and a result of 100 means that they are equivalent.\n",
"The [Environment](../api/stix2.environment.rst#stix2.environment.Environment) has a function for checking if two STIX Objects are semantically equivalent. For each supported object type, the algorithm checks if the values for a specific set of properties match. Then each matching property is weighted, since not every property represents the same level of importance for semantic equivalence. The result will be the sum of these weighted values, in the range of 0 to 100. A result of 0 means that the two objects are not equivalent, and a result of 100 means that they are equivalent.\n",
"\n",
"TODO: Add a link to the committee note when it is released.\n",
"\n",
@ -71,7 +71,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -152,7 +152,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 16,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -191,12 +191,12 @@
"source": [
"### Campaign Example\n",
"\n",
"For Campaigns, the only properties that contribute to semantic equivalence are `name` and `aliases`, with weights of 60 and 40, respectively. In this example, the two campaigns have completely different names, but slightly similar descriptions."
"For Campaigns, the only properties that contribute to semantic equivalence are `name` and `aliases`, with weights of 60 and 40, respectively. In this example, the two campaigns have completely different names and do not set `aliases`, so only `name` is compared. The result may be higher than expected because the Jaro-Winkler algorithm used to compare string properties looks at the edit distance of the two strings rather than just the words in them."
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -277,7 +277,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 17,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
@ -286,12 +286,10 @@
"from stix2.v21 import Campaign\n",
"\n",
"c1 = Campaign(\n",
" name=\"Someone Attacks Somebody\",\n",
" description=\"A campaign targeting....\",)\n",
" name=\"Someone Attacks Somebody\",)\n",
"\n",
"c2 = Campaign(\n",
" name=\"Another Campaign\",\n",
" description=\"A campaign that targets....\",)\n",
" name=\"Another Campaign\",)\n",
"print(env.semantically_equivalent(c1, c2))"
]
},
@ -306,7 +304,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 5,
"metadata": {},
"outputs": [
{
@ -387,7 +385,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 18,
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@ -419,8 +417,10 @@
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"execution_count": 6,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stderr",
@ -507,7 +507,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 19,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -530,6 +530,13 @@
"print(env.semantically_equivalent(ind1, ind2))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"If the patterns were identical the result would have been 100."
]
},
{
"cell_type": "markdown",
"metadata": {},
@ -541,7 +548,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 7,
"metadata": {
"scrolled": true
},
@ -624,7 +631,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 20,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
@ -654,7 +661,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 8,
"metadata": {
"scrolled": true
},
@ -737,7 +744,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 21,
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
@ -771,7 +778,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"metadata": {
"scrolled": true
},
@ -854,7 +861,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
@ -888,7 +895,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"metadata": {
"scrolled": true
},
@ -971,7 +978,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
@ -1002,7 +1009,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"outputs": [
{
@ -1083,7 +1090,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@ -1117,7 +1124,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 12,
"metadata": {},
"outputs": [
{
@ -1137,21 +1144,102 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Some object types do not have a defined method for calculating semantic equivalence and by default will raise an error."
"Some object types do not have a defined method for calculating semantic equivalence and by default will give a warning and a result of zero."
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"ename": "SemanticEquivalenceUnsupportedTypeError",
"evalue": "report type has no semantic equivalence implementation!",
"output_type": "error",
"traceback": [
"\u001b[0;31mSemanticEquivalenceUnsupportedTypeError\u001b[0m\u001b[0;31m:\u001b[0m report type has no semantic equivalence implementation!\n"
"name": "stderr",
"output_type": "stream",
"text": [
"'report' type has no semantic equivalence method to call!\n"
]
},
{
"data": {
"text/html": [
"<style type=\"text/css\">.highlight .hll { background-color: #ffffcc }\n",
".highlight { background: #f8f8f8; }\n",
".highlight .c { color: #408080; font-style: italic } /* Comment */\n",
".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
".highlight .o { color: #666666 } /* Operator */\n",
".highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
".highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
".highlight .cp { color: #BC7A00 } /* Comment.Preproc */\n",
".highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
".highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
".highlight .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
".highlight .gd { color: #A00000 } /* Generic.Deleted */\n",
".highlight .ge { font-style: italic } /* Generic.Emph */\n",
".highlight .gr { color: #FF0000 } /* Generic.Error */\n",
".highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
".highlight .gi { color: #00A000 } /* Generic.Inserted */\n",
".highlight .go { color: #888888 } /* Generic.Output */\n",
".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
".highlight .m { color: #666666 } /* Literal.Number */\n",
".highlight .s { color: #BA2121 } /* Literal.String */\n",
".highlight .na { color: #7D9029 } /* Name.Attribute */\n",
".highlight .nb { color: #008000 } /* Name.Builtin */\n",
".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
".highlight .no { color: #880000 } /* Name.Constant */\n",
".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
".highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
".highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
".highlight .nf { color: #0000FF } /* Name.Function */\n",
".highlight .nl { color: #A0A000 } /* Name.Label */\n",
".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
".highlight .nv { color: #19177C } /* Name.Variable */\n",
".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
".highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
".highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
".highlight .s2 { color: #BA2121 } /* Literal.String.Double */\n",
".highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
".highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
".highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
".highlight .sx { color: #008000 } /* Literal.String.Other */\n",
".highlight .sr { color: #BB6688 } /* Literal.String.Regex */\n",
".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span>0\n",
"</pre></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@ -1176,12 +1264,43 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"By default, comparing objects of different spec versions will result in an error. You can optionally allow this by providing a configuration dictionary like in the next example:"
"By default, comparing objects of different spec versions will result in a `ValueError`."
]
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 14,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "The objects to compare must be of the same spec version!",
"output_type": "error",
"traceback": [
"\u001b[0;31mValueError\u001b[0m\u001b[0;31m:\u001b[0m The objects to compare must be of the same spec version!\n"
]
}
],
"source": [
"from stix2.v20 import Identity as Identity20\n",
"\n",
"id20 = Identity20(\n",
" name=\"John Smith\",\n",
" identity_class=\"individual\",\n",
")\n",
"print(env.semantically_equivalent(id2, id20))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You can optionally allow comparing across spec versions by providing a configuration dictionary like in the next example:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
@ -1262,7 +1381,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 27,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
@ -1286,7 +1405,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 16,
"metadata": {},
"outputs": [
{
@ -1367,7 +1486,7 @@
"<IPython.core.display.HTML object>"
]
},
"execution_count": 28,
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@ -1410,6 +1529,136 @@
"}\n",
"print(env.semantically_equivalent(foo1, foo2, **weights))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Detailed Results\n",
"\n",
"If your logging level is set to `DEBUG` or lower (that is, at least as verbose as `DEBUG`), the function will log more detailed results. These show the semantic equivalence and weighting for each property that is checked, to show how the final result was arrived at."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Starting semantic equivalence process between: 'threat-actor--54dc2aac-6fde-4a68-ae2a-0c0bc575ed70' and 'threat-actor--c51bce3b-a067-4692-ab77-fcdefdd3f157'\n",
"--\t\tpartial_string_based 'Evil Org' 'James Bond'\tresult: '0.56'\n",
"'name' check -- weight: 60, contributing score: 33.6\n",
"--\t\tpartial_list_based '['crime-syndicate']' '['spy']'\tresult: '0.0'\n",
"'threat_actor_types' check -- weight: 20, contributing score: 0.0\n",
"--\t\tpartial_list_based '['super-evil']' '['007']'\tresult: '0.0'\n",
"'aliases' check -- weight: 20, contributing score: 0.0\n",
"Matching Score: 33.6, Sum of Weights: 100.0\n"
]
},
{
"data": {
"text/html": [
"<style type=\"text/css\">.highlight .hll { background-color: #ffffcc }\n",
".highlight { background: #f8f8f8; }\n",
".highlight .c { color: #408080; font-style: italic } /* Comment */\n",
".highlight .err { border: 1px solid #FF0000 } /* Error */\n",
".highlight .k { color: #008000; font-weight: bold } /* Keyword */\n",
".highlight .o { color: #666666 } /* Operator */\n",
".highlight .ch { color: #408080; font-style: italic } /* Comment.Hashbang */\n",
".highlight .cm { color: #408080; font-style: italic } /* Comment.Multiline */\n",
".highlight .cp { color: #BC7A00 } /* Comment.Preproc */\n",
".highlight .cpf { color: #408080; font-style: italic } /* Comment.PreprocFile */\n",
".highlight .c1 { color: #408080; font-style: italic } /* Comment.Single */\n",
".highlight .cs { color: #408080; font-style: italic } /* Comment.Special */\n",
".highlight .gd { color: #A00000 } /* Generic.Deleted */\n",
".highlight .ge { font-style: italic } /* Generic.Emph */\n",
".highlight .gr { color: #FF0000 } /* Generic.Error */\n",
".highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */\n",
".highlight .gi { color: #00A000 } /* Generic.Inserted */\n",
".highlight .go { color: #888888 } /* Generic.Output */\n",
".highlight .gp { color: #000080; font-weight: bold } /* Generic.Prompt */\n",
".highlight .gs { font-weight: bold } /* Generic.Strong */\n",
".highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */\n",
".highlight .gt { color: #0044DD } /* Generic.Traceback */\n",
".highlight .kc { color: #008000; font-weight: bold } /* Keyword.Constant */\n",
".highlight .kd { color: #008000; font-weight: bold } /* Keyword.Declaration */\n",
".highlight .kn { color: #008000; font-weight: bold } /* Keyword.Namespace */\n",
".highlight .kp { color: #008000 } /* Keyword.Pseudo */\n",
".highlight .kr { color: #008000; font-weight: bold } /* Keyword.Reserved */\n",
".highlight .kt { color: #B00040 } /* Keyword.Type */\n",
".highlight .m { color: #666666 } /* Literal.Number */\n",
".highlight .s { color: #BA2121 } /* Literal.String */\n",
".highlight .na { color: #7D9029 } /* Name.Attribute */\n",
".highlight .nb { color: #008000 } /* Name.Builtin */\n",
".highlight .nc { color: #0000FF; font-weight: bold } /* Name.Class */\n",
".highlight .no { color: #880000 } /* Name.Constant */\n",
".highlight .nd { color: #AA22FF } /* Name.Decorator */\n",
".highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */\n",
".highlight .ne { color: #D2413A; font-weight: bold } /* Name.Exception */\n",
".highlight .nf { color: #0000FF } /* Name.Function */\n",
".highlight .nl { color: #A0A000 } /* Name.Label */\n",
".highlight .nn { color: #0000FF; font-weight: bold } /* Name.Namespace */\n",
".highlight .nt { color: #008000; font-weight: bold } /* Name.Tag */\n",
".highlight .nv { color: #19177C } /* Name.Variable */\n",
".highlight .ow { color: #AA22FF; font-weight: bold } /* Operator.Word */\n",
".highlight .w { color: #bbbbbb } /* Text.Whitespace */\n",
".highlight .mb { color: #666666 } /* Literal.Number.Bin */\n",
".highlight .mf { color: #666666 } /* Literal.Number.Float */\n",
".highlight .mh { color: #666666 } /* Literal.Number.Hex */\n",
".highlight .mi { color: #666666 } /* Literal.Number.Integer */\n",
".highlight .mo { color: #666666 } /* Literal.Number.Oct */\n",
".highlight .sa { color: #BA2121 } /* Literal.String.Affix */\n",
".highlight .sb { color: #BA2121 } /* Literal.String.Backtick */\n",
".highlight .sc { color: #BA2121 } /* Literal.String.Char */\n",
".highlight .dl { color: #BA2121 } /* Literal.String.Delimiter */\n",
".highlight .sd { color: #BA2121; font-style: italic } /* Literal.String.Doc */\n",
".highlight .s2 { color: #BA2121 } /* Literal.String.Double */\n",
".highlight .se { color: #BB6622; font-weight: bold } /* Literal.String.Escape */\n",
".highlight .sh { color: #BA2121 } /* Literal.String.Heredoc */\n",
".highlight .si { color: #BB6688; font-weight: bold } /* Literal.String.Interpol */\n",
".highlight .sx { color: #008000 } /* Literal.String.Other */\n",
".highlight .sr { color: #BB6688 } /* Literal.String.Regex */\n",
".highlight .s1 { color: #BA2121 } /* Literal.String.Single */\n",
".highlight .ss { color: #19177C } /* Literal.String.Symbol */\n",
".highlight .bp { color: #008000 } /* Name.Builtin.Pseudo */\n",
".highlight .fm { color: #0000FF } /* Name.Function.Magic */\n",
".highlight .vc { color: #19177C } /* Name.Variable.Class */\n",
".highlight .vg { color: #19177C } /* Name.Variable.Global */\n",
".highlight .vi { color: #19177C } /* Name.Variable.Instance */\n",
".highlight .vm { color: #19177C } /* Name.Variable.Magic */\n",
".highlight .il { color: #666666 } /* Literal.Number.Integer.Long */</style><div class=\"highlight\"><pre><span></span>33.6\n",
"</pre></div>\n"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import logging\n",
"logging.basicConfig(format='%(message)s')\n",
"logger = logging.getLogger()\n",
"logger.setLevel(logging.DEBUG)\n",
"\n",
"ta3 = ThreatActor(\n",
" threat_actor_types=[\"crime-syndicate\"],\n",
" name=\"Evil Org\",\n",
" aliases=[\"super-evil\"],\n",
")\n",
"ta4 = ThreatActor(\n",
" threat_actor_types=[\"spy\"],\n",
" name=\"James Bond\",\n",
" aliases=[\"007\"],\n",
")\n",
"print(env.semantically_equivalent(ta3, ta4))"
]
}
],
"metadata": {

View File

@ -1,5 +1,5 @@
[bumpversion]
current_version = 1.2.0
current_version = 1.2.1
commit = True
tag = True

View File

@ -396,11 +396,14 @@ class _Observable(_STIXBase):
if streamlined_obj_vals:
data = canonicalize(streamlined_obj_vals, utf8=False)
# explicit python version check here to enable python 2 compatibility
try:
# The situation is complicated w.r.t. python 2/3 behavior, so
# I'd rather not rely on particular exceptions being raised to
# determine what to do. Better to just check the python version
# directly.
if six.PY3:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data))
except UnicodeDecodeError:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, six.binary_type(data)))
else:
return required_prefix + six.text_type(uuid.uuid5(SCO_DET_ID_NAMESPACE, data.encode("utf-8")))
# We return None if there are no values specified for any of the id-contributing-properties
return None

View File

@ -6,7 +6,6 @@ import time
from .core import parse as _parse
from .datastore import CompositeDataSource, DataStoreMixin
from .exceptions import SemanticEquivalenceUnsupportedTypeError
from .utils import STIXdatetime, parse_into_datetime
logger = logging.getLogger(__name__)
@ -228,9 +227,6 @@ class Environment(DataStoreMixin):
"aliases": 40,
"method": _campaign_checks,
},
"course-of-action": {
"method": _course_of_action_checks,
},
"identity": {
"name": 60,
"identity_class": 20,
@ -244,9 +240,6 @@ class Environment(DataStoreMixin):
"tdelta": 1, # One day interval
"method": _indicator_checks,
},
"intrusion-set": {
"method": _intrusion_set_checks,
},
"location": {
"longitude_latitude": 34,
"region": 33,
@ -259,12 +252,6 @@ class Environment(DataStoreMixin):
"name": 80,
"method": _malware_checks,
},
"observed-data": {
"method": _observed_data_checks,
},
"report": {
"method": _report_checks,
},
"threat-actor": {
"name": 60,
"threat_actor_types": 20,
@ -298,8 +285,14 @@ class Environment(DataStoreMixin):
if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"):
raise ValueError('The objects to compare must be of the same spec version!')
method = weights[type1]["method"]
matching_score, sum_weights = method(obj1, obj2, **weights[type1])
try:
method = weights[type1]["method"]
except KeyError:
logger.warning("'%s' type has no semantic equivalence method to call!", type1)
sum_weights = matching_score = 0
else:
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
matching_score, sum_weights = method(obj1, obj2, **weights[type1])
if sum_weights <= 0:
return 0
@ -333,7 +326,9 @@ def partial_timestamp_based(t1, t2, tdelta):
if not isinstance(t2, STIXdatetime):
t2 = parse_into_datetime(t2)
t1, t2 = time.mktime(t1.timetuple()), time.mktime(t2.timetuple())
return 1 - min(abs(t1 - t2) / (86400 * tdelta), 1)
result = 1 - min(abs(t1 - t2) / (86400 * tdelta), 1)
logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", t1, t2, tdelta, result)
return result
def partial_list_based(l1, l2):
@ -348,7 +343,9 @@ def partial_list_based(l1, l2):
"""
l1_set, l2_set = set(l1), set(l2)
return len(l1_set.intersection(l2_set)) / max(len(l1), len(l2))
result = len(l1_set.intersection(l2_set)) / max(len(l1), len(l2))
logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result)
return result
def exact_match(val1, val2):
@ -362,9 +359,11 @@ def exact_match(val1, val2):
float: 1.0 if the value matches exactly, 0.0 otherwise.
"""
result = 0.0
if val1 == val2:
return 1.0
return 0.0
result = 1.0
logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result)
return result
def partial_string_based(str1, str2):
@ -379,7 +378,9 @@ def partial_string_based(str1, str2):
"""
from pyjarowinkler import distance
return distance.get_jaro_distance(str1, str2)
result = distance.get_jaro_distance(str1, str2)
logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, result)
return result
def custom_pattern_based(pattern1, pattern2):
@ -440,14 +441,24 @@ def partial_external_reference_based(refs1, refs2):
# external_id or url match then it's a perfect match and other entries
# can be ignored.
if sn_match and (ei_match or url_match) and source_name in allowed:
return 1.0
result = 1.0
logger.debug(
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
refs1, refs2, result,
)
return result
# Regular check. If the source_name (not STIX-defined) or external_id or
# url matches then we consider the entry a match.
if (sn_match or ei_match or url_match) and source_name not in allowed:
matches += 1
return matches / max(len(refs1), len(refs2))
result = matches / max(len(refs1), len(refs2))
logger.debug(
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
refs1, refs2, result,
)
return result
def partial_location_distance(lat1, long1, lat2, long2, threshold):
@ -466,7 +477,12 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
"""
from haversine import haversine, Unit
distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS)
return 1 - (distance / threshold)
result = 1 - (distance / threshold)
logger.debug(
"--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'",
(lat1, long1), (lat2, long2), threshold, result,
)
return result
def _attack_pattern_checks(obj1, obj2, **weights):
@ -474,15 +490,19 @@ def _attack_pattern_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("external_references", obj1, obj2):
w = weights["external_references"]
sum_weights += w
matching_score += (
w *
partial_external_reference_based(obj1["external_references"], obj2["external_references"])
contributing_score = (
w * partial_external_reference_based(obj1["external_references"], obj2["external_references"])
)
sum_weights += w
matching_score += contributing_score
logger.debug("'external_references' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -491,12 +511,17 @@ def _campaign_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("aliases", obj1, obj2):
w = weights["aliases"]
contributing_score = w * partial_list_based(obj1["aliases"], obj2["aliases"])
sum_weights += w
matching_score += w * partial_list_based(obj1["aliases"], obj2["aliases"])
matching_score += contributing_score
logger.debug("'aliases' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -505,16 +530,23 @@ def _identity_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * exact_match(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * exact_match(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("identity_class", obj1, obj2):
w = weights["identity_class"]
contributing_score = w * exact_match(obj1["identity_class"], obj2["identity_class"])
sum_weights += w
matching_score += w * exact_match(obj1["identity_class"], obj2["identity_class"])
matching_score += contributing_score
logger.debug("'identity_class' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("sectors", obj1, obj2):
w = weights["sectors"]
contributing_score = w * partial_list_based(obj1["sectors"], obj2["sectors"])
sum_weights += w
matching_score += w * partial_list_based(obj1["sectors"], obj2["sectors"])
matching_score += contributing_score
logger.debug("'sectors' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -523,19 +555,26 @@ def _indicator_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("indicator_types", obj1, obj2):
w = weights["indicator_types"]
contributing_score = w * partial_list_based(obj1["indicator_types"], obj2["indicator_types"])
sum_weights += w
matching_score += w * partial_list_based(obj1["indicator_types"], obj2["indicator_types"])
matching_score += contributing_score
logger.debug("'indicator_types' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("pattern", obj1, obj2):
w = weights["pattern"]
contributing_score = w * custom_pattern_based(obj1["pattern"], obj2["pattern"])
sum_weights += w
matching_score += w * custom_pattern_based(obj1["pattern"], obj2["pattern"])
matching_score += contributing_score
logger.debug("'pattern' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("valid_from", obj1, obj2):
w = weights["valid_from"]
sum_weights += w
matching_score += (
contributing_score = (
w *
partial_timestamp_based(obj1["valid_from"], obj2["valid_from"], weights["tdelta"])
)
sum_weights += w
matching_score += contributing_score
logger.debug("'valid_from' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -544,19 +583,26 @@ def _location_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("latitude", obj1, obj2) and check_property_present("longitude", obj1, obj2):
w = weights["longitude_latitude"]
sum_weights += w
matching_score += (
contributing_score = (
w *
partial_location_distance(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], weights["threshold"])
)
sum_weights += w
matching_score += contributing_score
logger.debug("'longitude_latitude' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("region", obj1, obj2):
w = weights["region"]
contributing_score = w * exact_match(obj1["region"], obj2["region"])
sum_weights += w
matching_score += w * exact_match(obj1["region"], obj2["region"])
matching_score += contributing_score
logger.debug("'region' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("country", obj1, obj2):
w = weights["country"]
contributing_score = w * exact_match(obj1["country"], obj2["country"])
sum_weights += w
matching_score += w * exact_match(obj1["country"], obj2["country"])
matching_score += contributing_score
logger.debug("'country' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -565,12 +611,17 @@ def _malware_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("malware_types", obj1, obj2):
w = weights["malware_types"]
contributing_score = w * partial_list_based(obj1["malware_types"], obj2["malware_types"])
sum_weights += w
matching_score += w * partial_list_based(obj1["malware_types"], obj2["malware_types"])
matching_score += contributing_score
logger.debug("'malware_types' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -579,16 +630,23 @@ def _threat_actor_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("threat_actor_types", obj1, obj2):
w = weights["threat_actor_types"]
contributing_score = w * partial_list_based(obj1["threat_actor_types"], obj2["threat_actor_types"])
sum_weights += w
matching_score += w * partial_list_based(obj1["threat_actor_types"], obj2["threat_actor_types"])
matching_score += contributing_score
logger.debug("'threat_actor_types' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("aliases", obj1, obj2):
w = weights["aliases"]
contributing_score = w * partial_list_based(obj1["aliases"], obj2["aliases"])
sum_weights += w
matching_score += w * partial_list_based(obj1["aliases"], obj2["aliases"])
matching_score += contributing_score
logger.debug("'aliases' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -597,12 +655,17 @@ def _tool_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("tool_types", obj1, obj2):
w = weights["tool_types"]
contributing_score = w * partial_list_based(obj1["tool_types"], obj2["tool_types"])
sum_weights += w
matching_score += w * partial_list_based(obj1["tool_types"], obj2["tool_types"])
matching_score += contributing_score
logger.debug("'tool_types' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
@ -611,29 +674,18 @@ def _vulnerability_checks(obj1, obj2, **weights):
sum_weights = 0.0
if check_property_present("name", obj1, obj2):
w = weights["name"]
contributing_score = w * partial_string_based(obj1["name"], obj2["name"])
sum_weights += w
matching_score += w * partial_string_based(obj1["name"], obj2["name"])
matching_score += contributing_score
logger.debug("'name' check -- weight: %s, contributing score: %s", w, contributing_score)
if check_property_present("external_references", obj1, obj2):
w = weights["external_references"]
sum_weights += w
matching_score += w * partial_external_reference_based(
contributing_score = w * partial_external_reference_based(
obj1["external_references"],
obj2["external_references"],
)
sum_weights += w
matching_score += contributing_score
logger.debug("'external_references' check -- weight: %s, contributing score: %s", w, contributing_score)
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
return matching_score, sum_weights
def _course_of_action_checks(obj1, obj2, **weights):
    """Reject semantic equivalence checks for course-of-action objects.

    The arguments are accepted but never inspected.

    Raises:
        SemanticEquivalenceUnsupportedTypeError: always.
    """
    message = "course-of-action type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
def _intrusion_set_checks(obj1, obj2, **weights):
    """Reject semantic equivalence checks for intrusion-set objects.

    The arguments are accepted but never inspected.

    Raises:
        SemanticEquivalenceUnsupportedTypeError: always.
    """
    message = "intrusion-set type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
def _observed_data_checks(obj1, obj2, **weights):
    """Reject semantic equivalence checks for observed-data objects.

    The arguments are accepted but never inspected.

    Raises:
        SemanticEquivalenceUnsupportedTypeError: always.
    """
    message = "observed-data type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)
def _report_checks(obj1, obj2, **weights):
    """Reject semantic equivalence checks for report objects.

    The arguments are accepted but never inspected.

    Raises:
        SemanticEquivalenceUnsupportedTypeError: always.
    """
    message = "report type has no semantic equivalence implementation!"
    raise SemanticEquivalenceUnsupportedTypeError(message)

View File

@ -233,10 +233,3 @@ class STIXDeprecationWarning(DeprecationWarning):
Represents usage of a deprecated component of a STIX specification.
"""
pass
class SemanticEquivalenceUnsupportedTypeError(STIXError, TypeError):
    """Raised when a STIX object type is not supported by the semantic equivalence approach."""

    def __init__(self, msg):
        # Explicit two-argument super() keeps the call valid on Python 2 as well.
        parent = super(SemanticEquivalenceUnsupportedTypeError, self)
        parent.__init__(msg)

View File

@ -1,9 +1,11 @@
import datetime as dt
import json
import uuid
import pytest
import pytz
import stix2
from stix2.base import STIXJSONEncoder
@ -23,3 +25,14 @@ def test_encode_json_object():
json.dumps(test_dict, cls=STIXJSONEncoder)
assert " is not JSON serializable" in str(excinfo.value)
def test_deterministic_id_unicode():
    """A mutex observable whose name contains non-ASCII characters gets an RFC 4122 version-5 UUID."""
    mutex_props = {'name': u'D*Fl#Ed*\u00a3\u00a8', 'type': 'mutex'}
    observable = stix2.parse_observable(mutex_props, version="2.1")

    # The UUID is everything after the "--" separator in the object id.
    separator_pos = observable.id.index("--")
    parsed_uuid = uuid.UUID(observable.id[separator_pos + 2:])

    assert parsed_uuid.variant == uuid.RFC_4122
    assert parsed_uuid.version == 5

View File

@ -6,10 +6,8 @@ import stix2.exceptions
from .constants import (
ATTACK_PATTERN_ID, ATTACK_PATTERN_KWARGS, CAMPAIGN_ID, CAMPAIGN_KWARGS,
COURSE_OF_ACTION_ID, COURSE_OF_ACTION_KWARGS, FAKE_TIME, IDENTITY_ID,
IDENTITY_KWARGS, INDICATOR_ID, INDICATOR_KWARGS, INTRUSION_SET_ID,
INTRUSION_SET_KWARGS, LOCATION_ID, MALWARE_ID, MALWARE_KWARGS,
OBSERVED_DATA_ID, OBSERVED_DATA_KWARGS, RELATIONSHIP_IDS, REPORT_ID,
FAKE_TIME, IDENTITY_ID, IDENTITY_KWARGS, INDICATOR_ID, INDICATOR_KWARGS,
LOCATION_ID, MALWARE_ID, MALWARE_KWARGS, RELATIONSHIP_IDS, REPORT_ID,
REPORT_KWARGS, THREAT_ACTOR_ID, THREAT_ACTOR_KWARGS, TOOL_ID, TOOL_KWARGS,
VULNERABILITY_ID, VULNERABILITY_KWARGS,
)
@ -615,37 +613,6 @@ def test_semantic_equivalence_different_spec_version_raises():
assert str(excinfo.value) == "The objects to compare must be of the same spec version!"
@pytest.mark.parametrize(
    "obj1,obj2,ret_val",
    [
        (
            stix2.v21.CourseOfAction(id=COURSE_OF_ACTION_ID, **COURSE_OF_ACTION_KWARGS),
            stix2.v21.CourseOfAction(id=COURSE_OF_ACTION_ID, **COURSE_OF_ACTION_KWARGS),
            "course-of-action type has no semantic equivalence implementation!",
        ),
        (
            stix2.v21.IntrusionSet(id=INTRUSION_SET_ID, **INTRUSION_SET_KWARGS),
            stix2.v21.IntrusionSet(id=INTRUSION_SET_ID, **INTRUSION_SET_KWARGS),
            "intrusion-set type has no semantic equivalence implementation!",
        ),
        (
            stix2.v21.ObservedData(id=OBSERVED_DATA_ID, **OBSERVED_DATA_KWARGS),
            stix2.v21.ObservedData(id=OBSERVED_DATA_ID, **OBSERVED_DATA_KWARGS),
            "observed-data type has no semantic equivalence implementation!",
        ),
        (
            stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS),
            stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS),
            "report type has no semantic equivalence implementation!",
        ),
    ],
)
def test_semantic_equivalence_on_unsupported_types(obj1, obj2, ret_val):
    """Comparing two objects of an unsupported type raises with the expected message."""
    expected_error = stix2.exceptions.SemanticEquivalenceUnsupportedTypeError
    with pytest.raises(expected_error) as excinfo:
        stix2.Environment().semantically_equivalent(obj1, obj2)

    raised_message = str(excinfo.value)
    assert ret_val == raised_message
def test_semantic_equivalence_zero_match():
IND_KWARGS = dict(
indicator_types=["APTX"],
@ -767,7 +734,7 @@ def test_semantic_equivalence_external_references(refs1, refs2, ret_val):
assert value == ret_val
def test_semantic_equivalence_timestamp():
    """partial_timestamp_based returns 0.5 for timestamps 12 hours apart.

    The third argument passed to partial_timestamp_based is 1.
    """
    # Same calendar day, exactly twelve hours between the two instants.
    t1 = "2018-10-17T00:14:20.652Z"
    t2 = "2018-10-17T12:14:20.652Z"
    assert stix2.environment.partial_timestamp_based(t1, t2, 1) == 0.5
@ -777,3 +744,9 @@ def test_semantic_equivalence_exact_match():
t1 = "2018-10-17T00:14:20.652Z"
t2 = "2018-10-17T12:14:20.652Z"
assert stix2.environment.exact_match(t1, t2) == 0.0
def test_non_existent_config_for_object():
    """Comparing two identically built Report objects yields a score of 0.0."""
    # NOTE(review): the test name suggests Report has no equivalence
    # configuration -- confirm against the Environment implementation.
    reports = [stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS) for _ in range(2)]
    score = stix2.Environment().semantically_equivalent(*reports)
    assert score == 0.0

View File

@ -1 +1 @@
__version__ = "1.2.0"
__version__ = "1.2.1"