Merge branch 'master' of github.com:oasis-open/cti-python-stix2 into main
commit
8093e07d1b
|
@ -55,8 +55,7 @@ coverage.xml
|
||||||
# Sphinx documentation
|
# Sphinx documentation
|
||||||
docs/_build/
|
docs/_build/
|
||||||
.ipynb_checkpoints
|
.ipynb_checkpoints
|
||||||
graph_default_sem_eq_weights.rst
|
similarity_weights.rst
|
||||||
object_default_sem_eq_weights.rst
|
|
||||||
|
|
||||||
# PyBuilder
|
# PyBuilder
|
||||||
target/
|
target/
|
||||||
|
|
|
@ -10,7 +10,6 @@ known_third_party =
|
||||||
pytz,
|
pytz,
|
||||||
requests,
|
requests,
|
||||||
simplejson,
|
simplejson,
|
||||||
six,
|
|
||||||
sphinx,
|
sphinx,
|
||||||
stix2patterns,
|
stix2patterns,
|
||||||
taxii2client,
|
taxii2client,
|
||||||
|
|
|
@ -23,3 +23,4 @@ repos:
|
||||||
args: ["-c", "--diff"]
|
args: ["-c", "--diff"]
|
||||||
- id: isort
|
- id: isort
|
||||||
name: Sort python imports (fixes files)
|
name: Sort python imports (fixes files)
|
||||||
|
exclude: ^stix2/canonicalization/
|
||||||
|
|
|
@ -21,6 +21,8 @@ Install with `pip <https://pip.pypa.io/en/stable/>`__:
|
||||||
|
|
||||||
$ pip install stix2
|
$ pip install stix2
|
||||||
|
|
||||||
|
Note: The library requires Python 3.6+.
|
||||||
|
|
||||||
Usage
|
Usage
|
||||||
-----
|
-----
|
||||||
|
|
||||||
|
|
13
docs/conf.py
13
docs/conf.py
|
@ -4,11 +4,9 @@ import os
|
||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
from six import class_types
|
|
||||||
from sphinx.ext.autodoc import ClassDocumenter
|
from sphinx.ext.autodoc import ClassDocumenter
|
||||||
|
|
||||||
from stix2.base import _STIXBase
|
from stix2.base import _STIXBase
|
||||||
from stix2.equivalence.graph import GRAPH_WEIGHTS
|
|
||||||
from stix2.equivalence.object import WEIGHTS
|
from stix2.equivalence.object import WEIGHTS
|
||||||
from stix2.version import __version__
|
from stix2.version import __version__
|
||||||
|
|
||||||
|
@ -67,16 +65,9 @@ object_default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o:
|
||||||
object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n ')
|
object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n ')
|
||||||
object_default_sem_eq_weights = object_default_sem_eq_weights.replace(' "', ' ')
|
object_default_sem_eq_weights = object_default_sem_eq_weights.replace(' "', ' ')
|
||||||
object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n')
|
object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n')
|
||||||
with open('object_default_sem_eq_weights.rst', 'w') as f:
|
with open('similarity_weights.rst', 'w') as f:
|
||||||
f.write(".. code-block:: python\n\n {}\n\n".format(object_default_sem_eq_weights))
|
f.write(".. code-block:: python\n\n {}\n\n".format(object_default_sem_eq_weights))
|
||||||
|
|
||||||
graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__)
|
|
||||||
graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n ')
|
|
||||||
graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace(' "', ' ')
|
|
||||||
graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n')
|
|
||||||
with open('graph_default_sem_eq_weights.rst', 'w') as f:
|
|
||||||
f.write(".. code-block:: python\n\n {}\n\n".format(graph_default_sem_eq_weights))
|
|
||||||
|
|
||||||
|
|
||||||
def get_property_type(prop):
|
def get_property_type(prop):
|
||||||
"""Convert property classname into pretty string name of property.
|
"""Convert property classname into pretty string name of property.
|
||||||
|
@ -107,7 +98,7 @@ class STIXPropertyDocumenter(ClassDocumenter):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def can_document_member(cls, member, membername, isattr, parent):
|
def can_document_member(cls, member, membername, isattr, parent):
|
||||||
return isinstance(member, class_types) and \
|
return isinstance(member, type) and \
|
||||||
issubclass(member, _STIXBase) and \
|
issubclass(member, _STIXBase) and \
|
||||||
hasattr(member, '_properties')
|
hasattr(member, '_properties')
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
4
setup.py
4
setup.py
|
@ -47,11 +47,11 @@ setup(
|
||||||
],
|
],
|
||||||
keywords='stix stix2 json cti cyber threat intelligence',
|
keywords='stix stix2 json cti cyber threat intelligence',
|
||||||
packages=find_packages(exclude=['*.test', '*.test.*']),
|
packages=find_packages(exclude=['*.test', '*.test.*']),
|
||||||
|
python_requires='>=3.6',
|
||||||
install_requires=[
|
install_requires=[
|
||||||
'pytz',
|
'pytz',
|
||||||
'requests',
|
'requests',
|
||||||
'simplejson',
|
'simplejson',
|
||||||
'six>=1.13.0',
|
|
||||||
'stix2-patterns>=1.2.0',
|
'stix2-patterns>=1.2.0',
|
||||||
],
|
],
|
||||||
project_urls={
|
project_urls={
|
||||||
|
@ -60,7 +60,7 @@ setup(
|
||||||
'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
|
'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
|
||||||
},
|
},
|
||||||
extras_require={
|
extras_require={
|
||||||
'taxii': ['taxii2-client>=2.2.1'],
|
'taxii': ['taxii2-client>=2.3.0'],
|
||||||
'semantic': ['haversine', 'rapidfuzz'],
|
'semantic': ['haversine', 'rapidfuzz'],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -5,7 +5,6 @@ import re
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
import simplejson as json
|
import simplejson as json
|
||||||
import six
|
|
||||||
|
|
||||||
import stix2
|
import stix2
|
||||||
from stix2.canonicalization.Canonicalize import canonicalize
|
from stix2.canonicalization.Canonicalize import canonicalize
|
||||||
|
@ -18,7 +17,8 @@ from .exceptions import (
|
||||||
from .markings import _MarkingsMixin
|
from .markings import _MarkingsMixin
|
||||||
from .markings.utils import validate
|
from .markings.utils import validate
|
||||||
from .serialization import (
|
from .serialization import (
|
||||||
STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, serialize,
|
STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, fp_serialize,
|
||||||
|
serialize,
|
||||||
)
|
)
|
||||||
from .utils import NOW, PREFIX_21_REGEX, get_timestamp
|
from .utils import NOW, PREFIX_21_REGEX, get_timestamp
|
||||||
from .versioning import new_version as _new_version
|
from .versioning import new_version as _new_version
|
||||||
|
@ -70,12 +70,9 @@ class _STIXBase(Mapping):
|
||||||
# InvalidValueError... so let those propagate.
|
# InvalidValueError... so let those propagate.
|
||||||
raise
|
raise
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
six.raise_from(
|
raise InvalidValueError(
|
||||||
InvalidValueError(
|
self.__class__, prop_name, reason=str(exc),
|
||||||
self.__class__, prop_name, reason=str(exc),
|
) from exc
|
||||||
),
|
|
||||||
exc,
|
|
||||||
)
|
|
||||||
|
|
||||||
# interproperty constraint methods
|
# interproperty constraint methods
|
||||||
|
|
||||||
|
@ -266,6 +263,35 @@ class _STIXBase(Mapping):
|
||||||
"""
|
"""
|
||||||
return serialize(self, *args, **kwargs)
|
return serialize(self, *args, **kwargs)
|
||||||
|
|
||||||
|
def fp_serialize(self, *args, **kwargs):
|
||||||
|
"""
|
||||||
|
Serialize a STIX object to ``fp`` (a text stream file-like supporting object).
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
>>> import stix2
|
||||||
|
>>> identity = stix2.Identity(name='Example Corp.', identity_class='organization')
|
||||||
|
>>> print(identity.serialize(sort_keys=True))
|
||||||
|
{"created": "2018-06-08T19:03:54.066Z", ... "name": "Example Corp.", "type": "identity"}
|
||||||
|
>>> print(identity.serialize(sort_keys=True, indent=4))
|
||||||
|
{
|
||||||
|
"created": "2018-06-08T19:03:54.066Z",
|
||||||
|
"id": "identity--d7f3e25a-ba1c-447a-ab71-6434b092b05e",
|
||||||
|
"identity_class": "organization",
|
||||||
|
"modified": "2018-06-08T19:03:54.066Z",
|
||||||
|
"name": "Example Corp.",
|
||||||
|
"type": "identity"
|
||||||
|
}
|
||||||
|
>>> with open("example.json", mode="w", encoding="utf-8") as f:
|
||||||
|
>>> identity.fp_serialize(f, pretty=True)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
|
See Also:
|
||||||
|
``stix2.serialization.fp_serialize`` for options.
|
||||||
|
"""
|
||||||
|
fp_serialize(self, *args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
class _DomainObject(_STIXBase, _MarkingsMixin):
|
class _DomainObject(_STIXBase, _MarkingsMixin):
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
|
@ -386,19 +412,8 @@ class _Observable(_STIXBase):
|
||||||
if json_serializable_object:
|
if json_serializable_object:
|
||||||
|
|
||||||
data = canonicalize(json_serializable_object, utf8=False)
|
data = canonicalize(json_serializable_object, utf8=False)
|
||||||
|
uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, data)
|
||||||
# The situation is complicated w.r.t. python 2/3 behavior, so
|
id_ = "{}--{}".format(self._type, str(uuid_))
|
||||||
# I'd rather not rely on particular exceptions being raised to
|
|
||||||
# determine what to do. Better to just check the python version
|
|
||||||
# directly.
|
|
||||||
if six.PY3:
|
|
||||||
uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, data)
|
|
||||||
else:
|
|
||||||
uuid_ = uuid.uuid5(
|
|
||||||
SCO_DET_ID_NAMESPACE, data.encode("utf-8"),
|
|
||||||
)
|
|
||||||
|
|
||||||
id_ = "{}--{}".format(self._type, six.text_type(uuid_))
|
|
||||||
|
|
||||||
return id_
|
return id_
|
||||||
|
|
||||||
|
@ -464,7 +479,7 @@ def _make_json_serializable(value):
|
||||||
for v in value
|
for v in value
|
||||||
]
|
]
|
||||||
|
|
||||||
elif not isinstance(value, (int, float, six.string_types, bool)):
|
elif not isinstance(value, (int, float, str, bool)):
|
||||||
# If a "simple" value which is not already JSON-serializable,
|
# If a "simple" value which is not already JSON-serializable,
|
||||||
# JSON-serialize to a string and use that as our JSON-serializable
|
# JSON-serialize to a string and use that as our JSON-serializable
|
||||||
# value. This applies to our datetime objects currently (timestamp
|
# value. This applies to our datetime objects currently (timestamp
|
||||||
|
|
|
@ -20,12 +20,8 @@
|
||||||
# JCS compatible JSON serializer for Python 3.x #
|
# JCS compatible JSON serializer for Python 3.x #
|
||||||
#################################################
|
#################################################
|
||||||
|
|
||||||
# This file has been modified to be compatible with Python 2.x as well
|
|
||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from stix2.canonicalization.NumberToJson import convert2Es6Format
|
from stix2.canonicalization.NumberToJson import convert2Es6Format
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -55,10 +51,10 @@ ESCAPE_DCT = {
|
||||||
}
|
}
|
||||||
for i in range(0x20):
|
for i in range(0x20):
|
||||||
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
|
ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
|
||||||
|
#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
|
||||||
|
|
||||||
INFINITY = float('inf')
|
INFINITY = float('inf')
|
||||||
|
|
||||||
|
|
||||||
def py_encode_basestring(s):
|
def py_encode_basestring(s):
|
||||||
"""Return a JSON representation of a Python string
|
"""Return a JSON representation of a Python string
|
||||||
|
|
||||||
|
@ -70,7 +66,6 @@ def py_encode_basestring(s):
|
||||||
|
|
||||||
encode_basestring = (c_encode_basestring or py_encode_basestring)
|
encode_basestring = (c_encode_basestring or py_encode_basestring)
|
||||||
|
|
||||||
|
|
||||||
def py_encode_basestring_ascii(s):
|
def py_encode_basestring_ascii(s):
|
||||||
"""Return an ASCII-only JSON representation of a Python string
|
"""Return an ASCII-only JSON representation of a Python string
|
||||||
|
|
||||||
|
@ -83,6 +78,7 @@ def py_encode_basestring_ascii(s):
|
||||||
n = ord(s)
|
n = ord(s)
|
||||||
if n < 0x10000:
|
if n < 0x10000:
|
||||||
return '\\u{0:04x}'.format(n)
|
return '\\u{0:04x}'.format(n)
|
||||||
|
#return '\\u%04x' % (n,)
|
||||||
else:
|
else:
|
||||||
# surrogate pair
|
# surrogate pair
|
||||||
n -= 0x10000
|
n -= 0x10000
|
||||||
|
@ -96,7 +92,6 @@ encode_basestring_ascii = (
|
||||||
c_encode_basestring_ascii or py_encode_basestring_ascii
|
c_encode_basestring_ascii or py_encode_basestring_ascii
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class JSONEncoder(object):
|
class JSONEncoder(object):
|
||||||
"""Extensible JSON <http://json.org> encoder for Python data structures.
|
"""Extensible JSON <http://json.org> encoder for Python data structures.
|
||||||
|
|
||||||
|
@ -128,11 +123,10 @@ class JSONEncoder(object):
|
||||||
"""
|
"""
|
||||||
item_separator = ', '
|
item_separator = ', '
|
||||||
key_separator = ': '
|
key_separator = ': '
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, skipkeys=False, ensure_ascii=False,
|
self, *, skipkeys=False, ensure_ascii=False,
|
||||||
check_circular=True, allow_nan=True, sort_keys=True,
|
check_circular=True, allow_nan=True, sort_keys=True,
|
||||||
indent=None, separators=(',', ':'), default=None,
|
indent=None, separators=(',', ':'), default=None
|
||||||
):
|
):
|
||||||
"""Constructor for JSONEncoder, with sensible defaults.
|
"""Constructor for JSONEncoder, with sensible defaults.
|
||||||
|
|
||||||
|
@ -277,6 +271,7 @@ class JSONEncoder(object):
|
||||||
|
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
if (
|
if (
|
||||||
_one_shot and c_make_encoder is not None
|
_one_shot and c_make_encoder is not None
|
||||||
and self.indent is None
|
and self.indent is None
|
||||||
|
@ -294,11 +289,10 @@ class JSONEncoder(object):
|
||||||
)
|
)
|
||||||
return _iterencode(o, 0)
|
return _iterencode(o, 0)
|
||||||
|
|
||||||
|
|
||||||
def _make_iterencode(
|
def _make_iterencode(
|
||||||
markers, _default, _encoder, _indent, _floatstr,
|
markers, _default, _encoder, _indent, _floatstr,
|
||||||
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
|
_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
|
||||||
# HACK: hand-optimized bytecode; turn globals into locals
|
## HACK: hand-optimized bytecode; turn globals into locals
|
||||||
ValueError=ValueError,
|
ValueError=ValueError,
|
||||||
dict=dict,
|
dict=dict,
|
||||||
float=float,
|
float=float,
|
||||||
|
@ -362,10 +356,7 @@ def _make_iterencode(
|
||||||
chunks = _iterencode_dict(value, _current_indent_level)
|
chunks = _iterencode_dict(value, _current_indent_level)
|
||||||
else:
|
else:
|
||||||
chunks = _iterencode(value, _current_indent_level)
|
chunks = _iterencode(value, _current_indent_level)
|
||||||
# Below line commented-out for python2 compatibility
|
yield from chunks
|
||||||
# yield from chunks
|
|
||||||
for chunk in chunks:
|
|
||||||
yield chunk
|
|
||||||
if newline_indent is not None:
|
if newline_indent is not None:
|
||||||
_current_indent_level -= 1
|
_current_indent_level -= 1
|
||||||
yield '\n' + _indent * _current_indent_level
|
yield '\n' + _indent * _current_indent_level
|
||||||
|
@ -397,8 +388,7 @@ def _make_iterencode(
|
||||||
else:
|
else:
|
||||||
items = dct.items()
|
items = dct.items()
|
||||||
for key, value in items:
|
for key, value in items:
|
||||||
# Replaced isinstance(key, str) with below to enable simultaneous python 2 & 3 compatibility
|
if isinstance(key, str):
|
||||||
if isinstance(key, six.string_types) or isinstance(key, six.binary_type):
|
|
||||||
pass
|
pass
|
||||||
# JavaScript is weakly typed for these, so it makes sense to
|
# JavaScript is weakly typed for these, so it makes sense to
|
||||||
# also allow them. Many encoders seem to do something like this.
|
# also allow them. Many encoders seem to do something like this.
|
||||||
|
@ -445,10 +435,7 @@ def _make_iterencode(
|
||||||
chunks = _iterencode_dict(value, _current_indent_level)
|
chunks = _iterencode_dict(value, _current_indent_level)
|
||||||
else:
|
else:
|
||||||
chunks = _iterencode(value, _current_indent_level)
|
chunks = _iterencode(value, _current_indent_level)
|
||||||
# Below line commented-out for python2 compatibility
|
yield from chunks
|
||||||
# yield from chunks
|
|
||||||
for chunk in chunks:
|
|
||||||
yield chunk
|
|
||||||
if newline_indent is not None:
|
if newline_indent is not None:
|
||||||
_current_indent_level -= 1
|
_current_indent_level -= 1
|
||||||
yield '\n' + _indent * _current_indent_level
|
yield '\n' + _indent * _current_indent_level
|
||||||
|
@ -457,8 +444,7 @@ def _make_iterencode(
|
||||||
del markers[markerid]
|
del markers[markerid]
|
||||||
|
|
||||||
def _iterencode(o, _current_indent_level):
|
def _iterencode(o, _current_indent_level):
|
||||||
# Replaced isinstance(o, str) with below to enable simultaneous python 2 & 3 compatibility
|
if isinstance(o, str):
|
||||||
if isinstance(o, six.string_types) or isinstance(o, six.binary_type):
|
|
||||||
yield _encoder(o)
|
yield _encoder(o)
|
||||||
elif o is None:
|
elif o is None:
|
||||||
yield 'null'
|
yield 'null'
|
||||||
|
@ -473,15 +459,9 @@ def _make_iterencode(
|
||||||
# see comment for int/float in _make_iterencode
|
# see comment for int/float in _make_iterencode
|
||||||
yield convert2Es6Format(o)
|
yield convert2Es6Format(o)
|
||||||
elif isinstance(o, (list, tuple)):
|
elif isinstance(o, (list, tuple)):
|
||||||
# Below line commented-out for python2 compatibility
|
yield from _iterencode_list(o, _current_indent_level)
|
||||||
# yield from _iterencode_list(o, _current_indent_level)
|
|
||||||
for thing in _iterencode_list(o, _current_indent_level):
|
|
||||||
yield thing
|
|
||||||
elif isinstance(o, dict):
|
elif isinstance(o, dict):
|
||||||
# Below line commented-out for python2 compatibility
|
yield from _iterencode_dict(o, _current_indent_level)
|
||||||
# yield from _iterencode_dict(o, _current_indent_level)
|
|
||||||
for thing in _iterencode_dict(o, _current_indent_level):
|
|
||||||
yield thing
|
|
||||||
else:
|
else:
|
||||||
if markers is not None:
|
if markers is not None:
|
||||||
markerid = id(o)
|
markerid = id(o)
|
||||||
|
@ -489,23 +469,18 @@ def _make_iterencode(
|
||||||
raise ValueError("Circular reference detected")
|
raise ValueError("Circular reference detected")
|
||||||
markers[markerid] = o
|
markers[markerid] = o
|
||||||
o = _default(o)
|
o = _default(o)
|
||||||
# Below line commented-out for python2 compatibility
|
yield from _iterencode(o, _current_indent_level)
|
||||||
# yield from _iterencode(o, _current_indent_level)
|
|
||||||
for thing in _iterencode(o, _current_indent_level):
|
|
||||||
yield thing
|
|
||||||
if markers is not None:
|
if markers is not None:
|
||||||
del markers[markerid]
|
del markers[markerid]
|
||||||
return _iterencode
|
return _iterencode
|
||||||
|
|
||||||
|
def canonicalize(obj,utf8=True):
|
||||||
def canonicalize(obj, utf8=True):
|
|
||||||
textVal = JSONEncoder(sort_keys=True).encode(obj)
|
textVal = JSONEncoder(sort_keys=True).encode(obj)
|
||||||
if utf8:
|
if utf8:
|
||||||
return textVal.encode()
|
return textVal.encode()
|
||||||
return textVal
|
return textVal
|
||||||
|
|
||||||
|
def serialize(obj,utf8=True):
|
||||||
def serialize(obj, utf8=True):
|
|
||||||
textVal = JSONEncoder(sort_keys=False).encode(obj)
|
textVal = JSONEncoder(sort_keys=False).encode(obj)
|
||||||
if utf8:
|
if utf8:
|
||||||
return textVal.encode()
|
return textVal.encode()
|
||||||
|
|
|
@ -21,40 +21,50 @@
|
||||||
# Convert a Python double/float into an ES6/V8 compatible string #
|
# Convert a Python double/float into an ES6/V8 compatible string #
|
||||||
##################################################################
|
##################################################################
|
||||||
def convert2Es6Format(value):
|
def convert2Es6Format(value):
|
||||||
# Convert double/float to str using the native Python formatter
|
# Convert double/float to str using the native Python formatter
|
||||||
fvalue = float(value)
|
fvalue = float(value)
|
||||||
|
#
|
||||||
# Zero is a special case. The following line takes "-0" case as well
|
# Zero is a special case. The following line takes "-0" case as well
|
||||||
|
#
|
||||||
if fvalue == 0:
|
if fvalue == 0:
|
||||||
return '0'
|
return '0'
|
||||||
|
#
|
||||||
# The rest of the algorithm works on the textual representation only
|
# The rest of the algorithm works on the textual representation only
|
||||||
|
#
|
||||||
pyDouble = str(fvalue)
|
pyDouble = str(fvalue)
|
||||||
|
#
|
||||||
# The following line catches the "inf" and "nan" values returned by str(fvalue)
|
# The following line catches the "inf" and "nan" values returned by str(fvalue)
|
||||||
|
#
|
||||||
if pyDouble.find('n') >= 0:
|
if pyDouble.find('n') >= 0:
|
||||||
raise ValueError("Invalid JSON number: " + pyDouble)
|
raise ValueError("Invalid JSON number: " + pyDouble)
|
||||||
|
#
|
||||||
# Save sign separately, it doesn't have any role in the algorithm
|
# Save sign separately, it doesn't have any role in the algorithm
|
||||||
|
#
|
||||||
pySign = ''
|
pySign = ''
|
||||||
if pyDouble.find('-') == 0:
|
if pyDouble.find('-') == 0:
|
||||||
pySign = '-'
|
pySign = '-'
|
||||||
pyDouble = pyDouble[1:]
|
pyDouble = pyDouble[1:]
|
||||||
|
#
|
||||||
# Now we should only have valid non-zero values
|
# Now we should only have valid non-zero values
|
||||||
|
#
|
||||||
pyExpStr = ''
|
pyExpStr = ''
|
||||||
pyExpVal = 0
|
pyExpVal = 0
|
||||||
q = pyDouble.find('e')
|
q = pyDouble.find('e')
|
||||||
if q > 0:
|
if q > 0:
|
||||||
# Grab the exponent and remove it from the number
|
#
|
||||||
|
# Grab the exponent and remove it from the number
|
||||||
|
#
|
||||||
pyExpStr = pyDouble[q:]
|
pyExpStr = pyDouble[q:]
|
||||||
if pyExpStr[2:3] == '0':
|
if pyExpStr[2:3] == '0':
|
||||||
# Supress leading zero on exponents
|
#
|
||||||
|
# Supress leading zero on exponents
|
||||||
|
#
|
||||||
pyExpStr = pyExpStr[:2] + pyExpStr[3:]
|
pyExpStr = pyExpStr[:2] + pyExpStr[3:]
|
||||||
pyDouble = pyDouble[0:q]
|
pyDouble = pyDouble[0:q]
|
||||||
pyExpVal = int(pyExpStr[1:])
|
pyExpVal = int(pyExpStr[1:])
|
||||||
|
#
|
||||||
# Split number in pyFirst + pyDot + pyLast
|
# Split number in pyFirst + pyDot + pyLast
|
||||||
|
#
|
||||||
pyFirst = pyDouble
|
pyFirst = pyDouble
|
||||||
pyDot = ''
|
pyDot = ''
|
||||||
pyLast = ''
|
pyLast = ''
|
||||||
|
@ -63,33 +73,40 @@ def convert2Es6Format(value):
|
||||||
pyDot = '.'
|
pyDot = '.'
|
||||||
pyFirst = pyDouble[:q]
|
pyFirst = pyDouble[:q]
|
||||||
pyLast = pyDouble[q + 1:]
|
pyLast = pyDouble[q + 1:]
|
||||||
|
#
|
||||||
# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
|
# Now the string is split into: pySign + pyFirst + pyDot + pyLast + pyExpStr
|
||||||
|
#
|
||||||
if pyLast == '0':
|
if pyLast == '0':
|
||||||
# Always remove trailing .0
|
#
|
||||||
|
# Always remove trailing .0
|
||||||
|
#
|
||||||
pyDot = ''
|
pyDot = ''
|
||||||
pyLast = ''
|
pyLast = ''
|
||||||
|
|
||||||
if pyExpVal > 0 and pyExpVal < 21:
|
if pyExpVal > 0 and pyExpVal < 21:
|
||||||
# Integers are shown as is with up to 21 digits
|
#
|
||||||
|
# Integers are shown as is with up to 21 digits
|
||||||
|
#
|
||||||
pyFirst += pyLast
|
pyFirst += pyLast
|
||||||
pyLast = ''
|
pyLast = ''
|
||||||
pyDot = ''
|
pyDot = ''
|
||||||
pyExpStr = ''
|
pyExpStr = ''
|
||||||
q = pyExpVal - len(pyFirst)
|
q = pyExpVal - len(pyFirst)
|
||||||
while q >= 0:
|
while q >= 0:
|
||||||
q -= 1
|
q -= 1;
|
||||||
pyFirst += '0'
|
pyFirst += '0'
|
||||||
elif pyExpVal < 0 and pyExpVal > -7:
|
elif pyExpVal < 0 and pyExpVal > -7:
|
||||||
# Small numbers are shown as 0.etc with e-6 as lower limit
|
#
|
||||||
|
# Small numbers are shown as 0.etc with e-6 as lower limit
|
||||||
|
#
|
||||||
pyLast = pyFirst + pyLast
|
pyLast = pyFirst + pyLast
|
||||||
pyFirst = '0'
|
pyFirst = '0'
|
||||||
pyDot = '.'
|
pyDot = '.'
|
||||||
pyExpStr = ''
|
pyExpStr = ''
|
||||||
q = pyExpVal
|
q = pyExpVal
|
||||||
while q < -1:
|
while q < -1:
|
||||||
q += 1
|
q += 1;
|
||||||
pyLast = '0' + pyLast
|
pyLast = '0' + pyLast
|
||||||
|
#
|
||||||
# The resulting sub-strings are concatenated
|
# The resulting sub-strings are concatenated
|
||||||
|
#
|
||||||
return pySign + pyFirst + pyDot + pyLast + pyExpStr
|
return pySign + pyFirst + pyDot + pyLast + pyExpStr
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from .base import _cls_init
|
from .base import _cls_init
|
||||||
from .registration import (
|
from .registration import (
|
||||||
_register_marking, _register_object, _register_observable,
|
_register_marking, _register_object, _register_observable,
|
||||||
|
@ -13,14 +11,11 @@ def _get_properties_dict(properties):
|
||||||
try:
|
try:
|
||||||
return OrderedDict(properties)
|
return OrderedDict(properties)
|
||||||
except TypeError as e:
|
except TypeError as e:
|
||||||
six.raise_from(
|
raise ValueError(
|
||||||
ValueError(
|
"properties must be dict-like, e.g. a list "
|
||||||
"properties must be dict-like, e.g. a list "
|
"containing tuples. For example, "
|
||||||
"containing tuples. For example, "
|
"[('property1', IntegerProperty())]",
|
||||||
"[('property1', IntegerProperty())]",
|
) from e
|
||||||
),
|
|
||||||
e,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def _custom_object_builder(cls, type, properties, version, base_class):
|
def _custom_object_builder(cls, type, properties, version, base_class):
|
||||||
|
|
|
@ -15,8 +15,6 @@ Python STIX2 DataStore API.
|
||||||
from abc import ABCMeta, abstractmethod
|
from abc import ABCMeta, abstractmethod
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from six import with_metaclass
|
|
||||||
|
|
||||||
from stix2.datastore.filters import Filter, FilterSet
|
from stix2.datastore.filters import Filter, FilterSet
|
||||||
from stix2.utils import deduplicate
|
from stix2.utils import deduplicate
|
||||||
|
|
||||||
|
@ -219,7 +217,7 @@ class DataStoreMixin(object):
|
||||||
raise AttributeError(msg % self.__class__.__name__)
|
raise AttributeError(msg % self.__class__.__name__)
|
||||||
|
|
||||||
|
|
||||||
class DataSink(with_metaclass(ABCMeta)):
|
class DataSink(metaclass=ABCMeta):
|
||||||
"""An implementer will create a concrete subclass from
|
"""An implementer will create a concrete subclass from
|
||||||
this class for the specific DataSink.
|
this class for the specific DataSink.
|
||||||
|
|
||||||
|
@ -245,7 +243,7 @@ class DataSink(with_metaclass(ABCMeta)):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class DataSource(with_metaclass(ABCMeta)):
|
class DataSource(metaclass=ABCMeta):
|
||||||
"""An implementer will create a concrete subclass from
|
"""An implementer will create a concrete subclass from
|
||||||
this class for the specific DataSource.
|
this class for the specific DataSource.
|
||||||
|
|
||||||
|
|
|
@ -6,8 +6,6 @@ import os
|
||||||
import re
|
import re
|
||||||
import stat
|
import stat
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from stix2 import v20, v21
|
from stix2 import v20, v21
|
||||||
from stix2.base import _STIXBase
|
from stix2.base import _STIXBase
|
||||||
from stix2.datastore import (
|
from stix2.datastore import (
|
||||||
|
@ -15,7 +13,7 @@ from stix2.datastore import (
|
||||||
)
|
)
|
||||||
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
|
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
|
||||||
from stix2.parsing import parse
|
from stix2.parsing import parse
|
||||||
from stix2.serialization import serialize
|
from stix2.serialization import fp_serialize
|
||||||
from stix2.utils import format_datetime, get_type_from_id, parse_into_datetime
|
from stix2.utils import format_datetime, get_type_from_id, parse_into_datetime
|
||||||
|
|
||||||
|
|
||||||
|
@ -116,7 +114,7 @@ def _update_allow(allow_set, value):
|
||||||
|
|
||||||
"""
|
"""
|
||||||
adding_seq = hasattr(value, "__iter__") and \
|
adding_seq = hasattr(value, "__iter__") and \
|
||||||
not isinstance(value, six.string_types)
|
not isinstance(value, str)
|
||||||
|
|
||||||
if allow_set is None:
|
if allow_set is None:
|
||||||
allow_set = set()
|
allow_set = set()
|
||||||
|
@ -586,9 +584,8 @@ class FileSystemSink(DataSink):
|
||||||
if os.path.isfile(file_path):
|
if os.path.isfile(file_path):
|
||||||
raise DataSourceError("Attempted to overwrite file (!) at: {}".format(file_path))
|
raise DataSourceError("Attempted to overwrite file (!) at: {}".format(file_path))
|
||||||
|
|
||||||
with io.open(file_path, 'w', encoding=encoding) as f:
|
with io.open(file_path, mode='w', encoding=encoding) as f:
|
||||||
stix_obj = serialize(stix_obj, pretty=True, encoding=encoding, ensure_ascii=False)
|
fp_serialize(stix_obj, f, pretty=True, encoding=encoding, ensure_ascii=False)
|
||||||
f.write(stix_obj)
|
|
||||||
|
|
||||||
def add(self, stix_data=None, version=None):
|
def add(self, stix_data=None, version=None):
|
||||||
"""Add STIX objects to file directory.
|
"""Add STIX objects to file directory.
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
import collections
|
import collections
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
import stix2.utils
|
import stix2.utils
|
||||||
|
|
||||||
"""Supported filter operations"""
|
"""Supported filter operations"""
|
||||||
|
@ -12,8 +10,7 @@ FILTER_OPS = ['=', '!=', 'in', '>', '<', '>=', '<=', 'contains']
|
||||||
|
|
||||||
"""Supported filter value types"""
|
"""Supported filter value types"""
|
||||||
FILTER_VALUE_TYPES = (
|
FILTER_VALUE_TYPES = (
|
||||||
bool, dict, float, int, list, tuple, six.string_types,
|
bool, dict, float, int, list, tuple, str, datetime,
|
||||||
datetime,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -84,7 +81,7 @@ class Filter(collections.namedtuple('Filter', ['property', 'op', 'value'])):
|
||||||
# If filtering on a timestamp property and the filter value is a string,
|
# If filtering on a timestamp property and the filter value is a string,
|
||||||
# try to convert the filter value to a datetime instance.
|
# try to convert the filter value to a datetime instance.
|
||||||
if isinstance(stix_obj_property, datetime) and \
|
if isinstance(stix_obj_property, datetime) and \
|
||||||
isinstance(self.value, six.string_types):
|
isinstance(self.value, str):
|
||||||
filter_value = stix2.utils.parse_into_datetime(self.value)
|
filter_value = stix2.utils.parse_into_datetime(self.value)
|
||||||
else:
|
else:
|
||||||
filter_value = self.value
|
filter_value = self.value
|
||||||
|
|
|
@ -12,6 +12,8 @@ from stix2.parsing import parse
|
||||||
from stix2.utils import deduplicate
|
from stix2.utils import deduplicate
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
from taxii2client import v20 as tcv20
|
||||||
|
from taxii2client import v21 as tcv21
|
||||||
from taxii2client.exceptions import ValidationError
|
from taxii2client.exceptions import ValidationError
|
||||||
_taxii2_client = True
|
_taxii2_client = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -33,9 +35,12 @@ class TAXIICollectionStore(DataStoreMixin):
|
||||||
side(retrieving data) and False for TAXIICollectionSink
|
side(retrieving data) and False for TAXIICollectionSink
|
||||||
side(pushing data). However, when parameter is supplied, it will
|
side(pushing data). However, when parameter is supplied, it will
|
||||||
be applied to both TAXIICollectionSource/Sink.
|
be applied to both TAXIICollectionSource/Sink.
|
||||||
|
items_per_page (int): How many STIX objects to request per call
|
||||||
|
to TAXII Server. The value can be tuned, but servers may override
|
||||||
|
if their internal limit is surpassed. Used by TAXIICollectionSource
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, collection, allow_custom=None):
|
def __init__(self, collection, allow_custom=None, items_per_page=5000):
|
||||||
if allow_custom is None:
|
if allow_custom is None:
|
||||||
allow_custom_source = True
|
allow_custom_source = True
|
||||||
allow_custom_sink = False
|
allow_custom_sink = False
|
||||||
|
@ -43,7 +48,7 @@ class TAXIICollectionStore(DataStoreMixin):
|
||||||
allow_custom_sink = allow_custom_source = allow_custom
|
allow_custom_sink = allow_custom_source = allow_custom
|
||||||
|
|
||||||
super(TAXIICollectionStore, self).__init__(
|
super(TAXIICollectionStore, self).__init__(
|
||||||
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source),
|
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source, items_per_page=items_per_page),
|
||||||
sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
|
sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -144,9 +149,12 @@ class TAXIICollectionSource(DataSource):
|
||||||
collection (taxii2.Collection): TAXII Collection instance
|
collection (taxii2.Collection): TAXII Collection instance
|
||||||
allow_custom (bool): Whether to allow custom STIX content to be
|
allow_custom (bool): Whether to allow custom STIX content to be
|
||||||
added to the FileSystemSink. Default: True
|
added to the FileSystemSink. Default: True
|
||||||
|
items_per_page (int): How many STIX objects to request per call
|
||||||
|
to TAXII Server. The value can be tuned, but servers may override
|
||||||
|
if their internal limit is surpassed.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, collection, allow_custom=True):
|
def __init__(self, collection, allow_custom=True, items_per_page=5000):
|
||||||
super(TAXIICollectionSource, self).__init__()
|
super(TAXIICollectionSource, self).__init__()
|
||||||
if not _taxii2_client:
|
if not _taxii2_client:
|
||||||
raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
|
raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
|
||||||
|
@ -167,6 +175,7 @@ class TAXIICollectionSource(DataSource):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.allow_custom = allow_custom
|
self.allow_custom = allow_custom
|
||||||
|
self.items_per_page = items_per_page
|
||||||
|
|
||||||
def get(self, stix_id, version=None, _composite_filters=None):
|
def get(self, stix_id, version=None, _composite_filters=None):
|
||||||
"""Retrieve STIX object from local/remote STIX Collection
|
"""Retrieve STIX object from local/remote STIX Collection
|
||||||
|
@ -286,8 +295,12 @@ class TAXIICollectionSource(DataSource):
|
||||||
taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
|
taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
|
||||||
|
|
||||||
# query TAXII collection
|
# query TAXII collection
|
||||||
|
all_data = []
|
||||||
try:
|
try:
|
||||||
all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
|
paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
|
||||||
|
|
||||||
|
for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
|
||||||
|
all_data.extend(resource.get("objects", []))
|
||||||
|
|
||||||
# deduplicate data (before filtering as reduces wasted filtering)
|
# deduplicate data (before filtering as reduces wasted filtering)
|
||||||
all_data = deduplicate(all_data)
|
all_data = deduplicate(all_data)
|
||||||
|
|
|
@ -2,18 +2,10 @@
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
from .datastore import CompositeDataSource, DataStoreMixin
|
from .datastore import CompositeDataSource, DataStoreMixin
|
||||||
from .equivalence.graph import graphically_equivalent
|
from .equivalence.graph import graph_equivalence, graph_similarity
|
||||||
from .equivalence.object import ( # noqa: F401
|
from .equivalence.object import object_equivalence, object_similarity
|
||||||
WEIGHTS, check_property_present, custom_pattern_based, exact_match,
|
|
||||||
list_reference_check, partial_external_reference_based, partial_list_based,
|
|
||||||
partial_location_distance, partial_string_based, partial_timestamp_based,
|
|
||||||
reference_check, semantically_equivalent,
|
|
||||||
)
|
|
||||||
from .parsing import parse as _parse
|
from .parsing import parse as _parse
|
||||||
|
|
||||||
# TODO: Remove all unused imports that now belong to the equivalence module in the next major release.
|
|
||||||
# Kept for backwards compatibility.
|
|
||||||
|
|
||||||
|
|
||||||
class ObjectFactory(object):
|
class ObjectFactory(object):
|
||||||
"""Easily create STIX objects with default values for certain properties.
|
"""Easily create STIX objects with default values for certain properties.
|
||||||
|
@ -197,23 +189,37 @@ class Environment(DataStoreMixin):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
def object_similarity(
|
||||||
"""This method verifies if two objects of the same type are
|
obj1, obj2, prop_scores={}, ds1=None, ds2=None,
|
||||||
semantically equivalent.
|
ignore_spec_version=False, versioning_checks=False,
|
||||||
|
max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a measure of how similar the two objects are.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
obj1: A stix2 object instance
|
obj1: A stix2 object instance
|
||||||
obj2: A stix2 object instance
|
obj2: A stix2 object instance
|
||||||
prop_scores: A dictionary that can hold individual property scores,
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
weights, contributing score, matching score and sum of weights.
|
weights, contributing score, matching score and sum of weights.
|
||||||
weight_dict: A dictionary that can be used to override settings
|
ds1 (optional): A DataStore object instance from which to pull related objects
|
||||||
in the semantic equivalence process
|
ds2 (optional): A DataStore object instance from which to pull related objects
|
||||||
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: A number between 0.0 and 100.0 as a measurement of equivalence.
|
float: A number between 0.0 and 100.0 as a measurement of similarity.
|
||||||
|
|
||||||
Warning:
|
Warning:
|
||||||
Object types need to have property weights defined for the equivalence process.
|
Object types need to have property weights defined for the similarity process.
|
||||||
Otherwise, those objects will not influence the final score. The WEIGHTS
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
dictionary under `stix2.equivalence.object` can give you an idea on how to add
|
dictionary under `stix2.equivalence.object` can give you an idea on how to add
|
||||||
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
@ -222,21 +228,85 @@ class Environment(DataStoreMixin):
|
||||||
Note:
|
Note:
|
||||||
Default weight_dict:
|
Default weight_dict:
|
||||||
|
|
||||||
.. include:: ../object_default_sem_eq_weights.rst
|
.. include:: ../similarity_weights.rst
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
This implementation follows the Semantic Equivalence Committee Note.
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
see `the Committee Note <link here>`__.
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return semantically_equivalent(obj1, obj2, prop_scores, **weight_dict)
|
return object_similarity(
|
||||||
|
obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
|
||||||
|
versioning_checks, max_depth, **weight_dict
|
||||||
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
|
def object_equivalence(
|
||||||
"""This method verifies if two graphs are semantically equivalent.
|
obj1, obj2, prop_scores={}, threshold=70, ds1=None, ds2=None,
|
||||||
|
ignore_spec_version=False, versioning_checks=False,
|
||||||
|
max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a true/false value if two objects are semantically equivalent.
|
||||||
|
Internally, it calls the object_similarity function and compares it against the given
|
||||||
|
threshold value.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj1: A stix2 object instance
|
||||||
|
obj2: A stix2 object instance
|
||||||
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
|
weights, contributing score, matching score and sum of weights.
|
||||||
|
threshold: A numerical value between 0 and 100 to determine the minimum
|
||||||
|
score to result in successfully calling both objects equivalent. This
|
||||||
|
value can be tuned.
|
||||||
|
ds1 (optional): A DataStore object instance from which to pull related objects
|
||||||
|
ds2 (optional): A DataStore object instance from which to pull related objects
|
||||||
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the result of the object similarity is greater than or equal to
|
||||||
|
the threshold value. False otherwise.
|
||||||
|
|
||||||
|
Warning:
|
||||||
|
Object types need to have property weights defined for the similarity process.
|
||||||
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
|
dictionary under `stix2.equivalence.object` can give you an idea on how to add
|
||||||
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
or methods can be fine tuned for a particular use case.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Default weight_dict:
|
||||||
|
|
||||||
|
.. include:: ../similarity_weights.rst
|
||||||
|
|
||||||
|
Note:
|
||||||
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return object_equivalence(
|
||||||
|
obj1, obj2, prop_scores, threshold, ds1, ds2,
|
||||||
|
ignore_spec_version, versioning_checks, max_depth, **weight_dict
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def graph_similarity(
|
||||||
|
ds1, ds2, prop_scores={}, ignore_spec_version=False,
|
||||||
|
versioning_checks=False, max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a similarity score for two given graphs.
|
||||||
Each DataStore can contain a connected or disconnected graph and the
|
Each DataStore can contain a connected or disconnected graph and the
|
||||||
final result is weighted over the amount of objects we managed to compare.
|
final result is weighted over the amount of objects we managed to compare.
|
||||||
This approach builds on top of the object-based semantic equivalence process
|
This approach builds on top of the object-based similarity process
|
||||||
and each comparison can return a value between 0 and 100.
|
and each comparison can return a value between 0 and 100.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -244,14 +314,23 @@ class Environment(DataStoreMixin):
|
||||||
ds2: A DataStore object instance representing your graph
|
ds2: A DataStore object instance representing your graph
|
||||||
prop_scores: A dictionary that can hold individual property scores,
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
weights, contributing score, matching score and sum of weights.
|
weights, contributing score, matching score and sum of weights.
|
||||||
weight_dict: A dictionary that can be used to override settings
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
in the semantic equivalence process
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: A number between 0.0 and 100.0 as a measurement of equivalence.
|
float: A number between 0.0 and 100.0 as a measurement of similarity.
|
||||||
|
|
||||||
Warning:
|
Warning:
|
||||||
Object types need to have property weights defined for the equivalence process.
|
Object types need to have property weights defined for the similarity process.
|
||||||
Otherwise, those objects will not influence the final score. The WEIGHTS
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
||||||
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
@ -260,11 +339,70 @@ class Environment(DataStoreMixin):
|
||||||
Note:
|
Note:
|
||||||
Default weight_dict:
|
Default weight_dict:
|
||||||
|
|
||||||
.. include:: ../graph_default_sem_eq_weights.rst
|
.. include:: ../similarity_weights.rst
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
This implementation follows the Semantic Equivalence Committee Note.
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
see `the Committee Note <link here>`__.
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return graphically_equivalent(ds1, ds2, prop_scores, **weight_dict)
|
return graph_similarity(
|
||||||
|
ds1, ds2, prop_scores, ignore_spec_version,
|
||||||
|
versioning_checks, max_depth, **weight_dict
|
||||||
|
)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def graph_equivalence(
|
||||||
|
ds1, ds2, prop_scores={}, threshold=70,
|
||||||
|
ignore_spec_version=False, versioning_checks=False,
|
||||||
|
max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a true/false value if two graphs are semantically equivalent.
|
||||||
|
Internally, it calls the graph_similarity function and compares it against the given
|
||||||
|
threshold value.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ds1: A DataStore object instance representing your graph
|
||||||
|
ds2: A DataStore object instance representing your graph
|
||||||
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
|
weights, contributing score, matching score and sum of weights.
|
||||||
|
threshold: A numerical value between 0 and 100 to determine the minimum
|
||||||
|
score to result in successfully calling both graphs equivalent. This
|
||||||
|
value can be tuned.
|
||||||
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the result of the graph similarity is greater than or equal to
|
||||||
|
the threshold value. False otherwise.
|
||||||
|
|
||||||
|
Warning:
|
||||||
|
Object types need to have property weights defined for the similarity process.
|
||||||
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
|
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
||||||
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
or methods can be fine tuned for a particular use case.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Default weight_dict:
|
||||||
|
|
||||||
|
.. include:: ../similarity_weights.rst
|
||||||
|
|
||||||
|
Note:
|
||||||
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
|
"""
|
||||||
|
return graph_equivalence(
|
||||||
|
ds1, ds2, prop_scores, threshold, ignore_spec_version,
|
||||||
|
versioning_checks, max_depth, **weight_dict
|
||||||
|
)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
"""Python APIs for STIX 2 Semantic Equivalence.
|
"""Python APIs for STIX 2 Semantic Equivalence and Similarity.
|
||||||
|
|
||||||
.. autosummary::
|
.. autosummary::
|
||||||
:toctree: equivalence
|
:toctree: equivalence
|
||||||
|
|
|
@ -1,19 +1,80 @@
|
||||||
"""Python APIs for STIX 2 Graph-based Semantic Equivalence."""
|
"""Python APIs for STIX 2 Graph-based Semantic Equivalence and Similarity."""
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from ..object import (
|
from ..object import (
|
||||||
WEIGHTS, exact_match, list_reference_check, partial_string_based,
|
WEIGHTS, _bucket_per_type, _object_pairs, object_similarity,
|
||||||
partial_timestamp_based, reference_check, semantically_equivalent,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
|
def graph_equivalence(
|
||||||
"""This method verifies if two graphs are semantically equivalent.
|
ds1, ds2, prop_scores={}, threshold=70,
|
||||||
|
ignore_spec_version=False, versioning_checks=False,
|
||||||
|
max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a true/false value if two graphs are semantically equivalent.
|
||||||
|
Internally, it calls the graph_similarity function and compares it against the given
|
||||||
|
threshold value.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ds1: A DataStore object instance representing your graph
|
||||||
|
ds2: A DataStore object instance representing your graph
|
||||||
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
|
weights, contributing score, matching score and sum of weights.
|
||||||
|
threshold: A numerical value between 0 and 100 to determine the minimum
|
||||||
|
score to result in successfully calling both graphs equivalent. This
|
||||||
|
value can be tuned.
|
||||||
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
bool: True if the result of the graph similarity is greater than or equal to
|
||||||
|
the threshold value. False otherwise.
|
||||||
|
|
||||||
|
Warning:
|
||||||
|
Object types need to have property weights defined for the similarity process.
|
||||||
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
|
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
||||||
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
or methods can be fine tuned for a particular use case.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Default weight_dict:
|
||||||
|
|
||||||
|
.. include:: ../../similarity_weights.rst
|
||||||
|
|
||||||
|
Note:
|
||||||
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
|
"""
|
||||||
|
similarity_result = graph_similarity(
|
||||||
|
ds1, ds2, prop_scores, ignore_spec_version,
|
||||||
|
versioning_checks, max_depth, **weight_dict
|
||||||
|
)
|
||||||
|
if similarity_result >= threshold:
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def graph_similarity(
|
||||||
|
ds1, ds2, prop_scores={}, ignore_spec_version=False,
|
||||||
|
versioning_checks=False, max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a similarity score for two given graphs.
|
||||||
Each DataStore can contain a connected or disconnected graph and the
|
Each DataStore can contain a connected or disconnected graph and the
|
||||||
final result is weighted over the amount of objects we managed to compare.
|
final result is weighted over the amount of objects we managed to compare.
|
||||||
This approach builds on top of the object-based semantic equivalence process
|
This approach builds on top of the object-based similarity process
|
||||||
and each comparison can return a value between 0 and 100.
|
and each comparison can return a value between 0 and 100.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
|
@ -21,117 +82,97 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
|
||||||
ds2: A DataStore object instance representing your graph
|
ds2: A DataStore object instance representing your graph
|
||||||
prop_scores: A dictionary that can hold individual property scores,
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
weights, contributing score, matching score and sum of weights.
|
weights, contributing score, matching score and sum of weights.
|
||||||
weight_dict: A dictionary that can be used to override settings
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
in the semantic equivalence process
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: A number between 0.0 and 100.0 as a measurement of equivalence.
|
float: A number between 0.0 and 100.0 as a measurement of similarity.
|
||||||
|
|
||||||
Warning:
|
Warning:
|
||||||
Object types need to have property weights defined for the equivalence process.
|
Object types need to have property weights defined for the similarity process.
|
||||||
Otherwise, those objects will not influence the final score. The WEIGHTS
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
dictionary under `stix2.equivalence.graph` can give you an idea on how to add
|
||||||
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
or methods can be fine tuned for a particular use case.
|
or methods can be fine tuned for a particular use case.
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
Default weights_dict:
|
Default weight_dict:
|
||||||
|
|
||||||
.. include:: ../../graph_default_sem_eq_weights.rst
|
.. include:: ../../similarity_weights.rst
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
This implementation follows the Semantic Equivalence Committee Note.
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
see `the Committee Note <link here>`__.
|
see `the Committee Note <link here>`__.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
weights = GRAPH_WEIGHTS.copy()
|
results = {}
|
||||||
|
similarity_score = 0
|
||||||
|
weights = WEIGHTS.copy()
|
||||||
|
|
||||||
if weight_dict:
|
if weight_dict:
|
||||||
weights.update(weight_dict)
|
weights.update(weight_dict)
|
||||||
|
|
||||||
results = {}
|
weights["_internal"] = {
|
||||||
depth = weights["_internal"]["max_depth"]
|
"ignore_spec_version": ignore_spec_version,
|
||||||
|
"versioning_checks": versioning_checks,
|
||||||
|
"ds1": ds1,
|
||||||
|
"ds2": ds2,
|
||||||
|
"max_depth": max_depth,
|
||||||
|
}
|
||||||
|
|
||||||
graph1 = ds1.query([])
|
if max_depth <= 0:
|
||||||
graph2 = ds2.query([])
|
raise ValueError("'max_depth' must be greater than 0")
|
||||||
|
|
||||||
graph1.sort(key=lambda x: x["type"])
|
pairs = _object_pairs(
|
||||||
graph2.sort(key=lambda x: x["type"])
|
_bucket_per_type(ds1.query([])),
|
||||||
|
_bucket_per_type(ds2.query([])),
|
||||||
|
weights,
|
||||||
|
)
|
||||||
|
|
||||||
if len(graph1) < len(graph2):
|
logger.debug("Starting graph similarity process between DataStores: '%s' and '%s'", ds1.id, ds2.id)
|
||||||
weights["_internal"]["ds1"] = ds1
|
for object1, object2 in pairs:
|
||||||
weights["_internal"]["ds2"] = ds2
|
iprop_score = {}
|
||||||
g1 = graph1
|
object1_id = object1["id"]
|
||||||
g2 = graph2
|
object2_id = object2["id"]
|
||||||
else:
|
|
||||||
weights["_internal"]["ds1"] = ds2
|
|
||||||
weights["_internal"]["ds2"] = ds1
|
|
||||||
g1 = graph2
|
|
||||||
g2 = graph1
|
|
||||||
|
|
||||||
for object1 in g1:
|
result = object_similarity(
|
||||||
for object2 in g2:
|
object1, object2, iprop_score, ds1, ds2,
|
||||||
if object1["type"] == object2["type"] and object1["type"] in weights:
|
ignore_spec_version, versioning_checks,
|
||||||
iprop_score = {}
|
max_depth, **weights
|
||||||
result = semantically_equivalent(object1, object2, iprop_score, **weights)
|
)
|
||||||
objects1_id = object1["id"]
|
|
||||||
weights["_internal"]["max_depth"] = depth
|
|
||||||
|
|
||||||
if objects1_id not in results:
|
if object1_id not in results:
|
||||||
results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
|
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
|
||||||
elif result > results[objects1_id]["value"]:
|
elif result > results[object1_id]["value"]:
|
||||||
results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
|
results[object1_id] = {"lhs": object1_id, "rhs": object2_id, "prop_score": iprop_score, "value": result}
|
||||||
|
|
||||||
|
if object2_id not in results:
|
||||||
|
results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result}
|
||||||
|
elif result > results[object2_id]["value"]:
|
||||||
|
results[object2_id] = {"lhs": object2_id, "rhs": object1_id, "prop_score": iprop_score, "value": result}
|
||||||
|
|
||||||
equivalence_score = 0
|
|
||||||
matching_score = sum(x["value"] for x in results.values())
|
matching_score = sum(x["value"] for x in results.values())
|
||||||
sum_weights = len(results) * 100.0
|
len_pairs = len(results)
|
||||||
if sum_weights > 0:
|
if len_pairs > 0:
|
||||||
equivalence_score = (matching_score / sum_weights) * 100
|
similarity_score = matching_score / len_pairs
|
||||||
|
|
||||||
prop_scores["matching_score"] = matching_score
|
prop_scores["matching_score"] = matching_score
|
||||||
prop_scores["sum_weights"] = sum_weights
|
prop_scores["len_pairs"] = len_pairs
|
||||||
prop_scores["summary"] = results
|
prop_scores["summary"] = results
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"DONE\t\tSUM_WEIGHT: %.2f\tMATCHING_SCORE: %.2f\t SCORE: %.2f",
|
"DONE\t\tLEN_PAIRS: %.2f\tMATCHING_SCORE: %.2f\t SIMILARITY_SCORE: %.2f",
|
||||||
sum_weights,
|
len_pairs,
|
||||||
matching_score,
|
matching_score,
|
||||||
equivalence_score,
|
similarity_score,
|
||||||
)
|
)
|
||||||
return equivalence_score
|
return similarity_score
|
||||||
|
|
||||||
|
|
||||||
# default weights used for the graph semantic equivalence process
|
|
||||||
GRAPH_WEIGHTS = WEIGHTS.copy()
|
|
||||||
GRAPH_WEIGHTS.update({
|
|
||||||
"grouping": {
|
|
||||||
"name": (20, partial_string_based),
|
|
||||||
"context": (20, partial_string_based),
|
|
||||||
"object_refs": (60, list_reference_check),
|
|
||||||
},
|
|
||||||
"relationship": {
|
|
||||||
"relationship_type": (20, exact_match),
|
|
||||||
"source_ref": (40, reference_check),
|
|
||||||
"target_ref": (40, reference_check),
|
|
||||||
},
|
|
||||||
"report": {
|
|
||||||
"name": (30, partial_string_based),
|
|
||||||
"published": (10, partial_timestamp_based),
|
|
||||||
"object_refs": (60, list_reference_check),
|
|
||||||
"tdelta": 1, # One day interval
|
|
||||||
},
|
|
||||||
"sighting": {
|
|
||||||
"first_seen": (5, partial_timestamp_based),
|
|
||||||
"last_seen": (5, partial_timestamp_based),
|
|
||||||
"sighting_of_ref": (40, reference_check),
|
|
||||||
"observed_data_refs": (20, list_reference_check),
|
|
||||||
"where_sighted_refs": (20, list_reference_check),
|
|
||||||
"summary": (10, exact_match),
|
|
||||||
},
|
|
||||||
"_internal": {
|
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": False,
|
|
||||||
"ds1": None,
|
|
||||||
"ds2": None,
|
|
||||||
"max_depth": 1,
|
|
||||||
},
|
|
||||||
}) # :autodoc-skip:
|
|
||||||
|
|
|
@ -1,40 +1,118 @@
|
||||||
"""Python APIs for STIX 2 Object-based Semantic Equivalence."""
|
"""Python APIs for STIX 2 Object-based Semantic Equivalence and Similarity."""
|
||||||
|
import collections
|
||||||
|
import itertools
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from ...datastore import Filter
|
from ...datastore import DataSource, DataStoreMixin, Filter
|
||||||
from ...utils import STIXdatetime, parse_into_datetime
|
from ...utils import STIXdatetime, parse_into_datetime
|
||||||
from ..pattern import equivalent_patterns
|
from ..pattern import equivalent_patterns
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
def object_equivalence(
    obj1, obj2, prop_scores=None, threshold=70, ds1=None,
    ds2=None, ignore_spec_version=False,
    versioning_checks=False, max_depth=1, **weight_dict
):
    """This method returns a true/false value if two objects are semantically equivalent.
    Internally, it calls the object_similarity function and compares it against the given
    threshold value.

    Args:
        obj1: A stix2 object instance
        obj2: A stix2 object instance
        prop_scores: A dictionary that can hold individual property scores,
            weights, contributing score, matching score and sum of weights.
        threshold: A numerical value between 0 and 100 to determine the minimum
            score to result in successfully calling both objects equivalent. This
            value can be tuned.
        ds1 (optional): A DataStore object instance from which to pull related objects
        ds2 (optional): A DataStore object instance from which to pull related objects
        ignore_spec_version: A boolean indicating whether to test object types
            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
            If set to True this check will be skipped.
        versioning_checks: A boolean indicating whether to test multiple revisions
            of the same object (when present) to maximize similarity against a
            particular version. If set to True the algorithm will perform this step.
        max_depth: A positive integer indicating the maximum recursion depth the
            algorithm can reach when de-referencing objects and performing the
            object_similarity algorithm.
        weight_dict: A dictionary that can be used to override what checks are done
            to objects in the similarity process.

    Returns:
        bool: True if the result of the object similarity is greater than or equal to
            the threshold value. False otherwise.

    Warning:
        Object types need to have property weights defined for the similarity process.
        Otherwise, those objects will not influence the final score. The WEIGHTS
        dictionary under `stix2.equivalence.object` can give you an idea on how to add
        new entries and pass them via the `weight_dict` argument. Similarly, the values
        or methods can be fine tuned for a particular use case.

    Note:
        Default weight_dict:

        .. include:: ../../similarity_weights.rst

    Note:
        This implementation follows the Semantic Equivalence Committee Note.
        see `the Committee Note <link here>`__.

    """
    if prop_scores is None:
        # Avoid the mutable-default-argument trap: prop_scores is an "out"
        # parameter filled in by object_similarity, so a shared module-level
        # default dict would accumulate results across unrelated calls.
        prop_scores = {}
    similarity_result = object_similarity(
        obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
        versioning_checks, max_depth, **weight_dict
    )
    # Equivalence is simply similarity at or above the (tunable) threshold.
    return similarity_result >= threshold
||||||
|
|
||||||
|
|
||||||
|
def object_similarity(
|
||||||
|
obj1, obj2, prop_scores={}, ds1=None, ds2=None,
|
||||||
|
ignore_spec_version=False, versioning_checks=False,
|
||||||
|
max_depth=1, **weight_dict
|
||||||
|
):
|
||||||
|
"""This method returns a measure of similarity depending on how
|
||||||
|
similar the two objects are.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj1: A stix2 object instance
|
||||||
|
obj2: A stix2 object instance
|
||||||
|
prop_scores: A dictionary that can hold individual property scores,
|
||||||
|
weights, contributing score, matching score and sum of weights.
|
||||||
|
ds1 (optional): A DataStore object instance from which to pull related objects
|
||||||
|
ds2 (optional): A DataStore object instance from which to pull related objects
|
||||||
|
ignore_spec_version: A boolean indicating whether to test object types
|
||||||
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
||||||
|
If set to True this check will be skipped.
|
||||||
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
||||||
|
of the same object (when present) to maximize similarity against a
|
||||||
|
particular version. If set to True the algorithm will perform this step.
|
||||||
|
max_depth: A positive integer indicating the maximum recursion depth the
|
||||||
|
algorithm can reach when de-referencing objects and performing the
|
||||||
|
object_similarity algorithm.
|
||||||
|
weight_dict: A dictionary that can be used to override what checks are done
|
||||||
|
to objects in the similarity process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
float: A number between 0.0 and 100.0 as a measurement of similarity.
|
||||||
|
|
||||||
|
Warning:
|
||||||
|
Object types need to have property weights defined for the similarity process.
|
||||||
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
||||||
|
dictionary under `stix2.equivalence.object` can give you an idea on how to add
|
||||||
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
||||||
|
or methods can be fine tuned for a particular use case.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
Default weight_dict:
|
||||||
|
|
||||||
|
.. include:: ../../similarity_weights.rst
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
This implementation follows the Semantic Equivalence Committee Note.
|
This implementation follows the Semantic Equivalence Committee Note.
|
||||||
|
@ -46,8 +124,15 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
||||||
if weight_dict:
|
if weight_dict:
|
||||||
weights.update(weight_dict)
|
weights.update(weight_dict)
|
||||||
|
|
||||||
|
weights["_internal"] = {
|
||||||
|
"ignore_spec_version": ignore_spec_version,
|
||||||
|
"versioning_checks": versioning_checks,
|
||||||
|
"ds1": ds1,
|
||||||
|
"ds2": ds2,
|
||||||
|
"max_depth": max_depth,
|
||||||
|
}
|
||||||
|
|
||||||
type1, type2 = obj1["type"], obj2["type"]
|
type1, type2 = obj1["type"], obj2["type"]
|
||||||
ignore_spec_version = weights["_internal"]["ignore_spec_version"]
|
|
||||||
|
|
||||||
if type1 != type2:
|
if type1 != type2:
|
||||||
raise ValueError('The objects to compare must be of the same type!')
|
raise ValueError('The objects to compare must be of the same type!')
|
||||||
|
@ -58,13 +143,13 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
||||||
try:
|
try:
|
||||||
weights[type1]
|
weights[type1]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1)
|
logger.warning("'%s' type has no 'weights' dict specified & thus no object similarity method to call!", type1)
|
||||||
sum_weights = matching_score = 0
|
sum_weights = matching_score = 0
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
method = weights[type1]["method"]
|
method = weights[type1]["method"]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||||
matching_score = 0.0
|
matching_score = 0.0
|
||||||
sum_weights = 0.0
|
sum_weights = 0.0
|
||||||
|
|
||||||
|
@ -72,6 +157,7 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
||||||
if check_property_present(prop, obj1, obj2):
|
if check_property_present(prop, obj1, obj2):
|
||||||
w = weights[type1][prop][0]
|
w = weights[type1][prop][0]
|
||||||
comp_funct = weights[type1][prop][1]
|
comp_funct = weights[type1][prop][1]
|
||||||
|
prop_scores[prop] = {}
|
||||||
|
|
||||||
if comp_funct == partial_timestamp_based:
|
if comp_funct == partial_timestamp_based:
|
||||||
contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
|
||||||
|
@ -79,30 +165,36 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
||||||
threshold = weights[type1]["threshold"]
|
threshold = weights[type1]["threshold"]
|
||||||
contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
|
contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
|
||||||
elif comp_funct == reference_check or comp_funct == list_reference_check:
|
elif comp_funct == reference_check or comp_funct == list_reference_check:
|
||||||
max_depth = weights["_internal"]["max_depth"]
|
if max_depth > 0:
|
||||||
if max_depth < 0:
|
weights["_internal"]["max_depth"] = max_depth - 1
|
||||||
continue # prevent excessive recursion
|
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
|
||||||
|
if _datastore_check(ds1, ds2):
|
||||||
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
|
||||||
|
elif comp_funct == reference_check:
|
||||||
|
comp_funct = exact_match
|
||||||
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
||||||
|
elif comp_funct == list_reference_check:
|
||||||
|
comp_funct = partial_list_based
|
||||||
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
||||||
|
prop_scores[prop]["check_type"] = comp_funct.__name__
|
||||||
else:
|
else:
|
||||||
weights["_internal"]["max_depth"] -= 1
|
continue # prevent excessive recursion
|
||||||
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
|
weights["_internal"]["max_depth"] = max_depth
|
||||||
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
|
|
||||||
else:
|
else:
|
||||||
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
||||||
|
|
||||||
sum_weights += w
|
sum_weights += w
|
||||||
matching_score += contributing_score
|
matching_score += contributing_score
|
||||||
|
|
||||||
prop_scores[prop] = {
|
prop_scores[prop]["weight"] = w
|
||||||
"weight": w,
|
prop_scores[prop]["contributing_score"] = contributing_score
|
||||||
"contributing_score": contributing_score,
|
|
||||||
}
|
|
||||||
logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
|
logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
|
||||||
|
|
||||||
prop_scores["matching_score"] = matching_score
|
prop_scores["matching_score"] = matching_score
|
||||||
prop_scores["sum_weights"] = sum_weights
|
prop_scores["sum_weights"] = sum_weights
|
||||||
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
|
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
|
||||||
else:
|
else:
|
||||||
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
||||||
try:
|
try:
|
||||||
matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
|
matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
|
||||||
except TypeError:
|
except TypeError:
|
||||||
|
@ -119,7 +211,7 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
|
||||||
def check_property_present(prop, obj1, obj2):
|
def check_property_present(prop, obj1, obj2):
|
||||||
"""Helper method checks if a property is present on both objects."""
|
"""Helper method checks if a property is present on both objects."""
|
||||||
if prop == "longitude_latitude":
|
if prop == "longitude_latitude":
|
||||||
if all(x in obj1 and x in obj2 for x in ['latitude', 'longitude']):
|
if all(x in obj1 and x in obj2 for x in ('latitude', 'longitude')):
|
||||||
return True
|
return True
|
||||||
elif prop in obj1 and prop in obj2:
|
elif prop in obj1 and prop in obj2:
|
||||||
return True
|
return True
|
||||||
|
@ -150,7 +242,9 @@ def partial_timestamp_based(t1, t2, tdelta):
|
||||||
|
|
||||||
|
|
||||||
def partial_list_based(l1, l2):
|
def partial_list_based(l1, l2):
|
||||||
"""Performs a partial list matching via finding the intersection between common values.
|
"""Performs a partial list matching via finding the intersection between
|
||||||
|
common values. Repeated values are counted only once. This method can be
|
||||||
|
used for *_refs equality checks when de-reference is not possible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
l1: A list of values.
|
l1: A list of values.
|
||||||
|
@ -167,7 +261,8 @@ def partial_list_based(l1, l2):
|
||||||
|
|
||||||
|
|
||||||
def exact_match(val1, val2):
|
def exact_match(val1, val2):
|
||||||
"""Performs an exact value match based on two values
|
"""Performs an exact value match based on two values. This method can be
|
||||||
|
used for *_ref equality check when de-reference is not possible.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
val1: A value suitable for an equality test.
|
val1: A value suitable for an equality test.
|
||||||
|
@ -215,12 +310,12 @@ def custom_pattern_based(pattern1, pattern2):
|
||||||
return equivalent_patterns(pattern1, pattern2)
|
return equivalent_patterns(pattern1, pattern2)
|
||||||
|
|
||||||
|
|
||||||
def partial_external_reference_based(refs1, refs2):
|
def partial_external_reference_based(ext_refs1, ext_refs2):
|
||||||
"""Performs a matching on External References.
|
"""Performs a matching on External References.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
refs1: A list of external references.
|
ext_refs1: A list of external references.
|
||||||
refs2: A list of external references.
|
ext_refs2: A list of external references.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
float: Number between 0.0 and 1.0 depending on matches.
|
float: Number between 0.0 and 1.0 depending on matches.
|
||||||
|
@ -229,51 +324,47 @@ def partial_external_reference_based(refs1, refs2):
|
||||||
allowed = {"veris", "cve", "capec", "mitre-attack"}
|
allowed = {"veris", "cve", "capec", "mitre-attack"}
|
||||||
matches = 0
|
matches = 0
|
||||||
|
|
||||||
if len(refs1) >= len(refs2):
|
ref_pairs = itertools.chain(
|
||||||
l1 = refs1
|
itertools.product(ext_refs1, ext_refs2),
|
||||||
l2 = refs2
|
)
|
||||||
else:
|
|
||||||
l1 = refs2
|
|
||||||
l2 = refs1
|
|
||||||
|
|
||||||
for ext_ref1 in l1:
|
for ext_ref1, ext_ref2 in ref_pairs:
|
||||||
for ext_ref2 in l2:
|
sn_match = False
|
||||||
sn_match = False
|
ei_match = False
|
||||||
ei_match = False
|
url_match = False
|
||||||
url_match = False
|
source_name = None
|
||||||
source_name = None
|
|
||||||
|
|
||||||
if check_property_present("source_name", ext_ref1, ext_ref2):
|
if check_property_present("source_name", ext_ref1, ext_ref2):
|
||||||
if ext_ref1["source_name"] == ext_ref2["source_name"]:
|
if ext_ref1["source_name"] == ext_ref2["source_name"]:
|
||||||
source_name = ext_ref1["source_name"]
|
source_name = ext_ref1["source_name"]
|
||||||
sn_match = True
|
sn_match = True
|
||||||
if check_property_present("external_id", ext_ref1, ext_ref2):
|
if check_property_present("external_id", ext_ref1, ext_ref2):
|
||||||
if ext_ref1["external_id"] == ext_ref2["external_id"]:
|
if ext_ref1["external_id"] == ext_ref2["external_id"]:
|
||||||
ei_match = True
|
ei_match = True
|
||||||
if check_property_present("url", ext_ref1, ext_ref2):
|
if check_property_present("url", ext_ref1, ext_ref2):
|
||||||
if ext_ref1["url"] == ext_ref2["url"]:
|
if ext_ref1["url"] == ext_ref2["url"]:
|
||||||
url_match = True
|
url_match = True
|
||||||
|
|
||||||
# Special case: if source_name is a STIX defined name and either
|
# Special case: if source_name is a STIX defined name and either
|
||||||
# external_id or url match then its a perfect match and other entries
|
# external_id or url match then its a perfect match and other entries
|
||||||
# can be ignored.
|
# can be ignored.
|
||||||
if sn_match and (ei_match or url_match) and source_name in allowed:
|
if sn_match and (ei_match or url_match) and source_name in allowed:
|
||||||
result = 1.0
|
result = 1.0
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
|
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
|
||||||
refs1, refs2, result,
|
ext_refs1, ext_refs2, result,
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Regular check. If the source_name (not STIX-defined) or external_id or
|
# Regular check. If the source_name (not STIX-defined) or external_id or
|
||||||
# url matches then we consider the entry a match.
|
# url matches then we consider the entry a match.
|
||||||
if (sn_match or ei_match or url_match) and source_name not in allowed:
|
if (sn_match or ei_match or url_match) and source_name not in allowed:
|
||||||
matches += 1
|
matches += 1
|
||||||
|
|
||||||
result = matches / max(len(refs1), len(refs2))
|
result = matches / max(len(ext_refs1), len(ext_refs2))
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
|
"--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
|
||||||
refs1, refs2, result,
|
ext_refs1, ext_refs2, result,
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@ -304,19 +395,30 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
|
||||||
|
|
||||||
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
|
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
|
||||||
"""Checks multiple object versions if present in graph.
|
"""Checks multiple object versions if present in graph.
|
||||||
Maximizes for the semantic equivalence score of a particular version."""
|
Maximizes for the similarity score of a particular version."""
|
||||||
results = {}
|
results = {}
|
||||||
objects1 = ds1.query([Filter("id", "=", ref1)])
|
|
||||||
objects2 = ds2.query([Filter("id", "=", ref2)])
|
|
||||||
|
|
||||||
if len(objects1) > 0 and len(objects2) > 0:
|
pairs = _object_pairs(
|
||||||
for o1 in objects1:
|
_bucket_per_type(ds1.query([Filter("id", "=", ref1)])),
|
||||||
for o2 in objects2:
|
_bucket_per_type(ds2.query([Filter("id", "=", ref2)])),
|
||||||
result = semantically_equivalent(o1, o2, **weights)
|
weights,
|
||||||
if ref1 not in results:
|
)
|
||||||
results[ref1] = {"matched": ref2, "value": result}
|
ignore_spec_version = weights["_internal"]["ignore_spec_version"]
|
||||||
elif result > results[ref1]["value"]:
|
versioning_checks = weights["_internal"]["versioning_checks"]
|
||||||
results[ref1] = {"matched": ref2, "value": result}
|
max_depth = weights["_internal"]["max_depth"]
|
||||||
|
|
||||||
|
for object1, object2 in pairs:
|
||||||
|
result = object_similarity(
|
||||||
|
object1, object2, ds1=ds1, ds2=ds2,
|
||||||
|
ignore_spec_version=ignore_spec_version,
|
||||||
|
versioning_checks=versioning_checks,
|
||||||
|
max_depth=max_depth, **weights,
|
||||||
|
)
|
||||||
|
if ref1 not in results:
|
||||||
|
results[ref1] = {"matched": ref2, "value": result}
|
||||||
|
elif result > results[ref1]["value"]:
|
||||||
|
results[ref1] = {"matched": ref2, "value": result}
|
||||||
|
|
||||||
result = results.get(ref1, {}).get("value", 0.0)
|
result = results.get(ref1, {}).get("value", 0.0)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
|
"--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
|
||||||
|
@ -326,18 +428,26 @@ def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
|
||||||
|
|
||||||
|
|
||||||
def reference_check(ref1, ref2, ds1, ds2, **weights):
|
def reference_check(ref1, ref2, ds1, ds2, **weights):
|
||||||
"""For two references, de-reference the object and perform object-based
|
"""For two references, de-reference the object and perform object_similarity.
|
||||||
semantic equivalence. The score influences the result of an edge check."""
|
The score influences the result of an edge check."""
|
||||||
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
|
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
|
||||||
result = 0.0
|
result = 0.0
|
||||||
|
|
||||||
if type1 == type2:
|
if type1 == type2 and type1 in weights:
|
||||||
if weights["_internal"]["versioning_checks"]:
|
ignore_spec_version = weights["_internal"]["ignore_spec_version"]
|
||||||
|
versioning_checks = weights["_internal"]["versioning_checks"]
|
||||||
|
max_depth = weights["_internal"]["max_depth"]
|
||||||
|
if versioning_checks:
|
||||||
result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0
|
result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0
|
||||||
else:
|
else:
|
||||||
o1, o2 = ds1.get(ref1), ds2.get(ref2)
|
o1, o2 = ds1.get(ref1), ds2.get(ref2)
|
||||||
if o1 and o2:
|
if o1 and o2:
|
||||||
result = semantically_equivalent(o1, o2, **weights) / 100.0
|
result = object_similarity(
|
||||||
|
o1, o2, ds1=ds1, ds2=ds2,
|
||||||
|
ignore_spec_version=ignore_spec_version,
|
||||||
|
versioning_checks=versioning_checks,
|
||||||
|
max_depth=max_depth, **weights,
|
||||||
|
) / 100.0
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"--\t\treference_check '%s' '%s'\tresult: '%s'",
|
"--\t\treference_check '%s' '%s'\tresult: '%s'",
|
||||||
|
@ -348,38 +458,35 @@ def reference_check(ref1, ref2, ds1, ds2, **weights):
|
||||||
|
|
||||||
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
||||||
"""For objects that contain multiple references (i.e., object_refs) perform
|
"""For objects that contain multiple references (i.e., object_refs) perform
|
||||||
the same de-reference procedure and perform object-based semantic equivalence.
|
the same de-reference procedure and perform object_similarity.
|
||||||
The score influences the objects containing these references. The result is
|
The score influences the objects containing these references. The result is
|
||||||
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
weighted on the amount of unique objects that could 1) be de-referenced 2) """
|
||||||
results = {}
|
results = {}
|
||||||
if len(refs1) >= len(refs2):
|
|
||||||
l1 = refs1
|
|
||||||
l2 = refs2
|
|
||||||
b1 = ds1
|
|
||||||
b2 = ds2
|
|
||||||
else:
|
|
||||||
l1 = refs2
|
|
||||||
l2 = refs1
|
|
||||||
b1 = ds2
|
|
||||||
b2 = ds1
|
|
||||||
|
|
||||||
l1.sort()
|
pairs = _object_pairs(
|
||||||
l2.sort()
|
_bucket_per_type(refs1, "id-split"),
|
||||||
|
_bucket_per_type(refs2, "id-split"),
|
||||||
|
weights,
|
||||||
|
)
|
||||||
|
|
||||||
for ref1 in l1:
|
for ref1, ref2 in pairs:
|
||||||
for ref2 in l2:
|
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
|
||||||
type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
|
if type1 == type2:
|
||||||
if type1 == type2:
|
score = reference_check(ref1, ref2, ds1, ds2, **weights)
|
||||||
score = reference_check(ref1, ref2, b1, b2, **weights) * 100.0
|
|
||||||
|
|
||||||
if ref1 not in results:
|
if ref1 not in results:
|
||||||
results[ref1] = {"matched": ref2, "value": score}
|
results[ref1] = {"matched": ref2, "value": score}
|
||||||
elif score > results[ref1]["value"]:
|
elif score > results[ref1]["value"]:
|
||||||
results[ref1] = {"matched": ref2, "value": score}
|
results[ref1] = {"matched": ref2, "value": score}
|
||||||
|
|
||||||
|
if ref2 not in results:
|
||||||
|
results[ref2] = {"matched": ref1, "value": score}
|
||||||
|
elif score > results[ref2]["value"]:
|
||||||
|
results[ref2] = {"matched": ref1, "value": score}
|
||||||
|
|
||||||
result = 0.0
|
result = 0.0
|
||||||
total_sum = sum(x["value"] for x in results.values())
|
total_sum = sum(x["value"] for x in results.values())
|
||||||
max_score = len(results) * 100.0
|
max_score = len(results)
|
||||||
|
|
||||||
if max_score > 0:
|
if max_score > 0:
|
||||||
result = total_sum / max_score
|
result = total_sum / max_score
|
||||||
|
@ -391,7 +498,43 @@ def list_reference_check(refs1, refs2, ds1, ds2, **weights):
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
# default weights used for the semantic equivalence process
|
def _datastore_check(ds1, ds2):
    """Return True if at least one of the two arguments is a data store
    (a ``DataStoreMixin`` or ``DataSource`` instance) from which related
    objects can be de-referenced, False otherwise.
    """
    # isinstance(x, T) is the idiomatic (and equivalent) form of
    # issubclass(x.__class__, T); the boolean expression is returned
    # directly instead of branching to explicit True/False returns.
    return (
        isinstance(ds1, (DataStoreMixin, DataSource)) or
        isinstance(ds2, (DataStoreMixin, DataSource))
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _bucket_per_type(graph, mode="type"):
|
||||||
|
"""Given a list of objects or references, bucket them by type.
|
||||||
|
Depending on the list type: extract from 'type' property or using
|
||||||
|
the 'id'.
|
||||||
|
"""
|
||||||
|
buckets = collections.defaultdict(list)
|
||||||
|
if mode == "type":
|
||||||
|
[buckets[obj["type"]].append(obj) for obj in graph]
|
||||||
|
elif mode == "id-split":
|
||||||
|
[buckets[obj.split("--")[0]].append(obj) for obj in graph]
|
||||||
|
return buckets
|
||||||
|
|
||||||
|
|
||||||
|
def _object_pairs(graph1, graph2, weights):
|
||||||
|
"""Returns a generator with the product of the comparable
|
||||||
|
objects for the graph similarity process. It determines
|
||||||
|
objects in common between graphs and objects with weights.
|
||||||
|
"""
|
||||||
|
types_in_common = set(graph1.keys()).intersection(graph2.keys())
|
||||||
|
testable_types = types_in_common.intersection(weights.keys())
|
||||||
|
|
||||||
|
return itertools.chain.from_iterable(
|
||||||
|
itertools.product(graph1[stix_type], graph2[stix_type])
|
||||||
|
for stix_type in testable_types
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# default weights used for the similarity process
|
||||||
WEIGHTS = {
|
WEIGHTS = {
|
||||||
"attack-pattern": {
|
"attack-pattern": {
|
||||||
"name": (30, partial_string_based),
|
"name": (30, partial_string_based),
|
||||||
|
@ -405,11 +548,20 @@ WEIGHTS = {
|
||||||
"name": (60, partial_string_based),
|
"name": (60, partial_string_based),
|
||||||
"external_references": (40, partial_external_reference_based),
|
"external_references": (40, partial_external_reference_based),
|
||||||
},
|
},
|
||||||
|
"grouping": {
|
||||||
|
"name": (20, partial_string_based),
|
||||||
|
"context": (20, partial_string_based),
|
||||||
|
"object_refs": (60, list_reference_check),
|
||||||
|
},
|
||||||
"identity": {
|
"identity": {
|
||||||
"name": (60, partial_string_based),
|
"name": (60, partial_string_based),
|
||||||
"identity_class": (20, exact_match),
|
"identity_class": (20, exact_match),
|
||||||
"sectors": (20, partial_list_based),
|
"sectors": (20, partial_list_based),
|
||||||
},
|
},
|
||||||
|
"incident": {
|
||||||
|
"name": (30, partial_string_based),
|
||||||
|
"external_references": (70, partial_external_reference_based),
|
||||||
|
},
|
||||||
"indicator": {
|
"indicator": {
|
||||||
"indicator_types": (15, partial_list_based),
|
"indicator_types": (15, partial_list_based),
|
||||||
"pattern": (80, custom_pattern_based),
|
"pattern": (80, custom_pattern_based),
|
||||||
|
@ -436,6 +588,25 @@ WEIGHTS = {
|
||||||
"definition": (60, exact_match),
|
"definition": (60, exact_match),
|
||||||
"definition_type": (20, exact_match),
|
"definition_type": (20, exact_match),
|
||||||
},
|
},
|
||||||
|
"relationship": {
|
||||||
|
"relationship_type": (20, exact_match),
|
||||||
|
"source_ref": (40, reference_check),
|
||||||
|
"target_ref": (40, reference_check),
|
||||||
|
},
|
||||||
|
"report": {
|
||||||
|
"name": (30, partial_string_based),
|
||||||
|
"published": (10, partial_timestamp_based),
|
||||||
|
"object_refs": (60, list_reference_check),
|
||||||
|
"tdelta": 1, # One day interval
|
||||||
|
},
|
||||||
|
"sighting": {
|
||||||
|
"first_seen": (5, partial_timestamp_based),
|
||||||
|
"last_seen": (5, partial_timestamp_based),
|
||||||
|
"sighting_of_ref": (40, reference_check),
|
||||||
|
"observed_data_refs": (20, list_reference_check),
|
||||||
|
"where_sighted_refs": (20, list_reference_check),
|
||||||
|
"summary": (10, exact_match),
|
||||||
|
},
|
||||||
"threat-actor": {
|
"threat-actor": {
|
||||||
"name": (60, partial_string_based),
|
"name": (60, partial_string_based),
|
||||||
"threat_actor_types": (20, partial_list_based),
|
"threat_actor_types": (20, partial_list_based),
|
||||||
|
@ -449,7 +620,4 @@ WEIGHTS = {
|
||||||
"name": (30, partial_string_based),
|
"name": (30, partial_string_based),
|
||||||
"external_references": (70, partial_external_reference_based),
|
"external_references": (70, partial_external_reference_based),
|
||||||
},
|
},
|
||||||
"_internal": {
|
|
||||||
"ignore_spec_version": False,
|
|
||||||
},
|
|
||||||
} # :autodoc-skip:
|
} # :autodoc-skip:
|
||||||
|
|
|
@ -14,17 +14,17 @@ from ...version import DEFAULT_VERSION
|
||||||
from .compare.observation import observation_expression_cmp
|
from .compare.observation import observation_expression_cmp
|
||||||
from .transform import ChainTransformer, SettleTransformer
|
from .transform import ChainTransformer, SettleTransformer
|
||||||
from .transform.observation import (
|
from .transform.observation import (
|
||||||
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
|
AbsorptionTransformer, DNFTransformer, FlattenTransformer,
|
||||||
DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
|
NormalizeComparisonExpressionsTransformer, OrderDedupeTransformer,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Lazy-initialize
|
# Lazy-initialize
|
||||||
_pattern_canonicalizer = None
|
_pattern_normalizer = None
|
||||||
|
|
||||||
|
|
||||||
def _get_pattern_canonicalizer():
|
def _get_pattern_normalizer():
|
||||||
"""
|
"""
|
||||||
Get a canonicalization transformer for STIX patterns.
|
Get a normalization transformer for STIX patterns.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
The transformer
|
The transformer
|
||||||
|
@ -33,11 +33,11 @@ def _get_pattern_canonicalizer():
|
||||||
# The transformers are either stateless or contain no state which changes
|
# The transformers are either stateless or contain no state which changes
|
||||||
# with each use. So we can setup the transformers once and keep reusing
|
# with each use. So we can setup the transformers once and keep reusing
|
||||||
# them.
|
# them.
|
||||||
global _pattern_canonicalizer
|
global _pattern_normalizer
|
||||||
|
|
||||||
if not _pattern_canonicalizer:
|
if not _pattern_normalizer:
|
||||||
canonicalize_comp_expr = \
|
normalize_comp_expr = \
|
||||||
CanonicalizeComparisonExpressionsTransformer()
|
NormalizeComparisonExpressionsTransformer()
|
||||||
|
|
||||||
obs_expr_flatten = FlattenTransformer()
|
obs_expr_flatten = FlattenTransformer()
|
||||||
obs_expr_order = OrderDedupeTransformer()
|
obs_expr_order = OrderDedupeTransformer()
|
||||||
|
@ -49,12 +49,12 @@ def _get_pattern_canonicalizer():
|
||||||
|
|
||||||
obs_dnf = DNFTransformer()
|
obs_dnf = DNFTransformer()
|
||||||
|
|
||||||
_pattern_canonicalizer = ChainTransformer(
|
_pattern_normalizer = ChainTransformer(
|
||||||
canonicalize_comp_expr,
|
normalize_comp_expr,
|
||||||
obs_settle_simplify, obs_dnf, obs_settle_simplify,
|
obs_settle_simplify, obs_dnf, obs_settle_simplify,
|
||||||
)
|
)
|
||||||
|
|
||||||
return _pattern_canonicalizer
|
return _pattern_normalizer
|
||||||
|
|
||||||
|
|
||||||
def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
|
def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
|
||||||
|
@ -77,11 +77,11 @@ def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
|
||||||
pattern2, version=stix_version,
|
pattern2, version=stix_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
pattern_canonicalizer = _get_pattern_canonicalizer()
|
pattern_normalizer = _get_pattern_normalizer()
|
||||||
canon_patt1, _ = pattern_canonicalizer.transform(patt_ast1)
|
norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
|
||||||
canon_patt2, _ = pattern_canonicalizer.transform(patt_ast2)
|
norm_patt2, _ = pattern_normalizer.transform(patt_ast2)
|
||||||
|
|
||||||
result = observation_expression_cmp(canon_patt1, canon_patt2)
|
result = observation_expression_cmp(norm_patt1, norm_patt2)
|
||||||
|
|
||||||
return result == 0
|
return result == 0
|
||||||
|
|
||||||
|
@ -92,7 +92,7 @@ def find_equivalent_patterns(
|
||||||
"""
|
"""
|
||||||
Find patterns from a sequence which are equivalent to a given pattern.
|
Find patterns from a sequence which are equivalent to a given pattern.
|
||||||
This is more efficient than using equivalent_patterns() in a loop, because
|
This is more efficient than using equivalent_patterns() in a loop, because
|
||||||
it doesn't re-canonicalize the search pattern over and over. This works
|
it doesn't re-normalize the search pattern over and over. This works
|
||||||
on an input iterable and is implemented as a generator of matches. So you
|
on an input iterable and is implemented as a generator of matches. So you
|
||||||
can "stream" patterns in and matching patterns will be streamed out.
|
can "stream" patterns in and matching patterns will be streamed out.
|
||||||
|
|
||||||
|
@ -109,8 +109,8 @@ def find_equivalent_patterns(
|
||||||
search_pattern, version=stix_version,
|
search_pattern, version=stix_version,
|
||||||
)
|
)
|
||||||
|
|
||||||
pattern_canonicalizer = _get_pattern_canonicalizer()
|
pattern_normalizer = _get_pattern_normalizer()
|
||||||
canon_search_pattern_ast, _ = pattern_canonicalizer.transform(
|
norm_search_pattern_ast, _ = pattern_normalizer.transform(
|
||||||
search_pattern_ast,
|
search_pattern_ast,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -118,10 +118,10 @@ def find_equivalent_patterns(
|
||||||
pattern_ast = pattern_visitor.create_pattern_object(
|
pattern_ast = pattern_visitor.create_pattern_object(
|
||||||
pattern, version=stix_version,
|
pattern, version=stix_version,
|
||||||
)
|
)
|
||||||
canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast)
|
norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)
|
||||||
|
|
||||||
result = observation_expression_cmp(
|
result = observation_expression_cmp(
|
||||||
canon_search_pattern_ast, canon_pattern_ast,
|
norm_search_pattern_ast, norm_pattern_ast,
|
||||||
)
|
)
|
||||||
|
|
||||||
if result == 0:
|
if result == 0:
|
||||||
|
|
|
@ -346,7 +346,7 @@ def comparison_expression_cmp(expr1, expr2):
|
||||||
"""
|
"""
|
||||||
Compare two comparison expressions. This is sensitive to the order of the
|
Compare two comparison expressions. This is sensitive to the order of the
|
||||||
expressions' sub-components. To achieve an order-insensitive comparison,
|
expressions' sub-components. To achieve an order-insensitive comparison,
|
||||||
the ASTs must be canonically ordered first.
|
the sub-component ASTs must be ordered first.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
expr1: The first comparison expression
|
expr1: The first comparison expression
|
||||||
|
|
|
@ -62,7 +62,7 @@ def observation_expression_cmp(expr1, expr2):
|
||||||
"""
|
"""
|
||||||
Compare two observation expression ASTs. This is sensitive to the order of
|
Compare two observation expression ASTs. This is sensitive to the order of
|
||||||
the expressions' sub-components. To achieve an order-insensitive
|
the expressions' sub-components. To achieve an order-insensitive
|
||||||
comparison, the ASTs must be canonically ordered first.
|
comparison, the sub-component ASTs must be ordered first.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
expr1: The first observation expression
|
expr1: The first observation expression
|
||||||
|
|
|
@ -46,7 +46,7 @@ def _dupe_ast(ast):
|
||||||
|
|
||||||
elif isinstance(ast, _ComparisonExpression):
|
elif isinstance(ast, _ComparisonExpression):
|
||||||
# Change this to create a dupe, if we ever need to change simple
|
# Change this to create a dupe, if we ever need to change simple
|
||||||
# comparison expressions as part of canonicalization.
|
# comparison expressions as part of normalization.
|
||||||
result = ast
|
result = ast
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -147,9 +147,8 @@ class OrderDedupeTransformer(
|
||||||
ComparisonExpressionTransformer,
|
ComparisonExpressionTransformer,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Canonically order the children of all nodes in the AST. Because the
|
Order the children of all nodes in the AST. Because the deduping algorithm
|
||||||
deduping algorithm is based on sorted data, this transformation also does
|
is based on sorted data, this transformation also does deduping.
|
||||||
deduping.
|
|
||||||
|
|
||||||
E.g.:
|
E.g.:
|
||||||
A and A => A
|
A and A => A
|
||||||
|
|
|
@ -234,7 +234,7 @@ class OrderDedupeTransformer(
|
||||||
ObservationExpressionTransformer,
|
ObservationExpressionTransformer,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Canonically order AND/OR expressions, and dedupe ORs. E.g.:
|
Order AND/OR expressions, and dedupe ORs. E.g.:
|
||||||
|
|
||||||
A or A => A
|
A or A => A
|
||||||
B or A => A or B
|
B or A => A or B
|
||||||
|
@ -282,6 +282,7 @@ class AbsorptionTransformer(
|
||||||
|
|
||||||
A or (A and B) = A
|
A or (A and B) = A
|
||||||
A or (A followedby B) = A
|
A or (A followedby B) = A
|
||||||
|
A or (B followedby A) = A
|
||||||
|
|
||||||
Other variants do not hold for observation expressions.
|
Other variants do not hold for observation expressions.
|
||||||
"""
|
"""
|
||||||
|
@ -435,28 +436,35 @@ class DNFTransformer(ObservationExpressionTransformer):
|
||||||
|
|
||||||
A and (B or C) => (A and B) or (A and C)
|
A and (B or C) => (A and B) or (A and C)
|
||||||
A followedby (B or C) => (A followedby B) or (A followedby C)
|
A followedby (B or C) => (A followedby B) or (A followedby C)
|
||||||
|
(A or B) followedby C => (A followedby C) or (B followedby C)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __transform(self, ast):
|
def __transform(self, ast):
|
||||||
|
|
||||||
root_type = type(ast) # will be AST class for AND or FOLLOWEDBY
|
# If no OR children, nothing to do
|
||||||
changed = False
|
if any(
|
||||||
or_children = []
|
isinstance(child, OrObservationExpression)
|
||||||
other_children = []
|
for child in ast.operands
|
||||||
for child in ast.operands:
|
):
|
||||||
if isinstance(child, OrObservationExpression):
|
# When we distribute FOLLOWEDBY over OR, it is important to
|
||||||
or_children.append(child.operands)
|
# preserve the original FOLLOWEDBY order! We don't need to do that
|
||||||
else:
|
# for AND, but we do it anyway because it doesn't hurt, and we can
|
||||||
other_children.append(child)
|
# use the same code for both.
|
||||||
|
iterables = []
|
||||||
|
for child in ast.operands:
|
||||||
|
if isinstance(child, OrObservationExpression):
|
||||||
|
iterables.append(child.operands)
|
||||||
|
else:
|
||||||
|
iterables.append((child,))
|
||||||
|
|
||||||
if or_children:
|
root_type = type(ast) # will be AST class for AND or FOLLOWEDBY
|
||||||
distributed_children = [
|
distributed_children = [
|
||||||
root_type([
|
root_type([
|
||||||
_dupe_ast(sub_ast) for sub_ast in itertools.chain(
|
_dupe_ast(sub_ast) for sub_ast in itertools.chain(
|
||||||
other_children, prod_seq,
|
prod_seq,
|
||||||
)
|
)
|
||||||
])
|
])
|
||||||
for prod_seq in itertools.product(*or_children)
|
for prod_seq in itertools.product(*iterables)
|
||||||
]
|
]
|
||||||
|
|
||||||
# Need to recursively continue to distribute AND/FOLLOWEDBY over OR
|
# Need to recursively continue to distribute AND/FOLLOWEDBY over OR
|
||||||
|
@ -470,6 +478,7 @@ class DNFTransformer(ObservationExpressionTransformer):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
result = ast
|
result = ast
|
||||||
|
changed = False
|
||||||
|
|
||||||
return result, changed
|
return result, changed
|
||||||
|
|
||||||
|
@ -480,11 +489,11 @@ class DNFTransformer(ObservationExpressionTransformer):
|
||||||
return self.__transform(ast)
|
return self.__transform(ast)
|
||||||
|
|
||||||
|
|
||||||
class CanonicalizeComparisonExpressionsTransformer(
|
class NormalizeComparisonExpressionsTransformer(
|
||||||
ObservationExpressionTransformer,
|
ObservationExpressionTransformer,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Canonicalize all comparison expressions.
|
Normalize all comparison expressions.
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
comp_flatten = CFlattenTransformer()
|
comp_flatten = CFlattenTransformer()
|
||||||
|
@ -495,13 +504,13 @@ class CanonicalizeComparisonExpressionsTransformer(
|
||||||
|
|
||||||
comp_special = SpecialValueCanonicalization()
|
comp_special = SpecialValueCanonicalization()
|
||||||
comp_dnf = CDNFTransformer()
|
comp_dnf = CDNFTransformer()
|
||||||
self.__comp_canonicalize = ChainTransformer(
|
self.__comp_normalize = ChainTransformer(
|
||||||
comp_special, settle_simplify, comp_dnf, settle_simplify,
|
comp_special, settle_simplify, comp_dnf, settle_simplify,
|
||||||
)
|
)
|
||||||
|
|
||||||
def transform_observation(self, ast):
|
def transform_observation(self, ast):
|
||||||
comp_expr = ast.operand
|
comp_expr = ast.operand
|
||||||
canon_comp_expr, changed = self.__comp_canonicalize.transform(comp_expr)
|
norm_comp_expr, changed = self.__comp_normalize.transform(comp_expr)
|
||||||
ast.operand = canon_comp_expr
|
ast.operand = norm_comp_expr
|
||||||
|
|
||||||
return ast, changed
|
return ast, changed
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
"""
|
"""
|
||||||
Some simple comparison expression canonicalization functions.
|
Some simple comparison expression normalization functions.
|
||||||
"""
|
"""
|
||||||
import socket
|
import socket
|
||||||
|
|
||||||
|
|
|
@ -175,7 +175,14 @@ class ImmutableError(STIXError):
|
||||||
return msg.format(self)
|
return msg.format(self)
|
||||||
|
|
||||||
|
|
||||||
class UnmodifiablePropertyError(STIXError):
|
class VersioningError(STIXError):
|
||||||
|
"""
|
||||||
|
Base class for object versioning errors.
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class UnmodifiablePropertyError(VersioningError):
|
||||||
"""Attempted to modify an unmodifiable property of object when creating a new version."""
|
"""Attempted to modify an unmodifiable property of object when creating a new version."""
|
||||||
|
|
||||||
def __init__(self, unchangable_properties):
|
def __init__(self, unchangable_properties):
|
||||||
|
@ -187,6 +194,40 @@ class UnmodifiablePropertyError(STIXError):
|
||||||
return msg.format(", ".join(self.unchangable_properties))
|
return msg.format(", ".join(self.unchangable_properties))
|
||||||
|
|
||||||
|
|
||||||
|
class TypeNotVersionableError(VersioningError):
|
||||||
|
"""
|
||||||
|
An object couldn't be versioned because it lacked the versioning properties
|
||||||
|
and its type does not support them.
|
||||||
|
"""
|
||||||
|
def __init__(self, obj):
|
||||||
|
if isinstance(obj, dict):
|
||||||
|
type_name = obj.get("type")
|
||||||
|
else:
|
||||||
|
# try standard attribute of _STIXBase subclasses/instances
|
||||||
|
type_name = getattr(obj, "_type", None)
|
||||||
|
|
||||||
|
self.object = obj
|
||||||
|
|
||||||
|
msg = "Object type{}is not versionable. Try a dictionary or " \
|
||||||
|
"instance of an SDO or SRO class.".format(
|
||||||
|
" '{}' ".format(type_name) if type_name else " ",
|
||||||
|
)
|
||||||
|
super().__init__(msg)
|
||||||
|
|
||||||
|
|
||||||
|
class ObjectNotVersionableError(VersioningError):
|
||||||
|
"""
|
||||||
|
An object's type supports versioning, but the object couldn't be versioned
|
||||||
|
because it lacked sufficient versioning properties.
|
||||||
|
"""
|
||||||
|
def __init__(self, obj):
|
||||||
|
self.object = obj
|
||||||
|
|
||||||
|
msg = "Creating a new object version requires at least the 'created'" \
|
||||||
|
" property: " + str(obj)
|
||||||
|
super().__init__(msg)
|
||||||
|
|
||||||
|
|
||||||
class RevokeError(STIXError):
|
class RevokeError(STIXError):
|
||||||
"""Attempted an operation on a revoked object."""
|
"""Attempted an operation on a revoked object."""
|
||||||
|
|
||||||
|
|
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
import collections
|
import collections
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from stix2 import exceptions, utils
|
from stix2 import exceptions, utils
|
||||||
|
|
||||||
|
|
||||||
|
@ -129,7 +127,7 @@ def compress_markings(granular_markings):
|
||||||
{'marking_ref': item, 'selectors': sorted(selectors)}
|
{'marking_ref': item, 'selectors': sorted(selectors)}
|
||||||
if utils.is_marking(item) else
|
if utils.is_marking(item) else
|
||||||
{'lang': item, 'selectors': sorted(selectors)}
|
{'lang': item, 'selectors': sorted(selectors)}
|
||||||
for item, selectors in six.iteritems(map_)
|
for item, selectors in map_.items()
|
||||||
]
|
]
|
||||||
|
|
||||||
return compressed
|
return compressed
|
||||||
|
@ -230,7 +228,7 @@ def iterpath(obj, path=None):
|
||||||
if path is None:
|
if path is None:
|
||||||
path = []
|
path = []
|
||||||
|
|
||||||
for varname, varobj in iter(sorted(six.iteritems(obj))):
|
for varname, varobj in iter(sorted(obj.items())):
|
||||||
path.append(varname)
|
path.append(varname)
|
||||||
yield (path, varobj)
|
yield (path, varobj)
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@
|
||||||
import importlib
|
import importlib
|
||||||
import inspect
|
import inspect
|
||||||
|
|
||||||
from six import text_type
|
|
||||||
from stix2patterns.exceptions import ParseException
|
from stix2patterns.exceptions import ParseException
|
||||||
from stix2patterns.grammars.STIXPatternParser import TerminalNode
|
from stix2patterns.grammars.STIXPatternParser import TerminalNode
|
||||||
from stix2patterns.v20.grammars.STIXPatternParser import \
|
from stix2patterns.v20.grammars.STIXPatternParser import \
|
||||||
|
@ -263,7 +262,7 @@ class STIXPatternVisitorForSTIX2():
|
||||||
property_path.append(
|
property_path.append(
|
||||||
self.instantiate(
|
self.instantiate(
|
||||||
"ListObjectPathComponent",
|
"ListObjectPathComponent",
|
||||||
current.property_name if isinstance(current, BasicObjectPathComponent) else text_type(current),
|
current.property_name if isinstance(current, BasicObjectPathComponent) else str(current),
|
||||||
next.value,
|
next.value,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
@ -286,7 +285,7 @@ class STIXPatternVisitorForSTIX2():
|
||||||
if isinstance(first_component, TerminalNode):
|
if isinstance(first_component, TerminalNode):
|
||||||
step = first_component.getText()
|
step = first_component.getText()
|
||||||
else:
|
else:
|
||||||
step = text_type(first_component)
|
step = str(first_component)
|
||||||
# if step.endswith("_ref"):
|
# if step.endswith("_ref"):
|
||||||
# return stix2.ReferenceObjectPathComponent(step)
|
# return stix2.ReferenceObjectPathComponent(step)
|
||||||
# else:
|
# else:
|
||||||
|
|
|
@ -5,8 +5,6 @@ import binascii
|
||||||
import datetime
|
import datetime
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from .utils import parse_into_datetime
|
from .utils import parse_into_datetime
|
||||||
|
|
||||||
|
|
||||||
|
@ -15,7 +13,7 @@ def escape_quotes_and_backslashes(s):
|
||||||
|
|
||||||
|
|
||||||
def quote_if_needed(x):
|
def quote_if_needed(x):
|
||||||
if isinstance(x, six.string_types):
|
if isinstance(x, str):
|
||||||
if x.find("-") != -1:
|
if x.find("-") != -1:
|
||||||
if not x.startswith("'"):
|
if not x.startswith("'"):
|
||||||
return "'" + x + "'"
|
return "'" + x + "'"
|
||||||
|
|
|
@ -7,8 +7,6 @@ import inspect
|
||||||
import re
|
import re
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from six import string_types, text_type
|
|
||||||
|
|
||||||
from .base import _STIXBase
|
from .base import _STIXBase
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
CustomContentError, DictionaryKeyError, MissingPropertiesError,
|
CustomContentError, DictionaryKeyError, MissingPropertiesError,
|
||||||
|
@ -236,7 +234,7 @@ class ListProperty(Property):
|
||||||
except TypeError:
|
except TypeError:
|
||||||
raise ValueError("must be an iterable.")
|
raise ValueError("must be an iterable.")
|
||||||
|
|
||||||
if isinstance(value, (_STIXBase, string_types)):
|
if isinstance(value, (_STIXBase, str)):
|
||||||
value = [value]
|
value = [value]
|
||||||
|
|
||||||
if isinstance(self.contained, Property):
|
if isinstance(self.contained, Property):
|
||||||
|
@ -277,8 +275,8 @@ class StringProperty(Property):
|
||||||
super(StringProperty, self).__init__(**kwargs)
|
super(StringProperty, self).__init__(**kwargs)
|
||||||
|
|
||||||
def clean(self, value):
|
def clean(self, value):
|
||||||
if not isinstance(value, string_types):
|
if not isinstance(value, str):
|
||||||
return text_type(value)
|
return str(value)
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import copy
|
import copy
|
||||||
import datetime as dt
|
import datetime as dt
|
||||||
|
import io
|
||||||
|
|
||||||
import simplejson as json
|
import simplejson as json
|
||||||
|
|
||||||
|
@ -64,6 +65,37 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs):
|
||||||
Returns:
|
Returns:
|
||||||
str: The serialized JSON object.
|
str: The serialized JSON object.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
The argument ``pretty=True`` will output the STIX object following
|
||||||
|
spec order. Using this argument greatly impacts object serialization
|
||||||
|
performance. If your use case is centered across machine-to-machine
|
||||||
|
operation it is recommended to set ``pretty=False``.
|
||||||
|
|
||||||
|
When ``pretty=True`` the following key-value pairs will be added or
|
||||||
|
overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by.
|
||||||
|
"""
|
||||||
|
with io.StringIO() as fp:
|
||||||
|
fp_serialize(obj, fp, pretty, include_optional_defaults, **kwargs)
|
||||||
|
return fp.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs):
|
||||||
|
"""
|
||||||
|
Serialize a STIX object to ``fp`` (a text stream file-like supporting object).
|
||||||
|
|
||||||
|
Args:
|
||||||
|
obj: The STIX object to be serialized.
|
||||||
|
fp: A text stream file-like object supporting ``.write()``.
|
||||||
|
pretty (bool): If True, output properties following the STIX specs
|
||||||
|
formatting. This includes indentation. Refer to notes for more
|
||||||
|
details. (Default: ``False``)
|
||||||
|
include_optional_defaults (bool): Determines whether to include
|
||||||
|
optional properties set to the default value defined in the spec.
|
||||||
|
**kwargs: The arguments for a json.dumps() call.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
The argument ``pretty=True`` will output the STIX object following
|
The argument ``pretty=True`` will output the STIX object following
|
||||||
spec order. Using this argument greatly impacts object serialization
|
spec order. Using this argument greatly impacts object serialization
|
||||||
|
@ -80,9 +112,9 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs):
|
||||||
kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by})
|
kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by})
|
||||||
|
|
||||||
if include_optional_defaults:
|
if include_optional_defaults:
|
||||||
return json.dumps(obj, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs)
|
json.dump(obj, fp, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs)
|
||||||
else:
|
else:
|
||||||
return json.dumps(obj, cls=STIXJSONEncoder, **kwargs)
|
json.dump(obj, fp, cls=STIXJSONEncoder, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
def _find(seq, val):
|
def _find(seq, val):
|
||||||
|
|
|
@ -223,6 +223,10 @@ def test_obs_absorb_not_equivalent(patt1, patt2):
|
||||||
"([a:b=1] OR [a:b=2]) FOLLOWEDBY ([a:b=3] OR [a:b=4])",
|
"([a:b=1] OR [a:b=2]) FOLLOWEDBY ([a:b=3] OR [a:b=4])",
|
||||||
"([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=4]) OR ([a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=2] FOLLOWEDBY [a:b=4])",
|
"([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=4]) OR ([a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=2] FOLLOWEDBY [a:b=4])",
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"([a:b=1] OR [a:b=2]) FOLLOWEDBY ([a:b=5] AND [a:b=6])",
|
||||||
|
"([a:b=1] FOLLOWEDBY ([a:b=5] AND [a:b=6])) OR ([a:b=2] FOLLOWEDBY ([a:b=5] AND [a:b=6]))",
|
||||||
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_obs_dnf_equivalent(patt1, patt2):
|
def test_obs_dnf_equivalent(patt1, patt2):
|
||||||
|
@ -243,6 +247,10 @@ def test_obs_dnf_equivalent(patt1, patt2):
|
||||||
"[a:b=1] WITHIN 2 SECONDS",
|
"[a:b=1] WITHIN 2 SECONDS",
|
||||||
"[a:b=1] REPEATS 2 TIMES",
|
"[a:b=1] REPEATS 2 TIMES",
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
"[a:b=1] FOLLOWEDBY ([a:b=2] OR [a:b=3])",
|
||||||
|
"([a:b=2] FOLLOWEDBY [a:b=1]) OR ([a:b=1] FOLLOWEDBY [a:b=3])",
|
||||||
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_obs_not_equivalent(patt1, patt2):
|
def test_obs_not_equivalent(patt1, patt2):
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -113,6 +114,27 @@ def test_bundle_id_must_start_with_bundle():
|
||||||
assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
|
assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship):
|
||||||
|
bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
buffer = io.StringIO()
|
||||||
|
|
||||||
|
bundle.fp_serialize(buffer, pretty=True)
|
||||||
|
|
||||||
|
assert str(bundle) == EXPECTED_BUNDLE
|
||||||
|
assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE
|
||||||
|
assert buffer.getvalue() == EXPECTED_BUNDLE
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship):
|
||||||
|
bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
buffer = io.StringIO()
|
||||||
|
|
||||||
|
bundle.fp_serialize(buffer, sort_keys=True)
|
||||||
|
|
||||||
|
assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
|
||||||
|
assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
def test_create_bundle1(indicator, malware, relationship):
|
def test_create_bundle1(indicator, malware, relationship):
|
||||||
bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
|
bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
|
||||||
|
|
|
@ -128,18 +128,17 @@ def test_filter_value_type_check():
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter('created', '=', object())
|
Filter('created', '=', object())
|
||||||
# On Python 2, the type of object() is `<type 'object'>` On Python 3, it's `<class 'object'>`.
|
assert "'<class 'object'>'" in str(excinfo.value)
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'object'>", "'<class 'object'>'"]])
|
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter("type", "=", complex(2, -1))
|
Filter("type", "=", complex(2, -1))
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'complex'>", "'<class 'complex'>'"]])
|
assert "'<class 'complex'>'" in str(excinfo.value)
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter("type", "=", set([16, 23]))
|
Filter("type", "=", set([16, 23]))
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'set'>", "'<class 'set'>'"]])
|
assert "'<class 'set'>'" in str(excinfo.value)
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,8 @@ import json
|
||||||
from medallion.filters.basic_filter import BasicFilter
|
from medallion.filters.basic_filter import BasicFilter
|
||||||
import pytest
|
import pytest
|
||||||
from requests.models import Response
|
from requests.models import Response
|
||||||
import six
|
|
||||||
from taxii2client.common import _filter_kwargs_to_query_params
|
from taxii2client.common import _filter_kwargs_to_query_params
|
||||||
from taxii2client.v20 import Collection
|
from taxii2client.v20 import MEDIA_TYPE_STIX_V20, Collection
|
||||||
|
|
||||||
import stix2
|
import stix2
|
||||||
from stix2.datastore import DataSourceError
|
from stix2.datastore import DataSourceError
|
||||||
|
@ -27,7 +26,7 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
|
|
||||||
def add_objects(self, bundle):
|
def add_objects(self, bundle):
|
||||||
self._verify_can_write()
|
self._verify_can_write()
|
||||||
if isinstance(bundle, six.string_types):
|
if isinstance(bundle, str):
|
||||||
bundle = json.loads(bundle)
|
bundle = json.loads(bundle)
|
||||||
for object in bundle.get("objects", []):
|
for object in bundle.get("objects", []):
|
||||||
self.objects.append(object)
|
self.objects.append(object)
|
||||||
|
@ -35,12 +34,12 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
{
|
{
|
||||||
"date_added": get_timestamp(),
|
"date_added": get_timestamp(),
|
||||||
"id": object["id"],
|
"id": object["id"],
|
||||||
"media_type": "application/stix+json;version=2.1",
|
"media_type": "application/stix+json;version=2.0",
|
||||||
"version": object.get("modified", object.get("created", get_timestamp())),
|
"version": object.get("modified", object.get("created", get_timestamp())),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_objects(self, **filter_kwargs):
|
def get_objects(self, accept=MEDIA_TYPE_STIX_V20, start=0, per_request=0, **filter_kwargs):
|
||||||
self._verify_can_read()
|
self._verify_can_read()
|
||||||
query_params = _filter_kwargs_to_query_params(filter_kwargs)
|
query_params = _filter_kwargs_to_query_params(filter_kwargs)
|
||||||
assert isinstance(query_params, dict)
|
assert isinstance(query_params, dict)
|
||||||
|
@ -52,7 +51,12 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
100,
|
100,
|
||||||
)[0]
|
)[0]
|
||||||
if objs:
|
if objs:
|
||||||
return stix2.v20.Bundle(objects=objs)
|
resp = Response()
|
||||||
|
resp.status_code = 200
|
||||||
|
resp.headers["Content-Range"] = f"items 0-{len(objs)}/{len(objs)}"
|
||||||
|
resp.encoding = "utf-8"
|
||||||
|
resp._content = bytes(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False), resp.encoding)
|
||||||
|
return resp
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -67,6 +68,11 @@ def ds2():
|
||||||
yield stix2.MemoryStore(stix_objs)
|
yield stix2.MemoryStore(stix_objs)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def fs():
|
||||||
|
yield stix2.FileSystemSource(FS_PATH)
|
||||||
|
|
||||||
|
|
||||||
def test_object_factory_created_by_ref_str():
|
def test_object_factory_created_by_ref_str():
|
||||||
factory = stix2.ObjectFactory(created_by_ref=IDENTITY_ID)
|
factory = stix2.ObjectFactory(created_by_ref=IDENTITY_ID)
|
||||||
ind = factory.create(stix2.v20.Indicator, **INDICATOR_KWARGS)
|
ind = factory.create(stix2.v20.Indicator, **INDICATOR_KWARGS)
|
||||||
|
@ -418,7 +424,7 @@ def test_related_to_by_target(ds):
|
||||||
|
|
||||||
|
|
||||||
def test_versioned_checks(ds, ds2):
|
def test_versioned_checks(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": True,
|
"ignore_spec_version": True,
|
||||||
|
@ -431,7 +437,7 @@ def test_versioned_checks(ds, ds2):
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_check_with_versioning(ds, ds2):
|
def test_semantic_check_with_versioning(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": False,
|
"ignore_spec_version": False,
|
||||||
|
@ -461,13 +467,11 @@ def test_semantic_check_with_versioning(ds, ds2):
|
||||||
|
|
||||||
|
|
||||||
def test_list_semantic_check(ds, ds2):
|
def test_list_semantic_check(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": False,
|
"ignore_spec_version": False,
|
||||||
"versioning_checks": False,
|
"versioning_checks": False,
|
||||||
"ds1": ds,
|
|
||||||
"ds2": ds2,
|
|
||||||
"max_depth": 1,
|
"max_depth": 1,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
@ -497,62 +501,149 @@ def test_list_semantic_check(ds, ds2):
|
||||||
assert round(score) == 1
|
assert round(score) == 1
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_filesystem_source(ds):
|
def test_graph_similarity_raises_value_error(ds):
|
||||||
weights = {
|
with pytest.raises(ValueError):
|
||||||
"_internal": {
|
prop_scores1 = {}
|
||||||
"ignore_spec_version": True,
|
stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1)
|
||||||
"versioning_checks": False,
|
|
||||||
"max_depth": 1,
|
|
||||||
},
|
def test_graph_similarity_with_filesystem_source(ds, fs):
|
||||||
}
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(fs, ds, prop_scores1, ignore_spec_version=True)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(ds, fs, prop_scores2, ignore_spec_version=True)
|
||||||
|
|
||||||
|
assert round(env1) == 25
|
||||||
|
assert round(prop_scores1["matching_score"]) == 451
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 18
|
||||||
|
|
||||||
|
assert round(env2) == 25
|
||||||
|
assert round(prop_scores2["matching_score"]) == 451
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 18
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_duplicate_graph(ds):
|
||||||
prop_scores = {}
|
prop_scores = {}
|
||||||
fs = stix2.FileSystemSource(FS_PATH)
|
env = stix2.Environment().graph_similarity(ds, ds, prop_scores)
|
||||||
env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights)
|
assert round(env) == 100
|
||||||
assert round(env) == 28
|
assert round(prop_scores["matching_score"]) == 800
|
||||||
assert round(prop_scores["matching_score"]) == 139
|
assert round(prop_scores["len_pairs"]) == 8
|
||||||
assert round(prop_scores["sum_weights"]) == 500
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_versioning_check_on(ds2, ds):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True)
|
||||||
|
|
||||||
|
assert round(env1) == 88
|
||||||
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
|
|
||||||
|
assert round(env2) == 88
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_versioning_check_off(ds2, ds):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2)
|
||||||
|
|
||||||
|
assert round(env1) == 88
|
||||||
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
|
|
||||||
|
assert round(env2) == 88
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_equivalence_with_filesystem_source(ds, fs):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True)
|
||||||
|
|
||||||
|
assert env1 is False
|
||||||
|
assert round(prop_scores1["matching_score"]) == 451
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 18
|
||||||
|
|
||||||
|
assert env2 is False
|
||||||
|
assert round(prop_scores2["matching_score"]) == 451
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 18
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_duplicate_graph(ds):
|
def test_graph_equivalence_with_duplicate_graph(ds):
|
||||||
weights = {
|
|
||||||
"_internal": {
|
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": False,
|
|
||||||
"max_depth": 1,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
prop_scores = {}
|
prop_scores = {}
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights)
|
env = stix2.Environment().graph_equivalence(ds, ds, prop_scores)
|
||||||
assert round(env) == 100
|
assert env is True
|
||||||
assert round(prop_scores["matching_score"]) == 800
|
assert round(prop_scores["matching_score"]) == 800
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert round(prop_scores["len_pairs"]) == 8
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_versioning_check_on(ds2, ds):
|
def test_graph_equivalence_with_versioning_check_on(ds2, ds):
|
||||||
weights = {
|
prop_scores1 = {}
|
||||||
"_internal": {
|
env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True)
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": True,
|
# Switching parameters
|
||||||
"max_depth": 1,
|
prop_scores2 = {}
|
||||||
},
|
env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True)
|
||||||
}
|
|
||||||
prop_scores = {}
|
assert env1 is True
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
assert round(env) == 93
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
assert round(prop_scores["matching_score"]) == 745
|
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert env2 is True
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_versioning_check_off(ds2, ds):
|
def test_graph_equivalence_with_versioning_check_off(ds2, ds):
|
||||||
weights = {
|
prop_scores1 = {}
|
||||||
"_internal": {
|
env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1)
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": False,
|
# Switching parameters
|
||||||
"max_depth": 1,
|
prop_scores2 = {}
|
||||||
},
|
env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2)
|
||||||
}
|
|
||||||
prop_scores = {}
|
assert env1 is True
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
assert round(env) == 93
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
assert round(prop_scores["matching_score"]) == 745
|
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert env2 is True
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
|
@ -170,6 +170,60 @@ def test_versioning_error_dict_bad_modified_value():
|
||||||
"but have the same id and modified timestamp do not have defined consumer behavior."
|
"but have the same id and modified timestamp do not have defined consumer behavior."
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_dict_unregistered_no_modified():
|
||||||
|
d = {
|
||||||
|
"type": "not-registered",
|
||||||
|
"id": "not-registered--4da54535-47b7-468c-88fa-d13b04033c4b",
|
||||||
|
"created": "1995-04-07T15:37:48.178Z",
|
||||||
|
}
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d)
|
||||||
|
assert "modified" in new_d
|
||||||
|
assert new_d["modified"] > stix2.utils.parse_into_datetime(d["created"])
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d, modified="1996-11-20T01:19:29.134Z")
|
||||||
|
assert new_d["modified"] == "1996-11-20T01:19:29.134Z"
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_dict_unregistered_unversionable():
|
||||||
|
d = {
|
||||||
|
"type": "not-registered",
|
||||||
|
"id": "not-registered--4da54535-47b7-468c-88fa-d13b04033c4b",
|
||||||
|
"modified": "1995-04-07T15:37:48.178Z",
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
stix2.versioning.new_version(d)
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
# should fail even if we provide a "created" kwarg.
|
||||||
|
stix2.versioning.new_version(d, created="1985-06-29T06:09:51.157Z")
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_custom_object():
|
||||||
|
@stix2.v20.CustomObject(
|
||||||
|
"x-versionable-all-optional-20", [
|
||||||
|
("created", stix2.properties.TimestampProperty()),
|
||||||
|
("modified", stix2.properties.TimestampProperty()),
|
||||||
|
("revoked", stix2.properties.BooleanProperty()),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
class CustomSDO:
|
||||||
|
pass
|
||||||
|
|
||||||
|
obj = CustomSDO(created="1990-12-18T17:56:11.346234Z")
|
||||||
|
new_obj = stix2.versioning.new_version(obj)
|
||||||
|
|
||||||
|
assert "modified" in new_obj
|
||||||
|
assert new_obj["modified"] > new_obj["created"]
|
||||||
|
|
||||||
|
obj = CustomSDO()
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
# fails due to insufficient properties on the object, even though its
|
||||||
|
# type supports versioning.
|
||||||
|
stix2.versioning.new_version(obj)
|
||||||
|
|
||||||
|
|
||||||
def test_versioning_error_dict_no_modified_value():
|
def test_versioning_error_dict_no_modified_value():
|
||||||
campaign_v1 = {
|
campaign_v1 = {
|
||||||
'type': 'campaign',
|
'type': 'campaign',
|
||||||
|
@ -184,10 +238,10 @@ def test_versioning_error_dict_no_modified_value():
|
||||||
|
|
||||||
def test_making_new_version_invalid_cls():
|
def test_making_new_version_invalid_cls():
|
||||||
campaign_v1 = "This is a campaign."
|
campaign_v1 = "This is a campaign."
|
||||||
with pytest.raises(ValueError) as excinfo:
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError) as excinfo:
|
||||||
stix2.versioning.new_version(campaign_v1, name="fred")
|
stix2.versioning.new_version(campaign_v1, name="fred")
|
||||||
|
|
||||||
assert 'cannot create new version of object of this type' in str(excinfo.value)
|
assert excinfo.value.object is campaign_v1
|
||||||
|
|
||||||
|
|
||||||
def test_revoke_dict():
|
def test_revoke_dict():
|
||||||
|
@ -206,7 +260,7 @@ def test_revoke_dict():
|
||||||
|
|
||||||
def test_revoke_unversionable():
|
def test_revoke_unversionable():
|
||||||
sco = stix2.v20.File(name="data.txt")
|
sco = stix2.v20.File(name="data.txt")
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
sco.revoke()
|
sco.revoke()
|
||||||
|
|
||||||
|
|
||||||
|
@ -277,7 +331,7 @@ def test_version_unversionable_dict():
|
||||||
"name": "data.txt",
|
"name": "data.txt",
|
||||||
}
|
}
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.new_version(f)
|
stix2.versioning.new_version(f)
|
||||||
|
|
||||||
|
|
||||||
|
@ -295,10 +349,10 @@ def test_version_sco_with_modified():
|
||||||
"modified": "1991-05-13T19:24:57Z",
|
"modified": "1991-05-13T19:24:57Z",
|
||||||
}
|
}
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.new_version(file_sco, name="newname.txt")
|
stix2.versioning.new_version(file_sco, name="newname.txt")
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.revoke(file_sco)
|
stix2.versioning.revoke(file_sco)
|
||||||
|
|
||||||
file_sco_obj = stix2.v20.File(
|
file_sco_obj = stix2.v20.File(
|
||||||
|
@ -307,10 +361,10 @@ def test_version_sco_with_modified():
|
||||||
modified="1991-05-13T19:24:57Z",
|
modified="1991-05-13T19:24:57Z",
|
||||||
)
|
)
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.new_version(file_sco_obj, name="newname.txt")
|
stix2.versioning.new_version(file_sco_obj, name="newname.txt")
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.revoke(file_sco_obj)
|
stix2.versioning.revoke(file_sco_obj)
|
||||||
|
|
||||||
|
|
||||||
|
@ -337,6 +391,45 @@ def test_version_sco_with_custom():
|
||||||
revoked_obj = stix2.versioning.revoke(new_file_sco_obj)
|
revoked_obj = stix2.versioning.revoke(new_file_sco_obj)
|
||||||
assert revoked_obj.revoked
|
assert revoked_obj.revoked
|
||||||
|
|
||||||
|
# Same thing with a dict
|
||||||
|
d = {
|
||||||
|
"type": "file",
|
||||||
|
"name": "data.txt",
|
||||||
|
"created": "1973-11-23T02:31:37Z",
|
||||||
|
"modified": "1991-05-13T19:24:57Z",
|
||||||
|
"revoked": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d, size=1234)
|
||||||
|
assert new_d["size"] == 1234
|
||||||
|
|
||||||
|
revoked_d = stix2.versioning.revoke(new_d)
|
||||||
|
assert revoked_d["revoked"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_marking():
|
||||||
|
m = stix2.v20.MarkingDefinition(
|
||||||
|
created="1982-11-29T12:20:13.723Z",
|
||||||
|
definition_type="statement",
|
||||||
|
definition={"statement": "Copyright (c) 2000-2020 Acme Corp"},
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
|
stix2.versioning.new_version(m)
|
||||||
|
|
||||||
|
m = {
|
||||||
|
"type": "marking-definition",
|
||||||
|
"id": "marking-definition--2a9f3f6e-5cbd-423b-a40d-02aefd29e612",
|
||||||
|
"created": "1982-11-29T12:20:13.723Z",
|
||||||
|
"definition_type": "statement",
|
||||||
|
"definition": {
|
||||||
|
"statement": "Copyright (c) 2000-2020 Acme Corp",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
|
stix2.versioning.new_version(m)
|
||||||
|
|
||||||
|
|
||||||
def test_version_disable_custom():
|
def test_version_disable_custom():
|
||||||
m = stix2.v20.Malware(
|
m = stix2.v20.Malware(
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import io
|
||||||
import json
|
import json
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -123,6 +124,27 @@ def test_bundle_id_must_start_with_bundle():
|
||||||
assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
|
assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'."
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship):
|
||||||
|
bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
buffer = io.StringIO()
|
||||||
|
|
||||||
|
bundle.fp_serialize(buffer, pretty=True)
|
||||||
|
|
||||||
|
assert str(bundle) == EXPECTED_BUNDLE
|
||||||
|
assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE
|
||||||
|
assert buffer.getvalue() == EXPECTED_BUNDLE
|
||||||
|
|
||||||
|
|
||||||
|
def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship):
|
||||||
|
bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
buffer = io.StringIO()
|
||||||
|
|
||||||
|
bundle.fp_serialize(buffer, sort_keys=True)
|
||||||
|
|
||||||
|
assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
|
||||||
|
assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
def test_create_bundle1(indicator, malware, relationship):
|
def test_create_bundle1(indicator, malware, relationship):
|
||||||
bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
|
bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship])
|
||||||
|
|
||||||
|
|
|
@ -146,18 +146,17 @@ def test_filter_value_type_check():
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter('created', '=', object())
|
Filter('created', '=', object())
|
||||||
# On Python 2, the type of object() is `<type 'object'>` On Python 3, it's `<class 'object'>`.
|
assert "'<class 'object'>'" in str(excinfo.value)
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'object'>", "'<class 'object'>'"]])
|
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter("type", "=", complex(2, -1))
|
Filter("type", "=", complex(2, -1))
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'complex'>", "'<class 'complex'>'"]])
|
assert "'<class 'complex'>'" in str(excinfo.value)
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
with pytest.raises(TypeError) as excinfo:
|
with pytest.raises(TypeError) as excinfo:
|
||||||
Filter("type", "=", set([16, 23]))
|
Filter("type", "=", set([16, 23]))
|
||||||
assert any([s in str(excinfo.value) for s in ["<type 'set'>", "'<class 'set'>'"]])
|
assert "'<class 'set'>'" in str(excinfo.value)
|
||||||
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
assert "is not supported. The type must be a Python immutable type or dictionary" in str(excinfo.value)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,6 @@ import json
|
||||||
from medallion.filters.basic_filter import BasicFilter
|
from medallion.filters.basic_filter import BasicFilter
|
||||||
import pytest
|
import pytest
|
||||||
from requests.models import Response
|
from requests.models import Response
|
||||||
import six
|
|
||||||
from taxii2client.common import _filter_kwargs_to_query_params
|
from taxii2client.common import _filter_kwargs_to_query_params
|
||||||
from taxii2client.v21 import Collection
|
from taxii2client.v21 import Collection
|
||||||
|
|
||||||
|
@ -27,16 +26,16 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
|
|
||||||
def add_objects(self, bundle):
|
def add_objects(self, bundle):
|
||||||
self._verify_can_write()
|
self._verify_can_write()
|
||||||
if isinstance(bundle, six.string_types):
|
if isinstance(bundle, str):
|
||||||
bundle = json.loads(bundle)
|
bundle = json.loads(bundle)
|
||||||
for object in bundle.get("objects", []):
|
for obj in bundle.get("objects", []):
|
||||||
self.objects.append(object)
|
self.objects.append(obj)
|
||||||
self.manifests.append(
|
self.manifests.append(
|
||||||
{
|
{
|
||||||
"date_added": get_timestamp(),
|
"date_added": get_timestamp(),
|
||||||
"id": object["id"],
|
"id": obj["id"],
|
||||||
"media_type": "application/stix+json;version=2.1",
|
"media_type": "application/stix+json;version=2.1",
|
||||||
"version": object.get("modified", object.get("created", get_timestamp())),
|
"version": obj.get("modified", obj.get("created", get_timestamp())),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -52,7 +51,10 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
100,
|
100,
|
||||||
)[0]
|
)[0]
|
||||||
if objs:
|
if objs:
|
||||||
return stix2.v21.Bundle(objects=objs)
|
return {
|
||||||
|
"objects": objs,
|
||||||
|
"more": False,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
@ -76,7 +78,10 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
else:
|
else:
|
||||||
filtered_objects = []
|
filtered_objects = []
|
||||||
if filtered_objects:
|
if filtered_objects:
|
||||||
return stix2.v21.Bundle(objects=filtered_objects)
|
return {
|
||||||
|
"objects": filtered_objects,
|
||||||
|
"more": False,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
|
|
@ -3,7 +3,6 @@ import datetime
|
||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
import six
|
|
||||||
|
|
||||||
import stix2.base
|
import stix2.base
|
||||||
import stix2.canonicalization.Canonicalize
|
import stix2.canonicalization.Canonicalize
|
||||||
|
@ -31,12 +30,7 @@ def _make_uuid5(name):
|
||||||
"""
|
"""
|
||||||
Make a STIX 2.1+ compliant UUIDv5 from a "name".
|
Make a STIX 2.1+ compliant UUIDv5 from a "name".
|
||||||
"""
|
"""
|
||||||
if six.PY3:
|
uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, name)
|
||||||
uuid_ = uuid.uuid5(SCO_DET_ID_NAMESPACE, name)
|
|
||||||
else:
|
|
||||||
uuid_ = uuid.uuid5(
|
|
||||||
SCO_DET_ID_NAMESPACE, name.encode("utf-8"),
|
|
||||||
)
|
|
||||||
|
|
||||||
return uuid_
|
return uuid_
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
@ -37,7 +38,7 @@ def ds():
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def ds2():
|
def ds2_objects():
|
||||||
cam = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
cam = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
||||||
idy = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
idy = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
||||||
ind = stix2.v21.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS)
|
ind = stix2.v21.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS)
|
||||||
|
@ -68,7 +69,17 @@ def ds2():
|
||||||
published="2021-04-09T08:22:22Z", object_refs=stix_objs,
|
published="2021-04-09T08:22:22Z", object_refs=stix_objs,
|
||||||
)
|
)
|
||||||
stix_objs.append(reprt)
|
stix_objs.append(reprt)
|
||||||
yield stix2.MemoryStore(stix_objs)
|
yield stix_objs
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def ds2(ds2_objects):
|
||||||
|
yield stix2.MemoryStore(ds2_objects)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def fs():
|
||||||
|
yield stix2.FileSystemSource(FS_PATH)
|
||||||
|
|
||||||
|
|
||||||
def test_object_factory_created_by_ref_str():
|
def test_object_factory_created_by_ref_str():
|
||||||
|
@ -426,14 +437,14 @@ def test_related_to_by_target(ds):
|
||||||
assert any(x['id'] == INDICATOR_ID for x in resp)
|
assert any(x['id'] == INDICATOR_ID for x in resp)
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_attack_pattern1():
|
def test_object_similarity_on_same_attack_pattern1():
|
||||||
ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
|
ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
|
||||||
ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
|
ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_PATTERN_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ap1, ap2)
|
env = stix2.Environment().object_similarity(ap1, ap2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_attack_pattern2():
|
def test_object_similarity_on_same_attack_pattern2():
|
||||||
ATTACK_KWARGS = dict(
|
ATTACK_KWARGS = dict(
|
||||||
name="Phishing",
|
name="Phishing",
|
||||||
external_references=[
|
external_references=[
|
||||||
|
@ -445,18 +456,18 @@ def test_semantic_equivalence_on_same_attack_pattern2():
|
||||||
)
|
)
|
||||||
ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
|
ap1 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
|
||||||
ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
|
ap2 = stix2.v21.AttackPattern(id=ATTACK_PATTERN_ID, **ATTACK_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ap1, ap2)
|
env = stix2.Environment().object_similarity(ap1, ap2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_campaign1():
|
def test_object_similarity_on_same_campaign1():
|
||||||
camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
||||||
camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(camp1, camp2)
|
env = stix2.Environment().object_similarity(camp1, camp2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_campaign2():
|
def test_object_similarity_on_same_campaign2():
|
||||||
CAMP_KWARGS = dict(
|
CAMP_KWARGS = dict(
|
||||||
name="Green Group Attacks Against Finance",
|
name="Green Group Attacks Against Finance",
|
||||||
description="Campaign by Green Group against a series of targets in the financial services sector.",
|
description="Campaign by Green Group against a series of targets in the financial services sector.",
|
||||||
|
@ -464,18 +475,18 @@ def test_semantic_equivalence_on_same_campaign2():
|
||||||
)
|
)
|
||||||
camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
|
camp1 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
|
||||||
camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
|
camp2 = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMP_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(camp1, camp2)
|
env = stix2.Environment().object_similarity(camp1, camp2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_identity1():
|
def test_object_similarity_on_same_identity1():
|
||||||
iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
||||||
iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(iden1, iden2)
|
env = stix2.Environment().object_similarity(iden1, iden2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_identity2():
|
def test_object_similarity_on_same_identity2():
|
||||||
IDEN_KWARGS = dict(
|
IDEN_KWARGS = dict(
|
||||||
name="John Smith",
|
name="John Smith",
|
||||||
identity_class="individual",
|
identity_class="individual",
|
||||||
|
@ -483,26 +494,26 @@ def test_semantic_equivalence_on_same_identity2():
|
||||||
)
|
)
|
||||||
iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
|
iden1 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
|
||||||
iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
|
iden2 = stix2.v21.Identity(id=IDENTITY_ID, **IDEN_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(iden1, iden2)
|
env = stix2.Environment().object_similarity(iden1, iden2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_indicator():
|
def test_object_similarity_on_same_indicator():
|
||||||
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ind1, ind2)
|
env = stix2.Environment().object_similarity(ind1, ind2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_location1():
|
def test_object_similarity_on_same_location1():
|
||||||
location_kwargs = dict(latitude=45, longitude=179)
|
location_kwargs = dict(latitude=45, longitude=179)
|
||||||
loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
||||||
loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
||||||
env = stix2.Environment().semantically_equivalent(loc1, loc2)
|
env = stix2.Environment().object_similarity(loc1, loc2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_location2():
|
def test_object_similarity_on_same_location2():
|
||||||
location_kwargs = dict(
|
location_kwargs = dict(
|
||||||
latitude=38.889,
|
latitude=38.889,
|
||||||
longitude=-77.023,
|
longitude=-77.023,
|
||||||
|
@ -511,33 +522,33 @@ def test_semantic_equivalence_on_same_location2():
|
||||||
)
|
)
|
||||||
loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
loc1 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
||||||
loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
loc2 = stix2.v21.Location(id=LOCATION_ID, **location_kwargs)
|
||||||
env = stix2.Environment().semantically_equivalent(loc1, loc2)
|
env = stix2.Environment().object_similarity(loc1, loc2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_location_with_no_latlong():
|
def test_object_similarity_location_with_no_latlong():
|
||||||
loc_kwargs = dict(country="US", administrative_area="US-DC")
|
loc_kwargs = dict(country="US", administrative_area="US-DC")
|
||||||
loc1 = stix2.v21.Location(id=LOCATION_ID, **LOCATION_KWARGS)
|
loc1 = stix2.v21.Location(id=LOCATION_ID, **LOCATION_KWARGS)
|
||||||
loc2 = stix2.v21.Location(id=LOCATION_ID, **loc_kwargs)
|
loc2 = stix2.v21.Location(id=LOCATION_ID, **loc_kwargs)
|
||||||
env = stix2.Environment().semantically_equivalent(loc1, loc2)
|
env = stix2.Environment().object_similarity(loc1, loc2)
|
||||||
assert round(env) != 100
|
assert round(env) != 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_malware():
|
def test_object_similarity_on_same_malware():
|
||||||
malw1 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
|
malw1 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
|
||||||
malw2 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
|
malw2 = stix2.v21.Malware(id=MALWARE_ID, **MALWARE_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(malw1, malw2)
|
env = stix2.Environment().object_similarity(malw1, malw2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_threat_actor1():
|
def test_object_similarity_on_same_threat_actor1():
|
||||||
ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
|
ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
|
||||||
ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
|
ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_ACTOR_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ta1, ta2)
|
env = stix2.Environment().object_similarity(ta1, ta2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_threat_actor2():
|
def test_object_similarity_on_same_threat_actor2():
|
||||||
THREAT_KWARGS = dict(
|
THREAT_KWARGS = dict(
|
||||||
threat_actor_types=["crime-syndicate"],
|
threat_actor_types=["crime-syndicate"],
|
||||||
aliases=["super-evil"],
|
aliases=["super-evil"],
|
||||||
|
@ -545,25 +556,38 @@ def test_semantic_equivalence_on_same_threat_actor2():
|
||||||
)
|
)
|
||||||
ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
|
ta1 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
|
||||||
ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
|
ta2 = stix2.v21.ThreatActor(id=THREAT_ACTOR_ID, **THREAT_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ta1, ta2)
|
env = stix2.Environment().object_similarity(ta1, ta2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_tool():
|
def test_object_similarity_on_same_tool():
|
||||||
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
||||||
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(tool1, tool2)
|
env = stix2.Environment().object_similarity(tool1, tool2)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_vulnerability1():
|
def test_object_similarity_on_same_vulnerability1():
|
||||||
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
||||||
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(vul1, vul2)
|
prop_scores = {}
|
||||||
|
env = stix2.Environment().object_similarity(vul1, vul2, prop_scores)
|
||||||
assert round(env) == 100
|
assert round(env) == 100
|
||||||
|
assert round(prop_scores["matching_score"]) == 30
|
||||||
|
assert round(prop_scores["sum_weights"]) == 30
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_same_vulnerability2():
|
def test_object_equivalence_on_same_vulnerability1():
|
||||||
|
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
||||||
|
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
||||||
|
prop_scores = {}
|
||||||
|
env = stix2.Environment().object_equivalence(vul1, vul2, prop_scores)
|
||||||
|
assert env is True
|
||||||
|
assert round(prop_scores["matching_score"]) == 30
|
||||||
|
assert round(prop_scores["sum_weights"]) == 30
|
||||||
|
|
||||||
|
|
||||||
|
def test_object_similarity_on_same_vulnerability2():
|
||||||
VULN_KWARGS1 = dict(
|
VULN_KWARGS1 = dict(
|
||||||
name="Heartbleed",
|
name="Heartbleed",
|
||||||
external_references=[
|
external_references=[
|
||||||
|
@ -584,11 +608,42 @@ def test_semantic_equivalence_on_same_vulnerability2():
|
||||||
)
|
)
|
||||||
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS1)
|
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS1)
|
||||||
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS2)
|
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS2)
|
||||||
env = stix2.Environment().semantically_equivalent(vul1, vul2)
|
prop_scores = {}
|
||||||
|
env = stix2.Environment().object_similarity(vul1, vul2, prop_scores)
|
||||||
assert round(env) == 0.0
|
assert round(env) == 0.0
|
||||||
|
assert round(prop_scores["matching_score"]) == 0
|
||||||
|
assert round(prop_scores["sum_weights"]) == 100
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_on_unknown_object():
|
def test_object_equivalence_on_same_vulnerability2():
|
||||||
|
VULN_KWARGS1 = dict(
|
||||||
|
name="Heartbleed",
|
||||||
|
external_references=[
|
||||||
|
{
|
||||||
|
"url": "https://example",
|
||||||
|
"source_name": "some-source",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
VULN_KWARGS2 = dict(
|
||||||
|
name="Foo",
|
||||||
|
external_references=[
|
||||||
|
{
|
||||||
|
"url": "https://example2",
|
||||||
|
"source_name": "some-source2",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS1)
|
||||||
|
vul2 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULN_KWARGS2)
|
||||||
|
prop_scores = {}
|
||||||
|
env = stix2.Environment().object_equivalence(vul1, vul2, prop_scores)
|
||||||
|
assert env is False
|
||||||
|
assert round(prop_scores["matching_score"]) == 0
|
||||||
|
assert round(prop_scores["sum_weights"]) == 100
|
||||||
|
|
||||||
|
|
||||||
|
def test_object_similarity_on_unknown_object():
|
||||||
CUSTOM_KWARGS1 = dict(
|
CUSTOM_KWARGS1 = dict(
|
||||||
type="x-foobar",
|
type="x-foobar",
|
||||||
id="x-foobar--0c7b5b88-8ff7-4a4d-aa9d-feb398cd0061",
|
id="x-foobar--0c7b5b88-8ff7-4a4d-aa9d-feb398cd0061",
|
||||||
|
@ -615,17 +670,17 @@ def test_semantic_equivalence_on_unknown_object():
|
||||||
def _x_foobar_checks(obj1, obj2, **weights):
|
def _x_foobar_checks(obj1, obj2, **weights):
|
||||||
matching_score = 0.0
|
matching_score = 0.0
|
||||||
sum_weights = 0.0
|
sum_weights = 0.0
|
||||||
if stix2.environment.check_property_present("external_references", obj1, obj2):
|
if stix2.equivalence.object.check_property_present("external_references", obj1, obj2):
|
||||||
w = weights["external_references"]
|
w = weights["external_references"]
|
||||||
sum_weights += w
|
sum_weights += w
|
||||||
matching_score += w * stix2.environment.partial_external_reference_based(
|
matching_score += w * stix2.equivalence.object.partial_external_reference_based(
|
||||||
obj1["external_references"],
|
obj1["external_references"],
|
||||||
obj2["external_references"],
|
obj2["external_references"],
|
||||||
)
|
)
|
||||||
if stix2.environment.check_property_present("name", obj1, obj2):
|
if stix2.equivalence.object.check_property_present("name", obj1, obj2):
|
||||||
w = weights["name"]
|
w = weights["name"]
|
||||||
sum_weights += w
|
sum_weights += w
|
||||||
matching_score += w * stix2.environment.partial_string_based(obj1["name"], obj2["name"])
|
matching_score += w * stix2.equivalence.object.partial_string_based(obj1["name"], obj2["name"])
|
||||||
return matching_score, sum_weights
|
return matching_score, sum_weights
|
||||||
|
|
||||||
weights = {
|
weights = {
|
||||||
|
@ -640,20 +695,20 @@ def test_semantic_equivalence_on_unknown_object():
|
||||||
}
|
}
|
||||||
cust1 = stix2.parse(CUSTOM_KWARGS1, allow_custom=True)
|
cust1 = stix2.parse(CUSTOM_KWARGS1, allow_custom=True)
|
||||||
cust2 = stix2.parse(CUSTOM_KWARGS2, allow_custom=True)
|
cust2 = stix2.parse(CUSTOM_KWARGS2, allow_custom=True)
|
||||||
env = stix2.Environment().semantically_equivalent(cust1, cust2, **weights)
|
env = stix2.Environment().object_similarity(cust1, cust2, **weights)
|
||||||
assert round(env) == 0
|
assert round(env) == 0
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_different_type_raises():
|
def test_object_similarity_different_type_raises():
|
||||||
with pytest.raises(ValueError) as excinfo:
|
with pytest.raises(ValueError) as excinfo:
|
||||||
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
vul1 = stix2.v21.Vulnerability(id=VULNERABILITY_ID, **VULNERABILITY_KWARGS)
|
||||||
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
stix2.Environment().semantically_equivalent(vul1, ind1)
|
stix2.Environment().object_similarity(vul1, ind1)
|
||||||
|
|
||||||
assert str(excinfo.value) == "The objects to compare must be of the same type!"
|
assert str(excinfo.value) == "The objects to compare must be of the same type!"
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_different_spec_version_raises():
|
def test_object_similarity_different_spec_version_raises():
|
||||||
with pytest.raises(ValueError) as excinfo:
|
with pytest.raises(ValueError) as excinfo:
|
||||||
V20_KWARGS = dict(
|
V20_KWARGS = dict(
|
||||||
labels=['malicious-activity'],
|
labels=['malicious-activity'],
|
||||||
|
@ -661,23 +716,24 @@ def test_semantic_equivalence_different_spec_version_raises():
|
||||||
)
|
)
|
||||||
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **V20_KWARGS)
|
ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **V20_KWARGS)
|
||||||
stix2.Environment().semantically_equivalent(ind1, ind2)
|
stix2.Environment().object_similarity(ind1, ind2)
|
||||||
|
|
||||||
assert str(excinfo.value) == "The objects to compare must be of the same spec version!"
|
assert str(excinfo.value) == "The objects to compare must be of the same spec version!"
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_zero_match():
|
def test_object_similarity_zero_match():
|
||||||
IND_KWARGS = dict(
|
IND_KWARGS = dict(
|
||||||
indicator_types=["APTX"],
|
indicator_types=["malicious-activity", "bar"],
|
||||||
pattern="[ipv4-addr:value = '192.168.1.1']",
|
pattern="[ipv4-addr:value = '192.168.1.1']",
|
||||||
pattern_type="stix",
|
pattern_type="stix",
|
||||||
valid_from="2019-01-01T12:34:56Z",
|
valid_from="2019-01-01T12:34:56Z",
|
||||||
|
labels=["APTX", "foo"],
|
||||||
)
|
)
|
||||||
weights = {
|
weights = {
|
||||||
"indicator": {
|
"indicator": {
|
||||||
"indicator_types": (15, stix2.environment.partial_list_based),
|
"indicator_types": (15, stix2.equivalence.object.partial_list_based),
|
||||||
"pattern": (80, stix2.environment.custom_pattern_based),
|
"pattern": (80, stix2.equivalence.object.custom_pattern_based),
|
||||||
"valid_from": (5, stix2.environment.partial_timestamp_based),
|
"valid_from": (5, stix2.equivalence.object.partial_timestamp_based),
|
||||||
"tdelta": 1, # One day interval
|
"tdelta": 1, # One day interval
|
||||||
},
|
},
|
||||||
"_internal": {
|
"_internal": {
|
||||||
|
@ -686,29 +742,31 @@ def test_semantic_equivalence_zero_match():
|
||||||
}
|
}
|
||||||
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS)
|
ind2 = stix2.v21.Indicator(id=INDICATOR_ID, **IND_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ind1, ind2, **weights)
|
env = stix2.Environment().object_similarity(ind1, ind2, **weights)
|
||||||
assert round(env) == 0
|
assert round(env) == 8
|
||||||
|
env = stix2.Environment().object_similarity(ind2, ind1, **weights)
|
||||||
|
assert round(env) == 8
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_different_spec_version():
|
def test_object_similarity_different_spec_version():
|
||||||
IND_KWARGS = dict(
|
IND_KWARGS = dict(
|
||||||
labels=["APTX"],
|
labels=["APTX"],
|
||||||
pattern="[ipv4-addr:value = '192.168.1.1']",
|
pattern="[ipv4-addr:value = '192.168.1.1']",
|
||||||
)
|
)
|
||||||
weights = {
|
weights = {
|
||||||
"indicator": {
|
"indicator": {
|
||||||
"indicator_types": (15, stix2.environment.partial_list_based),
|
"indicator_types": (15, stix2.equivalence.object.partial_list_based),
|
||||||
"pattern": (80, stix2.environment.custom_pattern_based),
|
"pattern": (80, stix2.equivalence.object.custom_pattern_based),
|
||||||
"valid_from": (5, stix2.environment.partial_timestamp_based),
|
"valid_from": (5, stix2.equivalence.object.partial_timestamp_based),
|
||||||
"tdelta": 1, # One day interval
|
"tdelta": 1, # One day interval
|
||||||
},
|
},
|
||||||
"_internal": {
|
|
||||||
"ignore_spec_version": True, # Disables spec_version check.
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
ind1 = stix2.v21.Indicator(id=INDICATOR_ID, **INDICATOR_KWARGS)
|
||||||
ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **IND_KWARGS)
|
ind2 = stix2.v20.Indicator(id=INDICATOR_ID, **IND_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(ind1, ind2, **weights)
|
env = stix2.Environment().object_similarity(ind1, ind2, ignore_spec_version=True, **weights)
|
||||||
|
assert round(env) == 0
|
||||||
|
|
||||||
|
env = stix2.Environment().object_similarity(ind2, ind1, ignore_spec_version=True, **weights)
|
||||||
assert round(env) == 0
|
assert round(env) == 0
|
||||||
|
|
||||||
|
|
||||||
|
@ -780,34 +838,36 @@ def test_semantic_equivalence_different_spec_version():
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
def test_semantic_equivalence_external_references(refs1, refs2, ret_val):
|
def test_object_similarity_external_references(refs1, refs2, ret_val):
|
||||||
value = stix2.environment.partial_external_reference_based(refs1, refs2)
|
value = stix2.equivalence.object.partial_external_reference_based(refs1, refs2)
|
||||||
assert value == ret_val
|
assert value == ret_val
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_timestamp():
|
def test_object_similarity_timestamp():
|
||||||
t1 = "2018-10-17T00:14:20.652Z"
|
t1 = "2018-10-17T00:14:20.652Z"
|
||||||
t2 = "2018-10-17T12:14:20.652Z"
|
t2 = "2018-10-17T12:14:20.652Z"
|
||||||
assert stix2.environment.partial_timestamp_based(t1, t2, 1) == 0.5
|
assert stix2.equivalence.object.partial_timestamp_based(t1, t2, 1) == 0.5
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_exact_match():
|
def test_object_similarity_exact_match():
|
||||||
t1 = "2018-10-17T00:14:20.652Z"
|
t1 = "2018-10-17T00:14:20.652Z"
|
||||||
t2 = "2018-10-17T12:14:20.652Z"
|
t2 = "2018-10-17T12:14:20.652Z"
|
||||||
assert stix2.environment.exact_match(t1, t2) == 0.0
|
assert stix2.equivalence.object.exact_match(t1, t2) == 0.0
|
||||||
|
|
||||||
|
|
||||||
def test_non_existent_config_for_object():
|
def test_no_datastore_fallsback_list_based_check_for_refs_check():
|
||||||
r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
|
r1 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
|
||||||
r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
|
r2 = stix2.v21.Report(id=REPORT_ID, **REPORT_KWARGS)
|
||||||
assert stix2.Environment().semantically_equivalent(r1, r2) == 0.0
|
prop_scores = {}
|
||||||
|
assert stix2.Environment().object_similarity(r1, r2, prop_scores) == 100.0
|
||||||
|
assert prop_scores["object_refs"]["check_type"] == "partial_list_based"
|
||||||
|
|
||||||
|
|
||||||
def custom_semantic_equivalence_method(obj1, obj2, **weights):
|
def custom_semantic_equivalence_method(obj1, obj2, **weights):
|
||||||
return 96.0, 100.0
|
return 96.0, 100.0
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_method_provided():
|
def test_object_similarity_method_provided():
|
||||||
# Because `method` is provided, `partial_list_based` will be ignored
|
# Because `method` is provided, `partial_list_based` will be ignored
|
||||||
TOOL2_KWARGS = dict(
|
TOOL2_KWARGS = dict(
|
||||||
name="Random Software",
|
name="Random Software",
|
||||||
|
@ -816,19 +876,19 @@ def test_semantic_equivalence_method_provided():
|
||||||
|
|
||||||
weights = {
|
weights = {
|
||||||
"tool": {
|
"tool": {
|
||||||
"tool_types": (20, stix2.environment.partial_list_based),
|
"tool_types": (20, stix2.equivalence.object.partial_list_based),
|
||||||
"name": (80, stix2.environment.partial_string_based),
|
"name": (80, stix2.equivalence.object.partial_string_based),
|
||||||
"method": custom_semantic_equivalence_method,
|
"method": custom_semantic_equivalence_method,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
||||||
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(tool1, tool2, **weights)
|
env = stix2.Environment().object_similarity(tool1, tool2, **weights)
|
||||||
assert round(env) == 96
|
assert round(env) == 96
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_prop_scores():
|
def test_object_similarity_prop_scores():
|
||||||
TOOL2_KWARGS = dict(
|
TOOL2_KWARGS = dict(
|
||||||
name="Random Software",
|
name="Random Software",
|
||||||
tool_types=["information-gathering"],
|
tool_types=["information-gathering"],
|
||||||
|
@ -838,7 +898,7 @@ def test_semantic_equivalence_prop_scores():
|
||||||
|
|
||||||
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
||||||
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
||||||
stix2.Environment().semantically_equivalent(tool1, tool2, prop_scores)
|
stix2.Environment().object_similarity(tool1, tool2, prop_scores)
|
||||||
assert len(prop_scores) == 4
|
assert len(prop_scores) == 4
|
||||||
assert round(prop_scores["matching_score"], 1) == 8.9
|
assert round(prop_scores["matching_score"], 1) == 8.9
|
||||||
assert round(prop_scores["sum_weights"], 1) == 100.0
|
assert round(prop_scores["sum_weights"], 1) == 100.0
|
||||||
|
@ -850,7 +910,7 @@ def custom_semantic_equivalence_method_prop_scores(obj1, obj2, prop_scores, **we
|
||||||
return 96.0, 100.0
|
return 96.0, 100.0
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_equivalence_prop_scores_method_provided():
|
def test_object_similarity_prop_scores_method_provided():
|
||||||
TOOL2_KWARGS = dict(
|
TOOL2_KWARGS = dict(
|
||||||
name="Random Software",
|
name="Random Software",
|
||||||
tool_types=["information-gathering"],
|
tool_types=["information-gathering"],
|
||||||
|
@ -868,7 +928,7 @@ def test_semantic_equivalence_prop_scores_method_provided():
|
||||||
|
|
||||||
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
tool1 = stix2.v21.Tool(id=TOOL_ID, **TOOL_KWARGS)
|
||||||
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
tool2 = stix2.v21.Tool(id=TOOL_ID, **TOOL2_KWARGS)
|
||||||
env = stix2.Environment().semantically_equivalent(tool1, tool2, prop_scores, **weights)
|
env = stix2.Environment().object_similarity(tool1, tool2, prop_scores, **weights)
|
||||||
assert round(env) == 96
|
assert round(env) == 96
|
||||||
assert len(prop_scores) == 2
|
assert len(prop_scores) == 2
|
||||||
assert prop_scores["matching_score"] == 96.0
|
assert prop_scores["matching_score"] == 96.0
|
||||||
|
@ -876,7 +936,8 @@ def test_semantic_equivalence_prop_scores_method_provided():
|
||||||
|
|
||||||
|
|
||||||
def test_versioned_checks(ds, ds2):
|
def test_versioned_checks(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
# Testing internal method
|
||||||
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": True,
|
"ignore_spec_version": True,
|
||||||
|
@ -889,7 +950,7 @@ def test_versioned_checks(ds, ds2):
|
||||||
|
|
||||||
|
|
||||||
def test_semantic_check_with_versioning(ds, ds2):
|
def test_semantic_check_with_versioning(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": False,
|
"ignore_spec_version": False,
|
||||||
|
@ -920,7 +981,7 @@ def test_semantic_check_with_versioning(ds, ds2):
|
||||||
|
|
||||||
|
|
||||||
def test_list_semantic_check(ds, ds2):
|
def test_list_semantic_check(ds, ds2):
|
||||||
weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
|
weights = stix2.equivalence.graph.WEIGHTS.copy()
|
||||||
weights.update({
|
weights.update({
|
||||||
"_internal": {
|
"_internal": {
|
||||||
"ignore_spec_version": False,
|
"ignore_spec_version": False,
|
||||||
|
@ -955,63 +1016,272 @@ def test_list_semantic_check(ds, ds2):
|
||||||
)
|
)
|
||||||
assert round(score) == 1
|
assert round(score) == 1
|
||||||
|
|
||||||
|
score = stix2.equivalence.object.list_reference_check(
|
||||||
|
object_refs2,
|
||||||
|
object_refs1,
|
||||||
|
ds2,
|
||||||
|
ds,
|
||||||
|
**weights,
|
||||||
|
)
|
||||||
|
assert round(score) == 1
|
||||||
|
|
||||||
def test_graph_equivalence_with_filesystem_source(ds):
|
|
||||||
weights = {
|
def test_graph_similarity_raises_value_error(ds):
|
||||||
"_internal": {
|
with pytest.raises(ValueError):
|
||||||
"ignore_spec_version": True,
|
prop_scores1 = {}
|
||||||
"versioning_checks": False,
|
stix2.Environment().graph_similarity(ds, ds2, prop_scores1, max_depth=-1)
|
||||||
"max_depth": 1,
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_filesystem_source(ds, fs):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(
|
||||||
|
fs, ds, prop_scores1,
|
||||||
|
ignore_spec_version=True,
|
||||||
|
versioning_checks=False,
|
||||||
|
max_depth=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(
|
||||||
|
ds, fs, prop_scores2,
|
||||||
|
ignore_spec_version=True,
|
||||||
|
versioning_checks=False,
|
||||||
|
max_depth=1,
|
||||||
|
)
|
||||||
|
|
||||||
|
assert round(env1) == 23
|
||||||
|
assert round(prop_scores1["matching_score"]) == 411
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 18
|
||||||
|
|
||||||
|
assert round(env2) == 23
|
||||||
|
assert round(prop_scores2["matching_score"]) == 411
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 18
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_depth_limiting():
|
||||||
|
g1 = [
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1986-02-08T00:20:17Z",
|
||||||
|
"modified": "1989-12-11T06:54:29Z",
|
||||||
|
"some1_ref": "foo--700a8a3c-9936-412f-b4eb-ede466476180",
|
||||||
|
"some2_ref": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--700a8a3c-9936-412f-b4eb-ede466476180",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1989-01-06T10:31:54Z",
|
||||||
|
"modified": "1995-06-18T10:25:01Z",
|
||||||
|
"some1_ref": "foo--705afd45-eb56-43fc-a214-313d63d199a3",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--705afd45-eb56-43fc-a214-313d63d199a3",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1977-11-06T21:19:29Z",
|
||||||
|
"modified": "1997-12-02T20:33:34Z",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--f4a999a3-df94-499d-9cac-6c02e21775ee",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1991-09-17T00:40:52Z",
|
||||||
|
"modified": "1992-12-06T11:02:47Z",
|
||||||
|
"name": "alice",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
g2 = [
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--71570479-3e6e-48d2-81fb-897454dec55d",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1975-12-22T05:20:38Z",
|
||||||
|
"modified": "1980-11-11T01:09:03Z",
|
||||||
|
"some1_ref": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579",
|
||||||
|
"some2_ref": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--4aeda39b-31fa-4ffb-a847-d8edc175a579",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1976-01-05T08:32:03Z",
|
||||||
|
"modified": "1980-11-09T05:41:02Z",
|
||||||
|
"some1_ref": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--689252c3-5d20-43ff-bbf7-c8e45d713768",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1974-09-11T18:56:30Z",
|
||||||
|
"modified": "1976-10-31T11:59:43Z",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "foo",
|
||||||
|
"id": "foo--941e48d6-3100-4419-9e8c-cf1eb59e71b2",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1985-01-03T01:07:03Z",
|
||||||
|
"modified": "1992-07-20T21:32:31Z",
|
||||||
|
"name": "alice",
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
mem_store1 = stix2.MemorySource(g1)
|
||||||
|
mem_store2 = stix2.MemorySource(g2)
|
||||||
|
|
||||||
|
custom_weights = {
|
||||||
|
"foo": {
|
||||||
|
"some1_ref": (33, stix2.equivalence.object.reference_check),
|
||||||
|
"some2_ref": (33, stix2.equivalence.object.reference_check),
|
||||||
|
"name": (34, stix2.equivalence.object.partial_string_based),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.equivalence.graph.graph_similarity(
|
||||||
|
mem_store1, mem_store2, prop_scores1, **custom_weights
|
||||||
|
)
|
||||||
|
|
||||||
|
assert round(env1) == 38
|
||||||
|
assert round(prop_scores1["matching_score"]) == 300
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 8
|
||||||
|
# from 'alice' check in de-reference
|
||||||
|
assert prop_scores1['summary']['foo--71570479-3e6e-48d2-81fb-897454dec55d']['prop_score']['some2_ref']['weight'] == 33
|
||||||
|
assert prop_scores1['summary']['foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd']['prop_score']['some2_ref']['weight'] == 33
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.equivalence.graph.graph_similarity(
|
||||||
|
mem_store2, mem_store1, prop_scores2, **custom_weights
|
||||||
|
)
|
||||||
|
|
||||||
|
assert round(env2) == 38
|
||||||
|
assert round(prop_scores2["matching_score"]) == 300
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 8
|
||||||
|
# from 'alice' check in de-reference
|
||||||
|
assert prop_scores2['summary']['foo--71570479-3e6e-48d2-81fb-897454dec55d']['prop_score']['some2_ref']['weight'] == 33
|
||||||
|
assert prop_scores2['summary']['foo--07f9dd2a-1cce-45bb-8cbe-dba3f007aafd']['prop_score']['some2_ref']['weight'] == 33
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_duplicate_graph(ds):
|
||||||
prop_scores = {}
|
prop_scores = {}
|
||||||
fs = stix2.FileSystemSource(FS_PATH)
|
env = stix2.Environment().graph_similarity(ds, ds, prop_scores)
|
||||||
env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights)
|
assert round(env) == 100
|
||||||
assert round(env) == 24
|
assert round(prop_scores["matching_score"]) == 800
|
||||||
assert round(prop_scores["matching_score"]) == 122
|
assert round(prop_scores["len_pairs"]) == 8
|
||||||
assert round(prop_scores["sum_weights"]) == 500
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_versioning_check_on(ds2, ds):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1, versioning_checks=True)
|
||||||
|
assert round(env1) == 88
|
||||||
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2, versioning_checks=True)
|
||||||
|
assert round(env2) == 88
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_similarity_with_versioning_check_off(ds2, ds):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_similarity(ds, ds2, prop_scores1)
|
||||||
|
assert round(env1) == 88
|
||||||
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_similarity(ds2, ds, prop_scores2)
|
||||||
|
assert round(env2) == 88
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
|
def test_graph_equivalence_with_filesystem_source(ds, fs):
|
||||||
|
prop_scores1 = {}
|
||||||
|
env1 = stix2.Environment().graph_equivalence(fs, ds, prop_scores1, ignore_spec_version=True)
|
||||||
|
|
||||||
|
# Switching parameters
|
||||||
|
prop_scores2 = {}
|
||||||
|
env2 = stix2.Environment().graph_equivalence(ds, fs, prop_scores2, ignore_spec_version=True)
|
||||||
|
|
||||||
|
assert env1 is False
|
||||||
|
assert round(prop_scores1["matching_score"]) == 411
|
||||||
|
assert round(prop_scores1["len_pairs"]) == 18
|
||||||
|
|
||||||
|
assert env2 is False
|
||||||
|
assert round(prop_scores2["matching_score"]) == 411
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 18
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_duplicate_graph(ds):
|
def test_graph_equivalence_with_duplicate_graph(ds):
|
||||||
weights = {
|
|
||||||
"_internal": {
|
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": False,
|
|
||||||
"max_depth": 1,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
prop_scores = {}
|
prop_scores = {}
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights)
|
env = stix2.Environment().graph_equivalence(ds, ds, prop_scores)
|
||||||
assert round(env) == 100
|
assert env is True
|
||||||
assert round(prop_scores["matching_score"]) == 800
|
assert round(prop_scores["matching_score"]) == 800
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert round(prop_scores["len_pairs"]) == 8
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_versioning_check_on(ds2, ds):
|
def test_graph_equivalence_with_versioning_check_on(ds2, ds):
|
||||||
weights = {
|
prop_scores1 = {}
|
||||||
"_internal": {
|
env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1, versioning_checks=True)
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": True,
|
# Switching parameters
|
||||||
"max_depth": 1,
|
prop_scores2 = {}
|
||||||
},
|
env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2, versioning_checks=True)
|
||||||
}
|
|
||||||
prop_scores = {}
|
assert env1 is True
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
assert round(env) == 93
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
assert round(prop_scores["matching_score"]) == 745
|
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert env2 is True
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
||||||
|
|
||||||
def test_graph_equivalence_with_versioning_check_off(ds2, ds):
|
def test_graph_equivalence_with_versioning_check_off(ds2, ds):
|
||||||
weights = {
|
prop_scores1 = {}
|
||||||
"_internal": {
|
env1 = stix2.Environment().graph_equivalence(ds, ds2, prop_scores1)
|
||||||
"ignore_spec_version": False,
|
|
||||||
"versioning_checks": False,
|
# Switching parameters
|
||||||
"max_depth": 1,
|
prop_scores2 = {}
|
||||||
},
|
env2 = stix2.Environment().graph_equivalence(ds2, ds, prop_scores2)
|
||||||
}
|
|
||||||
prop_scores = {}
|
assert env1 is True
|
||||||
env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
|
assert round(prop_scores1["matching_score"]) == 789
|
||||||
assert round(env) == 93
|
assert round(prop_scores1["len_pairs"]) == 9
|
||||||
assert round(prop_scores["matching_score"]) == 745
|
|
||||||
assert round(prop_scores["sum_weights"]) == 800
|
assert env2 is True
|
||||||
|
assert round(prop_scores2["matching_score"]) == 789
|
||||||
|
assert round(prop_scores2["len_pairs"]) == 9
|
||||||
|
|
||||||
|
prop_scores1["matching_score"] = round(prop_scores1["matching_score"], 3)
|
||||||
|
prop_scores2["matching_score"] = round(prop_scores2["matching_score"], 3)
|
||||||
|
assert json.dumps(prop_scores1, sort_keys=True, indent=4) == json.dumps(prop_scores2, sort_keys=True, indent=4)
|
||||||
|
|
|
@ -4,6 +4,7 @@ import pytest
|
||||||
|
|
||||||
import stix2
|
import stix2
|
||||||
import stix2.exceptions
|
import stix2.exceptions
|
||||||
|
import stix2.properties
|
||||||
import stix2.utils
|
import stix2.utils
|
||||||
import stix2.v21
|
import stix2.v21
|
||||||
import stix2.versioning
|
import stix2.versioning
|
||||||
|
@ -179,6 +180,62 @@ def test_versioning_error_dict_bad_modified_value():
|
||||||
"but have the same id and modified timestamp do not have defined consumer behavior."
|
"but have the same id and modified timestamp do not have defined consumer behavior."
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_dict_unregistered_no_modified():
|
||||||
|
d = {
|
||||||
|
"type": "not-registered",
|
||||||
|
"id": "not-registered--4da54535-47b7-468c-88fa-d13b04033c4b",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"created": "1995-04-07T15:37:48.178Z",
|
||||||
|
}
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d)
|
||||||
|
assert "modified" in new_d
|
||||||
|
assert new_d["modified"] > stix2.utils.parse_into_datetime(d["created"])
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d, modified="1996-11-20T01:19:29.134Z")
|
||||||
|
assert new_d["modified"] == "1996-11-20T01:19:29.134Z"
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_dict_unregistered_unversionable():
|
||||||
|
d = {
|
||||||
|
"type": "not-registered",
|
||||||
|
"id": "not-registered--4da54535-47b7-468c-88fa-d13b04033c4b",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"modified": "1995-04-07T15:37:48.178Z",
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
stix2.versioning.new_version(d)
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
# should fail even if we provide a "created" kwarg.
|
||||||
|
stix2.versioning.new_version(d, created="1985-06-29T06:09:51.157Z")
|
||||||
|
|
||||||
|
|
||||||
|
def test_versioning_custom_object():
|
||||||
|
@stix2.v21.CustomObject(
|
||||||
|
"x-versionable-all-optional-21", [
|
||||||
|
("created", stix2.properties.TimestampProperty()),
|
||||||
|
("modified", stix2.properties.TimestampProperty()),
|
||||||
|
("revoked", stix2.properties.BooleanProperty()),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
class CustomSDO:
|
||||||
|
pass
|
||||||
|
|
||||||
|
obj = CustomSDO(created="1990-12-18T17:56:11.346234Z")
|
||||||
|
new_obj = stix2.versioning.new_version(obj)
|
||||||
|
|
||||||
|
assert "modified" in new_obj
|
||||||
|
assert new_obj["modified"] > new_obj["created"]
|
||||||
|
|
||||||
|
obj = CustomSDO()
|
||||||
|
with pytest.raises(stix2.exceptions.ObjectNotVersionableError):
|
||||||
|
# fails due to insufficient properties on the object, even though its
|
||||||
|
# type supports versioning.
|
||||||
|
stix2.versioning.new_version(obj)
|
||||||
|
|
||||||
|
|
||||||
def test_versioning_error_dict_no_modified_value():
|
def test_versioning_error_dict_no_modified_value():
|
||||||
campaign_v1 = {
|
campaign_v1 = {
|
||||||
'type': 'campaign',
|
'type': 'campaign',
|
||||||
|
@ -193,10 +250,10 @@ def test_versioning_error_dict_no_modified_value():
|
||||||
|
|
||||||
def test_making_new_version_invalid_cls():
|
def test_making_new_version_invalid_cls():
|
||||||
campaign_v1 = "This is a campaign."
|
campaign_v1 = "This is a campaign."
|
||||||
with pytest.raises(ValueError) as excinfo:
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError) as excinfo:
|
||||||
stix2.versioning.new_version(campaign_v1, name="fred")
|
stix2.versioning.new_version(campaign_v1, name="fred")
|
||||||
|
|
||||||
assert 'cannot create new version of object of this type' in str(excinfo.value)
|
assert excinfo.value.object is campaign_v1
|
||||||
|
|
||||||
|
|
||||||
def test_revoke_dict():
|
def test_revoke_dict():
|
||||||
|
@ -216,7 +273,7 @@ def test_revoke_dict():
|
||||||
|
|
||||||
def test_revoke_unversionable():
|
def test_revoke_unversionable():
|
||||||
sco = stix2.v21.File(name="data.txt")
|
sco = stix2.v21.File(name="data.txt")
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
sco.revoke()
|
sco.revoke()
|
||||||
|
|
||||||
|
|
||||||
|
@ -318,7 +375,7 @@ def test_version_unversionable_dict():
|
||||||
"name": "data.txt",
|
"name": "data.txt",
|
||||||
}
|
}
|
||||||
|
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
stix2.versioning.new_version(f)
|
stix2.versioning.new_version(f)
|
||||||
|
|
||||||
|
|
||||||
|
@ -345,6 +402,23 @@ def test_version_sco_with_custom():
|
||||||
revoked_obj = stix2.versioning.revoke(new_file_sco_obj)
|
revoked_obj = stix2.versioning.revoke(new_file_sco_obj)
|
||||||
assert revoked_obj.revoked
|
assert revoked_obj.revoked
|
||||||
|
|
||||||
|
# Same thing with a dict
|
||||||
|
d = {
|
||||||
|
"type": "file",
|
||||||
|
"id": "file--d287f10a-98b4-4a47-8fa0-64b12695ea58",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"name": "data.txt",
|
||||||
|
"created": "1973-11-23T02:31:37Z",
|
||||||
|
"modified": "1991-05-13T19:24:57Z",
|
||||||
|
"revoked": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
new_d = stix2.versioning.new_version(d, size=1234)
|
||||||
|
assert new_d["size"] == 1234
|
||||||
|
|
||||||
|
revoked_d = stix2.versioning.revoke(new_d)
|
||||||
|
assert revoked_d["revoked"]
|
||||||
|
|
||||||
|
|
||||||
def test_version_sco_id_contributing_properties():
|
def test_version_sco_id_contributing_properties():
|
||||||
file_sco_obj = stix2.v21.File(
|
file_sco_obj = stix2.v21.File(
|
||||||
|
@ -378,6 +452,33 @@ def test_version_sco_id_contributing_properties_dict():
|
||||||
assert e.value.unchangable_properties == {"name"}
|
assert e.value.unchangable_properties == {"name"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_version_marking():
|
||||||
|
m = stix2.v21.MarkingDefinition(
|
||||||
|
name="a name",
|
||||||
|
created="1982-11-29T12:20:13.723Z",
|
||||||
|
definition_type="statement",
|
||||||
|
definition={"statement": "Copyright (c) 2000-2020 Acme Corp"},
|
||||||
|
)
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
|
stix2.versioning.new_version(m)
|
||||||
|
|
||||||
|
m = {
|
||||||
|
"type": "marking-definition",
|
||||||
|
"id": "marking-definition--2a9f3f6e-5cbd-423b-a40d-02aefd29e612",
|
||||||
|
"spec_version": "2.1",
|
||||||
|
"name": "a name",
|
||||||
|
"created": "1982-11-29T12:20:13.723Z",
|
||||||
|
"definition_type": "statement",
|
||||||
|
"definition": {
|
||||||
|
"statement": "Copyright (c) 2000-2020 Acme Corp",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
with pytest.raises(stix2.exceptions.TypeNotVersionableError):
|
||||||
|
stix2.versioning.new_version(m)
|
||||||
|
|
||||||
|
|
||||||
def test_version_disable_custom():
|
def test_version_disable_custom():
|
||||||
m = stix2.v21.Malware(
|
m = stix2.v21.Malware(
|
||||||
name="foo", description="Steals your identity!", is_family=False,
|
name="foo", description="Steals your identity!", is_family=False,
|
||||||
|
|
|
@ -7,7 +7,6 @@ import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
import pytz
|
import pytz
|
||||||
import six
|
|
||||||
|
|
||||||
import stix2.registry as mappings
|
import stix2.registry as mappings
|
||||||
import stix2.version
|
import stix2.version
|
||||||
|
@ -70,7 +69,7 @@ def _to_enum(value, enum_type, enum_default=None):
|
||||||
if not isinstance(value, enum_type):
|
if not isinstance(value, enum_type):
|
||||||
if value is None and enum_default is not None:
|
if value is None and enum_default is not None:
|
||||||
value = enum_default
|
value = enum_default
|
||||||
elif isinstance(value, six.string_types):
|
elif isinstance(value, str):
|
||||||
value = enum_type[value.upper()]
|
value = enum_type[value.upper()]
|
||||||
else:
|
else:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
|
|
|
@ -3,8 +3,6 @@
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
import six
|
|
||||||
|
|
||||||
from ..custom import _custom_marking_builder
|
from ..custom import _custom_marking_builder
|
||||||
from ..markings import _MarkingsMixin
|
from ..markings import _MarkingsMixin
|
||||||
from ..markings.utils import check_tlp_marking
|
from ..markings.utils import check_tlp_marking
|
||||||
|
@ -21,7 +19,7 @@ def _should_set_millisecond(cr, marking_type):
|
||||||
if marking_type == TLPMarking:
|
if marking_type == TLPMarking:
|
||||||
return True
|
return True
|
||||||
# otherwise, precision is kept from how it was given
|
# otherwise, precision is kept from how it was given
|
||||||
if isinstance(cr, six.string_types):
|
if isinstance(cr, str):
|
||||||
if '.' in cr:
|
if '.' in cr:
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -2,9 +2,9 @@
|
||||||
|
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
import itertools
|
import itertools
|
||||||
|
from urllib.parse import quote_plus
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from six.moves.urllib.parse import quote_plus
|
|
||||||
from stix2patterns.validator import run_validator
|
from stix2patterns.validator import run_validator
|
||||||
|
|
||||||
from ..custom import _custom_object_builder
|
from ..custom import _custom_object_builder
|
||||||
|
|
|
@ -9,13 +9,13 @@ import uuid
|
||||||
import stix2.base
|
import stix2.base
|
||||||
import stix2.registry
|
import stix2.registry
|
||||||
from stix2.utils import (
|
from stix2.utils import (
|
||||||
detect_spec_version, get_timestamp, is_sco, is_sdo, is_sro,
|
detect_spec_version, get_timestamp, is_sco, parse_into_datetime,
|
||||||
parse_into_datetime,
|
|
||||||
)
|
)
|
||||||
import stix2.v20
|
import stix2.v20
|
||||||
|
|
||||||
from .exceptions import (
|
from .exceptions import (
|
||||||
InvalidValueError, RevokeError, UnmodifiablePropertyError,
|
InvalidValueError, ObjectNotVersionableError, RevokeError,
|
||||||
|
TypeNotVersionableError, UnmodifiablePropertyError,
|
||||||
)
|
)
|
||||||
|
|
||||||
# STIX object properties that cannot be modified
|
# STIX object properties that cannot be modified
|
||||||
|
@ -56,27 +56,16 @@ def _fudge_modified(old_modified, new_modified, use_stix21):
|
||||||
return new_modified
|
return new_modified
|
||||||
|
|
||||||
|
|
||||||
def _is_versionable(data):
|
def _get_stix_version(data):
|
||||||
"""
|
"""
|
||||||
Determine whether the given object is versionable. This check is done on
|
Bit of factored out functionality for getting/detecting the STIX version
|
||||||
the basis of support for three properties for the object type: "created",
|
of the given value.
|
||||||
"modified", and "revoked". If all three are supported, the object is
|
|
||||||
versionable; otherwise it is not. Dicts must have a "type" property whose
|
|
||||||
value is for a registered object type. This is used to determine a
|
|
||||||
complete set of supported properties for the type.
|
|
||||||
|
|
||||||
Also, detect whether it represents a STIX 2.1 or greater spec version.
|
:param data: An object, e.g. _STIXBase instance or dict
|
||||||
|
:return: The STIX version as a string in "X.Y" notation, or None if the
|
||||||
:param data: The object to check. Must be either a stix object, or a dict
|
version could not be determined.
|
||||||
with a "type" property.
|
|
||||||
:return: A 2-tuple of bools: the first is True if the object is versionable
|
|
||||||
and False if not; the second is True if the object is STIX 2.1+ and
|
|
||||||
False if not.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
is_versionable = False
|
|
||||||
stix_version = None
|
stix_version = None
|
||||||
|
|
||||||
if isinstance(data, Mapping):
|
if isinstance(data, Mapping):
|
||||||
|
|
||||||
# First, determine spec version. It's easy for our stix2 objects; more
|
# First, determine spec version. It's easy for our stix2 objects; more
|
||||||
|
@ -88,36 +77,112 @@ def _is_versionable(data):
|
||||||
elif isinstance(data, dict):
|
elif isinstance(data, dict):
|
||||||
stix_version = detect_spec_version(data)
|
stix_version = detect_spec_version(data)
|
||||||
|
|
||||||
|
return stix_version
|
||||||
|
|
||||||
|
|
||||||
|
def _is_versionable_type(data):
|
||||||
|
"""
|
||||||
|
Determine whether type of the given object is versionable. This check is
|
||||||
|
done on the basis of support for three properties for the object type:
|
||||||
|
"created", "modified", and "revoked". If all three are supported, the
|
||||||
|
object type is versionable; otherwise it is not. Dicts must have a "type"
|
||||||
|
property. This is used in STIX version detection and to determine a
|
||||||
|
complete set of supported properties for the type.
|
||||||
|
|
||||||
|
If a dict is passed whose "type" is unregistered, then this library has no
|
||||||
|
knowledge of the type. It can't determine what properties are "supported".
|
||||||
|
This function will be lax and treat the type as versionable.
|
||||||
|
|
||||||
|
Note that this support check is not sufficient for creating a new object
|
||||||
|
version. Support for the versioning properties does not mean that
|
||||||
|
sufficient properties are actually present on the object.
|
||||||
|
|
||||||
|
Also, detect whether it represents a STIX 2.1 or greater spec version.
|
||||||
|
|
||||||
|
:param data: The object to check. Must be either a stix object, or a dict
|
||||||
|
with a "type" property.
|
||||||
|
:return: A 2-tuple: the first element is True if the object is versionable
|
||||||
|
and False if not; the second is the STIX version as a string in "X.Y"
|
||||||
|
notation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
is_versionable = False
|
||||||
|
stix_version = None
|
||||||
|
|
||||||
|
if isinstance(data, Mapping):
|
||||||
|
# First, determine spec version
|
||||||
|
stix_version = _get_stix_version(data)
|
||||||
|
|
||||||
# Then, determine versionability.
|
# Then, determine versionability.
|
||||||
|
if isinstance(data, stix2.base._STIXBase):
|
||||||
|
is_versionable = _VERSIONING_PROPERTIES.issubset(
|
||||||
|
data._properties,
|
||||||
|
)
|
||||||
|
|
||||||
# This should be sufficient for STIX objects; maybe we get lucky with
|
|
||||||
# dicts here but probably not.
|
|
||||||
if data.keys() >= _VERSIONING_PROPERTIES:
|
|
||||||
is_versionable = True
|
|
||||||
|
|
||||||
# Tougher to handle dicts. We need to consider STIX version, map to a
|
|
||||||
# registered class, and from that get a more complete picture of its
|
|
||||||
# properties.
|
|
||||||
elif isinstance(data, dict):
|
elif isinstance(data, dict):
|
||||||
obj_type = data["type"]
|
# Tougher to handle dicts. We need to consider STIX version,
|
||||||
|
# map to a registered class, and from that get a more complete
|
||||||
|
# picture of its properties.
|
||||||
|
|
||||||
if is_sdo(obj_type, stix_version) or is_sro(obj_type, stix_version):
|
cls = stix2.registry.class_for_type(data.get("type"), stix_version)
|
||||||
# Should we bother checking properties for SDOs/SROs?
|
if cls:
|
||||||
# They were designed to be versionable.
|
|
||||||
is_versionable = True
|
|
||||||
|
|
||||||
elif is_sco(obj_type, stix_version):
|
|
||||||
# but do check SCOs
|
|
||||||
cls = stix2.registry.class_for_type(
|
|
||||||
obj_type, stix_version, "observables",
|
|
||||||
)
|
|
||||||
is_versionable = _VERSIONING_PROPERTIES.issubset(
|
is_versionable = _VERSIONING_PROPERTIES.issubset(
|
||||||
cls._properties,
|
cls._properties,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# The type is not registered, so we have no knowledge of
|
||||||
|
# what properties are supported. Let's be lax and let them
|
||||||
|
# version it.
|
||||||
|
is_versionable = True
|
||||||
|
|
||||||
return is_versionable, stix_version
|
return is_versionable, stix_version
|
||||||
|
|
||||||
|
|
||||||
|
def _check_versionable_object(data):
|
||||||
|
"""
|
||||||
|
Determine whether there are or may be sufficient properties present on
|
||||||
|
an object to allow versioning. Raises an exception if the object can't be
|
||||||
|
versioned.
|
||||||
|
|
||||||
|
Also detect STIX spec version.
|
||||||
|
|
||||||
|
:param data: The object to check, e.g. dict with a "type" property, or
|
||||||
|
_STIXBase instance
|
||||||
|
:return: True if the object is STIX 2.1+, or False if not
|
||||||
|
:raises TypeNotVersionableError: If the object didn't have the versioning
|
||||||
|
properties and the type was found to not support them
|
||||||
|
:raises ObjectNotVersionableError: If the type was found to support
|
||||||
|
versioning but there were insufficient properties on the object
|
||||||
|
"""
|
||||||
|
if isinstance(data, Mapping):
|
||||||
|
if data.keys() >= _VERSIONING_PROPERTIES:
|
||||||
|
# If the properties all already exist in the object, assume they
|
||||||
|
# are either supported by the type, or are custom properties, and
|
||||||
|
# allow versioning.
|
||||||
|
stix_version = _get_stix_version(data)
|
||||||
|
|
||||||
|
else:
|
||||||
|
is_versionable_type, stix_version = _is_versionable_type(data)
|
||||||
|
if is_versionable_type:
|
||||||
|
# The type supports the versioning properties (or we don't
|
||||||
|
# recognize it and just assume it does). The question shifts
|
||||||
|
# to whether the object has sufficient properties to create a
|
||||||
|
# new version. Just require "created" for now. We need at
|
||||||
|
# least that as a starting point for new version timestamps.
|
||||||
|
is_versionable = "created" in data
|
||||||
|
|
||||||
|
if not is_versionable:
|
||||||
|
raise ObjectNotVersionableError(data)
|
||||||
|
else:
|
||||||
|
raise TypeNotVersionableError(data)
|
||||||
|
|
||||||
|
else:
|
||||||
|
raise TypeNotVersionableError(data)
|
||||||
|
|
||||||
|
return stix_version
|
||||||
|
|
||||||
|
|
||||||
def new_version(data, allow_custom=None, **kwargs):
|
def new_version(data, allow_custom=None, **kwargs):
|
||||||
"""
|
"""
|
||||||
Create a new version of a STIX object, by modifying properties and
|
Create a new version of a STIX object, by modifying properties and
|
||||||
|
@ -134,13 +199,7 @@ def new_version(data, allow_custom=None, **kwargs):
|
||||||
:return: The new object.
|
:return: The new object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
is_versionable, stix_version = _is_versionable(data)
|
stix_version = _check_versionable_object(data)
|
||||||
|
|
||||||
if not is_versionable:
|
|
||||||
raise ValueError(
|
|
||||||
"cannot create new version of object of this type! "
|
|
||||||
"Try a dictionary or instance of an SDO or SRO class.",
|
|
||||||
)
|
|
||||||
|
|
||||||
if data.get('revoked'):
|
if data.get('revoked'):
|
||||||
raise RevokeError("new_version")
|
raise RevokeError("new_version")
|
||||||
|
@ -178,36 +237,34 @@ def new_version(data, allow_custom=None, **kwargs):
|
||||||
# to know which rules to apply.
|
# to know which rules to apply.
|
||||||
precision_constraint = "min" if stix_version == "2.1" else "exact"
|
precision_constraint = "min" if stix_version == "2.1" else "exact"
|
||||||
|
|
||||||
|
old_modified = data.get("modified") or data.get("created")
|
||||||
|
old_modified = parse_into_datetime(
|
||||||
|
old_modified, precision="millisecond",
|
||||||
|
precision_constraint=precision_constraint,
|
||||||
|
)
|
||||||
|
|
||||||
cls = type(data)
|
cls = type(data)
|
||||||
if 'modified' not in kwargs:
|
if 'modified' in kwargs:
|
||||||
old_modified = parse_into_datetime(
|
new_modified = parse_into_datetime(
|
||||||
data["modified"], precision="millisecond",
|
|
||||||
precision_constraint=precision_constraint,
|
|
||||||
)
|
|
||||||
|
|
||||||
new_modified = get_timestamp()
|
|
||||||
new_modified = _fudge_modified(
|
|
||||||
old_modified, new_modified, stix_version == "2.1",
|
|
||||||
)
|
|
||||||
|
|
||||||
kwargs['modified'] = new_modified
|
|
||||||
|
|
||||||
elif 'modified' in data:
|
|
||||||
old_modified_property = parse_into_datetime(
|
|
||||||
data.get('modified'), precision='millisecond',
|
|
||||||
precision_constraint=precision_constraint,
|
|
||||||
)
|
|
||||||
new_modified_property = parse_into_datetime(
|
|
||||||
kwargs['modified'], precision='millisecond',
|
kwargs['modified'], precision='millisecond',
|
||||||
precision_constraint=precision_constraint,
|
precision_constraint=precision_constraint,
|
||||||
)
|
)
|
||||||
if new_modified_property <= old_modified_property:
|
if new_modified <= old_modified:
|
||||||
raise InvalidValueError(
|
raise InvalidValueError(
|
||||||
cls, 'modified',
|
cls, 'modified',
|
||||||
"The new modified datetime cannot be before than or equal to the current modified datetime."
|
"The new modified datetime cannot be before than or equal to the current modified datetime."
|
||||||
"It cannot be equal, as according to STIX 2 specification, objects that are different "
|
"It cannot be equal, as according to STIX 2 specification, objects that are different "
|
||||||
"but have the same id and modified timestamp do not have defined consumer behavior.",
|
"but have the same id and modified timestamp do not have defined consumer behavior.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
else:
|
||||||
|
new_modified = get_timestamp()
|
||||||
|
new_modified = _fudge_modified(
|
||||||
|
old_modified, new_modified, stix_version != "2.0",
|
||||||
|
)
|
||||||
|
|
||||||
|
kwargs['modified'] = new_modified
|
||||||
|
|
||||||
new_obj_inner.update(kwargs)
|
new_obj_inner.update(kwargs)
|
||||||
|
|
||||||
# Set allow_custom appropriately if versioning an object. We will ignore
|
# Set allow_custom appropriately if versioning an object. We will ignore
|
||||||
|
|
|
@ -161,6 +161,13 @@ def _setup_workbench():
|
||||||
new_class = type(obj_type.__name__, (obj_type,), new_class_dict)
|
new_class = type(obj_type.__name__, (obj_type,), new_class_dict)
|
||||||
factory_func = functools.partial(_environ.create, new_class)
|
factory_func = functools.partial(_environ.create, new_class)
|
||||||
|
|
||||||
|
# Copy over some class attributes that other code expects to find
|
||||||
|
factory_func._type = obj_type._type
|
||||||
|
factory_func._properties = obj_type._properties
|
||||||
|
if hasattr(obj_type, "_id_contributing_properties"):
|
||||||
|
factory_func._id_contributing_properties = \
|
||||||
|
obj_type._id_contributing_properties
|
||||||
|
|
||||||
# Add our new "class" to this module's globals and to the library-wide
|
# Add our new "class" to this module's globals and to the library-wide
|
||||||
# mapping. This allows parse() to use the wrapped classes.
|
# mapping. This allows parse() to use the wrapped classes.
|
||||||
globals()[obj_type.__name__] = factory_func
|
globals()[obj_type.__name__] = factory_func
|
||||||
|
|
Loading…
Reference in New Issue