diff --git a/.gitignore b/.gitignore
index 9758937..72b31cd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,7 +55,8 @@ coverage.xml
# Sphinx documentation
docs/_build/
.ipynb_checkpoints
-default_sem_eq_weights.rst
+graph_default_sem_eq_weights.rst
+object_default_sem_eq_weights.rst
# PyBuilder
target/
diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst
new file mode 100644
index 0000000..37c6cc0
--- /dev/null
+++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst
@@ -0,0 +1,5 @@
+comparison
+============================================
+
+.. automodule:: stix2.equivalence.pattern.compare.comparison
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst
new file mode 100644
index 0000000..463e11f
--- /dev/null
+++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst
@@ -0,0 +1,5 @@
+observation
+=============================================
+
+.. automodule:: stix2.equivalence.pattern.compare.observation
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst b/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst
new file mode 100644
index 0000000..3f64bea
--- /dev/null
+++ b/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst
@@ -0,0 +1,5 @@
+compare
+=================================
+
+.. automodule:: stix2.equivalence.pattern.compare
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst b/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst
new file mode 100644
index 0000000..a26102b
--- /dev/null
+++ b/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst
@@ -0,0 +1,5 @@
+transform
+===================================
+
+.. automodule:: stix2.equivalence.pattern.transform
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst
new file mode 100644
index 0000000..e2b5d42
--- /dev/null
+++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst
@@ -0,0 +1,5 @@
+comparison
+==============================================
+
+.. automodule:: stix2.equivalence.pattern.transform.comparison
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst
new file mode 100644
index 0000000..607bdcf
--- /dev/null
+++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst
@@ -0,0 +1,5 @@
+observation
+===============================================
+
+.. automodule:: stix2.equivalence.pattern.transform.observation
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst
new file mode 100644
index 0000000..f8191c3
--- /dev/null
+++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst
@@ -0,0 +1,5 @@
+specials
+============================================
+
+.. automodule:: stix2.equivalence.pattern.transform.specials
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst b/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst
deleted file mode 100644
index 8e53da7..0000000
--- a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-comparison
-==============
-
-.. automodule:: stix2.equivalence.patterns.compare.comparison
- :members:
diff --git a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst b/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst
deleted file mode 100644
index 1abd64e..0000000
--- a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-observation
-==============
-
-.. automodule:: stix2.equivalence.patterns.compare.observation
- :members:
diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst
deleted file mode 100644
index 2cf8388..0000000
--- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-comparison
-==============
-
-.. automodule:: stix2.equivalence.patterns.transform.comparison
- :members:
diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst
deleted file mode 100644
index 1815e7e..0000000
--- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-observation
-==============
-
-.. automodule:: stix2.equivalence.patterns.transform.observation
- :members:
diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst
deleted file mode 100644
index 7930ae2..0000000
--- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-specials
-==============
-
-.. automodule:: stix2.equivalence.patterns.transform.specials
- :members:
diff --git a/docs/api/equivalence/stix2.equivalence.graph.rst b/docs/api/equivalence/stix2.equivalence.graph.rst
new file mode 100644
index 0000000..0cc2922
--- /dev/null
+++ b/docs/api/equivalence/stix2.equivalence.graph.rst
@@ -0,0 +1,5 @@
+graph
+=======================
+
+.. automodule:: stix2.equivalence.graph
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/stix2.equivalence.object.rst b/docs/api/equivalence/stix2.equivalence.object.rst
new file mode 100644
index 0000000..76f5274
--- /dev/null
+++ b/docs/api/equivalence/stix2.equivalence.object.rst
@@ -0,0 +1,5 @@
+object
+========================
+
+.. automodule:: stix2.equivalence.object
+ :members:
\ No newline at end of file
diff --git a/docs/api/equivalence/stix2.equivalence.pattern.rst b/docs/api/equivalence/stix2.equivalence.pattern.rst
new file mode 100644
index 0000000..5246ed2
--- /dev/null
+++ b/docs/api/equivalence/stix2.equivalence.pattern.rst
@@ -0,0 +1,5 @@
+pattern
+==============
+
+.. automodule:: stix2.equivalence.pattern
+ :members:
diff --git a/docs/api/equivalence/stix2.equivalence.patterns.rst b/docs/api/equivalence/stix2.equivalence.patterns.rst
deleted file mode 100644
index 32377f1..0000000
--- a/docs/api/equivalence/stix2.equivalence.patterns.rst
+++ /dev/null
@@ -1,5 +0,0 @@
-patterns
-==============
-
-.. automodule:: stix2.equivalence.patterns
- :members:
diff --git a/docs/conf.py b/docs/conf.py
index 8b372d5..9723e39 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -8,7 +8,8 @@ from six import class_types
from sphinx.ext.autodoc import ClassDocumenter
from stix2.base import _STIXBase
-from stix2.environment import WEIGHTS
+from stix2.equivalence.graph import GRAPH_WEIGHTS
+from stix2.equivalence.object import WEIGHTS
from stix2.version import __version__
sys.path.insert(0, os.path.abspath('..'))
@@ -62,12 +63,19 @@ latex_documents = [
]
# Add a formatted version of environment.WEIGHTS
-default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__)
-default_sem_eq_weights = default_sem_eq_weights.replace('\n', '\n ')
-default_sem_eq_weights = default_sem_eq_weights.replace(' "', ' ')
-default_sem_eq_weights = default_sem_eq_weights.replace('"\n', '\n')
-with open('default_sem_eq_weights.rst', 'w') as f:
- f.write(".. code-block:: py\n\n {}\n\n".format(default_sem_eq_weights))
+object_default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__)
+object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n ')
+object_default_sem_eq_weights = object_default_sem_eq_weights.replace(' "', ' ')
+object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n')
+with open('object_default_sem_eq_weights.rst', 'w') as f:
+ f.write(".. code-block:: python\n\n {}\n\n".format(object_default_sem_eq_weights))
+
+graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__)
+graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n ')
+graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace(' "', ' ')
+graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n')
+with open('graph_default_sem_eq_weights.rst', 'w') as f:
+ f.write(".. code-block:: python\n\n {}\n\n".format(graph_default_sem_eq_weights))
def get_property_type(prop):
diff --git a/docs/guide/equivalence.ipynb b/docs/guide/equivalence.ipynb
index c0b9331..8393495 100644
--- a/docs/guide/equivalence.ipynb
+++ b/docs/guide/equivalence.ipynb
@@ -2165,15 +2165,17 @@
"The weights dictionary should contain both the weight and the comparison function for each property. You may use the default weights and functions, or provide your own.\n",
"\n",
"##### Existing comparison functions\n",
- "For reference, here is a list of the comparison functions already built in the codebase (found in [stix2/environment.py](../api/stix2.environment.rst#stix2.environment.Environment)):\n",
+ "For reference, here is a list of the comparison functions already built in the codebase (found in [stix2/equivalence/object](../api/equivalence/stix2.equivalence.object.rst#module-stix2.equivalence.object)):\n",
"\n",
- " - [custom_pattern_based](../api/stix2.environment.rst#stix2.environment.custom_pattern_based)\n",
- " - [exact_match](../api/stix2.environment.rst#stix2.environment.exact_match)\n",
- " - [partial_external_reference_based](../api/stix2.environment.rst#stix2.environment.partial_external_reference_based)\n",
- " - [partial_list_based](../api/stix2.environment.rst#stix2.environment.partial_list_based)\n",
- " - [partial_location_distance](../api/stix2.environment.rst#stix2.environment.partial_location_distance)\n",
- " - [partial_string_based](../api/stix2.environment.rst#stix2.environment.partial_string_based)\n",
- " - [partial_timestamp_based](../api/stix2.environment.rst#stix2.environment.partial_timestamp_based)\n",
+ " - [custom_pattern_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.custom_pattern_based)\n",
+ " - [exact_match](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.exact_match)\n",
+ " - [list_reference_check](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.list_reference_check)\n",
+ " - [partial_external_reference_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_external_reference_based)\n",
+ " - [partial_list_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_list_based)\n",
+ " - [partial_location_distance](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_location_distance)\n",
+ " - [partial_string_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_string_based)\n",
+ " - [partial_timestamp_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_timestamp_based)\n",
+ " - [reference_check](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.reference_check)\n",
"\n",
"For instance, if we wanted to compare two of the `ThreatActor`s from before, but use our own weights, then we could do the following:"
]
diff --git a/requirements.txt b/requirements.txt
index 8a5e4a2..fa1773a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
bumpversion
ipython
+nbconvert<6
nbsphinx==0.4.3
pre-commit
pygments<3,>=2.4.1
diff --git a/stix2/datastore/__init__.py b/stix2/datastore/__init__.py
index 57cb513..1ff0769 100644
--- a/stix2/datastore/__init__.py
+++ b/stix2/datastore/__init__.py
@@ -481,14 +481,14 @@ class CompositeDataSource(DataSource):
if data:
all_data.append(data)
- # remove duplicate versions
- if len(all_data) > 0:
- all_data = deduplicate(all_data)
- else:
- return None
+ # Search for latest version
+ stix_obj = latest_ver = None
+ for obj in all_data:
+ ver = obj.get("modified") or obj.get("created")
- # reduce to most recent version
- stix_obj = sorted(all_data, key=lambda k: k['modified'], reverse=True)[0]
+ if stix_obj is None or ver is None or latest_ver is None or ver > latest_ver:
+ stix_obj = obj
+ latest_ver = ver
return stix_obj
diff --git a/stix2/environment.py b/stix2/environment.py
index 0a6bdb2..4dc6ff0 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -1,13 +1,18 @@
"""Python STIX2 Environment API."""
import copy
-import logging
-import time
from .datastore import CompositeDataSource, DataStoreMixin
+from .equivalence.graph import graphically_equivalent
+from .equivalence.object import ( # noqa: F401
+ WEIGHTS, check_property_present, custom_pattern_based, exact_match,
+ list_reference_check, partial_external_reference_based, partial_list_based,
+ partial_location_distance, partial_string_based, partial_timestamp_based,
+ reference_check, semantically_equivalent,
+)
from .parsing import parse as _parse
-from .utils import STIXdatetime, parse_into_datetime
-logger = logging.getLogger(__name__)
+# TODO: Remove all unused imports that now belong to the equivalence module in the next major release.
+# Kept for backwards compatibility.
class ObjectFactory(object):
@@ -193,7 +198,7 @@ class Environment(DataStoreMixin):
@staticmethod
def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict):
- """This method is meant to verify if two objects of the same type are
+ """This method verifies if two objects of the same type are
semantically equivalent.
Args:
@@ -208,315 +213,58 @@ class Environment(DataStoreMixin):
float: A number between 0.0 and 100.0 as a measurement of equivalence.
Warning:
- Course of Action, Intrusion-Set, Observed-Data, Report are not supported
- by this implementation. Indicator pattern check is also limited.
+ Object types need to have property weights defined for the equivalence process.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.object` can give you an idea on how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine tuned for a particular use case.
Note:
- Default weights_dict:
+ Default weight_dict:
- .. include:: ../default_sem_eq_weights.rst
+ .. include:: ../object_default_sem_eq_weights.rst
Note:
- This implementation follows the Committee Note on semantic equivalence.
+ This implementation follows the Semantic Equivalence Committee Note.
see `the Committee Note `__.
"""
- weights = WEIGHTS.copy()
+ return semantically_equivalent(obj1, obj2, prop_scores, **weight_dict)
- if weight_dict:
- weights.update(weight_dict)
+ @staticmethod
+ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
+ """This method verifies if two graphs are semantically equivalent.
+ Each DataStore can contain a connected or disconnected graph and the
+ final result is weighted over the amount of objects we managed to compare.
+ This approach builds on top of the object-based semantic equivalence process
+ and each comparison can return a value between 0 and 100.
- type1, type2 = obj1["type"], obj2["type"]
- ignore_spec_version = weights["_internal"]["ignore_spec_version"]
+ Args:
+ ds1: A DataStore object instance representing your graph
+ ds2: A DataStore object instance representing your graph
+ prop_scores: A dictionary that can hold individual property scores,
+ weights, contributing score, matching score and sum of weights.
+ weight_dict: A dictionary that can be used to override settings
+ in the semantic equivalence process
- if type1 != type2:
- raise ValueError('The objects to compare must be of the same type!')
+ Returns:
+ float: A number between 0.0 and 100.0 as a measurement of equivalence.
- if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"):
- raise ValueError('The objects to compare must be of the same spec version!')
+ Warning:
+ Object types need to have property weights defined for the equivalence process.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.graph` can give you an idea on how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine tuned for a particular use case.
- try:
- weights[type1]
- except KeyError:
- logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1)
- sum_weights = matching_score = 0
- else:
- try:
- method = weights[type1]["method"]
- except KeyError:
- logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
- matching_score = 0.0
- sum_weights = 0.0
+ Note:
+ Default weight_dict:
- for prop in weights[type1]:
- if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude":
- w = weights[type1][prop][0]
- comp_funct = weights[type1][prop][1]
+ .. include:: ../graph_default_sem_eq_weights.rst
- if comp_funct == partial_timestamp_based:
- contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
- elif comp_funct == partial_location_distance:
- threshold = weights[type1]["threshold"]
- contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
- else:
- contributing_score = w * comp_funct(obj1[prop], obj2[prop])
+ Note:
+ This implementation follows the Semantic Equivalence Committee Note.
+ see `the Committee Note `__.
- sum_weights += w
- matching_score += contributing_score
-
- prop_scores[prop] = {
- "weight": w,
- "contributing_score": contributing_score,
- }
- logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
-
- prop_scores["matching_score"] = matching_score
- prop_scores["sum_weights"] = sum_weights
- logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
- else:
- logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
- try:
- matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
- except TypeError:
- # method doesn't support detailed output with prop_scores
- matching_score, sum_weights = method(obj1, obj2, **weights[type1])
- logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
-
- if sum_weights <= 0:
- return 0
- equivalence_score = (matching_score / sum_weights) * 100.0
- return equivalence_score
-
-
-def check_property_present(prop, obj1, obj2):
- """Helper method checks if a property is present on both objects."""
- if prop in obj1 and prop in obj2:
- return True
- return False
-
-
-def partial_timestamp_based(t1, t2, tdelta):
- """Performs a timestamp-based matching via checking how close one timestamp is to another.
-
- Args:
- t1: A datetime string or STIXdatetime object.
- t2: A datetime string or STIXdatetime object.
- tdelta (float): A given time delta. This number is multiplied by 86400 (1 day) to
- extend or shrink your time change tolerance.
-
- Returns:
- float: Number between 0.0 and 1.0 depending on match criteria.
-
- """
- if not isinstance(t1, STIXdatetime):
- t1 = parse_into_datetime(t1)
- if not isinstance(t2, STIXdatetime):
- t2 = parse_into_datetime(t2)
- t1, t2 = time.mktime(t1.timetuple()), time.mktime(t2.timetuple())
- result = 1 - min(abs(t1 - t2) / (86400 * tdelta), 1)
- logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", t1, t2, tdelta, result)
- return result
-
-
-def partial_list_based(l1, l2):
- """Performs a partial list matching via finding the intersection between common values.
-
- Args:
- l1: A list of values.
- l2: A list of values.
-
- Returns:
- float: 1.0 if the value matches exactly, 0.0 otherwise.
-
- """
- l1_set, l2_set = set(l1), set(l2)
- result = len(l1_set.intersection(l2_set)) / max(len(l1), len(l2))
- logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result)
- return result
-
-
-def exact_match(val1, val2):
- """Performs an exact value match based on two values
-
- Args:
- val1: A value suitable for an equality test.
- val2: A value suitable for an equality test.
-
- Returns:
- float: 1.0 if the value matches exactly, 0.0 otherwise.
-
- """
- result = 0.0
- if val1 == val2:
- result = 1.0
- logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result)
- return result
-
-
-def partial_string_based(str1, str2):
- """Performs a partial string match using the Jaro-Winkler distance algorithm.
-
- Args:
- str1: A string value to check.
- str2: A string value to check.
-
- Returns:
- float: Number between 0.0 and 1.0 depending on match criteria.
-
- """
- from rapidfuzz import fuzz
- result = fuzz.token_sort_ratio(str1, str2)
- logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, result)
- return result / 100.0
-
-
-def custom_pattern_based(pattern1, pattern2):
- """Performs a matching on Indicator Patterns.
-
- Args:
- pattern1: An Indicator pattern
- pattern2: An Indicator pattern
-
- Returns:
- float: Number between 0.0 and 1.0 depending on match criteria.
-
- """
- logger.warning("Indicator pattern equivalence is not fully defined; will default to zero if not completely identical")
- return exact_match(pattern1, pattern2) # TODO: Implement pattern based equivalence
-
-
-def partial_external_reference_based(refs1, refs2):
- """Performs a matching on External References.
-
- Args:
- refs1: A list of external references.
- refs2: A list of external references.
-
- Returns:
- float: Number between 0.0 and 1.0 depending on matches.
-
- """
- allowed = set(("veris", "cve", "capec", "mitre-attack"))
- matches = 0
-
- if len(refs1) >= len(refs2):
- l1 = refs1
- l2 = refs2
- else:
- l1 = refs2
- l2 = refs1
-
- for ext_ref1 in l1:
- for ext_ref2 in l2:
- sn_match = False
- ei_match = False
- url_match = False
- source_name = None
-
- if check_property_present("source_name", ext_ref1, ext_ref2):
- if ext_ref1["source_name"] == ext_ref2["source_name"]:
- source_name = ext_ref1["source_name"]
- sn_match = True
- if check_property_present("external_id", ext_ref1, ext_ref2):
- if ext_ref1["external_id"] == ext_ref2["external_id"]:
- ei_match = True
- if check_property_present("url", ext_ref1, ext_ref2):
- if ext_ref1["url"] == ext_ref2["url"]:
- url_match = True
-
- # Special case: if source_name is a STIX defined name and either
- # external_id or url match then its a perfect match and other entries
- # can be ignored.
- if sn_match and (ei_match or url_match) and source_name in allowed:
- result = 1.0
- logger.debug(
- "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
- refs1, refs2, result,
- )
- return result
-
- # Regular check. If the source_name (not STIX-defined) or external_id or
- # url matches then we consider the entry a match.
- if (sn_match or ei_match or url_match) and source_name not in allowed:
- matches += 1
-
- result = matches / max(len(refs1), len(refs2))
- logger.debug(
- "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
- refs1, refs2, result,
- )
- return result
-
-
-def partial_location_distance(lat1, long1, lat2, long2, threshold):
- """Given two coordinates perform a matching based on its distance using the Haversine Formula.
-
- Args:
- lat1: Latitude value for first coordinate point.
- lat2: Latitude value for second coordinate point.
- long1: Longitude value for first coordinate point.
- long2: Longitude value for second coordinate point.
- threshold (float): A kilometer measurement for the threshold distance between these two points.
-
- Returns:
- float: Number between 0.0 and 1.0 depending on match.
-
- """
- from haversine import Unit, haversine
- distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS)
- result = 1 - (distance / threshold)
- logger.debug(
- "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'",
- (lat1, long1), (lat2, long2), threshold, result,
- )
- return result
-
-
-# default weights used for the semantic equivalence process
-WEIGHTS = {
- "attack-pattern": {
- "name": (30, partial_string_based),
- "external_references": (70, partial_external_reference_based),
- },
- "campaign": {
- "name": (60, partial_string_based),
- "aliases": (40, partial_list_based),
- },
- "identity": {
- "name": (60, partial_string_based),
- "identity_class": (20, exact_match),
- "sectors": (20, partial_list_based),
- },
- "indicator": {
- "indicator_types": (15, partial_list_based),
- "pattern": (80, custom_pattern_based),
- "valid_from": (5, partial_timestamp_based),
- "tdelta": 1, # One day interval
- },
- "location": {
- "longitude_latitude": (34, partial_location_distance),
- "region": (33, exact_match),
- "country": (33, exact_match),
- "threshold": 1000.0,
- },
- "malware": {
- "malware_types": (20, partial_list_based),
- "name": (80, partial_string_based),
- },
- "threat-actor": {
- "name": (60, partial_string_based),
- "threat_actor_types": (20, partial_list_based),
- "aliases": (20, partial_list_based),
- },
- "tool": {
- "tool_types": (20, partial_list_based),
- "name": (80, partial_string_based),
- },
- "vulnerability": {
- "name": (30, partial_string_based),
- "external_references": (70, partial_external_reference_based),
- },
- "_internal": {
- "ignore_spec_version": False,
- },
-} #: :autodoc-skip:
+ """
+ return graphically_equivalent(ds1, ds2, prop_scores, **weight_dict)
diff --git a/stix2/equivalence/__init__.py b/stix2/equivalence/__init__.py
index c67e649..f175024 100644
--- a/stix2/equivalence/__init__.py
+++ b/stix2/equivalence/__init__.py
@@ -3,7 +3,9 @@
.. autosummary::
:toctree: equivalence
- patterns
+ pattern
+ graph
+ object
|
"""
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py
new file mode 100644
index 0000000..680f42f
--- /dev/null
+++ b/stix2/equivalence/graph/__init__.py
@@ -0,0 +1,137 @@
+"""Python APIs for STIX 2 Graph-based Semantic Equivalence."""
+import logging
+
+from ..object import (
+ WEIGHTS, exact_match, list_reference_check, partial_string_based,
+ partial_timestamp_based, reference_check, semantically_equivalent,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict):
+ """This method verifies if two graphs are semantically equivalent.
+ Each DataStore can contain a connected or disconnected graph and the
+ final result is weighted over the amount of objects we managed to compare.
+ This approach builds on top of the object-based semantic equivalence process
+ and each comparison can return a value between 0 and 100.
+
+ Args:
+ ds1: A DataStore object instance representing your graph
+ ds2: A DataStore object instance representing your graph
+ prop_scores: A dictionary that can hold individual property scores,
+ weights, contributing score, matching score and sum of weights.
+ weight_dict: A dictionary that can be used to override settings
+ in the semantic equivalence process
+
+ Returns:
+ float: A number between 0.0 and 100.0 as a measurement of equivalence.
+
+ Warning:
+ Object types need to have property weights defined for the equivalence process.
+ Otherwise, those objects will not influence the final score. The WEIGHTS
+ dictionary under `stix2.equivalence.graph` can give you an idea on how to add
+ new entries and pass them via the `weight_dict` argument. Similarly, the values
+ or methods can be fine tuned for a particular use case.
+
+ Note:
+ Default weight_dict:
+
+ .. include:: ../../graph_default_sem_eq_weights.rst
+
+ Note:
+ This implementation follows the Semantic Equivalence Committee Note.
+ see `the Committee Note `__.
+
+ """
+ weights = GRAPH_WEIGHTS.copy()
+
+ if weight_dict:
+ weights.update(weight_dict)
+
+ results = {}
+ depth = weights["_internal"]["max_depth"]
+
+ graph1 = ds1.query([])
+ graph2 = ds2.query([])
+
+ graph1.sort(key=lambda x: x["type"])
+ graph2.sort(key=lambda x: x["type"])
+
+ if len(graph1) < len(graph2):
+ weights["_internal"]["ds1"] = ds1
+ weights["_internal"]["ds2"] = ds2
+ g1 = graph1
+ g2 = graph2
+ else:
+ weights["_internal"]["ds1"] = ds2
+ weights["_internal"]["ds2"] = ds1
+ g1 = graph2
+ g2 = graph1
+
+ for object1 in g1:
+ for object2 in g2:
+ if object1["type"] == object2["type"] and object1["type"] in weights:
+ iprop_score = {}
+ result = semantically_equivalent(object1, object2, iprop_score, **weights)
+ objects1_id = object1["id"]
+ weights["_internal"]["max_depth"] = depth
+
+ if objects1_id not in results:
+ results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
+ elif result > results[objects1_id]["value"]:
+ results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result}
+
+ equivalence_score = 0
+ matching_score = sum(x["value"] for x in results.values())
+ sum_weights = len(results) * 100.0
+ if sum_weights > 0:
+ equivalence_score = (matching_score / sum_weights) * 100
+ prop_scores["matching_score"] = matching_score
+ prop_scores["sum_weights"] = sum_weights
+ prop_scores["summary"] = results
+
+ logger.debug(
+ "DONE\t\tSUM_WEIGHT: %.2f\tMATCHING_SCORE: %.2f\t SCORE: %.2f",
+ sum_weights,
+ matching_score,
+ equivalence_score,
+ )
+ return equivalence_score
+
+
+# default weights used for the graph semantic equivalence process
+GRAPH_WEIGHTS = WEIGHTS.copy()
+GRAPH_WEIGHTS.update({
+ "grouping": {
+ "name": (20, partial_string_based),
+ "context": (20, partial_string_based),
+ "object_refs": (60, list_reference_check),
+ },
+ "relationship": {
+ "relationship_type": (20, exact_match),
+ "source_ref": (40, reference_check),
+ "target_ref": (40, reference_check),
+ },
+ "report": {
+ "name": (30, partial_string_based),
+ "published": (10, partial_timestamp_based),
+ "object_refs": (60, list_reference_check),
+ "tdelta": 1, # One day interval
+ },
+ "sighting": {
+ "first_seen": (5, partial_timestamp_based),
+ "last_seen": (5, partial_timestamp_based),
+ "sighting_of_ref": (40, reference_check),
+ "observed_data_refs": (20, list_reference_check),
+ "where_sighted_refs": (20, list_reference_check),
+ "summary": (10, exact_match),
+ },
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "ds1": None,
+ "ds2": None,
+ "max_depth": 1,
+ },
+}) #: :autodoc-skip:
diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py
new file mode 100644
index 0000000..8333ceb
--- /dev/null
+++ b/stix2/equivalence/object/__init__.py
@@ -0,0 +1,452 @@
+"""Python APIs for STIX 2 Object-based Semantic Equivalence."""
+import logging
+import time
+
+from ...datastore import Filter
+from ...utils import STIXdatetime, parse_into_datetime
+
+logger = logging.getLogger(__name__)
+
+
def semantically_equivalent(obj1, obj2, prop_scores=None, **weight_dict):
    """This method verifies if two objects of the same type are
    semantically equivalent.

    Args:
        obj1: A stix2 object instance
        obj2: A stix2 object instance
        prop_scores: A dictionary that can hold individual property scores,
            weights, contributing score, matching score and sum of weights.
        weight_dict: A dictionary that can be used to override settings
            in the semantic equivalence process

    Returns:
        float: A number between 0.0 and 100.0 as a measurement of equivalence.

    Raises:
        ValueError: If the objects are of different types, or of different
            spec versions while ``ignore_spec_version`` is False.

    Warning:
        Object types need to have property weights defined for the equivalence process.
        Otherwise, those objects will not influence the final score. The WEIGHTS
        dictionary under `stix2.equivalence.object` can give you an idea on how to add
        new entries and pass them via the `weight_dict` argument. Similarly, the values
        or methods can be fine tuned for a particular use case.

    Note:
        Default weights_dict:

        .. include:: ../../object_default_sem_eq_weights.rst

    Note:
        This implementation follows the Semantic Equivalence Committee Note.

    """
    # Use a fresh dict per call: the previous mutable default ({}) was shared
    # across calls, so property scores leaked from one comparison to the next.
    if prop_scores is None:
        prop_scores = {}

    weights = WEIGHTS.copy()

    if weight_dict:
        weights.update(weight_dict)

    type1, type2 = obj1["type"], obj2["type"]
    ignore_spec_version = weights["_internal"]["ignore_spec_version"]

    if type1 != type2:
        raise ValueError('The objects to compare must be of the same type!')

    if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"):
        raise ValueError('The objects to compare must be of the same spec version!')

    try:
        weights[type1]
    except KeyError:
        # No weights entry for this type: nothing to score against.
        logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1)
        sum_weights = matching_score = 0
    else:
        try:
            method = weights[type1]["method"]
        except KeyError:
            # Default algorithm: iterate the weighted properties for this type
            # and accumulate each property's weighted contribution.
            logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
            matching_score = 0.0
            sum_weights = 0.0

            for prop in weights[type1]:
                # "longitude_latitude" is a synthetic key (no such property on
                # the objects), so it bypasses the presence check.
                if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude":
                    w = weights[type1][prop][0]
                    comp_funct = weights[type1][prop][1]

                    if comp_funct == partial_timestamp_based:
                        contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
                    elif comp_funct == partial_location_distance:
                        threshold = weights[type1]["threshold"]
                        contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
                    elif comp_funct == reference_check or comp_funct == list_reference_check:
                        max_depth = weights["_internal"]["max_depth"]
                        if max_depth < 0:
                            continue  # prevent excessive recursion
                        else:
                            # NOTE(review): this mutates the caller-supplied
                            # "_internal" dict so the depth budget is shared
                            # across the whole recursive comparison -- confirm
                            # that cross-call tracking is intended.
                            weights["_internal"]["max_depth"] -= 1
                        ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
                        contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
                    else:
                        contributing_score = w * comp_funct(obj1[prop], obj2[prop])

                    sum_weights += w
                    matching_score += contributing_score

                    prop_scores[prop] = {
                        "weight": w,
                        "contributing_score": contributing_score,
                    }
                    logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)

            prop_scores["matching_score"] = matching_score
            prop_scores["sum_weights"] = sum_weights
            logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
        else:
            # A custom "method" entry takes over the entire scoring for the type.
            logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
            try:
                matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
            except TypeError:
                # method doesn't support detailed output with prop_scores
                matching_score, sum_weights = method(obj1, obj2, **weights[type1])
            logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)

    if sum_weights <= 0:
        return 0
    equivalence_score = (matching_score / sum_weights) * 100.0
    return equivalence_score
+
+
def check_property_present(prop, obj1, obj2):
    """Return True when *prop* exists on both mappings, else False."""
    return prop in obj1 and prop in obj2
+
+
def partial_timestamp_based(t1, t2, tdelta):
    """Score how close two timestamps are to one another.

    Args:
        t1: A datetime string or STIXdatetime object.
        t2: A datetime string or STIXdatetime object.
        tdelta (float): A given time delta. This number is multiplied by 86400 (1 day) to
            extend or shrink your time change tolerance.

    Returns:
        float: Number between 0.0 and 1.0 depending on match criteria.

    """
    ts1 = t1 if isinstance(t1, STIXdatetime) else parse_into_datetime(t1)
    ts2 = t2 if isinstance(t2, STIXdatetime) else parse_into_datetime(t2)
    # Compare as epoch seconds; the difference is scaled by the tolerance
    # window (tdelta days) and capped so the score never drops below 0.
    epoch1 = time.mktime(ts1.timetuple())
    epoch2 = time.mktime(ts2.timetuple())
    result = 1 - min(abs(epoch1 - epoch2) / (86400 * tdelta), 1)
    logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", epoch1, epoch2, tdelta, result)
    return result
+
+
def partial_list_based(l1, l2):
    """Perform a partial list match by measuring the overlap of common values.

    Args:
        l1: A list of hashable values.
        l2: A list of hashable values.

    Returns:
        float: Size of the intersection of the two (deduplicated) lists
        divided by the size of the larger one, between 0.0 and 1.0.
        Two empty lists are considered identical and score 1.0.

    """
    l1_set, l2_set = set(l1), set(l2)
    largest = max(len(l1_set), len(l2_set))
    if largest == 0:
        # Both lists empty: identical by definition; also guards against the
        # ZeroDivisionError the unguarded division would raise.
        result = 1.0
    else:
        result = len(l1_set.intersection(l2_set)) / largest
    logging.getLogger(__name__).debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result)
    return result
+
+
def exact_match(val1, val2):
    """Score two values by strict equality.

    Args:
        val1: A value suitable for an equality test.
        val2: A value suitable for an equality test.

    Returns:
        float: 1.0 if the value matches exactly, 0.0 otherwise.

    """
    result = 1.0 if val1 == val2 else 0.0
    logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result)
    return result
+
+
def partial_string_based(str1, str2):
    """Score two strings with the Jaro-Winkler-style token-sort ratio.

    Args:
        str1: A string value to check.
        str2: A string value to check.

    Returns:
        float: Number between 0.0 and 1.0 depending on match criteria.

    """
    from rapidfuzz import fuzz
    # token_sort_ratio returns a percentage (0-100); normalize to [0, 1].
    ratio = fuzz.token_sort_ratio(str1, str2)
    logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, ratio)
    return ratio / 100.0
+
+
def custom_pattern_based(pattern1, pattern2):
    """Score two Indicator patterns against each other.

    Args:
        pattern1: An Indicator pattern
        pattern2: An Indicator pattern

    Returns:
        float: Number between 0.0 and 1.0 depending on match criteria.

    """
    logger.warning("Indicator pattern equivalence is not fully defined; will default to zero if not completely identical")
    # TODO: replace with a true STIX pattern semantic comparison; until then
    # any difference between the two pattern strings scores 0.0.
    result = exact_match(pattern1, pattern2)
    return result
+
+
def partial_external_reference_based(refs1, refs2):
    """Performs a matching on External References.

    Args:
        refs1: A list of external references (dict-like, with optional
            "source_name", "external_id" and "url" keys).
        refs2: A list of external references.

    Returns:
        float: Number between 0.0 and 1.0 depending on matches.

    """
    # Source names belonging to well-known registries; a match against one of
    # these is treated as authoritative (see the special case below).
    allowed = {"veris", "cve", "capec", "mitre-attack"}
    matches = 0

    # Iterate with the larger list on the outside, mirroring the denominator
    # (max of the two lengths) used for the final ratio.
    if len(refs1) >= len(refs2):
        l1 = refs1
        l2 = refs2
    else:
        l1 = refs2
        l2 = refs1

    for ext_ref1 in l1:
        for ext_ref2 in l2:
            # Per-pair flags: source_name, external_id and url equality.
            sn_match = False
            ei_match = False
            url_match = False
            source_name = None

            if check_property_present("source_name", ext_ref1, ext_ref2):
                if ext_ref1["source_name"] == ext_ref2["source_name"]:
                    source_name = ext_ref1["source_name"]
                    sn_match = True
            if check_property_present("external_id", ext_ref1, ext_ref2):
                if ext_ref1["external_id"] == ext_ref2["external_id"]:
                    ei_match = True
            if check_property_present("url", ext_ref1, ext_ref2):
                if ext_ref1["url"] == ext_ref2["url"]:
                    url_match = True

            # Special case: if source_name is a STIX defined name and either
            # external_id or url match then its a perfect match and other entries
            # can be ignored.
            if sn_match and (ei_match or url_match) and source_name in allowed:
                result = 1.0
                logger.debug(
                    "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
                    refs1, refs2, result,
                )
                return result

            # Regular check. If the source_name (not STIX-defined) or external_id or
            # url matches then we consider the entry a match.
            # NOTE(review): `matches` is incremented once per matching *pair*,
            # so one entry of l1 matching several entries of l2 can push the
            # final ratio above 1.0 -- confirm whether a break is intended.
            if (sn_match or ei_match or url_match) and source_name not in allowed:
                matches += 1

    # NOTE(review): raises ZeroDivisionError when both lists are empty --
    # callers appear to gate on property presence, but confirm.
    result = matches / max(len(refs1), len(refs2))
    logger.debug(
        "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
        refs1, refs2, result,
    )
    return result
+
+
def partial_location_distance(lat1, long1, lat2, long2, threshold):
    """Given two coordinates perform a matching based on its distance using the Haversine Formula.

    Args:
        lat1: Latitude value for first coordinate point.
        lat2: Latitude value for second coordinate point.
        long1: Longitude value for first coordinate point.
        long2: Longitude value for second coordinate point.
        threshold (float): A kilometer measurement for the threshold distance between these two points.

    Returns:
        float: Number between 0.0 and 1.0 depending on match.

    """
    from haversine import Unit, haversine
    distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS)
    # Clamp at 0.0: without it, a distance beyond the threshold yields a
    # negative score, violating the documented [0.0, 1.0] range and letting a
    # single far-apart location subtract from the overall matching score.
    result = max(0.0, 1 - (distance / threshold))
    logger.debug(
        "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'",
        (lat1, long1), (lat2, long2), threshold, result,
    )
    return result
+
+
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
    """Checks multiple object versions if present in graph.
    Maximizes for the semantic equivalence score of a particular version.

    Args:
        ref1: A STIX identifier whose versions are queried from ``ds1``.
        ref2: A STIX identifier whose versions are queried from ``ds2``.
        ds1: A DataStore-like object supporting ``query``.
        ds2: A DataStore-like object supporting ``query``.
        weights: Weight configuration forwarded to ``semantically_equivalent``.

    Returns:
        float: The best (maximum) equivalence score found across all version
        pairings, or 0.0 when either side returned no objects.
    """
    results = {}
    # Filtering on "id" alone returns every version of the object each
    # datastore holds.
    objects1 = ds1.query([Filter("id", "=", ref1)])
    objects2 = ds2.query([Filter("id", "=", ref2)])

    if len(objects1) > 0 and len(objects2) > 0:
        for o1 in objects1:
            for o2 in objects2:
                result = semantically_equivalent(o1, o2, **weights)
                # Keep only the highest-scoring version pairing for ref1.
                if ref1 not in results:
                    results[ref1] = {"matched": ref2, "value": result}
                elif result > results[ref1]["value"]:
                    results[ref1] = {"matched": ref2, "value": result}
    result = results.get(ref1, {}).get("value", 0.0)
    logger.debug(
        "--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
        ref1, ref2, result,
    )
    return result
+
+
def reference_check(ref1, ref2, ds1, ds2, **weights):
    """For two references, de-reference the object and perform object-based
    semantic equivalence. The score influences the result of an edge check."""
    result = 0.0
    same_type = ref1.split("--")[0] == ref2.split("--")[0]

    if same_type:
        if weights["_internal"]["versioning_checks"]:
            # Compare every version pair and keep the best score.
            result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0
        else:
            o1 = ds1.get(ref1)
            o2 = ds2.get(ref2)
            if o1 and o2:
                result = semantically_equivalent(o1, o2, **weights) / 100.0

    logger.debug(
        "--\t\treference_check '%s' '%s'\tresult: '%s'",
        ref1, ref2, result,
    )
    return result
+
+
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
    """For objects that contain multiple references (i.e., object_refs) perform
    the same de-reference procedure and perform object-based semantic equivalence.
    The score influences the objects containing these references. The result is
    weighted on the amount of unique objects that could 1) be de-referenced and
    2) be paired with a reference of the same type on the other side.

    Args:
        refs1: A list of STIX identifiers.
        refs2: A list of STIX identifiers.
        ds1: A DataStore-like object used to de-reference ``refs1``.
        ds2: A DataStore-like object used to de-reference ``refs2``.
        weights: Weight configuration forwarded to ``reference_check``.

    Returns:
        float: Number between 0.0 and 1.0 depending on the matches.

    """
    results = {}
    # Iterate with the longer list on the outside so each of its entries gets
    # a chance to be matched against the shorter list.
    if len(refs1) >= len(refs2):
        l1, l2 = refs1, refs2
        b1, b2 = ds1, ds2
    else:
        l1, l2 = refs2, refs1
        b1, b2 = ds2, ds1

    # Sort *copies*: the previous in-place sort() reordered the caller's
    # reference lists (l1/l2 alias refs1/refs2) as a side effect.
    l1 = sorted(l1)
    l2 = sorted(l2)

    for ref1 in l1:
        for ref2 in l2:
            type1, type2 = ref1.split("--")[0], ref2.split("--")[0]
            if type1 == type2:
                score = reference_check(ref1, ref2, b1, b2, **weights) * 100.0

                # Keep only the best-scoring counterpart for each ref1.
                if ref1 not in results:
                    results[ref1] = {"matched": ref2, "value": score}
                elif score > results[ref1]["value"]:
                    results[ref1] = {"matched": ref2, "value": score}

    result = 0.0
    total_sum = sum(x["value"] for x in results.values())
    max_score = len(results) * 100.0

    if max_score > 0:
        result = total_sum / max_score

    logger.debug(
        "--\t\tlist_reference_check '%s' '%s'\ttotal_sum: '%s'\tmax_score: '%s'\tresult: '%s'",
        refs1, refs2, total_sum, max_score, result,
    )
    return result
+
+
# default weights used for the semantic equivalence process
# Each object-type entry maps property name -> (weight, comparison_function);
# the weights of a type sum to 100. Non-tuple entries are extra parameters for
# that type's comparison functions ("tdelta" days for partial_timestamp_based,
# "threshold" kilometers for partial_location_distance). "_internal" holds
# process-wide settings rather than per-type weights.
WEIGHTS = {
    "attack-pattern": {
        "name": (30, partial_string_based),
        "external_references": (70, partial_external_reference_based),
    },
    "campaign": {
        "name": (60, partial_string_based),
        "aliases": (40, partial_list_based),
    },
    "course-of-action": {
        "name": (60, partial_string_based),
        "external_references": (40, partial_external_reference_based),
    },
    "identity": {
        "name": (60, partial_string_based),
        "identity_class": (20, exact_match),
        "sectors": (20, partial_list_based),
    },
    "indicator": {
        "indicator_types": (15, partial_list_based),
        "pattern": (80, custom_pattern_based),
        "valid_from": (5, partial_timestamp_based),
        "tdelta": 1,  # One day interval
    },
    "intrusion-set": {
        "name": (20, partial_string_based),
        "external_references": (60, partial_external_reference_based),
        "aliases": (20, partial_list_based),
    },
    "location": {
        # "longitude_latitude" is a synthetic key: the comparison reads the
        # objects' separate "latitude"/"longitude" properties.
        "longitude_latitude": (34, partial_location_distance),
        "region": (33, exact_match),
        "country": (33, exact_match),
        "threshold": 1000.0,
    },
    "malware": {
        "malware_types": (20, partial_list_based),
        "name": (80, partial_string_based),
    },
    "marking-definition": {
        "name": (20, exact_match),
        "definition": (60, exact_match),
        "definition_type": (20, exact_match),
    },
    "threat-actor": {
        "name": (60, partial_string_based),
        "threat_actor_types": (20, partial_list_based),
        "aliases": (20, partial_list_based),
    },
    "tool": {
        "tool_types": (20, partial_list_based),
        "name": (80, partial_string_based),
    },
    "vulnerability": {
        "name": (30, partial_string_based),
        "external_references": (70, partial_external_reference_based),
    },
    "_internal": {
        "ignore_spec_version": False,
    },
}  # :autodoc-skip:
diff --git a/stix2/equivalence/patterns/__init__.py b/stix2/equivalence/pattern/__init__.py
similarity index 95%
rename from stix2/equivalence/patterns/__init__.py
rename to stix2/equivalence/pattern/__init__.py
index 85ec9ab..b2e5421 100644
--- a/stix2/equivalence/patterns/__init__.py
+++ b/stix2/equivalence/pattern/__init__.py
@@ -1,7 +1,7 @@
"""Python APIs for STIX 2 Pattern Semantic Equivalence.
.. autosummary::
- :toctree: patterns
+ :toctree: pattern
compare
transform
@@ -10,13 +10,13 @@
"""
import stix2
-from stix2.equivalence.patterns.compare.observation import (
+from stix2.equivalence.pattern.compare.observation import (
observation_expression_cmp,
)
-from stix2.equivalence.patterns.transform import (
+from stix2.equivalence.pattern.transform import (
ChainTransformer, SettleTransformer,
)
-from stix2.equivalence.patterns.transform.observation import (
+from stix2.equivalence.pattern.transform.observation import (
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
)
diff --git a/stix2/equivalence/patterns/compare/__init__.py b/stix2/equivalence/pattern/compare/__init__.py
similarity index 100%
rename from stix2/equivalence/patterns/compare/__init__.py
rename to stix2/equivalence/pattern/compare/__init__.py
diff --git a/stix2/equivalence/patterns/compare/comparison.py b/stix2/equivalence/pattern/compare/comparison.py
similarity index 99%
rename from stix2/equivalence/patterns/compare/comparison.py
rename to stix2/equivalence/pattern/compare/comparison.py
index ed717fc..e412705 100644
--- a/stix2/equivalence/patterns/compare/comparison.py
+++ b/stix2/equivalence/pattern/compare/comparison.py
@@ -4,7 +4,7 @@ Comparison utilities for STIX pattern comparison expressions.
import base64
import functools
-from stix2.equivalence.patterns.compare import generic_cmp, iter_lex_cmp
+from stix2.equivalence.pattern.compare import generic_cmp, iter_lex_cmp
from stix2.patterns import (
AndBooleanExpression, BinaryConstant, BooleanConstant, FloatConstant,
HexConstant, IntegerConstant, ListConstant, ListObjectPathComponent,
diff --git a/stix2/equivalence/patterns/compare/observation.py b/stix2/equivalence/pattern/compare/observation.py
similarity index 96%
rename from stix2/equivalence/patterns/compare/observation.py
rename to stix2/equivalence/pattern/compare/observation.py
index 227b8ae..8df9e3f 100644
--- a/stix2/equivalence/patterns/compare/observation.py
+++ b/stix2/equivalence/pattern/compare/observation.py
@@ -1,8 +1,8 @@
"""
Comparison utilities for STIX pattern observation expressions.
"""
-from stix2.equivalence.patterns.compare import generic_cmp, iter_lex_cmp
-from stix2.equivalence.patterns.compare.comparison import (
+from stix2.equivalence.pattern.compare import generic_cmp, iter_lex_cmp
+from stix2.equivalence.pattern.compare.comparison import (
comparison_expression_cmp, generic_constant_cmp,
)
from stix2.patterns import (
diff --git a/stix2/equivalence/patterns/transform/__init__.py b/stix2/equivalence/pattern/transform/__init__.py
similarity index 100%
rename from stix2/equivalence/patterns/transform/__init__.py
rename to stix2/equivalence/pattern/transform/__init__.py
diff --git a/stix2/equivalence/patterns/transform/comparison.py b/stix2/equivalence/pattern/transform/comparison.py
similarity index 97%
rename from stix2/equivalence/patterns/transform/comparison.py
rename to stix2/equivalence/pattern/transform/comparison.py
index 528cc9b..d0f431b 100644
--- a/stix2/equivalence/patterns/transform/comparison.py
+++ b/stix2/equivalence/pattern/transform/comparison.py
@@ -4,12 +4,12 @@ Transformation utilities for STIX pattern comparison expressions.
import functools
import itertools
-from stix2.equivalence.patterns.compare import iter_in, iter_lex_cmp
-from stix2.equivalence.patterns.compare.comparison import (
+from stix2.equivalence.pattern.compare import iter_in, iter_lex_cmp
+from stix2.equivalence.pattern.compare.comparison import (
comparison_expression_cmp,
)
-from stix2.equivalence.patterns.transform import Transformer
-from stix2.equivalence.patterns.transform.specials import (
+from stix2.equivalence.pattern.transform import Transformer
+from stix2.equivalence.pattern.transform.specials import (
ipv4_addr, ipv6_addr, windows_reg_key,
)
from stix2.patterns import (
diff --git a/stix2/equivalence/patterns/transform/observation.py b/stix2/equivalence/pattern/transform/observation.py
similarity index 97%
rename from stix2/equivalence/patterns/transform/observation.py
rename to stix2/equivalence/pattern/transform/observation.py
index d4ee175..a8982cf 100644
--- a/stix2/equivalence/patterns/transform/observation.py
+++ b/stix2/equivalence/pattern/transform/observation.py
@@ -4,23 +4,23 @@ Transformation utilities for STIX pattern observation expressions.
import functools
import itertools
-from stix2.equivalence.patterns.compare import iter_in, iter_lex_cmp
-from stix2.equivalence.patterns.compare.observation import (
+from stix2.equivalence.pattern.compare import iter_in, iter_lex_cmp
+from stix2.equivalence.pattern.compare.observation import (
observation_expression_cmp,
)
-from stix2.equivalence.patterns.transform import (
+from stix2.equivalence.pattern.transform import (
ChainTransformer, SettleTransformer, Transformer,
)
-from stix2.equivalence.patterns.transform.comparison import (
+from stix2.equivalence.pattern.transform.comparison import (
SpecialValueCanonicalization,
)
-from stix2.equivalence.patterns.transform.comparison import \
+from stix2.equivalence.pattern.transform.comparison import \
AbsorptionTransformer as CAbsorptionTransformer
-from stix2.equivalence.patterns.transform.comparison import \
+from stix2.equivalence.pattern.transform.comparison import \
DNFTransformer as CDNFTransformer
-from stix2.equivalence.patterns.transform.comparison import \
+from stix2.equivalence.pattern.transform.comparison import \
FlattenTransformer as CFlattenTransformer
-from stix2.equivalence.patterns.transform.comparison import \
+from stix2.equivalence.pattern.transform.comparison import \
OrderDedupeTransformer as COrderDedupeTransformer
from stix2.patterns import (
AndObservationExpression, FollowedByObservationExpression,
diff --git a/stix2/equivalence/patterns/transform/specials.py b/stix2/equivalence/pattern/transform/specials.py
similarity index 99%
rename from stix2/equivalence/patterns/transform/specials.py
rename to stix2/equivalence/pattern/transform/specials.py
index b95e6bf..d3611f3 100644
--- a/stix2/equivalence/patterns/transform/specials.py
+++ b/stix2/equivalence/pattern/transform/specials.py
@@ -3,7 +3,7 @@ Some simple comparison expression canonicalization functions.
"""
import socket
-from stix2.equivalence.patterns.compare.comparison import (
+from stix2.equivalence.pattern.compare.comparison import (
object_path_to_raw_values,
)
diff --git a/stix2/test/test_pattern_equivalence.py b/stix2/test/test_pattern_equivalence.py
index 6fc2adf..431322f 100644
--- a/stix2/test/test_pattern_equivalence.py
+++ b/stix2/test/test_pattern_equivalence.py
@@ -1,6 +1,6 @@
import pytest
-from stix2.equivalence.patterns import (
+from stix2.equivalence.pattern import (
equivalent_patterns, find_equivalent_patterns,
)
diff --git a/stix2/test/v20/test_environment.py b/stix2/test/v20/test_environment.py
index 5afb430..34ce596 100644
--- a/stix2/test/v20/test_environment.py
+++ b/stix2/test/v20/test_environment.py
@@ -1,6 +1,10 @@
+import os
+
import pytest
import stix2
+import stix2.equivalence.graph
+import stix2.equivalence.object
from .constants import (
CAMPAIGN_ID, CAMPAIGN_KWARGS, FAKE_TIME, IDENTITY_ID, IDENTITY_KWARGS,
@@ -8,6 +12,8 @@ from .constants import (
RELATIONSHIP_IDS,
)
+FS_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "stix2_data")
+
@pytest.fixture
def ds():
@@ -18,7 +24,42 @@ def ds():
rel1 = stix2.v20.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0])
rel2 = stix2.v20.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1])
rel3 = stix2.v20.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2])
- stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3]
+ reprt = stix2.v20.Report(
+ name="Malware Report",
+ published="2021-05-09T08:22:22Z",
+ labels=["campaign"],
+ object_refs=[mal.id, rel1.id, ind.id],
+ )
+ stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3, reprt]
+ yield stix2.MemoryStore(stix_objs)
+
+
+@pytest.fixture
+def ds2():
+ cam = stix2.v20.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
+ idy = stix2.v20.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
+ ind = stix2.v20.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS)
+ indv2 = ind.new_version(external_references=[{
+ "source_name": "unknown",
+ "url": "https://examplewebsite.com/",
+ }])
+ mal = stix2.v20.Malware(id=MALWARE_ID, created_by_ref=idy.id, **MALWARE_KWARGS)
+ malv2 = mal.new_version(external_references=[{
+ "source_name": "unknown",
+ "url": "https://examplewebsite2.com/",
+ }])
+ rel1 = stix2.v20.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0])
+ rel2 = stix2.v20.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1])
+ rel3 = stix2.v20.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2])
+ stix_objs = [cam, idy, ind, indv2, mal, malv2, rel1, rel2, rel3]
+ reprt = stix2.v20.Report(
+ created_by_ref=idy.id,
+ name="example",
+ labels=["campaign"],
+ published="2021-04-09T08:22:22Z",
+ object_refs=stix_objs,
+ )
+ stix_objs.append(reprt)
yield stix2.MemoryStore(stix_objs)
@@ -370,3 +411,144 @@ def test_related_to_by_target(ds):
assert len(resp) == 2
assert any(x['id'] == CAMPAIGN_ID for x in resp)
assert any(x['id'] == INDICATOR_ID for x in resp)
+
+
+def test_versioned_checks(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": True,
+ "versioning_checks": True,
+ "max_depth": 1,
+ },
+ })
+ score = stix2.equivalence.object._versioned_checks(INDICATOR_ID, INDICATOR_ID, ds, ds2, **weights)
+ assert round(score) == 100
+
+
+def test_semantic_check_with_versioning(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": True,
+ "ds1": ds,
+ "ds2": ds2,
+ "max_depth": 1,
+ },
+ })
+ ind = stix2.v20.Indicator(
+ **dict(
+ labels=["malicious-activity"],
+ pattern="[file:hashes.'SHA-256' = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855']",
+ valid_from="2017-01-01T12:34:56Z",
+ external_references=[
+ {
+ "source_name": "unknown",
+ "url": "https://examplewebsite2.com/",
+ },
+ ],
+ object_marking_refs=[stix2.v20.TLP_WHITE],
+ )
+ )
+ ds.add(ind)
+ score = stix2.equivalence.object.reference_check(ind.id, INDICATOR_ID, ds, ds2, **weights)
+ assert round(score) == 0 # Since pattern is different score is really low
+
+
+def test_list_semantic_check(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "ds1": ds,
+ "ds2": ds2,
+ "max_depth": 1,
+ },
+ })
+ object_refs1 = [
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd",
+ "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7",
+ ]
+ object_refs2 = [
+ "campaign--8e2e2d2b-17d4-4cbf-938f-98ee46b3cd3f",
+ "identity--311b2d2d-f010-4473-83ec-1edf84858f4c",
+ "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7",
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd",
+ "relationship--181c9c09-43e6-45dd-9374-3bec192f05ef",
+ "relationship--a0cbb21c-8daf-4a7f-96aa-7155a4ef8f70",
+ ]
+
+ score = stix2.equivalence.object.list_reference_check(
+ object_refs1,
+ object_refs2,
+ ds,
+ ds2,
+ **weights,
+ )
+ assert round(score) == 1
+
+
+def test_graph_equivalence_with_filesystem_source(ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": True,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ fs = stix2.FileSystemSource(FS_PATH)
+ env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights)
+ assert round(env) == 28
+ assert round(prop_scores["matching_score"]) == 139
+ assert round(prop_scores["sum_weights"]) == 500
+
+
+def test_graph_equivalence_with_duplicate_graph(ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights)
+ assert round(env) == 100
+ assert round(prop_scores["matching_score"]) == 800
+ assert round(prop_scores["sum_weights"]) == 800
+
+
+def test_graph_equivalence_with_versioning_check_on(ds2, ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": True,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+ assert round(env) == 93
+ assert round(prop_scores["matching_score"]) == 745
+ assert round(prop_scores["sum_weights"]) == 800
+
+
+def test_graph_equivalence_with_versioning_check_off(ds2, ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+ assert round(env) == 93
+ assert round(prop_scores["matching_score"]) == 745
+ assert round(prop_scores["sum_weights"]) == 800
diff --git a/stix2/test/v20/test_pattern_equivalence.py b/stix2/test/v20/test_pattern_equivalence.py
index 1ada5c7..aab8533 100644
--- a/stix2/test/v20/test_pattern_equivalence.py
+++ b/stix2/test/v20/test_pattern_equivalence.py
@@ -4,7 +4,7 @@ Pattern equivalence unit tests which use STIX 2.0-specific pattern features
import pytest
-from stix2.equivalence.patterns import equivalent_patterns
+from stix2.equivalence.pattern import equivalent_patterns
@pytest.mark.parametrize(
diff --git a/stix2/test/v20/test_utils.py b/stix2/test/v20/test_utils.py
index 9372bbb..a66f3e8 100644
--- a/stix2/test/v20/test_utils.py
+++ b/stix2/test/v20/test_utils.py
@@ -114,7 +114,7 @@ def test_deduplicate(stix_objs1):
mods = [obj['modified'] for obj in unique]
assert "indicator--00000000-0000-4000-8000-000000000001" in ids
- assert "indicator--00000000-0000-4000-8000-000000000001" in ids
+ assert "indicator--00000000-0000-4000-8000-000000000002" in ids
assert "2017-01-27T13:49:53.935Z" in mods
assert "2017-01-27T13:49:53.936Z" in mods
diff --git a/stix2/test/v21/conftest.py b/stix2/test/v21/conftest.py
index d602f42..6efcf39 100644
--- a/stix2/test/v21/conftest.py
+++ b/stix2/test/v21/conftest.py
@@ -132,7 +132,13 @@ def stix_objs1():
"type": "indicator",
"valid_from": "2017-01-27T13:49:53.935382Z",
}
- return [ind1, ind2, ind3, ind4, ind5]
+ sco = {
+ "type": "url",
+ "spec_version": "2.1",
+ "id": "url--cc1deced-d99b-4d72-9268-8182420cb2fd",
+ "value": "http://example.com/",
+ }
+ return [ind1, ind2, ind3, ind4, ind5, sco]
@pytest.fixture
diff --git a/stix2/test/v21/test_datastore_composite.py b/stix2/test/v21/test_datastore_composite.py
index 76119c3..c6128e5 100644
--- a/stix2/test/v21/test_datastore_composite.py
+++ b/stix2/test/v21/test_datastore_composite.py
@@ -59,6 +59,17 @@ def test_composite_datasource_operations(stix_objs1, stix_objs2):
assert indicator["modified"] == parse_into_datetime("2017-01-31T13:49:53.935Z")
assert indicator["type"] == "indicator"
+ sco = cds1.get("url--cc1deced-d99b-4d72-9268-8182420cb2fd")
+ assert sco["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd"
+
+ scos = cds1.all_versions("url--cc1deced-d99b-4d72-9268-8182420cb2fd")
+ assert len(scos) == 1
+ assert scos[0]["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd"
+
+ scos = cds1.query([Filter("value", "=", "http://example.com/")])
+ assert len(scos) == 1
+ assert scos[0]["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd"
+
query1 = [
Filter("type", "=", "indicator"),
]
diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py
index 7a7d4c1..95094fe 100644
--- a/stix2/test/v21/test_environment.py
+++ b/stix2/test/v21/test_environment.py
@@ -1,7 +1,11 @@
+import os
+
import pytest
import stix2
import stix2.environment
+import stix2.equivalence.graph
+import stix2.equivalence.object
import stix2.exceptions
from .constants import (
@@ -12,6 +16,8 @@ from .constants import (
VULNERABILITY_ID, VULNERABILITY_KWARGS,
)
+FS_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "stix2_data")
+
@pytest.fixture
def ds():
@@ -22,7 +28,46 @@ def ds():
rel1 = stix2.v21.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0])
rel2 = stix2.v21.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1])
rel3 = stix2.v21.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2])
- stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3]
+ reprt = stix2.v21.Report(
+ name="Malware Report", published="2021-05-09T08:22:22Z",
+ object_refs=[mal.id, rel1.id, ind.id],
+ )
+ stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3, reprt]
+ yield stix2.MemoryStore(stix_objs)
+
+
+@pytest.fixture
+def ds2():
+ cam = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS)
+ idy = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS)
+ ind = stix2.v21.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS)
+ indv2 = ind.new_version(
+ external_references=[
+ {
+ "source_name": "unknown",
+ "url": "https://examplewebsite.com/",
+ },
+ ],
+ object_marking_refs=[stix2.v21.TLP_WHITE],
+ )
+ mal = stix2.v21.Malware(id=MALWARE_ID, created_by_ref=idy.id, **MALWARE_KWARGS)
+ malv2 = mal.new_version(
+ external_references=[
+ {
+ "source_name": "unknown",
+ "url": "https://examplewebsite2.com/",
+ },
+ ],
+ )
+ rel1 = stix2.v21.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0])
+ rel2 = stix2.v21.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1])
+ rel3 = stix2.v21.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2])
+ stix_objs = [cam, idy, ind, indv2, mal, malv2, rel1, rel2, rel3]
+ reprt = stix2.v21.Report(
+ created_by_ref=idy.id, name="example",
+ published="2021-04-09T08:22:22Z", object_refs=stix_objs,
+ )
+ stix_objs.append(reprt)
yield stix2.MemoryStore(stix_objs)
@@ -820,3 +865,145 @@ def test_semantic_equivalence_prop_scores_method_provided():
assert len(prop_scores) == 2
assert prop_scores["matching_score"] == 96.0
assert prop_scores["sum_weights"] == 100.0
+
+
+def test_versioned_checks(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": True,
+ "versioning_checks": True,
+ "max_depth": 1,
+ },
+ })
+ score = stix2.equivalence.object._versioned_checks(INDICATOR_ID, INDICATOR_ID, ds, ds2, **weights)
+ assert round(score) == 100
+
+
+def test_semantic_check_with_versioning(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": True,
+ "ds1": ds,
+ "ds2": ds2,
+ "max_depth": 1,
+ },
+ })
+ ind = stix2.v21.Indicator(
+ **dict(
+ indicator_types=["malicious-activity"],
+ pattern_type="stix",
+ pattern="[file:hashes.'SHA-256' = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855']",
+ valid_from="2017-01-01T12:34:56Z",
+ external_references=[
+ {
+ "source_name": "unknown",
+ "url": "https://examplewebsite2.com/",
+ },
+ ],
+ object_marking_refs=[stix2.v21.TLP_WHITE],
+ )
+ )
+ ds.add(ind)
+ score = stix2.equivalence.object.reference_check(ind.id, INDICATOR_ID, ds, ds2, **weights)
+    assert round(score) == 0  # the patterns differ, so the score is very low
+
+
+def test_list_semantic_check(ds, ds2):
+ weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy()
+ weights.update({
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "ds1": ds,
+ "ds2": ds2,
+ "max_depth": 1,
+ },
+ })
+ object_refs1 = [
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd",
+ "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7",
+ ]
+ object_refs2 = [
+ "campaign--8e2e2d2b-17d4-4cbf-938f-98ee46b3cd3f",
+ "identity--311b2d2d-f010-4473-83ec-1edf84858f4c",
+ "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7",
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e",
+ "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd",
+ "relationship--181c9c09-43e6-45dd-9374-3bec192f05ef",
+ "relationship--a0cbb21c-8daf-4a7f-96aa-7155a4ef8f70",
+ ]
+
+ score = stix2.equivalence.object.list_reference_check(
+ object_refs1,
+ object_refs2,
+ ds,
+ ds2,
+ **weights,
+ )
+ assert round(score) == 1
+
+
+def test_graph_equivalence_with_filesystem_source(ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": True,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ fs = stix2.FileSystemSource(FS_PATH)
+ env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights)
+ assert round(env) == 24
+ assert round(prop_scores["matching_score"]) == 122
+ assert round(prop_scores["sum_weights"]) == 500
+
+
+def test_graph_equivalence_with_duplicate_graph(ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights)
+ assert round(env) == 100
+ assert round(prop_scores["matching_score"]) == 800
+ assert round(prop_scores["sum_weights"]) == 800
+
+
+def test_graph_equivalence_with_versioning_check_on(ds2, ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": True,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+ assert round(env) == 93
+ assert round(prop_scores["matching_score"]) == 745
+ assert round(prop_scores["sum_weights"]) == 800
+
+
+def test_graph_equivalence_with_versioning_check_off(ds2, ds):
+ weights = {
+ "_internal": {
+ "ignore_spec_version": False,
+ "versioning_checks": False,
+ "max_depth": 1,
+ },
+ }
+ prop_scores = {}
+ env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights)
+ assert round(env) == 93
+ assert round(prop_scores["matching_score"]) == 745
+ assert round(prop_scores["sum_weights"]) == 800
diff --git a/stix2/test/v21/test_pattern_equivalence.py b/stix2/test/v21/test_pattern_equivalence.py
index 71ded69..5f6b707 100644
--- a/stix2/test/v21/test_pattern_equivalence.py
+++ b/stix2/test/v21/test_pattern_equivalence.py
@@ -4,7 +4,7 @@ Pattern equivalence unit tests which use STIX 2.1+-specific pattern features
import pytest
-from stix2.equivalence.patterns import equivalent_patterns
+from stix2.equivalence.pattern import equivalent_patterns
@pytest.mark.parametrize(
diff --git a/stix2/test/v21/test_utils.py b/stix2/test/v21/test_utils.py
index 03477aa..f64cec2 100644
--- a/stix2/test/v21/test_utils.py
+++ b/stix2/test/v21/test_utils.py
@@ -104,17 +104,18 @@ def test_get_type_from_id(stix_id, type):
def test_deduplicate(stix_objs1):
unique = stix2.utils.deduplicate(stix_objs1)
- # Only 3 objects are unique
- # 2 id's vary
+ # Only 4 objects are unique
+ # 3 id's vary
# 2 modified times vary for a particular id
- assert len(unique) == 3
+ assert len(unique) == 4
ids = [obj['id'] for obj in unique]
- mods = [obj['modified'] for obj in unique]
+ mods = [obj.get('modified') for obj in unique]
assert "indicator--00000000-0000-4000-8000-000000000001" in ids
- assert "indicator--00000000-0000-4000-8000-000000000001" in ids
+ assert "indicator--00000000-0000-4000-8000-000000000002" in ids
+ assert "url--cc1deced-d99b-4d72-9268-8182420cb2fd" in ids
assert "2017-01-27T13:49:53.935Z" in mods
assert "2017-01-27T13:49:53.936Z" in mods
diff --git a/stix2/utils.py b/stix2/utils.py
index f741581..1b88f72 100644
--- a/stix2/utils.py
+++ b/stix2/utils.py
@@ -132,11 +132,12 @@ def deduplicate(stix_obj_list):
unique_objs = {}
for obj in stix_obj_list:
- try:
- unique_objs[(obj['id'], obj['modified'])] = obj
- except KeyError:
- # Handle objects with no `modified` property, e.g. marking-definition
- unique_objs[(obj['id'], obj['created'])] = obj
+ ver = obj.get("modified") or obj.get("created")
+
+ if ver is None:
+ unique_objs[obj["id"]] = obj
+ else:
+ unique_objs[(obj['id'], ver)] = obj
return list(unique_objs.values())