From 7fa3c78deac5b00916e6197d3b2a75baea4a5bca Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Mon, 17 Aug 2020 18:38:29 -0400 Subject: [PATCH 1/9] Update CompositeDataSource and deduplicate() to handle unversioned objects, including 2.1 SCOs. Updated some unit tests to test this. Fixed a typo in a 2.0 unit test (2.0 deduplicate() test). --- stix2/datastore/__init__.py | 14 +++++++------- stix2/test/v20/test_utils.py | 2 +- stix2/test/v21/conftest.py | 8 +++++++- stix2/test/v21/test_datastore_composite.py | 11 +++++++++++ stix2/test/v21/test_utils.py | 11 ++++++----- stix2/utils.py | 11 ++++++----- 6 files changed, 38 insertions(+), 19 deletions(-) diff --git a/stix2/datastore/__init__.py b/stix2/datastore/__init__.py index 57cb513..1ff0769 100644 --- a/stix2/datastore/__init__.py +++ b/stix2/datastore/__init__.py @@ -481,14 +481,14 @@ class CompositeDataSource(DataSource): if data: all_data.append(data) - # remove duplicate versions - if len(all_data) > 0: - all_data = deduplicate(all_data) - else: - return None + # Search for latest version + stix_obj = latest_ver = None + for obj in all_data: + ver = obj.get("modified") or obj.get("created") - # reduce to most recent version - stix_obj = sorted(all_data, key=lambda k: k['modified'], reverse=True)[0] + if stix_obj is None or ver is None or ver > latest_ver: + stix_obj = obj + latest_ver = ver return stix_obj diff --git a/stix2/test/v20/test_utils.py b/stix2/test/v20/test_utils.py index 9372bbb..a66f3e8 100644 --- a/stix2/test/v20/test_utils.py +++ b/stix2/test/v20/test_utils.py @@ -114,7 +114,7 @@ def test_deduplicate(stix_objs1): mods = [obj['modified'] for obj in unique] assert "indicator--00000000-0000-4000-8000-000000000001" in ids - assert "indicator--00000000-0000-4000-8000-000000000001" in ids + assert "indicator--00000000-0000-4000-8000-000000000002" in ids assert "2017-01-27T13:49:53.935Z" in mods assert "2017-01-27T13:49:53.936Z" in mods diff --git a/stix2/test/v21/conftest.py 
b/stix2/test/v21/conftest.py index d602f42..a7a97a9 100644 --- a/stix2/test/v21/conftest.py +++ b/stix2/test/v21/conftest.py @@ -132,7 +132,13 @@ def stix_objs1(): "type": "indicator", "valid_from": "2017-01-27T13:49:53.935382Z", } - return [ind1, ind2, ind3, ind4, ind5] + sco = { + "type": "url", + "spec_version": "2.1", + "id": "url--cc1deced-d99b-4d72-9268-8182420cb2fd", + "value": "http://example.com/" + } + return [ind1, ind2, ind3, ind4, ind5, sco] @pytest.fixture diff --git a/stix2/test/v21/test_datastore_composite.py b/stix2/test/v21/test_datastore_composite.py index 76119c3..c6128e5 100644 --- a/stix2/test/v21/test_datastore_composite.py +++ b/stix2/test/v21/test_datastore_composite.py @@ -59,6 +59,17 @@ def test_composite_datasource_operations(stix_objs1, stix_objs2): assert indicator["modified"] == parse_into_datetime("2017-01-31T13:49:53.935Z") assert indicator["type"] == "indicator" + sco = cds1.get("url--cc1deced-d99b-4d72-9268-8182420cb2fd") + assert sco["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd" + + scos = cds1.all_versions("url--cc1deced-d99b-4d72-9268-8182420cb2fd") + assert len(scos) == 1 + assert scos[0]["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd" + + scos = cds1.query([Filter("value", "=", "http://example.com/")]) + assert len(scos) == 1 + assert scos[0]["id"] == "url--cc1deced-d99b-4d72-9268-8182420cb2fd" + query1 = [ Filter("type", "=", "indicator"), ] diff --git a/stix2/test/v21/test_utils.py b/stix2/test/v21/test_utils.py index 03477aa..f64cec2 100644 --- a/stix2/test/v21/test_utils.py +++ b/stix2/test/v21/test_utils.py @@ -104,17 +104,18 @@ def test_get_type_from_id(stix_id, type): def test_deduplicate(stix_objs1): unique = stix2.utils.deduplicate(stix_objs1) - # Only 3 objects are unique - # 2 id's vary + # Only 4 objects are unique + # 3 id's vary # 2 modified times vary for a particular id - assert len(unique) == 3 + assert len(unique) == 4 ids = [obj['id'] for obj in unique] - mods = [obj['modified'] for obj in 
unique] + mods = [obj.get('modified') for obj in unique] assert "indicator--00000000-0000-4000-8000-000000000001" in ids - assert "indicator--00000000-0000-4000-8000-000000000001" in ids + assert "indicator--00000000-0000-4000-8000-000000000002" in ids + assert "url--cc1deced-d99b-4d72-9268-8182420cb2fd" in ids assert "2017-01-27T13:49:53.935Z" in mods assert "2017-01-27T13:49:53.936Z" in mods diff --git a/stix2/utils.py b/stix2/utils.py index f741581..1b88f72 100644 --- a/stix2/utils.py +++ b/stix2/utils.py @@ -132,11 +132,12 @@ def deduplicate(stix_obj_list): unique_objs = {} for obj in stix_obj_list: - try: - unique_objs[(obj['id'], obj['modified'])] = obj - except KeyError: - # Handle objects with no `modified` property, e.g. marking-definition - unique_objs[(obj['id'], obj['created'])] = obj + ver = obj.get("modified") or obj.get("created") + + if ver is None: + unique_objs[obj["id"]] = obj + else: + unique_objs[(obj['id'], ver)] = obj return list(unique_objs.values()) From 3c25410a9de7ee70e4b42d6a82f710d5f78d302f Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Wed, 19 Aug 2020 11:51:47 -0400 Subject: [PATCH 2/9] pre-commit stylistic fix --- stix2/test/v21/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stix2/test/v21/conftest.py b/stix2/test/v21/conftest.py index a7a97a9..6efcf39 100644 --- a/stix2/test/v21/conftest.py +++ b/stix2/test/v21/conftest.py @@ -136,7 +136,7 @@ def stix_objs1(): "type": "url", "spec_version": "2.1", "id": "url--cc1deced-d99b-4d72-9268-8182420cb2fd", - "value": "http://example.com/" + "value": "http://example.com/", } return [ind1, ind2, ind3, ind4, ind5, sco] From fb705c4885179bd9be94deb4362c3e42ed877fa2 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 11:35:26 -0400 Subject: [PATCH 3/9] Graph Equivalence (#449) * new packages for graph and object-based semantic equivalence * new method graphically_equivalent for Environment, move equivalence methods out * object 
equivalence function, methods used for object-based moved here. * new graph_equivalence methods * add notes * add support for versioning checks (default disabled) * new tests to cover graph equivalence and new methods * added more imports to environment.py to prevent breaking changes * variable changes, new fields for checks, reset depth check per call * flexibility when object is not available on graph. * refactor debug logging message * new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs * API documentation for new modules * additional text required to build docs * add more test methods for list_semantic_check an graphically_equivalent/versioning * add logging debug messages, code clean-up * include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score * include results as summary in prop_scores, minor tweaks * Update __init__.py doctrings update * apply feedback from pull request - rename semantic_check to reference_check - rename modules to graph and object respectively to eliminate redundancy - remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance * update docs/ entries * add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary update tests to reflect changes * rename package patterns -> pattern * documentation, moving weights around * more documentation moving * rename WEIGHTS variable for graph_equivalence --- .../graph/stix2.equivalence.graph.rst | 5 + .../object/stix2.equivalence.object.rst | 5 + ...equivalence.pattern.compare.comparison.rst | 5 + ...quivalence.pattern.compare.observation.rst | 5 + ...uivalence.pattern.transform.comparison.rst | 5 + ...ivalence.pattern.transform.observation.rst | 5 + ...equivalence.pattern.transform.specials.rst | 5 + ...quivalence.patterns.compare.comparison.rst | 5 - ...uivalence.patterns.compare.observation.rst | 
5 - ...ivalence.patterns.transform.comparison.rst | 5 - ...valence.patterns.transform.observation.rst | 5 - ...quivalence.patterns.transform.specials.rst | 5 - .../equivalence/stix2.equivalence.pattern.rst | 5 + .../stix2.equivalence.patterns.rst | 5 - stix2/environment.py | 346 ++------------ stix2/equivalence/__init__.py | 4 +- stix2/equivalence/graph/__init__.py | 136 ++++++ stix2/equivalence/object/__init__.py | 451 ++++++++++++++++++ .../{patterns => pattern}/__init__.py | 6 +- .../{patterns => pattern}/compare/__init__.py | 0 .../compare/comparison.py | 2 +- .../compare/observation.py | 4 +- .../transform/__init__.py | 0 .../transform/comparison.py | 8 +- .../transform/observation.py | 16 +- .../transform/specials.py | 2 +- stix2/test/test_pattern_equivalence.py | 2 +- stix2/test/v20/test_environment.py | 184 ++++++- stix2/test/v20/test_pattern_equivalence.py | 2 +- stix2/test/v21/test_environment.py | 189 +++++++- stix2/test/v21/test_pattern_equivalence.py | 2 +- 31 files changed, 1070 insertions(+), 354 deletions(-) create mode 100644 docs/api/equivalence/graph/stix2.equivalence.graph.rst create mode 100644 docs/api/equivalence/object/stix2.equivalence.object.rst create mode 100644 docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst create mode 100644 docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst delete mode 100644 docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst delete mode 100644 docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst delete mode 100644 
docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst delete mode 100644 docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst delete mode 100644 docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst create mode 100644 docs/api/equivalence/stix2.equivalence.pattern.rst delete mode 100644 docs/api/equivalence/stix2.equivalence.patterns.rst create mode 100644 stix2/equivalence/graph/__init__.py create mode 100644 stix2/equivalence/object/__init__.py rename stix2/equivalence/{patterns => pattern}/__init__.py (95%) rename stix2/equivalence/{patterns => pattern}/compare/__init__.py (100%) rename stix2/equivalence/{patterns => pattern}/compare/comparison.py (99%) rename stix2/equivalence/{patterns => pattern}/compare/observation.py (96%) rename stix2/equivalence/{patterns => pattern}/transform/__init__.py (100%) rename stix2/equivalence/{patterns => pattern}/transform/comparison.py (97%) rename stix2/equivalence/{patterns => pattern}/transform/observation.py (97%) rename stix2/equivalence/{patterns => pattern}/transform/specials.py (99%) diff --git a/docs/api/equivalence/graph/stix2.equivalence.graph.rst b/docs/api/equivalence/graph/stix2.equivalence.graph.rst new file mode 100644 index 0000000..deb3dde --- /dev/null +++ b/docs/api/equivalence/graph/stix2.equivalence.graph.rst @@ -0,0 +1,5 @@ +graph +===== + +.. automodule:: stix2.equivalence.graph + :members: diff --git a/docs/api/equivalence/object/stix2.equivalence.object.rst b/docs/api/equivalence/object/stix2.equivalence.object.rst new file mode 100644 index 0000000..41c8f52 --- /dev/null +++ b/docs/api/equivalence/object/stix2.equivalence.object.rst @@ -0,0 +1,5 @@ +object +====== + +.. 
automodule:: stix2.equivalence.object + :members: diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst new file mode 100644 index 0000000..0b886be --- /dev/null +++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst @@ -0,0 +1,5 @@ +comparison +============== + +.. automodule:: stix2.equivalence.pattern.compare.comparison + :members: diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst new file mode 100644 index 0000000..16bd619 --- /dev/null +++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst @@ -0,0 +1,5 @@ +observation +============== + +.. automodule:: stix2.equivalence.pattern.compare.observation + :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst new file mode 100644 index 0000000..6364561 --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst @@ -0,0 +1,5 @@ +comparison +============== + +.. automodule:: stix2.equivalence.pattern.transform.comparison + :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst new file mode 100644 index 0000000..f67fb1b --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst @@ -0,0 +1,5 @@ +observation +============== + +.. 
automodule:: stix2.equivalence.pattern.transform.observation + :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst new file mode 100644 index 0000000..8b61b69 --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst @@ -0,0 +1,5 @@ +specials +============== + +.. automodule:: stix2.equivalence.pattern.transform.specials + :members: diff --git a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst b/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst deleted file mode 100644 index 8e53da7..0000000 --- a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.comparison.rst +++ /dev/null @@ -1,5 +0,0 @@ -comparison -============== - -.. automodule:: stix2.equivalence.patterns.compare.comparison - :members: diff --git a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst b/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst deleted file mode 100644 index 1abd64e..0000000 --- a/docs/api/equivalence/patterns/compare/stix2.equivalence.patterns.compare.observation.rst +++ /dev/null @@ -1,5 +0,0 @@ -observation -============== - -.. automodule:: stix2.equivalence.patterns.compare.observation - :members: diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst deleted file mode 100644 index 2cf8388..0000000 --- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.comparison.rst +++ /dev/null @@ -1,5 +0,0 @@ -comparison -============== - -.. 
automodule:: stix2.equivalence.patterns.transform.comparison - :members: diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst deleted file mode 100644 index 1815e7e..0000000 --- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.observation.rst +++ /dev/null @@ -1,5 +0,0 @@ -observation -============== - -.. automodule:: stix2.equivalence.patterns.transform.observation - :members: diff --git a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst b/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst deleted file mode 100644 index 7930ae2..0000000 --- a/docs/api/equivalence/patterns/transform/stix2.equivalence.patterns.transform.specials.rst +++ /dev/null @@ -1,5 +0,0 @@ -specials -============== - -.. automodule:: stix2.equivalence.patterns.transform.specials - :members: diff --git a/docs/api/equivalence/stix2.equivalence.pattern.rst b/docs/api/equivalence/stix2.equivalence.pattern.rst new file mode 100644 index 0000000..5246ed2 --- /dev/null +++ b/docs/api/equivalence/stix2.equivalence.pattern.rst @@ -0,0 +1,5 @@ +pattern +============== + +.. automodule:: stix2.equivalence.pattern + :members: diff --git a/docs/api/equivalence/stix2.equivalence.patterns.rst b/docs/api/equivalence/stix2.equivalence.patterns.rst deleted file mode 100644 index 32377f1..0000000 --- a/docs/api/equivalence/stix2.equivalence.patterns.rst +++ /dev/null @@ -1,5 +0,0 @@ -patterns -============== - -.. 
automodule:: stix2.equivalence.patterns - :members: diff --git a/stix2/environment.py b/stix2/environment.py index 0a6bdb2..ea93a83 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -1,13 +1,18 @@ """Python STIX2 Environment API.""" import copy -import logging -import time from .datastore import CompositeDataSource, DataStoreMixin +from .equivalence.graph import graphically_equivalent +from .equivalence.object import ( # noqa: F401 + check_property_present, custom_pattern_based, exact_match, + list_reference_check, partial_external_reference_based, partial_list_based, + partial_location_distance, partial_string_based, partial_timestamp_based, + reference_check, semantically_equivalent, +) from .parsing import parse as _parse -from .utils import STIXdatetime, parse_into_datetime -logger = logging.getLogger(__name__) +# TODO: Remove all unused imports that now belong to the equivalence module in the next major release. +# Kept for backwards compatibility. class ObjectFactory(object): @@ -193,7 +198,7 @@ class Environment(DataStoreMixin): @staticmethod def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict): - """This method is meant to verify if two objects of the same type are + """This method verifies if two objects of the same type are semantically equivalent. Args: @@ -208,8 +213,11 @@ class Environment(DataStoreMixin): float: A number between 0.0 and 100.0 as a measurement of equivalence. Warning: - Course of Action, Intrusion-Set, Observed-Data, Report are not supported - by this implementation. Indicator pattern check is also limited. + Object types need to have property weights defined for the equivalence process. + Otherwise, those objects will not influence the final score. The WEIGHTS + dictionary under `stix2.equivalence.object` can give you an idea on how to add + new entries and pass them via the `weight_dict` argument. Similarly, the values + or methods can be fine tuned for a particular use case. 
Note: Default weights_dict: @@ -217,306 +225,46 @@ class Environment(DataStoreMixin): .. include:: ../default_sem_eq_weights.rst Note: - This implementation follows the Committee Note on semantic equivalence. + This implementation follows the Semantic Equivalence Committee Note. see `the Committee Note `__. """ - weights = WEIGHTS.copy() + return semantically_equivalent(obj1, obj2, prop_scores, **weight_dict) - if weight_dict: - weights.update(weight_dict) + @staticmethod + def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict): + """This method verifies if two graphs are semantically equivalent. + Each DataStore can contain a connected or disconnected graph and the + final result is weighted over the amount of objects we managed to compare. + This approach builds on top of the object-based semantic equivalence process + and each comparison can return a value between 0 and 100. - type1, type2 = obj1["type"], obj2["type"] - ignore_spec_version = weights["_internal"]["ignore_spec_version"] + Args: + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + prop_scores: A dictionary that can hold individual property scores, + weights, contributing score, matching score and sum of weights. + weight_dict: A dictionary that can be used to override settings + in the semantic equivalence process - if type1 != type2: - raise ValueError('The objects to compare must be of the same type!') + Returns: + float: A number between 0.0 and 100.0 as a measurement of equivalence. - if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"): - raise ValueError('The objects to compare must be of the same spec version!') + Warning: + Object types need to have property weights defined for the equivalence process. + Otherwise, those objects will not influence the final score. 
The WEIGHTS + dictionary under `stix2.equivalence.graph` can give you an idea on how to add + new entries and pass them via the `weight_dict` argument. Similarly, the values + or methods can be fine tuned for a particular use case. - try: - weights[type1] - except KeyError: - logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1) - sum_weights = matching_score = 0 - else: - try: - method = weights[type1]["method"] - except KeyError: - logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"]) - matching_score = 0.0 - sum_weights = 0.0 + Note: + Default weights_dict: - for prop in weights[type1]: - if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude": - w = weights[type1][prop][0] - comp_funct = weights[type1][prop][1] + .. include:: ../default_sem_eq_weights.rst - if comp_funct == partial_timestamp_based: - contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"]) - elif comp_funct == partial_location_distance: - threshold = weights[type1]["threshold"] - contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) - else: - contributing_score = w * comp_funct(obj1[prop], obj2[prop]) + Note: + This implementation follows the Semantic Equivalence Committee Note. + see `the Committee Note `__. 
- sum_weights += w - matching_score += contributing_score - - prop_scores[prop] = { - "weight": w, - "contributing_score": contributing_score, - } - logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score) - - prop_scores["matching_score"] = matching_score - prop_scores["sum_weights"] = sum_weights - logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights) - else: - logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"]) - try: - matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1]) - except TypeError: - # method doesn't support detailed output with prop_scores - matching_score, sum_weights = method(obj1, obj2, **weights[type1]) - logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights) - - if sum_weights <= 0: - return 0 - equivalence_score = (matching_score / sum_weights) * 100.0 - return equivalence_score - - -def check_property_present(prop, obj1, obj2): - """Helper method checks if a property is present on both objects.""" - if prop in obj1 and prop in obj2: - return True - return False - - -def partial_timestamp_based(t1, t2, tdelta): - """Performs a timestamp-based matching via checking how close one timestamp is to another. - - Args: - t1: A datetime string or STIXdatetime object. - t2: A datetime string or STIXdatetime object. - tdelta (float): A given time delta. This number is multiplied by 86400 (1 day) to - extend or shrink your time change tolerance. - - Returns: - float: Number between 0.0 and 1.0 depending on match criteria. 
- - """ - if not isinstance(t1, STIXdatetime): - t1 = parse_into_datetime(t1) - if not isinstance(t2, STIXdatetime): - t2 = parse_into_datetime(t2) - t1, t2 = time.mktime(t1.timetuple()), time.mktime(t2.timetuple()) - result = 1 - min(abs(t1 - t2) / (86400 * tdelta), 1) - logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", t1, t2, tdelta, result) - return result - - -def partial_list_based(l1, l2): - """Performs a partial list matching via finding the intersection between common values. - - Args: - l1: A list of values. - l2: A list of values. - - Returns: - float: 1.0 if the value matches exactly, 0.0 otherwise. - - """ - l1_set, l2_set = set(l1), set(l2) - result = len(l1_set.intersection(l2_set)) / max(len(l1), len(l2)) - logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result) - return result - - -def exact_match(val1, val2): - """Performs an exact value match based on two values - - Args: - val1: A value suitable for an equality test. - val2: A value suitable for an equality test. - - Returns: - float: 1.0 if the value matches exactly, 0.0 otherwise. - - """ - result = 0.0 - if val1 == val2: - result = 1.0 - logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result) - return result - - -def partial_string_based(str1, str2): - """Performs a partial string match using the Jaro-Winkler distance algorithm. - - Args: - str1: A string value to check. - str2: A string value to check. - - Returns: - float: Number between 0.0 and 1.0 depending on match criteria. - - """ - from rapidfuzz import fuzz - result = fuzz.token_sort_ratio(str1, str2) - logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, result) - return result / 100.0 - - -def custom_pattern_based(pattern1, pattern2): - """Performs a matching on Indicator Patterns. 
- - Args: - pattern1: An Indicator pattern - pattern2: An Indicator pattern - - Returns: - float: Number between 0.0 and 1.0 depending on match criteria. - - """ - logger.warning("Indicator pattern equivalence is not fully defined; will default to zero if not completely identical") - return exact_match(pattern1, pattern2) # TODO: Implement pattern based equivalence - - -def partial_external_reference_based(refs1, refs2): - """Performs a matching on External References. - - Args: - refs1: A list of external references. - refs2: A list of external references. - - Returns: - float: Number between 0.0 and 1.0 depending on matches. - - """ - allowed = set(("veris", "cve", "capec", "mitre-attack")) - matches = 0 - - if len(refs1) >= len(refs2): - l1 = refs1 - l2 = refs2 - else: - l1 = refs2 - l2 = refs1 - - for ext_ref1 in l1: - for ext_ref2 in l2: - sn_match = False - ei_match = False - url_match = False - source_name = None - - if check_property_present("source_name", ext_ref1, ext_ref2): - if ext_ref1["source_name"] == ext_ref2["source_name"]: - source_name = ext_ref1["source_name"] - sn_match = True - if check_property_present("external_id", ext_ref1, ext_ref2): - if ext_ref1["external_id"] == ext_ref2["external_id"]: - ei_match = True - if check_property_present("url", ext_ref1, ext_ref2): - if ext_ref1["url"] == ext_ref2["url"]: - url_match = True - - # Special case: if source_name is a STIX defined name and either - # external_id or url match then its a perfect match and other entries - # can be ignored. - if sn_match and (ei_match or url_match) and source_name in allowed: - result = 1.0 - logger.debug( - "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", - refs1, refs2, result, - ) - return result - - # Regular check. If the source_name (not STIX-defined) or external_id or - # url matches then we consider the entry a match. 
- if (sn_match or ei_match or url_match) and source_name not in allowed: - matches += 1 - - result = matches / max(len(refs1), len(refs2)) - logger.debug( - "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", - refs1, refs2, result, - ) - return result - - -def partial_location_distance(lat1, long1, lat2, long2, threshold): - """Given two coordinates perform a matching based on its distance using the Haversine Formula. - - Args: - lat1: Latitude value for first coordinate point. - lat2: Latitude value for second coordinate point. - long1: Longitude value for first coordinate point. - long2: Longitude value for second coordinate point. - threshold (float): A kilometer measurement for the threshold distance between these two points. - - Returns: - float: Number between 0.0 and 1.0 depending on match. - - """ - from haversine import Unit, haversine - distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS) - result = 1 - (distance / threshold) - logger.debug( - "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'", - (lat1, long1), (lat2, long2), threshold, result, - ) - return result - - -# default weights used for the semantic equivalence process -WEIGHTS = { - "attack-pattern": { - "name": (30, partial_string_based), - "external_references": (70, partial_external_reference_based), - }, - "campaign": { - "name": (60, partial_string_based), - "aliases": (40, partial_list_based), - }, - "identity": { - "name": (60, partial_string_based), - "identity_class": (20, exact_match), - "sectors": (20, partial_list_based), - }, - "indicator": { - "indicator_types": (15, partial_list_based), - "pattern": (80, custom_pattern_based), - "valid_from": (5, partial_timestamp_based), - "tdelta": 1, # One day interval - }, - "location": { - "longitude_latitude": (34, partial_location_distance), - "region": (33, exact_match), - "country": (33, exact_match), - "threshold": 1000.0, - }, - "malware": { - "malware_types": (20, 
partial_list_based), - "name": (80, partial_string_based), - }, - "threat-actor": { - "name": (60, partial_string_based), - "threat_actor_types": (20, partial_list_based), - "aliases": (20, partial_list_based), - }, - "tool": { - "tool_types": (20, partial_list_based), - "name": (80, partial_string_based), - }, - "vulnerability": { - "name": (30, partial_string_based), - "external_references": (70, partial_external_reference_based), - }, - "_internal": { - "ignore_spec_version": False, - }, -} #: :autodoc-skip: + """ + return graphically_equivalent(ds1, ds2, prop_scores, **weight_dict) diff --git a/stix2/equivalence/__init__.py b/stix2/equivalence/__init__.py index c67e649..f175024 100644 --- a/stix2/equivalence/__init__.py +++ b/stix2/equivalence/__init__.py @@ -3,7 +3,9 @@ .. autosummary:: :toctree: equivalence - patterns + pattern + graph + object | """ diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py new file mode 100644 index 0000000..3c8730a --- /dev/null +++ b/stix2/equivalence/graph/__init__.py @@ -0,0 +1,136 @@ +import logging + +from ..object import ( + WEIGHTS, exact_match, list_reference_check, partial_string_based, + partial_timestamp_based, reference_check, semantically_equivalent, +) + +logger = logging.getLogger(__name__) + + +def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict): + """This method verifies if two graphs are semantically equivalent. + Each DataStore can contain a connected or disconnected graph and the + final result is weighted over the amount of objects we managed to compare. + This approach builds on top of the object-based semantic equivalence process + and each comparison can return a value between 0 and 100. + + Args: + ds1: A DataStore object instance representing your graph + ds2: A DataStore object instance representing your graph + prop_scores: A dictionary that can hold individual property scores, + weights, contributing score, matching score and sum of weights. 
+ weight_dict: A dictionary that can be used to override settings + in the semantic equivalence process + + Returns: + float: A number between 0.0 and 100.0 as a measurement of equivalence. + + Warning: + Object types need to have property weights defined for the equivalence process. + Otherwise, those objects will not influence the final score. The WEIGHTS + dictionary under `stix2.equivalence.graph` can give you an idea on how to add + new entries and pass them via the `weight_dict` argument. Similarly, the values + or methods can be fine tuned for a particular use case. + + Note: + Default weights_dict: + + .. include:: ../default_sem_eq_weights.rst + + Note: + This implementation follows the Semantic Equivalence Committee Note. + see `the Committee Note `__. + + """ + weights = GRAPH_WEIGHTS.copy() + + if weight_dict: + weights.update(weight_dict) + + results = {} + depth = weights["_internal"]["max_depth"] + + graph1 = ds1.query([]) + graph2 = ds2.query([]) + + graph1.sort(key=lambda x: x["type"]) + graph2.sort(key=lambda x: x["type"]) + + if len(graph1) < len(graph2): + weights["_internal"]["ds1"] = ds1 + weights["_internal"]["ds2"] = ds2 + g1 = graph1 + g2 = graph2 + else: + weights["_internal"]["ds1"] = ds2 + weights["_internal"]["ds2"] = ds1 + g1 = graph2 + g2 = graph1 + + for object1 in g1: + for object2 in g2: + if object1["type"] == object2["type"] and object1["type"] in weights: + iprop_score = {} + result = semantically_equivalent(object1, object2, iprop_score, **weights) + objects1_id = object1["id"] + weights["_internal"]["max_depth"] = depth + + if objects1_id not in results: + results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result} + elif result > results[objects1_id]["value"]: + results[objects1_id] = {"matched": object2["id"], "prop_score": iprop_score, "value": result} + + equivalence_score = 0 + matching_score = sum(x["value"] for x in results.values()) + sum_weights = len(results) * 100.0 + if sum_weights 
> 0: + equivalence_score = (matching_score / sum_weights) * 100 + prop_scores["matching_score"] = matching_score + prop_scores["sum_weights"] = sum_weights + prop_scores["summary"] = results + + logger.debug( + "DONE\t\tSUM_WEIGHT: %.2f\tMATCHING_SCORE: %.2f\t SCORE: %.2f", + sum_weights, + matching_score, + equivalence_score, + ) + return equivalence_score + + +# default weights used for the graph semantic equivalence process +GRAPH_WEIGHTS = WEIGHTS.copy() +GRAPH_WEIGHTS.update({ + "grouping": { + "name": (20, partial_string_based), + "context": (20, partial_string_based), + "object_refs": (60, list_reference_check), + }, + "relationship": { + "relationship_type": (20, exact_match), + "source_ref": (40, reference_check), + "target_ref": (40, reference_check), + }, + "report": { + "name": (30, partial_string_based), + "published": (10, partial_timestamp_based), + "object_refs": (60, list_reference_check), + "tdelta": 1, # One day interval + }, + "sighting": { + "first_seen": (5, partial_timestamp_based), + "last_seen": (5, partial_timestamp_based), + "sighting_of_ref": (40, reference_check), + "observed_data_refs": (20, list_reference_check), + "where_sighted_refs": (20, list_reference_check), + "summary": (10, exact_match), + }, + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "ds1": None, + "ds2": None, + "max_depth": 1, + }, +}) #: :autodoc-skip: diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py new file mode 100644 index 0000000..c24fa3c --- /dev/null +++ b/stix2/equivalence/object/__init__.py @@ -0,0 +1,451 @@ +import logging +import time + +from ...datastore import Filter +from ...utils import STIXdatetime, parse_into_datetime + +logger = logging.getLogger(__name__) + + +def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict): + """This method verifies if two objects of the same type are + semantically equivalent. 
+ + Args: + obj1: A stix2 object instance + obj2: A stix2 object instance + prop_scores: A dictionary that can hold individual property scores, + weights, contributing score, matching score and sum of weights. + weight_dict: A dictionary that can be used to override settings + in the semantic equivalence process + + Returns: + float: A number between 0.0 and 100.0 as a measurement of equivalence. + + Warning: + Object types need to have property weights defined for the equivalence process. + Otherwise, those objects will not influence the final score. The WEIGHTS + dictionary under `stix2.equivalence.object` can give you an idea on how to add + new entries and pass them via the `weight_dict` argument. Similarly, the values + or methods can be fine tuned for a particular use case. + + Note: + Default weights_dict: + + .. include:: ../default_sem_eq_weights.rst + + Note: + This implementation follows the Semantic Equivalence Committee Note. + see `the Committee Note `__. + + """ + weights = WEIGHTS.copy() + + if weight_dict: + weights.update(weight_dict) + + type1, type2 = obj1["type"], obj2["type"] + ignore_spec_version = weights["_internal"]["ignore_spec_version"] + + if type1 != type2: + raise ValueError('The objects to compare must be of the same type!') + + if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"): + raise ValueError('The objects to compare must be of the same spec version!') + + try: + weights[type1] + except KeyError: + logger.warning("'%s' type has no 'weights' dict specified & thus no semantic equivalence method to call!", type1) + sum_weights = matching_score = 0 + else: + try: + method = weights[type1]["method"] + except KeyError: + logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"]) + matching_score = 0.0 + sum_weights = 0.0 + + for prop in weights[type1]: + if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude": + w = 
weights[type1][prop][0] + comp_funct = weights[type1][prop][1] + + if comp_funct == partial_timestamp_based: + contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"]) + elif comp_funct == partial_location_distance: + threshold = weights[type1]["threshold"] + contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold) + elif comp_funct == reference_check or comp_funct == list_reference_check: + max_depth = weights["_internal"]["max_depth"] + if max_depth < 0: + continue # prevent excessive recursion + else: + weights["_internal"]["max_depth"] -= 1 + ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"] + contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights) + else: + contributing_score = w * comp_funct(obj1[prop], obj2[prop]) + + sum_weights += w + matching_score += contributing_score + + prop_scores[prop] = { + "weight": w, + "contributing_score": contributing_score, + } + logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score) + + prop_scores["matching_score"] = matching_score + prop_scores["sum_weights"] = sum_weights + logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights) + else: + logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"]) + try: + matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1]) + except TypeError: + # method doesn't support detailed output with prop_scores + matching_score, sum_weights = method(obj1, obj2, **weights[type1]) + logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights) + + if sum_weights <= 0: + return 0 + equivalence_score = (matching_score / sum_weights) * 100.0 + return equivalence_score + + +def check_property_present(prop, obj1, obj2): + """Helper method checks if a property is present on both objects.""" + if prop in obj1 and prop 
in obj2: + return True + return False + + +def partial_timestamp_based(t1, t2, tdelta): + """Performs a timestamp-based matching via checking how close one timestamp is to another. + + Args: + t1: A datetime string or STIXdatetime object. + t2: A datetime string or STIXdatetime object. + tdelta (float): A given time delta. This number is multiplied by 86400 (1 day) to + extend or shrink your time change tolerance. + + Returns: + float: Number between 0.0 and 1.0 depending on match criteria. + + """ + if not isinstance(t1, STIXdatetime): + t1 = parse_into_datetime(t1) + if not isinstance(t2, STIXdatetime): + t2 = parse_into_datetime(t2) + t1, t2 = time.mktime(t1.timetuple()), time.mktime(t2.timetuple()) + result = 1 - min(abs(t1 - t2) / (86400 * tdelta), 1) + logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", t1, t2, tdelta, result) + return result + + +def partial_list_based(l1, l2): + """Performs a partial list matching via finding the intersection between common values. + + Args: + l1: A list of values. + l2: A list of values. + + Returns: + float: 1.0 if the value matches exactly, 0.0 otherwise. + + """ + l1_set, l2_set = set(l1), set(l2) + result = len(l1_set.intersection(l2_set)) / max(len(l1_set), len(l2_set)) + logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result) + return result + + +def exact_match(val1, val2): + """Performs an exact value match based on two values + + Args: + val1: A value suitable for an equality test. + val2: A value suitable for an equality test. + + Returns: + float: 1.0 if the value matches exactly, 0.0 otherwise. + + """ + result = 0.0 + if val1 == val2: + result = 1.0 + logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result) + return result + + +def partial_string_based(str1, str2): + """Performs a partial string match using the Jaro-Winkler distance algorithm. + + Args: + str1: A string value to check. + str2: A string value to check. 
+ + Returns: + float: Number between 0.0 and 1.0 depending on match criteria. + + """ + from rapidfuzz import fuzz + result = fuzz.token_sort_ratio(str1, str2) + logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, result) + return result / 100.0 + + +def custom_pattern_based(pattern1, pattern2): + """Performs a matching on Indicator Patterns. + + Args: + pattern1: An Indicator pattern + pattern2: An Indicator pattern + + Returns: + float: Number between 0.0 and 1.0 depending on match criteria. + + """ + logger.warning("Indicator pattern equivalence is not fully defined; will default to zero if not completely identical") + return exact_match(pattern1, pattern2) # TODO: Implement pattern based equivalence + + +def partial_external_reference_based(refs1, refs2): + """Performs a matching on External References. + + Args: + refs1: A list of external references. + refs2: A list of external references. + + Returns: + float: Number between 0.0 and 1.0 depending on matches. + + """ + allowed = {"veris", "cve", "capec", "mitre-attack"} + matches = 0 + + if len(refs1) >= len(refs2): + l1 = refs1 + l2 = refs2 + else: + l1 = refs2 + l2 = refs1 + + for ext_ref1 in l1: + for ext_ref2 in l2: + sn_match = False + ei_match = False + url_match = False + source_name = None + + if check_property_present("source_name", ext_ref1, ext_ref2): + if ext_ref1["source_name"] == ext_ref2["source_name"]: + source_name = ext_ref1["source_name"] + sn_match = True + if check_property_present("external_id", ext_ref1, ext_ref2): + if ext_ref1["external_id"] == ext_ref2["external_id"]: + ei_match = True + if check_property_present("url", ext_ref1, ext_ref2): + if ext_ref1["url"] == ext_ref2["url"]: + url_match = True + + # Special case: if source_name is a STIX defined name and either + # external_id or url match then its a perfect match and other entries + # can be ignored. 
+ if sn_match and (ei_match or url_match) and source_name in allowed: + result = 1.0 + logger.debug( + "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", + refs1, refs2, result, + ) + return result + + # Regular check. If the source_name (not STIX-defined) or external_id or + # url matches then we consider the entry a match. + if (sn_match or ei_match or url_match) and source_name not in allowed: + matches += 1 + + result = matches / max(len(refs1), len(refs2)) + logger.debug( + "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'", + refs1, refs2, result, + ) + return result + + +def partial_location_distance(lat1, long1, lat2, long2, threshold): + """Given two coordinates perform a matching based on its distance using the Haversine Formula. + + Args: + lat1: Latitude value for first coordinate point. + lat2: Latitude value for second coordinate point. + long1: Longitude value for first coordinate point. + long2: Longitude value for second coordinate point. + threshold (float): A kilometer measurement for the threshold distance between these two points. + + Returns: + float: Number between 0.0 and 1.0 depending on match. + + """ + from haversine import Unit, haversine + distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS) + result = 1 - (distance / threshold) + logger.debug( + "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'", + (lat1, long1), (lat2, long2), threshold, result, + ) + return result + + +def _versioned_checks(ref1, ref2, ds1, ds2, **weights): + """Checks multiple object versions if present in graph. 
+ Maximizes for the semantic equivalence score of a particular version.""" + results = {} + objects1 = ds1.query([Filter("id", "=", ref1)]) + objects2 = ds2.query([Filter("id", "=", ref2)]) + + if len(objects1) > 0 and len(objects2) > 0: + for o1 in objects1: + for o2 in objects2: + result = semantically_equivalent(o1, o2, **weights) + if ref1 not in results: + results[ref1] = {"matched": ref2, "value": result} + elif result > results[ref1]["value"]: + results[ref1] = {"matched": ref2, "value": result} + result = results.get(ref1, {}).get("value", 0.0) + logger.debug( + "--\t\t_versioned_checks '%s' '%s'\tresult: '%s'", + ref1, ref2, result, + ) + return result + + +def reference_check(ref1, ref2, ds1, ds2, **weights): + """For two references, de-reference the object and perform object-based + semantic equivalence. The score influences the result of an edge check.""" + type1, type2 = ref1.split("--")[0], ref2.split("--")[0] + result = 0.0 + + if type1 == type2: + if weights["_internal"]["versioning_checks"]: + result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0 + else: + o1, o2 = ds1.get(ref1), ds2.get(ref2) + if o1 and o2: + result = semantically_equivalent(o1, o2, **weights) / 100.0 + + logger.debug( + "--\t\treference_check '%s' '%s'\tresult: '%s'", + ref1, ref2, result, + ) + return result + + +def list_reference_check(refs1, refs2, ds1, ds2, **weights): + """For objects that contain multiple references (i.e., object_refs) perform + the same de-reference procedure and perform object-based semantic equivalence. + The score influences the objects containing these references. 
The result is + weighted on the amount of unique objects that could 1) be de-referenced 2) have a matching type."""
 + results = {} + if len(refs1) >= len(refs2): + l1 = refs1 + l2 = refs2 + b1 = ds1 + b2 = ds2 + else: + l1 = refs2 + l2 = refs1 + b1 = ds2 + b2 = ds1 + + l1.sort() + l2.sort() + + for ref1 in l1: + for ref2 in l2: + type1, type2 = ref1.split("--")[0], ref2.split("--")[0] + if type1 == type2: + score = reference_check(ref1, ref2, b1, b2, **weights) * 100.0 + + if ref1 not in results: + results[ref1] = {"matched": ref2, "value": score} + elif score > results[ref1]["value"]: + results[ref1] = {"matched": ref2, "value": score} + + result = 0.0 + total_sum = sum(x["value"] for x in results.values()) + max_score = len(results) * 100.0 + + if max_score > 0: + result = total_sum / max_score + + logger.debug( + "--\t\tlist_reference_check '%s' '%s'\ttotal_sum: '%s'\tmax_score: '%s'\tresult: '%s'", + refs1, refs2, total_sum, max_score, result, + ) + return result + + +# default weights used for the semantic equivalence process +WEIGHTS = { + "attack-pattern": { + "name": (30, partial_string_based), + "external_references": (70, partial_external_reference_based), + }, + "campaign": { + "name": (60, partial_string_based), + "aliases": (40, partial_list_based), + }, + "course-of-action": { + "name": (60, partial_string_based), + "external_references": (40, partial_external_reference_based), + }, + "identity": { + "name": (60, partial_string_based), + "identity_class": (20, exact_match), + "sectors": (20, partial_list_based), + }, + "indicator": { + "indicator_types": (15, partial_list_based), + "pattern": (80, custom_pattern_based), + "valid_from": (5, partial_timestamp_based), + "tdelta": 1, # One day interval + }, + "intrusion-set": { + "name": (20, partial_string_based), + "external_references": (60, partial_external_reference_based), + "aliases": (20, partial_list_based), + }, + "location": { + "longitude_latitude": (34, partial_location_distance), + "region": (33, exact_match), + 
"country": (33, exact_match), + "threshold": 1000.0, + }, + "malware": { + "malware_types": (20, partial_list_based), + "name": (80, partial_string_based), + }, + "marking-definition": { + "name": (20, exact_match), + "definition": (60, exact_match), + "definition_type": (20, exact_match), + }, + "threat-actor": { + "name": (60, partial_string_based), + "threat_actor_types": (20, partial_list_based), + "aliases": (20, partial_list_based), + }, + "tool": { + "tool_types": (20, partial_list_based), + "name": (80, partial_string_based), + }, + "vulnerability": { + "name": (30, partial_string_based), + "external_references": (70, partial_external_reference_based), + }, + "_internal": { + "ignore_spec_version": False, + }, +} #: :autodoc-skip: diff --git a/stix2/equivalence/patterns/__init__.py b/stix2/equivalence/pattern/__init__.py similarity index 95% rename from stix2/equivalence/patterns/__init__.py rename to stix2/equivalence/pattern/__init__.py index 85ec9ab..10494f7 100644 --- a/stix2/equivalence/patterns/__init__.py +++ b/stix2/equivalence/pattern/__init__.py @@ -10,13 +10,13 @@ """ import stix2 -from stix2.equivalence.patterns.compare.observation import ( +from stix2.equivalence.pattern.compare.observation import ( observation_expression_cmp, ) -from stix2.equivalence.patterns.transform import ( +from stix2.equivalence.pattern.transform import ( ChainTransformer, SettleTransformer, ) -from stix2.equivalence.patterns.transform.observation import ( +from stix2.equivalence.pattern.transform.observation import ( AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer, DNFTransformer, FlattenTransformer, OrderDedupeTransformer, ) diff --git a/stix2/equivalence/patterns/compare/__init__.py b/stix2/equivalence/pattern/compare/__init__.py similarity index 100% rename from stix2/equivalence/patterns/compare/__init__.py rename to stix2/equivalence/pattern/compare/__init__.py diff --git a/stix2/equivalence/patterns/compare/comparison.py 
b/stix2/equivalence/pattern/compare/comparison.py similarity index 99% rename from stix2/equivalence/patterns/compare/comparison.py rename to stix2/equivalence/pattern/compare/comparison.py index ed717fc..e412705 100644 --- a/stix2/equivalence/patterns/compare/comparison.py +++ b/stix2/equivalence/pattern/compare/comparison.py @@ -4,7 +4,7 @@ Comparison utilities for STIX pattern comparison expressions. import base64 import functools -from stix2.equivalence.patterns.compare import generic_cmp, iter_lex_cmp +from stix2.equivalence.pattern.compare import generic_cmp, iter_lex_cmp from stix2.patterns import ( AndBooleanExpression, BinaryConstant, BooleanConstant, FloatConstant, HexConstant, IntegerConstant, ListConstant, ListObjectPathComponent, diff --git a/stix2/equivalence/patterns/compare/observation.py b/stix2/equivalence/pattern/compare/observation.py similarity index 96% rename from stix2/equivalence/patterns/compare/observation.py rename to stix2/equivalence/pattern/compare/observation.py index 227b8ae..8df9e3f 100644 --- a/stix2/equivalence/patterns/compare/observation.py +++ b/stix2/equivalence/pattern/compare/observation.py @@ -1,8 +1,8 @@ """ Comparison utilities for STIX pattern observation expressions. 
""" -from stix2.equivalence.patterns.compare import generic_cmp, iter_lex_cmp -from stix2.equivalence.patterns.compare.comparison import ( +from stix2.equivalence.pattern.compare import generic_cmp, iter_lex_cmp +from stix2.equivalence.pattern.compare.comparison import ( comparison_expression_cmp, generic_constant_cmp, ) from stix2.patterns import ( diff --git a/stix2/equivalence/patterns/transform/__init__.py b/stix2/equivalence/pattern/transform/__init__.py similarity index 100% rename from stix2/equivalence/patterns/transform/__init__.py rename to stix2/equivalence/pattern/transform/__init__.py diff --git a/stix2/equivalence/patterns/transform/comparison.py b/stix2/equivalence/pattern/transform/comparison.py similarity index 97% rename from stix2/equivalence/patterns/transform/comparison.py rename to stix2/equivalence/pattern/transform/comparison.py index 528cc9b..d0f431b 100644 --- a/stix2/equivalence/patterns/transform/comparison.py +++ b/stix2/equivalence/pattern/transform/comparison.py @@ -4,12 +4,12 @@ Transformation utilities for STIX pattern comparison expressions. 
import functools import itertools -from stix2.equivalence.patterns.compare import iter_in, iter_lex_cmp -from stix2.equivalence.patterns.compare.comparison import ( +from stix2.equivalence.pattern.compare import iter_in, iter_lex_cmp +from stix2.equivalence.pattern.compare.comparison import ( comparison_expression_cmp, ) -from stix2.equivalence.patterns.transform import Transformer -from stix2.equivalence.patterns.transform.specials import ( +from stix2.equivalence.pattern.transform import Transformer +from stix2.equivalence.pattern.transform.specials import ( ipv4_addr, ipv6_addr, windows_reg_key, ) from stix2.patterns import ( diff --git a/stix2/equivalence/patterns/transform/observation.py b/stix2/equivalence/pattern/transform/observation.py similarity index 97% rename from stix2/equivalence/patterns/transform/observation.py rename to stix2/equivalence/pattern/transform/observation.py index d4ee175..a8982cf 100644 --- a/stix2/equivalence/patterns/transform/observation.py +++ b/stix2/equivalence/pattern/transform/observation.py @@ -4,23 +4,23 @@ Transformation utilities for STIX pattern observation expressions. 
import functools import itertools -from stix2.equivalence.patterns.compare import iter_in, iter_lex_cmp -from stix2.equivalence.patterns.compare.observation import ( +from stix2.equivalence.pattern.compare import iter_in, iter_lex_cmp +from stix2.equivalence.pattern.compare.observation import ( observation_expression_cmp, ) -from stix2.equivalence.patterns.transform import ( +from stix2.equivalence.pattern.transform import ( ChainTransformer, SettleTransformer, Transformer, ) -from stix2.equivalence.patterns.transform.comparison import ( +from stix2.equivalence.pattern.transform.comparison import ( SpecialValueCanonicalization, ) -from stix2.equivalence.patterns.transform.comparison import \ +from stix2.equivalence.pattern.transform.comparison import \ AbsorptionTransformer as CAbsorptionTransformer -from stix2.equivalence.patterns.transform.comparison import \ +from stix2.equivalence.pattern.transform.comparison import \ DNFTransformer as CDNFTransformer -from stix2.equivalence.patterns.transform.comparison import \ +from stix2.equivalence.pattern.transform.comparison import \ FlattenTransformer as CFlattenTransformer -from stix2.equivalence.patterns.transform.comparison import \ +from stix2.equivalence.pattern.transform.comparison import \ OrderDedupeTransformer as COrderDedupeTransformer from stix2.patterns import ( AndObservationExpression, FollowedByObservationExpression, diff --git a/stix2/equivalence/patterns/transform/specials.py b/stix2/equivalence/pattern/transform/specials.py similarity index 99% rename from stix2/equivalence/patterns/transform/specials.py rename to stix2/equivalence/pattern/transform/specials.py index b95e6bf..d3611f3 100644 --- a/stix2/equivalence/patterns/transform/specials.py +++ b/stix2/equivalence/pattern/transform/specials.py @@ -3,7 +3,7 @@ Some simple comparison expression canonicalization functions. 
""" import socket -from stix2.equivalence.patterns.compare.comparison import ( +from stix2.equivalence.pattern.compare.comparison import ( object_path_to_raw_values, ) diff --git a/stix2/test/test_pattern_equivalence.py b/stix2/test/test_pattern_equivalence.py index 6fc2adf..431322f 100644 --- a/stix2/test/test_pattern_equivalence.py +++ b/stix2/test/test_pattern_equivalence.py @@ -1,6 +1,6 @@ import pytest -from stix2.equivalence.patterns import ( +from stix2.equivalence.pattern import ( equivalent_patterns, find_equivalent_patterns, ) diff --git a/stix2/test/v20/test_environment.py b/stix2/test/v20/test_environment.py index 5afb430..34ce596 100644 --- a/stix2/test/v20/test_environment.py +++ b/stix2/test/v20/test_environment.py @@ -1,6 +1,10 @@ +import os + import pytest import stix2 +import stix2.equivalence.graph +import stix2.equivalence.object from .constants import ( CAMPAIGN_ID, CAMPAIGN_KWARGS, FAKE_TIME, IDENTITY_ID, IDENTITY_KWARGS, @@ -8,6 +12,8 @@ from .constants import ( RELATIONSHIP_IDS, ) +FS_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "stix2_data") + @pytest.fixture def ds(): @@ -18,7 +24,42 @@ def ds(): rel1 = stix2.v20.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0]) rel2 = stix2.v20.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1]) rel3 = stix2.v20.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2]) - stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3] + reprt = stix2.v20.Report( + name="Malware Report", + published="2021-05-09T08:22:22Z", + labels=["campaign"], + object_refs=[mal.id, rel1.id, ind.id], + ) + stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3, reprt] + yield stix2.MemoryStore(stix_objs) + + +@pytest.fixture +def ds2(): + cam = stix2.v20.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS) + idy = stix2.v20.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS) + ind = stix2.v20.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS) + indv2 = ind.new_version(external_references=[{ + 
"source_name": "unknown", + "url": "https://examplewebsite.com/", + }]) + mal = stix2.v20.Malware(id=MALWARE_ID, created_by_ref=idy.id, **MALWARE_KWARGS) + malv2 = mal.new_version(external_references=[{ + "source_name": "unknown", + "url": "https://examplewebsite2.com/", + }]) + rel1 = stix2.v20.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0]) + rel2 = stix2.v20.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1]) + rel3 = stix2.v20.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2]) + stix_objs = [cam, idy, ind, indv2, mal, malv2, rel1, rel2, rel3] + reprt = stix2.v20.Report( + created_by_ref=idy.id, + name="example", + labels=["campaign"], + published="2021-04-09T08:22:22Z", + object_refs=stix_objs, + ) + stix_objs.append(reprt) yield stix2.MemoryStore(stix_objs) @@ -370,3 +411,144 @@ def test_related_to_by_target(ds): assert len(resp) == 2 assert any(x['id'] == CAMPAIGN_ID for x in resp) assert any(x['id'] == INDICATOR_ID for x in resp) + + +def test_versioned_checks(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": True, + "versioning_checks": True, + "max_depth": 1, + }, + }) + score = stix2.equivalence.object._versioned_checks(INDICATOR_ID, INDICATOR_ID, ds, ds2, **weights) + assert round(score) == 100 + + +def test_semantic_check_with_versioning(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": False, + "versioning_checks": True, + "ds1": ds, + "ds2": ds2, + "max_depth": 1, + }, + }) + ind = stix2.v20.Indicator( + **dict( + labels=["malicious-activity"], + pattern="[file:hashes.'SHA-256' = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855']", + valid_from="2017-01-01T12:34:56Z", + external_references=[ + { + "source_name": "unknown", + "url": "https://examplewebsite2.com/", + }, + ], + object_marking_refs=[stix2.v20.TLP_WHITE], + ) + ) + ds.add(ind) + score = 
stix2.equivalence.object.reference_check(ind.id, INDICATOR_ID, ds, ds2, **weights) + assert round(score) == 0 # Since pattern is different score is really low + + +def test_list_semantic_check(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "ds1": ds, + "ds2": ds2, + "max_depth": 1, + }, + }) + object_refs1 = [ + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd", + "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7", + ] + object_refs2 = [ + "campaign--8e2e2d2b-17d4-4cbf-938f-98ee46b3cd3f", + "identity--311b2d2d-f010-4473-83ec-1edf84858f4c", + "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7", + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd", + "relationship--181c9c09-43e6-45dd-9374-3bec192f05ef", + "relationship--a0cbb21c-8daf-4a7f-96aa-7155a4ef8f70", + ] + + score = stix2.equivalence.object.list_reference_check( + object_refs1, + object_refs2, + ds, + ds2, + **weights, + ) + assert round(score) == 1 + + +def test_graph_equivalence_with_filesystem_source(ds): + weights = { + "_internal": { + "ignore_spec_version": True, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + fs = stix2.FileSystemSource(FS_PATH) + env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights) + assert round(env) == 28 + assert round(prop_scores["matching_score"]) == 139 + assert round(prop_scores["sum_weights"]) == 500 + + +def test_graph_equivalence_with_duplicate_graph(ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights) + assert round(env) == 100 + assert round(prop_scores["matching_score"]) == 
800 + assert round(prop_scores["sum_weights"]) == 800 + + +def test_graph_equivalence_with_versioning_check_on(ds2, ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": True, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights) + assert round(env) == 93 + assert round(prop_scores["matching_score"]) == 745 + assert round(prop_scores["sum_weights"]) == 800 + + +def test_graph_equivalence_with_versioning_check_off(ds2, ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights) + assert round(env) == 93 + assert round(prop_scores["matching_score"]) == 745 + assert round(prop_scores["sum_weights"]) == 800 diff --git a/stix2/test/v20/test_pattern_equivalence.py b/stix2/test/v20/test_pattern_equivalence.py index 1ada5c7..aab8533 100644 --- a/stix2/test/v20/test_pattern_equivalence.py +++ b/stix2/test/v20/test_pattern_equivalence.py @@ -4,7 +4,7 @@ Pattern equivalence unit tests which use STIX 2.0-specific pattern features import pytest -from stix2.equivalence.patterns import equivalent_patterns +from stix2.equivalence.pattern import equivalent_patterns @pytest.mark.parametrize( diff --git a/stix2/test/v21/test_environment.py b/stix2/test/v21/test_environment.py index 7a7d4c1..95094fe 100644 --- a/stix2/test/v21/test_environment.py +++ b/stix2/test/v21/test_environment.py @@ -1,7 +1,11 @@ +import os + import pytest import stix2 import stix2.environment +import stix2.equivalence.graph +import stix2.equivalence.object import stix2.exceptions from .constants import ( @@ -12,6 +16,8 @@ from .constants import ( VULNERABILITY_ID, VULNERABILITY_KWARGS, ) +FS_PATH = os.path.join(os.path.dirname(os.path.realpath(__file__)), "stix2_data") + @pytest.fixture def ds(): @@ -22,7 +28,46 @@ def ds(): rel1 = 
stix2.v21.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0]) rel2 = stix2.v21.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1]) rel3 = stix2.v21.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2]) - stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3] + reprt = stix2.v21.Report( + name="Malware Report", published="2021-05-09T08:22:22Z", + object_refs=[mal.id, rel1.id, ind.id], + ) + stix_objs = [cam, idy, ind, mal, rel1, rel2, rel3, reprt] + yield stix2.MemoryStore(stix_objs) + + +@pytest.fixture +def ds2(): + cam = stix2.v21.Campaign(id=CAMPAIGN_ID, **CAMPAIGN_KWARGS) + idy = stix2.v21.Identity(id=IDENTITY_ID, **IDENTITY_KWARGS) + ind = stix2.v21.Indicator(id=INDICATOR_ID, created_by_ref=idy.id, **INDICATOR_KWARGS) + indv2 = ind.new_version( + external_references=[ + { + "source_name": "unknown", + "url": "https://examplewebsite.com/", + }, + ], + object_marking_refs=[stix2.v21.TLP_WHITE], + ) + mal = stix2.v21.Malware(id=MALWARE_ID, created_by_ref=idy.id, **MALWARE_KWARGS) + malv2 = mal.new_version( + external_references=[ + { + "source_name": "unknown", + "url": "https://examplewebsite2.com/", + }, + ], + ) + rel1 = stix2.v21.Relationship(ind, 'indicates', mal, id=RELATIONSHIP_IDS[0]) + rel2 = stix2.v21.Relationship(mal, 'targets', idy, id=RELATIONSHIP_IDS[1]) + rel3 = stix2.v21.Relationship(cam, 'uses', mal, id=RELATIONSHIP_IDS[2]) + stix_objs = [cam, idy, ind, indv2, mal, malv2, rel1, rel2, rel3] + reprt = stix2.v21.Report( + created_by_ref=idy.id, name="example", + published="2021-04-09T08:22:22Z", object_refs=stix_objs, + ) + stix_objs.append(reprt) yield stix2.MemoryStore(stix_objs) @@ -820,3 +865,145 @@ def test_semantic_equivalence_prop_scores_method_provided(): assert len(prop_scores) == 2 assert prop_scores["matching_score"] == 96.0 assert prop_scores["sum_weights"] == 100.0 + + +def test_versioned_checks(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": 
True, + "versioning_checks": True, + "max_depth": 1, + }, + }) + score = stix2.equivalence.object._versioned_checks(INDICATOR_ID, INDICATOR_ID, ds, ds2, **weights) + assert round(score) == 100 + + +def test_semantic_check_with_versioning(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": False, + "versioning_checks": True, + "ds1": ds, + "ds2": ds2, + "max_depth": 1, + }, + }) + ind = stix2.v21.Indicator( + **dict( + indicator_types=["malicious-activity"], + pattern_type="stix", + pattern="[file:hashes.'SHA-256' = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855']", + valid_from="2017-01-01T12:34:56Z", + external_references=[ + { + "source_name": "unknown", + "url": "https://examplewebsite2.com/", + }, + ], + object_marking_refs=[stix2.v21.TLP_WHITE], + ) + ) + ds.add(ind) + score = stix2.equivalence.object.reference_check(ind.id, INDICATOR_ID, ds, ds2, **weights) + assert round(score) == 0 # Since pattern is different score is really low + + +def test_list_semantic_check(ds, ds2): + weights = stix2.equivalence.graph.GRAPH_WEIGHTS.copy() + weights.update({ + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "ds1": ds, + "ds2": ds2, + "max_depth": 1, + }, + }) + object_refs1 = [ + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd", + "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7", + ] + object_refs2 = [ + "campaign--8e2e2d2b-17d4-4cbf-938f-98ee46b3cd3f", + "identity--311b2d2d-f010-4473-83ec-1edf84858f4c", + "indicator--a740531e-63ff-4e49-a9e1-a0a3eed0e3e7", + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "malware--9c4638ec-f1de-4ddb-abf4-1b760417654e", + "relationship--06520621-5352-4e6a-b976-e8fa3d437ffd", + "relationship--181c9c09-43e6-45dd-9374-3bec192f05ef", + "relationship--a0cbb21c-8daf-4a7f-96aa-7155a4ef8f70", + ] + + score = stix2.equivalence.object.list_reference_check( + 
object_refs1, + object_refs2, + ds, + ds2, + **weights, + ) + assert round(score) == 1 + + +def test_graph_equivalence_with_filesystem_source(ds): + weights = { + "_internal": { + "ignore_spec_version": True, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + fs = stix2.FileSystemSource(FS_PATH) + env = stix2.Environment().graphically_equivalent(fs, ds, prop_scores, **weights) + assert round(env) == 24 + assert round(prop_scores["matching_score"]) == 122 + assert round(prop_scores["sum_weights"]) == 500 + + +def test_graph_equivalence_with_duplicate_graph(ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds, prop_scores, **weights) + assert round(env) == 100 + assert round(prop_scores["matching_score"]) == 800 + assert round(prop_scores["sum_weights"]) == 800 + + +def test_graph_equivalence_with_versioning_check_on(ds2, ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": True, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights) + assert round(env) == 93 + assert round(prop_scores["matching_score"]) == 745 + assert round(prop_scores["sum_weights"]) == 800 + + +def test_graph_equivalence_with_versioning_check_off(ds2, ds): + weights = { + "_internal": { + "ignore_spec_version": False, + "versioning_checks": False, + "max_depth": 1, + }, + } + prop_scores = {} + env = stix2.Environment().graphically_equivalent(ds, ds2, prop_scores, **weights) + assert round(env) == 93 + assert round(prop_scores["matching_score"]) == 745 + assert round(prop_scores["sum_weights"]) == 800 diff --git a/stix2/test/v21/test_pattern_equivalence.py b/stix2/test/v21/test_pattern_equivalence.py index 71ded69..5f6b707 100644 --- a/stix2/test/v21/test_pattern_equivalence.py +++ 
b/stix2/test/v21/test_pattern_equivalence.py @@ -4,7 +4,7 @@ Pattern equivalence unit tests which use STIX 2.1+-specific pattern features import pytest -from stix2.equivalence.patterns import equivalent_patterns +from stix2.equivalence.pattern import equivalent_patterns @pytest.mark.parametrize( From 7d99a9dab28637e3515dc67cdad8c1f5829a5b6c Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 16:40:25 -0400 Subject: [PATCH 4/9] remove docs in wrong locations or directories --- docs/api/equivalence/graph/stix2.equivalence.graph.rst | 5 ----- docs/api/equivalence/object/stix2.equivalence.object.rst | 5 ----- .../compare/stix2.equivalence.pattern.compare.comparison.rst | 5 ----- .../stix2.equivalence.pattern.compare.observation.rst | 5 ----- .../stix2.equivalence.pattern.transform.comparison.rst | 5 ----- .../stix2.equivalence.pattern.transform.observation.rst | 5 ----- .../stix2.equivalence.pattern.transform.specials.rst | 5 ----- 7 files changed, 35 deletions(-) delete mode 100644 docs/api/equivalence/graph/stix2.equivalence.graph.rst delete mode 100644 docs/api/equivalence/object/stix2.equivalence.object.rst delete mode 100644 docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst delete mode 100644 docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst delete mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst delete mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst delete mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst diff --git a/docs/api/equivalence/graph/stix2.equivalence.graph.rst b/docs/api/equivalence/graph/stix2.equivalence.graph.rst deleted file mode 100644 index deb3dde..0000000 --- a/docs/api/equivalence/graph/stix2.equivalence.graph.rst +++ /dev/null @@ -1,5 +0,0 @@ -graph -===== - -.. 
automodule:: stix2.equivalence.graph - :members: diff --git a/docs/api/equivalence/object/stix2.equivalence.object.rst b/docs/api/equivalence/object/stix2.equivalence.object.rst deleted file mode 100644 index 41c8f52..0000000 --- a/docs/api/equivalence/object/stix2.equivalence.object.rst +++ /dev/null @@ -1,5 +0,0 @@ -object -====== - -.. automodule:: stix2.equivalence.object - :members: diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst deleted file mode 100644 index 0b886be..0000000 --- a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst +++ /dev/null @@ -1,5 +0,0 @@ -comparison -============== - -.. automodule:: stix2.equivalence.pattern.compare.comparison - :members: diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst deleted file mode 100644 index 16bd619..0000000 --- a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst +++ /dev/null @@ -1,5 +0,0 @@ -observation -============== - -.. automodule:: stix2.equivalence.pattern.compare.observation - :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst deleted file mode 100644 index 6364561..0000000 --- a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst +++ /dev/null @@ -1,5 +0,0 @@ -comparison -============== - -.. 
automodule:: stix2.equivalence.pattern.transform.comparison - :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst deleted file mode 100644 index f67fb1b..0000000 --- a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst +++ /dev/null @@ -1,5 +0,0 @@ -observation -============== - -.. automodule:: stix2.equivalence.pattern.transform.observation - :members: diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst deleted file mode 100644 index 8b61b69..0000000 --- a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst +++ /dev/null @@ -1,5 +0,0 @@ -specials -============== - -.. automodule:: stix2.equivalence.pattern.transform.specials - :members: From 5ec001d3244203936adbc13aec85f3ff7d5513d6 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 16:42:17 -0400 Subject: [PATCH 5/9] add new rst files and directories --- .../compare/stix2.equivalence.pattern.compare.comparison.rst | 5 +++++ .../stix2.equivalence.pattern.compare.observation.rst | 5 +++++ .../pattern/stix2.equivalence.pattern.compare.rst | 5 +++++ .../pattern/stix2.equivalence.pattern.transform.rst | 5 +++++ .../stix2.equivalence.pattern.transform.comparison.rst | 5 +++++ .../stix2.equivalence.pattern.transform.observation.rst | 5 +++++ .../stix2.equivalence.pattern.transform.specials.rst | 5 +++++ docs/api/equivalence/stix2.equivalence.graph.rst | 5 +++++ docs/api/equivalence/stix2.equivalence.object.rst | 5 +++++ 9 files changed, 45 insertions(+) create mode 100644 docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst create mode 100644 
docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst create mode 100644 docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst create mode 100644 docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst create mode 100644 docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst create mode 100644 docs/api/equivalence/stix2.equivalence.graph.rst create mode 100644 docs/api/equivalence/stix2.equivalence.object.rst diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst new file mode 100644 index 0000000..37c6cc0 --- /dev/null +++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.comparison.rst @@ -0,0 +1,5 @@ +comparison +============================================ + +.. automodule:: stix2.equivalence.pattern.compare.comparison + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst new file mode 100644 index 0000000..463e11f --- /dev/null +++ b/docs/api/equivalence/pattern/compare/stix2.equivalence.pattern.compare.observation.rst @@ -0,0 +1,5 @@ +observation +============================================= + +.. 
automodule:: stix2.equivalence.pattern.compare.observation + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst b/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst new file mode 100644 index 0000000..3f64bea --- /dev/null +++ b/docs/api/equivalence/pattern/stix2.equivalence.pattern.compare.rst @@ -0,0 +1,5 @@ +compare +================================= + +.. automodule:: stix2.equivalence.pattern.compare + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst b/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst new file mode 100644 index 0000000..a26102b --- /dev/null +++ b/docs/api/equivalence/pattern/stix2.equivalence.pattern.transform.rst @@ -0,0 +1,5 @@ +transform +=================================== + +.. automodule:: stix2.equivalence.pattern.transform + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst new file mode 100644 index 0000000..e2b5d42 --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.comparison.rst @@ -0,0 +1,5 @@ +comparison +============================================== + +.. automodule:: stix2.equivalence.pattern.transform.comparison + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst new file mode 100644 index 0000000..607bdcf --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.observation.rst @@ -0,0 +1,5 @@ +observation +=============================================== + +.. 
automodule:: stix2.equivalence.pattern.transform.observation + :members: \ No newline at end of file diff --git a/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst new file mode 100644 index 0000000..f8191c3 --- /dev/null +++ b/docs/api/equivalence/pattern/transform/stix2.equivalence.pattern.transform.specials.rst @@ -0,0 +1,5 @@ +specials +============================================ + +.. automodule:: stix2.equivalence.pattern.transform.specials + :members: \ No newline at end of file diff --git a/docs/api/equivalence/stix2.equivalence.graph.rst b/docs/api/equivalence/stix2.equivalence.graph.rst new file mode 100644 index 0000000..0cc2922 --- /dev/null +++ b/docs/api/equivalence/stix2.equivalence.graph.rst @@ -0,0 +1,5 @@ +graph +======================= + +.. automodule:: stix2.equivalence.graph + :members: \ No newline at end of file diff --git a/docs/api/equivalence/stix2.equivalence.object.rst b/docs/api/equivalence/stix2.equivalence.object.rst new file mode 100644 index 0000000..76f5274 --- /dev/null +++ b/docs/api/equivalence/stix2.equivalence.object.rst @@ -0,0 +1,5 @@ +object +======================== + +.. 
automodule:: stix2.equivalence.object + :members: \ No newline at end of file From fc600df5f16c25ec768a1412c3a3d73cdf081f31 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 16:43:12 -0400 Subject: [PATCH 6/9] update .gitignore and conf.py --- .gitignore | 3 ++- docs/conf.py | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index 9758937..72b31cd 100644 --- a/.gitignore +++ b/.gitignore @@ -55,7 +55,8 @@ coverage.xml # Sphinx documentation docs/_build/ .ipynb_checkpoints -default_sem_eq_weights.rst +graph_default_sem_eq_weights.rst +object_default_sem_eq_weights.rst # PyBuilder target/ diff --git a/docs/conf.py b/docs/conf.py index 8b372d5..9723e39 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -8,7 +8,8 @@ from six import class_types from sphinx.ext.autodoc import ClassDocumenter from stix2.base import _STIXBase -from stix2.environment import WEIGHTS +from stix2.equivalence.graph import GRAPH_WEIGHTS +from stix2.equivalence.object import WEIGHTS from stix2.version import __version__ sys.path.insert(0, os.path.abspath('..')) @@ -62,12 +63,19 @@ latex_documents = [ ] # Add a formatted version of environment.WEIGHTS -default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__) -default_sem_eq_weights = default_sem_eq_weights.replace('\n', '\n ') -default_sem_eq_weights = default_sem_eq_weights.replace(' "', ' ') -default_sem_eq_weights = default_sem_eq_weights.replace('"\n', '\n') -with open('default_sem_eq_weights.rst', 'w') as f: - f.write(".. 
code-block:: py\n\n {}\n\n".format(default_sem_eq_weights)) +object_default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__) +object_default_sem_eq_weights = object_default_sem_eq_weights.replace('\n', '\n ') +object_default_sem_eq_weights = object_default_sem_eq_weights.replace(' "', ' ') +object_default_sem_eq_weights = object_default_sem_eq_weights.replace('"\n', '\n') +with open('object_default_sem_eq_weights.rst', 'w') as f: + f.write(".. code-block:: python\n\n {}\n\n".format(object_default_sem_eq_weights)) + +graph_default_sem_eq_weights = json.dumps(GRAPH_WEIGHTS, indent=4, default=lambda o: o.__name__) +graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('\n', '\n ') +graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace(' "', ' ') +graph_default_sem_eq_weights = graph_default_sem_eq_weights.replace('"\n', '\n') +with open('graph_default_sem_eq_weights.rst', 'w') as f: + f.write(".. code-block:: python\n\n {}\n\n".format(graph_default_sem_eq_weights)) def get_property_type(prop): From 92ab1227edd28cf3da366e792a626c6f5c9a1d85 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 17:12:52 -0400 Subject: [PATCH 7/9] docstrings, changes to equivalence.ipynb --- docs/guide/equivalence.ipynb | 18 ++++++++++-------- stix2/environment.py | 6 +++--- stix2/equivalence/graph/__init__.py | 5 +++-- stix2/equivalence/object/__init__.py | 5 +++-- stix2/equivalence/pattern/__init__.py | 2 +- 5 files changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/guide/equivalence.ipynb b/docs/guide/equivalence.ipynb index c0b9331..8393495 100644 --- a/docs/guide/equivalence.ipynb +++ b/docs/guide/equivalence.ipynb @@ -2165,15 +2165,17 @@ "The weights dictionary should contain both the weight and the comparison function for each property. 
You may use the default weights and functions, or provide your own.\n", "\n", "##### Existing comparison functions\n", - "For reference, here is a list of the comparison functions already built in the codebase (found in [stix2/environment.py](../api/stix2.environment.rst#stix2.environment.Environment)):\n", + "For reference, here is a list of the comparison functions already built in the codebase (found in [stix2/equivalence/object](../api/equivalence/stix2.equivalence.object.rst#module-stix2.equivalence.object)):\n", "\n", - " - [custom_pattern_based](../api/stix2.environment.rst#stix2.environment.custom_pattern_based)\n", - " - [exact_match](../api/stix2.environment.rst#stix2.environment.exact_match)\n", - " - [partial_external_reference_based](../api/stix2.environment.rst#stix2.environment.partial_external_reference_based)\n", - " - [partial_list_based](../api/stix2.environment.rst#stix2.environment.partial_list_based)\n", - " - [partial_location_distance](../api/stix2.environment.rst#stix2.environment.partial_location_distance)\n", - " - [partial_string_based](../api/stix2.environment.rst#stix2.environment.partial_string_based)\n", - " - [partial_timestamp_based](../api/stix2.environment.rst#stix2.environment.partial_timestamp_based)\n", + " - [custom_pattern_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.custom_pattern_based)\n", + " - [exact_match](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.exact_match)\n", + " - [list_reference_check](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.list_reference_check)\n", + " - [partial_external_reference_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_external_reference_based)\n", + " - [partial_list_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_list_based)\n", + " - 
[partial_location_distance](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_location_distance)\n", + " - [partial_string_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_string_based)\n", + " - [partial_timestamp_based](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.partial_timestamp_based)\n", + " - [reference_check](../api/equivalence/stix2.equivalence.object.rst#stix2.equivalence.object.reference_check)\n", "\n", "For instance, if we wanted to compare two of the `ThreatActor`s from before, but use our own weights, then we could do the following:" ] diff --git a/stix2/environment.py b/stix2/environment.py index ea93a83..f31762c 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -4,7 +4,7 @@ import copy from .datastore import CompositeDataSource, DataStoreMixin from .equivalence.graph import graphically_equivalent from .equivalence.object import ( # noqa: F401 - check_property_present, custom_pattern_based, exact_match, + WEIGHTS, check_property_present, custom_pattern_based, exact_match, list_reference_check, partial_external_reference_based, partial_list_based, partial_location_distance, partial_string_based, partial_timestamp_based, reference_check, semantically_equivalent, @@ -222,7 +222,7 @@ class Environment(DataStoreMixin): Note: Default weights_dict: - .. include:: ../default_sem_eq_weights.rst + .. include:: ../object_default_sem_eq_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -260,7 +260,7 @@ class Environment(DataStoreMixin): Note: Default weights_dict: - .. include:: ../default_sem_eq_weights.rst + .. include:: ../graph_default_sem_eq_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. 
diff --git a/stix2/equivalence/graph/__init__.py b/stix2/equivalence/graph/__init__.py index 3c8730a..680f42f 100644 --- a/stix2/equivalence/graph/__init__.py +++ b/stix2/equivalence/graph/__init__.py @@ -1,3 +1,4 @@ +"""Python APIs for STIX 2 Graph-based Semantic Equivalence.""" import logging from ..object import ( @@ -36,7 +37,7 @@ def graphically_equivalent(ds1, ds2, prop_scores={}, **weight_dict): Note: Default weights_dict: - .. include:: ../default_sem_eq_weights.rst + .. include:: ../../graph_default_sem_eq_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -133,4 +134,4 @@ GRAPH_WEIGHTS.update({ "ds2": None, "max_depth": 1, }, -}) #: :autodoc-skip: +}) # :autodoc-skip: diff --git a/stix2/equivalence/object/__init__.py b/stix2/equivalence/object/__init__.py index c24fa3c..8333ceb 100644 --- a/stix2/equivalence/object/__init__.py +++ b/stix2/equivalence/object/__init__.py @@ -1,3 +1,4 @@ +"""Python APIs for STIX 2 Object-based Semantic Equivalence.""" import logging import time @@ -32,7 +33,7 @@ def semantically_equivalent(obj1, obj2, prop_scores={}, **weight_dict): Note: Default weights_dict: - .. include:: ../default_sem_eq_weights.rst + .. include:: ../../object_default_sem_eq_weights.rst Note: This implementation follows the Semantic Equivalence Committee Note. @@ -448,4 +449,4 @@ WEIGHTS = { "_internal": { "ignore_spec_version": False, }, -} #: :autodoc-skip: +} # :autodoc-skip: diff --git a/stix2/equivalence/pattern/__init__.py b/stix2/equivalence/pattern/__init__.py index 10494f7..b2e5421 100644 --- a/stix2/equivalence/pattern/__init__.py +++ b/stix2/equivalence/pattern/__init__.py @@ -1,7 +1,7 @@ """Python APIs for STIX 2 Pattern Semantic Equivalence. .. 
autosummary:: - :toctree: patterns + :toctree: pattern compare transform From bb3b1e141af13a3fce1654fc03b0e6e823878336 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 16 Oct 2020 17:36:20 -0400 Subject: [PATCH 8/9] add requirement to prevent breaking change to fail build --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8a5e4a2..fa1773a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ bumpversion ipython +nbconvert<6 nbsphinx==0.4.3 pre-commit pygments<3,>=2.4.1 From e08a26a39cab2839705875ed782bbbe327f752da Mon Sep 17 00:00:00 2001 From: Chris Lenk Date: Sun, 18 Oct 2020 21:09:07 -0400 Subject: [PATCH 9/9] Correct variable name --- stix2/environment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stix2/environment.py b/stix2/environment.py index f31762c..4dc6ff0 100644 --- a/stix2/environment.py +++ b/stix2/environment.py @@ -220,7 +220,7 @@ class Environment(DataStoreMixin): or methods can be fine tuned for a particular use case. Note: - Default weights_dict: + Default weight_dict: .. include:: ../object_default_sem_eq_weights.rst @@ -258,7 +258,7 @@ class Environment(DataStoreMixin): or methods can be fine tuned for a particular use case. Note: - Default weights_dict: + Default weight_dict: .. include:: ../graph_default_sem_eq_weights.rst