2021-02-16 06:57:26 +01:00
|
|
|
"""Python APIs for STIX 2 Object-based Semantic Equivalence and Similarity."""
|
|
|
|
import collections
|
|
|
|
import itertools
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
import logging
|
|
|
|
import time
|
|
|
|
|
2021-03-10 19:32:02 +01:00
|
|
|
from ...datastore import DataSource, DataStoreMixin, Filter
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
from ...utils import STIXdatetime, parse_into_datetime
|
2020-11-20 15:33:56 +01:00
|
|
|
from ..pattern import equivalent_patterns
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
2021-03-01 18:27:52 +01:00
|
|
|
def object_equivalence(
    obj1, obj2, prop_scores=None, threshold=70, ds1=None,
    ds2=None, ignore_spec_version=False,
    versioning_checks=False, max_depth=1, **weight_dict
):
    """This method returns a true/false value if two objects are semantically equivalent.
    Internally, it calls the object_similarity function and compares it against the given
    threshold value.

    Args:
        obj1: A stix2 object instance
        obj2: A stix2 object instance
        prop_scores (optional): A dictionary that can hold individual property
            scores, weights, contributing score, matching score and sum of
            weights. It is filled in by object_similarity. When omitted (or
            None), a new dictionary is created for this call only.
        threshold: A numerical value between 0 and 100 to determine the minimum
            score to result in successfully calling both objects equivalent. This
            value can be tuned.
        ds1 (optional): A DataStore object instance from which to pull related objects
        ds2 (optional): A DataStore object instance from which to pull related objects
        ignore_spec_version: A boolean indicating whether to test object types
            that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
            If set to True this check will be skipped.
        versioning_checks: A boolean indicating whether to test multiple revisions
            of the same object (when present) to maximize similarity against a
            particular version. If set to True the algorithm will perform this step.
        max_depth: A positive integer indicating the maximum recursion depth the
            algorithm can reach when de-referencing objects and performing the
            object_similarity algorithm.
        weight_dict: A dictionary that can be used to override what checks are done
            to objects in the similarity process.

    Returns:
        bool: True if the result of the object similarity is greater than or equal to
            the threshold value. False otherwise.

    Warning:
        Object types need to have property weights defined for the similarity process.
        Otherwise, those objects will not influence the final score. The WEIGHTS
        dictionary under `stix2.equivalence.object` can give you an idea on how to add
        new entries and pass them via the `weight_dict` argument. Similarly, the values
        or methods can be fine tuned for a particular use case.

    Note:
        Default weight_dict:

        .. include:: ../../similarity_weights.rst

    Note:
        This implementation follows the Semantic Equivalence Committee Note.
        see `the Committee Note <link here>`__.

    """
    # Default argument values are evaluated once, at definition time. A `{}`
    # default would therefore be one dict shared by every call that omits
    # `prop_scores`, and object_similarity writes into it. Build a fresh one
    # per call instead.
    if prop_scores is None:
        prop_scores = {}

    similarity_result = object_similarity(
        obj1, obj2, prop_scores, ds1, ds2, ignore_spec_version,
        versioning_checks, max_depth, **weight_dict
    )
    # The comparison already yields the boolean answer.
    return similarity_result >= threshold
|
|
|
|
|
|
|
|
|
2021-03-01 18:27:52 +01:00
|
|
|
def object_similarity(
|
|
|
|
obj1, obj2, prop_scores={}, ds1=None, ds2=None,
|
|
|
|
ignore_spec_version=False, versioning_checks=False,
|
|
|
|
max_depth=1, **weight_dict
|
|
|
|
):
|
2021-02-16 06:57:26 +01:00
|
|
|
"""This method returns a measure of similarity depending on how
|
|
|
|
similar the two objects are.
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
Args:
|
|
|
|
obj1: A stix2 object instance
|
|
|
|
obj2: A stix2 object instance
|
|
|
|
prop_scores: A dictionary that can hold individual property scores,
|
|
|
|
weights, contributing score, matching score and sum of weights.
|
2021-03-10 19:32:02 +01:00
|
|
|
ds1 (optional): A DataStore object instance from which to pull related objects
|
|
|
|
ds2 (optional): A DataStore object instance from which to pull related objects
|
2021-03-01 18:27:52 +01:00
|
|
|
ignore_spec_version: A boolean indicating whether to test object types
|
|
|
|
that belong to different spec versions (STIX 2.0 and STIX 2.1 for example).
|
|
|
|
If set to True this check will be skipped.
|
|
|
|
versioning_checks: A boolean indicating whether to test multiple revisions
|
|
|
|
of the same object (when present) to maximize similarity against a
|
|
|
|
particular version. If set to True the algorithm will perform this step.
|
|
|
|
max_depth: A positive integer indicating the maximum recursion depth the
|
|
|
|
algorithm can reach when de-referencing objects and performing the
|
|
|
|
object_similarity algorithm.
|
|
|
|
weight_dict: A dictionary that can be used to override what checks are done
|
|
|
|
to objects in the similarity process.
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
Returns:
|
2021-02-16 06:57:26 +01:00
|
|
|
float: A number between 0.0 and 100.0 as a measurement of similarity.
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
Warning:
|
2021-02-16 06:57:26 +01:00
|
|
|
Object types need to have property weights defined for the similarity process.
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
Otherwise, those objects will not influence the final score. The WEIGHTS
|
|
|
|
dictionary under `stix2.equivalence.object` can give you an idea on how to add
|
|
|
|
new entries and pass them via the `weight_dict` argument. Similarly, the values
|
|
|
|
or methods can be fine tuned for a particular use case.
|
|
|
|
|
|
|
|
Note:
|
2021-02-16 06:57:26 +01:00
|
|
|
Default weight_dict:
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
2021-03-01 18:27:52 +01:00
|
|
|
.. include:: ../../similarity_weights.rst
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
Note:
|
|
|
|
This implementation follows the Semantic Equivalence Committee Note.
|
|
|
|
see `the Committee Note <link here>`__.
|
|
|
|
|
|
|
|
"""
|
|
|
|
weights = WEIGHTS.copy()
|
|
|
|
|
|
|
|
if weight_dict:
|
|
|
|
weights.update(weight_dict)
|
|
|
|
|
2021-02-27 01:19:33 +01:00
|
|
|
weights["_internal"] = {
|
|
|
|
"ignore_spec_version": ignore_spec_version,
|
|
|
|
"versioning_checks": versioning_checks,
|
|
|
|
"ds1": ds1,
|
|
|
|
"ds2": ds2,
|
|
|
|
"max_depth": max_depth,
|
|
|
|
}
|
|
|
|
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
type1, type2 = obj1["type"], obj2["type"]
|
|
|
|
|
|
|
|
if type1 != type2:
|
|
|
|
raise ValueError('The objects to compare must be of the same type!')
|
|
|
|
|
|
|
|
if ignore_spec_version is False and obj1.get("spec_version", "2.0") != obj2.get("spec_version", "2.0"):
|
|
|
|
raise ValueError('The objects to compare must be of the same spec version!')
|
|
|
|
|
|
|
|
try:
|
|
|
|
weights[type1]
|
|
|
|
except KeyError:
|
2021-02-18 16:37:34 +01:00
|
|
|
logger.warning("'%s' type has no 'weights' dict specified & thus no object similarity method to call!", type1)
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
sum_weights = matching_score = 0
|
|
|
|
else:
|
|
|
|
try:
|
|
|
|
method = weights[type1]["method"]
|
|
|
|
except KeyError:
|
2021-02-18 16:37:34 +01:00
|
|
|
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
matching_score = 0.0
|
|
|
|
sum_weights = 0.0
|
|
|
|
|
|
|
|
for prop in weights[type1]:
|
2020-11-10 18:55:17 +01:00
|
|
|
if check_property_present(prop, obj1, obj2):
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
w = weights[type1][prop][0]
|
|
|
|
comp_funct = weights[type1][prop][1]
|
2021-02-27 01:19:33 +01:00
|
|
|
prop_scores[prop] = {}
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
if comp_funct == partial_timestamp_based:
|
|
|
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
|
|
|
|
elif comp_funct == partial_location_distance:
|
|
|
|
threshold = weights[type1]["threshold"]
|
|
|
|
contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
|
|
|
|
elif comp_funct == reference_check or comp_funct == list_reference_check:
|
2021-03-01 18:27:52 +01:00
|
|
|
if max_depth > 0:
|
|
|
|
weights["_internal"]["max_depth"] = max_depth - 1
|
2021-02-18 03:30:14 +01:00
|
|
|
ds1, ds2 = weights["_internal"]["ds1"], weights["_internal"]["ds2"]
|
2021-02-27 01:19:33 +01:00
|
|
|
if _datastore_check(ds1, ds2):
|
|
|
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop], ds1, ds2, **weights)
|
|
|
|
elif comp_funct == reference_check:
|
|
|
|
comp_funct = exact_match
|
|
|
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
|
|
|
elif comp_funct == list_reference_check:
|
|
|
|
comp_funct = partial_list_based
|
|
|
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
2021-03-01 18:44:35 +01:00
|
|
|
prop_scores[prop]["check_type"] = comp_funct.__name__
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
else:
|
2021-02-18 03:30:14 +01:00
|
|
|
continue # prevent excessive recursion
|
2021-03-01 18:27:52 +01:00
|
|
|
weights["_internal"]["max_depth"] = max_depth
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
else:
|
|
|
|
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
|
|
|
|
|
|
|
|
sum_weights += w
|
|
|
|
matching_score += contributing_score
|
|
|
|
|
2021-02-27 01:19:33 +01:00
|
|
|
prop_scores[prop]["weight"] = w
|
|
|
|
prop_scores[prop]["contributing_score"] = contributing_score
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
|
|
|
|
|
|
|
|
prop_scores["matching_score"] = matching_score
|
|
|
|
prop_scores["sum_weights"] = sum_weights
|
|
|
|
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
|
|
|
|
else:
|
2021-02-18 16:37:34 +01:00
|
|
|
logger.debug("Starting object similarity process between: '%s' and '%s'", obj1["id"], obj2["id"])
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
try:
|
|
|
|
matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
|
|
|
|
except TypeError:
|
|
|
|
# method doesn't support detailed output with prop_scores
|
|
|
|
matching_score, sum_weights = method(obj1, obj2, **weights[type1])
|
|
|
|
logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
|
|
|
|
|
|
|
|
if sum_weights <= 0:
|
|
|
|
return 0
|
|
|
|
equivalence_score = (matching_score / sum_weights) * 100.0
|
|
|
|
return equivalence_score
|
|
|
|
|
|
|
|
|
|
|
|
def check_property_present(prop, obj1, obj2):
    """Report whether a property is available on both objects.

    The pseudo-property ``"longitude_latitude"`` is treated specially: it is
    considered present only when both ``latitude`` and ``longitude`` are found
    on both objects.

    Args:
        prop: Name of the property to look for.
        obj1: First object; must support the ``in`` operator.
        obj2: Second object; must support the ``in`` operator.

    Returns:
        bool: True if the property is present on both objects, False otherwise.

    """
    if prop == "longitude_latitude":
        required = ("latitude", "longitude")
    else:
        required = (prop,)

    # Bail out on the first name missing from either side.
    for name in required:
        if name not in obj1 or name not in obj2:
            return False
    return True
|
|
|
|
|
|
|
|
|
|
|
|
def partial_timestamp_based(t1, t2, tdelta):
    """Score two timestamps by how close they are to each other.

    Values that are not already ``STIXdatetime`` instances are first passed
    through ``parse_into_datetime``. Both timestamps are then converted to
    seconds with ``time.mktime`` and their absolute difference is compared
    against a tolerance window of ``86400 * tdelta`` seconds.

    Args:
        t1: A datetime string or STIXdatetime object.
        t2: A datetime string or STIXdatetime object.
        tdelta (float): Tolerance expressed in days; it is multiplied by
            86400 (seconds in one day) to obtain the window.

    Returns:
        float: 1.0 for identical timestamps, decreasing linearly to 0.0 once
            the difference reaches or exceeds the tolerance window.

    """
    parsed = [
        value if isinstance(value, STIXdatetime) else parse_into_datetime(value)
        for value in (t1, t2)
    ]
    seconds1, seconds2 = [time.mktime(stamp.timetuple()) for stamp in parsed]

    # The ratio is capped at 1 so the score never drops below zero.
    result = 1 - min(abs(seconds1 - seconds2) / (86400 * tdelta), 1)
    logger.debug("--\t\tpartial_timestamp_based '%s' '%s' tdelta: '%s'\tresult: '%s'", seconds1, seconds2, tdelta, result)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def partial_list_based(l1, l2):
    """Performs a partial list matching via finding the intersection between
    common values. Repeated values are counted only once. This method can be
    used for *_refs equality checks when de-reference is not possible.

    Args:
        l1: A list of (hashable) values.
        l2: A list of (hashable) values.

    Returns:
        float: Number between 0.0 and 1.0: the count of distinct values the
            two lists share divided by the count of distinct values in the
            larger list. Two empty lists score 1.0.

    """
    l1_set, l2_set = set(l1), set(l2)
    largest = max(len(l1_set), len(l2_set))
    if largest == 0:
        # Both inputs are empty: they do not differ in any value, and the
        # division below would otherwise raise ZeroDivisionError.
        result = 1.0
    else:
        result = len(l1_set.intersection(l2_set)) / largest
    logger.debug("--\t\tpartial_list_based '%s' '%s'\tresult: '%s'", l1, l2, result)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def exact_match(val1, val2):
    """Compare two values for equality and score the outcome.

    Suitable for ``*_ref`` properties when the referenced objects cannot be
    de-referenced and only the raw values can be compared.

    Args:
        val1: A value suitable for an equality test.
        val2: A value suitable for an equality test.

    Returns:
        float: 1.0 if ``val1 == val2``, 0.0 otherwise.

    """
    result = 1.0 if val1 == val2 else 0.0
    logger.debug("--\t\texact_match '%s' '%s'\tresult: '%s'", val1, val2, result)
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def partial_string_based(str1, str2):
    """Score two strings with rapidfuzz's ``token_sort_ratio``.

    The ratio reported by rapidfuzz is logged as-is and then divided by 100
    before being returned.

    Args:
        str1: A string value to check.
        str2: A string value to check.

    Returns:
        float: The rapidfuzz ratio divided by 100.0 (presumably in the range
            0.0 to 1.0 -- confirm against the rapidfuzz documentation).

    """
    # Imported lazily so rapidfuzz is only needed when this check is used.
    from rapidfuzz.fuzz import token_sort_ratio

    ratio = token_sort_ratio(str1, str2)
    logger.debug("--\t\tpartial_string_based '%s' '%s'\tresult: '%s'", str1, str2, ratio)
    return ratio / 100.0
|
|
|
|
|
|
|
|
|
|
|
|
def custom_pattern_based(pattern1, pattern2):
    """Compare two Indicator patterns via ``equivalent_patterns``.

    Args:
        pattern1: An Indicator pattern.
        pattern2: An Indicator pattern.

    Returns:
        The value produced by ``equivalent_patterns(pattern1, pattern2)``,
        returned unchanged.
        NOTE(review): presumably a truthy/falsy equivalence verdict rather
        than a graded score -- confirm against ``equivalent_patterns``.

    """
    verdict = equivalent_patterns(pattern1, pattern2)
    return verdict
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
|
|
|
|
|
2021-03-01 18:27:52 +01:00
|
|
|
def partial_external_reference_based(ext_refs1, ext_refs2):
    """Performs a matching on External References.

    Every reference in ``ext_refs1`` is compared against every reference in
    ``ext_refs2`` on ``source_name``, ``external_id`` and ``url``.

    Args:
        ext_refs1: A list of external references.
        ext_refs2: A list of external references.

    Returns:
        float: Number between 0.0 and 1.0 depending on matches. 1.0 is
            returned as soon as a pair shares one of the well-known source
            names together with a matching ``external_id`` or ``url``.
            Otherwise the number of matching pairs is divided by the length
            of the longer list and capped at 1.0. Two empty lists score 1.0.

    """
    allowed = {"veris", "cve", "capec", "mitre-attack"}
    matches = 0

    for ext_ref1, ext_ref2 in itertools.product(ext_refs1, ext_refs2):
        sn_match = (
            check_property_present("source_name", ext_ref1, ext_ref2)
            and ext_ref1["source_name"] == ext_ref2["source_name"]
        )
        ei_match = (
            check_property_present("external_id", ext_ref1, ext_ref2)
            and ext_ref1["external_id"] == ext_ref2["external_id"]
        )
        url_match = (
            check_property_present("url", ext_ref1, ext_ref2)
            and ext_ref1["url"] == ext_ref2["url"]
        )
        # Only remember the source name when both references agree on it.
        source_name = ext_ref1["source_name"] if sn_match else None

        # Special case: if source_name is a STIX defined name and either
        # external_id or url match then its a perfect match and other entries
        # can be ignored.
        if sn_match and (ei_match or url_match) and source_name in allowed:
            result = 1.0
            logger.debug(
                "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
                ext_refs1, ext_refs2, result,
            )
            return result

        # Regular check. If the source_name (not STIX-defined) or external_id or
        # url matches then we consider the entry a match.
        if (sn_match or ei_match or url_match) and source_name not in allowed:
            matches += 1

    longest = max(len(ext_refs1), len(ext_refs2))
    if longest == 0:
        # Both lists are empty: nothing differs, and dividing by zero would
        # raise ZeroDivisionError.
        result = 1.0
    else:
        # `matches` counts pairs from the cross product, so one reference can
        # be counted several times; cap the ratio to keep it within 0.0-1.0.
        result = min(matches / longest, 1.0)
    logger.debug(
        "--\t\tpartial_external_reference_based '%s' '%s'\tresult: '%s'",
        ext_refs1, ext_refs2, result,
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def partial_location_distance(lat1, long1, lat2, long2, threshold):
    """Given two coordinates perform a matching based on its distance using the Haversine Formula.

    Args:
        lat1: Latitude value for first coordinate point.
        long1: Longitude value for first coordinate point.
        lat2: Latitude value for second coordinate point.
        long2: Longitude value for second coordinate point.
        threshold (float): A kilometer measurement for the threshold distance between these two points.

    Returns:
        float: Number between 0.0 and 1.0 depending on match. 1.0 means the
            points coincide; the score falls linearly with distance and is
            0.0 once the distance reaches or exceeds ``threshold``.

    """
    # Imported lazily so haversine is only needed when this check is used.
    from haversine import Unit, haversine

    distance = haversine((lat1, long1), (lat2, long2), unit=Unit.KILOMETERS)
    # Floor at 0.0: without it, points farther apart than `threshold` yield a
    # negative score, which would subtract from the overall matching score.
    result = max(1 - (distance / threshold), 0.0)
    logger.debug(
        "--\t\tpartial_location_distance '%s' '%s' threshold: '%s'\tresult: '%s'",
        (lat1, long1), (lat2, long2), threshold, result,
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def _versioned_checks(ref1, ref2, ds1, ds2, **weights):
    """Return the best similarity score across the objects stored under two ids.

    ``ds1`` is queried for every object whose id is ``ref1`` and ``ds2`` for
    every object whose id is ``ref2``. Each comparable pair is scored with
    object_similarity and the highest score is returned, or 0.0 when no
    comparable pair exists."""
    candidate_pairs = _object_pairs(
        _bucket_per_type(ds1.query([Filter("id", "=", ref1)])),
        _bucket_per_type(ds2.query([Filter("id", "=", ref2)])),
        weights,
    )

    # The options are read once, before any comparison is made.
    internal = weights["_internal"]
    ignore_spec_version = internal["ignore_spec_version"]
    versioning_checks = internal["versioning_checks"]
    max_depth = internal["max_depth"]

    scores = (
        object_similarity(
            left, right, ds1=ds1, ds2=ds2,
            ignore_spec_version=ignore_spec_version,
            versioning_checks=versioning_checks,
            max_depth=max_depth, **weights,
        )
        for left, right in candidate_pairs
    )
    # max() keeps the first of equal scores, and falls back to 0.0 when
    # there was nothing to compare.
    result = max(scores, default=0.0)

    logger.debug(
        "--\t\t_versioned_checks '%s' '%s'\tresult: '%s'",
        ref1, ref2, result,
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def reference_check(ref1, ref2, ds1, ds2, **weights):
    """Score two references by comparing the objects they point to.

    The references are only compared when their id type prefixes are equal
    and that type has an entry in ``weights``; otherwise the score is 0.0.
    The object_similarity score is divided by 100.0 before being returned.
    The score influences the result of an edge check."""
    kind1 = ref1.split("--")[0]
    kind2 = ref2.split("--")[0]
    result = 0.0

    if kind1 == kind2 and kind1 in weights:
        internal = weights["_internal"]
        ignore_spec_version = internal["ignore_spec_version"]
        versioning_checks = internal["versioning_checks"]
        max_depth = internal["max_depth"]

        if not versioning_checks:
            obj1 = ds1.get(ref1)
            obj2 = ds2.get(ref2)
            # Either lookup may come back empty; the score then stays 0.0.
            if obj1 and obj2:
                similarity = object_similarity(
                    obj1, obj2, ds1=ds1, ds2=ds2,
                    ignore_spec_version=ignore_spec_version,
                    versioning_checks=versioning_checks,
                    max_depth=max_depth, **weights,
                )
                result = similarity / 100.0
        else:
            result = _versioned_checks(ref1, ref2, ds1, ds2, **weights) / 100.0

    logger.debug(
        "--\t\treference_check '%s' '%s'\tresult: '%s'",
        ref1, ref2, result,
    )
    return result
|
|
|
|
|
|
|
|
|
|
|
|
def list_reference_check(refs1, refs2, ds1, ds2, **weights):
    """Score two lists of references (i.e., object_refs) against each other.

    References are bucketed by the type prefix of their id, and every
    same-type pair whose type has an entry in ``weights`` is scored with
    reference_check. Each reference that took part in at least one pair keeps
    its highest score, and the result is the mean of those best scores
    (0.0 when no pair could be formed). The score influences the objects
    containing these references."""
    best = {}

    def _keep_best(ref, counterpart, score):
        # Record the score unless this reference already has one at least as good.
        if ref not in best or score > best[ref]["value"]:
            best[ref] = {"matched": counterpart, "value": score}

    candidate_pairs = _object_pairs(
        _bucket_per_type(refs1, "id-split"),
        _bucket_per_type(refs2, "id-split"),
        weights,
    )

    for left, right in candidate_pairs:
        if left.split("--")[0] != right.split("--")[0]:
            continue
        score = reference_check(left, right, ds1, ds2, **weights)
        # Both sides of the pair are tracked, left-hand reference first.
        _keep_best(left, right, score)
        _keep_best(right, left, score)

    total_sum = sum(entry["value"] for entry in best.values())
    max_score = len(best)
    result = total_sum / max_score if max_score > 0 else 0.0

    logger.debug(
        "--\t\tlist_reference_check '%s' '%s'\ttotal_sum: '%s'\tmax_score: '%s'\tresult: '%s'",
        refs1, refs2, total_sum, max_score, result,
    )
    return result
|
|
|
|
|
|
|
|
|
2021-02-27 01:19:33 +01:00
|
|
|
def _datastore_check(ds1, ds2):
    """Return True when at least one argument is a DataStoreMixin or DataSource instance."""
    # NOTE(review): a single match is enough here, although the reference
    # checks in this module call methods on both stores - confirm with callers.
    store_types = (DataStoreMixin, DataSource)
    return any(
        issubclass(candidate.__class__, store_types)
        for candidate in (ds1, ds2)
    )
|
|
|
|
|
|
|
|
|
2021-02-18 15:14:34 +01:00
|
|
|
def _bucket_per_type(graph, mode="type"):
|
2021-02-16 17:01:07 +01:00
|
|
|
"""Given a list of objects or references, bucket them by type.
|
|
|
|
Depending on the list type: extract from 'type' property or using
|
2021-02-18 15:14:34 +01:00
|
|
|
the 'id'.
|
|
|
|
"""
|
2021-02-16 06:57:26 +01:00
|
|
|
buckets = collections.defaultdict(list)
|
|
|
|
if mode == "type":
|
2021-02-18 15:14:34 +01:00
|
|
|
[buckets[obj["type"]].append(obj) for obj in graph]
|
2021-02-16 06:57:26 +01:00
|
|
|
elif mode == "id-split":
|
2021-02-18 15:14:34 +01:00
|
|
|
[buckets[obj.split("--")[0]].append(obj) for obj in graph]
|
2021-02-16 06:57:26 +01:00
|
|
|
return buckets
|
|
|
|
|
|
|
|
|
2021-02-18 15:14:34 +01:00
|
|
|
def _object_pairs(graph1, graph2, weights):
|
2021-02-16 17:01:07 +01:00
|
|
|
"""Returns a generator with the product of the comparable
|
|
|
|
objects for the graph similarity process. It determines
|
2021-02-18 15:14:34 +01:00
|
|
|
objects in common between graphs and objects with weights.
|
|
|
|
"""
|
|
|
|
types_in_common = set(graph1.keys()).intersection(graph2.keys())
|
|
|
|
testable_types = types_in_common.intersection(weights.keys())
|
2021-02-16 06:57:26 +01:00
|
|
|
|
|
|
|
return itertools.chain.from_iterable(
|
2021-02-18 15:14:34 +01:00
|
|
|
itertools.product(graph1[stix_type], graph2[stix_type])
|
2021-02-16 06:57:26 +01:00
|
|
|
for stix_type in testable_types
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# default weights used for the similarity process
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
WEIGHTS = {
|
|
|
|
"attack-pattern": {
|
|
|
|
"name": (30, partial_string_based),
|
|
|
|
"external_references": (70, partial_external_reference_based),
|
|
|
|
},
|
|
|
|
"campaign": {
|
|
|
|
"name": (60, partial_string_based),
|
|
|
|
"aliases": (40, partial_list_based),
|
|
|
|
},
|
|
|
|
"course-of-action": {
|
|
|
|
"name": (60, partial_string_based),
|
|
|
|
"external_references": (40, partial_external_reference_based),
|
|
|
|
},
|
2021-02-27 01:19:33 +01:00
|
|
|
"grouping": {
|
|
|
|
"name": (20, partial_string_based),
|
|
|
|
"context": (20, partial_string_based),
|
|
|
|
"object_refs": (60, list_reference_check),
|
|
|
|
},
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
"identity": {
|
|
|
|
"name": (60, partial_string_based),
|
|
|
|
"identity_class": (20, exact_match),
|
|
|
|
"sectors": (20, partial_list_based),
|
|
|
|
},
|
2021-02-27 01:19:33 +01:00
|
|
|
"incident": {
|
2021-03-12 22:49:23 +01:00
|
|
|
"name": (30, partial_string_based),
|
|
|
|
"external_references": (70, partial_external_reference_based),
|
2021-02-27 01:19:33 +01:00
|
|
|
},
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check an graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
doctrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
"indicator": {
|
|
|
|
"indicator_types": (15, partial_list_based),
|
|
|
|
"pattern": (80, custom_pattern_based),
|
|
|
|
"valid_from": (5, partial_timestamp_based),
|
|
|
|
"tdelta": 1, # One day interval
|
|
|
|
},
|
|
|
|
"intrusion-set": {
|
|
|
|
"name": (20, partial_string_based),
|
|
|
|
"external_references": (60, partial_external_reference_based),
|
|
|
|
"aliases": (20, partial_list_based),
|
|
|
|
},
|
|
|
|
"location": {
|
|
|
|
"longitude_latitude": (34, partial_location_distance),
|
|
|
|
"region": (33, exact_match),
|
|
|
|
"country": (33, exact_match),
|
|
|
|
"threshold": 1000.0,
|
|
|
|
},
|
|
|
|
"malware": {
|
|
|
|
"malware_types": (20, partial_list_based),
|
|
|
|
"name": (80, partial_string_based),
|
|
|
|
},
|
|
|
|
"marking-definition": {
|
|
|
|
"name": (20, exact_match),
|
|
|
|
"definition": (60, exact_match),
|
|
|
|
"definition_type": (20, exact_match),
|
|
|
|
},
|
2021-02-27 01:19:33 +01:00
|
|
|
"relationship": {
|
|
|
|
"relationship_type": (20, exact_match),
|
|
|
|
"source_ref": (40, reference_check),
|
|
|
|
"target_ref": (40, reference_check),
|
|
|
|
},
|
|
|
|
"report": {
|
|
|
|
"name": (30, partial_string_based),
|
|
|
|
"published": (10, partial_timestamp_based),
|
|
|
|
"object_refs": (60, list_reference_check),
|
|
|
|
"tdelta": 1, # One day interval
|
|
|
|
},
|
|
|
|
"sighting": {
|
|
|
|
"first_seen": (5, partial_timestamp_based),
|
|
|
|
"last_seen": (5, partial_timestamp_based),
|
|
|
|
"sighting_of_ref": (40, reference_check),
|
|
|
|
"observed_data_refs": (20, list_reference_check),
|
|
|
|
"where_sighted_refs": (20, list_reference_check),
|
|
|
|
"summary": (10, exact_match),
|
|
|
|
},
|
Graph Equivalence (#449)
* new packages for graph and object-based semantic equivalence
* new method graphically_equivalent for Environment, move equivalence methods out
* object equivalence function, methods used for object-based moved here.
* new graph_equivalence methods
* add notes
* add support for versioning checks (default disabled)
* new tests to cover graph equivalence and new methods
* added more imports to environment.py to prevent breaking changes
* variable changes, new fields for checks, reset depth check per call
* flexibility when object is not available on graph.
* refactor debug logging message
* new file stix2.equivalence.graph_equivalence.rst and stix2.equivalence.object_equivalence.rst for docs
* API documentation for new modules
* additional text required to build docs
* add more test methods for list_semantic_check and graphically_equivalent/versioning
* add logging debug messages, code clean-up
* include individual scoring on results dict, fix issue on list_semantic_check not keeping highest score
* include results as summary in prop_scores, minor tweaks
* Update __init__.py
docstrings update
* apply feedback from pull request
- rename semantic_check to reference_check
- rename modules to graph and object respectively to eliminate redundancy
- remove created_by_ref and object_marking_refs from graph WEIGHTS and rebalance
* update docs/ entries
* add more checks, make max score based on actual objects checked instead of the full list, only create entry when type is present in WEIGHTS dictionary
update tests to reflect changes
* rename package patterns -> pattern
* documentation, moving weights around
* more documentation moving
* rename WEIGHTS variable for graph_equivalence
2020-10-16 17:35:26 +02:00
|
|
|
"threat-actor": {
|
|
|
|
"name": (60, partial_string_based),
|
|
|
|
"threat_actor_types": (20, partial_list_based),
|
|
|
|
"aliases": (20, partial_list_based),
|
|
|
|
},
|
|
|
|
"tool": {
|
|
|
|
"tool_types": (20, partial_list_based),
|
|
|
|
"name": (80, partial_string_based),
|
|
|
|
},
|
|
|
|
"vulnerability": {
|
|
|
|
"name": (30, partial_string_based),
|
|
|
|
"external_references": (70, partial_external_reference_based),
|
2021-03-01 18:27:52 +01:00
|
|
|
},
|
2020-10-16 23:12:52 +02:00
|
|
|
} # :autodoc-skip:
|