From 77eda29471991170e418ad5fddfeb9564dea114c Mon Sep 17 00:00:00 2001
From: Chris Lenk
Date: Wed, 11 Dec 2019 13:13:36 -0500
Subject: [PATCH] Add default weight_dict to documentation for semantic equivalence

---
 .gitignore           |   1 +
 docs/conf.py         |  10 +++++
 stix2/environment.py | 105 +++++++++++++++++++++++--------------------
 3 files changed, 68 insertions(+), 48 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5534a28..9758937 100644
--- a/.gitignore
+++ b/.gitignore
@@ -55,6 +55,7 @@ coverage.xml
 # Sphinx documentation
 docs/_build/
 .ipynb_checkpoints
+default_sem_eq_weights.rst
 
 # PyBuilder
 target/
diff --git a/docs/conf.py b/docs/conf.py
index 2a10fbd..8b372d5 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,4 +1,5 @@
 import datetime
+import json
 import os
 import re
 import sys
@@ -7,6 +8,7 @@ from six import class_types
 from sphinx.ext.autodoc import ClassDocumenter
 
 from stix2.base import _STIXBase
+from stix2.environment import WEIGHTS
 from stix2.version import __version__
 
 sys.path.insert(0, os.path.abspath('..'))
@@ -59,6 +61,14 @@ latex_documents = [
     (master_doc, 'stix2.tex', 'stix2 Documentation', 'OASIS', 'manual'),
 ]
 
+# Add a formatted version of environment.WEIGHTS
+default_sem_eq_weights = json.dumps(WEIGHTS, indent=4, default=lambda o: o.__name__)
+default_sem_eq_weights = default_sem_eq_weights.replace('\n', '\n ')
+default_sem_eq_weights = default_sem_eq_weights.replace(' "', ' ')
+default_sem_eq_weights = default_sem_eq_weights.replace('"\n', '\n')
+with open('default_sem_eq_weights.rst', 'w') as f:
+    f.write(".. code-block:: py\n\n {}\n\n".format(default_sem_eq_weights))
+
 
 def get_property_type(prop):
     """Convert property classname into pretty string name of property.
diff --git a/stix2/environment.py b/stix2/environment.py
index 85381b2..1d99567 100644
--- a/stix2/environment.py
+++ b/stix2/environment.py
@@ -210,59 +210,17 @@ class Environment(DataStoreMixin):
             Course of Action, Intrusion-Set, Observed-Data, Report are not supported
             by this implementation. Indicator pattern check is also limited.
 
+        Note:
+            Default weights_dict:
+
+            .. include:: ../default_sem_eq_weights.rst
+
         Note:
             This implementation follows the Committee Note on semantic equivalence.
             see `the Committee Note `__.
 
         """
-        # default weights used for the semantic equivalence process
-        weights = {
-            "attack-pattern": {
-                "name": (30, partial_string_based),
-                "external_references": (70, partial_external_reference_based),
-            },
-            "campaign": {
-                "name": (60, partial_string_based),
-                "aliases": (40, partial_list_based),
-            },
-            "identity": {
-                "name": (60, partial_string_based),
-                "identity_class": (20, exact_match),
-                "sectors": (20, partial_list_based),
-            },
-            "indicator": {
-                "indicator_types": (15, partial_list_based),
-                "pattern": (80, custom_pattern_based),
-                "valid_from": (5, partial_timestamp_based),
-                "tdelta": 1,  # One day interval
-            },
-            "location": {
-                "longitude_latitude": (34, partial_location_distance),
-                "region": (33, exact_match),
-                "country": (33, exact_match),
-                "threshold": 1000.0,
-            },
-            "malware": {
-                "malware_types": (20, partial_list_based),
-                "name": (80, partial_string_based),
-            },
-            "threat-actor": {
-                "name": (60, partial_string_based),
-                "threat_actor_types": (20, partial_list_based),
-                "aliases": (20, partial_list_based),
-            },
-            "tool": {
-                "tool_types": (20, partial_list_based),
-                "name": (80, partial_string_based),
-            },
-            "vulnerability": {
-                "name": (30, partial_string_based),
-                "external_references": (70, partial_external_reference_based),
-            },
-            "_internal": {
-                "ignore_spec_version": False,
-            },
-        }
+        weights = WEIGHTS.copy()
 
         if weight_dict:
             weights.update(weight_dict)
@@ -316,6 +274,7 @@ class Environment(DataStoreMixin):
                 try:
                     matching_score, sum_weights = method(obj1, obj2, prop_scores, **weights[type1])
                 except TypeError:
+                    # method doesn't support detailed output with prop_scores
                     matching_score, sum_weights = method(obj1, obj2, **weights[type1])
                 logger.debug("Matching Score: %s, Sum of Weights: %s", matching_score, sum_weights)
 
@@ -507,3 +466,53 @@ def partial_location_distance(lat1, long1, lat2, long2, threshold):
         (lat1, long1), (lat2, long2), threshold, result,
     )
     return result
+
+
+# default weights used for the semantic equivalence process
+WEIGHTS = {
+    "attack-pattern": {
+        "name": (30, partial_string_based),
+        "external_references": (70, partial_external_reference_based),
+    },
+    "campaign": {
+        "name": (60, partial_string_based),
+        "aliases": (40, partial_list_based),
+    },
+    "identity": {
+        "name": (60, partial_string_based),
+        "identity_class": (20, exact_match),
+        "sectors": (20, partial_list_based),
+    },
+    "indicator": {
+        "indicator_types": (15, partial_list_based),
+        "pattern": (80, custom_pattern_based),
+        "valid_from": (5, partial_timestamp_based),
+        "tdelta": 1,  # One day interval
+    },
+    "location": {
+        "longitude_latitude": (34, partial_location_distance),
+        "region": (33, exact_match),
+        "country": (33, exact_match),
+        "threshold": 1000.0,
+    },
+    "malware": {
+        "malware_types": (20, partial_list_based),
+        "name": (80, partial_string_based),
+    },
+    "threat-actor": {
+        "name": (60, partial_string_based),
+        "threat_actor_types": (20, partial_list_based),
+        "aliases": (20, partial_list_based),
+    },
+    "tool": {
+        "tool_types": (20, partial_list_based),
+        "name": (80, partial_string_based),
+    },
+    "vulnerability": {
+        "name": (30, partial_string_based),
+        "external_references": (70, partial_external_reference_based),
+    },
+    "_internal": {
+        "ignore_spec_version": False,
+    },
+}  #: :autodoc-skip: