Generalize checking functionality within environment.py and add prop_scores dict so all scoring info is in one Python object

master
Desai, Kartikey H 2019-11-19 16:36:23 -05:00 committed by Chris Lenk
parent 387810c4a3
commit f5d199bedf
1 changed file with 53 additions and 33 deletions

View File

@ -218,55 +218,46 @@ class Environment(DataStoreMixin):
# default weights used for the semantic equivalence process # default weights used for the semantic equivalence process
weights = { weights = {
"attack-pattern": { "attack-pattern": {
"name": 30, "name": (30, partial_string_based),
"external_references": 70, "external_references": (70, partial_external_reference_based),
"method": _attack_pattern_checks,
}, },
"campaign": { "campaign": {
"name": 60, "name": (60, partial_string_based),
"aliases": 40, "aliases": (40, partial_list_based),
"method": _campaign_checks,
}, },
"identity": { "identity": {
"name": 60, "name": (60, partial_string_based),
"identity_class": 20, "identity_class": (20, exact_match),
"sectors": 20, "sectors": (20, partial_list_based),
"method": _identity_checks,
}, },
"indicator": { "indicator": {
"indicator_types": 15, "indicator_types": (15, partial_list_based),
"pattern": 80, "pattern": (80, custom_pattern_based),
"valid_from": 5, "valid_from": (5, partial_timestamp_based),
"tdelta": 1, # One day interval "tdelta": 1, # One day interval
"method": _indicator_checks,
}, },
"location": { "location": {
"longitude_latitude": 34, "longitude_latitude": (34, partial_location_distance),
"region": 33, "region": (33, exact_match),
"country": 33, "country": (33, exact_match),
"threshold": 1000.0, "threshold": 1000.0,
"method": _location_checks,
}, },
"malware": { "malware": {
"malware_types": 20, "malware_types": (20, partial_list_based),
"name": 80, "name": (80, partial_string_based),
"method": _malware_checks,
}, },
"threat-actor": { "threat-actor": {
"name": 60, "name": (60, partial_string_based),
"threat_actor_types": 20, "threat_actor_types": (20, partial_list_based),
"aliases": 20, "aliases": (20, partial_list_based),
"method": _threat_actor_checks,
}, },
"tool": { "tool": {
"tool_types": 20, "tool_types": (20, partial_list_based),
"name": 80, "name": (80, partial_string_based),
"method": _tool_checks,
}, },
"vulnerability": { "vulnerability": {
"name": 30, "name": (30, partial_string_based),
"external_references": 70, "external_references": (70, partial_external_reference_based),
"method": _vulnerability_checks,
}, },
"_internal": { "_internal": {
"ignore_spec_version": False, "ignore_spec_version": False,
@ -288,8 +279,37 @@ class Environment(DataStoreMixin):
try: try:
method = weights[type1]["method"] method = weights[type1]["method"]
except KeyError: except KeyError:
logger.warning("'%s' type has no semantic equivalence method to call!", type1) try:
sum_weights = matching_score = 0 weights[type1]
except KeyError:
logger.warning("'%s' type has no semantic equivalence method to call!", type1)
sum_weights = matching_score = 0
else:
matching_score = 0.0
sum_weights = 0.0
prop_scores = {}
for prop in weights[type1]:
if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude":
w = weights[type1][prop][0]
comp_funct = weights[type1][prop][1]
if comp_funct == partial_timestamp_based:
contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
elif comp_funct == partial_location_distance:
threshold = weights[type1]["threshold"]
contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
else:
contributing_score = w * comp_funct(obj1[prop], obj2[prop])
sum_weights += w
matching_score += contributing_score
prop_scores[prop] = (w, contributing_score)
logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
prop_scores["matching_score"] = matching_score
prop_scores["sum_weights"] = sum_weights
else: else:
logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"]) logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
matching_score, sum_weights = method(obj1, obj2, **weights[type1]) matching_score, sum_weights = method(obj1, obj2, **weights[type1])