Generalize the checking functionality within environment.py and add a prop_scores dict so all scoring info lives in one Python object
parent: 387810c4a3
commit: f5d199bedf
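The prop_scores dict added by this commit gathers every per-property result of the equivalence check into a single object. As a rough illustration of the layout the new loop (second hunk below) builds, assuming two "campaign" objects compared with the default weights and with made-up score values:

    # Illustrative only: shape of prop_scores after comparing two campaigns.
    # Each checked property maps to (weight, contributing_score); the two
    # summary keys hold the accumulated totals.
    prop_scores = {
        "name": (60, 45.0),        # 60 * partial_string_based(...); value made up
        "aliases": (40, 20.0),     # 40 * partial_list_based(...); value made up
        "matching_score": 65.0,    # sum of contributing scores
        "sum_weights": 100.0,      # sum of weights that were applied
    }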
environment.py:

@@ -218,55 +218,46 @@ class Environment(DataStoreMixin):
         # default weights used for the semantic equivalence process
         weights = {
             "attack-pattern": {
-                "name": 30,
-                "external_references": 70,
-                "method": _attack_pattern_checks,
+                "name": (30, partial_string_based),
+                "external_references": (70, partial_external_reference_based),
             },
             "campaign": {
-                "name": 60,
-                "aliases": 40,
-                "method": _campaign_checks,
+                "name": (60, partial_string_based),
+                "aliases": (40, partial_list_based),
             },
             "identity": {
-                "name": 60,
-                "identity_class": 20,
-                "sectors": 20,
-                "method": _identity_checks,
+                "name": (60, partial_string_based),
+                "identity_class": (20, exact_match),
+                "sectors": (20, partial_list_based),
             },
             "indicator": {
-                "indicator_types": 15,
-                "pattern": 80,
-                "valid_from": 5,
+                "indicator_types": (15, partial_list_based),
+                "pattern": (80, custom_pattern_based),
+                "valid_from": (5, partial_timestamp_based),
                 "tdelta": 1,  # One day interval
-                "method": _indicator_checks,
             },
             "location": {
-                "longitude_latitude": 34,
-                "region": 33,
-                "country": 33,
+                "longitude_latitude": (34, partial_location_distance),
+                "region": (33, exact_match),
+                "country": (33, exact_match),
                 "threshold": 1000.0,
-                "method": _location_checks,
             },
             "malware": {
-                "malware_types": 20,
-                "name": 80,
-                "method": _malware_checks,
+                "malware_types": (20, partial_list_based),
+                "name": (80, partial_string_based),
             },
             "threat-actor": {
-                "name": 60,
-                "threat_actor_types": 20,
-                "aliases": 20,
-                "method": _threat_actor_checks,
+                "name": (60, partial_string_based),
+                "threat_actor_types": (20, partial_list_based),
+                "aliases": (20, partial_list_based),
             },
             "tool": {
-                "tool_types": 20,
-                "name": 80,
-                "method": _tool_checks,
+                "tool_types": (20, partial_list_based),
+                "name": (80, partial_string_based),
             },
             "vulnerability": {
-                "name": 30,
-                "external_references": 70,
-                "method": _vulnerability_checks,
+                "name": (30, partial_string_based),
+                "external_references": (70, partial_external_reference_based),
             },
             "_internal": {
                 "ignore_spec_version": False,
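Each default weight is now a (weight, comparison_function) pair, and the per-type "method" entries are gone, so the generic property loop in the next hunk drives the comparison unless a caller supplies its own method. A minimal standalone sketch of the same table shape, using hypothetical stand-in comparison functions rather than the library's partial_* helpers:

    def exact_match_demo(x, y):
        # Stand-in comparison: 1.0 on an exact match, else 0.0.
        return 1.0 if x == y else 0.0

    def partial_list_demo(l1, l2):
        # Stand-in comparison: overlap ratio of two lists, between 0.0 and 1.0.
        union = set(l1) | set(l2)
        return len(set(l1) & set(l2)) / len(union) if union else 0.0

    # Same (weight, comparison_function) shape as the new defaults above.
    demo_weights = {
        "identity": {
            "name": (60, exact_match_demo),
            "identity_class": (20, exact_match_demo),
            "sectors": (20, partial_list_demo),
        },
    }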
@@ -287,9 +278,38 @@ class Environment(DataStoreMixin):
         try:
             method = weights[type1]["method"]
         except KeyError:
-            logger.warning("'%s' type has no semantic equivalence method to call!", type1)
-            sum_weights = matching_score = 0
+            try:
+                weights[type1]
+            except KeyError:
+                logger.warning("'%s' type has no semantic equivalence method to call!", type1)
+                sum_weights = matching_score = 0
+            else:
+                matching_score = 0.0
+                sum_weights = 0.0
+                prop_scores = {}
+
+                for prop in weights[type1]:
+                    if check_property_present(prop, obj1, obj2) or prop == "longitude_latitude":
+                        w = weights[type1][prop][0]
+                        comp_funct = weights[type1][prop][1]
+
+                        if comp_funct == partial_timestamp_based:
+                            contributing_score = w * comp_funct(obj1[prop], obj2[prop], weights[type1]["tdelta"])
+                        elif comp_funct == partial_location_distance:
+                            threshold = weights[type1]["threshold"]
+                            contributing_score = w * comp_funct(obj1["latitude"], obj1["longitude"], obj2["latitude"], obj2["longitude"], threshold)
+                        else:
+                            contributing_score = w * comp_funct(obj1[prop], obj2[prop])
+
+                        sum_weights += w
+                        matching_score += contributing_score
+
+                        prop_scores[prop] = (w, contributing_score)
+                        logger.debug("'%s' check -- weight: %s, contributing score: %s", prop, w, contributing_score)
+
+                prop_scores["matching_score"] = matching_score
+                prop_scores["sum_weights"] = sum_weights
         else:
             logger.debug("Starting semantic equivalence process between: '%s' and '%s'", obj1["id"], obj2["id"])
             matching_score, sum_weights = method(obj1, obj2, **weights[type1])
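The generalized loop above only needs a weights table plus two objects that expose the listed properties; the special cases are limited to timestamps (which take the tdelta window) and locations (which take the distance threshold). A self-contained sketch of the same accumulation over plain dicts, with check_property_present and the comparison function replaced by simplified stand-ins rather than the library's code:

    def check_property_present(prop, obj1, obj2):
        # Stand-in for the library helper: is the property set on both objects?
        return prop in obj1 and prop in obj2

    def partial_list_demo(l1, l2):
        # Stand-in comparison: overlap ratio of two lists, between 0.0 and 1.0.
        union = set(l1) | set(l2)
        return len(set(l1) & set(l2)) / len(union) if union else 0.0

    demo_weights = {"campaign": {"aliases": (40, partial_list_demo)}}

    obj1 = {"type": "campaign", "aliases": ["APT-X", "GroupX"]}
    obj2 = {"type": "campaign", "aliases": ["APT-X"]}

    matching_score = sum_weights = 0.0
    prop_scores = {}
    for prop, (w, comp_funct) in demo_weights[obj1["type"]].items():
        if check_property_present(prop, obj1, obj2):
            contributing_score = w * comp_funct(obj1[prop], obj2[prop])
            sum_weights += w
            matching_score += contributing_score
            prop_scores[prop] = (w, contributing_score)

    prop_scores["matching_score"] = matching_score
    prop_scores["sum_weights"] = sum_weights
    print(prop_scores)
    # {'aliases': (40, 20.0), 'matching_score': 20.0, 'sum_weights': 40.0}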