AIL-framework/bin/lib/correlations_engine.py

156 lines
5.6 KiB
Python
Executable File

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_metadata = config_loader.get_db_conn("Kvrocks_Correlations")
config_loader = None
##################################
# CORRELATION MIGRATION
##################################
#
# MIGRATE TO KVROCKS + Rename correlation Keys
# => Add support for correlations between subtypes
# => Common correlation engine for each objects
#
# Objects Iterations: -screenshot
# -decoded
# -subtypes
# -domains
#
# /!\ Handle reinsertion /!\
#
#
# CORRELATION DB ????? => purge if needed
#
#
#
#
#
##################################
# CORRELATION MIGRATION
##################################
CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency": ["domain", "item"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"],
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"],
"pgp": ["domain", "item"],
"username": ["domain", "item"],
"screenshot": ["domain", "item"],
}
def get_obj_correl_types(obj_type):
return CORRELATION_TYPES_BY_OBJ.get(obj_type)
def sanityze_obj_correl_types(obj_type, correl_types):
obj_correl_types = get_obj_correl_types(obj_type)
if correl_types:
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
obj_correlations[correl_type] = get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return obj_correlations
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
return r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
def get_correlations(obj_type, subtype, obj_id, filter_types=[]):
if subtype is None:
subtype = ''
obj_correlations = {}
filter_types = sanityze_obj_correl_types(obj_type, filter_types)
for correl_type in filter_types:
obj_correlations[correl_type] = get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
return obj_correlations
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
if subtype is None:
subtype = ''
return r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}')
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
if subtype is None:
subtype = ''
if subtype2 is None:
subtype2 = ''
return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.sadd(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.sadd(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
if subtype1 is None:
subtype1 = ''
if subtype2 is None:
subtype2 = ''
r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}')
r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}')
def get_obj_str_id(obj_type, subtype, obj_id):
if subtype is None:
subtype = ''
return f'{obj_type};{subtype};{obj_id}'
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
links = set()
nodes = set()
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
_get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
return obj_str_id, nodes, links
def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
nodes.add(obj_str_id)
obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
# print(obj_correlations)
for correl_type in obj_correlations:
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
if obj2_str_id == previous_str_obj:
continue
if len(nodes) > max_nodes:
break
nodes.add(obj2_str_id)
links.add((obj_str_id, obj2_str_id))
if level > 0:
next_level = level - 1
_get_correlations_graph_node(links, nodes, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)