mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			221 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			221 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| 
 | |
| sys.path.append(os.environ['AIL_BIN'])
 | |
| ##################################
 | |
| # Import Project packages
 | |
| ##################################
 | |
| from lib.ConfigLoader import ConfigLoader
 | |
| 
 | |
# Connection to the "Kvrocks_Correlations" database, shared by every helper
# in this module.
r_metadata = ConfigLoader().get_db_conn("Kvrocks_Correlations")
# The loader is only needed to open the connection; the module-level name is
# kept and bound to None.
config_loader = None
 | |
| 
 | |
| ##################################
 | |
| # CORRELATION MIGRATION
 | |
| ##################################
 | |
| #
 | |
| #   MIGRATE TO KVROCKS + Rename correlation Keys
 | |
| #                           => Add support for correlations between subtypes
 | |
| #                           => Common correlation engine for all objects
 | |
| #
 | |
| #   Objects Iterations: -screenshot
 | |
| #                       -decoded
 | |
| #                       -subtypes
 | |
| #                       -domains
 | |
| #
 | |
| #   /!\ Handle reinsertion /!\
 | |
| #
 | |
| #
 | |
| #   CORRELATION DB ????? => purge if needed
 | |
| #
 | |
| #
 | |
| #
 | |
| #
 | |
| #
 | |
| ##################################
 | |
| # CORRELATION MIGRATION
 | |
| ##################################
 | |
| 
 | |
# Maps each object type to the object types it may be correlated with.
# Each value is its own list, in the order used when iterating correlations.
CORRELATION_TYPES_BY_OBJ = {
    # message or direct correlation like cve, bitcoin, ... ???
    "chat": ["image", "user-account"],
    "cookie-name": ["domain"],
    "cryptocurrency": ["domain", "item", "message"],
    "cve": ["domain", "item", "message"],
    "decoded": ["domain", "item", "message"],
    "domain": [
        "cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon",
        "hhhash", "item", "pgp", "title", "screenshot", "username",
    ],
    "etag": ["domain"],
    # TODO Decoded
    "favicon": ["domain", "item"],
    "hhhash": ["domain"],
    "image": ["chat", "message", "user-account"],
    # chat ???
    "item": [
        "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp",
        "screenshot", "title", "username",
    ],
    # chat ??
    "message": [
        "cve", "cryptocurrency", "decoded", "image", "pgp", "user-account",
    ],
    "pgp": ["domain", "item", "message"],
    "screenshot": ["domain", "item"],
    "title": ["domain", "item"],
    "user-account": ["chat", "message"],
    # TODO chat-user/account
    "username": ["domain", "item", "message"],
}
 | |
| 
 | |
def get_obj_correl_types(obj_type):
    """Look up the correlation types declared for ``obj_type``.

    Returns the list stored in ``CORRELATION_TYPES_BY_OBJ`` for that object
    type, or ``None`` when the type has no entry.
    """
    declared_types = CORRELATION_TYPES_BY_OBJ.get(obj_type)
    return declared_types
 | |
| 
 | |
def sanityze_obj_correl_types(obj_type, correl_types):
    """Restrict ``correl_types`` to the types valid for ``obj_type``.

    :param obj_type: object type whose allowed correlation types are looked up
    :param correl_types: iterable of requested correlation types; may be
                         empty or None to request every allowed type
    :return: a set holding the requested types that are allowed for
             ``obj_type``; the full list of allowed types when the request is
             empty or none of the requested types is allowed; an empty list
             when ``obj_type`` has no declared correlation types
    """
    obj_correl_types = get_obj_correl_types(obj_type)
    # Unknown object type (or no declared types): nothing can be correlated.
    # Checking this first avoids calling set.intersection(None), which raises
    # TypeError when a filter is supplied for an unknown object type.
    if not obj_correl_types:
        return []
    if correl_types:
        correl_types = set(correl_types).intersection(obj_correl_types)
    # Empty request, or no requested type is valid: fall back to all types.
    if not correl_types:
        return obj_correl_types
    return correl_types
 | |
| 
 | |
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
    """Return the number of ``correl_type`` objects correlated with an object.

    :param obj_type: type of the object
    :param subtype: subtype of the object; None is treated as ''
    :param obj_id: id of the object
    :param correl_type: type of the correlated objects to count
    :return: cardinality of the correlation set, as returned by ``scard``
    """
    # Keys are written with an empty subtype when the subtype is None (see
    # add_obj_correlation); normalise here too so a direct call with None does
    # not look up a key containing the literal text "None".
    if subtype is None:
        subtype = ''
    return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
 | |
| 
 | |
def get_nb_correlations(obj_type, subtype, obj_id, filter_types=None):
    """Count the correlations of an object, grouped by correlated type.

    :param obj_type: type of the object
    :param subtype: subtype of the object; None is treated as ''
    :param obj_id: id of the object
    :param filter_types: optional iterable of correlation types to count;
                         when empty or None, every type allowed for
                         ``obj_type`` is counted
    :return: dict mapping each correlation type to its number of correlations
    """
    # The default is None rather than a shared mutable list; an empty/None
    # filter is expanded by sanityze_obj_correl_types.
    if subtype is None:
        subtype = ''
    filter_types = sanityze_obj_correl_types(obj_type, filter_types)
    return {
        correl_type: get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type)
        for correl_type in filter_types
    }
 | |
| 
 | |
def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type, unpack=False):
    """Return the ``correl_type`` objects correlated with an object.

    :param obj_type: type of the object
    :param subtype: subtype of the object; None is treated as ''
    :param obj_id: id of the object
    :param correl_type: type of the correlated objects to fetch
    :param unpack: when True, split each stored ``'<subtype>:<id>'`` member on
                   its first ':' and return a list of ``[subtype, id]`` lists
    :return: the raw members returned by ``smembers``, or the unpacked list
    """
    # Keys are written with an empty subtype when the subtype is None (see
    # add_obj_correlation); normalise here too so a direct call with None does
    # not look up a key containing the literal text "None".
    if subtype is None:
        subtype = ''
    correl = r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}')
    if not unpack:
        return correl
    # Split only once: the object id itself may contain ':'.
    return [str_correl.split(':', 1) for str_correl in correl]
 | |
| 
 | |
def get_correlations(obj_type, subtype, obj_id, filter_types=None, unpack=False):
    """Return the correlations of an object, grouped by correlated type.

    :param obj_type: type of the object
    :param subtype: subtype of the object; None is treated as ''
    :param obj_id: id of the object
    :param filter_types: optional iterable of correlation types to fetch;
                         when empty or None, every type allowed for
                         ``obj_type`` is fetched
    :param unpack: forwarded to ``get_correlation_by_correl_type``
    :return: dict mapping each correlation type to its correlated objects
    """
    # The default is None rather than a shared mutable list; an empty/None
    # filter is expanded by sanityze_obj_correl_types.
    if subtype is None:
        subtype = ''
    filter_types = sanityze_obj_correl_types(obj_type, filter_types)
    return {
        correl_type: get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type, unpack=unpack)
        for correl_type in filter_types
    }
 | |
| 
 | |
def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type):
    """Check whether an object has a correlation set for ``obj2_type``.

    A None ``subtype`` is replaced by '' before the key is built. The result
    of ``r_metadata.exists`` on that key is returned as is.
    """
    subtype = '' if subtype is None else subtype
    found = r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}')
    return found
 | |
| 
 | |
def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id):
    """Check whether two objects are correlated with each other.

    :param obj_type: type of the first object
    :param subtype: subtype of the first object; None is treated as ''
    :param obj_id: id of the first object
    :param obj2_type: type of the second object
    :param subtype2: subtype of the second object; None is treated as ''
    :param obj2_id: id of the second object
    :return: result of ``sismember`` on the first object's correlation set,
             or False when the lookup raises
    """
    if subtype is None:
        subtype = ''
    if subtype2 is None:
        subtype2 = ''
    try:
        return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}')
    # Best effort: a failing lookup is reported as "not correlated".
    # Catch Exception rather than using a bare except so that
    # KeyboardInterrupt and SystemExit still propagate.
    except Exception:
        return False
 | |
| 
 | |
def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
    """Record a correlation between two objects, in both directions.

    Each object gets the other one added, as ``'<subtype>:<id>'``, to its
    per-type correlation set. None subtypes are stored as ''.
    """
    subtype1 = '' if subtype1 is None else subtype1
    subtype2 = '' if subtype2 is None else subtype2

    # obj1 -> obj2
    forward_key = f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}'
    forward_member = f'{subtype2}:{obj2_id}'
    r_metadata.sadd(forward_key, forward_member)

    # obj2 -> obj1
    backward_key = f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}'
    backward_member = f'{subtype1}:{obj1_id}'
    r_metadata.sadd(backward_key, backward_member)
 | |
| 
 | |
| 
 | |
def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id):
    """Remove the correlation between two objects, in both directions.

    Each object has the other one removed, as ``'<subtype>:<id>'``, from its
    per-type correlation set. None subtypes are handled as ''.
    """
    subtype1 = '' if subtype1 is None else subtype1
    subtype2 = '' if subtype2 is None else subtype2

    # obj1 -> obj2
    forward_key = f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}'
    forward_member = f'{subtype2}:{obj2_id}'
    r_metadata.srem(forward_key, forward_member)

    # obj2 -> obj1
    backward_key = f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}'
    backward_member = f'{subtype1}:{obj1_id}'
    r_metadata.srem(backward_key, backward_member)
 | |
| 
 | |
def delete_obj_correlations(obj_type, subtype, obj_id):
    """Remove every correlation of an object.

    All correlations are fetched first, then each one is removed in both
    directions through ``delete_obj_correlation``.
    """
    correlations = get_correlations(obj_type, subtype, obj_id)
    for other_type, members in correlations.items():
        for member in members:
            # Members are stored as '<subtype>:<id>'; the id may contain ':'.
            other_subtype, other_id = member.split(':', 1)
            delete_obj_correlation(obj_type, subtype, obj_id, other_type, other_subtype, other_id)
 | |
| 
 | |
| # # bypass max result/objects ???
 | |
| # def get_correlation_depht(obj_type, subtype, obj_id, filter_types=[], level=1, nb_max=300):
 | |
| #     objs = set()
 | |
| #     _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max)
 | |
| #     return objs
 | |
| #
 | |
| # def _get_correlation_depht(objs, obj_type, subtype, obj_id, filter_types, level, nb_max, previous_str_obj=''):
 | |
| #     obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
 | |
| #     objs.add(obj_str_id)
 | |
| #
 | |
| #     obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
 | |
| #     for correl_type in obj_correlations:
 | |
| #         for str_obj in obj_correlations[correl_type]:
 | |
| #             subtype2, obj2_id = str_obj.split(':', 1)
 | |
| #             obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
 | |
| #
 | |
| #             if obj2_str_id == previous_str_obj:
 | |
| #                 continue
 | |
| #
 | |
| #             if len(nodes) > nb_max:
 | |
| #                 break
 | |
| #             objs.add(obj2_str_id)
 | |
| #
 | |
| #             if level > 0:
 | |
| #                 next_level = level - 1
 | |
| #                 _get_correlation_depht(objs, correl_type, subtype2, obj2_id, filter_types, next_level, nb_max,
 | |
| #                                        previous_str_obj=obj_str_id)
 | |
| 
 | |
def get_obj_str_id(obj_type, subtype, obj_id):
    """Build the ``'<type>:<subtype>:<id>'`` string identifying an object.

    A None ``subtype`` is rendered as an empty string.
    """
    subtype = '' if subtype is None else subtype
    return f'{obj_type}:{subtype}:{obj_id}'
 | |
| 
 | |
def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=None, max_nodes=300, level=1, objs_hidden=None, flask_context=False):
    """Build the correlation graph around an object.

    :param obj_type: type of the root object
    :param subtype: subtype of the root object; None is treated as ''
    :param obj_id: id of the root object
    :param filter_types: optional iterable of correlation types to follow;
                         empty or None follows every allowed type
    :param max_nodes: node limit passed to the traversal
    :param level: recursion level passed to the traversal
    :param objs_hidden: optional collection of object string ids
                        (``'<type>:<subtype>:<id>'``) to leave out
    :param flask_context: accepted but not used by this function
    :return: tuple ``(root string id, nodes, links, meta)`` where ``nodes`` is
             a set of string ids, ``links`` a set of ``(source, target)``
             string-id pairs and ``meta`` a dict with the keys ``'complete'``
             and ``'objs'``
    """
    # Defaults are None rather than shared mutable objects.
    if filter_types is None:
        filter_types = []
    if objs_hidden is None:
        objs_hidden = set()

    links = set()
    nodes = set()
    meta = {'complete': True, 'objs': set()}

    obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)

    _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
    return obj_str_id, nodes, links, meta
 | |
| 
 | |
| 
 | |
def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=None, objs_hidden=None, previous_str_obj=''):
    """Add an object and its correlations to the graph, recursively.

    ``links``, ``nodes`` and ``meta`` are updated in place.

    :param links: set receiving ``(source, target)`` string-id pairs
    :param nodes: set receiving node string ids
    :param meta: dict with ``'objs'`` (set of every object string id seen) and
                 ``'complete'`` (set to False when the node limit is hit)
    :param obj_type: type of the current object
    :param subtype: subtype of the current object; None is treated as ''
    :param obj_id: id of the current object
    :param level: correlated objects are expanded recursively while level > 0
    :param max_nodes: node limit; 0 disables the limit
    :param filter_types: optional iterable of correlation types to follow
    :param objs_hidden: optional collection of object string ids to skip
    :param previous_str_obj: string id of the object this one was reached
                             from; no link is created back to it
    """
    # Defaults are None rather than shared mutable objects.
    if filter_types is None:
        filter_types = []
    if objs_hidden is None:
        objs_hidden = set()

    obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
    meta['objs'].add(obj_str_id)
    nodes.add(obj_str_id)

    obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=filter_types)
    for correl_type in obj_correlations:
        for str_obj in obj_correlations[correl_type]:
            subtype2, obj2_id = str_obj.split(':', 1)
            obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
            # filter objects to hide
            if obj2_str_id in objs_hidden:
                continue

            meta['objs'].add(obj2_str_id)

            # Do not link back to the object we arrived from.
            if obj2_str_id == previous_str_obj:
                continue

            # Same condition as the chained form `len(nodes) > max_nodes != 0`.
            # The break only leaves the current correlation type; the
            # following types are still iterated.
            if len(nodes) > max_nodes and max_nodes != 0:
                meta['complete'] = False
                break
            nodes.add(obj2_str_id)
            links.add((obj_str_id, obj2_str_id))

            if level > 0:
                next_level = level - 1
                _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)
 | |
| 
 |