From e27f72299600d4af4bdb8ea06dde9e52f05e3b1c Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 14 Apr 2021 16:27:39 +0200 Subject: [PATCH] chg: [tools] add 2 new tools: extract cryptocurrency correlation by type + trigger manual update --- bin/lib/Correlate_object.py | 101 ++++++++++++++++++++++++-------- bin/packages/Correlation.py | 8 +++ tools/extract_cryptocurrency.py | 75 ++++++++++++++++++++++++ tools/manual_update_db.py | 45 ++++++++++++++ update/bin/ail_updater.py | 49 ++++++++++++++++ update/v3.4/Update.py | 42 ++++++------- update/v3.5/Update.py | 23 ++++---- 7 files changed, 287 insertions(+), 56 deletions(-) create mode 100755 tools/extract_cryptocurrency.py create mode 100755 tools/manual_update_db.py create mode 100755 update/bin/ail_updater.py diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index 1cddf9be..aa7ce61c 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -22,6 +22,7 @@ import Item config_loader = ConfigLoader.ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") config_loader = None def is_valid_object_type(object_type): @@ -199,35 +200,55 @@ def get_correlation_node_icon(correlation_name, correlation_type=None, value=Non return {"icon_class": icon_class, "icon_text": icon_text, "node_color": node_color, "node_radius": node_radius} -def get_item_url(correlation_name, value, correlation_type=None): +# flask_context: if this function is used with a Flask app context +def get_item_url(correlation_name, value, correlation_type=None, flask_context=True): ''' Warning: use only in flask ''' url = '#' if correlation_name == "pgp": - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="pgp", type_id=correlation_type, correlation_id=value) + if flask_context: + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="pgp", type_id=correlation_type, 
correlation_id=value) + else: + url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' elif correlation_name == 'cryptocurrency': - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="cryptocurrency", type_id=correlation_type, correlation_id=value) + if flask_context: + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="cryptocurrency", type_id=correlation_type, correlation_id=value) + else: + url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' elif correlation_name == 'username': - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="username", type_id=correlation_type, correlation_id=value) + if flask_context: + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="username", type_id=correlation_type, correlation_id=value) + else: + url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&type_id={correlation_type}&correlation_id={value}' elif correlation_name == 'decoded': - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="decoded", correlation_id=value) + if flask_context: + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="decoded", correlation_id=value) + else: + url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&correlation_id={value}' elif correlation_name == 'screenshot' or correlation_name == 'image': ### # TODO: rename me - endpoint = 'correlation.show_correlation' - url = url_for(endpoint, object_type="screenshot", correlation_id=value) + if flask_context: + endpoint = 'correlation.show_correlation' + url = url_for(endpoint, object_type="screenshot", correlation_id=value) + else: + url = f'{baseurl}/correlation/show_correlation?object_type={correlation_name}&correlation_id={value}' 
elif correlation_name == 'domain': - endpoint = 'crawler_splash.showDomain' - url = url_for(endpoint, domain=value) - elif correlation_name == 'item': - endpoint = 'objects_item.showItem' - url = url_for(endpoint, id=value) - elif correlation_name == 'paste': ### # TODO: remove me - endpoint = 'objects_item.showItem' - url = url_for(endpoint, id=value) + if flask_context: + endpoint = 'crawler_splash.showDomain' + url = url_for(endpoint, domain=value) + else: + url = f'{baseurl}/crawlers/showDomain?domain={value}' + elif correlation_name == 'item' or correlation_name == 'paste': ### # TODO: remove paste + if flask_context: + endpoint = 'objects_item.showItem' + url = url_for(endpoint, id=value) + else: + url = f'{baseurl}/object/item?id={value}' + #print(url) return url def get_obj_tag_table_keys(object_type): @@ -271,7 +292,7 @@ def create_graph_links(links_set): graph_links_list.append({"source": link[0], "target": link[1]}) return graph_links_list -def create_graph_nodes(nodes_set, root_node_id): +def create_graph_nodes(nodes_set, root_node_id, flask_context=True): graph_nodes_list = [] for node_id in nodes_set: correlation_name, correlation_type, value = node_id.split(';', 3) @@ -281,7 +302,7 @@ def create_graph_nodes(nodes_set, root_node_id): if node_id == root_node_id: dict_node["style"]["node_color"] = 'orange' dict_node["style"]["node_radius"] = 7 - dict_node['url'] = get_item_url(correlation_name, value, correlation_type) + dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context) graph_nodes_list.append(dict_node) return graph_nodes_list @@ -293,7 +314,7 @@ def create_node_id(correlation_name, value, correlation_type=''): # # TODO: filter by correlation type => bitcoin, mail, ... 
-def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None): +def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True): links = set() nodes = set() @@ -386,7 +407,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation links.add((root_node_id, correl_node_id)) - return {"nodes": create_graph_nodes(nodes, root_node_id), "links": create_graph_links(links)} + return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)} def get_obj_global_id(obj_type, obj_id, obj_sub_type=None): @@ -422,6 +443,40 @@ def get_obj_str_type_subtype(obj_type, obj_subtype): else: return obj_type +def sanitise_correlation_names(correlation_names): + ''' + correlation_names ex = 'pgp,crypto' + ''' + all_correlation_names = get_all_correlation_names() + if correlation_names is None: + return all_correlation_names + else: + l_correlation_names = [] + for correl in correlation_names.split(','): + if correl in all_correlation_names: + l_correlation_names.append(correl) + if l_correlation_names: + return l_correlation_names + else: + return all_correlation_names + +def sanitise_correlation_objects(correlation_objects): + ''' + correlation_objects ex = 'domain,decoded' + ''' + all_correlation_objects = get_all_correlation_objects() + if correlation_objects is None: + return all_correlation_objects + else: + l_correlation_objects = [] + for correl in correlation_objects.split(','): + if correl in all_correlation_objects: + l_correlation_objects.append(correl) + if l_correlation_objects: + return l_correlation_objects + else: + return all_correlation_objects + ######## API EXPOSED ######## def sanitize_object_type(object_type): if not is_valid_object_type(object_type): diff --git 
a/bin/packages/Correlation.py b/bin/packages/Correlation.py index f833371d..5db4805d 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -124,6 +124,14 @@ class Correlation(object): return (dict_resp, 200) + def get_all_correlations_by_subtype(self, subtype): + return r_serv_metadata.zrange(f'{self.correlation_name}_all:{subtype}', 0, -1) + + def get_all_correlations_by_subtype_pagination(self, subtype, nb_elem=50, page=1): + start = (page - 1) * nb_elem + stop = start + nb_elem -1 + return r_serv_metadata.zrange(f'{self.correlation_name}_all:{subtype}', start, stop) + def get_all_correlation_types(self): ''' Gel all correlation types diff --git a/tools/extract_cryptocurrency.py b/tools/extract_cryptocurrency.py new file mode 100755 index 00000000..5e27c367 --- /dev/null +++ b/tools/extract_cryptocurrency.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import json +import argparse + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Cryptocurrency + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader +import Correlate_object + +mode = 'union' + +def sanitise_int(page, default_value): + try: + page = int(page) + except: + page = default_value + if page < 1: + page = default_value + return page + +def sanitise_nb_max_nodes(nb_max_nodes): + try: + nb_max_nodes = int(nb_max_nodes) + if nb_max_nodes < 2: + nb_max_nodes = 300 + except: + nb_max_nodes = 300 + return nb_max_nodes + +def get_object_correlation_json(correlation_id, subtype, max_nodes): + max_nodes = sanitise_nb_max_nodes(max_nodes) + + object_type = 'cryptocurrency' + type_id = 'bitcoin' + + max_nodes = sanitise_nb_max_nodes(max_nodes) + + # ALL correlations + correlation_names = Correlate_object.sanitise_correlation_names('') + correlation_objects = Correlate_object.sanitise_correlation_objects('') + + res = Correlate_object.get_graph_node_object_correlation(object_type, 
correlation_id, mode, correlation_names, + correlation_objects, requested_correl_type=type_id, + max_nodes=max_nodes, flask_context=False) + return res + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Trigger backgroud update') + parser.add_argument('-t', '--type', help='Cryptocurrency type (bitcoin, bitcoin-cash, etherum, litecoin, monero, dash, zcash)', type=str, dest='type', required=True, default=None) + parser.add_argument('-p', '--page',help='page number' , type=int, default=1, dest='page') + parser.add_argument('-n', '--nb',help='number of addresses by page' , type=int, default=50, dest='nb_elem') + parser.add_argument('--node' ,help='correlation graph: max number of nodes' , type=int, default=50, dest='max_nodes') + args = parser.parse_args() + + subtype = args.type + if subtype is None: + parser.print_help() + sys.exit(0) + + page = sanitise_int(args.page, 1) + nb_elem = sanitise_int(args.nb_elem, 50) + max_nodes = sanitise_int(args.max_nodes, 300) + + dict_json = {} + for address in Cryptocurrency.cryptocurrency.get_all_correlations_by_subtype_pagination(subtype, nb_elem=nb_elem, page=page): + dict_json[address] = get_object_correlation_json(address, subtype, max_nodes) + + print(json.dumps(dict_json)) diff --git a/tools/manual_update_db.py b/tools/manual_update_db.py new file mode 100755 index 00000000..5f81da6a --- /dev/null +++ b/tools/manual_update_db.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import argparse + +# # TODO: check max update +max_update = 3.5 + +def check_version(version): + if version[0] == 'v' and '.' 
in version: + try: + res = float(version[1:]) + if res >= 1 and res <= max_update: + return True + except: + pass + print(f'ERROR: invalid version/tag: {version}') + return False + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description='Trigger backgroud update') + parser.add_argument('-v', '--version', help='update version (tag) ex: v3.5', type=str, dest='version', required=True, default=None) + args = parser.parse_args() + + if args.version is None: + parser.print_help() + sys.exit(0) + version = args.version + if not check_version(version): + sys.exit(0) + + update_db_dir = os.path.join(os.environ['AIL_HOME'], 'update', version) + update_db_script = os.path.join(update_db_dir, 'Update.py') + if not os.path.isfile(update_db_script): + # # TODO: launch default update + print('DB Up To Date') + else: + # import Updater class + sys.path.append(update_db_dir) + from Update import Updater + updater = Updater(version) + updater.run_update() diff --git a/update/bin/ail_updater.py b/update/bin/ail_updater.py new file mode 100755 index 00000000..9be78e81 --- /dev/null +++ b/update/bin/ail_updater.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +class AIL_Updater(object): + """docstring for AIL_Updater.""" + + def __init__(self, new_version): + self.version = new_version + self.start_time = time.time() + + self.config = ConfigLoader.ConfigLoader() + self.r_serv = self.config.get_redis_conn("ARDB_DB") + + self.f_version = float(self.version[1:]) + self.current_f_version = self.r_serv.get('ail:version') + if self.current_f_version: + self.current_f_version = float(self.current_f_version[1:]) + else: + self.current_f_version = 0 + + def update(self): + """ + AIL DB update + """ + pass + + def end_update(self): + """ + Update DB version + """ + #Set update date +
self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d")) + #Set current ail version + if self.f_version > self.current_f_version: + self.r_serv.set('ail:version', self.version) + + def run_update(self): + self.update() + self.end_update() diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py index 943d4b3a..67f74ca0 100755 --- a/update/v3.4/Update.py +++ b/update/v3.4/Update.py @@ -11,26 +11,28 @@ import datetime sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -new_version = 'v3.4' +sys.path.append(os.path.join(os.environ['AIL_HOME'], 'update', 'bin')) +from ail_updater import AIL_Updater + +class Updater(AIL_Updater): + """default Updater.""" + + def __init__(self, version): + super(Updater, self).__init__(version) + self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion") + + def update(self): + """ + Update Domain Languages + """ + self.r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') + self.r_serv.set('update:nb_elem_to_convert', self.r_serv_onion.scard('domain_update_v3.4')) + self.r_serv.set('update:nb_elem_converted',0) + + # Add background update + self.r_serv.sadd('ail:to_update', self.version) if __name__ == '__main__': - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') - r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4')) - r_serv.set('update:nb_elem_converted',0) - - # Add background update - r_serv.sadd('ail:to_update', new_version) - - #Set current ail version - r_serv.set('ail:version', new_version) - - #Set current ail version - r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) + updater = Updater('v3.4') + updater.run_update() diff 
--git a/update/v3.5/Update.py b/update/v3.5/Update.py index 3bccc0cd..5cb73eb5 100755 --- a/update/v3.5/Update.py +++ b/update/v3.5/Update.py @@ -11,19 +11,16 @@ import datetime sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -new_version = 'v3.5' +sys.path.append(os.path.join(os.environ['AIL_HOME'], 'update', 'bin')) +from ail_updater import AIL_Updater + +class Updater(AIL_Updater): + """default Updater.""" + + def __init__(self, version): + super(Updater, self).__init__(version) if __name__ == '__main__': - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - #Set current ail version - r_serv.set('ail:version', new_version) - - #Set current ail version - r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) + updater = Updater('v3.5') + updater.run_update()