From 880c351c0c182eac7fdeac14c4fe50b6f127663a Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Nov 2019 16:00:42 +0100 Subject: [PATCH] chg: [update + show decoded items] add background update --- bin/lib/Domain.py | 26 ++++++- bin/packages/Correlation.py | 44 +++++++++++ bin/packages/Cryptocurrency.py | 3 +- bin/packages/Item.py | 62 +++++++++++++++ bin/packages/Tag.py | 8 +- bin/update-background.py | 21 ++++- update/v2.4/Update.py | 5 ++ update/v2.4/Update_domain.py | 77 +++++++++++++++++++ var/www/blueprints/crawler_splash.py | 2 + var/www/modules/Flask_config.py | 4 +- .../crawler/crawler_splash/showDomain.html | 44 +++++++++++ 11 files changed, 288 insertions(+), 8 deletions(-) create mode 100755 update/v2.4/Update_domain.py diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 962c2e19..9220a9e2 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -17,6 +17,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Correlation from Cryptocurrency import cryptocurrency from Pgp import pgp +import Decoded import Item import Tag @@ -197,6 +198,14 @@ def get_domain_pgp(domain, currencies_type=None, get_nb=False): ''' return pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb) +def get_domain_decoded(domain): + ''' + Return all decoded items of a given domain. + + :param domain: crawled domain + ''' + return Decoded.get_domain_decoded_item(domain) + def get_domain_all_correlation(domain, correlation_type=None, get_nb=False): ''' Retun all correlation of a given domain. 
@@ -214,6 +223,9 @@ def get_domain_all_correlation(domain, correlation_type=None, get_nb=False): res = get_domain_pgp(domain, get_nb=get_nb) if res: domain_correl['pgp'] = res + res = get_domain_decoded(domain) + if res: + domain_correl['decoded'] = res return domain_correl # TODO: handle port @@ -271,6 +283,12 @@ class Domain(object): if self.is_domain_up(): self.current_port = sanathyse_port(port, self.domain, self.type) + def get_domain_name(self): + return self.domain + + def get_domain_type(self): + return self.type + def get_current_port(self): return self.current_port @@ -361,10 +379,16 @@ class Domain(object): ''' return get_domain_all_correlation(self.domain, get_nb=True) - def get_domain_history_with_status(self): + def get_domain_history(self): ''' Retun the full history of a given domain and port. ''' + return get_domain_history(self.domain, self.type, 80) + + def get_domain_history_with_status(self): + ''' + Return the full history (with status) of a given domain (port 80). + ''' return get_domain_history_with_status(self.domain, self.type, 80) def get_domain_items_crawled(self, port=None, epoch=None, items_link=False, item_screenshot=False, item_tag=False): diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index 8150ebf3..b3f33079 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -147,6 +147,50 @@ class Correlation(object): dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) return dict_correlation + def _get_item_correlation_obj(self, item_id, correlation_type): + ''' + Return correlation of a given item id. 
+ + :param item_id: item id + :type item_id: str + :param correlation_type: correlation type + :type correlation_type: str + + :return: a list of correlation + :rtype: list + ''' + res = r_serv_metadata.smembers('item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id)) + if res: + return list(res) + else: + return [] + + def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False): + ''' + Return all correlation of a given item id. + + :param item_id: item id + :param correlation_type: list of correlation types + :type correlation_type: list, optional + + :return: a dictionnary of all the requested correlations + :rtype: dict + ''' + correlation_type = self.sanythise_correlation_types(correlation_type) + dict_correlation = {} + for correl in correlation_type: + res = self._get_item_correlation_obj(item_id, correl) + if res: + dict_correlation[correl] = res + if get_nb: + dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl]) + return dict_correlation + + + + def save_domain_correlation(self, domain, correlation_type, correlation_value): + r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain), correlation_value) + r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, correlation_value), domain) ######## API EXPOSED ######## diff --git a/bin/packages/Cryptocurrency.py b/bin/packages/Cryptocurrency.py index d83f91dd..bce4ac05 100755 --- a/bin/packages/Cryptocurrency.py +++ b/bin/packages/Cryptocurrency.py @@ -61,6 +61,7 @@ def get_cryptocurrency(request_dict, cryptocurrency_type): return cryptocurrency.get_correlation(request_dict, cryptocurrency_type, field_name) +# # TODO: refractor/move me in Correlation def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): # create basic medata if not r_serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, 
cryptocurrency_address)): @@ -89,7 +90,7 @@ def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrenc r_serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) # domain - if Item.is_crawled(item_path): + if Item.is_crawled(item_path): # # TODO: use save_domain_correlation domain = Item.get_item_domain(item_path) r_serv_metadata.sadd('domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, domain), cryptocurrency_address) r_serv_metadata.sadd('set_domain_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), domain) diff --git a/bin/packages/Item.py b/bin/packages/Item.py index ff19a19e..497a0499 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -8,10 +8,13 @@ import redis sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +import Decoded sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Date import Tag +from Cryptocurrency import cryptocurrency +from Pgp import pgp config_loader = ConfigLoader.ConfigLoader() PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' @@ -126,7 +129,61 @@ def get_item(request_dict): ### ### correlation ### +def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False): + ''' + Return all cryptocurrencies of a given item. + :param item_id: item id + :param currencies_type: list of cryptocurrencies type + :type currencies_type: list, optional + ''' + return cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) + +def get_item_pgp(item_id, currencies_type=None, get_nb=False): + ''' + Return all pgp of a given item. 
+ + :param item_id: item id + :param currencies_type: list of pgp correlation types + :type currencies_type: list, optional + ''' + return pgp.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) + +def get_item_decoded(item_id): + ''' + Return the decoded entries of a given item. + + :param item_id: item id + :type item_id: str + :return: decoded entries of the item, as returned by Decoded.get_item_decoded + ''' + return Decoded.get_item_decoded(item_id) + +def get_item_all_correlation(item_id, correlation_type=None, get_nb=False): + ''' + Return all correlations of a given item id. + + :param item_id: item id + :type item_id: str + + :return: a dict of all correlations for an item id + :rtype: dict + ''' + item_correl = {} + res = get_item_cryptocurrency(item_id, get_nb=get_nb) + if res: + item_correl['cryptocurrency'] = res + res = get_item_pgp(item_id, get_nb=get_nb) + if res: + item_correl['pgp'] = res + res = get_item_decoded(item_id) + if res: + item_correl['decoded'] = res + return item_correl + + + +## TODO: REFRACTOR def _get_item_correlation(correlation_name, correlation_type, item_id): res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) if res: @@ -134,18 +191,23 @@ def _get_item_correlation(correlation_name, correlation_type, item_id): else: return [] +## TODO: REFRACTOR def get_item_bitcoin(item_id): return _get_item_correlation('cryptocurrency', 'bitcoin', item_id) +## TODO: REFRACTOR def get_item_pgp_key(item_id): return _get_item_correlation('pgpdump', 'key', item_id) +## TODO: REFRACTOR def get_item_pgp_name(item_id): return _get_item_correlation('pgpdump', 'name', item_id) +## TODO: REFRACTOR def get_item_pgp_mail(item_id): return _get_item_correlation('pgpdump', 'mail', item_id) +## TODO: REFRACTOR def get_item_pgp_correlation(item_id): pass diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index ac5143d1..1e6c9fbc 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -172,8 
+172,11 @@ def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): res_dict['type'] = item_type return (res_dict, 200) +def add_domain_tag(tag, domain, item_date): + r_serv_metadata.sadd('tag:{}'.format(domain), tag) + r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) -def add_item_tag(tag, item_path, item_type="paste"): +def add_item_tag(tag, item_path, item_type="paste", tag_date=None): if item_type=="paste": item_date = int(Item.get_item_date(item_path)) @@ -189,8 +192,7 @@ def add_item_tag(tag, item_path, item_type="paste"): # domain item else: item_date = int(Domain.get_domain_last_check(item_path, r_format="int")) - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), item_path) + add_domain_tag(tag, item_path, item_date) r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) diff --git a/bin/update-background.py b/bin/update-background.py index 6f58d0f5..7ba51f1c 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -54,5 +54,22 @@ if __name__ == "__main__": r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') - if r_serv.scard('ail:update_v2.4') != 1: - pass + if r_serv.get('ail:current_background_update') == 'v2.4': + r_serv.delete('ail:update_error') + r_serv.set('ail:update_in_progress', 'v2.4') + r_serv.set('ail:current_background_update', 'v2.4') + r_serv.set('ail:current_background_script', 'domain update') + + update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py') + process = subprocess.run(['python' ,update_file]) + + + if int(r_serv.get('ail:current_background_script_stat')) != 100: + r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script') + else: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + 
r_serv.delete('ail:current_background_update') + r_serv.delete('update:nb_elem_to_convert') + r_serv.delete('update:nb_elem_converted') diff --git a/update/v2.4/Update.py b/update/v2.4/Update.py index e45f5241..53456330 100755 --- a/update/v2.4/Update.py +++ b/update/v2.4/Update.py @@ -19,12 +19,17 @@ if __name__ == '__main__': config_loader = ConfigLoader.ConfigLoader() r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None #Set current update_in_progress r_serv.set('ail:update_in_progress', new_version) r_serv.set('ail:current_background_update', new_version) + r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up') + r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4')) + r_serv.set('update:nb_elem_converted',0) + #Set current ail version r_serv.set('ail:version', new_version) diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py new file mode 100755 index 00000000..584818d8 --- /dev/null +++ b/update/v2.4/Update_domain.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +import Item +import Tag +from Cryptocurrency import cryptocurrency +from Pgp import pgp + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader +import Decoded +import Domain + +def update_update_stats(): + nb_updated = int(r_serv_db.get('update:nb_elem_converted')) + progress = int((nb_updated * 100) / nb_elem_to_update) + print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress)) + r_serv_db.set('ail:current_background_script_stat', progress) + +def update_domain_by_item(domain_obj, item_id): + domain_name = domain_obj.get_domain_name() + # update domain tags + for tag in Tag.get_item_tags(item_id): + if tag != 'infoleak:submission="crawler"' 
and tag != 'infoleak:submission="manual"': + Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id)) + + # update domain correlation + item_correlation = Item.get_item_all_correlation(item_id) + + for correlation_name in item_correlation: + for correlation_type in item_correlation[correlation_name]: + if correlation_name in ('pgp', 'cryptocurrency'): + for correl_value in item_correlation[correlation_name][correlation_type]: + if correlation_name=='pgp': + pgp.save_domain_correlation(domain_name, correlation_type, correl_value) + if correlation_name=='cryptocurrency': + cryptocurrency.save_domain_correlation(domain_name, correlation_type, correl_value) + if correlation_name=='decoded': + for decoded_item in item_correlation['decoded']: + Decoded.save_domain_decoded(domain_name, decoded_item) + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + nb_elem_to_update = int( r_serv_db.get('update:nb_elem_to_convert') ) + + while True: + domain = r_serv_onion.spop('domain_update_v2.4') + if domain is not None: + print(domain) + domain = Domain.Domain(domain) + for domain_history in domain.get_domain_history(): + + domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag + if "items" in domain_item: + for item_dict in domain_item['items']: + update_domain_by_item(domain, item_dict['id']) + + r_serv_db.incr('update:nb_elem_converted') + update_update_stats() + + else: + sys.exit(0) diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 2f142a9c..e1639c30 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -69,5 +69,7 @@ def showDomain(): dict_domain['crawler_history'] = domain.get_domain_items_crawled(items_link=True, epoch=epoch, item_screenshot=True, item_tag=True) 
# # TODO: handle multiple port dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) + print(dict_domain) + return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, modal_add_tags=get_modal_add_tags(dict_domain['domain'], tag_type="domain")) diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 6525cb5e..0619b564 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -86,7 +86,9 @@ DiffMaxLineLength = int(config_loader.get_config_str("Flask", "DiffMaxLineLengt bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be', - 'update_warning_message_notice_me': 'missing from the UI.'} + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', + 'update_warning_message_notice_me': 'missing from the UI.'} } UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 8e95dcb5..2ebc09f5 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -102,6 +102,47 @@ + {% if 'decoded' in dict_domain%} +
+
+
+
+
+
+ Decoded   +
{{dict_domain['decoded']|length}}
+
+
+
+ +
+
+
+
+
+ + + + + + + + {% for decoded in dict_domain['decoded']%} + + + + {% endfor %} + +
Decoded
{{ decoded }}
+
+
+
+
+ {% endif %} + + {% if 'pgp' in dict_domain%}
@@ -346,6 +387,9 @@