From 0d06f633e2c4e78f0c198d13ec49455abe616a0c Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 24 Aug 2020 22:46:28 +0200 Subject: [PATCH 01/43] fix: [install] canevasjs --- var/www/update_thirdparty.sh | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index f51ae231..b38609eb 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -91,9 +91,7 @@ wget -q https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb61 #Ressources for sparkline and canvasJS and slider wget -q http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js -wget -q http://canvasjs.com/fdm/chart/ -O temp/canvasjs.zip -unzip -qq temp/canvasjs.zip -d temp/ -mv temp/Chart\ 2.3.2\ GA\ -\ Stable/jquery.canvasjs.min.js ./static/js/jquery.canvasjs.min.js +wget -q https://canvasjs.com/assets/script/canvasjs.min.js -O ./static/js/jquery.canvasjs.min.js wget -q https://jqueryui.com/resources/download/jquery-ui-1.12.1.zip -O temp/jquery-ui.zip unzip -qq temp/jquery-ui.zip -d temp/ From 0872fb23d35608e4d889d40d70e8d0a75522f3f6 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 27 Aug 2020 09:51:05 +0200 Subject: [PATCH 02/43] chg: [yara trackers] add debug --- bin/lib/Tracker.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index b09a1d3e..020a56c4 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -104,8 +104,12 @@ def is_valid_default_yara_rule(yara_rule): filename = os.path.join(yara_dir, yara_rule) filename = os.path.realpath(filename) + print(yara_dir) + print(filename) + # incorrect filename if not os.path.commonprefix([filename, yara_dir]) == yara_dir: + print('error: file transversal') return False else: if os.path.isfile(filename): From b671e4c7f4e7103af6cf4d42410faafa2c08e624 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 27 Aug 2020 12:07:13 +0200 Subject: [PATCH 03/43] chg: [update] add v3.2 --- update/v3.2/Update.py | 39 ++++++++++++++++++++++++++++++++ update/v3.2/Update.sh | 52 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100755 update/v3.2/Update.py create mode 100755 update/v3.2/Update.sh diff --git a/update/v3.2/Update.py b/update/v3.2/Update.py new file mode 100755 index 00000000..fa06c8fe --- /dev/null +++ b/update/v3.2/Update.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import argparse +import datetime +import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.2' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #### NEW EXPORTER + # remove old tags errors + #r_serv_db.delete('mess_not_saved_export') + + # move solo tags to export in tags_db + #all_misp_tags = r_serv_db.smembers('whitelist_misp') + #all_hive_tags = r_serv_db.smembers('whitelist_hive') + # # TODO: save them in tags db + #### NEW EXPORTER + + #Set current ail version + r_serv_db.set('ail:version', new_version) + + #Set current ail version + r_serv_db.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.2/Update.sh b/update/v3.2/Update.sh new file mode 100755 index 00000000..a588e55d --- /dev/null +++ b/update/v3.2/Update.sh @@ -0,0 +1,52 @@ +#!/bin/bash + 
+[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -ldbv & +wait +echo "" + +# SUBMODULES # +git submodule init +git submodule update + +echo -e $GREEN"Installing YARA ..."$DEFAULT +pip3 install yara-python +bash ${AIL_BIN}/LAUNCH.sh -t + +# SUBMODULES # +git submodule init +git submodule update + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v3.2/Update.py +wait +echo "" +echo "" + + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 From 4a0613e3c9182654ead3ca3506f485d59cbff62d Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 1 Sep 2020 14:03:32 +0200 Subject: [PATCH 04/43] chg: [bitcoin tags + correlation] add bech32 adresses --- bin/Cryptocurrencies.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/bin/Cryptocurrencies.py b/bin/Cryptocurrencies.py index 4b50eaee..ded939e7 100755 --- a/bin/Cryptocurrencies.py +++ b/bin/Cryptocurrencies.py @@ -44,8 +44,9 @@ def search_crytocurrency(item_id, item_content): is_cryptocurrency_found = False - for crypto_name in cryptocurrency_dict: - crypto_dict = cryptocurrency_dict[crypto_name] + for dict_field in cryptocurrency_dict: + crypto_dict = cryptocurrency_dict[dict_field] + crypto_name = crypto_dict['name'] signal.alarm(crypto_dict['max_execution_time']) try: @@ -62,7 +63,7 @@ def search_crytocurrency(item_id, item_content): is_valid_crypto_addr = False # validate cryptocurrency address for address in crypto_addr: - if(Cryptocurrency.verify_cryptocurrency_address(crypto_name, address)): + if(Cryptocurrency.verify_cryptocurrency_address(dict_field, address)): is_valid_crypto_addr = True print('{} address found : {}'.format(crypto_name, address)) # build bitcoin correlation @@ -127,6 +128,17 @@ cryptocurrency_dict = { 'tag': 'infoleak:automatic-detection="bitcoin-private-key"', }, }, + 'bitcoin-bech32': { + 'name': 'bitcoin', # e.g. bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq + 'regex': r'\bbc(?:0(?:[ac-hj-np-z02-9]{39}|[ac-hj-np-z02-9]{59})|1[ac-hj-np-z02-9]{8,87})\b', + 'max_execution_time': default_max_execution_time, + 'tag': 'infoleak:automatic-detection="bitcoin-address"', + 'private_key': { + 'regex': r'\b(? Date: Thu, 3 Sep 2020 14:38:17 +0200 Subject: [PATCH 05/43] fix: [Launcher] fix virtualenv loader --- bin/LAUNCH.sh | 14 ++++++-------- bin/lib/Tracker.py | 6 ++---- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 1a117c1c..0c8a6a70 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -18,23 +18,21 @@ cd ${AIL_HOME} if [ -e "${DIR}/AILENV/bin/python" ]; then ENV_PY="${DIR}/AILENV/bin/python" export AIL_VENV=${AIL_HOME}/AILENV/ + . 
./AILENV/bin/activate elif [ ! -z "$TRAVIS" ]; then echo "Travis detected" ENV_PY="~/virtualenv/python3.6/bin/python" export AIL_VENV="~/virtualenv/python3.6/" + + export AIL_BIN=${AIL_HOME}/bin/ + export AIL_FLASK=${AIL_HOME}/var/www/ + export AIL_REDIS=${AIL_HOME}/redis/src/ + export AIL_ARDB=${AIL_HOME}/ardb/src/ else echo "Please make sure you have a AIL-framework environment, au revoir" exit 1 fi -# redis-server is bundled during install -## [ ! -f "`which redis-server`" ] && echo "'redis-server' is not installed/not on PATH. Please fix and run again." && exit 1 - -export AIL_BIN=${AIL_HOME}/bin/ -export AIL_FLASK=${AIL_HOME}/var/www/ -export AIL_REDIS=${AIL_HOME}/redis/src/ -export AIL_ARDB=${AIL_HOME}/ardb/src/ - export PATH=$AIL_VENV/bin:$PATH export PATH=$AIL_HOME:$PATH export PATH=$AIL_REDIS:$PATH diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 020a56c4..52c85b67 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -103,13 +103,11 @@ def is_valid_default_yara_rule(yara_rule): yara_dir = get_yara_rules_default_dir() filename = os.path.join(yara_dir, yara_rule) filename = os.path.realpath(filename) - - print(yara_dir) - print(filename) - # incorrect filename if not os.path.commonprefix([filename, yara_dir]) == yara_dir: print('error: file transversal') + print(yara_dir) + print(filename) return False else: if os.path.isfile(filename): From 8a1af2c37e280e3e2a432f5c745f3c61cd70f2f4 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Thu, 3 Sep 2020 16:27:15 +0200 Subject: [PATCH 06/43] fix: [ardb] switch to ail-project ardb fork #38 Signed-off-by: Jean-Louis Huynen --- installing_deps.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/installing_deps.sh b/installing_deps.sh index ff2df4c6..1380a02c 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -80,7 +80,7 @@ sudo make install popd # ARDB # -test ! -d ardb/ && git clone https://github.com/yinqiwen/ardb.git +test ! 
-d ardb/ && git clone https://github.com/ail-project/ardb.git pushd ardb/ make popd From d55c8221ad95365dd2f7a38f81fd556c69154079 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 3 Sep 2020 16:33:10 +0200 Subject: [PATCH 07/43] chg: [yara trackers UI] add yara trackers, show default yara rule --- bin/lib/Tracker.py | 16 ++++++++++ var/www/modules/hunter/Flask_hunter.py | 13 +++++---- .../modules/hunter/templates/Add_tracker.html | 29 +++++++++++++++---- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 52c85b67..ff646bfe 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -139,6 +139,22 @@ def get_yara_rule_content(yara_rule): rule_content = f.read() return rule_content +def api_get_default_rule_content(default_yara_rule): + yara_dir = get_yara_rules_default_dir() + filename = os.path.join(yara_dir, default_yara_rule) + filename = os.path.realpath(filename) + + # incorrect filename + if not os.path.commonprefix([filename, yara_dir]) == yara_dir: + return ({'status': 'error', 'reason': 'file transversal detected'}, 400) + + if not os.path.isfile(filename): + return ({'status': 'error', 'reason': 'yara rule not found'}, 400) + + with open(filename, 'r') as f: + rule_content = f.read() + return ({'rule_name': default_yara_rule, 'content': rule_content}, 200) + ##-- YARA --## diff --git a/var/www/modules/hunter/Flask_hunter.py b/var/www/modules/hunter/Flask_hunter.py index 48530574..a1281d4c 100644 --- a/var/www/modules/hunter/Flask_hunter.py +++ b/var/www/modules/hunter/Flask_hunter.py @@ -254,12 +254,13 @@ def get_json_tracker_stats(): res = Term.get_list_tracked_term_stats_by_day([tracker_uuid]) return jsonify(res) -# @hunter.route("/tracker/get_all_default_yara_rules_by_type", methods=['GET']) -# @login_required -# @login_read_only -# def get_all_default_yara_rules_by_type(): -# yara_types = request.args.get('yara_types') -# get_all_default_yara_rules_by_types(yara_types) +@hunter.route("/tracker/yara/default_rule/content", methods=['GET']) +@login_required +@login_read_only +def get_default_yara_rule_content(): + default_yara_rule = request.args.get('rule_name') + res = Tracker.api_get_default_rule_content(default_yara_rule) + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # ========= REGISTRATION ========= app.register_blueprint(hunter, url_prefix=baseUrl) diff --git a/var/www/modules/hunter/templates/Add_tracker.html b/var/www/modules/hunter/templates/Add_tracker.html index 0653b834..091d66f5 100644 --- a/var/www/modules/hunter/templates/Add_tracker.html +++ b/var/www/modules/hunter/templates/Add_tracker.html @@ -89,10 +89,10 @@ -
+
- {% for yara_types in all_yara_files %} {% for yara_file in all_yara_files[yara_types] %} @@ -100,12 +100,17 @@ {% endfor %} {% endfor %} + +

+
 										
-
- +
+ +
+ +
-

+ + + + - - - - - - +
+
+ + +
+
+ + +
+
{% endfor %} @@ -117,6 +127,13 @@ function toggle_sidebar(){ $('#core_content').addClass('col-lg-10') } } + +function show_api_key(key_id) { + $('#censored_key_' + key_id).hide(); + $('#btn_key_' + key_id).hide(); + $('#uncensored_key_' + key_id).show(); +} + From eec64c3bc7dcefb8164520b973aa516504500900 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Mon, 5 Oct 2020 14:56:50 +0200 Subject: [PATCH 16/43] chg: [UI show domain] add last origin table: domain + item --- bin/lib/Domain.py | 11 ++++++ var/www/blueprints/crawler_splash.py | 2 +- .../crawler/crawler_splash/showDomain.html | 34 +++++++++++++++---- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index bfc0b4cb..52b5b7c6 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -474,6 +474,14 @@ def get_domain_last_origin(domain, domain_type): origin_item = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent') return origin_item +def get_domain_father(domain, domain_type): + dict_father = {} + dict_father['item_father'] = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent') + if dict_father['item_father'] != 'auto' and dict_father['item_father'] != 'manual': + if Item.is_crawled(dict_father['item_father']): + dict_father['domain_father'] = Item.get_domain(dict_father['item_father']) + return dict_father + def get_domain_tags(domain): ''' Retun all tags of a given domain. @@ -744,6 +752,9 @@ class Domain(object): ''' return get_domain_last_origin(self.domain, self.type) + def get_domain_father(self): + return get_domain_father(self.domain, self.type) + def domain_was_up(self): ''' Return True if this domain was UP at least one time diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 7d006c3d..b2ef7f63 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -110,7 +110,7 @@ def showDomain(): if domain.domain_was_up(): dict_domain = {**dict_domain, **domain.get_domain_correlation()} dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain) - dict_domain['origin_item'] = domain.get_domain_last_origin() + dict_domain['father'] = domain.get_domain_father() dict_domain['tags'] = domain.get_domain_tags() dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags']) dict_domain['history'] = domain.get_domain_history_with_status() diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index d83aee39..bbe86fe2 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -94,12 +94,34 @@
-
- {% if dict_domain['origin_item']=='manual' or dict_domain['origin_item']=='auto' %} - {{ dict_domain['origin_item'] }} - {%else%} - Last Origin: {{ dict_domain['origin_item'] }} - {%endif%} + + + + + + + {% if dict_domain['father']=='manual' or dict_domain['father']=='auto' %} + + + + + {%else%} + + + + + {% if dict_domain['father']['domain_father'] %} + + + {%endif%} +

+ {%endif%} +

+
Last Origin:
{{ dict_domain['father'] }}
+ {{ dict_domain['father']['item_father'] }} +
+ {{ dict_domain['father']['domain_father'] }} +
{% if 'correlation_nb' in dict_domain %} {% if dict_domain["correlation_nb"] > 0 %} From afc7cfc77d8096ddba0ad9ddab2d01547210a46f Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Oct 2020 16:02:30 +0200 Subject: [PATCH 17/43] chg: [UI show Item] refactoring + bootstrap 4 migration --- bin/RegexTracker.py | 2 +- bin/TermTrackerMod.py | 2 +- bin/export/Export.py | 28 ++ bin/lib/Correlate_object.py | 4 +- bin/lib/item_basic.py | 3 - bin/packages/Item.py | 64 ++- bin/trackers/Tracker_Yara.py | 2 +- requirements.txt | 1 + update/v3.3/Update.py | 29 ++ update/v3.3/Update.sh | 54 +++ var/www/Flask_server.py | 4 + var/www/blueprints/objects_item.py | 97 ++++ var/www/blueprints/old_endpoints.py | 34 ++ .../modules/PasteSubmit/Flask_PasteSubmit.py | 2 +- var/www/modules/Tags/Flask_Tags.py | 6 +- var/www/modules/Tags/templates/Tags.html | 4 +- var/www/modules/Tags/templates/tagged.html | 4 +- var/www/modules/dashboard/Flask_dashboard.py | 2 +- .../modules/dashboard/templates/index.html | 2 +- .../modules/hashDecoded/Flask_hashDecoded.py | 4 +- .../modules/hunter/templates/showTracker.html | 2 +- var/www/modules/search/templates/search.html | 8 +- var/www/modules/showpaste/Flask_showpaste.py | 7 - .../templates/show_saved_item_min.html | 6 +- .../terms/templates/credentials_tracker.html | 2 +- .../terms/templates/terms_management.html | 2 +- var/www/static/js/indexjavascript.js | 2 +- .../correlation/metadata_card_paste.html | 2 +- .../crawler/crawler_splash/showDomain.html | 6 +- .../templates/modals/create_hive_case.html | 120 +++++ var/www/templates/modals/show_min_item.html | 2 +- var/www/templates/objects/item/show_item.html | 446 ++++++++++++++++++ .../templates/tags/search_obj_by_tags.html | 2 +- 33 files changed, 910 insertions(+), 45 deletions(-) create mode 100755 update/v3.3/Update.py create mode 100755 update/v3.3/Update.sh create mode 100644 var/www/blueprints/objects_item.py create mode 100644 var/www/blueprints/old_endpoints.py create mode 100644 var/www/templates/modals/create_hive_case.html create mode 100644 var/www/templates/objects/item/show_item.html diff --git a/bin/RegexTracker.py b/bin/RegexTracker.py index 2de211b9..904be623 100755 --- a/bin/RegexTracker.py +++ b/bin/RegexTracker.py @@ -23,7 +23,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import Tracker import regex_helper -full_item_url = "/showsavedpaste/?paste=" +full_item_url = "/object/item?id=" mail_body_template = "AIL Framework,\nNew occurrence for term tracked regex: {}\nitem id: {}\nurl: {}{}" dict_regex_tracked = Term.get_regex_tracked_words_dict() diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index 2b4241c0..ee64ab67 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -20,7 +20,7 @@ from packages import Term from lib import Tracker -full_item_url = "/showsavedpaste/?paste=" +full_item_url = "/object/item/?id=" mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}" diff --git a/bin/export/Export.py b/bin/export/Export.py index 67d631f2..90de4570 100755 --- a/bin/export/Export.py +++ b/bin/export/Export.py @@ -9,6 +9,24 @@ from uuid import uuid4 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import ConfigLoader +sys.path.append('../../configs/keys') +try: + from thehive4py.api import TheHiveApi + import thehive4py.exceptions + from theHiveKEYS import the_hive_url, the_hive_key, the_hive_verifycert + if the_hive_url == '': + is_hive_connected = False + else: + is_hive_connected = TheHiveApi(the_hive_url, 
the_hive_key, cert=the_hive_verifycert) +except: + is_hive_connected = False +if is_hive_connected != False: + try: + is_hive_connected.get_alert(0) + is_hive_connected = True + except thehive4py.exceptions.AlertException: + is_hive_connected = False + ## LOAD CONFIG ## config_loader = ConfigLoader.ConfigLoader() r_serv_cache = config_loader.get_redis_conn("Redis_Cache") @@ -37,6 +55,16 @@ def load_tags_to_export_in_cache(): # save combinaison of tags in cache pass +def is_hive_connected(): # # TODO: REFRACTOR, put in cache (with retry) + return is_hive_connected + +def get_item_hive_cases(item_id): + hive_case = r_serv_metadata.get('hive_cases:{}'.format(item_id)) + if hive_case: + hive_case = the_hive_url + '/index.html#/case/{}/details'.format(hive_case) + return hive_case + + ########################################################### # # set default # if r_serv_db.get('hive:auto-alerts') is None: diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index c0d959f5..1138949a 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -223,8 +223,8 @@ def get_item_url(correlation_name, value, correlation_type=None): endpoint = 'crawler_splash.showDomain' url = url_for(endpoint, domain=value) elif correlation_name == 'item': - endpoint = 'showsavedpastes.showsavedpaste' - url = url_for(endpoint, paste=value) + endpoint = 'objects_item.showItem' + url = url_for(endpoint, id=value) elif correlation_name == 'paste': ### # TODO: remove me endpoint = 'showsavedpastes.showsavedpaste' url = url_for(endpoint, paste=value) diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index c1005b49..9ad6161d 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -113,9 +113,6 @@ def get_item_parent(item_id): def get_item_children(item_id): return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id))) -def add_item_parent(item_parent, item_id): - return item_basic.add_item_parent(item_parent, item_id) - # # TODO: handle domain last origin in domain lib def _delete_node(item_id): # only if item isn't deleted diff --git a/bin/packages/Item.py b/bin/packages/Item.py index d898d1e0..15993d7a 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -4,6 +4,7 @@ import os import sys import redis +import html2text from io import BytesIO @@ -59,6 +60,9 @@ def get_item_basename(item_id): def get_item_size(item_id): return round(os.path.getsize(os.path.join(PASTES_FOLDER, item_id))/1024.0, 2) +def get_item_encoding(item_id): + return None + def get_lines_info(item_id, item_content=None): if not item_content: item_content = get_item_content(item_id) @@ -73,9 +77,37 @@ def get_lines_info(item_id, item_content=None): return {'nb': nb_line, 'max_length': max_length} +def get_item_metadata(item_id, item_content=None): + ## TODO: FIXME ##performance + # encoding + # language + # lines info + + item_metadata = {} + item_metadata['date'] = get_item_date(item_id, add_separator=True) + item_metadata['source'] = get_source(item_id) + item_metadata['size'] = get_item_size(item_id) + item_metadata['encoding'] = get_item_encoding(item_id) + item_metadata['lines'] = get_lines_info(item_id, item_content=item_content) + + return item_metadata + +def get_item_parent(item_id): + return item_basic.get_item_parent(item_id) + +def add_item_parent(item_parent, item_id): + return item_basic.add_item_parent(item_parent, item_id) + def get_item_content(item_id): return item_basic.get_item_content(item_id) +def get_item_content_html2text(item_id, item_content=None): + if 
not item_content: + item_content = get_item_content(item_id) + h = html2text.HTML2Text() + h.ignore_links = False + return h.handle(item_content) + # API def get_item(request_dict): if not request_dict: @@ -257,6 +289,18 @@ def get_item_list_desc(list_item_id): def is_crawled(item_id): return item_basic.is_crawled(item_id) +def get_crawler_matadata(item_id, ltags=None): + dict_crawler = {} + if is_crawled(item_id): + dict_crawler['domain'] = get_item_domain(item_id) + if not ltags: + ltags = Tag.get_obj_tag(item_id) + dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags) + dict_crawler['url'] = get_item_link(item_id) + dict_crawler['screenshot'] = get_item_screenshot(item_id) + dict_crawler['har'] = get_item_har_name(item_id) + return dict_crawler + def is_onion(item_id): is_onion = False if len(is_onion) > 62: @@ -293,7 +337,7 @@ def get_item_screenshot(item_id): return '' def get_item_har_name(item_id): - os.path.join(screenshot_directory, item_id) + '.json' + har_path = os.path.join(screenshot_directory, item_id) + '.json' if os.path.isfile(har_path): return har_path else: @@ -322,6 +366,24 @@ def get_item_duplicate(item_id, r_list=True): return [] return res +def get_item_nb_duplicates(item_id): + return r_serv_metadata.scard('dup:{}'.format(item_id)) + +def get_item_duplicates_dict(item_id): + dict_duplicates = {} + for duplicate in get_item_duplicate(item_id): + duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',') + duplicate_id = duplicate[1] + if not duplicate_id in dict_duplicates: + dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}} + algo = duplicate[0] + if algo == 'tlsh': + similarity = 100 - int(duplicate[2]) + else: + similarity = int(duplicate[2]) + dict_duplicates[duplicate_id]['algo'][algo] = similarity + return dict_duplicates + def add_item_duplicate(item_id, l_dup): for item_dup in l_dup: r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id) diff --git a/bin/trackers/Tracker_Yara.py b/bin/trackers/Tracker_Yara.py index a141f352..55e5a00b 100755 --- a/bin/trackers/Tracker_Yara.py +++ b/bin/trackers/Tracker_Yara.py @@ -24,7 +24,7 @@ import Tracker import item_basic -full_item_url = "/showsavedpaste/?paste=" +full_item_url = "/object/item?id=" mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}" last_refresh = time.time() diff --git a/requirements.txt b/requirements.txt index 33542f8b..73d06144 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,6 +22,7 @@ textblob #Tokeniser nltk +html2text yara-python #Crawler diff --git a/update/v3.3/Update.py b/update/v3.3/Update.py new file mode 100755 index 00000000..39d1371b --- /dev/null +++ b/update/v3.3/Update.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import argparse +import datetime +import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.3' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #Set current ail version + r_serv_db.set('ail:version', new_version) + + #Set current ail version + r_serv_db.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.3/Update.sh b/update/v3.3/Update.sh new file mode 100755 index 00000000..86289dba --- /dev/null +++ b/update/v3.3/Update.sh @@ -0,0 +1,54 @@ 
+#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -ldbv & +wait +echo "" + +# SUBMODULES # +git submodule update + +# echo "" +# echo -e $GREEN"installing KVORCKS ..."$DEFAULT +# cd ${AIL_HOME} +# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git +# pushd kvrocks/ +# make -j4 +# popd + +echo -e $GREEN"Installing html2text ..."$DEFAULT +pip3 install html2text + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v3.3/Update.py +wait +echo "" +echo "" + + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 5f1f22d4..bce50bb3 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -42,6 +42,8 @@ from blueprints.crawler_splash import crawler_splash from blueprints.correlation import correlation from blueprints.tags_ui import tags_ui from blueprints.import_export import import_export +from blueprints.objects_item import objects_item +from blueprints.old_endpoints import old_endpoints Flask_dir = os.environ['AIL_FLASK'] @@ -97,6 +99,8 @@ app.register_blueprint(crawler_splash, url_prefix=baseUrl) app.register_blueprint(correlation, url_prefix=baseUrl) app.register_blueprint(tags_ui, url_prefix=baseUrl) app.register_blueprint(import_export, url_prefix=baseUrl) +app.register_blueprint(objects_item, url_prefix=baseUrl) +app.register_blueprint(old_endpoints, url_prefix=baseUrl) # ========= =========# # ========= Cookie name ======== diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py new file mode 100644 index 00000000..2b951353 --- /dev/null +++ b/var/www/blueprints/objects_item.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
+''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +import Item +import Tag + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export')) +import Export + +# ============ BLUEPRINT ============ +objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== +@objects_item.route("/object/item") #completely shows the paste in a new tab +@login_required +@login_read_only +def showItem(): # # TODO: support post + item_id = request.args.get('id') + if not item_id or not Item.exist_item(item_id): + abort(404) + + dict_item = {} + dict_item['id'] = item_id + dict_item['name'] = dict_item['id'].replace('/', ' / ') + dict_item['father'] = Item.get_item_parent(item_id) + dict_item['content'] = Item.get_item_content(item_id) + dict_item['metadata'] = Item.get_item_metadata(item_id, item_content=dict_item['content']) + dict_item['tags'] = Tag.get_obj_tag(item_id) + #dict_item['duplicates'] = Item.get_item_nb_duplicates(item_id) + dict_item['duplicates'] = Item.get_item_duplicates_dict(item_id) + dict_item['crawler'] = Item.get_crawler_matadata(item_id, ltags=dict_item['tags']) + + ## EXPORT SECTION + # # TODO: ADD in Export SECTION + dict_item['hive_case'] = Export.get_item_hive_cases(item_id) + + return render_template("show_item.html", bootstrap_label=bootstrap_label, + modal_add_tags=Tag.get_modal_add_tags(dict_item['id'], object_type='item'), + is_hive_connected=Export.get_item_hive_cases(item_id), + dict_item=dict_item) + + # kvrocks data + + # # TODO: dynamic load: + ## duplicates + ## correlations + + ## Dynamic Path FIX + +@objects_item.route("/object/item/html2text") +@login_required +@login_read_only +def html2text(): # # TODO: support post + item_id = request.args.get('id') + if not item_id or not Item.exist_item(item_id): + abort(404) + return Item.get_item_content_html2text(item_id) + +@objects_item.route("/object/item/raw_content") +@login_required +@login_read_only +def item_raw_content(): # # TODO: support post + item_id = request.args.get('id') + if not item_id or not Item.exist_item(item_id): + abort(404) + return Response(Item.get_item_content(item_id), mimetype='text/plain') + +@objects_item.route("/object/item/download") +@login_required +@login_read_only +def item_download(): # # TODO: support post + item_id = request.args.get('id') + if not item_id or not Item.exist_item(item_id): + abort(404) + return send_file(Item.get_raw_content(item_id), attachment_filename=item_id, as_attachment=True) diff --git a/var/www/blueprints/old_endpoints.py b/var/www/blueprints/old_endpoints.py new file mode 100644 index 00000000..09f6bfaa --- /dev/null +++ b/var/www/blueprints/old_endpoints.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
+''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +# ============ BLUEPRINT ============ +old_endpoints = Blueprint('old_endpoints', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== +@old_endpoints.route("/showsavedpaste/") +@login_required +@login_read_only +def showsavedpaste(): + item_id = request.args.get('paste', '') + return redirect(url_for('objects_item.showItem', id=item_id)) diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index 134753e7..0a10c251 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -358,7 +358,7 @@ def submit_status(): link = '' if paste_submit_link: for paste in paste_submit_link: - url = url_for('showsavedpastes.showsavedpaste') + '?paste=' + paste + url = url_for('objects_item.showItem') + '?id=' + paste link += '' + paste +'' if nb_total == '-1': diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index fa8873ba..010610e6 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -377,7 +377,7 @@ def remove_tag(): #TODO remove me , used by showpaste res = Tag.api_delete_obj_tags(tags=[tag], object_id=path, object_type="item") if res[1] != 200: return str(res[0]) - return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) + return redirect(url_for('objects_item.showItem', id=path)) @Tags.route("/Tags/confirm_tag") @login_required @@ -395,7 +395,7 @@ def confirm_tag(): #add analyst tag Tag.add_tag('item', tag, path) - return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) + return redirect(url_for('objects_item.showItem', id=path)) return 'incompatible tag' @@ -417,7 +417,7 @@ def tag_validation(): r_serv_statistics.sadd('fp:'+tag, path) r_serv_statistics.srem('tp:'+tag, path) - return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) + return redirect(url_for('objects_item.showItem', id=path)) else: return 'input error' diff --git a/var/www/modules/Tags/templates/Tags.html b/var/www/modules/Tags/templates/Tags.html index b4fb85c8..83ebfb3c 100644 --- a/var/www/modules/Tags/templates/Tags.html +++ b/var/www/modules/Tags/templates/Tags.html @@ -119,7 +119,7 @@ {% for path in all_path %} {{ paste_date[loop.index0] }} - +
{{ path }}
@@ -375,7 +375,7 @@ function toggle_sidebar(){ button.tooltip(button); $("#container-show-more").append(button); - $("#button_show_path").attr('href', '{{ url_for('showsavedpastes.showsavedpaste') }}?paste=' + $(modal).attr('data-path')); + $("#button_show_path").attr('href', '{{ url_for('objects_item.showItem') }}?id=' + $(modal).attr('data-path')); $("#button_show_path").show('fast'); $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed diff --git a/var/www/modules/Tags/templates/tagged.html b/var/www/modules/Tags/templates/tagged.html index 75be817f..4177e6cc 100644 --- a/var/www/modules/Tags/templates/tagged.html +++ b/var/www/modules/Tags/templates/tagged.html @@ -103,7 +103,7 @@ {% for path in all_path %} {{ loop.index0 }} - {{ path }} + {{ path }}
{% for tag in paste_tags[loop.index0] %} @@ -114,7 +114,7 @@ {{ paste_date[loop.index0] }} {{ paste_linenum[loop.index0] }} -

+

{% endfor %} diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py index 0091df5a..d57c4e67 100644 --- a/var/www/modules/dashboard/Flask_dashboard.py +++ b/var/www/modules/dashboard/Flask_dashboard.py @@ -76,7 +76,7 @@ def dashboard_alert(log): log = log[46:].split(';') if len(log) == 6: time = datetime_from_utc_to_local(utc_str) - path = url_for('showsavedpastes.showsavedpaste',paste=log[5]) + path = url_for('objects_item.showItem',id=log[5]) res = {'date': date, 'time': time, 'script': log[0], 'domain': log[1], 'date_paste': log[2], 'paste': log[3], 'message': log[4], 'path': path} diff --git a/var/www/modules/dashboard/templates/index.html b/var/www/modules/dashboard/templates/index.html index 812e1ea0..0cd858d0 100644 --- a/var/www/modules/dashboard/templates/index.html +++ b/var/www/modules/dashboard/templates/index.html @@ -165,7 +165,7 @@ - + diff --git a/var/www/templates/modals/show_min_item.html b/var/www/templates/modals/show_min_item.html index 69ff1239..8613d9c6 100644 --- a/var/www/templates/modals/show_min_item.html +++ b/var/www/templates/modals/show_min_item.html @@ -86,7 +86,7 @@ function get_html_and_update_modal(event, truemodal) { button.tooltip(button); $("#container-show-more").append(button); - $("#modal_show_min_item_button_show_item").attr('href', '{{ url_for('showsavedpastes.showsavedpaste') }}?paste=' + $(modal).attr('data-path')); + $("#modal_show_min_item_button_show_item").attr('href', '{{ url_for('objects_item.showItem') }}?id=' + $(modal).attr('data-path')); $("#modal_show_min_item_button_show_item").show('fast'); $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed diff --git a/var/www/templates/objects/item/show_item.html b/var/www/templates/objects/item/show_item.html new file mode 100644 index 00000000..084b3477 --- /dev/null +++ b/var/www/templates/objects/item/show_item.html @@ -0,0 +1,446 @@ + + + + Show Item Object - AIL + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+ + {% if misp_eventid %} +
+
  • MISP Events already Created
  • + {{ misp_url }} +
    + {% endif %} + + {% if dict_item['hive_case'] %} +
    +
  • The Hive Case already Created
  • + {{ hive_url }} +
    + {% endif %} + + {% if dict_item['duplicates'] != 0 %} +
    +
    +
    +
    +
    +
    + duplicates   +
    {{dict_item['duplicates']|length}}
    +
    +
    +
    + +
    +
    +
    + +
    +
    + + + + + + + + + + + + {% for duplicate_id in dict_item['duplicates'] %} + + + + + + + {% endfor %} + +
    DateSimilarityItemDiff
    {{dict_item['duplicates'][duplicate_id]['date']}} + + + {%for algo in dict_item['duplicates'][duplicate_id]['algo']|sort()%} + + + + + {%endfor%} + +
    {{algo}} +
    +
    + {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}% +
    +
    +
    +
    + + {{duplicate_id}} + + + +
    + +
    +
    + +
    +
    + {% endif %} + + + {% if l_64|length != 0 %} +
    +
    +
    +
    +
    +
    + Decoded Files   +
    {{l_64|length}}
    +
    +
    +
    + +
    +
    +
    + +
    +
    + + + + + + + + + + {% for b64 in l_64 %} + + + + + {% endfor %} + +
    estimated typehash
      {{ b64[1] }} {{b64[2]}} ({{ b64[4] }})
    + +
    +
    +
    +
    + {% endif %} + + + {% if dict_item['crawler'] %} +
    +
    +
    +
    +
    +
    + Crawler +
    +
    +
    + +
    +
    +
    + +
    + +
    + +
    +
    + + + + + + + + + + + + + + + + + + +
    Last Origin:
    + {{ dict_item['father'] }} +
    + {{ dict_item['crawler']['domain'] }} +
    url + {{ dict_item['crawler']['url'] }} +
    +
    +
    + +
    +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +
    + +
    +
    + +
    +
    +
    + {% endif %} + + + + +
    +
    + + + + +
    +
    +

    {{ dict_item['content'] }}

    +
    +
    +

    +
    +
    +
    +
    + + + +{% if dict_item['crawler'] %} + +{% endif %} + + + + + diff --git a/var/www/templates/tags/search_obj_by_tags.html b/var/www/templates/tags/search_obj_by_tags.html index 3e9400ba..93ac9579 100644 --- a/var/www/templates/tags/search_obj_by_tags.html +++ b/var/www/templates/tags/search_obj_by_tags.html @@ -129,7 +129,7 @@ {{ dict_obj['date'] }} - +
    {{ dict_obj['id'] }}
    From 1d8aa44d8bdaeca42ea890e89a1d226fd01d4e62 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Oct 2020 16:26:54 +0200 Subject: [PATCH 18/43] fix: [UI correlation graph] fix item links --- bin/lib/Correlate_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index 1138949a..cff288f0 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -226,7 +226,7 @@ def get_item_url(correlation_name, value, correlation_type=None): endpoint = 'objects_item.showItem' url = url_for(endpoint, id=value) elif correlation_name == 'paste': ### # TODO: remove me - endpoint = 'showsavedpastes.showsavedpaste' + endpoint = 'objects_item.showItem' url = url_for(endpoint, paste=value) return url From dcd29aec16f3edc957d9166097e98a99d82ceddd Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 13 Oct 2020 16:28:03 +0200 Subject: [PATCH 19/43] fix: [UI correlation graph] fix item links --- bin/lib/Correlate_object.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index cff288f0..1cddf9be 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -227,7 +227,7 @@ def get_item_url(correlation_name, value, correlation_type=None): url = url_for(endpoint, id=value) elif correlation_name == 'paste': ### # TODO: remove me endpoint = 'objects_item.showItem' - url = url_for(endpoint, paste=value) + url = url_for(endpoint, id=value) return url def get_obj_tag_table_keys(object_type): From 5cc4da2a28fa9b8ea7c915796658ed12faf516e4 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 27 Oct 2020 08:41:47 +0100 Subject: [PATCH 20/43] fix: [Terms Trackcers] fix item link --- bin/TermTrackerMod.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py index ee64ab67..f7abe4af 100755 --- a/bin/TermTrackerMod.py +++ b/bin/TermTrackerMod.py @@ -20,7 +20,7 @@ from packages import Term from lib import Tracker -full_item_url = "/object/item/?id=" +full_item_url = "/object/item?id=" mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}" From 9f35d526284a8b1c3a01550cc14f1020b2b6bfca Mon Sep 17 00:00:00 2001 From: Alexandre Dulaunoy Date: Fri, 20 Nov 2020 11:15:54 +0100 Subject: [PATCH 21/43] chg: [web] we process items nowadays not only pastes ;-) --- var/www/modules/dashboard/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/var/www/modules/dashboard/templates/index.html b/var/www/modules/dashboard/templates/index.html index 0cd858d0..5d40df1c 100644 --- a/var/www/modules/dashboard/templates/index.html +++ b/var/www/modules/dashboard/templates/index.html @@ -87,7 +87,7 @@ Feeder(s) Monitor:
    - Processed pastes + Processed items

    Filtered duplicates From d1b4d61ce5357bda486e7b380a0c52bfc2b58ad0 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 24 Nov 2020 15:44:11 +0100 Subject: [PATCH 22/43] fix: [Yara Tracker] catch yara timeout --- bin/trackers/Tracker_Yara.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bin/trackers/Tracker_Yara.py b/bin/trackers/Tracker_Yara.py index 55e5a00b..b0356b55 100755 --- a/bin/trackers/Tracker_Yara.py +++ b/bin/trackers/Tracker_Yara.py @@ -71,10 +71,12 @@ if __name__ == "__main__": item_id = p.get_from_set() if item_id is not None: item_content = item_basic.get_item_content(item_id) - yara_match = rules.match(data=item_content, callback=yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60) - if yara_match: - print(f'{item_id}: {yara_match}') - + try: + yara_match = rules.match(data=item_content, callback=yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60) + if yara_match: + print(f'{item_id}: {yara_match}') + except yara.TimeoutError as e: + print(f'{item_id}: yara scanning timed out') else: time.sleep(5) From 4fa320741cbec40735a1efa1bb7904720cc5ad11 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 8 Dec 2020 16:47:55 +0100 Subject: [PATCH 23/43] chg: [Tracker] edit tracker --- bin/lib/Tracker.py | 369 +++++++++++++++++- bin/packages/Term.py | 14 +- var/www/modules/hunter/Flask_hunter.py | 64 ++- .../{Add_tracker.html => edit_tracker.html} | 59 +-- .../modules/hunter/templates/showTracker.html | 12 +- 5 files changed, 471 insertions(+), 47 deletions(-) rename var/www/modules/hunter/templates/{Add_tracker.html => edit_tracker.html} (69%) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index ff646bfe..9efbd375 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -2,19 +2,78 @@ # -*-coding:UTF-8 -* import os +import re import sys import time import redis +import uuid import yara +import datetime + +from flask import escape sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader #import item_basic config_loader = ConfigLoader.ConfigLoader() +r_serv_db = config_loader.get_redis_conn("ARDB_DB") r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker") config_loader = None +email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' +email_regex = re.compile(email_regex) + +special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') +special_characters.add('\\s') + +############### +#### UTILS #### +def is_valid_uuid_v4(UUID): + if not UUID: + return False + UUID = UUID.replace('-', '') + try: + uuid_test = uuid.UUID(hex=UUID, version=4) + return uuid_test.hex == UUID + except: + return False + +def is_valid_regex(tracker_regex): + try: + re.compile(tracker_regex) + return True + except: + return False + +def is_valid_mail(email): + result = email_regex.match(email) + if result: + return True + else: + return False + +def verify_mail_list(mail_list): + for mail in mail_list: + if not is_valid_mail(mail): + return ({'status': 'error', 'reason': 'Invalid email', 'value': mail}, 400) + return None + +##-- UTILS --## +############### + +def get_tracker_by_uuid(tracker_uuid): + return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'tracked') + +def get_tracker_type(tracker_uuid): + return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'type') + +def get_tracker_level(tracker_uuid): + return int(r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'level')) + +def get_tracker_user_id(tracker_uuid): + return 
r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') + def get_tracker_uuid_list(tracker, tracker_type): return list(r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker))) @@ -27,6 +86,51 @@ def get_tracker_mails(tracker_uuid): def get_tracker_description(tracker_uuid): return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'description') +def get_tracker_first_seen(tracker_uuid): + res = r_serv_tracker.zrange('tracker:stat:{}'.format(tracker_uuid), 0, 0) + if res: + return res[0] + else: + return None + +def get_tracker_last_seen(tracker_uuid): + res = r_serv_tracker.zrevrange('tracker:stat:{}'.format(tracker_uuid), 0, 0) + if res: + return res[0] + else: + return None + +def get_tracker_metedata(tracker_uuid, user_id=False, description=False, level=False, tags=False, mails=False, sparkline=False): + dict_uuid = {} + dict_uuid['tracker'] = get_tracker_by_uuid(tracker_uuid) + dict_uuid['type'] = get_tracker_type(tracker_uuid) + dict_uuid['date'] = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'date') + dict_uuid['description'] = get_tracker_description(tracker_uuid) + dict_uuid['first_seen'] = get_tracker_first_seen(tracker_uuid) + dict_uuid['last_seen'] = get_tracker_last_seen(tracker_uuid) + if user_id: + dict_uuid['user_id'] = get_tracker_user_id(tracker_uuid) + if level: + dict_uuid['level'] = get_tracker_level(tracker_uuid) + if mails: + dict_uuid['mails'] = get_tracker_mails(tracker_uuid) + if tags: + dict_uuid['tags'] = get_tracker_tags(tracker_uuid) + if sparkline: + dict_uuid['sparkline'] = get_tracker_sparkline(tracker_uuid) + dict_uuid['uuid'] = tracker_uuid + return dict_uuid + +def get_tracker_sparkline(tracker_uuid, num_day=6): + date_range_sparkline = Date.get_date_range(num_day) + sparklines_value = [] + for date_day in date_range_sparkline: + nb_seen_this_day = r_serv_tracker.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day)) + if nb_seen_this_day is None: + nb_seen_this_day = 0 + sparklines_value.append(int(nb_seen_this_day)) + return sparklines_value + def add_tracked_item(tracker_uuid, item_id, item_date): # track item r_serv_tracker.sadd('tracker:item:{}:{}'.format(tracker_uuid, item_date), item_id) @@ -46,6 +150,234 @@ def get_tracker_last_updated_by_type(tracker_type): epoch_update = 0 return float(epoch_update) +###################### +#### TRACKERS ACL #### + +# # TODO: use new package => duplicate fct +def is_in_role(user_id, role): + if r_serv_db.sismember('user_role:{}'.format(role), user_id): + return True + else: + return False + +def is_tracker_in_global_level(tracker, tracker_type): + res = r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker)) + if res: + for elem_uuid in res: + if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'level')=='1': + return True + return False + +def is_tracker_in_user_level(tracker, tracker_type, user_id): + res = r_serv_tracker.smembers('user:tracker:{}'.format(user_id)) + if res: + for elem_uuid in res: + if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'tracked')== tracker: + if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'type')== tracker_type: + return True + return False + +def api_is_allowed_to_edit_tracker(tracker_uuid, user_id): + if not is_valid_uuid_v4(tracker_uuid): + return ({"status": "error", "reason": "Invalid uuid"}, 400) + tracker_creator = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') + if not tracker_creator: + return ({"status": "error", "reason": "Unknown uuid"}, 404) + if not 
is_in_role(user_id, 'admin') or user_id != tracker_creator: + return ({"status": "error", "reason": "Access Denied"}, 403) + return ({"uuid": tracker_uuid}, 200) + + +##-- ACL --## + +#### CREATE TRACKER #### +def api_validate_tracker_to_add(tracker , tracker_type, nb_words=1): + if tracker_type=='regex': + if not is_valid_regex(tracker): + return ({"status": "error", "reason": "Invalid regex"}, 400) + elif tracker_type=='word' or tracker_type=='set': + # force lowercase + tracker = tracker.lower() + word_set = set(tracker) + set_inter = word_set.intersection(special_characters) + if set_inter: + return ({"status": "error", "reason": f'special character(s) not allowed: {set_inter}', "message": "Please use a python regex or remove all special characters"}, 400) + words = tracker.split() + # not a word + if tracker_type=='word' and len(words)>1: + tracker_type = 'set' + + # ouput format: tracker1,tracker2,tracker3;2 + if tracker_type=='set': + try: + nb_words = int(nb_words) + except: + nb_words = 1 + if nb_words==0: + nb_words = 1 + + words_set = set(words) + words_set = sorted(words_set) + + if nb_words > len(words_set): + nb_words = len(words_set) + + tracker = ",".join(words_set) + tracker = "{};{}".format(tracker, nb_words) + + elif tracker_type=='yara_custom': + if not is_valid_yara_rule(tracker): + return ({"status": "error", "reason": "Invalid custom Yara Rule"}, 400) + elif tracker_type=='yara_default': + if not is_valid_default_yara_rule(tracker): + return ({"status": "error", "reason": "The Yara Rule doesn't exist"}, 400) + else: + return ({"status": "error", "reason": "Incorrect type"}, 400) + return ({"status": "success", "tracker": tracker, "type": tracker_type}, 200) + +def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None): + # edit tracker + if tracker_uuid: + edit_tracker = True + # check if type changed + old_type = get_tracker_type(tracker_uuid) + old_tracker = get_tracker_by_uuid(tracker_uuid) + old_level = get_tracker_level(tracker_uuid) + tracker_user_id = get_tracker_user_id(tracker_uuid) + + # Create new tracker + else: + edit_tracker = False + # generate tracker uuid + tracker_uuid = str(uuid.uuid4()) + old_type = None + old_tracker = None + + # YARA + if tracker_type == 'yara_custom' or tracker_type == 'yara_default': + # delete yara rule + if tracker_type == 'yara_default' and old_type == 'yara': + if not is_default_yara_rule(old_tracker): + filepath = get_yara_rule_file_by_tracker_name(old_tracker) + if filepath: + os.remove(filepath) + tracker = save_yara_rule(tracker_type, tracker, tracker_uuid=tracker_uuid) + tracker_type = 'yara' + + # create metadata + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'tracked', tracker) + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'type', tracker_type) + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'date', datetime.date.today().strftime("%Y%m%d")) + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'level', level) + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'dashboard', dashboard) + if not edit_tracker: + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'user_id', user_id) + + if description: + r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'description', description) + + # type change + if edit_tracker: + r_serv_tracker.srem('all:tracker:{}'.format(old_type), old_tracker) + r_serv_tracker.srem('all:tracker_uuid:{}:{}'.format(old_type, old_tracker), tracker_uuid) + if level != old_level: + if level == 0: + 
r_serv_tracker.srem('global:tracker', tracker_uuid) + elif level == 1: + r_serv_tracker.srem('user:tracker:{}'.format(tracker_user_id), tracker_uuid) + if tracker_type != old_type: + if old_level == 0: + r_serv_tracker.srem('user:tracker:{}:{}'.format(tracker_user_id, old_type), tracker_uuid) + elif old_level == 1: + r_serv_tracker.srem('global:tracker:{}'.format(old_type), tracker_uuid) + if old_type=='yara': + if not is_default_yara_rule(old_tracker): + filepath = get_yara_rule_file_by_tracker_name(old_tracker) + if filepath: + os.remove(filepath) + + # create all tracker set + r_serv_tracker.sadd('all:tracker:{}'.format(tracker_type), tracker) + + # create tracker - uuid map + r_serv_tracker.sadd('all:tracker_uuid:{}:{}'.format(tracker_type, tracker), tracker_uuid) + + # add display level set + if level == 0: # user only + r_serv_tracker.sadd('user:tracker:{}'.format(user_id), tracker_uuid) + r_serv_tracker.sadd('user:tracker:{}:{}'.format(user_id, tracker_type), tracker_uuid) + elif level == 1: # global + r_serv_tracker.sadd('global:tracker', tracker_uuid) + r_serv_tracker.sadd('global:tracker:{}'.format(tracker_type), tracker_uuid) + + # create tracker tags list + for tag in tags: + r_serv_tracker.sadd('tracker:tags:{}'.format(tracker_uuid), escape(tag) ) + + # create tracker tags mail notification list + for mail in mails: + r_serv_tracker.sadd('tracker:mail:{}'.format(tracker_uuid), escape(mail) ) + + # toggle refresh module tracker list/set + r_serv_tracker.set('tracker:refresh:{}'.format(tracker_type), time.time()) + if tracker_type != old_type: # toggle old type refresh + r_serv_tracker.set('tracker:refresh:{}'.format(old_type), time.time()) + return tracker_uuid + +def api_add_tracker(dict_input, user_id): + tracker = dict_input.get('tracker', None) + if not tracker: + return ({"status": "error", "reason": "Tracker not provided"}, 400) + tracker_type = dict_input.get('type', None) + if not tracker_type: + return ({"status": "error", "reason": "Tracker type not provided"}, 400) + nb_words = dict_input.get('nb_words', 1) + description = dict_input.get('description', '') + description = escape(description) + + res = api_validate_tracker_to_add(tracker , tracker_type, nb_words=nb_words) + if res[1]!=200: + return res + tracker = res[0]['tracker'] + tracker_type = res[0]['type'] + + tags = dict_input.get('tags', []) + mails = dict_input.get('mails', []) + res = verify_mail_list(mails) + if res: + return res + + ## TODO: add dashboard key + level = dict_input.get('level', 1) + try: + level = int(level) + if level not in range(0, 1): + level = 1 + except: + level = 1 + + tracker_uuid = dict_input.get('uuid', None) + # check edit ACL + if tracker_uuid: + res = api_is_allowed_to_edit_tracker(tracker_uuid, user_id) + if res[1] != 200: + return res + else: + # check if tracker already tracked in global + if level==1: + if is_tracker_in_global_level(tracker, tracker_type) and not tracker_uuid: + return ({"status": "error", "reason": "Tracker already exist"}, 409) + else: + if is_tracker_in_user_level(tracker, tracker_type, user_id) and not tracker_uuid: + return ({"status": "error", "reason": "Tracker already exist"}, 409) + + tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid) + + return ({'tracker': tracker, 'type': tracker_type, 'uuid': tracker_uuid}, 200) + +##-- CREATE TRACKER --## + +############## #### YARA #### def get_yara_rules_dir(): return os.path.join(os.environ['AIL_BIN'], 'trackers', 'yara') @@ -99,15 
+431,32 @@ def is_valid_yara_rule(yara_rule): except: return False -def is_valid_default_yara_rule(yara_rule): +def is_default_yara_rule(tracked_yara_name): + yara_dir = get_yara_rules_dir() + filename = os.path.join(yara_dir, tracked_yara_name) + filename = os.path.realpath(filename) + try: + if tracked_yara_name.split('/')[0] == 'custom-rules': + return False + except: + return False + if not os.path.commonprefix([filename, yara_dir]) == yara_dir: + return False + else: + if os.path.isfile(filename): + return True + return False + +def is_valid_default_yara_rule(yara_rule, verbose=True): yara_dir = get_yara_rules_default_dir() filename = os.path.join(yara_dir, yara_rule) filename = os.path.realpath(filename) # incorrect filename if not os.path.commonprefix([filename, yara_dir]) == yara_dir: - print('error: file transversal') - print(yara_dir) - print(filename) + if verbose: + print('error: file transversal') + print(yara_dir) + print(filename) return False else: if os.path.isfile(filename): @@ -126,6 +475,17 @@ def save_yara_rule(yara_rule_type, yara_rule, tracker_uuid=None): filename = os.path.join('ail-yara-rules', 'rules', yara_rule) return filename +def get_yara_rule_file_by_tracker_name(tracked_yara_name): + yara_dir = get_yara_rules_dir() + filename = os.path.join(yara_dir, tracked_yara_name) + filename = os.path.realpath(filename) + if not os.path.commonprefix([filename, yara_dir]) == yara_dir: + print('error: file transversal') + print(yara_dir) + print(filename) + return None + return filename + def get_yara_rule_content(yara_rule): yara_dir = get_yara_rules_dir() filename = os.path.join(yara_dir, yara_rule) @@ -157,7 +517,6 @@ def api_get_default_rule_content(default_yara_rule): ##-- YARA --## - if __name__ == '__main__': res = is_valid_yara_rule('rule dummy { }') print(res) diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 7896dbbe..773310c9 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -38,6 +38,8 @@ tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+ gaps=True, discard_empty=True) def is_valid_uuid_v4(UUID): + if not UUID: + return False UUID = UUID.replace('-', '') try: uuid_test = uuid.UUID(hex=UUID, version=4) @@ -215,11 +217,12 @@ def parse_tracked_term_to_add(term , term_type, nb_words=1): words_set = set(words) words_set = sorted(words_set) + if nb_words > len(words_set): + nb_words = len(words_set) + term = ",".join(words_set) term = "{};{}".format(term, nb_words) - if nb_words > len(words_set): - nb_words = len(words_set) elif term_type=='yara_custom': if not Tracker.is_valid_yara_rule(term): return ({"status": "error", "reason": "Invalid custom Yara Rule"}, 400) @@ -322,8 +325,11 @@ def delete_term(term_uuid): r_serv_term.delete('tracker:stat:{}'.format(term_uuid)) if term_type == 'yara': - # # TODO: - pass + # delete custom rule + if not Tracker.is_default_yara_rule(term): + filepath = Tracker.get_yara_rule_file_by_tracker_name(term) + if filepath: + os.remove(filepath) def replace_tracker_description(term_uuid, description): description = escape(description) diff --git a/var/www/modules/hunter/Flask_hunter.py b/var/www/modules/hunter/Flask_hunter.py index a1281d4c..1ada2914 100644 --- a/var/www/modules/hunter/Flask_hunter.py +++ b/var/www/modules/hunter/Flask_hunter.py @@ -93,8 +93,9 @@ def tracked_menu_yara(): @login_analyst def add_tracked_menu(): if request.method == 'POST': - term = request.form.get("term") - term_type = request.form.get("tracker_type") + tracker = request.form.get("tracker") + 
tracker_uuid = request.form.get("tracker_uuid") + tracker_type = request.form.get("tracker_type") nb_words = request.form.get("nb_word", 1) description = request.form.get("description", '') level = request.form.get("level", 0) @@ -102,15 +103,15 @@ def add_tracked_menu(): mails = request.form.get("mails", []) # YARA # - if term_type == 'yara': + if tracker_type == 'yara': yara_default_rule = request.form.get("yara_default_rule") yara_custom_rule = request.form.get("yara_custom_rule") if yara_custom_rule: - term = yara_custom_rule - term_type='yara_custom' + tracker = yara_custom_rule + tracker_type='yara_custom' else: - term = yara_default_rule - term_type='yara_default' + tracker = yara_default_rule + tracker_type='yara_default' # # if level == 'on': @@ -121,17 +122,58 @@ def add_tracked_menu(): if tags: tags = tags.split() - input_dict = {"term": term, "type": term_type, "nb_words": nb_words, "tags": tags, "mails": mails, "level": level, "description": description} + input_dict = {"tracker": tracker, "type": tracker_type, "nb_words": nb_words, "tags": tags, "mails": mails, "level": level, "description": description} user_id = current_user.get_id() - res = Term.parse_json_term_to_add(input_dict, user_id) + # edit tracker + if tracker_uuid: + input_dict['uuid'] = tracker_uuid + res = Tracker.api_add_tracker(input_dict, user_id) if res[1] == 200: return redirect(url_for('hunter.tracked_menu')) else: ## TODO: use modal return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] else: - all_yara_files = Tracker.get_all_default_yara_files() - return render_template("Add_tracker.html", all_yara_files=all_yara_files) + return render_template("edit_tracker.html", all_yara_files=Tracker.get_all_default_yara_files()) + +@hunter.route("/tracker/edit", methods=['GET', 'POST']) +@login_required +@login_analyst +def edit_tracked_menu(): + user_id = current_user.get_id() + tracker_uuid = request.args.get('uuid', None) + + res = Term.check_term_uuid_valid_access(tracker_uuid, user_id) # check if is author or admin + if res: # invalid access + return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] + + dict_tracker = Tracker.get_tracker_metedata(tracker_uuid, user_id=True, level=True, description=True, tags=True, mails=True) + dict_tracker['tags'] = ' '.join(dict_tracker['tags']) + dict_tracker['mails'] = ' '.join(dict_tracker['mails']) + + if dict_tracker['type'] == 'set': + dict_tracker['tracker'], dict_tracker['nb_words'] = dict_tracker['tracker'].split(';') + dict_tracker['tracker'] = dict_tracker['tracker'].replace(',', ' ') + elif dict_tracker['type'] == 'yara': #is_valid_default_yara_rule + if Tracker.is_default_yara_rule(dict_tracker['tracker']): + dict_tracker['yara_file'] = dict_tracker['tracker'].split('/') + dict_tracker['yara_file'] = dict_tracker['yara_file'][-2] + '/' + dict_tracker['yara_file'][-1] + dict_tracker['content'] = None + else: + dict_tracker['yara_file'] = None + dict_tracker['content'] = Tracker.get_yara_rule_content(dict_tracker['tracker']) + + return render_template("edit_tracker.html", dict_tracker=dict_tracker, + all_yara_files=Tracker.get_all_default_yara_files()) + + ## TO EDIT + # word + # set of word + nb words + # regex + # yara custum + # yara default ???? => allow edit ? + + #### EDIT SHow Trackers ?????????????????????????????????????????????????? 
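A short aside on the `set` tracker format handled above: `api_validate_tracker_to_add()` lowercases the words, deduplicates and sorts them, clamps `nb_words`, and stores the result as `tracker1,tracker2,tracker3;nb_words`. A standalone sketch of that normalization (the helper name and sample input are invented for illustration; only the output format comes from the patch):

```
#!/usr/bin/env python3
# Standalone sketch of the 'set' tracker normalization shown above.
def normalize_set_tracker(tracker, nb_words=1):
    # lowercase, deduplicate and sort the words
    words = sorted(set(tracker.lower().split()))
    try:
        nb_words = int(nb_words)
    except (TypeError, ValueError):
        nb_words = 1
    # clamp nb_words to [1, number of unique words]
    nb_words = max(1, min(nb_words, len(words)))
    return '{};{}'.format(','.join(words), nb_words)

if __name__ == '__main__':
    print(normalize_set_tracker('Password LEAK password database', 2))
    # -> database,leak,password;2
```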
@hunter.route("/tracker/show_tracker") @login_required diff --git a/var/www/modules/hunter/templates/Add_tracker.html b/var/www/modules/hunter/templates/edit_tracker.html similarity index 69% rename from var/www/modules/hunter/templates/Add_tracker.html rename to var/www/modules/hunter/templates/edit_tracker.html index 091d66f5..54eb5bcc 100644 --- a/var/www/modules/hunter/templates/Add_tracker.html +++ b/var/www/modules/hunter/templates/edit_tracker.html @@ -27,39 +27,41 @@
    -
    -
    -
    Create a new tracker
    +
    +
    +
    Edit a Tracker
    -

    Select a tracker type.

    + {%if dict_tracker%} + + {%endif%}
    -
    +
    - +
    -
    +
    - +
    -
    +
    - +
    - + @@ -68,6 +70,7 @@

    +

    Tracker Type:

    +
    - +
    @@ -92,11 +95,12 @@
    - {% for yara_types in all_yara_files %} - {% for yara_file in all_yara_files[yara_types] %} - + {% for yara_file_name in all_yara_files[yara_types] %} + {% endfor %} {% endfor %} @@ -107,14 +111,15 @@
    +
    Custom YARA rules:
    - +

    @@ -139,7 +144,7 @@ $(document).ready(function(){ $("#page-Tracker").addClass("active"); $("#nav_manual_crawler").addClass("active"); $("#tracker_desc").hide(); - $("#term").hide(); + $("#tracker").hide(); $("#nb_word").hide(); $("#yara_rule").hide(); @@ -148,30 +153,38 @@ $(document).ready(function(){ if (tracker_type=="word") { $("#tracker_desc").text("Token to track. You need to use a regex if you want to use one of the following special characters [<>~!?@#$%^&*|()_-+={}\":;,.\'\n\r\t]/\\ "); $("#tracker_desc").show(); - $("#term").show(); + $("#tracker").show(); $("#nb_word").hide(); $("#yara_rule").hide(); } else if (tracker_type=="set") { $("#tracker_desc").text("Set of Terms to track (space separated). This tracker is used to check if an item contain one or more terms specified in a set. If an item contain NB unique terms (by default NB of unique keywords = 1), this tracker is triggered. You need to use a regex if you want to use one of the following special characters [<>~!?@#$%^&*|()_-+={}\":;,.\'\n\r\t]/\\ "); $("#tracker_desc").show(); - $("#term").show(); + $("#tracker").show(); $("#nb_word").show(); $("#yara_rule").hide(); } else if (tracker_type=="regex") { $("#tracker_desc").text("Enter a valid Python regex"); $("#tracker_desc").show(); - $("#term").show(); + $("#tracker").show(); $("#nb_word").hide(); $("#yara_rule").hide(); } else if (tracker_type=="yara") { $("#tracker_desc").text("Select a default yara rule or create your own rule:"); $("#tracker_desc").show(); - $("#term").hide(); + $("#tracker").hide(); $("#nb_word").hide(); $("#yara_rule").show(); } }); + {%if dict_tracker%} + $('#tracker_type').val('{{dict_tracker['type']}}').change(); + + {%if dict_tracker['type']=='yara' and dict_tracker['yara_file']%} + $('#yara_default_rule').val('{{dict_tracker['yara_file']}}').change(); + {%endif%} + {%endif%} + }); function toggle_sidebar(){ diff --git a/var/www/modules/hunter/templates/showTracker.html b/var/www/modules/hunter/templates/showTracker.html index c0d0d589..af7d2b50 100644 --- a/var/www/modules/hunter/templates/showTracker.html +++ b/var/www/modules/hunter/templates/showTracker.html @@ -171,10 +171,14 @@
    - - - + {%if yara_rule_content%}



    {{ yara_rule_content }}

    From 78903ec033ac2f2bc764972f3f20ad98c83b6768 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 8 Dec 2020 17:08:39 +0100 Subject: [PATCH 24/43] fix: [Tracker] edit tracker ACL --- bin/lib/Tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 9efbd375..844e1093 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -183,7 +183,7 @@ def api_is_allowed_to_edit_tracker(tracker_uuid, user_id): tracker_creator = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id') if not tracker_creator: return ({"status": "error", "reason": "Unknown uuid"}, 404) - if not is_in_role(user_id, 'admin') or user_id != tracker_creator: + if not is_in_role(user_id, 'admin') and user_id != tracker_creator: return ({"status": "error", "reason": "Access Denied"}, 403) return ({"uuid": tracker_uuid}, 200) From 28f6963ff402124ef40da0e527241b7b37219c75 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 11 Dec 2020 16:02:47 +0100 Subject: [PATCH 25/43] fix: [UI trackers] None trackers values --- .../hunter/templates/trackersManagement.html | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/var/www/modules/hunter/templates/trackersManagement.html b/var/www/modules/hunter/templates/trackersManagement.html index fee4b2ba..007d8046 100644 --- a/var/www/modules/hunter/templates/trackersManagement.html +++ b/var/www/modules/hunter/templates/trackersManagement.html @@ -67,10 +67,12 @@ - {% if dict_uuid['term']|length > 256 %} - {{ dict_uuid['term'][0:256]}}... - {% else %} - {{ dict_uuid['term']}} + {% if dict_uuid['term']%} + {% if dict_uuid['term']|length > 256 %} + {{ dict_uuid['term'][0:256]}}... + {% else %} + {{ dict_uuid['term']}} + {% endif %} {% endif %} @@ -128,10 +130,12 @@ - {% if dict_uuid['term']|length > 256 %} - {{ dict_uuid['term'][0:256]}}... - {% else %} - {{ dict_uuid['term']}} + {% if dict_uuid['term']%} + {% if dict_uuid['term']|length > 256 %} + {{ dict_uuid['term'][0:256]}}... 
+ {% else %} + {{ dict_uuid['term']}} + {% endif %} {% endif %} From 6bc54baf74a026938316977dedb10a6d249f628d Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 11 Dec 2020 21:02:07 +0100 Subject: [PATCH 26/43] chg: [Languages]detect + search domains languages --- bin/LAUNCH.sh | 2 + bin/Languages.py | 33 +++ bin/lib/Domain.py | 124 ++++++++- bin/lib/Language.py | 240 ++++++++++++++++++ bin/packages/Item.py | 69 ++++- bin/packages/modules.cfg | 3 + bin/update-background.py | 24 ++ update/v3.4/Update.py | 37 +++ update/v3.4/Update.sh | 54 ++++ update/v3.4/Update_domain.py | 57 +++++ var/www/blueprints/crawler_splash.py | 37 +++ .../templates/Crawler_dashboard.html | 4 + .../crawler_splash/domain_explorer.html | 2 +- .../crawler/crawler_splash/showDomain.html | 6 + .../domains/block_languages_search.html | 73 ++++++ .../templates/domains/card_img_domain.html | 54 ++-- .../domains/domains_filter_languages.html | 192 ++++++++++++++ 17 files changed, 990 insertions(+), 21 deletions(-) create mode 100755 bin/Languages.py create mode 100755 bin/lib/Language.py create mode 100755 update/v3.4/Update.py create mode 100755 update/v3.4/Update.sh create mode 100755 update/v3.4/Update_domain.py create mode 100644 var/www/templates/domains/block_languages_search.html create mode 100644 var/www/templates/domains/domains_filter_languages.html diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 0c8a6a70..c4e4a538 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -216,6 +216,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Tags" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tags.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "Languages" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Languages.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SentimentAnalysis.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./DbCleaner.py; read x" diff --git a/bin/Languages.py b/bin/Languages.py new file mode 100755 index 00000000..f4785250 --- /dev/null +++ b/bin/Languages.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +import cld3 + +from packages import Item +from lib import Domain + +from pubsublogger import publisher +from Helper import Process + +if __name__ == '__main__': + publisher.port = 6380 + publisher.channel = 'Script' + # Section name in bin/packages/modules.cfg + config_section = 'Languages' + # Setup the I/O queues + p = Process(config_section) + + while True: + message = p.get_from_set() + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + item_id = Item.get_item_id(message) + if Item.is_crawled(item_id): + domain = Item.get_item_domain(item_id) + Domain.add_domain_languages_by_item_id(domain, item_id) diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 52b5b7c6..5eca1943 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -9,6 +9,7 @@ The ``Domain`` import os import sys +import itertools import time import redis import random @@ -24,6 +25,7 @@ import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader import Correlate_object +import Language import Screenshot import Username @@ -66,6 +68,15 @@ def sanitize_domain_type(domain_type): else: return 'regular' +def sanitize_domain_types(l_domain_type): + all_domain_types = get_all_domains_type() + if not l_domain_type: + return all_domain_types + for domain_type in 
l_domain_type: + if domain_type not in all_domain_types: + return all_domain_types + return l_domain_type + ######## DOMAINS ######## def get_all_domains_type(): return ['onion', 'regular'] @@ -210,6 +221,15 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], else: return None + + +## TODO: filters: +# - tags +# - languages +# - daterange UP +def get_domains_by_filters(): + pass + def create_domains_metadata_list(list_domains, domain_type): l_domains = [] for domain in list_domains: @@ -218,9 +238,98 @@ def create_domains_metadata_list(list_domains, domain_type): else: dom_type = domain_type l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True, - ports=True, tags=True, screenshot=True, tags_safe=True)) + ports=True, tags=True, languages=True, screenshot=True, tags_safe=True)) return l_domains + +######## LANGUAGES ######## +def get_all_domains_languages(): + return r_serv_onion.smembers('all_domains_languages') + +def get_domains_by_languages(languages, l_domain_type=[]): + l_domain_type = sanitize_domain_types(l_domain_type) + if not languages: + return [] + elif len(languages) == 1: + return get_all_domains_by_language(languages[0], l_domain_type=l_domain_type) + else: + all_domains_t = [] + for domain_type in l_domain_type: + l_keys_name = [] + for language in languages: + l_keys_name.append('language:domains:{}:{}'.format(domain_type, language)) + res = r_serv_onion.sinter(l_keys_name[0], *l_keys_name[1:]) + if res: + all_domains_t.append(res) + return list(itertools.chain.from_iterable(all_domains_t)) + +def get_all_domains_by_language(language, l_domain_type=[]): + l_domain_type = sanitize_domain_types(l_domain_type) + if len(l_domain_type) == 1: + return r_serv_onion.smembers('language:domains:{}:{}'.format(l_domain_type[0], language)) + else: + l_keys_name = [] + for domain_type in l_domain_type: + l_keys_name.append('language:domains:{}:{}'.format(domain_type, language)) + return r_serv_onion.sunion(l_keys_name[0], *l_keys_name[1:]) + +def get_domain_languages(domain, r_list=False): + res = r_serv_onion.smembers('domain:language:{}'.format(domain)) + if r_list: + return list(res) + else: + return res + +def add_domain_language(domain, language): + language = language.split('-')[0] + domain_type = get_domain_type(domain) + r_serv_onion.sadd('all_domains_languages', language) + r_serv_onion.sadd('all_domains_languages:{}'.format(domain_type), language) + r_serv_onion.sadd('language:domains:{}:{}'.format(domain_type, language), domain) + r_serv_onion.sadd('domain:language:{}'.format(domain), language) + +def add_domain_languages_by_item_id(domain, item_id): + for lang in Item.get_item_languages(item_id, min_proportion=0.2, min_probability=0.8): + add_domain_language(domain, lang.language) + +def delete_domain_languages(domain): + domain_type = get_domain_type(domain) + for language in get_domain_languages(domain): + r_serv_onion.srem('language:domains:{}:{}'.format(domain_type, language), domain) + if not r_serv_onion.exists('language:domains:{}:{}'.format(domain_type, language)): + r_serv_onion.srem('all_domains_languages:{}'.format(domain_type), language) + exist_domain_type_lang = False + for domain_type in get_all_domains_type(): + if r_serv_onion.sismembers('all_domains_languages:{}'.format(domain_type), language): + exist_domain_type_lang = True + continue + if not exist_domain_type_lang: + r_serv_onion.srem('all_domains_languages', language) + 
r_serv_onion.delete('domain:language:{}'.format(domain)) + +def _delete_all_domains_languages(): + for language in get_all_domains_languages(): + for domain in get_all_domains_by_language(language): + delete_domain_languages(domain) + +## API ## +## TODO: verify domains type + languages list +## TODO: add pagination +def api_get_domains_by_languages(domains_types, languages, domains_metadata=False, page=1): + l_domains = sorted(get_domains_by_languages(languages, l_domain_type=domains_types)) + l_domains = paginate_iterator(l_domains, nb_obj=28, page=page) + if not domains_metadata: + return l_domains + else: + l_dict_domains = [] + for domain in l_domains['list_elem']: + l_dict_domains.append(get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True, + status=True, ports=True, tags=True, tags_safe=True, + languages=True, screenshot=True)) + l_domains['list_elem'] = l_dict_domains + return l_domains +####---- ----#### + ######## DOMAIN ######## def get_domain_type(domain): @@ -498,7 +607,7 @@ def get_domain_random_screenshot(domain): ''' return Screenshot.get_randon_domain_screenshot(domain) -def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, screenshot=False): +def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, languages=False, screenshot=False): ''' Get Domain basic metadata @@ -516,6 +625,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s ''' dict_metadata = {} dict_metadata['id'] = domain + dict_metadata['type'] = domain_type if first_seen: res = get_domain_first_seen(domain, domain_type=domain_type) if res is not None: @@ -535,6 +645,8 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags']) else: dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain)) + if languages: + dict_metadata['languages'] = Language.get_languages_from_iso(get_domain_languages(domain, r_list=True), sort=True) if screenshot: dict_metadata['screenshot'] = get_domain_random_screenshot(domain) return dict_metadata @@ -796,6 +908,14 @@ class Domain(object): ''' return get_domain_tags(self.domain) + def get_domain_languages(self): + ''' + Retun all languages of a given domain. + + :param domain: domain name + ''' + return get_domain_languages(self.domain) + def get_domain_correlation(self): ''' Retun all correlation of a given domain. 
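As a reading aid before the new `Language.py` helper below: the per-domain language index written by `add_domain_language()` is a handful of plain sets in ARDB_Onion, so the lookups above reduce to set operations. The key names come from the diff; the domains, languages and connection parameters in this sketch are invented:

```
# Key layout written by add_domain_language() (example members are made up):
#   all_domains_languages               -> {'en', 'fr'}        every language ever seen
#   all_domains_languages:onion         -> {'en', 'fr'}        languages seen per domain type
#   language:domains:onion:fr           -> {'example.onion'}   reverse index: language -> domains
#   domain:language:example.onion       -> {'en', 'fr'}        languages of one domain
#
# get_domains_by_languages(['fr', 'en'], ['onion']) then boils down to a SINTER:
import redis

# assumed ARDB_Onion connection settings, adjust to your core.cfg
r_serv_onion = redis.StrictRedis(host='localhost', port=6382, db=9, decode_responses=True)
print(r_serv_onion.sinter('language:domains:onion:fr', 'language:domains:onion:en'))
```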
diff --git a/bin/lib/Language.py b/bin/lib/Language.py new file mode 100755 index 00000000..6b5bd6a0 --- /dev/null +++ b/bin/lib/Language.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import redis + +dict_iso_languages = { + 'af': 'Afrikaans', + 'am': 'Amharic', + 'ar': 'Arabic', + 'bg': 'Bulgarian', + 'bn': 'Bangla', + 'bs': 'Bosnian', + 'ca': 'Catalan', + 'ceb': 'Cebuano', + 'co': 'Corsican', + 'cs': 'Czech', + 'cy': 'Welsh', + 'da': 'Danish', + 'de': 'German', + 'el': 'Greek', + 'en': 'English', + 'eo': 'Esperanto', + 'es': 'Spanish', + 'et': 'Estonian', + 'eu': 'Basque', + 'fa': 'Persian', + 'fi': 'Finnish', + 'fil': 'Filipino', + 'fr': 'French', + 'fy': 'Western Frisian', + 'ga': 'Irish', + 'gd': 'Scottish Gaelic', + 'gl': 'Galician', + 'gu': 'Gujarati', + 'ha': 'Hausa', + 'haw': 'Hawaiian', + 'hi': 'Hindi', + 'hmn': 'Hmong', + 'hr': 'Croatian', + 'ht': 'Haitian Creole', + 'hu': 'Hungarian', + 'hy': 'Armenian', + 'id': 'Indonesian', + 'ig': 'Igbo', + 'is': 'Icelandic', + 'it': 'Italian', + 'iw': 'Hebrew', + 'ja': 'Japanese', + 'jv': 'Javanese', + 'ka': 'Georgian', + 'kk': 'Kazakh', + 'km': 'Khmer', + 'kn': 'Kannada', + 'ko': 'Korean', + 'ku': 'Kurdish', + 'ky': 'Kyrgyz', + 'la': 'Latin', + 'lb': 'Luxembourgish', + 'lo': 'Lao', + 'lt': 'Lithuanian', + 'lv': 'Latvian', + 'mg': 'Malagasy', + 'mi': 'Maori', + 'mk': 'Macedonian', + 'ml': 'Malayalam', + 'mn': 'Mongolian', + 'mr': 'Marathi', + 'ms': 'Malay', + 'mt': 'Maltese', + 'my': 'Burmese', + 'ne': 'Nepali', + 'nl': 'Dutch', + 'no': 'Norwegian', + 'ny': 'Nyanja', + 'pa': 'Punjabi', + 'pl': 'Polish', + 'ps': 'Pashto', + 'pt': 'Portuguese', + 'ro': 'Romanian', + 'ru': 'Russian', + 'sd': 'Sindhi', + 'si': 'Sinhala', + 'sk': 'Slovak', + 'sl': 'Slovenian', + 'sm': 'Samoan', + 'sn': 'Shona', + 'so': 'Somali', + 'sq': 'Albanian', + 'sr': 'Serbian', + 'st': 'Southern Sotho', + 'su': 'Sundanese', + 'sv': 'Swedish', + 'sw': 'Swahili', + 'ta': 'Tamil', + 'te': 'Telugu', + 'tg': 'Tajik', + 'th': 'Thai', + 'tr': 'Turkish', + 'uk': 'Ukrainian', + 'ur': 'Urdu', + 'uz': 'Uzbek', + 'vi': 'Vietnamese', + 'xh': 'Xhosa', + 'yi': 'Yiddish', + 'yo': 'Yoruba', + 'zh': 'Chinese', + 'zu': 'Zulu' +} + +dict_languages_iso = { + 'Afrikaans': 'af', + 'Amharic': 'am', + 'Arabic': 'ar', + 'Bulgarian': 'bg', + 'Bangla': 'bn', + 'Bosnian': 'bs', + 'Catalan': 'ca', + 'Cebuano': 'ceb', + 'Corsican': 'co', + 'Czech': 'cs', + 'Welsh': 'cy', + 'Danish': 'da', + 'German': 'de', + 'Greek': 'el', + 'English': 'en', + 'Esperanto': 'eo', + 'Spanish': 'es', + 'Estonian': 'et', + 'Basque': 'eu', + 'Persian': 'fa', + 'Finnish': 'fi', + 'Filipino': 'fil', + 'French': 'fr', + 'Western Frisian': 'fy', + 'Irish': 'ga', + 'Scottish Gaelic': 'gd', + 'Galician': 'gl', + 'Gujarati': 'gu', + 'Hausa': 'ha', + 'Hawaiian': 'haw', + 'Hindi': 'hi', + 'Hmong': 'hmn', + 'Croatian': 'hr', + 'Haitian Creole': 'ht', + 'Hungarian': 'hu', + 'Armenian': 'hy', + 'Indonesian': 'id', + 'Igbo': 'ig', + 'Icelandic': 'is', + 'Italian': 'it', + 'Hebrew': 'iw', + 'Japanese': 'ja', + 'Javanese': 'jv', + 'Georgian': 'ka', + 'Kazakh': 'kk', + 'Khmer': 'km', + 'Kannada': 'kn', + 'Korean': 'ko', + 'Kurdish': 'ku', + 'Kyrgyz': 'ky', + 'Latin': 'la', + 'Luxembourgish': 'lb', + 'Lao': 'lo', + 'Lithuanian': 'lt', + 'Latvian': 'lv', + 'Malagasy': 'mg', + 'Maori': 'mi', + 'Macedonian': 'mk', + 'Malayalam': 'ml', + 'Mongolian': 'mn', + 'Marathi': 'mr', + 'Malay': 'ms', + 'Maltese': 'mt', + 'Burmese': 'my', + 'Nepali': 'ne', + 'Dutch': 'nl', + 'Norwegian': 'no', + 'Nyanja': 
'ny', + 'Punjabi': 'pa', + 'Polish': 'pl', + 'Pashto': 'ps', + 'Portuguese': 'pt', + 'Romanian': 'ro', + 'Russian': 'ru', + 'Sindhi': 'sd', + 'Sinhala': 'si', + 'Slovak': 'sk', + 'Slovenian': 'sl', + 'Samoan': 'sm', + 'Shona': 'sn', + 'Somali': 'so', + 'Albanian': 'sq', + 'Serbian': 'sr', + 'Southern Sotho': 'st', + 'Sundanese': 'su', + 'Swedish': 'sv', + 'Swahili': 'sw', + 'Tamil': 'ta', + 'Telugu': 'te', + 'Tajik': 'tg', + 'Thai': 'th', + 'Turkish': 'tr', + 'Ukrainian': 'uk', + 'Urdu': 'ur', + 'Uzbek': 'uz', + 'Vietnamese': 'vi', + 'Xhosa': 'xh', + 'Yiddish': 'yi', + 'Yoruba': 'yo', + 'Chinese': 'zh', + 'Zulu': 'zu' +} + +def get_language_from_iso(iso_language): + return dict_iso_languages.get(iso_language, None) + +def get_languages_from_iso(l_iso_languages, sort=False): + l_languages = [] + for iso_language in l_iso_languages: + language = get_language_from_iso(iso_language) + if language: + l_languages.append(language) + if sort: + l_languages = sorted(l_languages) + return l_languages + +def get_iso_from_language(language): + return dict_languages_iso.get(language, None) + +def get_iso_from_languages(l_languages, sort=False): + l_iso = [] + for language in l_languages: + iso_lang = get_iso_from_language(language) + if iso_lang: + l_iso.append(iso_lang) + if sort: + l_iso = sorted(l_iso) + return l_iso diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 15993d7a..e2b08f7d 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -2,8 +2,10 @@ # -*-coding:UTF-8 -* import os +import re import sys import redis +import cld3 import html2text from io import BytesIO @@ -101,13 +103,62 @@ def add_item_parent(item_parent, item_id): def get_item_content(item_id): return item_basic.get_item_content(item_id) -def get_item_content_html2text(item_id, item_content=None): +def get_item_content_html2text(item_id, item_content=None, ignore_links=False): if not item_content: item_content = get_item_content(item_id) h = html2text.HTML2Text() - h.ignore_links = False + h.ignore_links = ignore_links + h.ignore_images = ignore_links return h.handle(item_content) +def remove_all_urls_from_content(item_id, item_content=None): + if not item_content: + item_content = get_item_content(item_id) + regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b' + url_regex = re.compile(regex) + urls = url_regex.findall(item_content) + urls = sorted(urls, key=len, reverse=True) + for url in urls: + item_content = item_content.replace(url, '') + + regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----' + regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----' + regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----' + re.compile(regex_pgp_public_blocs) + re.compile(regex_pgp_signature) + re.compile(regex_pgp_message) + + res = re.findall(regex_pgp_public_blocs, item_content) + for it in res: + item_content = item_content.replace(it, '') + res = re.findall(regex_pgp_signature, item_content) + for it in res: + item_content = item_content.replace(it, '') + res = re.findall(regex_pgp_message, item_content) + for it in res: + item_content = item_content.replace(it, '') + + return item_content + +def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7): + all_languages = [] + + ## CLEAN CONTENT ## + content = get_item_content_html2text(item_id, ignore_links=True) + content = 
remove_all_urls_from_content(item_id, item_content=content) + + # REMOVE USELESS SPACE + content = ' '.join(content.split()) + #- CLEAN CONTENT -# + + #print(content) + #print(len(content)) + if len(content) >= min_len: + for lang in cld3.get_frequent_languages(content, num_langs=num_langs): + if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable: + all_languages.append(lang) + return all_languages + # API def get_item(request_dict): if not request_dict: @@ -496,3 +547,17 @@ def delete_domain_node(item_id): domain_basic.delete_domain_item_core(item_id, domain, port) for child_id in get_all_domain_node_by_item_id(item_id): delete_item(child_id) + +# if __name__ == '__main__': +# import Domain +# domain = Domain.Domain('domain.onion') +# for domain_history in domain.get_domain_history(): +# domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag +# if "items" in domain_item: +# for item_dict in domain_item['items']: +# item_id = item_dict['id'] +# print(item_id) +# for lang in get_item_languages(item_id, min_proportion=0.2, min_probability=0.8): +# print(lang) +# print() +# print(get_item_languages(item_id, min_proportion=0.2, min_probability=0.6)) # 0.7 ? diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 233b9066..cf65a126 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -46,6 +46,9 @@ publish = Redis_Tags subscribe = Redis_Global publish = Redis_Tags +[Languages] +subscribe = Redis_Global + [Categ] subscribe = Redis_Global publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey diff --git a/bin/update-background.py b/bin/update-background.py index d1ec6eaf..838ddf7b 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -17,6 +17,25 @@ import subprocess sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +def launch_background_upgrade(version, script_name): + if r_serv.sismember('ail:to_update', version): + r_serv.delete('ail:update_error') + r_serv.set('ail:update_in_progress', version) + r_serv.set('ail:current_background_update', version) + r_serv.set('ail:current_background_script', 'domain tags update') + + update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name) + process = subprocess.run(['python' ,update_file]) + + update_progress = r_serv.get('ail:current_background_script_stat') + if update_progress: + if int(update_progress) == 100: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + r_serv.srem('ail:to_update', new_version) + if __name__ == "__main__": config_loader = ConfigLoader.ConfigLoader() @@ -114,3 +133,8 @@ if __name__ == "__main__": r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') r_serv.srem('ail:to_update', new_version) + + launch_background_upgrade('v2.6', 'Update_screenshots.py') + launch_background_upgrade('v2.7', 'Update_domain_tags.py') + + launch_background_upgrade('v3.4', 'Update_domain.py') diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py new file mode 100755 index 00000000..37ae0428 --- /dev/null +++ b/update/v3.4/Update.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + 
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.4' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + #Set current update_in_progress + r_serv.set('ail:update_in_progress', new_version) + r_serv.set('ail:current_background_update', new_version) + + r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') + r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4')) + r_serv.set('update:nb_elem_converted',0) + + #Set current ail version + r_serv.set('ail:version', new_version) + + #Set current ail version + r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.4/Update.sh b/update/v3.4/Update.sh new file mode 100755 index 00000000..a1eaeb5a --- /dev/null +++ b/update/v3.4/Update.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -ldbv & +wait +echo "" + +# SUBMODULES # +git submodule update + +# echo "" +# echo -e $GREEN"installing KVORCKS ..."$DEFAULT +# cd ${AIL_HOME} +# test ! 
-d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git +# pushd kvrocks/ +# make -j4 +# popd + +echo -e $GREEN"Installing html2text ..."$DEFAULT +pip3 install pycld3 + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v3.4/Update.py +wait +echo "" +echo "" + + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py new file mode 100755 index 00000000..092830c6 --- /dev/null +++ b/update/v3.4/Update_domain.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader +import Domain + +def update_update_stats(): + nb_updated = int(r_serv_db.get('update:nb_elem_converted')) + progress = int((nb_updated * 100) / nb_elem_to_update) + print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress)) + r_serv_db.set('ail:current_background_script_stat', progress) + +def update_domain_language(domain_obj, item_id): + domain_name = domain_obj.get_domain_name() + Domain.add_domain_languages_by_item_id(domain_name, item_id) + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') + if not nb_elem_to_update: + nb_elem_to_update = 1 + else: + nb_elem_to_update = int(nb_elem_to_update) + + #Domain._delete_all_domains_languages() + + while True: + domain = r_serv_onion.spop('domain_update_v3.4') + if domain is not None: + print(domain) + domain = Domain.Domain(domain) + for domain_history in domain.get_domain_history(): + domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag + if "items" in domain_item: + for item_dict in domain_item['items']: + update_domain_language(domain, item_dict['id']) + + r_serv_db.incr('update:nb_elem_converted') + update_update_stats() + + else: + sys.exit(0) diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index b2ef7f63..5da9b633 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -26,6 +26,7 @@ import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import Domain import crawlers +import Language r_cache = Flask_config.r_cache r_serv_db = Flask_config.r_serv_db @@ -85,6 +86,9 @@ def send_to_spider(): return create_json_response(res[0], res[1]) return redirect(url_for('crawler_splash.manual')) + +#### Domains #### + # add route : /crawlers/show_domain @crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST']) @login_required @@ -111,6 +115,7 @@ def showDomain(): dict_domain = {**dict_domain, **domain.get_domain_correlation()} dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain) dict_domain['father'] = domain.get_domain_father() + dict_domain['languages'] = Language.get_languages_from_iso(domain.get_domain_languages(), sort=True) dict_domain['tags'] = domain.get_domain_tags() dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags']) dict_domain['history'] = domain.get_domain_history_with_status() @@ -198,6 +203,38 @@ def domains_explorer_web(): dict_data = Domain.get_domains_up_by_filers('regular', page=page, 
date_from=date_from, date_to=date_to) return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular') +@crawler_splash.route('/domains/languages/all/json', methods=['GET']) +@login_required +@login_read_only +def domains_all_languages_json(): + # # TODO: get domain type + iso = request.args.get('iso') + domain_types = request.args.getlist('domain_types') + return jsonify(Language.get_languages_from_iso(Domain.get_all_domains_languages(), sort=True)) + +@crawler_splash.route('/domains/languages/search_get', methods=['GET']) +@login_required +@login_read_only +def domains_search_languages_get(): + page = request.args.get('page') + try: + page = int(page) + except: + page = 1 + domains_types = request.args.getlist('domain_types') + if domains_types: + domains_types = domains_types[0].split(',') + languages = request.args.getlist('languages') + if languages: + languages = languages[0].split(',') + l_dict_domains = Domain.api_get_domains_by_languages(domains_types, Language.get_iso_from_languages(languages), domains_metadata=True, page=page) + return render_template("domains/domains_filter_languages.html", template_folder='../../', + l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, + current_languages=languages, domains_types=domains_types) + +##-- --## + + ## Cookiejar ## @crawler_splash.route('/crawler/cookiejar/add', methods=['GET']) @login_required diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html index 9c0e1933..5cc7f987 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html +++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html @@ -148,6 +148,10 @@ {% include 'tags/block_obj_tags_search.html' %} {% endwith %} + {% with object_type='domain' %} + {% include 'domains/block_languages_search.html' %} + {% endwith %} +
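For completeness, the new `/domains/languages/search_get` view registered above is a regular authenticated Flask page that returns rendered HTML; a client query could look roughly like the sketch below (host, port, session handling and filter values are placeholders; only the parameter names `languages`, `domain_types` and `page` come from the route):

```
# Hypothetical query against the language search view of a local AIL instance.
import requests

params = {
    'languages': 'French,German',  # comma-separated names, mapped back to ISO codes server-side
    'domain_types': 'onion',       # optional, defaults to all domain types
    'page': 1,
}
session = requests.Session()       # assumes the session is already authenticated
resp = session.get('https://127.0.0.1:7000/domains/languages/search_get',
                   params=params, verify=False)
print(resp.status_code)
```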
    diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html index 16595014..629cd090 100644 --- a/var/www/templates/crawler/crawler_splash/domain_explorer.html +++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html @@ -68,7 +68,7 @@
    - {% with dict_data=dict_data, bootstrap_label=bootstrap_label %} + {% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %} {% include 'domains/card_img_domain.html' %} {% endwith %} diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index 2fba319f..c63dd5b5 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -67,6 +67,7 @@ First Seen Last Check Ports + Languages @@ -74,6 +75,11 @@ {%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%} {%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%} {%if dict_domain["ports"]%}{{ dict_domain["ports"] }}{%endif%} + + {% for languages in dict_domain['languages'] %} + {{languages}} + {% endfor %} + diff --git a/var/www/templates/domains/block_languages_search.html b/var/www/templates/domains/block_languages_search.html new file mode 100644 index 00000000..338e91d0 --- /dev/null +++ b/var/www/templates/domains/block_languages_search.html @@ -0,0 +1,73 @@ +
    +
    +
    + Domains by Languages : +
    +
    +
    + +
    +
    + +
    + +
    + +
    +
    + + +
    +
    + + +
    +
    + + + +
    +
    + + + + + diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html index eb7e8371..3df796e8 100644 --- a/var/www/templates/domains/card_img_domain.html +++ b/var/www/templates/domains/card_img_domain.html @@ -1,10 +1,10 @@ -{% for dict_domain in dict_data['list_elem'] %} +{% for dict_domain in l_dict_domains %} {% if loop.index0 % 4 == 0 %}
    {% endif %} -
    +
    @@ -13,24 +13,46 @@ {{dict_domain["id"]}} + + {% if dict_domain["status"] %} + + UP + + {% else %} + + DOWN + + {% endif %} +
    + + + + + + {{dict_domain["first_seen"]}} + + + + + {{dict_domain["first_seen"]}} + + + + +

    - First seen: {{dict_domain["first_seen"]}}
    - Last_seen: {{dict_domain["first_seen"]}}
    - Ports: {{dict_domain["ports"]}} + Ports: {{dict_domain["ports"]}}
    + {% if dict_domain['languages'] %} + Languages: + {% for language in dict_domain['languages'] %} + {{ language }} + {% endfor %} + {% endif %}

    - Status: - {% if dict_domain["status"] %} - - UP - - {% else %} - - DOWN - - {% endif %} + {% endif %} diff --git a/var/www/templates/domains/domains_filter_languages.html b/var/www/templates/domains/domains_filter_languages.html new file mode 100644 index 00000000..b45236c6 --- /dev/null +++ b/var/www/templates/domains/domains_filter_languages.html @@ -0,0 +1,192 @@ + + + + Show Domain - AIL + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
    +
    + + {% include 'crawler/menu_sidebar.html' %} + +
    + +
    +
    + + {% include 'domains/block_languages_search.html' %} + + +
    +
    + +
    +
    +
    +
    + +
    +
    + +
    +
    + +
    +
    +
    +
    + +
    +
    + + + {% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %} + {% include 'domains/card_img_domain.html' %} + {% endwith %} + +
    +
    + + {%if l_dict_domains['list_elem']%} + {% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %} + {% set target_url=url_for('crawler_splash.domains_search_languages_get') + "?languages=" + ','.join(current_languages)%} + {%if domains_types %} + {% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%} + {%endif%} + {% include 'pagination.html' %} + {% endwith %} + {%endif%} + + +
    +
    +
    + + + + + + + + + From b6ed23b33a9653f09ebb156ee7b1709e03e09d16 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 11 Dec 2020 21:12:11 +0100 Subject: [PATCH 27/43] fix: [Languages] update --- update/v3.4/Update.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/update/v3.4/Update.sh b/update/v3.4/Update.sh index a1eaeb5a..16a9ccb7 100755 --- a/update/v3.4/Update.sh +++ b/update/v3.4/Update.sh @@ -19,9 +19,9 @@ echo -e $GREEN"Shutting down AIL ..."$DEFAULT bash ${AIL_BIN}/LAUNCH.sh -ks wait -bash ${AIL_BIN}/LAUNCH.sh -ldbv & -wait -echo "" +# bash ${AIL_BIN}/LAUNCH.sh -ldbv & +# wait +# echo "" # SUBMODULES # git submodule update From 72b6e5cf01d1e08e1a3fad5932653d3e3757cfd4 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 11 Dec 2020 21:31:14 +0100 Subject: [PATCH 28/43] fix: [Languages] import + update message --- bin/Languages.py | 2 +- var/www/modules/Flask_config.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/Languages.py b/bin/Languages.py index f4785250..bd646fd7 100755 --- a/bin/Languages.py +++ b/bin/Languages.py @@ -3,8 +3,8 @@ import os import sys - import cld3 +import time from packages import Item from lib import Domain diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 2b2937aa..a1a8de6b 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -92,6 +92,8 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me 'v2.6':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', 'update_warning_message_notice_me': 'missing from the UI.'}, 'v2.7':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags can be', + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v3.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. 
Some informations like Domain Languages can be', 'update_warning_message_notice_me': 'missing from the UI.'} } From 437447e6c5f472d648e56c2ee7c1c43eb6be7e7c Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 11 Dec 2020 21:43:49 +0100 Subject: [PATCH 29/43] fix: [update v3.4] updater --- update/v3.4/Update.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py index 37ae0428..cc7b739b 100755 --- a/update/v3.4/Update.py +++ b/update/v3.4/Update.py @@ -30,6 +30,8 @@ if __name__ == '__main__': r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4')) r_serv.set('update:nb_elem_converted',0) + r_serv.sadd('ail:to_update', new_version) + #Set current ail version r_serv.set('ail:version', new_version) From e3800aa36a524936185334a752bc7d4497325733 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Tue, 15 Dec 2020 10:39:01 +0100 Subject: [PATCH 30/43] fix: [install] pycld3 dependency Without this, Flask fails to start. --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 73d06144..2a9347c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -28,6 +28,7 @@ yara-python #Crawler scrapy scrapy-splash +pycld3 #Graph numpy From 917389670b52fb6c869b38da874cc8d8a891e2f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Sigr=C3=A9?= Date: Sun, 27 Dec 2020 19:59:19 +1100 Subject: [PATCH 31/43] Added 'wget' as a dependency 'wget' is a dependency; without it, additional components won't download as part of the initial install. One such example is: ``` wget -O /ail-framework/ardb/src/../deps/jemalloc-5.1.0.tar.bz2 https://github.com/jemalloc/jemalloc/releases/download/5.1.0/jemalloc-5.1.0.tar.bz2 && \ ``` --- installing_deps.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/installing_deps.sh b/installing_deps.sh index 1380a02c..c4b1ad30 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -11,6 +11,9 @@ sudo apt-get update sudo apt-get install python3-pip virtualenv python3-dev python3-tk libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev cmake -qq +#Needed for downloading jemalloc +sudo apt-get install wget -qq + #optional tor install sudo apt-get install tor -qq From fcf83b21bfb5a2c7b94c785d2fbe706d40b585c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Simon=20Sigr=C3=A9?= Date: Mon, 28 Dec 2020 09:29:53 +1100 Subject: [PATCH 32/43] Typo in placeholder "Optionnal" --> "Optional" Updated the placeholder value to correct a typo: "Optionnal" --> "Optional" --- var/www/modules/PasteSubmit/templates/submit_items.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/var/www/modules/PasteSubmit/templates/submit_items.html b/var/www/modules/PasteSubmit/templates/submit_items.html index 147adfcd..18b62c2a 100644 --- a/var/www/modules/PasteSubmit/templates/submit_items.html +++ b/var/www/modules/PasteSubmit/templates/submit_items.html @@ -112,7 +112,7 @@
    - +
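A quick smoke test for the pycld3 dependency added in PATCH 30 above; the sample strings are arbitrary, and the attribute names match the ones already used by `get_item_languages()` in `bin/packages/Item.py`:

```
#!/usr/bin/env python3
# Minimal pycld3 check: detect the language of a short text, then list the
# most frequent languages of a longer mixed-language text.
import cld3

pred = cld3.get_language("Ceci est un petit texte d'exemple en francais.")
print(pred.language, pred.probability, pred.is_reliable)

mixed = "This paragraph is mostly English. Ce paragraphe est surtout en francais."
for lang in cld3.get_frequent_languages(mixed, num_langs=3):
    print(lang.language, lang.proportion, lang.probability, lang.is_reliable)
```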
    From dcd3a810f1690199df8e8a1fbbda474284847c16 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 6 Jan 2021 15:45:31 +0100 Subject: [PATCH 33/43] fix: [background updater] remove completed updates --- bin/update-background.py | 2 +- installing_deps.sh | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/bin/update-background.py b/bin/update-background.py index 838ddf7b..981545c1 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -34,7 +34,7 @@ def launch_background_upgrade(version, script_name): r_serv.delete('ail:current_background_script') r_serv.delete('ail:current_background_script_stat') r_serv.delete('ail:current_background_update') - r_serv.srem('ail:to_update', new_version) + r_serv.srem('ail:to_update', version) if __name__ == "__main__": diff --git a/installing_deps.sh b/installing_deps.sh index c4b1ad30..4206d48c 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -88,6 +88,13 @@ pushd ardb/ make popd +# KVROCKS # +# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git +# pushd kvrocks/ +# make -j4 +# popd + +# Config File if [ ! -f configs/core.cfg ]; then cp configs/core.cfg.sample configs/core.cfg fi From 054d15a446bee7e8acf75aea08e98c02ba789419 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 6 Jan 2021 15:56:03 +0100 Subject: [PATCH 34/43] fix: [v3.4 update] fix update progress --- update/v3.4/Update_domain.py | 1 + 1 file changed, 1 insertion(+) diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py index 092830c6..a065da0e 100755 --- a/update/v3.4/Update_domain.py +++ b/update/v3.4/Update_domain.py @@ -54,4 +54,5 @@ if __name__ == '__main__': update_update_stats() else: + r_serv_db.set('ail:current_background_script_stat', 100) sys.exit(0) From 11d537e2eb028910748990c5592ab614d4cf91d2 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 8 Jan 2021 17:37:18 +0100 Subject: [PATCH 35/43] chg: [screenshot + har directory] add option to change screenshots directory --- bin/lib/ConfigLoader.py | 9 +++++++++ bin/lib/Screenshot.py | 2 +- bin/packages/HiddenServices.py | 7 +++---- bin/packages/Item.py | 5 +++-- bin/torcrawler/TorSplashCrawler.py | 8 ++++++-- configs/core.cfg.sample | 9 ++++++++- update/v1.5/Update-ARDB_Onions_screenshots.py | 2 +- var/www/modules/Flask_config.py | 2 +- 8 files changed, 32 insertions(+), 12 deletions(-) diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py index c244b2e5..262a44bd 100755 --- a/bin/lib/ConfigLoader.py +++ b/bin/lib/ConfigLoader.py @@ -41,6 +41,15 @@ class ConfigLoader(object): db=self.cfg.getint(redis_name, "db"), decode_responses=decode_responses ) + def get_files_directory(self, key_name): + directory_path = self.cfg.get('Directories', key_name) + # full path + if directory_path[0] == '/': + return directory_path + else: + directory_path = os.path.join(os.environ['AIL_HOME'], directory_path) + return directory_path + def get_config_str(self, section, key_name): return self.cfg.get(section, key_name) diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py index 46141e30..3f198f52 100755 --- a/bin/lib/Screenshot.py +++ b/bin/lib/Screenshot.py @@ -20,7 +20,7 @@ import ConfigLoader config_loader = ConfigLoader.ConfigLoader() r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +SCREENSHOT_FOLDER = 
config_loader.get_files_directory('screenshot') config_loader = None # get screenshot relative path diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py index 8ed7372b..7b0c444a 100755 --- a/bin/packages/HiddenServices.py +++ b/bin/packages/HiddenServices.py @@ -60,15 +60,14 @@ class HiddenServices(object): self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled")) self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled") - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - self.screenshot_directory_screenshot = os.path.join(self.screenshot_directory, 'screenshot') + self.screenshot_directory = config_loader.get_files_directory('screenshot') elif type == 'i2p': self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) + self.screenshot_directory = config_loader.get_files_directory('screenshot') else: ## TODO: # FIXME: add error pass - + config_loader = None #def remove_absolute_path_link(self, key, value): diff --git a/bin/packages/Item.py b/bin/packages/Item.py index e2b08f7d..36a236e0 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -34,7 +34,8 @@ PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '') r_cache = config_loader.get_redis_conn("Redis_Cache") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) +screenshot_directory = config_loader.get_files_directory('screenshot') +har_directory = config_loader.get_files_directory('har') config_loader = None @@ -388,7 +389,7 @@ def get_item_screenshot(item_id): return '' def get_item_har_name(item_id): - har_path = os.path.join(screenshot_directory, item_id) + '.json' + har_path = os.path.join(har_directory, item_id) + '.json' if os.path.isfile(har_path): return har_path else: diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 13e6aaa6..17438d60 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN']) from Helper import Process sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -#import ConfigLoader +import ConfigLoader import Screenshot import crawlers @@ -133,7 +133,11 @@ class TorSplashCrawler(): config_section = 'Crawler' self.p = Process(config_section) self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str ) - self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str ) + + config_loader = ConfigLoader.ConfigLoader() + self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str ) + config_loader = None + self.r_serv_log_submit = redis.StrictRedis( host=self.p.config.get("Redis_Log_submit", "host"), port=self.p.config.getint("Redis_Log_submit", "port"), diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 8a7b86a7..9dd00e3b 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -4,7 +4,8 @@ 
dicofilters = Dicos pastes = PASTES hash = HASHS crawled = crawled -crawled_screenshot = CRAWLED_SCREENSHOT +har = CRAWLED_SCREENSHOT +screenshot = CRAWLED_SCREENSHOT/screenshot wordtrending_csv = var/www/static/csv/wordstrendingdata wordsfile = files/wordfile @@ -221,6 +222,11 @@ host = localhost port = 6382 db = 10 +[Kvrocks_Meta] +host = localhost +port = 6383 +db = 0 + [Url] cc_critical = DE @@ -278,6 +284,7 @@ default_crawler_closespider_pagecount = 50 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 splash_url = http://127.0.0.1 splash_port = 8050-8052 +domain_proxy = onion.foundation [IP] # list of comma-separated CIDR that you wish to be alerted for. e.g: diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py index 3327878f..41f17c4a 100755 --- a/update/v1.5/Update-ARDB_Onions_screenshots.py +++ b/update/v1.5/Update-ARDB_Onions_screenshots.py @@ -33,7 +33,7 @@ if __name__ == '__main__': config_loader = ConfigLoader.ConfigLoader() SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') + NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index a1a8de6b..7d11e484 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -100,7 +100,7 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') REPO_ORIGIN = 'https://github.com/ail-project/ail-framework.git' From 2fb03baf509775896f62245339be65f5b604f132 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 22 Jan 2021 17:03:43 +0100 Subject: [PATCH 36/43] fix: [background updater] Don't launch updates on fresh install --- bin/update-background.py | 119 ++++-------------- update/v1.5/Update-ARDB_Onions_screenshots.py | 3 + update/v1.5/Update.py | 7 +- update/v2.4/Update.py | 7 +- update/v2.4/Update_domain.py | 5 + update/v2.6/Update.py | 4 - update/v2.6/Update_screenshots.py | 6 - update/v2.7/Update.py | 4 - update/v2.7/Update_domain_tags.py | 6 - update/v3.4/Update.py | 5 +- update/v3.4/Update_domain.py | 2 + 11 files changed, 40 insertions(+), 128 deletions(-) diff --git a/bin/update-background.py b/bin/update-background.py index 981545c1..d4e7b359 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -17,15 +17,20 @@ import subprocess sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -def launch_background_upgrade(version, script_name): +def launch_background_upgrade(version, l_script_name): if r_serv.sismember('ail:to_update', version): r_serv.delete('ail:update_error') r_serv.set('ail:update_in_progress', version) r_serv.set('ail:current_background_update', version) - r_serv.set('ail:current_background_script', 'domain tags update') - 
update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name) - process = subprocess.run(['python' ,update_file]) + for script_name in l_script_name: + r_serv.set('ail:current_background_script', script_name) + update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name) + process = subprocess.run(['python' ,update_file]) + update_progress = r_serv.get('ail:current_background_script_stat') + #if update_progress: + # if int(update_progress) != 100: + # r_serv.set('ail:update_error', 'Update {} Failed'.format(version)) update_progress = r_serv.get('ail:current_background_script_stat') if update_progress: @@ -36,6 +41,13 @@ def launch_background_upgrade(version, script_name): r_serv.delete('ail:current_background_update') r_serv.srem('ail:to_update', version) +def clean_update_db(): + r_serv.delete('ail:update_error') + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + if __name__ == "__main__": config_loader = ConfigLoader.ConfigLoader() @@ -44,97 +56,12 @@ if __name__ == "__main__": r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - if r_serv.scard('ail:update_v1.5') != 5: - r_serv.delete('ail:update_error') - r_serv.set('ail:update_in_progress', 'v1.5') - r_serv.set('ail:current_background_update', 'v1.5') - if not r_serv.sismember('ail:update_v1.5', 'onions'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'metadata'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Metadata.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'tags'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'tags_background'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags_background.py') - process = subprocess.run(['python' ,update_file]) - if not r_serv.sismember('ail:update_v1.5', 'crawled_screenshot'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions_screenshots.py') - process = subprocess.run(['python' ,update_file]) - if r_serv.scard('ail:update_v1.5') != 5: - r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script') - else: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - - if r_serv.get('ail:current_background_update') == 'v2.4': - r_serv.delete('ail:update_error') - r_serv.set('ail:update_in_progress', 'v2.4') - r_serv.set('ail:current_background_update', 'v2.4') - r_serv.set('ail:current_background_script', 'domain update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py') - process = subprocess.run(['python' ,update_file]) + if r_serv.scard('ail:to_update') == 0: + clean_update_db() - if int(r_serv_onion.scard('domain_update_v2.4')) != 0: - r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script') - else: - r_serv.delete('ail:update_in_progress') - 
r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.delete('update:nb_elem_to_convert') - r_serv.delete('update:nb_elem_converted') - - if r_serv.sismember('ail:to_update', 'v2.6'): - new_version = 'v2.6' - r_serv.delete('ail:update_error') - r_serv.delete('ail:current_background_script_stat') - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.set('ail:current_background_script', 'screenshot update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_screenshots.py') - process = subprocess.run(['python' ,update_file]) - - update_progress = r_serv.get('ail:current_background_script_stat') - if update_progress: - if int(update_progress) == 100: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.srem('ail:to_update', new_version) - - elif r_serv.sismember('ail:to_update', 'v2.7'): - new_version = 'v2.7' - r_serv.delete('ail:update_error') - r_serv.delete('ail:current_background_script_stat') - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.set('ail:current_background_script', 'domain tags update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_domain_tags.py') - process = subprocess.run(['python' ,update_file]) - - update_progress = r_serv.get('ail:current_background_script_stat') - if update_progress: - if int(update_progress) == 100: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.srem('ail:to_update', new_version) - - launch_background_upgrade('v2.6', 'Update_screenshots.py') - launch_background_upgrade('v2.7', 'Update_domain_tags.py') - - launch_background_upgrade('v3.4', 'Update_domain.py') + launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py', 'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py']) + launch_background_upgrade('v2.4', ['Update_domain.py']) + launch_background_upgrade('v2.6', ['Update_screenshots.py']) + launch_background_upgrade('v2.7', ['Update_domain_tags.py']) + launch_background_upgrade('v3.4', ['Update_domain.py']) diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py index 41f17c4a..442f979c 100755 --- a/update/v1.5/Update-ARDB_Onions_screenshots.py +++ b/update/v1.5/Update-ARDB_Onions_screenshots.py @@ -111,4 +111,7 @@ if __name__ == '__main__': print() print('Done in {} s'.format(end - start_deb)) + r_serv.set('ail:current_background_script_stat', 100) r_serv.sadd('ail:update_v1.5', 'crawled_screenshot') + if r_serv.scard('ail:update_v1.5') != 5: + r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script') diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py index dee56e44..be80c76d 100755 --- a/update/v1.5/Update.py +++ b/update/v1.5/Update.py @@ -43,13 +43,12 @@ if __name__ == '__main__': print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) print() + # Add background update + r_serv.sadd('ail:to_update', 'v1.5') + #Set current ail version 
r_serv.set('ail:version', 'v1.5') - #Set current update_in_progress - r_serv.set('ail:update_in_progress', 'v1.5') - r_serv.set('ail:current_background_update', 'v1.5') - #Set current ail version r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.4/Update.py b/update/v2.4/Update.py index 53456330..7728fab7 100755 --- a/update/v2.4/Update.py +++ b/update/v2.4/Update.py @@ -22,14 +22,13 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up') r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4')) r_serv.set('update:nb_elem_converted',0) + # Add background update + r_serv.sadd('ail:to_update', new_version) + #Set current ail version r_serv.set('ail:version', new_version) diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py index addaedb6..da4ba01d 100755 --- a/update/v2.4/Update_domain.py +++ b/update/v2.4/Update_domain.py @@ -56,6 +56,8 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None + r_serv.set('ail:current_background_script', 'domain update') + nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') if not nb_elem_to_update: nb_elem_to_update = 0 @@ -78,4 +80,7 @@ if __name__ == '__main__': update_update_stats() else: + r_serv.delete('update:nb_elem_to_convert') + r_serv.delete('update:nb_elem_converted') + r_serv_db.set('ail:current_background_script_stat', 100) sys.exit(0) diff --git a/update/v2.6/Update.py b/update/v2.6/Update.py index c8cdb5ac..4245eade 100755 --- a/update/v2.6/Update.py +++ b/update/v2.6/Update.py @@ -21,10 +21,6 @@ if __name__ == '__main__': r_serv = config_loader.get_redis_conn("ARDB_DB") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.sadd('ail:to_update', new_version) #Set current ail version diff --git a/update/v2.6/Update_screenshots.py b/update/v2.6/Update_screenshots.py index 38ba0366..9716dc67 100755 --- a/update/v2.6/Update_screenshots.py +++ b/update/v2.6/Update_screenshots.py @@ -93,9 +93,3 @@ if __name__ == '__main__': end = time.time() print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb)) - - r_serv_db.delete('ail:update_in_progress') - r_serv_db.delete('ail:current_background_script') - r_serv_db.delete('ail:current_background_script_stat') - r_serv_db.delete('ail:current_background_update') - r_serv_db.srem('ail:to_update', 'v2.6') diff --git a/update/v2.7/Update.py b/update/v2.7/Update.py index 1f4ead2c..fa6f539c 100755 --- a/update/v2.7/Update.py +++ b/update/v2.7/Update.py @@ -23,10 +23,6 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.sadd('ail:to_update', new_version) #### Update tags #### diff --git a/update/v2.7/Update_domain_tags.py b/update/v2.7/Update_domain_tags.py index 937110af..1f9f5eca 100755 --- a/update/v2.7/Update_domain_tags.py +++ b/update/v2.7/Update_domain_tags.py @@ -116,9 +116,3 @@ if __name__ == '__main__': end = time.time() 
print('ALL domains tags updated in {} s'.format(end - start_deb)) - - r_serv_db.delete('ail:update_in_progress') - r_serv_db.delete('ail:current_background_script') - r_serv_db.delete('ail:current_background_script_stat') - r_serv_db.delete('ail:current_background_update') - r_serv_db.srem('ail:to_update', update_version) diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py index cc7b739b..943d4b3a 100755 --- a/update/v3.4/Update.py +++ b/update/v3.4/Update.py @@ -22,14 +22,11 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4')) r_serv.set('update:nb_elem_converted',0) + # Add background update r_serv.sadd('ail:to_update', new_version) #Set current ail version diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py index a065da0e..72ba166d 100755 --- a/update/v3.4/Update_domain.py +++ b/update/v3.4/Update_domain.py @@ -31,6 +31,8 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None + r_serv.set('ail:current_background_script', 'domain languages update') + nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') if not nb_elem_to_update: nb_elem_to_update = 1 From 89e95ca030a562366306e55ebd312e820e0c2c13 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 3 Feb 2021 14:22:26 +0100 Subject: [PATCH 37/43] fix: [redis cache] remove old paste_name db --- .gitignore | 3 +++ bin/ModuleStats.py | 11 ++--------- configs/core.cfg.sample | 5 ----- var/www/modules/Flask_config.py | 1 - var/www/modules/search/Flask_search.py | 13 +------------ var/www/modules/showpaste/Flask_showpaste.py | 2 -- var/www/templates/domains/card_img_domain.html | 6 +++++- 7 files changed, 11 insertions(+), 30 deletions(-) diff --git a/.gitignore b/.gitignore index ac4837a3..046d0cbc 100644 --- a/.gitignore +++ b/.gitignore @@ -44,6 +44,9 @@ configs/update.cfg update/current_version files +# Trackers +bin/trackers/yara/custom-rules/* + # Helper bin/helper/gen_cert/rootCA.* bin/helper/gen_cert/server.* diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index cfdb82f7..7cf67299 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -57,7 +57,7 @@ def compute_most_posted(server, message): print(redis_progression_name_set) -def compute_provider_info(server_trend, server_pasteName, path): +def compute_provider_info(server_trend, path): redis_all_provider = 'all_provider_set' paste = Paste.Paste(path) @@ -71,7 +71,6 @@ def compute_provider_info(server_trend, server_pasteName, path): redis_providers_name_set = 'providers_set_' + paste_date # Add/Update in Redis - server_pasteName.sadd(paste_baseName, path) server_trend.sadd(redis_all_provider, paste_provider) num_paste = int(server_trend.hincrby(paste_provider+'_num', paste_date, 1)) @@ -137,12 +136,6 @@ if __name__ == '__main__': db=p.config.get("ARDB_Trending", "db"), decode_responses=True) - r_serv_pasteName = redis.StrictRedis( - host=p.config.get("Redis_Paste_Name", "host"), - port=p.config.get("Redis_Paste_Name", "port"), - db=p.config.get("Redis_Paste_Name", "db"), - decode_responses=True) - # Endless loop getting messages from the input queue while True: # Get one message from the input queue @@ -159,4 +152,4 @@ if __name__ 
== '__main__': if len(message.split(';')) > 1: compute_most_posted(r_serv_trend, message) else: - compute_provider_info(r_serv_trend, r_serv_pasteName, message) + compute_provider_info(r_serv_trend, message) diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 9dd00e3b..c6988bbb 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -147,11 +147,6 @@ host = localhost port = 6379 db = 1 -[Redis_Paste_Name] -host = localhost -port = 6379 -db = 2 - [Redis_Mixer_Cache] host = localhost port = 6381 diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 7d11e484..574c9950 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -26,7 +26,6 @@ r_serv_charts = config_loader.get_redis_conn("ARDB_Trending") r_serv_sentiment = config_loader.get_redis_conn("ARDB_Sentiment") r_serv_term = config_loader.get_redis_conn("ARDB_Tracker") r_serv_cred = config_loader.get_redis_conn("ARDB_TermCred") -r_serv_pasteName = config_loader.get_redis_conn("Redis_Paste_Name") r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_serv_db = config_loader.get_redis_conn("ARDB_DB") diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py index ff5395e6..cbc1f633 100644 --- a/var/www/modules/search/Flask_search.py +++ b/var/www/modules/search/Flask_search.py @@ -27,7 +27,6 @@ import Flask_config app = Flask_config.app config_loader = Flask_config.config_loader baseUrl = Flask_config.baseUrl -r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal @@ -116,17 +115,7 @@ def search(): selected_index = os.path.join(baseindexpath, index_name) ''' temporary disabled - # Search filename - for path in r_serv_pasteName.smembers(q[0]): - r.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content() - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) + # # TODO: search by filename/item id ''' # Search full line diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 7fc209ba..8bdf0984 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -32,11 +32,9 @@ import Flask_config app = Flask_config.app baseUrl = Flask_config.baseUrl -r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata r_serv_tags = Flask_config.r_serv_tags r_serv_statistics = Flask_config.r_serv_statistics -r_serv_onion = Flask_config.r_serv_onion max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal DiffMaxLineLength = Flask_config.DiffMaxLineLength diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html index 3df796e8..fbb67bc8 100644 --- a/var/www/templates/domains/card_img_domain.html +++ b/var/www/templates/domains/card_img_domain.html @@ -11,7 +11,11 @@
    - {{dict_domain["id"]}} + {% if 'hl-start' in dict_domain %} + {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}} + {% else %} + {{dict_domain["id"]}} + {% endif %} {% if dict_domain["status"] %} From 3482a85410f8685a2901e748e95973b88012eee6 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 3 Feb 2021 14:58:27 +0100 Subject: [PATCH 38/43] fix: [redis cache] remove old Redis_Data_Merging db --- bin/Attributes.py | 50 ------------------------------ bin/CreditCards.py | 9 +----- bin/Onion.py | 3 -- bin/Web.py | 10 +----- bin/packages/Paste.py | 66 +++++++++++++--------------------------- bin/packages/modules.cfg | 11 +++---- configs/core.cfg.sample | 5 --- 7 files changed, 28 insertions(+), 126 deletions(-) delete mode 100755 bin/Attributes.py diff --git a/bin/Attributes.py b/bin/Attributes.py deleted file mode 100755 index 74357065..00000000 --- a/bin/Attributes.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -""" -The ZMQ_Sub_Attribute Module -============================ - -This module is saving Attribute of the paste into redis - -""" -import time -from packages import Paste -from pubsublogger import publisher - -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Attributes' - - p = Process(config_section) - - # FUNCTIONS # - publisher.info("Attribute is Running") - - while True: - try: - message = p.get_from_set() - - if message is not None: - PST = Paste.Paste(message) - else: - publisher.debug("Script Attribute is idling 1s") - print('sleeping') - time.sleep(1) - continue - - # FIXME do it directly in the class - PST.save_attribute_redis("p_encoding", PST._get_p_encoding()) - #PST.save_attribute_redis("p_language", PST._get_p_language()) - # FIXME why not all saving everything there. - PST.save_all_attributes_redis() - # FIXME Not used. - PST.store.sadd("Pastes_Objects", PST.p_rel_path) - except IOError: - print("CRC Checksum Failed on :", PST.p_rel_path) - publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( - PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 0c6bdf3f..456e474a 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -31,14 +31,10 @@ if __name__ == "__main__": p = Process(config_section) # FUNCTIONS # - publisher.info("Creditcard script subscribed to channel creditcard_categ") - + publisher.info("CreditCards script started") creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" - # FIXME For retro compatibility - channel = 'creditcard_categ' - # Source: http://www.richardsramblings.com/regex/credit-card-numbers/ cards = [ r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b', # 16-digit VISA, with separators @@ -69,9 +65,6 @@ if __name__ == "__main__": print(clean_card, 'is valid') creditcard_set.add(clean_card) - paste.__setattr__(channel, creditcard_set) - paste.save_attribute_redis(channel, creditcard_set) - pprint.pprint(creditcard_set) to_print = 'CreditCard;{};{};{};'.format( paste.p_source, paste.p_date, paste.p_name) diff --git a/bin/Onion.py b/bin/Onion.py index ad1fe5bb..2b6be55e 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -193,9 +193,6 @@ if __name__ == "__main__": r_onion.sadd('i2p_crawler_queue', msg) ''' - # Saving the list of extracted onion domains. 
- PST.__setattr__(channel, domains_list) - PST.save_attribute_redis(channel, domains_list) to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) diff --git a/bin/Web.py b/bin/Web.py index ca4366e8..68e37c25 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -58,10 +58,7 @@ if __name__ == "__main__": cc_critical = p.config.get("Url", "cc_critical") # FUNCTIONS # - publisher.info("Script URL subscribed to channel web_categ") - - # FIXME For retro compatibility - channel = 'web_categ' + publisher.info("Script URL Started") message = p.get_from_set() prec_filename = None @@ -150,11 +147,6 @@ if __name__ == "__main__": domains_list) if A_values[0] >= 1: - PST.__setattr__(channel, A_values) - PST.save_attribute_redis(channel, (A_values[0], - list(A_values[1]))) - - pprint.pprint(A_values) publisher.info('Url;{};{};{};Checked {} URL;{}'.format( PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path)) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index f6695eba..65c3ca46 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -62,7 +62,6 @@ class Paste(object): config_loader = ConfigLoader.ConfigLoader() self.cache = config_loader.get_redis_conn("Redis_Queues") - self.store = config_loader.get_redis_conn("Redis_Data_Merging") self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata") self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) @@ -327,50 +326,27 @@ class Paste(object): def get_p_date_path(self): return self.p_date_path - def save_all_attributes_redis(self, key=None): - """ - Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) - - :param r_serv: -- Connexion to the Database. - :param key: -- Key of an additionnal set. - - Example: - import redis - - r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0) - - PST = Paste("/home/Zkopkmlk.gz") - PST.save_all_attributes_redis(r_serv) - - """ - # LevelDB Compatibility - p = self.store.pipeline(False) - p.hset(self.p_path, "p_name", self.p_name) - p.hset(self.p_path, "p_size", self.p_size) - p.hset(self.p_path, "p_mime", self.p_mime) - # p.hset(self.p_path, "p_encoding", self.p_encoding) - p.hset(self.p_path, "p_date", self._get_p_date()) - p.hset(self.p_path, "p_hash_kind", self._get_p_hash_kind()) - p.hset(self.p_path, "p_hash", self.p_hash) - # p.hset(self.p_path, "p_langage", self.p_langage) - # p.hset(self.p_path, "p_nb_lines", self.p_nb_lines) - # p.hset(self.p_path, "p_max_length_line", self.p_max_length_line) - # p.hset(self.p_path, "p_categories", self.p_categories) - p.hset(self.p_path, "p_source", self.p_source) - if key is not None: - p.sadd(key, self.p_path) - else: - pass - p.execute() - - def save_attribute_redis(self, attr_name, value): - """ - Save an attribute as a field - """ - if type(value) == set: - self.store.hset(self.p_path, attr_name, json.dumps(list(value))) - else: - self.store.hset(self.p_path, attr_name, json.dumps(value)) + # def save_all_attributes_redis(self, key=None): + # """ + # Saving all the attributes in a "Redis-like" Database (Redis, LevelDB) + # + # :param r_serv: -- Connexion to the Database. + # :param key: -- Key of an additionnal set. 
+ # + # Example: + # import redis + # + # r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0) + # + # PST = Paste("/home/Zkopkmlk.gz") + # PST.save_all_attributes_redis(r_serv) + # + # """ + # + # def save_attribute_redis(self, attr_name, value): + # """ + # Save an attribute as a field + # """ def save_attribute_duplicate(self, value): """ diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index cf65a126..ed3c466e 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -20,9 +20,6 @@ subscribe = Redis_Duplicate [Indexer] subscribe = Redis_Global -[Attributes] -subscribe = Redis_Global - [DomClassifier] subscribe = Redis_Global @@ -67,15 +64,17 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags [Onion] subscribe = Redis_Onion -publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler -#publish = Redis_Global,Redis_ValidOnion,ZMQ_FetchedOnion +publish = Redis_ValidOnion,Redis_Tags,Redis_Crawler +#publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler +# TODO remove me [DumpValidOnion] subscribe = Redis_ValidOnion [Web] subscribe = Redis_Web -publish = Redis_Url,ZMQ_Url +publish = Redis_Url +#publish = Redis_Url,ZMQ_Url [WebStats] subscribe = Redis_Url diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index c6988bbb..669550b9 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -142,11 +142,6 @@ host = localhost port = 6381 db = 0 -[Redis_Data_Merging] -host = localhost -port = 6379 -db = 1 - [Redis_Mixer_Cache] host = localhost port = 6381 From bbee3691b3ec105d8ae02bdc9629b9497b408bd2 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 4 Feb 2021 17:24:14 +0100 Subject: [PATCH 39/43] fix: [showDomain] empty father field --- .../crawler/crawler_splash/showDomain.html | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index c63dd5b5..e2ae19fb 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -106,26 +106,28 @@ Last Origin: - {% if dict_domain['father']=='manual' or dict_domain['father']=='auto' %} + {% if dict_domain %} + {% if dict_domain['father']=='manual' or dict_domain['father']=='auto' %} + + + {{ dict_domain['father'] }} + + {%else%} - - {{ dict_domain['father'] }} + + + {{ dict_domain['father']['item_father'] }} + - {%else%} - - - - {{ dict_domain['father']['item_father'] }} - - - {% if dict_domain['father']['domain_father'] %} - - - {{ dict_domain['father']['domain_father'] }} - + {% if dict_domain['father']['domain_father'] %} + + + {{ dict_domain['father']['domain_father'] }} + + {%endif%} +

    + {%endif%} {%endif%} -

    - {%endif%} From a1fe49192ba3867ee7e3593afa8078f8094a6bef Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 4 Feb 2021 17:28:56 +0100 Subject: [PATCH 40/43] fix: [showDomain] empty father field --- var/www/templates/crawler/crawler_splash/showDomain.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index e2ae19fb..dc6ff373 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -106,7 +106,7 @@ Last Origin: - {% if dict_domain %} + {% if 'father' in dict_domain %} {% if dict_domain['father']=='manual' or dict_domain['father']=='auto' %} From d941d8abb40ff4bb804b40ad564b973ed0bc2d01 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 5 Feb 2021 17:42:33 +0100 Subject: [PATCH 41/43] chg: [domains search] search domains by name --- bin/lib/Domain.py | 52 ++++- bin/lib/crawlers.py | 14 ++ var/www/blueprints/crawler_splash.py | 19 ++ .../templates/Crawler_dashboard.html | 18 +- var/www/static/css/ail-project.css | 7 + .../domains/block_domains_name_search.html | 50 +++++ .../templates/domains/card_img_domain.html | 2 +- .../domains/domains_result_list.html | 195 ++++++++++++++++++ 8 files changed, 338 insertions(+), 19 deletions(-) create mode 100644 var/www/static/css/ail-project.css create mode 100644 var/www/templates/domains/block_domains_name_search.html create mode 100644 var/www/templates/domains/domains_result_list.html diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index 5eca1943..60d78967 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -10,9 +10,10 @@ The ``Domain`` import os import sys import itertools -import time +import re import redis import random +import time sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Cryptocurrency @@ -241,6 +242,52 @@ def create_domains_metadata_list(list_domains, domain_type): ports=True, tags=True, languages=True, screenshot=True, tags_safe=True)) return l_domains +def sanithyse_domain_name_to_search(name_to_search, domain_type): + if domain_type == 'onion': + r_name = r'[a-z0-9\.]+' + else: + r_name = r'[a-zA-Z0-9\.-_]+' + # invalid domain name + if not re.fullmatch(r_name, name_to_search): + return None + return name_to_search.replace('.', '\.') + + +def search_domains_by_name(name_to_search, domain_types, r_pos=False): + domains_dict = {} + for domain_type in domain_types: + r_name = sanithyse_domain_name_to_search(name_to_search, domain_type) + if not name_to_search: + break + r_name = re.compile(r_name) + for domain in get_all_domains_up(domain_type): + res = re.search(r_name, domain) + if res: + domains_dict[domain] = {} + if r_pos: + domains_dict[domain]['hl-start'] = res.start() + domains_dict[domain]['hl-end'] = res.end() + return domains_dict + +def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1): + domains_types = sanitize_domain_types(domains_types) + domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True) + l_domains = sorted(domains_dict.keys()) + l_domains = paginate_iterator(l_domains, nb_obj=28, page=page) + if not domains_metadata: + return l_domains + else: + l_dict_domains = [] + for domain in l_domains['list_elem']: + dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True, + status=True, ports=True, tags=True, tags_safe=True, + languages=True, screenshot=True) + 
dict_domain = {**domains_dict[domain], **dict_domain} + l_dict_domains.append(dict_domain) + l_domains['list_elem'] = l_dict_domains + l_domains['search'] = name_to_search + return l_domains + ######## LANGUAGES ######## def get_all_domains_languages(): @@ -940,3 +987,6 @@ class Domain(object): ''' port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port) return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag) + +if __name__ == '__main__': + search_domains_by_name('c', 'onion') diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 8a6817f5..ed60fb62 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -47,6 +47,20 @@ faup = Faup() def generate_uuid(): return str(uuid.uuid4()).replace('-', '') +def is_valid_onion_domain(domain): + if not domain.endswith('.onion'): + return False + domain = domain.replace('.onion', '', 1) + if len(domain) == 16: # v2 address + r_onion = r'[a-z0-9]{16}' + if re.match(r_onion, domain): + return True + elif len(domain) == 56: # v3 address + r_onion = r'[a-z0-9]{56}' + if re.fullmatch(r_onion, domain): + return True + return False + ################################################################################ # # TODO: handle prefix cookies diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 5da9b633..f80b3967 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -232,6 +232,25 @@ def domains_search_languages_get(): l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, current_languages=languages, domains_types=domains_types) +@crawler_splash.route('/domains/name/search', methods=['GET']) +@login_required +@login_analyst +def domains_search_name(): + name = request.args.get('name') + page = request.args.get('page') + try: + page = int(page) + except: + page = 1 + domains_types = request.args.getlist('domain_types') + if domains_types: + domains_types = domains_types[0].split(',') + + l_dict_domains = Domain.api_search_domains_by_name(name, domains_types, domains_metadata=True, page=page) + return render_template("domains/domains_result_list.html", template_folder='../../', + l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, + domains_types=domains_types) + ##-- --## diff --git a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html index 5cc7f987..86c82476 100644 --- a/var/www/modules/hiddenServices/templates/Crawler_dashboard.html +++ b/var/www/modules/hiddenServices/templates/Crawler_dashboard.html @@ -105,23 +105,7 @@

    -
    -
    -
    -
    Show Domain:
    -
    -
    - -
    - -
    -
    -
    -
    -
    -
    + {% include 'domains/block_domains_name_search.html' %}
    diff --git a/var/www/static/css/ail-project.css b/var/www/static/css/ail-project.css new file mode 100644 index 00000000..ba4d461b --- /dev/null +++ b/var/www/static/css/ail-project.css @@ -0,0 +1,7 @@ +.hg-text{ + padding-top: 0.2em; + padding-bottom: 0.2em; + padding-right: 0.15em; + padding-left: 0.15em; + background-color: #2e5; +} diff --git a/var/www/templates/domains/block_domains_name_search.html b/var/www/templates/domains/block_domains_name_search.html new file mode 100644 index 00000000..ca5e7cbe --- /dev/null +++ b/var/www/templates/domains/block_domains_name_search.html @@ -0,0 +1,50 @@ +
    +
    +
    +
    Search Domain by name:
    +
    + +
    + +
    +
    + +
    +
    + + +
    +
    + + +
    +
    + +
    +
    +
    + + diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html index fbb67bc8..fb5480f1 100644 --- a/var/www/templates/domains/card_img_domain.html +++ b/var/www/templates/domains/card_img_domain.html @@ -12,7 +12,7 @@
    {% if 'hl-start' in dict_domain %} - {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}} + {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}} {% else %} {{dict_domain["id"]}} {% endif %} diff --git a/var/www/templates/domains/domains_result_list.html b/var/www/templates/domains/domains_result_list.html new file mode 100644 index 00000000..168cf530 --- /dev/null +++ b/var/www/templates/domains/domains_result_list.html @@ -0,0 +1,195 @@ + + + + Domain Search - AIL + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
    +
    + + {% include 'crawler/menu_sidebar.html' %} + +
    + +
    +
    + + {% with page=l_dict_domains['page'], search=l_dict_domains['search'] %} + {% include 'domains/block_domains_name_search.html' %} + {% endwith %} + + +
    +
    + +
    +
    +
    +
    + +
    +
    + +
    +
    + +
    +
    +
    +
    + +
    +
    + + + {% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %} + {% include 'domains/card_img_domain.html' %} + {% endwith %} + +
    +
    + + {%if l_dict_domains['list_elem']%} + {% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %} + {% set target_url=url_for('crawler_splash.domains_search_name') + "?name=" + l_dict_domains['search']%} + {%if domains_types %} + {% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%} + {%endif%} + {% include 'pagination.html' %} + {% endwith %} + {%endif%} + + +
    +
    +
    + + + + + + + + + From 056b6006f64f29fe437ae9081e06f32a6c87fd24 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 10 Feb 2021 15:27:31 +0100 Subject: [PATCH 42/43] fix: [gitignore] --- .gitignore | 3 +++ bin/lib/item_basic.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/.gitignore b/.gitignore index 046d0cbc..f9d2344e 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,9 @@ DATA_ARDB indexdir/ logs/ old/ +pgpdump/ +temp/ + DEFAULT_PASSWORD diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index 9ad6161d..010e30b6 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -166,3 +166,38 @@ def add_map_obj_id_item_id(obj_id, item_id, obj_type): # delete twitter id ##-- --## + +## COMMON ## +def _get_dir_source_name(directory, source_name=None, l_sources_name=set()): + if source_name: + l_dir = os.listdir(os.path.join(directory, source_name)) + else: + l_dir = os.listdir(directory) + # empty directory + if not l_dir: + return l_sources_name.add(source_name) + return l_sources_name + else: + for src_name in l_dir: + if len(src_name) == 4: + try: + int(src_name) + l_sources_name.add(os.path.join(source_name)) + return l_sources_name + except: + pass + if source_name: + src_name = os.path.join(source_name, src_name) + l_sources_name = _get_dir_source_name(directory, source_name=src_name, l_sources_name=l_sources_name) + return l_sources_name + + +def get_all_items_sources(): + res = _get_dir_source_name(PASTES_FOLDER) + print(res) + +##-- --## + + +if __name__ == '__main__': + get_all_items_sources() From e28326a6cf31fa061c3848c22e299c4c13cae648 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 10 Feb 2021 15:28:56 +0100 Subject: [PATCH 43/43] fix: [OVERVIEW] --- OVERVIEW.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OVERVIEW.md b/OVERVIEW.md index 3ff870b4..316942cb 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -16,7 +16,7 @@ Redis and ARDB overview DB 1 - Curve DB 2 - TermFreq - DB 3 - Trending + DB 3 - Trending/Trackers DB 4 - Sentiments DB 5 - TermCred DB 6 - Tags
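
A note on the _get_dir_source_name() helper added to bin/lib/item_basic.py above: for an empty directory it returns the result of l_sources_name.add(source_name), which is always None, so the return l_sources_name that follows is unreachable, and the l_sources_name=set() default is a single set shared across every call. The snippet below is only a rough, standalone sketch of the walk the helper appears intended to perform (treat a directory as an item source as soon as a four-digit year sub-directory shows up beneath it); the name list_item_sources, the pastes_dir argument and the PASTES default path are illustrative assumptions, not existing AIL functions or configuration.

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# Rough standalone sketch, not part of the patch series above.

import os

def list_item_sources(pastes_dir, source_name=None, sources=None):
    # A directory counts as an item source as soon as a four-digit year
    # sub-directory (e.g. 2021) is found directly under it.
    if sources is None:  # fresh accumulator per call, no shared default set
        sources = set()
    current = os.path.join(pastes_dir, source_name) if source_name else pastes_dir
    entries = os.listdir(current)
    if not entries:  # empty directory: keep it as a source and stop here
        if source_name:
            sources.add(source_name)
        return sources
    for entry in entries:
        if len(entry) == 4 and entry.isdigit():  # year folder => parent is a source
            if source_name:
                sources.add(source_name)
            return sources
    for entry in entries:
        child = os.path.join(source_name, entry) if source_name else entry
        if os.path.isdir(os.path.join(pastes_dir, child)):
            list_item_sources(pastes_dir, source_name=child, sources=sources)
    return sources

if __name__ == '__main__':
    # Illustrative default only: the real pastes folder comes from the AIL config.
    pastes_root = os.path.join(os.environ.get('AIL_HOME', '.'), 'PASTES')
    if os.path.isdir(pastes_root):
        print(list_item_sources(pastes_root))

Initialising the accumulator to None and creating the set inside the function is the part that avoids the shared mutable default; the rest mirrors the recursion in the patched helper.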