From 9c1bfb7073a65a19dd905de40acc2518bdacf0d4 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 19 Aug 2022 16:53:31 +0200 Subject: [PATCH] DB migration --- bin/DB_KVROCKS_MIGRATION.py | 405 +++++++++- bin/LAUNCH.sh | 20 +- bin/core/ail_2_ail.py | 4 +- .../ail_json_importer/Ail_feeder_jabber.py | 20 +- .../ail_json_importer/Ail_feeder_telegram.py | 22 +- .../ail_json_importer/Ail_feeder_twitter.py | 23 +- .../Ail_feeder_urlextract.py | 1 - bin/lib/ConfigLoader.py | 6 + bin/lib/Duplicate.py | 2 +- bin/lib/Investigations.py | 2 +- bin/lib/Screenshot.py | 2 +- bin/lib/Tracker.py | 4 +- bin/lib/Users.py | 2 +- bin/lib/correlations_engine.py | 723 ++++++++++++++++++ bin/lib/crawlers.py | 111 ++- bin/lib/d4.py | 2 +- bin/lib/item_basic.py | 1 + bin/lib/objects/CryptoCurrencies.py | 54 +- bin/lib/objects/Decodeds.py | 337 +++++++- bin/lib/objects/Domains.py | 138 +++- bin/lib/objects/Items.py | 269 +++---- bin/lib/objects/Pgps.py | 19 +- bin/lib/objects/Screenshots.py | 30 + bin/lib/objects/Usernames.py | 18 +- bin/lib/objects/abstract_object.py | 37 +- bin/lib/objects/abstract_subtype_object.py | 73 +- bin/modules/Credential.py | 55 +- bin/modules/Decoder.py | 82 +- bin/modules/Global.py | 2 +- bin/modules/Telegram.py | 2 + bin/packages/Date.py | 6 + bin/packages/Tag.py | 3 +- bin/packages/Term.py | 2 +- var/www/Flask_server.py | 4 + var/www/blueprints/correlation.py | 31 +- var/www/blueprints/objects_decoded.py | 98 +++ .../modules/hashDecoded/Flask_hashDecoded.py | 467 +---------- .../hashDecoded/templates/hashDecoded.html | 10 +- var/www/modules/restApi/Flask_restApi.py | 31 +- .../correlation/metadata_card_decoded.html | 10 +- 40 files changed, 2234 insertions(+), 894 deletions(-) create mode 100755 bin/lib/correlations_engine.py create mode 100644 var/www/blueprints/objects_decoded.py diff --git a/bin/DB_KVROCKS_MIGRATION.py b/bin/DB_KVROCKS_MIGRATION.py index 3bdd125c..1b1cccdf 100755 --- a/bin/DB_KVROCKS_MIGRATION.py +++ b/bin/DB_KVROCKS_MIGRATION.py @@ -6,6 +6,7 @@ """ import os import sys +import time import importlib.util @@ -15,13 +16,23 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from lib.ConfigLoader import ConfigLoader from lib import Users +from lib.objects import Decodeds +from lib.objects import Domains +from lib.objects import Items +from lib.objects.CryptoCurrencies import CryptoCurrency +from lib.objects.Pgps import Pgp +from lib.objects.Screenshots import Screenshot, get_all_screenshots +from lib.objects.Usernames import Username # # # # CONFIGS # # # # config_loader = ConfigLoader() -r_kvrocks = config_loader.get_redis_conn("Kvrocks_DB") +r_kvrocks = config_loader.get_db_conn("Kvrocks_DB") r_serv_db = config_loader.get_redis_conn("ARDB_DB") r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker") +r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") +r_crawler = config_loader.get_redis_conn("ARDB_Onion") +r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None # # - - CONFIGS - - # # @@ -32,6 +43,13 @@ spec.loader.exec_module(old_ail_2_ail) old_ail_2_ail.r_serv_sync = r_serv_db +from packages import Tag +spec = importlib.util.find_spec('Tag') +old_Tag = importlib.util.module_from_spec(spec) +spec.loader.exec_module(old_Tag) + +old_Tag.r_serv_tags = r_serv_tags + from lib import Tracker spec = importlib.util.find_spec('Tracker') old_Tracker = importlib.util.module_from_spec(spec) @@ -46,6 +64,12 @@ spec.loader.exec_module(old_Investigations) old_Investigations.r_tracking = r_serv_tracker +from lib import 
crawlers +spec = importlib.util.find_spec('crawlers') +old_crawlers = importlib.util.module_from_spec(spec) +spec.loader.exec_module(old_crawlers) + +old_crawlers.r_serv_onion = r_crawler # # TODO: desable features - credentials - stats ? - sentiment analysis @@ -53,6 +77,13 @@ old_Investigations.r_tracking = r_serv_tracker # /!\ ISSUE WITH FILE DUPLICATES => NEED TO BE REFACTORED + +def get_item_date(item_id): + dirs = item_id.split('/') + return f'{dirs[-4]}{dirs[-3]}{dirs[-2]}' + +################################################################ + def core_migration(): print('CORE MIGRATION...') @@ -81,6 +112,12 @@ def core_migration(): r_kvrocks.hset('d4:passivedns', 'enabled', bool(d4_enabled)) r_kvrocks.hset('d4:passivedns', 'update_time', d4_update_time) + # Crawler Manager + manager_url = old_crawlers.get_splash_manager_url() + manager_api_key = old_crawlers.get_splash_api_key() + crawlers.save_splash_manager_url_api(manager_url, manager_api_key) + crawlers.reload_splash_and_proxies_list() + # ail:misp # ail:thehive # hive:auto-alerts @@ -91,9 +128,6 @@ def core_migration(): # # TODO: TO CHECK - # config:all_global_section + - # config:global:crawler + - # mess_not_saved_export # # # # # # # # # # # # # # # # @@ -215,26 +249,361 @@ def item_submit_migration(): pass # /!\ KEY COLISION -# # TODO: change db +# # TODO: change db -> olds modules + blueprints +# # TODO: HANDLE LOCAL TAGS +# # TODO: HANDLE LOCAL TAGS +# # TODO: HANDLE LOCAL TAGS +# # TODO: HANDLE LOCAL TAGS +# # TODO: HANDLE LOCAL TAGS def tags_migration(): - + + # HANDLE LOCAL TAGS + + print(old_Tag.get_all_tags()) + + + # + # /!\ OBJECTS TAGS ISSUE /!\ + # -> only one layer + # + # issue with subtypes + between objects with same ID + # + # + # + # + pass + +# # TODO: MIGRATE item_basic.add_map_obj_id_item_id ?????????????????????? 
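# Editor's sketch: the get_item_date() helper defined near the top of this script
# derives an item's date from its path, assuming the usual AIL layout where the
# last directory levels encode year/month/day. Illustrative example (the path is
# made up):
#
#   >>> get_item_date('archive/pastebin.com_pro/2022/08/19/aBcD1234.gz')
#   '20220819'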
+############################### +# # +# ITEMS MIGRATION # +# # +############################### + +def get_item_father(item_id): + return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father') + def items_migration(): - pass + print('ITEMS MIGRATION...') + # MIGRATE IMPORTED URLEXTRACT Father + for item_id in Items.get_items_by_source('urlextract'): + father_id = get_item_father(item_id) + if father_id: + item = Items.Item(item_id) + item.set_father(father_id) + + + +# TODO: migrate cookies +# TODO: migrate auto crawlers + +############################### +# # +# CRAWLERS MIGRATION # +# # +############################### + +# Retun last crawled domains by type +# domain;epoch +def get_last_crawled_domains(domain_type): + return r_crawler.lrange(f'last_{domain_type}', 0 ,-1) def crawler_migration(): + print('CRAWLER MIGRATION...') - pass + # for domain_type in ['onion', 'regular']: + # for row in get_last_crawled_domains(domain_type): + # dom_row, epoch = row.rsplit(';', 1) + # domain, port = dom_row.rsplit(':', 1) + # print(domain, port, epoch) + # #crawlers.add_last_crawled_domain(domain_type, domain, port, epoch) + + for cookiejar_uuid in old_crawlers.get_all_cookiejar(): + meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True) + #print(meta) + #crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid) + #_set_cookiejar_date(meta['date']) + + for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True): + print(cookie_uuid) + #crawlers.add_cookie_to_cookiejar(cookiejar_uuid, meta_cookie, cookie_uuid=cookie_uuid) + + # TODO: auto crawler -> to Fix / change + + + # TODO: crawlers queues + +############################### +# # +# DOMAINS MIGRATION # +# # +############################### + +# # TODO: DOMAIN DOWN -> start onion_down:20190101 + +# Start -> 2019-01-01 + +# BY TYPE - FIRST DATE DOWN / UP + +def get_item_link(item_id): + return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link') + +def get_item_father(item_id): + return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father') + +def get_item_children(item_id): + return r_serv_metadata.smembers(f'paste_children:{item_id}') + +def get_domains_up_by_type(domain_type): + return r_crawler.smembers(f'full_{domain_type}_up') + +def get_domain_first_seen(domain_type, domain): + return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'first_seen') + +def get_domain_last_check(domain_type, domain): + return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'last_check') + +def get_domain_last_origin(domain_type, domain): + return r_crawler.hget(f'{domain_type}_metadata:{domain}', 'paste_parent') + +def get_domain_ports(domain_type, domain): + l_ports = r_crawler.hget(f'{domain_type}_metadata:{domain}', 'ports') + if l_ports: + return l_ports.split(";") + return [] + +def get_domain_languages(dom): + return r_crawler.smembers(f'domain:language:{dom}') + +def is_crawled_item(domain, item_id): + domain_lenght = len(domain) + if len(item_id) > (domain_lenght+48): + if item_id[-36-domain_lenght:-36] == domain: + return True + return False + +def get_crawled_items(domain, root_id): + crawled_items = get_crawled_items_children(domain, root_id) + crawled_items.append(root_id) + return crawled_items + +def get_crawled_items_children(domain, root_id): + crawled_items = [] + for item_id in get_item_children(root_id): + if is_crawled_item(domain, item_id): + crawled_items.append(item_id) + 
crawled_items.extend(get_crawled_items_children(domain, item_id)) + return crawled_items + +def get_domain_history_by_port(domain_type, domain, port): + history_tuple = r_crawler.zrange(f'crawler_history_{domain_type}:{domain}:{port}', 0, -1, withscores=True) + history = [] + for root_id, epoch in history_tuple: + dict_history = {} + epoch = int(epoch) # force int + dict_history["epoch"] = epoch + try: + int(root_id) + dict_history['status'] = False + except ValueError: + dict_history['status'] = True + dict_history['root'] = root_id + history.append(dict_history) + return history def domain_migration(): - pass + print('Domains MIGRATION...') -# # TODO: refractor keys -def correlations_migration(): - pass + for domain_type in ['onion', 'regular']: + for dom in get_domains_up_by_type(domain_type): + + ports = get_domain_ports(domain_type, dom) + first_seen = get_domain_first_seen(domain_type, dom) + last_check = get_domain_last_check(domain_type, dom) + last_origin = get_domain_last_origin(domain_type, dom) + languages = get_domain_languages(dom) + + domain = Domains.Domain(dom) + # domain.update_daterange(first_seen) + # domain.update_daterange(last_check) + # domain._set_ports(ports) + # if last_origin: + # domain.set_last_origin(last_origin) + for language in languages: + print(language) + # domain.add_language(language) + #print('------------------') + #print('------------------') + #print('------------------') + #print('------------------') + #print('------------------') + print(dom) + #print(first_seen) + #print(last_check) + #print(ports) + + # # TODO: FIXME filter invalid hostname + + + # CREATE DOMAIN HISTORY + for port in ports: + for history in get_domain_history_by_port(domain_type, dom, port): + epoch = history['epoch'] + # DOMAIN DOWN + if not history.get('status'): # domain DOWN + # domain.add_history(epoch, port) + print(f'DOWN {epoch}') + # DOMAIN UP + else: + root_id = history.get('root') + if root_id: + # domain.add_history(epoch, port, root_item=root_id) + #print(f'UP {root_id}') + crawled_items = get_crawled_items(dom, root_id) + for item_id in crawled_items: + url = get_item_link(item_id) + item_father = get_item_father(item_id) + if item_father and url: + #print(f'{url} {item_id}') + pass + # domain.add_crawled_item(url, port, item_id, item_father) + + + #print() + + + +############################### +# # +# DECODEDS MIGRATION # +# # +############################### +def get_estimated_type(decoded_id): + return r_serv_metadata.hget(f'metadata_hash:{decoded_id}', 'estimated_type') + +def get_decoded_items_list_by_decoder(decoder_type, decoded_id): ################### + #return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1) + return r_serv_metadata.zrange(f'{decoder_type}_hash:{decoded_id}', 0, -1) + + + +def decodeds_migration(): + print('Decoded MIGRATION...') + decoder_names = ['base64', 'binary', 'hexadecimal'] + + Decodeds._delete_old_json_descriptor() + for decoded_id in Decodeds.get_all_decodeds(): + mimetype = get_estimated_type(decoded_id) + # ignore invalid object + if mimetype is None: + continue + print() + print(decoded_id) + + decoded = Decodeds.Decoded(decoded_id) + filepath = decoded.get_filepath(mimetype=mimetype) + decoded._save_meta(filepath, mimetype) + + for decoder_type in decoder_names: + for item_id in get_decoded_items_list_by_decoder(decoder_type, decoded_id): + print(item_id, decoder_type) + date = get_item_date(item_id) + #for decoder_type in : + + decoded.add(decoder_type, date, item_id, mimetype) + 
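# Editor's sketch: in the legacy schema read by get_domain_history_by_port() above,
# a history member that parses as an integer marks a DOWN check (the epoch itself
# was stored as the member), while any other member is the root item id of a
# successful crawl. The returned structure therefore looks like this (values are
# illustrative only):
#
#   [{'epoch': 1647302400, 'status': False},
#    {'epoch': 1647388800, 'status': True,
#     'root': 'crawled/2022/03/16/example.onion/aabbccdd-1122-3344-5566-778899aabbcc'}]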
+############################### +# # +# SCREENSHOTS MIGRATION # +# # +############################### + +# old correlation +def get_screenshot_items_list(screenshot_id): ######################### # TODO: DELETE SOLO SCREENSHOTS + print(f'screenshot:{screenshot_id}') + return r_crawler.smembers(f'screenshot:{screenshot_id}') +# old correlation +def get_screenshot_domain(screenshot_id): + return r_crawler.smembers(f'screenshot_domain:{screenshot_id}') + +# Tags + Correlations +# # TODO: save orphelin screenshot ????? +def screenshots_migration(): + print('SCREENSHOTS MIGRATION...') + screenshots = get_all_screenshots() + #screenshots = ['5fcc292ea8a699aa7a9ce93a704b78b8f493620ccdb2a5cebacb1069a4327211'] + for screenshot_id in screenshots: + print(screenshot_id) + + screenshot = Screenshot(screenshot_id) + + tags = old_Tag.get_obj_tag(screenshot_id) ################## # TODO: + if tags: + print(screenshot_id) + print(tags) + + # Correlations + for item_id in get_screenshot_items_list(screenshot_id): + print(item_id) + screenshot.add_correlation('item', '', item_id) + for domain_id in get_screenshot_domain(screenshot_id): + print(domain_id) + screenshot.add_correlation('domain', '', domain_id) + +############################### +# # +# SUBTYPES MIGRATION # +# # +############################### + +def get_item_correlation_obj(obj_type, subtype, obj_id): + return r_serv_metadata.smembers(f'set_{obj_type}_{subtype}:{obj_id}') + +def get_obj_subtype_first_seen(obj_type, subtype, obj_id): + return r_serv_metadata.hget(f'{obj_type}_metadata_{subtype}:{obj_id}', 'first_seen') + +def get_obj_subtype_last_seen(obj_type, subtype, obj_id): + return r_serv_metadata.hget(f'{obj_type}_metadata_{subtype}:{obj_id}', 'last_seen') + +def get_all_subtype_id(obj_type, subtype): + print(f'{obj_type}_all:{subtype}') + print(r_serv_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1)) + return r_serv_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1) + +def get_subtype_object(obj_type, subtype, obj_id): + if obj_type == 'cryptocurrency': + return CryptoCurrency(obj_id, subtype) + elif obj_type == 'pgpdump': + return Pgp(obj_id, subtype) + elif obj_type == 'username': + return Username(obj_id, subtype) + +def migrate_subtype_obj(Obj, obj_type, subtype, obj_id): + first_seen = get_obj_subtype_first_seen(obj_type, subtype, obj_id) + last_seen = get_obj_subtype_last_seen(obj_type, subtype, obj_id) + + # dates + for item_id in get_item_correlation_obj(obj_type, subtype, obj_id): + date = get_item_date(item_id) + Obj.add(date, item_id) + +dict_obj_subtypes = {'cryptocurrency': ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'], + 'pgpdump': ['key', 'mail', 'name'], + 'username': ['telegram', 'twitter', 'jabber']} + +def subtypes_obj_migration(): + print('SUBPTYPE MIGRATION...') + + for obj_type in dict_obj_subtypes: + print(f'{obj_type} MIGRATION...') + for subtype in dict_obj_subtypes[obj_type]: + for obj_id in get_all_subtype_id(obj_type, subtype): + Obj = get_subtype_object(obj_type, subtype, obj_id) + migrate_subtype_obj(Obj, obj_type, subtype, obj_id) # # # # # # # # # # # # # # # # # STATISTICS @@ -246,10 +615,16 @@ def statistics_migration(): if __name__ == '__main__': - core_migration() - user_migration() + #core_migration() + #user_migration() + #items_migration() + #crawler_migration() + #domain_migration() + #decodeds_migration() + #screenshots_migration() + #subtypes_obj_migration() #ail_2_ail_migration() - trackers_migration() + #trackers_migration() #investigations_migration() 
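Editor's note: the migration script above talks to Kvrocks through the new ConfigLoader.get_db_conn() helper introduced later in this patch; unlike get_redis_conn() it passes a password and no db index. A minimal sketch of what that amounts to, assuming a [Kvrocks_DB] config section that defines host, port and password (the values below are placeholders, apart from the 'ail' password that LAUNCH.sh passes to redis-cli):

import redis

# Roughly what ConfigLoader.get_db_conn("Kvrocks_DB") returns; in practice the
# host, port and password are read from the AIL configuration file.
r_kvrocks = redis.StrictRedis(host='127.0.0.1', port=6383, password='ail',
                              decode_responses=True)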
diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 1721b3f1..aa547a8f 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -295,7 +295,7 @@ function shutting_down_redis_servers { redis_dir=${AIL_HOME}/redis/src for port in "${array[@]}"; do - bash -c "${redis_dir}/redis-cli -p ${port} SHUTDOWN" + bash -c "${redis_dir}/redis-cli -p ${port} -a ail SHUTDOWN" sleep 0.1 done } @@ -324,7 +324,7 @@ function checking_redis_servers { for port in "${array[@]}"; do sleep 0.2 - bash -c "${redis_dir}/redis-cli -p ${port} PING | grep "PONG" &> /dev/null" + bash -c "${redis_dir}/redis-cli -p ${port} -a ail PING | grep "PONG" &> /dev/null" if [ ! $? == 0 ]; then echo -e "${RED}\t${port} ${db_name} not ready${DEFAULT}" flag_db=1 @@ -512,6 +512,20 @@ function killall { fi } +function _set_kvrocks_namespace() { + bash -c "${redis_dir}/redis-cli -p ${port} -a ail namespace add $1 $2" +} + +function set_kvrocks_namespaces() { + if checking_kvrocks; then + _set_kvrocks_namespace "cor" "ail_correls" + _set_kvrocks_namespace "obj" "ail_objs" + _set_kvrocks_namespace "tag" "ail_tags" + else + echo -e $RED"\t* Error: Please launch Kvrocks server"$DEFAULT + fi +} + function update() { bin_dir=${AIL_HOME}/bin @@ -672,6 +686,8 @@ while [ "$1" != "" ]; do -lkv | --launchKVORCKSVerify ) launch_kvrocks; wait_until_kvrocks_is_ready; ;; + --set_kvrocks_namespaces ) set_kvrocks_namespaces; + ;; -k | --killAll ) killall; ;; -ks | --killscript ) killscript; diff --git a/bin/core/ail_2_ail.py b/bin/core/ail_2_ail.py index 48f9fd3a..33ef8f96 100755 --- a/bin/core/ail_2_ail.py +++ b/bin/core/ail_2_ail.py @@ -26,8 +26,8 @@ import Tag config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") -r_serv_db = config_loader.get_redis_conn("Kvrocks_DB") -r_serv_sync = config_loader.get_redis_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") +r_serv_sync = config_loader.get_db_conn("Kvrocks_DB") config_loader = None WEBSOCKETS_CLOSE_CODES = { diff --git a/bin/import/ail_json_importer/Ail_feeder_jabber.py b/bin/import/ail_json_importer/Ail_feeder_jabber.py index 37a4d95a..96641abe 100755 --- a/bin/import/ail_json_importer/Ail_feeder_jabber.py +++ b/bin/import/ail_json_importer/Ail_feeder_jabber.py @@ -8,14 +8,15 @@ Receiver Jabber Json Items """ import os -import json import sys import time -import datetime -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import item_basic -import Username +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import item_basic +from lib.objects.Usernames import Username sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer')) from Default_json import Default_json @@ -45,6 +46,9 @@ class Ail_feeder_jabber(Default_json): to = str(self.json_item['meta']['jabber:to']) fr = str(self.json_item['meta']['jabber:from']) item_date = item_basic.get_item_date(item_id) - Username.save_item_correlation('jabber', to, item_id, item_date) - Username.save_item_correlation('jabber', fr, item_id, item_date) - return None \ No newline at end of file + + user_to = Username(to, 'jabber') + user_fr = Username(fr, 'jabber') + user_to.add(date, item_id) + user_fr.add(date, item_id) + return None diff --git a/bin/import/ail_json_importer/Ail_feeder_telegram.py b/bin/import/ail_json_importer/Ail_feeder_telegram.py index 06045a5e..8fb8bbff 100755 --- a/bin/import/ail_json_importer/Ail_feeder_telegram.py +++ 
b/bin/import/ail_json_importer/Ail_feeder_telegram.py @@ -8,13 +8,15 @@ Recieve Json Items (example: Twitter feeder) """ import os -import json import sys import datetime -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import item_basic -import Username +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import item_basic +from lib.objects.Usernames import Username sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer')) from Default_json import Default_json @@ -46,14 +48,14 @@ class Ail_feeder_telegram(Default_json): telegram_id = f'{channel_id}_{message_id}' item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id') #print(self.json_item['meta']) - username = None + user = None if self.json_item['meta'].get('user'): - username = str(self.json_item['meta']['user']) + user = str(self.json_item['meta']['user']) else: if self.json_item['meta'].get('channel'): - username = str(self.json_item['meta']['channel']['username']) - if username: - #print(username) + user = str(self.json_item['meta']['channel']['username']) + if user: item_date = item_basic.get_item_date(item_id) - Username.save_item_correlation('telegram', username, item_id, item_date) + username = Username(user, 'telegram') + username.add(date, item_id) return None diff --git a/bin/import/ail_json_importer/Ail_feeder_twitter.py b/bin/import/ail_json_importer/Ail_feeder_twitter.py index a622a800..bc0f1b2e 100755 --- a/bin/import/ail_json_importer/Ail_feeder_twitter.py +++ b/bin/import/ail_json_importer/Ail_feeder_twitter.py @@ -8,13 +8,15 @@ Recieve Json Items (example: Twitter feeder) """ import os -import json import sys import datetime -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import item_basic -import Username +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import item_basic +from lib.objects.Usernames import Username sys.path.append(os.path.join(os.environ['AIL_BIN'], 'import', 'ail_json_importer')) from Default_json import Default_json @@ -39,9 +41,12 @@ class Ail_feeder_twitter(Default_json): ''' Process JSON meta filed. 
''' - twitter_id = str(self.json_item['meta']['twitter:tweet_id']) - item_basic.add_map_obj_id_item_id(twitter_id, item_id, 'twitter_id') - username = str(self.json_item['meta']['twitter:id']) - item_date = item_basic.get_item_date(item_id) - Username.save_item_correlation('twitter', username, item_id, item_date) + tweet_id = str(self.json_item['meta']['twitter:tweet_id']) + item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') + + date = item_basic.get_item_date(item_id) + user = str(self.json_item['meta']['twitter:id']) + username = Username(user, 'twitter') + username.add(date, item_id) + return None diff --git a/bin/import/ail_json_importer/Ail_feeder_urlextract.py b/bin/import/ail_json_importer/Ail_feeder_urlextract.py index bf72c714..54bd0cb3 100755 --- a/bin/import/ail_json_importer/Ail_feeder_urlextract.py +++ b/bin/import/ail_json_importer/Ail_feeder_urlextract.py @@ -8,7 +8,6 @@ Recieve Json Items (example: Twitter feeder) """ import os -import json import sys import datetime import uuid diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py index b68aa3a3..a6d5fef4 100755 --- a/bin/lib/ConfigLoader.py +++ b/bin/lib/ConfigLoader.py @@ -41,6 +41,12 @@ class ConfigLoader(object): db=self.cfg.getint(redis_name, "db"), decode_responses=decode_responses ) + def get_db_conn(self, db_name, decode_responses=True): ## TODO: verify redis name + return redis.StrictRedis( host=self.cfg.get(db_name, "host"), + port=self.cfg.getint(db_name, "port"), + password=self.cfg.get(db_name, "password"), + decode_responses=decode_responses ) + def get_files_directory(self, key_name): directory_path = self.cfg.get('Directories', key_name) # full path diff --git a/bin/lib/Duplicate.py b/bin/lib/Duplicate.py index 99de95e3..2cae8d7a 100755 --- a/bin/lib/Duplicate.py +++ b/bin/lib/Duplicate.py @@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader config_loader = ConfigLoader() -r_serv_db = config_loader.get_redis_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME config_loader = None diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py index fdf70864..7e39fd4a 100755 --- a/bin/lib/Investigations.py +++ b/bin/lib/Investigations.py @@ -28,7 +28,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Tag config_loader = ConfigLoader.ConfigLoader() -r_tracking = config_loader.get_redis_conn("Kvrocks_DB") +r_tracking = config_loader.get_db_conn("Kvrocks_DB") config_loader = None diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py index 3f198f52..4addb30e 100755 --- a/bin/lib/Screenshot.py +++ b/bin/lib/Screenshot.py @@ -146,7 +146,7 @@ def save_domain_relationship(obj_id, domain): def delete_domain_relationship(obj_id, domain): r_serv_onion.srem('domain_screenshot:{}'.format(domain), obj_id) - r_serv_onion.sadd('screenshot_domain:{}'.format(obj_id), domain) + r_serv_onion.srem('screenshot_domain:{}'.format(obj_id), domain) def save_obj_relationship(obj_id, obj2_type, obj2_id): if obj2_type == 'domain': diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index e26f3861..94d72e83 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -25,8 +25,8 @@ import item_basic config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") -r_serv_db = config_loader.get_redis_conn("Kvrocks_DB") -r_serv_tracker = 
config_loader.get_redis_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") +r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB") items_dir = config_loader.get_config_str("Directories", "pastes") if items_dir[-1] == '/': diff --git a/bin/lib/Users.py b/bin/lib/Users.py index 15efb6a6..77a6f31a 100755 --- a/bin/lib/Users.py +++ b/bin/lib/Users.py @@ -18,7 +18,7 @@ from lib.ConfigLoader import ConfigLoader # Config config_loader = ConfigLoader() #r_serv_db = config_loader.get_redis_conn("ARDB_DB") -r_serv_db = config_loader.get_redis_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None regex_password = r'^(?=(.*\d){2})(?=.*[a-z])(?=.*[A-Z]).{10,100}$' diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py new file mode 100755 index 00000000..818d78dc --- /dev/null +++ b/bin/lib/correlations_engine.py @@ -0,0 +1,723 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import redis + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader + +config_loader = ConfigLoader() +r_metadata = config_loader.get_db_conn("Kvrocks_Correlations") +config_loader = None + +################################## +# CORRELATION MIGRATION +################################## +# +# MIGRATE TO KVROCKS + Rename correlation Keys +# => Add support for correlations between subtypes +# => Common correlation engine for each objects +# +# Objects Iterations: -screenshot +# -decoded +# -subtypes +# -domains +# +# /!\ Handle reinsertion /!\ +# +# +# CORRELATION DB ????? => purge if needed +# +# +# +# +# +################################## +# CORRELATION MIGRATION +################################## + +CORRELATION_TYPES_BY_OBJ = { + "cryptocurrency" : ["domain", "item"], + "decoded" : ["domain", "item"], + "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"], + "item": ["cryptocurrency", "decoded", "domain", "pgp", "username", "screenshot"], + "pgp" : ["domain", "item"], + "username" : ["domain", "item"], + "screenshot" : ["domain", "item"], +} + +def get_obj_correl_types(obj_type): + return CORRELATION_TYPES_BY_OBJ.get(obj_type) + +def sanityze_obj_correl_types(obj_type, correl_types): + obj_correl_types = get_obj_correl_types(obj_type) + if correl_types: + correl_types = set(correl_types).intersection(obj_correl_types) + if not correl_types: + correl_types = obj_correl_types + return correl_types + +def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type): + return r_metadata.scard(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}') + +def get_nb_correlations(obj_type, subtype, obj_id, filter_types=[]): + if subtype is None: + subtype = '' + nb_correlations = 0 + filter_types = sanityze_obj_correl_types(obj_type, filter_types) + for correl_type in filter_types: + obj_correlations += get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type) + return obj_correlations + +def get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type): + return r_metadata.smembers(f'correlation:obj:{obj_type}:{subtype}:{correl_type}:{obj_id}') + +def get_correlations(obj_type, subtype, obj_id, filter_types=[]): + if subtype is None: + subtype = '' + obj_correlations = {} + filter_types = sanityze_obj_correl_types(obj_type, filter_types) + for correl_type in filter_types: + obj_correlations[correl_type] = 
get_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type) + return obj_correlations + +def exists_obj_correlation(obj_type, subtype, obj_id, obj2_type): + if subtype is None: + subtype = '' + return r_metadata.exists(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}') + +def is_obj_correlated(obj_type, subtype, obj_id, obj2_type, subtype2, obj2_id): + if subtype is None: + subtype = '' + if subtype2 is None: + subtype2 = '' + return r_metadata.sismember(f'correlation:obj:{obj_type}:{subtype}:{obj2_type}:{obj_id}', '{subtype2}:{obj2_id}') + +def add_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id): + print(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id) + if subtype1 is None: + subtype1 = '' + if subtype2 is None: + subtype2 = '' + r_metadata.sadd(f'correlation:obj:{obj1_type}:{subtype1}:{obj2_type}:{obj1_id}', f'{subtype2}:{obj2_id}') + r_metadata.sadd(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype1}:{obj1_id}') + + +def delete_obj_correlation(obj1_type, subtype1, obj1_id, obj2_type, subtype2, obj2_id): + if subtype1 is None: + subtype1 = '' + if subtype2 is None: + subtype2 = '' + r_metadata.srem(f'correlation:obj:{obj1_type}:{subtype}:{obj2_type}:{obj_id}', f'{subtype2}:{obj2_id}') + r_metadata.srem(f'correlation:obj:{obj2_type}:{subtype2}:{obj1_type}:{obj2_id}', f'{subtype}:{obj_id}') + + + +# # TODO: CORRELATION GRAPH + + +def get_obj_str_id(obj_type, subtype, obj_id): ################ REPLACE BY : ????????????????????????? + if subtype is None: + subtype = '' + return f'{obj_type};{subtype};{obj_id}' + +def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False): + links = set() + nodes = set() + + obj_str_id = get_obj_str_id(obj_type, subtype, obj_id) + + _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj='') + return obj_str_id, nodes, links + + +def _get_correlations_graph_node(links, nodes, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''): + obj_str_id = get_obj_str_id(obj_type, subtype, obj_id) + nodes.add(obj_str_id) + + obj_correlations = get_correlations(obj_type, subtype, obj_id, filter_types=[]) + print(obj_correlations) + for correl_type in obj_correlations: + for str_obj in obj_correlations[correl_type]: + subtype2, obj2_id = str_obj.split(':', 1) + obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id) + + if obj2_str_id == previous_str_obj: + continue + + if len(nodes) > max_nodes: + break + nodes.add(obj2_str_id) + links.add((obj_str_id, obj2_str_id)) + + if level > 0: + next_level = level - 1 + _get_correlations_graph_node(links, nodes, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id) + + + + +########################################################## +########################################################## +########################################################## +########################################################## +########################################################## +########################################################## + + + + + + + + + + + + + + + + + + + + + + + +# get_correlations_fcts = { +# "cryptocurrency" : ["domain", "item"], +# "decoded" : ["domain", "item"], +# "domain": ["cryptocurrency", "decoded", "item", "pgp", "username", "screenshot"], +# "item": ["cryptocurrency", "decoded", "domain", "pgp", 
"username", "screenshot"], +# "pgp" : ["domain", "item"], +# "username" : ["domain", "item"], +# "screenshot" :{ +# "domain": get_correl_screenshot_domain, +# "item": get_correl_screenshot_item, +# }, +# } +# } +# +# def build_lsets_obj_types(obj1_type, obj_types): +# return [set(obj1_type, x) for x in subtypes_obj] +# +# ########################## +# subtypes_obj = ['cryptocurrency', 'pgp', 'username'] +# lsets_subtypes_obj_domain = build_lsets_obj_types('domain', subtypes_obj) +# lsets_subtypes_obj_item = build_lsets_obj_types('item', subtypes_obj) +# ########################## + +# TODO HANDLE CRAWLED ITEMS +def add_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id): + set_type = set(ob1_type, ob2_type) + + # domain - subtypes objs + if set_type in lsets_subtypes_obj_domain: + if ob1_type == 'domain': + domain = obj1_id + obj_type = obj2_type + obj_subtype = obj2_subtype + obj_id = obj2_id + else: + domain = obj2_id + obj_type = obj1_type + obj_subtype = obj1_subtype + obj_id = obj1_id + r_metadata.sadd(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id) + r_metadata.sadd(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain) + + # TODO HANDLE CRAWLED ITEMS + # item - subtypes objs + elif set_type in lsets_subtypes_obj_item: + if ob1_type == 'item': + item_id = obj1_id + obj_type = obj2_type + obj_subtype = obj2_subtype + obj_id = obj2_id + else: + item_id = obj2_id + obj_type = obj1_type + obj_subtype = obj1_subtype + obj_id = obj1_id + r_metadata.sadd(f'set_{obj_type}_{obj_subtype}:{obj_id}', item_id) + r_metadata.sadd(f'item_{obj_type}_{obj_subtype}:{item_id}', obj_id) + + # domain - decoded + elif set_type == set('domain', 'decoded'): + if ob1_type == 'decoded': + decoded_id = ob1_id + domain = obj2_id + else: + decoded_id = obj2_id + domain = ob1_id + r_metadata.sadd(f'hash_domain:{domain}', decoded_id) # domain - hash map + r_metadata.sadd(f'domain_hash:{decoded_id}', domain) # hash - domain map + + # item - decoded + elif set_type == set('item', 'decoded'): + if ob1_type == 'decoded': + decoded_id = ob1_id + item_id = obj2_id + else: + decoded_id = obj2_id + item_id = ob1_id + + ############################################################ + + + # domain - screenshot + elif set_type == set('domain', 'screenshot'): + if ob1_type == 'screenshot': + screenshot_id = ob1_id + domain = obj2_id + else: + screenshot_id = obj2_id + domain = ob1_id + r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id) + r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain) + + # item - screenshot + elif set_type == set('item', 'screenshot'): + if ob1_type == 'screenshot': + screenshot_id = ob1_id + item_id = obj2_id + else: + screenshot_id = obj2_id + item_id = ob1_id + r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) + r_crawler.sadd(f'screenshot:{screenshot_id}', item_id) + + # domain - item + elif set_type == set('domain', 'item'): + if ob1_type == 'item': + item_id = ob1_id + domain = obj2_id + else: + item_id = obj2_id + domain = ob1_id + + ############################################################ + + + +# TODO ADD COMPLETE DELETE +# TODO: Handle items crawled +def delete_correlation(obj1_type, obj1_subtype, obj1_id, obj2_type, obj2_subtype, obj2_id): + set_type = set(ob1_type, ob2_type) + + # domain - subtypes objs + if set_type in lsets_subtypes_obj_domain: + if ob1_type == 'domain': + domain = obj1_id + obj_type = obj2_type + obj_subtype = obj2_subtype + obj_id = obj2_id + else: + domain = obj2_id + obj_type = obj1_type + 
obj_subtype = obj1_subtype + obj_id = obj1_id + r_metadata.srem(f'domain_{obj_type}_{obj_subtype}:{domain}', obj_id) + r_metadata.srem(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}', domain) + + + + # TODO ADD COMPLETE DELETE + # item - subtypes objs + elif set_type in lsets_subtypes_obj_item: + if ob1_type == 'item': + item_id = obj1_id + obj_type = obj2_type + obj_subtype = obj2_subtype + obj_id = obj2_id + else: + item_id = obj2_id + obj_type = obj1_type + obj_subtype = obj1_subtype + obj_id = obj1_id + # TODO ADD COMPLETE DELETE + r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id) + r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id) + # TODO ADD COMPLETE DELETE + + # domain - decoded + elif set_type == set('domain', 'decoded'): + if ob1_type == 'decoded': + decoded_id = ob1_id + domain = obj2_id + else: + decoded_id = obj2_id + domain = ob1_id + r_metadata.srem(f'hash_domain:{domain}', decoded_id) + r_metadata.srem(f'domain_hash:{decoded_id}', domain) + + # item - decoded + elif set_type == set('item', 'decoded'): + if ob1_type == 'decoded': + decoded_id = ob1_id + item_id = obj2_id + else: + decoded_id = obj2_id + item_id = ob1_id + + #################################################################### + + + # domain - screenshot + elif set_type == set('domain', 'screenshot'): + if ob1_type == 'screenshot': + screenshot_id = ob1_id + domain = obj2_id + else: + screenshot_id = obj2_id + domain = ob1_id + r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id) + r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain) + + # item - screenshot + elif set_type == set('item', 'screenshot'): + if ob1_type == 'screenshot': + screenshot_id = ob1_id + item_id = obj2_id + else: + screenshot_id = obj2_id + item_id = ob1_id + r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) + r_crawler.srem(f'screenshot:{screenshot_id}', item_id) + + # domain - item + +# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # + +## Subtypes - Cryptocurrency Pgp Username ## + +def get_correl_subtypes_obj_domain(obj_type, obj_subtype, obj_id): + r_serv_metadata.smembers(f'set_domain_{obj_type}_{obj_subtype}:{obj_id}') + +def get_correl_subtypes_obj_item(): + pass + +def delete_subtype_domain_correlation(domain, obj_type, subtype, obj_id): + r_metadata.srem(f'domain_{obj_type}_{subtype}:{domain}', obj_id) + r_metadata.srem(f'set_domain_{obj_type}_{subtype}:{obj_id}', domain) + +# TODO ADD COMPLETE DELETE +def delete_subtype_item_correlation(obj_type, subtype, obj_id, item_id, item_date): + #self.update_correlation_daterange(subtype, obj_id, item_date) update daterange ! 
# # TODO: + r_metadata.srem(f'set_{obj_type}_{subtype}:{obj_id}', item_id) + r_metadata.srem(f'item_{obj_type}_{subtype}:{item_id}', obj_id) + + # # TODO: FIXME HANDLE SUB Objects Metadata # WARNING: + # res = r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, -1) + # if int(res) < 0: # remove last + # r_serv_metadata.hdel('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id) + # + # res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id) + # if int(res) > 0: + # r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, -1) + +## Screenshot ## + +##-- Screenshot - Domain --## +def add_correl_screenshot_domain(screenshot_id, domain): + r_crawler.sadd(f'domain_screenshot:{domain}', screenshot_id) + r_crawler.sadd(f'screenshot_domain:{screenshot_id}', domain) + +def get_correl_screenshot_domain(screenshot_id): + return r_crawler.smembers(f'screenshot_domain:{screenshot_id}') + +# def delete_correl_screenshot_domain(screenshot_id, domain): +# r_crawler.srem(f'domain_screenshot:{domain}', screenshot_id) +# r_crawler.srem(f'screenshot_domain:{screenshot_id}', domain) + +##-- Screenshot - Item --## +def add_correl_screenshot_item(screenshot_id, item_id): + r_metadata.hset(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) + r_crawler.sadd(f'screenshot:{screenshot_id}', item_id) + +def get_correl_screenshot_item(screenshot_id): + r_crawler.smembers(f'screenshot:{screenshot_id}') + +# def delete_correl_screenshot_item(screenshot_id, item_id): +# r_metadata.hdel(f'paste_metadata:{item_id}', 'screenshot', screenshot_id) +# r_crawler.srem(f'screenshot:{screenshot_id}', item_id) + +## -- ## + + +def get_correl_item_screenshot(item_id): + res = r_metadata.hget(f'paste_metadata:{item_id}', 'screenshot') + if res: + return set(res) + else: + return set() + +## Domain ## + +def get_correl_domain_subtypes_obj(domain_id, obj_type, obj_subtype): + return r_serv_metadata.smembers(f'domain_{obj_type}_{obj_subtype}:{domain_id}') + +## -- ## + +## Item ## + +def get_correl_item_subtypes_obj(): + pass + +## -- ## war game stinger - stranger thing + + +def _get_object_correlations(obj_type, obj_subtype, obj_id, filter_types=[]): # # TODO: , filter_subtypes=[] + obj_relationships = get_obj_relationships(obj_type) + correlations = [] + for correlation_fct in obj_relationship_fcts[obj_type]: + correlations + + + + + + +def get_object_correlations(filter_types, filter_subtypes, lvl=0): + pass + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +#################################################################### +#################################################################### +#################################################################### +#################################################################### +#################################################################### +#################################################################### + +def get_object_correlation(object_type, value, correlation_names=None, correlation_objects=None, requested_correl_type=None): + if object_type == 'domain': + return Domain.get_domain_all_correlation(value, correlation_names=correlation_names) + elif object_type == 'paste' or object_type == 'item': + return Item.get_item_all_correlation(value, correlation_names=correlation_names) + elif object_type == 'decoded': + return Decoded.get_decoded_correlated_object(value, correlation_objects=correlation_objects) + elif 
object_type == 'pgp': + return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) + elif object_type == 'cryptocurrency': + return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) + elif object_type == 'username': + return Username.correlation.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects) + elif object_type == 'screenshot' or object_type == 'image': + return Screenshot.get_screenshot_correlated_object(value, correlation_objects=correlation_objects) + return {} + +def get_obj_tag_table_keys(object_type): + ''' + Warning: use only in flask (dynamic templates) + ''' + if object_type=="domain": + return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot + +def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): + if obj1_type == 'domain': + pass + elif obj1_type == 'item': + pass # son/father + duplicate + domain + elif obj1_type == 'pgp': + Pgp.pgp.save_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) + elif obj1_type == 'cryptocurrency': + Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) + elif obj1_type == 'decoded': + Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id) + elif obj1_type == 'image': + Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id) + +def delete_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype=None, obj2_subtype=None): + if obj1_type == 'domain': + pass + elif obj1_type == 'item': + pass # son/father + duplicate + domain + elif obj1_type == 'pgp': + Pgp.pgp.delete_obj_relationship(obj1_subtype, obj1_id, obj2_type, obj2_id) + elif obj1_type == 'cryptocurrency': + Cryptocurrency.cryptocurrency.delete_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id) + elif obj1_type == 'decoded': + Decoded.delete_obj_relationship(obj1_id, obj2_type, obj2_id) + elif obj1_type == 'image': + Screenshot.delete_obj_relationship(obj1_id, obj2_type, obj2_id) + +def create_graph_links(links_set): + graph_links_list = [] + for link in links_set: + graph_links_list.append({"source": link[0], "target": link[1]}) + return graph_links_list + +def create_graph_nodes(nodes_set, root_node_id, flask_context=True): + graph_nodes_list = [] + for node_id in nodes_set: + correlation_name, correlation_type, value = node_id.split(';', 3) + dict_node = {"id": node_id} + dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value) + dict_node['text'] = value + if node_id == root_node_id: + dict_node["style"]["node_color"] = 'orange' + dict_node["style"]["node_radius"] = 7 + dict_node['url'] = get_item_url(correlation_name, value, correlation_type, flask_context=flask_context) + graph_nodes_list.append(dict_node) + return graph_nodes_list + +def create_node_id(correlation_name, value, correlation_type=''): + if correlation_type is None: + correlation_type = '' + return '{};{};{}'.format(correlation_name, correlation_type, value) + + + +# # TODO: filter by correlation type => bitcoin, mail, ... 
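# Editor's sketch: unlike the legacy helpers kept below, the new engine at the top of
# this file stores each correlation symmetrically in two sets:
#
#   correlation:obj:<obj1_type>:<subtype1>:<obj2_type>:<obj1_id>  ->  '<subtype2>:<obj2_id>'
#   correlation:obj:<obj2_type>:<subtype2>:<obj1_type>:<obj2_id>  ->  '<subtype1>:<obj1_id>'
#
# For example (ids are illustrative), add_obj_correlation('domain', '', 'example.onion',
# 'username', 'telegram', 'alice') results in:
#
#   SADD correlation:obj:domain::username:example.onion  'telegram:alice'
#   SADD correlation:obj:username:telegram:domain:alice  ':example.onion'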
+def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None, flask_context=True): + links = set() + nodes = set() + + root_node_id = create_node_id(object_type, root_value, requested_correl_type) + nodes.add(root_node_id) + + root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type) + for correl in root_correlation: + if correl in ('pgp', 'cryptocurrency', 'username'): + for correl_type in root_correlation[correl]: + for correl_val in root_correlation[correl][correl_type]: + + # add correlation + correl_node_id = create_node_id(correl, correl_val, correl_type) + + if mode=="union": + if len(nodes) > max_nodes: + break + nodes.add(correl_node_id) + links.add((root_node_id, correl_node_id)) + + # get second correlation + res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects, requested_correl_type=correl_type) + if res: + for corr_obj in res: + for correl_key_val in res[corr_obj]: + #filter root value + if correl_key_val == root_value: + continue + + if len(nodes) > max_nodes: + break + new_corel_1 = create_node_id(corr_obj, correl_key_val) + new_corel_2 = create_node_id(correl, correl_val, correl_type) + nodes.add(new_corel_1) + nodes.add(new_corel_2) + links.add((new_corel_1, new_corel_2)) + + if mode=="inter": + nodes.add(correl_node_id) + links.add((root_node_id, correl_node_id)) + if correl in ('decoded', 'screenshot', 'domain', 'paste'): + for correl_val in root_correlation[correl]: + + correl_node_id = create_node_id(correl, correl_val) + if mode=="union": + if len(nodes) > max_nodes: + break + nodes.add(correl_node_id) + links.add((root_node_id, correl_node_id)) + + res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects) + if res: + for corr_obj in res: + if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'): + for correl_key_val in res[corr_obj]: + #filter root value + if correl_key_val == root_value: + continue + + if len(nodes) > max_nodes: + break + new_corel_1 = create_node_id(corr_obj, correl_key_val) + new_corel_2 = create_node_id(correl, correl_val) + nodes.add(new_corel_1) + nodes.add(new_corel_2) + links.add((new_corel_1, new_corel_2)) + + if mode=="inter": + nodes.add(correl_node_id) + links.add((root_node_id, correl_node_id)) + + if corr_obj in ('pgp', 'cryptocurrency', 'username'): + for correl_key_type in res[corr_obj]: + for correl_key_val in res[corr_obj][correl_key_type]: + #filter root value + if correl_key_val == root_value: + continue + + if len(nodes) > max_nodes: + break + new_corel_1 = create_node_id(corr_obj, correl_key_val, correl_key_type) + new_corel_2 = create_node_id(correl, correl_val) + nodes.add(new_corel_1) + nodes.add(new_corel_2) + links.add((new_corel_1, new_corel_2)) + + if mode=="inter": + nodes.add(correl_node_id) + links.add((root_node_id, correl_node_id)) + + + return {"nodes": create_graph_nodes(nodes, root_node_id, flask_context=flask_context), "links": create_graph_links(links)} + + + + + + + + + + + +#######################################################################################3 diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index a2201c74..cf2894e9 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -168,7 +168,7 @@ def load_crawler_cookies(cookiejar_uuid, domain, crawler_type='regular'): 
################################################################################ def get_all_cookiejar(): - r_serv_onion.smembers('cookiejar:all') + return r_serv_onion.smembers('cookiejar:all') def get_global_cookiejar(): res = r_serv_onion.smembers('cookiejar:global') @@ -185,19 +185,24 @@ def get_user_cookiejar(user_id): def exist_cookiejar(cookiejar_uuid): return r_serv_onion.exists('cookiejar_metadata:{}'.format(cookiejar_uuid)) -def create_cookiejar(user_id, level=1, description=None): - cookiejar_uuid = str(uuid.uuid4()) +def _set_cookiejar_date(date): + r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'date', date) + +# # TODO: sanitize cookie_uuid +def create_cookiejar(user_id, level=1, description=None, cookiejar_uuid=None): + if not cookiejar_uuid: + cookiejar_uuid = str(uuid.uuid4()) r_serv_onion.sadd('cookiejar:all', cookiejar_uuid) if level==0: - r_serv_onion.sadd('cookiejar:user:{}'.format(user_id), cookiejar_uuid) + r_serv_onion.sadd(f'cookiejar:user:{user_id}', cookiejar_uuid) else: r_serv_onion.sadd('cookiejar:global', cookiejar_uuid) # metadata - r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'user_id', user_id) - r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'level', level) - r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'description', description) - r_serv_onion.hset('cookiejar_metadata:{}'.format(cookiejar_uuid), 'date', datetime.now().strftime("%Y%m%d")) + r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'user_id', user_id) + r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'level', level) + r_serv_onion.hset(f'cookiejar_metadata:{cookiejar_uuid}', 'description', description) + _set_cookiejar_date(datetime.now().strftime("%Y%m%d")) # if json_cookies: # json_cookies = json.loads(json_cookies) # # TODO: catch Exception @@ -259,7 +264,7 @@ def get_cookiejar_metadata(cookiejar_uuid, level=False): dict_cookiejar['date'] = get_cookiejar_date(cookiejar_uuid) dict_cookiejar['user_id'] = get_cookiejar_owner(cookiejar_uuid) if level: - dict_cookiejar['level'] = get_cookies_level(cookiejar_uuid) + dict_cookiejar['level'] = get_cookiejar_level(cookiejar_uuid) return dict_cookiejar def get_cookiejar_metadata_by_iterator(iter_cookiejar_uuid): @@ -311,10 +316,12 @@ def get_cookie_dict(cookie_uuid): return cookie_dict # name, value, path=None, httpOnly=None, secure=None, domain=None, text=None -def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict): - cookie_uuid = generate_uuid() - r_serv_onion.sadd('cookiejar:{}:cookies:uuid'.format(cookiejar_uuid), cookie_uuid) - r_serv_onion.sadd('cookies:map:cookiejar:{}'.format(cookie_uuid), cookiejar_uuid) +def add_cookie_to_cookiejar(cookiejar_uuid, cookie_dict, cookie_uuid=None): + # # TODO: sanitize cookie_uuid + if not cookie_uuid: + cookie_uuid = generate_uuid() + r_serv_onion.sadd(f'cookiejar:{cookiejar_uuid}:cookies:uuid', cookie_uuid) + r_serv_onion.sadd(f'cookies:map:cookiejar:{cookie_uuid}', cookiejar_uuid) set_cookie_value(cookie_uuid, 'name', cookie_dict['name']) set_cookie_value(cookie_uuid, 'value', cookie_dict['value']) @@ -631,7 +638,6 @@ def add_auto_crawler_in_queue(domain, domain_type, port, epoch, delta, message): def update_auto_crawler_queue(): current_epoch = int(time.time()) - current_epoch = 1631096842 # check if current_epoch > domain_next_epoch l_queue = r_serv_onion.zrangebyscore('crawler_auto_queue', 0, current_epoch) for elem in l_queue: @@ -715,6 +721,73 @@ def send_url_to_crawl_in_queue(crawler_mode, crawler_type, url): #### 
#### #### CRAWLER TASK API #### + +def api_add_crawler_task(json_dict): + user_id = None ############################################### + user_agent = data.get('user_agent', None) + url = json_dict.get('url', '') + if not is_valid_uuid_v4(investigation_uuid): + return {"status": "error", "reason": f"Invalid Investigation uuid: {investigation_uuid}"}, 400 + + screenshot = json_dict.get('screenshot', True) #### + screenshot = screenshot == True + har = json_dict.get('screenshot', True) #### + har = har == True + + depth_limit = data.get('depth_limit', 1) + try: + depth_limit = int(depth_limit) + if depth_limit < 0: + depth_limit = 0 + except ValueError: + return ({'error':'invalid depth limit'}, 400) + + max_pages = data.get('max_pages', 100) + if max_pages: + try: + max_pages = int(max_pages) + if max_pages < 1: + max_pages = 1 + except ValueError: + return ({'error':'invalid max_pages limit'}, 400) + + auto_crawler = data.get('auto_crawler', False) + auto_crawler = auto_crawler == True + crawler_delta = data.get('crawler_delta', 3600) + if auto_crawler: + try: + crawler_delta = int(crawler_delta) + if crawler_delta < 0: + return ({'error':'invalid delta between two pass of the crawler'}, 400) + except ValueError: + return ({'error':'invalid delta between two pass of the crawler'}, 400) + + + crawler_type = data.get('crawler_type', None) + + cookiejar_uuid = data.get('cookiejar_uuid', None) + if cookiejar_uuid: + if not exist_cookiejar(cookiejar_uuid): + return ({'error': 'unknow cookiejar uuid', 'cookiejar_uuid': cookiejar_uuid}, 404) + level = get_cookiejar_level(cookiejar_uuid) + if level == 0: # # TODO: check if user is admin ###################################################### + cookie_owner = get_cookiejar_owner(cookiejar_uuid) + if cookie_owner != user_id: + return ({'error': 'The access to this cookiejar is restricted'}, 403) + + + + + + + + create_crawler_task(url, screenshot=screenshot, har=har, depth_limit=depth_limit, + max_pages=max_pages, crawler_type=crawler_type, + auto_crawler=auto_crawler, crawler_delta=crawler_delta, + cookiejar_uuid=cookiejar_uuid, user_agent=user_agent) + + +# # TODO: # FIXME: REPLACE ME def api_create_crawler_task(user_id, url, screenshot=True, har=True, depth_limit=1, max_pages=100, auto_crawler=False, crawler_delta=3600, crawler_type=None, cookiejar_uuid=None, user_agent=None): # validate url if url is None or url=='' or url=='\n': @@ -802,10 +875,16 @@ def create_domain_metadata(domain_type, domain, current_port, date, date_month): all_domain_ports.append(current_port) r_serv_onion.hset('{}_metadata:{}'.format(domain_type, domain), 'ports', ';'.join(all_domain_ports)) +def add_last_crawled_domain(domain_type, domain, port, epoch): + # update list, last crawled domains + redis_crawler.lpush(f'last_{domain_type}', f'{domain}:{port};{epoch}') + redis_crawler.ltrim(f'last_{domain_type}', 0, 15) + # add root_item to history +# if down -> root_item = epoch_date def add_domain_root_item(root_item, domain_type, domain, epoch_date, port): # Create/Update crawler history - r_serv_onion.zadd('crawler_history_{}:{}:{}'.format(domain_type, domain, port), epoch_date, root_item) + r_serv_onion.zadd(f'crawler_history_{domain_type}:{domain}:{port}', epoch_date, root_item) def create_item_metadata(item_id, domain, url, port, item_father): r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', item_father) @@ -1498,5 +1577,5 @@ if __name__ == '__main__': #print(get_all_queues_stats()) #res = get_auto_crawler_all_domain() - res = 
update_auto_crawler_queue() + res = get_all_cookiejar() print(res) diff --git a/bin/lib/d4.py b/bin/lib/d4.py index 20d496ec..c66c0f27 100755 --- a/bin/lib/d4.py +++ b/bin/lib/d4.py @@ -11,7 +11,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import ConfigLoader config_loader = ConfigLoader.ConfigLoader() -r_serv_db = config_loader.get_redis_conn("Kvrocks_DB") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") r_cache = config_loader.get_redis_conn("Redis_Cache") config_loader = None diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index c212f5ab..d4e9854b 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -185,6 +185,7 @@ def get_obj_id_item_id(parent_type, parent_id): else: return None +# # TODO: # FIXME: TO MIGRATE ?????? def add_map_obj_id_item_id(obj_id, item_id, obj_type): if obj_type == 'twitter_id': r_serv_metadata.hset('map:twitter_id:item_id', obj_id, item_id) diff --git a/bin/lib/objects/CryptoCurrencies.py b/bin/lib/objects/CryptoCurrencies.py index 65dce40c..5ac30f2d 100755 --- a/bin/lib/objects/CryptoCurrencies.py +++ b/bin/lib/objects/CryptoCurrencies.py @@ -9,13 +9,13 @@ from flask import url_for sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader -from lib.objects import abstract_object +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id config_loader = ConfigLoader() config_loader = None -class CryptoCurrency(abstract_object.AbstractObject): +class CryptoCurrency(AbstractSubtypeObject): """ AIL CryptoCurrency Object. (strings) """ @@ -88,29 +88,45 @@ class CryptoCurrency(abstract_object.AbstractObject): obj_attr.add_tag(tag) return obj - ############################################################################ - ############################################################################ - ############################################################################ + def get_meta(self, options=set()): + return self._get_meta() + - def exist_correlation(self): - pass ############################################################################ ############################################################################ -def build_crypto_regex(subtype, search_id): - pass +def get_all_subtypes(): + return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'] -def search_by_name(subtype, search_id): - - # # TODO: BUILD regex - obj = CryptoCurrency(subtype, search_id) - if obj.exists(): - return search_id - else: - regex = build_crypto_regex(subtype, search_id) - return abstract_object.search_subtype_obj_by_id('cryptocurrency', subtype, regex) +# def build_crypto_regex(subtype, search_id): +# pass +# +# def search_by_name(subtype, search_id): ################################################## +# +# # # TODO: BUILD regex +# obj = CryptoCurrency(subtype, search_id) +# if obj.exists(): +# return search_id +# else: +# regex = build_crypto_regex(subtype, search_id) +# return abstract_object.search_subtype_obj_by_id('cryptocurrency', subtype, regex) -#if __name__ == '__main__': + + +# by days -> need first/last entry USEFULL FOR DATA RETENTION UI + +def get_all_cryptocurrencies(): + cryptos = {} + for subtype in get_all_subtypes(): + cryptos[subtype] = get_all_cryptocurrencies_by_subtype(subtype) + return cryptos + +def get_all_cryptocurrencies_by_subtype(subtype): + return get_all_id('cryptocurrency', subtype) + +if __name__ == '__main__': + res = get_all_cryptocurrencies() + print(res) diff --git a/bin/lib/objects/Decodeds.py 
b/bin/lib/objects/Decodeds.py index 8f4efd01..b1cf0b52 100755 --- a/bin/lib/objects/Decodeds.py +++ b/bin/lib/objects/Decodeds.py @@ -3,6 +3,8 @@ import os import sys +import requests +import zipfile from flask import url_for from io import BytesIO @@ -10,11 +12,17 @@ from io import BytesIO sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_object import AbstractObject +from lib.item_basic import is_crawled, get_item_domain + +from packages import Date config_loader = ConfigLoader() +r_metadata = config_loader.get_db_conn("Kvrocks_Objects") + r_metadata = config_loader.get_redis_conn("ARDB_Metadata") HASH_DIR = config_loader.get_config_str('Directories', 'hash') baseurl = config_loader.get_config_str("Notifications", "ail_domain") +VT_TOKEN = 'f1a6281c8a533172a45d901435452f67f5e61fd06a83dcc058f3f7b4aab66f5b' config_loader = None @@ -32,17 +40,14 @@ class Decoded(AbstractObject): def __init__(self, id): super(Decoded, self).__init__('decoded', id) + def exists(self): + return r_metadata.exists(f'metadata_hash:{self.id}') + # def get_ail_2_ail_payload(self): # payload = {'raw': self.get_gzip_content(b64=True), # 'compress': 'gzip'} # return payload - def get_sha1(self): - return self.id.split('/')[0] - - def get_file_type(self): - return r_metadata.hget(f'metadata_hash:{self.get_sha1()}', 'estimated_type') - # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ def delete(self): # # TODO: @@ -50,13 +55,14 @@ class Decoded(AbstractObject): def get_link(self, flask_context=False): if flask_context: - url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=value) + url = url_for('correlation.show_correlation', object_type="decoded", correlation_id=self.id) else: url = f'{baseurl}/correlation/show_correlation?object_type={self.type}&correlation_id={self.id}' return url def get_svg_icon(self): - file_type = self.get_file_type() + file_type = self.get_estimated_type() + file_type = file_type.split('/')[0] if file_type == 'application': icon = '\uf15b' elif file_type == 'audio': @@ -85,12 +91,23 @@ class Decoded(AbstractObject): def get_filepath(self, mimetype=None): return os.path.join(os.environ['AIL_HOME'], self.get_rel_path(mimetype=mimetype)) - def get_file_content(self, mimetype=None): + def get_content(self, mimetype=None): filepath = self.get_filepath(mimetype=mimetype) with open(filepath, 'rb') as f: file_content = BytesIO(f.read()) return file_content + def get_zip_content(self): + mimetype = self.get_estimated_type() + zip_content = BytesIO() + with zipfile.ZipFile(zip_content, "w") as zf: + # TODO: Fix password + #zf.setpassword(b"infected") + zf.writestr( self.id, self.get_content().getvalue()) + zip_content.seek(0) + return zip_content + + def get_misp_object(self): obj_attrs = [] obj = MISPObject('file') @@ -99,7 +116,7 @@ class Decoded(AbstractObject): obj_attrs.append( obj.add_attribute('sha1', value=self.id) ) obj_attrs.append( obj.add_attribute('mimetype', value=self.get_estimated_type()) ) - obj_attrs.append( obj.add_attribute('malware-sample', value=self.id, data=self.get_file_content()) ) + obj_attrs.append( obj.add_attribute('malware-sample', value=self.id, data=self.get_content()) ) for obj_attr in obj_attrs: for tag in self.get_tags(): obj_attr.add_tag(tag) @@ -108,24 +125,312 @@ class Decoded(AbstractObject): ############################################################################ ############################################################################ 
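The get_zip_content() helper above builds the download archive fully in memory, but the commented-out zf.setpassword(b"infected") would have no effect on writing: the standard-library zipfile module only honours passwords when extracting. One way to actually produce a password-protected sample is AES encryption via the third-party pyzipper package; the sketch below is an assumption (pyzipper is not a dependency added by this patch) and the method name is illustrative.

    # Hypothetical variant of get_zip_content(), not part of this patch.
    # pyzipper can encrypt on write, unlike the stdlib zipfile module.
    import pyzipper

    def get_encrypted_zip_content(self, password=b'infected'):
        zip_content = BytesIO()
        with pyzipper.AESZipFile(zip_content, 'w',
                                 compression=pyzipper.ZIP_DEFLATED,
                                 encryption=pyzipper.WZ_AES) as zf:
            zf.setpassword(password)   # honoured for writing by AESZipFile
            zf.writestr(self.id, self.get_content().getvalue())
        zip_content.seek(0)
        return zip_content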
############################################################################ + def get_decoders(self): + return ['base64', 'binary', 'hexadecimal'] - def exist_correlation(self): + def get_first_seen(self): + res = r_metadata.hget(f'metadata_hash:{self.id}', 'first_seen') + if res: + return int(res) + else: + return 99999999 + + def get_last_seen(self): + res = r_metadata.hget(f'metadata_hash:{self.id}', 'last_seen') + if res: + return int(res) + else: + return 0 + + def set_first_seen(self, date): + r_metadata.hset(f'metadata_hash:{self.id}', 'first_seen', date) + + def set_last_seen(self, date): + r_metadata.hset(f'metadata_hash:{self.id}', 'last_seen', date) + + def update_daterange(self, date): + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if date < first_seen: + self.set_first_seen(date) + if date > last_seen: + self.set_last_seen(date) + + def get_meta(self): pass - def create(self, content, date): + def get_meta_vt(self): + meta = {} + meta['link'] = r_metadata.hget(f'metadata_hash:{self.id}', 'vt_link') + meta['report'] = r_metadata.hget(f'metadata_hash:{self.id}', 'vt_report') + return meta + + def guess_mimetype(self, bytes_content): + return magic.from_buffer(bytes_content, mime=True) + + def _save_meta(self, filepath, mimetype): + # create hash metadata + r_metadata.hset(f'metadata_hash:{self.id}', 'size', os.path.getsize(filepath)) + r_metadata.hset(f'metadata_hash:{self.id}', 'estimated_type', mimetype) + r_metadata.sadd('hash_all_type', mimetype) #################################################### rename ???? + + def save_file(self, content, mimetype): ##################################################### + filepath = self.get_filepath(mimetype=mimetype) + if os.path.isfile(filepath): + #print('File already exist') + return False + # create dir + dirname = os.path.dirname(filepath) + if not os.path.exists(dirname): + os.makedirs(dirname) + with open(filepath, 'wb') as f: + f.write(file_content) + + # create hash metadata + self._save_meta(filepath, mimetype) + return True + + # avoid counting the same hash multiple time on the same item + # except if defferent encoding + + def is_seen_this_day(self, date): + for decoder in get_decoders_names(): + if r_metadata.zscore(f'{decoder_name}_date:{date}', self.id): + return True + return False + + def add(self, decoder_name, date, obj_id, mimetype): + + if not self.is_seen_this_day(date): + # mimetype + r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1) + r_metadata.sadd(f'decoded:mimetypes', mimetype) + + # filter hash encoded in the same object + if not self.is_correlated('item', None, obj_id): + + r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_name}_decoder', 1) + r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1) + + r_metadata.incrby(f'{decoder_name}_decoded:{date}', 1) + r_metadata.zincrby(f'{decoder_name}_date:{date}', self.id, 1) + + + self.update_daterange(date) + + # First Hash for this decoder this day - Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype) - ####correlation Decoded.save_item_relationship(sha1_string, item_id) - Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name) + # Correlations + self.add_correlation('item', '', obj_id) + # domain + if is_crawled(obj_id): + domain = get_item_domain(obj_id) + self.add_correlation('domain', '', domain) + + + # Filter duplicates ###################################################################### + # Filter on item + hash for this day + + # filter Obj 
Duplicate + + # first time we see this day + # iterate on all decoder + + + + + ###################################################################### + + # first time we see this hash today + + + # mimetype # # # # # # # # + r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1) + + # create hash metadata + r_metadata.sadd(f'decoded:mimetypes', mimetype) + + + + # # TODO: DUPLICATES + check fields + def add(self, decoder_name, date, obj_id, mimetype): + self.update_daterange(date) + + r_metadata.incrby(f'{decoder_type}_decoded:{date}', 1) + r_metadata.zincrby(f'{decoder_type}_date:{date}', self.id, 1) + + r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_type}_decoder', 1) + r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1 + + ################################################################ # TODO: REMOVE ????????????????????????????????? + r_metadata.zincrby(f'{decoder_type}_hash:{self.id}', obj_id, 1) # number of b64 on this item + + + # first time we see this hash encoding on this item + if not r_metadata.zscore(f'{decoder_type}_hash:{self.id}', obj_id): + + # create hash metadata + r_metadata.sadd(f'hash_{decoder_type}_all_type', mimetype) + + # first time we see this hash encoding today + if not r_metadata.zscore(f'{decoder_type}_date:{date}', self.id): + r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1 + + + # Correlations + self.add_correlation('item', '', obj_id) + # domain + if is_crawled(obj_id): + domain = get_item_domain(obj_id) + self.add_correlation('domain', '', domain) + + + # NB of MIMETYPE / DAY -> ALL HASH OR UNIQ HASH ?????? + + # # TODO: ADD items + def create(self, content, date, mimetype=None): + if not mimetype: + mimetype = self.guess_mimetype(content) + self.save_file(content, mimetype) + + + update_decoded_daterange(sha1_string, date_from) + if date_from != date_to and date_to: + update_decoded_daterange(sha1_string, date_to) + + ####################################################################################### + ####################################################################################### + + ####################################################################################### + ####################################################################################### + + def set_vt_report(self, report): + r_metadata.hset(f'metadata_hash:{self.id}', 'vt_report', report) + + def set_meta_vt(self, link, report): + r_metadata.hset(f'metadata_hash:{self.id}', 'vt_link', link) + self.set_vt_report(report) + + def refresh_vt_report(self): + params = {'apikey': VT_TOKEN, 'resource': self.id} + response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params) + if response.status_code == 200: + json_response = response.json() + response_code = json_response['response_code'] + # report exist + if response_code == 1: + total = json_response['total'] + detection = json_response['positives'] + report = f'Detection {detection}/{total}' + # no report found + elif response_code == 0: + report = 'No report found' + # file in queue + elif response_code == -2: + report = 'In Queue - Refresh' + self.set_vt_report(report) + print(json_response) + print(response_code) + print(report) + return report + elif response.status_code == 403: + Flask_config.vt_enabled = False + return 'Virustotal key is incorrect (e.g. 
for public API not for virustotal intelligence), authentication failed' + elif response.status_code == 204: + return 'Rate Limited' + + def send_to_vt(self): + files = {'file': (self.id, self.get_content())} + response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params= {'apikey': VT_TOKEN}) + json_response = response.json() + link = json_response['permalink'].split('analysis')[0] + 'analysis/' + self.set_meta_vt(link, 'Please Refresh') + ############################################################################ ############################################################################ +def get_decoders_names(): + return ['base64', 'binary', 'hexadecimal'] +def get_all_mimetypes(): + return r_metadata.smembers(f'decoded:mimetypes') -#if __name__ == '__main__': +def sanityze_decoder_names(decoder_name): + if decoder_name not in Decodeds.get_decoders_names(): + return None + else: + return decoder_name + +def sanityze_mimetype(mimetype): + if mimetype == 'All types': + return None + elif not r_metadata.sismember('hash_all_type', mimetype): + return None + else: + return mimetype + +def pie_chart_mimetype_json(date_from, date_to, mimetype, decoder_name): + if mimetype: + all_mimetypes = [mimetype] + else: + all_mimetypes = get_all_mimetypes() + date_range = Date.substract_date(date_from, date_to) + for date in date_range: + for mimet in all_mimetypes: + pass + +def pie_chart_decoder_json(date_from, date_to, mimetype): + all_decoder = get_decoders_names() + date_range = Date.substract_date(date_from, date_to) + if not date_range: + date_range.append(Date.get_today_date_str()) + nb_decoder = {} + for date in date_range: + for decoder_name in all_decoder: + if not mimetype: + nb = r_metadata.get(f'{decoder_name}_decoded:{date}') + if nb is None: + nb = 0 + else: + nb = int(nb) + else: + nb = r_metadata.zscore(f'{decoder_name}_type:{mimetype}', date) + nb_decoder[decoder_name] = nb_decoder.get(decoder_name, 0) + nb + pie_chart = [] + for decoder_name in all_decoder: + pie_chart.append({'name': decoder_name, 'value': nb_decoder[decoder_name]}) + return pie_chart + +def api_pie_chart_decoder_json(date_from, date_to, mimetype): + mimetype = sanityze_mimetype(mimetype) + date = Date.sanitise_date_range(date_from, date_to) + return pie_chart_decoder_json(date['date_from'], date['date_to'], mimetype) + +def _delete_old_json_descriptor(): + decodeds = [] + hash_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR) + for root, dirs, files in os.walk(hash_dir): + for file in files: + if file.endswith('.json'): + decoded_path = f'{root}/{file}' + os.remove(decoded_path) + return decodeds + +def get_all_decodeds(): + decodeds = [] + hash_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR) + if not hash_dir.endswith("/"): + hash_dir = f"{hash_dir}/" + for root, dirs, files in os.walk(hash_dir): + for file in files: + decoded_path = f'{root}{file}' + decodeds.append(file) + return decodeds + + #if __name__ == '__main__': diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py index 28d59787..9ea50c23 100755 --- a/bin/lib/objects/Domains.py +++ b/bin/lib/objects/Domains.py @@ -43,7 +43,7 @@ class Domain(AbstractObject): else: return 'regular' - def get_first_seen(selfr_int=False, separator=True): + def get_first_seen(self, r_int=False, separator=True): first_seen = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'first_seen') if first_seen: if separator: @@ -62,12 +62,59 @@ class Domain(AbstractObject): last_check = int(last_check) return last_check - 
def get_ports(self): + def _set_first_seen(self, date): + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', date) + + def _set_last_check(self, date): + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', date) + + def update_daterange(self, date): + first_seen = self.get_first_seen(r_int=True) + last_check = self.get_last_check(r_int=True) + if not first_seen: + self._set_first_seen(date) + self._set_last_check(date) + elif int(first_seen) > date: + self._set_first_seen(date) + elif int(last_check) < date: + self._set_last_check(date) + + def get_last_origin(self): + return r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'paste_parent') + + def set_last_origin(self, origin_id): + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'paste_parent', origin_id) + + def is_up(self, ports=[]): + if not ports: + ports = self.get_ports() + for port in ports: + res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True) + if res: + item_core, epoch = res[0] + if item_core != str(epoch): + return True + return False + + def get_ports(self, r_set=False): l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports') if l_ports: - return l_ports.split(";") + l_ports = l_ports.split(";") + if r_set: + return set(l_ports) + else: + return l_ports return [] + def _set_ports(self, ports): + ports = ';'.join(ports) + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'ports', ports) + + def add_ports(self, port): + ports = self.get_ports(r_set=True) + ports.add(port) + self._set_ports(ports) + def get_history_by_port(self, port, status=False, root=False): ''' Return . @@ -94,6 +141,23 @@ class Domain(AbstractObject): history.append(dict_history) return history + def get_languages(self): + return r_onion.smembers(f'domain:language:{self.id}') + + def get_meta(self): + meta = {} + meta['type'] = self.domain_type + meta['first_seen'] = self.get_first_seen() + meta['last_check'] = self.get_last_check() + meta['last_origin'] = self.last_origin() + meta['ports'] = self.get_ports() + meta['status'] = self.is_up(ports=ports) + meta['tags'] = self.get_last_origin() + #meta['is_tags_safe'] = + meta['languages'] = self.get_languages() + #meta['screenshot'] = + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ def delete(self): # # TODO: @@ -179,16 +243,74 @@ class Domain(AbstractObject): obj_attr.add_tag(tag) return obj - ############################################################################ - ############################################################################ - ############################################################################ + def add_language(self, language): + r_onion.sadd('all_domains_languages', language) + r_onion.sadd(f'all_domains_languages:{self.domain_type}', language) + r_onion.sadd(f'language:domains:{self.domain_type}:{language}', self.id) + r_onion.sadd(f'domain:language:{self.id}', language) - def exist_correlation(self): - pass ############################################################################ ############################################################################ + def create(self, first_seen, last_check, ports, status, tags, languages): + + + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'first_seen', first_seen) + r_onion.hset(f'{self.domain_type}_metadata:{self.id}', 'last_check', last_check) + + for language in languages: + self.add_language(language) + + #### CRAWLER #### + + # add root_item to history + # if domain down -> root_item = epoch + 
def _add_history_root_item(self, root_item, epoch, port): + # Create/Update crawler history + r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item)) + + # if domain down -> root_item = epoch + def add_history(self, epoch, port, root_item=None): + date = time.strftime('%Y%m%d', time.gmtime(epoch)) + try: + int(root_item) + except ValueError: + root_item = None + # UP + if root_item: + r_onion.sadd(f'full_{self.domain_type}_up', self.id) + r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day + r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month + self._add_history_root_item(root_item, epoch, port) + else: + r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month + self._add_history_root_item(epoch, epoch, port) + + def add_crawled_item(self, url, port, item_id, item_father): + r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father) + r_metadata.hset(f'paste_metadata:{item_id}', 'domain', f'{self.id}:{port}') + r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url) + # add this item_id to his father + r_metadata.sadd(f'paste_children:{item_father}', item_id) + + ##-- CRAWLER --## + + + ############################################################################ + ############################################################################ + +def get_all_domains_types(): + return ['onion', 'regular'] # i2p + +def get_all_domains_languages(): + return r_onion.smembers('all_domains_languages') + +def get_domains_up_by_type(domain_type): + return r_onion.smembers(f'full_{domain_type}_up') + +################################################################################ +################################################################################ #if __name__ == '__main__': diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py index a0dc8ab9..37f58dff 100755 --- a/bin/lib/objects/Items.py +++ b/bin/lib/objects/Items.py @@ -22,19 +22,9 @@ from export.Export import get_ail_uuid # # TODO: REPLACE from lib.objects.abstract_object import AbstractObject from lib.ConfigLoader import ConfigLoader from lib import item_basic -from lib import domain_basic from packages import Tag -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Cryptocurrency -import Pgp - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import Correlate_object -import Decoded -import Screenshot -import Username from flask import url_for @@ -310,6 +300,51 @@ class Item(AbstractObject): ############################################################################ ############################################################################ +def _get_dir_source_name(dir, source_name=None, l_sources_name=set(), filter_dir=False): + if not l_sources_name: + l_sources_name = set() + if source_name: + l_dir = os.listdir(os.path.join(dir, source_name)) + else: + l_dir = os.listdir(dir) + # empty directory + if not l_dir: + return l_sources_name.add(source_name) + else: + for src_name in l_dir: + if len(src_name) == 4: + #try: + int(src_name) + to_add = os.path.join(source_name) + # filter sources, remove first directory + if filter_dir: + to_add = to_add.replace('archive/', '').replace('alerts/', '') + l_sources_name.add(to_add) + return l_sources_name + #except: + # pass + if source_name: + src_name = os.path.join(source_name, src_name) + l_sources_name = _get_dir_source_name(dir, source_name=src_name, 
l_sources_name=l_sources_name, filter_dir=filter_dir) + return l_sources_name + +def get_items_sources(filter_dir=False, r_list=False): + res = _get_dir_source_name(ITEMS_FOLDER, filter_dir=filter_dir) + if res: + if r_list: + res = list(res) + return res + else: + return [] + +def get_items_by_source(source): + l_items = [] + dir_item = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER, source) + for root, dirs, files in os.walk(dir_item): + for file in files: + item_id = os.path.join(root, file).replace(ITEMS_FOLDER, '', 1) + l_items.append(item_id) + return l_items ################################################################################ ################################################################################ @@ -526,117 +561,6 @@ def api_get_items_sources(): # return {'status': 'error', 'reason': 'Invalid source', 'provide': source}, 400 # return {'status': 'success', 'reason': 'Valid source', 'provide': source}, 200 -### -### correlation -### -def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False): - ''' - Return all cryptocurrencies of a given item. - - :param item_id: item id - :param currencies_type: list of cryptocurrencies type - :type currencies_type: list, optional - ''' - return Cryptocurrency.cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) - -def get_item_pgp(item_id, currencies_type=None, get_nb=False): - ''' - Return all pgp of a given item. - - :param item_id: item id - :param currencies_type: list of cryptocurrencies type - :type currencies_type: list, optional - ''' - return Pgp.pgp.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb) - -def get_item_username(item_id, sub_type=None, get_nb=False): - ''' - Return all pgp of a given item. - - :param item_id: item id - :param sub_type: list of username type - :type sub_type: list, optional - ''' - return Username.correlation.get_item_correlation_dict(item_id, correlation_type=sub_type, get_nb=get_nb) - -def get_item_decoded(item_id): - ''' - Return all pgp of a given item. - - :param item_id: item id - :param currencies_type: list of cryptocurrencies type - :type currencies_type: list, optional - ''' - return Decoded.get_item_decoded(item_id) - -def get_item_all_screenshot(item_id): - ''' - Return all screenshot of a given item. - - :param item_id: item id - ''' - return Screenshot.get_item_screenshot_list(item_id) - -def get_item_all_correlation(item_id, correlation_names=[], get_nb=False): - ''' - Retun all correlation of a given item id. 
- - :param item_id: item id - :type domain: str - - :return: a dict of all correlation for a item id - :rtype: dict - ''' - if not correlation_names: - correlation_names = Correlate_object.get_all_correlation_names() - item_correl = {} - for correlation_name in correlation_names: - if correlation_name=='cryptocurrency': - res = get_item_cryptocurrency(item_id, get_nb=get_nb) - elif correlation_name=='pgp': - res = get_item_pgp(item_id, get_nb=get_nb) - elif correlation_name=='username': - res = get_item_username(item_id, get_nb=get_nb) - elif correlation_name=='decoded': - res = get_item_decoded(item_id) - elif correlation_name=='screenshot': - res = get_item_all_screenshot(item_id) - else: - res = None - # add correllation to dict - if res: - item_correl[correlation_name] = res - return item_correl - - - -## TODO: REFRACTOR -def _get_item_correlation(correlation_name, correlation_type, item_id): - res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_name, correlation_type, item_id)) - if res: - return list(res) - else: - return [] - -## TODO: REFRACTOR -def get_item_bitcoin(item_id): - return _get_item_correlation('cryptocurrency', 'bitcoin', item_id) - -## TODO: REFRACTOR -def get_item_pgp_key(item_id): - return _get_item_correlation('pgpdump', 'key', item_id) - -## TODO: REFRACTOR -def get_item_pgp_name(item_id): - return _get_item_correlation('pgpdump', 'name', item_id) - -## TODO: REFRACTOR -def get_item_pgp_mail(item_id): - return _get_item_correlation('pgpdump', 'mail', item_id) - -## TODO: REFRACTOR -def get_item_pgp_correlation(item_id): - pass ### ### GET Internal Module DESC @@ -804,60 +728,63 @@ def create_item(obj_id, obj_metadata, io_content): # Item not created return False +# # TODO: def delete_item(obj_id): - # check if item exists - if not exist_item(obj_id): - return False - else: - delete_item_duplicate(obj_id) - # delete MISP event - r_serv_metadata.delete('misp_events:{}'.format(obj_id)) - r_serv_metadata.delete('hive_cases:{}'.format(obj_id)) + pass - os.remove(get_item_filename(obj_id)) - - # get all correlation - obj_correlations = get_item_all_correlation(obj_id) - for correlation in obj_correlations: - if correlation=='cryptocurrency' or correlation=='pgp': - for obj2_subtype in obj_correlations[correlation]: - for obj2_id in obj_correlations[correlation][obj2_subtype]: - Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id, - obj1_subtype=obj2_subtype) - else: - for obj2_id in obj_correlations[correlation]: - Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id) - - # delete father/child - delete_node(obj_id) - - # delete item metadata - r_serv_metadata.delete('paste_metadata:{}'.format(obj_id)) - - return True - - ### TODO in inport V2 - # delete from tracked items - - # # # TODO: # FIXME: LATER - # delete from queue - ### - return False + # # check if item exists + # if not exist_item(obj_id): + # return False + # else: + # delete_item_duplicate(obj_id) + # # delete MISP event + # r_serv_metadata.delete('misp_events:{}'.format(obj_id)) + # r_serv_metadata.delete('hive_cases:{}'.format(obj_id)) + # + # os.remove(get_item_filename(obj_id)) + # + # # get all correlation + # obj_correlations = get_item_all_correlation(obj_id) + # for correlation in obj_correlations: + # if correlation=='cryptocurrency' or correlation=='pgp': + # for obj2_subtype in obj_correlations[correlation]: + # for obj2_id in obj_correlations[correlation][obj2_subtype]: + # Correlate_object.delete_obj_relationship(correlation, 
obj2_id, 'item', obj_id, + # obj1_subtype=obj2_subtype) + # else: + # for obj2_id in obj_correlations[correlation]: + # Correlate_object.delete_obj_relationship(correlation, obj2_id, 'item', obj_id) + # + # # delete father/child + # delete_node(obj_id) + # + # # delete item metadata + # r_serv_metadata.delete('paste_metadata:{}'.format(obj_id)) + # + # return True + # + # ### TODO in inport V2 + # # delete from tracked items + # + # # # # TODO: # FIXME: LATER + # # delete from queue + # ### + # return False #### #### -def delete_node(item_id): - if is_node(item_id): - if is_crawled(item_id): - delete_domain_node(item_id) - item_basic._delete_node(item_id) - -def delete_domain_node(item_id): - if is_domain_root(item_id): - # remove from domain history - domain, port = get_item_domain_with_port(item_id).split(':') - domain_basic.delete_domain_item_core(item_id, domain, port) - for child_id in get_all_domain_node_by_item_id(item_id): - delete_item(child_id) +# def delete_node(item_id): +# if is_node(item_id): +# if is_crawled(item_id): +# delete_domain_node(item_id) +# item_basic._delete_node(item_id) +# +# def delete_domain_node(item_id): +# if is_domain_root(item_id): +# # remove from domain history +# domain, port = get_item_domain_with_port(item_id).split(':') +# domain_basic.delete_domain_item_core(item_id, domain, port) +# for child_id in get_all_domain_node_by_item_id(item_id): +# delete_item(child_id) if __name__ == '__main__': diff --git a/bin/lib/objects/Pgps.py b/bin/lib/objects/Pgps.py index 8e0b0fd7..4ae22642 100755 --- a/bin/lib/objects/Pgps.py +++ b/bin/lib/objects/Pgps.py @@ -10,7 +10,7 @@ import redis sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -from abstract_object import AbstractObject +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id from flask import url_for config_loader = ConfigLoader.ConfigLoader() @@ -22,7 +22,7 @@ config_loader = None ################################################################################ ################################################################################ -class Pgp(AbstractObject): +class Pgp(AbstractSubtypeObject): """ AIL Pgp Object. 
(strings) """ @@ -78,13 +78,18 @@ class Pgp(AbstractObject): ############################################################################ ############################################################################ - ############################################################################ - def exist_correlation(self): - pass +def get_all_subtypes(): + return ['key', 'mail', 'name'] - ############################################################################ - ############################################################################ +def get_all_pgps(): + pgps = {} + for subtype in get_all_subtypes(): + pgps[subtype] = get_all_pgps_by_subtype(subtype) + return pgps + +def get_all_pgps_by_subtype(subtype): + return get_all_id('pgp', subtype) diff --git a/bin/lib/objects/Screenshots.py b/bin/lib/objects/Screenshots.py index 60fe22ea..aaea7c60 100755 --- a/bin/lib/objects/Screenshots.py +++ b/bin/lib/objects/Screenshots.py @@ -5,6 +5,7 @@ import os import sys from io import BytesIO +from flask import url_for sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader @@ -71,6 +72,15 @@ class Screenshot(AbstractObject): obj_attr.add_tag(tag) return obj + def get_meta(self, options=set()): + meta = {} + meta['id'] = self.id + metadata_dict['img'] = get_screenshot_rel_path(sha256_string) ######### # TODO: Rename ME ?????? + meta['tags'] = self.get_tags() + # TODO: ADD IN ABSTRACT CLASS + #meta['is_tags_safe'] = Tag.is_tags_safe(metadata_dict['tags']) ################## # TODO: ADD IN ABSZTRACT CLASS + return meta + ############################################################################ ############################################################################ ############################################################################ @@ -81,6 +91,26 @@ class Screenshot(AbstractObject): ############################################################################ ############################################################################ +def get_screenshot_dir(): + return SCREENSHOT_FOLDER + +# get screenshot relative path +def get_screenshot_rel_path(sha256_str, add_extension=False): + screenshot_path = os.path.join(sha256_str[0:2], sha256_str[2:4], sha256_str[4:6], sha256_str[6:8], sha256_str[8:10], sha256_str[10:12], sha256_str[12:]) + if add_extension: + screenshot_path = f'{screenshot_path}.png' + return screenshot_path + + +def get_all_screenshots(): + screenshots = [] + screenshot_dir = os.path.join(os.environ['AIL_HOME'], SCREENSHOT_FOLDER) + for root, dirs, files in os.walk(screenshot_dir): + for file in files: + screenshot_path = f'{root}{file}' + screenshot_id = screenshot_path.replace(SCREENSHOT_FOLDER, '').replace('/', '')[:-4] + screenshots.append(screenshot_id) + return screenshots #if __name__ == '__main__': diff --git a/bin/lib/objects/Usernames.py b/bin/lib/objects/Usernames.py index 958e3dce..cfcdbf66 100755 --- a/bin/lib/objects/Usernames.py +++ b/bin/lib/objects/Usernames.py @@ -13,7 +13,7 @@ from pymisp import MISPObject sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -from abstract_subtype_object import AbstractSubtypeObject +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id config_loader = ConfigLoader.ConfigLoader() @@ -82,16 +82,20 @@ class Username(AbstractSubtypeObject): obj_attr.add_tag(tag) return obj - - ############################################################################ ############################################################################ 
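The get_all_* helpers added to Pgps.py above, and to Usernames.py just below, are thin wrappers around get_all_id() from abstract_subtype_object, so enumerating every migrated subtype object stays uniform across object types. A short usage sketch; the loop and print formatting are illustrative only:

    # Sketch: list how many objects exist per subtype after migration.
    from lib.objects import Pgps, Usernames

    for subtype, ids in Pgps.get_all_pgps().items():
        print(f'pgp:{subtype} -> {len(ids)} ids')
    for subtype, ids in Usernames.get_all_usernames().items():
        print(f'username:{subtype} -> {len(ids)} ids')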
############################################################################ - def exist_correlation(self): - pass +def get_all_subtypes(): + return ['telegram', 'twitter', 'jabber'] - ############################################################################ - ############################################################################ +def get_all_usernames(): + users = {} + for subtype in get_all_subtypes(): + users[subtype] = get_all_usernames_by_subtype(subtype) + return users + +def get_all_usernames_by_subtype(subtype): + return get_all_id('username', subtype) diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index bc880ac7..e295fa62 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from packages import Tag from lib import Duplicate +from lib.correlations_engine import get_correlations, add_obj_correlation, delete_obj_correlation, exists_obj_correlation, is_obj_correlated from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers @@ -156,13 +157,35 @@ class AbstractObject(ABC): def get_misp_object(self): pass - # # TODO: - # @abstractmethod - # def get_correlations(self, message): - # """ - # Get object correlations - # """ - # pass + def get_correlations(self): + """ + Get object correlations + """ + return get_correlations(self.type, self.subtype, self.id) + + def add_correlation(self, type2, subtype2, id2): + """ + Add object correlation + """ + add_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2) + + def exists_correlation(self, type2): + """ + Check if an object is correlated + """ + return exists_obj_correlation(self.type, self.subtype, self.id, type2) + + def is_correlated(self, type2, subtype2, id2): + """ + Check if an object is correlated by another object + """ + return is_obj_correlated(self.type, self.subtype, self.id, type2, subtype2, id2) + + def delete_correlation(self, type2, subtype2, id2): + """ + Get object correlations + """ + delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2) # # TODO: get favicon diff --git a/bin/lib/objects/abstract_subtype_object.py b/bin/lib/objects/abstract_subtype_object.py index 7a86ea33..143b55c8 100755 --- a/bin/lib/objects/abstract_subtype_object.py +++ b/bin/lib/objects/abstract_subtype_object.py @@ -18,6 +18,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from lib.objects.abstract_object import AbstractObject from lib.ConfigLoader import ConfigLoader +from lib.item_basic import is_crawled, get_item_domain # LOAD CONFIG config_loader = ConfigLoader() @@ -43,7 +44,6 @@ class AbstractSubtypeObject(AbstractObject): self.type = obj_type self.subtype = subtype - # # TODO: # FIXME: REMOVE R_INT ???????????????????????????????????????????????????????????????????? def get_first_seen(self, r_int=False): first_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen') if r_int: @@ -54,7 +54,6 @@ class AbstractSubtypeObject(AbstractObject): else: return first_seen - # # TODO: # FIXME: REMOVE R_INT ???????????????????????????????????????????????????????????????????? 
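The correlation helpers this patch adds to AbstractObject (add_correlation, is_correlated, exists_correlation, get_correlations, delete_correlation) are what replace the per-type correlation code removed from Items.py and CryptoCurrencies.py. A minimal usage sketch, mirroring how Decoded.add() uses them; the sha1 (the sha1 of an empty string) and the item id are illustrative:

    # Sketch only: link a decoded file to the item it was carved from,
    # then read all of its correlations back through the generic API.
    from lib.objects.Decodeds import Decoded

    decoded = Decoded('da39a3ee5e6b4b0d3255bfef95601890afd80709')
    item_id = 'crawled/2022/08/19/example.gz'   # illustrative item id
    decoded.add_correlation('item', '', item_id)
    if decoded.is_correlated('item', '', item_id):
        print(decoded.get_correlations())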
def get_last_seen(self, r_int=False): last_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen') if r_int: @@ -113,49 +112,38 @@ class AbstractSubtypeObject(AbstractObject): if date > last_seen: self.set_last_seen(date) +# +# HANDLE Others objects ???? +# +# NEW field => first record(last record) +# by subtype ?????? + +# => data Retention + efficicent search +# +# + def add(self, date, item_id): - self.update_correlation_daterange() + self.update_daterange(date) # daily r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1) - # all type + # all subtypes r_metadata.zincrby(f'{self.type}_all:{self.subtype}', self.id, 1) ####################################################################### ####################################################################### - # REPLACE WITH CORRELATION ????? - # global set - r_serv_metadata.sadd(f'set_{self.type}_{self.subtype}:{self.id}', item_id) - - ## object_metadata - # item - r_serv_metadata.sadd(f'item_{self.type}_{self.subtype}:{item_id}', self.id) - - # new correlation - # - # How to filter by correlation type ???? - # - f'correlation:obj:{self.type}:{self.subtype}:{self.id}', f'{obj_type}:{obj_subtype}:{obj_id}' - f'correlation:obj:{self.type}:{self.subtype}:{obj_type}:{self.id}', f'{obj_subtype}:{obj_id}' - - # - # - # - # - # - # - # - # + # Correlations + self.add_correlation('item', '', item_id) + # domain + if is_crawled(item_id): + domain = get_item_domain(item_id) + self.add_correlation('domain', '', domain) - # # domain - # if item_basic.is_crawled(item_id): - # domain = item_basic.get_item_domain(item_id) - # self.save_domain_correlation(domain, subtype, obj_id) - + # TODO:ADD objects + Stats def create(self, first_seen, last_seen): - pass - + self.set_first_seen(first_seen) + self.set_last_seen(last_seen) def _delete(self): @@ -168,16 +156,7 @@ class AbstractSubtypeObject(AbstractObject): # get_metadata # # - # - # - # - # - # - # - # - # - # - # - # - # - # + + +def get_all_id(obj_type, subtype): + return r_metadata.zrange(f'{obj_type}_all:{subtype}', 0, -1) diff --git a/bin/modules/Credential.py b/bin/modules/Credential.py index ca9e0372..5e14fa13 100755 --- a/bin/modules/Credential.py +++ b/bin/modules/Credential.py @@ -73,7 +73,7 @@ class Credential(AbstractModule): # Database config_loader = ConfigLoader.ConfigLoader() - self.server_cred = config_loader.get_redis_conn("ARDB_TermCred") + #self.server_cred = config_loader.get_redis_conn("ARDB_TermCred") self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics") # Config values @@ -172,32 +172,33 @@ class Credential(AbstractModule): self.redis_logger.info(to_print) print(f'found {nb_cred} credentials') - # For searching credential in termFreq - for cred in all_credentials: - cred = cred.split('@')[0] #Split to ignore mail address - - # unique number attached to unique path - uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH) - self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path}) - self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()}) - - # unique number attached to unique username - uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred) - if uniq_num_cred is None: - # cred do not exist, create new entries - uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME) - self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred}) - 
self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred}) - - # Add the mapping between the credential and the path - self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path) - - # Split credentials on capital letters, numbers, dots and so on - # Add the split to redis, each split point towards its initial credential unique number - splitedCred = re.findall(Credential.REGEX_CRED, cred) - for partCred in splitedCred: - if len(partCred) > self.minimumLengthThreshold: - self.server_cred.sadd(partCred, uniq_num_cred) + # # TODO: # FIXME: TEMP DESABLE + # # For searching credential in termFreq + # for cred in all_credentials: + # cred = cred.split('@')[0] #Split to ignore mail address + # + # # unique number attached to unique path + # uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH) + # self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path}) + # self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()}) + # + # # unique number attached to unique username + # uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred) + # if uniq_num_cred is None: + # # cred do not exist, create new entries + # uniq_num_cred = self.server_cred.incr(Credential.REDIS_KEY_NUM_USERNAME) + # self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET, {cred: uniq_num_cred}) + # self.server_cred.hmset(Credential.REDIS_KEY_ALL_CRED_SET_REV, {uniq_num_cred: cred}) + # + # # Add the mapping between the credential and the path + # self.server_cred.sadd(Credential.REDIS_KEY_MAP_CRED_TO_PATH+'_'+str(uniq_num_cred), uniq_num_path) + # + # # Split credentials on capital letters, numbers, dots and so on + # # Add the split to redis, each split point towards its initial credential unique number + # splitedCred = re.findall(Credential.REGEX_CRED, cred) + # for partCred in splitedCred: + # if len(partCred) > self.minimumLengthThreshold: + # self.server_cred.sadd(partCred, uniq_num_cred) if __name__ == '__main__': diff --git a/bin/modules/Decoder.py b/bin/modules/Decoder.py index 629c8914..d818072e 100755 --- a/bin/modules/Decoder.py +++ b/bin/modules/Decoder.py @@ -11,7 +11,6 @@ ################################## import time import os -import redis import base64 from hashlib import sha1 import magic @@ -26,10 +25,19 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from modules.abstract_module import AbstractModule -from Helper import Process -from packages import Item -from lib import ConfigLoader -from lib import Decoded +from lib.ConfigLoader import ConfigLoader +from lib.objects.Items import Item +from lib.objects import Decodeds + +config_loader = ConfigLoader() +serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") +hex_max_execution_time = config_loader.get_config_int("Hex", "max_execution_time") +binary_max_execution_time = config_loader.get_config_int("Binary", "max_execution_time") +base64_max_execution_time = config_loader.get_config_int("Base64", "max_execution_time") +config_loader = None + +##################################################### +##################################################### # # TODO: use regex_helper class TimeoutException(Exception): @@ -38,9 +46,15 @@ class TimeoutException(Exception): def timeout_handler(signum, frame): raise TimeoutException + +# # TODO: # FIXME: Remove signal -> replace with regex_helper signal.signal(signal.SIGALRM, timeout_handler) 
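The FIXME above refers to the SIGALRM guard that Decoder.compute() still relies on: the alarm is armed before each regex pass, timeout_handler() raises TimeoutException, and the alarm is cleared once the search returns. A condensed sketch of that pattern, which the project's regex_helper is meant to replace; the function name and the 30-second default are illustrative:

    # Sketch of the current alarm-based timeout around a regex search;
    # signal, TimeoutException and timeout_handler are the module-level
    # names defined above.
    def findall_with_timeout(compiled_regex, content, max_execution_time=30):
        signal.alarm(max_execution_time)    # arm the watchdog
        try:
            return compiled_regex.findall(content)
        except TimeoutException:            # raised by timeout_handler above
            return []
        finally:
            signal.alarm(0)                 # always disarm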
+##################################################### +#################################################### + + class Decoder(AbstractModule): """ Decoder module for AIL framework @@ -65,8 +79,6 @@ class Decoder(AbstractModule): def __init__(self): super(Decoder, self).__init__() - serv_metadata = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Metadata") - regex_binary = '[0-1]{40,}' #regex_hex = '(0[xX])?[A-Fa-f0-9]{40,}' regex_hex = '[A-Fa-f0-9]{40,}' @@ -79,10 +91,6 @@ class Decoder(AbstractModule): # map decoder function self.decoder_function = {'binary':self.binary_decoder,'hexadecimal':self.hex_decoder, 'base64':self.base64_decoder} - hex_max_execution_time = self.process.config.getint("Hex", "max_execution_time") - binary_max_execution_time = self.process.config.getint("Binary", "max_execution_time") - base64_max_execution_time = self.process.config.getint("Base64", "max_execution_time") - # list all decoder with regex, decoder_binary = {'name': 'binary', 'regex': cmp_regex_binary, 'encoded_min_size': 300, 'max_execution_time': binary_max_execution_time} decoder_hexadecimal = {'name': 'hexadecimal', 'regex': cmp_regex_hex, 'encoded_min_size': 300, 'max_execution_time': hex_max_execution_time} @@ -102,11 +110,9 @@ class Decoder(AbstractModule): def compute(self, message): - obj_id = Item.get_item_id(message) - - # Extract info from message - content = Item.get_item_content(obj_id) - date = Item.get_item_date(obj_id) + item = Item(message) + content = item.get_content() + date = item.get_date() for decoder in self.decoder_order: # add threshold and size limit # max execution time on regex @@ -117,16 +123,16 @@ class Decoder(AbstractModule): except TimeoutException: encoded_list = [] self.process.incr_module_timeout_statistic() # add encoder type - self.redis_logger.debug(f"{obj_id} processing timeout") + self.redis_logger.debug(f"{item.id} processing timeout") continue else: signal.alarm(0) if(len(encoded_list) > 0): - content = self.decode_string(content, message, date, encoded_list, decoder['name'], decoder['encoded_min_size']) + content = self.decode_string(content, item.id, date, encoded_list, decoder['name'], decoder['encoded_min_size']) - def decode_string(self, content, item_id, item_date, encoded_list, decoder_name, encoded_min_size): + def decode_string(self, content, item_id, date, encoded_list, decoder_name, encoded_min_size): find = False for encoded in encoded_list: if len(encoded) >= encoded_min_size: @@ -134,16 +140,18 @@ class Decoder(AbstractModule): find = True sha1_string = sha1(decoded_file).hexdigest() - mimetype = Decoded.get_file_mimetype(decoded_file) + decoded = Decoded(sha1_string) + + mimetype = decoded.guess_mimetype(decoded_file) if not mimetype: - self.redis_logger.debug(item_id) - self.redis_logger.debug(sha1_string) print(item_id) print(sha1_string) - raise Exception('Invalid mimetype') - Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype) - Decoded.save_item_relationship(sha1_string, item_id) - Decoded.create_decoder_matadata(sha1_string, item_id, decoder_name) + raise Exception(f'Invalid mimetype: {sha1_string} {item_id}') + + decoded.create(content, date) + decoded.add(decoder_name, date, item_id, mimetype) + + save_item_relationship(sha1_string, item_id) ################################ #remove encoded from item content content = content.replace(encoded, '', 1) @@ -151,24 +159,18 @@ class Decoder(AbstractModule): self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}') print(f'{item_id} : 
{decoder_name} - {mimetype}') if(find): - self.set_out_item(decoder_name, item_id) + self.redis_logger.info(f'{decoder_name} decoded') + print(f'{decoder_name} decoded') + # Send to Tags + msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}' + self.send_message_to_queue(msg, 'Tags') + + # perf: remove encoded from item content return content - - def set_out_item(self, decoder_name, item_id): - - self.redis_logger.info(f'{decoder_name} decoded') - print(f'{decoder_name} decoded') - - # Send to duplicate - self.send_message_to_queue(item_id, 'Duplicate') - - # Send to Tags - msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}' - self.send_message_to_queue(msg, 'Tags') - if __name__ == '__main__': + # # TODO: TEST ME module = Decoder() module.run() diff --git a/bin/modules/Global.py b/bin/modules/Global.py index 1fb237ac..503b42cb 100755 --- a/bin/modules/Global.py +++ b/bin/modules/Global.py @@ -208,7 +208,7 @@ class Global(AbstractModule): return curr_file_content - # # TODO: add stats incomplete_file/Not a gzipped file + # # TODO: add stats incomplete_file/Not a gzipped file def gunzip_bytes_obj(self, filename, bytes_obj): gunzipped_bytes_obj = None try: diff --git a/bin/modules/Telegram.py b/bin/modules/Telegram.py index 0973eb14..a44d14e6 100755 --- a/bin/modules/Telegram.py +++ b/bin/modules/Telegram.py @@ -50,6 +50,8 @@ class Telegram(AbstractModule): # extract telegram links telegram_links = self.regex_findall(self.re_telegram_link, item.get_id(), item_content) for telegram_link_tuple in telegram_links: + print(telegram_link_tuple) + print(telegram_link_tuple[2:-2].split("', '", 1)) base_url, url_path = telegram_link_tuple[2:-2].split("', '", 1) dict_url = telegram.get_data_from_telegram_url(base_url, url_path) if dict_url.get('username'): diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 99e984dc..33047e92 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -153,6 +153,12 @@ def sanitise_date_range(date_from, date_to, separator='', date_type='str'): date_to = date_from if date_type=='str': + # remove separators + if len(date_from) == 10: + date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] + if len(date_to) == 10: + date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] + if not validate_str_date(date_from, separator=separator): date_from = datetime.date.today().strftime("%Y%m%d") if not validate_str_date(date_to, separator=separator): diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index b368f6a8..aef242e7 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -18,6 +18,7 @@ from pymispgalaxies import Galaxies, Clusters config_loader = ConfigLoader.ConfigLoader() r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") +#r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None @@ -89,7 +90,7 @@ def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update return r_serv_tags.smembers('active_galaxies_tags') -def get_all_custom_tags(): +def get_all_custom_tags(): # # TODO: add + REMOVE + Update return r_serv_tags.smembers('tags:custom') def get_taxonomies_enabled_tags(r_list=False): diff --git a/bin/packages/Term.py b/bin/packages/Term.py index 6e9b9b8e..5bf9bf91 100755 --- a/bin/packages/Term.py +++ b/bin/packages/Term.py @@ -25,7 +25,7 @@ import Date import Item config_loader = ConfigLoader.ConfigLoader() -r_serv_term = config_loader.get_redis_conn("Kvrocks_DB") 
+r_serv_term = config_loader.get_db_conn("Kvrocks_DB") config_loader = None email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 9450bab5..7f1f5249 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -52,6 +52,8 @@ from blueprints.hunters import hunters from blueprints.old_endpoints import old_endpoints from blueprints.ail_2_ail_sync import ail_2_ail_sync from blueprints.settings_b import settings_b +from blueprints.objects_decoded import objects_decoded +from blueprints.objects_range import objects_range Flask_dir = os.environ['AIL_FLASK'] @@ -113,6 +115,8 @@ app.register_blueprint(hunters, url_prefix=baseUrl) app.register_blueprint(old_endpoints, url_prefix=baseUrl) app.register_blueprint(ail_2_ail_sync, url_prefix=baseUrl) app.register_blueprint(settings_b, url_prefix=baseUrl) +app.register_blueprint(objects_decoded, url_prefix=baseUrl) +app.register_blueprint(objects_range, url_prefix=baseUrl) # ========= =========# # ========= Cookie name ======== diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index 2fc82c4a..08fe0a9c 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -19,6 +19,17 @@ import Flask_config # Import Role_Manager from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import ail_objects + + +################################################################################ + + sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) import Correlate_object import Domain @@ -244,22 +255,22 @@ def get_description(): @login_required @login_read_only def graph_node_json(): - correlation_id = request.args.get('correlation_id') - type_id = request.args.get('type_id') - object_type = request.args.get('object_type') + obj_id = request.args.get('correlation_id') #######################3 + subtype = request.args.get('type_id') ####################### + obj_type = request.args.get('object_type') ####################### max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) correlation_names = sanitise_correlation_names(request.args.get('correlation_names')) correlation_objects = sanitise_correlation_objects(request.args.get('correlation_objects')) - # # TODO: remove me, rename screenshot to image - if object_type == 'image': - object_type == 'screenshot' + # # TODO: remove me, rename screenshot + if obj_type == 'image': + obj_type == 'screenshot' - mode = sanitise_graph_mode(request.args.get('mode')) - - res = Correlate_object.get_graph_node_object_correlation(object_type, correlation_id, mode, correlation_names, correlation_objects, requested_correl_type=type_id, max_nodes=max_nodes) - return jsonify(res) + filter_types = correlation_names + correlation_objects + json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=2, flask_context=True) + #json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes) + return jsonify(json_graph) @correlation.route('/correlation/subtype_search', methods=['POST']) @login_required diff --git a/var/www/blueprints/objects_decoded.py b/var/www/blueprints/objects_decoded.py new file mode 100644 index 
00000000..0a7c041e --- /dev/null +++ b/var/www/blueprints/objects_decoded.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... +''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import Decodeds + + +# ============ BLUEPRINT ============ +objects_decoded = Blueprint('objects_decoded', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/decoded')) + +# ============ VARIABLES ============ + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== + +# # TODO: # FIXME: CHECK IF OBJ EXIST + +@objects_decoded.route("/object/decoded/download") #completely shows the paste in a new tab +@login_required +@login_read_only +def decoded_download(): + obj_id = request.args.get('id') + + # # TODO: sanitize hash + obj_id = obj_id.split('/')[0] + decoded = Decodeds.Decoded(obj_id) + if decoded.exists(): + filename = f'{decoded.id}.zip' + zip_content = decoded.get_zip_content() + return send_file(zip_content, attachment_filename=filename, as_attachment=True) + else: + abort(404) + +@objects_decoded.route("/object/decoded/send_to_vt") #completely shows the paste in a new tab +@login_required +@login_read_only +def send_to_vt(): + obj_id = request.args.get('id') + + # # TODO: sanitize hash + obj_id = obj_id.split('/')[0] + decoded = Decodeds.Decoded(obj_id) + if decoded.exists(): + decoded.send_to_vt() + return jsonify(decoded.get_meta_vt()) + else: + abort(404) + +@objects_decoded.route("/object/decoded/refresh_vt_report") #completely shows the paste in a new tab +@login_required +@login_read_only +def refresh_vt_report(): + obj_id = request.args.get('id') + + # # TODO: sanitize hash + obj_id = obj_id.split('/')[0] + decoded = Decodeds.Decoded(obj_id) + if decoded.exists(): + report = decoded.refresh_vt_report() + return jsonify(hash=decoded.id, report=report) + else: + abort(404) + +@objects_decoded.route("/object/decoded/decoder_pie_chart_json", methods=['GET']) +@login_required +@login_read_only +def decoder_pie_chart_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + mimetype = request.args.get('type') + return jsonify(Decodeds.api_pie_chart_decoder_json(date_from, date_to, mimetype)) + + + + + + +#####################################################3 diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py index 1470809b..6463d033 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -12,7 +12,6 @@ import json from Date import Date from io import BytesIO -import zipfile from hashlib import sha256 @@ -208,23 +207,6 @@ def get_all_keys_id_from_item(correlation_type, item_path): all_keys_id_dump.add( (key_id, type_id) ) return all_keys_id_dump -def one(): - return 1 - -''' -def decode_base58(bc, length): - n = 0 - for char in bc: - n = n * 58 + digits58.index(char) - return n.to_bytes(length, 'big') - -def check_bc(bc): - try: - bcbytes = decode_base58(bc, 25) - return bcbytes[-4:] 
== sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] - except Exception: - return False -''' def get_correlation_type_search_endpoint(correlation_type): if correlation_type == 'pgpdump': @@ -262,37 +244,6 @@ def get_range_type_json_endpoint(correlation_type): endpoint = 'hashDecoded.hashDecoded_page' return endpoint -def get_graph_node_json_endpoint(correlation_type): - if correlation_type == 'pgpdump': - endpoint = 'hashDecoded.pgpdump_graph_node_json' - elif correlation_type == 'cryptocurrency': - endpoint = 'hashDecoded.cryptocurrency_graph_node_json' - elif correlation_type == 'username': - endpoint = 'hashDecoded.username_graph_node_json' - else: - endpoint = 'hashDecoded.hashDecoded_page' - return endpoint - -def get_graph_line_json_endpoint(correlation_type): - if correlation_type == 'pgpdump': - endpoint = 'hashDecoded.pgpdump_graph_line_json' - elif correlation_type == 'cryptocurrency': - endpoint = 'hashDecoded.cryptocurrency_graph_line_json' - elif correlation_type == 'username': - endpoint = 'hashDecoded.username_graph_line_json' - else: - endpoint = 'hashDecoded.hashDecoded_page' - return endpoint - -def get_font_family(correlation_type): - if correlation_type == 'pgpdump': - font = 'fa' - elif correlation_type == 'cryptocurrency': - font = 'fab' - else: - font = 'fa' - return font - ############ CORE CORRELATION ############ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_decoded_files): @@ -392,27 +343,7 @@ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_de date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files) -# def show_correlation(correlation_type, type_id, key_id): -# if is_valid_type_id(correlation_type, type_id): -# key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id) -# if key_id_metadata: -# -# num_day_sparkline = 6 -# date_range_sparkline = get_date_range(num_day_sparkline) -# -# sparkline_values = list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id) -# return render_template('showCorrelation.html', key_id=key_id, type_id=type_id, -# correlation_type=correlation_type, -# graph_node_endpoint=get_graph_node_json_endpoint(correlation_type), -# graph_line_endpoint=get_graph_line_json_endpoint(correlation_type), -# font_family=get_font_family(correlation_type), -# key_id_metadata=key_id_metadata, -# type_icon=get_icon(correlation_type, type_id), -# sparkline_values=sparkline_values) -# else: -# return '404' -# else: -# return 'error' + def correlation_type_range_type_json(correlation_type, date_from, date_to): date_range = [] @@ -463,46 +394,6 @@ def correlation_type_range_type_json(correlation_type, date_from, date_to): return jsonify(range_type) -def correlation_graph_node_json(correlation_type, type_id, key_id): - if key_id is not None and is_valid_type_id(correlation_type, type_id): - - nodes_set_dump = set() - nodes_set_paste = set() - links_set = set() - - key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id) - - nodes_set_dump.add((key_id, 1, type_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen'])) - - #get related paste - l_pastes = r_serv_metadata.smembers('set_{}_{}:{}'.format(correlation_type, type_id, key_id)) - for paste in l_pastes: - nodes_set_paste.add((paste, 2)) - links_set.add((key_id, paste)) - - for key_id_with_type_id in get_all_keys_id_from_item(correlation_type, paste): - new_key_id, typ_id = key_id_with_type_id - if new_key_id != key_id: - - 
key_id_metadata = get_key_id_metadata(correlation_type, typ_id, new_key_id) - - nodes_set_dump.add((new_key_id, 3, typ_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen'])) - links_set.add((new_key_id, paste)) - - nodes = [] - for node in nodes_set_dump: - nodes.append({"id": node[0], "group": node[1], "first_seen": node[3], "last_seen": node[4], "nb_seen_in_paste": node[5], 'icon': get_icon_text(correlation_type, node[2]),"url": url_for(get_show_key_id_endpoint(correlation_type), type_id=node[2], key_id=node[0]), 'hash': True}) - for node in nodes_set_paste: - nodes.append({"id": node[0], "group": node[1],"url": url_for('objects_item.showItem', id=node[0]), 'hash': False}) - links = [] - for link in links_set: - links.append({"source": link[0], "target": link[1]}) - json = {"nodes": nodes, "links": links} - return jsonify(json) - - else: - return jsonify({}) - # ============= ROUTES ============== @hashDecoded.route("/hashDecoded/all_hash_search", methods=['POST']) @login_required @@ -634,113 +525,8 @@ def hashDecoded_page(): encoding=encoding, all_encoding=all_encoding, date_from=date_from, date_to=date_to, show_decoded_files=show_decoded_files) -@hashDecoded.route('/hashDecoded/hash_by_type') -@login_required -@login_read_only -def hash_by_type(): - type = request.args.get('type') - type = 'text/plain' - return render_template('hash_type.html',type = type) -@hashDecoded.route('/hashDecoded/hash_hash') -@login_required -@login_read_only -def hash_hash(): - hash = request.args.get('hash') - return render_template('hash_hash.html') - -# -# @hashDecoded.route('/hashDecoded/showHash') -# @login_required -# @login_analyst -# def showHash(): -# hash = request.args.get('hash') -# #hash = 'e02055d3efaad5d656345f6a8b1b6be4fe8cb5ea' -# -# # TODO FIXME show error -# if hash is None: -# return hashDecoded_page() -# -# estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') -# # hash not found -# # TODO FIXME show error -# if estimated_type is None: -# return hashDecoded_page() -# -# else: -# file_icon = get_file_icon(estimated_type) -# size = r_serv_metadata.hget('metadata_hash:'+hash, 'size') -# first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen') -# last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen') -# nb_seen_in_all_pastes = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes') -# -# # get all encoding for this hash -# list_hash_decoder = [] -# list_decoder = r_serv_metadata.smembers('all_decoder') -# for decoder in list_decoder: -# encoding = r_serv_metadata.hget('metadata_hash:'+hash, decoder+'_decoder') -# if encoding is not None: -# list_hash_decoder.append({'encoding': decoder, 'nb_seen': encoding}) -# -# num_day_type = 6 -# date_range_sparkline = get_date_range(num_day_type) -# sparkline_values = list_sparkline_values(date_range_sparkline, hash) -# -# if r_serv_metadata.hexists('metadata_hash:'+hash, 'vt_link'): -# b64_vt = True -# b64_vt_link = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_link') -# b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report') -# else: -# b64_vt = False -# b64_vt_link = '' -# b64_vt_report = r_serv_metadata.hget('metadata_hash:'+hash, 'vt_report') -# # hash never refreshed -# if b64_vt_report is None: -# b64_vt_report = '' -# -# return render_template('showHash.html', hash=hash, vt_enabled=vt_enabled, b64_vt=b64_vt, b64_vt_link=b64_vt_link, -# b64_vt_report=b64_vt_report, -# size=size, estimated_type=estimated_type, 
file_icon=file_icon, -# first_seen=first_seen, list_hash_decoder=list_hash_decoder, -# last_seen=last_seen, nb_seen_in_all_pastes=nb_seen_in_all_pastes, sparkline_values=sparkline_values) - - -@hashDecoded.route('/hashDecoded/downloadHash') -@login_required -@login_read_only -def downloadHash(): - hash = request.args.get('hash') - # sanitize hash - hash = hash.split('/')[0] - - # hash exist - if r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') is not None: - - b64_full_path = Decoded.get_decoded_filepath(hash) - hash_content = '' - try: - with open(b64_full_path, 'rb') as f: - hash_content = f.read() - - # zip buffer - result = BytesIO() - temp = BytesIO() - temp.write(hash_content) - - with zipfile.ZipFile(result, "w") as zf: - #zf.setpassword(b"infected") - zf.writestr( hash, temp.getvalue()) - - filename = hash + '.zip' - result.seek(0) - - return send_file(result, attachment_filename=filename, as_attachment=True) - except Exception as e: - print(e) - return 'Server Error' - else: - return 'hash: ' + hash + " don't exist" @hashDecoded.route('/hashDecoded/hash_by_type_json') @@ -778,62 +564,6 @@ def hash_by_type_json(): return jsonify() -@hashDecoded.route('/hashDecoded/decoder_type_json') -@login_required -@login_read_only -def decoder_type_json(): - date_from = request.args.get('date_from') - date_to = request.args.get('date_to') - - typ = request.args.get('type') - - if typ == 'All types': - typ = None - - # verify file type input - if typ is not None: - #retrieve + char - typ = typ.replace(' ', '+') - if typ not in r_serv_metadata.smembers('hash_all_type'): - typ = None - - all_decoder = r_serv_metadata.smembers('all_decoder') - # sort DESC decoder for color - all_decoder = sorted(all_decoder) - - date_range = [] - if date_from is not None and date_to is not None: - #change format - try: - if len(date_from) != 8: - date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] - date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] - date_range = substract_date(date_from, date_to) - except: - pass - - if not date_range: - date_range.append(datetime.date.today().strftime("%Y%m%d")) - - nb_decoded = {} - for decoder in all_decoder: - nb_decoded[decoder] = 0 - - for date in date_range: - for decoder in all_decoder: - if typ is None: - nb_decod = r_serv_metadata.get(decoder+'_decoded:'+date) - else: - nb_decod = r_serv_metadata.zscore(decoder+'_type:'+typ, date) - - if nb_decod is not None: - nb_decoded[decoder] = nb_decoded[decoder] + int(nb_decod) - - to_json = [] - for decoder in all_decoder: - to_json.append({'name': decoder, 'value': nb_decoded[decoder]}) - return jsonify(to_json) - @hashDecoded.route('/hashDecoded/top5_type_json') @login_required @@ -881,7 +611,7 @@ def top5_type_json(): for date in date_range: for typ in all_type: for decoder in all_decoder: - nb_decoded = r_serv_metadata.zscore('{}_type:{}'.format(decoder, typ), date) + nb_decoded = r_serv_metadata.zscore(f'{decoder}_type:{typ}', date) # daily_type key:date mimetype 3 if nb_decoded is not None: if typ in nb_types_decoded: nb_types_decoded[typ] = nb_types_decoded[typ] + int(nb_decoded) @@ -1005,145 +735,6 @@ def hash_graph_line_json(): else: return jsonify() - -@hashDecoded.route('/hashDecoded/hash_graph_node_json') -@login_required -@login_read_only -def hash_graph_node_json(): - hash = request.args.get('hash') - - estimated_type = r_serv_metadata.hget('metadata_hash:'+hash, 'estimated_type') - - if hash is not None and estimated_type is not None: - - nodes_set_hash = set() - nodes_set_paste = set() - 
links_set = set() - - url = hash - first_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'first_seen') - last_seen = r_serv_metadata.hget('metadata_hash:'+hash, 'last_seen') - nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+hash, 'nb_seen_in_all_pastes') - size = r_serv_metadata.hget('metadata_hash:'+hash, 'size') - - nodes_set_hash.add((hash, 1, first_seen, last_seen, estimated_type, nb_seen_in_paste, size, url)) - - #get related paste - l_pastes = r_serv_metadata.zrange('nb_seen_hash:'+hash, 0, -1) - for paste in l_pastes: - # dynamic update - if PASTES_FOLDER in paste: - score = r_serv_metadata.zscore('nb_seen_hash:{}'.format(hash), paste) - r_serv_metadata.zrem('nb_seen_hash:{}'.format(hash), paste) - paste = paste.replace(PASTES_FOLDER, '', 1) - r_serv_metadata.zadd('nb_seen_hash:{}'.format(hash), score, paste) - url = paste - #nb_seen_in_this_paste = nb_in_file = int(r_serv_metadata.zscore('nb_seen_hash:'+hash, paste)) - nb_hash_in_paste = r_serv_metadata.scard('hash_paste:'+paste) - - nodes_set_paste.add((paste, 2,nb_hash_in_paste,url)) - links_set.add((hash, paste)) - - l_hash = r_serv_metadata.smembers('hash_paste:'+paste) - for child_hash in l_hash: - if child_hash != hash: - url = child_hash - first_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'first_seen') - last_seen = r_serv_metadata.hget('metadata_hash:'+child_hash, 'last_seen') - nb_seen_in_paste = r_serv_metadata.hget('metadata_hash:'+child_hash, 'nb_seen_in_all_pastes') - size = r_serv_metadata.hget('metadata_hash:'+child_hash, 'size') - estimated_type = r_serv_metadata.hget('metadata_hash:'+child_hash, 'estimated_type') - - nodes_set_hash.add((child_hash, 3, first_seen, last_seen, estimated_type, nb_seen_in_paste, size, url)) - links_set.add((child_hash, paste)) - - #l_pastes_child = r_serv_metadata.zrange('nb_seen_hash:'+child_hash, 0, -1) - #for child_paste in l_pastes_child: - - nodes = [] - for node in nodes_set_hash: - nodes.append({"id": node[0], "group": node[1], "first_seen": node[2], "last_seen": node[3], 'estimated_type': node[4], "nb_seen_in_paste": node[5], "size": node[6], 'icon': get_file_icon_text(node[4]),"url": url_for('hashDecoded.showHash', hash=node[7]), 'hash': True}) - for node in nodes_set_paste: - nodes.append({"id": node[0], "group": node[1], "nb_seen_in_paste": node[2],"url": url_for('objects_item.showItem', id=node[3]), 'hash': False}) - links = [] - for link in links_set: - links.append({"source": link[0], "target": link[1]}) - json = {"nodes": nodes, "links": links} - return jsonify(json) - - else: - return jsonify({}) - - -@hashDecoded.route('/hashDecoded/hash_types') -@login_required -@login_read_only -def hash_types(): - date_from = 20180701 - date_to = 20180706 - return render_template('hash_types.html', date_from=date_from, date_to=date_to) - - -@hashDecoded.route('/hashDecoded/send_file_to_vt_js') -@login_required -@login_analyst -def send_file_to_vt_js(): - hash = request.args.get('hash') - - b64_full_path = Decoded.get_decoded_filepath(hash) - b64_content = '' - with open(b64_full_path, 'rb') as f: - b64_content = f.read() - - files = {'file': (hash, b64_content)} - response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files, params= {'apikey': vt_auth}) - json_response = response.json() - #print(json_response) - - vt_link = json_response['permalink'].split('analysis')[0] + 'analysis/' - r_serv_metadata.hset('metadata_hash:'+hash, 'vt_link', vt_link) - vt_report = 'Please Refresh' - r_serv_metadata.hset('metadata_hash:'+hash, 
'vt_report', vt_report) - - return jsonify({'vt_link': vt_link, 'vt_report': vt_report}) - - -@hashDecoded.route('/hashDecoded/update_vt_result') -@login_required -@login_analyst -def update_vt_result(): - hash = request.args.get('hash') - - params = {'apikey': vt_auth, 'resource': hash} - response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params) - if response.status_code == 200: - json_response = response.json() - response_code = json_response['response_code'] - # report exist - if response_code == 1: - total = json_response['total'] - positive = json_response['positives'] - - b64_vt_report = 'Detection {}/{}'.format(positive, total) - # no report found - elif response_code == 0: - b64_vt_report = 'No report found' - pass - # file in queue - elif response_code == -2: - b64_vt_report = 'File in queue' - pass - - r_serv_metadata.hset('metadata_hash:'+hash, 'vt_report', b64_vt_report) - return jsonify(hash=hash, report_vt=b64_vt_report) - elif response.status_code == 403: - Flask_config.vt_enabled = False - print('Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed or reaching limits.') - return jsonify() - else: - # TODO FIXME make json response - return jsonify() - ############################ PGPDump ############################ @hashDecoded.route('/decoded/pgp_by_type_json') ## TODO: REFRACTOR @@ -1191,7 +782,7 @@ def pgp_by_type_json(): else: return jsonify() -############################ Correlation ############################ +############################ DateRange ############################ @hashDecoded.route("/correlation/pgpdump", methods=['GET']) @login_required @login_read_only @@ -1258,22 +849,10 @@ def all_username_search(): show_decoded_files = request.form.get('show_decoded_files') return redirect(url_for('hashDecoded.username_page', date_from=date_from, date_to=date_to, type_id=type_id, show_decoded_files=show_decoded_files)) -# @hashDecoded.route('/correlation/show_pgpdump') -# @login_required -# @login_analyst -# def show_pgpdump(): -# type_id = request.args.get('type_id') -# key_id = request.args.get('key_id') -# return show_correlation('pgpdump', type_id, key_id) -# -# -# @hashDecoded.route('/correlation/show_cryptocurrency') -# @login_required -# @login_analyst -# def show_cryptocurrency(): -# type_id = request.args.get('type_id') -# key_id = request.args.get('key_id') -# return show_correlation('cryptocurrency', type_id, key_id) + + + + @hashDecoded.route('/correlation/cryptocurrency_range_type_json') @login_required @@ -1299,30 +878,16 @@ def username_range_type_json(): date_to = request.args.get('date_to') return correlation_type_range_type_json('username', date_from, date_to) -@hashDecoded.route('/correlation/pgpdump_graph_node_json') -@login_required -@login_read_only -def pgpdump_graph_node_json(): - type_id = request.args.get('type_id') - key_id = request.args.get('key_id') - return correlation_graph_node_json('pgpdump', type_id, key_id) +########################################################################################## +########################################################################################## +########################################################################################## +########################################################################################## +########################################################################################## 
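# The per-type *_graph_node_json routes removed above are covered by the generic
# graph_node_json view updated in var/www/blueprints/correlation.py earlier in this
# patch. A rough sketch of an equivalent client-side call; only the query-parameter
# names come from the diff, while the use of the requests library, the base URL and
# the '/correlation/graph_node_json' path are assumptions for illustration:
import requests

def get_graph_node(base_url, obj_type, subtype, obj_id, max_nodes=300):
    # obj_type: 'pgpdump', 'cryptocurrency' or 'username' (the old per-type routes);
    # subtype replaces the old type_id, obj_id the old key_id/correlation_id
    params = {
        'object_type': obj_type,
        'type_id': subtype,
        'correlation_id': obj_id,
        'max_nodes': max_nodes,
    }
    r = requests.get(f'{base_url}/correlation/graph_node_json', params=params)
    r.raise_for_status()
    # expected to keep the {"nodes": [...], "links": [...]} shape the removed
    # correlation_graph_node_json helper produced
    return r.json()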
+########################################################################################## +########################################################################################## +########################################################################################## +########################################################################################## -# # TODO: REFRACTOR -@hashDecoded.route('/correlation/cryptocurrency_graph_node_json') -@login_required -@login_read_only -def cryptocurrency_graph_node_json(): - type_id = request.args.get('type_id') - key_id = request.args.get('key_id') - return correlation_graph_node_json('cryptocurrency', type_id, key_id) - -@hashDecoded.route('/correlation/username_graph_node_json') -@login_required -@login_read_only -def username_graph_node_json(): - type_id = request.args.get('type_id') - key_id = request.args.get('key_id') - return correlation_graph_node_json('username', type_id, key_id) # # TODO: REFRACTOR @hashDecoded.route('/correlation/pgpdump_graph_line_json') diff --git a/var/www/modules/hashDecoded/templates/hashDecoded.html b/var/www/modules/hashDecoded/templates/hashDecoded.html index ef52ee7f..c7a25abe 100644 --- a/var/www/modules/hashDecoded/templates/hashDecoded.html +++ b/var/www/modules/hashDecoded/templates/hashDecoded.html @@ -253,7 +253,7 @@ chart.stackBarChart = barchart_type_stack("{{ url_for('hashDecoded.range_type_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id'); {% endif %} - draw_pie_chart("pie_chart_encoded" ,"{{ url_for('hashDecoded.decoder_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="); + draw_pie_chart("pie_chart_encoded" ,"{{ url_for('objects_decoded.decoder_pie_chart_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}&encoding="); draw_pie_chart("pie_chart_top5_types" ,"{{ url_for('hashDecoded.top5_type_json') }}?date_from={{date_from}}&date_to={{date_to}}&type={{type}}", "{{ url_for('hashDecoded.hashDecoded_page') }}?date_from={{date_from}}&date_to={{date_to}}&type="); chart.onResize(); @@ -280,18 +280,18 @@ function toggle_sidebar(){
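# The pie chart above now reads from the new objects_decoded.decoder_pie_chart_json
# route added earlier in this patch, which delegates to
# Decodeds.api_pie_chart_decoder_json(date_from, date_to, mimetype).
# For the existing draw_pie_chart() template code to keep working, the payload is
# presumably the same shape the removed hashDecoded.decoder_type_json produced;
# a sketch of that assumed shape (decoder names and counts here are placeholders):
expected_payload = [
    {'name': 'base64', 'value': 0},
    {'name': 'binary', 'value': 0},
    {'name': 'hexadecimal', 'value': 0},
]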