diff --git a/OVERVIEW.md b/OVERVIEW.md index 38488a74..b84f6447 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -168,6 +168,88 @@ Redis and ARDB overview | binary_hash:**hash** | **item** | **nb_seen** | | hexadecimal_hash:**hash** | **item** | **nb_seen** | +#### PgpDump + +##### Hset: +| Key | Field | Value | +| ------ | ------ | ------ | +| pgpdump_metadata_key:*key id* | first_seen | **date** | +| | last_seen | **date** | +| | | +| pgpdump_metadata_name:*name* | first_seen | **date** | +| | last_seen | **date** | +| | | +| pgpdump_metadata_mail:*mail* | first_seen | **date** | +| | last_seen | **date** | + +##### set: +| Key | Value | +| ------ | ------ | +| set_pgpdump_key:*key id* | *item_path* | +| | | +| set_pgpdump_name:*name* | *item_path* | +| | | +| set_pgpdump_mail:*mail* | *item_path* | + +##### Hset date: +| Key | Field | Value | +| ------ | ------ | +| pgpdump:key:*date* | *key* | *nb seen* | +| | | +| pgpdump:name:*date* | *name* | *nb seen* | +| | | +| pgpdump:mail:*date* | *mail* | *nb seen* | + +##### zset: +| Key | Field | Value | +| ------ | ------ | ------ | +| pgpdump_all:key | *key* | *nb seen* | +| | | +| pgpdump_all:name | *name* | *nb seen* | +| | | +| pgpdump_all:mail | *mail* | *nb seen* | + +##### set: +| Key | Value | +| ------ | ------ | +| item_pgpdump_key:*item_path* | *key* | +| | | +| item_pgpdump_name:*item_path* | *name* | +| | | +| item_pgpdump_mail:*item_path* | *mail* | + +#### Cryptocurrency + +Supported cryptocurrency: +- bitcoin + +##### Hset: +| Key | Field | Value | +| ------ | ------ | ------ | +| cryptocurrency_metadata_**cryptocurrency name**:**cryptocurrency address** | first_seen | **date** | +| | last_seen | **date** | + +##### set: +| Key | Value | +| ------ | ------ | +| set_cryptocurrency_**cryptocurrency name**:**cryptocurrency address** | **item_path** | + +##### Hset date: +| Key | Field | Value | +| ------ | ------ | +| cryptocurrency:**cryptocurrency name**:**date** | **cryptocurrency address** | **nb seen** | + +##### zset: +| Key | Field | Value | +| ------ | ------ | ------ | +| cryptocurrency_all:**cryptocurrency name** | **cryptocurrency address** | **nb seen** | + +##### set: +| Key | Value | +| ------ | ------ | +| item_cryptocurrency_**cryptocurrency name**:**item_path** | **cryptocurrency address** | + + ## DB9 - Crawler: ##### Hset: diff --git a/README.md b/README.md index 1b6eab35..9f9d8d4d 100644 --- a/README.md +++ b/README.md @@ -62,11 +62,12 @@ Features * Create events on [MISP](https://github.com/MISP/MISP) and cases on [The Hive](https://github.com/TheHive-Project/TheHive) * Automatic paste export at detection on [MISP](https://github.com/MISP/MISP) (events) and [The Hive](https://github.com/TheHive-Project/TheHive) (alerts) on selected tags * Extracted and decoded files can be searched by date range, type of file (mime-type) and encoding discovered -* Graph relationships between decoded file (hashes) +* Graph relationships between decoded file (hashes), similar PGP UIDs and addresses of cryptocurrencies * Tor hidden services crawler to crawl and parse output * Tor onion availability is monitored to detect up and down of hidden services * Browser hidden services are screenshot and integrated in the analysed output including a blurring screenshot interface (to avoid "burning the eyes" of the security analysis with specific content) * Tor hidden services is part of the standard framework, all the AIL modules are available to the crawled hidden services +* Generic web crawler to trigger crawling on demand or at 
regular interval URL or Tor hidden services Installation diff --git a/ansible/roles/ail-host/tasks/main.yml b/ansible/roles/ail-host/tasks/main.yml index 43b23a8e..af520fe4 100644 --- a/ansible/roles/ail-host/tasks/main.yml +++ b/ansible/roles/ail-host/tasks/main.yml @@ -145,15 +145,9 @@ extra_args: --upgrade virtualenv: /opt/AIL-framework/AILENV -- name: Install pip requirements - pip: - requirements: /opt/AIL-framework/pip_packages_requirement.txt - extra_args: --upgrade - virtualenv: /opt/AIL-framework/AILENV - - name: Install pip3 requirements pip: - requirements: /opt/AIL-framework/pip3_packages_requirement.txt + requirements: /opt/AIL-framework/requirements.txt executable: pip3 extra_args: --upgrade diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py index ff76c5f0..a3cfcfc7 100755 --- a/bin/Bitcoin.py +++ b/bin/Bitcoin.py @@ -21,6 +21,7 @@ from pubsublogger import publisher import re import time +import redis from hashlib import sha256 @@ -44,6 +45,7 @@ def check_bc(bc): def search_key(content, message, paste): bitcoin_address = re.findall(regex_bitcoin_public_address, content) bitcoin_private_key = re.findall(regex_bitcoin_private_key, content) + date = str(paste._get_p_date()) validate_address = False key = False if(len(bitcoin_address) >0): @@ -56,6 +58,8 @@ def search_key(content, message, paste): for private_key in bitcoin_private_key: print('Bitcoin private key found : {}'.format(private_key)) key = True + # build bitcoin correlation + save_cryptocurrency_data('bitcoin', date, message, address) if(validate_address): p.populate_set_out(message, 'Duplicate') @@ -75,6 +79,31 @@ def search_key(content, message, paste): publisher.warning('{}Detected {} Bitcoin private key;{}'.format( to_print, len(bitcoin_private_key),paste.p_rel_path)) +def save_cryptocurrency_data(cryptocurrency_name, date, item_path, cryptocurrency_address): + # create basic medata + if not serv_metadata.exists('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address)): + serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'first_seen', date) + serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) + else: + last_seen = serv_metadata.hget('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen') + if not last_seen: + serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) + else: + if int(last_seen) < int(date): + serv_metadata.hset('cryptocurrency_metadata_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), 'last_seen', date) + + # global set + serv_metadata.sadd('set_cryptocurrency_{}:{}'.format(cryptocurrency_name, cryptocurrency_address), item_path) + + # daily + serv_metadata.hincrby('cryptocurrency:{}:{}'.format(cryptocurrency_name, date), cryptocurrency_address, 1) + + # all type + serv_metadata.zincrby('cryptocurrency_all:{}'.format(cryptocurrency_name), cryptocurrency_address, 1) + + # item_metadata + serv_metadata.sadd('item_cryptocurrency_{}:{}'.format(cryptocurrency_name, item_path), cryptocurrency_address) + if __name__ == "__main__": publisher.port = 6380 publisher.channel = "Script" @@ -84,6 +113,12 @@ if __name__ == "__main__": # Setup the I/O queues p = Process(config_section) + serv_metadata = redis.StrictRedis( + host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", 
"db"), + decode_responses=True) + # Sent to the logging a description of the module publisher.info("Run Keys module ") diff --git a/bin/Crawler.py b/bin/Crawler.py index a7e9365b..e5864059 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -12,6 +12,7 @@ import time import subprocess import requests +from collections import deque from pyfaup.faup import Faup sys.path.append(os.environ['AIL_BIN']) @@ -43,25 +44,49 @@ def unpack_url(url): to_crawl = {} faup.decode(url) url_unpack = faup.get() - to_crawl['domain'] = url_unpack['domain'].decode() + # # FIXME: # TODO: remove me + try: + to_crawl['domain'] = url_unpack['domain'].decode() + except: + to_crawl['domain'] = url_unpack['domain'] + to_crawl['domain'] = to_crawl['domain'].lower() + + + # force lower case domain/subdomain (rfc4343) + # # FIXME: # TODO: remove me + try: + url_host = url_unpack['host'].decode() + except: + url_host = url_unpack['host'] + + new_url_host = url_host.lower() + url_lower_case = url.replace(url_host, new_url_host, 1) if url_unpack['scheme'] is None: to_crawl['scheme'] = 'http' - url= 'http://{}'.format(url_unpack['url'].decode()) + url= 'http://{}'.format(url_lower_case) else: - scheme = url_unpack['scheme'].decode() + # # FIXME: # TODO: remove me + try: + scheme = url_unpack['scheme'].decode() + except Exception as e: + scheme = url_unpack['scheme'] if scheme in default_proto_map: to_crawl['scheme'] = scheme - url = url_unpack['url'].decode() + url = url_lower_case else: - redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_unpack['url'].decode())) + redis_crawler.sadd('new_proto', '{} {}'.format(scheme, url_lower_case)) to_crawl['scheme'] = 'http' - url= 'http://{}'.format(url_unpack['url'].decode().replace(scheme, '', 1)) + url= 'http://{}'.format(url_lower_case.replace(scheme, '', 1)) if url_unpack['port'] is None: to_crawl['port'] = default_proto_map[to_crawl['scheme']] else: - port = url_unpack['port'].decode() + # # FIXME: # TODO: remove me + try: + port = url_unpack['port'].decode() + except: + port = url_unpack['port'] # Verify port number #################### make function to verify/correct port number try: int(port) @@ -80,12 +105,16 @@ def unpack_url(url): to_crawl['url'] = url if to_crawl['port'] == 80: - to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], url_unpack['host'].decode()) + to_crawl['domain_url'] = '{}://{}'.format(to_crawl['scheme'], new_url_host) else: - to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], url_unpack['host'].decode(), to_crawl['port']) + to_crawl['domain_url'] = '{}://{}:{}'.format(to_crawl['scheme'], new_url_host, to_crawl['port']) + # # FIXME: # TODO: remove me + try: + to_crawl['tld'] = url_unpack['tld'].decode() + except: + to_crawl['tld'] = url_unpack['tld'] - to_crawl['tld'] = url_unpack['tld'].decode() return to_crawl # get url, paste and service_type to crawl @@ -275,7 +304,7 @@ if __name__ == '__main__': #mode = sys.argv[1] splash_port = sys.argv[1] - rotation_mode = ['onion', 'regular'] + rotation_mode = deque(['onion', 'regular']) default_proto_map = {'http': 80, 'https': 443} ######################################################## add ftp ??? 
@@ -333,6 +362,7 @@ if __name__ == '__main__': update_auto_crawler() + rotation_mode.rotate() to_crawl = get_elem_to_crawl(rotation_mode) if to_crawl: url_data = unpack_url(to_crawl['url']) diff --git a/bin/Credential.py b/bin/Credential.py index 8da84883..2eeb3d55 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -124,6 +124,11 @@ if __name__ == "__main__": for url in sites: faup.decode(url) domain = faup.get()['domain'] + ## TODO: # FIXME: remove me + try: + domain = domain.decode() + except: + pass if domain in creds_sites.keys(): creds_sites[domain] += 1 else: @@ -143,6 +148,11 @@ if __name__ == "__main__": maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] faup.decode(maildomains) tld = faup.get()['tld'] + ## TODO: # FIXME: remove me + try: + tld = tld.decode() + except: + pass server_statistics.hincrby('credential_by_tld:'+date, tld, 1) else: publisher.info(to_print) diff --git a/bin/Keys.py b/bin/Keys.py index 7fcc7a1e..eb06601a 100755 --- a/bin/Keys.py +++ b/bin/Keys.py @@ -25,13 +25,26 @@ from Helper import Process def search_key(paste): content = paste.get_p_content() find = False + get_pgp_content = False if '-----BEGIN PGP MESSAGE-----' in content: publisher.warning('{} has a PGP enc message'.format(paste.p_name)) msg = 'infoleak:automatic-detection="pgp-message";{}'.format(message) p.populate_set_out(msg, 'Tags') + get_pgp_content = True find = True + if '-----BEGIN PGP PUBLIC KEY BLOCK-----' in content: + msg = 'infoleak:automatic-detection="pgp-public-key-block";{}'.format(message) + p.populate_set_out(msg, 'Tags') + get_pgp_content = True + + if '-----BEGIN PGP SIGNATURE-----' in content: + msg = 'infoleak:automatic-detection="pgp-signature";{}'.format(message) + p.populate_set_out(msg, 'Tags') + get_pgp_content = True + + if '-----BEGIN CERTIFICATE-----' in content: publisher.warning('{} has a certificate message'.format(paste.p_name)) @@ -108,6 +121,10 @@ def search_key(paste): p.populate_set_out(msg, 'Tags') find = True + # pgp content + if get_pgp_content: + p.populate_set_out(message, 'PgpDump') + if find : #Send to duplicate diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 6d7c6bf7..e4175b90 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -187,6 +187,8 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "Keys" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Keys.py; read x" sleep 0.1 + screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./PgpDump.py; read x" + sleep 0.1 screen -S "Script_AIL" -X screen -t "Decoder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Decoder.py; read x" sleep 0.1 screen -S "Script_AIL" -X screen -t "Bitcoin" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Bitcoin.py; read x" diff --git a/bin/LibInjection.py b/bin/LibInjection.py index 4b851f21..65789e62 100755 --- a/bin/LibInjection.py +++ b/bin/LibInjection.py @@ -29,8 +29,17 @@ def analyse(url, path): faup.decode(url) url_parsed = faup.get() pprint.pprint(url_parsed) - resource_path = url_parsed['resource_path'] - query_string = url_parsed['query_string'] + ## TODO: # FIXME: remove me + try: + resource_path = url_parsed['resource_path'].encode() + except: + resource_path = url_parsed['resource_path'] + + ## TODO: # FIXME: remove me + try: + query_string = url_parsed['query_string'].encode() + except: + query_string = url_parsed['query_string'] result_path = {'sqli' : False} result_query = {'sqli' : False} @@ -56,7 +65,11 @@ def analyse(url, path): p.populate_set_out(msg, 'Tags') #statistics - tld = url_parsed['tld'] + ## TODO: # 
FIXME: remove me + try: + tld = url_parsed['tld'].decode() + except: + tld = url_parsed['tld'] if tld is not None: date = datetime.datetime.now().strftime("%Y%m") server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) diff --git a/bin/Mail.py b/bin/Mail.py index b8311d3c..b90ee4ab 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -95,6 +95,10 @@ if __name__ == "__main__": faup.decode(mail) tld = faup.get()['tld'] + try: + tld = tld.decode() + except: + pass server_statistics.hincrby('mail_by_tld:'+date, tld, MX_values[1][mail]) else: diff --git a/bin/Onion.py b/bin/Onion.py index cbe8bb9f..2aa56d8e 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -224,7 +224,11 @@ if __name__ == "__main__": faup.decode(url) url_unpack = faup.get() - domain = url_unpack['domain'].decode() + ## TODO: # FIXME: remove me + try: + domain = url_unpack['domain'].decode().lower() + except Exception as e: + domain = url_unpack['domain'].lower() ## TODO: blackilst by port ? # check blacklist @@ -233,7 +237,7 @@ if __name__ == "__main__": subdomain = re.findall(url_regex, url) if len(subdomain) > 0: - subdomain = subdomain[0][4] + subdomain = subdomain[0][4].lower() else: continue diff --git a/bin/PgpDump.py b/bin/PgpDump.py new file mode 100755 index 00000000..21ffd263 --- /dev/null +++ b/bin/PgpDump.py @@ -0,0 +1,242 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" + PgpDum module + + Extract ID from PGP Blocks +""" + +import os +import re +import time +import uuid +import redis +import signal +import datetime +import subprocess + +from pubsublogger import publisher +from bs4 import BeautifulSoup + +from Helper import Process +from packages import Paste + +class TimeoutException(Exception): + pass + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + +# save pgp message in directory, process one time each day +def save_in_file(message, pgp_content): + print('save in file') + UUID = str(uuid.uuid4()) + file_name = os.path.join(pgp_dump_dir_to_process, UUID) + with open(file_name, 'w') as f: + f.write(pgp_content) + r_serv_db.sadd('pgpdumb:uuid', '{};{}'.format(UUID, message)) + +def remove_html(item_content): + if bool(BeautifulSoup(item_content, "html.parser").find()): + soup = BeautifulSoup(item_content, 'html.parser') + # kill all script and style elements + for script in soup(["script", "style"]): + script.extract() # remove + + # get text + text = soup.get_text() + return text + else: + return item_content + +def extract_all_id(message, item_content, regex=None, is_file=False): + + if is_file: + pgp_packet = get_pgp_packet_file(item_content) + extract_id_from_output(pgp_packet) + + else: + # max execution time on regex + signal.alarm(max_execution_time) + try: + pgp_extracted_block = re.findall(regex, item_content) + except TimeoutException: + pgp_extracted_block = [] + p.incr_module_timeout_statistic() # add encoder type + print ("{0} processing timeout".format(paste.p_rel_path)) + else: + signal.alarm(0) + + for pgp_to_dump in pgp_extracted_block: + pgp_packet = get_pgp_packet(message, pgp_to_dump) + extract_id_from_output(pgp_packet) + +def get_pgp_packet(message, save_path): + save_path = '{}'.format(save_path) + if len(save_path) > 131072: + save_in_file(message, save_path) + return '' + else: + process1 = subprocess.Popen([ 'echo', '-e', save_path], stdout=subprocess.PIPE) + process2 = subprocess.Popen([ 'pgpdump'], stdin=process1.stdout, stdout=subprocess.PIPE) + process1.stdout.close() + output = process2.communicate()[0].decode() + 
return output + +def get_pgp_packet_file(file): + process1 = subprocess.Popen([ 'pgpdump', file], stdout=subprocess.PIPE) + output = process1.communicate()[0].decode() + return output + +def extract_id_from_output(pgp_dump_outpout): + all_user_id = set(re.findall(regex_user_id, pgp_dump_outpout)) + for user_id in all_user_id: + # avoid key injection in user_id: + pgp_dump_outpout.replace(user_id, '', 1) + + user_id = user_id.replace(user_id_str, '', 1) + mail = None + if ' <' in user_id: + name, mail = user_id.rsplit(' <', 1) + mail = mail[:-1] + set_name.add(name) + set_mail.add(mail) + else: + name = user_id + set_name.add(name) + + all_key_id = set(re.findall(regex_key_id, pgp_dump_outpout)) + for key_id in all_key_id: + key_id = key_id.replace(key_id_str, '', 1) + set_key.add(key_id) + +def save_pgp_data(type_pgp, date, item_path, data): + # create basic medata + if not serv_metadata.exists('pgpdump_metadata_{}:{}'.format(type_pgp, data)): + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'first_seen', date) + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + else: + last_seen = serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen') + if not last_seen: + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + else: + if int(last_seen) < int(date): + serv_metadata.hset('pgpdump_metadata_{}:{}'.format(type_pgp, data), 'last_seen', date) + + # global set + serv_metadata.sadd('set_pgpdump_{}:{}'.format(type_pgp, data), item_path) + + # daily + serv_metadata.hincrby('pgpdump:{}:{}'.format(type_pgp, date), data, 1) + + # all type + serv_metadata.zincrby('pgpdump_all:{}'.format(type_pgp), data, 1) + + # item_metadata + serv_metadata.sadd('item_pgpdump_{}:{}'.format(type_pgp, item_path), data) + + +if __name__ == '__main__': + # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) + # Port of the redis instance used by pubsublogger + publisher.port = 6380 + # Script is the default channel used for the modules. 
+ publisher.channel = 'Script' + + # Section name in bin/packages/modules.cfg + #config_section = 'PgpDump' + config_section = 'PgpDump' + + # Setup the I/O queues + p = Process(config_section) + + r_serv_db = redis.StrictRedis( + host=p.config.get("ARDB_DB", "host"), + port=p.config.getint("ARDB_DB", "port"), + db=p.config.getint("ARDB_DB", "db"), + decode_responses=True) + + serv_metadata = redis.StrictRedis( + host=p.config.get("ARDB_Metadata", "host"), + port=p.config.getint("ARDB_Metadata", "port"), + db=p.config.getint("ARDB_Metadata", "db"), + decode_responses=True) + + # Sent to the logging a description of the module + publisher.info("PgpDump started") + + # check/create pgpdump queue directory (used for huge pgp blocks) + pgp_dump_dir_to_process = os.path.join(os.environ['AIL_HOME'], 'temp', 'pgpdump') + if not os.path.isdir(pgp_dump_dir_to_process): + os.makedirs(pgp_dump_dir_to_process) + + user_id_str = 'User ID - ' + regex_user_id= '{}.+'.format(user_id_str) + + key_id_str = 'Key ID - ' + regex_key_id = '{}.+'.format(key_id_str) + regex_pgp_public_blocs = '-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----' + regex_pgp_signature = '-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----' + regex_pgp_message = '-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----' + + re.compile(regex_user_id) + re.compile(regex_key_id) + re.compile(regex_pgp_public_blocs) + re.compile(regex_pgp_signature) + re.compile(regex_pgp_message) + + max_execution_time = p.config.getint("PgpDump", "max_execution_time") + + # Endless loop getting messages from the input queue + while True: + + is_file = False + set_key = set() + set_name = set() + set_mail = set() + + if r_serv_db.scard('pgpdumb:uuid') > 0: + res = r_serv_db.spop('pgpdumb:uuid') + file_to_process, message = res.split(';', 1) + file_to_process = os.path.join(pgp_dump_dir_to_process, file_to_process) + date = datetime.datetime.now().strftime("%Y/%m/%d") + paste = Paste.Paste(message) + date = str(paste._get_p_date()) + print(message) + extract_all_id(message, file_to_process, is_file=True) + os.remove(file_to_process) + + else: + # Get one message from the input queue + message = p.get_from_set() + + if message is None: + publisher.debug("{} queue is empty, waiting".format(config_section)) + time.sleep(1) + continue + + paste = Paste.Paste(message) + + date = str(paste._get_p_date()) + content = paste.get_p_content() + content = remove_html(content) + + print(message) + + extract_all_id(message, content, regex_pgp_public_blocs) + extract_all_id(message, content, regex_pgp_signature) + extract_all_id(message, content, regex_pgp_message) + + for key_id in set_key: + print(key_id) + save_pgp_data('key', date, message, key_id) + + for name_id in set_name: + print(name_id) + save_pgp_data('name', date, message, name_id) + + for mail_id in set_mail: + print(mail_id) + save_pgp_data('mail', date, message, mail_id) diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index a5595dc7..ffb21003 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -68,10 +68,20 @@ def analyse(url, path): result_query = 0 if resource_path is not None: - result_path = is_sql_injection(resource_path.decode('utf8')) + ## TODO: # FIXME: remove me + try: + resource_path = resource_path.decode() + except: + pass + result_path = is_sql_injection(resource_path) if query_string is not None: - result_query = is_sql_injection(query_string.decode('utf8')) + ## TODO: # FIXME: remove me + 
try: + query_string = query_string.decode() + except: + pass + result_query = is_sql_injection(query_string) if (result_path > 0) or (result_query > 0): paste = Paste.Paste(path) @@ -89,6 +99,11 @@ def analyse(url, path): #statistics tld = url_parsed['tld'] if tld is not None: + ## TODO: # FIXME: remove me + try: + tld = tld.decode() + except: + pass date = datetime.datetime.now().strftime("%Y%m") server_statistics.hincrby('SQLInjection_by_tld:'+date, tld, 1) diff --git a/bin/Update.py b/bin/Update.py index 4001c5d0..45b72918 100755 --- a/bin/Update.py +++ b/bin/Update.py @@ -145,10 +145,14 @@ def get_git_upper_tags_remote(current_tag, is_fork): list_upper_tags = [] if list_all_tags[-1][1:] == current_tag: list_upper_tags.append( (list_all_tags[-1], None) ) + # force update order + list_upper_tags.sort() return list_upper_tags for tag in list_all_tags: if float(tag[1:]) >= float(current_tag): list_upper_tags.append( (tag, None) ) + # force update order + list_upper_tags.sort() return list_upper_tags else: print('{}{}{}'.format(TERMINAL_RED, process.stderr.decode(), TERMINAL_DEFAULT)) @@ -165,6 +169,8 @@ def get_git_upper_tags_remote(current_tag, is_fork): list_upper_tags = [] if last_tag[1:] == current_tag: list_upper_tags.append( (last_tag, last_commit) ) + # force update order + list_upper_tags.sort() return list_upper_tags else: for mess_tag in list_all_tags: @@ -177,6 +183,8 @@ def get_git_upper_tags_remote(current_tag, is_fork): # add last commit if last_tag not in list_upper_tags[-1][0]: list_upper_tags.append( (last_tag, last_commit) ) + # force update order + list_upper_tags.sort() return list_upper_tags else: diff --git a/bin/Web.py b/bin/Web.py index 7cc96822..ca4366e8 100755 --- a/bin/Web.py +++ b/bin/Web.py @@ -94,18 +94,22 @@ if __name__ == "__main__": faup.decode(url) domain = faup.get_domain() subdomain = faup.get_subdomain() - f1 = None publisher.debug('{} Published'.format(url)) - if f1 == "onion": - print(domain) - if subdomain is not None: - subdomain = subdomain.decode('utf8') + ## TODO: # FIXME: remove me + try: + subdomain = subdomain.decode() + except: + pass if domain is not None: - domain = domain.decode('utf8') + ## TODO: # FIXME: remove me + try: + domain = domain.decode() + except: + pass domains_list.append(domain) hostl = avoidNone(subdomain) + avoidNone(domain) diff --git a/bin/helper/reprocess_bitcoin.py b/bin/helper/reprocess_bitcoin.py new file mode 100755 index 00000000..5e609f1f --- /dev/null +++ b/bin/helper/reprocess_bitcoin.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import gzip +import base64 +import uuid +import datetime +import base64 +import redis +import json +import time + +sys.path.append(os.environ['AIL_BIN']) +from Helper import Process + +def substract_date(date_from, date_to): + date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) + date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) + delta = date_to - date_from # timedelta + l_date = [] + for i in range(delta.days + 1): + date = date_from + datetime.timedelta(i) + l_date.append( date.strftime('%Y%m%d') ) + return l_date + +config_section = 'Global' +p = Process(config_section) + +r_tags = redis.StrictRedis( + host=p.config.get("ARDB_Tags", "host"), + port=p.config.getint("ARDB_Tags", "port"), + db=p.config.getint("ARDB_Tags", "db"), + decode_responses=True) + +tag = 'infoleak:automatic-detection="bitcoin-address"' + +# get tag first/last seen +first_seen = 
r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') +last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') + +l_dates = substract_date(first_seen, last_seen) + +# get all tagged items +for date in l_dates: + daily_tagged_items = r_tags.smembers('{}:{}'.format(tag, date)) + + for item in daily_tagged_items: + p.populate_set_out(item) diff --git a/bin/helper/reprocess_pgp_message.py b/bin/helper/reprocess_pgp_message.py new file mode 100755 index 00000000..bf8d78ee --- /dev/null +++ b/bin/helper/reprocess_pgp_message.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import gzip +import base64 +import uuid +import datetime +import base64 +import redis +import json +import time + +sys.path.append(os.environ['AIL_BIN']) +from Helper import Process + +def substract_date(date_from, date_to): + date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) + date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) + delta = date_to - date_from # timedelta + l_date = [] + for i in range(delta.days + 1): + date = date_from + datetime.timedelta(i) + l_date.append( date.strftime('%Y%m%d') ) + return l_date + +config_section = 'Keys' +p = Process(config_section) + +r_tags = redis.StrictRedis( + host=p.config.get("ARDB_Tags", "host"), + port=p.config.getint("ARDB_Tags", "port"), + db=p.config.getint("ARDB_Tags", "db"), + decode_responses=True) + +tag = 'infoleak:automatic-detection="pgp-message"' + +# get tag first/last seen +first_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') +last_seen = r_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') + +l_dates = substract_date(first_seen, last_seen) + +# get all tagged items +for date in l_dates: + daily_tagged_items = r_tags.smembers('{}:{}'.format(tag, date)) + + for item in daily_tagged_items: + p.populate_set_out(item, 'PgpDump') diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index f8be2f9b..1087880b 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -125,7 +125,15 @@ class Paste(object): """ - paste = self.cache.get(self.p_path) + try: + paste = self.cache.get(self.p_path) + except UnicodeDecodeError: + paste = None + except Exception as e: + print("ERROR in: " + self.p_path) + print(e) + paste = None + if paste is None: try: with gzip.open(self.p_path, 'r') as f: diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index f9483476..ea0ea55c 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -71,6 +71,9 @@ max_execution_time = 90 [Onion] max_execution_time = 180 +[PgpDump] +max_execution_time = 60 + [Base64] path = Base64/ max_execution_time = 60 diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index c3c09a4e..f1fe5e3d 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -118,6 +118,10 @@ publish = Redis_Duplicate,Redis_Tags [Keys] subscribe = Redis_Global +publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags + +[PgpDump] +subscribe = Redis_PgpDump publish = Redis_Duplicate,Redis_Tags [ApiKey] diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index d1417e6e..4e36c1c9 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -48,15 +48,16 @@ class TorSplashCrawler(): 'DEPTH_LIMIT': crawler_options['depth_limit'] }) - def crawl(self, type, crawler_options, date, url, domain, port, original_item): - self.process.crawl(self.crawler, type=type, 
crawler_options=crawler_options, date=date, url=url, domain=domain, port=port, original_item=original_item) + def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, original_item): + self.process.crawl(self.crawler, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, original_item=original_item) self.process.start() class TorSplashSpider(Spider): name = 'TorSplashSpider' - def __init__(self, type, crawler_options, date, url, domain, port, original_item, *args, **kwargs): + def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, original_item, *args, **kwargs): self.type = type + self.requested_mode = requested_mode self.original_item = original_item self.root_key = None self.start_urls = url @@ -183,7 +184,7 @@ class TorSplashCrawler(): if 'png' in response.data: size_screenshot = (len(response.data['png'])*3) /4 - if size_screenshot < 5000000: #bytes + if size_screenshot < 5000000 or self.requested_mode: #bytes or manual/auto image_content = base64.standard_b64decode(response.data['png'].encode()) hash = sha256(image_content).hexdigest() img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12]) diff --git a/bin/torcrawler/tor_crawler.py b/bin/torcrawler/tor_crawler.py index 13a67545..2d8365c4 100755 --- a/bin/torcrawler/tor_crawler.py +++ b/bin/torcrawler/tor_crawler.py @@ -43,8 +43,9 @@ if __name__ == '__main__': original_item = crawler_json['item'] crawler_options = crawler_json['crawler_options'] date = crawler_json['date'] + requested_mode = crawler_json['requested'] redis_cache.delete('crawler_request:{}'.format(uuid)) crawler = TorSplashCrawler(splash_url, crawler_options) - crawler.crawl(service_type, crawler_options, date, url, domain, port, original_item) + crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, original_item) diff --git a/installing_deps.sh b/installing_deps.sh index f8f2846f..e8de3fd6 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -67,6 +67,14 @@ sudo ldconfig popd popd +# pgpdump +test ! -d pgpdump && git clone https://github.com/kazu-yamamoto/pgpdump.git +pushd pgpdump/ +./configure +make +sudo make install +popd + # ARDB # test ! 
-d ardb/ && git clone https://github.com/yinqiwen/ardb.git pushd ardb/ @@ -99,7 +107,7 @@ mkdir -p $AIL_HOME/PASTES pip3 install -U pip pip3 install 'git+https://github.com/D4-project/BGP-Ranking.git/@7e698f87366e6f99b4d0d11852737db28e3ddc62#egg=pybgpranking&subdirectory=client' -pip3 install -U -r pip3_packages_requirement.txt +pip3 install -U -r requirements.txt # Pyfaup pushd faup/src/lib/bindings/python/ diff --git a/pip3_packages_requirement.txt b/requirements.txt similarity index 98% rename from pip3_packages_requirement.txt rename to requirements.txt index 4f5372bc..fdccf9bb 100644 --- a/pip3_packages_requirement.txt +++ b/requirements.txt @@ -54,6 +54,8 @@ DomainClassifier #Indexer requirements whoosh +beautifulsoup4 + ipaddress pycountry diff --git a/update/v1.7/Update.py b/update/v1.7/Update.py new file mode 100755 index 00000000..d2a826e6 --- /dev/null +++ b/update/v1.7/Update.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import datetime +import configparser + +if __name__ == '__main__': + + start_deb = time.time() + + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + cfg = configparser.ConfigParser() + cfg.read(configfile) + + r_serv = redis.StrictRedis( + host=cfg.get("ARDB_DB", "host"), + port=cfg.getint("ARDB_DB", "port"), + db=cfg.getint("ARDB_DB", "db"), + decode_responses=True) + + #Set current ail version + r_serv.set('ail:version', 'v1.7') + + #Set current ail version + r_serv.set('ail:update_date_v1.7', datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v1.7/Update.sh b/update/v1.7/Update.sh new file mode 100755 index 00000000..fe941541 --- /dev/null +++ b/update/v1.7/Update.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -k & +wait + +echo "" +echo -e $GREEN"Update DomainClassifier"$DEFAULT +echo "" + +cd $AIL_HOME +git clone https://github.com/kazu-yamamoto/pgpdump.git +cd pgpdump +./configure +make +sudo make install +wait +echo "" + +echo "" +echo -e $GREEN"Update requirement"$DEFAULT +echo "" +pip3 install beautifulsoup4 + +bash ${AIL_BIN}LAUNCH.sh -lav & +wait +echo "" + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v1.7/Update.py & +wait +echo "" +echo "" + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -k & +wait + +echo "" +echo -e $GREEN"Update thirdparty ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -t & +wait + + +echo "" + +exit 0 diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 2e8c8375..e004cb5a 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -116,10 +116,6 @@ except IOError: f = open('templates/ignored_modules.txt', 'w') f.close() -activate_crawler = cfg.get("Crawler", "activate_crawler") -if activate_crawler != 'True': - toIgnoreModule.add('hiddenServices') - # Dynamically import routes and functions from modules # Also, prepare header.html to_add_to_header_dico = {} diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index d4251e5f..899a26b5 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -173,6 +173,8 @@ REPO_ORIGIN = 'https://github.com/CIRCL/AIL-framework.git' max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs")) +crawler_enabled = cfg.getboolean("Crawler", "activate_crawler") + # VT try: from virusTotalKEYS import vt_key diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index eba8541a..8a1ffaab 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -297,6 +297,13 @@ def submit(): ltagsgalaxies = request.form['tags_galaxies'] paste_content = request.form['paste_content'] + is_file = False + if 'file' in request.files: + file = request.files['file'] + if file: + if file.filename: + is_file = True + submitted_tag = 'infoleak:submission="manual"' #active taxonomies @@ -307,6 +314,7 @@ def submit(): if ltags or ltagsgalaxies: if not addTagsVerification(ltags, ltagsgalaxies): content = 'INVALID TAGS' + print(content) return content, 400 # add submitted tags @@ -315,9 +323,7 @@ def submit(): else: ltags = submitted_tag - if 'file' in request.files: - - file = request.files['file'] + if is_file: if file: if file and allowed_file(file.filename): @@ -357,6 +363,7 @@ def submit(): else: content = 'wrong file type, allowed_extensions: sh, pdf, zip, gz, tar.gz or remove the extension' + print(content) return content, 400 @@ -379,9 +386,11 @@ def submit(): else: content = 'size error' + print(content) return content, 400 content = 'submit aborded' + print(content) return content, 400 diff --git a/var/www/modules/hashDecoded/Flask_hashDecoded.py b/var/www/modules/hashDecoded/Flask_hashDecoded.py index db60e0c8..83ce54b8 100644 --- a/var/www/modules/hashDecoded/Flask_hashDecoded.py +++ b/var/www/modules/hashDecoded/Flask_hashDecoded.py @@ -13,6 +13,8 @@ from Date import Date from io import BytesIO import 
zipfile +from hashlib import sha256 + import requests from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, send_file from flask_login import login_required @@ -30,6 +32,10 @@ PASTES_FOLDER = Flask_config.PASTES_FOLDER hashDecoded = Blueprint('hashDecoded', __name__, template_folder='templates') +## TODO: put me in option +all_cryptocurrency = ['bitcoin', 'monero'] +all_pgpdump = ['key', 'name', 'mail'] + # ============ FUNCTIONS ============ def get_date_range(num_day): @@ -67,13 +73,13 @@ def get_file_icon(estimated_type): if file_type == 'application': file_icon = 'fa-file ' elif file_type == 'audio': - file_icon = 'fa-file-video-o ' + file_icon = 'fa-file-audio ' elif file_type == 'image': - file_icon = 'fa-file-image-o' + file_icon = 'fa-file-image' elif file_type == 'text': - file_icon = 'fa-file-text-o' + file_icon = 'fa-file-alt' else: - file_icon = 'fa-file-o' + file_icon = 'fa-sticky-note' return file_icon @@ -89,13 +95,384 @@ def get_file_icon_text(estimated_type): elif file_type == 'text': file_icon_text = '\uf15c' else: - file_icon_text = '\uf15b' + file_icon_text = '\uf249' return file_icon_text +def get_icon(correlation_type, type_id): + icon_text = 'fas fa-sticky-note' + if correlation_type == 'pgpdump': + # set type_id icon + if type_id == 'key': + icon_text = 'fas fa-key' + elif type_id == 'name': + icon_text = 'fas fa-user-tag' + elif type_id == 'mail': + icon_text = 'fas fa-at' + else: + icon_text = 'times' + elif correlation_type == 'cryptocurrency': + if type_id == 'bitcoin': + icon_text = 'fab fa-btc' + elif type_id == 'monero': + icon_text = 'fab fa-monero' + elif type_id == 'ethereum': + icon_text = 'fab fa-ethereum' + else: + icon_text = 'fas fa-coins' + return icon_text + +def get_icon_text(correlation_type, type_id): + icon_text = '\uf249' + if correlation_type == 'pgpdump': + if type_id == 'key': + icon_text = '\uf084' + elif type_id == 'name': + icon_text = '\uf507' + elif type_id == 'mail': + icon_text = '\uf1fa' + else: + icon_text = 'times' + elif correlation_type == 'cryptocurrency': + if type_id == 'bitcoin': + icon_text = '\uf15a' + elif type_id == 'monero': + icon_text = '\uf3d0' + elif type_id == 'ethereum': + icon_text = '\uf42e' + else: + icon_text = '\uf51e' + return icon_text + +def get_all_types_id(correlation_type): + if correlation_type == 'pgpdump': + return all_pgpdump + elif correlation_type == 'cryptocurrency': + return all_cryptocurrency + else: + return [] + +def is_valid_type_id(correlation_type, type_id): + all_type_id = get_all_types_id(correlation_type) + if type_id in all_type_id: + return True + else: + return False + +def get_key_id_metadata(correlation_type, type_id, key_id): + key_id_metadata = {} + if r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)): + key_id_metadata['first_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'first_seen') + key_id_metadata['first_seen'] = '{}/{}/{}'.format(key_id_metadata['first_seen'][0:4], key_id_metadata['first_seen'][4:6], key_id_metadata['first_seen'][6:8]) + key_id_metadata['last_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'last_seen') + key_id_metadata['last_seen'] = '{}/{}/{}'.format(key_id_metadata['last_seen'][0:4], key_id_metadata['last_seen'][4:6], key_id_metadata['last_seen'][6:8]) + key_id_metadata['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(correlation_type, type_id, key_id)) + return key_id_metadata + +def 
list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id): + sparklines_value = [] + for date_day in date_range_sparkline: + nb_seen_this_day = r_serv_metadata.hget('{}:{}:{}'.format(correlation_type, type_id, date_day), key_id) + if nb_seen_this_day is None: + nb_seen_this_day = 0 + sparklines_value.append(int(nb_seen_this_day)) + return sparklines_value + +def get_all_keys_id_from_item(correlation_type, item_path): + all_keys_id_dump = set() + if item_path is not None: + for type_id in get_all_types_id(correlation_type): + res = r_serv_metadata.smembers('item_{}_{}:{}'.format(correlation_type, type_id, item_path)) + for key_id in res: + all_keys_id_dump.add( (key_id, type_id) ) + return all_keys_id_dump + def one(): return 1 +''' +def decode_base58(bc, length): + n = 0 + for char in bc: + n = n * 58 + digits58.index(char) + return n.to_bytes(length, 'big') + +def check_bc(bc): + try: + bcbytes = decode_base58(bc, 25) + return bcbytes[-4:] == sha256(sha256(bcbytes[:-4]).digest()).digest()[:4] + except Exception: + return False +''' + +def get_correlation_type_search_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.all_pgpdump_search' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.all_cryptocurrency_search' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_correlation_type_page_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.pgpdump_page' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.cryptocurrency_page' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_show_key_id_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.show_pgpdump' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.show_cryptocurrency' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_range_type_json_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.pgpdump_range_type_json' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.cryptocurrency_range_type_json' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_graph_node_json_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.pgpdump_graph_node_json' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.cryptocurrency_graph_node_json' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_graph_line_json_endpoint(correlation_type): + if correlation_type == 'pgpdump': + endpoint = 'hashDecoded.pgpdump_graph_line_json' + elif correlation_type == 'cryptocurrency': + endpoint = 'hashDecoded.cryptocurrency_graph_line_json' + else: + endpoint = 'hashDecoded.hashDecoded_page' + return endpoint + +def get_font_family(correlation_type): + if correlation_type == 'pgpdump': + font = 'fa' + elif correlation_type == 'cryptocurrency': + font = 'fab' + else: + font = 'fa' + return font + +############ CORE CORRELATION ############ + +def main_correlation_page(correlation_type, type_id, date_from, date_to, show_decoded_files): + + if type_id == 'All types': + type_id = None + + # verify type input + if type_id is not None: + #retrieve char + type_id = type_id.replace(' ', '') + if not is_valid_type_id(correlation_type, type_id): + type_id = None + + date_range = [] + if date_from is not None and date_to is not None: + 
#change format + try: + if len(date_from) != 8: + date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] + date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] + date_range = substract_date(date_from, date_to) + except: + pass + + if not date_range: + date_range.append(datetime.date.today().strftime("%Y%m%d")) + date_from = date_range[0][0:4] + '-' + date_range[0][4:6] + '-' + date_range[0][6:8] + date_to = date_from + + else: + date_from = date_from[0:4] + '-' + date_from[4:6] + '-' + date_from[6:8] + date_to = date_to[0:4] + '-' + date_to[4:6] + '-' + date_to[6:8] + + # display day type bar chart + if len(date_range) == 1 and type is None: + daily_type_chart = True + daily_date = date_range[0] + else: + daily_type_chart = False + daily_date = None + + if type_id is None: + all_type_id = get_all_types_id(correlation_type) + else: + all_type_id = type_id + + l_keys_id_dump = set() + if show_decoded_files: + for date in date_range: + if isinstance(all_type_id, str): + l_dump = r_serv_metadata.hkeys('{}:{}:{}'.format(correlation_type, all_type_id, date)) + if l_dump: + for dump in l_dump: + l_keys_id_dump.add( (dump, all_type_id) ) + else: + for typ_id in all_type_id: + l_dump = r_serv_metadata.hkeys('{}:{}:{}'.format(correlation_type, typ_id, date)) + if l_dump: + for dump in l_dump: + l_keys_id_dump.add( (dump, typ_id) ) + + + num_day_sparkline = 6 + date_range_sparkline = get_date_range(num_day_sparkline) + + sparkline_id = 0 + keys_id_metadata = {} + for dump_res in l_keys_id_dump: + new_key_id, typ_id = dump_res + + keys_id_metadata[new_key_id] = get_key_id_metadata(correlation_type, typ_id, new_key_id) + + if keys_id_metadata[new_key_id]: + keys_id_metadata[new_key_id]['type_id'] = typ_id + keys_id_metadata[new_key_id]['type_icon'] = get_icon(correlation_type, typ_id) + + keys_id_metadata[new_key_id]['sparklines_data'] = list_sparkline_type_id_values(date_range_sparkline, correlation_type, typ_id, new_key_id) + keys_id_metadata[new_key_id]['sparklines_id'] = sparkline_id + sparkline_id += 1 + + l_type = get_all_types_id(correlation_type) + + return render_template("DaysCorrelation.html", all_metadata=keys_id_metadata, + correlation_type=correlation_type, + correlation_type_endpoint=get_correlation_type_page_endpoint(correlation_type), + correlation_type_search_endpoint=get_correlation_type_search_endpoint(correlation_type), + show_key_id_endpoint=get_show_key_id_endpoint(correlation_type), + range_type_json_endpoint=get_range_type_json_endpoint(correlation_type), + l_type=l_type, type_id=type_id, + daily_type_chart=daily_type_chart, daily_date=daily_date, + date_from=date_from, date_to=date_to, + show_decoded_files=show_decoded_files) + +def show_correlation(correlation_type, type_id, key_id): + if is_valid_type_id(correlation_type, type_id): + key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id) + if key_id_metadata: + + num_day_sparkline = 6 + date_range_sparkline = get_date_range(num_day_sparkline) + + sparkline_values = list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id) + return render_template('showCorrelation.html', key_id=key_id, type_id=type_id, + correlation_type=correlation_type, + graph_node_endpoint=get_graph_node_json_endpoint(correlation_type), + graph_line_endpoint=get_graph_line_json_endpoint(correlation_type), + font_family=get_font_family(correlation_type), + key_id_metadata=key_id_metadata, + type_icon=get_icon(correlation_type, type_id), + sparkline_values=sparkline_values) + else: + return '404' + 
else: + return 'error' + +def correlation_type_range_type_json(correlation_type, date_from, date_to): + date_range = [] + if date_from is not None and date_to is not None: + #change format + if len(date_from) != 8: + date_from = date_from[0:4] + date_from[5:7] + date_from[8:10] + date_to = date_to[0:4] + date_to[5:7] + date_to[8:10] + date_range = substract_date(date_from, date_to) + + if not date_range: + date_range.append(datetime.date.today().strftime("%Y%m%d")) + + range_type = [] + all_types_id = get_all_types_id(correlation_type) + + # one day + if len(date_range) == 1: + for type_id in all_types_id: + day_type = {} + # init 0 + for typ_id in all_types_id: + day_type[typ_id] = 0 + day_type['date'] = type_id + num_day_type_id = 0 + all_keys = r_serv_metadata.hvals('{}:{}:{}'.format(correlation_type, type_id, date_range[0])) + if all_keys: + for val in all_keys: + num_day_type_id += int(val) + day_type[type_id]= num_day_type_id + + #if day_type[type_id] != 0: + range_type.append(day_type) + + else: + # display type_id + for date in date_range: + day_type = {} + day_type['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8] + for type_id in all_types_id: + num_day_type_id = 0 + all_keys = r_serv_metadata.hvals('{}:{}:{}'.format(correlation_type, type_id, date)) + if all_keys: + for val in all_keys: + num_day_type_id += int(val) + day_type[type_id]= num_day_type_id + range_type.append(day_type) + + return jsonify(range_type) + +def correlation_graph_node_json(correlation_type, type_id, key_id): + if key_id is not None and is_valid_type_id(correlation_type, type_id): + + nodes_set_dump = set() + nodes_set_paste = set() + links_set = set() + + key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id) + + nodes_set_dump.add((key_id, 1, type_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen'])) + + #get related paste + l_pastes = r_serv_metadata.smembers('set_{}_{}:{}'.format(correlation_type, type_id, key_id)) + for paste in l_pastes: + nodes_set_paste.add((paste, 2)) + links_set.add((key_id, paste)) + + for key_id_with_type_id in get_all_keys_id_from_item(correlation_type, paste): + new_key_id, typ_id = key_id_with_type_id + if new_key_id != key_id: + + key_id_metadata = get_key_id_metadata(correlation_type, typ_id, new_key_id) + + nodes_set_dump.add((new_key_id, 3, typ_id, key_id_metadata['first_seen'], key_id_metadata['last_seen'], key_id_metadata['nb_seen'])) + links_set.add((new_key_id, paste)) + + nodes = [] + for node in nodes_set_dump: + nodes.append({"id": node[0], "group": node[1], "first_seen": node[3], "last_seen": node[4], "nb_seen_in_paste": node[5], 'icon': get_icon_text(correlation_type, node[2]),"url": url_for(get_show_key_id_endpoint(correlation_type), type_id=node[2], key_id=node[0]), 'hash': True}) + for node in nodes_set_paste: + nodes.append({"id": node[0], "group": node[1],"url": url_for('showsavedpastes.showsavedpaste', paste=node[0]), 'hash': False}) + links = [] + for link in links_set: + links.append({"source": link[0], "target": link[1]}) + json = {"nodes": nodes, "links": links} + return jsonify(json) + + else: + return jsonify({}) + # ============= ROUTES ============== @hashDecoded.route("/hashDecoded/all_hash_search", methods=['POST']) @login_required @@ -107,7 +484,6 @@ def all_hash_search(): show_decoded_files = request.form.get('show_decoded_files') return redirect(url_for('hashDecoded.hashDecoded_page', date_from=date_from, date_to=date_to, type=type, encoding=encoding, 
show_decoded_files=show_decoded_files)) - @hashDecoded.route("/hashDecoded/", methods=['GET']) @login_required def hashDecoded_page(): @@ -723,5 +1099,164 @@ def update_vt_result(): # TODO FIXME make json response return jsonify() +############################ PGPDump ############################ + +@hashDecoded.route('/decoded/pgp_by_type_json') ## TODO: REFRACTOR +def pgp_by_type_json(): + type_id = request.args.get('type_id') + date_from = request.args.get('date_from') + + if date_from is None: + date_from = datetime.date.today().strftime("%Y%m%d") + + #retrieve + char + type_id = type_id.replace(' ', '+') + default = False + + if type_id is None: + default = True + all_type = ['key', 'name', 'mail'] + else: + all_type = [ type_id ] + + num_day_type = 30 + date_range = get_date_range(num_day_type) + + #verify input + if verify_pgp_type_id(type_id) or default: + + type_value = [] + + range_decoder = [] + for date in date_range: + day_type_id = {} + day_type_id['date']= date[0:4] + '-' + date[4:6] + '-' + date[6:8] + for type_pgp in all_type: + all_vals_key = r_serv_metadata.hvals('pgp:{}:date'.format(type_id, date)) + num_day_type_id = 0 + if all_vals_key is not None: + for val_key in all_vals_key: + num_day_type_id += int(val_key) + day_type_id[type_pgp]= num_day_type_id + range_decoder.append(day_type_id) + + return jsonify(range_decoder) + else: + return jsonify() + +############################ Correlation ############################ +@hashDecoded.route("/correlation/pgpdump", methods=['GET']) +def pgpdump_page(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + type_id = request.args.get('type_id') + + show_decoded_files = request.args.get('show_decoded_files') + res = main_correlation_page('pgpdump', type_id, date_from, date_to, show_decoded_files) + return res + +@hashDecoded.route("/correlation/cryptocurrency", methods=['GET']) +def cryptocurrency_page(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + type_id = request.args.get('type_id') + + show_decoded_files = request.args.get('show_decoded_files') + res = main_correlation_page('cryptocurrency', type_id, date_from, date_to, show_decoded_files) + return res + +@hashDecoded.route("/correlation/all_pgpdump_search", methods=['POST']) +def all_pgpdump_search(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + type_id = request.form.get('type') + show_decoded_files = request.form.get('show_decoded_files') + return redirect(url_for('hashDecoded.pgpdump_page', date_from=date_from, date_to=date_to, type_id=type_id, show_decoded_files=show_decoded_files)) + +@hashDecoded.route("/correlation/all_cryptocurrency_search", methods=['POST']) +def all_cryptocurrency_search(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + type_id = request.form.get('type') + show_decoded_files = request.form.get('show_decoded_files') + return redirect(url_for('hashDecoded.cryptocurrency_page', date_from=date_from, date_to=date_to, type_id=type_id, show_decoded_files=show_decoded_files)) + +@hashDecoded.route('/correlation/show_pgpdump') +def show_pgpdump(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + return show_correlation('pgpdump', type_id, key_id) + + +@hashDecoded.route('/correlation/show_cryptocurrency') +def show_cryptocurrency(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + return show_correlation('cryptocurrency', type_id, key_id) 
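`correlation_graph_node_json()` above serialises a correlation as flat node and link lists, which is what the new correlation templates consume. An illustrative payload for a PGP key id — every id, date, count and URL below is invented for the example:

```python
# illustrative shape only — values are made up
example_graph_payload = {
    "nodes": [
        {"id": "0x1234ABCD", "group": 1, "first_seen": "2019/01/02", "last_seen": "2019/01/05",
         "nb_seen_in_paste": 3, "icon": "\uf084",
         "url": "/correlation/show_pgpdump?type_id=key&key_id=0x1234ABCD", "hash": True},
        {"id": "archive/pastebin.com_pro/2019/01/02/example.gz", "group": 2,
         "url": "/showsavedpaste/?paste=archive/pastebin.com_pro/2019/01/02/example.gz",
         "hash": False},
    ],
    "links": [
        {"source": "0x1234ABCD",
         "target": "archive/pastebin.com_pro/2019/01/02/example.gz"},
    ],
}
```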
+ +@hashDecoded.route('/correlation/cryptocurrency_range_type_json') +def cryptocurrency_range_type_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + return correlation_type_range_type_json('cryptocurrency', date_from, date_to) + +@hashDecoded.route('/correlation/pgpdump_range_type_json') +def pgpdump_range_type_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + return correlation_type_range_type_json('pgpdump', date_from, date_to) + +@hashDecoded.route('/correlation/pgpdump_graph_node_json') +def pgpdump_graph_node_json(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + return correlation_graph_node_json('pgpdump', type_id, key_id) + +@hashDecoded.route('/correlation/cryptocurrency_graph_node_json') +def cryptocurrency_graph_node_json(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + return correlation_graph_node_json('cryptocurrency', type_id, key_id) + +@hashDecoded.route('/correlation/pgpdump_graph_line_json') +def pgpdump_graph_line_json(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + return correlation_graph_line_json('pgpdump', type_id, key_id, date_from, date_to) + +def correlation_graph_line_json(correlation_type, type_id, key_id, date_from, date_to): + # verify input + if key_id is not None and is_valid_type_id(correlation_type, type_id) and r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)): + + if date_from is None or date_to is None: + nb_days_seen_in_pastes = 30 + else: + # # TODO: # FIXME: + nb_days_seen_in_pastes = 30 + + date_range_seen_in_pastes = get_date_range(nb_days_seen_in_pastes) + + json_seen_in_paste = [] + for date in date_range_seen_in_pastes: + nb_seen_this_day = r_serv_metadata.hget('{}:{}:{}'.format(correlation_type, type_id, date), key_id) + if nb_seen_this_day is None: + nb_seen_this_day = 0 + date = date[0:4] + '-' + date[4:6] + '-' + date[6:8] + json_seen_in_paste.append({'date': date, 'value': int(nb_seen_this_day)}) + + return jsonify(json_seen_in_paste) + else: + return jsonify() + +@hashDecoded.route('/correlation/cryptocurrency_graph_line_json') +def cryptocurrency_graph_line_json(): + type_id = request.args.get('type_id') + key_id = request.args.get('key_id') + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + return correlation_graph_line_json('cryptocurrency', type_id, key_id, date_from, date_to) + # ========= REGISTRATION ========= app.register_blueprint(hashDecoded, url_prefix=baseUrl) diff --git a/var/www/modules/hashDecoded/templates/DaysCorrelation.html b/var/www/modules/hashDecoded/templates/DaysCorrelation.html new file mode 100644 index 00000000..2051cef0 --- /dev/null +++ b/var/www/modules/hashDecoded/templates/DaysCorrelation.html @@ -0,0 +1,660 @@ + + + + + Decoded - AIL + + + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
diff --git a/var/www/modules/hashDecoded/templates/DaysCorrelation.html b/var/www/modules/hashDecoded/templates/DaysCorrelation.html
new file mode 100644
index 00000000..2051cef0
--- /dev/null
+++ b/var/www/modules/hashDecoded/templates/DaysCorrelation.html
@@ -0,0 +1,660 @@
+[new 660-line "Decoded - AIL" correlation listing template (markup omitted): it includes nav_bar.html and decoded/menu_sidebar.html, offers a "Select a date range :" picker and a "Type ID :" filter, renders one table row per entry of all_metadata with the columns type id / key id / first seen / last seen / nb item / Sparkline ({{ all_metadata[key_id]['type_id'] }}, {{ key_id }}, first_seen, last_seen, nb_seen), headed by "{{ date_from }} {{correlation_type}}:" or "{{ date_from }} to {{ date_to }} {{correlation_type}}:", and falls back to "{{ date_from }}, No Dumped Keys" / "{{ date_from }} to {{ date_to }}, No {{correlation_type}}" when nothing matches the selected range]
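
DaysCorrelation.html iterates over an `all_metadata` mapping prepared by `main_correlation_page()`, which is not shown in this diff. A rough sketch, under the assumption that it aggregates the same per-day hashes and per-key metadata keys used by the routes above (the real implementation may compute the "nb item" figure differently, e.g. from the per-item sets):

```python
def build_pgpdump_metadata(r_serv_metadata, type_id, date_range):
    """Hypothetical aggregation for the DaysCorrelation table.

    type_id is 'key', 'name' or 'mail'; date_range is a list of 'YYYYMMDD' strings.
    Assumes a redis/ARDB client created with decode_responses=True.
    """
    all_metadata = {}
    for date in date_range:
        # daily hash: pgpdump:<type>:<date> -> {key id: nb seen that day}
        for key_id, nb_seen in r_serv_metadata.hgetall('pgpdump:{}:{}'.format(type_id, date)).items():
            meta = all_metadata.setdefault(key_id, {'type_id': type_id, 'nb_seen': 0})
            meta['nb_seen'] += int(nb_seen)
    for key_id, meta in all_metadata.items():
        # per-key metadata hash: pgpdump_metadata_<type>:<key id>
        meta['first_seen'] = r_serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_id, key_id), 'first_seen')
        meta['last_seen'] = r_serv_metadata.hget('pgpdump_metadata_{}:{}'.format(type_id, key_id), 'last_seen')
    return all_metadata
```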
diff --git a/var/www/modules/hashDecoded/templates/hashDecoded.html b/var/www/modules/hashDecoded/templates/hashDecoded.html
index 19ddcdaf..766f618a 100644
--- a/var/www/modules/hashDecoded/templates/hashDecoded.html
+++ b/var/www/modules/hashDecoded/templates/hashDecoded.html
@@ -1,282 +1,287 @@
[reworked template (markup omitted): the page title changes from "HashesDecoded - AIL" to "Decoded - AIL"; the old navbar.html include and the "Hashed Files" header block are replaced by nav_bar.html plus decoded/menu_sidebar.html; the search form keeps the "Select a date range :", "Encoding :" and "File Type :" filters; the results heading becomes "{{ date_from }} Decoded files:" (or "{{ date_from }} to {{ date_to }} Decoded files:"); the table's "nb paste" column is renamed "nb item" while keeping the per-row cells (estimated type {{ b64[1] }}, hash {{ b64[2] }}, first seen {{ b64[5] }}, last seen {{ b64[6] }}, nb item {{ b64[3] }}, size {{ b64[4] }}, Virus Total submit button or "VT Report" link driven by b64[7] and vt_enabled, Sparkline); the "No Hashes" fallbacks are kept and the old inline chart.onResize() resize script at the bottom is replaced]
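
For orientation, the reworked table renders each decoded file from a positional tuple (called `b64` in the template). The index-to-column mapping below is read from the template itself; the tuple is assembled server-side by code not shown in this hunk:

```python
def decoded_row_to_dict(b64):
    # Positional layout as consumed by hashDecoded.html; b64[0] is not used in the visible columns.
    return {
        'estimated_type': b64[1],   # mime type column
        'hash': b64[2],
        'nb_item': b64[3],          # renamed from "nb paste" in this change
        'size': b64[4],
        'first_seen': b64[5],
        'last_seen': b64[6],
        'vt_report': b64[7],        # falsy when the hash has not been submitted to Virus Total yet
    }
```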
diff --git a/var/www/modules/hashDecoded/templates/showCorrelation.html b/var/www/modules/hashDecoded/templates/showCorrelation.html
new file mode 100644
index 00000000..35ec63ec
--- /dev/null
+++ b/var/www/modules/hashDecoded/templates/showCorrelation.html
@@ -0,0 +1,553 @@
+[new 553-line "AIL - framework" template (markup omitted): built on nav_bar.html and decoded/menu_sidebar.html, it shows a "{{ key_id }} :" header, a metadata table (type, First_seen, Last_seen, Nb seen) filled from {{ type_id }} and key_id_metadata, several "Graph" panels for the node and line charts, and includes decoded/show_helper_pgpdump.html or decoded/show_helper_cryptocurrency.html depending on correlation_type]
diff --git a/var/www/modules/hashDecoded/templates/showHash.html b/var/www/modules/hashDecoded/templates/showHash.html
index b0bcaa50..c7cac4d8 100644
--- a/var/www/modules/hashDecoded/templates/showHash.html
+++ b/var/www/modules/hashDecoded/templates/showHash.html
@@ -3,27 +3,20 @@
[head section rework (markup omitted): the "Hash Information - AIL" title and the old stylesheet/script includes are replaced by the new shared includes]