diff --git a/.gitignore b/.gitignore
index ac4837a3..f9d2344e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,9 @@ DATA_ARDB
 indexdir/
 logs/
 old/
+pgpdump/
+temp/
+
 DEFAULT_PASSWORD
@@ -44,6 +47,9 @@ configs/update.cfg
 update/current_version
 files
+# Trackers
+bin/trackers/yara/custom-rules/*
+
 # Helper
 bin/helper/gen_cert/rootCA.*
 bin/helper/gen_cert/server.*
diff --git a/.travis.yml b/.travis.yml
index 0cae47c4..f757818a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ env:
     AIL_FLASK=$TRAVIS_BUILD_DIR/var/www/ AIL_REDIS=$TRAVIS_BUILD_DIR/redis/src/ \
     AIL_LEVELDB=$TRAVIS_BUILD_DIR/redis-leveldb/ PATH=$AIL_HOME:$AIL_REDIS:$AIL_LEVELDB:$PATH
 
-dist: xenial
+dist: bionic
 
 install:
     - ./installing_deps.sh
diff --git a/OVERVIEW.md b/OVERVIEW.md
index c3ab3ce4..5790acd9 100644
--- a/OVERVIEW.md
+++ b/OVERVIEW.md
@@ -16,7 +16,7 @@ Redis and ARDB overview
     DB 1 - Curve
     DB 2 - TermFreq
-    DB 3 - Trending
+    DB 3 - Trending/Trackers
     DB 4 - Sentiments
     DB 5 - TermCred
     DB 6 - Tags
diff --git a/bin/Attributes.py b/bin/Attributes.py
deleted file mode 100755
index 74357065..00000000
--- a/bin/Attributes.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-"""
-The ZMQ_Sub_Attribute Module
-============================
-
-This module is saving Attribute of the paste into redis
-
-"""
-import time
-from packages import Paste
-from pubsublogger import publisher
-
-from Helper import Process
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Attributes'
-
-    p = Process(config_section)
-
-    # FUNCTIONS #
-    publisher.info("Attribute is Running")
-
-    while True:
-        try:
-            message = p.get_from_set()
-
-            if message is not None:
-                PST = Paste.Paste(message)
-            else:
-                publisher.debug("Script Attribute is idling 1s")
-                print('sleeping')
-                time.sleep(1)
-                continue
-
-            # FIXME do it directly in the class
-            PST.save_attribute_redis("p_encoding", PST._get_p_encoding())
-            #PST.save_attribute_redis("p_language", PST._get_p_language())
-            # FIXME why not all saving everything there.
-            PST.save_all_attributes_redis()
-            # FIXME Not used.
-            PST.store.sadd("Pastes_Objects", PST.p_rel_path)
-        except IOError:
-            print("CRC Checksum Failed on :", PST.p_rel_path)
-            publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
-                PST.p_source, PST.p_date, PST.p_name))
diff --git a/bin/CreditCards.py b/bin/CreditCards.py
index 0c6bdf3f..456e474a 100755
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@@ -31,14 +31,10 @@ if __name__ == "__main__":
     p = Process(config_section)
 
     # FUNCTIONS #
-    publisher.info("Creditcard script subscribed to channel creditcard_categ")
-
+    publisher.info("CreditCards script started")
 
     creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
-    # FIXME For retro compatibility
-    channel = 'creditcard_categ'
-
     # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
     cards = [
         r'\b4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}\b',  # 16-digit VISA, with separators
@@ -69,9 +65,6 @@
                     print(clean_card, 'is valid')
                     creditcard_set.add(clean_card)
 
-            paste.__setattr__(channel, creditcard_set)
-            paste.save_attribute_redis(channel, creditcard_set)
-
             pprint.pprint(creditcard_set)
             to_print = 'CreditCard;{};{};{};'.format(
                 paste.p_source, paste.p_date, paste.p_name)
diff --git a/bin/Cryptocurrencies.py b/bin/Cryptocurrencies.py
index 4b50eaee..ded939e7 100755
--- a/bin/Cryptocurrencies.py
+++ b/bin/Cryptocurrencies.py
@@ -44,8 +44,9 @@ def search_crytocurrency(item_id, item_content):
 
     is_cryptocurrency_found = False
 
-    for crypto_name in cryptocurrency_dict:
-        crypto_dict = cryptocurrency_dict[crypto_name]
+    for dict_field in cryptocurrency_dict:
+        crypto_dict = cryptocurrency_dict[dict_field]
+        crypto_name = crypto_dict['name']
 
         signal.alarm(crypto_dict['max_execution_time'])
         try:
@@ -62,7 +63,7 @@
             is_valid_crypto_addr = False
             # validate cryptocurrency address
             for address in crypto_addr:
-                if(Cryptocurrency.verify_cryptocurrency_address(crypto_name, address)):
+                if(Cryptocurrency.verify_cryptocurrency_address(dict_field, address)):
                     is_valid_crypto_addr = True
                     print('{} address found : {}'.format(crypto_name, address))
                     # build bitcoin correlation
@@ -127,6 +128,17 @@ cryptocurrency_dict = {
         'tag': 'infoleak:automatic-detection="bitcoin-private-key"',
         },
     },
+    'bitcoin-bech32': {
+        'name': 'bitcoin',      # e.g. bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq
+        'regex': r'\bbc(?:0(?:[ac-hj-np-z02-9]{39}|[ac-hj-np-z02-9]{59})|1[ac-hj-np-z02-9]{8,87})\b',
+        'max_execution_time': default_max_execution_time,
+        'tag': 'infoleak:automatic-detection="bitcoin-address"',
+        'private_key': {
+            'regex': r'\b(?<![+/=])[5KL][1-9A-HJ-NP-Za-km-z]{50,51}(?![+/=])\b',
+            'tag': 'infoleak:automatic-detection="bitcoin-private-key"',
+        },
+    },
diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py
--- a/bin/ModuleStats.py
+++ b/bin/ModuleStats.py
             if len(message.split(';')) > 1:
                 compute_most_posted(r_serv_trend, message)
             else:
-                compute_provider_info(r_serv_trend, r_serv_pasteName, message)
+                compute_provider_info(r_serv_trend, message)
diff --git a/bin/Onion.py b/bin/Onion.py
index ad1fe5bb..2b6be55e 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -193,9 +193,6 @@ if __name__ == "__main__":
                     r_onion.sadd('i2p_crawler_queue', msg)
                     '''
 
-            # Saving the list of extracted onion domains.
-            PST.__setattr__(channel, domains_list)
-            PST.save_attribute_redis(channel, domains_list)
 
             to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date,
                                                 PST.p_name)
diff --git a/bin/RegexTracker.py b/bin/RegexTracker.py
index 938738d0..904be623 100755
--- a/bin/RegexTracker.py
+++ b/bin/RegexTracker.py
@@ -23,7 +23,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import Tracker
 import regex_helper
 
-full_item_url = "/showsavedpaste/?paste="
+full_item_url = "/object/item?id="
 
 mail_body_template = "AIL Framework,\nNew occurrence for term tracked regex: {}\nitem id: {}\nurl: {}{}"
 
 dict_regex_tracked = Term.get_regex_tracked_words_dict()
@@ -58,7 +58,7 @@ if __name__ == "__main__":
     p = Process(config_section)
     max_execution_time = p.config.getint(config_section, "max_execution_time")
 
-    ull_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
+    full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
 
     redis_cache_key = regex_helper.generate_redis_cache_key(module_name)
diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py
index 2b4241c0..f7abe4af 100755
--- a/bin/TermTrackerMod.py
+++ b/bin/TermTrackerMod.py
@@ -20,7 +20,7 @@ from packages import Term
 from lib import Tracker
 
-full_item_url = "/showsavedpaste/?paste="
+full_item_url = "/object/item?id="
 
 mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
diff --git a/bin/Web.py b/bin/Web.py
index ca4366e8..68e37c25 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -58,10 +58,7 @@ if __name__ == "__main__":
     cc_critical = p.config.get("Url", "cc_critical")
 
     # FUNCTIONS #
-    publisher.info("Script URL subscribed to channel web_categ")
-
-    # FIXME For retro compatibility
-    channel = 'web_categ'
+    publisher.info("Script URL Started")
 
     message = p.get_from_set()
     prec_filename = None
@@ -150,11 +147,6 @@
                                    domains_list)
 
             if A_values[0] >= 1:
-                PST.__setattr__(channel, A_values)
-                PST.save_attribute_redis(channel, (A_values[0],
-                                                   list(A_values[1])))
-
-                pprint.pprint(A_values)
                 publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
                     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
diff --git a/bin/export/Export.py b/bin/export/Export.py
index 67d631f2..90de4570 100755
--- a/bin/export/Export.py
+++ b/bin/export/Export.py
@@ -9,6 +9,24 @@ from uuid import uuid4
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
 import ConfigLoader
 
+sys.path.append('../../configs/keys')
+try:
+    from thehive4py.api import TheHiveApi
+    import thehive4py.exceptions
+    from theHiveKEYS import the_hive_url, the_hive_key, the_hive_verifycert
+    if the_hive_url == '':
+        is_hive_connected = False
+    else:
+        is_hive_connected = TheHiveApi(the_hive_url, the_hive_key, cert=the_hive_verifycert)
+except:
+    is_hive_connected = False
+if is_hive_connected != False:
+    try:
+        is_hive_connected.get_alert(0)
+        is_hive_connected = True
+    except thehive4py.exceptions.AlertException:
+        is_hive_connected = False
+
 ## LOAD CONFIG ##
 config_loader = ConfigLoader.ConfigLoader()
 r_serv_cache = config_loader.get_redis_conn("Redis_Cache")
@@ -37,6 +55,16 @@ def load_tags_to_export_in_cache():
     # save combinaison of tags in cache
     pass
 
+def is_hive_connected():  # # TODO: REFRACTOR, put in cache (with retry)
+    return is_hive_connected
+
+def get_item_hive_cases(item_id):
+    hive_case = r_serv_metadata.get('hive_cases:{}'.format(item_id))
+    if hive_case:
+        hive_case = the_hive_url + '/index.html#/case/{}/details'.format(hive_case)
+    return hive_case
+
+
 ###########################################################
 # # set default
 # if r_serv_db.get('hive:auto-alerts') is None:
diff --git a/bin/lib/ConfigLoader.py b/bin/lib/ConfigLoader.py
index 6eedadd0..b68aa3a3 100755
--- a/bin/lib/ConfigLoader.py
+++ b/bin/lib/ConfigLoader.py
@@ -41,6 +41,15 @@ class ConfigLoader(object):
                                     db=self.cfg.getint(redis_name, "db"),
                                     decode_responses=decode_responses )
 
+    def get_files_directory(self, key_name):
+        directory_path = self.cfg.get('Directories', key_name)
+        # full path
+        if directory_path[0] == '/':
+            return directory_path
+        else:
+            directory_path = os.path.join(os.environ['AIL_HOME'], directory_path)
+        return directory_path
+
     def get_config_str(self, section, key_name):
         return self.cfg.get(section, key_name)
diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py
index c0d959f5..1cddf9be 100755
--- a/bin/lib/Correlate_object.py
+++ b/bin/lib/Correlate_object.py
@@ -223,11 +223,11 @@ def get_item_url(correlation_name, value, correlation_type=None):
         endpoint = 'crawler_splash.showDomain'
         url = url_for(endpoint, domain=value)
     elif correlation_name == 'item':
-        endpoint = 'showsavedpastes.showsavedpaste'
-        url = url_for(endpoint, paste=value)
+        endpoint = 'objects_item.showItem'
+        url = url_for(endpoint, id=value)
     elif correlation_name == 'paste': ### # TODO: remove me
-        endpoint = 'showsavedpastes.showsavedpaste'
-        url = url_for(endpoint, paste=value)
+        endpoint = 'objects_item.showItem'
+        url = url_for(endpoint, id=value)
     return url
 
 def get_obj_tag_table_keys(object_type):
diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py
index bfc0b4cb..60d78967 100755
--- a/bin/lib/Domain.py
+++ b/bin/lib/Domain.py
@@ -9,9 +9,11 @@ The ``Domain``
 
 import os
 import sys
-import time
+import itertools
+import re
 import redis
 import random
+import time
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
 import Cryptocurrency
@@ -24,6 +26,7 @@
 import Tag
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
 import Correlate_object
+import Language
 import Screenshot
 import Username
@@ -66,6 +69,15 @@
     else:
         return 'regular'
 
+def sanitize_domain_types(l_domain_type):
+    all_domain_types = get_all_domains_type()
+    if not l_domain_type:
+        return all_domain_types
+    for domain_type in l_domain_type:
+        if domain_type not in all_domain_types:
+            return all_domain_types
+    return l_domain_type
+
 ######## DOMAINS ########
 def get_all_domains_type():
     return ['onion', 'regular']
@@ -210,6 +222,15 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[],
     else:
         return None
 
+
+
+## TODO: filters:
+#    - tags
+#    - languages
+#    - daterange UP
+def get_domains_by_filters():
+    pass
+
 def create_domains_metadata_list(list_domains, domain_type):
     l_domains = []
     for domain in list_domains:
@@ -218,9 +239,144 @@
         else:
             dom_type = domain_type
         l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True,
-                            ports=True, tags=True, screenshot=True, tags_safe=True))
+                            ports=True, tags=True, languages=True, screenshot=True, tags_safe=True))
     return l_domains
 
+def sanithyse_domain_name_to_search(name_to_search, domain_type):
+    if domain_type == 'onion':
+        r_name = r'[a-z0-9\.]+'
+    else:
+        r_name = r'[a-zA-Z0-9\.-_]+'
+    # invalid domain name
+    if not re.fullmatch(r_name, name_to_search):
+        return None
+    return name_to_search.replace('.', '\.')
+
+
+def search_domains_by_name(name_to_search, domain_types, r_pos=False):
+    domains_dict = {}
+    for domain_type in domain_types:
+        r_name = sanithyse_domain_name_to_search(name_to_search, domain_type)
+        if not name_to_search:
+            break
+        r_name = re.compile(r_name)
+        for domain in get_all_domains_up(domain_type):
+            res = re.search(r_name, domain)
+            if res:
+                domains_dict[domain] = {}
+                if r_pos:
+                    domains_dict[domain]['hl-start'] = res.start()
+                    domains_dict[domain]['hl-end'] = res.end()
+    return domains_dict
+
+def api_search_domains_by_name(name_to_search, domains_types, domains_metadata=False, page=1):
+    domains_types = sanitize_domain_types(domains_types)
+    domains_dict = search_domains_by_name(name_to_search, domains_types, r_pos=True)
+    l_domains = sorted(domains_dict.keys())
+    l_domains = paginate_iterator(l_domains, nb_obj=28, page=page)
+    if not domains_metadata:
+        return l_domains
+    else:
+        l_dict_domains = []
+        for domain in l_domains['list_elem']:
+            dict_domain = get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
+                                              status=True, ports=True, tags=True, tags_safe=True,
+                                              languages=True, screenshot=True)
+            dict_domain = {**domains_dict[domain], **dict_domain}
+            l_dict_domains.append(dict_domain)
+        l_domains['list_elem'] = l_dict_domains
+        l_domains['search'] = name_to_search
+    return l_domains
+
+
+######## LANGUAGES ########
+def get_all_domains_languages():
+    return r_serv_onion.smembers('all_domains_languages')
+
+def get_domains_by_languages(languages, l_domain_type=[]):
+    l_domain_type = sanitize_domain_types(l_domain_type)
+    if not languages:
+        return []
+    elif len(languages) == 1:
+        return get_all_domains_by_language(languages[0], l_domain_type=l_domain_type)
+    else:
+        all_domains_t = []
+        for domain_type in l_domain_type:
+            l_keys_name = []
+            for language in languages:
+                l_keys_name.append('language:domains:{}:{}'.format(domain_type, language))
+            res = r_serv_onion.sinter(l_keys_name[0], *l_keys_name[1:])
+            if res:
+                all_domains_t.append(res)
+        return list(itertools.chain.from_iterable(all_domains_t))
+
+def get_all_domains_by_language(language, l_domain_type=[]):
+    l_domain_type = sanitize_domain_types(l_domain_type)
+    if len(l_domain_type) == 1:
+        return r_serv_onion.smembers('language:domains:{}:{}'.format(l_domain_type[0], language))
+    else:
+        l_keys_name = []
+        for domain_type in l_domain_type:
+            l_keys_name.append('language:domains:{}:{}'.format(domain_type, language))
+        return r_serv_onion.sunion(l_keys_name[0], *l_keys_name[1:])
+
+def get_domain_languages(domain, r_list=False):
+    res = r_serv_onion.smembers('domain:language:{}'.format(domain))
+    if r_list:
+        return list(res)
+    else:
+        return res
+
+def add_domain_language(domain, language):
+    language = language.split('-')[0]
+    domain_type = get_domain_type(domain)
+    r_serv_onion.sadd('all_domains_languages', language)
+    r_serv_onion.sadd('all_domains_languages:{}'.format(domain_type), language)
+    r_serv_onion.sadd('language:domains:{}:{}'.format(domain_type, language), domain)
+    r_serv_onion.sadd('domain:language:{}'.format(domain), language)
+
+def add_domain_languages_by_item_id(domain, item_id):
+    for lang in Item.get_item_languages(item_id, min_proportion=0.2, min_probability=0.8):
+        add_domain_language(domain, lang.language)
+
+def delete_domain_languages(domain):
+    domain_type = get_domain_type(domain)
+    for language in get_domain_languages(domain):
+        r_serv_onion.srem('language:domains:{}:{}'.format(domain_type, language), domain)
+        if not r_serv_onion.exists('language:domains:{}:{}'.format(domain_type, language)):
+            r_serv_onion.srem('all_domains_languages:{}'.format(domain_type), language)
+            exist_domain_type_lang = False
+            for domain_type in get_all_domains_type():
+                if r_serv_onion.sismembers('all_domains_languages:{}'.format(domain_type), language):
+                    exist_domain_type_lang = True
+                    continue
+            if not exist_domain_type_lang:
+                r_serv_onion.srem('all_domains_languages', language)
+    r_serv_onion.delete('domain:language:{}'.format(domain))
+
+def _delete_all_domains_languages():
+    for language in get_all_domains_languages():
+        for domain in get_all_domains_by_language(language):
+            delete_domain_languages(domain)
+
+## API ##
+## TODO: verify domains type + languages list
+## TODO: add pagination
+def api_get_domains_by_languages(domains_types, languages, domains_metadata=False, page=1):
+    l_domains = sorted(get_domains_by_languages(languages, l_domain_type=domains_types))
+    l_domains = paginate_iterator(l_domains, nb_obj=28, page=page)
+    if not domains_metadata:
+        return l_domains
+    else:
+        l_dict_domains = []
+        for domain in l_domains['list_elem']:
+            l_dict_domains.append(get_domain_metadata(domain, get_domain_type(domain), first_seen=True, last_ckeck=True,
+                                                      status=True, ports=True, tags=True, tags_safe=True,
+                                                      languages=True, screenshot=True))
+        l_domains['list_elem'] = l_dict_domains
+    return l_domains
+####---- ----####
+
 ######## DOMAIN ########
 
 def get_domain_type(domain):
@@ -474,6 +630,14 @@
     origin_item = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent')
     return origin_item
 
+def get_domain_father(domain, domain_type):
+    dict_father = {}
+    dict_father['item_father'] = r_serv_onion.hget('{}_metadata:{}'.format(domain_type, domain), 'paste_parent')
+    if dict_father['item_father'] != 'auto' and dict_father['item_father'] != 'manual':
+        if Item.is_crawled(dict_father['item_father']):
+            dict_father['domain_father'] = Item.get_domain(dict_father['item_father'])
+    return dict_father
+
 def get_domain_tags(domain):
     '''
     Retun all tags of a given domain.
@@ -490,7 +654,7 @@ def get_domain_random_screenshot(domain):
     '''
     return Screenshot.get_randon_domain_screenshot(domain)
 
-def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, screenshot=False):
+def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False, tags_safe=False, languages=False, screenshot=False):
     '''
     Get Domain basic metadata
 
@@ -508,6 +672,7 @@ def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, s
     '''
     dict_metadata = {}
     dict_metadata['id'] = domain
+    dict_metadata['type'] = domain_type
     if first_seen:
         res = get_domain_first_seen(domain, domain_type=domain_type)
         if res is not None:
@@ -527,6 +692,8 @@
             dict_metadata['is_tags_safe'] = Tag.is_tags_safe(dict_metadata['tags'])
         else:
             dict_metadata['is_tags_safe'] = Tag.is_tags_safe(get_domain_tags(domain))
+    if languages:
+        dict_metadata['languages'] = Language.get_languages_from_iso(get_domain_languages(domain, r_list=True), sort=True)
     if screenshot:
         dict_metadata['screenshot'] = get_domain_random_screenshot(domain)
     return dict_metadata
@@ -744,6 +911,9 @@ class Domain(object):
         '''
         return get_domain_last_origin(self.domain, self.type)
 
+    def get_domain_father(self):
+        return get_domain_father(self.domain, self.type)
+
     def domain_was_up(self):
         '''
         Return True if this domain was UP at least one time
@@ -785,6 +955,14 @@
         '''
         return get_domain_tags(self.domain)
 
+    def get_domain_languages(self):
+        '''
+        Retun all languages of a given domain.
+
+        :param domain: domain name
+        '''
+        return get_domain_languages(self.domain)
+
     def get_domain_correlation(self):
         '''
         Retun all correlation of a given domain.
@@ -809,3 +987,6 @@
         '''
         port = sanathyse_port(port, self.domain, self.type, strict=True, current_port=self.current_port)
         return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)
+
+if __name__ == '__main__':
+    search_domains_by_name('c', 'onion')
diff --git a/bin/lib/Language.py b/bin/lib/Language.py
new file mode 100755
index 00000000..6b5bd6a0
--- /dev/null
+++ b/bin/lib/Language.py
@@ -0,0 +1,240 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import redis
+
+dict_iso_languages = {
+    'af': 'Afrikaans',
+    'am': 'Amharic',
+    'ar': 'Arabic',
+    'bg': 'Bulgarian',
+    'bn': 'Bangla',
+    'bs': 'Bosnian',
+    'ca': 'Catalan',
+    'ceb': 'Cebuano',
+    'co': 'Corsican',
+    'cs': 'Czech',
+    'cy': 'Welsh',
+    'da': 'Danish',
+    'de': 'German',
+    'el': 'Greek',
+    'en': 'English',
+    'eo': 'Esperanto',
+    'es': 'Spanish',
+    'et': 'Estonian',
+    'eu': 'Basque',
+    'fa': 'Persian',
+    'fi': 'Finnish',
+    'fil': 'Filipino',
+    'fr': 'French',
+    'fy': 'Western Frisian',
+    'ga': 'Irish',
+    'gd': 'Scottish Gaelic',
+    'gl': 'Galician',
+    'gu': 'Gujarati',
+    'ha': 'Hausa',
+    'haw': 'Hawaiian',
+    'hi': 'Hindi',
+    'hmn': 'Hmong',
+    'hr': 'Croatian',
+    'ht': 'Haitian Creole',
+    'hu': 'Hungarian',
+    'hy': 'Armenian',
+    'id': 'Indonesian',
+    'ig': 'Igbo',
+    'is': 'Icelandic',
+    'it': 'Italian',
+    'iw': 'Hebrew',
+    'ja': 'Japanese',
+    'jv': 'Javanese',
+    'ka': 'Georgian',
+    'kk': 'Kazakh',
+    'km': 'Khmer',
+    'kn': 'Kannada',
+    'ko': 'Korean',
+    'ku': 'Kurdish',
+    'ky': 'Kyrgyz',
+    'la': 'Latin',
+    'lb': 'Luxembourgish',
+    'lo': 'Lao',
+    'lt': 'Lithuanian',
+    'lv': 'Latvian',
+    'mg': 'Malagasy',
+    'mi': 'Maori',
+    'mk': 'Macedonian',
+    'ml': 'Malayalam',
+    'mn': 'Mongolian',
+    'mr': 'Marathi',
+    'ms': 'Malay',
+    'mt': 'Maltese',
+    'my': 'Burmese',
+    'ne': 'Nepali',
+    'nl': 'Dutch',
+    'no': 'Norwegian',
+    'ny': 'Nyanja',
+    'pa': 'Punjabi',
+    'pl': 'Polish',
+    'ps': 'Pashto',
+    'pt': 'Portuguese',
+    'ro': 'Romanian',
+    'ru': 'Russian',
+    'sd': 'Sindhi',
+    'si': 'Sinhala',
+    'sk': 'Slovak',
+    'sl': 'Slovenian',
+    'sm': 'Samoan',
+    'sn': 'Shona',
+    'so': 'Somali',
+    'sq': 'Albanian',
+    'sr': 'Serbian',
+    'st': 'Southern Sotho',
+    'su': 'Sundanese',
+    'sv': 'Swedish',
+    'sw': 'Swahili',
+    'ta': 'Tamil',
+    'te': 'Telugu',
+    'tg': 'Tajik',
+    'th': 'Thai',
+    'tr': 'Turkish',
+    'uk': 'Ukrainian',
+    'ur': 'Urdu',
+    'uz': 'Uzbek',
+    'vi': 'Vietnamese',
+    'xh': 'Xhosa',
+    'yi': 'Yiddish',
+    'yo': 'Yoruba',
+    'zh': 'Chinese',
+    'zu': 'Zulu'
+}
+
+dict_languages_iso = {
+    'Afrikaans': 'af',
+    'Amharic': 'am',
+    'Arabic': 'ar',
+    'Bulgarian': 'bg',
+    'Bangla': 'bn',
+    'Bosnian': 'bs',
+    'Catalan': 'ca',
+    'Cebuano': 'ceb',
+    'Corsican': 'co',
+    'Czech': 'cs',
+    'Welsh': 'cy',
+    'Danish': 'da',
+    'German': 'de',
+    'Greek': 'el',
+    'English': 'en',
+    'Esperanto': 'eo',
+    'Spanish': 'es',
+    'Estonian': 'et',
+    'Basque': 'eu',
+    'Persian': 'fa',
+    'Finnish': 'fi',
+    'Filipino': 'fil',
+    'French': 'fr',
+    'Western Frisian': 'fy',
+    'Irish': 'ga',
+    'Scottish Gaelic': 'gd',
+    'Galician': 'gl',
+    'Gujarati': 'gu',
+    'Hausa': 'ha',
+    'Hawaiian': 'haw',
+    'Hindi': 'hi',
+    'Hmong': 'hmn',
+    'Croatian': 'hr',
+    'Haitian Creole': 'ht',
+    'Hungarian': 'hu',
+    'Armenian': 'hy',
+    'Indonesian': 'id',
+    'Igbo': 'ig',
+    'Icelandic': 'is',
+    'Italian': 'it',
+    'Hebrew': 'iw',
+    'Japanese': 'ja',
+    'Javanese': 'jv',
+    'Georgian': 'ka',
+    'Kazakh': 'kk',
+    'Khmer': 'km',
+    'Kannada': 'kn',
+    'Korean': 'ko',
+    'Kurdish': 'ku',
+    'Kyrgyz': 'ky',
+    'Latin': 'la',
+    'Luxembourgish': 'lb',
+    'Lao': 'lo',
+    'Lithuanian': 'lt',
+    'Latvian': 'lv',
+    'Malagasy': 'mg',
+    'Maori': 'mi',
+    'Macedonian': 'mk',
+    'Malayalam': 'ml',
+    'Mongolian': 'mn',
+    'Marathi': 'mr',
+    'Malay': 'ms',
+    'Maltese': 'mt',
+    'Burmese': 'my',
+    'Nepali': 'ne',
+    'Dutch': 'nl',
+    'Norwegian': 'no',
+    'Nyanja': 'ny',
+    'Punjabi': 'pa',
+    'Polish': 'pl',
+    'Pashto': 'ps',
+    'Portuguese': 'pt',
+    'Romanian': 'ro',
+    'Russian': 'ru',
+    'Sindhi': 'sd',
+    'Sinhala': 'si',
+    'Slovak': 'sk',
+    'Slovenian': 'sl',
+    'Samoan': 'sm',
+    'Shona': 'sn',
+    'Somali': 'so',
+    'Albanian': 'sq',
+    'Serbian': 'sr',
+    'Southern Sotho': 'st',
+    'Sundanese': 'su',
+    'Swedish': 'sv',
+    'Swahili': 'sw',
+    'Tamil': 'ta',
+    'Telugu': 'te',
+    'Tajik': 'tg',
+    'Thai': 'th',
+    'Turkish': 'tr',
+    'Ukrainian': 'uk',
+    'Urdu': 'ur',
+    'Uzbek': 'uz',
+    'Vietnamese': 'vi',
+    'Xhosa': 'xh',
+    'Yiddish': 'yi',
+    'Yoruba': 'yo',
+    'Chinese': 'zh',
+    'Zulu': 'zu'
+}
+
+def get_language_from_iso(iso_language):
+    return dict_iso_languages.get(iso_language, None)
+
+def get_languages_from_iso(l_iso_languages, sort=False):
+    l_languages = []
+    for iso_language in l_iso_languages:
+        language = get_language_from_iso(iso_language)
+        if language:
+            l_languages.append(language)
+    if sort:
+        l_languages = sorted(l_languages)
+    return l_languages
+
+def get_iso_from_language(language):
+    return dict_languages_iso.get(language, None)
+
+def get_iso_from_languages(l_languages, sort=False):
+    l_iso = []
+    for language in l_languages:
+        iso_lang = get_iso_from_language(language)
+        if iso_lang:
+            l_iso.append(iso_lang)
+    if sort:
+        l_iso = sorted(l_iso)
+    return l_iso
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
index 46141e30..3f198f52 100755
--- a/bin/lib/Screenshot.py
+++ b/bin/lib/Screenshot.py
@@ -20,7 +20,7 @@ import ConfigLoader
 config_loader = ConfigLoader.ConfigLoader()
 r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
+SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
 config_loader = None
 
 # get screenshot relative path
diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py
index b09a1d3e..844e1093 100755
--- a/bin/lib/Tracker.py
+++ b/bin/lib/Tracker.py
@@ -2,19 +2,78 @@
 # -*-coding:UTF-8 -*
 
 import os
+import re
 import sys
 import time
 import redis
+import uuid
 import yara
+import datetime
+
+from flask import escape
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
 #import item_basic
 
 config_loader = ConfigLoader.ConfigLoader()
+r_serv_db = config_loader.get_redis_conn("ARDB_DB")
 r_serv_tracker = config_loader.get_redis_conn("ARDB_Tracker")
 config_loader = None
 
+email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
+email_regex = re.compile(email_regex)
+
+special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
+special_characters.add('\\s')
+
+###############
+#### UTILS ####
+def is_valid_uuid_v4(UUID):
+    if not UUID:
+        return False
+    UUID = UUID.replace('-', '')
+    try:
+        uuid_test = uuid.UUID(hex=UUID, version=4)
+        return uuid_test.hex == UUID
+    except:
+        return False
+
+def is_valid_regex(tracker_regex):
+    try:
+        re.compile(tracker_regex)
+        return True
+    except:
+        return False
+
+def is_valid_mail(email):
+    result = email_regex.match(email)
+    if result:
+        return True
+    else:
+        return False
+
+def verify_mail_list(mail_list):
+    for mail in mail_list:
+        if not is_valid_mail(mail):
+            return ({'status': 'error', 'reason': 'Invalid email', 'value': mail}, 400)
+    return None
+
+##-- UTILS --##
+###############
+
+def get_tracker_by_uuid(tracker_uuid):
+    return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'tracked')
+
+def get_tracker_type(tracker_uuid):
+    return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'type')
+
+def get_tracker_level(tracker_uuid):
+    return int(r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'level'))
+
+def get_tracker_user_id(tracker_uuid):
+    return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id')
+
 def get_tracker_uuid_list(tracker, tracker_type):
     return list(r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker)))
@@ -27,6 +86,51 @@
 def get_tracker_description(tracker_uuid):
     return r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'description')
 
+def get_tracker_first_seen(tracker_uuid):
+    res = r_serv_tracker.zrange('tracker:stat:{}'.format(tracker_uuid), 0, 0)
+    if res:
+        return res[0]
+    else:
+        return None
+
+def get_tracker_last_seen(tracker_uuid):
+    res = r_serv_tracker.zrevrange('tracker:stat:{}'.format(tracker_uuid), 0, 0)
+    if res:
+        return res[0]
+    else:
+        return None
+
+def get_tracker_metedata(tracker_uuid, user_id=False, description=False, level=False, tags=False, mails=False, sparkline=False):
+    dict_uuid = {}
+    dict_uuid['tracker'] = get_tracker_by_uuid(tracker_uuid)
+    dict_uuid['type'] = get_tracker_type(tracker_uuid)
+    dict_uuid['date'] = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'date')
+    dict_uuid['description'] = get_tracker_description(tracker_uuid)
+    dict_uuid['first_seen'] = get_tracker_first_seen(tracker_uuid)
+    dict_uuid['last_seen'] = get_tracker_last_seen(tracker_uuid)
+    if user_id:
+        dict_uuid['user_id'] = get_tracker_user_id(tracker_uuid)
+    if level:
+        dict_uuid['level'] = get_tracker_level(tracker_uuid)
+    if mails:
+        dict_uuid['mails'] = get_tracker_mails(tracker_uuid)
+    if tags:
+        dict_uuid['tags'] = get_tracker_tags(tracker_uuid)
+    if sparkline:
+        dict_uuid['sparkline'] = get_tracker_sparkline(tracker_uuid)
+    dict_uuid['uuid'] = tracker_uuid
+    return dict_uuid
+
+def get_tracker_sparkline(tracker_uuid, num_day=6):
+    date_range_sparkline = Date.get_date_range(num_day)
+    sparklines_value = []
+    for date_day in date_range_sparkline:
+        nb_seen_this_day = r_serv_tracker.scard('tracker:item:{}:{}'.format(tracker_uuid, date_day))
+        if nb_seen_this_day is None:
+            nb_seen_this_day = 0
+        sparklines_value.append(int(nb_seen_this_day))
+    return sparklines_value
+
 def add_tracked_item(tracker_uuid, item_id, item_date):
     # track item
     r_serv_tracker.sadd('tracker:item:{}:{}'.format(tracker_uuid, item_date), item_id)
@@ -46,6 +150,234 @@
             epoch_update = 0
     return float(epoch_update)
 
+######################
+#### TRACKERS ACL ####
+
+# # TODO: use new package => duplicate fct
+def is_in_role(user_id, role):
+    if r_serv_db.sismember('user_role:{}'.format(role), user_id):
+        return True
+    else:
+        return False
+
+def is_tracker_in_global_level(tracker, tracker_type):
+    res = r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker))
+    if res:
+        for elem_uuid in res:
+            if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'level')=='1':
+                return True
+    return False
+
+def is_tracker_in_user_level(tracker, tracker_type, user_id):
+    res = r_serv_tracker.smembers('user:tracker:{}'.format(user_id))
+    if res:
+        for elem_uuid in res:
+            if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'tracked')== tracker:
+                if r_serv_tracker.hget('tracker:{}'.format(elem_uuid), 'type')== tracker_type:
+                    return True
+    return False
+
+def api_is_allowed_to_edit_tracker(tracker_uuid, user_id):
+    if not is_valid_uuid_v4(tracker_uuid):
+        return ({"status": "error", "reason": "Invalid uuid"}, 400)
+    tracker_creator = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id')
+    if not tracker_creator:
+        return ({"status": "error", "reason": "Unknown uuid"}, 404)
+    if not is_in_role(user_id, 'admin') and user_id != tracker_creator:
+        return ({"status": "error", "reason": "Access Denied"}, 403)
+    return ({"uuid": tracker_uuid}, 200)
+
+
+##-- ACL --##
+
+#### CREATE TRACKER ####
+def api_validate_tracker_to_add(tracker , tracker_type, nb_words=1):
+    if tracker_type=='regex':
+        if not is_valid_regex(tracker):
+            return ({"status": "error", "reason": "Invalid regex"}, 400)
+    elif tracker_type=='word' or tracker_type=='set':
+        # force lowercase
+        tracker = tracker.lower()
+        word_set = set(tracker)
+        set_inter = word_set.intersection(special_characters)
+        if set_inter:
+            return ({"status": "error", "reason": f'special character(s) not allowed: {set_inter}', "message": "Please use a python regex or remove all special characters"}, 400)
+        words = tracker.split()
+        # not a word
+        if tracker_type=='word' and len(words)>1:
+            tracker_type = 'set'
+
+        # ouput format: tracker1,tracker2,tracker3;2
+        if tracker_type=='set':
+            try:
+                nb_words = int(nb_words)
+            except:
+                nb_words = 1
+            if nb_words==0:
+                nb_words = 1
+
+            words_set = set(words)
+            words_set = sorted(words_set)
+
+            if nb_words > len(words_set):
+                nb_words = len(words_set)
+
+            tracker = ",".join(words_set)
+            tracker = "{};{}".format(tracker, nb_words)
+
+    elif tracker_type=='yara_custom':
+        if not is_valid_yara_rule(tracker):
+            return ({"status": "error", "reason": "Invalid custom Yara Rule"}, 400)
+    elif tracker_type=='yara_default':
+        if not is_valid_default_yara_rule(tracker):
+            return ({"status": "error", "reason": "The Yara Rule doesn't exist"}, 400)
+    else:
+        return ({"status": "error", "reason": "Incorrect type"}, 400)
+    return ({"status": "success", "tracker": tracker, "type": tracker_type}, 200)
+
+def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, dashboard=0, tracker_uuid=None):
+    # edit tracker
+    if tracker_uuid:
+        edit_tracker = True
+        # check if type changed
+        old_type = get_tracker_type(tracker_uuid)
+        old_tracker = get_tracker_by_uuid(tracker_uuid)
+        old_level = get_tracker_level(tracker_uuid)
+        tracker_user_id = get_tracker_user_id(tracker_uuid)
+
+    # Create new tracker
+    else:
+        edit_tracker = False
+        # generate tracker uuid
+        tracker_uuid = str(uuid.uuid4())
+        old_type = None
+        old_tracker = None
+
+    # YARA
+    if tracker_type == 'yara_custom' or tracker_type == 'yara_default':
+        # delete yara rule
+        if tracker_type == 'yara_default' and old_type == 'yara':
+            if not is_default_yara_rule(old_tracker):
+                filepath = get_yara_rule_file_by_tracker_name(old_tracker)
+                if filepath:
+                    os.remove(filepath)
+        tracker = save_yara_rule(tracker_type, tracker, tracker_uuid=tracker_uuid)
+        tracker_type = 'yara'
+
+    # create metadata
+    r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'tracked', tracker)
+    r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'type', tracker_type)
+    r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'date', datetime.date.today().strftime("%Y%m%d"))
+    r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'level', level)
+    r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'dashboard', dashboard)
+    if not edit_tracker:
+        r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'user_id', user_id)
+
+    if description:
+        r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'description', description)
+
+    # type change
+    if edit_tracker:
+        r_serv_tracker.srem('all:tracker:{}'.format(old_type), old_tracker)
+        r_serv_tracker.srem('all:tracker_uuid:{}:{}'.format(old_type, old_tracker), tracker_uuid)
+        if level != old_level:
+            if level == 0:
+                r_serv_tracker.srem('global:tracker', tracker_uuid)
+            elif level == 1:
+                r_serv_tracker.srem('user:tracker:{}'.format(tracker_user_id), tracker_uuid)
+        if tracker_type != old_type:
+            if old_level == 0:
+                r_serv_tracker.srem('user:tracker:{}:{}'.format(tracker_user_id, old_type), tracker_uuid)
+            elif old_level == 1:
+                r_serv_tracker.srem('global:tracker:{}'.format(old_type), tracker_uuid)
+            if old_type=='yara':
+                if not is_default_yara_rule(old_tracker):
+                    filepath = get_yara_rule_file_by_tracker_name(old_tracker)
+                    if filepath:
+                        os.remove(filepath)
+
+    # create all tracker set
+    r_serv_tracker.sadd('all:tracker:{}'.format(tracker_type), tracker)
+
+    # create tracker - uuid map
+    r_serv_tracker.sadd('all:tracker_uuid:{}:{}'.format(tracker_type, tracker), tracker_uuid)
+
+    # add display level set
+    if level == 0: # user only
+        r_serv_tracker.sadd('user:tracker:{}'.format(user_id), tracker_uuid)
+        r_serv_tracker.sadd('user:tracker:{}:{}'.format(user_id, tracker_type), tracker_uuid)
+    elif level == 1: # global
+        r_serv_tracker.sadd('global:tracker', tracker_uuid)
+        r_serv_tracker.sadd('global:tracker:{}'.format(tracker_type), tracker_uuid)
+
+    # create tracker tags list
+    for tag in tags:
+        r_serv_tracker.sadd('tracker:tags:{}'.format(tracker_uuid), escape(tag) )
+
+    # create tracker tags mail notification list
+    for mail in mails:
+        r_serv_tracker.sadd('tracker:mail:{}'.format(tracker_uuid), escape(mail) )
+
+    # toggle refresh module tracker list/set
+    r_serv_tracker.set('tracker:refresh:{}'.format(tracker_type), time.time())
+    if tracker_type != old_type: # toggle old type refresh
+        r_serv_tracker.set('tracker:refresh:{}'.format(old_type), time.time())
+    return tracker_uuid
+
+def api_add_tracker(dict_input, user_id):
+    tracker = dict_input.get('tracker', None)
+    if not tracker:
+        return ({"status": "error", "reason": "Tracker not provided"}, 400)
+    tracker_type = dict_input.get('type', None)
+    if not tracker_type:
+        return ({"status": "error", "reason": "Tracker type not provided"}, 400)
+    nb_words = dict_input.get('nb_words', 1)
+    description = dict_input.get('description', '')
+    description = escape(description)
+
+    res = api_validate_tracker_to_add(tracker , tracker_type, nb_words=nb_words)
+    if res[1]!=200:
+        return res
+    tracker = res[0]['tracker']
+    tracker_type = res[0]['type']
+
+    tags = dict_input.get('tags', [])
+    mails = dict_input.get('mails', [])
+    res = verify_mail_list(mails)
+    if res:
+        return res
+
+    ## TODO: add dashboard key
+    level = dict_input.get('level', 1)
+    try:
+        level = int(level)
+        if level not in range(0, 1):
+            level = 1
+    except:
+        level = 1
+
+    tracker_uuid = dict_input.get('uuid', None)
+    # check edit ACL
+    if tracker_uuid:
+        res = api_is_allowed_to_edit_tracker(tracker_uuid, user_id)
+        if res[1] != 200:
+            return res
+    else:
+        # check if tracker already tracked in global
+        if level==1:
+            if is_tracker_in_global_level(tracker, tracker_type) and not tracker_uuid:
+                return ({"status": "error", "reason": "Tracker already exist"}, 409)
+        else:
+            if is_tracker_in_user_level(tracker, tracker_type, user_id) and not tracker_uuid:
+                return ({"status": "error", "reason": "Tracker already exist"}, 409)
+
+    tracker_uuid = create_tracker(tracker , tracker_type, user_id, level, tags, mails, description, tracker_uuid=tracker_uuid)
+
+    return ({'tracker': tracker, 'type': tracker_type, 'uuid': tracker_uuid}, 200)
+
+##-- CREATE TRACKER --##
+
+##############
 #### YARA ####
 def get_yara_rules_dir():
     return os.path.join(os.environ['AIL_BIN'], 'trackers', 'yara')
@@ -99,13 +431,32 @@
     except:
         return False
 
-def is_valid_default_yara_rule(yara_rule):
+def is_default_yara_rule(tracked_yara_name):
+    yara_dir = get_yara_rules_dir()
+    filename = os.path.join(yara_dir, tracked_yara_name)
+    filename = os.path.realpath(filename)
+    try:
+        if tracked_yara_name.split('/')[0] == 'custom-rules':
+            return False
+    except:
+        return False
+    if not os.path.commonprefix([filename, yara_dir]) == yara_dir:
+        return False
+    else:
+        if os.path.isfile(filename):
+            return True
+    return False
+
+def is_valid_default_yara_rule(yara_rule, verbose=True):
     yara_dir = get_yara_rules_default_dir()
     filename = os.path.join(yara_dir, yara_rule)
     filename = os.path.realpath(filename)
-    # incorrect filename
     if not os.path.commonprefix([filename, yara_dir]) == yara_dir:
+        if verbose:
+            print('error: file transversal')
+            print(yara_dir)
+            print(filename)
         return False
     else:
         if os.path.isfile(filename):
@@ -124,6 +475,17 @@
         filename = os.path.join('ail-yara-rules', 'rules', yara_rule)
     return filename
 
+def get_yara_rule_file_by_tracker_name(tracked_yara_name):
+    yara_dir = get_yara_rules_dir()
+    filename = os.path.join(yara_dir, tracked_yara_name)
+    filename = os.path.realpath(filename)
+    if not os.path.commonprefix([filename, yara_dir]) == yara_dir:
+        print('error: file transversal')
+        print(yara_dir)
+        print(filename)
+        return None
+    return filename
+
 def get_yara_rule_content(yara_rule):
     yara_dir = get_yara_rules_dir()
     filename = os.path.join(yara_dir, yara_rule)
@@ -137,8 +499,23 @@
         rule_content = f.read()
     return rule_content
 
-##-- YARA --##
+def api_get_default_rule_content(default_yara_rule):
+    yara_dir = get_yara_rules_default_dir()
+    filename = os.path.join(yara_dir, default_yara_rule)
+    filename = os.path.realpath(filename)
+    # incorrect filename
+    if not os.path.commonprefix([filename, yara_dir]) == yara_dir:
+        return ({'status': 'error', 'reason': 'file transversal detected'}, 400)
+
+    if not os.path.isfile(filename):
+        return ({'status': 'error', 'reason': 'yara rule not found'}, 400)
+
+    with open(filename, 'r') as f:
+        rule_content = f.read()
+    return ({'rule_name': default_yara_rule, 'content': rule_content}, 200)
+
+##-- YARA --##
 
 if __name__ == '__main__':
     res = is_valid_yara_rule('rule dummy {  }')
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 9cc7933f..6260aa3b 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -49,10 +49,23 @@ faup = Faup()
 def generate_uuid():
     return str(uuid.uuid4()).replace('-', '')
 
+# # TODO: remove me ?
 def get_current_date():
     return datetime.now().strftime("%Y%m%d")
 
-##-- COMMON --#
+def is_valid_onion_domain(domain):
+    if not domain.endswith('.onion'):
+        return False
+    domain = domain.replace('.onion', '', 1)
+    if len(domain) == 16: # v2 address
+        r_onion = r'[a-z0-9]{16}'
+        if re.match(r_onion, domain):
+            return True
+    elif len(domain) == 56: # v3 address
+        r_onion = r'[a-z0-9]{56}'
+        if re.fullmatch(r_onion, domain):
+            return True
+    return False
 
 ################################################################################
@@ -616,6 +629,19 @@
                             crawler_type=crawler_type, auto_crawler=auto_crawler, crawler_delta=crawler_delta, cookiejar_uuid=cookiejar_uuid, user_agent=user_agent)
     return None
 
+
+#### ####
+
+#### SPLASH API ####
+def is_splash_reachable(splash_url, timeout=1.0):
+    try:
+        r = requests.get(splash_url , timeout=timeout)
+    except Exception:
+        return False
+    if r.status_code == 200:
+        return True
+    else:
+        return False
 #### ####
 
 def is_redirection(domain, last_url):
@@ -683,6 +709,15 @@
     with open(filename, 'w') as f:
         f.write(json.dumps(har_content))
 
+# # TODO: FIXME
+def api_add_crawled_item(dict_crawled):
+
+    domain = None
+    # create item_id     item_id =
+
+    save_crawled_item(item_id, response.data['html'])
+    create_item_metadata(item_id, domain, 'last_url', port, 'father')
+
 #### CRAWLER QUEUES ####
 def get_all_crawlers_queues_types():
     all_queues_types = set()
@@ -757,7 +792,6 @@
 # SPLASH MANAGER #
 # # # # # # # # # # # # #
-
 def get_splash_manager_url(reload=False): # TODO: add in db config
     return r_serv_onion.get('crawler:splash:manager:url')
diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py
index c1005b49..010e30b6 100755
--- a/bin/lib/item_basic.py
+++ b/bin/lib/item_basic.py
@@ -113,9 +113,6 @@ def get_item_parent(item_id):
 def get_item_children(item_id):
     return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))
 
-def add_item_parent(item_parent, item_id):
-    return item_basic.add_item_parent(item_parent, item_id)
-
 # # TODO: handle domain last origin in domain lib
 def _delete_node(item_id):
     # only if item isn't deleted
@@ -169,3 +166,38 @@
     # delete twitter id
 
 ##-- --##
+
+## COMMON ##
+def _get_dir_source_name(directory, source_name=None, l_sources_name=set()):
+    if source_name:
+        l_dir = os.listdir(os.path.join(directory, source_name))
+    else:
+        l_dir = os.listdir(directory)
+    # empty directory
+    if not l_dir:
+        return l_sources_name.add(source_name)
+        return l_sources_name
+    else:
+        for src_name in l_dir:
+            if len(src_name) == 4:
+                try:
+                    int(src_name)
+                    l_sources_name.add(os.path.join(source_name))
+                    return l_sources_name
+                except:
+                    pass
+            if source_name:
+                src_name = os.path.join(source_name, src_name)
+            l_sources_name = _get_dir_source_name(directory, source_name=src_name, l_sources_name=l_sources_name)
+    return l_sources_name
+
+
+def get_all_items_sources():
+    res = _get_dir_source_name(PASTES_FOLDER)
+    print(res)
+
+##-- --##
+
+
+if __name__ == '__main__':
+    get_all_items_sources()
diff --git a/bin/packages/HiddenServices.py b/bin/packages/HiddenServices.py
index 8ed7372b..7b0c444a 100755
--- a/bin/packages/HiddenServices.py
+++ b/bin/packages/HiddenServices.py
@@ -60,15 +60,14 @@ class HiddenServices(object):
             self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
             self.paste_crawled_directory = os.path.join(self.paste_directory, config_loader.get_config_str("Directories", "crawled"))
             self.paste_crawled_directory_name = config_loader.get_config_str("Directories", "crawled")
-            self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
-            self.screenshot_directory_screenshot = os.path.join(self.screenshot_directory, 'screenshot')
+            self.screenshot_directory = config_loader.get_files_directory('screenshot')
         elif type == 'i2p':
             self.paste_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
-            self.screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
+            self.screenshot_directory = config_loader.get_files_directory('screenshot')
         else:
             ## TODO: # FIXME: add error
             pass
-
+
         config_loader = None
 
     #def remove_absolute_path_link(self, key, value):
diff --git a/bin/packages/Item.py b/bin/packages/Item.py
index d898d1e0..36a236e0 100755
--- a/bin/packages/Item.py
+++ b/bin/packages/Item.py
@@ -2,8 +2,11 @@
 # -*-coding:UTF-8 -*
 
 import os
+import re
 import sys
 import redis
+import cld3
+import html2text
 
 from io import BytesIO
 
@@ -31,7 +34,8 @@
 PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
 
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-screenshot_directory = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
+screenshot_directory = config_loader.get_files_directory('screenshot')
+har_directory = config_loader.get_files_directory('har')
 
 config_loader = None
 
@@ -59,6 +63,9 @@ def get_item_basename(item_id):
 def get_item_size(item_id):
     return round(os.path.getsize(os.path.join(PASTES_FOLDER, item_id))/1024.0, 2)
 
+def get_item_encoding(item_id):
+    return None
+
 def get_lines_info(item_id, item_content=None):
     if not item_content:
         item_content = get_item_content(item_id)
@@ -73,9 +80,86 @@
 
     return {'nb': nb_line, 'max_length': max_length}
 
+def get_item_metadata(item_id, item_content=None):
+    ## TODO: FIXME ##performance
+    # encoding
+    # language
+    # lines info
+
+    item_metadata = {}
+    item_metadata['date'] = get_item_date(item_id, add_separator=True)
+    item_metadata['source'] = get_source(item_id)
+    item_metadata['size'] = get_item_size(item_id)
+    item_metadata['encoding'] = get_item_encoding(item_id)
+    item_metadata['lines'] = get_lines_info(item_id, item_content=item_content)
+
+    return item_metadata
+
+def get_item_parent(item_id):
+    return item_basic.get_item_parent(item_id)
+
+def add_item_parent(item_parent, item_id):
+    return item_basic.add_item_parent(item_parent, item_id)
+
 def get_item_content(item_id):
     return item_basic.get_item_content(item_id)
 
+def get_item_content_html2text(item_id, item_content=None, ignore_links=False):
+    if not item_content:
+        item_content = get_item_content(item_id)
+    h = html2text.HTML2Text()
+    h.ignore_links = ignore_links
+    h.ignore_images = ignore_links
+    return h.handle(item_content)
+
+def remove_all_urls_from_content(item_id, item_content=None):
+    if not item_content:
+        item_content = get_item_content(item_id)
+    regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
+    url_regex = re.compile(regex)
+    urls = url_regex.findall(item_content)
+    urls = sorted(urls, key=len, reverse=True)
+    for url in urls:
+        item_content = item_content.replace(url, '')
+
+    regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
+    regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
+    regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
+    re.compile(regex_pgp_public_blocs)
+    re.compile(regex_pgp_signature)
+    re.compile(regex_pgp_message)
+
+    res = re.findall(regex_pgp_public_blocs, item_content)
+    for it in res:
+        item_content = item_content.replace(it, '')
+    res = re.findall(regex_pgp_signature, item_content)
+    for it in res:
+        item_content = item_content.replace(it, '')
+    res = re.findall(regex_pgp_message, item_content)
+    for it in res:
+        item_content = item_content.replace(it, '')
+
+    return item_content
+
+def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
+    all_languages = []
+
+    ## CLEAN CONTENT ##
+    content = get_item_content_html2text(item_id, ignore_links=True)
+    content = remove_all_urls_from_content(item_id, item_content=content)
+
+    # REMOVE USELESS SPACE
+    content = ' '.join(content.split())
+    #- CLEAN CONTENT -#
+
+    #print(content)
+    #print(len(content))
+    if len(content) >= min_len:
+        for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
+            if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
+                all_languages.append(lang)
+    return all_languages
+
 # API
 def get_item(request_dict):
     if not request_dict:
@@ -257,6 +341,18 @@ def get_item_list_desc(list_item_id):
 
 def is_crawled(item_id):
     return item_basic.is_crawled(item_id)
 
+def get_crawler_matadata(item_id, ltags=None):
+    dict_crawler = {}
+    if is_crawled(item_id):
+        dict_crawler['domain'] = get_item_domain(item_id)
+        if not ltags:
+            ltags = Tag.get_obj_tag(item_id)
+        dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
+        dict_crawler['url'] = get_item_link(item_id)
+        dict_crawler['screenshot'] = get_item_screenshot(item_id)
+        dict_crawler['har'] = get_item_har_name(item_id)
+    return dict_crawler
+
 def is_onion(item_id):
     is_onion = False
     if len(is_onion) > 62:
@@ -293,7 +389,7 @@
     return ''
 
 def get_item_har_name(item_id):
-    os.path.join(screenshot_directory, item_id) + '.json'
+    har_path = os.path.join(har_directory, item_id) + '.json'
     if os.path.isfile(har_path):
        return har_path
     else:
@@ -322,6 +418,24 @@
         return []
     return res
 
+def get_item_nb_duplicates(item_id):
+    return r_serv_metadata.scard('dup:{}'.format(item_id))
+
+def get_item_duplicates_dict(item_id):
+    dict_duplicates = {}
+    for duplicate in get_item_duplicate(item_id):
+        duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',')
+        duplicate_id = duplicate[1]
+        if not duplicate_id in dict_duplicates:
+            dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}}
+        algo = duplicate[0]
+        if algo == 'tlsh':
+            similarity = 100 - int(duplicate[2])
+        else:
+            similarity = int(duplicate[2])
+        dict_duplicates[duplicate_id]['algo'][algo] = similarity
+    return dict_duplicates
+
 def add_item_duplicate(item_id, l_dup):
     for item_dup in l_dup:
         r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id)
@@ -434,3 +548,17 @@ def delete_domain_node(item_id):
         domain_basic.delete_domain_item_core(item_id, domain, port)
     for child_id in get_all_domain_node_by_item_id(item_id):
         delete_item(child_id)
+
+# if __name__ == '__main__':
+#     import Domain
+#     domain = Domain.Domain('domain.onion')
+#     for domain_history in domain.get_domain_history():
+#         domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
+#         if "items" in domain_item:
+#             for item_dict in domain_item['items']:
+#                 item_id = item_dict['id']
+#                 print(item_id)
+#                 for lang in get_item_languages(item_id, min_proportion=0.2, min_probability=0.8):
+#                     print(lang)
+#                 print()
+#                 print(get_item_languages(item_id, min_proportion=0.2, min_probability=0.6)) # 0.7 ?
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index f6695eba..65c3ca46 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -62,7 +62,6 @@ class Paste(object):
 
         config_loader = ConfigLoader.ConfigLoader()
         self.cache = config_loader.get_redis_conn("Redis_Queues")
-        self.store = config_loader.get_redis_conn("Redis_Data_Merging")
         self.store_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 
         self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
@@ -327,50 +326,27 @@
     def get_p_date_path(self):
         return self.p_date_path
 
-    def save_all_attributes_redis(self, key=None):
-        """
-        Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
-
-        :param r_serv: -- Connexion to the Database.
-        :param key: -- Key of an additionnal set.
-
-        Example:
-        import redis
-
-        r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
-
-        PST = Paste("/home/Zkopkmlk.gz")
-        PST.save_all_attributes_redis(r_serv)
-
-        """
-        # LevelDB Compatibility
-        p = self.store.pipeline(False)
-        p.hset(self.p_path, "p_name", self.p_name)
-        p.hset(self.p_path, "p_size", self.p_size)
-        p.hset(self.p_path, "p_mime", self.p_mime)
-        # p.hset(self.p_path, "p_encoding", self.p_encoding)
-        p.hset(self.p_path, "p_date", self._get_p_date())
-        p.hset(self.p_path, "p_hash_kind", self._get_p_hash_kind())
-        p.hset(self.p_path, "p_hash", self.p_hash)
-        # p.hset(self.p_path, "p_langage", self.p_langage)
-        # p.hset(self.p_path, "p_nb_lines", self.p_nb_lines)
-        # p.hset(self.p_path, "p_max_length_line", self.p_max_length_line)
-        # p.hset(self.p_path, "p_categories", self.p_categories)
-        p.hset(self.p_path, "p_source", self.p_source)
-        if key is not None:
-            p.sadd(key, self.p_path)
-        else:
-            pass
-        p.execute()
-
-    def save_attribute_redis(self, attr_name, value):
-        """
-        Save an attribute as a field
-        """
-        if type(value) == set:
-            self.store.hset(self.p_path, attr_name, json.dumps(list(value)))
-        else:
-            self.store.hset(self.p_path, attr_name, json.dumps(value))
+    # def save_all_attributes_redis(self, key=None):
+    #     """
+    #     Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
+    #
+    #     :param r_serv: -- Connexion to the Database.
+    #     :param key: -- Key of an additionnal set.
+    #
+    #     Example:
+    #     import redis
+    #
+    #     r_serv = redis.StrictRedis(host = 127.0.0.1, port = 6739, db = 0)
+    #
+    #     PST = Paste("/home/Zkopkmlk.gz")
+    #     PST.save_all_attributes_redis(r_serv)
+    #
+    #     """
+    #
+    # def save_attribute_redis(self, attr_name, value):
+    #     """
+    #     Save an attribute as a field
+    #     """
 
     def save_attribute_duplicate(self, value):
         """
diff --git a/bin/packages/Term.py b/bin/packages/Term.py
index 7896dbbe..773310c9 100755
--- a/bin/packages/Term.py
+++ b/bin/packages/Term.py
@@ -38,6 +38,8 @@ tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+
                             gaps=True, discard_empty=True)
 
 def is_valid_uuid_v4(UUID):
+    if not UUID:
+        return False
     UUID = UUID.replace('-', '')
     try:
         uuid_test = uuid.UUID(hex=UUID, version=4)
@@ -215,11 +217,12 @@
         words_set = set(words)
         words_set = sorted(words_set)
 
+        if nb_words > len(words_set):
+            nb_words = len(words_set)
+
         term = ",".join(words_set)
         term = "{};{}".format(term, nb_words)
 
-        if nb_words > len(words_set):
-            nb_words = len(words_set)
-
     elif term_type=='yara_custom':
         if not Tracker.is_valid_yara_rule(term):
             return ({"status": "error", "reason": "Invalid custom Yara Rule"}, 400)
@@ -322,8 +325,11 @@
     r_serv_term.delete('tracker:stat:{}'.format(term_uuid))
 
     if term_type == 'yara':
-        # # TODO:
-        pass
+        # delete custom rule
+        if not Tracker.is_default_yara_rule(term):
+            filepath = Tracker.get_yara_rule_file_by_tracker_name(term)
+            if filepath:
+                os.remove(filepath)
 
 def replace_tracker_description(term_uuid, description):
     description = escape(description)
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index 233b9066..ed3c466e 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -20,9 +20,6 @@ subscribe = Redis_Duplicate
 [Indexer]
 subscribe = Redis_Global
 
-[Attributes]
-subscribe = Redis_Global
-
 [DomClassifier]
 subscribe = Redis_Global
 
@@ -46,6 +43,9 @@ subscribe = Redis_Global
 publish = Redis_Tags
 
+[Languages]
+subscribe = Redis_Global
+
 [Categ]
 subscribe = Redis_Global
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve,Redis_ApiKey
@@ -64,15 +64,17 @@
 
 [Onion]
 subscribe = Redis_Onion
-publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler
-#publish = Redis_Global,Redis_ValidOnion,ZMQ_FetchedOnion
+publish = Redis_ValidOnion,Redis_Tags,Redis_Crawler
+#publish = Redis_ValidOnion,ZMQ_FetchedOnion,Redis_Tags,Redis_Crawler
+# TODO remove me
 
 [DumpValidOnion]
 subscribe = Redis_ValidOnion
 
 [Web]
 subscribe = Redis_Web
-publish = Redis_Url,ZMQ_Url
+publish = Redis_Url
+#publish = Redis_Url,ZMQ_Url
 
 [WebStats]
 subscribe = Redis_Url
diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py
index 41f45acb..17438d60 100644
--- a/bin/torcrawler/TorSplashCrawler.py
+++ b/bin/torcrawler/TorSplashCrawler.py
@@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
-#import ConfigLoader
+import ConfigLoader
 import Screenshot
 import crawlers
 
@@ -105,14 +105,15 @@
             'SPLASH_COOKIES_DEBUG': False
         })
 
-    def crawl(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item):
domain=domain, port=port, cookies=cookies, original_item=original_item) + def crawl(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item): + self.process.crawl(self.crawler, splash_url=splash_url, type=type, crawler_options=crawler_options, date=date, requested_mode=requested_mode, url=url, domain=domain, port=port, cookies=cookies, original_item=original_item) self.process.start() class TorSplashSpider(Spider): name = 'TorSplashSpider' - def __init__(self, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs): + def __init__(self, splash_url, type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item, *args, **kwargs): + self.splash_url = splash_url self.domain_type = type self.requested_mode = requested_mode self.original_item = original_item @@ -132,7 +133,11 @@ class TorSplashCrawler(): config_section = 'Crawler' self.p = Process(config_section) self.item_dir = os.path.join(self.p.config.get("Directories", "crawled"), date_str ) - self.har_dir = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "crawled_screenshot"), date_str ) + + config_loader = ConfigLoader.ConfigLoader() + self.har_dir = os.path.join(config_loader.get_files_directory('har') , date_str ) + config_loader = None + self.r_serv_log_submit = redis.StrictRedis( host=self.p.config.get("Redis_Log_submit", "host"), port=self.p.config.getint("Redis_Log_submit", "port"), @@ -245,30 +250,34 @@ class TorSplashCrawler(): self.logger.error(repr(failure)) if failure.check(ResponseNeverReceived): - request = failure.request - url= request.meta['current_url'] - father = request.meta['father'] + ## DEBUG ## + self.logger.error(failure.request) + if failure.value.response: + self.logger.error(failure.value.response) + ## ----- ## + + # Extract request metadata + url = failure.request.meta['current_url'] + father = failure.request.meta['father'] + l_cookies = self.build_request_arg(failure.request.meta['splash']['args']['cookies']) + + # Check if Splash restarted + if not crawlers.is_splash_reachable(self.splash_url): + self.logger.error('Splash, ResponseNeverReceived for %s, retry in 30s ...', url) + time.sleep(30) - self.logger.error('Splash, ResponseNeverReceived for %s, retry in 10s ...', url) - time.sleep(10) - if response: - response_root_key = response.meta['root_key'] - else: - response_root_key = None yield SplashRequest( url, self.parse, errback=self.errback_catcher, endpoint='execute', - cache_args=['lua_source'], meta={'father': father, 'current_url': url}, - args=self.build_request_arg(response.cookiejar) + args=l_cookies ) else: - print('failure') - #print(failure) - print(failure.type) + self.logger.error(failure.type) + self.logger.error(failure.getErrorMessage()) def save_crawled_item(self, item_id, item_content): gzip64encoded = crawlers.save_crawled_item(item_id, item_content) diff --git a/bin/torcrawler/tor_crawler.py b/bin/torcrawler/tor_crawler.py index f060482b..954eae0f 100755 --- a/bin/torcrawler/tor_crawler.py +++ b/bin/torcrawler/tor_crawler.py @@ -46,4 +46,4 @@ if __name__ == '__main__': redis_cache.delete('crawler_request:{}'.format(uuid)) crawler = TorSplashCrawler(splash_url, crawler_options) - crawler.crawl(service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item) + crawler.crawl(splash_url, service_type, crawler_options, date, requested_mode, url, domain, port, cookies, original_item) diff --git 
a/bin/trackers/Tracker_Yara.py b/bin/trackers/Tracker_Yara.py index 25666304..b0356b55 100755 --- a/bin/trackers/Tracker_Yara.py +++ b/bin/trackers/Tracker_Yara.py @@ -11,12 +11,10 @@ import time import yara from pubsublogger import publisher -# -# import NotificationHelper -# sys.path.append(os.environ['AIL_BIN']) from Helper import Process +import NotificationHelper # # TODO: refractor sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) import Term @@ -26,7 +24,7 @@ import Tracker import item_basic -full_item_url = "/showsavedpaste/?paste=" +full_item_url = "/object/item?id=" mail_body_template = "AIL Framework,\nNew YARA match: {}\nitem id: {}\nurl: {}{}" last_refresh = time.time() @@ -48,7 +46,7 @@ def yara_rules_match(data): mail_to_notify = Tracker.get_tracker_mails(tracker_uuid) if mail_to_notify: mail_subject = Tracker.get_email_subject(tracker_uuid) - mail_body = mail_body_template.format(term, item_id, full_item_url, item_id) + mail_body = mail_body_template.format(data['rule'], item_id, full_item_url, item_id) for mail in mail_to_notify: NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body) @@ -73,10 +71,12 @@ if __name__ == "__main__": item_id = p.get_from_set() if item_id is not None: item_content = item_basic.get_item_content(item_id) - yara_match = rules.match(data=item_content, callback=yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60) - if yara_match: - print(f'{item_id}: {yara_match}') - + try: + yara_match = rules.match(data=item_content, callback=yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=60) + if yara_match: + print(f'{item_id}: {yara_match}') + except yara.TimeoutError as e: + print(f'{item_id}: yara scanning timed out') else: time.sleep(5) diff --git a/bin/update-background.py b/bin/update-background.py index d1ec6eaf..d4e7b359 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -17,6 +17,37 @@ import subprocess sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +def launch_background_upgrade(version, l_script_name): + if r_serv.sismember('ail:to_update', version): + r_serv.delete('ail:update_error') + r_serv.set('ail:update_in_progress', version) + r_serv.set('ail:current_background_update', version) + + for script_name in l_script_name: + r_serv.set('ail:current_background_script', script_name) + update_file = os.path.join(os.environ['AIL_HOME'], 'update', version, script_name) + process = subprocess.run(['python' ,update_file]) + update_progress = r_serv.get('ail:current_background_script_stat') + #if update_progress: + # if int(update_progress) != 100: + # r_serv.set('ail:update_error', 'Update {} Failed'.format(version)) + + update_progress = r_serv.get('ail:current_background_script_stat') + if update_progress: + if int(update_progress) == 100: + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + r_serv.srem('ail:to_update', version) + +def clean_update_db(): + r_serv.delete('ail:update_error') + r_serv.delete('ail:update_in_progress') + r_serv.delete('ail:current_background_script') + r_serv.delete('ail:current_background_script_stat') + r_serv.delete('ail:current_background_update') + if __name__ == "__main__": config_loader = ConfigLoader.ConfigLoader() @@ -25,92 +56,12 @@ if __name__ == "__main__": r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - if 
r_serv.scard('ail:update_v1.5') != 5: - r_serv.delete('ail:update_error') - r_serv.set('ail:update_in_progress', 'v1.5') - r_serv.set('ail:current_background_update', 'v1.5') - if not r_serv.sismember('ail:update_v1.5', 'onions'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'metadata'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Metadata.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'tags'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags.py') - process = subprocess.run(['python' ,update_file]) - - if not r_serv.sismember('ail:update_v1.5', 'tags_background'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Tags_background.py') - process = subprocess.run(['python' ,update_file]) - if not r_serv.sismember('ail:update_v1.5', 'crawled_screenshot'): - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v1.5', 'Update-ARDB_Onions_screenshots.py') - process = subprocess.run(['python' ,update_file]) - if r_serv.scard('ail:update_v1.5') != 5: - r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script') - else: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - - if r_serv.get('ail:current_background_update') == 'v2.4': - r_serv.delete('ail:update_error') - r_serv.set('ail:update_in_progress', 'v2.4') - r_serv.set('ail:current_background_update', 'v2.4') - r_serv.set('ail:current_background_script', 'domain update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', 'v2.4', 'Update_domain.py') - process = subprocess.run(['python' ,update_file]) + if r_serv.scard('ail:to_update') == 0: + clean_update_db() - if int(r_serv_onion.scard('domain_update_v2.4')) != 0: - r_serv.set('ail:update_error', 'Update v2.4 Failed, please relaunch the bin/update-background.py script') - else: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.delete('update:nb_elem_to_convert') - r_serv.delete('update:nb_elem_converted') - - if r_serv.sismember('ail:to_update', 'v2.6'): - new_version = 'v2.6' - r_serv.delete('ail:update_error') - r_serv.delete('ail:current_background_script_stat') - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.set('ail:current_background_script', 'screenshot update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_screenshots.py') - process = subprocess.run(['python' ,update_file]) - - update_progress = r_serv.get('ail:current_background_script_stat') - if update_progress: - if int(update_progress) == 100: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.srem('ail:to_update', new_version) - - elif r_serv.sismember('ail:to_update', 'v2.7'): - new_version = 'v2.7' - r_serv.delete('ail:update_error') - r_serv.delete('ail:current_background_script_stat') - 
r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.set('ail:current_background_script', 'domain tags update') - - update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_domain_tags.py') - process = subprocess.run(['python' ,update_file]) - - update_progress = r_serv.get('ail:current_background_script_stat') - if update_progress: - if int(update_progress) == 100: - r_serv.delete('ail:update_in_progress') - r_serv.delete('ail:current_background_script') - r_serv.delete('ail:current_background_script_stat') - r_serv.delete('ail:current_background_update') - r_serv.srem('ail:to_update', new_version) + launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py', 'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py']) + launch_background_upgrade('v2.4', ['Update_domain.py']) + launch_background_upgrade('v2.6', ['Update_screenshots.py']) + launch_background_upgrade('v2.7', ['Update_domain_tags.py']) + launch_background_upgrade('v3.4', ['Update_domain.py']) diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index e4c509ff..669550b9 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -4,7 +4,8 @@ dicofilters = Dicos pastes = PASTES hash = HASHS crawled = crawled -crawled_screenshot = CRAWLED_SCREENSHOT +har = CRAWLED_SCREENSHOT +screenshot = CRAWLED_SCREENSHOT/screenshot wordtrending_csv = var/www/static/csv/wordstrendingdata wordsfile = files/wordfile @@ -40,7 +41,7 @@ sender_user = #Proxying requests to the app baseUrl = / #Host to bind to -host = 0.0.0.0 +host = 127.0.0.1 #Flask server port port = 7000 #Number of logs to display in the dashboard @@ -141,16 +142,6 @@ host = localhost port = 6381 db = 0 -[Redis_Data_Merging] -host = localhost -port = 6379 -db = 1 - -[Redis_Paste_Name] -host = localhost -port = 6379 -db = 2 - [Redis_Mixer_Cache] host = localhost port = 6381 @@ -221,6 +212,11 @@ host = localhost port = 6382 db = 10 +[Kvrocks_Meta] +host = localhost +port = 6383 +db = 0 + [Url] cc_critical = DE @@ -278,6 +274,7 @@ default_crawler_closespider_pagecount = 50 default_crawler_user_agent = Mozilla/5.0 (Windows NT 6.1; rv:60.0) Gecko/20100101 Firefox/60.0 splash_url = http://127.0.0.1 splash_port = 8050-8052 +domain_proxy = onion.foundation [IP] # list of comma-separated CIDR that you wish to be alerted for. e.g: diff --git a/installing_deps.sh b/installing_deps.sh index ff2df4c6..4206d48c 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -11,6 +11,9 @@ sudo apt-get update sudo apt-get install python3-pip virtualenv python3-dev python3-tk libfreetype6-dev \ screen g++ python-tk unzip libsnappy-dev cmake -qq +#Needed for downloading jemalloc +sudo apt-get install wget -qq + #optional tor install sudo apt-get install tor -qq @@ -80,11 +83,18 @@ sudo make install popd # ARDB # -test ! -d ardb/ && git clone https://github.com/yinqiwen/ardb.git +test ! -d ardb/ && git clone https://github.com/ail-project/ardb.git pushd ardb/ make popd +# KVROCKS # +# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git +# pushd kvrocks/ +# make -j4 +# popd + +# Config File if [ ! 
-f configs/core.cfg ]; then
	cp configs/core.cfg.sample configs/core.cfg
fi
diff --git a/requirements.txt b/requirements.txt
index 33542f8b..2a9347c4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -22,11 +22,13 @@ textblob
 #Tokeniser
 nltk
+html2text
 yara-python
 #Crawler
 scrapy
 scrapy-splash
+pycld3
 #Graph
 numpy
diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py
index 3327878f..442f979c 100755
--- a/update/v1.5/Update-ARDB_Onions_screenshots.py
+++ b/update/v1.5/Update-ARDB_Onions_screenshots.py
@@ -33,7 +33,7 @@ if __name__ == '__main__':
     config_loader = ConfigLoader.ConfigLoader()
     SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
-    NEW_SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
+    NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
     PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
@@ -111,4 +111,7 @@
     print()
     print('Done in {} s'.format(end - start_deb))
+    r_serv.set('ail:current_background_script_stat', 100)
     r_serv.sadd('ail:update_v1.5', 'crawled_screenshot')
+    if r_serv.scard('ail:update_v1.5') != 5:
+        r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')
diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py
index dee56e44..be80c76d 100755
--- a/update/v1.5/Update.py
+++ b/update/v1.5/Update.py
@@ -43,13 +43,12 @@ if __name__ == '__main__':
     print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
     print()
+    # Add background update
+    r_serv.sadd('ail:to_update', 'v1.5')
+
     #Set current ail version
     r_serv.set('ail:version', 'v1.5')
-    #Set current update_in_progress
-    r_serv.set('ail:update_in_progress', 'v1.5')
-    r_serv.set('ail:current_background_update', 'v1.5')
-
     #Set current ail version
     r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.4/Update.py b/update/v2.4/Update.py
index 53456330..7728fab7 100755
--- a/update/v2.4/Update.py
+++ b/update/v2.4/Update.py
@@ -22,14 +22,13 @@ if __name__ == '__main__':
     r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
     config_loader = None
-    #Set current update_in_progress
-    r_serv.set('ail:update_in_progress', new_version)
-    r_serv.set('ail:current_background_update', new_version)
-
     r_serv_onion.sunionstore('domain_update_v2.4', 'full_onion_up', 'full_regular_up')
     r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v2.4'))
     r_serv.set('update:nb_elem_converted',0)
+    # Add background update
+    r_serv.sadd('ail:to_update', new_version)
+
     #Set current ail version
     r_serv.set('ail:version', new_version)
diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py
index addaedb6..da4ba01d 100755
--- a/update/v2.4/Update_domain.py
+++ b/update/v2.4/Update_domain.py
@@ -56,6 +56,8 @@ if __name__ == '__main__':
     r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
     config_loader = None
+    r_serv_db.set('ail:current_background_script', 'domain update')
+
     nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
     if not nb_elem_to_update:
         nb_elem_to_update = 0
@@ -78,4 +80,7 @@
         update_update_stats()
     else:
+        r_serv_db.delete('update:nb_elem_to_convert')
+        r_serv_db.delete('update:nb_elem_converted')
+
r_serv_db.set('ail:current_background_script_stat', 100) sys.exit(0) diff --git a/update/v2.6/Update.py b/update/v2.6/Update.py index c8cdb5ac..4245eade 100755 --- a/update/v2.6/Update.py +++ b/update/v2.6/Update.py @@ -21,10 +21,6 @@ if __name__ == '__main__': r_serv = config_loader.get_redis_conn("ARDB_DB") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.sadd('ail:to_update', new_version) #Set current ail version diff --git a/update/v2.6/Update_screenshots.py b/update/v2.6/Update_screenshots.py index 38ba0366..9716dc67 100755 --- a/update/v2.6/Update_screenshots.py +++ b/update/v2.6/Update_screenshots.py @@ -93,9 +93,3 @@ if __name__ == '__main__': end = time.time() print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb)) - - r_serv_db.delete('ail:update_in_progress') - r_serv_db.delete('ail:current_background_script') - r_serv_db.delete('ail:current_background_script_stat') - r_serv_db.delete('ail:current_background_update') - r_serv_db.srem('ail:to_update', 'v2.6') diff --git a/update/v2.7/Update.py b/update/v2.7/Update.py index 1f4ead2c..fa6f539c 100755 --- a/update/v2.7/Update.py +++ b/update/v2.7/Update.py @@ -23,10 +23,6 @@ if __name__ == '__main__': r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") config_loader = None - #Set current update_in_progress - r_serv.set('ail:update_in_progress', new_version) - r_serv.set('ail:current_background_update', new_version) - r_serv.sadd('ail:to_update', new_version) #### Update tags #### diff --git a/update/v2.7/Update_domain_tags.py b/update/v2.7/Update_domain_tags.py index 937110af..1f9f5eca 100755 --- a/update/v2.7/Update_domain_tags.py +++ b/update/v2.7/Update_domain_tags.py @@ -116,9 +116,3 @@ if __name__ == '__main__': end = time.time() print('ALL domains tags updated in {} s'.format(end - start_deb)) - - r_serv_db.delete('ail:update_in_progress') - r_serv_db.delete('ail:current_background_script') - r_serv_db.delete('ail:current_background_script_stat') - r_serv_db.delete('ail:current_background_update') - r_serv_db.srem('ail:to_update', update_version) diff --git a/update/v3.2/Update.py b/update/v3.2/Update.py new file mode 100755 index 00000000..fa06c8fe --- /dev/null +++ b/update/v3.2/Update.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import argparse +import datetime +import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.2' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #### NEW EXPORTER + # remove old tags errors + #r_serv_db.delete('mess_not_saved_export') + + # move solo tags to export in tags_db + #all_misp_tags = r_serv_db.smembers('whitelist_misp') + #all_hive_tags = r_serv_db.smembers('whitelist_hive') + # # TODO: save them in tags db + #### NEW EXPORTER + + #Set current ail version + r_serv_db.set('ail:version', new_version) + + #Set current ail version + r_serv_db.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.2/Update.sh b/update/v3.2/Update.sh new file mode 100755 index 00000000..a588e55d --- /dev/null +++ b/update/v3.2/Update.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. 
Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -ldbv & +wait +echo "" + +# SUBMODULES # +git submodule init +git submodule update + +echo -e $GREEN"Installing YARA ..."$DEFAULT +pip3 install yara-python +bash ${AIL_BIN}/LAUNCH.sh -t + +# SUBMODULES # +git submodule init +git submodule update + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v3.2/Update.py +wait +echo "" +echo "" + + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/update/v3.3/Update.py b/update/v3.3/Update.py new file mode 100755 index 00000000..39d1371b --- /dev/null +++ b/update/v3.3/Update.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +import time +import redis +import argparse +import datetime +import configparser + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.3' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv_db = config_loader.get_redis_conn("ARDB_DB") + config_loader = None + + #Set current ail version + r_serv_db.set('ail:version', new_version) + + #Set current ail version + r_serv_db.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.3/Update.sh b/update/v3.3/Update.sh new file mode 100755 index 00000000..86289dba --- /dev/null +++ b/update/v3.3/Update.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +bash ${AIL_BIN}/LAUNCH.sh -ldbv & +wait +echo "" + +# SUBMODULES # +git submodule update + +# echo "" +# echo -e $GREEN"installing KVORCKS ..."$DEFAULT +# cd ${AIL_HOME} +# test ! 
-d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git +# pushd kvrocks/ +# make -j4 +# popd + +echo -e $GREEN"Installing html2text ..."$DEFAULT +pip3 install html2text + +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v3.3/Update.py +wait +echo "" +echo "" + + +echo "" +echo -e $GREEN"Shutting down ARDB ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +exit 0 diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py new file mode 100755 index 00000000..943d4b3a --- /dev/null +++ b/update/v3.4/Update.py @@ -0,0 +1,36 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import time +import redis +import datetime + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) +import ConfigLoader + +new_version = 'v3.4' + +if __name__ == '__main__': + + start_deb = time.time() + + config_loader = ConfigLoader.ConfigLoader() + r_serv = config_loader.get_redis_conn("ARDB_DB") + r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") + config_loader = None + + r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') + r_serv.set('update:nb_elem_to_convert', r_serv_onion.scard('domain_update_v3.4')) + r_serv.set('update:nb_elem_converted',0) + + # Add background update + r_serv.sadd('ail:to_update', new_version) + + #Set current ail version + r_serv.set('ail:version', new_version) + + #Set current ail version + r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v3.4/Update.sh b/update/v3.4/Update.sh new file mode 100755 index 00000000..16a9ccb7 --- /dev/null +++ b/update/v3.4/Update.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; +[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_BIN:$PATH +export PATH=$AIL_FLASK:$PATH + +GREEN="\\033[1;32m" +DEFAULT="\\033[0;39m" + +echo -e $GREEN"Shutting down AIL ..."$DEFAULT +bash ${AIL_BIN}/LAUNCH.sh -ks +wait + +# bash ${AIL_BIN}/LAUNCH.sh -ldbv & +# wait +# echo "" + +# SUBMODULES # +git submodule update + +# echo "" +# echo -e $GREEN"installing KVORCKS ..."$DEFAULT +# cd ${AIL_HOME} +# test ! 
-d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
+# pushd kvrocks/
+# make -j4
+# popd
+
+echo -e $GREEN"Installing pycld3 ..."$DEFAULT
+pip3 install pycld3
+
+echo ""
+echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
+echo ""
+python ${AIL_HOME}/update/v3.4/Update.py
+wait
+echo ""
+echo ""
+
+
+echo ""
+echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
+bash ${AIL_BIN}/LAUNCH.sh -ks
+wait
+
+exit 0
diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py
new file mode 100755
index 00000000..72ba166d
--- /dev/null
+++ b/update/v3.4/Update_domain.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import re
+import sys
+import time
+import redis
+import datetime
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
+import ConfigLoader
+import Domain
+
+def update_update_stats():
+    nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
+    progress = int((nb_updated * 100) / nb_elem_to_update)
+    print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
+    r_serv_db.set('ail:current_background_script_stat', progress)
+
+def update_domain_language(domain_obj, item_id):
+    domain_name = domain_obj.get_domain_name()
+    Domain.add_domain_languages_by_item_id(domain_name, item_id)
+
+if __name__ == '__main__':
+
+    start_deb = time.time()
+
+    config_loader = ConfigLoader.ConfigLoader()
+    r_serv_db = config_loader.get_redis_conn("ARDB_DB")
+    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
+    config_loader = None
+
+    r_serv_db.set('ail:current_background_script', 'domain languages update')
+
+    nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
+    if not nb_elem_to_update:
+        nb_elem_to_update = 1
+    else:
+        nb_elem_to_update = int(nb_elem_to_update)
+
+    #Domain._delete_all_domains_languages()
+
+    while True:
+        domain = r_serv_onion.spop('domain_update_v3.4')
+        if domain is not None:
+            print(domain)
+            domain = Domain.Domain(domain)
+            for domain_history in domain.get_domain_history():
+                domain_item = domain.get_domain_items_crawled(epoch=domain_history[1]) # item_tag
+                if "items" in domain_item:
+                    for item_dict in domain_item['items']:
+                        update_domain_language(domain, item_dict['id'])
+
+            r_serv_db.incr('update:nb_elem_converted')
+            update_update_stats()
+
+        else:
+            r_serv_db.set('ail:current_background_script_stat', 100)
+            sys.exit(0)
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index 5f1f22d4..bce50bb3 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -42,6 +42,8 @@ from blueprints.crawler_splash import crawler_splash
 from blueprints.correlation import correlation
 from blueprints.tags_ui import tags_ui
 from blueprints.import_export import import_export
+from blueprints.objects_item import objects_item
+from blueprints.old_endpoints import old_endpoints
 Flask_dir = os.environ['AIL_FLASK']
@@ -97,6 +99,8 @@ app.register_blueprint(crawler_splash, url_prefix=baseUrl)
 app.register_blueprint(correlation, url_prefix=baseUrl)
 app.register_blueprint(tags_ui, url_prefix=baseUrl)
 app.register_blueprint(import_export, url_prefix=baseUrl)
+app.register_blueprint(objects_item, url_prefix=baseUrl)
+app.register_blueprint(old_endpoints, url_prefix=baseUrl)
 # ========= =========#
 # ========= Cookie name ========
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 5756a895..a5f6d548 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -24,8 +24,10 @@
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import Domain import crawlers +import Domain +import Language + import Config_DB r_cache = Flask_config.r_cache @@ -123,6 +125,9 @@ def send_to_spider(): return create_json_response(res[0], res[1]) return redirect(url_for('crawler_splash.manual')) + +#### Domains #### + # add route : /crawlers/show_domain @crawler_splash.route('/crawlers/showDomain', methods=['GET', 'POST']) @login_required @@ -148,7 +153,8 @@ def showDomain(): if domain.domain_was_up(): dict_domain = {**dict_domain, **domain.get_domain_correlation()} dict_domain['correlation_nb'] = Domain.get_domain_total_nb_correlation(dict_domain) - dict_domain['origin_item'] = domain.get_domain_last_origin() + dict_domain['father'] = domain.get_domain_father() + dict_domain['languages'] = Language.get_languages_from_iso(domain.get_domain_languages(), sort=True) dict_domain['tags'] = domain.get_domain_tags() dict_domain['tags_safe'] = Tag.is_tags_safe(dict_domain['tags']) dict_domain['history'] = domain.get_domain_history_with_status() @@ -236,6 +242,57 @@ def domains_explorer_web(): dict_data = Domain.get_domains_up_by_filers('regular', page=page, date_from=date_from, date_to=date_to) return render_template("domain_explorer.html", dict_data=dict_data, bootstrap_label=bootstrap_label, domain_type='regular') +@crawler_splash.route('/domains/languages/all/json', methods=['GET']) +@login_required +@login_read_only +def domains_all_languages_json(): + # # TODO: get domain type + iso = request.args.get('iso') + domain_types = request.args.getlist('domain_types') + return jsonify(Language.get_languages_from_iso(Domain.get_all_domains_languages(), sort=True)) + +@crawler_splash.route('/domains/languages/search_get', methods=['GET']) +@login_required +@login_read_only +def domains_search_languages_get(): + page = request.args.get('page') + try: + page = int(page) + except: + page = 1 + domains_types = request.args.getlist('domain_types') + if domains_types: + domains_types = domains_types[0].split(',') + languages = request.args.getlist('languages') + if languages: + languages = languages[0].split(',') + l_dict_domains = Domain.api_get_domains_by_languages(domains_types, Language.get_iso_from_languages(languages), domains_metadata=True, page=page) + return render_template("domains/domains_filter_languages.html", template_folder='../../', + l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, + current_languages=languages, domains_types=domains_types) + +@crawler_splash.route('/domains/name/search', methods=['GET']) +@login_required +@login_analyst +def domains_search_name(): + name = request.args.get('name') + page = request.args.get('page') + try: + page = int(page) + except: + page = 1 + domains_types = request.args.getlist('domain_types') + if domains_types: + domains_types = domains_types[0].split(',') + + l_dict_domains = Domain.api_search_domains_by_name(name, domains_types, domains_metadata=True, page=page) + return render_template("domains/domains_result_list.html", template_folder='../../', + l_dict_domains=l_dict_domains, bootstrap_label=bootstrap_label, + domains_types=domains_types) + +##-- --## + + ## Cookiejar ## @crawler_splash.route('/crawler/cookiejar/add', methods=['GET']) @login_required diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py new file mode 100644 index 00000000..2b951353 --- /dev/null +++ b/var/www/blueprints/objects_item.py @@ -0,0 
+1,97 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: item objects endpoints: show item, html2text, raw content, download ...
+'''
+
+import os
+import sys
+import json
+
+from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
+import Item
+import Tag
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export'))
+import Export
+
+# ============ BLUEPRINT ============
+objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+
+
+# ============= ROUTES ==============
+@objects_item.route("/object/item") #completely shows the paste in a new tab
+@login_required
+@login_read_only
+def showItem(): # # TODO: support post
+    item_id = request.args.get('id')
+    if not item_id or not Item.exist_item(item_id):
+        abort(404)
+
+    dict_item = {}
+    dict_item['id'] = item_id
+    dict_item['name'] = dict_item['id'].replace('/', ' / ')
+    dict_item['father'] = Item.get_item_parent(item_id)
+    dict_item['content'] = Item.get_item_content(item_id)
+    dict_item['metadata'] = Item.get_item_metadata(item_id, item_content=dict_item['content'])
+    dict_item['tags'] = Tag.get_obj_tag(item_id)
+    #dict_item['duplicates'] = Item.get_item_nb_duplicates(item_id)
+    dict_item['duplicates'] = Item.get_item_duplicates_dict(item_id)
+    dict_item['crawler'] = Item.get_crawler_matadata(item_id, ltags=dict_item['tags'])
+
+    ## EXPORT SECTION
+    # # TODO: ADD in Export SECTION
+    dict_item['hive_case'] = Export.get_item_hive_cases(item_id)
+
+    return render_template("show_item.html", bootstrap_label=bootstrap_label,
+                            modal_add_tags=Tag.get_modal_add_tags(dict_item['id'], object_type='item'),
+                            is_hive_connected=Export.get_item_hive_cases(item_id),
+                            dict_item=dict_item)
+
+    # kvrocks data
+
+    # # TODO: dynamic load:
+    ## duplicates
+    ## correlations
+
+    ## Dynamic Path FIX
+
+@objects_item.route("/object/item/html2text")
+@login_required
+@login_read_only
+def html2text(): # # TODO: support post
+    item_id = request.args.get('id')
+    if not item_id or not Item.exist_item(item_id):
+        abort(404)
+    return Item.get_item_content_html2text(item_id)
+
+@objects_item.route("/object/item/raw_content")
+@login_required
+@login_read_only
+def item_raw_content(): # # TODO: support post
+    item_id = request.args.get('id')
+    if not item_id or not Item.exist_item(item_id):
+        abort(404)
+    return Response(Item.get_item_content(item_id), mimetype='text/plain')
+
+@objects_item.route("/object/item/download")
+@login_required
+@login_read_only
+def item_download(): # # TODO: support post
+    item_id = request.args.get('id')
+    if not item_id or not Item.exist_item(item_id):
+        abort(404)
+    return send_file(Item.get_raw_content(item_id), attachment_filename=item_id, as_attachment=True)
diff --git a/var/www/blueprints/old_endpoints.py b/var/www/blueprints/old_endpoints.py
new file mode 100644
index 00000000..09f6bfaa
--- /dev/null
+++ b/var/www/blueprints/old_endpoints.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: redirect old AIL endpoints to their new locations
+''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +# ============ BLUEPRINT ============ +old_endpoints = Blueprint('old_endpoints', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== +@old_endpoints.route("/showsavedpaste/") +@login_required +@login_read_only +def showsavedpaste(): + item_id = request.args.get('paste', '') + return redirect(url_for('objects_item.showItem', id=item_id)) diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index 2b2937aa..574c9950 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -26,7 +26,6 @@ r_serv_charts = config_loader.get_redis_conn("ARDB_Trending") r_serv_sentiment = config_loader.get_redis_conn("ARDB_Sentiment") r_serv_term = config_loader.get_redis_conn("ARDB_Tracker") r_serv_cred = config_loader.get_redis_conn("ARDB_TermCred") -r_serv_pasteName = config_loader.get_redis_conn("Redis_Paste_Name") r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") r_serv_db = config_loader.get_redis_conn("ARDB_DB") @@ -92,13 +91,15 @@ dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_me 'v2.6':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be', 'update_warning_message_notice_me': 'missing from the UI.'}, 'v2.7':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags can be', + 'update_warning_message_notice_me': 'missing from the UI.'}, + 'v3.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Languages can be', 'update_warning_message_notice_me': 'missing from the UI.'} } UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted') PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' -SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') +SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') REPO_ORIGIN = 'https://github.com/ail-project/ail-framework.git' diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index 134753e7..0a10c251 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -358,7 +358,7 @@ def submit_status(): link = '' if paste_submit_link: for paste in paste_submit_link: - url = url_for('showsavedpastes.showsavedpaste') + '?paste=' + paste + url = url_for('objects_item.showItem') + '?id=' + paste link += '' + paste +'' if nb_total == '-1': diff --git a/var/www/modules/PasteSubmit/templates/submit_items.html b/var/www/modules/PasteSubmit/templates/submit_items.html index 147adfcd..18b62c2a 100644 --- a/var/www/modules/PasteSubmit/templates/submit_items.html +++ b/var/www/modules/PasteSubmit/templates/submit_items.html @@ -112,7 +112,7 @@
- +
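The hunks above finish retiring the legacy `/showsavedpaste/` endpoint: submit links now point at `/object/item`, and the new `old_endpoints` blueprint keeps old bookmarks alive with a redirect. A self-contained sketch of that shim pattern follows; the stub view body and the port are illustrative, not the real AIL code.

```python
from flask import Flask, redirect, request, url_for

app = Flask(__name__)

@app.route('/object/item')
def showItem():
    # Stand-in for the real objects_item.showItem view.
    return 'item: {}'.format(request.args.get('id'))

@app.route('/showsavedpaste/')
def showsavedpaste():
    # Translate the legacy ?paste= parameter into the new ?id= one,
    # so bookmarked links keep resolving after the migration.
    item_id = request.args.get('paste', '')
    return redirect(url_for('showItem', id=item_id))

if __name__ == '__main__':
    app.run(port=7000)
```

A request to `/showsavedpaste/?paste=X` answers with a 302 to `/object/item?id=X`, which is exactly the behaviour the template and JavaScript changes below rely on.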
diff --git a/var/www/modules/Role_Manager.py b/var/www/modules/Role_Manager.py
index 631119cb..080a8ba6 100644
--- a/var/www/modules/Role_Manager.py
+++ b/var/www/modules/Role_Manager.py
@@ -185,6 +185,8 @@ def delete_user_db(user_id):
         r_serv_db.hdel('user:tokens', user_token)
         r_serv_db.delete('user_metadata:{}'.format(user_id))
         r_serv_db.hdel('user:all', user_id)
+    else:
+        print('Error: user {} does not exist'.format(user_id))
 def hashing_password(bytes_password):
     hashed = bcrypt.hashpw(bytes_password, bcrypt.gensalt())
diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py
index fa8873ba..010610e6 100644
--- a/var/www/modules/Tags/Flask_Tags.py
+++ b/var/www/modules/Tags/Flask_Tags.py
@@ -377,7 +377,7 @@ def remove_tag(): #TODO remove me , used by showpaste
     res = Tag.api_delete_obj_tags(tags=[tag], object_id=path, object_type="item")
     if res[1] != 200:
         return str(res[0])
-    return redirect(url_for('showsavedpastes.showsavedpaste', paste=path))
+    return redirect(url_for('objects_item.showItem', id=path))
 @Tags.route("/Tags/confirm_tag")
 @login_required
@@ -395,7 +395,7 @@ def confirm_tag():
         #add analyst tag
         Tag.add_tag('item', tag, path)
-        return redirect(url_for('showsavedpastes.showsavedpaste', paste=path))
+        return redirect(url_for('objects_item.showItem', id=path))
     return 'incompatible tag'
@@ -417,7 +417,7 @@ def tag_validation():
             r_serv_statistics.sadd('fp:'+tag, path)
             r_serv_statistics.srem('tp:'+tag, path)
-        return redirect(url_for('showsavedpastes.showsavedpaste', paste=path))
+        return redirect(url_for('objects_item.showItem', id=path))
     else:
         return 'input error'
diff --git a/var/www/modules/Tags/templates/Tags.html b/var/www/modules/Tags/templates/Tags.html
index b4fb85c8..83ebfb3c 100644
--- a/var/www/modules/Tags/templates/Tags.html
+++ b/var/www/modules/Tags/templates/Tags.html
@@ -119,7 +119,7 @@
 {% for path in all_path %}
 {{ paste_date[loop.index0] }}
-
+
{{ path }}
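The `tag_validation` hunk above maintains per-tag accuracy statistics by moving item ids between `tp:<tag>` and `fp:<tag>` Redis sets. A minimal sketch of that bookkeeping, assuming a local Redis and the 'p'/'n' status values the view appears to accept:

```python
import redis

r_serv_statistics = redis.StrictRedis(host='localhost', port=6379, db=0,
                                      decode_responses=True)

def tag_validation(tag, item_id, status):
    # 'p' = analyst confirms the tag (true positive),
    # 'n' = analyst rejects it (false positive).
    if status == 'p':
        r_serv_statistics.sadd('tp:' + tag, item_id)
        r_serv_statistics.srem('fp:' + tag, item_id)
    elif status == 'n':
        r_serv_statistics.sadd('fp:' + tag, item_id)
        r_serv_statistics.srem('tp:' + tag, item_id)

tag_validation('infoleak:automatic-detection="credential"',
               'crawled/2020/06/01/example.onion.gz', 'p')
```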
@@ -375,7 +375,7 @@ function toggle_sidebar(){ button.tooltip(button); $("#container-show-more").append(button); - $("#button_show_path").attr('href', '{{ url_for('showsavedpastes.showsavedpaste') }}?paste=' + $(modal).attr('data-path')); + $("#button_show_path").attr('href', '{{ url_for('objects_item.showItem') }}?id=' + $(modal).attr('data-path')); $("#button_show_path").show('fast'); $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed diff --git a/var/www/modules/Tags/templates/tagged.html b/var/www/modules/Tags/templates/tagged.html index 75be817f..4177e6cc 100644 --- a/var/www/modules/Tags/templates/tagged.html +++ b/var/www/modules/Tags/templates/tagged.html @@ -103,7 +103,7 @@ {% for path in all_path %} {{ loop.index0 }} - {{ path }} + {{ path }}
{% for tag in paste_tags[loop.index0] %} @@ -114,7 +114,7 @@ {{ paste_date[loop.index0] }} {{ paste_linenum[loop.index0] }} -

+

{% endfor %} diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py index 0091df5a..d57c4e67 100644 --- a/var/www/modules/dashboard/Flask_dashboard.py +++ b/var/www/modules/dashboard/Flask_dashboard.py @@ -76,7 +76,7 @@ def dashboard_alert(log): log = log[46:].split(';') if len(log) == 6: time = datetime_from_utc_to_local(utc_str) - path = url_for('showsavedpastes.showsavedpaste',paste=log[5]) + path = url_for('objects_item.showItem',id=log[5]) res = {'date': date, 'time': time, 'script': log[0], 'domain': log[1], 'date_paste': log[2], 'paste': log[3], 'message': log[4], 'path': path} diff --git a/var/www/modules/dashboard/templates/index.html b/var/www/modules/dashboard/templates/index.html index 812e1ea0..5d40df1c 100644 --- a/var/www/modules/dashboard/templates/index.html +++ b/var/www/modules/dashboard/templates/index.html @@ -87,7 +87,7 @@ Feeder(s) Monitor:
- Processed pastes + Processed items

Filtered duplicates @@ -165,7 +165,7 @@ - + diff --git a/var/www/modules/hunter/templates/showTracker.html b/var/www/modules/hunter/templates/showTracker.html index d55c4c6b..af7d2b50 100644 --- a/var/www/modules/hunter/templates/showTracker.html +++ b/var/www/modules/hunter/templates/showTracker.html @@ -171,10 +171,14 @@
-
- - +
+ + + + + + +
{%if yara_rule_content%}



{{ yara_rule_content }}

@@ -227,7 +231,7 @@ {{item['date'][0:4]}}/{{item['date'][4:6]}}/{{item['date'][6:8]}} - +
{{ item['id'] }}
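This template renders the YARA rule behind a tracker; the matching itself happens in the Tracker_Yara.py hunk earlier in this diff, which now wraps `rules.match()` in a try/except so a slow rule only skips one item instead of killing the module. A standalone sketch of that scanning loop, with a throwaway rule:

```python
import yara

# Throwaway rule: flags anything containing an AWS-style key prefix.
RULE_SOURCE = 'rule api_key { strings: $k = "AKIA" condition: $k }'

def on_match(data):
    # data is a dict; data['rule'] holds the matching rule name,
    # which Tracker_Yara.py uses for tagging and mail notifications.
    print('matched rule:', data['rule'])
    return yara.CALLBACK_CONTINUE

rules = yara.compile(source=RULE_SOURCE)
try:
    matches = rules.match(data='leaked token: AKIA1234567890',
                          callback=on_match,
                          which_callbacks=yara.CALLBACK_MATCHES,
                          timeout=60)
    if matches:
        print(matches)
except yara.TimeoutError:
    # Same recovery as the module: report the timeout and move on.
    print('yara scanning timed out')
```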
diff --git a/var/www/modules/hunter/templates/trackersManagement.html b/var/www/modules/hunter/templates/trackersManagement.html index fee4b2ba..007d8046 100644 --- a/var/www/modules/hunter/templates/trackersManagement.html +++ b/var/www/modules/hunter/templates/trackersManagement.html @@ -67,10 +67,12 @@ - {% if dict_uuid['term']|length > 256 %} - {{ dict_uuid['term'][0:256]}}... - {% else %} - {{ dict_uuid['term']}} + {% if dict_uuid['term']%} + {% if dict_uuid['term']|length > 256 %} + {{ dict_uuid['term'][0:256]}}... + {% else %} + {{ dict_uuid['term']}} + {% endif %} {% endif %} @@ -128,10 +130,12 @@ - {% if dict_uuid['term']|length > 256 %} - {{ dict_uuid['term'][0:256]}}... - {% else %} - {{ dict_uuid['term']}} + {% if dict_uuid['term']%} + {% if dict_uuid['term']|length > 256 %} + {{ dict_uuid['term'][0:256]}}... + {% else %} + {{ dict_uuid['term']}} + {% endif %} {% endif %} diff --git a/var/www/modules/search/Flask_search.py b/var/www/modules/search/Flask_search.py index ff5395e6..cbc1f633 100644 --- a/var/www/modules/search/Flask_search.py +++ b/var/www/modules/search/Flask_search.py @@ -27,7 +27,6 @@ import Flask_config app = Flask_config.app config_loader = Flask_config.config_loader baseUrl = Flask_config.baseUrl -r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal @@ -116,17 +115,7 @@ def search(): selected_index = os.path.join(baseindexpath, index_name) ''' temporary disabled - # Search filename - for path in r_serv_pasteName.smembers(q[0]): - r.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content() - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) + # # TODO: search by filename/item id ''' # Search full line diff --git a/var/www/modules/search/templates/search.html b/var/www/modules/search/templates/search.html index eb30c0bc..3ea961f0 100644 --- a/var/www/modules/search/templates/search.html +++ b/var/www/modules/search/templates/search.html @@ -98,7 +98,7 @@ {% for path in r %} {{ loop.index0 }} - {{ path }} + {{ path }}
{% for tag in paste_tags[loop.index0] %} @@ -109,7 +109,7 @@ {{ paste_date[loop.index0] }} {{ paste_size[loop.index0] }} -

+

{% endfor %} @@ -207,11 +207,11 @@ } search_table.row.add( [ init_num_of_elements_in_table+((offset))+i+1, - "
"+ data.path_array[i] +"" + " "+ data.path_array[i] +"" + "
" + tag + "
", data.date_array[i], data.size_array[i], - "

" + "

" ] ).draw( false ); } offset = offset + data.path_array.length; diff --git a/var/www/modules/settings/Flask_settings.py b/var/www/modules/settings/Flask_settings.py index 7119a35f..6b8cdb09 100644 --- a/var/www/modules/settings/Flask_settings.py +++ b/var/www/modules/settings/Flask_settings.py @@ -131,11 +131,11 @@ def new_token(): generate_new_token(current_user.get_id()) return redirect(url_for('settings.edit_profile')) -@settings.route("/settings/new_token_user", methods=['GET']) +@settings.route("/settings/new_token_user", methods=['POST']) @login_required @login_admin def new_token_user(): - user_id = request.args.get('user_id') + user_id = request.form.get('user_id') if r_serv_db.exists('user_metadata:{}'.format(user_id)): generate_new_token(user_id) return redirect(url_for('settings.users_list')) @@ -215,18 +215,18 @@ def users_list(): new_user_dict['password'] = request.args.get('new_user_password') return render_template("users_list.html", all_users=all_users, new_user=new_user_dict, admin_level=True) -@settings.route("/settings/edit_user", methods=['GET']) +@settings.route("/settings/edit_user", methods=['POST']) @login_required @login_admin def edit_user(): - user_id = request.args.get('user_id') + user_id = request.form.get('user_id') return redirect(url_for('settings.create_user', user_id=user_id)) -@settings.route("/settings/delete_user", methods=['GET']) +@settings.route("/settings/delete_user", methods=['POST']) @login_required @login_admin def delete_user(): - user_id = request.args.get('user_id') + user_id = request.form.get('user_id') delete_user_db(user_id) return redirect(url_for('settings.users_list')) diff --git a/var/www/modules/settings/templates/edit_profile.html b/var/www/modules/settings/templates/edit_profile.html index ffa65e83..9a095f7c 100644 --- a/var/www/modules/settings/templates/edit_profile.html +++ b/var/www/modules/settings/templates/edit_profile.html @@ -52,8 +52,16 @@ API Key - {{user_metadata['api_key']}} + + {{user_metadata['api_key'][:4]}}*********************************{{user_metadata['api_key'][-4:]}} + + + + + @@ -91,6 +99,13 @@ function toggle_sidebar(){ $('#core_content').addClass('col-lg-10') } } + +function show_api_key() { + $('#censored_key').hide(); + $('#btn_key').hide(); + $('#uncensored_key').show(); +} + diff --git a/var/www/modules/settings/templates/users_list.html b/var/www/modules/settings/templates/users_list.html index 0c58ab2e..00601977 100644 --- a/var/www/modules/settings/templates/users_list.html +++ b/var/www/modules/settings/templates/users_list.html @@ -17,17 +17,6 @@ - - @@ -75,16 +64,37 @@ {{user['email']}} {{user['role']}} - {{user['api_key']}} - +
+ + {{user['api_key'][:4]}}*********************************{{user['api_key'][-4:]}} + + + + + + + +
- - - - - - +
+
+ + +
+
+ + +
+
{% endfor %} @@ -117,6 +127,13 @@ function toggle_sidebar(){ $('#core_content').addClass('col-lg-10') } } + +function show_api_key(key_id) { + $('#censored_key_' + key_id).hide(); + $('#btn_key_' + key_id).hide(); + $('#uncensored_key_' + key_id).show(); +} + diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py index 479d9cb2..8bdf0984 100644 --- a/var/www/modules/showpaste/Flask_showpaste.py +++ b/var/www/modules/showpaste/Flask_showpaste.py @@ -32,11 +32,9 @@ import Flask_config app = Flask_config.app baseUrl = Flask_config.baseUrl -r_serv_pasteName = Flask_config.r_serv_pasteName r_serv_metadata = Flask_config.r_serv_metadata r_serv_tags = Flask_config.r_serv_tags r_serv_statistics = Flask_config.r_serv_statistics -r_serv_onion = Flask_config.r_serv_onion max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal DiffMaxLineLength = Flask_config.DiffMaxLineLength @@ -404,13 +402,6 @@ def show_item_min(requested_path , content_range=0): # ============ ROUTES ============ -@showsavedpastes.route("/showsavedpaste/") #completely shows the paste in a new tab -@login_required -@login_read_only -def showsavedpaste(): - requested_path = request.args.get('paste', '') - return showpaste(0, requested_path) - @showsavedpastes.route("/showsaveditem_min/") #completely shows the paste in a new tab @login_required @login_read_only diff --git a/var/www/modules/showpaste/templates/show_saved_item_min.html b/var/www/modules/showpaste/templates/show_saved_item_min.html index 94327771..7030868c 100644 --- a/var/www/modules/showpaste/templates/show_saved_item_min.html +++ b/var/www/modules/showpaste/templates/show_saved_item_min.html @@ -73,7 +73,7 @@ {% if item_parent %} {% endif %} @@ -179,7 +179,7 @@ Father - {{ crawler_metadata['paste_father'] }} + {{ crawler_metadata['paste_father'] }} Url @@ -220,7 +220,7 @@
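The settings changes above stop exposing API keys outright: the templates render only the first and last four characters and reveal the full key on demand, while token regeneration, user editing and deletion move from GET to POST. The masking itself reduces to a small helper, sketched here with an illustrative name and key:

```python
def censor_api_key(api_key, keep=4):
    # Show only the first and last `keep` characters, as the templates do.
    if len(api_key) <= 2 * keep:
        return '*' * len(api_key)
    return api_key[:keep] + '*' * (len(api_key) - 2 * keep) + api_key[-keep:]

print(censor_api_key('iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQja'))
```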
diff --git a/var/www/modules/terms/templates/credentials_tracker.html b/var/www/modules/terms/templates/credentials_tracker.html index c580ef45..969ffae1 100644 --- a/var/www/modules/terms/templates/credentials_tracker.html +++ b/var/www/modules/terms/templates/credentials_tracker.html @@ -176,7 +176,7 @@ html_to_add += ""+curr_data.size+""; html_to_add += ""+curr_data.lineinfo[0]+""; html_to_add += ""+curr_data.lineinfo[1]+""; - html_to_add += "
"; + html_to_add += "
"; html_to_add += ""; } diff --git a/var/www/modules/terms/templates/terms_management.html b/var/www/modules/terms/templates/terms_management.html index 1762e407..aeeedf6b 100644 --- a/var/www/modules/terms/templates/terms_management.html +++ b/var/www/modules/terms/templates/terms_management.html @@ -460,7 +460,7 @@ function bindEventsForCurrentPage() { html_to_add += ""+curr_data.size+""; html_to_add += ""+curr_data.lineinfo[0]+""; html_to_add += ""+curr_data.lineinfo[1]+""; - html_to_add += "
"; + html_to_add += "
"; html_to_add += ""; } diff --git a/var/www/static/css/ail-project.css b/var/www/static/css/ail-project.css new file mode 100644 index 00000000..ba4d461b --- /dev/null +++ b/var/www/static/css/ail-project.css @@ -0,0 +1,7 @@ +.hg-text{ + padding-top: 0.2em; + padding-bottom: 0.2em; + padding-right: 0.15em; + padding-left: 0.15em; + background-color: #2e5; +} diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 7d9be182..b2cc7e39 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -251,7 +251,7 @@ function create_log_table(obj_json) { msage.appendChild(document.createTextNode(message.join(" "))); var paste_path = parsedmess[5]; - var url_to_saved_paste = url_showSavedPath+"?paste="+paste_path+"&num="+parsedmess[0]; + var url_to_saved_paste = url_showSavedPath+"?id="+paste_path; var action_icon_a = document.createElement("A"); action_icon_a.setAttribute("TARGET", "_blank"); diff --git a/var/www/templates/correlation/metadata_card_paste.html b/var/www/templates/correlation/metadata_card_paste.html index 5541cd00..08744321 100644 --- a/var/www/templates/correlation/metadata_card_paste.html +++ b/var/www/templates/correlation/metadata_card_paste.html @@ -38,7 +38,7 @@ {% endfor %}
- + diff --git a/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html b/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html index 5b059e23..0a80d08c 100644 --- a/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html +++ b/var/www/templates/crawler/crawler_splash/dashboard_splash_crawler.html @@ -92,24 +92,7 @@ -
-
-
-
Show Domain:
-
-
- -
- -
-
-
-
-
-
- + {% include 'domains/block_domains_name_search.html' %}
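The `block_domains_name_search.html` include above feeds `crawler_splash.domains_search_name`, which delegates the real work to `Domain.api_search_domains_by_name`. A hypothetical re-implementation of that filter-plus-pagination step (the function name, page size, and returned dict layout are assumptions, not the actual Domain.py code):

```python
# Substring-match over known domain ids, then slice the hits into pages.
def search_domains_by_name(name, domains, nb_per_page=28, page=1):
    matched = [domain for domain in sorted(domains) if name in domain]
    nb_pages = max(1, -(-len(matched) // nb_per_page))  # ceiling division
    page = min(max(page, 1), nb_pages)
    start = (page - 1) * nb_per_page
    return {'list_elem': matched[start:start + nb_per_page],
            'page': page,
            'nb_pages': nb_pages,
            'nb_all_elem': len(matched)}

domains = ['example.onion', 'market.onion', 'forum.example.onion']
print(search_domains_by_name('example', domains))
```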
@@ -135,6 +118,10 @@ {% include 'tags/block_obj_tags_search.html' %} {% endwith %} + {% with object_type='domain' %} + {% include 'domains/block_languages_search.html' %} + {% endwith %} +
diff --git a/var/www/templates/crawler/crawler_splash/domain_explorer.html b/var/www/templates/crawler/crawler_splash/domain_explorer.html index 16595014..629cd090 100644 --- a/var/www/templates/crawler/crawler_splash/domain_explorer.html +++ b/var/www/templates/crawler/crawler_splash/domain_explorer.html @@ -68,7 +68,7 @@ - {% with dict_data=dict_data, bootstrap_label=bootstrap_label %} + {% with l_dict_domains=dict_data['list_elem'], bootstrap_label=bootstrap_label %} {% include 'domains/card_img_domain.html' %} {% endwith %} diff --git a/var/www/templates/crawler/crawler_splash/showDomain.html b/var/www/templates/crawler/crawler_splash/showDomain.html index d83aee39..dc6ff373 100644 --- a/var/www/templates/crawler/crawler_splash/showDomain.html +++ b/var/www/templates/crawler/crawler_splash/showDomain.html @@ -67,6 +67,7 @@ First Seen Last Check Ports + Languages @@ -74,6 +75,11 @@ {%if "first_seen" in dict_domain%}{{ dict_domain['first_seen'] }}{%endif%} {%if "last_check" in dict_domain%}{{ dict_domain['last_check'] }}{%endif%} {%if dict_domain["ports"]%}{{ dict_domain["ports"] }}{%endif%} + + {% for languages in dict_domain['languages'] %} + {{languages}} + {% endfor %} + @@ -94,12 +100,36 @@ -
@@ -94,12 +100,36 @@
-    {% if dict_domain['origin_item']=='manual' or dict_domain['origin_item']=='auto' %}
-        {{ dict_domain['origin_item'] }}
-    {%else%}
-        Last Origin: {{ dict_domain['origin_item'] }}
-    {%endif%}
+    {% if 'father' in dict_domain %}
+        {% if dict_domain['father']=='manual' or dict_domain['father']=='auto' %}
+            {{ dict_domain['father'] }}
+        {%else%}
+            Last Origin:
+            {{ dict_domain['father']['item_father'] }}
+            {% if dict_domain['father']['domain_father'] %}
+                {{ dict_domain['father']['domain_father'] }}
+            {%endif%}
+        {%endif%}
+    {%endif%}
 
     {% if 'correlation_nb' in dict_domain %}
         {% if dict_domain["correlation_nb"] > 0 %}
@@ -352,7 +382,7 @@
     {% for item in dict_domain["crawler_history"]["items"] %}
-        
+        {{ item["link"] }}
@@ -498,7 +528,7 @@ function toggle_sidebar(){
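The showDomain template now branches on dict_domain['father'] rather than dict_domain['origin_item']: the value is either the literal string 'manual' or 'auto', or a dict carrying the parent item and, optionally, a parent domain. A sketch of both shapes; the field names come from the Jinja branches above, the values are invented:

# The two shapes the template handles; concrete values are hypothetical.
father_manual = 'manual'   # or 'auto'
father_crawled = {
    'item_father': 'crawled/2020/01/15/example-item',   # hypothetical item id
    'domain_father': 'parent-example.onion',            # optional key
}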
diff --git a/var/www/templates/domains/block_languages_search.html b/var/www/templates/domains/block_languages_search.html
new file mode 100644
index 00000000..338e91d0
--- /dev/null
+++ b/var/www/templates/domains/block_languages_search.html
@@ -0,0 +1,73 @@
+
+    Domains by Languages :
+
+
+
+
+
+
+
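The language-search block above submits its selection to crawler_splash.domains_search_languages_get; the pagination further down re-encodes the selection as comma-separated languages and domain_types query strings. A hedged sketch of how such a handler could parse those parameters; the endpoint name comes from the templates, the URL rule and the query helper are assumptions:

# Hedged sketch, not the actual AIL handler.
from flask import Blueprint, request, render_template

crawler_splash = Blueprint('crawler_splash', __name__)

def get_domains_by_languages(languages, domains_types, page=1):
    # placeholder: the real query filters crawled domains by detected language
    return {'list_elem': [], 'page': page, 'nb_pages': 0,
            'nb_first_elem': 0, 'nb_last_elem': 0, 'nb_all_elem': 0}

@crawler_splash.route('/domains/languages')   # assumed URL rule
def domains_search_languages_get():
    # both parameters arrive comma-separated, e.g. ?languages=en,fr&domain_types=onion
    languages = [l for l in request.args.get('languages', '').split(',') if l]
    domains_types = [t for t in request.args.get('domain_types', '').split(',') if t]
    page = int(request.args.get('page', 1))
    l_dict_domains = get_domains_by_languages(languages, domains_types, page=page)
    return render_template('domains/domains_filter_languages.html',
                           l_dict_domains=l_dict_domains,
                           current_languages=languages,
                           domains_types=domains_types)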
diff --git a/var/www/templates/domains/card_img_domain.html b/var/www/templates/domains/card_img_domain.html
index eb7e8371..fb5480f1 100644
--- a/var/www/templates/domains/card_img_domain.html
+++ b/var/www/templates/domains/card_img_domain.html
@@ -1,36 +1,62 @@
-{% for dict_domain in dict_data['list_elem'] %}
+{% for dict_domain in l_dict_domains %}
 
     {% if loop.index0 % 4 == 0 %}
     {% endif %}
 
-    
+    
-        {{dict_domain["id"]}}
+        {% if 'hl-start' in dict_domain %}
+            {{dict_domain["id"][:dict_domain['hl-start']]}}{{dict_domain["id"][dict_domain['hl-start']:dict_domain['hl-end']]}}{{dict_domain["id"][dict_domain['hl-end']:]}}
+        {% else %}
+            {{dict_domain["id"]}}
+        {% endif %}
+
+        {% if dict_domain["status"] %}
+            UP
+        {% else %}
+            DOWN
+        {% endif %}
+
+        {{dict_domain["first_seen"]}}
+        {{dict_domain["first_seen"]}}
 
-        First seen: {{dict_domain["first_seen"]}}
-        Last_seen: {{dict_domain["first_seen"]}}
-        Ports: {{dict_domain["ports"]}}
+        Ports: {{dict_domain["ports"]}}
+        {% if dict_domain['languages'] %}
+            Languages:
+            {% for language in dict_domain['languages'] %}
+                {{ language }}
+            {% endfor %}
+        {% endif %}
 
-        Status:
-        {% if dict_domain["status"] %}
-            UP
-        {% else %}
-            DOWN
-        {% endif %}
+    {% endif %}
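The new highlight logic slices dict_domain['id'] at two precomputed indices, hl-start and hl-end, and wraps the middle span in the .hg-text class added earlier in this changeset. Server-side, the indices could be produced by something as small as the following sketch; only the two key names are dictated by the template, the helper itself is hypothetical:

def add_name_highlight(dict_domain, searched):
    """Annotate a domain dict with the slice bounds of the matched substring.

    Only 'hl-start'/'hl-end' are consumed by card_img_domain.html; everything
    else here is an assumed implementation detail.
    """
    idx = dict_domain['id'].find(searched)
    if idx >= 0:
        dict_domain['hl-start'] = idx
        dict_domain['hl-end'] = idx + len(searched)
    return dict_domain

# usage: add_name_highlight({'id': 'example2345abcde.onion'}, 'example')
# yields hl-start=0, hl-end=7, so the template renders 'example' highlighted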
diff --git a/var/www/templates/domains/domains_filter_languages.html b/var/www/templates/domains/domains_filter_languages.html
new file mode 100644
index 00000000..b45236c6
--- /dev/null
+++ b/var/www/templates/domains/domains_filter_languages.html
@@ -0,0 +1,192 @@
+    Show Domain - AIL
+
+    {% include 'nav_bar.html' %}
+
+    {% include 'crawler/menu_sidebar.html' %}
+
+    {% include 'domains/block_languages_search.html' %}
+
+    {% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %}
+        {% include 'domains/card_img_domain.html' %}
+    {% endwith %}
+
+    {%if l_dict_domains['list_elem']%}
+        {% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %}
+            {% set target_url=url_for('crawler_splash.domains_search_languages_get') + "?languages=" + ','.join(current_languages)%}
+            {%if domains_types %}
+                {% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%}
+            {%endif%}
+            {% include 'pagination.html' %}
+        {% endwith %}
+    {%endif%}
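Both new listing pages hand pagination.html the same five counters through the {% with %} block above. A paginator producing that dict could look like this sketch; the key names are exactly those the templates unpack, while the page size and slicing are assumptions:

import math

def paginate(all_elems, page=1, nb_elem_per_page=24):
    # Sketch of a paginator for the keys the templates consume; the per-page
    # size and slicing are assumed, the key names are not.
    nb_all_elem = len(all_elems)
    nb_pages = max(1, math.ceil(nb_all_elem / nb_elem_per_page))
    page = min(max(page, 1), nb_pages)
    start = (page - 1) * nb_elem_per_page
    end = min(start + nb_elem_per_page, nb_all_elem)
    return {
        'list_elem': all_elems[start:end],
        'page': page,
        'nb_pages': nb_pages,
        'nb_first_elem': start + 1,
        'nb_last_elem': end,
        'nb_all_elem': nb_all_elem,
    }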
diff --git a/var/www/templates/domains/domains_result_list.html b/var/www/templates/domains/domains_result_list.html
new file mode 100644
index 00000000..168cf530
--- /dev/null
+++ b/var/www/templates/domains/domains_result_list.html
@@ -0,0 +1,195 @@
+    Domain Search - AIL
+
+    {% include 'nav_bar.html' %}
+
+    {% include 'crawler/menu_sidebar.html' %}
+
+    {% with page=l_dict_domains['page'], search=l_dict_domains['search'] %}
+        {% include 'domains/block_domains_name_search.html' %}
+    {% endwith %}
+
+    {% with l_dict_domains=l_dict_domains['list_elem'], bootstrap_label=bootstrap_label %}
+        {% include 'domains/card_img_domain.html' %}
+    {% endwith %}
+
+    {%if l_dict_domains['list_elem']%}
+        {% with page=l_dict_domains['page'], nb_page_max=l_dict_domains['nb_pages'], nb_first_elem=l_dict_domains['nb_first_elem'], nb_last_elem=l_dict_domains['nb_last_elem'], nb_all_elem=l_dict_domains['nb_all_elem'] %}
+            {% set target_url=url_for('crawler_splash.domains_search_name') + "?name=" + l_dict_domains['search']%}
+            {%if domains_types %}
+                {% set target_url = target_url + '&domain_types=' + ','.join(domains_types)%}
+            {%endif%}
+            {% include 'pagination.html' %}
+        {% endwith %}
+    {%endif%}
diff --git a/var/www/templates/modals/create_hive_case.html b/var/www/templates/modals/create_hive_case.html
new file mode 100644
index 00000000..be56389e
--- /dev/null
+++ b/var/www/templates/modals/create_hive_case.html
@@ -0,0 +1,120 @@
+
diff --git a/var/www/templates/modals/show_min_item.html b/var/www/templates/modals/show_min_item.html
index 69ff1239..8613d9c6 100644
--- a/var/www/templates/modals/show_min_item.html
+++ b/var/www/templates/modals/show_min_item.html
@@ -86,7 +86,7 @@ function get_html_and_update_modal(event, truemodal) {
     button.tooltip(button);
     $("#container-show-more").append(button);
 
-    $("#modal_show_min_item_button_show_item").attr('href', '{{ url_for('showsavedpastes.showsavedpaste') }}?paste=' + $(modal).attr('data-path'));
+    $("#modal_show_min_item_button_show_item").attr('href', '{{ url_for('objects_item.showItem') }}?id=' + $(modal).attr('data-path'));
     $("#modal_show_min_item_button_show_item").show('fast');
     $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF
     if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed
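The create_hive_case.html modal added above only collects case parameters; the actual case creation would happen server-side. AIL has historically talked to TheHive through thehive4py, so the backend call could resemble this sketch; the URL, API key, and case fields are placeholders, and the exact wiring inside AIL is not shown by this diff:

# Hedged sketch using thehive4py 1.x; all values are placeholders.
from thehive4py.api import TheHiveApi
from thehive4py.models import Case

api = TheHiveApi('http://127.0.0.1:9000', 'API_KEY_PLACEHOLDER')

case = Case(title='AIL item export',           # hypothetical title
            tlp=2,                             # amber
            description='Item exported from AIL',
            tags=['AIL'])
response = api.create_case(case)
if response.status_code == 201:
    print('case created:', response.json()['id'])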
diff --git a/var/www/templates/objects/item/show_item.html b/var/www/templates/objects/item/show_item.html
new file mode 100644
index 00000000..084b3477
--- /dev/null
+++ b/var/www/templates/objects/item/show_item.html
@@ -0,0 +1,446 @@
+    Show Item Object - AIL
+
+    {% include 'nav_bar.html' %}
+
+    {% if misp_eventid %}
+        MISP Events already Created
+        {{ misp_url }}
+    {% endif %}
+
+    {% if dict_item['hive_case'] %}
+        The Hive Case already Created
+        {{ hive_url }}
+    {% endif %}
+
+    {% if dict_item['duplicates'] != 0 %}
+        duplicates {{dict_item['duplicates']|length}}
+        Date | Similarity | Item | Diff
+        {% for duplicate_id in dict_item['duplicates'] %}
+            {{dict_item['duplicates'][duplicate_id]['date']}}
+            {%for algo in dict_item['duplicates'][duplicate_id]['algo']|sort()%}
+                {{algo}} {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%
+            {%endfor%}
+            {{duplicate_id}}
+        {% endfor %}
+    {% endif %}
+
+    {% if l_64|length != 0 %}
+        Decoded Files {{l_64|length}}
+        estimated type | hash
+        {% for b64 in l_64 %}
+            {{ b64[1] }} {{b64[2]}} ({{ b64[4] }})
+        {% endfor %}
+    {% endif %}
+
+    {% if dict_item['crawler'] %}
+        Crawler
+        Last Origin:
+        {{ dict_item['father'] }}
+        {{ dict_item['crawler']['domain'] }}
+        url {{ dict_item['crawler']['url'] }}
+    {% endif %}
+
+    {{ dict_item['content'] }}
+
+{% if dict_item['crawler'] %}
+{% endif %}
diff --git a/var/www/templates/tags/search_obj_by_tags.html b/var/www/templates/tags/search_obj_by_tags.html
index 3e9400ba..93ac9579 100644
--- a/var/www/templates/tags/search_obj_by_tags.html
+++ b/var/www/templates/tags/search_obj_by_tags.html
@@ -129,7 +129,7 @@
     {{ dict_obj['date'] }}
-    
+    
     {{ dict_obj['id'] }}
 
diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh
index f51ae231..b38609eb 100755
--- a/var/www/update_thirdparty.sh
+++ b/var/www/update_thirdparty.sh
@@ -91,9 +91,7 @@ wget -q https://raw.githubusercontent.com/flot/flot/958e5fd43c6dff4bab3e1fd5cb61
 
 #Ressources for sparkline and canvasJS and slider
 wget -q http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js
-wget -q http://canvasjs.com/fdm/chart/ -O temp/canvasjs.zip
-unzip -qq temp/canvasjs.zip -d temp/
-mv temp/Chart\ 2.3.2\ GA\ -\ Stable/jquery.canvasjs.min.js ./static/js/jquery.canvasjs.min.js
+wget -q https://canvasjs.com/assets/script/canvasjs.min.js -O ./static/js/jquery.canvasjs.min.js
 
 wget -q https://jqueryui.com/resources/download/jquery-ui-1.12.1.zip -O temp/jquery-ui.zip
 unzip -qq temp/jquery-ui.zip -d temp/