From d27d47dc7036aba33ca3af2a454dc11ca7d755f0 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 1 Sep 2022 14:04:00 +0200
Subject: [PATCH] chg: [Kvrocks migration] rewrite obj tags + migration

---
 bin/DB_KVROCKS_MIGRATION.py         | 290 ++++--
 bin/core/ail_2_ail.py               |   2 +-
 bin/lib/Decoded.py                  |   7 +-
 bin/lib/Domain.py                   | 124 +--
 bin/lib/Investigations.py           |   4 +-
 bin/lib/Tag.py                      | 954 ++++++++++++++++++
 bin/lib/Tracker.py                  |   2 +-
 bin/lib/ail_core.py                 |  35 +
 bin/lib/crawlers.py                 |  18 +-
 bin/lib/data_retention_engine.py    |  53 +
 bin/lib/index_whoosh.py             |   8 +-
 bin/lib/item_basic.py               |   6 +-
 bin/lib/objects/CryptoCurrencies.py |   1 +
 bin/lib/objects/Domains.py          |  88 +-
 bin/lib/objects/Items.py            |   9 +-
 bin/lib/objects/Pgps.py             |   1 +
 bin/lib/objects/Screenshots.py      |   1 +
 bin/lib/objects/Usernames.py        |   1 +
 bin/lib/objects/abstract_object.py  |  21 +-
 bin/lib/objects/ail_objects.py      | 134 ++-
 bin/lib/simple_correlation.py       | 282 ------
 bin/modules/Tags.py                 |   4 +-
 bin/modules/submit_paste.py         |   6 +-
 bin/packages/Date.py                |   2 +
 bin/packages/Tag.py                 | 785 --------------
 configs/6383.conf                   |  32 +-
 var/www/Flask_server.py             |   8 +-
 var/www/blueprints/objects_item.py  |   2 +-
 var/www/blueprints/tags_ui.py       |  31 +-
 var/www/modules/Flask_config.py     |   5 +-
 .../modules/PasteSubmit/Flask_PasteSubmit.py |   3 +-
 var/www/modules/Tags/Flask_Tags.py  |   3 +-
 .../hiddenServices/Flask_hiddenServices.py   |   2 +-
 33 files changed, 1524 insertions(+), 1400 deletions(-)
 create mode 100755 bin/lib/Tag.py
 create mode 100755 bin/lib/ail_core.py
 create mode 100755 bin/lib/data_retention_engine.py
 delete mode 100755 bin/lib/simple_correlation.py
 delete mode 100755 bin/packages/Tag.py

diff --git a/bin/DB_KVROCKS_MIGRATION.py b/bin/DB_KVROCKS_MIGRATION.py
index 1b1cccdf..529088fc 100755
--- a/bin/DB_KVROCKS_MIGRATION.py
+++ b/bin/DB_KVROCKS_MIGRATION.py
@@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages
 ##################################
 from lib.ConfigLoader import ConfigLoader
+from lib import Tag
 from lib import Users
 from lib.objects import Decodeds
 from lib.objects import Domains
@@ -23,6 +24,7 @@ from lib.objects.CryptoCurrencies import CryptoCurrency
 from lib.objects.Pgps import Pgp
 from lib.objects.Screenshots import Screenshot, get_all_screenshots
 from lib.objects.Usernames import Username
+from packages import Date

 # # # # CONFIGS # # # #
 config_loader = ConfigLoader()
@@ -43,13 +45,6 @@ spec.loader.exec_module(old_ail_2_ail)

 old_ail_2_ail.r_serv_sync = r_serv_db

-from packages import Tag
-spec = importlib.util.find_spec('Tag')
-old_Tag = importlib.util.module_from_spec(spec)
-spec.loader.exec_module(old_Tag)
-
-old_Tag.r_serv_tags = r_serv_tags
-
 from lib import Tracker
 spec = importlib.util.find_spec('Tracker')
 old_Tracker = importlib.util.module_from_spec(spec)
@@ -118,16 +113,32 @@ def core_migration():
         crawlers.save_splash_manager_url_api(manager_url, manager_api_key)
         crawlers.reload_splash_and_proxies_list()

-    # ail:misp
-    # ail:thehive
-    # hive:auto-alerts
-    # list_export_tags
-    # misp:auto-events
-    # whitelist_hive
-    # whitelist_misp
+    # Auto Export Migration
+    ail_misp = r_serv_db.get('ail:misp')
+    if ail_misp != 'True':
+        ail_misp = 'False'
+    r_kvrocks.set('ail:misp', ail_misp)
+    ail_thehive = r_serv_db.get('ail:thehive')
+    if ail_thehive != 'True':
+        ail_thehive = 'False'
+    r_kvrocks.set('ail:thehive', ail_thehive)

-    # # TODO: TO CHECK
+    misp_auto_events = r_serv_db.get('misp:auto-events')
+    if misp_auto_events != '1':
+        misp_auto_events = '0'
+    r_kvrocks.set('misp:auto-events', misp_auto_events)
+
+    hive_auto_alerts = r_serv_db.get('hive:auto-alerts')
+    if hive_auto_alerts != '1':
+        hive_auto_alerts = '0'
+    r_kvrocks.set('hive:auto-alerts', hive_auto_alerts)
+
+    for tag in r_serv_db.smembers('whitelist_misp'):
+        r_kvrocks.sadd('whitelist_misp', tag)
+
+    for tag in r_serv_db.smembers('whitelist_hive'):
+        r_kvrocks.sadd('whitelist_hive', tag)


 # # # # # # # # # # # # # # # #
@@ -248,36 +259,71 @@ def investigations_migration():
 def item_submit_migration():
     pass

-# /!\ KEY COLISION
-# # TODO: change db -> olds modules + blueprints
-# # TODO: HANDLE LOCAL TAGS
-# # TODO: HANDLE LOCAL TAGS
-# # TODO: HANDLE LOCAL TAGS
-# # TODO: HANDLE LOCAL TAGS
+
+###############################
+#                             #
+#       TAGS MIGRATION        #
+#                             #
+###############################
+
+def get_all_items_tags():
+    return r_serv_tags.smembers('list_tags:item')
+
+def get_all_items_tags_by_day(tag, date):
+    return r_serv_tags.smembers(f'{tag}:{date}')
+
+def get_tag_first_seen(tag, r_int=False):
+    res = r_serv_tags.hget(f'tag_metadata:{tag}', 'first_seen')
+    if r_int:
+        if res is None:
+            return 99999999
+        else:
+            return int(res)
+    return res
+
+def get_tags_first_seen():
+    first_seen = int(Date.get_today_date_str())
+    for tag in get_all_items_tags():
+        tag_first = get_tag_first_seen(tag, r_int=True)
+        if tag_first < first_seen:
+            first_seen = tag_first
+    return str(first_seen)
+
+def get_active_taxonomies():
+    return r_serv_tags.smembers('active_taxonomies')
+
+def get_active_galaxies():
+    return r_serv_tags.smembers('active_galaxies')
+

 # # TODO: HANDLE LOCAL TAGS
 def tags_migration():
+    for taxonomy in get_active_taxonomies():
+        Tag.enable_taxonomy(taxonomy)
+
+    for galaxy in get_active_galaxies():
+        Tag.enable_galaxy(galaxy)
+
+    # for tag in get_all_items_tags():
+    #     print(tag)
+    #     tag_first = get_tag_first_seen(tag)
+    #     if tag_first:
+    #         for date in Date.get_date_range_today(tag_first):
+    #             print(date)
+    #             for item_id in get_all_items_tags_by_day(tag, date):
+    #                 item = Items.Item(item_id)
+    #                 item.add_tag(tag)

-    # HANDLE LOCAL TAGS
-    print(old_Tag.get_all_tags())
-
-
-    #
-    # /!\ OBJECTS TAGS ISSUE /!\
-    # -> only one layer
-    #
-    # issue with subtypes + between objects with same ID
-    #
-    #
-    #
-    #
-    pass


 # # TODO: MIGRATE item_basic.add_map_obj_id_item_id ??????????????????????
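A filled-in version of the commented-out replay loop in tags_migration() above, for reference. This is a sketch only: it assumes `from lib.objects import Items` is added to this script's imports, and that Items.Item.add_tag() writes through the new lib/Tag.py store (consistent with how Decoded and Domain objects are tagged elsewhere in this patch):

    def replay_items_tags():
        # Replay every old per-day tag set (f'{tag}:{date}') through the new Tag API.
        for tag in get_all_items_tags():
            tag_first = get_tag_first_seen(tag)
            if not tag_first:
                continue
            for date in Date.get_date_range_today(tag_first):
                for item_id in get_all_items_tags_by_day(tag, date):
                    Items.Item(item_id).add_tag(tag)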
+# # TODO: BUILD FIRST/LAST object DATE ############################### # # # ITEMS MIGRATION # @@ -298,7 +344,7 @@ def items_migration(): -# TODO: migrate cookies +# TODO: test cookies migration # TODO: migrate auto crawlers ############################### @@ -326,7 +372,7 @@ def crawler_migration(): meta = old_crawlers.get_cookiejar_metadata(cookiejar_uuid, level=True) #print(meta) #crawlers.create_cookiejar(meta['user_id'], level=meta['level'], description=meta['description'], cookiejar_uuid=cookiejar_uuid) - #_set_cookiejar_date(meta['date']) + #crawlers._set_cookiejar_date(meta['date']) for meta_cookie, cookie_uuid in old_crawlers.get_cookiejar_cookies_list(cookiejar_uuid, add_cookie_uuid=True): print(cookie_uuid) @@ -349,6 +395,9 @@ def crawler_migration(): # BY TYPE - FIRST DATE DOWN / UP +def get_domain_down_by_date(domain_type, date): + return r_crawler.smembers(f'{domain_type}_down:{date}') + def get_item_link(item_id): return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link') @@ -415,66 +464,90 @@ def get_domain_history_by_port(domain_type, domain, port): history.append(dict_history) return history +def get_domain_tags(domain): + return r_serv_metadata.smembers(f'tag:{domain}') + def domain_migration(): print('Domains MIGRATION...') for domain_type in ['onion', 'regular']: for dom in get_domains_up_by_type(domain_type): + if domain_type == 'onion': + if not crawlers.is_valid_onion_domain(dom): + print(dom) + continue + # ports = get_domain_ports(domain_type, dom) + # first_seen = get_domain_first_seen(domain_type, dom) + # last_check = get_domain_last_check(domain_type, dom) + # last_origin = get_domain_last_origin(domain_type, dom) + # languages = get_domain_languages(dom) + # + # domain = Domains.Domain(dom) + # # domain.update_daterange(first_seen) + # # domain.update_daterange(last_check) + # # domain._set_ports(ports) + # # if last_origin: + # # domain.set_last_origin(last_origin) + # for language in languages: + # print(language) + # # domain.add_language(language) + # for tag in get_domain_tags(domain): + # domain.add_tag(tag) + # #print('------------------') + # #print('------------------') + # #print('------------------') + # #print('------------------') + # #print('------------------') + # print(dom) + # #print(first_seen) + # #print(last_check) + # #print(ports) + # + # # # TODO: FIXME filter invalid hostname + # + # # CREATE DOMAIN HISTORY + # for port in ports: + # for history in get_domain_history_by_port(domain_type, dom, port): + # epoch = history['epoch'] + # # DOMAIN DOWN + # if not history.get('status'): # domain DOWN + # # domain.add_history(epoch, port) + # print(f'DOWN {epoch}') + # # DOMAIN UP + # else: + # root_id = history.get('root') + # if root_id: + # # domain.add_history(epoch, port, root_item=root_id) + # #print(f'UP {root_id}') + # crawled_items = get_crawled_items(dom, root_id) + # for item_id in crawled_items: + # url = get_item_link(item_id) + # item_father = get_item_father(item_id) + # if item_father and url: + # #print(f'{url} {item_id}') + # pass + # # domain.add_crawled_item(url, port, item_id, item_father) + # + # + # #print() - ports = get_domain_ports(domain_type, dom) - first_seen = get_domain_first_seen(domain_type, dom) - last_check = get_domain_last_check(domain_type, dom) - last_origin = get_domain_last_origin(domain_type, dom) - languages = get_domain_languages(dom) - - domain = Domains.Domain(dom) - # domain.update_daterange(first_seen) - # domain.update_daterange(last_check) - # domain._set_ports(ports) - # if 
last_origin: - # domain.set_last_origin(last_origin) - for language in languages: - print(language) - # domain.add_language(language) - #print('------------------') - #print('------------------') - #print('------------------') - #print('------------------') - #print('------------------') - print(dom) - #print(first_seen) - #print(last_check) - #print(ports) - - # # TODO: FIXME filter invalid hostname - - - # CREATE DOMAIN HISTORY - for port in ports: - for history in get_domain_history_by_port(domain_type, dom, port): - epoch = history['epoch'] - # DOMAIN DOWN - if not history.get('status'): # domain DOWN - # domain.add_history(epoch, port) - print(f'DOWN {epoch}') - # DOMAIN UP - else: - root_id = history.get('root') - if root_id: - # domain.add_history(epoch, port, root_item=root_id) - #print(f'UP {root_id}') - crawled_items = get_crawled_items(dom, root_id) - for item_id in crawled_items: - url = get_item_link(item_id) - item_father = get_item_father(item_id) - if item_father and url: - #print(f'{url} {item_id}') - pass - # domain.add_crawled_item(url, port, item_id, item_father) - - - #print() + for domain_type in ['onion', 'regular']: + for date in Date.get_date_range_today('20190101'): + for dom in get_domain_down_by_date(domain_type, date): + if domain_type == 'onion': + if not crawlers.is_valid_onion_domain(dom): + print(dom) + continue + first_seen = get_domain_first_seen(domain_type, dom) + last_check = get_domain_last_check(domain_type, dom) + last_origin = get_domain_last_origin(domain_type, dom) + domain = Domains.Domain(dom) + # domain.update_daterange(first_seen) + # domain.update_daterange(last_check) + # if last_origin: + # domain.set_last_origin(last_origin) + # domain.add_history(None, None, date=date) ############################### @@ -489,7 +562,8 @@ def get_decoded_items_list_by_decoder(decoder_type, decoded_id): ############### #return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1) return r_serv_metadata.zrange(f'{decoder_type}_hash:{decoded_id}', 0, -1) - +def get_decodeds_tags(decoded_id): + return r_serv_metadata.smembers(f'tag:{decoded_id}') def decodeds_migration(): print('Decoded MIGRATION...') @@ -508,6 +582,9 @@ def decodeds_migration(): filepath = decoded.get_filepath(mimetype=mimetype) decoded._save_meta(filepath, mimetype) + for tag in get_decodeds_tags(decoded_id): + decoded.add_tag(tag) + for decoder_type in decoder_names: for item_id in get_decoded_items_list_by_decoder(decoder_type, decoded_id): print(item_id, decoder_type) @@ -530,6 +607,9 @@ def get_screenshot_items_list(screenshot_id): ######################### # TODO: def get_screenshot_domain(screenshot_id): return r_crawler.smembers(f'screenshot_domain:{screenshot_id}') +def get_screenshot_tags(screenshot_id): + return r_serv_metadata.smembers(f'tag:{screenshot_id}') + # Tags + Correlations # # TODO: save orphelin screenshot ????? 
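Because the old (ARDB) and new (Kvrocks) tag stores coexist while this script runs, a spot-check along the following lines can validate any migrated object. This is a sketch, assuming the module-level `Tag` import at the top of this script and that both connections decode responses to strings:

    def check_migrated_tags(obj_type, obj_id):
        # Old store: ARDB set f'tag:{obj_id}' (r_serv_metadata);
        # new store: Kvrocks, read back through lib/Tag.py.
        old_tags = set(r_serv_metadata.smembers(f'tag:{obj_id}'))
        new_tags = set(Tag.get_object_tags(obj_type, obj_id))
        return old_tags == new_tags

For example, check_migrated_tags('decoded', decoded_id) should return True for every hash processed by decodeds_migration().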
def screenshots_migration(): @@ -541,14 +621,13 @@ def screenshots_migration(): screenshot = Screenshot(screenshot_id) - tags = old_Tag.get_obj_tag(screenshot_id) ################## # TODO: - if tags: - print(screenshot_id) - print(tags) + for tag in get_screenshot_tags(screenshot_id): + screenshot.add_tag(tag) # Correlations for item_id in get_screenshot_items_list(screenshot_id): print(item_id) + date = get_item_date(item_id) screenshot.add_correlation('item', '', item_id) for domain_id in get_screenshot_domain(screenshot_id): print(domain_id) @@ -615,23 +694,24 @@ def statistics_migration(): if __name__ == '__main__': - #core_migration() - #user_migration() + core_migration() + # user_migration() + # tags_migration() #items_migration() #crawler_migration() - #domain_migration() + # domain_migration() # TO TEST #decodeds_migration() - #screenshots_migration() + # screenshots_migration() #subtypes_obj_migration() - #ail_2_ail_migration() - #trackers_migration() - #investigations_migration() - - - + # ail_2_ail_migration() + # trackers_migration() + # investigations_migration() + # custom tags + # crawler queues + auto_crawlers + # stats - Cred - Mail - Provider diff --git a/bin/core/ail_2_ail.py b/bin/core/ail_2_ail.py index 33ef8f96..b0d86a2b 100755 --- a/bin/core/ail_2_ail.py +++ b/bin/core/ail_2_ail.py @@ -16,13 +16,13 @@ from pubsublogger import publisher sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader +import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'core/')) import screen sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) from Item import Item -import Tag config_loader = ConfigLoader.ConfigLoader() r_cache = config_loader.get_redis_conn("Redis_Cache") diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py index 08adb28d..57d59256 100755 --- a/bin/lib/Decoded.py +++ b/bin/lib/Decoded.py @@ -11,10 +11,9 @@ from io import BytesIO sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) import Item import Date -import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) - +import Tag import ConfigLoader @@ -113,7 +112,7 @@ def get_decoded_metadata(sha1_string, nb_seen=False, size=False, file_type=False return metadata_dict def get_decoded_tag(sha1_string): - return Tag.get_obj_tag(sha1_string) + return Tag.get_object_tags('decoded', sha1_string) def get_list_nb_previous_hash(sha1_string, num_day): nb_previous_hash = [] @@ -351,7 +350,7 @@ def delete_decoded_file(obj_id): if not os.path.isfile(filepath): return False - Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id)) + Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id)) ############# os.remove(filepath) return True diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index d178cf29..9e1f191c 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -16,16 +16,14 @@ import random import time sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Cryptocurrency -import Pgp import Date -import Decoded import Item import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -import Correlate_object +import Tag + import Language import Screenshot import Username @@ -154,6 +152,11 @@ def get_domains_up_by_daterange(date_from, date_to, domain_type): domains_up = [] return domains_up +# Retun last crawled domains by type +# domain;epoch +def get_last_crawled_domains(domain_type): + return r_serv_onion.lrange('last_{}'.format(domain_type), 0 ,-1) + def paginate_iterator(iter_elems, nb_obj=50, 
page=1): dict_page = {} dict_page['nb_all_elem'] = len(iter_elems) @@ -231,15 +234,21 @@ def get_domains_up_by_filers(domain_type, date_from=None, date_to=None, tags=[], def get_domains_by_filters(): pass -def create_domains_metadata_list(list_domains, domain_type): +def create_domains_metadata_list(list_domains, domain_type, tags=True): + + # # TODO: + # tags => optional + # last check timestamp + l_domains = [] for domain in list_domains: if domain_type=='all': dom_type = get_domain_type(domain) else: dom_type = domain_type + l_domains.append(get_domain_metadata(domain, dom_type, first_seen=True, last_ckeck=True, status=True, - ports=True, tags=True, languages=True, screenshot=True, tags_safe=True)) + ports=True, tags=tags, languages=True, screenshot=True, tags_safe=True)) return l_domains def sanithyse_domain_name_to_search(name_to_search, domain_type): @@ -653,7 +662,7 @@ def get_domain_tags(domain): :param domain: crawled domain ''' - return Tag.get_obj_tag(domain) + return Tag.get_object_tags('domain', domain) def get_domain_random_screenshot(domain): ''' @@ -712,97 +721,6 @@ def get_domain_metadata_basic(domain, domain_type=None): domain_type = get_domain_type(domain) return get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=False) -def get_domain_cryptocurrency(domain, currencies_type=None, get_nb=False): - ''' - Retun all cryptocurrencies of a given domain. - - :param domain: crawled domain - :param currencies_type: list of cryptocurrencies type - :type currencies_type: list, optional - ''' - return Cryptocurrency.cryptocurrency.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb) - -def get_domain_pgp(domain, currencies_type=None, get_nb=False): - ''' - Retun all pgp of a given domain. - - :param domain: crawled domain - :param currencies_type: list of pgp type - :type currencies_type: list, optional - ''' - return Pgp.pgp.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb) - -def get_domain_username(domain, currencies_type=None, get_nb=False): - ''' - Retun all pgp of a given domain. - - :param domain: crawled domain - :param currencies_type: list of pgp type - :type currencies_type: list, optional - ''' - return Username.correlation.get_domain_correlation_dict(domain, correlation_type=currencies_type, get_nb=get_nb) - -def get_domain_decoded(domain): - ''' - Retun all decoded item of a given domain. - - :param domain: crawled domain - ''' - return Decoded.get_domain_decoded_item(domain) - -def get_domain_screenshot(domain): - ''' - Retun all decoded item of a given domain. - - :param domain: crawled domain - ''' - return Screenshot.get_domain_screenshot(domain) - - -def get_domain_all_correlation(domain, correlation_names=[], get_nb=False): - ''' - Retun all correlation of a given domain. 
- - :param domain: crawled domain - :type domain: str - - :return: a dict of all correlation for a given domain - :rtype: dict - ''' - if not correlation_names: - correlation_names = Correlate_object.get_all_correlation_names() - domain_correl = {} - for correlation_name in correlation_names: - if correlation_name=='cryptocurrency': - res = get_domain_cryptocurrency(domain, get_nb=get_nb) - elif correlation_name=='pgp': - res = get_domain_pgp(domain, get_nb=get_nb) - elif correlation_name=='username': - res = get_domain_username(domain, get_nb=get_nb) - elif correlation_name=='decoded': - res = get_domain_decoded(domain) - elif correlation_name=='screenshot': - res = get_domain_screenshot(domain) - else: - res = None - # add correllation to dict - if res: - domain_correl[correlation_name] = res - - return domain_correl - -def get_domain_total_nb_correlation(correlation_dict): - total_correlation = 0 - if 'decoded' in correlation_dict: - total_correlation += len(correlation_dict['decoded']) - if 'screenshot' in correlation_dict: - total_correlation += len(correlation_dict['screenshot']) - if 'cryptocurrency' in correlation_dict: - total_correlation += correlation_dict['cryptocurrency'].get('nb', 0) - if 'pgp' in correlation_dict: - total_correlation += correlation_dict['pgp'].get('nb', 0) - return total_correlation - # TODO: handle port def get_domain_history(domain, domain_type, port): # TODO: add date_range: from to + nb_elem ''' @@ -972,12 +890,6 @@ class Domain(object): ''' return get_domain_languages(self.domain) - def get_domain_correlation(self): - ''' - Retun all correlation of a given domain. - ''' - return get_domain_all_correlation(self.domain, get_nb=True) - def get_domain_history(self): ''' Retun the full history of a given domain and port. 
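Note on the get_last_crawled_domains() helper added earlier in this file: it returns the raw 'domain;epoch' strings stored in the last_{type} Redis list, so callers are expected to split each entry themselves. A usage sketch (the rsplit is an assumption based on the '# domain;epoch' comment above the helper):

    for entry in get_last_crawled_domains('onion'):
        dom, epoch = entry.rsplit(';', 1)
        print(dom, int(epoch))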
@@ -998,4 +910,6 @@ class Domain(object):
         return get_domain_items_crawled(self.domain, self.type, port, epoch=epoch, items_link=items_link, item_screenshot=item_screenshot, item_tag=item_tag)

 if __name__ == '__main__':
-    search_domains_by_name('c', 'onion')
+    #search_domains_by_name('c', 'onion')
+    res = get_last_crawled_domains('onion')
+    print(res)
diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py
index 7e39fd4a..b2943f0b 100755
--- a/bin/lib/Investigations.py
+++ b/bin/lib/Investigations.py
@@ -22,10 +22,8 @@ from flask import escape

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
-from exceptions import UpdateInvestigationError
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
 import Tag
+from exceptions import UpdateInvestigationError

 config_loader = ConfigLoader.ConfigLoader()
 r_tracking = config_loader.get_db_conn("Kvrocks_DB")
diff --git a/bin/lib/Tag.py b/bin/lib/Tag.py
new file mode 100755
index 00000000..b3f55024
--- /dev/null
+++ b/bin/lib/Tag.py
@@ -0,0 +1,954 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import redis
+import datetime
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib import ail_core
+from lib import item_basic
+from packages import Date
+
+from pytaxonomies import Taxonomies
+from pymispgalaxies import Galaxies, Clusters
+
+config_loader = ConfigLoader()
+r_tags = config_loader.get_db_conn("Kvrocks_Tags")
+r_serv_db = config_loader.get_db_conn("Kvrocks_DB") # export whitelists (see TAGS EXPORT below)
+config_loader = None
+
+#### CORE FUNCTIONS ####
+
+def build_unsafe_tags():
+    unsafe_tags = set()
+    ## CE content
+    unsafe_tags.add('dark-web:topic="pornography-child-exploitation"')
+    # add copine-scale tags
+    taxonomies = Taxonomies()
+    copine_scale = taxonomies.get('copine-scale')
+    if copine_scale:
+        for tag in copine_scale.machinetags():
+            unsafe_tags.add(tag)
+    return unsafe_tags
+
+# set of unsafe tags
+unsafe_tags = build_unsafe_tags()
+
+def is_tags_safe(ltags):
+    '''
+    Check if a list of tags contains an unsafe tag (CE, ...)
+ + :param ltags: list of tags + :type ltags: list + :return: is a tag in the unsafe set + :rtype: boolean + ''' + return unsafe_tags.isdisjoint(ltags) + +# # TODO: verify tags + object_type +# get set_keys: intersection +def get_obj_keys_by_tags(tags, obj_type, subtype='', date=None): + l_set_keys = [] + if obj_type=='item': + for tag in tags: + l_set_keys.append(f'{obj_type}:{subtype}:{tag}:{date}') + else: + for tag in tags: + l_set_keys.append(f'{obj_type}:{subtype}:{tag}') + return l_set_keys + +def get_obj_by_tag(key_tag): + return r_tags.smembers(key_tag) + +##-- CORE FUNCTIONS --## + +################################################################################ +################################################################################ +################################################################################ +################################################################################ +################################################################################ + +def is_obj_tagged(obj_type, obj_id, subtype=''): + ''' + Check if a object is tagged + + :param object_id: object id + :type domain: str + + :return: is object tagged + :rtype: boolean + ''' + return r_tags.exists(f'tag:{obj_type}:{subtype}:{obj_id}') + +def is_obj_tagged_by_tag(obj_type, obj_id, tag, subtype=''): + ''' + Check if a object is tagged + + :param object_id: object id + :type domain: str + :param tag: object type + :type domain: str + + :return: is object tagged + :rtype: boolean + ''' + return r_tags.sismember(f'tag:{obj_type}:{subtype}:{obj_id}', tag) + + + + +# +# f'tag:{obj_type}:{subtype}:{id}' f'tag:{id}' +# +# f'list_tags:{obj_type}:{subtype}' f'list_tags:{obj_type}' +# +# graph tags by days ??????????????????????????????? +# +# + +# # TODO: metadata by object type ???????????? 
+# tag_metadata:
+#   f'{tag}:{date}' -> set of item_id
+
+# # TODO: ADD subtype support
+#   f'{obj_type}:{tag}' -> set of item_id
+
+def get_tag_first_seen(tag, object_type=None, r_int=False):
+    first_seen = r_tags.hget(f'tag_metadata:{tag}', 'first_seen')
+    if r_int:
+        if first_seen:
+            first_seen = int(first_seen)
+        else:
+            first_seen = 99999999
+    return first_seen
+    # # TODO: LATER ADD object metadata
+    # if not object_type:
+    #     r_tags.hget(f'tag_metadata:{tag}', 'first_seen')
+    # else:
+    #     r_tags.hget(f'tag_metadata:{tag}', 'first_seen:{object_type}')
+
+def get_tag_last_seen(tag, object_type=None, r_int=False):
+    last_seen = r_tags.hget(f'tag_metadata:{tag}', 'last_seen')
+    if r_int:
+        if last_seen:
+            last_seen = int(last_seen)
+        else:
+            last_seen = 0
+    return last_seen
+
+def get_tag_metadata_date(tag, r_int=False):
+    return {'first_seen': get_tag_first_seen(tag, r_int=r_int), 'last_seen': get_tag_last_seen(tag, r_int=r_int)}
+
+def set_tag_first_seen(tag, date):
+    r_tags.hset(f'tag_metadata:{tag}', 'first_seen', date)
+
+def set_tag_last_seen(tag, date):
+    r_tags.hset(f'tag_metadata:{tag}', 'last_seen', date)
+
+# # TODO: handle others objects date
+def _update_tag_first_seen(tag, first_seen, last_seen):
+    if first_seen == last_seen:
+        if r_tags.scard(f'item::{tag}:{first_seen}') > 0:
+            r_tags.hset(f'tag_metadata:{tag}', 'first_seen', first_seen)
+        # no tag in db
+        else:
+            r_tags.hdel(f'tag_metadata:{tag}', 'first_seen')
+            r_tags.hdel(f'tag_metadata:{tag}', 'last_seen')
+    else:
+        if r_tags.scard(f'item::{tag}:{first_seen}') > 0:
+            r_tags.hset(f'tag_metadata:{tag}', 'first_seen', first_seen)
+        else:
+            first_seen = Date.date_add_day(first_seen)
+            if int(last_seen) >= int(first_seen):
+                _update_tag_first_seen(tag, first_seen, last_seen)
+
+# # TODO:
+def _update_tag_last_seen(tag, first_seen, last_seen):
+    if first_seen == last_seen:
+        if r_tags.scard(f'item::{tag}:{last_seen}') > 0:
+            r_tags.hset(f'tag_metadata:{tag}', 'last_seen', last_seen)
+        # no tag in db
+        else:
+            r_tags.hdel(f'tag_metadata:{tag}', 'first_seen')
+            r_tags.hdel(f'tag_metadata:{tag}', 'last_seen')
+    else:
+        if r_tags.scard(f'item::{tag}:{last_seen}') > 0:
+            r_tags.hset(f'tag_metadata:{tag}', 'last_seen', last_seen)
+        else:
+            last_seen = Date.date_substract_day(str(last_seen))
+            if int(last_seen) >= int(first_seen):
+                _update_tag_last_seen(tag, first_seen, last_seen)
+
+
+def update_tag_metadata(tag, date, delete=False): # # TODO: delete Tags
+    date = int(date)
+    tag_date = get_tag_metadata_date(tag, r_int=True)
+    # Add Tag
+    if not delete:
+        # update first_seen
+        if date < tag_date['first_seen']:
+            set_tag_first_seen(tag, date)
+        # update last_seen
+        if date > tag_date['last_seen']:
+            set_tag_last_seen(tag, date)
+    # Delete Tag
+    else:
+        if date == tag_date['first_seen']:
+            _update_tag_first_seen(tag, tag_date['first_seen'], tag_date['last_seen'])
+        if date == tag_date['last_seen']:
+            _update_tag_last_seen(tag, tag_date['first_seen'], tag_date['last_seen'])
+
+
+
+# old
+#   r_tags.smembers(f'{tag}:{date}')
+#   r_tags.smembers(f'{obj_type}:{tag}')
+def get_tag_objects(tag, obj_type, subtype='', date=''):
+    if obj_type == 'item':
+        return r_tags.smembers(f'{obj_type}:{subtype}:{tag}:{date}')
+    else:
+        return r_tags.smembers(f'{obj_type}:{subtype}:{tag}')
+
+def get_object_tags(obj_type, obj_id, subtype=''):
+    return r_tags.smembers(f'tag:{obj_type}:{subtype}:{obj_id}')
+
+def add_object_tag(tag, obj_type, id, subtype=''): #############################
+    if r_tags.sadd(f'tag:{obj_type}:{subtype}:{id}', tag) == 1:
+        r_tags.sadd('list_tags', tag)
+        r_tags.sadd(f'list_tags:{obj_type}', tag)
+        r_tags.sadd(f'list_tags:{obj_type}:{subtype}', tag)
+        if obj_type == 'item':
+            date = item_basic.get_item_date(id)
+            r_tags.sadd(f'{obj_type}:{subtype}:{tag}:{date}', id)
+
+            # add domain tag
+            if item_basic.is_crawled(id) and tag!='infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"':
+                domain = item_basic.get_item_domain(id)
+                add_object_tag(tag, "domain", domain)
+
+            update_tag_metadata(tag, date)
+        else:
+            r_tags.sadd(f'{obj_type}:{subtype}:{tag}', id)
+
+        r_tags.hincrby(f'daily_tags:{datetime.date.today().strftime("%Y%m%d")}', tag, 1)
+
+def update_tag_global_by_obj_type(tag, object_type, subtype=''):
+    tag_deleted = False
+    if object_type=='item':
+        if not r_tags.exists(f'tag_metadata:{tag}'):
+            tag_deleted = True
+    else:
+        if not r_tags.exists(f'{object_type}:{subtype}:{tag}'):
+            r_tags.srem(f'list_tags:{object_type}:{subtype}', tag)
+            # Iterate on all subtypes
+            delete_global_obj_tag = True
+            for obj_subtype in ail_core.get_object_all_subtypes(object_type) or []: # None for objects without subtypes
+                if r_tags.exists(f'list_tags:{object_type}:{obj_subtype}'):
+                    delete_global_obj_tag = False
+                    break
+            if delete_global_obj_tag:
+                r_tags.srem(f'list_tags:{object_type}', tag)
+                tag_deleted = True
+    if tag_deleted:
+        # update global tags
+        for obj_type in ail_core.get_all_objects():
+            if r_tags.exists(f'{obj_type}:{tag}'):
+                tag_deleted = False
+        if tag_deleted:
+            r_tags.srem('list_tags', tag)
+
+def delete_object_tag(tag, obj_type, id, subtype=''):
+    if is_obj_tagged_by_tag(obj_type, id, tag, subtype=subtype):
+        r_tags.sadd('list_tags', tag)
+        r_tags.sadd(f'list_tags:{obj_type}', tag)
+        r_tags.sadd(f'list_tags:{obj_type}:{subtype}', tag)
+        if obj_type == 'item':
+            date = item_basic.get_item_date(id)
+            r_tags.srem(f'{obj_type}:{subtype}:{tag}:{date}', id)
+
+            update_tag_metadata(tag, date, delete=True)
+        else:
+            r_tags.srem(f'{obj_type}:{subtype}:{tag}', id)
+
+        r_tags.srem(f'tag:{obj_type}:{subtype}:{id}', tag)
+        update_tag_global_by_obj_type(tag, obj_type, subtype=subtype)
+
+################################################################################################################
+
+# TODO: rewrite me
+# TODO: other objects
+def get_obj_by_tags(obj_type, l_tags, date_from=None, date_to=None, nb_obj=50, page=1):
+    # with daterange
+    l_tagged_obj = []
+    if obj_type=='item':
+        #sanityze date
+        date_range = sanitise_tags_date_range(l_tags, date_from=date_from, date_to=date_to)
+        l_dates = Date.substract_date(date_range['date_from'], date_range['date_to'])
+        for date_day in l_dates:
+            l_set_keys = get_obj_keys_by_tags(l_tags, obj_type, date=date_day)
+            # if len(l_set_keys) > nb_obj:
+            #     return l_tagged_obj
+            if len(l_set_keys) < 2:
+                date_day_obj = get_obj_by_tag(l_set_keys[0])
+            else:
+                date_day_obj = r_tags.sinter(l_set_keys[0], *l_set_keys[1:])
+
+            # next_nb_start = len(l_tagged_obj) + len(date_day_obj) - nb_obj
+            # if next_nb_start > 0:
+            #    get + filter nb_start
+            l_tagged_obj.extend( date_day_obj )
+
+        # handle pagination
+        nb_all_elem = len(l_tagged_obj)
+        nb_pages = nb_all_elem / nb_obj
+        if not nb_pages.is_integer():
+            nb_pages = int(nb_pages)+1
+        else:
+            nb_pages = int(nb_pages)
+        if page > nb_pages:
+            page = nb_pages
+
+        start = nb_obj*(page -1)
+        if nb_pages > 1:
+            stop = (nb_obj*page)
+            l_tagged_obj = l_tagged_obj[start:stop]
+        # only one page
+        else:
+            stop = nb_all_elem
+            l_tagged_obj = l_tagged_obj[start:]
+
+        if stop > nb_all_elem:
+            stop = nb_all_elem
+        stop = stop -1
+
+        return {"tagged_obj":l_tagged_obj, "date" : date_range,
"page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop+1, "nb_all_elem":nb_all_elem} + + # without daterange + else: + l_set_keys = get_obj_keys_by_tags(l_tags, obj_type) + if len(l_set_keys) < 2: + l_tagged_obj = get_obj_by_tag(l_set_keys[0]) + else: + l_tagged_obj = r_tags.sinter(l_set_keys[0], *l_set_keys[1:]) + + if not l_tagged_obj: + return {"tagged_obj":l_tagged_obj, "page":0, "nb_pages":0} + + # handle pagination + nb_all_elem = len(l_tagged_obj) + nb_pages = nb_all_elem / nb_obj + if not nb_pages.is_integer(): + nb_pages = int(nb_pages)+1 + else: + nb_pages = int(nb_pages) + if page > nb_pages: + page = nb_pages + + # multiple pages + if nb_pages > 1: + start = nb_obj*(page -1) + stop = (nb_obj*page) -1 + current_index = 0 + l_obj = [] + for elem in l_tagged_obj: + if current_index > stop: + break + if start <= current_index and stop >= current_index: + l_obj.append(elem) + current_index += 1 + l_tagged_obj = l_obj + stop += 1 + if stop > nb_all_elem: + stop = nb_all_elem + # only one page + else: + start = 0 + stop = nb_all_elem + l_tagged_obj = list(l_tagged_obj) + + return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem} + + + +################################################################################ +################################################################################ +################################################################################ +################################################################################ + +#### Taxonomies - Galaxies #### + +################################################################################ +# galaxies = Galaxies() +# clusters = Clusters(skip_duplicates=True) +# +# list_all_tags = {} +# for name, c in clusters.items(): #galaxy name + tags +# list_all_tags[name] = c +# +# list_galaxies = [] +# for g in galaxies.values(): +# list_galaxies.append(g.to_json()) +# +# list_clusters = [] +# for c in clusters.values(): +# list_clusters.append(c.to_json()) +# +# # tags numbers in galaxies +# total_tags = {} +# for name, tags in clusters.items(): #galaxie name + tags +# total_tags[name] = len(tags) +################################################################################ + +#### Taxonomies #### + +def get_taxonomy_tags_from_cluster(taxonomy_name): + taxonomies = Taxonomies() + taxonomy = taxonomies[taxonomy_name] + return taxonomy.machinetags() + +# TODO: ADD api handler +def enable_taxonomy(taxonomy): + tags = get_taxonomy_tags_from_cluster(taxonomy) + r_tags.sadd('active_taxonomies', taxonomy) + for tag in tags: + r_tags.sadd(f'active_tag_{taxonomy}', tag) + +# def enable_taxonomy(taxonomie, enable_tags=True): +# ''' +# Enable a taxonomy. 
(UI) +# +# :param taxonomie: MISP taxonomy +# :type taxonomie: str +# :param enable_tags: crawled domain +# :type enable_tags: boolean +# ''' +# taxonomies = Taxonomies() +# if enable_tags: +# taxonomie_info = taxonomies.get(taxonomie) +# if taxonomie_info: +# # activate taxonomie +# r_tags.sadd('active_taxonomies', taxonomie) +# # activate taxonomie tags +# for tag in taxonomie_info.machinetags(): +# r_tags.sadd('active_tag_{}'.format(taxonomie), tag) +# #r_tags.sadd('active_taxonomies_tags', tag) +# else: +# print('Error: {}, please update pytaxonomies'.format(taxonomie)) + +#### Galaxies #### + +def get_galaxy_tags_from_cluster(galaxy_name): + clusters = Clusters(skip_duplicates=True) + cluster = clusters[galaxy_name] + return cluster.machinetags() + +def get_galaxy_tags_with_sysnonym_from_cluster(galaxy_name): + tags = {} + clusters = Clusters(skip_duplicates=True) + cluster = clusters[galaxy_name] + for data in cluster.to_dict()['values']: + tag = f'misp-galaxy:{cluster.type}="{data.value}"' + synonyms = data.meta.synonyms + if not synonyms: + synonyms = [] + tags[tag] = synonyms + return tags + +def enable_galaxy(galaxy): + tags = get_galaxy_tags_with_sysnonym_from_cluster(galaxy) + r_tags.sadd('active_galaxies', galaxy) + for tag in tags: + r_tags.sadd(f'active_tag_galaxies_{galaxy}', tag) + # synonyms + for synonym in tags[tag]: + r_tags.sadd(f'synonym_tag_{tag}', synonym) + + + +################################################################################ +################################################################################ +################################################################################ +################################################################################ + +def get_taxonomie_from_tag(tag): + try: + return tag.split(':')[0] + except IndexError: + return None + +def get_galaxy_from_tag(tag): + try: + galaxy = tag.split(':')[1] + galaxy = galaxy.split('=')[0] + return galaxy + except IndexError: + return None + +def get_taxonomies(): + return Taxonomies().keys() + +def is_taxonomie(taxonomie, taxonomies=[]): + if not taxonomies: + taxonomies = get_taxonomies() + return taxonomie in taxonomies + +def get_active_taxonomies(r_set=False): + res = r_tags.smembers('active_taxonomies') + if r_set: + return set(res) + return res + +def get_active_galaxies(r_set=False): + res = r_tags.smembers('active_galaxies') + if r_set: + return set(res) + return res + +def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update + return r_tags.smembers('active_taxonomies_tags') + +def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update + return r_tags.smembers('active_galaxies_tags') + +def get_all_custom_tags(): # # TODO: add + REMOVE + Update + return r_tags.smembers('tags:custom') + +def get_taxonomies_enabled_tags(r_list=False): + l_tag_keys = [] + for taxonomie in get_active_taxonomies(): + l_tag_keys.append(f'active_tag_{taxonomie}') + if len(l_tag_keys) > 1: + res = r_tags.sunion(l_tag_keys[0], *l_tag_keys[1:]) + elif l_tag_keys: + res = r_tags.smembers(l_tag_keys[0]) + #### # WARNING: # TODO: DIRTY FIX, REPLACE WITH LOCAL TAGS #### + + + if r_list: + return list(res) + else: + return res + +def get_galaxies_enabled_tags(): + l_tag_keys = [] + for galaxy in get_active_galaxies(): + l_tag_keys.append(f'active_tag_galaxies_{galaxy}') + if len(l_tag_keys) > 1: + return r_tags.sunion(l_tag_keys[0], *l_tag_keys[1:]) + elif l_tag_keys: + return r_tags.smembers(l_tag_keys[0]) + else: + return [] + +def get_custom_enabled_tags(r_list=False): + res = 
r_tags.smembers('tags:custom:enabled_tags') + if r_list: + return list(res) + else: + return res + +def get_taxonomies_customs_tags(r_list=False): + tags = get_custom_enabled_tags().union(get_taxonomies_enabled_tags()) + if r_list: + tags = list(tags) + return tags + +def get_taxonomie_enabled_tags(taxonomie, r_list=False): + res = r_tags.smembers(f'active_tag_{taxonomie}') + if r_list: + return list(res) + else: + return res + +def get_galaxy_enabled_tags(galaxy, r_list=False): + res = r_tags.smembers(f'active_tag_galaxies_{galaxy}') + if r_list: + return list(res) + else: + return res + +def is_taxonomie_tag_enabled(taxonomie, tag): + if tag in r_tags.smembers('active_tag_' + taxonomie): + return True + else: + return False + +def is_galaxy_tag_enabled(galaxy, tag): + if tag in r_tags.smembers('active_tag_galaxies_' + galaxy): + return True + else: + return False + +def is_custom_tag_enabled(tag): + return r_tags.sismember('tags:custom:enabled_tags', tag) + +# Check if tags are enabled in AIL +def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy): + if list_tags: + active_taxonomies = get_active_taxonomies() + + for tag in list_tags: + taxonomie = get_taxonomie_from_tag(tag) + if taxonomie is None: + return False + if taxonomie not in active_taxonomies: + return False + if not is_taxonomie_tag_enabled(taxonomie, tag): + return False + + if list_tags_galaxy: + active_galaxies = get_active_galaxies() + + for tag in list_tags_galaxy: + galaxy = get_galaxy_from_tag(tag) + if galaxy is None: + return False + if galaxy not in active_galaxies: + return False + if not is_galaxy_tag_enabled(galaxy, tag): + return False + return True + +def is_taxonomie_tag(tag, namespace=None): + if not namespace: + namespace = tag.split(':')[0] + if namespace != 'misp-galaxy': + return is_taxonomie(namespace) + else: + return False + +def is_galaxy_tag(tag, namespace=None): + if not namespace: + namespace = tag.split(':')[0] + if namespace == 'misp-galaxy': + return True + else: + return False + +def is_custom_tag(tag): + return r_tags.sismember('tags:custom', tag) + +# # TODO: +# def is_valid_tag(tag): +# pass + +def is_enabled_tag(tag, enabled_namespace=None): + if is_taxonomie_tag(tag): + return is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_namespace) + else: + return is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_namespace) + +def are_enabled_tags(tags): + enabled_taxonomies = get_active_taxonomies(r_set=True) + enabled_galaxies = get_active_galaxies(r_set=True) + for tag in tags: + if is_taxonomie_tag(tag): + res = is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_taxonomies) + else: + res = is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_galaxies) + if not res: + return False + return True + +def is_enabled_taxonomie_tag(tag, enabled_taxonomies=None): + if not enabled_taxonomies: + enabled_taxonomies = get_active_taxonomies() + taxonomie = get_taxonomie_from_tag(tag) + if taxonomie is None: + return False + if taxonomie not in enabled_taxonomies: + return False + if not is_taxonomie_tag_enabled(taxonomie, tag): + return False + return True + +def is_enabled_galaxy_tag(tag, enabled_galaxies=None): + if not enabled_galaxies: + enabled_galaxies = get_active_galaxies() + galaxy = get_galaxy_from_tag(tag) + if galaxy is None: + return False + if galaxy not in enabled_galaxies: + return False + if not is_galaxy_tag_enabled(galaxy, tag): + return False + return True + +def sort_tags_taxonomies_galaxies(tags): + taxonomies_tags = [] + galaxies_tags = [] + for tag in tags: + if 
is_taxonomie_tag(tag): + taxonomies_tags.append(tag) + else: + galaxies_tags.append(tag) + return taxonomies_tags, galaxies_tags + +##-- Taxonomies - Galaxies --## + +def is_tag_in_all_tag(tag): + if r_tags.sismember('list_tags', tag): + return True + else: + return False + +def get_tag_synonyms(tag): + return r_tags.smembers(f'synonym_tag_{tag}') + +def get_tag_dislay_name(tag): + tag_synonyms = get_tag_synonyms(tag) + if not tag_synonyms: + return tag + else: + return tag + ', '.join(tag_synonyms) + +def get_tags_selector_dict(tags): + list_tags = [] + for tag in tags: + list_tags.append(get_tag_selector_dict(tag)) + return list_tags + +def get_tag_selector_dict(tag): + return {'name':get_tag_dislay_name(tag),'id':tag} + +def get_tags_selector_data(): + dict_selector = {} + dict_selector['active_taxonomies'] = get_active_taxonomies() + dict_selector['active_galaxies'] = get_active_galaxies() + return dict_selector + +def get_min_tag(tag): + tag = tag.split('=') + if len(tag) > 1: + if tag[1] != '': + tag = tag[1][1:-1] + # no value + else: + tag = tag[0][1:-1] + # custom tags + else: + tag = tag[0] + return tag + +# TODO: ADD object type +def get_obj_tags_minimal(item_id): ####? + return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_object_tags('item', item_id) ] + +def unpack_str_tags_list(str_tags_list): + str_tags_list = str_tags_list.replace('"','\"') + if str_tags_list: + return str_tags_list.split(',') + else: + return [] + +# used by modal +def get_modal_add_tags(item_id, object_type='item'): + ''' + Modal: add tags to domain or Paste + ''' + return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), + "object_id": item_id, "object_type": object_type} + +######## NEW VERSION ######## +def create_custom_tag(tag): + r_tags.sadd('tags:custom', tag) + r_tags.sadd('tags:custom:enabled_tags', tag) + +# # TODO: ADD color +def get_tag_metadata(tag, r_int=False): + ''' + Get tag metadata (current: item only) + ''' + tag_metadata = {"tag": tag} + tag_metadata['first_seen'] = get_tag_first_seen(tag, r_int=r_int) + tag_metadata['last_seen'] = get_tag_last_seen(tag, r_int=r_int) + return tag_metadata + +def get_tags_min_last_seen(l_tags, r_int=False): + ''' + Get max last seen from a list of tags (current: item only) + ''' + min_last_seen = 99999999 + for tag in l_tags: + last_seen = get_tag_last_seen(tag, r_int=True) + if last_seen < min_last_seen: + min_last_seen = last_seen + if r_int: + return min_last_seen + else: + return str(min_last_seen) + +def get_all_tags(): + return list(r_tags.smembers('list_tags')) + +def get_all_obj_tags(obj_type): + return list(r_tags.smembers(f'list_tags:{obj_type}')) + +## Objects tags ## + +################################################################################### +################################################################################### +################################################################################### +################################################################################### +################################################################################### +################################################################################### +################################################################################### +################################################################################### +################################################################################### + +def add_global_tag(tag, object_type=None): + ''' + Create a set of 
all tags used in AIL (all + by object) + + :param tag: tag + :type domain: str + :param object_type: object type + :type domain: str + ''' + r_tags.sadd('list_tags', tag) + if object_type: + r_tags.sadd('list_tags:{}'.format(object_type), tag) + +def add_obj_tags(object_id, object_type, tags=[], galaxy_tags=[]): + obj_date = get_obj_date(object_type, object_id) + for tag in tags: + if tag: + taxonomie = get_taxonomie_from_tag(tag) + if is_taxonomie_tag_enabled(taxonomie, tag): + add_object_tag(tag, object_type, object_id) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) + + for tag in galaxy_tags: + if tag: + galaxy = get_galaxy_from_tag(tag) + if is_galaxy_tag_enabled(galaxy, tag): + add_object_tag(tag, object_type, object_id) + else: + return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) + +# TEMPLATE + API QUERY +def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"): + res_dict = {} + if object_id == None: + return ({'status': 'error', 'reason': 'object_id id not found'}, 404) + if not tags and not galaxy_tags: + return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) + if object_type not in ('item', 'domain', 'image', 'decoded'): # # TODO: put me in another file + return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400) + + # remove empty tags + tags = list(filter(bool, tags)) + galaxy_tags = list(filter(bool, galaxy_tags)) + + res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags) + if res: + return res + + res_dict['tags'] = tags + galaxy_tags + res_dict['id'] = object_id + res_dict['type'] = object_type + return (res_dict, 200) + +# def add_tag(object_type, tag, object_id, obj_date=None): +# # new tag +# if not is_obj_tagged(object_id, tag): +# # # TODO: # FIXME: sanityze object_type +# if obj_date: +# try: +# obj_date = int(obj_date) +# except: +# obj_date = None +# if not obj_date: +# obj_date = get_obj_date(object_type, object_id) +# add_global_tag(tag, object_type=object_type) +# add_obj_tag(object_type, object_id, tag, obj_date=obj_date) +# update_tag_metadata(tag, obj_date, object_type=object_type) +# +# # create tags stats # # TODO: put me in cache +# r_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1) + +# def delete_obj_tag(object_type, object_id, tag, obj_date): +# if object_type=="item": # # TODO: # FIXME: # REVIEW: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+#         obj_date = get_obj_date(object_type, object_id)
+#         r_serv_metadata.srem('tag:{}'.format(object_id), tag)
+#         r_tags.srem('{}:{}'.format(tag, obj_date), object_id)
+#     else:
+#         r_serv_metadata.srem('tag:{}'.format(object_id), tag)
+#         r_tags.srem('{}:{}'.format(object_type, tag), object_id)
+
+def delete_tag(object_type, tag, object_id, obj_date=None): ################################ # TODO:
+    # tag exist
+    if is_obj_tagged_by_tag(object_type, object_id, tag):
+        # delegate to the new per-object implementation above, which also
+        # refreshes tag metadata and the global tag lists
+        # (obj_date kept for API compatibility; the date is derived internally)
+        delete_object_tag(tag, object_type, object_id)
+    else:
+        return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400)
+
+# # TODO: move me
+def get_obj_date(object_type, object_id):
+    if object_type == "item":
+        return int(item_basic.get_item_date(object_id))
+    else:
+        return None
+
+# API QUERY
+def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
+    if not object_id:
+        return ({'status': 'error', 'reason': 'object id not found'}, 404)
+    if not tags:
+        return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)
+
+    for tag in tags:
+        res = delete_object_tag(tag, object_type, object_id, subtype='')
+        if res:
+            return res
+
+    dict_res = {}
+    dict_res['tags'] = tags
+    dict_res['id'] = object_id
+    return (dict_res, 200)
+
+
+# def delete_obj_tags(object_id, object_type, tags):
+#     obj_date = get_obj_date(object_type, object_id)
+#     for tag in tags:
+#         res = delete_tag(object_type, tag, object_id, obj_date=obj_date)
+#         if res:
+#             return res
+#
+# def delete_obj_all_tags(obj_id, obj_type):
+#     delete_obj_tags(obj_id, obj_type, get_obj_tag(obj_id))
+
+def sanitise_tags_date_range(l_tags, date_from=None, date_to=None):
+    if date_from is None or date_to is None:
+        date_from = get_tags_min_last_seen(l_tags, r_int=False)
+        date_to = date_from
+    return Date.sanitise_date_range(date_from, date_to)
+
+
+#### TAGS EXPORT ####
+# # TODO:
+def is_updated_tags_to_export(): # by type
+    return False
+
+def get_list_of_solo_tags_to_export_by_type(export_type): # by type
+    if export_type in ['misp', 'thehive']:
+        return r_serv_db.smembers('whitelist_{}'.format(export_type))
+    else:
+        return None
+    #r_serv_db.smembers('whitelist_hive')
+
+# if __name__ == '__main__':
+#     galaxy = 'infoleak'
+#     get_taxonomy_tags_from_cluster(galaxy)
diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py
index 94d72e83..5b2a60e1 100755
--- a/bin/lib/Tracker.py
+++ b/bin/lib/Tracker.py
@@ -16,11 +16,11 @@ from flask import escape

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
 import Date
-import Tag

 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
 import item_basic
+import Tag

 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py
new file mode 100755
index 00000000..5556bcdf
--- /dev/null
+++ b/bin/lib/ail_core.py
@@ -0,0 +1,35 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import redis
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
+config_loader = ConfigLoader()
+
+config_loader = None
+
+def get_ail_uuid():
+    pass
+
+#### AIL OBJECTS ####
+
+# # TODO: check change paste => item
+def get_all_objects():
+    return ['domain', 'item',
'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username'] + +def get_object_all_subtypes(obj_type): + if obj_type == 'cryptocurrency': + return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'] + if obj_type == 'pgp': + return ['key', 'mail', 'name'] + if obj_type == 'username': + return ['telegram', 'twitter', 'jabber'] + +##-- AIL OBJECTS --## diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index cf2894e9..4e62b328 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -506,11 +506,11 @@ def reset_all_spash_crawler_status(): r_cache.delete('all_splash_crawlers') def get_splash_crawler_status(spash_url): - crawler_type = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'type') - crawling_domain = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'crawling_domain') - started_time = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'started_time') - status_info = r_cache.hget('metadata_crawler:{}'.format(spash_url), 'status') - crawler_info = '{} - {}'.format(spash_url, started_time) + crawler_type = r_cache.hget(f'metadata_crawler:{spash_url}', 'type') + crawling_domain = r_cache.hget(f'metadata_crawler:{spash_url}', 'crawling_domain') + started_time = r_cache.hget(f'metadata_crawler:{spash_url}', 'started_time') + status_info = r_cache.hget(f'metadata_crawler:{spash_url}', 'status') + crawler_info = f'{spash_url} - {started_time}' if status_info=='Waiting' or status_info=='Crawling': status=True else: @@ -520,13 +520,13 @@ def get_splash_crawler_status(spash_url): def set_current_crawler_status(splash_url, status, started_time=False, crawled_domain=None, crawler_type=None): # TODO: get crawler type if None # Status: ['Waiting', 'Error', ...] - r_cache.hset('metadata_crawler:{}'.format(splash_url), 'status', status) + r_cache.hset(f'metadata_crawler:{splash_url}', 'status', status) if started_time: - r_cache.hset('metadata_crawler:{}'.format(splash_url), 'started_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + r_cache.hset(f'metadata_crawler:{splash_url}', 'started_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) if crawler_type: - r_cache.hset('metadata_crawler:{}'.format(splash_url), 'type', crawler_type) + r_cache.hset(f'metadata_crawler:{splash_url}', 'type', crawler_type) if crawled_domain: - r_cache.hset('metadata_crawler:{}'.format(splash_url), 'crawling_domain', crawled_domain) + r_cache.hset(f'metadata_crawler:{splash_url}', 'crawling_domain', crawled_domain) #r_cache.sadd('all_splash_crawlers', splash_url) # # TODO: add me in fct: create_ail_crawler diff --git a/bin/lib/data_retention_engine.py b/bin/lib/data_retention_engine.py new file mode 100755 index 00000000..55624dd4 --- /dev/null +++ b/bin/lib/data_retention_engine.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import ConfigLoader + +config_loader = ConfigLoader.ConfigLoader() +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") +config_loader = None + +def get_first_object_date(object_type, subtype, field=''): + first_date = r_serv_db.zscore('objs:first_date', f'{object_type}:{subtype}:{field}') + if not first_date: + first_date = 99999999 + return int(first_date) + +def get_last_object_date(object_type, subtype, field=''): + last_date = r_serv_db.zscore('objs:last_date', f'{object_type}:{subtype}:{field}') + if not last_date: + last_date = 0 + return int(last_date) + +def _set_first_object_date(object_type, subtype, date, field=''): + 
return r_serv_db.zadd('objs:first_date', f'{object_type}:{subtype}:{field}', date)
+
+def _set_last_object_date(object_type, subtype, date, field=''):
+    return r_serv_db.zadd('objs:last_date', f'{object_type}:{subtype}:{field}', date)
+
+def update_first_object_date(object_type, subtype, date, field=''):
+    first_date = get_first_object_date(object_type, subtype, field=field)
+    if int(date) < first_date:
+        _set_first_object_date(object_type, subtype, date, field=field)
+        return date
+    else:
+        return first_date
+
+def update_last_object_date(object_type, subtype, date, field=''):
+    last_date = get_last_object_date(object_type, subtype, field=field)
+    if int(date) > last_date:
+        _set_last_object_date(object_type, subtype, date, field=field)
+        return date
+    else:
+        return last_date
+
+def update_object_date(object_type, subtype, date, field=''):
+    update_first_object_date(object_type, subtype, date, field=field)
+    update_last_object_date(object_type, subtype, date, field=field)
+
+
+###############################################################
diff --git a/bin/lib/index_whoosh.py b/bin/lib/index_whoosh.py
index b5657697..bd18a172 100755
--- a/bin/lib/index_whoosh.py
+++ b/bin/lib/index_whoosh.py
@@ -61,9 +61,9 @@ def delete_index_by_name(index_name):
     index_path = os.path.realpath(index_path)
     # incorrect filename
     if not os.path.commonprefix([index_path, INDEX_PATH]) == INDEX_PATH:
-        raise Exception('Path traversal detected {}'.format(index_path))
+        raise Exception(f'Path traversal detected {index_path}')
     if not os.path.isdir(index_path):
-        print('Error: The index directory {} doesn\'t exist'.format(index_path))
+        print(f'Error: The index directory {index_path} doesn\'t exist')
         return None
     res = rmtree(index_path)
     _remove_index_name_from_all_index(index_name)
@@ -85,7 +85,7 @@ def delete_older_index_by_time(int_time):
     if int(all_index[-1]) > int_time: # make sure to keep one files
         for index_name in all_index:
             if int(index_name) < int_time:
-                print('deleting index {} ...'.format(index_name))
+                print(f'deleting index {index_name} ...')
                 delete_index_by_name(index_name)

 # keep x most recent index
@@ -94,7 +94,7 @@ def delete_older_index(number_of_index_to_keep):
     all_index = get_all_index()
     if len(get_all_index()) > number_of_index_to_keep:
         for index_name in all_index[0:-number_of_index_to_keep]:
-            print('deleting index {} ...'.format(index_name))
+            print(f'deleting index {index_name} ...')
             delete_index_by_name(index_name)

 ##-- DATA RETENTION --##
diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py
index d4e9854b..b9120f6d 100755
--- a/bin/lib/item_basic.py
+++ b/bin/lib/item_basic.py
@@ -7,11 +7,9 @@ import gzip

 import magic

-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
-import Tag
-
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 import ConfigLoader
+import Tag

 config_loader = ConfigLoader.ConfigLoader()
 # get and sanityze PASTE DIRECTORY
@@ -247,7 +245,7 @@ def verify_sources_list(sources):
 def get_all_items_metadata_dict(list_id):
     list_meta = []
     for item_id in list_id:
-        list_meta.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} )
+        list_meta.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
     return list_meta

 ##-- --##
diff --git a/bin/lib/objects/CryptoCurrencies.py b/bin/lib/objects/CryptoCurrencies.py
index 5ac30f2d..3983612e 100755
--- a/bin/lib/objects/CryptoCurrencies.py
+++ b/bin/lib/objects/CryptoCurrencies.py
@@ -97,6 +97,7 @@ class
CryptoCurrency(AbstractSubtypeObject): ############################################################################ def get_all_subtypes(): + #return ail_core.get_object_all_subtypes(self.type) return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash'] # def build_crypto_regex(subtype, search_id): diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py index 9ea50c23..60536561 100755 --- a/bin/lib/objects/Domains.py +++ b/bin/lib/objects/Domains.py @@ -12,6 +12,7 @@ from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_object import AbstractObject from lib.item_basic import get_item_children, get_item_date, get_item_url +from lib import data_retention_engine config_loader = ConfigLoader() r_onion = config_loader.get_redis_conn("ARDB_Onion") @@ -48,7 +49,6 @@ class Domain(AbstractObject): if first_seen: if separator: first_seen = f'{first_seen[0:4]}/{first_seen[4:6]}/{first_seen[6:8]}' - first_seen = int(first_seen) elif r_int==True: first_seen = int(first_seen) return first_seen @@ -92,10 +92,17 @@ class Domain(AbstractObject): res = r_onion.zrevrange(f'crawler_history_{self.domain_type}:{self.id}:{port}', 0, 0, withscores=True) if res: item_core, epoch = res[0] - if item_core != str(epoch): + try: + epoch = int(item_core) + except (TypeError, ValueError): return True return False + def was_up(self): + return r_onion.hexists(f'{self.domain_type}_metadata:{self.id}', 'ports') + def get_ports(self, r_set=False): l_ports = r_onion.hget(f'{self.domain_type}_metadata:{self.id}', 'ports') if l_ports: @@ -144,18 +151,26 @@ class Domain(AbstractObject): def get_languages(self): return r_onion.smembers(f'domain:language:{self.id}') - def get_meta(self): + def get_meta_keys(self): + return ['type', 'first_seen', 'last_check', 'last_origin', 'ports', 'status', 'tags', 'languages'] + + # options: set of optional meta fields + def get_meta(self, options=set()): meta = {} meta['type'] = self.domain_type meta['first_seen'] = self.get_first_seen() meta['last_check'] = self.get_last_check() - meta['last_origin'] = self.last_origin() + meta['tags'] = self.get_tags() meta['ports'] = self.get_ports() - meta['status'] = self.is_up(ports=ports) - meta['tags'] = self.get_last_origin() - #meta['is_tags_safe'] = - meta['languages'] = self.get_languages() + meta['status'] = self.is_up(ports=meta['ports']) + + if 'last_origin' in options: + meta['last_origin'] = self.get_last_origin() + #meta['is_tags_safe'] = ################################## + if 'languages' in options: + meta['languages'] = self.get_languages() #meta['screenshot'] = + return meta # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ @@ -272,21 +287,32 @@ class Domain(AbstractObject): r_onion.zadd(f'crawler_history_{self.domain_type}:{self.id}:{port}', epoch, int(root_item)) # if domain down -> root_item = epoch - def add_history(self, epoch, port, root_item=None): - date = time.strftime('%Y%m%d', time.gmtime(epoch)) + def add_history(self, epoch, port, root_item=None, date=None): + if not date: + date = time.strftime('%Y%m%d', time.gmtime(epoch)) try: int(root_item) except ValueError: root_item = None + + data_retention_engine.update_object_date('domain', self.domain_type, date) + update_first_object_date(date, self.domain_type) + update_last_object_date(date, self.domain_type) # UP if root_item: + r_onion.srem(f'full_{self.domain_type}_down', self.id) r_onion.sadd(f'full_{self.domain_type}_up', self.id) r_onion.sadd(f'{self.domain_type}_up:{date}', self.id) # # TODO: -> store first day 
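+ # # NOTE: an up domain is indexed at three granularities: full_{type}_up (all time), {type}_up:YYYYMMDD (daily) and month_{type}_up:YYYYMM (monthly)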
r_onion.sadd(f'month_{self.domain_type}_up:{date[0:6]}', self.id) # # TODO: -> store first month self._add_history_root_item(root_item, epoch, port) else: - r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month - self._add_history_root_item(epoch, epoch, port) + if port: + r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) # # TODO: -> store first month + self._add_history_root_item(epoch, epoch, port) + else: + r_onion.sadd(f'{self.domain_type}_down:{date}', self.id) + if not self.was_up(): + r_onion.sadd(f'full_{self.domain_type}_down', self.id) def add_crawled_item(self, url, port, item_id, item_father): r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father) @@ -310,6 +336,44 @@ def get_all_domains_languages(): def get_domains_up_by_type(domain_type): return r_onion.smembers(f'full_{domain_type}_up') +def get_domains_down_by_type(domain_type): + return r_onion.smembers(f'full_{domain_type}_down') + +def get_first_object_date(subtype, field=''): + first_date = r_onion.zscore('objs:first_date', f'domain:{subtype}:{field}') + if not first_date: + first_date = 99999999 + return int(first_date) + +def get_last_object_date(subtype, field=''): + last_date = r_onion.zscore('objs:last_date', f'domain:{subtype}:{field}') + if not last_date: + last_date = 0 + return int(last_date) + +def _set_first_object_date(date, subtype, field=''): + return r_onion.zadd('objs:first_date', f'domain:{subtype}:{field}', date) + +def _set_last_object_date(date, subtype, field=''): + return r_onion.zadd('objs:last_date', f'domain:{subtype}:{field}', date) + +def update_first_object_date(date, subtype, field=''): + first_date = get_first_object_date(subtype, field=field) + if int(date) < first_date: + _set_first_object_date(date, subtype, field=field) + return date + else: + return first_date + +def update_last_object_date(date, subtype, field=''): + last_date = get_last_object_date(subtype, field=field) + if int(date) > last_date: + _set_last_object_date(date, subtype, field=field) + return date + else: + return last_date + + ################################################################################ ################################################################################ diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py index 37f58dff..69b7a266 100755 --- a/bin/lib/objects/Items.py +++ b/bin/lib/objects/Items.py @@ -22,8 +22,7 @@ from export.Export import get_ail_uuid # # TODO: REPLACE from lib.objects.abstract_object import AbstractObject from lib.ConfigLoader import ConfigLoader from lib import item_basic - -from packages import Tag +from lib import Tag from flask import url_for @@ -493,7 +492,7 @@ def get_item(request_dict): dict_item['date'] = get_item_date(item_id, add_separator=add_separator) tags = request_dict.get('tags', True) if tags: - dict_item['tags'] = Tag.get_obj_tag(item_id) + dict_item['tags'] = Tag.get_object_tags('item', item_id) size = request_dict.get('size', False) if size: @@ -568,7 +567,7 @@ def api_get_items_sources(): def get_item_list_desc(list_item_id): desc_list = [] for item_id in list_item_id: - desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} ) + desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} ) return desc_list def is_crawled(item_id): @@ -579,7 +578,7 @@ def get_crawler_matadata(item_id, tags=None): if is_crawled(item_id): dict_crawler['domain'] = get_item_domain(item_id) if not ltags: - 
ltags = Tag.get_obj_tag(item_id) + ltags = Tag.get_object_tags('item', item_id) dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags) dict_crawler['url'] = get_item_link(item_id) dict_crawler['screenshot'] = get_item_screenshot(item_id) diff --git a/bin/lib/objects/Pgps.py b/bin/lib/objects/Pgps.py index 4ae22642..8197a020 100755 --- a/bin/lib/objects/Pgps.py +++ b/bin/lib/objects/Pgps.py @@ -80,6 +80,7 @@ class Pgp(AbstractSubtypeObject): ############################################################################ def get_all_subtypes(): + #return get_object_all_subtypes(self.type) return ['key', 'mail', 'name'] def get_all_pgps(): diff --git a/bin/lib/objects/Screenshots.py b/bin/lib/objects/Screenshots.py index aaea7c60..2bd1c39a 100755 --- a/bin/lib/objects/Screenshots.py +++ b/bin/lib/objects/Screenshots.py @@ -8,6 +8,7 @@ from io import BytesIO from flask import url_for sys.path.append(os.environ['AIL_BIN']) +#from lib import Tag from lib.ConfigLoader import ConfigLoader from lib.objects.abstract_object import AbstractObject diff --git a/bin/lib/objects/Usernames.py b/bin/lib/objects/Usernames.py index cfcdbf66..3eef0c9f 100755 --- a/bin/lib/objects/Usernames.py +++ b/bin/lib/objects/Usernames.py @@ -86,6 +86,7 @@ class Username(AbstractSubtypeObject): ############################################################################ def get_all_subtypes(): + #return ail_core.get_object_all_subtypes(self.type) return ['telegram', 'twitter', 'jabber'] def get_all_usernames(): diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index e295fa62..46c141f1 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from packages import Tag +from lib import Tag from lib import Duplicate from lib.correlations_engine import get_correlations, add_obj_correlation, delete_obj_correlation, exists_obj_correlation, is_obj_correlated from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations @@ -66,7 +66,7 @@ class AbstractObject(ABC): ## Tags ## def get_tags(self, r_set=False): - tags = Tag.get_obj_tag(self.id) + tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True)) if r_set: tags = set(tags) return tags @@ -75,7 +75,8 @@ class AbstractObject(ABC): return Duplicate.get_duplicates(self.type, self.get_subtype(r_str=True), self.id) ## ADD TAGS ???? 
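+ # the new lib.Tag API is object-aware: add_object_tag(tag, obj_type, obj_id, subtype='') replaces the old packages/Tag.add_tag(obj_type, tag, obj_id)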
- #def add_tags(self): + def add_tag(self, tag): + Tag.add_object_tag(tag, self.type, self.id, subtype=self.get_subtype(r_str=True)) #- Tags -# @@ -120,7 +121,7 @@ class AbstractObject(ABC): def _delete(self): # DELETE TAGS - Tag.delete_obj_all_tags(self.id, self.type) + Tag.delete_obj_all_tags(self.id, self.type) ############ # TODO: # TODO: # FIXME: # remove from tracker self.delete_trackers() # remove from investigations @@ -135,12 +136,12 @@ class AbstractObject(ABC): """ pass - # @abstractmethod - # def get_meta(self): - # """ - # get Object metadata - # """ - # pass + @abstractmethod + def get_meta(self): + """ + get Object metadata + """ + pass @abstractmethod def get_link(self, flask_context=False): diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index c6312603..e04d8ac2 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -9,40 +9,33 @@ import redis from abc import ABC from flask import url_for +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +from lib.ConfigLoader import ConfigLoader +from lib.ail_core import get_all_objects +from lib import correlations_engine -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +from lib.objects.CryptoCurrencies import CryptoCurrency +from lib.objects.Decodeds import Decoded +from lib.objects.Domains import Domain +from lib.objects.Items import Item +from lib.objects.Pgps import Pgp +from lib.objects.Screenshots import Screenshot +from lib.objects.Usernames import Username -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/objects')) -from CryptoCurrencies import CryptoCurrency -from Decodeds import Decoded -from Domains import Domain -from Items import Item -from Pgps import Pgp -from Screenshots import Screenshot -from Usernames import Username -################################################################## -################################################################## -#sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) - -#sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -################################################################## -################################################################## - -config_loader = ConfigLoader.ConfigLoader() +config_loader = ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None -class AILObjects(object): +class AILObjects(object): ## ?????????????????????? 
initial = 0 ongoing = 1 completed = 2 - -# # TODO: check change paste => item -def get_all_objects(): - return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'screenshot', 'username'] +def is_valid_object_type(obj_type): + return obj_type in get_all_objects() def get_object(obj_type, subtype, id): if obj_type == 'item': @@ -60,21 +53,40 @@ def get_object(obj_type, subtype, id): elif obj_type == 'username': return Username(id, subtype) +def get_object_link(obj_type, subtype, id, flask_context=False): + object = get_object(obj_type, subtype, id) + return object.get_link(flask_context=flask_context) + def get_object_svg(obj_type, subtype, id): object = get_object(obj_type, subtype, id) return object.get_svg_icon() -def get_objects_meta(l_dict_objs, icon=False, url=False, flask_context=False): - l_meta = [] - for dict_obj in l_dict_objs: - object = get_object(dict_obj['type'], dict_obj['subtype'], dict_obj['id']) - dict_meta = object.get_default_meta(tags=True) - if icon: - dict_meta['icon'] = object.get_svg_icon() - if url: - dict_meta['link'] = object.get_link(flask_context=flask_context) - l_meta.append(dict_meta) - return l_meta +def get_object_meta(obj_type, subtype, id, flask_context=False): + object = get_object(obj_type, subtype, id) + meta = object.get_meta() + meta['icon'] = object.get_svg_icon() + meta['link'] = object.get_link(flask_context=flask_context) + return meta + +def get_ui_obj_tag_table_keys(obj_type): + ''' + Warning: use only in flask (dynamic templates) + ''' + if obj_type=="domain": + return ['id', 'first_seen', 'last_check', 'status'] # # TODO: add root screenshot + +# # TODO: # FIXME: +# def get_objects_meta(l_dict_objs, icon=False, url=False, flask_context=False): +# l_meta = [] +# for dict_obj in l_dict_objs: +# object = get_object(dict_obj['type'], dict_obj['subtype'], dict_obj['id']) +# dict_meta = object.get_default_meta(tags=True) +# if icon: +# dict_meta['icon'] = object.get_svg_icon() +# if url: +# dict_meta['link'] = object.get_link(flask_context=flask_context) +# l_meta.append(dict_meta) +# return l_meta # # TODO: CHECK IF object already have an UUID def get_misp_object(obj_type, subtype, id): @@ -126,7 +138,21 @@ def get_objects_relationship(obj_1, obj2): return relationship +def api_sanitize_object_type(obj_type): + if not is_valid_object_type(obj_type): + return ({'status': 'error', 'reason': 'Incorrect object type'}, 400) +################################################################################ +# DATA RETENTION +# # TODO: TO ADD ?????????????????????? 
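+# NOTE: these helpers duplicate lib/data_retention_engine and appear to assume an r_object db handle not set up in this module, hence staying commented out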
+# def get_first_objects_date(): +# return r_object.zrange('objs:first_date', 0, -1) +# +# def get_first_object_date(obj_type, subtype): +# return r_object.zscore('objs:first_date', f'{obj_type}:{subtype}') +# +# def set_first_object_date(obj_type, subtype, date): +# return r_object.zadd('objs:first_date', f'{obj_type}:{subtype}', date) ################################################################################ @@ -142,3 +168,41 @@ def delete_obj(obj_type, subtype, id): ################################################################################ ################################################################################ ################################################################################ + +def create_correlation_graph_links(links_set): + links = [] + for link in links_set: + links.append({"source": link[0], "target": link[1]}) + return links + +def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True): + graph_nodes_list = [] + for node_id in nodes_set: + obj_type, subtype, obj_id = node_id.split(';', 2) + dict_node = {"id": node_id} + dict_node['style'] = get_object_svg(obj_type, subtype, obj_id) + + # # TODO: # FIXME: in UI + dict_node['style']['icon_class'] = dict_node['style']['style'] + dict_node['style']['icon_text'] = dict_node['style']['icon'] + dict_node['style']['node_color'] = dict_node['style']['color'] + dict_node['style']['node_radius'] = dict_node['style']['radius'] + # # TODO: # FIXME: in UI + + dict_node['text'] = obj_id + if node_id == obj_str_id: + dict_node["style"]["node_color"] = 'orange' + dict_node["style"]["node_radius"] = 7 + dict_node['url'] = get_object_link(obj_type, subtype, obj_id, flask_context=flask_context) + graph_nodes_list.append(dict_node) + return graph_nodes_list + +def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False): + obj_str_id, nodes, links = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, flask_context=flask_context) + return {"nodes": create_correlation_graph_nodes(nodes, obj_str_id, flask_context=flask_context), "links": create_correlation_graph_links(links)} + + + + +############### diff --git a/bin/lib/simple_correlation.py b/bin/lib/simple_correlation.py deleted file mode 100755 index 254b19eb..00000000 --- a/bin/lib/simple_correlation.py +++ /dev/null @@ -1,282 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Date -import Item -#import Tag - -config_loader = ConfigLoader.ConfigLoader() -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -config_loader = None - -def get_all_correlation_objects(): - ''' - Return a list of all correllated objects - ''' - return ['domain', 'paste'] - -class SimpleCorrelation(object): #social_name - - def __init__(self, correlation_name): - self.correlation_name = correlation_name - - def exist_correlation(self, obj_id): - res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id) - if res is not None: - return True - else: - return False - - def _get_items(self, obj_id): - res = r_serv_metadata.smembers('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id)) - if res: - return list(res) - else: - return [] - - def 
get_correlation_first_seen(self, obj_id, r_int=False): - res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen') - if r_int: - if res: - return int(res) - else: - return 99999999 - else: - return res - - def get_correlation_last_seen(self, obj_id, r_int=False): - res = r_serv_metadata.hget('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen') - if r_int: - if res: - return int(res) - else: - return 0 - else: - return res - - def _get_metadata(self, obj_id): - meta_dict = {} - meta_dict['first_seen'] = self.get_correlation_first_seen(obj_id) - meta_dict['last_seen'] = self.get_correlation_last_seen(obj_id) - meta_dict['nb_seen'] = r_serv_metadata.scard('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id)) - return meta_dict - - def get_metadata(self, correlation_type, field_name, date_format='str_date'): - meta_dict = self._get_metadata(obj_id) - if date_format == "str_date": - if meta_dict['first_seen']: - meta_dict['first_seen'] = '{}/{}/{}'.format(meta_dict['first_seen'][0:4], meta_dict['first_seen'][4:6], meta_dict['first_seen'][6:8]) - if meta_dict['last_seen']: - meta_dict['last_seen'] = '{}/{}/{}'.format(meta_dict['last_seen'][0:4], meta_dict['last_seen'][4:6], meta_dict['last_seen'][6:8]) - return meta_dict - - def get_nb_object_seen_by_date(self, obj_id, date_day): - nb = r_serv_metadata.zscore('s_correl:date:{}:{}'.format(self.correlation_name, date_day), obj_id) - if nb is None: - return 0 - else: - return int(nb) - - def get_list_nb_previous_correlation_object(self, obj_id, numDay): - nb_previous_correlation = [] - for date_day in Date.get_previous_date_list(numDay): - nb_previous_correlation.append(self.get_nb_object_seen_by_date(obj_id, date_day)) - return nb_previous_correlation - - def _get_correlation_by_date(self, date_day): - return r_serv_metadata.zrange('s_correl:date:{}:{}'.format(self.correlation_name, date_day), 0, -1) - - # def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'): - # if not request_dict: - # return ({'status': 'error', 'reason': 'Malformed JSON'}, 400) - # - # field_name = request_dict.get(correlation_type, None) - # if not field_name: - # return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 ) - # if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type): - # return ( {'status': 'error', 'reason': 'Item not found'}, 404 ) - - def get_correlation(self, request_dict, obj_id): - dict_resp = {} - - if request_dict.get('items'): - dict_resp['items'] = self._get_items(obj_id) - - if request_dict.get('metadata'): - dict_resp['metadata'] = self._get_metadata(obj_id) - return (dict_resp, 200) - - def _get_domain_correlation_obj(self, domain): - ''' - Return correlation of a given domain. - - :param domain: crawled domain - :type domain: str - :param correlation_type: correlation type - :type correlation_type: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('domain:s_correl:{}:{}'.format(self.correlation_name, domain)) - if res: - return list(res) - else: - return [] - - def _get_correlation_obj_domain(self, correlation_id): - ''' - Return all domains that contain this correlation. 
- - :param domain: field name - :type domain: str - :param correlation_type: correlation type - :type correlation_type: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('s_correl:set_domain_{}:{}'.format(self.correlation_name, correlation_id)) - if res: - return list(res) - else: - return [] - - def _get_item_correlation_obj(self, item_id): - ''' - Return correlation of a given item id. - - :param item_id: item id - :type item_id: str - - :return: a list of correlation - :rtype: list - ''' - res = r_serv_metadata.smembers('item:s_correl:{}:{}'.format(self.correlation_name, item_id)) - if res: - return list(res) - else: - return [] - - def get_correlation_all_object(self, correlation_value, correlation_objects=[]): - if not correlation_objects: - correlation_objects = get_all_correlation_objects() - correlation_obj = {} - for correlation_object in correlation_objects: - if correlation_object == 'paste': - res = self._get_items(correlation_value) - elif correlation_object == 'domain': - res = self.get_correlation_obj_domain(correlation_value) - else: - res = None - if res: - correlation_obj[correlation_object] = res - return correlation_obj - - def update_correlation_daterange(self, obj_id, date): - date = int(date) - # obj_id don't exit - if not r_serv_metadata.exists('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id)): - r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen', date) - r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen', date) - else: - first_seen = self.get_correlation_last_seen(obj_id, r_int=True) - last_seen = self.get_correlation_first_seen(obj_id, r_int=True) - if date < first_seen: - r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'first_seen', date) - if date > last_seen: - r_serv_metadata.hset('s_correl:{}:metadata:{}'.format(self.correlation_name, obj_id), 'last_seen', date) - - def save_item_correlation(self, obj_id, item_id, item_date): - self.update_correlation_daterange(obj_id, item_date) - # global set - r_serv_metadata.sadd('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id) - - # daily - r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, 1) - - # all correlation - r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 1) - - # item - r_serv_metadata.sadd('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id) - - # domain - if Item.is_crawled(item_id): - domain = Item.get_item_domain(item_id) - self.save_domain_correlation(domain, subtype, obj_id) - - def delete_item_correlation(self, subtype, obj_id, item_id, item_date): - #self.update_correlation_daterange ! 
# # TODO: - r_serv_metadata.srem('s_correl:set_item_{}:{}'.format(self.correlation_name, obj_id), item_id) - r_serv_metadata.srem('item:s_correl:{}:{}'.format(self.correlation_name, item_id), obj_id) - - res = r_serv_metadata.zincrby('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id, -1) - if int(res) < 0: # remove last - r_serv_metadata.zrem('s_correl:date:{}:{}'.format(self.correlation_name, item_date), obj_id) - - res = r_serv_metadata.zscore('s_correl:{}:all'.format(self.correlation_name), obj_id) - if int(res) > 0: - r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, -1) - - def save_domain_correlation(self, domain, obj_id): - r_serv_metadata.sadd('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id) - r_serv_metadata.sadd('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain) - - def delete_domain_correlation(self, domain, obj_id): - r_serv_metadata.srem('domain:s_correl:{}:{}'.format(self.correlation_name, domain), obj_id) - r_serv_metadata.srem('s_correl:set_domain_{}:{}'.format(self.correlation_name, obj_id), domain) - - ###### - - def save_correlation(self, obj_id, date_range): - r_serv_metadata.zincrby('s_correl:{}:all'.format(self.correlation_name), obj_id, 0) - self.update_correlation_daterange(obj_id, date_range['date_from']) - if date_range['date_from'] != date_range['date_to']: - self.update_correlation_daterange(obj_id, date_range['date_to']) - return True - - def save_obj_relationship(self, obj_id, obj2_type, obj2_id): - if obj2_type == 'domain': - self.save_domain_correlation(obj2_id, obj_id) - elif obj2_type == 'item': - self.save_item_correlation(obj_id, obj2_id, Item.get_item_date(obj2_id)) - - def delete_obj_relationship(self, obj_id, obj2_type, obj2_id): - if obj2_type == 'domain': - self.delete_domain_correlation(obj2_id, obj_id) - elif obj2_type == 'item': - self.delete_item_correlation(obj_id, obj2_id, Item.get_item_date(obj2_id)) - - # def create_correlation(self, subtype, obj_id, obj_meta): - # res = self.sanythise_correlation_types([subtype], r_boolean=True) - # if not res: - # print('invalid subtype') - # return False - # first_seen = obj_meta.get('first_seen', None) - # last_seen = obj_meta.get('last_seen', None) - # date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime') - # res = self.save_correlation(subtype, obj_id, date_range) - # if res and 'tags' in obj_meta: - # # # TODO: handle mixed tags: taxonomies and Galaxies - # pass - # #Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type()) - # return True - # - # # # TODO: handle tags - # def delete_correlation(self, obj_id): - # pass - - -######## API EXPOSED ######## - - -######## ######## diff --git a/bin/modules/Tags.py b/bin/modules/Tags.py index 9bfc080c..9a14b14d 100755 --- a/bin/modules/Tags.py +++ b/bin/modules/Tags.py @@ -21,7 +21,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from modules.abstract_module import AbstractModule from packages.Item import Item -from packages import Tag +from lib import Tag class Tags(AbstractModule): @@ -47,7 +47,7 @@ class Tags(AbstractModule): item = Item(mess_split[1]) # Create a new tag - Tag.add_tag('item', tag, item.get_id()) + Tag.add_object_tag(tag, 'item', item.get_id()) print(f'{item.get_id()}: Tagged {tag}') # Forward message to channel diff --git a/bin/modules/submit_paste.py b/bin/modules/submit_paste.py index ddd76f4a..7777b301 100755 --- 
a/bin/modules/submit_paste.py +++ b/bin/modules/submit_paste.py @@ -28,8 +28,8 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from modules.abstract_module import AbstractModule -from packages import Tag from lib import ConfigLoader +from lib import Tag class SubmitPaste(AbstractModule): @@ -298,10 +298,10 @@ class SubmitPaste(AbstractModule): # add tags for tag in ltags: - Tag.add_tag('item', tag, rel_item_path) + Tag.add_object_tag(tag, 'item', rel_item_path) for tag in ltagsgalaxies: - Tag.add_tag('item', tag, rel_item_path) + Tag.add_object_tag(tag, 'item', rel_item_path) self.r_serv_log_submit.incr(f'{uuid}:nb_end') self.r_serv_log_submit.incr(f'{uuid}:nb_sucess') diff --git a/bin/packages/Date.py b/bin/packages/Date.py index 33047e92..ba234532 100644 --- a/bin/packages/Date.py +++ b/bin/packages/Date.py @@ -116,6 +116,8 @@ def get_nb_days_by_daterange(date_from, date_to): delta = date_to - date_from # timedelta return len(range(delta.days + 1)) +def get_date_range_today(date_from): + return substract_date(date_from, get_today_date_str()) def substract_date(date_from, date_to): date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py deleted file mode 100755 index aef242e7..00000000 --- a/bin/packages/Tag.py +++ /dev/null @@ -1,785 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis -import datetime - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Date - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader -import item_basic - -from pytaxonomies import Taxonomies -from pymispgalaxies import Galaxies, Clusters - -config_loader = ConfigLoader.ConfigLoader() -r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") -#r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags") -r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -config_loader = None - -def build_unsafe_tags(): - unsafe_tags = set() - ## CE content - unsafe_tags.add('dark-web:topic="pornography-child-exploitation"') - # add copine-scale tags - taxonomies = Taxonomies() - copine_scale = taxonomies.get('copine-scale') - if copine_scale: - for tag in copine_scale.machinetags(): - unsafe_tags.add(tag) - return unsafe_tags - -# set of unsafe tags -unsafe_tags = build_unsafe_tags() - -def is_tags_safe(ltags): - ''' - Check if a list of tags contain an unsafe tag (CE, ...) 
- - :param ltags: list of tags - :type ltags: list - :return: is a tag in the unsafe set - :rtype: boolean - ''' - return unsafe_tags.isdisjoint(ltags) - -#### Taxonomies - Galaxies #### - -def get_taxonomie_from_tag(tag): - try: - return tag.split(':')[0] - except IndexError: - return None - -def get_galaxy_from_tag(tag): - try: - galaxy = tag.split(':')[1] - galaxy = galaxy.split('=')[0] - return galaxy - except IndexError: - return None - -def get_taxonomies(): - return Taxonomies().keys() - -def is_taxonomie(taxonomie, taxonomies=[]): - if not taxonomies: - taxonomies = get_taxonomies() - return taxonomie in taxonomies - -def get_active_taxonomies(r_set=False): - res = r_serv_tags.smembers('active_taxonomies') - if r_set: - return set(res) - return res - -def get_active_galaxies(r_set=False): - res = r_serv_tags.smembers('active_galaxies') - if r_set: - return set(res) - return res - -def get_all_taxonomies_tags(): # # TODO: add + REMOVE + Update - return r_serv_tags.smembers('active_taxonomies_tags') - -def get_all_galaxies_tags(): # # TODO: add + REMOVE + Update - return r_serv_tags.smembers('active_galaxies_tags') - -def get_all_custom_tags(): # # TODO: add + REMOVE + Update - return r_serv_tags.smembers('tags:custom') - -def get_taxonomies_enabled_tags(r_list=False): - l_tag_keys = [] - for taxonomie in get_active_taxonomies(): - l_tag_keys.append(f'active_tag_{taxonomie}') - if len(l_tag_keys) > 1: - res = r_serv_tags.sunion(l_tag_keys[0], *l_tag_keys[1:]) - elif l_tag_keys: - res = r_serv_tags.smembers(l_tag_keys[0]) - #### # WARNING: # TODO: DIRTY FIX, REPLACE WITH LOCAL TAGS #### - - - if r_list: - return list(res) - else: - return res - -def get_galaxies_enabled_tags(): - l_tag_keys = [] - for galaxy in get_active_galaxies(): - l_tag_keys.append(f'active_tag_galaxies_{galaxy}') - if len(l_tag_keys) > 1: - return r_serv_tags.sunion(l_tag_keys[0], *l_tag_keys[1:]) - elif l_tag_keys: - return r_serv_tags.smembers(l_tag_keys[0]) - else: - return [] - -def get_custom_enabled_tags(r_list=False): - res = r_serv_tags.smembers('tags:custom:enabled_tags') - if r_list: - return list(res) - else: - return res - -def get_taxonomies_customs_tags(r_list=False): - tags = get_custom_enabled_tags().union(get_taxonomies_enabled_tags()) - if r_list: - tags = list(tags) - return tags - -def get_taxonomie_enabled_tags(taxonomie, r_list=False): - res = r_serv_tags.smembers(f'active_tag_{taxonomie}') - if r_list: - return list(res) - else: - return res - -def get_galaxy_enabled_tags(galaxy, r_list=False): - res = r_serv_tags.smembers(f'active_tag_galaxies_{galaxy}') - if r_list: - return list(res) - else: - return res - -def is_taxonomie_tag_enabled(taxonomie, tag): - if tag in r_serv_tags.smembers('active_tag_' + taxonomie): - return True - else: - return False - -def is_galaxy_tag_enabled(galaxy, tag): - if tag in r_serv_tags.smembers('active_tag_galaxies_' + galaxy): - return True - else: - return False - -def is_custom_tag_enabled(tag): - return r_serv_tags.sismember('tags:custom:enabled_tags', tag) - -def enable_taxonomy(taxonomie, enable_tags=True): - ''' - Enable a taxonomy. 
(UI) - - :param taxonomie: MISP taxonomy - :type taxonomie: str - :param enable_tags: crawled domain - :type enable_tags: boolean - ''' - taxonomies = Taxonomies() - if enable_tags: - taxonomie_info = taxonomies.get(taxonomie) - if taxonomie_info: - # activate taxonomie - r_serv_tags.sadd('active_taxonomies', taxonomie) - # activate taxonomie tags - for tag in taxonomie_info.machinetags(): - r_serv_tags.sadd('active_tag_{}'.format(taxonomie), tag) - #r_serv_tags.sadd('active_taxonomies_tags', tag) - else: - print('Error: {}, please update pytaxonomies'.format(taxonomie)) - -# Check if tags are enabled in AIL -def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy): - if list_tags: - active_taxonomies = get_active_taxonomies() - - for tag in list_tags: - taxonomie = get_taxonomie_from_tag(tag) - if taxonomie is None: - return False - if taxonomie not in active_taxonomies: - return False - if not is_taxonomie_tag_enabled(taxonomie, tag): - return False - - if list_tags_galaxy: - active_galaxies = get_active_galaxies() - - for tag in list_tags_galaxy: - galaxy = get_galaxy_from_tag(tag) - if galaxy is None: - return False - if galaxy not in active_galaxies: - return False - if not is_galaxy_tag_enabled(galaxy, tag): - return False - return True - -def is_taxonomie_tag(tag, namespace=None): - if not namespace: - namespace = tag.split(':')[0] - if namespace != 'misp-galaxy': - return is_taxonomie(namespace) - else: - return False - -def is_galaxy_tag(tag, namespace=None): - if not namespace: - namespace = tag.split(':')[0] - if namespace == 'misp-galaxy': - return True - else: - return False - -def is_custom_tag(tag): - return r_serv_tags.sismember('tags:custom', tag) - -# # TODO: -# def is_valid_tag(tag): -# pass - -def is_enabled_tag(tag, enabled_namespace=None): - if is_taxonomie_tag(tag): - return is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_namespace) - else: - return is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_namespace) - -def are_enabled_tags(tags): - enabled_taxonomies = get_active_taxonomies(r_set=True) - enabled_galaxies = get_active_galaxies(r_set=True) - for tag in tags: - if is_taxonomie_tag(tag): - res = is_enabled_taxonomie_tag(tag, enabled_taxonomies=enabled_taxonomies) - else: - res = is_enabled_galaxy_tag(tag, enabled_galaxies=enabled_galaxies) - if not res: - return False - return True - -def is_enabled_taxonomie_tag(tag, enabled_taxonomies=None): - if not enabled_taxonomies: - enabled_taxonomies = get_active_taxonomies() - taxonomie = get_taxonomie_from_tag(tag) - if taxonomie is None: - return False - if taxonomie not in enabled_taxonomies: - return False - if not is_taxonomie_tag_enabled(taxonomie, tag): - return False - return True - -def is_enabled_galaxy_tag(tag, enabled_galaxies=None): - if not enabled_galaxies: - enabled_galaxies = get_active_galaxies() - galaxy = get_galaxy_from_tag(tag) - if galaxy is None: - return False - if galaxy not in enabled_galaxies: - return False - if not is_galaxy_tag_enabled(galaxy, tag): - return False - return True - -def sort_tags_taxonomies_galaxies(tags): - taxonomies_tags = [] - galaxies_tags = [] - for tag in tags: - if is_taxonomie_tag(tag): - taxonomies_tags.append(tag) - else: - galaxies_tags.append(tag) - return taxonomies_tags, galaxies_tags - -#### #### - -def is_tag_in_all_tag(tag): - if r_serv_tags.sismember('list_tags', tag): - return True - else: - return False - -def get_tag_synonyms(tag): - return r_serv_tags.smembers(f'synonym_tag_{tag}') - -def get_tag_dislay_name(tag): - tag_synonyms = 
get_tag_synonyms(tag) - if not tag_synonyms: - return tag - else: - return tag + ', '.join(tag_synonyms) - -def get_tags_selector_dict(tags): - list_tags = [] - for tag in tags: - list_tags.append(get_tag_selector_dict(tag)) - return list_tags - -def get_tag_selector_dict(tag): - return {'name':get_tag_dislay_name(tag),'id':tag} - -def get_tags_selector_data(): - dict_selector = {} - dict_selector['active_taxonomies'] = get_active_taxonomies() - dict_selector['active_galaxies'] = get_active_galaxies() - return dict_selector - -def get_min_tag(tag): - tag = tag.split('=') - if len(tag) > 1: - if tag[1] != '': - tag = tag[1][1:-1] - # no value - else: - tag = tag[0][1:-1] - # custom tags - else: - tag = tag[0] - return tag - -def get_obj_tags_minimal(item_id): - return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_obj_tag(item_id) ] - -def unpack_str_tags_list(str_tags_list): - str_tags_list = str_tags_list.replace('"','\"') - if str_tags_list: - return str_tags_list.split(',') - else: - return [] - -# used by modal -def get_modal_add_tags(item_id, object_type='item'): - ''' - Modal: add tags to domain or Paste - ''' - return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), - "object_id": item_id, "object_type": object_type} - -######## NEW VERSION ######## -def create_custom_tag(tag): - r_serv_tags.sadd('tags:custom', tag) - r_serv_tags.sadd('tags:custom:enabled_tags', tag) - -def get_tag_first_seen(tag, r_int=False): - ''' - Get tag first seen (current: item only) - ''' - res = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') - if r_int: - if res is None: - return 99999999 - else: - return int(res) - return res - -def get_tag_last_seen(tag, r_int=False): - ''' - Get tag last seen (current: item only) - ''' - res = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if r_int: - if res is None: - return 0 - else: - return int(res) - return res - -# # TODO: ADD color -def get_tag_metadata(tag, r_int=False): - ''' - Get tag metadata (current: item only) - ''' - tag_metadata = {"tag": tag} - tag_metadata['first_seen'] = get_tag_first_seen(tag, r_int=r_int) - tag_metadata['last_seen'] = get_tag_last_seen(tag, r_int=r_int) - return tag_metadata - -def get_tags_min_last_seen(l_tags, r_int=False): - ''' - Get max last seen from a list of tags (current: item only) - ''' - min_last_seen = 99999999 - for tag in l_tags: - last_seen = get_tag_last_seen(tag, r_int=True) - if last_seen < min_last_seen: - min_last_seen = last_seen - if r_int: - return min_last_seen - else: - return str(min_last_seen) - -def is_obj_tagged(object_id, tag): - ''' - Check if a object is tagged - - :param object_id: object id - :type domain: str - :param tag: object type - :type domain: str - - :return: is object tagged - :rtype: boolean - ''' - return r_serv_metadata.sismember('tag:{}'.format(object_id), tag) - -def get_all_tags(): - return list(r_serv_tags.smembers('list_tags')) - -def get_all_obj_tags(object_type): - return list(r_serv_tags.smembers('list_tags:{}'.format(object_type))) - -def get_obj_tag(object_id): - ''' - Retun all the tags of a given object. - :param object_id: (item_id, domain, ...) 
- ''' - res = r_serv_metadata.smembers('tag:{}'.format(object_id)) - if res: - return list(res) - else: - return [] - -def update_tag_first_seen(tag, tag_first_seen, tag_last_seen): - if tag_first_seen == tag_last_seen: - if r_serv_tags.scard('{}:{}'.format(tag, tag_first_seen)) > 0: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_first_seen) - # no tag in db - else: - r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') - r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') - else: - if r_serv_tags.scard('{}:{}'.format(tag, tag_first_seen)) > 0: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_first_seen) - else: - tag_first_seen = Date.date_add_day(tag_first_seen) - update_tag_first_seen(tag, tag_first_seen, tag_last_seen) - -def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): - if tag_first_seen == tag_last_seen: - if r_serv_tags.scard('{}:{}'.format(tag, tag_last_seen)) > 0: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_last_seen) - # no tag in db - else: - r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') - r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') - else: - if r_serv_tags.scard('{}:{}'.format(tag, tag_last_seen)) > 0: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_last_seen) - else: - # # TODO: # FIXME: - #tag_last_seen = Date.date_substract_day(str(tag_last_seen)) - #update_tag_last_seen(tag, tag_first_seen, tag_last_seen) - pass - -## Objects tags ## - -def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True): - ''' - Update tag metadata (current: item only) - ''' - if object_type=="item": # # TODO: use another getter (get all object with date) - # get object metadata - tag_metadata = get_tag_metadata(tag, r_int=True) - ############# - ## ADD tag ## - if add_tag: - # update fisrt_seen - if tag_date < tag_metadata['first_seen']: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_date) - # update last_seen - if tag_date > tag_metadata['last_seen']: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_date) - ################ - ## REMOVE tag ## - else: - if tag_date == tag_metadata['first_seen']: - update_tag_first_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) - if tag_date == tag_metadata['last_seen']: - update_tag_last_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) - -def update_tag_global_by_obj_type(object_type, tag): - tag_deleted = False - if object_type=='item': - if not r_serv_tags.exists('tag_metadata:{}'.format(tag)): - tag_deleted = True - else: - if not r_serv_tags.exists('{}:{}'.format(object_type, tag)): - tag_deleted = True - if tag_deleted: - # update object global tags - r_serv_tags.srem('list_tags:{}'.format(object_type), tag) - # update global tags - for obj_type in get_all_objects(): - if r_serv_tags.exists('{}:{}'.format(obj_type, tag)): - tag_deleted = False - if tag_deleted: - r_serv_tags.srem('list_tags', tag) - -def get_all_objects(): - return ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'image'] - -def add_global_tag(tag, object_type=None): - ''' - Create a set of all tags used in AIL (all + by object) - - :param tag: tag - :type domain: str - :param object_type: object type - :type domain: str - ''' - r_serv_tags.sadd('list_tags', tag) - if object_type: - r_serv_tags.sadd('list_tags:{}'.format(object_type), tag) - -def add_obj_tags(object_id, object_type, tags=[], galaxy_tags=[]): - obj_date = get_obj_date(object_type, object_id) - 
for tag in tags: - if tag: - taxonomie = get_taxonomie_from_tag(tag) - if is_taxonomie_tag_enabled(taxonomie, tag): - add_tag(object_type, tag, object_id, obj_date=obj_date) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) - - for tag in galaxy_tags: - if tag: - galaxy = get_galaxy_from_tag(tag) - if is_galaxy_tag_enabled(galaxy, tag): - add_tag(object_type, tag, object_id, obj_date=obj_date) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400) - -# TEMPLATE + API QUERY -def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"): - res_dict = {} - if object_id == None: - return ({'status': 'error', 'reason': 'object_id id not found'}, 404) - if not tags and not galaxy_tags: - return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) - if object_type not in ('item', 'domain', 'image', 'decoded'): # # TODO: put me in another file - return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400) - - # remove empty tags - tags = list(filter(bool, tags)) - galaxy_tags = list(filter(bool, galaxy_tags)) - - res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags) - if res: - return res - - res_dict['tags'] = tags + galaxy_tags - res_dict['id'] = object_id - res_dict['type'] = object_type - return (res_dict, 200) - -def add_obj_tag(object_type, object_id, tag, obj_date=None): - if object_type=="item": # # TODO: # FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - if obj_date is None: - raise ValueError("obj_date is None") - - # add tag - r_serv_metadata.sadd('tag:{}'.format(object_id), tag) - r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id) - - # add domain tag - if item_basic.is_crawled(object_id) and tag!='infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"': - domain = item_basic.get_item_domain(object_id) - add_tag("domain", tag, domain) - else: - r_serv_metadata.sadd('tag:{}'.format(object_id), tag) - r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id) - -def add_tag(object_type, tag, object_id, obj_date=None): - # new tag - if not is_obj_tagged(object_id, tag): - # # TODO: # FIXME: sanityze object_type - if obj_date: - try: - obj_date = int(obj_date) - except: - obj_date = None - if not obj_date: - obj_date = get_obj_date(object_type, object_id) - add_global_tag(tag, object_type=object_type) - add_obj_tag(object_type, object_id, tag, obj_date=obj_date) - update_tag_metadata(tag, obj_date, object_type=object_type) - - # create tags stats # # TODO: put me in cache - r_serv_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1) - -def delete_obj_tag(object_type, object_id, tag, obj_date): - if object_type=="item": # # TODO: # FIXME: # REVIEW: !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
- obj_date = get_obj_date(object_type, object_id) - r_serv_metadata.srem('tag:{}'.format(object_id), tag) - r_serv_tags.srem('{}:{}'.format(tag, obj_date), object_id) - else: - r_serv_metadata.srem('tag:{}'.format(object_id), tag) - r_serv_tags.srem('{}:{}'.format(object_type, tag), object_id) - -def delete_tag(object_type, tag, object_id, obj_date=None): - # tag exist - if is_obj_tagged(object_id, tag): - if not obj_date: - obj_date = get_obj_date(object_type, object_id) - delete_obj_tag(object_type, object_id, tag, obj_date) - update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False) - update_tag_global_by_obj_type(object_type, tag) - - else: - return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400) - -# # TODO: move me -def get_obj_date(object_type, object_id): - if object_type == "item": - return int(item_basic.get_item_date(object_id)) - else: - return None - -# API QUERY -def api_delete_obj_tags(tags=[], object_id=None, object_type="item"): - if not object_id: - return ({'status': 'error', 'reason': 'object id not found'}, 404) - if not tags: - return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400) - - res = delete_obj_tags(object_id, object_type, tags) - if res: - return res - - dict_res = {} - dict_res['tags'] = tags - dict_res['id'] = object_id - return (dict_res, 200) - -def delete_obj_tags(object_id, object_type, tags): - obj_date = get_obj_date(object_type, object_id) - for tag in tags: - res = delete_tag(object_type, tag, object_id, obj_date=obj_date) - if res: - return res - -def delete_obj_all_tags(obj_id, obj_type): - delete_obj_tags(obj_id, obj_type, get_obj_tag(obj_id)) - -def sanitise_tags_date_range(l_tags, date_from=None, date_to=None): - if date_from is None or date_to is None: - date_from = get_tags_min_last_seen(l_tags, r_int=False) - date_to = date_from - return Date.sanitise_date_range(date_from, date_to) - - -# # TODO: verify tags + object_type -# get set_keys: intersection -def get_obj_keys_by_tags(object_type, l_tags, date_day=None): - l_set_keys = [] - if object_type=='item': - for tag in l_tags: - l_set_keys.append('{}:{}'.format(tag, date_day)) - else: - for tag in l_tags: - l_set_keys.append('{}:{}'.format(object_type, tag)) - return l_set_keys - -def get_obj_by_tag(key_tag): - return r_serv_tags.smembers(key_tag) - -def get_obj_by_tags(object_type, l_tags, date_from=None, date_to=None, nb_obj=50, page=1): # remove old object - # with daterange - l_tagged_obj = [] - if object_type=='item': - #sanityze date - date_range = sanitise_tags_date_range(l_tags, date_from=date_from, date_to=date_to) - l_dates = Date.substract_date(date_range['date_from'], date_range['date_to']) - - for date_day in l_dates: - l_set_keys = get_obj_keys_by_tags(object_type, l_tags, date_day) - # if len(l_set_keys) > nb_obj: - # return l_tagged_obj - if len(l_set_keys) < 2: - date_day_obj = get_obj_by_tag(l_set_keys[0]) - else: - date_day_obj = r_serv_tags.sinter(l_set_keys[0], *l_set_keys[1:]) - - # next_nb_start = len(l_tagged_obj) + len(date_day_obj) - nb_obj - # if next_nb_start > 0: - # get + filter nb_start - l_tagged_obj.extend( date_day_obj ) - - # handle pagination - nb_all_elem = len(l_tagged_obj) - nb_pages = nb_all_elem / nb_obj - if not nb_pages.is_integer(): - nb_pages = int(nb_pages)+1 - else: - nb_pages = int(nb_pages) - if page > nb_pages: - page = nb_pages - - start = nb_obj*(page -1) - if nb_pages > 1: - stop = (nb_obj*page) - l_tagged_obj = l_tagged_obj[start:stop] - # only one page - else: - stop = 
nb_all_elem - l_tagged_obj = l_tagged_obj[start:] - - if stop > nb_all_elem: - stop = nb_all_elem - stop = stop -1 - - return {"tagged_obj":l_tagged_obj, "date" : date_range, - "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop+1, "nb_all_elem":nb_all_elem} - - # without daterange - else: - l_set_keys = get_obj_keys_by_tags(object_type, l_tags) - if len(l_set_keys) < 2: - l_tagged_obj = get_obj_by_tag(l_set_keys[0]) - else: - l_tagged_obj = r_serv_tags.sinter(l_set_keys[0], *l_set_keys[1:]) - - if not l_tagged_obj: - return {"tagged_obj":l_tagged_obj, "page":0, "nb_pages":0} - - # handle pagination - nb_all_elem = len(l_tagged_obj) - nb_pages = nb_all_elem / nb_obj - if not nb_pages.is_integer(): - nb_pages = int(nb_pages)+1 - else: - nb_pages = int(nb_pages) - if page > nb_pages: - page = nb_pages - - # multiple pages - if nb_pages > 1: - start = nb_obj*(page -1) - stop = (nb_obj*page) -1 - current_index = 0 - l_obj = [] - for elem in l_tagged_obj: - if current_index > stop: - break - if start <= current_index and stop >= current_index: - l_obj.append(elem) - current_index += 1 - l_tagged_obj = l_obj - stop += 1 - if stop > nb_all_elem: - stop = nb_all_elem - # only one page - else: - start = 0 - stop = nb_all_elem - l_tagged_obj = list(l_tagged_obj) - - return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem} - - -#### TAGS EXPORT #### -# # TODO: -def is_updated_tags_to_export(): # by type - return False - -def get_list_of_solo_tags_to_export_by_type(export_type): # by type - if export_type in ['misp', 'thehive']: - return r_serv_db.smembers('whitelist_{}'.format(export_type)) - else: - return None - #r_serv_db.smembers('whitelist_hive') diff --git a/configs/6383.conf b/configs/6383.conf index 8a3c3aac..9b83fc89 100644 --- a/configs/6383.conf +++ b/configs/6383.conf @@ -72,6 +72,7 @@ maxclients 10000 # use a very strong password otherwise it will be very easy to break. # # requirepass foobared +requirepass ail # If the master is password protected (using the "masterauth" configuration # directive below) it is possible to tell the slave to authenticate before @@ -100,7 +101,7 @@ dir /home/aurelien/git/ail-framework/DATA_KVROCKS # When running daemonized, kvrocks writes a pid file in ${CONFIG_DIR}/kvrocks.pid by # default. You can specify a custom pid file location here. # pidfile /var/run/kvrocks.pid -pidfile "" +pidfile /home/aurelien/git/ail-framework/DATA_KVROCKS/kvrocks.pid # You can configure a slave instance to accept writes or not. Writing against # a slave instance may be useful to store some ephemeral data (because data @@ -636,7 +637,7 @@ rocksdb.level_compaction_dynamic_level_bytes no # The total file size of level-1 sst. # # Default: 268435456 bytes -rocksdb.max_bytes_for_level_base 268435456 +rocksdb.max_bytes_for_level_base 268435456 # Multiplication factor for the total file size of L(n+1) layers. 
# This option is a double type number in RocksDB, but kvrocks is @@ -648,3 +649,30 @@ rocksdb.max_bytes_for_level_multiplier 10 ################################ NAMESPACE ##################################### # namespace.test change.me + +backup-dir /home/aurelien/git/ail-framework/DATA_KVROCKS/backup +fullsync-recv-file-delay 0 +log-dir /home/aurelien/git/ail-framework/DATA_KVROCKS +unixsocketperm 26 + + + + +namespace.cor ail_correls +#namespace.correl ail_correls +namespace.crawl ail_crawlers +namespace.db ail_datas +namespace.dup ail_dups +namespace.obj ail_objs +namespace.stat ail_stats +namespace.tag ail_tags +namespace.track ail_trackers + +# investigation -> db ???? +# + + + + + + diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 7f1f5249..ae7a95ab 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -23,9 +23,6 @@ from os.path import join # # TODO: put me in lib/Tag from pytaxonomies import Taxonomies -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) -import Tag - sys.path.append('./modules/') sys.path.append(os.environ['AIL_BIN']) @@ -33,6 +30,7 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from lib.Users import User +from lib import Tag sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -72,9 +70,7 @@ except Exception: FLASK_PORT = 7000 # ========= REDIS =========# -r_serv_db = config_loader.get_redis_conn("ARDB_DB") -r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") -r_cache = config_loader.get_redis_conn("Redis_Cache") +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") # logs log_dir = os.path.join(os.environ['AIL_HOME'], 'logs') diff --git a/var/www/blueprints/objects_item.py b/var/www/blueprints/objects_item.py index 0d2e0da6..2c6b6ea5 100644 --- a/var/www/blueprints/objects_item.py +++ b/var/www/blueprints/objects_item.py @@ -21,8 +21,8 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from lib import item_basic from lib.objects.Items import Item +from lib import Tag from export import Export -from packages import Tag # ============ BLUEPRINT ============ diff --git a/var/www/blueprints/tags_ui.py b/var/www/blueprints/tags_ui.py index 82209c90..a00103ed 100644 --- a/var/www/blueprints/tags_ui.py +++ b/var/www/blueprints/tags_ui.py @@ -19,16 +19,13 @@ import Flask_config # Import Role_Manager from Role_Manager import login_admin, login_analyst, login_read_only -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Date -import Tag +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +from packages import Date +from lib import Tag +from lib.objects import ail_objects -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import Correlate_object - -r_cache = Flask_config.r_cache -r_serv_db = Flask_config.r_serv_db -r_serv_tags = Flask_config.r_serv_tags bootstrap_label = Flask_config.bootstrap_label # ============ BLUEPRINT ============ @@ -51,6 +48,7 @@ def add_tags(): tagsgalaxies = request.args.get('tagsgalaxies') object_id = request.args.get('object_id') object_type = request.args.get('object_type') + subtype = '' # TODO: handle subtype object list_tag = tags.split(',') list_tag_galaxies = tagsgalaxies.split(',') @@ -60,7 +58,7 @@ def add_tags(): if res[1] != 200: return str(res[0]) - return redirect(Correlate_object.get_item_url(object_type, object_id)) + return redirect(ail_objects.get_object_link(object_type, subtype, 
object_id, flask_context=True)) @tags_ui.route('/tag/delete_tag') @login_required @@ -69,12 +67,13 @@ def delete_tag(): object_type = request.args.get('object_type') object_id = request.args.get('object_id') + subtype = '' # TODO: handle subtype object tag = request.args.get('tag') res = Tag.api_delete_obj_tags(tags=[tag], object_id=object_id, object_type=object_type) if res[1] != 200: return str(res[0]) - return redirect(Correlate_object.get_item_url(object_type, object_id)) + return redirect(ail_objects.get_object_link(object_type, subtype, object_id, flask_context=True)) @tags_ui.route('/tag/get_all_tags') @@ -94,7 +93,7 @@ def get_all_taxonomies_customs_tags(): @login_read_only def get_all_obj_tags(): object_type = request.args.get('object_type') - res = Correlate_object.sanitize_object_type(object_type) + res = ail_objects.api_sanitize_object_type(object_type) if res: return jsonify(res) return jsonify(Tag.get_all_obj_tags(object_type)) @@ -173,6 +172,7 @@ def get_obj_by_tags(): # # TODO: sanityze all object_type = request.args.get('object_type') + subtype = '' # TODO: handle subtype ltags = request.args.get('ltags') page = request.args.get('page') date_from = request.args.get('date_from') @@ -191,7 +191,7 @@ def get_obj_by_tags(): list_tag.append(tag.replace('"','\"')) # object_type - res = Correlate_object.sanitize_object_type(object_type) + res = ail_objects.api_sanitize_object_type(object_type) if res: return jsonify(res) @@ -209,11 +209,12 @@ def get_obj_by_tags(): "nb_first_elem":dict_obj['nb_first_elem'], "nb_last_elem":dict_obj['nb_last_elem'], "nb_all_elem":dict_obj['nb_all_elem']} for obj_id in dict_obj['tagged_obj']: - obj_metadata = Correlate_object.get_object_metadata(object_type, obj_id) + obj_metadata = ail_objects.get_object_meta(object_type, subtype, obj_id, flask_context=True)
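+ # get_object_meta() already includes 'icon' and 'link'; only the raw id still needs to be added for the template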
obj_metadata['id'] = obj_id dict_tagged["tagged_obj"].append(obj_metadata) - dict_tagged['tab_keys'] = Correlate_object.get_obj_tag_table_keys(object_type) + dict_tagged['tab_keys'] = ail_objects.get_ui_obj_tag_table_keys(object_type) if len(list_tag) == 1: dict_tagged['current_tags'] = [ltags.replace('"', '\"')] diff --git a/var/www/modules/Flask_config.py b/var/www/modules/Flask_config.py index fd9b0a82..b026db1b 100644 --- a/var/www/modules/Flask_config.py +++ b/var/www/modules/Flask_config.py @@ -33,12 +33,13 @@ r_serv_charts = config_loader.get_redis_conn("ARDB_Trending") r_serv_sentiment = config_loader.get_redis_conn("ARDB_Sentiment") r_serv_term = config_loader.get_redis_conn("ARDB_Tracker") r_serv_cred = config_loader.get_redis_conn("ARDB_TermCred") -r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") -r_serv_db = config_loader.get_redis_conn("ARDB_DB") r_serv_statistics = config_loader.get_redis_conn("ARDB_Statistics") r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") +# # # # # # # +r_serv_db = config_loader.get_db_conn("Kvrocks_DB") +r_serv_tags = config_loader.get_db_conn("Kvrocks_Tags") # Logger (Redis) redis_logger = publisher diff --git a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py index b4bf016c..b5d1a9af 100644 --- a/var/www/modules/PasteSubmit/Flask_PasteSubmit.py +++ b/var/www/modules/PasteSubmit/Flask_PasteSubmit.py @@ -30,9 +30,10 @@ from flask_login import login_required ################################## # Import Project packages ################################## +from lib import Tag + import Paste import Import_helper -import Tag from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, Clusters diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index 5db95e48..dc18aa30 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -20,7 +20,7 @@ from pymispgalaxies import Galaxies, Clusters # ============ VARIABLES ============ import Flask_config -import Tag +from lib import Tag app = Flask_config.app baseUrl = Flask_config.baseUrl @@ -31,7 +31,6 @@ max_preview_char = Flask_config.max_preview_char max_preview_modal = Flask_config.max_preview_modal bootstrap_label = Flask_config.bootstrap_label max_tags_result = Flask_config.max_tags_result -PASTES_FOLDER = Flask_config.PASTES_FOLDER Tags = Blueprint('Tags', __name__, template_folder='templates') diff --git a/var/www/modules/hiddenServices/Flask_hiddenServices.py b/var/www/modules/hiddenServices/Flask_hiddenServices.py index 55a7abe4..5d85e6c7 100644 --- a/var/www/modules/hiddenServices/Flask_hiddenServices.py +++ b/var/www/modules/hiddenServices/Flask_hiddenServices.py @@ -130,7 +130,7 @@ def get_domain_from_url(url): pass return domain -def get_last_domains_crawled(type): +def get_last_domains_crawled(type): # DONE return r_serv_onion.lrange('last_{}'.format(type), 0 ,-1) def get_nb_domains_inqueue(type):