From f9715408be8925a188195cd24a9f83793654adf0 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Wed, 30 Nov 2022 15:50:10 +0100
Subject: [PATCH] chg: [migration] migrate Item + Domain metas

---
 OVERVIEW.md                    |   7 -
 bin/DB_KVROCKS_MIGRATION.py    |   7 +-
 bin/crawlers/Crawler.py        |   2 +-
 bin/helper/CVE_check.py        |  85 ------------
 bin/lib/ail_users.py           |  28 ----
 bin/lib/crawlers.py            |  20 +--
 bin/lib/item_basic.py          |  66 ++++------
 bin/lib/objects/Domains.py     |  17 +--
 bin/lib/objects/Items.py       | 233 ++++++++++++---------------------
 bin/lib/objects/ail_objects.py |   2 +-
 configs/6383.conf              |   3 +-
 11 files changed, 129 insertions(+), 341 deletions(-)
 delete mode 100755 bin/helper/CVE_check.py
 delete mode 100755 bin/lib/ail_users.py

diff --git a/OVERVIEW.md b/OVERVIEW.md
index 5790acd9..cc573810 100644
--- a/OVERVIEW.md
+++ b/OVERVIEW.md
@@ -229,13 +229,6 @@ Redis and ARDB overview
 ## DB7 - Metadata:
 
 #### Crawled Items:
-##### Hset:
-| Key | Field | Value |
-| ------ | ------ | ------ |
-| paste_metadata:**item path** | super_father | **first url crawled** |
-| | father | **item father** |
-| | domain | **crawled domain**:**domain port** |
-| | screenshot | **screenshot hash** |
 
 ##### Set:
 | Key | Field |
diff --git a/bin/DB_KVROCKS_MIGRATION.py b/bin/DB_KVROCKS_MIGRATION.py
index 2b5d3c1b..5f84351f 100755
--- a/bin/DB_KVROCKS_MIGRATION.py
+++ b/bin/DB_KVROCKS_MIGRATION.py
@@ -570,11 +570,12 @@ def domain_migration():
             print(f'UP {root_id}')
             crawled_items = get_crawled_items(dom, root_id)
             for item_id in crawled_items:
+                item = Items.Item(item_id)
                 url = get_item_link(item_id)
-                item_father = get_item_father(item_id)
-                if item_father and url:
+                parent_id = get_item_father(item_id)
+                if parent_id and url:
                     print(f'{url} {item_id}')
-                    domain.add_crawled_item(url, item_id, item_father)
+                    item.set_crawled(url, parent_id)
 
 
     #print()
diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index d6aabd72..b069eff2 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -205,7 +205,7 @@ class Crawler(AbstractModule):
             msg = f'infoleak:submission="crawler";{item_id}'
             self.send_message_to_queue(msg, 'Tags')
 
-            crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
+            crawlers.create_item_metadata(item_id, last_url, parent_id)
             if self.root_item is None:
                 self.root_item = item_id
             parent_id = item_id
diff --git a/bin/helper/CVE_check.py b/bin/helper/CVE_check.py
deleted file mode 100755
index e6200506..00000000
--- a/bin/helper/CVE_check.py
+++ /dev/null
@@ -1,85 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-from lib.objects.Items import Item
-from Helper import Process
-
-import os
-import re
-import time
-import redis
-import configparser
-
-from collections import defaultdict
-
-# TODO FIX ME OR REMOVE ME
-
-def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
-    dict_keyword = {}
-
-    for paste_cve in list_paste_cve:
-        paste_content = Item(paste_cve).get_content()
-
-        cve_list = reg_cve.findall(paste_content)
-        if only_one_same_cve_by_paste:
-            cve_list = set(cve_list)
-
-        for cve in reg_cve.findall(paste_content):
-            try:
-                dict_keyword[cve] += 1
-            except KeyError:
-                dict_keyword[cve] = 1
-
-    print('------------------------------------------------')
-    if dict_keyword:
-        res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
-        for item in res:
-            pass
-            print(item)
-
-
-
-if __name__ == '__main__':
-
-    # CONFIG #
-    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-    if not os.path.exists(configfile):
-        raise Exception('Unable to find the configuration file. \
-                        Did you set environment variables? \
-                        Or activate the virtualenv.')
-
-    cfg = configparser.ConfigParser()
-    cfg.read(configfile)
-
-    serv_metadata = redis.StrictRedis(
-        host=cfg.get("ARDB_Metadata", "host"),
-        port=cfg.getint("ARDB_Metadata", "port"),
-        db=cfg.getint("ARDB_Metadata", "db"),
-        decode_responses=True)
-
-    serv_tags = redis.StrictRedis(
-        host=cfg.get("ARDB_Tags", "host"),
-        port=cfg.get("ARDB_Tags", "port"),
-        db=cfg.get("ARDB_Tags", "db"),
-        decode_responses=True)
-
-    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')
-
-    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
-    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-
-    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))
-
-    print('ALL_CVE')
-    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
-    print()
-    print()
-    print()
-    print('REGULAR_CVE')
-    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
-    print()
-    print()
-    print()
-    print('CRAWLER_CVE')
-    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
diff --git a/bin/lib/ail_users.py b/bin/lib/ail_users.py
deleted file mode 100755
index 67d252ef..00000000
--- a/bin/lib/ail_users.py
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_redis_conn("ARDB_DB")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-config_loader = None
-
-class User(object):
-    """AIL User."""
-
-    def __init__(self, id):
-        self.id = id
-        if self.id == '__anonymous__':
-            self.role = 'anonymous'
-        else:
-            self.role = None
-
-    def get_role(self):
-        pass
-
-
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 21dd9cf3..f29c382e 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -37,6 +37,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from packages import git_status
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.Domains import Domain
+from lib.objects.Items import Item
 from core import screen
 
 config_loader = ConfigLoader()
@@ -44,7 +45,6 @@ r_db = config_loader.get_db_conn("Kvrocks_DB")
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
 
 ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
@@ -561,12 +561,9 @@ def update_last_crawled_domain(domain_type, domain, epoch):
     r_crawler.lpush(f'last_{domain_type}', f'{domain}:{epoch}')
     r_crawler.ltrim(f'last_{domain_type}', 0, 15)
 
-def create_item_metadata(item_id, domain, url, item_father):
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'domain', domain)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-    # add this item_id to his father
-    r_serv_metadata.sadd(f'paste_children:{item_father}', item_id)
+def create_item_metadata(item_id, url, item_father):
+    item = Item(item_id)
+    item.set_crawled(url, item_father)
 
 def get_gzipped_b64_item(item_id, content):
     try:
@@ -1121,15 +1118,6 @@ def save_har(har_dir, item_id, har_content):
     with open(filename, 'w') as f:
         f.write(json.dumps(har_content))
 
-# # TODO: FIXME
-def api_add_crawled_item(dict_crawled):
-
-    domain = None
-    # create item_id item_id =
-
-    save_crawled_item(item_id, response.data['html'])
-    create_item_metadata(item_id, domain, 'last_url', 'father')
-
 
 #### CRAWLER QUEUES ####
 ## queues priority:
diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py
index b62b0193..d8b89579 100755
--- a/bin/lib/item_basic.py
+++ b/bin/lib/item_basic.py
@@ -18,6 +18,7 @@ from lib import Tag
 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None
 
 def exist_item(item_id):
@@ -85,26 +86,26 @@ def get_item_mimetype(item_id):
     return magic.from_buffer(get_item_content(item_id), mime=True)
 
 # # # # TREE CHILD/FATHER # # # #
-def is_father(item_id):
-    return r_serv_metadata.exists('paste_children:{}'.format(item_id))
+def is_parent(item_id):
+    return r_object.exists(f'obj:child:item::{item_id}')
 
 def is_children(item_id):
-    return r_serv_metadata.hexists('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hexists(f'meta:item::{item_id}', 'parent')
 
 def is_root_node(item_id):
-    if is_father(item_id) and not is_children(item_id):
+    if is_parent(item_id) and not is_children(item_id):
         return True
     else:
         return False
 
 def is_node(item_id):
-    if is_father(item_id) or is_children(item_id):
+    if is_parent(item_id) or is_children(item_id):
         return True
     else:
         return False
 
 def is_leaf(item_id):
-    if not is_father(item_id) and is_children(item_id):
+    if not is_parent(item_id) and is_children(item_id):
         return True
     else:
         return False
@@ -125,7 +126,7 @@ def is_domain_root(item_id):
         return True
 
 def get_item_url(item_id):
-    return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
+    return r_object.hget(f'meta:item::{item_id}', 'url')
 
 def get_item_har(item_id):
     har = '/'.join(item_id.rsplit('/')[-4:])
@@ -134,34 +135,29 @@ def get_item_har(item_id):
     if os.path.isfile(path):
         return har
 
-def get_item_har_content(har):
-    with open(har, 'rb') as f:
-        har_content = f.read()
-    return har_content
-
-def get_nb_children(item_id):
-    return r_serv_metadata.scard('paste_children:{}'.format(item_id))
+# def get_item_har_content(har):
+#     with open(har, 'rb') as f:
+#         har_content = f.read()
+#     return har_content
 
 def get_item_parent(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hget(f'meta:item::{item_id}', 'parent')
 
 def get_item_children(item_id):
-    return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))
+    return list(r_object.smembers(f'obj:child:item::{item_id}'))
 
 # # TODO: handle domain last origin in domain lib
-def _delete_node(item_id):
-    # only if item isn't deleted
-    # if is_crawled(item_id):
-    #     r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
-    for children_id in get_item_children(item_id):
-        r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
-    r_serv_metadata.delete('paste_children:{}'.format(item_id))
-
-    # delete regular
-    # simple if leaf
-
-    # delete item node
+# def _delete_node(item_id):
+#     # only if item isn't deleted
+#     # if is_crawled(item_id):
+#     # delete item meta url
+#     # delete item parent + children
+#
+#     # delete regular
+#     # simple if leaf
+#
+#     # delete item node
 
 def get_all_domain_node_by_item_id(item_id, l_nodes=[]):
     domain = get_item_domain(item_id)
@@ -174,15 +170,11 @@
 
 ##-- --##
 
-def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
-    parent_item_id = get_obj_id_item_id(parent_type, parent_id)
-    if parent_item_id:
-        add_item_parent(parent_item_id, item_id)
-
-def add_item_parent(parent_item_id, item_id):
-    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', parent_item_id)
-    r_serv_metadata.sadd('paste_children:{}'.format(parent_item_id), item_id)
-    return True
+# def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
+#     parent_item_id = get_obj_id_item_id(parent_type, parent_id)
+#     if parent_item_id:
+#         add_item_parent(parent_item_id, item_id)
+#
 
 # TODO:
 # FIXME:
diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py
index 50f4685c..4d84a75b 100755
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@@ -20,7 +20,7 @@ from lib import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
 from lib.ail_core import paginate_iterator
-from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
+from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
 from lib import data_retention_engine
 
 from packages import Date
 
@@ -28,8 +28,6 @@ from packages import Date
 config_loader = ConfigLoader.ConfigLoader()
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
 
-r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################
-
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 config_loader = None
 
@@ -103,8 +101,8 @@ class Domain(AbstractObject):
         if obj and origin['item']:
             if origin['item'] != 'manual' and origin['item'] != 'auto':
                 item_id = origin['item']
-                origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
-                origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
+                origin['domain'] = get_item_domain(item_id)
+                origin['url'] = get_item_url(item_id)
         return origin
 
     def set_last_origin(self, origin_id):
@@ -443,15 +441,6 @@ class Domain(AbstractObject):
         else:
             r_crawler.sadd(f'full_{self.domain_type}_down', self.id)
 
-    # TODO RENAME PASTE_METADATA
-    def add_crawled_item(self, url, item_id, item_father):
-        r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-        r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id) # FIXME REMOVE ME -> extract for real link ?????????
-        r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-        # add this item_id to his father
-        r_metadata.sadd(f'paste_children:{item_father}', item_id)
-
-
 ############################################################################
 # In memory zipfile
 def _write_in_zip_buffer(zf, path, filename):
diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py
index b4bbf460..3407684f 100755
--- a/bin/lib/objects/Items.py
+++ b/bin/lib/objects/Items.py
@@ -18,22 +18,21 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from export.Export import get_ail_uuid # # TODO: REPLACE
+from lib.ail_core import get_ail_uuid
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib import item_basic
-from lib import Tag
 
 from flask import url_for
 
 config_loader = ConfigLoader()
-# # TODO: get and sanityze ITEMS DIRECTORY
+# # TODO: get and sanitize ITEMS DIRECTORY
 ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
 ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
 
 r_cache = config_loader.get_redis_conn("Redis_Cache")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 screenshot_directory = config_loader.get_files_directory('screenshot')
 har_directory = config_loader.get_files_directory('har')
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
@@ -65,7 +64,7 @@ class Item(AbstractObject):
         """
         Returns Item source/feeder name
         """
-        #return self.id.split('/')[-5]
+        # return self.id.split('/')[-5]
         l_source = self.id.split('/')[:-4]
         return os.path.join(*l_source)
 
@@ -113,9 +112,9 @@ class Item(AbstractObject):
             h.ignore_images = ignore_links
         return h.handle(content)
 
-    def get_size(self, str=False):
+    def get_size(self, r_str=False):
         size = os.path.getsize(self.get_filename())/1024.0
-        if str:
+        if r_str:
             size = round(size, 2)
         return size
 
@@ -126,16 +125,13 @@ class Item(AbstractObject):
     def get_parent(self):
         return item_basic.get_item_parent(self.id)
 
-    def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
-        r_serv_metadata.sadd(f'paste_children:{father_id}', self.id)
-        r_serv_metadata.hset(f'paste_metadata:{self.id}', 'father', father_id)
-
-        #f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
-        #f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
-        # => ON Object LEVEL ?????????
-
-
+    def set_parent(self, parent_id):
+        r_object.sadd(f'obj:child:item::{parent_id}', self.id)    # TODO
+        r_object.hset(f'meta:item::{self.id}', 'parent', parent_id)
+
+    def add_children(self, child_id):
+        r_object.sadd(f'obj:child:item::{self.id}', child_id)    # TODO
+        r_object.hset(f'meta:item::{child_id}', 'parent', self.id)
 
     def sanitize_id(self):
         pass
@@ -249,7 +245,11 @@ class Item(AbstractObject):
         return None
 
     def get_url(self):
-        return r_serv_metadata.hget(f'paste_metadata:{self.id}', 'real_link')
+        return r_object.hget(f'meta:item::{self.id}', 'url')
+
+    def set_crawled(self, url, parent_id):
+        r_object.hset(f'meta:item::{self.id}', 'url', url)
+        self.set_parent(parent_id)
 
     # options: set of optional meta fields
     def get_meta(self, options=set()):
@@ -273,7 +273,7 @@ class Item(AbstractObject):
         if 'parent' in options:
             meta['parent'] = self.get_parent()
         if 'size' in options:
-            meta['size'] = self.get_size(str=True)
+            meta['size'] = self.get_size(r_str=True)
         if 'mimetype' in options:
             content = meta.get('content')
             meta['mimetype'] = self.get_mimetype(content=content)
@@ -290,14 +290,13 @@ class Item(AbstractObject):
             crawler['url'] = self.get_url()
             if not tags:
                 tags = self.get_tags()
-            crawler['is_tags_safe'] = Tag.is_tags_safe(tags)
+            crawler['is_tags_safe'] = self.is_tags_safe(tags)
         return crawler
 
     def get_meta_lines(self, content=None):
         if not content:
             content = self.get_content()
         max_length = 0
-        line_id = 0
         nb_line = 0
         for line in content.splitlines():
             length = len(line)
@@ -503,60 +502,60 @@ def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, mi
     return all_languages
 
 # API
-def get_item(request_dict):
-    if not request_dict:
-        return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-
-    item_id = request_dict.get('id', None)
-    if not item_id:
-        return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-    if not exist_item(item_id):
-        return {'status': 'error', 'reason': 'Item not found'}, 404
-
-    dict_item = {}
-    dict_item['id'] = item_id
-    date = request_dict.get('date', True)
-    if date:
-        add_separator = False
-        if request_dict.get('date_separator', False):
-            add_separator = True
-        dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
-    tags = request_dict.get('tags', True)
-    if tags:
-        dict_item['tags'] = Tag.get_object_tags('item', item_id)
-
-    size = request_dict.get('size', False)
-    if size:
-        dict_item['size'] = get_item_size(item_id)
-
-    content = request_dict.get('content', False)
-    if content:
-        # UTF-8 outpout, # TODO: use base64
-        dict_item['content'] = get_item_content(item_id)
-
-    raw_content = request_dict.get('raw_content', False)
-    if raw_content:
-        dict_item['raw_content'] = get_raw_content(item_id)
-
-    lines_info = request_dict.get('lines', False)
-    if lines_info:
-        dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
-
-    if request_dict.get('pgp'):
-        dict_item['pgp'] = {}
-        if request_dict['pgp'].get('key'):
-            dict_item['pgp']['key'] = get_item_pgp_key(item_id)
-        if request_dict['pgp'].get('mail'):
-            dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
-        if request_dict['pgp'].get('name'):
-            dict_item['pgp']['name'] = get_item_pgp_name(item_id)
-
-    if request_dict.get('cryptocurrency'):
-        dict_item['cryptocurrency'] = {}
-        if request_dict['cryptocurrency'].get('bitcoin'):
-            dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
-
-    return dict_item, 200
+# def get_item(request_dict):
+#     if not request_dict:
+#         return {'status': 'error', 'reason': 'Malformed JSON'}, 400
+#
+#     item_id = request_dict.get('id', None)
+#     if not item_id:
+#         return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
+#     if not exist_item(item_id):
+#         return {'status': 'error', 'reason': 'Item not found'}, 404
+#
+#     dict_item = {}
+#     dict_item['id'] = item_id
+#     date = request_dict.get('date', True)
+#     if date:
+#         add_separator = False
+#         if request_dict.get('date_separator', False):
+#             add_separator = True
+#         dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
+#     tags = request_dict.get('tags', True)
+#     if tags:
+#         dict_item['tags'] = Tag.get_object_tags('item', item_id)
+#
+#     size = request_dict.get('size', False)
+#     if size:
+#         dict_item['size'] = get_item_size(item_id)
+#
+#     content = request_dict.get('content', False)
+#     if content:
+#         # UTF-8 outpout, # TODO: use base64
+#         dict_item['content'] = get_item_content(item_id)
+#
+#     raw_content = request_dict.get('raw_content', False)
+#     if raw_content:
+#         dict_item['raw_content'] = get_raw_content(item_id)
+#
+#     lines_info = request_dict.get('lines', False)
+#     if lines_info:
+#         dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
+#
+#     if request_dict.get('pgp'):
+#         dict_item['pgp'] = {}
+#         if request_dict['pgp'].get('key'):
+#             dict_item['pgp']['key'] = get_item_pgp_key(item_id)
+#         if request_dict['pgp'].get('mail'):
+#             dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
+#         if request_dict['pgp'].get('name'):
+#             dict_item['pgp']['name'] = get_item_pgp_name(item_id)
+#
+#     if request_dict.get('cryptocurrency'):
+#         dict_item['cryptocurrency'] = {}
+#         if request_dict['cryptocurrency'].get('bitcoin'):
+#             dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
+#
+#     return dict_item, 200
 
 
 
@@ -598,24 +597,13 @@ def api_get_items_sources():
 def get_item_list_desc(list_item_id):
     desc_list = []
     for item_id in list_item_id:
-        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
+        item = Item(item_id)
+        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': item.get_tags(r_list=True)})
     return desc_list
 
 def is_crawled(item_id):
     return item_basic.is_crawled(item_id)
 
-def get_crawler_matadata(item_id, tags=None):
-    dict_crawler = {}
-    if is_crawled(item_id):
-        dict_crawler['domain'] = get_item_domain(item_id)
-        if not ltags:
-            ltags = Tag.get_object_tags('item', item_id)
-        dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
-        dict_crawler['url'] = get_item_link(item_id)
-        dict_crawler['screenshot'] = get_item_screenshot(item_id)
-        dict_crawler['har'] = get_item_har_name(item_id)
-    return dict_crawler
-
 def is_onion(item_id):
     is_onion = False
     if len(is_onion) > 62:
@@ -639,18 +627,6 @@ def get_domain(item_id):
     item_id = item_id[-1]
     return item_id[:-36]
 
-def get_item_domain_with_port(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'domain')
-
-def get_item_link(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'real_link')
-
-def get_item_screenshot(item_id):
-    screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
-    if screenshot:
-        return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
-    return ''
-
 def get_item_har_name(item_id):
     har_path = os.path.join(har_directory, item_id) + '.json'
     if os.path.isfile(har_path):
@@ -672,44 +648,6 @@ def get_item_filename(item_id):
     else:
         return filename
 
-def get_item_duplicate(item_id, r_list=True):
-    res = r_serv_metadata.smembers('dup:{}'.format(item_id))
-    if r_list:
-        if res:
-            return list(res)
-        else:
-            return []
-    return res
-
-def get_item_nb_duplicates(item_id):
-    return r_serv_metadata.scard('dup:{}'.format(item_id))
-
-def get_item_duplicates_dict(item_id):
-    dict_duplicates = {}
-    for duplicate in get_item_duplicate(item_id):
-        duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',')
-        duplicate_id = duplicate[1]
-        if not duplicate_id in dict_duplicates:
-            dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}}
-        algo = duplicate[0]
-        if algo == 'tlsh':
-            similarity = 100 - int(duplicate[2])
-        else:
-            similarity = int(duplicate[2])
-        dict_duplicates[duplicate_id]['algo'][algo] = similarity
-    return dict_duplicates
-
-def add_item_duplicate(item_id, l_dup):
-    for item_dup in l_dup:
-        r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id)
-        r_serv_metadata.sadd('dup:{}'.format(item_id), item_dup)
-
-def delete_item_duplicate(item_id):
-    item_dup = get_item_duplicate(item_id)
-    for item_dup in get_item_duplicate(item_id):
-        r_serv_metadata.srem('dup:{}'.format(item_dup), item_id)
-    r_serv_metadata.delete('dup:{}'.format(item_id))
-
 def get_raw_content(item_id):
     filepath = get_item_filepath(item_id)
     with open(filepath, 'rb') as f:
@@ -751,8 +689,10 @@ def create_item(obj_id, obj_metadata, io_content):
     if res:
         # creata tags
         if 'tags' in obj_metadata:
+            item = Item(obj_id)
             # # TODO: handle mixed tags: taxonomies and Galaxies
-            Tag.api_add_obj_tags(tags=obj_metadata['tags'], object_id=obj_id, object_type="item")
+            # for tag in obj_metadata['tags']:
+            #     item.add_tag(tag)
         return True
 
     # Item not created
@@ -768,8 +708,8 @@ def delete_item(obj_id):
 #     else:
 #         delete_item_duplicate(obj_id)
 #     # delete MISP event
-#     r_serv_metadata.delete('misp_events:{}'.format(obj_id))
-#     r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
+#     r_s_metadata.delete('misp_events:{}'.format(obj_id))
+#     r_s_metadata.delete('hive_cases:{}'.format(obj_id))
 #
 #     os.remove(get_item_filename(obj_id))
 #
@@ -789,7 +729,6 @@ def delete_item(obj_id):
 #         delete_node(obj_id)
 #
 #     # delete item metadata
-#     r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
 #
 #     return True
 #
@@ -817,9 +756,9 @@ def delete_item(obj_id):
 #             delete_item(child_id)
 
 
-if __name__ == '__main__':
-    content = 'test file content'
-    duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
-
-    item = Item('tests/2020/01/02/test_save.gz')
-    item.create(content, _save=False)
+# if __name__ == '__main__':
+#     content = 'test file content'
+#     duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
+#
+#     item = Item('tests/2020/01/02/test_save.gz')
+#     item.create(content, _save=False)
diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py
index 48c2589f..03c09ea6 100755
--- a/bin/lib/objects/ail_objects.py
+++ b/bin/lib/objects/ail_objects.py
@@ -28,7 +28,7 @@ from lib.objects import Usernames
 
 config_loader = ConfigLoader()
 
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+
 config_loader = None
 
 class AILObjects(object): ## ??????????????????????
diff --git a/configs/6383.conf b/configs/6383.conf
index 9b83fc89..595bd3b7 100644
--- a/configs/6383.conf
+++ b/configs/6383.conf
@@ -658,8 +658,7 @@ unixsocketperm 26
 
 
 
-namespace.cor ail_correls
-#namespace.correl ail_correls
+namespace.cor ail_correls
 namespace.crawl ail_crawlers
 namespace.db ail_datas
 namespace.dup ail_dups
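
Reviewer note, not part of the patch: the hunks above move crawled-item metadata from the ARDB keys paste_metadata:<item_id> / paste_children:<item_id> to the Kvrocks hash meta:item::<item_id> (fields 'parent' and 'url') and the set obj:child:item::<item_id>. Below is a minimal sketch of that layout in redis-py, assuming a local Kvrocks endpoint on port 6383; the patched code obtains its connection through lib.ConfigLoader ("Kvrocks_Objects") instead, so host, port and function names here are illustrative only.

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# Sketch only: key names follow bin/lib/objects/Items.py above; host/port are assumptions.
import redis

r_object = redis.Redis(host='127.0.0.1', port=6383, decode_responses=True)

def set_crawled(item_id, url, parent_id):
    # meta:item::<id> hash holds the per-item metadata: crawled url and parent item
    r_object.hset(f'meta:item::{item_id}', 'url', url)
    r_object.hset(f'meta:item::{item_id}', 'parent', parent_id)
    # obj:child:item::<parent> set indexes the children of the parent item
    r_object.sadd(f'obj:child:item::{parent_id}', item_id)

def get_parent(item_id):
    return r_object.hget(f'meta:item::{item_id}', 'parent')

def get_children(item_id):
    return list(r_object.smembers(f'obj:child:item::{item_id}'))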