chg: [migration] migrate Item + Domain metas

pull/594/head
Terrtia 2022-11-30 15:50:10 +01:00
parent af583939d8
commit f9715408be
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
11 changed files with 129 additions and 341 deletions

View File

@@ -229,13 +229,6 @@ Redis and ARDB overview
 ## DB7 - Metadata:
 #### Crawled Items:
-##### Hset:
-| Key | Field | Value |
-| ------ | ------ | ------ |
-| paste_metadata:**item path** | super_father | **first url crawled** |
-| | father | **item father** |
-| | domain | **crawled domain**:**domain port** |
-| | screenshot | **screenshot hash** |
 ##### Set:
 | Key | Field |
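The ARDB `paste_metadata:` hash removed above is superseded by per-item keys in Kvrocks. A minimal sketch of the new layout, assuming a redis-py style client bound to the Kvrocks_Objects namespace (key names come from this commit; the item ids, port and URL are made-up examples, and the old `super_father`/`screenshot` fields are not part of the new hash):

```python
import redis

# Hypothetical connection; AIL obtains this via ConfigLoader ("Kvrocks_Objects").
r_object = redis.Redis(host='localhost', port=6383, decode_responses=True)

item_id = 'crawled/2022/11/30/example.onion/uuid'         # illustrative item id
parent_id = 'crawled/2022/11/30/example.onion/root_uuid'  # illustrative parent id

# New per-item metadata hash (replaces paste_metadata:<item path>)
r_object.hset(f'meta:item::{item_id}', 'parent', parent_id)                   # was 'father'
r_object.hset(f'meta:item::{item_id}', 'url', 'http://example.onion/page')    # was 'real_link'

# New children set (replaces paste_children:<item path>)
r_object.sadd(f'obj:child:item::{parent_id}', item_id)
```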

View File

@@ -570,11 +570,12 @@ def domain_migration():
                 print(f'UP {root_id}')
                 crawled_items = get_crawled_items(dom, root_id)
                 for item_id in crawled_items:
+                    item = Items.Item(item_id)
                     url = get_item_link(item_id)
-                    item_father = get_item_father(item_id)
-                    if item_father and url:
+                    parent_id = get_item_father(item_id)
+                    if parent_id and url:
                         print(f'{url} {item_id}')
-                        domain.add_crawled_item(url, item_id, item_father)
+                        item.set_crawled(url, parent_id)
                 #print()
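The migration loop now goes through the new object API instead of `Domain.add_crawled_item()`. A hedged sketch of what one iteration does, with `migrate_crawled_item` as a hypothetical standalone helper and the ARDB readers assumed to keep returning the legacy `real_link`/`father` fields:

```python
from lib.objects import Items

def migrate_crawled_item(item_id, r_ardb_metadata):
    """Copy the legacy ARDB crawler metadata of one item onto the new Item object."""
    url = r_ardb_metadata.hget(f'paste_metadata:{item_id}', 'real_link')     # old 'real_link' field
    parent_id = r_ardb_metadata.hget(f'paste_metadata:{item_id}', 'father')  # old 'father' field
    if parent_id and url:
        # Writes meta:item::<item_id> url/parent and obj:child:item::<parent_id>
        Items.Item(item_id).set_crawled(url, parent_id)
```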

View File

@@ -205,7 +205,7 @@ class Crawler(AbstractModule):
             msg = f'infoleak:submission="crawler";{item_id}'
             self.send_message_to_queue(msg, 'Tags')
-            crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
+            crawlers.create_item_metadata(item_id, last_url, parent_id)
             if self.root_item is None:
                 self.root_item = item_id
             parent_id = item_id

View File

@@ -1,85 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-from lib.objects.Items import Item
-from Helper import Process
-
-import os
-import re
-import time
-import redis
-import configparser
-
-from collections import defaultdict
-
-# TODO FIX ME OR REMOVE ME
-def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
-    dict_keyword = {}
-
-    for paste_cve in list_paste_cve:
-        paste_content = Item(paste_cve).get_content()
-        cve_list = reg_cve.findall(paste_content)
-        if only_one_same_cve_by_paste:
-            cve_list = set(cve_list)
-        for cve in reg_cve.findall(paste_content):
-            try:
-                dict_keyword[cve] += 1
-            except KeyError:
-                dict_keyword[cve] = 1
-
-    print('------------------------------------------------')
-    if dict_keyword:
-        res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
-        for item in res:
-            pass
-            print(item)
-
-if __name__ == '__main__':
-
-    # CONFIG #
-    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-    if not os.path.exists(configfile):
-        raise Exception('Unable to find the configuration file. \
-                        Did you set environment variables? \
-                        Or activate the virtualenv.')
-
-    cfg = configparser.ConfigParser()
-    cfg.read(configfile)
-
-    serv_metadata = redis.StrictRedis(
-        host=cfg.get("ARDB_Metadata", "host"),
-        port=cfg.getint("ARDB_Metadata", "port"),
-        db=cfg.getint("ARDB_Metadata", "db"),
-        decode_responses=True)
-
-    serv_tags = redis.StrictRedis(
-        host=cfg.get("ARDB_Tags", "host"),
-        port=cfg.get("ARDB_Tags", "port"),
-        db=cfg.get("ARDB_Tags", "db"),
-        decode_responses=True)
-
-    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')
-
-    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
-    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))
-
-    print('ALL_CVE')
-    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
-    print()
-    print()
-    print()
-    print('REGULAR_CVE')
-    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
-    print()
-    print()
-    print()
-    print('CRAWLER_CVE')
-    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)

View File

@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_redis_conn("ARDB_DB")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-config_loader = None
-
-class User(object):
-    """AIL User."""
-
-    def __init__(self, id):
-        self.id = id
-        if self.id == '__anonymous__':
-            self.role = 'anonymous'
-        else:
-            self.role = None
-
-    def get_role(self):
-        pass

View File

@@ -37,6 +37,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from packages import git_status
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.Domains import Domain
+from lib.objects.Items import Item
 from core import screen

 config_loader = ConfigLoader()
@@ -44,7 +45,6 @@ r_db = config_loader.get_db_conn("Kvrocks_DB")
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
 r_cache = config_loader.get_redis_conn("Redis_Cache")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")

 ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
@@ -561,12 +561,9 @@ def update_last_crawled_domain(domain_type, domain, epoch):
     r_crawler.lpush(f'last_{domain_type}', f'{domain}:{epoch}')
     r_crawler.ltrim(f'last_{domain_type}', 0, 15)

-def create_item_metadata(item_id, domain, url, item_father):
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'domain', domain)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-    # add this item_id to his father
-    r_serv_metadata.sadd(f'paste_children:{item_father}', item_id)
+def create_item_metadata(item_id, url, item_father):
+    item = Item(item_id)
+    item.set_crawled(url, item_father)

 def get_gzipped_b64_item(item_id, content):
     try:
@@ -1121,15 +1118,6 @@ def save_har(har_dir, item_id, har_content):
     with open(filename, 'w') as f:
         f.write(json.dumps(har_content))

-# # TODO: FIXME
-def api_add_crawled_item(dict_crawled):
-
-    domain = None
-    # create item_id item_id =
-
-    save_crawled_item(item_id, response.data['html'])
-    create_item_metadata(item_id, domain, 'last_url', 'father')

 #### CRAWLER QUEUES ####
 ## queues priority:
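`create_item_metadata()` drops its `domain` parameter and is now only a thin wrapper around `Item.set_crawled()`. A short usage sketch matching the call in the Crawler module (the ids and URL are illustrative, not real data):

```python
from lib import crawlers

# The crawler no longer passes the domain; it is derived from the item path.
crawlers.create_item_metadata('crawled/2022/11/30/example.onion/uuid',       # item_id
                              'http://example.onion/index.html',             # last_url
                              'crawled/2022/11/30/example.onion/root_uuid')  # parent item
# Internally: Item(item_id).set_crawled(url, item_father)
```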

View File

@@ -18,6 +18,7 @@ from lib import Tag
 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None

 def exist_item(item_id):
@@ -85,26 +86,26 @@ def get_item_mimetype(item_id):
     return magic.from_buffer(get_item_content(item_id), mime=True)

 # # # # TREE CHILD/FATHER # # # #
-def is_father(item_id):
-    return r_serv_metadata.exists('paste_children:{}'.format(item_id))
+def is_parent(item_id):
+    return r_object.exists(f'obj:child:item::{item_id}')

 def is_children(item_id):
-    return r_serv_metadata.hexists('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hexists(f'meta:item::{item_id}', 'parent')

 def is_root_node(item_id):
-    if is_father(item_id) and not is_children(item_id):
+    if is_parent(item_id) and not is_children(item_id):
         return True
     else:
         return False

 def is_node(item_id):
-    if is_father(item_id) or is_children(item_id):
+    if is_parent(item_id) or is_children(item_id):
         return True
     else:
         return False

 def is_leaf(item_id):
-    if not is_father(item_id) and is_children(item_id):
+    if not is_parent(item_id) and is_children(item_id):
         return True
     else:
         return False
@@ -125,7 +126,7 @@ def is_domain_root(item_id):
     return True

 def get_item_url(item_id):
-    return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
+    return r_object.hget(f'meta:item::{item_id}', 'url')

 def get_item_har(item_id):
     har = '/'.join(item_id.rsplit('/')[-4:])
@@ -134,34 +135,29 @@ def get_item_har(item_id):
     if os.path.isfile(path):
         return har

-def get_item_har_content(har):
-    with open(har, 'rb') as f:
-        har_content = f.read()
-    return har_content
-
-def get_nb_children(item_id):
-    return r_serv_metadata.scard('paste_children:{}'.format(item_id))
+# def get_item_har_content(har):
+#     with open(har, 'rb') as f:
+#         har_content = f.read()
+#     return har_content

 def get_item_parent(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hget(f'meta:item::{item_id}', 'parent')

 def get_item_children(item_id):
-    return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))
+    return list(r_object.smembers(f'obj:child:item::{item_id}'))

 # # TODO: handle domain last origin in domain lib
-def _delete_node(item_id):
-    # only if item isn't deleted
-    # if is_crawled(item_id):
-    #     r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
-    for children_id in get_item_children(item_id):
-        r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
-    r_serv_metadata.delete('paste_children:{}'.format(item_id))
-
-    # delete regular
-    # simple if leaf
-
-    # delete item node
+# def _delete_node(item_id):
+#     # only if item isn't deleted
+#     # if is_crawled(item_id):
+#     #     # delete item meta url
+#     # delete item parent + children
+#
+#     # delete regular
+#     # simple if leaf
+#
+#     # delete item node

 def get_all_domain_node_by_item_id(item_id, l_nodes=[]):
     domain = get_item_domain(item_id)
@@ -174,15 +170,11 @@ def get_all_domain_node_by_item_id(item_id, l_nodes=[]):

 ##-- --##

-def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
-    parent_item_id = get_obj_id_item_id(parent_type, parent_id)
-    if parent_item_id:
-        add_item_parent(parent_item_id, item_id)
-
-def add_item_parent(parent_item_id, item_id):
-    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', parent_item_id)
-    r_serv_metadata.sadd('paste_children:{}'.format(parent_item_id), item_id)
-    return True
+# def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
+#     parent_item_id = get_obj_id_item_id(parent_type, parent_id)
+#     if parent_item_id:
+#         add_item_parent(parent_item_id, item_id)
+#

 # TODO:
 # FIXME:
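The tree helpers in item_basic now hit the Kvrocks keys directly, with `is_father` renamed to `is_parent`. A small sketch of how they relate once an item has been linked to its parent (ids are illustrative):

```python
from lib import item_basic

parent_id = 'crawled/2022/11/30/example.onion/root_uuid'  # illustrative ids
child_id = 'crawled/2022/11/30/example.onion/page_uuid'

# After Item(child_id).set_crawled(url, parent_id):
item_basic.is_parent(parent_id)          # True  - obj:child:item::<parent_id> exists
item_basic.is_children(child_id)         # True  - meta:item::<child_id> has a 'parent' field
item_basic.get_item_parent(child_id)     # -> parent_id
item_basic.get_item_children(parent_id)  # -> [child_id]
item_basic.is_root_node(parent_id)       # True when the root has children but no parent itself
```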

View File

@@ -20,7 +20,7 @@ from lib import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
 from lib.ail_core import paginate_iterator
-from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
+from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
 from lib import data_retention_engine

 from packages import Date
@@ -28,8 +28,6 @@ from packages import Date
 config_loader = ConfigLoader.ConfigLoader()
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
-r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################

 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 config_loader = None
@@ -103,8 +101,8 @@ class Domain(AbstractObject):
         if obj and origin['item']:
             if origin['item'] != 'manual' and origin['item'] != 'auto':
                 item_id = origin['item']
-                origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
-                origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
+                origin['domain'] = get_item_domain(item_id)
+                origin['url'] = get_item_url(item_id)
         return origin

     def set_last_origin(self, origin_id):
@@ -443,15 +441,6 @@ class Domain(AbstractObject):
         else:
             r_crawler.sadd(f'full_{self.domain_type}_down', self.id)

-    # TODO RENAME PASTE_METADATA
-    def add_crawled_item(self, url, item_id, item_father):
-        r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-        r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id)  # FIXME REMOVE ME -> extract for real link ?????????
-        r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-        # add this item_id to his father
-        r_metadata.sadd(f'paste_children:{item_father}', item_id)

 ############################################################################
 # In memory zipfile
 def _write_in_zip_buffer(zf, path, filename):
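With `add_crawled_item()` removed, the Domain class only reads item metadata back through `item_basic`, as in `get_last_origin()` above. A hedged sketch of that enrichment step as a standalone function (the function name and the `origin` dict shape are for illustration; the getters are the ones imported in this file):

```python
from lib.item_basic import get_item_domain, get_item_url

def enrich_last_origin(origin):
    # origin['item'] is either an item id or the literal strings 'manual' / 'auto'
    if origin.get('item') and origin['item'] not in ('manual', 'auto'):
        item_id = origin['item']
        origin['domain'] = get_item_domain(item_id)  # derived from the item path
        origin['url'] = get_item_url(item_id)        # meta:item::<id> 'url' field
    return origin
```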

View File

@@ -18,22 +18,21 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from export.Export import get_ail_uuid # # TODO: REPLACE
+from lib.ail_core import get_ail_uuid
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib import item_basic
-from lib import Tag

 from flask import url_for

 config_loader = ConfigLoader()
-# # TODO: get and sanityze ITEMS DIRECTORY
+# # TODO: get and sanitize ITEMS DIRECTORY
 ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
 ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')

 r_cache = config_loader.get_redis_conn("Redis_Cache")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 screenshot_directory = config_loader.get_files_directory('screenshot')
 har_directory = config_loader.get_files_directory('har')
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
@@ -65,7 +64,7 @@ class Item(AbstractObject):
         """
         Returns Item source/feeder name
         """
-        #return self.id.split('/')[-5]
+        # return self.id.split('/')[-5]
         l_source = self.id.split('/')[:-4]
         return os.path.join(*l_source)
@@ -113,9 +112,9 @@ class Item(AbstractObject):
         h.ignore_images = ignore_links
         return h.handle(content)

-    def get_size(self, str=False):
+    def get_size(self, r_str=False):
         size = os.path.getsize(self.get_filename())/1024.0
-        if str:
+        if r_str:
             size = round(size, 2)
         return size
@@ -126,16 +125,13 @@ class Item(AbstractObject):
     def get_parent(self):
         return item_basic.get_item_parent(self.id)

-    def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
-        r_serv_metadata.sadd(f'paste_children:{father_id}', self.id)
-        r_serv_metadata.hset(f'paste_metadata:{self.id}', 'father', father_id)
-        #f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
-        #f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
-        # => ON Object LEVEL ?????????
+    def set_parent(self, parent_id):
+        r_object.sadd(f'obj:child:item::{parent_id}', self.id) # TODO
+        r_object.hset(f'meta:item::{self.id}', 'parent', parent_id)
+
+    def add_children(self, child_id):
+        r_object.sadd(f'obj:child:item::{self.id}', child_id) # TODO
+        r_object.hset(f'meta:item::{child_id}', 'parent', self.id)

     def sanitize_id(self):
         pass
@@ -249,7 +245,11 @@ class Item(AbstractObject):
         return None

     def get_url(self):
-        return r_serv_metadata.hget(f'paste_metadata:{self.id}', 'real_link')
+        return r_object.hget(f'meta:item::{self.id}', 'url')
+
+    def set_crawled(self, url, parent_id):
+        r_object.hset(f'meta:item::{self.id}', 'url', url)
+        self.set_parent(parent_id)

     # options: set of optional meta fields
     def get_meta(self, options=set()):
@@ -273,7 +273,7 @@ class Item(AbstractObject):
         if 'parent' in options:
             meta['parent'] = self.get_parent()
         if 'size' in options:
-            meta['size'] = self.get_size(str=True)
+            meta['size'] = self.get_size(r_str=True)
         if 'mimetype' in options:
             content = meta.get('content')
             meta['mimetype'] = self.get_mimetype(content=content)
@@ -290,14 +290,13 @@ class Item(AbstractObject):
             crawler['url'] = self.get_url()
             if not tags:
                 tags = self.get_tags()
-            crawler['is_tags_safe'] = Tag.is_tags_safe(tags)
+            crawler['is_tags_safe'] = self.is_tags_safe(tags)
         return crawler

     def get_meta_lines(self, content=None):
         if not content:
             content = self.get_content()
         max_length = 0
-        line_id = 0
         nb_line = 0
         for line in content.splitlines():
             length = len(line)
@@ -503,60 +502,60 @@ def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, mi
     return all_languages

 # API
-def get_item(request_dict):
-    if not request_dict:
-        return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-
-    item_id = request_dict.get('id', None)
-    if not item_id:
-        return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-    if not exist_item(item_id):
-        return {'status': 'error', 'reason': 'Item not found'}, 404
-
-    dict_item = {}
-    dict_item['id'] = item_id
-    date = request_dict.get('date', True)
-    if date:
-        add_separator = False
-        if request_dict.get('date_separator', False):
-            add_separator = True
-        dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
-    tags = request_dict.get('tags', True)
-    if tags:
-        dict_item['tags'] = Tag.get_object_tags('item', item_id)
-
-    size = request_dict.get('size', False)
-    if size:
-        dict_item['size'] = get_item_size(item_id)
-
-    content = request_dict.get('content', False)
-    if content:
-        # UTF-8 outpout, # TODO: use base64
-        dict_item['content'] = get_item_content(item_id)
-
-    raw_content = request_dict.get('raw_content', False)
-    if raw_content:
-        dict_item['raw_content'] = get_raw_content(item_id)
-
-    lines_info = request_dict.get('lines', False)
-    if lines_info:
-        dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
-
-    if request_dict.get('pgp'):
-        dict_item['pgp'] = {}
-        if request_dict['pgp'].get('key'):
-            dict_item['pgp']['key'] = get_item_pgp_key(item_id)
-        if request_dict['pgp'].get('mail'):
-            dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
-        if request_dict['pgp'].get('name'):
-            dict_item['pgp']['name'] = get_item_pgp_name(item_id)
-
-    if request_dict.get('cryptocurrency'):
-        dict_item['cryptocurrency'] = {}
-        if request_dict['cryptocurrency'].get('bitcoin'):
-            dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
-
-    return dict_item, 200
+# def get_item(request_dict):
+#     if not request_dict:
+#         return {'status': 'error', 'reason': 'Malformed JSON'}, 400
+#
+#     item_id = request_dict.get('id', None)
+#     if not item_id:
+#         return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
+#     if not exist_item(item_id):
+#         return {'status': 'error', 'reason': 'Item not found'}, 404
+#
+#     dict_item = {}
+#     dict_item['id'] = item_id
+#     date = request_dict.get('date', True)
+#     if date:
+#         add_separator = False
+#         if request_dict.get('date_separator', False):
+#             add_separator = True
+#         dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
+#     tags = request_dict.get('tags', True)
+#     if tags:
+#         dict_item['tags'] = Tag.get_object_tags('item', item_id)
+#
+#     size = request_dict.get('size', False)
+#     if size:
+#         dict_item['size'] = get_item_size(item_id)
+#
+#     content = request_dict.get('content', False)
+#     if content:
+#         # UTF-8 outpout, # TODO: use base64
+#         dict_item['content'] = get_item_content(item_id)
+#
+#     raw_content = request_dict.get('raw_content', False)
+#     if raw_content:
+#         dict_item['raw_content'] = get_raw_content(item_id)
+#
+#     lines_info = request_dict.get('lines', False)
+#     if lines_info:
+#         dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
+#
+#     if request_dict.get('pgp'):
+#         dict_item['pgp'] = {}
+#         if request_dict['pgp'].get('key'):
+#             dict_item['pgp']['key'] = get_item_pgp_key(item_id)
+#         if request_dict['pgp'].get('mail'):
+#             dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
+#         if request_dict['pgp'].get('name'):
+#             dict_item['pgp']['name'] = get_item_pgp_name(item_id)
+#
+#     if request_dict.get('cryptocurrency'):
+#         dict_item['cryptocurrency'] = {}
+#         if request_dict['cryptocurrency'].get('bitcoin'):
+#             dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
+#
+#     return dict_item, 200
@@ -598,24 +597,13 @@ def api_get_items_sources():
 def get_item_list_desc(list_item_id):
     desc_list = []
     for item_id in list_item_id:
-        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
+        item = Item(item_id)
+        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': item.get_tags(r_list=True)})
     return desc_list

 def is_crawled(item_id):
     return item_basic.is_crawled(item_id)

-def get_crawler_matadata(item_id, tags=None):
-    dict_crawler = {}
-    if is_crawled(item_id):
-        dict_crawler['domain'] = get_item_domain(item_id)
-        if not ltags:
-            ltags = Tag.get_object_tags('item', item_id)
-        dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
-        dict_crawler['url'] = get_item_link(item_id)
-        dict_crawler['screenshot'] = get_item_screenshot(item_id)
-        dict_crawler['har'] = get_item_har_name(item_id)
-    return dict_crawler

 def is_onion(item_id):
     is_onion = False
     if len(is_onion) > 62:
@@ -639,18 +627,6 @@ def get_domain(item_id):
     item_id = item_id[-1]
     return item_id[:-36]

-def get_item_domain_with_port(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'domain')
-
-def get_item_link(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'real_link')
-
-def get_item_screenshot(item_id):
-    screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
-    if screenshot:
-        return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
-    return ''

 def get_item_har_name(item_id):
     har_path = os.path.join(har_directory, item_id) + '.json'
     if os.path.isfile(har_path):
@@ -672,44 +648,6 @@ def get_item_filename(item_id):
     else:
         return filename

-def get_item_duplicate(item_id, r_list=True):
-    res = r_serv_metadata.smembers('dup:{}'.format(item_id))
-    if r_list:
-        if res:
-            return list(res)
-        else:
-            return []
-    return res
-
-def get_item_nb_duplicates(item_id):
-    return r_serv_metadata.scard('dup:{}'.format(item_id))
-
-def get_item_duplicates_dict(item_id):
-    dict_duplicates = {}
-    for duplicate in get_item_duplicate(item_id):
-        duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',')
-        duplicate_id = duplicate[1]
-        if not duplicate_id in dict_duplicates:
-            dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}}
-        algo = duplicate[0]
-        if algo == 'tlsh':
-            similarity = 100 - int(duplicate[2])
-        else:
-            similarity = int(duplicate[2])
-        dict_duplicates[duplicate_id]['algo'][algo] = similarity
-    return dict_duplicates
-
-def add_item_duplicate(item_id, l_dup):
-    for item_dup in l_dup:
-        r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id)
-        r_serv_metadata.sadd('dup:{}'.format(item_id), item_dup)
-
-def delete_item_duplicate(item_id):
-    item_dup = get_item_duplicate(item_id)
-    for item_dup in get_item_duplicate(item_id):
-        r_serv_metadata.srem('dup:{}'.format(item_dup), item_id)
-    r_serv_metadata.delete('dup:{}'.format(item_id))

 def get_raw_content(item_id):
     filepath = get_item_filepath(item_id)
     with open(filepath, 'rb') as f:
@@ -751,8 +689,10 @@ def create_item(obj_id, obj_metadata, io_content):
     if res:
         # creata tags
         if 'tags' in obj_metadata:
+            item = Item(obj_id)
             # # TODO: handle mixed tags: taxonomies and Galaxies
-            Tag.api_add_obj_tags(tags=obj_metadata['tags'], object_id=obj_id, object_type="item")
+            # for tag in obj_metadata['tags']:
+            #     item.add_tag(tag)
         return True

     # Item not created
@@ -768,8 +708,8 @@ def delete_item(obj_id):
     # else:
     #     delete_item_duplicate(obj_id)
     # # delete MISP event
-    # r_serv_metadata.delete('misp_events:{}'.format(obj_id))
-    # r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
+    # r_s_metadata.delete('misp_events:{}'.format(obj_id))
+    # r_s_metadata.delete('hive_cases:{}'.format(obj_id))
     #
     # os.remove(get_item_filename(obj_id))
     #
@@ -789,7 +729,6 @@ def delete_item(obj_id):
     # delete_node(obj_id)
     #
     # # delete item metadata
-    # r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
     #
     # return True
     #
@@ -817,9 +756,9 @@ def delete_item(obj_id):
 #         delete_item(child_id)

-if __name__ == '__main__':
-    content = 'test file content'
-    duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
-
-    item = Item('tests/2020/01/02/test_save.gz')
-    item.create(content, _save=False)
+# if __name__ == '__main__':
+#     content = 'test file content'
+#     duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
+#
+#     item = Item('tests/2020/01/02/test_save.gz')
+#     item.create(content, _save=False)

View File

@@ -28,7 +28,7 @@ from lib.objects import Usernames

 config_loader = ConfigLoader()
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 config_loader = None

 class AILObjects(object): ## ??????????????????????

View File

@@ -659,7 +659,6 @@ unixsocketperm 26
 namespace.cor ail_correls
-#namespace.correl ail_correls
 namespace.crawl ail_crawlers
 namespace.db ail_datas
 namespace.dup ail_dups