mirror of https://github.com/CIRCL/AIL-framework
chg: [migration] migrate Item + Domain metas
parent af583939d8
commit f9715408be
@@ -229,13 +229,6 @@ Redis and ARDB overview
 ## DB7 - Metadata:
 
 #### Crawled Items:
-##### Hset:
-| Key | Field | Value |
-| ------ | ------ | ------ |
-| paste_metadata:**item path** | super_father | **first url crawled** |
-| | father | **item father** |
-| | domain | **crawled domain**:**domain port** |
-| | screenshot | **screenshot hash** |
 
 ##### Set:
 | Key | Field |
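The documentation removed above describes the old ARDB layout, where an item's crawler metadata lived in a `paste_metadata:<item path>` hash and child links in `paste_children:<item id>` sets. The hunks below move this to per-item keys in Kvrocks (`meta:item::<item id>` hashes and `obj:child:item::<parent id>` sets). A minimal sketch of that mapping, using only key and field names that appear in this diff; the connection objects, ports and the per-item loop are assumptions, not the project's actual migration helper:

```python
import redis

# Placeholder connections (ports are assumptions, adjust to your config):
r_ardb = redis.Redis(port=6382, decode_responses=True)      # old ARDB_Metadata
r_kvrocks = redis.Redis(port=6383, decode_responses=True)   # new Kvrocks_Objects

def migrate_crawled_item(item_id):
    """Copy one item's crawler metadata from the old to the new key layout."""
    old = r_ardb.hgetall(f'paste_metadata:{item_id}')
    if not old:
        return
    if 'real_link' in old:
        # old field 'real_link' becomes 'url' on meta:item::<id>
        r_kvrocks.hset(f'meta:item::{item_id}', 'url', old['real_link'])
    if 'father' in old:
        # old field 'father' becomes 'parent', and the reverse link moves
        # from paste_children:<parent> to obj:child:item::<parent>
        r_kvrocks.hset(f'meta:item::{item_id}', 'parent', old['father'])
        r_kvrocks.sadd(f"obj:child:item::{old['father']}", item_id)
```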
@@ -570,11 +570,12 @@ def domain_migration():
                 print(f'UP {root_id}')
                 crawled_items = get_crawled_items(dom, root_id)
                 for item_id in crawled_items:
+                    item = Items.Item(item_id)
                     url = get_item_link(item_id)
-                    item_father = get_item_father(item_id)
-                    if item_father and url:
+                    parent_id = get_item_father(item_id)
+                    if parent_id and url:
                         print(f'{url} {item_id}')
-                        domain.add_crawled_item(url, item_id, item_father)
+                        item.set_crawled(url, parent_id)
 
 
         #print()
@@ -205,7 +205,7 @@ class Crawler(AbstractModule):
             msg = f'infoleak:submission="crawler";{item_id}'
             self.send_message_to_queue(msg, 'Tags')
 
-            crawlers.create_item_metadata(item_id, self.domain.id, last_url, parent_id)
+            crawlers.create_item_metadata(item_id, last_url, parent_id)
             if self.root_item is None:
                 self.root_item = item_id
             parent_id = item_id
@@ -1,85 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-from lib.objects.Items import Item
-from Helper import Process
-
-import os
-import re
-import time
-import redis
-import configparser
-
-from collections import defaultdict
-
-# TODO FIX ME OR REMOVE ME
-
-def get_dict_cve(list_paste_cve, only_one_same_cve_by_paste=False):
-    dict_keyword = {}
-
-    for paste_cve in list_paste_cve:
-        paste_content = Item(paste_cve).get_content()
-
-        cve_list = reg_cve.findall(paste_content)
-        if only_one_same_cve_by_paste:
-            cve_list = set(cve_list)
-
-        for cve in reg_cve.findall(paste_content):
-            try:
-                dict_keyword[cve] += 1
-            except KeyError:
-                dict_keyword[cve] = 1
-
-    print('------------------------------------------------')
-    if dict_keyword:
-        res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
-        for item in res:
-            pass
-            print(item)
-
-
-if __name__ == '__main__':
-
-    # CONFIG #
-    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
-    if not os.path.exists(configfile):
-        raise Exception('Unable to find the configuration file. \
-                        Did you set environment variables? \
-                        Or activate the virtualenv.')
-
-    cfg = configparser.ConfigParser()
-    cfg.read(configfile)
-
-    serv_metadata = redis.StrictRedis(
-        host=cfg.get("ARDB_Metadata", "host"),
-        port=cfg.getint("ARDB_Metadata", "port"),
-        db=cfg.getint("ARDB_Metadata", "db"),
-        decode_responses=True)
-
-    serv_tags = redis.StrictRedis(
-        host=cfg.get("ARDB_Tags", "host"),
-        port=cfg.get("ARDB_Tags", "port"),
-        db=cfg.get("ARDB_Tags", "db"),
-        decode_responses=True)
-
-    reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,7}')
-
-    #all_past_cve = serv_tags.smembers('infoleak:automatic-detection="cve"')
-    #all_past_cve_regular = serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-    #all_past_cve_crawler = serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"')
-
-    #print('{} + {} = {}'.format(len(all_past_cve_regular), len(all_past_cve_crawler), len(all_past_cve)))
-
-    print('ALL_CVE')
-    get_dict_cve(serv_tags.smembers('infoleak:automatic-detection="cve"'), True)
-    print()
-    print()
-    print()
-    print('REGULAR_CVE')
-    get_dict_cve(serv_tags.sdiff('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
-    print()
-    print()
-    print()
-    print('CRAWLER_CVE')
-    get_dict_cve(serv_tags.sinter('infoleak:automatic-detection="cve"', 'infoleak:submission="crawler"'), True)
@@ -1,28 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_redis_conn("ARDB_DB")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
-config_loader = None
-
-class User(object):
-    """AIL User."""
-
-    def __init__(self, id):
-        self.id = id
-        if self.id == '__anonymous__':
-            self.role = 'anonymous'
-        else:
-            self.role = None
-
-    def get_role(self):
-        pass
@@ -37,6 +37,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from packages import git_status
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.Domains import Domain
+from lib.objects.Items import Item
 from core import screen
 
 config_loader = ConfigLoader()
@@ -44,7 +45,6 @@ r_db = config_loader.get_db_conn("Kvrocks_DB")
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
 
 ITEMS_FOLDER = config_loader.get_config_str("Directories", "pastes")
@@ -561,12 +561,9 @@ def update_last_crawled_domain(domain_type, domain, epoch):
     r_crawler.lpush(f'last_{domain_type}', f'{domain}:{epoch}')
     r_crawler.ltrim(f'last_{domain_type}', 0, 15)
 
-def create_item_metadata(item_id, domain, url, item_father):
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'domain', domain)
-    r_serv_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-    # add this item_id to his father
-    r_serv_metadata.sadd(f'paste_children:{item_father}', item_id)
+def create_item_metadata(item_id, url, item_father):
+    item = Item(item_id)
+    item.set_crawled(url, item_father)
 
 def get_gzipped_b64_item(item_id, content):
     try:
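After this hunk, `create_item_metadata()` no longer takes the domain and no longer writes ARDB hashes directly; it delegates to the Item object. A hedged usage sketch of the new call path; the item ids and URL below are illustrative placeholders, not values from the commit:

```python
from lib.objects.Items import Item

# Illustrative ids/URL, not taken from the commit.
item_id = 'crawled/2022/09/01/example.onion/11111111-2222-3333-4444-555555555555'
parent_id = 'crawled/2022/09/01/example.onion/00000000-0000-0000-0000-000000000000'

# New path: one call on the Item object records both the URL and the parent link.
item = Item(item_id)
item.set_crawled('http://example.onion/index.html', parent_id)

# Equivalent writes under the new layout (see the Items.py hunk further down):
#   HSET meta:item::<item_id> url http://example.onion/index.html
#   HSET meta:item::<item_id> parent <parent_id>
#   SADD obj:child:item::<parent_id> <item_id>
```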
@@ -1121,15 +1118,6 @@ def save_har(har_dir, item_id, har_content):
         with open(filename, 'w') as f:
             f.write(json.dumps(har_content))
 
-# # TODO: FIXME
-def api_add_crawled_item(dict_crawled):
-
-    domain = None
-    # create item_id item_id =
-
-    save_crawled_item(item_id, response.data['html'])
-    create_item_metadata(item_id, domain, 'last_url', 'father')
-
 #### CRAWLER QUEUES ####
 
 ## queues priority:
@@ -18,6 +18,7 @@ from lib import Tag
 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None
 
 def exist_item(item_id):
@@ -85,26 +86,26 @@ def get_item_mimetype(item_id):
     return magic.from_buffer(get_item_content(item_id), mime=True)
 
 # # # # TREE CHILD/FATHER # # # #
-def is_father(item_id):
-    return r_serv_metadata.exists('paste_children:{}'.format(item_id))
+def is_parent(item_id):
+    return r_object.exists(f'obj:child:item::{item_id}')
 
 def is_children(item_id):
-    return r_serv_metadata.hexists('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hexists(f'meta:item::{item_id}', 'parent')
 
 def is_root_node(item_id):
-    if is_father(item_id) and not is_children(item_id):
+    if is_parent(item_id) and not is_children(item_id):
         return True
     else:
         return False
 
 def is_node(item_id):
-    if is_father(item_id) or is_children(item_id):
+    if is_parent(item_id) or is_children(item_id):
         return True
     else:
         return False
 
 def is_leaf(item_id):
-    if not is_father(item_id) and is_children(item_id):
+    if not is_parent(item_id) and is_children(item_id):
         return True
     else:
         return False
@@ -125,7 +126,7 @@ def is_domain_root(item_id):
     return True
 
 def get_item_url(item_id):
-    return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'real_link')
+    return r_object.hget(f'meta:item::{item_id}', 'url')
 
 def get_item_har(item_id):
     har = '/'.join(item_id.rsplit('/')[-4:])
@@ -134,34 +135,29 @@ def get_item_har(item_id):
     if os.path.isfile(path):
         return har
 
-def get_item_har_content(har):
-    with open(har, 'rb') as f:
-        har_content = f.read()
-    return har_content
-
-def get_nb_children(item_id):
-    return r_serv_metadata.scard('paste_children:{}'.format(item_id))
+# def get_item_har_content(har):
+#     with open(har, 'rb') as f:
+#         har_content = f.read()
+#     return har_content
 
 
 def get_item_parent(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'father')
+    return r_object.hget(f'meta:item::{item_id}', 'parent')
 
 def get_item_children(item_id):
-    return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))
+    return list(r_object.smembers(f'obj:child:item::{item_id}'))
 
 # # TODO: handle domain last origin in domain lib
-def _delete_node(item_id):
-    # only if item isn't deleted
-    # if is_crawled(item_id):
-    #     r_serv_metadata.hrem('paste_metadata:{}'.format(item_id), 'real_link')
-    for children_id in get_item_children(item_id):
-        r_serv_metadata.hdel('paste_metadata:{}'.format(children_id), 'father')
-    r_serv_metadata.delete('paste_children:{}'.format(item_id))
-
-    # delete regular
-    # simple if leaf
-
-    # delete item node
+# def _delete_node(item_id):
+#     # only if item isn't deleted
+#     # if is_crawled(item_id):
+#     #     delete item meta url
+#     #     delete item parent + children
+#
+#     # delete regular
+#     # simple if leaf
+#
+#     # delete item node
 
 def get_all_domain_node_by_item_id(item_id, l_nodes=[]):
     domain = get_item_domain(item_id)
@@ -174,15 +170,11 @@ def get_all_domain_node_by_item_id(item_id, l_nodes=[]):
 ##-- --##
 
 
-def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
-    parent_item_id = get_obj_id_item_id(parent_type, parent_id)
-    if parent_item_id:
-        add_item_parent(parent_item_id, item_id)
-
-def add_item_parent(parent_item_id, item_id):
-    r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'father', parent_item_id)
-    r_serv_metadata.sadd('paste_children:{}'.format(parent_item_id), item_id)
-    return True
+# def add_item_parent_by_parent_id(parent_type, parent_id, item_id):
+#     parent_item_id = get_obj_id_item_id(parent_type, parent_id)
+#     if parent_item_id:
+#         add_item_parent(parent_item_id, item_id)
+#
 
 # TODO:
 # FIXME:
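With the helpers above now backed by Kvrocks, the child/parent tree of a crawled domain can be walked entirely through `get_item_children()` and `get_item_parent()`. A small sketch, assuming those functions behave as defined in the hunks above; `iter_crawl_tree` and the starting item id are illustrative, not part of the commit:

```python
def iter_crawl_tree(item_id, depth=0):
    """Depth-first walk over the parent/child links stored in Kvrocks
    (obj:child:item:: sets and meta:item:: hashes, as used above)."""
    yield depth, item_id
    for child_id in get_item_children(item_id):
        yield from iter_crawl_tree(child_id, depth + 1)

# Illustrative root item id, not from the commit.
for depth, node in iter_crawl_tree('crawled/2022/09/01/example.onion/root-uuid'):
    print('  ' * depth, node, 'parent:', get_item_parent(node))
```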
@@ -20,7 +20,7 @@ from lib import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
 
 from lib.ail_core import paginate_iterator
-from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_har
+from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
 from lib import data_retention_engine
 
 from packages import Date
@@ -28,8 +28,6 @@ from packages import Date
 config_loader = ConfigLoader.ConfigLoader()
 r_crawler = config_loader.get_db_conn("Kvrocks_Crawler")
 
-r_metadata = config_loader.get_redis_conn("ARDB_Metadata") ######################################
-
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 config_loader = None
 
@@ -103,8 +101,8 @@ class Domain(AbstractObject):
         if obj and origin['item']:
             if origin['item'] != 'manual' and origin['item'] != 'auto':
                 item_id = origin['item']
-                origin['domain'] = r_metadata.hget(f'paste_metadata:{item_id}', 'domain')
-                origin['url'] = r_metadata.hget(f'paste_metadata:{item_id}', 'url')
+                origin['domain'] = get_item_domain(item_id)
+                origin['url'] = get_item_url(item_id)
         return origin
 
     def set_last_origin(self, origin_id):
@@ -443,15 +441,6 @@ class Domain(AbstractObject):
         else:
             r_crawler.sadd(f'full_{self.domain_type}_down', self.id)
 
-    # TODO RENAME PASTE_METADATA
-    def add_crawled_item(self, url, item_id, item_father):
-        r_metadata.hset(f'paste_metadata:{item_id}', 'father', item_father)
-        r_metadata.hset(f'paste_metadata:{item_id}', 'domain', self.id)  # FIXME REMOVE ME -> extract for real link ?????????
-        r_metadata.hset(f'paste_metadata:{item_id}', 'real_link', url)
-        # add this item_id to his father
-        r_metadata.sadd(f'paste_children:{item_father}', item_id)
-
-
 ############################################################################
 # In memory zipfile
 def _write_in_zip_buffer(zf, path, filename):
@@ -18,22 +18,21 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from export.Export import get_ail_uuid # # TODO: REPLACE
+from lib.ail_core import get_ail_uuid
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib import item_basic
 from lib import Tag
 
-
 from flask import url_for
 
 config_loader = ConfigLoader()
-# # TODO: get and sanityze ITEMS DIRECTORY
+# # TODO: get and sanitize ITEMS DIRECTORY
 ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
 ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
 
 r_cache = config_loader.get_redis_conn("Redis_Cache")
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 screenshot_directory = config_loader.get_files_directory('screenshot')
 har_directory = config_loader.get_files_directory('har')
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
@@ -113,9 +112,9 @@ class Item(AbstractObject):
         h.ignore_images = ignore_links
         return h.handle(content)
 
-    def get_size(self, str=False):
+    def get_size(self, r_str=False):
         size = os.path.getsize(self.get_filename())/1024.0
-        if str:
+        if r_str:
             size = round(size, 2)
         return size
 
@@ -126,16 +125,13 @@ class Item(AbstractObject):
     def get_parent(self):
         return item_basic.get_item_parent(self.id)
 
-    def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
-        r_serv_metadata.sadd(f'paste_children:{father_id}', self.id)
-        r_serv_metadata.hset(f'paste_metadata:{self.id}', 'father', father_id)
-
-        # f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
-        # f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
-        # => ON Object LEVEL ?????????
-
-
+    def set_parent(self, parent_id):
+        r_object.sadd(f'obj:child:item::{parent_id}', self.id)  # TODO
+        r_object.hset(f'meta:item::{self.id}', 'parent', parent_id)
 
+    def add_children(self, child_id):
+        r_object.sadd(f'obj:child:item::{self.id}', child_id)  # TODO
+        r_object.hset(f'meta:item::{child_id}', 'parent', self.id)
 
     def sanitize_id(self):
         pass
@@ -249,7 +245,11 @@ class Item(AbstractObject):
         return None
 
     def get_url(self):
-        return r_serv_metadata.hget(f'paste_metadata:{self.id}', 'real_link')
+        return r_object.hget(f'meta:item::{self.id}', 'url')
+
+    def set_crawled(self, url, parent_id):
+        r_object.hset(f'meta:item::{self.id}', 'url', url)
+        self.set_parent(parent_id)
 
     # options: set of optional meta fields
     def get_meta(self, options=set()):
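`set_crawled()` is the write half of the new layout and `get_url()` / `get_parent()` the read half. A round-trip sketch under the assumptions of this hunk, with a configured Kvrocks backend; the item ids and URL are illustrative only:

```python
from lib.objects.Items import Item

item = Item('crawled/2022/09/01/example.onion/aaaa-bbbb')   # illustrative id
item.set_crawled('http://example.onion/login', 'crawled/2022/09/01/example.onion/root')

# Reads back the fields written to meta:item::<id> above.
assert item.get_url() == 'http://example.onion/login'                 # field 'url'
assert item.get_parent() == 'crawled/2022/09/01/example.onion/root'   # field 'parent'
```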
@@ -273,7 +273,7 @@ class Item(AbstractObject):
         if 'parent' in options:
             meta['parent'] = self.get_parent()
         if 'size' in options:
-            meta['size'] = self.get_size(str=True)
+            meta['size'] = self.get_size(r_str=True)
         if 'mimetype' in options:
             content = meta.get('content')
             meta['mimetype'] = self.get_mimetype(content=content)
@@ -290,14 +290,13 @@ class Item(AbstractObject):
             crawler['url'] = self.get_url()
             if not tags:
                 tags = self.get_tags()
-            crawler['is_tags_safe'] = Tag.is_tags_safe(tags)
+            crawler['is_tags_safe'] = self.is_tags_safe(tags)
         return crawler
 
     def get_meta_lines(self, content=None):
         if not content:
             content = self.get_content()
         max_length = 0
         line_id = 0
         nb_line = 0
         for line in content.splitlines():
             length = len(line)
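The crawler block of `get_meta()` now resolves tag safety through the object itself (`self.is_tags_safe`) instead of the Tag module. A hedged usage example, using only option names that appear elsewhere in this diff ('parent', 'size', 'mimetype'); the item id is illustrative:

```python
from lib.objects.Items import Item

item = Item('crawled/2022/09/01/example.onion/aaaa-bbbb')   # illustrative id
meta = item.get_meta(options={'parent', 'size'})            # optional fields seen in this diff
print(meta.get('parent'), meta.get('size'))

# Tag safety is now answered by the object rather than the Tag module:
safe = item.is_tags_safe(item.get_tags())
```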
@@ -503,60 +502,60 @@ def get_item_languages(item_id, min_len=600, num_langs=3, min_proportion=0.2, mi
     return all_languages
 
 # API
-def get_item(request_dict):
-    if not request_dict:
-        return {'status': 'error', 'reason': 'Malformed JSON'}, 400
-
-    item_id = request_dict.get('id', None)
-    if not item_id:
-        return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
-    if not exist_item(item_id):
-        return {'status': 'error', 'reason': 'Item not found'}, 404
-
-    dict_item = {}
-    dict_item['id'] = item_id
-    date = request_dict.get('date', True)
-    if date:
-        add_separator = False
-        if request_dict.get('date_separator', False):
-            add_separator = True
-        dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
-    tags = request_dict.get('tags', True)
-    if tags:
-        dict_item['tags'] = Tag.get_object_tags('item', item_id)
-
-    size = request_dict.get('size', False)
-    if size:
-        dict_item['size'] = get_item_size(item_id)
-
-    content = request_dict.get('content', False)
-    if content:
-        # UTF-8 outpout, # TODO: use base64
-        dict_item['content'] = get_item_content(item_id)
-
-    raw_content = request_dict.get('raw_content', False)
-    if raw_content:
-        dict_item['raw_content'] = get_raw_content(item_id)
-
-    lines_info = request_dict.get('lines', False)
-    if lines_info:
-        dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
-
-    if request_dict.get('pgp'):
-        dict_item['pgp'] = {}
-        if request_dict['pgp'].get('key'):
-            dict_item['pgp']['key'] = get_item_pgp_key(item_id)
-        if request_dict['pgp'].get('mail'):
-            dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
-        if request_dict['pgp'].get('name'):
-            dict_item['pgp']['name'] = get_item_pgp_name(item_id)
-
-    if request_dict.get('cryptocurrency'):
-        dict_item['cryptocurrency'] = {}
-        if request_dict['cryptocurrency'].get('bitcoin'):
-            dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
-
-    return dict_item, 200
+# def get_item(request_dict):
+#     if not request_dict:
+#         return {'status': 'error', 'reason': 'Malformed JSON'}, 400
+#
+#     item_id = request_dict.get('id', None)
+#     if not item_id:
+#         return {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400
+#     if not exist_item(item_id):
+#         return {'status': 'error', 'reason': 'Item not found'}, 404
+#
+#     dict_item = {}
+#     dict_item['id'] = item_id
+#     date = request_dict.get('date', True)
+#     if date:
+#         add_separator = False
+#         if request_dict.get('date_separator', False):
+#             add_separator = True
+#         dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
+#     tags = request_dict.get('tags', True)
+#     if tags:
+#         dict_item['tags'] = Tag.get_object_tags('item', item_id)
+#
+#     size = request_dict.get('size', False)
+#     if size:
+#         dict_item['size'] = get_item_size(item_id)
+#
+#     content = request_dict.get('content', False)
+#     if content:
+#         # UTF-8 outpout, # TODO: use base64
+#         dict_item['content'] = get_item_content(item_id)
+#
+#     raw_content = request_dict.get('raw_content', False)
+#     if raw_content:
+#         dict_item['raw_content'] = get_raw_content(item_id)
+#
+#     lines_info = request_dict.get('lines', False)
+#     if lines_info:
+#         dict_item['lines'] = get_lines_info(item_id, dict_item.get('content', 'None'))
+#
+#     if request_dict.get('pgp'):
+#         dict_item['pgp'] = {}
+#         if request_dict['pgp'].get('key'):
+#             dict_item['pgp']['key'] = get_item_pgp_key(item_id)
+#         if request_dict['pgp'].get('mail'):
+#             dict_item['pgp']['mail'] = get_item_pgp_mail(item_id)
+#         if request_dict['pgp'].get('name'):
+#             dict_item['pgp']['name'] = get_item_pgp_name(item_id)
+#
+#     if request_dict.get('cryptocurrency'):
+#         dict_item['cryptocurrency'] = {}
+#         if request_dict['cryptocurrency'].get('bitcoin'):
+#             dict_item['cryptocurrency']['bitcoin'] = get_item_bitcoin(item_id)
+#
+#     return dict_item, 200
 
 
@@ -598,24 +597,13 @@ def api_get_items_sources():
 def get_item_list_desc(list_item_id):
     desc_list = []
     for item_id in list_item_id:
-        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_object_tags('item', item_id)} )
+        item = Item(item_id)
+        desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': item.get_tags(r_list=True)})
     return desc_list
 
 def is_crawled(item_id):
     return item_basic.is_crawled(item_id)
 
-def get_crawler_matadata(item_id, tags=None):
-    dict_crawler = {}
-    if is_crawled(item_id):
-        dict_crawler['domain'] = get_item_domain(item_id)
-        if not ltags:
-            ltags = Tag.get_object_tags('item', item_id)
-        dict_crawler['is_tags_safe'] = Tag.is_tags_safe(ltags)
-        dict_crawler['url'] = get_item_link(item_id)
-        dict_crawler['screenshot'] = get_item_screenshot(item_id)
-        dict_crawler['har'] = get_item_har_name(item_id)
-    return dict_crawler
-
 def is_onion(item_id):
     is_onion = False
     if len(is_onion) > 62:
@@ -639,18 +627,6 @@ def get_domain(item_id):
     item_id = item_id[-1]
     return item_id[:-36]
 
-def get_item_domain_with_port(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'domain')
-
-def get_item_link(item_id):
-    return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'real_link')
-
-def get_item_screenshot(item_id):
-    screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
-    if screenshot:
-        return os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:])
-    return ''
-
 def get_item_har_name(item_id):
     har_path = os.path.join(har_directory, item_id) + '.json'
     if os.path.isfile(har_path):
@@ -672,44 +648,6 @@ def get_item_filename(item_id):
     else:
         return filename
 
-def get_item_duplicate(item_id, r_list=True):
-    res = r_serv_metadata.smembers('dup:{}'.format(item_id))
-    if r_list:
-        if res:
-            return list(res)
-        else:
-            return []
-    return res
-
-def get_item_nb_duplicates(item_id):
-    return r_serv_metadata.scard('dup:{}'.format(item_id))
-
-def get_item_duplicates_dict(item_id):
-    dict_duplicates = {}
-    for duplicate in get_item_duplicate(item_id):
-        duplicate = duplicate[1:-1].replace('\'', '').replace(' ', '').split(',')
-        duplicate_id = duplicate[1]
-        if not duplicate_id in dict_duplicates:
-            dict_duplicates[duplicate_id] = {'date': get_item_date(duplicate_id, add_separator=True), 'algo': {}}
-        algo = duplicate[0]
-        if algo == 'tlsh':
-            similarity = 100 - int(duplicate[2])
-        else:
-            similarity = int(duplicate[2])
-        dict_duplicates[duplicate_id]['algo'][algo] = similarity
-    return dict_duplicates
-
-def add_item_duplicate(item_id, l_dup):
-    for item_dup in l_dup:
-        r_serv_metadata.sadd('dup:{}'.format(item_dup), item_id)
-        r_serv_metadata.sadd('dup:{}'.format(item_id), item_dup)
-
-def delete_item_duplicate(item_id):
-    item_dup = get_item_duplicate(item_id)
-    for item_dup in get_item_duplicate(item_id):
-        r_serv_metadata.srem('dup:{}'.format(item_dup), item_id)
-    r_serv_metadata.delete('dup:{}'.format(item_id))
-
 def get_raw_content(item_id):
     filepath = get_item_filepath(item_id)
     with open(filepath, 'rb') as f:
@@ -751,8 +689,10 @@ def create_item(obj_id, obj_metadata, io_content):
     if res:
         # creata tags
         if 'tags' in obj_metadata:
+            item = Item(obj_id)
             # # TODO: handle mixed tags: taxonomies and Galaxies
             Tag.api_add_obj_tags(tags=obj_metadata['tags'], object_id=obj_id, object_type="item")
+            # for tag in obj_metadata['tags']:
+            #     item.add_tag(tag)
         return True
 
     # Item not created
@@ -768,8 +708,8 @@ def delete_item(obj_id):
     # else:
     #     delete_item_duplicate(obj_id)
     #     # delete MISP event
-    #     r_serv_metadata.delete('misp_events:{}'.format(obj_id))
-    #     r_serv_metadata.delete('hive_cases:{}'.format(obj_id))
+    #     r_s_metadata.delete('misp_events:{}'.format(obj_id))
+    #     r_s_metadata.delete('hive_cases:{}'.format(obj_id))
     #
     #     os.remove(get_item_filename(obj_id))
     #
@@ -789,7 +729,6 @@ def delete_item(obj_id):
     #     delete_node(obj_id)
     #
     #     # delete item metadata
-    #     r_serv_metadata.delete('paste_metadata:{}'.format(obj_id))
     #
     #     return True
     #
@@ -817,9 +756,9 @@
 #             delete_item(child_id)
 
 
-if __name__ == '__main__':
-    content = 'test file content'
-    duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
-
-    item = Item('tests/2020/01/02/test_save.gz')
-    item.create(content, _save=False)
+# if __name__ == '__main__':
+#     content = 'test file content'
+#     duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
+#
+#     item = Item('tests/2020/01/02/test_save.gz')
+#     item.create(content, _save=False)
@@ -28,7 +28,7 @@ from lib.objects import Usernames
 
 
 config_loader = ConfigLoader()
-r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
 
 config_loader = None
 
 class AILObjects(object): ## ??????????????????????
@@ -659,7 +659,6 @@ unixsocketperm 26
 
 
 namespace.cor ail_correls
-#namespace.correl ail_correls
 namespace.crawl ail_crawlers
 namespace.db ail_datas
 namespace.dup ail_dups