chg; [Duplicates module] refactor module + DB keys

pull/594/head
Terrtia 2022-07-13 15:10:27 +02:00
parent 8672671e51
commit 2f8a5a333a
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
11 changed files with 480 additions and 290 deletions

View File

@ -217,6 +217,8 @@ def item_submit_migration():
# /!\ KEY COLLISION
# # TODO: change db
def tags_migration():
pass

View File

@ -1,198 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
This huge module is, in short term, checking duplicates.
Its input comes from other modules, namely:
Credential, CreditCard, Keys, Mails, SQLinjectionDetection, CVE and Phone
This one differ from v1 by only using redis and not json file stored on disk
Perform comparisions with ssdeep and tlsh
Requirements:
-------------
"""
import redis
import os
import time
from datetime import datetime, timedelta
import json
import ssdeep
import tlsh
from packages import Paste
from pubsublogger import publisher
from Helper import Process
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Duplicates'
p = Process(config_section)
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
maximum_month_range = int(p.config.get("Modules_Duplicates", "maximum_month_range"))
threshold_duplicate_ssdeep = int(p.config.get("Modules_Duplicates", "threshold_duplicate_ssdeep"))
threshold_duplicate_tlsh = int(p.config.get("Modules_Duplicates", "threshold_duplicate_tlsh"))
threshold_set = {}
threshold_set['ssdeep'] = threshold_duplicate_ssdeep
threshold_set['tlsh'] = threshold_duplicate_tlsh
min_paste_size = float(p.config.get("Modules_Duplicates", "min_paste_size"))
# REDIS #
dico_redis = {}
date_today = datetime.today()
for year in range(2013, date_today.year+1):
for month in range(0, 13):
dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
host=p.config.get("ARDB_DB", "host"),
port=p.config.get("ARDB_DB", "port"),
db=str(year) + str(month),
decode_responses=True)
# FUNCTIONS #
publisher.info("Script duplicate started")
while True:
try:
hash_dico = {}
dupl = set()
dico_range_list = []
x = time.time()
message = p.get_from_set()
if message is not None:
path = message
PST = Paste.Paste(path)
else:
publisher.debug("Script Attribute is idling 10s")
print('sleeping')
time.sleep(10)
continue
# the paste is too small
if (PST._get_p_size() < min_paste_size):
continue
PST._set_p_hash_kind("ssdeep")
PST._set_p_hash_kind("tlsh")
# Assignate the correct redis connexion
r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
# Creating the dico name: yyyymm
# Get the date of the range
date_range = date_today - timedelta(days = maximum_month_range*30.4166666)
num_of_month = (date_today.year - date_range.year)*12 + (date_today.month - date_range.month)
for diff_month in range(0, num_of_month+1):
curr_date_range = date_today - timedelta(days = diff_month*30.4166666)
to_append = str(curr_date_range.year)+str(curr_date_range.month).zfill(2)
dico_range_list.append(to_append)
# Use all dico in range
dico_range_list = dico_range_list[0:maximum_month_range]
# UNIQUE INDEX HASHS TABLE
yearly_index = str(date_today.year)+'00'
r_serv0 = dico_redis[yearly_index]
r_serv0.incr("current_index")
index = (r_serv0.get("current_index")) + str(PST.p_date)
# Open selected dico range
opened_dico = []
for dico_name in dico_range_list:
opened_dico.append([dico_name, dico_redis[dico_name]])
# retrieve hash from paste
paste_hashes = PST._get_p_hash()
# Go throught the Database of the dico (of the month)
for curr_dico_name, curr_dico_redis in opened_dico:
for hash_type, paste_hash in paste_hashes.items():
for dico_hash in curr_dico_redis.smembers('HASHS_'+hash_type):
try:
if hash_type == 'ssdeep':
percent = 100-ssdeep.compare(dico_hash, paste_hash)
else:
percent = tlsh.diffxlen(dico_hash, paste_hash)
if percent > 100:
percent = 100
threshold_duplicate = threshold_set[hash_type]
if percent < threshold_duplicate:
percent = 100 - percent if hash_type == 'ssdeep' else percent #recovert the correct percent value for ssdeep
# Go throught the Database of the dico filter (month)
r_serv_dico = dico_redis[curr_dico_name]
# index of paste
index_current = r_serv_dico.get(dico_hash)
index_current = index_current
paste_path = r_serv_dico.get(index_current)
paste_path = paste_path
paste_date = r_serv_dico.get(index_current+'_date')
paste_date = paste_date
paste_date = paste_date if paste_date != None else "No date available"
if paste_path != None:
paste_path = paste_path.replace(PASTES_FOLDER+'/', '', 1)
if paste_path != PST.p_rel_path:
hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date)
print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent))
except Exception:
print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash)
# Add paste in DB after checking to prevent its analysis twice
# hash_type_i -> index_i AND index_i -> PST.PATH
r_serv1.set(index, PST.p_rel_path)
r_serv1.set(index+'_date', PST._get_p_date())
r_serv1.sadd("INDEX", index)
# Adding hashes in Redis
for hash_type, paste_hash in paste_hashes.items():
r_serv1.set(paste_hash, index)
#bad hash
if paste_hash == '':
print('bad Hash: ' + hash_type)
else:
r_serv1.sadd("HASHS_"+hash_type, paste_hash)
##################### Similarity found #######################
# if there is data in this dictionnary
if len(hash_dico) != 0:
# paste_tuple = (hash_type, date, paste_path, percent)
for dico_hash, paste_tuple in hash_dico.items():
dupl.add(paste_tuple)
# Creating the object attribute and save it.
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
if dupl != []:
dupl = list(dupl)
PST.__setattr__("p_duplicate", dupl)
PST.save_attribute_duplicate(dupl)
PST.save_others_pastes_attribute_duplicate(dupl)
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path))
print('{}Detected {}'.format(to_print, len(dupl)))
print('')
y = time.time()
publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
except IOError:
to_print = 'Duplicate;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
print("CRC Checksum Failed on :", PST.p_rel_path)
publisher.error('{}CRC Checksum Failed'.format(to_print))

130
bin/lib/Duplicate.py Executable file
View File

@ -0,0 +1,130 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import ssdeep
import sys
import time
import tlsh
import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
# Module-level setup: open the connection used by every helper below and read the module settings.
config_loader = ConfigLoader()
# Redis-style connection obtained for the "Kvrocks_DB" configuration entry.
r_serv_db = config_loader.get_redis_conn("Kvrocks_DB")
# Read from the 'Modules_Duplicates' section; the config option is still named 'min_paste_size'.
MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME
# Drop the loader reference once the settings have been read.
config_loader = None
#
#
# Hash != Duplicates => New correlation HASH => check if same hash if duplicate == 100
#
# Object Hash => correlation decoded => don't need correlation to exists
#
# New CORRELATION => HASH
# -> compute/get(if exist we have a correlation) hash -> get correlation same hash
#
#
# Duplicates between differents objects ?????
# Diff Decoded -> Item => Diff Item decoded - Item
#
# Duplicates domains != Duplicates items
def get_ssdeep_hash(content):
    """Return the ssdeep hash of ``content`` (delegates to ``ssdeep.hash``)."""
    digest = ssdeep.hash(content)
    return digest
def get_ssdeep_similarity(obj_hash, other_hash):
    """Return the score produced by ``ssdeep.compare`` for the two hashes."""
    score = ssdeep.compare(obj_hash, other_hash)
    return score
def get_tlsh_hash(content):
    """Return the TLSH hash of ``content`` (delegates to ``tlsh.hash``)."""
    digest = tlsh.hash(content)
    return digest
def get_tlsh_similarity(obj_hash, other_hash):
    """Convert the TLSH distance between two hashes into a similarity score.

    The value returned by ``tlsh.diffxlen`` is capped at 100 and then
    subtracted from 100, so a distance of 0 yields 100 and any distance of
    100 or more yields 0.
    """
    distance = tlsh.diffxlen(obj_hash, other_hash)
    return 100 - min(distance, 100)
def get_algo_similarity(algo, obj_hash, other_hash):
    """Compare two hashes with the helper matching ``algo``.

    ``algo`` is expected to be 'ssdeep' or 'tlsh'; any other value yields None.
    """
    if algo == 'ssdeep':
        return get_ssdeep_similarity(obj_hash, other_hash)
    if algo == 'tlsh':
        return get_tlsh_similarity(obj_hash, other_hash)
    return None
def get_algo_hashs_by_month(algo, date_ymonth):
    """Return every field name of the ``duplicates:hashs:{algo}:{date_ymonth}`` hash."""
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hkeys(month_key)
def exists_algo_hash_by_month(algo, hash, date_ymonth):
    """Tell whether ``hash`` is a field of the ``duplicates:hashs:{algo}:{date_ymonth}`` hash."""
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hexists(month_key, hash)
def get_object_id_by_hash(algo, hash, date_ymonth):
    """Return the value stored under ``hash`` in the ``duplicates:hashs:{algo}:{date_ymonth}`` hash."""
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    return r_serv_db.hget(month_key, hash)
def save_object_hash(algo, date_ymonth, hash, obj_id):
    """Store ``obj_id`` under field ``hash`` of the ``duplicates:hashs:{algo}:{date_ymonth}`` hash."""
    month_key = f'duplicates:hashs:{algo}:{date_ymonth}'
    r_serv_db.hset(month_key, hash, obj_id)
def get_duplicates(obj_type, subtype, id):
    """Return the duplicates recorded for an object, grouped by duplicate id.

    Every member of the ``obj:duplicates:{obj_type}:{subtype}:{id}`` set is a
    ``similarity:algo:duplicate_id`` string. The result maps each duplicate id
    to a list of ``{'algo': <str>, 'similarity': <int>}`` dicts.
    """
    grouped = {}
    for entry in r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{id}'):
        # Only split twice so that any ':' inside the duplicate id is preserved.
        score, algo, dup_id = entry.split(':', 2)
        grouped.setdefault(dup_id, []).append({'algo': algo, 'similarity': int(score)})
    return grouped
def _add_obj_duplicate(algo, similarity, obj_type, subtype, id, id_2):
    """Add ``similarity:algo:id_2`` to the duplicates set of object ``id`` (one direction only)."""
    set_key = f'obj:duplicates:{obj_type}:{subtype}:{id}'
    member = f'{similarity}:{algo}:{id_2}'
    r_serv_db.sadd(set_key, member)
def add_obj_duplicate(algo, hash, similarity, obj_type, subtype, id, date_ymonth):
    """Record a two-way duplicate link between ``id`` and the object that owns ``hash``.

    The other object's id is looked up from ``hash`` in the monthly hash
    table for ``algo`` / ``date_ymonth``. When ``similarity`` is 100, the
    links between ``id`` and each of its already known 100% duplicates (for
    the same ``algo``) are written in both directions as well.

    Fix: the previous code passed ``dups[dup_id]`` (the list of algo dicts)
    where an object id was expected, which created malformed keys and set
    members; the duplicate id itself is now used.
    """
    obj2_id = get_object_id_by_hash(algo, hash, date_ymonth)
    # same content
    if similarity == 100:
        dups = get_duplicates(obj_type, subtype, id)
        for dup_id, algo_dicts in dups.items():
            is_exact = any(
                entry['similarity'] == 100 and entry['algo'] == algo
                for entry in algo_dicts
            )
            if is_exact:
                # NOTE(review): linking obj2_id with dup_id (transitive duplicates)
                # may have been the original intent -- confirm with the author.
                _add_obj_duplicate(algo, similarity, obj_type, subtype, id, dup_id)
                _add_obj_duplicate(algo, similarity, obj_type, subtype, dup_id, id)
    _add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id)
    _add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id)
def get_last_x_month_dates(nb_months):
    """Return 'YYYYMM' strings for the current month and the ``nb_months`` before it.

    The list is ordered from the most recent month to the oldest and always
    starts with the current month, so it holds ``nb_months + 1`` entries.
    """
    cursor = datetime.datetime.now()
    months = [cursor.strftime("%Y%m")]
    one_day = datetime.timedelta(days=1)
    for _ in range(nb_months):
        # The day before the 1st of a month always falls in the previous month.
        cursor = cursor.replace(day=1) - one_day
        months.append(cursor.strftime("%Y%m"))
    return months
if __name__ == '__main__':
    # Manual check: print the current month followed by the 7 previous ones.
    print(get_last_x_month_dates(7))
#################################

View File

@ -54,6 +54,16 @@ def is_crawled(item_id):
def get_item_domain(item_id):
    """Return the part of ``item_id`` between its first 19 and its last 36 characters."""
    domain_span = slice(19, -36)
    return item_id[domain_span]
def get_item_content_binary(item_id):
    """Return the decompressed content of an item as bytes.

    The item file is looked up under ``PASTES_FOLDER`` and read through gzip
    in binary mode. Reading is best effort: any error is printed and an empty
    bytes object is returned.

    Fix: the failure value used to be ``''`` (str) although the success value
    is bytes; it is now ``b''`` so callers always receive the same type.
    """
    item_full_path = os.path.join(PASTES_FOLDER, item_id)
    try:
        with gzip.open(item_full_path, 'rb') as f:
            item_content = f.read()
    except Exception as e:
        # Deliberately broad: a missing or corrupted file must not crash the caller.
        print(e)
        item_content = b''
    return item_content
def get_item_content(item_id):
item_full_path = os.path.join(PASTES_FOLDER, item_id)
try:

View File

@ -91,11 +91,14 @@ class Item(AbstractObject):
else:
return filename
def get_content(self):
def get_content(self, binary=False):
"""
Returns Item content
"""
return item_basic.get_item_content(self.id)
if binary:
return item_basic.get_item_content_binary(self.id)
else:
return item_basic.get_item_content(self.id)
def get_raw_content(self):
filepath = self.get_filename()
@ -110,15 +113,34 @@ class Item(AbstractObject):
content = base64.b64encode(content)
return content.decode()
def get_html2text_content(self, content=None, ignore_links=False):
    """Convert HTML to text with ``html2text``.

    When ``content`` is empty or missing, the item's own content is used.
    ``ignore_links`` switches off both links and images in the output.
    """
    source = content if content else self.get_content()
    converter = html2text.HTML2Text()
    converter.ignore_links = ignore_links
    converter.ignore_images = ignore_links
    return converter.handle(source)
def get_size(self, str=False):
    """Return the size of the item's file divided by 1024.0.

    When ``str`` is truthy the value is rounded to two decimals.
    """
    size_kb = os.path.getsize(self.get_filename()) / 1024.0
    return round(size_kb, 2) if str else size_kb
def get_ail_2_ail_payload(self):
    """Return a dict whose 'raw' entry is ``self.get_gzip_content(b64=True)``."""
    return {'raw': self.get_gzip_content(b64=True)}
def set_origin(self): # set_parent ?
pass
def set_father(self, father_id): # UPDATE KEYS ?????????????????????????????
    """Link this item to ``father_id``.

    Adds the item id to the ``paste_children:{father_id}`` set and stores
    ``father_id`` in the 'father' field of ``paste_metadata:{item id}``.
    """
    children_key = f'paste_children:{father_id}'
    metadata_key = f'paste_metadata:{self.id}'
    r_serv_metadata.sadd(children_key, self.id)
    r_serv_metadata.hset(metadata_key, 'father', father_id)
#f'obj:children:{obj_type}:{subtype}:{id}, {obj_type}:{subtype}:{id}
#f'obj:metadata:{obj_type}:{subtype}:{id}', 'father', fathe
# => ON Object LEVEL ?????????
def add_duplicate(self):
pass
def sanitize_id(self):
pass
@ -150,18 +172,25 @@ class Item(AbstractObject):
# origin
# duplicate -> all item iterations ???
#
def create(self, content, tags, origin=None, duplicate=None):
self.save_on_disk(content, binary=True, compressed=False, base64=False)
def create(self, content, tags, father=None, duplicates=[], _save=True):
if _save:
self.save_on_disk(content, binary=True, compressed=False, base64=False)
# # TODO:
# for tag in tags:
# self.add_tag(tag)
if origin:
if father:
pass
for obj_id in duplicates:
for dup in duplicates[obj_id]:
self.add_duplicate(obj_id, dup['algo'], dup['similarity'])
if duplicate:
pass
# # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
# TODO: DELETE ITEM CORRELATION + TAGS + METADATA + ...
@ -204,6 +233,80 @@ class Item(AbstractObject):
def exist_correlation(self):
pass
def is_crawled(self):
    """Return True when the item id begins with 'crawled'."""
    item_id = self.id
    return item_id.startswith('crawled')
# if is_crawled
def get_domain(self):
    """Return the part of the item id between its first 19 and its last 36 characters."""
    domain_span = slice(19, -36)
    return self.id[domain_span]
def get_screenshot(self):
    """Return the relative path built from the item's stored 'screenshot' value.

    The value read from ``paste_metadata:{id}`` is split into six leading
    two-character directories followed by the remainder. Returns None when
    no value is stored.
    """
    s = r_serv_metadata.hget(f'paste_metadata:{self.id}', 'screenshot')
    if not s:
        return None
    prefixes = [s[start:start + 2] for start in range(0, 12, 2)]
    return os.path.join(*prefixes, s[12:])
def get_har(self):
    """Return the path ``<har_directory>/<item id>.json`` if that file exists, else None."""
    har_path = os.path.join(har_directory, self.id) + '.json'
    return har_path if os.path.isfile(har_path) else None
def get_url(self):
    """Return the 'real_link' field of this item's ``paste_metadata:{id}`` hash."""
    metadata_key = f'paste_metadata:{self.id}'
    return r_serv_metadata.hget(metadata_key, 'real_link')
# options: set of optional meta fields
def get_meta(self, options=set()):
    """Build the metadata dict of the item.

    'id', 'date', 'source' and 'tags' are always present. ``options`` selects
    the extra fields to compute: 'content', 'crawler' (only added for crawled
    items), 'duplicates', 'lines' and 'size'.
    """
    meta = {
        'id': self.id,
        'date': self.get_date(separator=True),  # TODO:
        'source': self.get_source(),
        'tags': self.get_tags(),
    }
    # optional meta fields
    if 'content' in options:
        meta['content'] = self.get_content()
    if 'crawler' in options and self.is_crawled():
        # Reuse the tags that were already fetched above.
        meta['crawler'] = self.get_meta_crawler(tags=meta.get('tags'))
    if 'duplicates' in options:
        meta['duplicates'] = self.get_duplicates()
    if 'lines' in options:
        # Reuse the content when it was requested too; None otherwise.
        meta['lines'] = self.get_meta_lines(content=meta.get('content'))
    if 'size' in options:
        meta['size'] = self.get_size(str=True)

    # # TODO: ADD GET FATHER

    # meta['encoding'] = None
    return meta
def get_meta_crawler(self, tags=[]):
    """Return the crawler metadata of the item, or an empty dict if it was not crawled.

    For crawled items the dict holds 'domain', 'har', 'screenshot', 'url' and
    'is_tags_safe'. When ``tags`` is empty the item's own tags are fetched
    before calling ``Tag.is_tags_safe``.
    """
    if not self.is_crawled():
        return {}
    crawler = {
        'domain': self.get_domain(),
        'har': self.get_har(),
        'screenshot': self.get_screenshot(),
        'url': self.get_url(),
    }
    checked_tags = tags if tags else self.get_tags()
    crawler['is_tags_safe'] = Tag.is_tags_safe(checked_tags)
    return crawler
def get_meta_lines(self, content=None):
    """Return the number of lines and the longest line length of a text.

    When ``content`` is empty or missing, the item's own content is used.
    The result has the form ``{'nb': <line count>, 'max_length': <int>}``,
    with both values 0 for an empty text.
    """
    if not content:
        content = self.get_content()
    lengths = [len(line) for line in content.splitlines()]
    return {'nb': len(lengths), 'max_length': max(lengths, default=0)}
############################################################################
############################################################################
@ -547,7 +650,7 @@ def get_item_list_desc(list_item_id):
def is_crawled(item_id):
return item_basic.is_crawled(item_id)
def get_crawler_matadata(item_id, ltags=None):
def get_crawler_matadata(item_id, tags=None):
dict_crawler = {}
if is_crawled(item_id):
dict_crawler['domain'] = get_item_domain(item_id)
@ -759,5 +862,7 @@ def delete_domain_node(item_id):
if __name__ == '__main__':
content = 'test file content'
duplicates = {'tests/2020/01/02/test.gz': [{'algo':'ssdeep', 'similarity':75}, {'algo':'tlsh', 'similarity':45}]}
item = Item('tests/2020/01/02/test_save.gz')
item.save_on_disk(content, binary=False)
item.create(content, _save=False)

View File

@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from packages import Tag
from lib import Duplicate
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
from lib.Tracker import is_obj_tracked, get_obj_all_trackers, delete_obj_trackers
@ -69,6 +70,9 @@ class AbstractObject(ABC):
tags = set(tags)
return tags
def get_duplicates(self):
    """Return the duplicates of this object as given by ``Duplicate.get_duplicates``."""
    obj_type = self.type
    subtype = self.get_subtype(r_str=True)
    return Duplicate.get_duplicates(obj_type, subtype, self.id)
## ADD TAGS ????
#def add_tags(self):

View File

@ -113,21 +113,49 @@ class AbstractSubtypeObject(AbstractObject):
if date > last_seen:
self.set_last_seen(date)
def add(self, date):
def add(self, date, item_id):
    """Register one occurrence of this object in ``item_id`` on ``date``.

    Increments the daily and global counters, then stores the link between
    the object and the item in both directions.

    Fix: the two "new correlation" key layouts were written as bare f-string
    expressions using ``obj_type``, ``obj_subtype`` and ``obj_id``, none of
    which exist in this scope, so every call raised NameError. They are kept
    below as comments until the new correlation scheme is implemented.
    """
    self.update_correlation_daterange()
    # daily
    r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
    # all type
    r_metadata.zincrby(f'{self.type}_all:{self.subtype}', self.id, 1)

    #######################################################################
    #######################################################################
    # REPLACE WITH CORRELATION ?????

    # NOTE(review): the counters above use r_metadata while the sets below use
    # r_serv_metadata -- confirm both names are defined in this module.

    # global set
    r_serv_metadata.sadd(f'set_{self.type}_{self.subtype}:{self.id}', item_id)

    ## object_metadata
    # item
    r_serv_metadata.sadd(f'item_{self.type}_{self.subtype}:{item_id}', self.id)

    # new correlation
    #
    #       How to filter by correlation type ????
    #
    # Candidate key layouts (not implemented yet):
    #   f'correlation:obj:{self.type}:{self.subtype}:{self.id}', f'{obj_type}:{obj_subtype}:{obj_id}'
    #   f'correlation:obj:{self.type}:{self.subtype}:{obj_type}:{self.id}', f'{obj_subtype}:{obj_id}'
    #
    #
    #
    #
    #
    #
    #
    #

    # # domain
    # if item_basic.is_crawled(item_id):
    #     domain = item_basic.get_item_domain(item_id)
    #     self.save_domain_correlation(domain, subtype, obj_id)
def create(self, first_seen, last_seen):
pass
def _delete(self):

108
bin/modules/Duplicates.py Executable file
View File

@ -0,0 +1,108 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Duplicate module
====================
In short, this module checks items for duplicates.
Its input comes from other modules, namely:
Credential
Comparisons are performed with ssdeep and tlsh.
"""
import redis
import os
import sys
import time
#from datetime import datetime, timedelta
import datetime
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import Duplicate
from lib.objects.Items import Item
class Duplicates(AbstractModule):
    """Duplicates module.

    Hashes every incoming item with ssdeep and tlsh, compares the hashes with
    those saved for the current and previous months, records the duplicates
    that are found and finally saves the item's own hashes.
    """

    def __init__(self):
        """Load the thresholds and limits from the 'Modules_Duplicates' config section."""
        super(Duplicates, self).__init__()

        config_loader = ConfigLoader()
        THRESHOLD_SSDEEP = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_ssdeep')
        THRESHOLD_TLSH = config_loader.get_config_int('Modules_Duplicates', 'threshold_duplicate_tlsh')
        self.min_item_size = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: # FIXME: rename me
        self.maximum_month_range = config_loader.get_config_int('Modules_Duplicates', 'maximum_month_range')

        # Per-algorithm settings; compute() also stores the current item's hash here.
        self.algos = {
            "ssdeep": {"threshold": THRESHOLD_SSDEEP},
            "tlsh": {"threshold": THRESHOLD_TLSH}
        }

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        """Search duplicates of the item whose id is ``message`` and save its hashes.

        Items smaller than ``self.min_item_size`` are ignored. For each
        algorithm and each month in range, an identical saved hash counts as
        a 100% duplicate; otherwise every saved hash is compared and kept
        when its similarity reaches the algorithm's threshold.
        """
        # IOError: "CRC Checksum Failed on : {id}"

        item = Item(message)

        # Check file size
        if item.get_size() < self.min_item_size:
            return None

        # one month
        curr_date_ymonth = datetime.datetime.now().strftime("%Y%m")
        last_month_dates = Duplicate.get_last_x_month_dates(self.maximum_month_range)

        x = time.time()

        # Get Hashs
        content = item.get_content(binary=True)
        self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content)
        self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content)

        # TODO: Handle computed duplicates

        nb_duplicates = 0

        for algo in self.algos:
            obj_hash = self.algos[algo]['hash']
            threshold = self.algos[algo]['threshold']
            for date_ymonth in last_month_dates:
                if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth):
                    Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
                    nb_duplicates += 1
                else:
                    for other_hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth):
                        try:
                            similarity = Duplicate.get_algo_similarity(algo, obj_hash, other_hash)
                        except Exception as e:
                            # One uncomparable stored hash must not abort the whole item
                            # (the item's own hash would then never be saved). The exception
                            # types depend on the hashing libraries, hence the broad catch.
                            print(f'hash not comparable, bad hash: {other_hash} , current_hash: {obj_hash} ({e})')
                            continue
                        print(f'[{algo}] comparing: {obj_hash} and {other_hash} similarity: {similarity}') # DEBUG:
                        if similarity >= threshold:
                            Duplicate.add_obj_duplicate(algo, other_hash, similarity, 'item', '', item.get_id(), date_ymonth)
                            nb_duplicates += 1

            # Save Hashs
            Duplicate.save_object_hash(algo, curr_date_ymonth, obj_hash, item.get_id())

        if nb_duplicates:
            self.redis_logger.info(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {nb_duplicates};{item.get_id()}')

        y = time.time()
        print(f'{item.get_id()} Processed in {y-x} sec')
        #self.redis_logger.debug('{}Processed in {} sec'.format(to_print, y-x))
if __name__ == "__main__":
    # Start the module when the file is executed as a script.
    Duplicates().run()

View File

@ -66,15 +66,15 @@ publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Urls,Redis_Credential,R
[CreditCards]
subscribe = Redis_CreditCards
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
publish = Redis_ModuleStats,Redis_Tags
[BankAccount]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Mail]
subscribe = Redis_Mail
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
publish = Redis_ModuleStats,Redis_Tags
[Onion]
subscribe = Redis_Onion
@ -92,11 +92,11 @@ publish = Redis_Url
[LibInjection]
subscribe = Redis_Url
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[SQLInjectionDetection]
subscribe = Redis_Url
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[ModuleStats]
subscribe = Redis_ModuleStats
@ -128,31 +128,31 @@ publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
[Cve]
subscribe = Redis_Cve
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Phone]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Keys]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_PgpDump,Redis_Tags
publish = Redis_PgpDump,Redis_Tags
[PgpDump]
subscribe = Redis_PgpDump
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[ApiKey]
subscribe = Redis_ApiKey
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Decoder]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Bitcoin]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[submit_paste]
subscribe = Redis
@ -164,7 +164,8 @@ publish = Redis_Mixer,Redis_Tags
[IP]
subscribe = Redis_Global
publish = Redis_Duplicate,Redis_Tags
publish = Redis_Tags
[Zerobins]
subscribe = Redis_Url
subscribe = Redis_Url

View File

@ -15,12 +15,15 @@ from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import Tag
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import item_basic
from lib.objects.Items import Item
from export import Export
from packages import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'export'))
import Export
# ============ BLUEPRINT ============
objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@ -38,28 +41,22 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
@login_read_only
def showItem(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
abort(404)
dict_item = {}
dict_item['id'] = item_id
dict_item['name'] = dict_item['id'].replace('/', ' / ')
dict_item['father'] = Item.get_item_parent(item_id)
dict_item['content'] = Item.get_item_content(item_id)
dict_item['metadata'] = Item.get_item_metadata(item_id, item_content=dict_item['content'])
dict_item['tags'] = Tag.get_obj_tag(item_id)
#dict_item['duplicates'] = Item.get_item_nb_duplicates(item_id)
dict_item['duplicates'] = Item.get_item_duplicates_dict(item_id)
dict_item['crawler'] = Item.get_crawler_matadata(item_id, ltags=dict_item['tags'])
item = Item(item_id)
meta = item.get_meta(options=set(['content', 'crawler', 'duplicates', 'lines', 'size']))
meta['name'] = meta['id'].replace('/', ' / ')
meta['father'] = item_basic.get_item_parent(item_id)
## EXPORT SECTION
# # TODO: ADD in Export SECTION
dict_item['hive_case'] = Export.get_item_hive_cases(item_id)
meta['hive_case'] = Export.get_item_hive_cases(item_id)
return render_template("show_item.html", bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_item['id'], object_type='item'),
modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
is_hive_connected=Export.get_item_hive_cases(item_id),
dict_item=dict_item)
meta=meta)
# kvrocks data
@ -74,24 +71,27 @@ def showItem(): # # TODO: support post
@login_read_only
def html2text(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
abort(404)
return Item.get_item_content_html2text(item_id)
item = Item(item_id)
return item.get_html2text_content()
@objects_item.route("/object/item/raw_content")
@login_required
@login_read_only
def item_raw_content(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
abort(404)
return Response(Item.get_item_content(item_id), mimetype='text/plain')
item = Item(item_id)
return Response(item.get_content(), mimetype='text/plain')
@objects_item.route("/object/item/download")
@login_required
@login_read_only
def item_download(): # # TODO: support post
item_id = request.args.get('id')
if not item_id or not Item.exist_item(item_id):
if not item_id or not item_basic.exist_item(item_id):
abort(404)
return send_file(Item.get_raw_content(item_id), attachment_filename=item_id, as_attachment=True)
item = Item(item_id)
return send_file(item.get_raw_content(), attachment_filename=item_id, as_attachment=True)

View File

@ -38,7 +38,7 @@
<div class="card my-2 mx-2">
<div class="card-header bg-dark">
<h3 class="text-white text-center" >{{ dict_item['name'] }}</h3>
<h3 class="text-white text-center" >{{ meta['name'] }}</h3>
</div>
<div class="card-body pb-1">
<table class="table table-condensed">
@ -46,7 +46,7 @@
<tr>
<th>Date</th>
<th>Source</th>
<th>Encoding</th>
<!-- <th>Encoding</th> -->
<th>Size (Kb)</th>
<th>Number of lines</th>
<th>Max line length</th>
@ -54,12 +54,12 @@
</thead>
<tbody>
<tr>
<td>{{ dict_item['metadata']['date'] }}</td>
<td>{{ dict_item['metadata']['source'] }}</td>
<td>{{ dict_item['metadata']['encoding'] }}</td>
<td>{{ dict_item['metadata']['size'] }}</td>
<td>{{ dict_item['metadata']['lines']['nb'] }}</td>
<td>{{ dict_item['metadata']['lines']['max_length'] }}</td>
<td>{{ meta['date'] }}</td>
<td>{{ meta['source'] }}</td>
<!-- <td>{{ meta['encoding'] }}</td> -->
<td>{{ meta['size'] }}</td>
<td>{{ meta['lines']['nb'] }}</td>
<td>{{ meta['lines']['max_length'] }}</td>
</tr>
</tbody>
</table>
@ -68,9 +68,9 @@
<h5>
<div>
{% include 'modals/edit_tag.html' %}
{% for tag in dict_item['tags'] %}
{% for tag in meta['tags'] %}
<button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}" data-toggle="modal" data-target="#edit_tags_modal"
data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ dict_item['id'] }}">
data-tagid="{{ tag }}" data-objtype="item" data-objid="{{ meta['id'] }}">
{{ tag }}
</button>
@ -84,21 +84,21 @@
</h5>
</div>
{% if dict_item['father'] %}
{% if meta['father'] %}
<div class="mt-3">
Father: <a href="{{ url_for('objects_item.showItem')}}?id={{dict_item['father']}}" target="_blank">{{dict_item['father']}}</a>
Father: <a href="{{ url_for('objects_item.showItem')}}?id={{meta['father']}}" target="_blank">{{meta['father']}}</a>
</div>
{% endif %}
<div class="d-flex flex-row-reverse bd-highlight">
<div>
<a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ dict_item['id'] }}&correlation_objects=paste" target="_blank">
<a href="{{ url_for('correlation.show_correlation')}}?object_type=paste&correlation_id={{ meta['id'] }}&correlation_objects=paste" target="_blank">
<button class="btn btn-lg btn-info"><i class="fas fa-project-diagram"></i> Correlations Graph
</button>
</a>
</div>
<div>
{% with obj_type='item', obj_id=dict_item['id'], obj_subtype=''%}
{% with obj_type='item', obj_id=meta['id'], obj_subtype=''%}
{% include 'modals/investigations_register_obj.html' %}
{% endwith %}
<div class="mr-2">
@ -108,7 +108,7 @@
</div>
</div>
<div class="mx-2">
{% with obj_type='item', obj_id=dict_item['id'], obj_lvl=0%}
{% with obj_type='item', obj_id=meta['id'], obj_lvl=0%}
{% include 'import_export/block_add_user_object_to_export.html' %}
{% endwith %}
</div>
@ -134,14 +134,14 @@
</div>
{% endif %}
{% if dict_item['hive_case'] %}
{% if meta['hive_case'] %}
<div class="list-group" id="misp_event">
<li class="list-group-item active">The Hive Case already Created</li>
<a target="_blank" href="{{ hive_url }}" class="list-group-item">{{ hive_url }}</a>
</div>
{% endif %}
{% if dict_item['duplicates'] != 0 %}
{% if meta['duplicates'] != 0 %}
<div id="accordionDuplicate" class="mb-2 mx-3">
<div class="card">
<div class="card-header py-1" id="headingDuplicate">
@ -149,7 +149,7 @@
<div class="col-11">
<div class="mt-2">
<i class="far fa-clone"></i> duplicates&nbsp;&nbsp;
<div class="badge badge-warning">{{dict_item['duplicates']|length}}</div>
<div class="badge badge-warning">{{meta['duplicates']|length}}</div>
</div>
</div>
<div class="col-1">
@ -173,19 +173,19 @@
</tr>
</thead>
<tbody>
{% for duplicate_id in dict_item['duplicates'] %}
{% for duplicate_id in meta['duplicates'] %}
<tr>
<td>{{dict_item['duplicates'][duplicate_id]['date']}}</td>
<td>{{meta['duplicates'][duplicate_id]['date']}}</td>
<td class="py-0">
<table class="table table-borderless table-sm my-0">
<tbody>
{%for algo in dict_item['duplicates'][duplicate_id]['algo']|sort()%}
{%for dict_algo in meta['duplicates'][duplicate_id]|sort(attribute='algo')%}
<tr>
<td class="py-0">{{algo}}</td>
<td class="py-0">{{dict_algo['algo']}}</td>
<td class="w-100 py-0">
<div class="progress mt-1">
<div class="progress-bar progress-bar-striped {%if algo=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%;" aria-valuenow="{{dict_item['duplicates'][duplicate_id]['algo'][algo]}}" aria-valuemin="0" aria-valuemax="100">
{{dict_item['duplicates'][duplicate_id]['algo'][algo]}}%
<div class="progress-bar progress-bar-striped {%if dict_algo['algo']=='tlsh'%}bg-secondary{%endif%}" role="progressbar" style="width: {{dict_algo['similarity']}}%;" aria-valuenow="{{dict_algo['similarity']}}" aria-valuemin="0" aria-valuemax="100">
{{dict_algo['similarity']}}%
</div>
</div>
</td>
@ -200,7 +200,7 @@
</a>
</td>
<td>
<a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{dict_item['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
<a target="_blank" href="{{ url_for('showsavedpastes.showDiff') }}?s1={{meta['id']}}&s2={{duplicate_id}}" class="fa fa-columns" title="Show diff"></a>
</td>
</tr>
{% endfor %}
@ -261,7 +261,7 @@
{% endif %}
{% if dict_item['crawler'] %}
{% if meta['crawler'] %}
<div id="accordionCrawler" class="mb-3 mx-3">
<div class="card">
<div class="card-header py-1" id="headingCrawler">
@ -294,18 +294,18 @@
<tr>
<td><i class="far fa-file"></i></td>
<td>
<a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=dict_item['father']) }}" />{{ dict_item['father'] }}</a>
<a class="badge" target="_blank" href="{{ url_for('objects_item.showItem', paste=meta['father']) }}" />{{ meta['father'] }}</a>
</td>
</tr>
<td><i class="fab fa-html5"></i></td>
<td>
<a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=dict_item['crawler']['domain']) }}" />{{ dict_item['crawler']['domain'] }}</a>
<a class="badge" target="_blank" href="{{ url_for('crawler_splash.showDomain', domain=meta['crawler']['domain']) }}" />{{ meta['crawler']['domain'] }}</a>
</td>
</tr>
<tr>
<td>url</td>
<td>
{{ dict_item['crawler']['url'] }}
{{ meta['crawler']['url'] }}
</td>
</tr>
</tbody>
@ -318,11 +318,11 @@
<div class="card-body py-2">
<div class="row">
<div class="col-md-8">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if dict_item['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
<input class="custom-range mt-2" id="blocks" type="range" min="1" max="50" value="{%if meta['crawler']['is_tags_safe']%}13{%else%}0{%endif%}">
</div>
<div class="col-md-4">
<button class="btn {%if dict_item['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
{%if dict_item['crawler']['is_tags_safe']%}
<button class="btn {%if meta['crawler']['is_tags_safe']%}btn-primary{%else%}btn-danger{%endif%}" onclick="blocks.value=50;pixelate();">
{%if meta['crawler']['is_tags_safe']%}
<i class="fas fas fa-plus-square"></i>
{%else%}
<i class="fas fa-exclamation-triangle"></i>
@ -358,8 +358,8 @@
<li class="nav-item dropdown">
<a class="nav-link dropdown-toggle" data-toggle="dropdown" href="#">Others</a>
<div class="dropdown-menu">
<a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=dict_item['id']) }}"><i class="far fa-file"></i> &nbsp;Raw Content</a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=dict_item['id']) }}"><i class="fas fa-download"></i> &nbsp;Download</i></a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_raw_content', id=meta['id']) }}"><i class="far fa-file"></i> &nbsp;Raw Content</a>
<a class="dropdown-item" href="{{ url_for('objects_item.item_download', id=meta['id']) }}"><i class="fas fa-download"></i> &nbsp;Download</i></a>
</div>
</li>
</ul>
@ -367,7 +367,7 @@
<div class="tab-content" id="pills-tabContent">
<div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
<p class="my-0"> <pre class="border">{{ dict_item['content'] }}</pre></p>
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
</div>
<div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
<p class="my-0"> <pre id="html2text-container" class="border"></pre></p>
@ -393,7 +393,7 @@
$('#pills-html2text-tab').on('shown.bs.tab', function (e) {
if ($('#html2text-container').is(':empty')){
$.get("{{ url_for('objects_item.html2text') }}?id={{ dict_item['id'] }}").done(function(data){
$.get("{{ url_for('objects_item.html2text') }}?id={{ meta['id'] }}").done(function(data){
$('#html2text-container').text(data);
});
@ -401,7 +401,7 @@
});
</script>
{% if dict_item['crawler'] %}
{% if meta['crawler'] %}
<script>
var ctx = canvas.getContext('2d'), img = new Image();
@ -413,7 +413,7 @@
img.addEventListener("error", img_error);
var draw_img = false;
img.src = "{{ url_for('showsavedpastes.screenshot', filename=dict_item['crawler']['screenshot']) }}";
img.src = "{{ url_for('showsavedpastes.screenshot', filename=meta['crawler']['screenshot']) }}";
function pixelate() {