2022-03-07 15:12:01 +01:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
import os
|
2023-05-04 16:35:56 +02:00
|
|
|
import re
|
2022-03-07 15:12:01 +01:00
|
|
|
import sys
|
2023-02-22 11:08:29 +01:00
|
|
|
import magic
|
2022-08-19 16:53:31 +02:00
|
|
|
import requests
|
|
|
|
import zipfile
|
2022-03-07 15:12:01 +01:00
|
|
|
|
2022-03-08 10:44:41 +01:00
|
|
|
from flask import url_for
|
2022-07-08 09:47:47 +02:00
|
|
|
from io import BytesIO
|
2023-05-04 16:35:56 +02:00
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
from pymisp import MISPObject
|
2022-07-08 09:47:47 +02:00
|
|
|
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2023-01-09 16:03:06 +01:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2022-07-08 09:47:47 +02:00
|
|
|
from lib.ConfigLoader import ConfigLoader
|
2023-01-09 16:03:06 +01:00
|
|
|
from lib.objects.abstract_daterange_object import AbstractDaterangeObject
|
2022-08-19 16:53:31 +02:00
|
|
|
from packages import Date
|
2022-03-07 15:12:01 +01:00
|
|
|
|
2022-09-20 16:11:48 +02:00
|
|
|
# Optional VirusTotal API key, loaded from the local keys directory.
sys.path.append('../../configs/keys')
try:
    from virusTotalKEYS import vt_key
except Exception:
    # Missing or broken key file: VT submission stays disabled (best effort).
    vt_key = ''

if vt_key != '':
    VT_TOKEN = vt_key
    VT_ENABLED = True
    # print('VT submission is enabled')
else:
    # Always bind VT_TOKEN so later references never raise NameError.
    VT_TOKEN = None
    VT_ENABLED = False
    # print('VT submission is disabled')
|
2023-01-09 16:03:06 +01:00
|
|
|
# print('VT submission is disabled')
|
2022-09-20 16:11:48 +02:00
|
|
|
|
2022-07-08 09:47:47 +02:00
|
|
|
# Read the module-wide settings once, then drop the loader reference.
config_loader = ConfigLoader()

# Database connection used for all decoded metadata and indexes.
r_objects = config_loader.get_db_conn("Kvrocks_Objects")

# Directory (joined to AIL_HOME elsewhere in this module) holding decoded files.
HASH_DIR = config_loader.get_config_str('Directories', 'hash')

# Base URL used to build links outside of a flask context.
baseurl = config_loader.get_config_str("Notifications", "ail_domain")

config_loader = None
|
|
|
|
|
|
|
|
|
|
|
|
################################################################################
|
|
|
|
################################################################################
|
|
|
|
################################################################################
|
|
|
|
|
|
|
|
# # TODO: COMPLETE CLASS
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
class Decoded(AbstractDaterangeObject):
    """
    AIL Decoded Object. (strings)

    Metadata (mimetype, size, VirusTotal link/report) is stored in the
    ``meta:<type>:<id>`` hash of ``r_objects``. The decoded content is read
    from / written to ``AIL_HOME/HASH_DIR/<mimetype>/<id[0:2]>/<id>``.
    """

    def __init__(self, id):
        super(Decoded, self).__init__('decoded', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Placeholder: deletion is not implemented yet."""
        # # TODO:
        pass

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this decoded.

        :param flask_context: when true, build the URL with flask ``url_for``;
            otherwise build an absolute URL from the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type="decoded", id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    def get_svg_icon(self, mimetype=None):
        """
        Return the icon description (style, icon, color, radius) of this decoded.

        The icon is chosen from the top-level part of the mimetype
        (``application``, ``audio``, ``image``, ``text``); anything else,
        including a missing mimetype, gets the default icon.

        :param mimetype: optional mimetype; read from the metadata when empty.
        """
        if not mimetype:
            mimetype = self.get_mimetype()
        # The stored mimetype can be missing: fall back to the default icon
        # instead of failing on ``None.split``.
        file_type = mimetype.split('/')[0] if mimetype else ''
        icons = {
            'application': '\uf15b',
            'audio': '\uf1c7',
            'image': '\uf1c5',
            'text': '\uf15c',
        }
        icon = icons.get(file_type, '\uf249')
        return {'style': 'fas', 'icon': icon, 'color': '#88CCEE', 'radius': 5}

    def get_mimetype(self):
        """Return the ``mime`` field of this decoded's metadata hash."""
        return r_objects.hget(f'meta:{self.type}:{self.id}', 'mime')

    def set_mimetype(self, mimetype):
        """Store ``mimetype`` in the ``mime`` field of the metadata hash."""
        return r_objects.hset(f'meta:{self.type}:{self.id}', 'mime', mimetype)

    def get_size(self):
        """Return the ``size`` field of this decoded's metadata hash."""
        return r_objects.hget(f'meta:{self.type}:{self.id}', 'size')

    def set_size(self, size):
        """Store ``size`` (converted with ``int``) in the metadata hash."""
        return r_objects.hset(f'meta:{self.type}:{self.id}', 'size', int(size))

    def get_rel_path(self, mimetype=None):
        """
        Return the path of the decoded file relative to ``AIL_HOME``.

        :param mimetype: optional mimetype; read from the metadata when empty.
        :return: ``HASH_DIR/<mimetype>/<id[0:2]>/<id>``, or None (with a
            logged warning) when no mimetype is available.
        """
        if not mimetype:
            mimetype = self.get_mimetype()
        if not mimetype:
            self.logger.warning(f'Decoded {self.id}: Empty mimetype')
            return None
        return os.path.join(HASH_DIR, mimetype, self.id[0:2], self.id)

    def get_filepath(self, mimetype=None):
        """Return the path of the decoded file under ``AIL_HOME``, or None."""
        rel_path = self.get_rel_path(mimetype=mimetype)
        if not rel_path:
            return None
        return os.path.join(os.environ['AIL_HOME'], rel_path)

    def get_content(self, mimetype=None, r_type='str'):
        """
        Return the content of the decoded file.

        :param mimetype: optional mimetype used to locate the file.
        :param r_type: ``'str'`` (text mode read), ``'bytes'`` or ``'bytesio'``.
        :return: the content in the requested form. When the file path cannot
            be resolved an empty value of the requested form is returned
            (``''``, ``b''`` or an empty ``BytesIO``). An unknown ``r_type``
            yields None.
        """
        filepath = self.get_filepath(mimetype=mimetype)
        if not filepath:
            if r_type == 'str':
                return ''
            if r_type == 'bytesio':
                # Callers of the 'bytesio' form expect a file-like object
                # (see get_zip_content), never raw bytes.
                return BytesIO()
            return b''

        if r_type == 'str':
            # NOTE(review): text-mode read of a decoded blob may fail on
            # binary content - confirm callers only use it on text.
            with open(filepath, 'r') as f:
                return f.read()
        if r_type == 'bytes':
            with open(filepath, 'rb') as f:
                return f.read()
        if r_type == 'bytesio':
            with open(filepath, 'rb') as f:
                return BytesIO(f.read())
        return None

    def get_zip_content(self):
        """Return an in-memory zip archive (BytesIO) holding the decoded file."""
        # mimetype = self.get_estimated_type()
        zip_content = BytesIO()
        with zipfile.ZipFile(zip_content, "w") as zf:
            # TODO: Fix password
            # zf.setpassword(b"infected")
            zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue())
        zip_content.seek(0)
        return zip_content

    def get_misp_object(self):
        """
        Build the MISP ``file`` object of this decoded.

        first/last seen are set when available (a warning is logged when one
        is missing); sha1, mimetype and malware-sample attributes are added
        and every attribute receives the tags of this decoded.
        """
        obj_attrs = []
        obj = MISPObject('file')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('sha1', value=self.id))
        obj_attrs.append(obj.add_attribute('mimetype', value=self.get_mimetype()))
        obj_attrs.append(obj.add_attribute('malware-sample', value=self.id, data=self.get_content()))
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    ############################################################################
    ############################################################################
    ############################################################################

    def get_decoders(self):
        """Return the list of decoder names."""
        return ['base64', 'binary', 'hexadecimal']

    def get_meta(self, options=None):
        """
        Return the metadata dict of this decoded.

        :param options: optional collection of extra fields to include:
            ``mimetype``, ``icon``, ``size``, ``tags``, ``vt``.
        """
        # Avoid a shared mutable default argument.
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        if 'mimetype' in options:
            meta['mimetype'] = self.get_mimetype()
        if 'icon' in options:
            # Reuse the mimetype fetched above, if any.
            meta['icon'] = self.get_svg_icon(mimetype=meta.get('mimetype'))
        if 'size' in options:
            meta['size'] = self.get_size()
        if 'tags' in options:
            meta['tags'] = self.get_tags()
        if 'vt' in options:
            meta['vt'] = self.get_meta_vt()
        return meta

    def get_meta_vt(self):
        """Return ``{'link', 'report'}`` from the metadata, or ``{}`` if both are empty."""
        link = r_objects.hget(f'meta:{self.type}:{self.id}', 'vt_link')
        report = r_objects.hget(f'meta:{self.type}:{self.id}', 'vt_report')
        if link or report:
            return {'link': link, 'report': report}
        return {}

    # TODO
    def guess_mimetype(self, bytes_content):
        """Return the mimetype detected by ``magic`` for ``bytes_content``."""
        # if not bytes_content:
        #     bytes_content = self.get_content()
        return magic.from_buffer(bytes_content, mime=True)

    # avoid counting the same hash multiple time on the same item
    #       except if different encoding

    def is_seen_this_day(self, date):
        """
        Return True if this decoded was seen today.

        NOTE(review): ``date`` is accepted but not used; the check is always
        done against today's date - confirm whether this is intended.
        """
        return bool(self.get_nb_seen_by_date(Date.get_today_date_str()))

    def save_file(self, b_content, mimetype):  # TODO TEST ME
        """
        Write the decoded content to disk and create its metadata.

        :param b_content: bytes to write.
        :param mimetype: mimetype used to build the file path and stored as meta.
        :return: True if the file was written, False if it already exists or
            if no file path can be built (empty mimetype).
        """
        filepath = self.get_filepath(mimetype=mimetype)
        if not filepath:
            # get_filepath already logged the empty mimetype.
            return False
        if os.path.isfile(filepath):
            # print('File already exist')
            return False
        # create dir (exist_ok: no failure if created concurrently)
        os.makedirs(os.path.dirname(filepath), exist_ok=True)
        with open(filepath, 'wb') as f:
            f.write(b_content)

        # create meta
        self._add_create()
        self.set_mimetype(mimetype)
        self.set_size(os.path.getsize(filepath))

        return True

    def add(self, date, obj, algo_name, mimetype=None):
        """
        Register this decoded for ``date`` and update the per-day indexes.

        :param date: day used in the index keys.
        :param obj: object passed to ``_add``.
        :param algo_name: decoder name used in the index keys.
        :param mimetype: optional mimetype; read from the metadata when empty.
        """
        self._add(date, obj)
        if not mimetype:
            mimetype = self.get_mimetype()

        is_new_decoded = r_objects.sadd(f'decoded:algo:{algo_name}:{date}', self.id)  # filter by algo + sparkline
        # uniq decoded in this algo today
        if int(is_new_decoded):
            r_objects.zincrby(f'decoded:algos:{date}', 1, algo_name)  # pie chart

        # mimetype -> decodeds
        is_new_decoded = r_objects.sadd(f'decoded:mime:{mimetype}:{date}', self.id)  # filter by mimetype
        # uniq decoded in this mimetype today
        if int(is_new_decoded):
            r_objects.zincrby(f'decoded:mime:{date}', 1, mimetype)  # TDO ADD OPTION TO CALC IF NOT EXISTS
            r_objects.sadd('decoded:mimetypes', mimetype)

        # algo + mimetype -> Decodeds
        # -> sinter with r_objects.sunion(f'decoded:algo:{algo_name}:{date}')

    # # TODO: ADD items
    # def create(self, content, date, mimetype=None):
    #     if not mimetype:
    #         mimetype = self.guess_mimetype(content)
    #     self.save_file(content, mimetype)
    #
    #
    #     update_decoded_daterange(sha1_string, date_from)
    #     if date_from != date_to and date_to:
    #         update_decoded_daterange(sha1_string, date_to)

    #######################################################################################
    #######################################################################################

    def is_vt_enabled(self):
        """Return the module-level ``VT_ENABLED`` flag."""
        return VT_ENABLED

    def set_vt_report(self, report):
        """Store ``report`` in the ``vt_report`` field of the metadata hash."""
        r_objects.hset(f'meta:{self.type}:{self.id}', 'vt_report', report)

    def set_meta_vt(self, link, report):
        """Store the VirusTotal ``link`` and ``report`` in the metadata hash."""
        r_objects.hset(f'meta:{self.type}:{self.id}', 'vt_link', link)
        self.set_vt_report(report)

    def refresh_vt_report(self):
        """
        Query the VirusTotal v2 file report API and store a short report.

        :return: the report string on HTTP 200 (also saved in the metadata),
            an explanatory message on HTTP 403 or 204, None for any other
            status code.
        """
        params = {'apikey': VT_TOKEN, 'resource': self.id}
        # NOTE(review): no timeout is set on this request - confirm whether
        # an unbounded wait is acceptable here.
        response = requests.get('https://www.virustotal.com/vtapi/v2/file/report', params=params)
        if response.status_code == 200:
            json_response = response.json()
            response_code = json_response['response_code']
            # report exist
            if response_code == 1:
                total = json_response['total']
                detection = json_response['positives']
                report = f'Detection {detection}/{total}'
            # no report found
            elif response_code == 0:
                report = 'No report found'
            # file in queue
            elif response_code == -2:
                report = 'In Queue - Refresh'
            else:
                report = 'Error - Unknown VT response'
            self.set_vt_report(report)
            # NOTE(review): debug output kept as-is.
            print(json_response)
            print(response_code)
            print(report)
            return report
        elif response.status_code == 403:
            return 'Virustotal key is incorrect (e.g. for public API not for virustotal intelligence), authentication failed'
        elif response.status_code == 204:
            return 'Rate Limited'
        return None

    def send_to_vt(self):
        """Submit the decoded content to VirusTotal and store the analysis link."""
        files = {'file': (self.id, self.get_content())}
        response = requests.post('https://www.virustotal.com/vtapi/v2/file/scan', files=files,
                                 params={'apikey': VT_TOKEN})
        json_response = response.json()
        # Keep the permalink up to (and including) its 'analysis/' part.
        link = json_response['permalink'].split('analysis')[0] + 'analysis/'
        self.set_meta_vt(link, 'Please Refresh')
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
############################################################################
|
|
|
|
|
|
|
|
def is_vt_enabled():
    """Tell whether VirusTotal submission is enabled (module flag ``VT_ENABLED``)."""
    enabled = VT_ENABLED
    return enabled
|
2022-08-19 16:53:31 +02:00
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
def get_all_decodeds():
    """Return the members of the ``decoded:all`` set."""
    key = 'decoded:all'
    return r_objects.smembers(key)
|
|
|
|
|
|
|
|
def get_algos():
    """Return the names of the supported decoding algorithms."""
    algos = ['base64', 'binary', 'hexadecimal']
    return algos
|
|
|
|
|
|
|
|
def get_all_mimetypes():
    """Return the members of the ``decoded:mimetypes`` set."""
    key = 'decoded:mimetypes'
    return r_objects.smembers(key)
|
|
|
|
|
|
|
|
def get_nb_decodeds_by_date(date):
    """Return the cardinality of the ``decoded:date:<date>`` sorted set."""
    key = f'decoded:date:{date}'
    return r_objects.zcard(key)
|
|
|
|
|
|
|
|
def get_decodeds_by_date(date):
    """Return every member of the ``decoded:date:<date>`` sorted set."""
    key = f'decoded:date:{date}'
    return r_objects.zrange(key, 0, -1)
|
|
|
|
|
|
|
|
def get_algo_decodeds_by_date(date, algo):
    """Return the members of the ``decoded:algo:<algo>:<date>`` set."""
    key = f'decoded:algo:{algo}:{date}'
    return r_objects.smembers(key)
|
|
|
|
|
|
|
|
def get_mimetype_decodeds_by_date(date, mimetype):
    """Return the members of the ``decoded:mime:<mimetype>:<date>`` set."""
    key = f'decoded:mime:{mimetype}:{date}'
    return r_objects.smembers(key)
|
|
|
|
|
|
|
|
def get_algo_mimetype_decodeds_by_date(date, algo, mimetype):
    """Return the intersection of the per-algo and per-mimetype sets of ``date``."""
    algo_key = f'decoded:algo:{algo}:{date}'
    mime_key = f'decoded:mime:{mimetype}:{date}'
    return r_objects.sinter(algo_key, mime_key)
|
|
|
|
|
|
|
|
def get_decodeds_by_daterange(date_from, date_to, algo=None, mimetype=None):
    """
    Collect the decodeds of every day returned by ``Date.substract_date``.

    :param date_from: first bound of the date range.
    :param date_to: second bound of the date range.
    :param algo: optional algorithm filter.
    :param mimetype: optional mimetype filter.
    :return: set of decodeds, filtered by algo and/or mimetype when given.
    """
    collected = set()
    for day in Date.substract_date(date_from, date_to):
        if algo and mimetype:
            collected = collected | get_algo_mimetype_decodeds_by_date(day, algo, mimetype)
        elif algo:
            collected = collected | get_algo_decodeds_by_date(day, algo)
        elif mimetype:
            collected = collected | get_mimetype_decodeds_by_date(day, mimetype)
        else:
            # no filter: the per-date index is a sorted set, convert it
            collected = collected | set(get_decodeds_by_date(day))
    return collected
|
|
|
|
|
|
|
|
def sanitise_algo(algo):
    """Return ``algo`` if it is one of ``get_algos()``, otherwise None."""
    return algo if algo in get_algos() else None
|
|
|
|
|
|
|
|
def sanitise_mimetype(mimetype):
    """Return ``mimetype`` if it is a member of ``decoded:mimetypes``, else None."""
    if mimetype and r_objects.sismember('decoded:mimetypes', mimetype):
        return mimetype
    return None
|
|
|
|
|
2023-05-04 16:35:56 +02:00
|
|
|
def sanitize_decoded_name_to_search(name_to_search):  # TODO FILTER NAME
    """Return the searched name unchanged (no filtering is implemented yet)."""
    sanitized = name_to_search
    return sanitized
|
|
|
|
|
|
|
|
def search_decodeds_by_name(name_to_search, r_pos=False):
    """
    Search all decoded ids with a regular expression.

    :param name_to_search: pattern compiled with ``re.compile``.
    :param r_pos: when true, add the match bounds (``hl-start`` / ``hl-end``)
        to each result.
    :return: dict mapping each matching decoded id to its (possibly empty)
        position dict; empty dict when the name is empty or the sanitizer
        returned a dict.
    """
    # for subtype in subtypes:
    pattern = sanitize_decoded_name_to_search(name_to_search)
    if not name_to_search or isinstance(pattern, dict):
        return {}

    pattern = re.compile(pattern)
    matches = {}
    for decoded_id in get_all_decodeds():
        found = re.search(pattern, decoded_id)
        if not found:
            continue
        matches[decoded_id] = {}
        if r_pos:
            matches[decoded_id]['hl-start'] = found.start()
            matches[decoded_id]['hl-end'] = found.end()
    return matches
|
|
|
|
|
2023-05-10 16:26:46 +02:00
|
|
|
|
|
|
|
############################################################################
|
|
|
|
|
|
|
|
def get_decodeds_dir():
    """Return ``AIL_HOME/HASH_DIR`` with a guaranteed trailing ``/``."""
    base_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR)
    if base_dir.endswith("/"):
        return base_dir
    return f"{base_dir}/"
|
|
|
|
|
|
|
|
# Generator
|
|
|
|
|
|
|
|
def get_nb_decodeds_objects(filters=None):
    """
    Count the decoded files stored on disk.

    :param filters: optional dict; when it has a ``mimetypes`` key only these
        mimetype directories are walked, otherwise every known mimetype
        (``get_all_mimetypes()``) is used.
    :return: number of files found under the selected mimetype directories.
    """
    # Avoid a shared mutable default argument.
    if filters is None:
        filters = {}
    if 'mimetypes' in filters:
        mimetypes = filters['mimetypes']
    else:
        mimetypes = get_all_mimetypes()
    d_dir = get_decodeds_dir()
    return sum(
        len(files)
        for mimetype in mimetypes
        for _root, _dirs, files in os.walk(os.path.join(d_dir, mimetype))
    )
|
|
|
|
|
|
|
|
def get_all_decodeds_objects(filters=None):
    """
    Generator yielding a ``Decoded`` for every decoded file found on disk.

    Mimetype directories are visited in sorted order.

    :param filters: optional dict with the keys:
        - ``mimetypes``: iterable of mimetypes to walk (default: all known);
        - ``start``: ``'<prefix>:<decoded id>'``; mimetypes sorted before the
          start decoded's mimetype are dropped and files are skipped until
          the start id is found. The start id itself is yielded.
    """
    # Avoid a shared mutable default argument.
    if filters is None:
        filters = {}
    if 'mimetypes' in filters:
        # TODO sanityze mimetype
        mimetypes = filters['mimetypes']
    else:
        mimetypes = get_all_mimetypes()
    mimetypes = sorted(mimetypes)

    start_id = None
    if filters.get('start'):
        _, start_id = filters['start'].split(':', 1)
        # remove the mimetypes sorted before the one of the start decoded
        start_mimetype = Decoded(start_id).get_mimetype()
        if start_mimetype and start_mimetype in mimetypes:
            mimetypes = mimetypes[mimetypes.index(start_mimetype):]

    d_dir = get_decodeds_dir()
    for mimetype in mimetypes:
        for _root, _dirs, files in os.walk(os.path.join(d_dir, mimetype)):
            if start_id:
                if start_id not in files:
                    # start not reached yet: skip this whole directory
                    continue
                # Resume from the start id. The previous implementation
                # compared a stale index with the already-sliced list and
                # could wrongly drop this directory.
                files = files[files.index(start_id):]
                start_id = None
            for file in files:
                yield Decoded(file)
|
2023-05-10 16:26:46 +02:00
|
|
|
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
############################################################################
|
2022-08-19 16:53:31 +02:00
|
|
|
|
|
|
|
def sanityze_decoder_names(decoder_name):
    """Return ``decoder_name`` if it is a known algorithm, otherwise None."""
    if decoder_name in get_algos():
        return decoder_name
    return None
|
|
|
|
|
|
|
|
def sanityze_mimetype(mimetype):
    """
    Return ``mimetype`` if it is a registered mimetype, otherwise None.

    The special value ``'All types'`` is mapped to None without any lookup.
    """
    if mimetype == 'All types':
        return None
    if r_objects.sismember('decoded:mimetypes', mimetype):
        return mimetype
    return None
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
# TODO
|
2022-08-19 16:53:31 +02:00
|
|
|
def pie_chart_mimetype_json(date_from, date_to, mimetype, decoder_name):
    """
    Unfinished stub (TODO): iterates dates and mimetypes without producing data.

    Note: a later definition with the same name replaces this function at
    import time.
    """
    all_mimetypes = [mimetype] if mimetype else get_all_mimetypes()
    for _date in Date.substract_date(date_from, date_to):
        for _mimet in all_mimetypes:
            pass
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
# TODO
|
2022-08-19 16:53:31 +02:00
|
|
|
def pie_chart_decoder_json(date_from, date_to, mimetype):
    """
    Return, for each algorithm, the sum of its daily scores over the range.

    :param date_from: first bound of the date range.
    :param date_to: second bound of the date range.
    :param mimetype: currently unused (per-mimetype counting is a TODO).
    :return: list of ``{'name': algo, 'value': total}`` in ``get_algos()`` order.
    """
    algos = get_algos()
    days = Date.substract_date(date_from, date_to)
    if not days:
        # empty range: fall back to today
        days.append(Date.get_today_date_str())

    totals = {}
    for day in days:
        for name in algos:
            # if not mimetype:
            score = r_objects.zscore(f'decoded:algos:{day}', name)
            # TODO mimetype necoding per day
            # else:
            #     nb = r_metadata.zscore(f'{algo_name}_type:{mimetype}', date)
            count = 0 if score is None else int(score)
            totals[name] = totals.get(name, 0) + count

    return [{'name': name, 'value': totals[name]} for name in algos]
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
# TODO FILTER BY ALGO
|
|
|
|
def pie_chart_mimetype_json(date_from, date_to, algo):
    """
    Return the five mimetypes with the highest summed daily scores.

    :param date_from: first bound of the date range.
    :param date_to: second bound of the date range.
    :param algo: currently unused (filtering by algo is a TODO).
    :return: list of ``{'name': mimetype, 'value': total}``, highest first.
    """
    days = Date.substract_date(date_from, date_to)
    if not days:
        # empty range: fall back to today
        days.append(Date.get_today_date_str())

    # A single day is just the one-iteration case of the accumulation.
    totals = {}
    for day in days:
        scored = r_objects.zrange(f'decoded:mime:{day}', 0, -1, withscores=True)
        for mime, nb in scored:
            totals[mime] = totals.get(mime, 0) + int(nb)

    ranking = sorted(totals, key=totals.get, reverse=True)
    return [{'name': mime, 'value': totals[mime]} for mime in ranking[:5]]
|
|
|
|
|
|
|
|
def barchart_range_json(date_from, date_to, mimetype=None):
    """
    Build the bar chart rows of decodeds per mimetype over a date range.

    Three shapes are produced:
    - ``mimetype`` given: one row per day, ``{'date': 'YYYY-MM-DD', <mimetype>: count}``;
    - no mimetype, single day: one row per mimetype,
      ``{'date': <mimetype>, 'mimetype': score}``;
    - no mimetype, several days: one row per day holding the score of every
      mimetype seen in the range (0 when absent, ``'No Data'`` if none).
    """
    days = Date.substract_date(date_from, date_to)
    if not days:
        # empty range: fall back to today
        days.append(Date.get_today_date_str())

    rows = []

    if mimetype:
        for day in days:
            row = {'date': f'{day[0:4]}-{day[4:6]}-{day[6:8]}'}
            row[mimetype] = r_objects.scard(f'decoded:mime:{mimetype}:{day}')
            rows.append(row)
        return rows

    # algo by mimetype, date = mimetype
    if len(days) == 1:
        # TODO
        # UNION
        # f'decoded:algo:{algo_name}:{date}'
        # f'decoded:mime:{mimetype}:{date}'
        for mime, nb in r_objects.zrange(f'decoded:mime:{days[0]}', 0, -1, withscores=True):
            rows.append({'date': mime, 'mimetype': nb})
        return rows

    # mimetypes by date
    seen_mimes = set()
    for day in days:
        row = {'date': f'{day[0:4]}-{day[4:6]}-{day[6:8]}'}
        for mime, nb in r_objects.zrange(f'decoded:mime:{day}', 0, -1, withscores=True):
            seen_mimes.add(mime)
            row[mime] = int(nb)
        rows.append(row)
    if not seen_mimes:
        seen_mimes.add('No Data')
    # every row gets a value for every mimetype seen in the range
    for row in rows:
        for mime in seen_mimes:
            row.setdefault(mime, 0)
    return rows
|
|
|
|
|
|
|
|
def graphline_json(decoded_id):
    """
    Return the daily seen counts of a decoded for the previous 30 dates.

    :return: list of ``{'date': 'YYYY-MM-DD', 'value': nb_seen}``; empty when
        the decoded does not exist.
    """
    decoded = Decoded(decoded_id)
    if not decoded.exists():
        return []
    nb_day = 30
    return [
        {'date': f'{day[0:4]}-{day[4:6]}-{day[6:8]}', 'value': decoded.get_nb_seen_by_date(day)}
        for day in Date.get_previous_date_list(nb_day)
    ]
|
|
|
|
|
2022-08-19 16:53:31 +02:00
|
|
|
def api_pie_chart_decoder_json(date_from, date_to, mimetype):
    """Sanitize the mimetype and date range, then build the decoder pie chart."""
    clean_mimetype = sanityze_mimetype(mimetype)
    date_range = Date.sanitise_date_range(date_from, date_to)
    return pie_chart_decoder_json(date_range['date_from'], date_range['date_to'], clean_mimetype)
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
def api_pie_chart_mimetype_json(date_from, date_to, algo):
    """Sanitize the algo and date range, then build the mimetype pie chart."""
    clean_algo = sanitise_algo(algo)
    date_range = Date.sanitise_date_range(date_from, date_to)
    return pie_chart_mimetype_json(date_range['date_from'], date_range['date_to'], clean_algo)
|
|
|
|
|
|
|
|
def api_barchart_range_json(date_from, date_to, mimetype):
    """Sanitize the date range (and the mimetype, if given), then build the bar chart."""
    date_range = Date.sanitise_date_range(date_from, date_to)
    # an empty mimetype is passed through untouched
    clean_mimetype = sanityze_mimetype(mimetype) if mimetype else mimetype
    return barchart_range_json(date_range['date_from'], date_range['date_to'], mimetype=clean_mimetype)
|
|
|
|
|
2022-08-19 16:53:31 +02:00
|
|
|
def _delete_old_json_descriptor():
    """
    Remove every ``.json`` file found under ``AIL_HOME/HASH_DIR``.

    :return: an empty list (nothing is ever added to it).
    """
    removed = []
    top_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR)
    for root, _dirs, files in os.walk(top_dir):
        for filename in files:
            if not filename.endswith('.json'):
                continue
            os.remove(f'{root}/{filename}')
    return removed
|
|
|
|
|
2023-01-09 16:03:06 +01:00
|
|
|
|
|
|
|
# TODO
|
|
|
|
def get_all_decodeds_files():
    """Return the names of all files found under ``AIL_HOME/HASH_DIR``."""
    top_dir = os.path.join(os.environ['AIL_HOME'], HASH_DIR)
    if not top_dir.endswith("/"):
        top_dir = f"{top_dir}/"
    names = []
    for _root, _dirs, files in os.walk(top_dir):
        # decoded_path = f'{root}{file}'
        names.extend(files)
    return names
|
|
|
|
|
2023-05-04 16:35:56 +02:00
|
|
|
|
2023-05-10 16:26:46 +02:00
|
|
|
if __name__ == '__main__':
    # Manual test: iterate over the decodeds, resuming from a given id.
    # name_to_search = '4d36'
    # print(search_decodeds_by_name(name_to_search))
    # filters = {'mimetypes': ['text/html']}
    # filters = {}
    filters = {'start': ':1a005f82a4ae0940205c8fd81fd14838845696be'}
    for decoded_obj in get_all_decodeds_objects(filters=filters):
        print(decoded_obj)
|