diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 547cd76f..00c224e4 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -27,7 +27,7 @@ fi
export PATH=$AIL_VENV/bin:$PATH
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
+export PATH=$AIL_KVROCKS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@@ -685,9 +685,6 @@ while [ "$1" != "" ]; do
-lrv | --launchRedisVerify ) launch_redis;
wait_until_redis_is_ready;
;;
- -lav | --launchARDBVerify ) launch_ardb;
- wait_until_ardb_is_ready;
- ;;
-lkv | --launchKVORCKSVerify ) launch_kvrocks;
wait_until_kvrocks_is_ready;
;;
diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index be615993..d16ad9f7 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -17,6 +17,7 @@ from lib import ail_logger
from lib import crawlers
from lib.ConfigLoader import ConfigLoader
from lib.objects import CookiesNames
+from lib.objects import Etags
from lib.objects.Domains import Domain
from lib.objects.Items import Item
from lib.objects import Screenshots
@@ -59,6 +60,7 @@ class Crawler(AbstractModule):
self.root_item = None
self.date = None
self.items_dir = None
+ self.original_domain = None
self.domain = None
# TODO Replace with warning list ???
@@ -98,7 +100,7 @@ class Crawler(AbstractModule):
self.crawler_scheduler.update_queue()
self.crawler_scheduler.process_queue()
- self.refresh_lacus_status() # TODO LOG ERROR
+ self.refresh_lacus_status() # TODO LOG ERROR
if not self.is_lacus_up:
return None
@@ -121,11 +123,19 @@ class Crawler(AbstractModule):
if capture:
try:
status = self.lacus.get_capture_status(capture.uuid)
- if status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time ### print start time
+ if status == crawlers.CaptureStatus.DONE:
+ return capture
+ elif status == crawlers.CaptureStatus.UNKNOWN:
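+                    # a capture stuck in UNKNOWN for over 10 minutes is treated as lost:
+                    # its task is reset and re-queued, and the stale capture is deleted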
+ capture_start = capture.get_start_time(r_str=False)
+ if int(time.time()) - capture_start > 600: # TODO ADD in new crawler config
+ task = capture.get_task()
+ task.reset()
+ capture.delete()
+ else:
+ capture.update(status)
+ else:
capture.update(status)
print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time()))
- else:
- return capture
except ConnectionError:
print(capture.uuid)
@@ -181,6 +191,7 @@ class Crawler(AbstractModule):
print(domain)
self.domain = Domain(domain)
+ self.original_domain = Domain(domain)
epoch = int(time.time())
parent_id = task.get_parent()
@@ -203,12 +214,20 @@ class Crawler(AbstractModule):
# Origin + History + tags
if self.root_item:
self.domain.set_last_origin(parent_id)
- self.domain.add_history(epoch, root_item=self.root_item)
# Tags
for tag in task.get_tags():
self.domain.add_tag(tag)
- elif self.domain.was_up():
- self.domain.add_history(epoch, root_item=epoch)
+ self.domain.add_history(epoch, root_item=self.root_item)
+
+        if self.domain.id != self.original_domain.id:
+ self.original_domain.update_daterange(self.date.replace('/', ''))
+ if self.root_item:
+ self.original_domain.set_last_origin(parent_id)
+ # Tags
+ for tag in task.get_tags():
+                    self.original_domain.add_tag(tag)
+ self.original_domain.add_history(epoch, root_item=self.root_item)
+ crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch)
crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch)
print('capture:', capture.uuid, 'completed')
@@ -263,7 +282,7 @@ class Crawler(AbstractModule):
title_content = crawlers.extract_title_from_html(entries['html'])
if title_content:
title = Titles.create_title(title_content)
- title.add(item.get_date(), item_id)
+ title.add(item.get_date(), item)
# SCREENSHOT
if self.screenshot:
@@ -287,7 +306,12 @@ class Crawler(AbstractModule):
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
print(cookie_name)
cookie = CookiesNames.create(cookie_name)
- cookie.add(self.date.replace('/', ''), self.domain.id)
+ cookie.add(self.date.replace('/', ''), self.domain)
+ for etag_content in crawlers.extract_etag_from_har(entries['har']):
+ print(etag_content)
+ etag = Etags.create(etag_content)
+ etag.add(self.date.replace('/', ''), self.domain)
+ crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
# Next Children
entries_children = entries.get('children')
diff --git a/bin/exporter/MailExporter.py b/bin/exporter/MailExporter.py
index 4b2d4a3a..c4d3f5b5 100755
--- a/bin/exporter/MailExporter.py
+++ b/bin/exporter/MailExporter.py
@@ -8,9 +8,12 @@ Import Content
"""
import os
+import logging
+import logging.config
import sys
from abc import ABC
+from ssl import create_default_context
import smtplib
from email.mime.multipart import MIMEMultipart
@@ -22,17 +25,22 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
+from lib import ail_logger
from exporter.abstract_exporter import AbstractExporter
from lib.ConfigLoader import ConfigLoader
# from lib.objects.abstract_object import AbstractObject
# from lib.Tracker import Tracker
+logging.config.dictConfig(ail_logger.get_config(name='modules'))
+
class MailExporter(AbstractExporter, ABC):
- def __init__(self, host=None, port=None, password=None, user='', sender=''):
+ def __init__(self, host=None, port=None, password=None, user='', sender='', cert_required=None, ca_file=None):
super().__init__()
config_loader = ConfigLoader()
+        self.logger = logging.getLogger(self.__class__.__name__)
+
if host:
self.host = host
self.port = port
@@ -45,6 +53,15 @@ class MailExporter(AbstractExporter, ABC):
self.pw = config_loader.get_config_str("Notifications", "sender_pw")
if self.pw == 'None':
self.pw = None
+ if cert_required is not None:
+ self.cert_required = bool(cert_required)
+ self.ca_file = ca_file
+ else:
+ self.cert_required = config_loader.get_config_boolean("Notifications", "cert_required")
+ if self.cert_required:
+ self.ca_file = config_loader.get_config_str("Notifications", "ca_file")
+ else:
+ self.ca_file = None
if user:
self.user = user
else:
@@ -67,8 +84,12 @@ class MailExporter(AbstractExporter, ABC):
smtp_server = smtplib.SMTP(self.host, self.port)
smtp_server.starttls()
except smtplib.SMTPNotSupportedError:
- print("The server does not support the STARTTLS extension.")
- smtp_server = smtplib.SMTP_SSL(self.host, self.port)
+ self.logger.info(f"The server {self.host}:{self.port} does not support the STARTTLS extension.")
+ if self.cert_required:
+ context = create_default_context(cafile=self.ca_file)
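+                # create_default_context() enables certificate and hostname checks,
+                # trusting ca_file when set (or the system CA store when None)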
+ else:
+ context = None
+ smtp_server = smtplib.SMTP_SSL(self.host, self.port, context=context)
smtp_server.ehlo()
if self.user is not None:
@@ -80,7 +101,7 @@ class MailExporter(AbstractExporter, ABC):
return smtp_server
# except Exception as err:
# traceback.print_tb(err.__traceback__)
- # logger.warning(err)
+ # self.logger.warning(err)
def _export(self, recipient, subject, body):
mime_msg = MIMEMultipart()
@@ -95,8 +116,8 @@ class MailExporter(AbstractExporter, ABC):
smtp_client.quit()
# except Exception as err:
# traceback.print_tb(err.__traceback__)
- # logger.warning(err)
- print(f'Send notification: {subject} to {recipient}')
+ # self.logger.warning(err)
+ self.logger.info(f'Send notification: {subject} to {recipient}')
class MailExporterTracker(MailExporter):
diff --git a/bin/importer/FeederImporter.py b/bin/importer/FeederImporter.py
index 7acd6ae9..dc2dfb7d 100755
--- a/bin/importer/FeederImporter.py
+++ b/bin/importer/FeederImporter.py
@@ -87,13 +87,16 @@ class FeederImporter(AbstractImporter):
feeder_name = feeder.get_name()
print(f'importing: {feeder_name} feeder')
- item_id = feeder.get_item_id()
+ item_id = feeder.get_item_id() # TODO replace me with object global id
# process meta
if feeder.get_json_meta():
feeder.process_meta()
- gzip64_content = feeder.get_gzip64_content()
- return f'{feeder_name} {item_id} {gzip64_content}'
+ if feeder_name == 'telegram':
+ return item_id # TODO support UI dashboard
+ else:
+ gzip64_content = feeder.get_gzip64_content()
+ return f'{feeder_name} {item_id} {gzip64_content}'
class FeederModuleImporter(AbstractModule):
diff --git a/bin/importer/PystemonImporter.py b/bin/importer/PystemonImporter.py
index df7d8d3a..1a0e68d8 100755
--- a/bin/importer/PystemonImporter.py
+++ b/bin/importer/PystemonImporter.py
@@ -35,7 +35,7 @@ class PystemonImporter(AbstractImporter):
print(item_id)
if item_id:
print(item_id)
- full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
+ full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
# Check if pystemon file exists
if not os.path.isfile(full_item_path):
print(f'Error: {full_item_path}, file not found')
@@ -47,7 +47,12 @@ class PystemonImporter(AbstractImporter):
if not content:
return None
- return self.create_message(item_id, content, source='pystemon')
+            if full_item_path.endswith('.gz'):
+ gzipped = True
+ else:
+ gzipped = False
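+            # pystemon may store pastes gzip-compressed; flagging them lets
+            # create_message() skip the re-compression step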
+
+ return self.create_message(item_id, content, gzipped=gzipped, source='pystemon')
except IOError as e:
print(f'Error: {full_item_path}, IOError')
diff --git a/bin/importer/abstract_importer.py b/bin/importer/abstract_importer.py
index e5155775..1c4b458d 100755
--- a/bin/importer/abstract_importer.py
+++ b/bin/importer/abstract_importer.py
@@ -89,7 +89,7 @@ class AbstractImporter(ABC): # TODO ail queues
if not gzipped:
content = self.b64_gzip(content)
elif not b64:
- content = self.b64(gzipped)
+ content = self.b64(content)
if not content:
return None
if isinstance(content, bytes):
diff --git a/bin/importer/feeders/Jabber.py b/bin/importer/feeders/Jabber.py
index 79d0950f..8c90adfd 100755
--- a/bin/importer/feeders/Jabber.py
+++ b/bin/importer/feeders/Jabber.py
@@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from importer.feeders.Default import DefaultFeeder
from lib.objects.Usernames import Username
-from lib import item_basic
+from lib.objects.Items import Item
class JabberFeeder(DefaultFeeder):
@@ -36,7 +36,7 @@ class JabberFeeder(DefaultFeeder):
self.item_id = f'{item_id}.gz'
return self.item_id
- def process_meta(self):
+ def process_meta(self): # TODO replace me by message
"""
Process JSON meta field.
"""
@@ -44,10 +44,12 @@ class JabberFeeder(DefaultFeeder):
# item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ##############################################
to = str(self.json_data['meta']['jabber:to'])
fr = str(self.json_data['meta']['jabber:from'])
- date = item_basic.get_item_date(item_id)
+
+ item = Item(self.item_id)
+ date = item.get_date()
user_to = Username(to, 'jabber')
user_fr = Username(fr, 'jabber')
- user_to.add(date, self.item_id)
- user_fr.add(date, self.item_id)
+ user_to.add(date, item)
+ user_fr.add(date, item)
return None
diff --git a/bin/importer/feeders/Telegram.py b/bin/importer/feeders/Telegram.py
index 3856c88e..2cc6a127 100755
--- a/bin/importer/feeders/Telegram.py
+++ b/bin/importer/feeders/Telegram.py
@@ -16,8 +16,28 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages
##################################
from importer.feeders.Default import DefaultFeeder
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.Chats import Chat
+from lib.objects import Messages
+from lib.objects import UsersAccount
from lib.objects.Usernames import Username
-from lib import item_basic
+
+import base64
+import io
+import gzip
+def gunzip_bytes_obj(bytes_obj):
+ gunzipped_bytes_obj = None
+ try:
+ in_ = io.BytesIO()
+ in_.write(bytes_obj)
+ in_.seek(0)
+
+ with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
+ gunzipped_bytes_obj = fo.read()
+ except Exception as e:
+ print(f'Global; Invalid Gzip file: {e}')
+
+ return gunzipped_bytes_obj
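+# round-trip sanity check (assuming the feeder sends gzip blobs):
+#   gunzip_bytes_obj(gzip.compress(b'message')) == b'message'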
class TelegramFeeder(DefaultFeeder):
@@ -26,31 +46,90 @@ class TelegramFeeder(DefaultFeeder):
self.name = 'telegram'
# define item id
- def get_item_id(self):
- # TODO use telegram message date
- date = datetime.date.today().strftime("%Y/%m/%d")
- channel_id = str(self.json_data['meta']['channel_id'])
- message_id = str(self.json_data['meta']['message_id'])
- item_id = f'{channel_id}_{message_id}'
- item_id = os.path.join('telegram', date, item_id)
- self.item_id = f'{item_id}.gz'
+ def get_item_id(self): # TODO rename self.item_id
+ # Get message date
+ timestamp = self.json_data['meta']['date']['timestamp'] # TODO CREATE DEFAULT TIMESTAMP
+ # if self.json_data['meta'].get('date'):
+ # date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp'])
+ # date = date.strftime('%Y/%m/%d')
+ # else:
+ # date = datetime.date.today().strftime("%Y/%m/%d")
+ chat_id = str(self.json_data['meta']['chat']['id'])
+ message_id = str(self.json_data['meta']['id'])
+ self.item_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp) # TODO rename self.item_id
return self.item_id
def process_meta(self):
"""
Process JSON meta field.
"""
- # channel_id = str(self.json_data['meta']['channel_id'])
- # message_id = str(self.json_data['meta']['message_id'])
- # telegram_id = f'{channel_id}_{message_id}'
- # item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id') #########################################
- user = None
- if self.json_data['meta'].get('user'):
- user = str(self.json_data['meta']['user'])
- elif self.json_data['meta'].get('channel'):
- user = str(self.json_data['meta']['channel'].get('username'))
- if user:
- date = item_basic.get_item_date(self.item_id)
- username = Username(user, 'telegram')
- username.add(date, self.item_id)
+ # message chat
+ meta = self.json_data['meta']
+ mess_id = self.json_data['meta']['id']
+ if meta.get('reply_to'):
+ reply_to_id = meta['reply_to']['id']
+ else:
+ reply_to_id = None
+
+ timestamp = meta['date']['timestamp']
+ date = datetime.datetime.fromtimestamp(timestamp)
+ date = date.strftime('%Y%m%d')
+
+ if self.json_data.get('translation'):
+ translation = self.json_data['translation']
+ else:
+ translation = None
+ decoded = base64.standard_b64decode(self.json_data['data'])
+ content = gunzip_bytes_obj(decoded)
+ message = Messages.create(self.item_id, content, translation=translation)
+
+ if meta.get('chat'):
+ chat = Chat(meta['chat']['id'], 'telegram')
+
+ if meta['chat'].get('username'):
+ chat_username = Username(meta['chat']['username'], 'telegram')
+ chat.update_username_timeline(chat_username.get_global_id(), timestamp)
+
+ # Chat---Message
+ chat.add(date)
+ chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id)
+ else:
+ chat = None
+
+ # message sender
+ if meta.get('sender'): # TODO handle message channel forward - check if is user
+ user_id = meta['sender']['id']
+ user_account = UsersAccount.UserAccount(user_id, 'telegram')
+ # UserAccount---Message
+ user_account.add(date, obj=message)
+            # UserAccount---Chat (meta may have a sender but no chat)
+            if chat:
+                user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id)
+
+ if meta['sender'].get('firstname'):
+ user_account.set_first_name(meta['sender']['firstname'])
+ if meta['sender'].get('lastname'):
+ user_account.set_last_name(meta['sender']['lastname'])
+ if meta['sender'].get('phone'):
+ user_account.set_phone(meta['sender']['phone'])
+
+ if meta['sender'].get('username'):
+ username = Username(meta['sender']['username'], 'telegram')
+ # TODO timeline or/and correlation ????
+ user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
+ user_account.update_username_timeline(username.get_global_id(), timestamp)
+
+ # Username---Message
+ username.add(date) # TODO # correlation message ???
+
+ # if chat: # TODO Chat---Username correlation ???
+ # # Chat---Username
+ # chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id)
+
+ # if meta.get('fwd_from'):
+ # if meta['fwd_from'].get('post_author') # user first name
+
+ # TODO reply threads ????
+ # message edit ????
+
+
return None
diff --git a/bin/importer/feeders/Twitter.py b/bin/importer/feeders/Twitter.py
index d5040c65..1c719e73 100755
--- a/bin/importer/feeders/Twitter.py
+++ b/bin/importer/feeders/Twitter.py
@@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from importer.feeders.Default import DefaultFeeder
from lib.objects.Usernames import Username
-from lib import item_basic
+from lib.objects.Items import Item
class TwitterFeeder(DefaultFeeder):
@@ -40,9 +40,9 @@ class TwitterFeeder(DefaultFeeder):
'''
# tweet_id = str(self.json_data['meta']['twitter:tweet_id'])
# item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################
-
- date = item_basic.get_item_date(self.item_id)
+ item = Item(self.item_id)
+ date = item.get_date()
user = str(self.json_data['meta']['twitter:id'])
username = Username(user, 'twitter')
- username.add(date, item_id)
+ username.add(date, item)
return None
diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py
index 1944d00f..9c6def0f 100755
--- a/bin/lib/Investigations.py
+++ b/bin/lib/Investigations.py
@@ -235,18 +235,27 @@ class Investigation(object):
objs.append(dict_obj)
return objs
+ def get_objects_comment(self, obj_global_id):
+ return r_tracking.hget(f'investigations:objs:comment:{self.uuid}', obj_global_id)
+
+ def set_objects_comment(self, obj_global_id, comment):
+ if comment:
+ r_tracking.hset(f'investigations:objs:comment:{self.uuid}', obj_global_id, comment)
+
# # TODO: def register_object(self, Object): in OBJECT CLASS
- def register_object(self, obj_id, obj_type, subtype):
+ def register_object(self, obj_id, obj_type, subtype, comment=''):
r_tracking.sadd(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
r_tracking.sadd(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
+ if comment:
+ self.set_objects_comment(f'{obj_type}:{subtype}:{obj_id}', comment)
timestamp = int(time.time())
self.set_last_change(timestamp)
-
def unregister_object(self, obj_id, obj_type, subtype):
r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid)
+ r_tracking.hdel(f'investigations:objs:comment:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}')
timestamp = int(time.time())
self.set_last_change(timestamp)
@@ -351,7 +360,7 @@ def get_investigations_selector():
for investigation_uuid in get_all_investigations():
investigation = Investigation(investigation_uuid)
name = investigation.get_info()
- l_investigations.append({"id":investigation_uuid, "name": name})
+ l_investigations.append({"id": investigation_uuid, "name": name})
return l_investigations
#{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"}
@@ -453,7 +462,11 @@ def api_register_object(json_dict):
if subtype == 'None':
subtype = ''
obj_id = json_dict.get('id', '').replace(' ', '')
- res = investigation.register_object(obj_id, obj_type, subtype)
+
+ comment = json_dict.get('comment', '')
+ # if comment:
+ # comment = escape(comment)
+ res = investigation.register_object(obj_id, obj_type, subtype, comment=comment)
return res, 200
def api_unregister_object(json_dict):
diff --git a/bin/lib/Tag.py b/bin/lib/Tag.py
index 94b2eca4..64850b3c 100755
--- a/bin/lib/Tag.py
+++ b/bin/lib/Tag.py
@@ -338,7 +338,7 @@ def get_galaxy_meta(galaxy_name, nb_active_tags=False):
else:
meta['icon'] = f'fas fa-{icon}'
if nb_active_tags:
- meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy)
+ meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy.type)
meta['nb_tags'] = len(get_galaxy_tags(galaxy.type))
return meta
diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py
index 2a5336ad..f1ea8905 100755
--- a/bin/lib/Tracker.py
+++ b/bin/lib/Tracker.py
@@ -207,6 +207,13 @@ class Tracker:
if filters:
self._set_field('filters', json.dumps(filters))
+ def del_filters(self, tracker_type, to_track):
+ filters = self.get_filters()
+ for obj_type in filters:
+ r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
+ r_tracker.srem(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
+ r_tracker.hdel(f'tracker:{self.uuid}', 'filters')
+
def get_tracked(self):
return self._get_field('tracked')
@@ -513,6 +520,7 @@ class Tracker:
self._set_mails(mails)
# Filters
+ self.del_filters(old_type, old_to_track)
if not filters:
filters = {}
for obj_type in get_objects_tracked():
@@ -522,9 +530,6 @@ class Tracker:
for obj_type in filters:
r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track)
r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}')
- if tracker_type != old_type:
- r_tracker.srem(f'trackers:objs:{old_type}:{obj_type}', old_to_track)
- r_tracker.srem(f'trackers:uuid:{old_type}:{old_to_track}', f'{self.uuid}:{obj_type}')
# Refresh Trackers
trigger_trackers_refresh(tracker_type)
@@ -650,14 +655,14 @@ def get_user_trackers_meta(user_id, tracker_type=None):
metas = []
for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type):
tracker = Tracker(tracker_uuid)
- metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
+ metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
return metas
def get_global_trackers_meta(tracker_type=None):
metas = []
for tracker_uuid in get_global_trackers(tracker_type=tracker_type):
tracker = Tracker(tracker_uuid)
- metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'}))
+ metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'}))
return metas
def get_users_trackers_meta():
diff --git a/bin/lib/Users.py b/bin/lib/Users.py
index a61830ef..765b1360 100755
--- a/bin/lib/Users.py
+++ b/bin/lib/Users.py
@@ -247,7 +247,10 @@ class User(UserMixin):
self.id = "__anonymous__"
def exists(self):
- return self.id != "__anonymous__"
+ if self.id == "__anonymous__":
+ return False
+ else:
+ return r_serv_db.exists(f'ail:user:metadata:{self.id}')
# return True or False
# def is_authenticated():
@@ -287,3 +290,6 @@ class User(UserMixin):
return True
else:
return False
+
+ def get_role(self):
+ return r_serv_db.hget(f'ail:user:metadata:{self.id}', 'role')
diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py
index c52db274..eeb83a98 100755
--- a/bin/lib/ail_core.py
+++ b/bin/lib/ail_core.py
@@ -15,8 +15,8 @@ config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
-AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp',
- 'screenshot', 'title', 'username'})
+AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item',
+ 'pgp', 'screenshot', 'title', 'user-account', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
@@ -38,9 +38,11 @@ def get_all_objects():
return AIL_OBJECTS
def get_objects_with_subtypes():
- return ['cryptocurrency', 'pgp', 'username']
+ return ['chat', 'cryptocurrency', 'pgp', 'username']
def get_object_all_subtypes(obj_type):
+ if obj_type == 'chat':
+ return ['discord', 'jabber', 'telegram']
if obj_type == 'cryptocurrency':
return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
if obj_type == 'pgp':
@@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple():
str_objs.append((obj_type, ''))
return str_objs
+def unpack_obj_global_id(global_id, r_type='tuple'):
+ if r_type == 'dict':
+ obj = global_id.split(':', 2)
+        return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
+ else: # tuple(type, subtype, id)
+ return global_id.split(':', 2)
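+# e.g. unpack_obj_global_id('domain::circl.lu') -> ['domain', '', 'circl.lu'];
+# maxsplit=2 keeps any ':' inside the object id itself intact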
+
+
##-- AIL OBJECTS --##
#### Redis ####
diff --git a/bin/lib/ail_updates.py b/bin/lib/ail_updates.py
index c3729831..07fd791a 100755
--- a/bin/lib/ail_updates.py
+++ b/bin/lib/ail_updates.py
@@ -15,38 +15,15 @@ config_loader = ConfigLoader()
r_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
-BACKGROUND_UPDATES = {
- 'v1.5': {
- 'nb_updates': 5,
- 'message': 'Tags and Screenshots'
- },
- 'v2.4': {
- 'nb_updates': 1,
- 'message': ' Domains Tags and Correlations'
- },
- 'v2.6': {
- 'nb_updates': 1,
- 'message': 'Domains Tags and Correlations'
- },
- 'v2.7': {
- 'nb_updates': 1,
- 'message': 'Domains Tags'
- },
- 'v3.4': {
- 'nb_updates': 1,
- 'message': 'Domains Languages'
- },
- 'v3.7': {
- 'nb_updates': 1,
- 'message': 'Trackers first_seen/last_seen'
- }
-}
-
+# # # # # # # #
+# #
+# UPDATE #
+# #
+# # # # # # # #
def get_ail_version():
return r_db.get('ail:version')
-
def get_ail_float_version():
version = get_ail_version()
if version:
@@ -55,6 +32,179 @@ def get_ail_float_version():
version = 0
return version
+# # # - - # # #
+
+# # # # # # # # # # # #
+# #
+# UPDATE BACKGROUND #
+# #
+# # # # # # # # # # # #
+
+
+BACKGROUND_UPDATES = {
+ 'v5.2': {
+ 'message': 'Compress HAR',
+ 'scripts': ['compress_har.py']
+ },
+}
+
+class AILBackgroundUpdate:
+ """
+ AIL Background Update.
+ """
+
+ def __init__(self, version):
+ self.version = version
+
+ def _get_field(self, field):
+ return r_db.hget('ail:update:background', field)
+
+ def _set_field(self, field, value):
+ r_db.hset('ail:update:background', field, value)
+
+ def get_version(self):
+ return self.version
+
+ def get_message(self):
+ return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')
+
+ def get_error(self):
+ return self._get_field('error')
+
+ def set_error(self, error): # TODO ADD LOGS
+ self._set_field('error', error)
+
+ def get_nb_scripts(self):
+        return len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', ['']))
+
+ def get_scripts(self):
+ return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])
+
+ def get_nb_scripts_done(self):
+ done = self._get_field('done')
+ try:
+ done = int(done)
+ except (TypeError, ValueError):
+ done = 0
+ return done
+
+ def inc_nb_scripts_done(self):
+ self._set_field('done', self.get_nb_scripts_done() + 1)
+
+ def get_script(self):
+ return self._get_field('script')
+
+ def get_script_path(self):
+ path = os.path.basename(self.get_script())
+ if path:
+ return os.path.join(os.environ['AIL_HOME'], 'update', self.version, path)
+
+ def get_nb_to_update(self): # TODO use cache ?????
+ nb_to_update = self._get_field('nb_to_update')
+ if not nb_to_update:
+ nb_to_update = 1
+ return int(nb_to_update)
+
+ def set_nb_to_update(self, nb):
+ self._set_field('nb_to_update', int(nb))
+
+ def get_nb_updated(self): # TODO use cache ?????
+ nb_updated = self._get_field('nb_updated')
+ if not nb_updated:
+ nb_updated = 0
+ return int(nb_updated)
+
+ def inc_nb_updated(self): # TODO use cache ?????
+ r_db.hincrby('ail:update:background', 'nb_updated', 1)
+
+ def get_progress(self): # TODO use cache ?????
+ return self._get_field('progress')
+
+ def set_progress(self, progress):
+ self._set_field('progress', progress)
+
+ def update_progress(self):
+ nb_updated = self.get_nb_updated()
+ nb_to_update = self.get_nb_to_update()
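+        # e.g. 50 updated out of 200 -> 25%; an overshoot is pinned at 99%
+        # until end_script() sets 100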
+ if nb_updated == nb_to_update:
+ progress = 100
+ elif nb_updated > nb_to_update:
+ progress = 99
+ else:
+ progress = int((nb_updated * 100) / nb_to_update)
+ self.set_progress(progress)
+ print(f'{nb_updated}/{nb_to_update} updated {progress}%')
+ return progress
+
+ def is_running(self):
+ return r_db.hget('ail:update:background', 'version') == self.version
+
+ def get_meta(self, options=set()):
+ meta = {'version': self.get_version(),
+ 'error': self.get_error(),
+ 'script': self.get_script(),
+ 'script_progress': self.get_progress(),
+ 'nb_update': self.get_nb_scripts(),
+ 'nb_completed': self.get_nb_scripts_done()}
+ meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update'])
+ if 'message' in options:
+ meta['message'] = self.get_message()
+ return meta
+
+ def start(self):
+ self._set_field('version', self.version)
+ r_db.hdel('ail:update:background', 'error')
+
+ def start_script(self, script):
+ self.clear()
+ self._set_field('script', script)
+ self.set_progress(0)
+
+ def end_script(self):
+ self.set_progress(100)
+ self.inc_nb_scripts_done()
+
+ def clear(self):
+ r_db.hdel('ail:update:background', 'error')
+ r_db.hdel('ail:update:background', 'progress')
+ r_db.hdel('ail:update:background', 'nb_updated')
+ r_db.hdel('ail:update:background', 'nb_to_update')
+
+ def end(self):
+ r_db.delete('ail:update:background')
+ r_db.srem('ail:updates:background', self.version)
+
+
+# Called by update scripts to register a pending background update
+def add_background_update(version):
+ r_db.sadd('ail:updates:background', version)
+
+def is_update_background_running():
+ return r_db.exists('ail:update:background')
+
+def get_update_background_version():
+ return r_db.hget('ail:update:background', 'version')
+
+def get_update_background_meta(options=set()):
+ version = get_update_background_version()
+ if version:
+ return AILBackgroundUpdate(version).get_meta(options=options)
+ else:
+ return {}
+
+def get_update_background_to_launch():
+ to_launch = []
+ updates = r_db.smembers('ail:updates:background')
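+    # BACKGROUND_UPDATES is an insertion-ordered dict, so pending versions
+    # are launched oldest-first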
+ for version in BACKGROUND_UPDATES:
+ if version in updates:
+ to_launch.append(version)
+ return to_launch
+
+# # # - - # # #
+
+##########################################################################################
+##########################################################################################
+##########################################################################################
def get_ail_all_updates(date_separator='-'):
dict_update = r_db.hgetall('ail:update_date')
@@ -87,111 +237,6 @@ def check_version(version):
return True
-#### UPDATE BACKGROUND ####
-
-def exits_background_update_to_launch():
- return r_db.scard('ail:update:to_update') != 0
-
-
-def is_version_in_background_update(version):
- return r_db.sismember('ail:update:to_update', version)
-
-
-def get_all_background_updates_to_launch():
- return r_db.smembers('ail:update:to_update')
-
-
-def get_current_background_update():
- return r_db.get('ail:update:update_in_progress')
-
-
-def get_current_background_update_script():
- return r_db.get('ail:update:current_background_script')
-
-
-def get_current_background_update_script_path(version, script_name):
- return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name)
-
-
-def get_current_background_nb_update_completed():
- return r_db.scard('ail:update:update_in_progress:completed')
-
-
-def get_current_background_update_progress():
- progress = r_db.get('ail:update:current_background_script_stat')
- if not progress:
- progress = 0
- return int(progress)
-
-
-def get_background_update_error():
- return r_db.get('ail:update:error')
-
-
-def add_background_updates_to_launch(version):
- return r_db.sadd('ail:update:to_update', version)
-
-
-def start_background_update(version):
- r_db.delete('ail:update:error')
- r_db.set('ail:update:update_in_progress', version)
-
-
-def set_current_background_update_script(script_name):
- r_db.set('ail:update:current_background_script', script_name)
- r_db.set('ail:update:current_background_script_stat', 0)
-
-
-def set_current_background_update_progress(progress):
- r_db.set('ail:update:current_background_script_stat', progress)
-
-
-def set_background_update_error(error):
- r_db.set('ail:update:error', error)
-
-
-def end_background_update_script():
- r_db.sadd('ail:update:update_in_progress:completed')
-
-
-def end_background_update(version):
- r_db.delete('ail:update:update_in_progress')
- r_db.delete('ail:update:current_background_script')
- r_db.delete('ail:update:current_background_script_stat')
- r_db.delete('ail:update:update_in_progress:completed')
- r_db.srem('ail:update:to_update', version)
-
-
-def clear_background_update():
- r_db.delete('ail:update:error')
- r_db.delete('ail:update:update_in_progress')
- r_db.delete('ail:update:current_background_script')
- r_db.delete('ail:update:current_background_script_stat')
- r_db.delete('ail:update:update_in_progress:completed')
-
-
-def get_update_background_message(version):
- return BACKGROUND_UPDATES[version]['message']
-
-
-# TODO: Detect error in subprocess
-def get_update_background_metadata():
- dict_update = {}
- version = get_current_background_update()
- if version:
- dict_update['version'] = version
- dict_update['script'] = get_current_background_update_script()
- dict_update['script_progress'] = get_current_background_update_progress()
- dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates']
- dict_update['nb_completed'] = get_current_background_nb_update_completed()
- dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update'])
- dict_update['error'] = get_background_update_error()
- return dict_update
-
-
-##-- UPDATE BACKGROUND --##
-
-
if __name__ == '__main__':
res = check_version('v3.1..1')
print(res)
diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py
index 8e29837d..f7b13f61 100755
--- a/bin/lib/correlations_engine.py
+++ b/bin/lib/correlations_engine.py
@@ -41,17 +41,22 @@ config_loader = None
##################################
CORRELATION_TYPES_BY_OBJ = {
+ "chat": ["user-account"], # message or direct correlation like cve, bitcoin, ... ???
"cookie-name": ["domain"],
- "cryptocurrency": ["domain", "item"],
- "cve": ["domain", "item"],
- "decoded": ["domain", "item"],
- "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
+ "cryptocurrency": ["domain", "item", "message"],
+ "cve": ["domain", "item", "message"],
+ "decoded": ["domain", "item", "message"],
+ "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+ "etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
- "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
- "pgp": ["domain", "item"],
+ "hhhash": ["domain"],
+ "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
+ "message": ["cve", "cryptocurrency", "decoded", "pgp", "user-account"], # chat ??
+ "pgp": ["domain", "item", "message"],
"screenshot": ["domain", "item"],
"title": ["domain", "item"],
- "username": ["domain", "item"],
+ "user-account": ["chat", "message"],
+ "username": ["domain", "item", "message"], # TODO chat-user/account
}
def get_obj_correl_types(obj_type):
@@ -63,6 +68,8 @@ def sanityze_obj_correl_types(obj_type, correl_types):
correl_types = set(correl_types).intersection(obj_correl_types)
if not correl_types:
correl_types = obj_correl_types
+ if not correl_types:
+ return []
return correl_types
def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type):
@@ -169,18 +176,18 @@ def get_obj_str_id(obj_type, subtype, obj_id):
subtype = ''
return f'{obj_type}:{subtype}:{obj_id}'
-def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False):
+def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False):
links = set()
nodes = set()
meta = {'complete': True, 'objs': set()}
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
- _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='')
+ _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='')
return obj_str_id, nodes, links, meta
-def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''):
+def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''):
obj_str_id = get_obj_str_id(obj_type, subtype, obj_id)
meta['objs'].add(obj_str_id)
nodes.add(obj_str_id)
@@ -191,6 +198,10 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
for str_obj in obj_correlations[correl_type]:
subtype2, obj2_id = str_obj.split(':', 1)
obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id)
+ # filter objects to hide
+ if obj2_str_id in objs_hidden:
+ continue
+
meta['objs'].add(obj2_str_id)
if obj2_str_id == previous_str_obj:
@@ -204,5 +215,5 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id,
if level > 0:
next_level = level - 1
- _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id)
+ _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id)
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 300edb66..6387c76f 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -39,6 +39,7 @@ from packages import git_status
from packages import Date
from lib.ConfigLoader import ConfigLoader
from lib.objects.Domains import Domain
+from lib.objects import HHHashs
from lib.objects.Items import Item
config_loader = ConfigLoader()
@@ -134,7 +135,7 @@ def unpack_url(url):
# # # # # # # # TODO CREATE NEW OBJECT
def get_favicon_from_html(html, domain, url):
- favicon_urls = extract_favicon_from_html(html, url)
+ favicon_urls, favicons = extract_favicon_from_html(html, url)
# add root favicon
if not favicon_urls:
favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico')
@@ -162,7 +163,6 @@ def extract_favicon_from_html(html, url):
# -
# -
-
# Root Favicon
f = get_faup()
f.decode(url)
@@ -244,13 +244,6 @@ def extract_description_from_html(html):
return description['content']
return ''
-def extract_description_from_html(html):
- soup = BeautifulSoup(html, 'html.parser')
- description = soup.find('meta', attrs={'name': 'description'})
- if description:
- return description['content']
- return ''
-
def extract_keywords_from_html(html):
soup = BeautifulSoup(html, 'html.parser')
keywords = soup.find('meta', attrs={'name': 'keywords'})
@@ -264,6 +257,7 @@ def extract_author_from_html(html):
if keywords:
return keywords['content']
return ''
+
# # # - - # # #
@@ -275,7 +269,7 @@ def extract_author_from_html(html):
def create_har_id(date, item_id):
item_id = item_id.split('/')[-1]
- return os.path.join(date, f'{item_id}.json')
+ return os.path.join(date, f'{item_id}.json.gz')
def save_har(har_id, har_content):
# create dir
@@ -284,8 +278,8 @@ def save_har(har_id, har_content):
os.makedirs(har_dir)
# save HAR
filename = os.path.join(get_har_dir(), har_id)
- with open(filename, 'w') as f:
- f.write(json.dumps(har_content))
+ with gzip.open(filename, 'wb') as f:
+ f.write(json.dumps(har_content).encode())
def get_all_har_ids():
har_ids = []
@@ -299,9 +293,10 @@ def get_all_har_ids():
except (TypeError, ValueError):
pass
- for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
- har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
- har_ids.append(har_id)
+ if os.path.exists(today_root_dir):
+ for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
+ har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
+ har_ids.append(har_id)
for ydir in sorted(dirs_year, reverse=False):
search_dear = os.path.join(HAR_DIR, ydir)
@@ -312,14 +307,17 @@ def get_all_har_ids():
har_ids.append(har_id)
return har_ids
-def extract_cookies_names_from_har_by_har_id(har_id):
+def get_har_content(har_id):
har_path = os.path.join(HAR_DIR, har_id)
- with open(har_path) as f:
- try:
- har_content = json.loads(f.read())
- except json.decoder.JSONDecodeError:
- har_content = {}
- return extract_cookies_names_from_har(har_content)
+ try:
+ with gzip.open(har_path) as f:
+ try:
+ return json.loads(f.read())
+ except json.decoder.JSONDecodeError:
+ return {}
+ except Exception as e:
+ print(e) # TODO LOGS
+ return {}
def extract_cookies_names_from_har(har):
cookies = set()
@@ -334,17 +332,110 @@ def extract_cookies_names_from_har(har):
cookies.add(name)
return cookies
-def _reprocess_all_hars():
+def _reprocess_all_hars_cookie_name():
from lib.objects import CookiesNames
for har_id in get_all_har_ids():
domain = har_id.split('/')[-1]
- domain = domain[:-41]
+ domain = domain[:-44]
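+        # drop the 44-char tail: a 36-char uuid4 plus '.json.gz' (8 chars),
+        # matching the filenames produced by create_har_id()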
date = har_id.split('/')
date = f'{date[-4]}{date[-3]}{date[-2]}'
- for cookie_name in extract_cookies_names_from_har_by_har_id(har_id):
+ for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
print(domain, date, cookie_name)
cookie = CookiesNames.create(cookie_name)
- cookie.add(date, domain)
+ cookie.add(date, Domain(domain))
+
+def extract_etag_from_har(har): # TODO check response url
+ etags = set()
+ for entrie in har.get('log', {}).get('entries', []):
+ for header in entrie.get('response', {}).get('headers', []):
+ if header.get('name') == 'etag':
+ # print(header)
+ etag = header.get('value')
+ if etag:
+ etags.add(etag)
+ return etags
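+# e.g. a response header {'name': 'etag', 'value': 'W/"5e15-120f"'} yields
+# 'W/"5e15-120f"'; note that only the exact lowercase name 'etag' is matched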
+
+def _reprocess_all_hars_etag():
+ from lib.objects import Etags
+ for har_id in get_all_har_ids():
+ domain = har_id.split('/')[-1]
+ domain = domain[:-44]
+ date = har_id.split('/')
+ date = f'{date[-4]}{date[-3]}{date[-2]}'
+ for etag_content in extract_etag_from_har(get_har_content(har_id)):
+ print(domain, date, etag_content)
+ etag = Etags.create(etag_content)
+ etag.add(date, Domain(domain))
+
+def extract_hhhash_by_id(har_id, domain, date):
+ return extract_hhhash(get_har_content(har_id), domain, date)
+
+def extract_hhhash(har, domain, date):
+ hhhashs = set()
+ urls = set()
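+    # fingerprint each URL once, and only 200 responses served by the crawled
+    # domain itself, so redirects and third-party resources are ignored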
+ for entrie in har.get('log', {}).get('entries', []):
+ url = entrie.get('request').get('url')
+ if url not in urls:
+ # filter redirect
+ if entrie.get('response').get('status') == 200: # != 301:
+ # print(url, entrie.get('response').get('status'))
+
+ f = get_faup()
+ f.decode(url)
+ domain_url = f.get().get('domain')
+ if domain_url == domain:
+
+ headers = entrie.get('response').get('headers')
+
+ hhhash_header = HHHashs.build_hhhash_headers(headers)
+ hhhash = HHHashs.hhhash_headers(hhhash_header)
+
+ if hhhash not in hhhashs:
+ print('', url, hhhash)
+
+ # -----
+ obj = HHHashs.create(hhhash_header, hhhash)
+ obj.add(date, Domain(domain))
+
+ hhhashs.add(hhhash)
+ urls.add(url)
+ print()
+ print()
+ print('HHHASH:')
+ for hhhash in hhhashs:
+ print(hhhash)
+ return hhhashs
+
+def _reprocess_all_hars_hhhashs():
+ for har_id in get_all_har_ids():
+ print()
+ print(har_id)
+ domain = har_id.split('/')[-1]
+ domain = domain[:-44]
+ date = har_id.split('/')
+ date = f'{date[-4]}{date[-3]}{date[-2]}'
+ extract_hhhash_by_id(har_id, domain, date)
+
+
+
+def _gzip_har(har_id):
+ har_path = os.path.join(HAR_DIR, har_id)
+ new_id = f'{har_path}.gz'
+ if not har_id.endswith('.gz'):
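+        # idempotent migration: already-compressed HARs are skipped and the
+        # plain file is only removed once its .gz copy exists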
+ if not os.path.exists(new_id):
+ with open(har_path, 'rb') as f:
+ content = f.read()
+ if content:
+ with gzip.open(new_id, 'wb') as f:
+ r = f.write(content)
+ print(r)
+ if os.path.exists(new_id) and os.path.exists(har_path):
+ os.remove(har_path)
+ print('delete:', har_path)
+
+def _gzip_all_hars():
+ for har_id in get_all_har_ids():
+ _gzip_har(har_id)
# # # - - # # #
@@ -662,8 +753,7 @@ class Cookie:
meta[field] = value
if r_json:
data = json.dumps(meta, indent=4, sort_keys=True)
- meta = {'data': data}
- meta['uuid'] = self.uuid
+ meta = {'data': data, 'uuid': self.uuid}
return meta
def edit(self, cookie_dict):
@@ -775,7 +865,7 @@ def unpack_imported_json_cookie(json_cookie):
## - - ##
#### COOKIEJAR API ####
-def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
+def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch
resp = api_verify_cookiejar_acl(cookiejar_uuid, user_id)
if resp:
return resp
@@ -944,8 +1034,8 @@ class CrawlerScheduler:
minutes = 0
current_time = datetime.now().timestamp()
time_next_run = (datetime.now() + relativedelta(months=int(months), weeks=int(weeks),
- days=int(days), hours=int(hours),
- minutes=int(minutes))).timestamp()
+ days=int(days), hours=int(hours),
+ minutes=int(minutes))).timestamp()
# Make sure the next capture is not scheduled for in a too short interval
interval_next_capture = time_next_run - current_time
if interval_next_capture < self.min_frequency:
@@ -1225,8 +1315,13 @@ class CrawlerCapture:
if task_uuid:
return CrawlerTask(task_uuid)
- def get_start_time(self):
- return self.get_task().get_start_time()
+ def get_start_time(self, r_str=True):
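+        # r_str=False converts the stored "%Y/%m/%d - %H:%M.%S" string
+        # (written by CrawlerTask.start) into an epoch int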
+ start_time = self.get_task().get_start_time()
+ if r_str:
+ return start_time
+ else:
+ start_time = datetime.strptime(start_time, "%Y/%m/%d - %H:%M.%S").timestamp()
+ return int(start_time)
def get_status(self):
status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status')
@@ -1239,7 +1334,8 @@ class CrawlerCapture:
def create(self, task_uuid):
if self.exists():
- raise Exception(f'Error: Capture {self.uuid} already exists')
+ print(f'Capture {self.uuid} already exists') # TODO LOGS
+ return None
launch_time = int(time.time())
r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid)
r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid)
@@ -1492,6 +1588,11 @@ class CrawlerTask:
def start(self):
self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S"))
+ def reset(self):
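+        # clear start_time so the task no longer looks in-progress, then re-queue it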
+ priority = 49
+ r_crawler.hdel(f'crawler:task:{self.uuid}', 'start_time')
+ self.add_to_db_crawler_queue(priority)
+
# Crawler
def remove(self): # zrem cache + DB
capture_uuid = self.get_capture()
@@ -1622,14 +1723,16 @@ def api_add_crawler_task(data, user_id=None):
if frequency:
# TODO verify user
- return create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
- cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags), 200
+ task_uuid = create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
+ cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags)
else:
# TODO HEADERS
# TODO USER AGENT
- return create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
+ task_uuid = create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None,
cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags,
- parent='manual', priority=90), 200
+ parent='manual', priority=90)
+
+ return {'uuid': task_uuid}, 200
#### ####
@@ -1702,13 +1805,13 @@ class CrawlerProxy:
self.uuid = proxy_uuid
def get_description(self):
- return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'description')
+ return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'description')
# Host
# Port
# Type -> need test
def get_url(self):
- return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'url')
+ return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'url')
#### CRAWLER LACUS ####
@@ -1770,7 +1873,11 @@ def ping_lacus():
ping = False
req_error = {'error': 'Lacus URL undefined', 'status_code': 400}
else:
- ping = lacus.is_up
+ try:
+ ping = lacus.is_up
+        except Exception:
+ req_error = {'error': 'Failed to connect Lacus URL', 'status_code': 400}
+ ping = False
update_lacus_connection_status(ping, req_error=req_error)
return ping
@@ -1787,7 +1894,7 @@ def api_save_lacus_url_key(data):
# unpack json
manager_url = data.get('url', None)
api_key = data.get('api_key', None)
- if not manager_url: # or not api_key:
+ if not manager_url: # or not api_key:
return {'status': 'error', 'reason': 'No url or API key supplied'}, 400
# check if is valid url
try:
@@ -1830,7 +1937,7 @@ def api_set_crawler_max_captures(data):
save_nb_max_captures(nb_captures)
return nb_captures, 200
- ## TEST ##
+## TEST ##
def is_test_ail_crawlers_successful():
return r_db.hget('crawler:tor:test', 'success') == 'True'
@@ -1903,14 +2010,16 @@ def test_ail_crawlers():
# TODO MOVE ME IN CRAWLER OR FLASK
load_blacklist()
-# if __name__ == '__main__':
-# delete_captures()
-
-# item_id = 'crawled/2023/02/20/data.gz'
-# item = Item(item_id)
-# content = item.get_content()
-# temp_url = ''
-# r = extract_favicon_from_html(content, temp_url)
-# print(r)
-# _reprocess_all_hars()
+if __name__ == '__main__':
+ # delete_captures()
+ # item_id = 'crawled/2023/02/20/data.gz'
+ # item = Item(item_id)
+ # content = item.get_content()
+ # temp_url = ''
+ # r = extract_favicon_from_html(content, temp_url)
+ # print(r)
+ # _reprocess_all_hars_cookie_name()
+ # _reprocess_all_hars_etag()
+ # _gzip_all_hars()
+ _reprocess_all_hars_hhhashs()
diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py
index fdfe1059..71fa5378 100755
--- a/bin/lib/item_basic.py
+++ b/bin/lib/item_basic.py
@@ -129,7 +129,7 @@ def get_item_url(item_id):
def get_item_har(item_id):
har = '/'.join(item_id.rsplit('/')[-4:])
- har = f'{har}.json'
+ har = f'{har}.json.gz'
path = os.path.join(ConfigLoader.get_hars_dir(), har)
if os.path.isfile(path):
return har
diff --git a/bin/lib/module_extractor.py b/bin/lib/module_extractor.py
index d4ea6c78..b6254372 100755
--- a/bin/lib/module_extractor.py
+++ b/bin/lib/module_extractor.py
@@ -104,9 +104,13 @@ def _get_word_regex(word):
def convert_byte_offset_to_string(b_content, offset):
byte_chunk = b_content[:offset + 1]
- string_chunk = byte_chunk.decode()
- offset = len(string_chunk) - 1
- return offset
+ try:
+ string_chunk = byte_chunk.decode()
+ offset = len(string_chunk) - 1
+ return offset
+ except UnicodeDecodeError as e:
+ logger.error(f'Yara offset converter error, {str(e)}\n{offset}/{len(b_content)}')
+ return convert_byte_offset_to_string(b_content, offset - 1)
# TODO RETRO HUNTS
diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py
new file mode 100755
index 00000000..bb27413d
--- /dev/null
+++ b/bin/lib/objects/Chats.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+from datetime import datetime
+
+from flask import url_for
+# from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ail_core
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
+from lib.data_retention_engine import update_obj_date
+from lib.objects import ail_objects
+from lib.timeline_engine import Timeline
+
+from lib.correlations_engine import get_correlation_by_correl_type
+
+config_loader = ConfigLoader()
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
+r_cache = config_loader.get_redis_conn("Redis_Cache")
+config_loader = None
+
+
+################################################################################
+################################################################################
+################################################################################
+
+class Chat(AbstractSubtypeObject): # TODO # ID == username ?????
+ """
+ AIL Chat Object. (strings)
+ """
+
+ def __init__(self, id, subtype):
+ super(Chat, self).__init__('chat', id, subtype)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True),
+ # 'compress': 'gzip'}
+ # return payload
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ # # TODO:
+ pass
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
+ return url
+
+ def get_svg_icon(self): # TODO
+ # if self.subtype == 'telegram':
+ # style = 'fab'
+ # icon = '\uf2c6'
+ # elif self.subtype == 'discord':
+ # style = 'fab'
+ # icon = '\uf099'
+ # else:
+ # style = 'fas'
+ # icon = '\uf007'
+ style = 'fas'
+ icon = '\uf086'
+ return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}
+
+ def get_meta(self, options=set()):
+ meta = self._get_meta(options=options)
+ meta['id'] = self.id
+ meta['subtype'] = self.subtype
+ meta['tags'] = self.get_tags(r_list=True)
+ return meta
+
+ def get_misp_object(self):
+ # obj_attrs = []
+ # if self.subtype == 'telegram':
+ # obj = MISPObject('telegram-account', standalone=True)
+ # obj_attrs.append(obj.add_attribute('username', value=self.id))
+ #
+ # elif self.subtype == 'twitter':
+ # obj = MISPObject('twitter-account', standalone=True)
+ # obj_attrs.append(obj.add_attribute('name', value=self.id))
+ #
+ # else:
+ # obj = MISPObject('user-account', standalone=True)
+ # obj_attrs.append(obj.add_attribute('username', value=self.id))
+ #
+ # first_seen = self.get_first_seen()
+ # last_seen = self.get_last_seen()
+ # if first_seen:
+ # obj.first_seen = first_seen
+ # if last_seen:
+ # obj.last_seen = last_seen
+ # if not first_seen or not last_seen:
+ # self.logger.warning(
+ # f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+ #
+ # for obj_attr in obj_attrs:
+ # for tag in self.get_tags():
+ # obj_attr.add_tag(tag)
+ # return obj
+ return
+
+ ############################################################################
+ ############################################################################
+
+ # others optional metas, ... -> # TODO ALL meta in hset
+
+ def get_name(self): # get username ????
+ pass
+
+ # users that send at least a message else participants/spectator
+ # correlation created by messages
+ def get_users(self):
+ users = set()
+ accounts = self.get_correlation('user-account').get('user-account', [])
+ for account in accounts:
+ users.add(account[1:])
+ return users
+
+ def _get_timeline_username(self):
+ return Timeline(self.get_global_id(), 'username')
+
+ def get_username(self):
+ return self._get_timeline_username().get_last_obj_id()
+
+ def get_usernames(self):
+ return self._get_timeline_username().get_objs_ids()
+
+ def update_username_timeline(self, username_global_id, timestamp):
+ self._get_timeline_username().add_timestamp(timestamp, username_global_id)
+
+
+ # def get_last_message_id(self):
+ #
+ # return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id')
+
+ def get_obj_message_id(self, obj_id):
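+        # the chat-local message id is the last '_'-separated token,
+        # e.g. 'chatid_1234.gz' -> 1234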
+ if obj_id.endswith('.gz'):
+ obj_id = obj_id[:-3]
+ return int(obj_id.split('_')[-1])
+
+ def _get_message_timestamp(self, obj_global_id):
+ return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id)
+
+ def _get_messages(self):
+ return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True)
+
+ def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None):
+ obj = ail_objects.get_obj_from_global_id(obj_global_id)
+ mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'})
+ if mess_dict.get('parent') and parent:
+ mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False)
+ if mess_dict.get('user-account'):
+ user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account'])
+ mess_dict['user-account'] = {}
+ mess_dict['user-account']['type'] = user_account.get_type()
+ mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True)
+ mess_dict['user-account']['id'] = user_account.get_id()
+ username = user_account.get_username()
+ if username:
+ username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False)
+ mess_dict['user-account']['username'] = username # TODO get username at the given timestamp ???
+ else:
+            mess_dict['user-account'] = {'id': 'UNKNOWN'}
+
+ if not mess_datetime:
+ obj_mess_id = self._get_message_timestamp(obj_global_id)
+ mess_datetime = datetime.fromtimestamp(obj_mess_id)
+ mess_dict['date'] = mess_datetime.isoformat(' ')
+ mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S')
+ return mess_dict
+
+
+ def get_messages(self, start=0, page=1, nb=500): # TODO limit nb returned, # TODO add replies
+ start = 0
+ stop = -1
+ # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}')
+
+ # TODO chat without username ???? -> chat ID ????
+
+ messages = {}
+ curr_date = None
+ for message in self._get_messages():
+ date = datetime.fromtimestamp(message[1])
+ date_day = date.strftime('%Y/%m/%d')
+ if date_day != curr_date:
+ messages[date_day] = []
+ curr_date = date_day
+ mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date)
+ messages[date_day].append(mess_dict)
+ return messages
+
+ # Zset with ID ??? id -> item id ??? multiple id == media + text
+ # id -> media id
+ # How do we handle reply/thread ??? -> separate with new chats name/id ZSET ???
+ # Handle media ???
+
+ # list of message id -> obj_id
+ # list of obj_id ->
+ # abuse parent children ???
+
+ # def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None):
+ # date = # TODO get date from object
+ # self.update_daterange(date)
+ # update_obj_date(date, self.type, self.subtype)
+ #
+ #
+ # # daily
+ # r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
+ # # all subtypes
+ # r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id)
+ #
+ # #######################################################################
+ # #######################################################################
+ #
+ # # Correlations
+ # self.add_correlation('item', '', item_id)
+ # # domain
+ # if is_crawled(item_id):
+ # domain = get_item_domain(item_id)
+ # self.add_correlation('domain', '', domain)
+
+ # TODO kvrocks raises an exception if the key doesn't exist
+ def get_obj_by_message_id(self, mess_id):
+ return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id)
+
+ # importer -> cache replies that arrive before their parent message (expiring SET reply_id -> mess_id, see add_message_cached_reply)
+ def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None):
+ r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id)
+ r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp})
+
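+ # link the reply to its parent if the parent is already imported,
+ # otherwise cache the reply id until the parent message arrives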
+ if reply_id:
+ reply_obj = self.get_obj_by_message_id(reply_id)
+ if reply_obj:
+ self.add_obj_children(reply_obj, obj_global_id)
+ else:
+ self.add_message_cached_reply(reply_id, mess_id)
+
+ # ADD cached replies
+ for reply_obj in self.get_cached_message_reply(mess_id):
+ self.add_obj_children(obj_global_id, reply_obj)
+
+ def _get_message_cached_reply(self, message_id):
+ return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}')
+
+ def get_cached_message_reply(self, message_id):
+ objs_global_id = []
+ for mess_id in self._get_message_cached_reply(message_id):
+ obj_global_id = self.get_obj_by_message_id(mess_id)
+ if obj_global_id:
+ objs_global_id.append(obj_global_id)
+ return objs_global_id
+
+ def add_message_cached_reply(self, reply_to_id, message_id):
+ r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id)
+ r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600)
+
+ # TODO nb replies = nb children ???? what if it creates an onion item ??? -> need source filtering
+
+
+# TODO factorize
+def get_all_subtypes():
+ return ail_core.get_object_all_subtypes('chat')
+
+def get_all():
+ objs = {}
+ for subtype in get_all_subtypes():
+ objs[subtype] = get_all_by_subtype(subtype)
+ return objs
+
+def get_all_by_subtype(subtype):
+ return get_all_id('chat', subtype)
+
+# # TODO FILTER NAME + Key + mail
+# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME
+#
+# return name_to_search
+#
+# def search_usernames_by_name(name_to_search, subtype, r_pos=False):
+# usernames = {}
+# # for subtype in subtypes:
+# r_name = sanitize_username_name_to_search(name_to_search, subtype)
+# if not name_to_search or isinstance(r_name, dict):
+# # break
+# return usernames
+# r_name = re.compile(r_name)
+# for user_name in get_all_usernames_by_subtype(subtype):
+# res = re.search(r_name, user_name)
+# if res:
+# usernames[user_name] = {}
+# if r_pos:
+# usernames[user_name]['hl-start'] = res.start()
+# usernames[user_name]['hl-end'] = res.end()
+# return usernames
+
+
+if __name__ == '__main__':
+ chat = Chat('test', 'telegram')
+ r = chat.get_messages()
+ print(r)
diff --git a/bin/lib/objects/Decodeds.py b/bin/lib/objects/Decodeds.py
index 001f7dfd..fb194be1 100755
--- a/bin/lib/objects/Decodeds.py
+++ b/bin/lib/objects/Decodeds.py
@@ -138,7 +138,7 @@ class Decoded(AbstractDaterangeObject):
with open(filepath, 'rb') as f:
content = f.read()
return content
- elif r_str == 'bytesio':
+ elif r_type == 'bytesio':
with open(filepath, 'rb') as f:
content = BytesIO(f.read())
return content
@@ -149,7 +149,7 @@ class Decoded(AbstractDaterangeObject):
with zipfile.ZipFile(zip_content, "w") as zf:
# TODO: Fix password
# zf.setpassword(b"infected")
- zf.writestr(self.id, self.get_content().getvalue())
+ zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue())
zip_content.seek(0)
return zip_content
diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py
index 811ea6f7..dc216617 100755
--- a/bin/lib/objects/Domains.py
+++ b/bin/lib/objects/Domains.py
@@ -389,10 +389,10 @@ class Domain(AbstractObject):
har = get_item_har(item_id)
if har:
print(har)
- _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json')
+ _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
# Screenshot
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
- if screenshot:
+ if screenshot and screenshot['screenshot']:
screenshot = screenshot['screenshot'].pop()[1:]
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
screenshot[8:10], screenshot[10:12], screenshot[12:])
@@ -595,21 +595,22 @@ def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[]
return None
def sanitize_domain_name_to_search(name_to_search, domain_type):
+ if not name_to_search:
+ return ""
if domain_type == 'onion':
r_name = r'[a-z0-9\.]+'
else:
r_name = r'[a-zA-Z0-9-_\.]+'
# invalid domain name
if not re.fullmatch(r_name, name_to_search):
- res = re.match(r_name, name_to_search)
- return {'search': name_to_search, 'error': res.string.replace( res[0], '')}
+ return ""
return name_to_search.replace('.', '\.')
def search_domain_by_name(name_to_search, domain_types, r_pos=False):
domains = {}
for domain_type in domain_types:
r_name = sanitize_domain_name_to_search(name_to_search, domain_type)
- if not name_to_search or isinstance(r_name, dict):
+ if not r_name:
break
r_name = re.compile(r_name)
for domain in get_domains_up_by_type(domain_type):
diff --git a/bin/lib/objects/Etags.py b/bin/lib/objects/Etags.py
new file mode 100755
index 00000000..eb41f68c
--- /dev/null
+++ b/bin/lib/objects/Etags.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+from hashlib import sha256
+from flask import url_for
+
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
+
+config_loader = ConfigLoader()
+r_objects = config_loader.get_db_conn("Kvrocks_Objects")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+# TODO NEW ABSTRACT OBJECT -> daterange for all objects ????
+
+class Etag(AbstractDaterangeObject):
+ """
+ AIL Etag Object.
+ """
+
+ def __init__(self, obj_id):
+ super(Etag, self).__init__('etag', obj_id)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True),
+ # 'compress': 'gzip'}
+ # return payload
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ # # TODO:
+ pass
+
+ def get_content(self, r_type='str'):
+ if r_type == 'str':
+ return self._get_field('content')
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+ return url
+
+ # TODO # CHANGE COLOR
+ def get_svg_icon(self):
+ return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5}
+
+ def get_misp_object(self):
+ obj_attrs = []
+ obj = MISPObject('etag')
+ first_seen = self.get_first_seen()
+ last_seen = self.get_last_seen()
+ if first_seen:
+ obj.first_seen = first_seen
+ if last_seen:
+ obj.last_seen = last_seen
+ if not first_seen or not last_seen:
+ self.logger.warning(
+ f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+
+ obj_attrs.append(obj.add_attribute('etag', value=self.get_content()))
+ for obj_attr in obj_attrs:
+ for tag in self.get_tags():
+ obj_attr.add_tag(tag)
+ return obj
+
+ def get_nb_seen(self):
+ return self.get_nb_correlation('domain')
+
+ def get_meta(self, options=set()):
+ meta = self._get_meta(options=options)
+ meta['id'] = self.id
+ meta['tags'] = self.get_tags(r_list=True)
+ meta['content'] = self.get_content()
+ return meta
+
+ def add(self, date, obj_id): # date = HAR Date
+ self._add(date, 'domain', '', obj_id)
+
+ def create(self, content, _first_seen=None, _last_seen=None):
+ if not isinstance(content, str):
+ content = content.decode()
+ self._set_field('content', content)
+ self._create()
+
+
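+ # the Etag object id is the sha256 hexdigest of the etag value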
+def create(content):
+ if isinstance(content, str):
+ content = content.encode()
+ obj_id = sha256(content).hexdigest()
+ etag = Etag(obj_id)
+ if not etag.exists():
+ etag.create(content)
+ return etag
+
+
+class Etags(AbstractDaterangeObjects):
+ """
+ Etags Objects
+ """
+ def __init__(self):
+ super().__init__('etag', Etag)
+
+ def sanitize_id_to_search(self, name_to_search):
+ return name_to_search # TODO
+
+
+# if __name__ == '__main__':
+# name_to_search = '98'
+# print(search_cves_by_name(name_to_search))
diff --git a/bin/lib/objects/HHHashs.py b/bin/lib/objects/HHHashs.py
new file mode 100755
index 00000000..021ac451
--- /dev/null
+++ b/bin/lib/objects/HHHashs.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import hashlib
+import os
+import sys
+
+from flask import url_for
+
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
+
+config_loader = ConfigLoader()
+r_objects = config_loader.get_db_conn("Kvrocks_Objects")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+
+class HHHash(AbstractDaterangeObject):
+ """
+ AIL HHHash Object.
+ """
+
+ def __init__(self, obj_id):
+ super(HHHash, self).__init__('hhhash', obj_id)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True),
+ # 'compress': 'gzip'}
+ # return payload
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ # # TODO:
+ pass
+
+ def get_content(self, r_type='str'):
+ if r_type == 'str':
+ return self._get_field('content')
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+ return url
+
+ # TODO # CHANGE COLOR
+ def get_svg_icon(self):
+ return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5}
+
+ def get_misp_object(self):
+ obj_attrs = []
+ obj = MISPObject('hhhash')
+ first_seen = self.get_first_seen()
+ last_seen = self.get_last_seen()
+ if first_seen:
+ obj.first_seen = first_seen
+ if last_seen:
+ obj.last_seen = last_seen
+ if not first_seen or not last_seen:
+ self.logger.warning(
+ f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+
+ obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id()))
+ obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content()))
+ obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus'))
+ for obj_attr in obj_attrs:
+ for tag in self.get_tags():
+ obj_attr.add_tag(tag)
+ return obj
+
+ def get_nb_seen(self):
+ return self.get_nb_correlation('domain')
+
+ def get_meta(self, options=set()):
+ meta = self._get_meta(options=options)
+ meta['id'] = self.id
+ meta['tags'] = self.get_tags(r_list=True)
+ meta['content'] = self.get_content()
+ return meta
+
+ def add(self, date, obj_id): # date = HAR Date
+ self._add(date, 'domain', '', obj_id)
+
+ def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set
+ self._set_field('content', hhhash_header)
+ self._create()
+
+
+def create(hhhash_header, hhhash=None):
+ if not hhhash:
+ hhhash = hhhash_headers(hhhash_header)
+ hhhash = HHHash(hhhash)
+ if not hhhash.exists():
+ hhhash.create(hhhash_header)
+ return hhhash
+
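+ # build the HHHash input string: ordered response header names joined by ':'
+ # (consecutive duplicates skipped), e.g. 'Date:Server:Content-Type'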
+def build_hhhash_headers(dict_headers): # filter_dup=True
+ hhhash = ''
+ previous_header = ''
+ for header in dict_headers:
+ header_name = header.get('name')
+ if header_name:
+ if header_name != previous_header: # remove dup headers, filter playwright invalid splitting
+ hhhash = f'{hhhash}:{header_name}'
+ previous_header = header_name
+ hhhash = hhhash[1:]
+ # print(hhhash)
+ return hhhash
+
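+ # HHHash version 1 format: 'hhh:1:<sha256 hexdigest of the header-name string>'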
+def hhhash_headers(header_hhhash):
+ m = hashlib.sha256()
+ m.update(header_hhhash.encode())
+ digest = m.hexdigest()
+ return f"hhh:1:{digest}"
+
+
+class HHHashs(AbstractDaterangeObjects):
+ """
+ HHHashs Objects
+ """
+ def __init__(self):
+ super().__init__('hhhash', HHHash)
+
+ def sanitize_id_to_search(self, name_to_search):
+ return name_to_search # TODO
+
+
+# if __name__ == '__main__':
+# name_to_search = '98'
+# print(search_cves_by_name(name_to_search))
diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py
index 2e35497e..c2edbb40 100755
--- a/bin/lib/objects/Items.py
+++ b/bin/lib/objects/Items.py
@@ -264,10 +264,9 @@ class Item(AbstractObject):
"""
if options is None:
options = set()
- meta = {'id': self.id,
- 'date': self.get_date(separator=True),
- 'source': self.get_source(),
- 'tags': self.get_tags(r_list=True)}
+ meta = self.get_default_meta(tags=True)
+ meta['date'] = self.get_date(separator=True)
+ meta['source'] = self.get_source()
# optional meta fields
if 'content' in options:
meta['content'] = self.get_content()
@@ -289,6 +288,8 @@ class Item(AbstractObject):
meta['mimetype'] = self.get_mimetype(content=content)
if 'investigations' in options:
meta['investigations'] = self.get_investigations()
+ if 'link' in options:
+ meta['link'] = self.get_link(flask_context=True)
# meta['encoding'] = None
return meta
diff --git a/bin/lib/objects/Messages.py b/bin/lib/objects/Messages.py
new file mode 100755
index 00000000..b724f854
--- /dev/null
+++ b/bin/lib/objects/Messages.py
@@ -0,0 +1,275 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import re
+import sys
+import cld3
+import html2text
+
+from datetime import datetime
+
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ail_core import get_ail_uuid
+from lib.objects.abstract_object import AbstractObject
+from lib.ConfigLoader import ConfigLoader
+from lib.data_retention_engine import update_obj_date, get_obj_date_first
+# TODO Set all messages ???
+
+
+from flask import url_for
+
+config_loader = ConfigLoader()
+r_cache = config_loader.get_redis_conn("Redis_Cache")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
+# r_content = config_loader.get_db_conn("Kvrocks_Content")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+
+# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ?????????
+# TODO iterate on all objects
+# TODO also add support for small objects ????
+
+# A Message CAN exist without a CHAT -> do not convert it to an object
+
+# ID: source:chat_id:message_id ????
+#
+# /!\ handle null chat and message id -> chat = uuid and message = timestamp ???
+
+
+class Message(AbstractObject):
+ """
+ AIL Message Object. (strings)
+ """
+
+ def __init__(self, id): # TODO subtype or use source ????
+ super(Message, self).__init__('message', id) # message::< telegram/1692189934.380827/ChatID_MessageID >
+
+ def exists(self):
+ if self.subtype is None:
+ return r_object.exists(f'meta:{self.type}:{self.id}')
+ else:
+ return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
+
+ def get_source(self):
+ """
+ Returns source/feeder name
+ """
+ l_source = self.id.split('/')[:-2]
+ return os.path.join(*l_source)
+
+ def get_basename(self):
+ return os.path.basename(self.id)
+
+ def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ???????
+ """
+ Returns content
+ """
+ content = self._get_field('content')
+ if r_type == 'str':
+ return content
+ elif r_type == 'bytes':
+ return content.encode()
+
+ def get_date(self):
+ timestamp = self.get_timestamp()
+ return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d')
+
+ def get_timestamp(self):
+ dirs = self.id.split('/')
+ return dirs[-2]
+
+ def get_message_id(self): # TODO optimize
+ message_id = self.get_basename().rsplit('_', 1)[1]
+ # if message_id.endswith('.gz'):
+ # message_id = message_id[:-3]
+ return message_id
+
+ def get_chat_id(self): # TODO optimize -> use me to tag Chat
+ chat_id = self.get_basename().rsplit('_', 1)[0]
+ # if chat_id.endswith('.gz'):
+ # chat_id = chat_id[:-3]
+ return chat_id
+
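+ # return the global id 'user-account:<subtype>:<id>' of the message author, if any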
+ def get_user_account(self):
+ user_account = self.get_correlation('user-account')
+ if user_account.get('user-account'):
+ return f'user-account:{user_account["user-account"].pop()}'
+
+ # Update value on import
+ # reply to -> parent ?
+ # reply/comment - > children ?
+ # nb views
+ # reactions
+ # nb forwards
+ # room ???
+ # message from channel ???
+ # message media
+
+ def get_translation(self): # TODO support multiple translated languages ?????
+ """
+ Returns translated content
+ """
+ return self._get_field('translated') # TODO multiple translations ... -> use a set
+
+ def _set_translation(self, translation):
+ """
+ Set translated content
+ """
+ return self._set_field('translated', translation) # translation keyed by hash ??? -> avoid translating multiple times
+
+ def get_html2text_content(self, content=None, ignore_links=False):
+ if not content:
+ content = self.get_content()
+ h = html2text.HTML2Text()
+ h.ignore_links = ignore_links
+ h.ignore_images = ignore_links
+ return h.handle(content)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True)}
+ # return payload
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+ return url
+
+ def get_svg_icon(self):
+ return {'style': 'fas', 'icon': '\uf4ad', 'color': '#4dffff', 'radius': 5}
+
+ def get_misp_object(self): # TODO
+ obj = MISPObject('instant-message', standalone=True)
+ obj_date = self.get_date()
+ if obj_date:
+ obj.first_seen = obj_date
+ else:
+ self.logger.warning(
+ f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')
+
+ # obj_attrs = [obj.add_attribute('first-seen', value=obj_date),
+ # obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()),
+ # obj.add_attribute('sensor', value=get_ail_uuid())]
+ obj_attrs = []
+ for obj_attr in obj_attrs:
+ for tag in self.get_tags():
+ obj_attr.add_tag(tag)
+ return obj
+
+ # def get_url(self):
+ # return r_object.hget(f'meta:item::{self.id}', 'url')
+
+ # options: set of optional meta fields
+ def get_meta(self, options=None):
+ """
+ :type options: set
+ """
+ if options is None:
+ options = set()
+ meta = self.get_default_meta(tags=True)
+ meta['date'] = self.get_date() # TODO replace me by timestamp ??????
+ meta['source'] = self.get_source()
+ # optional meta fields
+ if 'content' in options:
+ meta['content'] = self.get_content()
+ if 'parent' in options:
+ meta['parent'] = self.get_parent()
+ if 'investigations' in options:
+ meta['investigations'] = self.get_investigations()
+ if 'link' in options:
+ meta['link'] = self.get_link(flask_context=True)
+ if 'user-account' in options:
+ meta['user-account'] = self.get_user_account()
+
+ # meta['encoding'] = None
+ return meta
+
+ def _languages_cleaner(self, content=None):
+ if not content:
+ content = self.get_content()
+ # REMOVE URLS
+ regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b'
+ url_regex = re.compile(regex)
+ urls = url_regex.findall(content)
+ urls = sorted(urls, key=len, reverse=True)
+ for url in urls:
+ content = content.replace(url, '')
+ # REMOVE PGP Blocks
+ regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----'
+ regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----'
+ regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----'
+ res = re.findall(regex_pgp_public_blocs, content)
+ for it in res:
+ content = content.replace(it, '')
+ res = re.findall(regex_pgp_signature, content)
+ for it in res:
+ content = content.replace(it, '')
+ res = re.findall(regex_pgp_message, content)
+ for it in res:
+ content = content.replace(it, '')
+ return content
+
+ def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7):
+ languages = []
+ ## CLEAN CONTENT ##
+ content = self.get_html2text_content(ignore_links=True)
+ content = self._languages_cleaner(content=content)
+ # REMOVE USELESS SPACE
+ content = ' '.join(content.split())
+ # - CLEAN CONTENT - #
+ if len(content) >= min_len:
+ for lang in cld3.get_frequent_languages(content, num_langs=num_langs):
+ if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable:
+ languages.append(lang)
+ return languages
+
+ # def translate(self, content=None): # TODO translation plugin
+ # # TODO get text language
+ # if not content:
+ # content = self.get_content()
+ # translated = argostranslate.translate.translate(content, 'ru', 'en')
+ # # Save translation
+ # self._set_translation(translated)
+ # return translated
+
+ def create(self, content, translation, tags):
+ self._set_field('content', content)
+ # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content)
+ if translation:
+ self._set_translation(translation)
+ for tag in tags:
+ self.add_tag(tag)
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ pass
+
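+ # e.g. create_obj_id('telegram', '10892', '3456', 1692189934.380827)
+ # -> 'telegram/1692189934.380827/10892_3456'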
+def create_obj_id(source, chat_id, message_id, timestamp):
+ return f'{source}/{timestamp}/{chat_id}_{message_id}'
+
+# TODO Check if already exists
+# def create(source, chat_id, message_id, timestamp, content, tags=[]):
+def create(obj_id, content, translation=None, tags=[]):
+ message = Message(obj_id)
+ if not message.exists():
+ message.create(content, translation, tags)
+ return message
+
+
+# TODO Encode translation
+
+
+if __name__ == '__main__':
+ r = 'test'
+ print(r)
diff --git a/bin/lib/objects/Screenshots.py b/bin/lib/objects/Screenshots.py
index 19ae3754..26f8543f 100755
--- a/bin/lib/objects/Screenshots.py
+++ b/bin/lib/objects/Screenshots.py
@@ -88,7 +88,7 @@ class Screenshot(AbstractObject):
return obj
def get_meta(self, options=set()):
- meta = {'id': self.id}
+ meta = self.get_default_meta()
meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ??????
meta['tags'] = self.get_tags(r_list=True)
if 'tags_safe' in options:
diff --git a/bin/lib/objects/UsersAccount.py b/bin/lib/objects/UsersAccount.py
new file mode 100755
index 00000000..5bc94a9c
--- /dev/null
+++ b/bin/lib/objects/UsersAccount.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+# import re
+
+from flask import url_for
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ail_core
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
+from lib.timeline_engine import Timeline
+
+config_loader = ConfigLoader()
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+
+################################################################################
+################################################################################
+################################################################################
+
+class UserAccount(AbstractSubtypeObject):
+ """
+ AIL User Object. (strings)
+ """
+
+ def __init__(self, id, subtype):
+ super(UserAccount, self).__init__('user-account', id, subtype)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True),
+ # 'compress': 'gzip'}
+ # return payload
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ # # TODO:
+ pass
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}'
+ return url
+
+ def get_svg_icon(self): # TODO change icon/color
+ if self.subtype == 'telegram':
+ style = 'fab'
+ icon = '\uf2c6'
+ elif self.subtype == 'twitter':
+ style = 'fab'
+ icon = '\uf099'
+ else:
+ style = 'fas'
+ icon = '\uf007'
+ return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5}
+
+ def get_first_name(self):
+ return self._get_field('firstname')
+
+ def get_last_name(self):
+ return self._get_field('lastname')
+
+ def get_phone(self):
+ return self._get_field('phone')
+
+ def set_first_name(self, firstname):
+ return self._set_field('firstname', firstname)
+
+ def set_last_name(self, lastname):
+ return self._set_field('lastname', lastname)
+
+ def set_phone(self, phone):
+ return self._set_field('phone', phone)
+
+ def _get_timeline_username(self):
+ return Timeline(self.get_global_id(), 'username')
+
+ def get_username(self):
+ return self._get_timeline_username().get_last_obj_id()
+
+ def get_usernames(self):
+ return self._get_timeline_username().get_objs_ids()
+
+ def update_username_timeline(self, username_global_id, timestamp):
+ self._get_timeline_username().add_timestamp(timestamp, username_global_id)
+
+ def get_meta(self, options=set()):
+ meta = self._get_meta(options=options)
+ meta['id'] = self.id
+ meta['subtype'] = self.subtype
+ meta['tags'] = self.get_tags(r_list=True)
+ if 'username' in options:
+ meta['username'] = self.get_username()
+ if 'usernames' in options:
+ meta['usernames'] = self.get_usernames()
+ return meta
+
+ def get_misp_object(self):
+ obj_attrs = []
+ if self.subtype == 'telegram':
+ obj = MISPObject('telegram-account', standalone=True)
+ obj_attrs.append(obj.add_attribute('username', value=self.id))
+
+ elif self.subtype == 'twitter':
+ obj = MISPObject('twitter-account', standalone=True)
+ obj_attrs.append(obj.add_attribute('name', value=self.id))
+
+ else:
+ obj = MISPObject('user-account', standalone=True)
+ obj_attrs.append(obj.add_attribute('username', value=self.id))
+
+ first_seen = self.get_first_seen()
+ last_seen = self.get_last_seen()
+ if first_seen:
+ obj.first_seen = first_seen
+ if last_seen:
+ obj.last_seen = last_seen
+ if not first_seen or not last_seen:
+ self.logger.warning(
+ f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+
+ for obj_attr in obj_attrs:
+ for tag in self.get_tags():
+ obj_attr.add_tag(tag)
+ return obj
+
+def get_user_by_username():
+ pass
+
+def get_all_subtypes():
+ return ail_core.get_object_all_subtypes('user-account')
+
+def get_all():
+ users = {}
+ for subtype in get_all_subtypes():
+ users[subtype] = get_all_by_subtype(subtype)
+ return users
+
+def get_all_by_subtype(subtype):
+ return get_all_id('user-account', subtype)
+
+
+# if __name__ == '__main__':
+# name_to_search = 'co'
+# subtype = 'telegram'
+# print(search_usernames_by_name(name_to_search, subtype))
diff --git a/bin/lib/objects/abstract_daterange_object.py b/bin/lib/objects/abstract_daterange_object.py
index b96c5ec4..98aa49c2 100755
--- a/bin/lib/objects/abstract_daterange_object.py
+++ b/bin/lib/objects/abstract_daterange_object.py
@@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
def exists(self):
return r_object.exists(f'meta:{self.type}:{self.id}')
- def _get_field(self, field):
+ def _get_field(self, field): # TODO remove me (NEW in abstract)
return r_object.hget(f'meta:{self.type}:{self.id}', field)
- def _set_field(self, field, value):
+ def _set_field(self, field, value): # TODO remove me (NEW in abstract)
return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
def get_first_seen(self, r_int=False):
@@ -82,9 +82,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
return int(nb)
def _get_meta(self, options=[]):
- meta_dict = {'first_seen': self.get_first_seen(),
- 'last_seen': self.get_last_seen(),
- 'nb_seen': self.get_nb_seen()}
+ meta_dict = self.get_default_meta()
+ meta_dict['first_seen'] = self.get_first_seen()
+ meta_dict['last_seen'] = self.get_last_seen()
+ meta_dict['nb_seen'] = self.get_nb_seen()
if 'sparkline' in options:
meta_dict['sparkline'] = self.get_sparkline()
return meta_dict
diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py
index cb7595ad..a3f25216 100755
--- a/bin/lib/objects/abstract_object.py
+++ b/bin/lib/objects/abstract_object.py
@@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib import ail_logger
from lib import Tag
+from lib.ConfigLoader import ConfigLoader
from lib import Duplicate
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
@@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
logging.config.dictConfig(ail_logger.get_config(name='ail'))
+config_loader = ConfigLoader()
+# r_cache = config_loader.get_redis_conn("Redis_Cache")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
+config_loader = None
+
class AbstractObject(ABC):
"""
Abstract Object
@@ -59,14 +65,28 @@ class AbstractObject(ABC):
def get_global_id(self):
return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}'
- def get_default_meta(self, tags=False):
+ def get_default_meta(self, tags=False, link=False):
dict_meta = {'id': self.get_id(),
'type': self.get_type(),
- 'subtype': self.get_subtype()}
+ 'subtype': self.get_subtype(r_str=True)}
if tags:
dict_meta['tags'] = self.get_tags()
+ if link:
+ dict_meta['link'] = self.get_link()
return dict_meta
+ def _get_field(self, field):
+ if self.subtype is None:
+ return r_object.hget(f'meta:{self.type}:{self.id}', field)
+ else:
+ return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field)
+
+ def _set_field(self, field, value):
+ if self.subtype is None:
+ return r_object.hset(f'meta:{self.type}:{self.id}', field, value)
+ else:
+ return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value)
+
## Tags ##
def get_tags(self, r_list=False):
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
@@ -198,6 +218,8 @@ class AbstractObject(ABC):
else:
return []
+ ## Correlation ##
+
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
"""
Get object correlation
@@ -253,3 +275,39 @@ class AbstractObject(ABC):
Get object correlations
"""
delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2)
+
+ ## -Correlation- ##
+
+ ## Parent ##
+
+ def is_parent(self):
+ return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
+
+ def is_children(self):
+ return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
+
+ def get_parent(self):
+ return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent')
+
+ def get_children(self):
+ return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}')
+
+ def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
+ if not obj_global_id:
+ if obj_subtype is None:
+ obj_subtype = ''
+ obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
+ r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id)
+
+ def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ######################
+ if not obj_global_id:
+ if obj_subtype is None:
+ obj_subtype = ''
+ obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}'
+ r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id)
+
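+ # same as set_parent/add_children but keyed by full global ids (<type>:<subtype>:<id>)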
+ def add_obj_children(self, parent_global_id, son_global_id):
+ r_object.sadd(f'child:{parent_global_id}', son_global_id)
+ r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id)
+
+ ## -Parent- ##
diff --git a/bin/lib/objects/abstract_subtype_object.py b/bin/lib/objects/abstract_subtype_object.py
index 82bb85f6..007f716b 100755
--- a/bin/lib/objects/abstract_subtype_object.py
+++ b/bin/lib/objects/abstract_subtype_object.py
@@ -151,7 +151,7 @@ class AbstractSubtypeObject(AbstractObject, ABC):
#
#
- def add(self, date, item_id):
+ def add(self, date, obj=None):
self.update_daterange(date)
update_obj_date(date, self.type, self.subtype)
# daily
@@ -162,20 +162,22 @@ class AbstractSubtypeObject(AbstractObject, ABC):
#######################################################################
#######################################################################
- # Correlations
- self.add_correlation('item', '', item_id)
- # domain
- if is_crawled(item_id):
- domain = get_item_domain(item_id)
- self.add_correlation('domain', '', domain)
+ if obj:
+ # Correlations
+ self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id())
+ if obj.type == 'item': # TODO same for message->chat ???
+ item_id = obj.get_id()
+ # domain
+ if is_crawled(item_id):
+ domain = get_item_domain(item_id)
+ self.add_correlation('domain', '', domain)
# TODO:ADD objects + Stats
def create(self, first_seen, last_seen):
self.set_first_seen(first_seen)
self.set_last_seen(last_seen)
-
def _delete(self):
pass
diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py
index 01990996..89be336f 100755
--- a/bin/lib/objects/ail_objects.py
+++ b/bin/lib/objects/ail_objects.py
@@ -13,16 +13,21 @@ from lib import correlations_engine
from lib import btc_ail
from lib import Tag
+from lib.objects import Chats
from lib.objects import CryptoCurrencies
from lib.objects import CookiesNames
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
from lib.objects.Domains import Domain
+from lib.objects import Etags
from lib.objects.Favicons import Favicon
+from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
+from lib.objects.Messages import Message
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import Titles
+from lib.objects.UsersAccount import UserAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@@ -53,12 +58,20 @@ def get_object(obj_type, subtype, obj_id):
return Domain(obj_id)
elif obj_type == 'decoded':
return Decoded(obj_id)
+ elif obj_type == 'chat':
+ return Chats.Chat(obj_id, subtype)
elif obj_type == 'cookie-name':
return CookiesNames.CookieName(obj_id)
elif obj_type == 'cve':
return Cve(obj_id)
+ elif obj_type == 'etag':
+ return Etags.Etag(obj_id)
elif obj_type == 'favicon':
return Favicon(obj_id)
+ elif obj_type == 'hhhash':
+ return HHHashs.HHHash(obj_id)
+ elif obj_type == 'message':
+ return Message(obj_id)
elif obj_type == 'screenshot':
return Screenshot(obj_id)
elif obj_type == 'cryptocurrency':
@@ -67,6 +80,8 @@ def get_object(obj_type, subtype, obj_id):
return Pgps.Pgp(obj_id, subtype)
elif obj_type == 'title':
return Titles.Title(obj_id)
+ elif obj_type == 'user-account':
+ return UserAccount(obj_id, subtype)
elif obj_type == 'username':
return Usernames.Username(obj_id, subtype)
@@ -101,9 +116,12 @@ def get_obj_global_id(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
return obj.get_global_id()
+def get_obj_type_subtype_id_from_global_id(global_id):
+ obj_type, subtype, obj_id = global_id.split(':', 2)
+ return obj_type, subtype, obj_id
def get_obj_from_global_id(global_id):
- obj = global_id.split(':', 3)
+ obj = get_obj_type_subtype_id_from_global_id(global_id)
return get_object(obj[0], obj[1], obj[2])
@@ -159,7 +177,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
subtype = obj[1]
obj_id = obj[2]
else:
- obj_type, subtype, obj_id = obj.split(':', 2)
+ obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj)
metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
return metas
@@ -168,7 +186,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
- if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
+ if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
meta['cve_search'] = obj.get_cve_search()
@@ -177,6 +195,8 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
if subtype == 'bitcoin' and related_btc:
meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id)
if obj.get_type() == 'decoded':
+ meta['mimetype'] = obj.get_mimetype()
+ meta['size'] = obj.get_size()
meta["vt"] = obj.get_meta_vt()
meta["vt"]["status"] = obj.is_vt_enabled()
# TAGS MODAL
@@ -333,8 +353,8 @@ def get_obj_correlations(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
return obj.get_correlations()
-def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max):
- if len(objs) < nb_max or nb_max == -1:
+def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden):
+ if len(objs) < nb_max or nb_max == 0:
if lvl == 0:
objs.add((obj_type, subtype, obj_id))
@@ -346,15 +366,17 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv
for obj2_type in correlations:
for str_obj in correlations[obj2_type]:
obj2_subtype, obj2_id = str_obj.split(':', 1)
- _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max)
+ if get_obj_global_id(obj2_type, obj2_subtype, obj2_id) in objs_hidden:
+ continue # filter object to hide
+ _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max, objs_hidden)
-def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300):
+def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
objs = set()
- _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max)
+ _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden)
return objs
-def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300):
- objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max)
+def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()):
+ objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max, objs_hidden=objs_hidden)
# print(objs)
for obj_tuple in objs:
obj1_type, subtype1, id1 = obj_tuple
@@ -395,7 +417,7 @@ def create_correlation_graph_links(links_set):
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
graph_nodes_list = []
for node_id in nodes_set:
- obj_type, subtype, obj_id = node_id.split(':', 2)
+ obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id)
dict_node = {'id': node_id}
dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)
@@ -416,10 +438,12 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
def get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1,
+ objs_hidden=set(),
flask_context=False):
obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id,
filter_types=filter_types,
max_nodes=max_nodes, level=level,
+ objs_hidden=objs_hidden,
flask_context=flask_context)
# print(meta)
meta['objs'] = list(meta['objs'])
diff --git a/bin/lib/timeline_engine.py b/bin/lib/timeline_engine.py
new file mode 100755
index 00000000..58c222f6
--- /dev/null
+++ b/bin/lib/timeline_engine.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+from uuid import uuid4
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
+config_loader = ConfigLoader()
+r_meta = config_loader.get_db_conn("Kvrocks_Timeline")
+config_loader = None
+
+# CORRELATION_TYPES_BY_OBJ = {
+# "chat": ["item", "username"], # item ???
+# "cookie-name": ["domain"],
+# "cryptocurrency": ["domain", "item"],
+# "cve": ["domain", "item"],
+# "decoded": ["domain", "item"],
+# "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
+# "etag": ["domain"],
+# "favicon": ["domain", "item"],
+# "hhhash": ["domain"],
+# "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
+# "pgp": ["domain", "item"],
+# "screenshot": ["domain", "item"],
+# "title": ["domain", "item"],
+# "username": ["chat", "domain", "item"],
+# }
+#
+# def get_obj_correl_types(obj_type):
+# return CORRELATION_TYPES_BY_OBJ.get(obj_type)
+
+# def sanityze_obj_correl_types(obj_type, correl_types):
+# obj_correl_types = get_obj_correl_types(obj_type)
+# if correl_types:
+# correl_types = set(correl_types).intersection(obj_correl_types)
+# if not correl_types:
+# correl_types = obj_correl_types
+# if not correl_types:
+# return []
+# return correl_types
+
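+# A timeline is a zset of bloc boundaries ('start:<uuid>' / 'end:<uuid>',
+# scored by timestamp) plus a hash mapping each bloc uuid to the global id
+# of the object that was active during that time range.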
+class Timeline:
+
+ def __init__(self, global_id, name):
+ self.id = global_id
+ self.name = name
+
+ def _get_block_obj_global_id(self, block):
+ return r_meta.hget(f'block:{self.id}:{self.name}', block)
+
+ def _set_block_obj_global_id(self, block, global_id):
+ return r_meta.hset(f'block:{self.id}:{self.name}', block, global_id)
+
+ def _get_block_timestamp(self, block, position):
+ return r_meta.zscore(f'line:{self.id}:{self.name}', f'{position}:{block}')
+
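+ # nearest bloc boundary at or below the timestamp; a zero-length bloc has
+ # 'start' and 'end' at the same score -> prefer its 'start' marker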
+ def _get_nearest_bloc_inf(self, timestamp):
+ inf = r_meta.zrevrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), 0, start=0, num=1, withscores=True)
+ if inf:
+ inf, score = inf[0]
+ if inf.startswith('end'):
+ inf_key = f'start:{inf[4:]}'
+ inf_score = r_meta.zscore(f'line:{self.id}:{self.name}', inf_key)
+ if inf_score == score:
+ inf = inf_key
+ return inf
+ else:
+ return None
+
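+ # symmetric: nearest bloc boundary at or above the timestamp, preferring 'end'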
+ def _get_nearest_bloc_sup(self, timestamp):
+ sup = r_meta.zrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), '+inf', start=0, num=1, withscores=True)
+ if sup:
+ sup, score = sup[0]
+ if sup.startswith('start'):
+ sup_key = f'end:{sup[6:]}'
+ sup_score = r_meta.zscore(f'line:{self.id}:{self.name}', sup_key)
+ if score == sup_score:
+ sup = sup_key
+ return sup
+ else:
+ return None
+
+ def get_first_obj_id(self):
+ first = r_meta.zrange(f'line:{self.id}:{self.name}', 0, 0)
+ if first: # start:block
+ first = first[0]
+ if first.startswith('start:'):
+ first = first[6:]
+ else:
+ first = first[4:]
+ return self._get_block_obj_global_id(first)
+
+ def get_last_obj_id(self):
+ last = r_meta.zrevrange(f'line:{self.id}:{self.name}', 0, 0)
+ if last: # end:block
+ last = last[0]
+ if last.startswith('end:'):
+ last = last[4:]
+ else:
+ last = last[6:]
+ return self._get_block_obj_global_id(last)
+
+ def get_objs_ids(self):
+ objs = set()
+ for block in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1):
+ if block:
+ if block.startswith('start:'):
+ objs.add(self._get_block_obj_global_id(block[6:]))
+ return objs
+
+ # def get_objs_ids(self):
+ # objs = {}
+ # last_obj_id = None
+ # for block, timestamp in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1, withscores=True):
+ # if block:
+ # if block.startswith('start:'):
+ # last_obj_id = self._get_block_obj_global_id(block[6:])
+ # objs[last_obj_id] = {'first_seen': timestamp}
+ # else:
+ # objs[last_obj_id]['last_seen'] = timestamp
+ # return objs
+
+ def _update_bloc(self, block, position, timestamp):
+ r_meta.zadd(f'line:{self.id}:{self.name}', {f'{position}:{block}': timestamp})
+
+ def _add_bloc(self, obj_global_id, timestamp, end=None):
+ if end:
+ timestamp_end = end
+ else:
+ timestamp_end = timestamp
+ new_bloc = str(uuid4())
+ r_meta.zadd(f'line:{self.id}:{self.name}', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end})
+ self._set_block_obj_global_id(new_bloc, obj_global_id)
+ return new_bloc
+
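+ # Insert a timestamp for obj_global_id:
+ # - no surrounding bloc -> create a new one
+ # - adjacent bloc owned by the same object -> extend it
+ # - timestamp inside another object's bloc -> split that bloc in three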
+ def add_timestamp(self, timestamp, obj_global_id):
+ inf = self._get_nearest_bloc_inf(timestamp)
+ sup = self._get_nearest_bloc_sup(timestamp)
+ if not inf and not sup:
+ # create new bloc
+ new_bloc = self._add_bloc(obj_global_id, timestamp)
+ return new_bloc
+ # timestamp < first_seen
+ elif not inf:
+ sup_pos, sup_id = sup.split(':')
+ sup_obj = self._get_block_obj_global_id(sup_id)
+ if sup_obj == obj_global_id:
+ self._update_bloc(sup_id, 'start', timestamp)
+ # create new bloc
+ else:
+ new_bloc = self._add_bloc(obj_global_id, timestamp)
+ return new_bloc
+
+ # timestamp > first_seen
+ elif not sup:
+ inf_pos, inf_id = inf.split(':')
+ inf_obj = self._get_block_obj_global_id(inf_id)
+ if inf_obj == obj_global_id:
+ self._update_bloc(inf_id, 'end', timestamp)
+ # create new bloc
+ else:
+ new_bloc = self._add_bloc(obj_global_id, timestamp)
+ return new_bloc
+
+ else:
+ inf_pos, inf_id = inf.split(':')
+ sup_pos, sup_id = sup.split(':')
+ inf_obj = self._get_block_obj_global_id(inf_id)
+
+ if inf_id == sup_id:
+ # reduce bloc + create two new blocs
+ if obj_global_id != inf_obj:
+ # get end timestamp
+ sup_timestamp = self._get_block_timestamp(sup_id, 'end')
+ # reduce original bloc
+ self._update_bloc(inf_id, 'end', timestamp - 1)
+ # Insert new bloc
+ new_bloc = self._add_bloc(obj_global_id, timestamp)
+ # Recreate the end of the original bloc as a new bloc
+ self._add_bloc(inf_obj, timestamp + 1, end=sup_timestamp)
+ return new_bloc
+
+ # timestamp in existing bloc
+ else:
+ return inf_id
+
+ # different blocs: extend the sup/inf bloc or create a new bloc in between
+ elif inf_pos == 'end' and sup_pos == 'start':
+ # Extend inf bloc
+ if obj_global_id == inf_obj:
+ self._update_bloc(inf_id, 'end', timestamp)
+ return inf_id
+
+ sup_obj = self._get_block_obj_global_id(sup_id)
+ # Extend sup bloc
+ if obj_global_id == sup_obj:
+ self._update_bloc(sup_id, 'start', timestamp)
+ return sup_id
+
+ # create new bloc
+ new_bloc = self._add_bloc(obj_global_id, timestamp)
+ return new_bloc
+
+ # remaining case: inf_pos == 'start' and sup_pos == 'end'
+ # -> overlapping blocs, should not happen: raise an error ???
diff --git a/bin/modules/Cryptocurrencies.py b/bin/modules/Cryptocurrencies.py
index fd5c5402..5a83689f 100755
--- a/bin/modules/Cryptocurrencies.py
+++ b/bin/modules/Cryptocurrencies.py
@@ -130,7 +130,7 @@ class Cryptocurrencies(AbstractModule, ABC):
if crypto.is_valid_address():
# print(address)
is_valid_address = True
- crypto.add(date, item_id)
+ crypto.add(date, item)
# Check private key
if is_valid_address:
diff --git a/bin/modules/Mixer.py b/bin/modules/Mixer.py
index b8f2bedf..62c427e3 100755
--- a/bin/modules/Mixer.py
+++ b/bin/modules/Mixer.py
@@ -131,7 +131,7 @@ class Mixer(AbstractModule):
self.last_refresh = time.time()
self.clear_feeders_stat()
- time.sleep(0.5)
+ time.sleep(0.5)
def computeNone(self):
self.refresh_stats()
diff --git a/bin/modules/Onion.py b/bin/modules/Onion.py
index 2066e9a3..681bae0c 100755
--- a/bin/modules/Onion.py
+++ b/bin/modules/Onion.py
@@ -42,7 +42,8 @@ class Onion(AbstractModule):
self.faup = crawlers.get_faup()
# activate_crawler = p.config.get("Crawler", "activate_crawler")
-
+ self.har = config_loader.get_config_boolean('Crawler', 'default_har')
+ self.screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
# self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
@@ -90,8 +91,9 @@ class Onion(AbstractModule):
if onion_urls:
if crawlers.is_crawler_activated():
- for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR
- task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0)
+ for domain in domains:
+ task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0,
+ har=self.har, screenshot=self.screenshot)
if task_uuid:
print(f'{domain} added to crawler queue: {task_uuid}')
else:
diff --git a/bin/modules/PgpDump.py b/bin/modules/PgpDump.py
index 1e8a27a7..82ec9f32 100755
--- a/bin/modules/PgpDump.py
+++ b/bin/modules/PgpDump.py
@@ -210,18 +210,18 @@ class PgpDump(AbstractModule):
date = item.get_date()
for key in self.keys:
pgp = Pgps.Pgp(key, 'key')
- pgp.add(date, self.item_id)
+ pgp.add(date, item)
print(f' key: {key}')
for name in self.names:
pgp = Pgps.Pgp(name, 'name')
- pgp.add(date, self.item_id)
+ pgp.add(date, item)
print(f' name: {name}')
self.tracker_term.compute(name, obj_type='pgp', subtype='name')
self.tracker_regex.compute(name, obj_type='pgp', subtype='name')
self.tracker_yara.compute(name, obj_type='pgp', subtype='name')
for mail in self.mails:
pgp = Pgps.Pgp(mail, 'mail')
- pgp.add(date, self.item_id)
+ pgp.add(date, item)
print(f' mail: {mail}')
self.tracker_term.compute(mail, obj_type='pgp', subtype='mail')
self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail')
diff --git a/bin/modules/Telegram.py b/bin/modules/Telegram.py
index 273d20b9..140948c2 100755
--- a/bin/modules/Telegram.py
+++ b/bin/modules/Telegram.py
@@ -58,7 +58,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username')
if user_id:
username = Username(user_id, 'telegram')
- username.add(item_date, item.id)
+ username.add(item_date, item)
print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash')
if invite_hash:
@@ -73,7 +73,7 @@ class Telegram(AbstractModule):
user_id = dict_url.get('username')
if user_id:
username = Username(user_id, 'telegram')
- username.add(item_date, item.id)
+ username.add(item_date, item)
print(f'username: {user_id}')
invite_hash = dict_url.get('invite_hash')
if invite_hash:
diff --git a/bin/update-background.py b/bin/update-background.py
index f5a3c58e..c6f81d57 100755
--- a/bin/update-background.py
+++ b/bin/update-background.py
@@ -10,6 +10,8 @@ Update AIL in the background
"""
import os
+import logging
+import logging.config
import sys
import subprocess
@@ -17,37 +19,55 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
+from lib import ail_logger
from lib import ail_updates
-def launch_background_upgrade(version, l_script_name):
- if ail_updates.is_version_in_background_update(version):
- ail_updates.start_background_update(version)
+logging.config.dictConfig(ail_logger.get_config(name='updates'))
+def launch_background_upgrade(version):
+ logger = logging.getLogger()
+ logger.warning(f'launching background update {version}')
+ update = ail_updates.AILBackgroundUpdate(version)
+ nb_done = update.get_nb_scripts_done()
+ update.start()
+ scripts = update.get_scripts()
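+ # resume after the last completed script if a previous run was interrupted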
+ scripts = scripts[nb_done:]
+ for script in scripts:
+ print('launching background script update', script)
+ # launch script
+ update.start_script(script)
+ script_path = update.get_script_path()
+ if script_path:
+ try:
+ process = subprocess.run(['python', script_path], stderr=subprocess.PIPE)
+ if process.returncode != 0:
+ stderr = process.stderr
+ if stderr:
+ error = stderr.decode()
+ logger.error(error)
+ update.set_error(error)
+ else:
+ update.set_error('Error Updater Script')
+ logger.error('Error Updater Script')
+ sys.exit(0)
+ except Exception as e:
+ update.set_error(str(e))
+ logger.error(str(e))
+ sys.exit(0)
- for script_name in l_script_name:
- ail_updates.set_current_background_update_script(script_name)
- update_file = ail_updates.get_current_background_update_script_path(version, script_name)
+ if not update.get_error():
+ update.end_script()
+ else:
+ logger.warning('Updater exited on error')
+ sys.exit(0)
- # # TODO: Get error output
- process = subprocess.run(['python', update_file])
-
- update_progress = ail_updates.get_current_background_update_progress()
- if update_progress == 100:
- ail_updates.end_background_update_script()
- # # TODO: Create Custom error
- # 'Please relaunch the bin/update-background.py script'
- # # TODO: Create Class background update
-
- ail_updates.end_background_update(version)
+ update.end()
+ logger.warning(f'ending background update {version}')
if __name__ == "__main__":
-
- if not ail_updates.exits_background_update_to_launch():
- ail_updates.clear_background_update()
+ if ail_updates.is_update_background_running():
+ v = ail_updates.get_update_background_version()
+ launch_background_upgrade(v)
else:
- launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py',
- 'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py'])
- launch_background_upgrade('v2.6', ['Update_screenshots.py'])
- launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
- launch_background_upgrade('v3.4', ['Update_domain.py'])
- launch_background_upgrade('v3.7', ['Update_trackers.py'])
+ for ver in ail_updates.get_update_background_to_launch():
+ launch_background_upgrade(ver)
diff --git a/configs/6383.conf b/configs/6383.conf
index c730003c..a06d4e69 100644
--- a/configs/6383.conf
+++ b/configs/6383.conf
@@ -663,6 +663,7 @@ namespace.crawl ail_crawlers
namespace.db ail_datas
namespace.dup ail_dups
namespace.obj ail_objs
+namespace.tl ail_tls
namespace.stat ail_stats
namespace.tag ail_tags
namespace.track ail_trackers
diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample
index 62e9efc3..9d7bb390 100644
--- a/configs/core.cfg.sample
+++ b/configs/core.cfg.sample
@@ -45,6 +45,10 @@ sender = sender@example.com
sender_host = smtp.example.com
sender_port = 1337
sender_pw = None
+# Only needed for SMTP over SSL if the mail server doesn't support TLS (used by default). Use this option to validate the server certificate.
+cert_required = False
+# Only needed for SMTP over SSL if you want to validate your self-signed SSL certificate
+ca_file =
# Only needed when the credentials for email server needs a username instead of an email address
#sender_user = sender
sender_user =
@@ -191,6 +195,11 @@ host = localhost
port = 6383
password = ail_objs
+[Kvrocks_Timeline]
+host = localhost
+port = 6383
+password = ail_tls
+
[Kvrocks_Stats]
host = localhost
port = 6383
diff --git a/doc/README.md b/doc/README.md
index d6b707e8..aee47955 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -89,12 +89,12 @@ Available Importers:
5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
- Option 1 (recommended):
```
- ./ail-framework/bin/LAUNCH.py -l #starts ail-framework
- ./ail-framework/bin/LAUNCH.py -f #starts pystemon and the PystemonImporter.py
+ ./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
+ ./ail-framework/bin/LAUNCH.sh -f #starts pystemon and the PystemonImporter.py
```
- Option 2 (may require two terminal windows):
```
- ./ail-framework/bin/LAUNCH.py -l #starts ail-framework
+ ./ail-framework/bin/LAUNCH.sh -l #starts ail-framework
./pystemon/pystemon.py
./ail-framework/bin/importer/PystemonImporter.py
```
diff --git a/tools/crawler_add_task.py b/tools/crawler_add_task.py
new file mode 100755
index 00000000..5d9604fe
--- /dev/null
+++ b/tools/crawler_add_task.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Send a URL to the crawler - Create a crawler task
+================
+
+Import URL to be crawled by AIL and then analysed
+
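+Example (a sketch, assuming a local AIL instance and a valid API key):
+
+ python3 tools/crawler_add_task.py -u https://example.com -k <api_key> -d 1 --har --screenshot
+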
+"""
+
+import argparse
+import os
+from pyail import PyAIL
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
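+# Frequency validator: custom interval values must be strictly positive integers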
+def check_frequency(value):
+ value = int(value)
+ if value <= 0:
+ raise argparse.ArgumentTypeError(f'Error: Invalid frequency {value}')
+
+
+if __name__ == "__main__":
+
+ # TODO add c argument for config file
+ parser = argparse.ArgumentParser(description='Send a URL to the crawler - Create a crawler task')
+ parser.add_argument('-u', '--url', type=str, help='URL to crawl', required=True)
+ parser.add_argument('-k', '--key', type=str, help='AIL API Key', required=True)
+ parser.add_argument('-a', '--ail', type=str, help='AIL URL')
+ parser.add_argument('-d', '--depth', type=int, default=1, help='Depth limit') # TODO improve me
+ parser.add_argument('--cookiejar', type=str, help='Cookiejar uuid')
+ parser.add_argument('-p', '--proxy', type=str, help='Proxy address to use, "web" and "tor" can be used as shortcuts (web is used by default if the domain isn\'t an onion)')
+
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('--har', dest='har', action='store_true', help='Save HAR')
+ group.add_argument('--no-har', dest='har', action='store_false', help='Don\'t save HAR')
+ parser.set_defaults(har=None)
+
+ group = parser.add_mutually_exclusive_group()
+ group.add_argument('--screenshot', dest='screenshot', action='store_true', help='Save screenshot')
+ group.add_argument('--no-screenshot', dest='screenshot', action='store_false', help='Don\'t save screenshot')
+ parser.set_defaults(screenshot=None)
+
+ group = parser.add_argument_group('Frequency, create a regular crawler/scheduler. One-shot crawl if not specified')
+ group.add_argument('-f', '--frequency', type=str, choices=['monthly', 'weekly', 'daily', 'hourly'],
+ help='monthly, weekly, daily or hourly frequency, or specify a custom one with the other arguments')
+ group.add_argument('--minutes', type=int, help='frequency in minutes')
+ group.add_argument('--hours', type=int, help='frequency in hours')
+ group.add_argument('--days', type=int, help='frequency in days')
+ group.add_argument('--weeks', type=int, help='frequency in weeks')
+ group.add_argument('--months', type=int, help='frequency in months')
+
+ args = parser.parse_args()
+
+ if not args.url or not args.key:
+ parser.print_help()
+ sys.exit(0)
+
+ # Load crawler default config
+ config_loader = ConfigLoader()
+ har = args.har
+ if har is None:
+ har = config_loader.get_config_boolean('Crawler', 'default_har')
+ screenshot = args.screenshot
+ if screenshot is None:
+ screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot')
+
+ depth = args.depth
+ if depth < 0:
+ raise argparse.ArgumentTypeError(f'Error: Invalid depth {depth}')
+
+ # frequency
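+ # either a preset string ('monthly', 'weekly', 'daily', 'hourly') or a dict of custom intervals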
+ frequency = {}
+ if args.frequency:
+ if args.frequency in ['monthly', 'weekly', 'daily', 'hourly']:
+ frequency = args.frequency
+ else:
+ raise argparse.ArgumentTypeError('Invalid frequency')
+ elif args.minutes or args.hours or args.days or args.weeks or args.months:
+ if args.minutes:
+ check_frequency(args.minutes)
+ frequency['minutes'] = args.minutes
+ if args.hours:
+ check_frequency(args.hours)
+ frequency['hours'] = args.hours
+ if args.days:
+ check_frequency(args.days)
+ frequency['days'] = args.days
+ if args.weeks:
+ check_frequency(args.weeks)
+ frequency['weeks'] = args.weeks
+ if args.months:
+ check_frequency(args.months)
+ frequency['months'] = args.months
+ if not frequency:
+ frequency = None
+
+ proxy = args.proxy
+
+ if args.cookiejar:
+ cookiejar = args.cookiejar
+ else:
+ cookiejar = None
+
+ ail = args.ail
+ if not ail:
+ ail = 'https://localhost:7000/'
+
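+ # Note: ssl=False disables certificate verification; AIL ships with a self-signed certificate by default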
+ client = PyAIL(ail, args.key, ssl=False)
+ r = client.crawl_url(args.url, har=har, screenshot=screenshot, depth_limit=depth, frequency=frequency,
+ cookiejar=cookiejar, proxy=proxy)
+ print(r)
diff --git a/update/bin/Update_ARDB.sh b/update/bin/Update_ARDB.sh
deleted file mode 100755
index 2544973e..00000000
--- a/update/bin/Update_ARDB.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-echo "Killing all screens ..."
-bash -c "bash ../../bin/LAUNCH.sh -k"
-echo ""
-echo "Updating ARDB ..."
-pushd ../../
-rm -r ardb
-pushd ardb/
-git clone https://github.com/yinqiwen/ardb.git
-git checkout 0.10 || exit 1
-make || exit 1
-popd
-popd
-echo "ARDB Updated"
-echo ""
-
-exit 0
diff --git a/update/bin/Update_Redis.sh b/update/bin/Update_Redis.sh
index 238d53f7..dc4d394d 100755
--- a/update/bin/Update_Redis.sh
+++ b/update/bin/Update_Redis.sh
@@ -2,13 +2,11 @@
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
diff --git a/update/bin/ail_updater.py b/update/bin/ail_updater.py
index 6edc8ea6..af45d3a4 100755
--- a/update/bin/ail_updater.py
+++ b/update/bin/ail_updater.py
@@ -20,7 +20,7 @@ class AIL_Updater(object):
self.start_time = time.time()
self.config = ConfigLoader()
- self.r_serv = self.config.get_redis_conn("Kvrocks_DB")
+ self.r_serv = self.config.get_db_conn("Kvrocks_DB")
self.f_version = float(self.version[1:])
self.current_f_version = ail_updates.get_ail_float_version()
@@ -35,7 +35,7 @@ class AIL_Updater(object):
"""
Update DB version
"""
- ail_updates.add_ail_update(version)
+ ail_updates.add_ail_update(self.version)
def run_update(self):
self.update()
diff --git a/update/bin/old_ail_updater.py b/update/bin/old_ail_updater.py
deleted file mode 100755
index 14833679..00000000
--- a/update/bin/old_ail_updater.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-##################################
-# Import Project packages
-##################################
-from lib import ConfigLoader
-
-class AIL_Updater(object):
- """docstring for AIL_Updater."""
-
- def __init__(self, new_version):
- self.version = new_version
- self.start_time = time.time()
-
- self.config = ConfigLoader.ConfigLoader()
- self.r_serv = self.config.get_redis_conn("ARDB_DB")
-
- self.f_version = float(self.version[1:])
- self.current_f_version = self.r_serv.get('ail:version')
- if self.current_f_version:
- self.current_f_version = float(self.current_f_version[1:])
- else:
- self.current_f_version = 0
-
- def update(self):
- """
- AIL DB update
- """
- pass
-
- def end_update(self):
- """
- Update DB version
- """
- # Set current ail version
- self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d"))
- # Set current ail version
- if self.f_version > self.current_f_version:
- self.r_serv.set('ail:version', self.version)
-
- def run_update(self):
- self.update()
- self.end_update()
diff --git a/update/default_update/Update.sh b/update/default_update/Update.sh
index 189ae846..ef881805 100755
--- a/update/default_update/Update.sh
+++ b/update/default_update/Update.sh
@@ -7,13 +7,13 @@ fi
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_KVROCKS" ] && echo "Needs the env var AIL_KVROCKS. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
+export PATH=$AIL_KVROCKS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@@ -25,7 +25,7 @@ bash ${AIL_BIN}/LAUNCH.sh -ks
wait
echo ""
-bash ${AIL_BIN}/LAUNCH.sh -lav
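+# -lkv launches Kvrocks and waits until it is ready (replaces the removed ARDB -lav flag)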
+bash ${AIL_BIN}/LAUNCH.sh -lkv
wait
echo ""
diff --git a/update/v1.0/Update.sh b/update/v1.0/Update.sh
deleted file mode 100755
index 1d3691d0..00000000
--- a/update/v1.0/Update.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-YELLOW="\\033[1;33m"
-DEFAULT="\\033[0;39m"
-
-echo -e $YELLOW"\t"
-echo -e "* ------------------------------------------------------------------"
-echo -e "\t"
-echo -e " - - - - - - - - PLEASE RELAUNCH AIL - - - - - - - - "
-echo -e "\t"
-echo -e "* ------------------------------------------------------------------"
-echo -e "\t"
-echo -e "\t"$DEFAULT
-
-# fix invalid Updater version (kill parent):
-kill -SIGUSR1 `ps --pid $$ -oppid=`; exit
diff --git a/update/v1.5/Update-ARDB_Metadata.py b/update/v1.5/Update-ARDB_Metadata.py
deleted file mode 100755
index f678f74e..00000000
--- a/update/v1.5/Update-ARDB_Metadata.py
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-from lib import ConfigLoader
-
-def update_tracked_terms(main_key, tracked_container_key):
- for tracked_item in r_serv_term.smembers(main_key):
- all_items = r_serv_term.smembers(tracked_container_key.format(tracked_item))
- for item_path in all_items:
- if PASTES_FOLDER in item_path:
- new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
- r_serv_term.sadd(tracked_container_key.format(tracked_item), new_item_path)
- r_serv_term.srem(tracked_container_key.format(tracked_item), item_path)
-
-def update_hash_item(has_type):
- #get all hash items:
- all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
- for item_path in all_hash_items:
- if PASTES_FOLDER in item_path:
- base64_key = '{}_paste:{}'.format(has_type, item_path)
- hash_key = 'hash_paste:{}'.format(item_path)
-
- if r_serv_metadata.exists(base64_key):
- new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
- res = r_serv_metadata.renamenx(base64_key, new_base64_key)
- if res == 0:
- print('same key, double name: {}'.format(item_path))
- # fusion
- all_key = r_serv_metadata.smembers(base64_key)
- for elem in all_key:
- r_serv_metadata.sadd(new_base64_key, elem)
- r_serv_metadata.srem(base64_key, elem)
-
- if r_serv_metadata.exists(hash_key):
- new_hash_key = hash_key.replace(PASTES_FOLDER, '', 1)
- res = r_serv_metadata.renamenx(hash_key, new_hash_key)
- if res == 0:
- print('same key, double name: {}'.format(item_path))
- # fusion
- all_key = r_serv_metadata.smembers(hash_key)
- for elem in all_key:
- r_serv_metadata.sadd(new_hash_key, elem)
- r_serv_metadata.srem(hash_key, elem)
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- r_serv.set('ail:current_background_script', 'metadata')
-
- ## Update metadata ##
- print('Updating ARDB_Metadata ...')
- index = 0
- start = time.time()
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 0)
-
- # Update base64
- update_hash_item('base64')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 20)
- # Update binary
- update_hash_item('binary')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 40)
- # Update binary
- update_hash_item('hexadecimal')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 60)
-
- total_onion = r_serv_tag.scard('infoleak:submission=\"crawler\"')
- nb_updated = 0
- last_progress = 0
-
- # Update onion metadata
- all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
- for item_path in all_crawled_items:
- domain = None
- if PASTES_FOLDER in item_path:
- old_item_metadata = 'paste_metadata:{}'.format(item_path)
- item_path = item_path.replace(PASTES_FOLDER, '', 1)
- new_item_metadata = 'paste_metadata:{}'.format(item_path)
- res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
- #key already exist
- if res == 0:
- r_serv_metadata.delete(old_item_metadata)
-
- # update domain port
- domain = r_serv_metadata.hget(new_item_metadata, 'domain')
- if domain:
- if domain[-3:] != ':80':
- r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))
- super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
- if super_father:
- if PASTES_FOLDER in super_father:
- r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
- father = r_serv_metadata.hget(new_item_metadata, 'father')
- if father:
- if PASTES_FOLDER in father:
- r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))
-
- nb_updated += 1
- progress = int((nb_updated * 30) /total_onion)
- print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60))
- # update progress stats
- if progress != last_progress:
- r_serv.set('ail:current_background_script_stat', progress + 60)
- last_progress = progress
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 90)
-
- ## update tracked term/set/regex
- # update tracked term
- update_tracked_terms('TrackedSetTermSet', 'tracked_{}')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 93)
- # update tracked set
- update_tracked_terms('TrackedSetSet', 'set_{}')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 96)
- # update tracked regex
- update_tracked_terms('TrackedRegexSet', 'regex_{}')
-
- #update stats
- r_serv.set('ail:current_background_script_stat', 100)
- ##
-
- end = time.time()
-
- print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start))
- print()
-
- r_serv.sadd('ail:update_v1.5', 'metadata')
-
- ##
- #Key, Dynamic Update
- ##
- #paste_children
- #nb_seen_hash, base64_hash, binary_hash
- #paste_onion_external_links
- #misp_events, hive_cases
- ##
diff --git a/update/v1.5/Update-ARDB_Onions.py b/update/v1.5/Update-ARDB_Onions.py
deleted file mode 100755
index 199ee194..00000000
--- a/update/v1.5/Update-ARDB_Onions.py
+++ /dev/null
@@ -1,129 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-def substract_date(date_from, date_to):
- date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
- date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
- delta = date_to - date_from # timedelta
- l_date = []
- for i in range(delta.days + 1):
- date = date_from + datetime.timedelta(i)
- l_date.append( date.strftime('%Y%m%d') )
- return l_date
-
-def get_date_epoch(date):
- return int(datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8])).timestamp())
-
-def get_domain_root_from_paste_childrens(item_father, domain):
- item_children = r_serv_metadata.smembers('paste_children:{}'.format(item_father))
- domain_root = ''
- for item_path in item_children:
- # remove absolute_path
- if PASTES_FOLDER in item_path:
- r_serv_metadata.srem('paste_children:{}'.format(item_father), item_path)
- item_path = item_path.replace(PASTES_FOLDER, '', 1)
- r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_path)
- if domain in item_path:
- domain_root = item_path
- return domain_root
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- r_serv.set('ail:current_background_script', 'onions')
- r_serv.set('ail:current_background_script_stat', 0)
-
- ## Update Onion ##
- print('Updating ARDB_Onion ...')
- index = 0
- start = time.time()
-
- # clean down domain from db
- date_from = '20180929'
- date_today = datetime.date.today().strftime("%Y%m%d")
- for date in substract_date(date_from, date_today):
-
- onion_down = r_serv_onion.smembers('onion_down:{}'.format(date))
- #print(onion_down)
- for onion_domain in onion_down:
- if not r_serv_onion.sismember('full_onion_up', onion_domain):
- # delete history
- all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
- if all_onion_history:
- for date_history in all_onion_history:
- #print('onion_history:{}:{}'.format(onion_domain, date_history))
- r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
- r_serv_onion.delete('onion_history:{}'.format(onion_domain))
-
- #stats
- total_domain = r_serv_onion.scard('full_onion_up')
- nb_updated = 0
- last_progress = 0
-
- # clean up domain
- all_domain_up = r_serv_onion.smembers('full_onion_up')
- for onion_domain in all_domain_up:
- # delete history
- all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1)
- if all_onion_history:
- for date_history in all_onion_history:
- print('--------')
- print('onion_history:{}:{}'.format(onion_domain, date_history))
- item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0)
- print('item_father: {}'.format(item_father))
- try:
- item_father = item_father[0]
- except IndexError:
- r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
- continue
- #print(item_father)
- # delete old history
- r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history))
- # create new history
- root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
- if root_key:
- r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)})
- print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
- #update service metadata: paste_parent
- r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)
-
- r_serv_onion.delete('onion_history:{}'.format(onion_domain))
-
- r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80')
- r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen')
-
- nb_updated += 1
- progress = int((nb_updated * 100) /total_domain)
- print('{}/{} updated {}%'.format(nb_updated, total_domain, progress))
- # update progress stats
- if progress != last_progress:
- r_serv.set('ail:current_background_script_stat', progress)
- last_progress = progress
-
-
- end = time.time()
- print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
- print()
- print('Done in {} s'.format(end - start_deb))
-
- r_serv.sadd('ail:update_v1.5', 'onions')
diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py
deleted file mode 100755
index 6ad7a025..00000000
--- a/update/v1.5/Update-ARDB_Onions_screenshots.py
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-from hashlib import sha256
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-def rreplace(s, old, new, occurrence):
- li = s.rsplit(old, occurrence)
- return new.join(li)
-
-def substract_date(date_from, date_to):
- date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8]))
- date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8]))
- delta = date_to - date_from # timedelta
- l_date = []
- for i in range(delta.days + 1):
- date = date_from + datetime.timedelta(i)
- l_date.append( date.strftime('%Y%m%d') )
- return l_date
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"))
- NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- r_serv.set('ail:current_background_script', 'crawled_screenshot')
- r_serv.set('ail:current_background_script_stat', 0)
-
- ## Update Onion ##
- print('Updating ARDB_Onion ...')
- index = 0
- start = time.time()
-
- # clean down domain from db
- date_from = '20180801'
- date_today = datetime.date.today().strftime("%Y%m%d")
- list_date = substract_date(date_from, date_today)
- nb_done = 0
- last_progress = 0
- total_to_update = len(list_date)
- for date in list_date:
- screenshot_dir = os.path.join(SCREENSHOT_FOLDER, date[0:4], date[4:6], date[6:8])
- if os.path.isdir(screenshot_dir):
- print(screenshot_dir)
- for file in os.listdir(screenshot_dir):
- if file.endswith(".png"):
- index += 1
- #print(file)
-
- img_path = os.path.join(screenshot_dir, file)
- with open(img_path, 'br') as f:
- image_content = f.read()
-
- hash = sha256(image_content).hexdigest()
- img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12])
- filename_img = os.path.join(NEW_SCREENSHOT_FOLDER, img_dir_path, hash[12:] +'.png')
- dirname = os.path.dirname(filename_img)
- if not os.path.exists(dirname):
- os.makedirs(dirname)
- if not os.path.exists(filename_img):
- os.rename(img_path, filename_img)
- else:
- os.remove(img_path)
-
- item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4])
- # add item metadata
- r_serv_metadata.hset('paste_metadata:{}'.format(item), 'screenshot', hash)
- # add sha256 metadata
- r_serv_onion.sadd('screenshot:{}'.format(hash), item)
-
- if file.endswith('.pnghar.txt'):
- har_path = os.path.join(screenshot_dir, file)
- new_file = rreplace(file, '.pnghar.txt', '.json', 1)
- new_har_path = os.path.join(screenshot_dir, new_file)
- os.rename(har_path, new_har_path)
-
- progress = int((nb_done * 100) /total_to_update)
- # update progress stats
- if progress != last_progress:
- r_serv.set('ail:current_background_script_stat', progress)
- print('{}/{} screenshot updated {}%'.format(nb_done, total_to_update, progress))
- last_progress = progress
-
- nb_done += 1
-
- r_serv.set('ail:current_background_script_stat', 100)
-
-
- end = time.time()
- print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
- print()
- print('Done in {} s'.format(end - start_deb))
-
- r_serv.set('ail:current_background_script_stat', 100)
- r_serv.sadd('ail:update_v1.5', 'crawled_screenshot')
- if r_serv.scard('ail:update_v1.5') != 5:
- r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script')
diff --git a/update/v1.5/Update-ARDB_Tags.py b/update/v1.5/Update-ARDB_Tags.py
deleted file mode 100755
index 38f3c09c..00000000
--- a/update/v1.5/Update-ARDB_Tags.py
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import redis
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-from lib import ConfigLoader
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- r_important_paste_2018 = redis.StrictRedis(
- host=config_loader.get_config_str("ARDB_Metadata", "host"),
- port=config_loader.get_config_int("ARDB_Metadata", "port"),
- db=2018,
- decode_responses=True)
-
- r_important_paste_2019 = redis.StrictRedis(
- host=config_loader.get_config_str("ARDB_Metadata", "host"),
- port=config_loader.get_config_int("ARDB_Metadata", "port"),
- db=2019,
- decode_responses=True)
-
- config_loader = None
-
- r_serv.set('ail:current_background_script', 'tags')
- r_serv.set('ail:current_background_script_stat', 0)
-
- if r_serv.sismember('ail:update_v1.5', 'onions') and r_serv.sismember('ail:update_v1.5', 'metadata'):
-
- print('Updating ARDB_Tags ...')
- index = 0
- nb_tags_to_update = 0
- nb_updated = 0
- last_progress = 0
- start = time.time()
-
- tags_list = r_serv_tag.smembers('list_tags')
- # create temp tags metadata
- tag_metadata = {}
- for tag in tags_list:
- tag_metadata[tag] = {}
- tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen')
- if tag_metadata[tag]['first_seen'] is None:
- tag_metadata[tag]['first_seen'] = 99999999
- else:
- tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen'])
-
- tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
- if tag_metadata[tag]['last_seen'] is None:
- tag_metadata[tag]['last_seen'] = 0
- else:
- tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen'])
- nb_tags_to_update += r_serv_tag.scard(tag)
-
- if nb_tags_to_update == 0:
- nb_tags_to_update = 1
-
- for tag in tags_list:
-
- all_item = r_serv_tag.smembers(tag)
- for item_path in all_item:
- splitted_item_path = item_path.split('/')
- #print(tag)
- #print(item_path)
- try:
- item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) )
- except IndexError:
- r_serv_tag.srem(tag, item_path)
- continue
-
- # remove absolute path
- new_path = item_path.replace(PASTES_FOLDER, '', 1)
- if new_path != item_path:
- # save in queue absolute path to remove
- r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path)
-
- # update metadata first_seen
- if item_date < tag_metadata[tag]['first_seen']:
- tag_metadata[tag]['first_seen'] = item_date
- r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date)
-
- # update metadata last_seen
- if item_date > tag_metadata[tag]['last_seen']:
- tag_metadata[tag]['last_seen'] = item_date
- last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen')
- if last_seen_db:
- if item_date > int(last_seen_db):
- r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date)
- else:
- tag_metadata[tag]['last_seen'] = last_seen_db
-
- r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path)
- r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1)
-
- # clean db
- r_serv_tag.srem(tag, item_path)
- index = index + 1
-
- nb_updated += 1
- progress = int((nb_updated * 100) /nb_tags_to_update)
- print('{}/{} updated {}%'.format(nb_updated, nb_tags_to_update, progress))
- # update progress stats
- if progress != last_progress:
- r_serv.set('ail:current_background_script_stat', progress)
- last_progress = progress
-
- #flush browse importante pastes db
- try:
- r_important_paste_2018.flushdb()
- except Exception:
- pass
-
- try:
- r_important_paste_2019.flushdb()
- except Exception:
- pass
-
- end = time.time()
-
- print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start))
-
- r_serv.sadd('ail:update_v1.5', 'tags')
diff --git a/update/v1.5/Update-ARDB_Tags_background.py b/update/v1.5/Update-ARDB_Tags_background.py
deleted file mode 100755
index 0df3d75d..00000000
--- a/update/v1.5/Update-ARDB_Tags_background.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-def tags_key_fusion(old_item_path_key, new_item_path_key):
- print('fusion:')
- print(old_item_path_key)
- print(new_item_path_key)
- for tag in r_serv_metadata.smembers(old_item_path_key):
- r_serv_metadata.sadd(new_item_path_key, tag)
- r_serv_metadata.srem(old_item_path_key, tag)
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
- config_loader = None
-
- if r_serv.sismember('ail:update_v1.5', 'tags'):
-
- r_serv.set('ail:current_background_script', 'tags_background')
- r_serv.set('ail:current_background_script_stat', 0)
-
- print('Updating ARDB_Tags ...')
- start = time.time()
-
- # update item metadata tags
- tag_not_updated = True
- total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename')
- nb_updated = 0
- last_progress = 0
- if total_to_update > 0:
- while tag_not_updated:
- item_path = r_serv_tag.srandmember('maj:v1.5:absolute_path_to_rename')
- old_tag_item_key = 'tag:{}'.format(item_path)
- new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
- new_tag_item_key = 'tag:{}'.format(new_item_path)
- res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key)
- if res == 0:
- tags_key_fusion(old_tag_item_key, new_tag_item_key)
- nb_updated += 1
- r_serv_tag.srem('maj:v1.5:absolute_path_to_rename', item_path)
- if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0:
- tag_not_updated = False
- else:
- progress = int((nb_updated * 100) / total_to_update)
- print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress))
- # update progress stats
- if progress != last_progress:
- r_serv.set('ail:current_background_script_stat', progress)
- last_progress = progress
-
- end = time.time()
-
- print('Updating ARDB_Tags Done: {} s'.format(end - start))
-
- r_serv.sadd('ail:update_v1.5', 'tags_background')
diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py
deleted file mode 100755
index fd1a2dd5..00000000
--- a/update/v1.5/Update.py
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- print()
- print('Updating ARDB_Onion ...')
- index = 0
- start = time.time()
-
- # update crawler queue
- for elem in r_serv_onion.smembers('onion_crawler_queue'):
- if PASTES_FOLDER in elem:
- r_serv_onion.srem('onion_crawler_queue', elem)
- r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
- index = index +1
- for elem in r_serv_onion.smembers('onion_crawler_priority_queue'):
- if PASTES_FOLDER in elem:
- r_serv_onion.srem('onion_crawler_queue', elem)
- r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1))
- index = index +1
-
- end = time.time()
- print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start))
- print()
-
- # Add background update
- r_serv.sadd('ail:to_update', 'v1.5')
-
- #Set current ail version
- r_serv.set('ail:version', 'v1.5')
-
- #Set current ail version
- r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d"))
-
- print('Done in {} s'.format(end - start_deb))
diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh
deleted file mode 100755
index cf70a444..00000000
--- a/update/v1.5/Update.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -k &
-wait
-
-echo ""
-bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh"
-#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh"
-
-echo ""
-echo -e $GREEN"Update DomainClassifier"$DEFAULT
-echo ""
-pip3 install --upgrade --force-reinstall git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking
-pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Update Web thirdparty"$DEFAULT
-echo ""
-bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh &)"
-wait
-echo ""
-
-bash ${AIL_BIN}LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Fixing ARDB ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v1.5/Update.py &
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks &
-wait
-
-echo ""
-
-exit 0
diff --git a/update/v1.7/Update.py b/update/v1.7/Update.py
deleted file mode 100755
index f3777d0e..00000000
--- a/update/v1.7/Update.py
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- config_loader = None
-
- # Set current ail version
- r_serv.set('ail:version', 'v1.7')
-
- # Set current ail version
- r_serv.set('ail:update_date_v1.7', datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v1.7/Update.sh b/update/v1.7/Update.sh
deleted file mode 100755
index 603e9517..00000000
--- a/update/v1.7/Update.sh
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks &
-wait
-
-echo ""
-echo -e $GREEN"Update DomainClassifier"$DEFAULT
-echo ""
-
-cd $AIL_HOME
-git clone https://github.com/kazu-yamamoto/pgpdump.git
-cd pgpdump
-./configure
-make
-sudo make install
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Update requirement"$DEFAULT
-echo ""
-pip3 install beautifulsoup4
-
-bash ${AIL_BIN}LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v1.7/Update.py &
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks &
-wait
-
-echo ""
-echo -e $GREEN"Update thirdparty ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -t &
-wait
-
-
-echo ""
-
-exit 0
diff --git a/update/v2.0/Update.py b/update/v2.0/Update.py
deleted file mode 100755
index 4d3504e4..00000000
--- a/update/v2.0/Update.py
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- config_loader = None
-
- # Set current ail version
- r_serv.set('ail:version', 'v2.0')
-
- # use new update_date format
- date_tag_to_replace = ['v1.5', 'v1.7']
- for tag in date_tag_to_replace:
- if r_serv.exists('ail:update_date_{}'.format(tag)):
- date_tag = r_serv.get('ail:update_date_{}'.format(tag))
- r_serv.hset('ail:update_date', tag, date_tag)
- r_serv.delete('ail:update_date_{}'.format(tag))
-
- # Set current ail version
- r_serv.hset('ail:update_date', 'v2.0', datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.0/Update.sh b/update/v2.0/Update.sh
deleted file mode 100755
index 0132ec8e..00000000
--- a/update/v2.0/Update.sh
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-echo ""
-echo -e $GREEN"Create Self-Signed Certificate"$DEFAULT
-echo ""
-pushd ${AIL_BIN}/helper/gen_cert
-bash gen_root.sh
-wait
-bash gen_cert.sh
-wait
-popd
-
-cp ${AIL_BIN}/helper/gen_cert/server.crt ${AIL_FLASK}/server.crt
-cp ${AIL_BIN}/helper/gen_cert/server.key ${AIL_FLASK}/server.key
-
-echo ""
-echo -e $GREEN"Update requirement"$DEFAULT
-echo ""
-pip3 install flask-login
-wait
-echo ""
-pip3 install bcrypt
-wait
-echo ""
-echo ""
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v2.0/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Update thirdparty ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -t
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Create Default User"$DEFAULT
-echo ""
-python3 ${AIL_FLASK}create_default_user.py
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v2.2/Update.py b/update/v2.2/Update.py
deleted file mode 100755
index 2bfef8e9..00000000
--- a/update/v2.2/Update.py
+++ /dev/null
@@ -1,118 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from packages import Term
-
-from lib import ConfigLoader
-
-
-def rreplace(s, old, new, occurrence):
- li = s.rsplit(old, occurrence)
- return new.join(li)
-
-def get_item_id(full_path):
- return full_path.replace(PASTES_FOLDER, '', 1)
-
-def get_item_date(id_item):
- l_dir = id_item.split('/')
- return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}'
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
- PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '')
-
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_term_stats = config_loader.get_redis_conn("ARDB_Trending")
- r_serv_termfreq = config_loader.get_redis_conn("ARDB_TermFreq")
- config_loader = None
-
- r_serv_term_stats.flushdb()
-
- # Disabled. Checkout the v2.2 branch if you need it
- # # convert all regex:
- # all_regex = r_serv_termfreq.smembers('TrackedRegexSet')
- # for regex in all_regex:
- # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(regex)))
- # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(regex)))
- #
- # new_term = regex[1:-1]
- # res = Term.parse_json_term_to_add({"term": new_term, "type": 'regex', "tags": tags, "mails": mails, "level": 1},
- # 'admin@admin.test')
- # if res[1] == 200:
- # term_uuid = res[0]['uuid']
- # list_items = r_serv_termfreq.smembers('regex_{}'.format(regex))
- # for paste_item in list_items:
- # item_id = get_item_id(paste_item)
- # item_date = get_item_date(item_id)
- # Term.add_tracked_item(term_uuid, item_id, item_date)
- #
- # # Invalid Tracker => remove it
- # else:
- # print('Invalid Regex Removed: {}'.format(regex))
- # print(res[0])
- # # allow reprocess
- # r_serv_termfreq.srem('TrackedRegexSet', regex)
- #
- # all_tokens = r_serv_termfreq.smembers('TrackedSetTermSet')
- # for token in all_tokens:
- # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(token)))
- # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(token)))
- #
- # res = Term.parse_json_term_to_add({"term": token, "type": 'word', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
- # if res[1] == 200:
- # term_uuid = res[0]['uuid']
- # list_items = r_serv_termfreq.smembers('tracked_{}'.format(token))
- # for paste_item in list_items:
- # item_id = get_item_id(paste_item)
- # item_date = get_item_date(item_id)
- # Term.add_tracked_item(term_uuid, item_id, item_date)
- # # Invalid Tracker => remove it
- # else:
- # print('Invalid Token Removed: {}'.format(token))
- # print(res[0])
- # # allow reprocess
- # r_serv_termfreq.srem('TrackedSetTermSet', token)
- #
- # all_set = r_serv_termfreq.smembers('TrackedSetSet')
- # for curr_set in all_set:
- # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(curr_set)))
- # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(curr_set)))
- #
- # to_remove = ',{}'.format(curr_set.split(',')[-1])
- # new_set = rreplace(curr_set, to_remove, '', 1)
- # new_set = new_set[2:]
- # new_set = new_set.replace(',', '')
- #
- # res = Term.parse_json_term_to_add({"term": new_set, "type": 'set', "nb_words": 1, "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test')
- # if res[1] == 200:
- # term_uuid = res[0]['uuid']
- # list_items = r_serv_termfreq.smembers('tracked_{}'.format(curr_set))
- # for paste_item in list_items:
- # item_id = get_item_id(paste_item)
- # item_date = get_item_date(item_id)
- # Term.add_tracked_item(term_uuid, item_id, item_date)
- # # Invalid Tracker => remove it
- # else:
- # print('Invalid Set Removed: {}'.format(curr_set))
- # print(res[0])
- # # allow reprocess
- # r_serv_termfreq.srem('TrackedSetSet', curr_set)
-
- r_serv_termfreq.flushdb()
-
- # Set current ail version
- r_serv.set('ail:version', 'v2.2')
-
- # Set current ail version
- r_serv.hset('ail:update_date', 'v2.2', datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.2/Update.sh b/update/v2.2/Update.sh
deleted file mode 100755
index 37704f3b..00000000
--- a/update/v2.2/Update.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v2.2/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v2.5/Update.py b/update/v2.5/Update.py
deleted file mode 100755
index 6264c7f4..00000000
--- a/update/v2.5/Update.py
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-new_version = 'v2.5'
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- config_loader = None
-
- r_serv.zadd('ail:all_role', {'user': 3})
- r_serv.zadd('ail:all_role', {'user_no_api': 4})
- r_serv.zadd('ail:all_role', {'read_only': 5})
-
- for user in r_serv.hkeys('user:all'):
- r_serv.sadd('user_role:user', user)
- r_serv.sadd('user_role:user_no_api', user)
- r_serv.sadd('user_role:read_only', user)
-
- # Set current ail version
- r_serv.set('ail:version', new_version)
-
- # Set current ail version
- r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.5/Update.sh b/update/v2.5/Update.sh
deleted file mode 100755
index 6c75f15f..00000000
--- a/update/v2.5/Update.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v2.5/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v2.6/Update.py b/update/v2.6/Update.py
deleted file mode 100755
index ded00cf1..00000000
--- a/update/v2.6/Update.py
+++ /dev/null
@@ -1,27 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-new_version = 'v2.6'
-
-if __name__ == '__main__':
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- config_loader = None
-
- r_serv.sadd('ail:to_update', new_version)
-
- # Set current ail version
- r_serv.set('ail:version', new_version)
-
- # Set current ail version
- r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.6/Update.sh b/update/v2.6/Update.sh
deleted file mode 100755
index 874bf0ec..00000000
--- a/update/v2.6/Update.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v2.6/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v2.6/Update_screenshots.py b/update/v2.6/Update_screenshots.py
deleted file mode 100755
index 735d64bc..00000000
--- a/update/v2.6/Update_screenshots.py
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-from pyfaup.faup import Faup
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-def get_domain(item_id):
- item_id = item_id.split('/')
- item_id = item_id[-1]
- return item_id[:-36]
-
-def get_all_item(s_sha256):
- return r_serv_onion.smembers(f'screenshot:{s_sha256}')
-
-def sanitize_domain(domain):
- faup.decode(domain)
- domain_sanitized = faup.get()
- domain_sanitized = domain_sanitized['domain']
- try:
- domain_sanitized = domain_sanitized.decode()
- except:
- pass
- return domain_sanitized.lower()
-
-def update_db(s_sha256):
- screenshot_items = get_all_item(s_sha256)
- if screenshot_items:
- for item_id in screenshot_items:
- item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
- domain = get_domain(item_id)
-
- domain_sanitized = sanitize_domain(domain)
- if domain != domain_sanitized:
- r_serv_onion.sadd('incorrect_domain', domain)
- domain = domain_sanitized
-
- r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256)
- r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain)
- else:
- pass
- # broken screenshot
- r_serv_onion.sadd('broken_screenshot', s_sha256)
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
- faup = Faup()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
- SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
-
- r_serv_db = config_loader.get_redis_conn("ARDB_DB")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- r_serv_db.set('ail:update_in_progress', 'v2.6')
- r_serv_db.set('ail:current_background_update', 'v2.6')
-
- r_serv_db.set('ail:current_background_script_stat', 20)
- r_serv_db.set('ail:current_background_script', 'screenshot update')
-
- nb = 0
-
- if os.path.isdir(SCREENSHOT_FOLDER):
- for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
- # print(dirs)
- for name in files:
- nb = nb + 1
- screenshot_sha256 = os.path.join(root, name)
- screenshot_sha256 = screenshot_sha256[:-4] # remove .png
- screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
- screenshot_sha256 = screenshot_sha256.replace('/', '')
- update_db(screenshot_sha256)
- # print('Screenshot updated: {}'.format(nb))
- if nb % 1000 == 0:
- r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))
-
- r_serv_db.set('ail:current_background_script_stat', 100)
-
- end = time.time()
- print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))
diff --git a/update/v2.7/Update.py b/update/v2.7/Update.py
deleted file mode 100755
index eed7c219..00000000
--- a/update/v2.7/Update.py
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-from lib import ConfigLoader
-
-new_version = 'v2.7'
-
-if __name__ == '__main__':
-
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
- r_serv = config_loader.get_redis_conn("ARDB_DB")
- r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- config_loader = None
-
- r_serv.sadd('ail:to_update', new_version)
-
- #### Update tags ####
- r_serv_tags.sunionstore('list_tags:item', 'list_tags', [])
- r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up', [])
- r_serv_onion.delete('incorrect_domain')
- r_serv.set('ail:update_v2.7:deletetagrange', 1)
- #### ####
-
- # Set current ail version
- r_serv.set('ail:version', new_version)
-
- # Set current ail version
- r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))
diff --git a/update/v2.7/Update.sh b/update/v2.7/Update.sh
deleted file mode 100755
index 8f9a4efd..00000000
--- a/update/v2.7/Update.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v2.7/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v2.7/Update_domain_tags.py b/update/v2.7/Update_domain_tags.py
deleted file mode 100755
index cbe1e4b6..00000000
--- a/update/v2.7/Update_domain_tags.py
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-from pyfaup.faup import Faup
-
-sys.path.append(os.environ['AIL_BIN'])
-from packages import Date
-
-from lib import ConfigLoader
-
-def sanitize_domain(domain):
- faup.decode(domain)
- domain_sanitized = faup.get()
- domain_sanitized = domain_sanitized['domain']
- try:
- domain_sanitized = domain_sanitized.decode()
- except:
- pass
- return domain_sanitized.lower()
-
-def get_all_obj_tags(obj_type):
- return list(r_serv_tags.smembers(f'list_tags:{obj_type}'))
-
-def add_global_tag(tag, object_type=None):
- r_serv_tags.sadd('list_tags', tag)
- if object_type:
- r_serv_tags.sadd('list_tags:{}'.format(object_type), tag)
-
-def get_obj_tag(object_id):
- res = r_serv_metadata.smembers('tag:{}'.format(object_id))
- if res:
- return list(res)
- else:
- return []
-
-def delete_domain_tag_daterange():
- all_domains_tags = get_all_obj_tags('domain')
- nb_updated = 0
- nb_to_update = len(all_domains_tags)
- if nb_to_update == 0:
- nb_to_update = 1
- refresh_time = time.time()
- l_dates = Date.substract_date('20191008', Date.get_today_date_str())
- for tag in all_domains_tags:
- for date_day in l_dates:
- r_serv_tags.delete('domain:{}:{}'.format(tag, date_day))
- nb_updated += 1
- refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
- r_serv_db.delete('ail:update_v2.7:deletetagrange')
-
-def update_domain_tags(domain):
- domain_sanitized = sanitize_domain(domain)
- if domain != domain_sanitized:
- r_serv_onion.sadd('incorrect_domain', domain)
- domain = domain_sanitized
-
- domain_tags = get_obj_tag(domain)
- for tag in domain_tags:
- # delete incorrect tags
- if tag == 'infoleak:submission="crawler"' or tag == 'infoleak:submission="manual"':
- r_serv_metadata.srem('tag:{}'.format(domain), tag)
- else:
- add_global_tag(tag, object_type='domain')
- r_serv_tags.sadd('{}:{}'.format('domain', tag), domain)
-
-def update_progress(refresh_time, nb_updated, nb_elem_to_update):
- if time.time() - refresh_time > 10:
- progress = int((nb_updated * 100) / nb_elem_to_update)
- print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress))
- r_serv_db.set('ail:current_background_script_stat', progress)
- refresh_time = time.time()
-
- return refresh_time
-
-def update_db():
- nb_updated = 0
- nb_to_update = r_serv_onion.scard('domain_update_v2.7')
- refresh_time = time.time()
- r_serv_db.set('ail:current_background_script_stat', 0)
- r_serv_db.set('ail:current_background_script', 'domain tags update')
- domain = r_serv_onion.spop('domain_update_v2.7')
- while domain is not None:
- update_domain_tags(domain)
- nb_updated += 1
- refresh_time = update_progress(refresh_time, nb_updated, nb_to_update)
- domain = r_serv_onion.spop('domain_update_v2.7')
- if r_serv_db.exists('ail:update_v2.7:deletetagrange'):
- r_serv_db.set('ail:current_background_script_stat', 0)
- r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys')
- delete_domain_tag_daterange()
-
- # sort all crawled domain
- r_serv_onion.sort('full_onion_up', alpha=True)
- r_serv_onion.sort('full_regular_up', alpha=True)
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
- faup = Faup()
-
- config_loader = ConfigLoader.ConfigLoader()
-
- r_serv_db = config_loader.get_redis_conn("ARDB_DB")
- r_serv_tags = config_loader.get_redis_conn("ARDB_Tags")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- config_loader = None
-
- update_version = 'v2.7'
-
- r_serv_db.set('ail:update_in_progress', update_version)
- r_serv_db.set('ail:current_background_update', update_version)
-
- r_serv_db.set('ail:current_background_script_stat', 0)
- r_serv_db.set('ail:current_background_script', 'tags update')
-
- update_db()
-
- r_serv_db.set('ail:current_background_script_stat', 100)
-
- end = time.time()
- print('ALL domains tags updated in {} s'.format(end - start_deb))
diff --git a/update/v3.0/Update.sh b/update/v3.0/Update.sh
deleted file mode 100755
index 9c7a7d9d..00000000
--- a/update/v3.0/Update.sh
+++ /dev/null
@@ -1,43 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.0/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Update thirdparty ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -t
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.1.1/Update.py b/update/v3.1.1/Update.py
deleted file mode 100755
index a5a16751..00000000
--- a/update/v3.1.1/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.1.1')
- updater.run_update()
diff --git a/update/v3.1.1/Update.sh b/update/v3.1.1/Update.sh
deleted file mode 100755
index f43591c0..00000000
--- a/update/v3.1.1/Update.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -ldbv &
-wait
-echo ""
-
-# SUBMODULES #
-git submodule init
-git submodule update
-
-echo -e $GREEN"Installing YARA ..."$DEFAULT
-pip3 install yara-python
-bash ${AIL_BIN}/LAUNCH.sh -t
-
-# SUBMODULES #
-git submodule init
-git submodule update
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.1.1/Update.py
-wait
-echo ""
-echo ""
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.1/Update.py b/update/v3.1/Update.py
deleted file mode 100755
index 6c4c4546..00000000
--- a/update/v3.1/Update.py
+++ /dev/null
@@ -1,23 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.1')
- updater.run_update()
-
diff --git a/update/v3.1/Update.sh b/update/v3.1/Update.sh
deleted file mode 100755
index 53b27cc6..00000000
--- a/update/v3.1/Update.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -lav &
-wait
-echo ""
-
-pip3 install scrapy
-pip3 install scrapy-splash
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.1/Update.py
-wait
-echo ""
-echo ""
-
-echo ""
-echo -e $GREEN"Update thirdparty ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -t
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.2/Update.py b/update/v3.2/Update.py
deleted file mode 100755
index 086ed098..00000000
--- a/update/v3.2/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.2')
- updater.run_update()
diff --git a/update/v3.2/Update.sh b/update/v3.2/Update.sh
deleted file mode 100755
index a588e55d..00000000
--- a/update/v3.2/Update.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -ldbv &
-wait
-echo ""
-
-# SUBMODULES #
-git submodule init
-git submodule update
-
-echo -e $GREEN"Installing YARA ..."$DEFAULT
-pip3 install yara-python
-bash ${AIL_BIN}/LAUNCH.sh -t
-
-# SUBMODULES #
-git submodule init
-git submodule update
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.2/Update.py
-wait
-echo ""
-echo ""
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.3/Update.py b/update/v3.3/Update.py
deleted file mode 100755
index 5f0efc78..00000000
--- a/update/v3.3/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.3')
- updater.run_update()
diff --git a/update/v3.3/Update.sh b/update/v3.3/Update.sh
deleted file mode 100755
index 86289dba..00000000
--- a/update/v3.3/Update.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-bash ${AIL_BIN}/LAUNCH.sh -ldbv &
-wait
-echo ""
-
-# SUBMODULES #
-git submodule update
-
-# echo ""
-# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
-# cd ${AIL_HOME}
-# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
-# pushd kvrocks/
-# make -j4
-# popd
-
-echo -e $GREEN"Installing html2text ..."$DEFAULT
-pip3 install html2text
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.3/Update.py
-wait
-echo ""
-echo ""
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py
deleted file mode 100755
index d4308551..00000000
--- a/update/v3.4/Update.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
- self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion")
-
- def update(self):
- """
- Update Domain Languages
- """
- self.r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up')
- self.r_serv.set('update:nb_elem_to_convert', self.r_serv_onion.scard('domain_update_v3.4'))
- self.r_serv.set('update:nb_elem_converted', 0)
-
- # Add background update
- self.r_serv.sadd('ail:to_update', self.version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.4')
- updater.run_update()
diff --git a/update/v3.4/Update.sh b/update/v3.4/Update.sh
deleted file mode 100755
index 16a9ccb7..00000000
--- a/update/v3.4/Update.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# bash ${AIL_BIN}/LAUNCH.sh -ldbv &
-# wait
-# echo ""
-
-# SUBMODULES #
-git submodule update
-
-# echo ""
-# echo -e $GREEN"installing KVORCKS ..."$DEFAULT
-# cd ${AIL_HOME}
-# test ! -d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git
-# pushd kvrocks/
-# make -j4
-# popd
-
-echo -e $GREEN"Installing html2text ..."$DEFAULT
-pip3 install pycld3
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.4/Update.py
-wait
-echo ""
-echo ""
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py
deleted file mode 100755
index c6183725..00000000
--- a/update/v3.4/Update_domain.py
+++ /dev/null
@@ -1,121 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-sys.path.append(os.environ['AIL_BIN'])
-##################################
-# Import Project packages
-##################################
-from lib import ConfigLoader
-from lib.objects.Items import Item
-
-def get_domain_type(domain_name):
- if str(domain_name).endswith('.onion'):
- return 'onion'
- else:
- return 'regular'
-
-def add_domain_language(domain_name, language):
- language = language.split('-')[0]
- domain_type = get_domain_type(domain_name)
- r_serv_onion.sadd('all_domains_languages', language)
- r_serv_onion.sadd(f'all_domains_languages:{domain_type}', language)
- r_serv_onion.sadd(f'language:domains:{domain_type}:{language}', domain_name)
- r_serv_onion.sadd(f'domain:language:{domain_name}', language)
-
-def add_domain_languages_by_item_id(domain_name, item_id):
- item = Item(item_id)
- for lang in item.get_languages():
- add_domain_language(domain_name, lang.language)
-
-def update_update_stats():
- nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
- progress = int((nb_updated * 100) / nb_elem_to_update)
- print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
- r_serv_db.set('ail:current_background_script_stat', progress)
-
-def update_domain_language(domain_obj, item_id):
- domain_name = domain_obj.get_domain_name()
- add_domain_languages_by_item_id(domain_name, item_id)
-
-def get_domain_history(domain_type, domain_name):
- return r_serv_onion.zrange(f'crawler_history_{domain_type}:{domain_name}:80', 0, -1, withscores=True)
-
-
-def get_item_children(item_id):
- return r_serv_metadata.smembers(f'paste_children:{item_id}')
-
-def get_domain_items(domain_name, root_item_id):
- dom_item = get_domain_item_children(domain_name, root_item_id)
- dom_item.append(root_item_id)
- return dom_item
-
-def is_item_in_domain(domain_name, item_id):
- is_in_domain = False
- domain_length = len(domain_name)
- if len(item_id) > (domain_length+48):
- if item_id[-36-domain_length:-36] == domain_name:
- is_in_domain = True
- return is_in_domain
-
-def get_domain_item_children(domain_name, root_item_id):
- all_items = []
- for item_id in get_item_children(root_item_id):
- if is_item_in_domain(domain_name, item_id):
- all_items.append(item_id)
- all_items.extend(get_domain_item_children(domain_name, item_id))
- return all_items
-
-def get_domain_crawled_item_root(domain_name, domain_type, epoch):
- res = r_serv_onion.zrevrangebyscore(f'crawler_history_{domain_type}:{domain_name}:80', int(epoch), int(epoch))
- return {"root_item": res[0], "epoch": int(epoch)}
-
-def get_domain_items_crawled(domain_name, domain_type, epoch):
- item_crawled = []
- item_root = get_domain_crawled_item_root(domain_name, domain_type, epoch)
- if item_root:
- if item_root['root_item'] != str(item_root['epoch']) and item_root['root_item']:
- for item_id in get_domain_items(domain_name, item_root['root_item']):
- item_crawled.append(item_id)
- return item_crawled
-
-
-if __name__ == '__main__':
-
- start_deb = time.time()
- config_loader = ConfigLoader.ConfigLoader()
- r_serv_db = config_loader.get_redis_conn("ARDB_DB")
- r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
- r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
- config_loader = None
-
- r_serv_db.set('ail:current_background_script', 'domain languages update')
-
- nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
- if not nb_elem_to_update:
- nb_elem_to_update = 1
- else:
- nb_elem_to_update = int(nb_elem_to_update)
-
- # _delete_all_domains_languages()
-
- while True:
- domain = r_serv_onion.spop('domain_update_v3.4')
- if domain is not None:
- print(domain)
- domain = str(domain)
- domain_t = get_domain_type(domain)
- for domain_history in get_domain_history(domain_t, domain):
- domain_items = get_domain_items_crawled(domain, domain_t, domain_history[1])
- for id_item in domain_items:
- update_domain_language(domain, id_item)
-
- r_serv_db.incr('update:nb_elem_converted')
- update_update_stats()
-
- else:
- r_serv_db.set('ail:current_background_script_stat', 100)
- sys.exit(0)
diff --git a/update/v3.5/Update.py b/update/v3.5/Update.py
deleted file mode 100755
index 651fe8ea..00000000
--- a/update/v3.5/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.5')
- updater.run_update()
diff --git a/update/v3.5/Update.sh b/update/v3.5/Update.sh
deleted file mode 100755
index ea0d39c3..00000000
--- a/update/v3.5/Update.sh
+++ /dev/null
@@ -1,35 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-echo -e $GREEN"Installing PyAIL ..."$DEFAULT
-pip3 install -U pyail
-
-echo -e $GREEN"Installing D4 CLIENT ..."$DEFAULT
-pip3 install -U d4-pyclient
-
-echo ""
-echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
-pip3 install -U DomainClassifier
-
-exit 0
diff --git a/update/v3.6/Update.py b/update/v3.6/Update.py
deleted file mode 100755
index 8d7715de..00000000
--- a/update/v3.6/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.6')
- updater.run_update()
diff --git a/update/v3.6/Update.sh b/update/v3.6/Update.sh
deleted file mode 100755
index e82dba98..00000000
--- a/update/v3.6/Update.sh
+++ /dev/null
@@ -1,39 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-echo -e $GREEN"Updating D4 CLIENT ..."$DEFAULT
-pip3 install -U d4-pyclient
-
-echo ""
-echo -e $GREEN"Installing nose ..."$DEFAULT
-pip3 install -U nose
-
-echo -e $GREEN"Installing coverage ..."$DEFAULT
-pip3 install -U coverage
-
-echo ""
-echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT
-pip3 install -U DomainClassifier
-
-exit 0
diff --git a/update/v3.7/Update.py b/update/v3.7/Update.py
deleted file mode 100755
index 8b238f9c..00000000
--- a/update/v3.7/Update.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-from lib import Tracker
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
- def update(self):
- """
- Update Domain Languages
- """
- print('Fixing Tracker_uuid list ...')
- Tracker.fix_all_tracker_uuid_list()
- nb = 0
- for tracker_uuid in Tracker.get_trackers():
- self.r_serv.sadd('trackers_update_v3.7', tracker_uuid)
- nb += 1
-
- self.r_serv.set('update:nb_elem_to_convert', nb)
- self.r_serv.set('update:nb_elem_converted',0)
-
- # Add background update
- self.r_serv.sadd('ail:to_update', self.version)
-
-
-if __name__ == '__main__':
- updater = Updater('v3.7')
- updater.run_update()
diff --git a/update/v3.7/Update.sh b/update/v3.7/Update.sh
deleted file mode 100755
index 0196d872..00000000
--- a/update/v3.7/Update.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-
-echo -e $GREEN"Updating thirdparty ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ut
-wait
-
-echo ""
-echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
-echo ""
-python ${AIL_HOME}/update/v3.7/Update.py
-wait
-echo ""
-echo ""
-
-
-echo ""
-echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-exit 0
diff --git a/update/v3.7/Update_trackers.py b/update/v3.7/Update_trackers.py
deleted file mode 100644
index b84f06e8..00000000
--- a/update/v3.7/Update_trackers.py
+++ /dev/null
@@ -1,53 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-sys.path.append(os.environ['AIL_BIN'])
-##################################
-# Import Project packages
-##################################
-from lib import ConfigLoader
-from lib import Tracker
-
-def update_update_stats():
- nb_updated = int(r_serv_db.get('update:nb_elem_converted'))
- progress = int((nb_updated * 100) / nb_elem_to_update)
- print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%')
- r_serv_db.set('ail:current_background_script_stat', progress)
-
-
-if __name__ == '__main__':
- start_deb = time.time()
-
- config_loader = ConfigLoader.ConfigLoader()
- r_serv_db = config_loader.get_redis_conn("ARDB_DB")
- r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
- config_loader = None
- Tracker.r_serv_tracker = r_serv_tracker
-
- r_serv_db.set('ail:current_background_script', 'trackers update')
-
- nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
- if not nb_elem_to_update:
- nb_elem_to_update = 1
- else:
- nb_elem_to_update = int(nb_elem_to_update)
-
- while True:
- tracker_uuid = r_serv_db.spop('trackers_update_v3.7')
- if tracker_uuid is not None:
- print(tracker_uuid)
- # FIX STATS
- Tracker.fix_tracker_stats_per_day(tracker_uuid)
- # MAP TRACKER - ITEM_ID
- Tracker.fix_tracker_item_link(tracker_uuid)
-
- r_serv_db.incr('update:nb_elem_converted')
- update_update_stats()
-
- else:
- r_serv_db.set('ail:current_background_script_stat', 100)
- sys.exit(0)
diff --git a/update/v4.0/Update.py b/update/v4.0/Update.py
deleted file mode 100755
index b609ebb6..00000000
--- a/update/v4.0/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v4.0')
- updater.run_update()
diff --git a/update/v4.0/Update.sh b/update/v4.0/Update.sh
deleted file mode 100755
index 09bc3f4f..00000000
--- a/update/v4.0/Update.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-echo ""
-echo -e $GREEN"Installing nose ..."$DEFAULT
-pip3 install -U websockets
-
-exit 0
diff --git a/update/v4.1/Update.py b/update/v4.1/Update.py
deleted file mode 100755
index f7a7a72a..00000000
--- a/update/v4.1/Update.py
+++ /dev/null
@@ -1,31 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import redis
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
- def update(self):
- r_tracking = redis.StrictRedis(host='localhost',
- port=6382,
- db=2,
- decode_responses=True)
- # FLUSH OLD DB
- r_tracking.flushdb()
-
-
-if __name__ == '__main__':
- updater = Updater('v4.1')
- updater.run_update()
diff --git a/update/v4.2.1/Update.py b/update/v4.2.1/Update.py
deleted file mode 100755
index f20a9184..00000000
--- a/update/v4.2.1/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v4.2.1')
- updater.run_update()
diff --git a/update/v4.2.1/Update.sh b/update/v4.2.1/Update.sh
deleted file mode 100755
index bf04638b..00000000
--- a/update/v4.2.1/Update.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-echo ""
-echo -e $GREEN"Updating pusblogger ..."$DEFAULT
-pip3 install -U pubsublogger
-
-exit 0
diff --git a/update/v4.2/Update.py b/update/v4.2/Update.py
deleted file mode 100755
index 708009b2..00000000
--- a/update/v4.2/Update.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-
-sys.path.append(os.environ['AIL_HOME'])
-##################################
-# Import Project packages
-##################################
-from update.bin.old_ail_updater import AIL_Updater
-
-class Updater(AIL_Updater):
- """default Updater."""
-
- def __init__(self, version):
- super(Updater, self).__init__(version)
-
-
-if __name__ == '__main__':
- updater = Updater('v4.2')
- updater.run_update()
diff --git a/update/v4.2/Update.sh b/update/v4.2/Update.sh
deleted file mode 100755
index a18aae61..00000000
--- a/update/v4.2/Update.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/bash
-
-[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
-
-export PATH=$AIL_HOME:$PATH
-export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
-export PATH=$AIL_BIN:$PATH
-export PATH=$AIL_FLASK:$PATH
-
-GREEN="\\033[1;32m"
-DEFAULT="\\033[0;39m"
-
-echo -e $GREEN"Shutting down AIL ..."$DEFAULT
-bash ${AIL_BIN}/LAUNCH.sh -ks
-wait
-
-# SUBMODULES #
-git submodule update
-
-echo ""
-echo -e $GREEN"Installing typo-squatting ..."$DEFAULT
-pip3 install -U ail_typo_squatting
-
-echo ""
-echo -e $GREEN"Updating d4-client ..."$DEFAULT
-pip3 install -U d4-pyclient
-
-exit 0
diff --git a/update/v3.0/Update.py b/update/v5.2/Update.py
similarity index 72%
rename from update/v3.0/Update.py
rename to update/v5.2/Update.py
index 9d393f6f..f87a84d0 100755
--- a/update/v3.0/Update.py
+++ b/update/v5.2/Update.py
@@ -8,7 +8,8 @@ sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
-from update.bin.old_ail_updater import AIL_Updater
+from update.bin.ail_updater import AIL_Updater
+from lib import ail_updates
class Updater(AIL_Updater):
"""default Updater."""
@@ -18,5 +19,7 @@ class Updater(AIL_Updater):
if __name__ == '__main__':
- updater = Updater('v3.0')
+ updater = Updater('v5.2')
updater.run_update()
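+    # register the v5.2 background update (HAR compression, handled by update/v5.2/compress_har.py)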
+ ail_updates.add_background_update('v5.2')
diff --git a/update/v4.1/Update.sh b/update/v5.2/Update.sh
similarity index 84%
rename from update/v4.1/Update.sh
rename to update/v5.2/Update.sh
index 2be5376a..989bf1f8 100755
--- a/update/v4.1/Update.sh
+++ b/update/v5.2/Update.sh
@@ -2,13 +2,11 @@
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
@@ -22,4 +20,12 @@ wait
# SUBMODULES #
git submodule update
+echo ""
+echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
+echo ""
+python ${AIL_HOME}/update/v5.2/Update.py
+wait
+echo ""
+echo ""
+
exit 0
diff --git a/update/v5.2/compress_har.py b/update/v5.2/compress_har.py
new file mode 100755
index 00000000..12d08dce
--- /dev/null
+++ b/update/v5.2/compress_har.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import gzip
+import os
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ail_updates
+from lib import crawlers
+
+if __name__ == '__main__':
+ update = ail_updates.AILBackgroundUpdate('v5.2')
+ HAR_DIR = crawlers.HAR_DIR
+ hars_ids = crawlers.get_all_har_ids()
+ update.set_nb_to_update(len(hars_ids))
+    n = 0
+    for har_id in hars_ids:
+        crawlers._gzip_har(har_id)
+        update.inc_nb_updated()
+        n += 1
+        if n % 100 == 0:  # only refresh the progress stat every 100 HARs
+            update.update_progress()
+
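+    # final sweep: assumed to catch any HAR files missed or added while the loop above was running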
+ crawlers._gzip_all_hars()
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index cc110c35..c330443b 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -17,9 +17,6 @@ from flask_login import LoginManager, current_user, login_user, logout_user, log
import importlib
from os.path import join
-# # TODO: put me in lib/Tag
-from pytaxonomies import Taxonomies
-
sys.path.append('./modules/')
sys.path.append(os.environ['AIL_BIN'])
@@ -51,6 +48,9 @@ from blueprints.objects_decoded import objects_decoded
from blueprints.objects_subtypes import objects_subtypes
from blueprints.objects_title import objects_title
from blueprints.objects_cookie_name import objects_cookie_name
+from blueprints.objects_etag import objects_etag
+from blueprints.objects_hhhash import objects_hhhash
+from blueprints.objects_chat import objects_chat
Flask_dir = os.environ['AIL_FLASK']
@@ -106,6 +106,9 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
app.register_blueprint(objects_title, url_prefix=baseUrl)
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
+app.register_blueprint(objects_etag, url_prefix=baseUrl)
+app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
+app.register_blueprint(objects_chat, url_prefix=baseUrl)
# ========= =========#
@@ -250,16 +253,6 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"]
for taxonomy in default_taxonomies:
Tag.enable_taxonomy_tags(taxonomy)
-# ========== INITIAL tags auto export ============
-# taxonomies = Taxonomies()
-#
-# infoleak_tags = taxonomies.get('infoleak').machinetags()
-# infoleak_automatic_tags = []
-# for tag in taxonomies.get('infoleak').machinetags():
-# if tag.split('=')[0][:] == 'infoleak:automatic-detection':
-# r_serv_db.sadd('list_export_tags', tag)
-#
-# r_serv_db.sadd('list_export_tags', 'infoleak:submission="manual"')
# ============ MAIN ============
if __name__ == "__main__":
diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py
index f6e7feda..d5d672b1 100644
--- a/var/www/blueprints/correlation.py
+++ b/var/www/blueprints/correlation.py
@@ -61,6 +61,13 @@ def sanitise_level(level):
level = 2
return level
+def sanitise_objs_hidden(objs_hidden):
+ if objs_hidden:
+ objs_hidden = set(objs_hidden.split(',')) # TODO sanitize objects
+ else:
+ objs_hidden = set()
+ return objs_hidden
+
# ============= ROUTES ==============
@correlation.route('/correlation/show', methods=['GET', 'POST'])
@login_required
@@ -83,12 +90,18 @@ def show_correlation():
correl_option = request.form.get('CookieNameCheck')
if correl_option:
filter_types.append('cookie-name')
+ correl_option = request.form.get('EtagCheck')
+ if correl_option:
+ filter_types.append('etag')
correl_option = request.form.get('CveCheck')
if correl_option:
filter_types.append('cve')
correl_option = request.form.get('CryptocurrencyCheck')
if correl_option:
filter_types.append('cryptocurrency')
+ correl_option = request.form.get('HHHashCheck')
+ if correl_option:
+ filter_types.append('hhhash')
correl_option = request.form.get('PgpCheck')
if correl_option:
filter_types.append('pgp')
@@ -127,6 +140,10 @@ def show_correlation():
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
mode = sanitise_graph_mode(request.args.get('mode'))
level = sanitise_level(request.args.get('level'))
+ objs_hidden = sanitise_objs_hidden(request.args.get('hidden'))
+ obj_to_hide = request.args.get('hide')
+ if obj_to_hide:
+ objs_hidden.add(obj_to_hide)
related_btc = bool(request.args.get('related_btc', False))
@@ -136,17 +153,24 @@ def show_correlation():
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
return abort(404)
# object exist
- else:
- dict_object = {"object_type": obj_type,
- "correlation_id": obj_id,
+ else: # TODO remove old dict key
+ dict_object = {"type": obj_type,
+ "id": obj_id,
+ "object_type": obj_type,
"max_nodes": max_nodes, "mode": mode, "level": level,
"filter": filter_types, "filter_str": ",".join(filter_types),
+ "hidden": objs_hidden, "hidden_str": ",".join(objs_hidden),
+
+ "correlation_id": obj_id,
"metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id,
options={'tags'}, flask_context=True),
"nb_correl": ail_objects.get_obj_nb_correlations(obj_type, subtype, obj_id)
}
if subtype:
+ dict_object["subtype"] = subtype
dict_object["metadata"]['type_id'] = subtype
+ else:
+ dict_object["subtype"] = ''
dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc)
return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label,
tags_selector_data=Tag.get_tags_selector_data())
@@ -156,26 +180,15 @@ def show_correlation():
@login_read_only
def get_description():
object_id = request.args.get('object_id')
- object_id = object_id.split(':')
- # unpack object_id # # TODO: put me in lib
- if len(object_id) == 3:
- object_type = object_id[0]
- type_id = object_id[1]
- correlation_id = object_id[2]
- elif len(object_id) == 2:
- object_type = object_id[0]
- type_id = None
- correlation_id = object_id[1]
- else:
- return jsonify({})
+ obj_type, subtype, obj_id = ail_objects.get_obj_type_subtype_id_from_global_id(object_id)
- # check if correlation_id exist
+ # check if obj exist
# # TODO: return error json
- if not ail_objects.exists_obj(object_type, type_id, correlation_id):
+ if not ail_objects.exists_obj(obj_type, subtype, obj_id):
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
# object exist
else:
- res = ail_objects.get_object_meta(object_type, type_id, correlation_id, options={'tags', 'tags_safe'},
+ res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'},
flask_context=True)
if 'tags' in res:
res['tags'] = list(res['tags'])
@@ -191,9 +204,15 @@ def graph_node_json():
max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes'))
level = sanitise_level(request.args.get('level'))
+ hidden = request.args.get('hidden')
+ if hidden:
+ hidden = set(hidden.split(','))
+ else:
+ hidden = set()
+
filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(','))
- json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, flask_context=True)
+ json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, objs_hidden=hidden, flask_context=True)
#json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes)
return jsonify(json_graph)
@@ -221,6 +240,7 @@ def correlation_tags_add():
nb_max = sanitise_nb_max_nodes(request.form.get('tag_nb_max'))
level = sanitise_level(request.form.get('tag_level'))
filter_types = ail_objects.sanitize_objs_types(request.form.get('tag_filter', '').split(','))
+ hidden = sanitise_objs_hidden(request.form.get('tag_hidden'))
if not ail_objects.exists_obj(obj_type, subtype, obj_id):
return abort(404)
@@ -249,9 +269,11 @@ def correlation_tags_add():
if tags:
ail_objects.obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=filter_types,
+ objs_hidden=hidden,
lvl=level + 1, nb_max=nb_max)
return redirect(url_for('correlation.show_correlation',
type=obj_type, subtype=subtype, id=obj_id,
level=level,
max_nodes=nb_max,
+                            hidden=",".join(hidden),
filter=",".join(filter_types)))
diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py
index 39d84971..1b7f4454 100644
--- a/var/www/blueprints/crawler_splash.py
+++ b/var/www/blueprints/crawler_splash.py
@@ -272,6 +272,7 @@ def crawlers_last_domains():
domain, epoch = domain_row.split(':', 1)
dom = Domains.Domain(domain)
meta = dom.get_meta()
+            meta['last'] = datetime.fromtimestamp(int(epoch)).strftime("%Y/%m/%d %H:%M:%S")
meta['epoch'] = epoch
meta['status_epoch'] = dom.is_up_by_epoch(epoch)
domains.append(meta)
diff --git a/var/www/blueprints/hunters.py b/var/www/blueprints/hunters.py
index b2a2e30b..9a2b6c3e 100644
--- a/var/www/blueprints/hunters.py
+++ b/var/www/blueprints/hunters.py
@@ -172,7 +172,7 @@ def show_tracker():
typo_squatting = set()
if date_from:
- date_from, date_to = Date.sanitise_daterange(meta['first_seen'], meta['last_seen'])
+ date_from, date_to = Date.sanitise_daterange(date_from, date_to)
objs = tracker.get_objs_by_daterange(date_from, date_to)
meta['objs'] = ail_objects.get_objects_meta(objs, flask_context=True)
else:
diff --git a/var/www/blueprints/import_export.py b/var/www/blueprints/import_export.py
index 312fe0be..bb28d080 100644
--- a/var/www/blueprints/import_export.py
+++ b/var/www/blueprints/import_export.py
@@ -163,6 +163,8 @@ def objects_misp_export_post():
MISPExporter.delete_user_misp_objects_to_export(user_id)
if not export:
event_uuid = event[10:46]
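+        # wrap the exported event in a top-level "Event" key to match MISP's JSON format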
+ event = f'{{"Event": {event}}}'
# TODO ADD JAVASCRIPT REFRESH PAGE IF RESP == 200
return send_file(io.BytesIO(event.encode()), as_attachment=True,
download_name=f'ail_export_{event_uuid}.json')
diff --git a/var/www/blueprints/investigations_b.py b/var/www/blueprints/investigations_b.py
index 8c1d592b..cf3cf688 100644
--- a/var/www/blueprints/investigations_b.py
+++ b/var/www/blueprints/investigations_b.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
-'''
+"""
Blueprint Flask: ail_investigations
-'''
+"""
import os
import sys
@@ -54,7 +54,13 @@ def show_investigation():
investigation_uuid = request.args.get("uuid")
investigation = Investigations.Investigation(investigation_uuid)
metadata = investigation.get_metadata(r_str=True)
- objs = ail_objects.get_objects_meta(investigation.get_objects(), flask_context=True)
+ objs = []
+ for obj in investigation.get_objects():
+ obj_meta = ail_objects.get_object_meta(obj["type"], obj["subtype"], obj["id"], flask_context=True)
+ comment = investigation.get_objects_comment(f'{obj["type"]}:{obj["subtype"]}:{obj["id"]}')
+ if comment:
+ obj_meta['comment'] = comment
+ objs.append(obj_meta)
return render_template("view_investigation.html", bootstrap_label=bootstrap_label,
metadata=metadata, investigation_objs=objs)
@@ -169,10 +175,13 @@ def register_investigation():
object_type = request.args.get('type')
object_subtype = request.args.get('subtype')
object_id = request.args.get('id')
+ comment = request.args.get('comment')
for investigation_uuid in investigations_uuid:
input_dict = {"uuid": investigation_uuid, "id": object_id,
"type": object_type, "subtype": object_subtype}
+ if comment:
+ input_dict["comment"] = comment
res = Investigations.api_register_object(input_dict)
if res[1] != 200:
return create_json_response(res[0], res[1])
diff --git a/var/www/blueprints/objects_chat.py b/var/www/blueprints/objects_chat.py
new file mode 100644
index 00000000..8a1db11f
--- /dev/null
+++ b/var/www/blueprints/objects_chat.py
@@ -0,0 +1,57 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: chat objects endpoints: messages dashboard ...
+'''
+
+import os
+import sys
+import json
+
+from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib import ail_core
+from lib.objects import abstract_subtype_object
+from lib.objects import ail_objects
+from lib.objects import Chats
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_chat = Blueprint('objects_chat', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/chat'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+def create_json_response(data, status_code):
+ return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code
+
+# ============ FUNCTIONS ============
+
+# ============= ROUTES ==============
+
+
+@objects_chat.route("/objects/chat/messages", methods=['GET'])
+@login_required
+@login_read_only
+def objects_dashboard_chat():
+ chat = request.args.get('id')
+ subtype = request.args.get('subtype')
+ chat = Chats.Chat(chat, subtype)
+ if chat.exists():
+ messages = chat.get_messages()
+ meta = chat.get_meta({'icon'})
+ return render_template('ChatMessages.html', meta=meta, messages=messages, bootstrap_label=bootstrap_label)
+ else:
+ return abort(404)
+
+
+
diff --git a/var/www/blueprints/objects_cookie_name.py b/var/www/blueprints/objects_cookie_name.py
index ab111ff2..06d6743a 100644
--- a/var/www/blueprints/objects_cookie_name.py
+++ b/var/www/blueprints/objects_cookie_name.py
@@ -45,8 +45,6 @@ def objects_cookies_names():
else:
dict_objects = {}
- print(dict_objects)
-
return render_template("CookieNameDaterange.html", date_from=date_from, date_to=date_to,
dict_objects=dict_objects, show_objects=show_objects)
diff --git a/var/www/blueprints/objects_etag.py b/var/www/blueprints/objects_etag.py
new file mode 100644
index 00000000..ad2b24fd
--- /dev/null
+++ b/var/www/blueprints/objects_etag.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: etag objects endpoints: daterange dashboard ...
+'''
+
+import os
+import sys
+
+from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects import Etags
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_etag = Blueprint('objects_etag', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/etag'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+@objects_etag.route("/objects/etags", methods=['GET'])
+@login_required
+@login_read_only
+def objects_etags():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ show_objects = request.args.get('show_objects')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+
+ if show_objects:
+ dict_objects = Etags.Etags().api_get_meta_by_daterange(date_from, date_to)
+ else:
+ dict_objects = {}
+
+ return render_template("EtagDaterange.html", date_from=date_from, date_to=date_to,
+ dict_objects=dict_objects, show_objects=show_objects)
+
+@objects_etag.route("/objects/etag/post", methods=['POST'])
+@login_required
+@login_read_only
+def objects_etags_post():
+ date_from = request.form.get('date_from')
+ date_to = request.form.get('date_to')
+ show_objects = request.form.get('show_objects')
+ return redirect(url_for('objects_etag.objects_etags', date_from=date_from, date_to=date_to, show_objects=show_objects))
+
+@objects_etag.route("/objects/etag/range/json", methods=['GET'])
+@login_required
+@login_read_only
+def objects_etag_range_json():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+ return jsonify(Etags.Etags().api_get_chart_nb_by_daterange(date_from, date_to))
+
+# @objects_etag.route("/objects/etag/search", methods=['POST'])
+# @login_required
+# @login_read_only
+# def objects_etags_names_search():
+# to_search = request.form.get('object_id')
+#
+# # TODO SANITIZE ID
+# # TODO Search all
+# cve = Cves.Cve(to_search)
+# if not cve.exists():
+# abort(404)
+# else:
+# return redirect(cve.get_link(flask_context=True))
+
+# ============= ROUTES ==============
+
diff --git a/var/www/blueprints/objects_hhhash.py b/var/www/blueprints/objects_hhhash.py
new file mode 100644
index 00000000..9d5bd320
--- /dev/null
+++ b/var/www/blueprints/objects_hhhash.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: hhhash objects endpoints: daterange dashboard ...
+'''
+
+import os
+import sys
+
+from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects import HHHashs
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_hhhash = Blueprint('objects_hhhash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/hhhash'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+@objects_hhhash.route("/objects/hhhashs", methods=['GET'])
+@login_required
+@login_read_only
+def objects_hhhashs():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ show_objects = request.args.get('show_objects')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+
+ if show_objects:
+ dict_objects = HHHashs.HHHashs().api_get_meta_by_daterange(date_from, date_to)
+ else:
+ dict_objects = {}
+
+ return render_template("HHHashDaterange.html", date_from=date_from, date_to=date_to,
+ dict_objects=dict_objects, show_objects=show_objects)
+
+@objects_hhhash.route("/objects/hhhash/post", methods=['POST'])
+@login_required
+@login_read_only
+def objects_hhhashs_post():
+ date_from = request.form.get('date_from')
+ date_to = request.form.get('date_to')
+ show_objects = request.form.get('show_objects')
+ return redirect(url_for('objects_hhhash.objects_hhhashs', date_from=date_from, date_to=date_to, show_objects=show_objects))
+
+@objects_hhhash.route("/objects/hhhash/range/json", methods=['GET'])
+@login_required
+@login_read_only
+def objects_hhhash_range_json():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+ return jsonify(HHHashs.HHHashs().api_get_chart_nb_by_daterange(date_from, date_to))
+
+# @objects_hhhash.route("/objects/hhhash/search", methods=['POST'])
+# @login_required
+# @login_read_only
+# def objects_hhhashs_names_search():
+# to_search = request.form.get('object_id')
+#
+# # TODO SANITIZE ID
+# # TODO Search all
+# hhhash = HHHashs.HHHash(to_search)
+# if not hhhash.exists():
+#     abort(404)
+# else:
+#     return redirect(hhhash.get_link(flask_context=True))
+
+# ============= ROUTES ==============
+
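
The HHHash blueprint above is a line-for-line copy of the Etags daterange pattern (sanitise the range, then render or jsonify). A possible follow-up, not part of this diff, would be a small shared helper; `get_sanitised_daterange` is a hypothetical name:

```python
# Illustrative refactor sketch only: both blueprints repeat this
# sanitise step in every route. `Date` is the existing packages.Date
# module already imported by these files.
from flask import request
from packages import Date

def get_sanitised_daterange():
    """Read date_from/date_to from query args or form data and sanitise them."""
    date = Date.sanitise_date_range(request.values.get('date_from'),
                                    request.values.get('date_to'))
    return date['date_from'], date['date_to']
```

Each route body would then shrink to one call to this helper plus its object-specific render/jsonify line.
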
diff --git a/var/www/blueprints/objects_subtypes.py b/var/www/blueprints/objects_subtypes.py
index dc97ffa8..a41066a4 100644
--- a/var/www/blueprints/objects_subtypes.py
+++ b/var/www/blueprints/objects_subtypes.py
@@ -91,6 +91,12 @@ def subtypes_objects_dashboard(obj_type, f_request):
# ============= ROUTES ==============
+@objects_subtypes.route("/objects/chats", methods=['GET'])
+@login_required
+@login_read_only
+def objects_dashboard_chat():
+ return subtypes_objects_dashboard('chat', request)
+
@objects_subtypes.route("/objects/cryptocurrencies", methods=['GET'])
@login_required
@login_read_only
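
The new `/objects/chats` route simply delegates to the existing `subtypes_objects_dashboard` dispatcher, as the neighbouring cryptocurrency route does. A rough smoke-test sketch follows; the pytest `client` fixture and its authentication handling are assumptions, not part of this diff:

```python
# Rough smoke test, assuming a Flask test client; an authenticated
# client should get the dashboard, an anonymous one a login redirect.
def test_objects_chats_route(client):
    resp = client.get('/objects/chats')
    assert resp.status_code in (200, 302)
```
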
diff --git a/var/www/blueprints/settings_b.py b/var/www/blueprints/settings_b.py
index 518f6fb9..b154e196 100644
--- a/var/www/blueprints/settings_b.py
+++ b/var/www/blueprints/settings_b.py
@@ -48,7 +48,7 @@ def settings_page():
@login_required
@login_read_only
def get_background_update_metadata_json():
- return jsonify(ail_updates.get_update_background_metadata())
+ return jsonify(ail_updates.get_update_background_meta(options={}))
@settings_b.route("/settings/modules", methods=['GET'])
@login_required
diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py
index 923390dd..3f0df03f 100644
--- a/var/www/modules/dashboard/Flask_dashboard.py
+++ b/var/www/modules/dashboard/Flask_dashboard.py
@@ -167,10 +167,9 @@ def index():
# Check if update in progress
background_update = False
update_message = ''
- if ail_updates.get_current_background_update():
+ if ail_updates.is_update_background_running():
background_update = True
- # update_message = ail_updates.get_update_background_message()
- update_message = None
+ update_message = ail_updates.AILBackgroundUpdate(ail_updates.get_update_background_version()).get_message()
return render_template("index.html", default_minute = default_minute,
threshold_stucked_module=threshold_stucked_module,
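
The dashboard now resolves the banner in two calls: `is_update_background_running()` gates the check, then `AILBackgroundUpdate(version).get_message()` fetches the message. A slightly more defensive variant is sketched below; it is an assumption that `get_update_background_version()` can return an empty value while a run is being set up:

```python
# Defensive sketch built from the calls in the diff above; not part of
# the change itself.
from lib import ail_updates  # import path used elsewhere in AIL

def get_background_update_status():
    """Return (running, message) for the dashboard banner."""
    if not ail_updates.is_update_background_running():
        return False, ''
    version = ail_updates.get_update_background_version()
    if not version:  # assumed possible while a run is being registered
        return True, ''
    return True, ail_updates.AILBackgroundUpdate(version).get_message()
```
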
diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py
index e233bb00..0c95d0a0 100644
--- a/var/www/modules/restApi/Flask_restApi.py
+++ b/var/www/modules/restApi/Flask_restApi.py
@@ -1,9 +1,9 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
-'''
+"""
Flask functions and routes for the rest api
-'''
+"""
import os
import re
@@ -508,6 +508,7 @@ def get_item_cryptocurrency_bitcoin():
# # # # # # # # # # # # # # CRAWLER # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # TODO: ADD RESULT JSON Response
+# @restApi.route("api/v1/crawler/task/add", methods=['POST'])
@restApi.route("api/v1/add/crawler/task", methods=['POST'])
@token_required('analyst')
def add_crawler_task():
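
For reference, a hedged example of calling this endpoint. The `Authorization` token header follows AIL's usual REST conventions, but the exact payload fields (here only `url`) should be checked against the API documentation before use:

```python
# Hedged usage sketch: payload fields and header name are assumptions
# based on common AIL REST API usage, not taken from this diff.
import requests

resp = requests.post('https://127.0.0.1:7000/api/v1/add/crawler/task',
                     json={'url': 'http://example.onion'},       # assumed field
                     headers={'Authorization': '<analyst-token>'},
                     verify=False)                               # self-signed cert
print(resp.status_code, resp.json())
```
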
diff --git a/var/www/modules/settings/Flask_settings.py b/var/www/modules/settings/Flask_settings.py
index 4316d490..2b1b8826 100644
--- a/var/www/modules/settings/Flask_settings.py
+++ b/var/www/modules/settings/Flask_settings.py
@@ -19,7 +19,6 @@ sys.path.append(os.environ['AIL_BIN'])
from lib import d4
from lib import Users
-
# ============ VARIABLES ============
import Flask_config
@@ -33,7 +32,6 @@ email_regex = Flask_config.email_regex
settings = Blueprint('settings', __name__, template_folder='templates')
-
# ============ FUNCTIONS ============
def check_email(email):
@@ -43,6 +41,7 @@ def check_email(email):
else:
return False
+
# ============= ROUTES ==============
@settings.route("/settings/edit_profile", methods=['GET'])
@@ -52,7 +51,8 @@ def edit_profile():
user_metadata = Users.get_user_metadata(current_user.get_id())
admin_level = current_user.is_in_role('admin')
return render_template("edit_profile.html", user_metadata=user_metadata,
- admin_level=admin_level)
+ admin_level=admin_level)
+
@settings.route("/settings/new_token", methods=['GET'])
@login_required
@@ -61,6 +61,7 @@ def new_token():
Users.generate_new_token(current_user.get_id())
return redirect(url_for('settings.edit_profile'))
+
@settings.route("/settings/new_token_user", methods=['POST'])
@login_required
@login_admin
@@ -70,6 +71,7 @@ def new_token_user():
Users.generate_new_token(user_id)
return redirect(url_for('settings.users_list'))
+
@settings.route("/settings/create_user", methods=['GET'])
@login_required
@login_admin
@@ -78,14 +80,15 @@ def create_user():
error = request.args.get('error')
error_mail = request.args.get('error_mail')
role = None
- if r_serv_db.exists('user_metadata:{}'.format(user_id)):
- role = r_serv_db.hget('user_metadata:{}'.format(user_id), 'role')
- else:
- user_id = None
+ if user_id:
+ user = Users.User(user_id)
+ if user.exists():
+ role = user.get_role()
all_roles = Users.get_all_roles()
return render_template("create_user.html", all_roles=all_roles, user_id=user_id, user_role=role,
- error=error, error_mail=error_mail,
- admin_level=True)
+ error=error, error_mail=error_mail,
+ admin_level=True)
+
@settings.route("/settings/create_user_post", methods=['POST'])
@login_required
@@ -98,17 +101,19 @@ def create_user_post():
all_roles = Users.get_all_roles()
- if email and len(email)< 300 and check_email(email) and role:
+ if email and len(email) < 300 and check_email(email) and role:
if role in all_roles:
# password set
if password1 and password2:
- if password1==password2:
+ if password1 == password2:
if Users.check_password_strength(password1):
password = password1
else:
- return render_template("create_user.html", all_roles=all_roles, error="Incorrect Password", admin_level=True)
+ return render_template("create_user.html", all_roles=all_roles, error="Incorrect Password",
+ admin_level=True)
else:
- return render_template("create_user.html", all_roles=all_roles, error="Passwords don't match", admin_level=True)
+ return render_template("create_user.html", all_roles=all_roles, error="Passwords don't match",
+ admin_level=True)
# generate password
else:
password = Users.gen_password()
@@ -127,6 +132,7 @@ def create_user_post():
else:
return render_template("create_user.html", all_roles=all_roles, error_mail=True, admin_level=True)
+
@settings.route("/settings/users_list", methods=['GET'])
@login_required
@login_admin
@@ -140,6 +146,7 @@ def users_list():
new_user_dict['password'] = request.args.get('new_user_password')
return render_template("users_list.html", all_users=all_users, new_user=new_user_dict, admin_level=True)
+
@settings.route("/settings/edit_user", methods=['POST'])
@login_required
@login_admin
@@ -147,6 +154,7 @@ def edit_user():
user_id = request.form.get('user_id')
return redirect(url_for('settings.create_user', user_id=user_id))
+
@settings.route("/settings/delete_user", methods=['POST'])
@login_required
@login_admin
@@ -163,6 +171,7 @@ def passive_dns():
passivedns_enabled = d4.is_passive_dns_enabled()
return render_template("passive_dns.html", passivedns_enabled=passivedns_enabled)
+
@settings.route("/settings/passivedns/change_state", methods=['GET'])
@login_required
@login_admin
@@ -171,11 +180,13 @@ def passive_dns_change_state():
passivedns_enabled = d4.change_passive_dns_state(new_state)
return redirect(url_for('settings.passive_dns'))
+
@settings.route("/settings/ail", methods=['GET'])
@login_required
@login_admin
def ail_configs():
return render_template("ail_configs.html", passivedns_enabled=None)
+
# ========= REGISTRATION =========
app.register_blueprint(settings, url_prefix=baseUrl)
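
The `create_user` view above now goes through a `Users.User` object instead of touching `r_serv_db` directly. The shape this implies is sketched below; the redis key layout comes from the removed lines, while the class internals are illustrative assumptions:

```python
# Shape implied by the diff: exists() and get_role() on a User object.
# The key layout matches the removed lines; internals are assumed.
class User:
    def __init__(self, user_id):
        self.id = user_id

    def exists(self):
        return bool(r_serv_db.exists(f'user_metadata:{self.id}'))

    def get_role(self):
        return r_serv_db.hget(f'user_metadata:{self.id}', 'role')
```
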
diff --git a/var/www/templates/correlation/metadata_card_decoded.html b/var/www/templates/correlation/metadata_card_decoded.html
index da57cb21..1e292a80 100644
--- a/var/www/templates/correlation/metadata_card_decoded.html
+++ b/var/www/templates/correlation/metadata_card_decoded.html
@@ -35,12 +35,12 @@
+      <table class="table">
+        <thead>
+          <tr>
+            <th>Object type</th>
+            <th>First seen</th>
+            <th>Last seen</th>
+            <th>Nb seen</th>
+          </tr>
+        </thead>
+        <tbody>
+          <tr>
+            <td>{{ dict_object["object_type"] }}</td>
+            <td>{{ dict_object["metadata"]['first_seen'] }}</td>
+            <td>{{ dict_object["metadata"]['last_seen'] }}</td>
+            <td>{{ dict_object["metadata"]['nb_seen'] }}</td>
+          </tr>
+        </tbody>
+      </table>
+      Press H on an object / node to hide it.
+      {% if dict_object["hidden"] %}