diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 547cd76f..00c224e4 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -27,7 +27,7 @@ fi export PATH=$AIL_VENV/bin:$PATH export PATH=$AIL_HOME:$PATH export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH +export PATH=$AIL_KVROCKS:$PATH export PATH=$AIL_BIN:$PATH export PATH=$AIL_FLASK:$PATH @@ -685,9 +685,6 @@ while [ "$1" != "" ]; do -lrv | --launchRedisVerify ) launch_redis; wait_until_redis_is_ready; ;; - -lav | --launchARDBVerify ) launch_ardb; - wait_until_ardb_is_ready; - ;; -lkv | --launchKVORCKSVerify ) launch_kvrocks; wait_until_kvrocks_is_ready; ;; diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index be615993..d16ad9f7 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -17,6 +17,7 @@ from lib import ail_logger from lib import crawlers from lib.ConfigLoader import ConfigLoader from lib.objects import CookiesNames +from lib.objects import Etags from lib.objects.Domains import Domain from lib.objects.Items import Item from lib.objects import Screenshots @@ -59,6 +60,7 @@ class Crawler(AbstractModule): self.root_item = None self.date = None self.items_dir = None + self.original_domain = None self.domain = None # TODO Replace with warning list ??? @@ -98,7 +100,7 @@ class Crawler(AbstractModule): self.crawler_scheduler.update_queue() self.crawler_scheduler.process_queue() - self.refresh_lacus_status() # TODO LOG ERROR + self.refresh_lacus_status() # TODO LOG ERROR if not self.is_lacus_up: return None @@ -121,11 +123,19 @@ class Crawler(AbstractModule): if capture: try: status = self.lacus.get_capture_status(capture.uuid) - if status != crawlers.CaptureStatus.DONE: # TODO ADD GLOBAL TIMEOUT-> Save start time ### print start time + if status == crawlers.CaptureStatus.DONE: + return capture + elif status == crawlers.CaptureStatus.UNKNOWN: + capture_start = capture.get_start_time(r_str=False) + if int(time.time()) - capture_start > 600: # TODO ADD in new crawler config + task = capture.get_task() + task.reset() + capture.delete() + else: + capture.update(status) + else: capture.update(status) print(capture.uuid, crawlers.CaptureStatus(status).name, int(time.time())) - else: - return capture except ConnectionError: print(capture.uuid) @@ -181,6 +191,7 @@ class Crawler(AbstractModule): print(domain) self.domain = Domain(domain) + self.original_domain = Domain(domain) epoch = int(time.time()) parent_id = task.get_parent() @@ -203,12 +214,20 @@ class Crawler(AbstractModule): # Origin + History + tags if self.root_item: self.domain.set_last_origin(parent_id) - self.domain.add_history(epoch, root_item=self.root_item) # Tags for tag in task.get_tags(): self.domain.add_tag(tag) - elif self.domain.was_up(): - self.domain.add_history(epoch, root_item=epoch) + self.domain.add_history(epoch, root_item=self.root_item) + + if self.domain != self.original_domain: + self.original_domain.update_daterange(self.date.replace('/', '')) + if self.root_item: + self.original_domain.set_last_origin(parent_id) + # Tags + for tag in task.get_tags(): + self.domain.add_tag(tag) + self.original_domain.add_history(epoch, root_item=self.root_item) + crawlers.update_last_crawled_domain(self.original_domain.get_domain_type(), self.original_domain.id, epoch) crawlers.update_last_crawled_domain(self.domain.get_domain_type(), self.domain.id, epoch) print('capture:', capture.uuid, 'completed') @@ -263,7 +282,7 @@ class Crawler(AbstractModule): title_content = crawlers.extract_title_from_html(entries['html']) if title_content: title = 
Titles.create_title(title_content) - title.add(item.get_date(), item_id) + title.add(item.get_date(), item) # SCREENSHOT if self.screenshot: @@ -287,7 +306,12 @@ class Crawler(AbstractModule): for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']): print(cookie_name) cookie = CookiesNames.create(cookie_name) - cookie.add(self.date.replace('/', ''), self.domain.id) + cookie.add(self.date.replace('/', ''), self.domain) + for etag_content in crawlers.extract_etag_from_har(entries['har']): + print(etag_content) + etag = Etags.create(etag_content) + etag.add(self.date.replace('/', ''), self.domain) + crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', '')) # Next Children entries_children = entries.get('children') diff --git a/bin/exporter/MailExporter.py b/bin/exporter/MailExporter.py index 4b2d4a3a..c4d3f5b5 100755 --- a/bin/exporter/MailExporter.py +++ b/bin/exporter/MailExporter.py @@ -8,9 +8,12 @@ Import Content """ import os +import logging +import logging.config import sys from abc import ABC +from ssl import create_default_context import smtplib from email.mime.multipart import MIMEMultipart @@ -22,17 +25,22 @@ sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## +from lib import ail_logger from exporter.abstract_exporter import AbstractExporter from lib.ConfigLoader import ConfigLoader # from lib.objects.abstract_object import AbstractObject # from lib.Tracker import Tracker +logging.config.dictConfig(ail_logger.get_config(name='modules')) + class MailExporter(AbstractExporter, ABC): - def __init__(self, host=None, port=None, password=None, user='', sender=''): + def __init__(self, host=None, port=None, password=None, user='', sender='', cert_required=None, ca_file=None): super().__init__() config_loader = ConfigLoader() + self.logger = logging.getLogger(f'{self.__class__.__name__}') + if host: self.host = host self.port = port @@ -45,6 +53,15 @@ class MailExporter(AbstractExporter, ABC): self.pw = config_loader.get_config_str("Notifications", "sender_pw") if self.pw == 'None': self.pw = None + if cert_required is not None: + self.cert_required = bool(cert_required) + self.ca_file = ca_file + else: + self.cert_required = config_loader.get_config_boolean("Notifications", "cert_required") + if self.cert_required: + self.ca_file = config_loader.get_config_str("Notifications", "ca_file") + else: + self.ca_file = None if user: self.user = user else: @@ -67,8 +84,12 @@ class MailExporter(AbstractExporter, ABC): smtp_server = smtplib.SMTP(self.host, self.port) smtp_server.starttls() except smtplib.SMTPNotSupportedError: - print("The server does not support the STARTTLS extension.") - smtp_server = smtplib.SMTP_SSL(self.host, self.port) + self.logger.info(f"The server {self.host}:{self.port} does not support the STARTTLS extension.") + if self.cert_required: + context = create_default_context(cafile=self.ca_file) + else: + context = None + smtp_server = smtplib.SMTP_SSL(self.host, self.port, context=context) smtp_server.ehlo() if self.user is not None: @@ -80,7 +101,7 @@ class MailExporter(AbstractExporter, ABC): return smtp_server # except Exception as err: # traceback.print_tb(err.__traceback__) - # logger.warning(err) + # self.logger.warning(err) def _export(self, recipient, subject, body): mime_msg = MIMEMultipart() @@ -95,8 +116,8 @@ class MailExporter(AbstractExporter, ABC): smtp_client.quit() # except Exception as err: # traceback.print_tb(err.__traceback__) - 
# logger.warning(err) - print(f'Send notification: {subject} to {recipient}') + # self.logger.warning(err) + self.logger.info(f'Send notification: {subject} to {recipient}') class MailExporterTracker(MailExporter): diff --git a/bin/importer/FeederImporter.py b/bin/importer/FeederImporter.py index 7acd6ae9..dc2dfb7d 100755 --- a/bin/importer/FeederImporter.py +++ b/bin/importer/FeederImporter.py @@ -87,13 +87,16 @@ class FeederImporter(AbstractImporter): feeder_name = feeder.get_name() print(f'importing: {feeder_name} feeder') - item_id = feeder.get_item_id() + item_id = feeder.get_item_id() # TODO replace me with object global id # process meta if feeder.get_json_meta(): feeder.process_meta() - gzip64_content = feeder.get_gzip64_content() - return f'{feeder_name} {item_id} {gzip64_content}' + if feeder_name == 'telegram': + return item_id # TODO support UI dashboard + else: + gzip64_content = feeder.get_gzip64_content() + return f'{feeder_name} {item_id} {gzip64_content}' class FeederModuleImporter(AbstractModule): diff --git a/bin/importer/PystemonImporter.py b/bin/importer/PystemonImporter.py index df7d8d3a..1a0e68d8 100755 --- a/bin/importer/PystemonImporter.py +++ b/bin/importer/PystemonImporter.py @@ -35,7 +35,7 @@ class PystemonImporter(AbstractImporter): print(item_id) if item_id: print(item_id) - full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH + full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH # Check if pystemon file exists if not os.path.isfile(full_item_path): print(f'Error: {full_item_path}, file not found') @@ -47,7 +47,12 @@ class PystemonImporter(AbstractImporter): if not content: return None - return self.create_message(item_id, content, source='pystemon') + if full_item_path[-3:] == '.gz': + gzipped = True + else: + gzipped = False + + return self.create_message(item_id, content, gzipped=gzipped, source='pystemon') except IOError as e: print(f'Error: {full_item_path}, IOError') diff --git a/bin/importer/abstract_importer.py b/bin/importer/abstract_importer.py index e5155775..1c4b458d 100755 --- a/bin/importer/abstract_importer.py +++ b/bin/importer/abstract_importer.py @@ -89,7 +89,7 @@ class AbstractImporter(ABC): # TODO ail queues if not gzipped: content = self.b64_gzip(content) elif not b64: - content = self.b64(gzipped) + content = self.b64(content) if not content: return None if isinstance(content, bytes): diff --git a/bin/importer/feeders/Jabber.py b/bin/importer/feeders/Jabber.py index 79d0950f..8c90adfd 100755 --- a/bin/importer/feeders/Jabber.py +++ b/bin/importer/feeders/Jabber.py @@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from importer.feeders.Default import DefaultFeeder from lib.objects.Usernames import Username -from lib import item_basic +from lib.objects.Items import Item class JabberFeeder(DefaultFeeder): @@ -36,7 +36,7 @@ class JabberFeeder(DefaultFeeder): self.item_id = f'{item_id}.gz' return self.item_id - def process_meta(self): + def process_meta(self): # TODO replace me by message """ Process JSON meta field. 
""" @@ -44,10 +44,12 @@ class JabberFeeder(DefaultFeeder): # item_basic.add_map_obj_id_item_id(jabber_id, item_id, 'jabber_id') ############################################## to = str(self.json_data['meta']['jabber:to']) fr = str(self.json_data['meta']['jabber:from']) - date = item_basic.get_item_date(item_id) + + item = Item(self.item_id) + date = item.get_date() user_to = Username(to, 'jabber') user_fr = Username(fr, 'jabber') - user_to.add(date, self.item_id) - user_fr.add(date, self.item_id) + user_to.add(date, item) + user_fr.add(date, item) return None diff --git a/bin/importer/feeders/Telegram.py b/bin/importer/feeders/Telegram.py index 3856c88e..2cc6a127 100755 --- a/bin/importer/feeders/Telegram.py +++ b/bin/importer/feeders/Telegram.py @@ -16,8 +16,28 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from importer.feeders.Default import DefaultFeeder +from lib.ConfigLoader import ConfigLoader +from lib.objects.Chats import Chat +from lib.objects import Messages +from lib.objects import UsersAccount from lib.objects.Usernames import Username -from lib import item_basic + +import base64 +import io +import gzip +def gunzip_bytes_obj(bytes_obj): + gunzipped_bytes_obj = None + try: + in_ = io.BytesIO() + in_.write(bytes_obj) + in_.seek(0) + + with gzip.GzipFile(fileobj=in_, mode='rb') as fo: + gunzipped_bytes_obj = fo.read() + except Exception as e: + print(f'Global; Invalid Gzip file: {e}') + + return gunzipped_bytes_obj class TelegramFeeder(DefaultFeeder): @@ -26,31 +46,90 @@ class TelegramFeeder(DefaultFeeder): self.name = 'telegram' # define item id - def get_item_id(self): - # TODO use telegram message date - date = datetime.date.today().strftime("%Y/%m/%d") - channel_id = str(self.json_data['meta']['channel_id']) - message_id = str(self.json_data['meta']['message_id']) - item_id = f'{channel_id}_{message_id}' - item_id = os.path.join('telegram', date, item_id) - self.item_id = f'{item_id}.gz' + def get_item_id(self): # TODO rename self.item_id + # Get message date + timestamp = self.json_data['meta']['date']['timestamp'] # TODO CREATE DEFAULT TIMESTAMP + # if self.json_data['meta'].get('date'): + # date = datetime.datetime.fromtimestamp( self.json_data['meta']['date']['timestamp']) + # date = date.strftime('%Y/%m/%d') + # else: + # date = datetime.date.today().strftime("%Y/%m/%d") + chat_id = str(self.json_data['meta']['chat']['id']) + message_id = str(self.json_data['meta']['id']) + self.item_id = Messages.create_obj_id('telegram', chat_id, message_id, timestamp) # TODO rename self.item_id return self.item_id def process_meta(self): """ Process JSON meta field. 
""" - # channel_id = str(self.json_data['meta']['channel_id']) - # message_id = str(self.json_data['meta']['message_id']) - # telegram_id = f'{channel_id}_{message_id}' - # item_basic.add_map_obj_id_item_id(telegram_id, item_id, 'telegram_id') ######################################### - user = None - if self.json_data['meta'].get('user'): - user = str(self.json_data['meta']['user']) - elif self.json_data['meta'].get('channel'): - user = str(self.json_data['meta']['channel'].get('username')) - if user: - date = item_basic.get_item_date(self.item_id) - username = Username(user, 'telegram') - username.add(date, self.item_id) + # message chat + meta = self.json_data['meta'] + mess_id = self.json_data['meta']['id'] + if meta.get('reply_to'): + reply_to_id = meta['reply_to']['id'] + else: + reply_to_id = None + + timestamp = meta['date']['timestamp'] + date = datetime.datetime.fromtimestamp(timestamp) + date = date.strftime('%Y%m%d') + + if self.json_data.get('translation'): + translation = self.json_data['translation'] + else: + translation = None + decoded = base64.standard_b64decode(self.json_data['data']) + content = gunzip_bytes_obj(decoded) + message = Messages.create(self.item_id, content, translation=translation) + + if meta.get('chat'): + chat = Chat(meta['chat']['id'], 'telegram') + + if meta['chat'].get('username'): + chat_username = Username(meta['chat']['username'], 'telegram') + chat.update_username_timeline(chat_username.get_global_id(), timestamp) + + # Chat---Message + chat.add(date) + chat.add_message(message.get_global_id(), timestamp, mess_id, reply_id=reply_to_id) + else: + chat = None + + # message sender + if meta.get('sender'): # TODO handle message channel forward - check if is user + user_id = meta['sender']['id'] + user_account = UsersAccount.UserAccount(user_id, 'telegram') + # UserAccount---Message + user_account.add(date, obj=message) + # UserAccount---Chat + user_account.add_correlation(chat.type, chat.get_subtype(r_str=True), chat.id) + + if meta['sender'].get('firstname'): + user_account.set_first_name(meta['sender']['firstname']) + if meta['sender'].get('lastname'): + user_account.set_last_name(meta['sender']['lastname']) + if meta['sender'].get('phone'): + user_account.set_phone(meta['sender']['phone']) + + if meta['sender'].get('username'): + username = Username(meta['sender']['username'], 'telegram') + # TODO timeline or/and correlation ???? + user_account.add_correlation(username.type, username.get_subtype(r_str=True), username.id) + user_account.update_username_timeline(username.get_global_id(), timestamp) + + # Username---Message + username.add(date) # TODO # correlation message ??? + + # if chat: # TODO Chat---Username correlation ??? + # # Chat---Username + # chat.add_correlation(username.type, username.get_subtype(r_str=True), username.id) + + # if meta.get('fwd_from'): + # if meta['fwd_from'].get('post_author') # user first name + + # TODO reply threads ???? + # message edit ???? 
+ + return None diff --git a/bin/importer/feeders/Twitter.py b/bin/importer/feeders/Twitter.py index d5040c65..1c719e73 100755 --- a/bin/importer/feeders/Twitter.py +++ b/bin/importer/feeders/Twitter.py @@ -17,7 +17,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from importer.feeders.Default import DefaultFeeder from lib.objects.Usernames import Username -from lib import item_basic +from lib.objects.Items import Item class TwitterFeeder(DefaultFeeder): @@ -40,9 +40,9 @@ class TwitterFeeder(DefaultFeeder): ''' # tweet_id = str(self.json_data['meta']['twitter:tweet_id']) # item_basic.add_map_obj_id_item_id(tweet_id, item_id, 'twitter_id') ############################################ - - date = item_basic.get_item_date(self.item_id) + item = Item(self.item_id) + date = item.get_date() user = str(self.json_data['meta']['twitter:id']) username = Username(user, 'twitter') - username.add(date, item_id) + username.add(date, item) return None diff --git a/bin/lib/Investigations.py b/bin/lib/Investigations.py index 1944d00f..9c6def0f 100755 --- a/bin/lib/Investigations.py +++ b/bin/lib/Investigations.py @@ -235,18 +235,27 @@ class Investigation(object): objs.append(dict_obj) return objs + def get_objects_comment(self, obj_global_id): + return r_tracking.hget(f'investigations:objs:comment:{self.uuid}', obj_global_id) + + def set_objects_comment(self, obj_global_id, comment): + if comment: + r_tracking.hset(f'investigations:objs:comment:{self.uuid}', obj_global_id, comment) + # # TODO: def register_object(self, Object): in OBJECT CLASS - def register_object(self, obj_id, obj_type, subtype): + def register_object(self, obj_id, obj_type, subtype, comment=''): r_tracking.sadd(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}') r_tracking.sadd(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid) + if comment: + self.set_objects_comment(f'{obj_type}:{subtype}:{obj_id}', comment) timestamp = int(time.time()) self.set_last_change(timestamp) - def unregister_object(self, obj_id, obj_type, subtype): r_tracking.srem(f'investigations:objs:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}') r_tracking.srem(f'obj:investigations:{obj_type}:{subtype}:{obj_id}', self.uuid) + r_tracking.hdel(f'investigations:objs:comment:{self.uuid}', f'{obj_type}:{subtype}:{obj_id}') timestamp = int(time.time()) self.set_last_change(timestamp) @@ -351,7 +360,7 @@ def get_investigations_selector(): for investigation_uuid in get_all_investigations(): investigation = Investigation(investigation_uuid) name = investigation.get_info() - l_investigations.append({"id":investigation_uuid, "name": name}) + l_investigations.append({"id": investigation_uuid, "name": name}) return l_investigations #{id:'8dc4b81aeff94a9799bd70ba556fa345',name:"Paris"} @@ -453,7 +462,11 @@ def api_register_object(json_dict): if subtype == 'None': subtype = '' obj_id = json_dict.get('id', '').replace(' ', '') - res = investigation.register_object(obj_id, obj_type, subtype) + + comment = json_dict.get('comment', '') + # if comment: + # comment = escape(comment) + res = investigation.register_object(obj_id, obj_type, subtype, comment=comment) return res, 200 def api_unregister_object(json_dict): diff --git a/bin/lib/Tag.py b/bin/lib/Tag.py index 94b2eca4..64850b3c 100755 --- a/bin/lib/Tag.py +++ b/bin/lib/Tag.py @@ -338,7 +338,7 @@ def get_galaxy_meta(galaxy_name, nb_active_tags=False): else: meta['icon'] = f'fas fa-{icon}' if nb_active_tags: - meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy) + 
meta['nb_active_tags'] = get_galaxy_nb_tags_enabled(galaxy.type) meta['nb_tags'] = len(get_galaxy_tags(galaxy.type)) return meta diff --git a/bin/lib/Tracker.py b/bin/lib/Tracker.py index 2a5336ad..f1ea8905 100755 --- a/bin/lib/Tracker.py +++ b/bin/lib/Tracker.py @@ -207,6 +207,13 @@ class Tracker: if filters: self._set_field('filters', json.dumps(filters)) + def del_filters(self, tracker_type, to_track): + filters = self.get_filters() + for obj_type in filters: + r_tracker.srem(f'trackers:objs:{tracker_type}:{obj_type}', to_track) + r_tracker.srem(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}') + r_tracker.hdel(f'tracker:{self.uuid}', 'filters') + def get_tracked(self): return self._get_field('tracked') @@ -513,6 +520,7 @@ class Tracker: self._set_mails(mails) # Filters + self.del_filters(old_type, old_to_track) if not filters: filters = {} for obj_type in get_objects_tracked(): @@ -522,9 +530,6 @@ class Tracker: for obj_type in filters: r_tracker.sadd(f'trackers:objs:{tracker_type}:{obj_type}', to_track) r_tracker.sadd(f'trackers:uuid:{tracker_type}:{to_track}', f'{self.uuid}:{obj_type}') - if tracker_type != old_type: - r_tracker.srem(f'trackers:objs:{old_type}:{obj_type}', old_to_track) - r_tracker.srem(f'trackers:uuid:{old_type}:{old_to_track}', f'{self.uuid}:{obj_type}') # Refresh Trackers trigger_trackers_refresh(tracker_type) @@ -650,14 +655,14 @@ def get_user_trackers_meta(user_id, tracker_type=None): metas = [] for tracker_uuid in get_user_trackers(user_id, tracker_type=tracker_type): tracker = Tracker(tracker_uuid) - metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'})) + metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'})) return metas def get_global_trackers_meta(tracker_type=None): metas = [] for tracker_uuid in get_global_trackers(tracker_type=tracker_type): tracker = Tracker(tracker_uuid) - metas.append(tracker.get_meta(options={'mails', 'sparkline', 'tags'})) + metas.append(tracker.get_meta(options={'description', 'mails', 'sparkline', 'tags'})) return metas def get_users_trackers_meta(): diff --git a/bin/lib/Users.py b/bin/lib/Users.py index a61830ef..765b1360 100755 --- a/bin/lib/Users.py +++ b/bin/lib/Users.py @@ -247,7 +247,10 @@ class User(UserMixin): self.id = "__anonymous__" def exists(self): - return self.id != "__anonymous__" + if self.id == "__anonymous__": + return False + else: + return r_serv_db.exists(f'ail:user:metadata:{self.id}') # return True or False # def is_authenticated(): @@ -287,3 +290,6 @@ class User(UserMixin): return True else: return False + + def get_role(self): + return r_serv_db.hget(f'ail:user:metadata:{self.id}', 'role') diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index c52db274..eeb83a98 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -15,8 +15,8 @@ config_loader = ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None -AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp', - 'screenshot', 'title', 'username'}) +AIL_OBJECTS = sorted({'chat', 'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item', + 'pgp', 'screenshot', 'title', 'user-account', 'username'}) def get_ail_uuid(): ail_uuid = r_serv_db.get('ail:uuid') @@ -38,9 +38,11 @@ def get_all_objects(): return AIL_OBJECTS def get_objects_with_subtypes(): - return ['cryptocurrency', 'pgp', 'username'] + return ['chat', 'cryptocurrency', 'pgp', 'username'] def 
get_object_all_subtypes(obj_type):
+    if obj_type == 'chat':
+        return ['discord', 'jabber', 'telegram']
     if obj_type == 'cryptocurrency':
         return ['bitcoin', 'bitcoin-cash', 'dash', 'ethereum', 'litecoin', 'monero', 'zcash']
     if obj_type == 'pgp':
@@ -66,6 +68,14 @@ def get_all_objects_with_subtypes_tuple():
             str_objs.append((obj_type, ''))
     return str_objs
 
+def unpack_obj_global_id(global_id, r_type='tuple'):
+    if r_type == 'dict':
+        obj = global_id.split(':', 2)
+        return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
+    else: # tuple(type, subtype, id)
+        return global_id.split(':', 2)
+
+
 ##-- AIL OBJECTS --##
 
 #### Redis ####
diff --git a/bin/lib/ail_updates.py b/bin/lib/ail_updates.py
index c3729831..07fd791a 100755
--- a/bin/lib/ail_updates.py
+++ b/bin/lib/ail_updates.py
@@ -15,38 +15,15 @@ config_loader = ConfigLoader()
 r_db = config_loader.get_db_conn("Kvrocks_DB")
 config_loader = None
 
-BACKGROUND_UPDATES = {
-    'v1.5': {
-        'nb_updates': 5,
-        'message': 'Tags and Screenshots'
-    },
-    'v2.4': {
-        'nb_updates': 1,
-        'message': ' Domains Tags and Correlations'
-    },
-    'v2.6': {
-        'nb_updates': 1,
-        'message': 'Domains Tags and Correlations'
-    },
-    'v2.7': {
-        'nb_updates': 1,
-        'message': 'Domains Tags'
-    },
-    'v3.4': {
-        'nb_updates': 1,
-        'message': 'Domains Languages'
-    },
-    'v3.7': {
-        'nb_updates': 1,
-        'message': 'Trackers first_seen/last_seen'
-    }
-}
-
+# # # # # # # #
+#             #
+#    UPDATE   #
+#             #
+# # # # # # # #
 
 def get_ail_version():
     return r_db.get('ail:version')
 
-
 def get_ail_float_version():
     version = get_ail_version()
     if version:
@@ -55,6 +32,179 @@ def get_ail_float_version():
         version = 0
     return version
 
+# # # - - # # #
+
+# # # # # # # # # # # #
+#                     #
+#  UPDATE BACKGROUND  #
+#                     #
+# # # # # # # # # # # #
+
+
+BACKGROUND_UPDATES = {
+    'v5.2': {
+        'message': 'Compress HAR',
+        'scripts': ['compress_har.py']
+    },
+}
+
+class AILBackgroundUpdate:
+    """
+    AIL Background Update.
+    """
+
+    def __init__(self, version):
+        self.version = version
+
+    def _get_field(self, field):
+        return r_db.hget('ail:update:background', field)
+
+    def _set_field(self, field, value):
+        r_db.hset('ail:update:background', field, value)
+
+    def get_version(self):
+        return self.version
+
+    def get_message(self):
+        return BACKGROUND_UPDATES.get(self.version, {}).get('message', '')
+
+    def get_error(self):
+        return self._get_field('error')
+
+    def set_error(self, error): # TODO ADD LOGS
+        self._set_field('error', error)
+
+    def get_nb_scripts(self):
+        return int(len(BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [''])))
+
+    def get_scripts(self):
+        return BACKGROUND_UPDATES.get(self.version, {}).get('scripts', [])
+
+    def get_nb_scripts_done(self):
+        done = self._get_field('done')
+        try:
+            done = int(done)
+        except (TypeError, ValueError):
+            done = 0
+        return done
+
+    def inc_nb_scripts_done(self):
+        self._set_field('done', self.get_nb_scripts_done() + 1)
+
+    def get_script(self):
+        return self._get_field('script')
+
+    def get_script_path(self):
+        path = os.path.basename(self.get_script())
+        if path:
+            return os.path.join(os.environ['AIL_HOME'], 'update', self.version, path)
+
+    def get_nb_to_update(self): # TODO use cache ?????
+        nb_to_update = self._get_field('nb_to_update')
+        if not nb_to_update:
+            nb_to_update = 1
+        return int(nb_to_update)
+
+    def set_nb_to_update(self, nb):
+        self._set_field('nb_to_update', int(nb))
+
+    def get_nb_updated(self): # TODO use cache ?????
+ nb_updated = self._get_field('nb_updated') + if not nb_updated: + nb_updated = 0 + return int(nb_updated) + + def inc_nb_updated(self): # TODO use cache ????? + r_db.hincrby('ail:update:background', 'nb_updated', 1) + + def get_progress(self): # TODO use cache ????? + return self._get_field('progress') + + def set_progress(self, progress): + self._set_field('progress', progress) + + def update_progress(self): + nb_updated = self.get_nb_updated() + nb_to_update = self.get_nb_to_update() + if nb_updated == nb_to_update: + progress = 100 + elif nb_updated > nb_to_update: + progress = 99 + else: + progress = int((nb_updated * 100) / nb_to_update) + self.set_progress(progress) + print(f'{nb_updated}/{nb_to_update} updated {progress}%') + return progress + + def is_running(self): + return r_db.hget('ail:update:background', 'version') == self.version + + def get_meta(self, options=set()): + meta = {'version': self.get_version(), + 'error': self.get_error(), + 'script': self.get_script(), + 'script_progress': self.get_progress(), + 'nb_update': self.get_nb_scripts(), + 'nb_completed': self.get_nb_scripts_done()} + meta['progress'] = int(meta['nb_completed'] * 100 / meta['nb_update']) + if 'message' in options: + meta['message'] = self.get_message() + return meta + + def start(self): + self._set_field('version', self.version) + r_db.hdel('ail:update:background', 'error') + + def start_script(self, script): + self.clear() + self._set_field('script', script) + self.set_progress(0) + + def end_script(self): + self.set_progress(100) + self.inc_nb_scripts_done() + + def clear(self): + r_db.hdel('ail:update:background', 'error') + r_db.hdel('ail:update:background', 'progress') + r_db.hdel('ail:update:background', 'nb_updated') + r_db.hdel('ail:update:background', 'nb_to_update') + + def end(self): + r_db.delete('ail:update:background') + r_db.srem('ail:updates:background', self.version) + + +# To Add in update script +def add_background_update(version): + r_db.sadd('ail:updates:background', version) + +def is_update_background_running(): + return r_db.exists('ail:update:background') + +def get_update_background_version(): + return r_db.hget('ail:update:background', 'version') + +def get_update_background_meta(options=set()): + version = get_update_background_version() + if version: + return AILBackgroundUpdate(version).get_meta(options=options) + else: + return {} + +def get_update_background_to_launch(): + to_launch = [] + updates = r_db.smembers('ail:updates:background') + for version in BACKGROUND_UPDATES: + if version in updates: + to_launch.append(version) + return to_launch + +# # # - - # # # + +########################################################################################## +########################################################################################## +########################################################################################## def get_ail_all_updates(date_separator='-'): dict_update = r_db.hgetall('ail:update_date') @@ -87,111 +237,6 @@ def check_version(version): return True -#### UPDATE BACKGROUND #### - -def exits_background_update_to_launch(): - return r_db.scard('ail:update:to_update') != 0 - - -def is_version_in_background_update(version): - return r_db.sismember('ail:update:to_update', version) - - -def get_all_background_updates_to_launch(): - return r_db.smembers('ail:update:to_update') - - -def get_current_background_update(): - return r_db.get('ail:update:update_in_progress') - - -def get_current_background_update_script(): - return 
r_db.get('ail:update:current_background_script') - - -def get_current_background_update_script_path(version, script_name): - return os.path.join(os.environ['AIL_HOME'], 'update', version, script_name) - - -def get_current_background_nb_update_completed(): - return r_db.scard('ail:update:update_in_progress:completed') - - -def get_current_background_update_progress(): - progress = r_db.get('ail:update:current_background_script_stat') - if not progress: - progress = 0 - return int(progress) - - -def get_background_update_error(): - return r_db.get('ail:update:error') - - -def add_background_updates_to_launch(version): - return r_db.sadd('ail:update:to_update', version) - - -def start_background_update(version): - r_db.delete('ail:update:error') - r_db.set('ail:update:update_in_progress', version) - - -def set_current_background_update_script(script_name): - r_db.set('ail:update:current_background_script', script_name) - r_db.set('ail:update:current_background_script_stat', 0) - - -def set_current_background_update_progress(progress): - r_db.set('ail:update:current_background_script_stat', progress) - - -def set_background_update_error(error): - r_db.set('ail:update:error', error) - - -def end_background_update_script(): - r_db.sadd('ail:update:update_in_progress:completed') - - -def end_background_update(version): - r_db.delete('ail:update:update_in_progress') - r_db.delete('ail:update:current_background_script') - r_db.delete('ail:update:current_background_script_stat') - r_db.delete('ail:update:update_in_progress:completed') - r_db.srem('ail:update:to_update', version) - - -def clear_background_update(): - r_db.delete('ail:update:error') - r_db.delete('ail:update:update_in_progress') - r_db.delete('ail:update:current_background_script') - r_db.delete('ail:update:current_background_script_stat') - r_db.delete('ail:update:update_in_progress:completed') - - -def get_update_background_message(version): - return BACKGROUND_UPDATES[version]['message'] - - -# TODO: Detect error in subprocess -def get_update_background_metadata(): - dict_update = {} - version = get_current_background_update() - if version: - dict_update['version'] = version - dict_update['script'] = get_current_background_update_script() - dict_update['script_progress'] = get_current_background_update_progress() - dict_update['nb_update'] = BACKGROUND_UPDATES[dict_update['version']]['nb_updates'] - dict_update['nb_completed'] = get_current_background_nb_update_completed() - dict_update['progress'] = int(dict_update['nb_completed'] * 100 / dict_update['nb_update']) - dict_update['error'] = get_background_update_error() - return dict_update - - -##-- UPDATE BACKGROUND --## - - if __name__ == '__main__': res = check_version('v3.1..1') print(res) diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py index 8e29837d..f7b13f61 100755 --- a/bin/lib/correlations_engine.py +++ b/bin/lib/correlations_engine.py @@ -41,17 +41,22 @@ config_loader = None ################################## CORRELATION_TYPES_BY_OBJ = { + "chat": ["user-account"], # message or direct correlation like cve, bitcoin, ... ??? 
"cookie-name": ["domain"], - "cryptocurrency": ["domain", "item"], - "cve": ["domain", "item"], - "decoded": ["domain", "item"], - "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"], + "cryptocurrency": ["domain", "item", "message"], + "cve": ["domain", "item", "message"], + "decoded": ["domain", "item", "message"], + "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"], + "etag": ["domain"], "favicon": ["domain", "item"], # TODO Decoded - "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], - "pgp": ["domain", "item"], + "hhhash": ["domain"], + "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ??? + "message": ["cve", "cryptocurrency", "decoded", "pgp", "user-account"], # chat ?? + "pgp": ["domain", "item", "message"], "screenshot": ["domain", "item"], "title": ["domain", "item"], - "username": ["domain", "item"], + "user-account": ["chat", "message"], + "username": ["domain", "item", "message"], # TODO chat-user/account } def get_obj_correl_types(obj_type): @@ -63,6 +68,8 @@ def sanityze_obj_correl_types(obj_type, correl_types): correl_types = set(correl_types).intersection(obj_correl_types) if not correl_types: correl_types = obj_correl_types + if not correl_types: + return [] return correl_types def get_nb_correlation_by_correl_type(obj_type, subtype, obj_id, correl_type): @@ -169,18 +176,18 @@ def get_obj_str_id(obj_type, subtype, obj_id): subtype = '' return f'{obj_type}:{subtype}:{obj_id}' -def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, flask_context=False): +def get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=[], max_nodes=300, level=1, objs_hidden=set(), flask_context=False): links = set() nodes = set() meta = {'complete': True, 'objs': set()} obj_str_id = get_obj_str_id(obj_type, subtype, obj_id) - _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, previous_str_obj='') + _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj='') return obj_str_id, nodes, links, meta -def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], previous_str_obj=''): +def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, level, max_nodes, filter_types=[], objs_hidden=set(), previous_str_obj=''): obj_str_id = get_obj_str_id(obj_type, subtype, obj_id) meta['objs'].add(obj_str_id) nodes.add(obj_str_id) @@ -191,6 +198,10 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, for str_obj in obj_correlations[correl_type]: subtype2, obj2_id = str_obj.split(':', 1) obj2_str_id = get_obj_str_id(correl_type, subtype2, obj2_id) + # filter objects to hide + if obj2_str_id in objs_hidden: + continue + meta['objs'].add(obj2_str_id) if obj2_str_id == previous_str_obj: @@ -204,5 +215,5 @@ def _get_correlations_graph_node(links, nodes, meta, obj_type, subtype, obj_id, if level > 0: next_level = level - 1 - _get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, previous_str_obj=obj_str_id) + 
_get_correlations_graph_node(links, nodes, meta, correl_type, subtype2, obj2_id, next_level, max_nodes, filter_types=filter_types, objs_hidden=objs_hidden, previous_str_obj=obj_str_id) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 300edb66..6387c76f 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -39,6 +39,7 @@ from packages import git_status from packages import Date from lib.ConfigLoader import ConfigLoader from lib.objects.Domains import Domain +from lib.objects import HHHashs from lib.objects.Items import Item config_loader = ConfigLoader() @@ -134,7 +135,7 @@ def unpack_url(url): # # # # # # # # TODO CREATE NEW OBJECT def get_favicon_from_html(html, domain, url): - favicon_urls = extract_favicon_from_html(html, url) + favicon_urls, favicons = extract_favicon_from_html(html, url) # add root favicon if not favicon_urls: favicon_urls.add(f'{urlparse(url).scheme}://{domain}/favicon.ico') @@ -162,7 +163,6 @@ def extract_favicon_from_html(html, url): # - # - - # Root Favicon f = get_faup() f.decode(url) @@ -244,13 +244,6 @@ def extract_description_from_html(html): return description['content'] return '' -def extract_description_from_html(html): - soup = BeautifulSoup(html, 'html.parser') - description = soup.find('meta', attrs={'name': 'description'}) - if description: - return description['content'] - return '' - def extract_keywords_from_html(html): soup = BeautifulSoup(html, 'html.parser') keywords = soup.find('meta', attrs={'name': 'keywords'}) @@ -264,6 +257,7 @@ def extract_author_from_html(html): if keywords: return keywords['content'] return '' + # # # - - # # # @@ -275,7 +269,7 @@ def extract_author_from_html(html): def create_har_id(date, item_id): item_id = item_id.split('/')[-1] - return os.path.join(date, f'{item_id}.json') + return os.path.join(date, f'{item_id}.json.gz') def save_har(har_id, har_content): # create dir @@ -284,8 +278,8 @@ def save_har(har_id, har_content): os.makedirs(har_dir) # save HAR filename = os.path.join(get_har_dir(), har_id) - with open(filename, 'w') as f: - f.write(json.dumps(har_content)) + with gzip.open(filename, 'wb') as f: + f.write(json.dumps(har_content).encode()) def get_all_har_ids(): har_ids = [] @@ -299,9 +293,10 @@ def get_all_har_ids(): except (TypeError, ValueError): pass - for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]: - har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR) - har_ids.append(har_id) + if os.path.exists(today_root_dir): + for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]: + har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR) + har_ids.append(har_id) for ydir in sorted(dirs_year, reverse=False): search_dear = os.path.join(HAR_DIR, ydir) @@ -312,14 +307,17 @@ def get_all_har_ids(): har_ids.append(har_id) return har_ids -def extract_cookies_names_from_har_by_har_id(har_id): +def get_har_content(har_id): har_path = os.path.join(HAR_DIR, har_id) - with open(har_path) as f: - try: - har_content = json.loads(f.read()) - except json.decoder.JSONDecodeError: - har_content = {} - return extract_cookies_names_from_har(har_content) + try: + with gzip.open(har_path) as f: + try: + return json.loads(f.read()) + except json.decoder.JSONDecodeError: + return {} + except Exception as e: + print(e) # TODO LOGS + return {} def extract_cookies_names_from_har(har): cookies = set() @@ -334,17 +332,110 @@ def extract_cookies_names_from_har(har): cookies.add(name) return 
cookies -def _reprocess_all_hars(): +def _reprocess_all_hars_cookie_name(): from lib.objects import CookiesNames for har_id in get_all_har_ids(): domain = har_id.split('/')[-1] - domain = domain[:-41] + domain = domain[:-44] date = har_id.split('/') date = f'{date[-4]}{date[-3]}{date[-2]}' - for cookie_name in extract_cookies_names_from_har_by_har_id(har_id): + for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)): print(domain, date, cookie_name) cookie = CookiesNames.create(cookie_name) - cookie.add(date, domain) + cookie.add(date, Domain(domain)) + +def extract_etag_from_har(har): # TODO check response url + etags = set() + for entrie in har.get('log', {}).get('entries', []): + for header in entrie.get('response', {}).get('headers', []): + if header.get('name') == 'etag': + # print(header) + etag = header.get('value') + if etag: + etags.add(etag) + return etags + +def _reprocess_all_hars_etag(): + from lib.objects import Etags + for har_id in get_all_har_ids(): + domain = har_id.split('/')[-1] + domain = domain[:-44] + date = har_id.split('/') + date = f'{date[-4]}{date[-3]}{date[-2]}' + for etag_content in extract_etag_from_har(get_har_content(har_id)): + print(domain, date, etag_content) + etag = Etags.create(etag_content) + etag.add(date, Domain(domain)) + +def extract_hhhash_by_id(har_id, domain, date): + return extract_hhhash(get_har_content(har_id), domain, date) + +def extract_hhhash(har, domain, date): + hhhashs = set() + urls = set() + for entrie in har.get('log', {}).get('entries', []): + url = entrie.get('request').get('url') + if url not in urls: + # filter redirect + if entrie.get('response').get('status') == 200: # != 301: + # print(url, entrie.get('response').get('status')) + + f = get_faup() + f.decode(url) + domain_url = f.get().get('domain') + if domain_url == domain: + + headers = entrie.get('response').get('headers') + + hhhash_header = HHHashs.build_hhhash_headers(headers) + hhhash = HHHashs.hhhash_headers(hhhash_header) + + if hhhash not in hhhashs: + print('', url, hhhash) + + # ----- + obj = HHHashs.create(hhhash_header, hhhash) + obj.add(date, Domain(domain)) + + hhhashs.add(hhhash) + urls.add(url) + print() + print() + print('HHHASH:') + for hhhash in hhhashs: + print(hhhash) + return hhhashs + +def _reprocess_all_hars_hhhashs(): + for har_id in get_all_har_ids(): + print() + print(har_id) + domain = har_id.split('/')[-1] + domain = domain[:-44] + date = har_id.split('/') + date = f'{date[-4]}{date[-3]}{date[-2]}' + extract_hhhash_by_id(har_id, domain, date) + + + +def _gzip_har(har_id): + har_path = os.path.join(HAR_DIR, har_id) + new_id = f'{har_path}.gz' + if not har_id.endswith('.gz'): + if not os.path.exists(new_id): + with open(har_path, 'rb') as f: + content = f.read() + if content: + with gzip.open(new_id, 'wb') as f: + r = f.write(content) + print(r) + if os.path.exists(new_id) and os.path.exists(har_path): + os.remove(har_path) + print('delete:', har_path) + +def _gzip_all_hars(): + for har_id in get_all_har_ids(): + _gzip_har(har_id) # # # - - # # # @@ -662,8 +753,7 @@ class Cookie: meta[field] = value if r_json: data = json.dumps(meta, indent=4, sort_keys=True) - meta = {'data': data} - meta['uuid'] = self.uuid + meta = {'data': data, 'uuid': self.uuid} return meta def edit(self, cookie_dict): @@ -775,7 +865,7 @@ def unpack_imported_json_cookie(json_cookie): ## - - ## #### COOKIEJAR API #### -def api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch +def 
api_import_cookies_from_json(user_id, cookiejar_uuid, json_cookies_str): # # TODO: add catch resp = api_verify_cookiejar_acl(cookiejar_uuid, user_id) if resp: return resp @@ -944,8 +1034,8 @@ class CrawlerScheduler: minutes = 0 current_time = datetime.now().timestamp() time_next_run = (datetime.now() + relativedelta(months=int(months), weeks=int(weeks), - days=int(days), hours=int(hours), - minutes=int(minutes))).timestamp() + days=int(days), hours=int(hours), + minutes=int(minutes))).timestamp() # Make sure the next capture is not scheduled for in a too short interval interval_next_capture = time_next_run - current_time if interval_next_capture < self.min_frequency: @@ -1225,8 +1315,13 @@ class CrawlerCapture: if task_uuid: return CrawlerTask(task_uuid) - def get_start_time(self): - return self.get_task().get_start_time() + def get_start_time(self, r_str=True): + start_time = self.get_task().get_start_time() + if r_str: + return start_time + else: + start_time = datetime.strptime(start_time, "%Y/%m/%d - %H:%M.%S").timestamp() + return int(start_time) def get_status(self): status = r_cache.hget(f'crawler:capture:{self.uuid}', 'status') @@ -1239,7 +1334,8 @@ class CrawlerCapture: def create(self, task_uuid): if self.exists(): - raise Exception(f'Error: Capture {self.uuid} already exists') + print(f'Capture {self.uuid} already exists') # TODO LOGS + return None launch_time = int(time.time()) r_crawler.hset(f'crawler:task:{task_uuid}', 'capture', self.uuid) r_crawler.hset('crawler:captures:tasks', self.uuid, task_uuid) @@ -1492,6 +1588,11 @@ class CrawlerTask: def start(self): self._set_field('start_time', datetime.now().strftime("%Y/%m/%d - %H:%M.%S")) + def reset(self): + priority = 49 + r_crawler.hdel(f'crawler:task:{self.uuid}', 'start_time') + self.add_to_db_crawler_queue(priority) + # Crawler def remove(self): # zrem cache + DB capture_uuid = self.get_capture() @@ -1622,14 +1723,16 @@ def api_add_crawler_task(data, user_id=None): if frequency: # TODO verify user - return create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None, - cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags), 200 + task_uuid = create_schedule(frequency, user_id, url, depth=depth_limit, har=har, screenshot=screenshot, header=None, + cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags) else: # TODO HEADERS # TODO USER AGENT - return create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None, + task_uuid = create_task(url, depth=depth_limit, har=har, screenshot=screenshot, header=None, cookiejar=cookiejar_uuid, proxy=proxy, user_agent=None, tags=tags, - parent='manual', priority=90), 200 + parent='manual', priority=90) + + return {'uuid': task_uuid}, 200 #### #### @@ -1702,13 +1805,13 @@ class CrawlerProxy: self.uuid = proxy_uuid def get_description(self): - return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'description') + return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'description') # Host # Port # Type -> need test def get_url(self): - return r_crawler.hgrt(f'crawler:proxy:{self.uuif}', 'url') + return r_crawler.hget(f'crawler:proxy:{self.uuid}', 'url') #### CRAWLER LACUS #### @@ -1770,7 +1873,11 @@ def ping_lacus(): ping = False req_error = {'error': 'Lacus URL undefined', 'status_code': 400} else: - ping = lacus.is_up + try: + ping = lacus.is_up + except: + req_error = {'error': 'Failed to connect Lacus URL', 'status_code': 400} + ping = False update_lacus_connection_status(ping, req_error=req_error) return 
ping @@ -1787,7 +1894,7 @@ def api_save_lacus_url_key(data): # unpack json manager_url = data.get('url', None) api_key = data.get('api_key', None) - if not manager_url: # or not api_key: + if not manager_url: # or not api_key: return {'status': 'error', 'reason': 'No url or API key supplied'}, 400 # check if is valid url try: @@ -1830,7 +1937,7 @@ def api_set_crawler_max_captures(data): save_nb_max_captures(nb_captures) return nb_captures, 200 - ## TEST ## +## TEST ## def is_test_ail_crawlers_successful(): return r_db.hget('crawler:tor:test', 'success') == 'True' @@ -1903,14 +2010,16 @@ def test_ail_crawlers(): # TODO MOVE ME IN CRAWLER OR FLASK load_blacklist() -# if __name__ == '__main__': -# delete_captures() - -# item_id = 'crawled/2023/02/20/data.gz' -# item = Item(item_id) -# content = item.get_content() -# temp_url = '' -# r = extract_favicon_from_html(content, temp_url) -# print(r) -# _reprocess_all_hars() +if __name__ == '__main__': + # delete_captures() + # item_id = 'crawled/2023/02/20/data.gz' + # item = Item(item_id) + # content = item.get_content() + # temp_url = '' + # r = extract_favicon_from_html(content, temp_url) + # print(r) + # _reprocess_all_hars_cookie_name() + # _reprocess_all_hars_etag() + # _gzip_all_hars() + _reprocess_all_hars_hhhashs() diff --git a/bin/lib/item_basic.py b/bin/lib/item_basic.py index fdfe1059..71fa5378 100755 --- a/bin/lib/item_basic.py +++ b/bin/lib/item_basic.py @@ -129,7 +129,7 @@ def get_item_url(item_id): def get_item_har(item_id): har = '/'.join(item_id.rsplit('/')[-4:]) - har = f'{har}.json' + har = f'{har}.json.gz' path = os.path.join(ConfigLoader.get_hars_dir(), har) if os.path.isfile(path): return har diff --git a/bin/lib/module_extractor.py b/bin/lib/module_extractor.py index d4ea6c78..b6254372 100755 --- a/bin/lib/module_extractor.py +++ b/bin/lib/module_extractor.py @@ -104,9 +104,13 @@ def _get_word_regex(word): def convert_byte_offset_to_string(b_content, offset): byte_chunk = b_content[:offset + 1] - string_chunk = byte_chunk.decode() - offset = len(string_chunk) - 1 - return offset + try: + string_chunk = byte_chunk.decode() + offset = len(string_chunk) - 1 + return offset + except UnicodeDecodeError as e: + logger.error(f'Yara offset converter error, {str(e)}\n{offset}/{len(b_content)}') + return convert_byte_offset_to_string(b_content, offset - 1) # TODO RETRO HUNTS diff --git a/bin/lib/objects/Chats.py b/bin/lib/objects/Chats.py new file mode 100755 index 00000000..bb27413d --- /dev/null +++ b/bin/lib/objects/Chats.py @@ -0,0 +1,309 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from datetime import datetime + +from flask import url_for +# from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id +from lib.data_retention_engine import update_obj_date +from lib.objects import ail_objects +from lib.timeline_engine import Timeline + +from lib.correlations_engine import get_correlation_by_correl_type + +config_loader = ConfigLoader() +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +r_cache = config_loader.get_redis_conn("Redis_Cache") +config_loader = None + + +################################################################################ 
+################################################################################ +################################################################################ + +class Chat(AbstractSubtypeObject): # TODO # ID == username ????? + """ + AIL Chat Object. (strings) + """ + + def __init__(self, id, subtype): + super(Chat, self).__init__('chat', id, subtype) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}' + return url + + def get_svg_icon(self): # TODO + # if self.subtype == 'telegram': + # style = 'fab' + # icon = '\uf2c6' + # elif self.subtype == 'discord': + # style = 'fab' + # icon = '\uf099' + # else: + # style = 'fas' + # icon = '\uf007' + style = 'fas' + icon = '\uf086' + return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5} + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['subtype'] = self.subtype + meta['tags'] = self.get_tags(r_list=True) + return meta + + def get_misp_object(self): + # obj_attrs = [] + # if self.subtype == 'telegram': + # obj = MISPObject('telegram-account', standalone=True) + # obj_attrs.append(obj.add_attribute('username', value=self.id)) + # + # elif self.subtype == 'twitter': + # obj = MISPObject('twitter-account', standalone=True) + # obj_attrs.append(obj.add_attribute('name', value=self.id)) + # + # else: + # obj = MISPObject('user-account', standalone=True) + # obj_attrs.append(obj.add_attribute('username', value=self.id)) + # + # first_seen = self.get_first_seen() + # last_seen = self.get_last_seen() + # if first_seen: + # obj.first_seen = first_seen + # if last_seen: + # obj.last_seen = last_seen + # if not first_seen or not last_seen: + # self.logger.warning( + # f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + # + # for obj_attr in obj_attrs: + # for tag in self.get_tags(): + # obj_attr.add_tag(tag) + # return obj + return + + ############################################################################ + ############################################################################ + + # others optional metas, ... -> # TODO ALL meta in hset + + def get_name(self): # get username ???? 
+ pass + + # users that send at least a message else participants/spectator + # correlation created by messages + def get_users(self): + users = set() + accounts = self.get_correlation('user-account').get('user-account', []) + for account in accounts: + users.add(account[1:]) + return users + + def _get_timeline_username(self): + return Timeline(self.get_global_id(), 'username') + + def get_username(self): + return self._get_timeline_username().get_last_obj_id() + + def get_usernames(self): + return self._get_timeline_username().get_objs_ids() + + def update_username_timeline(self, username_global_id, timestamp): + self._get_timeline_username().add_timestamp(timestamp, username_global_id) + + + # def get_last_message_id(self): + # + # return r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last:message:id') + + def get_obj_message_id(self, obj_id): + if obj_id.endswith('.gz'): + obj_id = obj_id[:-3] + return int(obj_id.split('_')[-1]) + + def _get_message_timestamp(self, obj_global_id): + return r_object.zscore(f'messages:{self.type}:{self.subtype}:{self.id}', obj_global_id) + + def _get_messages(self): + return r_object.zrange(f'messages:{self.type}:{self.subtype}:{self.id}', 0, -1, withscores=True) + + def get_message_meta(self, obj_global_id, parent=True, mess_datetime=None): + obj = ail_objects.get_obj_from_global_id(obj_global_id) + mess_dict = obj.get_meta(options={'content', 'link', 'parent', 'user-account'}) + if mess_dict.get('parent') and parent: + mess_dict['reply_to'] = self.get_message_meta(mess_dict['parent'], parent=False) + if mess_dict.get('user-account'): + user_account = ail_objects.get_obj_from_global_id(mess_dict['user-account']) + mess_dict['user-account'] = {} + mess_dict['user-account']['type'] = user_account.get_type() + mess_dict['user-account']['subtype'] = user_account.get_subtype(r_str=True) + mess_dict['user-account']['id'] = user_account.get_id() + username = user_account.get_username() + if username: + username = ail_objects.get_obj_from_global_id(username).get_default_meta(link=False) + mess_dict['user-account']['username'] = username # TODO get username at the given timestamp ??? + else: + mess_dict['user-account']['id'] = 'UNKNOWN' + + if not mess_datetime: + obj_mess_id = self._get_message_timestamp(obj_global_id) + mess_datetime = datetime.fromtimestamp(obj_mess_id) + mess_dict['date'] = mess_datetime.isoformat(' ') + mess_dict['hour'] = mess_datetime.strftime('%H:%M:%S') + return mess_dict + + + def get_messages(self, start=0, page=1, nb=500): # TODO limit nb returned, # TODO add replies + start = 0 + stop = -1 + # r_object.delete(f'messages:{self.type}:{self.subtype}:{self.id}') + + # TODO chat without username ???? -> chat ID ???? + + messages = {} + curr_date = None + for message in self._get_messages(): + date = datetime.fromtimestamp(message[1]) + date_day = date.strftime('%Y/%m/%d') + if date_day != curr_date: + messages[date_day] = [] + curr_date = date_day + mess_dict = self.get_message_meta(message[0], parent=True, mess_datetime=date) + messages[date_day].append(mess_dict) + return messages + + # Zset with ID ??? id -> item id ??? multiple id == media + text + # id -> media id + # How do we handle reply/thread ??? -> separate with new chats name/id ZSET ??? + # Handle media ??? + + # list of message id -> obj_id + # list of obj_id -> + # abuse parent children ??? 
+ + # def add(self, timestamp, obj_id, mess_id=0, username=None, user_id=None): + # date = # TODO get date from object + # self.update_daterange(date) + # update_obj_date(date, self.type, self.subtype) + # + # + # # daily + # r_object.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1) + # # all subtypes + # r_object.zincrby(f'{self.type}_all:{self.subtype}', 1, self.id) + # + # ####################################################################### + # ####################################################################### + # + # # Correlations + # self.add_correlation('item', '', item_id) + # # domain + # if is_crawled(item_id): + # domain = get_item_domain(item_id) + # self.add_correlation('domain', '', domain) + + # TODO kvrocks exception if key don't exists + def get_obj_by_message_id(self, mess_id): + return r_object.hget(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id) + + # importer -> use cache for previous reply SET to_add_id: previously_imported : expire SET key -> 30 mn + def add_message(self, obj_global_id, timestamp, mess_id, reply_id=None): + r_object.hset(f'messages:ids:{self.type}:{self.subtype}:{self.id}', mess_id, obj_global_id) + r_object.zadd(f'messages:{self.type}:{self.subtype}:{self.id}', {obj_global_id: timestamp}) + + if reply_id: + reply_obj = self.get_obj_by_message_id(reply_id) + if reply_obj: + self.add_obj_children(reply_obj, obj_global_id) + else: + self.add_message_cached_reply(reply_id, mess_id) + + # ADD cached replies + for reply_obj in self.get_cached_message_reply(mess_id): + self.add_obj_children(obj_global_id, reply_obj) + + def _get_message_cached_reply(self, message_id): + return r_cache.smembers(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{message_id}') + + def get_cached_message_reply(self, message_id): + objs_global_id = [] + for mess_id in self._get_message_cached_reply(message_id): + obj_global_id = self.get_obj_by_message_id(mess_id) + if obj_global_id: + objs_global_id.append(obj_global_id) + return objs_global_id + + def add_message_cached_reply(self, reply_to_id, message_id): + r_cache.sadd(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', message_id) + r_cache.expire(f'messages:ids:{self.type}:{self.subtype}:{self.id}:{reply_to_id}', 600) + + # TODO nb replies = nb son ???? what if it create a onion item ??? 
-> need source filtering + + +# TODO factorize +def get_all_subtypes(): + return ail_core.get_object_all_subtypes('chat') + +def get_all(): + objs = {} + for subtype in get_all_subtypes(): + objs[subtype] = get_all_by_subtype(subtype) + return objs + +def get_all_by_subtype(subtype): + return get_all_id('chat', subtype) + +# # TODO FILTER NAME + Key + mail +# def sanitize_username_name_to_search(name_to_search, subtype): # TODO FILTER NAME +# +# return name_to_search +# +# def search_usernames_by_name(name_to_search, subtype, r_pos=False): +# usernames = {} +# # for subtype in subtypes: +# r_name = sanitize_username_name_to_search(name_to_search, subtype) +# if not name_to_search or isinstance(r_name, dict): +# # break +# return usernames +# r_name = re.compile(r_name) +# for user_name in get_all_usernames_by_subtype(subtype): +# res = re.search(r_name, user_name) +# if res: +# usernames[user_name] = {} +# if r_pos: +# usernames[user_name]['hl-start'] = res.start() +# usernames[user_name]['hl-end'] = res.end() +# return usernames + + +if __name__ == '__main__': + chat = Chat('test', 'telegram') + r = chat.get_messages() + print(r) diff --git a/bin/lib/objects/Decodeds.py b/bin/lib/objects/Decodeds.py index 001f7dfd..fb194be1 100755 --- a/bin/lib/objects/Decodeds.py +++ b/bin/lib/objects/Decodeds.py @@ -138,7 +138,7 @@ class Decoded(AbstractDaterangeObject): with open(filepath, 'rb') as f: content = f.read() return content - elif r_str == 'bytesio': + elif r_type == 'bytesio': with open(filepath, 'rb') as f: content = BytesIO(f.read()) return content @@ -149,7 +149,7 @@ class Decoded(AbstractDaterangeObject): with zipfile.ZipFile(zip_content, "w") as zf: # TODO: Fix password # zf.setpassword(b"infected") - zf.writestr(self.id, self.get_content().getvalue()) + zf.writestr(self.id, self.get_content(r_type='bytesio').getvalue()) zip_content.seek(0) return zip_content diff --git a/bin/lib/objects/Domains.py b/bin/lib/objects/Domains.py index 811ea6f7..dc216617 100755 --- a/bin/lib/objects/Domains.py +++ b/bin/lib/objects/Domains.py @@ -389,10 +389,10 @@ class Domain(AbstractObject): har = get_item_har(item_id) if har: print(har) - _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json') + _write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz') # Screenshot screenshot = self._get_external_correlation('item', '', item_id, 'screenshot') - if screenshot: + if screenshot and screenshot['screenshot']: screenshot = screenshot['screenshot'].pop()[1:] screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8], screenshot[8:10], screenshot[10:12], screenshot[12:]) @@ -595,21 +595,22 @@ def get_domains_up_by_filers(domain_types, date_from=None, date_to=None, tags=[] return None def sanitize_domain_name_to_search(name_to_search, domain_type): + if not name_to_search: + return "" if domain_type == 'onion': r_name = r'[a-z0-9\.]+' else: r_name = r'[a-zA-Z0-9-_\.]+' # invalid domain name if not re.fullmatch(r_name, name_to_search): - res = re.match(r_name, name_to_search) - return {'search': name_to_search, 'error': res.string.replace( res[0], '')} + return "" return name_to_search.replace('.', '\.') def search_domain_by_name(name_to_search, domain_types, r_pos=False): domains = {} for domain_type in domain_types: r_name = sanitize_domain_name_to_search(name_to_search, domain_type) - if not name_to_search or isinstance(r_name, dict): + if not r_name: break r_name = re.compile(r_name) for domain in get_domains_up_by_type(domain_type): diff 
--git a/bin/lib/objects/Etags.py b/bin/lib/objects/Etags.py new file mode 100755 index 00000000..eb41f68c --- /dev/null +++ b/bin/lib/objects/Etags.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from hashlib import sha256 +from flask import url_for + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects + +config_loader = ConfigLoader() +r_objects = config_loader.get_db_conn("Kvrocks_Objects") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + +# TODO NEW ABSTRACT OBJECT -> daterange for all objects ???? + +class Etag(AbstractDaterangeObject): + """ + AIL Etag Object. + """ + + def __init__(self, obj_id): + super(Etag, self).__init__('etag', obj_id) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_content(self, r_type='str'): + if r_type == 'str': + return self._get_field('content') + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + # TODO # CHANGE COLOR + def get_svg_icon(self): + return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5} + + def get_misp_object(self): + obj_attrs = [] + obj = MISPObject('etag') + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + obj_attrs.append(obj.add_attribute('etag', value=self.get_content())) + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + def get_nb_seen(self): + return self.get_nb_correlation('domain') + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['tags'] = self.get_tags(r_list=True) + meta['content'] = self.get_content() + return meta + + def add(self, date, obj_id): # date = HAR Date + self._add(date, 'domain', '', obj_id) + + def create(self, content, _first_seen=None, _last_seen=None): + if not isinstance(content, str): + content = content.decode() + self._set_field('content', content) + self._create() + + +def create(content): + if isinstance(content, str): + content = content.encode() + obj_id = sha256(content).hexdigest() + etag = Etag(obj_id) + if not etag.exists(): + etag.create(content) + return etag + + +class Etags(AbstractDaterangeObjects): + """ + Etags Objects + """ + def __init__(self): + super().__init__('etag', Etag) + + def sanitize_id_to_search(self, name_to_search): + return name_to_search # TODO + + +# if __name__ == '__main__': +# name_to_search = '98' +# print(search_cves_by_name(name_to_search)) diff --git a/bin/lib/objects/HHHashs.py b/bin/lib/objects/HHHashs.py new file mode 100755 index 00000000..021ac451 --- /dev/null +++ b/bin/lib/objects/HHHashs.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +# 
-*-coding:UTF-8 -* + +import hashlib +import os +import sys + +from flask import url_for + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects + +config_loader = ConfigLoader() +r_objects = config_loader.get_db_conn("Kvrocks_Objects") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +class HHHash(AbstractDaterangeObject): + """ + AIL HHHash Object. + """ + + def __init__(self, obj_id): + super(HHHash, self).__init__('hhhash', obj_id) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_content(self, r_type='str'): + if r_type == 'str': + return self._get_field('content') + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + # TODO # CHANGE COLOR + def get_svg_icon(self): + return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5} + + def get_misp_object(self): + obj_attrs = [] + obj = MISPObject('hhhash') + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id())) + obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content())) + obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus')) + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + def get_nb_seen(self): + return self.get_nb_correlation('domain') + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['tags'] = self.get_tags(r_list=True) + meta['content'] = self.get_content() + return meta + + def add(self, date, obj_id): # date = HAR Date + self._add(date, 'domain', '', obj_id) + + def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set + self._set_field('content', hhhash_header) + self._create() + + +def create(hhhash_header, hhhash=None): + if not hhhash: + hhhash = hhhash_headers(hhhash_header) + hhhash = HHHash(hhhash) + if not hhhash.exists(): + hhhash.create(hhhash_header) + return hhhash + +def build_hhhash_headers(dict_headers): # filter_dup=True + hhhash = '' + previous_header = '' + for header in dict_headers: + header_name = header.get('name') + if header_name: + if header_name != previous_header: # remove dup headers, filter playwright invalid splitting + hhhash = f'{hhhash}:{header_name}' + previous_header = header_name + hhhash = hhhash[1:] + # print(hhhash) + return hhhash + +def hhhash_headers(header_hhhash): + m = hashlib.sha256() + m.update(header_hhhash.encode()) + digest = m.hexdigest() + return f"hhh:1:{digest}" + + +class HHHashs(AbstractDaterangeObjects): + """ + HHHashs Objects + """ + def __init__(self): + 
super().__init__('hhhash', HHHash) + + def sanitize_id_to_search(self, name_to_search): + return name_to_search # TODO + + +# if __name__ == '__main__': +# name_to_search = '98' +# print(search_cves_by_name(name_to_search)) diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py index 2e35497e..c2edbb40 100755 --- a/bin/lib/objects/Items.py +++ b/bin/lib/objects/Items.py @@ -264,10 +264,9 @@ class Item(AbstractObject): """ if options is None: options = set() - meta = {'id': self.id, - 'date': self.get_date(separator=True), - 'source': self.get_source(), - 'tags': self.get_tags(r_list=True)} + meta = self.get_default_meta(tags=True) + meta['date'] = self.get_date(separator=True) + meta['source'] = self.get_source() # optional meta fields if 'content' in options: meta['content'] = self.get_content() @@ -289,6 +288,8 @@ class Item(AbstractObject): meta['mimetype'] = self.get_mimetype(content=content) if 'investigations' in options: meta['investigations'] = self.get_investigations() + if 'link' in options: + meta['link'] = self.get_link(flask_context=True) # meta['encoding'] = None return meta diff --git a/bin/lib/objects/Messages.py b/bin/lib/objects/Messages.py new file mode 100755 index 00000000..b724f854 --- /dev/null +++ b/bin/lib/objects/Messages.py @@ -0,0 +1,275 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import re +import sys +import cld3 +import html2text + +from datetime import datetime + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ail_core import get_ail_uuid +from lib.objects.abstract_object import AbstractObject +from lib.ConfigLoader import ConfigLoader +from lib.data_retention_engine import update_obj_date, get_obj_date_first +# TODO Set all messages ??? + + +from flask import url_for + +config_loader = ConfigLoader() +r_cache = config_loader.get_redis_conn("Redis_Cache") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +# r_content = config_loader.get_db_conn("Kvrocks_Content") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +# TODO SAVE OR EXTRACT MESSAGE SOURCE FOR ICON ????????? +# TODO iterate on all objects +# TODO also add support for small objects ???? + +# CAN Message exists without CHAT -> no convert it to object + +# ID: source:chat_id:message_id ???? +# +# /!\ handle null chat and message id -> chat = uuid and message = timestamp ??? + + +class Message(AbstractObject): + """ + AIL Message Object. (strings) + """ + + def __init__(self, id): # TODO subtype or use source ???? + super(Message, self).__init__('message', id) # message::< telegram/1692189934.380827/ChatID_MessageID > + + def exists(self): + if self.subtype is None: + return r_object.exists(f'meta:{self.type}:{self.id}') + else: + return r_object.exists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def get_source(self): + """ + Returns source/feeder name + """ + l_source = self.id.split('/')[:-2] + return os.path.join(*l_source) + + def get_basename(self): + return os.path.basename(self.id) + + def get_content(self, r_type='str'): # TODO ADD cache # TODO Compress content ??????? 
+ """ + Returns content + """ + content = self._get_field('content') + if r_type == 'str': + return content + elif r_type == 'bytes': + return content.encode() + + def get_date(self): + timestamp = self.get_timestamp() + return datetime.fromtimestamp(float(timestamp)).strftime('%Y%m%d') + + def get_timestamp(self): + dirs = self.id.split('/') + return dirs[-2] + + def get_message_id(self): # TODO optimize + message_id = self.get_basename().rsplit('_', 1)[1] + # if message_id.endswith('.gz'): + # message_id = message_id[:-3] + return message_id + + def get_chat_id(self): # TODO optimize -> use me to tag Chat + chat_id = self.get_basename().rsplit('_', 1)[0] + # if chat_id.endswith('.gz'): + # chat_id = chat_id[:-3] + return chat_id + + def get_user_account(self): + user_account = self.get_correlation('user-account') + if user_account.get('user-account'): + return f'user-account:{user_account["user-account"].pop()}' + + # Update value on import + # reply to -> parent ? + # reply/comment - > children ? + # nb views + # reactions + # nb fowards + # room ??? + # message from channel ??? + # message media + + def get_translation(self): # TODO support multiple translated languages ????? + """ + Returns translated content + """ + return self._get_field('translated') # TODO multiples translation ... -> use set + + def _set_translation(self, translation): + """ + Set translated content + """ + return self._set_field('translated', translation) # translation by hash ??? -> avoid translating multiple time + + def get_html2text_content(self, content=None, ignore_links=False): + if not content: + content = self.get_content() + h = html2text.HTML2Text() + h.ignore_links = ignore_links + h.ignore_images = ignore_links + return h.handle(content) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True)} + # return payload + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + def get_svg_icon(self): + return {'style': 'fas', 'icon': '\uf4ad', 'color': '#4dffff', 'radius': 5} + + def get_misp_object(self): # TODO + obj = MISPObject('instant-message', standalone=True) + obj_date = self.get_date() + if obj_date: + obj.first_seen = obj_date + else: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}') + + # obj_attrs = [obj.add_attribute('first-seen', value=obj_date), + # obj.add_attribute('raw-data', value=self.id, data=self.get_raw_content()), + # obj.add_attribute('sensor', value=get_ail_uuid())] + obj_attrs = [] + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + # def get_url(self): + # return r_object.hget(f'meta:item::{self.id}', 'url') + + # options: set of optional meta fields + def get_meta(self, options=None): + """ + :type options: set + """ + if options is None: + options = set() + meta = self.get_default_meta(tags=True) + meta['date'] = self.get_date() # TODO replace me by timestamp ?????? 
+ meta['source'] = self.get_source() + # optional meta fields + if 'content' in options: + meta['content'] = self.get_content() + if 'parent' in options: + meta['parent'] = self.get_parent() + if 'investigations' in options: + meta['investigations'] = self.get_investigations() + if 'link' in options: + meta['link'] = self.get_link(flask_context=True) + if 'user-account' in options: + meta['user-account'] = self.get_user_account() + + # meta['encoding'] = None + return meta + + def _languages_cleaner(self, content=None): + if not content: + content = self.get_content() + # REMOVE URLS + regex = r'\b(?:http://|https://)?(?:[a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*\b' + url_regex = re.compile(regex) + urls = url_regex.findall(content) + urls = sorted(urls, key=len, reverse=True) + for url in urls: + content = content.replace(url, '') + # REMOVE PGP Blocks + regex_pgp_public_blocs = r'-----BEGIN PGP PUBLIC KEY BLOCK-----[\s\S]+?-----END PGP PUBLIC KEY BLOCK-----' + regex_pgp_signature = r'-----BEGIN PGP SIGNATURE-----[\s\S]+?-----END PGP SIGNATURE-----' + regex_pgp_message = r'-----BEGIN PGP MESSAGE-----[\s\S]+?-----END PGP MESSAGE-----' + re.compile(regex_pgp_public_blocs) + re.compile(regex_pgp_signature) + re.compile(regex_pgp_message) + res = re.findall(regex_pgp_public_blocs, content) + for it in res: + content = content.replace(it, '') + res = re.findall(regex_pgp_signature, content) + for it in res: + content = content.replace(it, '') + res = re.findall(regex_pgp_message, content) + for it in res: + content = content.replace(it, '') + return content + + def detect_languages(self, min_len=600, num_langs=3, min_proportion=0.2, min_probability=0.7): + languages = [] + ## CLEAN CONTENT ## + content = self.get_html2text_content(ignore_links=True) + content = self._languages_cleaner(content=content) + # REMOVE USELESS SPACE + content = ' '.join(content.split()) + # - CLEAN CONTENT - # + if len(content) >= min_len: + for lang in cld3.get_frequent_languages(content, num_langs=num_langs): + if lang.proportion >= min_proportion and lang.probability >= min_probability and lang.is_reliable: + languages.append(lang) + return languages + + # def translate(self, content=None): # TODO translation plugin + # # TODO get text language + # if not content: + # content = self.get_content() + # translated = argostranslate.translate.translate(content, 'ru', 'en') + # # Save translation + # self._set_translation(translated) + # return translated + + def create(self, content, translation, tags): + self._set_field('content', content) + # r_content.get(f'content:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', content) + if translation: + self._set_translation(translation) + for tag in tags: + self.add_tag(tag) + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + pass + +def create_obj_id(source, chat_id, message_id, timestamp): + return f'{source}/{timestamp}/{chat_id}_{message_id}' + +# TODO Check if already exists +# def create(source, chat_id, message_id, timestamp, content, tags=[]): +def create(obj_id, content, translation=None, tags=[]): + message = Message(obj_id) + if not message.exists(): + message.create(content, translation, tags) + return message + + +# TODO Encode translation + + +if __name__ == '__main__': + r = 'test' + print(r) diff --git a/bin/lib/objects/Screenshots.py b/bin/lib/objects/Screenshots.py index 19ae3754..26f8543f 100755 --- a/bin/lib/objects/Screenshots.py +++ b/bin/lib/objects/Screenshots.py @@ 
-88,7 +88,7 @@ class Screenshot(AbstractObject): return obj def get_meta(self, options=set()): - meta = {'id': self.id} + meta = self.get_default_meta() meta['img'] = get_screenshot_rel_path(self.id) ######### # TODO: Rename ME ?????? meta['tags'] = self.get_tags(r_list=True) if 'tags_safe' in options: diff --git a/bin/lib/objects/UsersAccount.py b/bin/lib/objects/UsersAccount.py new file mode 100755 index 00000000..5bc94a9c --- /dev/null +++ b/bin/lib/objects/UsersAccount.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys +# import re + +from flask import url_for +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id +from lib.timeline_engine import Timeline + +config_loader = ConfigLoader() +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +################################################################################ +################################################################################ +################################################################################ + +class UserAccount(AbstractSubtypeObject): + """ + AIL User Object. (strings) + """ + + def __init__(self, id, subtype): + super(UserAccount, self).__init__('user-account', id, subtype) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, subtype=self.subtype, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&subtype={self.subtype}&id={self.id}' + return url + + def get_svg_icon(self): # TODO change icon/color + if self.subtype == 'telegram': + style = 'fab' + icon = '\uf2c6' + elif self.subtype == 'twitter': + style = 'fab' + icon = '\uf099' + else: + style = 'fas' + icon = '\uf007' + return {'style': style, 'icon': icon, 'color': '#4dffff', 'radius': 5} + + def get_first_name(self): + return self._get_field('firstname') + + def get_last_name(self): + return self._get_field('lastname') + + def get_phone(self): + return self._get_field('phone') + + def set_first_name(self, firstname): + return self._set_field('firstname', firstname) + + def set_last_name(self, lastname): + return self._set_field('lastname', lastname) + + def set_phone(self, phone): + return self._set_field('phone', phone) + + def _get_timeline_username(self): + return Timeline(self.get_global_id(), 'username') + + def get_username(self): + return self._get_timeline_username().get_last_obj_id() + + def get_usernames(self): + return self._get_timeline_username().get_objs_ids() + + def update_username_timeline(self, username_global_id, timestamp): + self._get_timeline_username().add_timestamp(timestamp, username_global_id) + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['subtype'] = self.subtype + meta['tags'] = self.get_tags(r_list=True) + if 'username' in options: + meta['username'] = self.get_username() + if 'usernames' in options: + meta['usernames'] = self.get_usernames() + return meta + + def 
get_misp_object(self): + obj_attrs = [] + if self.subtype == 'telegram': + obj = MISPObject('telegram-account', standalone=True) + obj_attrs.append(obj.add_attribute('username', value=self.id)) + + elif self.subtype == 'twitter': + obj = MISPObject('twitter-account', standalone=True) + obj_attrs.append(obj.add_attribute('name', value=self.id)) + + else: + obj = MISPObject('user-account', standalone=True) + obj_attrs.append(obj.add_attribute('username', value=self.id)) + + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + +def get_user_by_username(): + pass + +def get_all_subtypes(): + return ail_core.get_object_all_subtypes('user-account') + +def get_all(): + users = {} + for subtype in get_all_subtypes(): + users[subtype] = get_all_by_subtype(subtype) + return users + +def get_all_by_subtype(subtype): + return get_all_id('user-account', subtype) + + +# if __name__ == '__main__': +# name_to_search = 'co' +# subtype = 'telegram' +# print(search_usernames_by_name(name_to_search, subtype)) diff --git a/bin/lib/objects/abstract_daterange_object.py b/bin/lib/objects/abstract_daterange_object.py index b96c5ec4..98aa49c2 100755 --- a/bin/lib/objects/abstract_daterange_object.py +++ b/bin/lib/objects/abstract_daterange_object.py @@ -45,10 +45,10 @@ class AbstractDaterangeObject(AbstractObject, ABC): def exists(self): return r_object.exists(f'meta:{self.type}:{self.id}') - def _get_field(self, field): + def _get_field(self, field): # TODO remove me (NEW in abstract) return r_object.hget(f'meta:{self.type}:{self.id}', field) - def _set_field(self, field, value): + def _set_field(self, field, value): # TODO remove me (NEW in abstract) return r_object.hset(f'meta:{self.type}:{self.id}', field, value) def get_first_seen(self, r_int=False): @@ -82,9 +82,10 @@ class AbstractDaterangeObject(AbstractObject, ABC): return int(nb) def _get_meta(self, options=[]): - meta_dict = {'first_seen': self.get_first_seen(), - 'last_seen': self.get_last_seen(), - 'nb_seen': self.get_nb_seen()} + meta_dict = self.get_default_meta() + meta_dict['first_seen'] = self.get_first_seen() + meta_dict['last_seen'] = self.get_last_seen() + meta_dict['nb_seen'] = self.get_nb_seen() if 'sparkline' in options: meta_dict['sparkline'] = self.get_sparkline() return meta_dict diff --git a/bin/lib/objects/abstract_object.py b/bin/lib/objects/abstract_object.py index cb7595ad..a3f25216 100755 --- a/bin/lib/objects/abstract_object.py +++ b/bin/lib/objects/abstract_object.py @@ -20,6 +20,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from lib import ail_logger from lib import Tag +from lib.ConfigLoader import ConfigLoader from lib import Duplicate from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations @@ -27,6 +28,11 @@ from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers logging.config.dictConfig(ail_logger.get_config(name='ail')) +config_loader = 
ConfigLoader() +# r_cache = config_loader.get_redis_conn("Redis_Cache") +r_object = config_loader.get_db_conn("Kvrocks_Objects") +config_loader = None + class AbstractObject(ABC): """ Abstract Object @@ -59,14 +65,28 @@ class AbstractObject(ABC): def get_global_id(self): return f'{self.get_type()}:{self.get_subtype(r_str=True)}:{self.get_id()}' - def get_default_meta(self, tags=False): + def get_default_meta(self, tags=False, link=False): dict_meta = {'id': self.get_id(), 'type': self.get_type(), - 'subtype': self.get_subtype()} + 'subtype': self.get_subtype(r_str=True)} if tags: dict_meta['tags'] = self.get_tags() + if link: + dict_meta['link'] = self.get_link() return dict_meta + def _get_field(self, field): + if self.subtype is None: + return r_object.hget(f'meta:{self.type}:{self.id}', field) + else: + return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field) + + def _set_field(self, field, value): + if self.subtype is None: + return r_object.hset(f'meta:{self.type}:{self.id}', field, value) + else: + return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value) + ## Tags ## def get_tags(self, r_list=False): tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True)) @@ -198,6 +218,8 @@ class AbstractObject(ABC): else: return [] + ## Correlation ## + def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type): """ Get object correlation @@ -253,3 +275,39 @@ class AbstractObject(ABC): Get object correlations """ delete_obj_correlation(self.type, self.subtype, self.id, type2, subtype2, id2) + + ## -Correlation- ## + + ## Parent ## + + def is_parent(self): + return r_object.exists(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def is_children(self): + return r_object.hexists(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent') + + def get_parent(self): + return r_object.hget(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent') + + def get_children(self): + return r_object.smembers(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}') + + def set_parent(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ###################### + if not obj_global_id: + if obj_subtype is None: + obj_subtype = '' + obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' + r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', 'parent', obj_global_id) + + def add_children(self, obj_type=None, obj_subtype=None, obj_id=None, obj_global_id=None): # TODO ###################### + if not obj_global_id: + if obj_subtype is None: + obj_subtype = '' + obj_global_id = f'{obj_type}:{obj_subtype}:{obj_id}' + r_object.sadd(f'child:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', obj_global_id) + + def add_obj_children(self, parent_global_id, son_global_id): + r_object.sadd(f'child:{parent_global_id}', son_global_id) + r_object.hset(f'meta:{son_global_id}', 'parent', parent_global_id) + + ## Parent ## diff --git a/bin/lib/objects/abstract_subtype_object.py b/bin/lib/objects/abstract_subtype_object.py index 82bb85f6..007f716b 100755 --- a/bin/lib/objects/abstract_subtype_object.py +++ b/bin/lib/objects/abstract_subtype_object.py @@ -151,7 +151,7 @@ class AbstractSubtypeObject(AbstractObject, ABC): # # - def add(self, date, item_id): + def add(self, date, obj=None): self.update_daterange(date) update_obj_date(date, self.type, self.subtype) # daily @@ -162,20 +162,22 @@ class 
AbstractSubtypeObject(AbstractObject, ABC): ####################################################################### ####################################################################### - # Correlations - self.add_correlation('item', '', item_id) - # domain - if is_crawled(item_id): - domain = get_item_domain(item_id) - self.add_correlation('domain', '', domain) + if obj: + # Correlations + self.add_correlation(obj.type, obj.get_subtype(r_str=True), obj.get_id()) + if obj.type == 'item': # TODO same for message->chat ??? + item_id = obj.get_id() + # domain + if is_crawled(item_id): + domain = get_item_domain(item_id) + self.add_correlation('domain', '', domain) # TODO:ADD objects + Stats def create(self, first_seen, last_seen): self.set_first_seen(first_seen) self.set_last_seen(last_seen) - def _delete(self): pass diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index 01990996..89be336f 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -13,16 +13,21 @@ from lib import correlations_engine from lib import btc_ail from lib import Tag +from lib.objects import Chats from lib.objects import CryptoCurrencies from lib.objects import CookiesNames from lib.objects.Cves import Cve from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects from lib.objects.Domains import Domain +from lib.objects import Etags from lib.objects.Favicons import Favicon +from lib.objects import HHHashs from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects +from lib.objects.Messages import Message from lib.objects import Pgps from lib.objects.Screenshots import Screenshot from lib.objects import Titles +from lib.objects.UsersAccount import UserAccount from lib.objects import Usernames config_loader = ConfigLoader() @@ -53,12 +58,20 @@ def get_object(obj_type, subtype, obj_id): return Domain(obj_id) elif obj_type == 'decoded': return Decoded(obj_id) + elif obj_type == 'chat': + return Chats.Chat(obj_id, subtype) elif obj_type == 'cookie-name': return CookiesNames.CookieName(obj_id) elif obj_type == 'cve': return Cve(obj_id) + elif obj_type == 'etag': + return Etags.Etag(obj_id) elif obj_type == 'favicon': return Favicon(obj_id) + elif obj_type == 'hhhash': + return HHHashs.HHHash(obj_id) + elif obj_type == 'message': + return Message(obj_id) elif obj_type == 'screenshot': return Screenshot(obj_id) elif obj_type == 'cryptocurrency': @@ -67,6 +80,8 @@ def get_object(obj_type, subtype, obj_id): return Pgps.Pgp(obj_id, subtype) elif obj_type == 'title': return Titles.Title(obj_id) + elif obj_type == 'user-account': + return UserAccount(obj_id, subtype) elif obj_type == 'username': return Usernames.Username(obj_id, subtype) @@ -101,9 +116,12 @@ def get_obj_global_id(obj_type, subtype, obj_id): obj = get_object(obj_type, subtype, obj_id) return obj.get_global_id() +def get_obj_type_subtype_id_from_global_id(global_id): + obj_type, subtype, obj_id = global_id.split(':', 2) + return obj_type, subtype, obj_id def get_obj_from_global_id(global_id): - obj = global_id.split(':', 3) + obj = get_obj_type_subtype_id_from_global_id(global_id) return get_object(obj[0], obj[1], obj[2]) @@ -159,7 +177,7 @@ def get_objects_meta(objs, options=set(), flask_context=False): subtype = obj[1] obj_id = obj[2] else: - obj_type, subtype, obj_id = obj.split(':', 2) + obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj) metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, 
flask_context=flask_context)) return metas @@ -168,7 +186,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False): obj = get_object(obj_type, subtype, id) meta = obj.get_meta() meta['icon'] = obj.get_svg_icon() - if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon': + if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash': meta['sparkline'] = obj.get_sparkline() if obj_type == 'cve': meta['cve_search'] = obj.get_cve_search() @@ -177,6 +195,8 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False): if subtype == 'bitcoin' and related_btc: meta["related_btc"] = btc_ail.get_bitcoin_info(obj.id) if obj.get_type() == 'decoded': + meta['mimetype'] = obj.get_mimetype() + meta['size'] = obj.get_size() meta["vt"] = obj.get_meta_vt() meta["vt"]["status"] = obj.is_vt_enabled() # TAGS MODAL @@ -333,8 +353,8 @@ def get_obj_correlations(obj_type, subtype, obj_id): obj = get_object(obj_type, subtype, obj_id) return obj.get_correlations() -def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max): - if len(objs) < nb_max or nb_max == -1: +def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden): + if len(objs) < nb_max or nb_max == 0: if lvl == 0: objs.add((obj_type, subtype, obj_id)) @@ -346,15 +366,17 @@ def _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lv for obj2_type in correlations: for str_obj in correlations[obj2_type]: obj2_subtype, obj2_id = str_obj.split(':', 1) - _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max) + if get_obj_global_id(obj2_type, obj2_subtype, obj2_id) in objs_hidden: + continue # filter object to hide + _get_obj_correlations_objs(objs, obj2_type, obj2_subtype, obj2_id, filter_types, lvl, nb_max, objs_hidden) -def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300): +def get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()): objs = set() - _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max) + _get_obj_correlations_objs(objs, obj_type, subtype, obj_id, filter_types, lvl, nb_max, objs_hidden) return objs -def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300): - objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max) +def obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=[], lvl=0, nb_max=300, objs_hidden=set()): + objs = get_obj_correlations_objs(obj_type, subtype, obj_id, filter_types=filter_types, lvl=lvl, nb_max=nb_max, objs_hidden=objs_hidden) # print(objs) for obj_tuple in objs: obj1_type, subtype1, id1 = obj_tuple @@ -395,7 +417,7 @@ def create_correlation_graph_links(links_set): def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True): graph_nodes_list = [] for node_id in nodes_set: - obj_type, subtype, obj_id = node_id.split(':', 2) + obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id) dict_node = {'id': node_id} dict_node['style'] = get_object_svg(obj_type, subtype, obj_id) @@ -416,10 +438,12 @@ def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True): def get_correlations_graph_node(obj_type, subtype, obj_id, 
filter_types=[], max_nodes=300, level=1, + objs_hidden=set(), flask_context=False): obj_str_id, nodes, links, meta = correlations_engine.get_correlations_graph_nodes_links(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, + objs_hidden=objs_hidden, flask_context=flask_context) # print(meta) meta['objs'] = list(meta['objs']) diff --git a/bin/lib/timeline_engine.py b/bin/lib/timeline_engine.py new file mode 100755 index 00000000..58c222f6 --- /dev/null +++ b/bin/lib/timeline_engine.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from uuid import uuid4 + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader + +config_loader = ConfigLoader() +r_meta = config_loader.get_db_conn("Kvrocks_Timeline") +config_loader = None + +# CORRELATION_TYPES_BY_OBJ = { +# "chat": ["item", "username"], # item ??? +# "cookie-name": ["domain"], +# "cryptocurrency": ["domain", "item"], +# "cve": ["domain", "item"], +# "decoded": ["domain", "item"], +# "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"], +# "etag": ["domain"], +# "favicon": ["domain", "item"], +# "hhhash": ["domain"], +# "item": ["chat", "cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], +# "pgp": ["domain", "item"], +# "screenshot": ["domain", "item"], +# "title": ["domain", "item"], +# "username": ["chat", "domain", "item"], +# } +# +# def get_obj_correl_types(obj_type): +# return CORRELATION_TYPES_BY_OBJ.get(obj_type) + +# def sanityze_obj_correl_types(obj_type, correl_types): +# obj_correl_types = get_obj_correl_types(obj_type) +# if correl_types: +# correl_types = set(correl_types).intersection(obj_correl_types) +# if not correl_types: +# correl_types = obj_correl_types +# if not correl_types: +# return [] +# return correl_types + +class Timeline: + + def __init__(self, global_id, name): + self.id = global_id + self.name = name + + def _get_block_obj_global_id(self, block): + return r_meta.hget(f'block:{self.id}:{self.name}', block) + + def _set_block_obj_global_id(self, block, global_id): + return r_meta.hset(f'block:{self.id}:{self.name}', block, global_id) + + def _get_block_timestamp(self, block, position): + return r_meta.zscore(f'line:{self.id}:{self.name}', f'{position}:{block}') + + def _get_nearest_bloc_inf(self, timestamp): + inf = r_meta.zrevrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), 0, start=0, num=1, withscores=True) + if inf: + inf, score = inf[0] + if inf.startswith('end'): + inf_key = f'start:{inf[4:]}' + inf_score = r_meta.zscore(f'line:{self.id}:{self.name}', inf_key) + if inf_score == score: + inf = inf_key + return inf + else: + return None + + def _get_nearest_bloc_sup(self, timestamp): + sup = r_meta.zrangebyscore(f'line:{self.id}:{self.name}', float(timestamp), '+inf', start=0, num=1, withscores=True) + if sup: + sup, score = sup[0] + if sup.startswith('start'): + sup_key = f'end:{sup[6:]}' + sup_score = r_meta.zscore(f'line:{self.id}:{self.name}', sup_key) + if score == sup_score: + sup = sup_key + return sup + else: + return None + + def get_first_obj_id(self): + first = r_meta.zrange(f'line:{self.id}:{self.name}', 0, 0) + if first: # start:block + first = first[0] + if first.startswith('start:'): + first = first[6:] + else: + first = first[4:] + return 
self._get_block_obj_global_id(first)
+
+    def get_last_obj_id(self):
+        last = r_meta.zrevrange(f'line:{self.id}:{self.name}', 0, 0)
+        if last:  # end:block
+            last = last[0]
+            if last.startswith('end:'):
+                last = last[4:]
+            else:
+                last = last[6:]
+        return self._get_block_obj_global_id(last)
+
+    def get_objs_ids(self):
+        objs = set()
+        for block in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1):
+            if block:
+                if block.startswith('start:'):
+                    objs.add(self._get_block_obj_global_id(block[6:]))
+        return objs
+
+    # def get_objs_ids(self):
+    #     objs = {}
+    #     last_obj_id = None
+    #     for block, timestamp in r_meta.zrange(f'line:{self.id}:{self.name}', 0, -1, withscores=True):
+    #         if block:
+    #             if block.startswith('start:'):
+    #                 last_obj_id = self._get_block_obj_global_id(block[6:])
+    #                 objs[last_obj_id] = {'first_seen': timestamp}
+    #             else:
+    #                 objs[last_obj_id]['last_seen'] = timestamp
+    #     return objs
+
+    def _update_bloc(self, block, position, timestamp):
+        r_meta.zadd(f'line:{self.id}:{self.name}', {f'{position}:{block}': timestamp})
+
+    def _add_bloc(self, obj_global_id, timestamp, end=None):
+        if end:
+            timestamp_end = end
+        else:
+            timestamp_end = timestamp
+        new_bloc = str(uuid4())
+        r_meta.zadd(f'line:{self.id}:{self.name}', {f'start:{new_bloc}': timestamp, f'end:{new_bloc}': timestamp_end})
+        self._set_block_obj_global_id(new_bloc, obj_global_id)
+        return new_bloc
+
+    def add_timestamp(self, timestamp, obj_global_id):
+        inf = self._get_nearest_bloc_inf(timestamp)
+        sup = self._get_nearest_bloc_sup(timestamp)
+        if not inf and not sup:
+            # create new bloc
+            new_bloc = self._add_bloc(obj_global_id, timestamp)
+            return new_bloc
+        # timestamp < first_seen
+        elif not inf:
+            sup_pos, sup_id = sup.split(':')
+            sup_obj = self._get_block_obj_global_id(sup_id)
+            if sup_obj == obj_global_id:
+                self._update_bloc(sup_id, 'start', timestamp)
+            # create new bloc
+            else:
+                new_bloc = self._add_bloc(obj_global_id, timestamp)
+                return new_bloc
+
+        # timestamp > last_seen
+        elif not sup:
+            inf_pos, inf_id = inf.split(':')
+            inf_obj = self._get_block_obj_global_id(inf_id)
+            if inf_obj == obj_global_id:
+                self._update_bloc(inf_id, 'end', timestamp)
+            # create new bloc
+            else:
+                new_bloc = self._add_bloc(obj_global_id, timestamp)
+                return new_bloc
+
+        else:
+            inf_pos, inf_id = inf.split(':')
+            sup_pos, sup_id = sup.split(':')
+            inf_obj = self._get_block_obj_global_id(inf_id)
+
+            if inf_id == sup_id:
+                # reduce bloc + create two new blocs
+                if obj_global_id != inf_obj:
+                    # get end timestamp
+                    sup_timestamp = self._get_block_timestamp(sup_id, 'end')
+                    # reduce original bloc
+                    self._update_bloc(inf_id, 'end', timestamp - 1)
+                    # Insert new bloc
+                    new_bloc = self._add_bloc(obj_global_id, timestamp)
+                    # Recreate the end of the first bloc as a new bloc
+                    self._add_bloc(inf_obj, timestamp + 1, end=sup_timestamp)
+                    return new_bloc
+
+                # timestamp in existing bloc
+                else:
+                    return inf_id
+
+            # different blocs: extend the sup/inf bloc or create a new bloc in between
+            elif inf_pos == 'end' and sup_pos == 'start':
+                # Extend inf bloc
+                if obj_global_id == inf_obj:
+                    self._update_bloc(inf_id, 'end', timestamp)
+                    return inf_id
+
+                sup_obj = self._get_block_obj_global_id(sup_id)
+                # Extend sup bloc
+                if obj_global_id == sup_obj:
+                    self._update_bloc(sup_id, 'start', timestamp)
+                    return sup_id
+
+                # create new bloc
+                new_bloc = self._add_bloc(obj_global_id, timestamp)
+                return new_bloc
+
+            # inf_pos == 'start' and sup_pos == 'end'
+            # else raise error ???
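The Timeline above stores, for each (object, attribute-name) pair, a sequence of blocs: each bloc is a start/end member in the line:<id>:<name> sorted set plus a block -> obj_global_id mapping, so the history of e.g. a username can be replayed. A short usage sketch, assuming a configured Kvrocks_Timeline backend and AIL_BIN in the environment; all ids are hypothetical:

    import os
    import sys
    import time

    sys.path.append(os.environ['AIL_BIN'])
    from lib.timeline_engine import Timeline

    # Track the username history of one hypothetical user-account object
    timeline = Timeline('user-account:telegram:123456', 'username')

    now = int(time.time())
    timeline.add_timestamp(now - 86400, 'username:telegram:old_name')  # creates the first bloc
    timeline.add_timestamp(now - 3600, 'username:telegram:old_name')   # same object -> extends that bloc
    timeline.add_timestamp(now, 'username:telegram:new_name')          # new object -> opens a new bloc

    print(timeline.get_last_obj_id())  # username:telegram:new_name
    print(timeline.get_objs_ids())     # both username global ids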
diff --git a/bin/modules/Cryptocurrencies.py b/bin/modules/Cryptocurrencies.py index fd5c5402..5a83689f 100755 --- a/bin/modules/Cryptocurrencies.py +++ b/bin/modules/Cryptocurrencies.py @@ -130,7 +130,7 @@ class Cryptocurrencies(AbstractModule, ABC): if crypto.is_valid_address(): # print(address) is_valid_address = True - crypto.add(date, item_id) + crypto.add(date, item) # Check private key if is_valid_address: diff --git a/bin/modules/Mixer.py b/bin/modules/Mixer.py index b8f2bedf..62c427e3 100755 --- a/bin/modules/Mixer.py +++ b/bin/modules/Mixer.py @@ -131,7 +131,7 @@ class Mixer(AbstractModule): self.last_refresh = time.time() self.clear_feeders_stat() - time.sleep(0.5) + time.sleep(0.5) def computeNone(self): self.refresh_stats() diff --git a/bin/modules/Onion.py b/bin/modules/Onion.py index 2066e9a3..681bae0c 100755 --- a/bin/modules/Onion.py +++ b/bin/modules/Onion.py @@ -42,7 +42,8 @@ class Onion(AbstractModule): self.faup = crawlers.get_faup() # activate_crawler = p.config.get("Crawler", "activate_crawler") - + self.har = config_loader.get_config_boolean('Crawler', 'default_har') + self.screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot') self.onion_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.onion)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" # self.i2p_regex = r"((http|https|ftp)?(?:\://)?([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.i2p)(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)" @@ -90,8 +91,9 @@ class Onion(AbstractModule): if onion_urls: if crawlers.is_crawler_activated(): - for domain in domains: # TODO LOAD DEFAULT SCREENSHOT + HAR - task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0) + for domain in domains: + task_uuid = crawlers.create_task(domain, parent=item.get_id(), priority=0, + har=self.har, screenshot=self.screenshot) if task_uuid: print(f'{domain} added to crawler queue: {task_uuid}') else: diff --git a/bin/modules/PgpDump.py b/bin/modules/PgpDump.py index 1e8a27a7..82ec9f32 100755 --- a/bin/modules/PgpDump.py +++ b/bin/modules/PgpDump.py @@ -210,18 +210,18 @@ class PgpDump(AbstractModule): date = item.get_date() for key in self.keys: pgp = Pgps.Pgp(key, 'key') - pgp.add(date, self.item_id) + pgp.add(date, item) print(f' key: {key}') for name in self.names: pgp = Pgps.Pgp(name, 'name') - pgp.add(date, self.item_id) + pgp.add(date, item) print(f' name: {name}') self.tracker_term.compute(name, obj_type='pgp', subtype='name') self.tracker_regex.compute(name, obj_type='pgp', subtype='name') self.tracker_yara.compute(name, obj_type='pgp', subtype='name') for mail in self.mails: pgp = Pgps.Pgp(mail, 'mail') - pgp.add(date, self.item_id) + pgp.add(date, item) print(f' mail: {mail}') self.tracker_term.compute(mail, obj_type='pgp', subtype='mail') self.tracker_regex.compute(mail, obj_type='pgp', subtype='mail') diff --git a/bin/modules/Telegram.py 
b/bin/modules/Telegram.py index 273d20b9..140948c2 100755 --- a/bin/modules/Telegram.py +++ b/bin/modules/Telegram.py @@ -58,7 +58,7 @@ class Telegram(AbstractModule): user_id = dict_url.get('username') if user_id: username = Username(user_id, 'telegram') - username.add(item_date, item.id) + username.add(item_date, item) print(f'username: {user_id}') invite_hash = dict_url.get('invite_hash') if invite_hash: @@ -73,7 +73,7 @@ class Telegram(AbstractModule): user_id = dict_url.get('username') if user_id: username = Username(user_id, 'telegram') - username.add(item_date, item.id) + username.add(item_date, item) print(f'username: {user_id}') invite_hash = dict_url.get('invite_hash') if invite_hash: diff --git a/bin/update-background.py b/bin/update-background.py index f5a3c58e..c6f81d57 100755 --- a/bin/update-background.py +++ b/bin/update-background.py @@ -10,6 +10,8 @@ Update AIL in the background """ import os +import logging +import logging.config import sys import subprocess @@ -17,37 +19,55 @@ sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## +from lib import ail_logger from lib import ail_updates -def launch_background_upgrade(version, l_script_name): - if ail_updates.is_version_in_background_update(version): - ail_updates.start_background_update(version) +logging.config.dictConfig(ail_logger.get_config(name='updates')) +def launch_background_upgrade(version): + logger = logging.getLogger() + logger.warning(f'launching background update {version}') + update = ail_updates.AILBackgroundUpdate(version) + nb_done = update.get_nb_scripts_done() + update.start() + scripts = update.get_scripts() + scripts = scripts[nb_done:] + for script in scripts: + print('launching background script update', script) + # launch script + update.start_script(script) + script_path = update.get_script_path() + if script_path: + try: + process = subprocess.run(['python', script_path]) + if process.returncode != 0: + stderr = process.stderr + if stderr: + error = stderr.decode() + logger.error(error) + update.set_error(error) + else: + update.set_error('Error Updater Script') + logger.error('Error Updater Script') + sys.exit(0) + except Exception as e: + update.set_error(str(e)) + logger.error(str(e)) + sys.exit(0) - for script_name in l_script_name: - ail_updates.set_current_background_update_script(script_name) - update_file = ail_updates.get_current_background_update_script_path(version, script_name) + if not update.get_error(): + update.end_script() + else: + logger.warning('Updater exited on error') + sys.exit(0) - # # TODO: Get error output - process = subprocess.run(['python', update_file]) - - update_progress = ail_updates.get_current_background_update_progress() - if update_progress == 100: - ail_updates.end_background_update_script() - # # TODO: Create Custom error - # 'Please relaunch the bin/update-background.py script' - # # TODO: Create Class background update - - ail_updates.end_background_update(version) + update.end() + logger.warning(f'ending background update {version}') if __name__ == "__main__": - - if not ail_updates.exits_background_update_to_launch(): - ail_updates.clear_background_update() + if ail_updates.is_update_background_running(): + v = ail_updates.get_update_background_version() + launch_background_upgrade(v) else: - launch_background_upgrade('v1.5', ['Update-ARDB_Onions.py', 'Update-ARDB_Metadata.py', 'Update-ARDB_Tags.py', - 'Update-ARDB_Tags_background.py', 'Update-ARDB_Onions_screenshots.py']) 
-        launch_background_upgrade('v2.6', ['Update_screenshots.py'])
-        launch_background_upgrade('v2.7', ['Update_domain_tags.py'])
-        launch_background_upgrade('v3.4', ['Update_domain.py'])
-        launch_background_upgrade('v3.7', ['Update_trackers.py'])
+        for ver in ail_updates.get_update_background_to_launch():
+            launch_background_upgrade(ver)
diff --git a/configs/6383.conf b/configs/6383.conf
index c730003c..a06d4e69 100644
--- a/configs/6383.conf
+++ b/configs/6383.conf
@@ -663,6 +663,7 @@ namespace.crawl ail_crawlers
 namespace.db ail_datas
 namespace.dup ail_dups
 namespace.obj ail_objs
+namespace.tl ail_tls
 namespace.stat ail_stats
 namespace.tag ail_tags
 namespace.track ail_trackers
diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample
index 62e9efc3..9d7bb390 100644
--- a/configs/core.cfg.sample
+++ b/configs/core.cfg.sample
@@ -45,6 +45,10 @@ sender = sender@example.com
 sender_host = smtp.example.com
 sender_port = 1337
 sender_pw = None
+# Only needed for SMTP over SSL, i.e. when the mail server doesn't support STARTTLS (which is tried by default). Use this option to validate the server certificate.
+cert_required = False
+# Only needed for SMTP over SSL if you want to validate your self-signed SSL certificate
+ca_file =
 # Only needed when the credentials for email server needs a username instead of an email address
 #sender_user = sender
 sender_user =
@@ -191,6 +195,11 @@ host = localhost
 port = 6383
 password = ail_objs
 
+[Kvrocks_Timeline]
+host = localhost
+port = 6383
+password = ail_tls
+
 [Kvrocks_Stats]
 host = localhost
 port = 6383
diff --git a/doc/README.md b/doc/README.md
index d6b707e8..aee47955 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -89,12 +89,12 @@ Available Importers:
 5. Launch ail-framework, pystemon and PystemonImporter.py (all within the virtual environment):
     - Option 1 (recommended):
     ```
-    ./ail-framework/bin/LAUNCH.py -l    #starts ail-framework
-    ./ail-framework/bin/LAUNCH.py -f    #starts pystemon and the PystemonImporter.py
+    ./ail-framework/bin/LAUNCH.sh -l    #starts ail-framework
+    ./ail-framework/bin/LAUNCH.sh -f    #starts pystemon and the PystemonImporter.py
     ```
     - Option 2 (may require two terminal windows):
     ```
-    ./ail-framework/bin/LAUNCH.py -l    #starts ail-framework
+    ./ail-framework/bin/LAUNCH.sh -l    #starts ail-framework
     ./pystemon/pystemon.py
     ./ail-framework/bin/importer/PystemonImporter.py
     ```
diff --git a/tools/crawler_add_task.py b/tools/crawler_add_task.py
new file mode 100755
index 00000000..5d9604fe
--- /dev/null
+++ b/tools/crawler_add_task.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Send a URL to the crawler - Create a crawler task
+=================================================
+
+Import a URL to be crawled and then analysed by AIL
+
+"""
+
+import argparse
+import os
+from pyail import PyAIL
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
+def check_frequency(value):
+    value = int(value)
+    if value <= 0:
+        raise argparse.ArgumentTypeError(f'Error: Invalid frequency {value}')
+
+
+if __name__ == "__main__":
+
+    # TODO add c argument for config file
+    parser = argparse.ArgumentParser(description='Send a URL to the crawler - Create a crawler task')
+    parser.add_argument('-u', '--url', type=str, help='URL to crawl', required=True)
+    parser.add_argument('-k', '--key', type=str, help='AIL API Key', required=True)
+    parser.add_argument('-a', '--ail', type=str, help='AIL URL')
+    parser.add_argument('-d', '--depth', type=int,
default=1, help='Depth limit') # TODO improve me + parser.add_argument('--cookiejar', type=str, help='Cookiejar uuid') + parser.add_argument('-p', '--proxy', type=str, help='Proxy address to use, "web" and "tor" can be used as shortcut (web is used by default if the domain isn\'t an onion)') + + group = parser.add_mutually_exclusive_group() + group.add_argument('--har', dest='har', action='store_true', help='Save HAR') + group.add_argument('--no-har', dest='har', action='store_false', help='Don\'t save HAR') + parser.set_defaults(har=None) + + group = parser.add_mutually_exclusive_group() + group.add_argument('--screenshot', dest='screenshot', action='store_true', help='Save screenshot') + group.add_argument('--no-screenshot', dest='screenshot', action='store_false', help='Don\'t save screenshot') + parser.set_defaults(screenshot=None) + + group = parser.add_argument_group('Frequency, create a regular crawler/scheduler. one shot if not specified') + group.add_argument('-f', '--frequency', type=str, choices=['monthly', 'weekly', 'daily', 'hourly'], + help='monthly, weekly, daily or hourly frequency or specify a custom one with the others arguments') + group.add_argument('--minutes', type=int, help='frequency in minutes') + group.add_argument('--hours', type=int, help='frequency in hours') + group.add_argument('--days', type=int, help='frequency in days') + group.add_argument('--weeks', type=int, help='frequency in weeks') + group.add_argument('--months', type=int, help='frequency in months') + + args = parser.parse_args() + + if not args.url and not args.key: + parser.print_help() + sys.exit(0) + + # Load crawler default config + config_loader = ConfigLoader() + har = args.har + if har is None: + har = config_loader.get_config_boolean('Crawler', 'default_har') + screenshot = args.screenshot + if screenshot is None: + screenshot = config_loader.get_config_boolean('Crawler', 'default_screenshot') + + if args.depth: + depth = args.depth + if depth < 0: + raise argparse.ArgumentTypeError(f'Error: Invalid depth {depth}') + else: + depth = 1 + + # frequency + frequency = {} + if args.frequency: + if args.frequency in ['monthly', 'weekly', 'daily', 'hourly']: + frequency = args.frequency + else: + raise argparse.ArgumentTypeError('Invalid frequency') + elif args.minutes or args.hours or args.days or args.weeks or args.months: + if args.minutes: + check_frequency(args.minutes) + frequency['minutes'] = args.minutes + if args.hours: + check_frequency(args.hours) + frequency['hours'] = args.hours + if args.days: + check_frequency(args.days) + frequency['days'] = args.days + if args.weeks: + check_frequency(args.weeks) + frequency['weeks'] = args.weeks + if args.months: + check_frequency(args.months) + frequency['months'] = args.months + if not frequency: + frequency = None + + proxy = args.proxy + + if args.cookiejar: + cookiejar = args.cookiejar + else: + cookiejar = None + + ail = args.ail + if not ail: + ail = 'https://localhost:7000/' + + client = PyAIL(ail, args.key, ssl=False) + r = client.crawl_url(args.url, har=har, screenshot=screenshot, depth_limit=depth, frequency=frequency, + cookiejar=cookiejar, proxy=proxy) + print(r) diff --git a/update/bin/Update_ARDB.sh b/update/bin/Update_ARDB.sh deleted file mode 100755 index 2544973e..00000000 --- a/update/bin/Update_ARDB.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -echo "Killing all screens ..." -bash -c "bash ../../bin/LAUNCH.sh -k" -echo "" -echo "Updating ARDB ..." 
diff --git a/update/bin/Update_ARDB.sh b/update/bin/Update_ARDB.sh
deleted file mode 100755
index 2544973e..00000000
--- a/update/bin/Update_ARDB.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-echo "Killing all screens ..."
-bash -c "bash ../../bin/LAUNCH.sh -k"
-echo ""
-echo "Updating ARDB ..."
-pushd ../../
-rm -r ardb
-pushd ardb/
-git clone https://github.com/yinqiwen/ardb.git
-git checkout 0.10 || exit 1
-make || exit 1
-popd
-popd
-echo "ARDB Updated"
-echo ""
-
-exit 0
diff --git a/update/bin/Update_Redis.sh b/update/bin/Update_Redis.sh
index 238d53f7..dc4d394d 100755
--- a/update/bin/Update_Redis.sh
+++ b/update/bin/Update_Redis.sh
@@ -2,13 +2,11 @@
 [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
 
 export PATH=$AIL_HOME:$PATH
 export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
 export PATH=$AIL_BIN:$PATH
 export PATH=$AIL_FLASK:$PATH
diff --git a/update/bin/ail_updater.py b/update/bin/ail_updater.py
index 6edc8ea6..af45d3a4 100755
--- a/update/bin/ail_updater.py
+++ b/update/bin/ail_updater.py
@@ -20,7 +20,7 @@ class AIL_Updater(object):
         self.start_time = time.time()
 
         self.config = ConfigLoader()
-        self.r_serv = self.config.get_redis_conn("Kvrocks_DB")
+        self.r_serv = self.config.get_db_conn("Kvrocks_DB")
 
         self.f_version = float(self.version[1:])
         self.current_f_version = ail_updates.get_ail_float_version()
@@ -35,7 +35,7 @@ class AIL_Updater(object):
         """
         Update DB version
         """
-        ail_updates.add_ail_update(version)
+        ail_updates.add_ail_update(self.version)
 
     def run_update(self):
         self.update()
diff --git a/update/bin/old_ail_updater.py b/update/bin/old_ail_updater.py
deleted file mode 100755
index 14833679..00000000
--- a/update/bin/old_ail_updater.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-import datetime
-
-sys.path.append(os.environ['AIL_BIN'])
-##################################
-# Import Project packages
-##################################
-from lib import ConfigLoader
-
-class AIL_Updater(object):
-    """docstring for AIL_Updater."""
-
-    def __init__(self, new_version):
-        self.version = new_version
-        self.start_time = time.time()
-
-        self.config = ConfigLoader.ConfigLoader()
-        self.r_serv = self.config.get_redis_conn("ARDB_DB")
-
-        self.f_version = float(self.version[1:])
-        self.current_f_version = self.r_serv.get('ail:version')
-        if self.current_f_version:
-            self.current_f_version = float(self.current_f_version[1:])
-        else:
-            self.current_f_version = 0
-
-    def update(self):
-        """
-        AIL DB update
-        """
-        pass
-
-    def end_update(self):
-        """
-        Update DB version
-        """
-        # Set current ail version
-        self.r_serv.hset('ail:update_date', self.version, datetime.datetime.now().strftime("%Y%m%d"))
-        # Set current ail version
-        if self.f_version > self.current_f_version:
-            self.r_serv.set('ail:version', self.version)
-
-    def run_update(self):
-        self.update()
-        self.end_update()
diff --git a/update/default_update/Update.sh b/update/default_update/Update.sh
index 189ae846..ef881805 100755
--- a/update/default_update/Update.sh
+++ b/update/default_update/Update.sh
@@ -7,13 +7,13 @@ fi
 [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
-[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_KVROCKS" ] && echo "Needs the env var AIL_KVROCKS. Run the script from the virtual environment." && exit 1;
+[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
 [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
 
 export PATH=$AIL_HOME:$PATH
 export PATH=$AIL_REDIS:$PATH
-export PATH=$AIL_ARDB:$PATH
+export PATH=$AIL_KVROCKS:$PATH
 export PATH=$AIL_BIN:$PATH
 export PATH=$AIL_FLASK:$PATH
@@ -25,7 +25,7 @@ bash ${AIL_BIN}/LAUNCH.sh -ks
 wait
 echo ""
 
-bash ${AIL_BIN}/LAUNCH.sh -lav
+bash ${AIL_BIN}/LAUNCH.sh -lkv
 wait
 echo ""
diff --git a/update/v1.0/Update.sh b/update/v1.0/Update.sh
deleted file mode 100755
index 1d3691d0..00000000
--- a/update/v1.0/Update.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/bash
-YELLOW="\\033[1;33m"
-DEFAULT="\\033[0;39m"
-
-echo -e $YELLOW"\t"
-echo -e "* ------------------------------------------------------------------"
-echo -e "\t"
-echo -e "  - - - - - - - - PLEASE RELAUNCH AIL - - - - - - - -  "
-echo -e "\t"
-echo -e "* ------------------------------------------------------------------"
-echo -e "\t"
-echo -e "\t"$DEFAULT
-
-# fix invalid Updater version (kill parent):
-kill -SIGUSR1 `ps --pid $$ -oppid=`; exit
diff --git a/update/v1.5/Update-ARDB_Metadata.py b/update/v1.5/Update-ARDB_Metadata.py
deleted file mode 100755
index f678f74e..00000000
--- a/update/v1.5/Update-ARDB_Metadata.py
+++ /dev/null
@@ -1,165 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-import os
-import sys
-import time
-
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-from lib import ConfigLoader
-
-def update_tracked_terms(main_key, tracked_container_key):
-    for tracked_item in r_serv_term.smembers(main_key):
-        all_items = r_serv_term.smembers(tracked_container_key.format(tracked_item))
-        for item_path in all_items:
-            if PASTES_FOLDER in item_path:
-                new_item_path = item_path.replace(PASTES_FOLDER, '', 1)
-                r_serv_term.sadd(tracked_container_key.format(tracked_item), new_item_path)
-                r_serv_term.srem(tracked_container_key.format(tracked_item), item_path)
-
-def update_hash_item(has_type):
-    #get all hash items:
-    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
-    for item_path in all_hash_items:
-        if PASTES_FOLDER in item_path:
-            base64_key = '{}_paste:{}'.format(has_type, item_path)
-            hash_key = 'hash_paste:{}'.format(item_path)
-
-            if r_serv_metadata.exists(base64_key):
-                new_base64_key = base64_key.replace(PASTES_FOLDER, '', 1)
-                res = r_serv_metadata.renamenx(base64_key, new_base64_key)
-                if res == 0:
-                    print('same key, double name: {}'.format(item_path))
-                    # fusion
-                    all_key = r_serv_metadata.smembers(base64_key)
-                    for elem in all_key:
-                        r_serv_metadata.sadd(new_base64_key, elem)
-                        r_serv_metadata.srem(base64_key, elem)
-
-            if r_serv_metadata.exists(hash_key):
-                new_hash_key = hash_key.replace(PASTES_FOLDER, '', 1)
-                res = r_serv_metadata.renamenx(hash_key, new_hash_key)
-                if res == 0:
-                    print('same key, double name: {}'.format(item_path))
-                    # fusion
-                    all_key = r_serv_metadata.smembers(hash_key)
-                    for elem in all_key:
-
r_serv_metadata.sadd(new_hash_key, elem) - r_serv_metadata.srem(hash_key, elem) - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") - r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv.set('ail:current_background_script', 'metadata') - - ## Update metadata ## - print('Updating ARDB_Metadata ...') - index = 0 - start = time.time() - - #update stats - r_serv.set('ail:current_background_script_stat', 0) - - # Update base64 - update_hash_item('base64') - - #update stats - r_serv.set('ail:current_background_script_stat', 20) - # Update binary - update_hash_item('binary') - - #update stats - r_serv.set('ail:current_background_script_stat', 40) - # Update binary - update_hash_item('hexadecimal') - - #update stats - r_serv.set('ail:current_background_script_stat', 60) - - total_onion = r_serv_tag.scard('infoleak:submission=\"crawler\"') - nb_updated = 0 - last_progress = 0 - - # Update onion metadata - all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"') - for item_path in all_crawled_items: - domain = None - if PASTES_FOLDER in item_path: - old_item_metadata = 'paste_metadata:{}'.format(item_path) - item_path = item_path.replace(PASTES_FOLDER, '', 1) - new_item_metadata = 'paste_metadata:{}'.format(item_path) - res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata) - #key already exist - if res == 0: - r_serv_metadata.delete(old_item_metadata) - - # update domain port - domain = r_serv_metadata.hget(new_item_metadata, 'domain') - if domain: - if domain[-3:] != ':80': - r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain)) - super_father = r_serv_metadata.hget(new_item_metadata, 'super_father') - if super_father: - if PASTES_FOLDER in super_father: - r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1)) - father = r_serv_metadata.hget(new_item_metadata, 'father') - if father: - if PASTES_FOLDER in father: - r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1)) - - nb_updated += 1 - progress = int((nb_updated * 30) /total_onion) - print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60)) - # update progress stats - if progress != last_progress: - r_serv.set('ail:current_background_script_stat', progress + 60) - last_progress = progress - - #update stats - r_serv.set('ail:current_background_script_stat', 90) - - ## update tracked term/set/regex - # update tracked term - update_tracked_terms('TrackedSetTermSet', 'tracked_{}') - - #update stats - r_serv.set('ail:current_background_script_stat', 93) - # update tracked set - update_tracked_terms('TrackedSetSet', 'set_{}') - - #update stats - r_serv.set('ail:current_background_script_stat', 96) - # update tracked regex - update_tracked_terms('TrackedRegexSet', 'regex_{}') - - #update stats - r_serv.set('ail:current_background_script_stat', 100) - ## - - end = time.time() - - print('Updating ARDB_Metadata Done => {} paths: {} s'.format(index, end - start)) - print() - - r_serv.sadd('ail:update_v1.5', 'metadata') - - ## - #Key, Dynamic Update - ## - #paste_children 
- #nb_seen_hash, base64_hash, binary_hash - #paste_onion_external_links - #misp_events, hive_cases - ## diff --git a/update/v1.5/Update-ARDB_Onions.py b/update/v1.5/Update-ARDB_Onions.py deleted file mode 100755 index 199ee194..00000000 --- a/update/v1.5/Update-ARDB_Onions.py +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -def substract_date(date_from, date_to): - date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) - date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) - delta = date_to - date_from # timedelta - l_date = [] - for i in range(delta.days + 1): - date = date_from + datetime.timedelta(i) - l_date.append( date.strftime('%Y%m%d') ) - return l_date - -def get_date_epoch(date): - return int(datetime.datetime(int(date[0:4]), int(date[4:6]), int(date[6:8])).timestamp()) - -def get_domain_root_from_paste_childrens(item_father, domain): - item_children = r_serv_metadata.smembers('paste_children:{}'.format(item_father)) - domain_root = '' - for item_path in item_children: - # remove absolute_path - if PASTES_FOLDER in item_path: - r_serv_metadata.srem('paste_children:{}'.format(item_father), item_path) - item_path = item_path.replace(PASTES_FOLDER, '', 1) - r_serv_metadata.sadd('paste_children:{}'.format(item_father), item_path) - if domain in item_path: - domain_root = item_path - return domain_root - - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv.set('ail:current_background_script', 'onions') - r_serv.set('ail:current_background_script_stat', 0) - - ## Update Onion ## - print('Updating ARDB_Onion ...') - index = 0 - start = time.time() - - # clean down domain from db - date_from = '20180929' - date_today = datetime.date.today().strftime("%Y%m%d") - for date in substract_date(date_from, date_today): - - onion_down = r_serv_onion.smembers('onion_down:{}'.format(date)) - #print(onion_down) - for onion_domain in onion_down: - if not r_serv_onion.sismember('full_onion_up', onion_domain): - # delete history - all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1) - if all_onion_history: - for date_history in all_onion_history: - #print('onion_history:{}:{}'.format(onion_domain, date_history)) - r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) - r_serv_onion.delete('onion_history:{}'.format(onion_domain)) - - #stats - total_domain = r_serv_onion.scard('full_onion_up') - nb_updated = 0 - last_progress = 0 - - # clean up domain - all_domain_up = r_serv_onion.smembers('full_onion_up') - for onion_domain in all_domain_up: - # delete history - all_onion_history = r_serv_onion.lrange('onion_history:{}'.format(onion_domain), 0 ,-1) - if all_onion_history: - for date_history in all_onion_history: - print('--------') - print('onion_history:{}:{}'.format(onion_domain, date_history)) - item_father = r_serv_onion.lrange('onion_history:{}:{}'.format(onion_domain, date_history), 0, 0) 
- print('item_father: {}'.format(item_father)) - try: - item_father = item_father[0] - except IndexError: - r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) - continue - #print(item_father) - # delete old history - r_serv_onion.delete('onion_history:{}:{}'.format(onion_domain, date_history)) - # create new history - root_key = get_domain_root_from_paste_childrens(item_father, onion_domain) - if root_key: - r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)}) - print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key)) - #update service metadata: paste_parent - r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key) - - r_serv_onion.delete('onion_history:{}'.format(onion_domain)) - - r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'ports', '80') - r_serv_onion.hdel('onion_metadata:{}'.format(onion_domain), 'last_seen') - - nb_updated += 1 - progress = int((nb_updated * 100) /total_domain) - print('{}/{} updated {}%'.format(nb_updated, total_domain, progress)) - # update progress stats - if progress != last_progress: - r_serv.set('ail:current_background_script_stat', progress) - last_progress = progress - - - end = time.time() - print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) - print() - print('Done in {} s'.format(end - start_deb)) - - r_serv.sadd('ail:update_v1.5', 'onions') diff --git a/update/v1.5/Update-ARDB_Onions_screenshots.py b/update/v1.5/Update-ARDB_Onions_screenshots.py deleted file mode 100755 index 6ad7a025..00000000 --- a/update/v1.5/Update-ARDB_Onions_screenshots.py +++ /dev/null @@ -1,117 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -from hashlib import sha256 - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -def rreplace(s, old, new, occurrence): - li = s.rsplit(old, occurrence) - return new.join(li) - -def substract_date(date_from, date_to): - date_from = datetime.date(int(date_from[0:4]), int(date_from[4:6]), int(date_from[6:8])) - date_to = datetime.date(int(date_to[0:4]), int(date_to[4:6]), int(date_to[6:8])) - delta = date_to - date_from # timedelta - l_date = [] - for i in range(delta.days + 1): - date = date_from + datetime.timedelta(i) - l_date.append( date.strftime('%Y%m%d') ) - return l_date - - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot")) - NEW_SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot') - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv.set('ail:current_background_script', 'crawled_screenshot') - r_serv.set('ail:current_background_script_stat', 0) - - ## Update Onion ## - print('Updating ARDB_Onion ...') - index = 0 - start = time.time() - - # clean down domain from db - date_from = '20180801' - date_today = datetime.date.today().strftime("%Y%m%d") - list_date = substract_date(date_from, date_today) - nb_done = 0 - last_progress = 0 - 
total_to_update = len(list_date) - for date in list_date: - screenshot_dir = os.path.join(SCREENSHOT_FOLDER, date[0:4], date[4:6], date[6:8]) - if os.path.isdir(screenshot_dir): - print(screenshot_dir) - for file in os.listdir(screenshot_dir): - if file.endswith(".png"): - index += 1 - #print(file) - - img_path = os.path.join(screenshot_dir, file) - with open(img_path, 'br') as f: - image_content = f.read() - - hash = sha256(image_content).hexdigest() - img_dir_path = os.path.join(hash[0:2], hash[2:4], hash[4:6], hash[6:8], hash[8:10], hash[10:12]) - filename_img = os.path.join(NEW_SCREENSHOT_FOLDER, img_dir_path, hash[12:] +'.png') - dirname = os.path.dirname(filename_img) - if not os.path.exists(dirname): - os.makedirs(dirname) - if not os.path.exists(filename_img): - os.rename(img_path, filename_img) - else: - os.remove(img_path) - - item = os.path.join('crawled', date[0:4], date[4:6], date[6:8], file[:-4]) - # add item metadata - r_serv_metadata.hset('paste_metadata:{}'.format(item), 'screenshot', hash) - # add sha256 metadata - r_serv_onion.sadd('screenshot:{}'.format(hash), item) - - if file.endswith('.pnghar.txt'): - har_path = os.path.join(screenshot_dir, file) - new_file = rreplace(file, '.pnghar.txt', '.json', 1) - new_har_path = os.path.join(screenshot_dir, new_file) - os.rename(har_path, new_har_path) - - progress = int((nb_done * 100) /total_to_update) - # update progress stats - if progress != last_progress: - r_serv.set('ail:current_background_script_stat', progress) - print('{}/{} screenshot updated {}%'.format(nb_done, total_to_update, progress)) - last_progress = progress - - nb_done += 1 - - r_serv.set('ail:current_background_script_stat', 100) - - - end = time.time() - print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) - print() - print('Done in {} s'.format(end - start_deb)) - - r_serv.set('ail:current_background_script_stat', 100) - r_serv.sadd('ail:update_v1.5', 'crawled_screenshot') - if r_serv.scard('ail:update_v1.5') != 5: - r_serv.set('ail:update_error', 'Update v1.5 Failed, please relaunch the bin/update-background.py script') diff --git a/update/v1.5/Update-ARDB_Tags.py b/update/v1.5/Update-ARDB_Tags.py deleted file mode 100755 index 38f3c09c..00000000 --- a/update/v1.5/Update-ARDB_Tags.py +++ /dev/null @@ -1,135 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import redis - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -from lib import ConfigLoader - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - r_important_paste_2018 = redis.StrictRedis( - host=config_loader.get_config_str("ARDB_Metadata", "host"), - port=config_loader.get_config_int("ARDB_Metadata", "port"), - db=2018, - decode_responses=True) - - r_important_paste_2019 = redis.StrictRedis( - host=config_loader.get_config_str("ARDB_Metadata", "host"), - port=config_loader.get_config_int("ARDB_Metadata", "port"), - db=2019, - decode_responses=True) - - config_loader = None - - r_serv.set('ail:current_background_script', 'tags') - r_serv.set('ail:current_background_script_stat', 0) - - if 
r_serv.sismember('ail:update_v1.5', 'onions') and r_serv.sismember('ail:update_v1.5', 'metadata'): - - print('Updating ARDB_Tags ...') - index = 0 - nb_tags_to_update = 0 - nb_updated = 0 - last_progress = 0 - start = time.time() - - tags_list = r_serv_tag.smembers('list_tags') - # create temp tags metadata - tag_metadata = {} - for tag in tags_list: - tag_metadata[tag] = {} - tag_metadata[tag]['first_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'first_seen') - if tag_metadata[tag]['first_seen'] is None: - tag_metadata[tag]['first_seen'] = 99999999 - else: - tag_metadata[tag]['first_seen'] = int(tag_metadata[tag]['first_seen']) - - tag_metadata[tag]['last_seen'] = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_metadata[tag]['last_seen'] is None: - tag_metadata[tag]['last_seen'] = 0 - else: - tag_metadata[tag]['last_seen'] = int(tag_metadata[tag]['last_seen']) - nb_tags_to_update += r_serv_tag.scard(tag) - - if nb_tags_to_update == 0: - nb_tags_to_update = 1 - - for tag in tags_list: - - all_item = r_serv_tag.smembers(tag) - for item_path in all_item: - splitted_item_path = item_path.split('/') - #print(tag) - #print(item_path) - try: - item_date = int( ''.join([splitted_item_path[-4], splitted_item_path[-3], splitted_item_path[-2]]) ) - except IndexError: - r_serv_tag.srem(tag, item_path) - continue - - # remove absolute path - new_path = item_path.replace(PASTES_FOLDER, '', 1) - if new_path != item_path: - # save in queue absolute path to remove - r_serv_tag.sadd('maj:v1.5:absolute_path_to_rename', item_path) - - # update metadata first_seen - if item_date < tag_metadata[tag]['first_seen']: - tag_metadata[tag]['first_seen'] = item_date - r_serv_tag.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_metadata[tag]['last_seen']: - tag_metadata[tag]['last_seen'] = item_date - last_seen_db = r_serv_tag.hget('tag_metadata:{}'.format(tag), 'last_seen') - if last_seen_db: - if item_date > int(last_seen_db): - r_serv_tag.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - else: - tag_metadata[tag]['last_seen'] = last_seen_db - - r_serv_tag.sadd('{}:{}'.format(tag, item_date), new_path) - r_serv_tag.hincrby('daily_tags:{}'.format(item_date), tag, 1) - - # clean db - r_serv_tag.srem(tag, item_path) - index = index + 1 - - nb_updated += 1 - progress = int((nb_updated * 100) /nb_tags_to_update) - print('{}/{} updated {}%'.format(nb_updated, nb_tags_to_update, progress)) - # update progress stats - if progress != last_progress: - r_serv.set('ail:current_background_script_stat', progress) - last_progress = progress - - #flush browse importante pastes db - try: - r_important_paste_2018.flushdb() - except Exception: - pass - - try: - r_important_paste_2019.flushdb() - except Exception: - pass - - end = time.time() - - print('Updating ARDB_Tags Done => {} paths: {} s'.format(index, end - start)) - - r_serv.sadd('ail:update_v1.5', 'tags') diff --git a/update/v1.5/Update-ARDB_Tags_background.py b/update/v1.5/Update-ARDB_Tags_background.py deleted file mode 100755 index 0df3d75d..00000000 --- a/update/v1.5/Update-ARDB_Tags_background.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -def tags_key_fusion(old_item_path_key, new_item_path_key): - print('fusion:') - print(old_item_path_key) - print(new_item_path_key) - for tag in 
r_serv_metadata.smembers(old_item_path_key): - r_serv_metadata.sadd(new_item_path_key, tag) - r_serv_metadata.srem(old_item_path_key, tag) - - -if __name__ == '__main__': - - start_deb = time.time() - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - r_serv_tag = config_loader.get_redis_conn("ARDB_Tags") - config_loader = None - - if r_serv.sismember('ail:update_v1.5', 'tags'): - - r_serv.set('ail:current_background_script', 'tags_background') - r_serv.set('ail:current_background_script_stat', 0) - - print('Updating ARDB_Tags ...') - start = time.time() - - # update item metadata tags - tag_not_updated = True - total_to_update = r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') - nb_updated = 0 - last_progress = 0 - if total_to_update > 0: - while tag_not_updated: - item_path = r_serv_tag.srandmember('maj:v1.5:absolute_path_to_rename') - old_tag_item_key = 'tag:{}'.format(item_path) - new_item_path = item_path.replace(PASTES_FOLDER, '', 1) - new_tag_item_key = 'tag:{}'.format(new_item_path) - res = r_serv_metadata.renamenx(old_tag_item_key, new_tag_item_key) - if res == 0: - tags_key_fusion(old_tag_item_key, new_tag_item_key) - nb_updated += 1 - r_serv_tag.srem('maj:v1.5:absolute_path_to_rename', item_path) - if r_serv_tag.scard('maj:v1.5:absolute_path_to_rename') == 0: - tag_not_updated = False - else: - progress = int((nb_updated * 100) / total_to_update) - print('{}/{} Tags updated {}%'.format(nb_updated, total_to_update, progress)) - # update progress stats - if progress != last_progress: - r_serv.set('ail:current_background_script_stat', progress) - last_progress = progress - - end = time.time() - - print('Updating ARDB_Tags Done: {} s'.format(end - start)) - - r_serv.sadd('ail:update_v1.5', 'tags_background') diff --git a/update/v1.5/Update.py b/update/v1.5/Update.py deleted file mode 100755 index fd1a2dd5..00000000 --- a/update/v1.5/Update.py +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - print() - print('Updating ARDB_Onion ...') - index = 0 - start = time.time() - - # update crawler queue - for elem in r_serv_onion.smembers('onion_crawler_queue'): - if PASTES_FOLDER in elem: - r_serv_onion.srem('onion_crawler_queue', elem) - r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) - index = index +1 - for elem in r_serv_onion.smembers('onion_crawler_priority_queue'): - if PASTES_FOLDER in elem: - r_serv_onion.srem('onion_crawler_queue', elem) - r_serv_onion.sadd('onion_crawler_queue', elem.replace(PASTES_FOLDER, '', 1)) - index = index +1 - - end = time.time() - print('Updating ARDB_Onion Done => {} paths: {} s'.format(index, end - start)) - print() - - # Add background update - r_serv.sadd('ail:to_update', 'v1.5') - - #Set current ail version - r_serv.set('ail:version', 'v1.5') - - #Set current ail version - 
r_serv.set('ail:update_date_v1.5', datetime.datetime.now().strftime("%Y%m%d")) - - print('Done in {} s'.format(end - start_deb)) diff --git a/update/v1.5/Update.sh b/update/v1.5/Update.sh deleted file mode 100755 index cf70a444..00000000 --- a/update/v1.5/Update.sh +++ /dev/null @@ -1,60 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -k & -wait - -echo "" -bash -c "bash ${AIL_HOME}/update/bin/Update_Redis.sh" -#bash -c "bash ${AIL_HOME}/update/bin/Update_ARDB.sh" - -echo "" -echo -e $GREEN"Update DomainClassifier"$DEFAULT -echo "" -pip3 install --upgrade --force-reinstall git+https://github.com/D4-project/BGP-Ranking.git/@28013297efb039d2ebbce96ee2d89493f6ae56b0#subdirectory=client&egg=pybgpranking -pip3 install --upgrade --force-reinstall git+https://github.com/adulau/DomainClassifier.git -wait -echo "" - -echo "" -echo -e $GREEN"Update Web thirdparty"$DEFAULT -echo "" -bash -c "(cd ${AIL_FLASK}; ./update_thirdparty.sh &)" -wait -echo "" - -bash ${AIL_BIN}LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Fixing ARDB ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v1.5/Update.py & -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks & -wait - -echo "" - -exit 0 diff --git a/update/v1.7/Update.py b/update/v1.7/Update.py deleted file mode 100755 index f3777d0e..00000000 --- a/update/v1.7/Update.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - r_serv = config_loader.get_redis_conn("ARDB_DB") - config_loader = None - - # Set current ail version - r_serv.set('ail:version', 'v1.7') - - # Set current ail version - r_serv.set('ail:update_date_v1.7', datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v1.7/Update.sh b/update/v1.7/Update.sh deleted file mode 100755 index 603e9517..00000000 --- a/update/v1.7/Update.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks & -wait - -echo "" -echo -e $GREEN"Update DomainClassifier"$DEFAULT -echo "" - -cd $AIL_HOME -git clone https://github.com/kazu-yamamoto/pgpdump.git -cd pgpdump -./configure -make -sudo make install -wait -echo "" - -echo "" -echo -e $GREEN"Update requirement"$DEFAULT -echo "" -pip3 install beautifulsoup4 - -bash ${AIL_BIN}LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v1.7/Update.py & -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks & -wait - -echo "" -echo -e $GREEN"Update thirdparty ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -t & -wait - - -echo "" - -exit 0 diff --git a/update/v2.0/Update.py b/update/v2.0/Update.py deleted file mode 100755 index 4d3504e4..00000000 --- a/update/v2.0/Update.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - - r_serv = config_loader.get_redis_conn("ARDB_DB") - config_loader = None - - # Set current ail version - r_serv.set('ail:version', 'v2.0') - - # use new update_date format - date_tag_to_replace = ['v1.5', 'v1.7'] - for tag in date_tag_to_replace: - if r_serv.exists('ail:update_date_{}'.format(tag)): - date_tag = r_serv.get('ail:update_date_{}'.format(tag)) - r_serv.hset('ail:update_date', tag, date_tag) - r_serv.delete('ail:update_date_{}'.format(tag)) - - # Set current ail version - r_serv.hset('ail:update_date', 'v2.0', datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.0/Update.sh b/update/v2.0/Update.sh deleted file mode 100755 index 0132ec8e..00000000 --- a/update/v2.0/Update.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -echo "" -echo -e $GREEN"Create Self-Signed Certificate"$DEFAULT -echo "" -pushd ${AIL_BIN}/helper/gen_cert -bash gen_root.sh -wait -bash gen_cert.sh -wait -popd - -cp ${AIL_BIN}/helper/gen_cert/server.crt ${AIL_FLASK}/server.crt -cp ${AIL_BIN}/helper/gen_cert/server.key ${AIL_FLASK}/server.key - -echo "" -echo -e $GREEN"Update requirement"$DEFAULT -echo "" -pip3 install flask-login -wait -echo "" -pip3 install bcrypt -wait -echo "" -echo "" - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v2.0/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Update thirdparty ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -t -wait -echo "" - -echo "" -echo -e $GREEN"Create Default User"$DEFAULT -echo "" -python3 ${AIL_FLASK}create_default_user.py - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v2.2/Update.py b/update/v2.2/Update.py deleted file mode 100755 index 2bfef8e9..00000000 --- a/update/v2.2/Update.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from packages import Term - -from lib import ConfigLoader - - -def rreplace(s, old, new, occurrence): - li = s.rsplit(old, occurrence) - return new.join(li) - -def get_item_id(full_path): - return full_path.replace(PASTES_FOLDER, '', 1) - -def get_item_date(id_item): - l_dir = id_item.split('/') - return f'{l_dir[-4]}{l_dir[-3]}{l_dir[-2]}' - - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/' - PASTES_FOLDER = os.path.join(os.path.realpath(PASTES_FOLDER), '') - - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_term_stats = config_loader.get_redis_conn("ARDB_Trending") - r_serv_termfreq = config_loader.get_redis_conn("ARDB_TermFreq") - config_loader = None - - r_serv_term_stats.flushdb() - - # Disabled. 
Checkout the v2.2 branch if you need it - # # convert all regex: - # all_regex = r_serv_termfreq.smembers('TrackedRegexSet') - # for regex in all_regex: - # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(regex))) - # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(regex))) - # - # new_term = regex[1:-1] - # res = Term.parse_json_term_to_add({"term": new_term, "type": 'regex', "tags": tags, "mails": mails, "level": 1}, - # 'admin@admin.test') - # if res[1] == 200: - # term_uuid = res[0]['uuid'] - # list_items = r_serv_termfreq.smembers('regex_{}'.format(regex)) - # for paste_item in list_items: - # item_id = get_item_id(paste_item) - # item_date = get_item_date(item_id) - # Term.add_tracked_item(term_uuid, item_id, item_date) - # - # # Invalid Tracker => remove it - # else: - # print('Invalid Regex Removed: {}'.format(regex)) - # print(res[0]) - # # allow reprocess - # r_serv_termfreq.srem('TrackedRegexSet', regex) - # - # all_tokens = r_serv_termfreq.smembers('TrackedSetTermSet') - # for token in all_tokens: - # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(token))) - # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(token))) - # - # res = Term.parse_json_term_to_add({"term": token, "type": 'word', "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test') - # if res[1] == 200: - # term_uuid = res[0]['uuid'] - # list_items = r_serv_termfreq.smembers('tracked_{}'.format(token)) - # for paste_item in list_items: - # item_id = get_item_id(paste_item) - # item_date = get_item_date(item_id) - # Term.add_tracked_item(term_uuid, item_id, item_date) - # # Invalid Tracker => remove it - # else: - # print('Invalid Token Removed: {}'.format(token)) - # print(res[0]) - # # allow reprocess - # r_serv_termfreq.srem('TrackedSetTermSet', token) - # - # all_set = r_serv_termfreq.smembers('TrackedSetSet') - # for curr_set in all_set: - # tags = list(r_serv_termfreq.smembers('TrackedNotificationTags_{}'.format(curr_set))) - # mails = list(r_serv_termfreq.smembers('TrackedNotificationEmails_{}'.format(curr_set))) - # - # to_remove = ',{}'.format(curr_set.split(',')[-1]) - # new_set = rreplace(curr_set, to_remove, '', 1) - # new_set = new_set[2:] - # new_set = new_set.replace(',', '') - # - # res = Term.parse_json_term_to_add({"term": new_set, "type": 'set', "nb_words": 1, "tags": tags, "mails": mails, "level": 1}, 'admin@admin.test') - # if res[1] == 200: - # term_uuid = res[0]['uuid'] - # list_items = r_serv_termfreq.smembers('tracked_{}'.format(curr_set)) - # for paste_item in list_items: - # item_id = get_item_id(paste_item) - # item_date = get_item_date(item_id) - # Term.add_tracked_item(term_uuid, item_id, item_date) - # # Invalid Tracker => remove it - # else: - # print('Invalid Set Removed: {}'.format(curr_set)) - # print(res[0]) - # # allow reprocess - # r_serv_termfreq.srem('TrackedSetSet', curr_set) - - r_serv_termfreq.flushdb() - - # Set current ail version - r_serv.set('ail:version', 'v2.2') - - # Set current ail version - r_serv.hset('ail:update_date', 'v2.2', datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.2/Update.sh b/update/v2.2/Update.sh deleted file mode 100755 index 37704f3b..00000000 --- a/update/v2.2/Update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. 
Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v2.2/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v2.5/Update.py b/update/v2.5/Update.py deleted file mode 100755 index 6264c7f4..00000000 --- a/update/v2.5/Update.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -new_version = 'v2.5' - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv = config_loader.get_redis_conn("ARDB_DB") - config_loader = None - - r_serv.zadd('ail:all_role', {'user': 3}) - r_serv.zadd('ail:all_role', {'user_no_api': 4}) - r_serv.zadd('ail:all_role', {'read_only': 5}) - - for user in r_serv.hkeys('user:all'): - r_serv.sadd('user_role:user', user) - r_serv.sadd('user_role:user_no_api', user) - r_serv.sadd('user_role:read_only', user) - - # Set current ail version - r_serv.set('ail:version', new_version) - - # Set current ail version - r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.5/Update.sh b/update/v2.5/Update.sh deleted file mode 100755 index 6c75f15f..00000000 --- a/update/v2.5/Update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v2.5/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v2.6/Update.py b/update/v2.6/Update.py deleted file mode 100755 index ded00cf1..00000000 --- a/update/v2.6/Update.py +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -new_version = 'v2.6' - -if __name__ == '__main__': - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv = config_loader.get_redis_conn("ARDB_DB") - config_loader = None - - r_serv.sadd('ail:to_update', new_version) - - # Set current ail version - r_serv.set('ail:version', new_version) - - # Set current ail version - r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.6/Update.sh b/update/v2.6/Update.sh deleted file mode 100755 index 874bf0ec..00000000 --- a/update/v2.6/Update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v2.6/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v2.6/Update_screenshots.py b/update/v2.6/Update_screenshots.py deleted file mode 100755 index 735d64bc..00000000 --- a/update/v2.6/Update_screenshots.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time - -from pyfaup.faup import Faup - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -def get_domain(item_id): - item_id = item_id.split('/') - item_id = item_id[-1] - return item_id[:-36] - -def get_all_item(s_sha256): - return r_serv_onion.smembers(f'screenshot:{s_sha256}') - -def sanitize_domain(domain): - faup.decode(domain) - domain_sanitized = faup.get() - domain_sanitized = domain_sanitized['domain'] - try: - domain_sanitized = domain_sanitized.decode() - except: - pass - return domain_sanitized.lower() - -def update_db(s_sha256): - screenshot_items = get_all_item(s_sha256) - if screenshot_items: - for item_id in screenshot_items: - item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path - domain = get_domain(item_id) - - domain_sanitized = sanitize_domain(domain) - if domain != domain_sanitized: - r_serv_onion.sadd('incorrect_domain', domain) - domain = domain_sanitized - - r_serv_onion.sadd('domain_screenshot:{}'.format(domain), s_sha256) - r_serv_onion.sadd('screenshot_domain:{}'.format(s_sha256), domain) - else: - pass - # broken screenshot - r_serv_onion.sadd('broken_screenshot', s_sha256) - - -if __name__ == '__main__': - - start_deb = time.time() - faup = Faup() - - config_loader = ConfigLoader.ConfigLoader() - - PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) - SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot') - - r_serv_db = config_loader.get_redis_conn("ARDB_DB") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv_db.set('ail:update_in_progress', 'v2.6') - r_serv_db.set('ail:current_background_update', 'v2.6') - - r_serv_db.set('ail:current_background_script_stat', 20) - r_serv_db.set('ail:current_background_script', 'screenshot update') - - nb = 0 - - if os.path.isdir(SCREENSHOT_FOLDER): - for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False): - # print(dirs) - for name in files: - nb = nb + 1 - screenshot_sha256 = os.path.join(root, name) - screenshot_sha256 = screenshot_sha256[:-4] # remove .png - screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1) - screenshot_sha256 = screenshot_sha256.replace('/', '') - update_db(screenshot_sha256) - # print('Screenshot updated: {}'.format(nb)) - if nb % 1000 == 0: - r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb)) - - r_serv_db.set('ail:current_background_script_stat', 100) - - end = time.time() - print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb)) diff --git 
a/update/v2.7/Update.py b/update/v2.7/Update.py deleted file mode 100755 index eed7c219..00000000 --- a/update/v2.7/Update.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -from lib import ConfigLoader - -new_version = 'v2.7' - -if __name__ == '__main__': - - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv = config_loader.get_redis_conn("ARDB_DB") - r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - config_loader = None - - r_serv.sadd('ail:to_update', new_version) - - #### Update tags #### - r_serv_tags.sunionstore('list_tags:item', 'list_tags', []) - r_serv_onion.sunionstore('domain_update_v2.7', 'full_onion_up', []) - r_serv_onion.delete('incorrect_domain') - r_serv.set('ail:update_v2.7:deletetagrange', 1) - #### #### - - # Set current ail version - r_serv.set('ail:version', new_version) - - # Set current ail version - r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d")) diff --git a/update/v2.7/Update.sh b/update/v2.7/Update.sh deleted file mode 100755 index 8f9a4efd..00000000 --- a/update/v2.7/Update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v2.7/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v2.7/Update_domain_tags.py b/update/v2.7/Update_domain_tags.py deleted file mode 100755 index cbe1e4b6..00000000 --- a/update/v2.7/Update_domain_tags.py +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time - -from pyfaup.faup import Faup - -sys.path.append(os.environ['AIL_BIN']) -from packages import Date - -from lib import ConfigLoader - -def sanitize_domain(domain): - faup.decode(domain) - domain_sanitized = faup.get() - domain_sanitized = domain_sanitized['domain'] - try: - domain_sanitized = domain_sanitized.decode() - except: - pass - return domain_sanitized.lower() - -def get_all_obj_tags(obj_type): - return list(r_serv_tags.smembers(f'list_tags:{obj_type}')) - -def add_global_tag(tag, object_type=None): - r_serv_tags.sadd('list_tags', tag) - if object_type: - r_serv_tags.sadd('list_tags:{}'.format(object_type), tag) - -def get_obj_tag(object_id): - res = r_serv_metadata.smembers('tag:{}'.format(object_id)) - if res: - return list(res) - else: - return [] - -def delete_domain_tag_daterange(): - all_domains_tags = get_all_obj_tags('domain') - nb_updated = 0 - nb_to_update = len(all_domains_tags) - if nb_to_update == 0: - nb_to_update = 1 - refresh_time = time.time() - l_dates = Date.substract_date('20191008', Date.get_today_date_str()) - for tag in all_domains_tags: - for date_day in l_dates: - r_serv_tags.delete('domain:{}:{}'.format(tag, date_day)) - nb_updated += 1 - refresh_time = update_progress(refresh_time, nb_updated, nb_to_update) - r_serv_db.delete('ail:update_v2.7:deletetagrange') - -def update_domain_tags(domain): - domain_sanitized = sanitize_domain(domain) - if domain != domain_sanitized: - r_serv_onion.sadd('incorrect_domain', domain) - domain = domain_sanitized - - domain_tags = get_obj_tag(domain) - for tag in domain_tags: - # delete incorrect tags - if tag == 'infoleak:submission="crawler"' or tag == 'infoleak:submission="manual"': - r_serv_metadata.srem('tag:{}'.format(domain), tag) - else: - add_global_tag(tag, object_type='domain') - r_serv_tags.sadd('{}:{}'.format('domain', tag), domain) - -def update_progress(refresh_time, nb_updated, nb_elem_to_update): - if time.time() - refresh_time > 10: - progress = int((nb_updated * 100) / nb_elem_to_update) - print('{}/{} updated {}%'.format(nb_updated, nb_elem_to_update, progress)) - r_serv_db.set('ail:current_background_script_stat', progress) - refresh_time = time.time() - - return refresh_time - -def update_db(): - nb_updated = 0 - nb_to_update = r_serv_onion.scard('domain_update_v2.7') - refresh_time = time.time() - r_serv_db.set('ail:current_background_script_stat', 0) - r_serv_db.set('ail:current_background_script', 'domain tags update') - domain = r_serv_onion.spop('domain_update_v2.7') - while domain is not None: - update_domain_tags(domain) - nb_updated += 1 - refresh_time = update_progress(refresh_time, nb_updated, nb_to_update) - domain = 
r_serv_onion.spop('domain_update_v2.7') - if r_serv_db.exists('ail:update_v2.7:deletetagrange'): - r_serv_db.set('ail:current_background_script_stat', 0) - r_serv_db.set('ail:current_background_script', 'tags: remove deprecated keys') - delete_domain_tag_daterange() - - # sort all crawled domain - r_serv_onion.sort('full_onion_up', alpha=True) - r_serv_onion.sort('full_regular_up', alpha=True) - - -if __name__ == '__main__': - - start_deb = time.time() - faup = Faup() - - config_loader = ConfigLoader.ConfigLoader() - - r_serv_db = config_loader.get_redis_conn("ARDB_DB") - r_serv_tags = config_loader.get_redis_conn("ARDB_Tags") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - config_loader = None - - update_version = 'v2.7' - - r_serv_db.set('ail:update_in_progress', update_version) - r_serv_db.set('ail:current_background_update', update_version) - - r_serv_db.set('ail:current_background_script_stat', 0) - r_serv_db.set('ail:current_background_script', 'tags update') - - update_db() - - r_serv_db.set('ail:current_background_script_stat', 100) - - end = time.time() - print('ALL domains tags updated in {} s'.format(end - start_deb)) diff --git a/update/v3.0/Update.sh b/update/v3.0/Update.sh deleted file mode 100755 index 9c7a7d9d..00000000 --- a/update/v3.0/Update.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.0/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Update thirdparty ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -t - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.1.1/Update.py b/update/v3.1.1/Update.py deleted file mode 100755 index a5a16751..00000000 --- a/update/v3.1.1/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.1.1') - updater.run_update() diff --git a/update/v3.1.1/Update.sh b/update/v3.1.1/Update.sh deleted file mode 100755 index f43591c0..00000000 --- a/update/v3.1.1/Update.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. 
Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -ldbv & -wait -echo "" - -# SUBMODULES # -git submodule init -git submodule update - -echo -e $GREEN"Installing YARA ..."$DEFAULT -pip3 install yara-python -bash ${AIL_BIN}/LAUNCH.sh -t - -# SUBMODULES # -git submodule init -git submodule update - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.1.1/Update.py -wait -echo "" -echo "" - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.1/Update.py b/update/v3.1/Update.py deleted file mode 100755 index 6c4c4546..00000000 --- a/update/v3.1/Update.py +++ /dev/null @@ -1,23 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.1') - updater.run_update() - diff --git a/update/v3.1/Update.sh b/update/v3.1/Update.sh deleted file mode 100755 index 53b27cc6..00000000 --- a/update/v3.1/Update.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -lav & -wait -echo "" - -pip3 install scrapy -pip3 install scrapy-splash - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.1/Update.py -wait -echo "" -echo "" - -echo "" -echo -e $GREEN"Update thirdparty ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -t - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.2/Update.py b/update/v3.2/Update.py deleted file mode 100755 index 086ed098..00000000 --- a/update/v3.2/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.2') - updater.run_update() diff --git a/update/v3.2/Update.sh b/update/v3.2/Update.sh deleted file mode 100755 index a588e55d..00000000 --- a/update/v3.2/Update.sh +++ /dev/null @@ -1,52 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -ldbv & -wait -echo "" - -# SUBMODULES # -git submodule init -git submodule update - -echo -e $GREEN"Installing YARA ..."$DEFAULT -pip3 install yara-python -bash ${AIL_BIN}/LAUNCH.sh -t - -# SUBMODULES # -git submodule init -git submodule update - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.2/Update.py -wait -echo "" -echo "" - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.3/Update.py b/update/v3.3/Update.py deleted file mode 100755 index 5f0efc78..00000000 --- a/update/v3.3/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.3') - updater.run_update() diff --git a/update/v3.3/Update.sh b/update/v3.3/Update.sh deleted file mode 100755 index 86289dba..00000000 --- a/update/v3.3/Update.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -bash ${AIL_BIN}/LAUNCH.sh -ldbv & -wait -echo "" - -# SUBMODULES # -git submodule update - -# echo "" -# echo -e $GREEN"installing KVORCKS ..."$DEFAULT -# cd ${AIL_HOME} -# test ! 
-d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git -# pushd kvrocks/ -# make -j4 -# popd - -echo -e $GREEN"Installing html2text ..."$DEFAULT -pip3 install html2text - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.3/Update.py -wait -echo "" -echo "" - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.4/Update.py b/update/v3.4/Update.py deleted file mode 100755 index d4308551..00000000 --- a/update/v3.4/Update.py +++ /dev/null @@ -1,34 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - self.r_serv_onion = self.config.get_redis_conn("ARDB_Onion") - - def update(self): - """ - Update Domain Languages - """ - self.r_serv_onion.sunionstore('domain_update_v3.4', 'full_onion_up', 'full_regular_up') - self.r_serv.set('update:nb_elem_to_convert', self.r_serv_onion.scard('domain_update_v3.4')) - self.r_serv.set('update:nb_elem_converted', 0) - - # Add background update - self.r_serv.sadd('ail:to_update', self.version) - - -if __name__ == '__main__': - updater = Updater('v3.4') - updater.run_update() diff --git a/update/v3.4/Update.sh b/update/v3.4/Update.sh deleted file mode 100755 index 16a9ccb7..00000000 --- a/update/v3.4/Update.sh +++ /dev/null @@ -1,54 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# bash ${AIL_BIN}/LAUNCH.sh -ldbv & -# wait -# echo "" - -# SUBMODULES # -git submodule update - -# echo "" -# echo -e $GREEN"installing KVORCKS ..."$DEFAULT -# cd ${AIL_HOME} -# test ! 
-d kvrocks/ && git clone https://github.com/bitleak/kvrocks.git -# pushd kvrocks/ -# make -j4 -# popd - -echo -e $GREEN"Installing html2text ..."$DEFAULT -pip3 install pycld3 - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.4/Update.py -wait -echo "" -echo "" - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.4/Update_domain.py b/update/v3.4/Update_domain.py deleted file mode 100755 index c6183725..00000000 --- a/update/v3.4/Update_domain.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time - -sys.path.append(os.environ['AIL_BIN']) -################################## -# Import Project packages -################################## -from lib import ConfigLoader -from lib.objects.Items import Item - -def get_domain_type(domain_name): - if str(domain_name).endswith('.onion'): - return 'onion' - else: - return 'regular' - -def add_domain_language(domain_name, language): - language = language.split('-')[0] - domain_type = get_domain_type(domain_name) - r_serv_onion.sadd('all_domains_languages', language) - r_serv_onion.sadd(f'all_domains_languages:{domain_type}', language) - r_serv_onion.sadd(f'language:domains:{domain_type}:{language}', domain_name) - r_serv_onion.sadd(f'domain:language:{domain_name}', language) - -def add_domain_languages_by_item_id(domain_name, item_id): - item = Item(item_id) - for lang in item.get_languages(): - add_domain_language(domain_name, lang.language) - -def update_update_stats(): - nb_updated = int(r_serv_db.get('update:nb_elem_converted')) - progress = int((nb_updated * 100) / nb_elem_to_update) - print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%') - r_serv_db.set('ail:current_background_script_stat', progress) - -def update_domain_language(domain_obj, item_id): - domain_name = domain_obj.get_domain_name() - add_domain_languages_by_item_id(domain_name, item_id) - -def get_domain_history(domain_type, domain_name): - return r_serv_onion.zrange(f'crawler_history_{domain_type}:{domain_name}:80', 0, -1, withscores=True) - - -def get_item_children(item_id): - return r_serv_metadata.smembers(f'paste_children:{item_id}') - -def get_domain_items(domain_name, root_item_id): - dom_item = get_domain_item_children(domain_name, root_item_id) - dom_item.append(root_item_id) - return dom_item - -def is_item_in_domain(domain_name, item_id): - is_in_domain = False - domain_length = len(domain_name) - if len(item_id) > (domain_length+48): - if item_id[-36-domain_length:-36] == domain_name: - is_in_domain = True - return is_in_domain - -def get_domain_item_children(domain_name, root_item_id): - all_items = [] - for item_id in get_item_children(root_item_id): - if is_item_in_domain(domain_name, item_id): - all_items.append(item_id) - all_items.extend(get_domain_item_children(domain_name, item_id)) - return all_items - -def get_domain_crawled_item_root(domain_name, domain_type, epoch): - res = r_serv_onion.zrevrangebyscore(f'crawler_history_{domain_type}:{domain_name}:80', int(epoch), int(epoch)) - return {"root_item": res[0], "epoch": int(epoch)} - -def get_domain_items_crawled(domain_name, domain_type, epoch): - item_crawled = [] - item_root = get_domain_crawled_item_root(domain_name, domain_type, epoch) - if item_root: - if item_root['root_item'] != str(item_root['epoch']) and item_root['root_item']: - for item_id in get_domain_items(domain_name, item_root['root_item']): - 
item_crawled.append(item_id) - return item_crawled - - -if __name__ == '__main__': - - start_deb = time.time() - config_loader = ConfigLoader.ConfigLoader() - r_serv_db = config_loader.get_redis_conn("ARDB_DB") - r_serv_onion = config_loader.get_redis_conn("ARDB_Onion") - r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") - config_loader = None - - r_serv_db.set('ail:current_background_script', 'domain languages update') - - nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') - if not nb_elem_to_update: - nb_elem_to_update = 1 - else: - nb_elem_to_update = int(nb_elem_to_update) - - # _delete_all_domains_languages() - - while True: - domain = r_serv_onion.spop('domain_update_v3.4') - if domain is not None: - print(domain) - domain = str(domain) - domain_t = get_domain_type(domain) - for domain_history in get_domain_history(domain_t, domain): - domain_items = get_domain_items_crawled(domain, domain_t, domain_history[1]) - for id_item in domain_items: - update_domain_language(domain, id_item) - - r_serv_db.incr('update:nb_elem_converted') - update_update_stats() - - else: - r_serv_db.set('ail:current_background_script_stat', 100) - sys.exit(0) diff --git a/update/v3.5/Update.py b/update/v3.5/Update.py deleted file mode 100755 index 651fe8ea..00000000 --- a/update/v3.5/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.5') - updater.run_update() diff --git a/update/v3.5/Update.sh b/update/v3.5/Update.sh deleted file mode 100755 index ea0d39c3..00000000 --- a/update/v3.5/Update.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - -echo -e $GREEN"Installing PyAIL ..."$DEFAULT -pip3 install -U pyail - -echo -e $GREEN"Installing D4 CLIENT ..."$DEFAULT -pip3 install -U d4-pyclient - -echo "" -echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT -pip3 install -U DomainClassifier - -exit 0 diff --git a/update/v3.6/Update.py b/update/v3.6/Update.py deleted file mode 100755 index 8d7715de..00000000 --- a/update/v3.6/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v3.6') - updater.run_update() diff --git a/update/v3.6/Update.sh b/update/v3.6/Update.sh deleted file mode 100755 index e82dba98..00000000 --- a/update/v3.6/Update.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - -echo -e $GREEN"Updating D4 CLIENT ..."$DEFAULT -pip3 install -U d4-pyclient - -echo "" -echo -e $GREEN"Installing nose ..."$DEFAULT -pip3 install -U nose - -echo -e $GREEN"Installing coverage ..."$DEFAULT -pip3 install -U coverage - -echo "" -echo -e $GREEN"Updating DomainClassifier ..."$DEFAULT -pip3 install -U DomainClassifier - -exit 0 diff --git a/update/v3.7/Update.py b/update/v3.7/Update.py deleted file mode 100755 index 8b238f9c..00000000 --- a/update/v3.7/Update.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater -from lib import Tracker - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - def update(self): - """ - Update Domain Languages - """ - print('Fixing Tracker_uuid list ...') - Tracker.fix_all_tracker_uuid_list() - nb = 0 - for tracker_uuid in Tracker.get_trackers(): - self.r_serv.sadd('trackers_update_v3.7', tracker_uuid) - nb += 1 - - self.r_serv.set('update:nb_elem_to_convert', nb) - self.r_serv.set('update:nb_elem_converted',0) - - # Add background update - self.r_serv.sadd('ail:to_update', self.version) - - -if __name__ == '__main__': - updater = Updater('v3.7') - updater.run_update() diff --git a/update/v3.7/Update.sh b/update/v3.7/Update.sh deleted file mode 100755 index 0196d872..00000000 --- a/update/v3.7/Update.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - - -echo -e $GREEN"Updating thirdparty ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ut -wait - -echo "" -echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT -echo "" -python ${AIL_HOME}/update/v3.7/Update.py -wait -echo "" -echo "" - - -echo "" -echo -e $GREEN"Shutting down ARDB ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -exit 0 diff --git a/update/v3.7/Update_trackers.py b/update/v3.7/Update_trackers.py deleted file mode 100644 index b84f06e8..00000000 --- a/update/v3.7/Update_trackers.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import time - -sys.path.append(os.environ['AIL_BIN']) -################################## -# Import Project packages -################################## -from lib import ConfigLoader -from lib import Tracker - -def update_update_stats(): - nb_updated = int(r_serv_db.get('update:nb_elem_converted')) - progress = int((nb_updated * 100) / nb_elem_to_update) - print(f'{nb_updated}/{nb_elem_to_update} updated {progress}%') - r_serv_db.set('ail:current_background_script_stat', progress) - - -if __name__ == '__main__': - start_deb = time.time() - - config_loader = ConfigLoader.ConfigLoader() - r_serv_db = config_loader.get_redis_conn("ARDB_DB") - r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB") - config_loader = None - Tracker.r_serv_tracker = r_serv_tracker - - r_serv_db.set('ail:current_background_script', 'trackers update') - - nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert') - if not nb_elem_to_update: - nb_elem_to_update = 1 - else: - nb_elem_to_update = int(nb_elem_to_update) - - while True: - tracker_uuid = r_serv_db.spop('trackers_update_v3.7') - if tracker_uuid is not None: - print(tracker_uuid) - # FIX STATS - Tracker.fix_tracker_stats_per_day(tracker_uuid) - # MAP TRACKER - ITEM_ID - Tracker.fix_tracker_item_link(tracker_uuid) - - r_serv_db.incr('update:nb_elem_converted') - update_update_stats() - - else: - r_serv_db.set('ail:current_background_script_stat', 100) - sys.exit(0) diff --git a/update/v4.0/Update.py b/update/v4.0/Update.py deleted file mode 100755 index b609ebb6..00000000 --- a/update/v4.0/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v4.0') - updater.run_update() diff --git a/update/v4.0/Update.sh b/update/v4.0/Update.sh deleted file mode 100755 index 09bc3f4f..00000000 --- a/update/v4.0/Update.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." 
&& exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - -echo "" -echo -e $GREEN"Installing nose ..."$DEFAULT -pip3 install -U websockets - -exit 0 diff --git a/update/v4.1/Update.py b/update/v4.1/Update.py deleted file mode 100755 index f7a7a72a..00000000 --- a/update/v4.1/Update.py +++ /dev/null @@ -1,31 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys -import redis - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - def update(self): - r_tracking = redis.StrictRedis(host='localhost', - port=6382, - db=2, - decode_responses=True) - # FLUSH OLD DB - r_tracking.flushdb() - - -if __name__ == '__main__': - updater = Updater('v4.1') - updater.run_update() diff --git a/update/v4.2.1/Update.py b/update/v4.2.1/Update.py deleted file mode 100755 index f20a9184..00000000 --- a/update/v4.2.1/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v4.2.1') - updater.run_update() diff --git a/update/v4.2.1/Update.sh b/update/v4.2.1/Update.sh deleted file mode 100755 index bf04638b..00000000 --- a/update/v4.2.1/Update.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." 
&& exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - -echo "" -echo -e $GREEN"Updating pusblogger ..."$DEFAULT -pip3 install -U pubsublogger - -exit 0 diff --git a/update/v4.2/Update.py b/update/v4.2/Update.py deleted file mode 100755 index 708009b2..00000000 --- a/update/v4.2/Update.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -import os -import sys - -sys.path.append(os.environ['AIL_HOME']) -################################## -# Import Project packages -################################## -from update.bin.old_ail_updater import AIL_Updater - -class Updater(AIL_Updater): - """default Updater.""" - - def __init__(self, version): - super(Updater, self).__init__(version) - - -if __name__ == '__main__': - updater = Updater('v4.2') - updater.run_update() diff --git a/update/v4.2/Update.sh b/update/v4.2/Update.sh deleted file mode 100755 index a18aae61..00000000 --- a/update/v4.2/Update.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; -[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; - -export PATH=$AIL_HOME:$PATH -export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH -export PATH=$AIL_BIN:$PATH -export PATH=$AIL_FLASK:$PATH - -GREEN="\\033[1;32m" -DEFAULT="\\033[0;39m" - -echo -e $GREEN"Shutting down AIL ..."$DEFAULT -bash ${AIL_BIN}/LAUNCH.sh -ks -wait - -# SUBMODULES # -git submodule update - -echo "" -echo -e $GREEN"Installing typo-squatting ..."$DEFAULT -pip3 install -U ail_typo_squatting - -echo "" -echo -e $GREEN"Updating d4-client ..."$DEFAULT -pip3 install -U d4-pyclient - -exit 0 diff --git a/update/v3.0/Update.py b/update/v5.2/Update.py similarity index 72% rename from update/v3.0/Update.py rename to update/v5.2/Update.py index 9d393f6f..f87a84d0 100755 --- a/update/v3.0/Update.py +++ b/update/v5.2/Update.py @@ -8,7 +8,8 @@ sys.path.append(os.environ['AIL_HOME']) ################################## # Import Project packages ################################## -from update.bin.old_ail_updater import AIL_Updater +from update.bin.ail_updater import AIL_Updater +from lib import ail_updates class Updater(AIL_Updater): """default Updater.""" @@ -18,5 +19,6 @@ class Updater(AIL_Updater): if __name__ == '__main__': - updater = Updater('v3.0') + updater = Updater('v5.2') updater.run_update() + ail_updates.add_background_update('v5.2') diff --git a/update/v4.1/Update.sh b/update/v5.2/Update.sh similarity index 84% rename from update/v4.1/Update.sh rename to update/v5.2/Update.sh index 2be5376a..989bf1f8 100755 --- a/update/v4.1/Update.sh +++ b/update/v5.2/Update.sh @@ -2,13 +2,11 @@ [ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1; [ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. 
Run the script from the virtual environment." && exit 1; -[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1; [ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1; export PATH=$AIL_HOME:$PATH export PATH=$AIL_REDIS:$PATH -export PATH=$AIL_ARDB:$PATH export PATH=$AIL_BIN:$PATH export PATH=$AIL_FLASK:$PATH @@ -22,4 +20,12 @@ wait # SUBMODULES # git submodule update +echo "" +echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT +echo "" +python ${AIL_HOME}/update/v5.2/Update.py +wait +echo "" +echo "" + exit 0 diff --git a/update/v5.2/compress_har.py b/update/v5.2/compress_har.py new file mode 100755 index 00000000..12d08dce --- /dev/null +++ b/update/v5.2/compress_har.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import gzip +import os +import sys + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_updates +from lib import crawlers + +if __name__ == '__main__': + update = ail_updates.AILBackgroundUpdate('v5.2') + HAR_DIR = crawlers.HAR_DIR + hars_ids = crawlers.get_all_har_ids() + update.set_nb_to_update(len(hars_ids)) + n = 0 + for har_id in hars_ids: + crawlers._gzip_har(har_id) + update.inc_nb_updated() + n += 1 + if n % 100 == 0: + update.update_progress() + + crawlers._gzip_all_hars() diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index cc110c35..c330443b 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -17,9 +17,6 @@ from flask_login import LoginManager, current_user, login_user, logout_user, log import importlib from os.path import join -# # TODO: put me in lib/Tag -from pytaxonomies import Taxonomies - sys.path.append('./modules/') sys.path.append(os.environ['AIL_BIN']) @@ -51,6 +48,9 @@ from blueprints.objects_decoded import objects_decoded from blueprints.objects_subtypes import objects_subtypes from blueprints.objects_title import objects_title from blueprints.objects_cookie_name import objects_cookie_name +from blueprints.objects_etag import objects_etag +from blueprints.objects_hhhash import objects_hhhash +from blueprints.objects_chat import objects_chat Flask_dir = os.environ['AIL_FLASK'] @@ -106,6 +106,9 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl) app.register_blueprint(objects_subtypes, url_prefix=baseUrl) app.register_blueprint(objects_title, url_prefix=baseUrl) app.register_blueprint(objects_cookie_name, url_prefix=baseUrl) +app.register_blueprint(objects_etag, url_prefix=baseUrl) +app.register_blueprint(objects_hhhash, url_prefix=baseUrl) +app.register_blueprint(objects_chat, url_prefix=baseUrl) # ========= =========# @@ -250,16 +253,6 @@ default_taxonomies = ["infoleak", "gdpr", "fpf", "dark-web"] for taxonomy in default_taxonomies: Tag.enable_taxonomy_tags(taxonomy) -# ========== INITIAL tags auto export ============ -# taxonomies = Taxonomies() -# -# infoleak_tags = taxonomies.get('infoleak').machinetags() -# infoleak_automatic_tags = [] -# for tag in taxonomies.get('infoleak').machinetags(): -# if tag.split('=')[0][:] == 'infoleak:automatic-detection': -# r_serv_db.sadd('list_export_tags', tag) -# -# r_serv_db.sadd('list_export_tags', 'infoleak:submission="manual"') # ============ MAIN ============ if __name__ == "__main__": diff --git 
a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index f6e7feda..d5d672b1 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -61,6 +61,13 @@ def sanitise_level(level): level = 2 return level +def sanitise_objs_hidden(objs_hidden): + if objs_hidden: + objs_hidden = set(objs_hidden.split(',')) # TODO sanitize objects + else: + objs_hidden = set() + return objs_hidden + # ============= ROUTES ============== @correlation.route('/correlation/show', methods=['GET', 'POST']) @login_required @@ -83,12 +90,18 @@ def show_correlation(): correl_option = request.form.get('CookieNameCheck') if correl_option: filter_types.append('cookie-name') + correl_option = request.form.get('EtagCheck') + if correl_option: + filter_types.append('etag') correl_option = request.form.get('CveCheck') if correl_option: filter_types.append('cve') correl_option = request.form.get('CryptocurrencyCheck') if correl_option: filter_types.append('cryptocurrency') + correl_option = request.form.get('HHHashCheck') + if correl_option: + filter_types.append('hhhash') correl_option = request.form.get('PgpCheck') if correl_option: filter_types.append('pgp') @@ -127,6 +140,10 @@ def show_correlation(): max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) mode = sanitise_graph_mode(request.args.get('mode')) level = sanitise_level(request.args.get('level')) + objs_hidden = sanitise_objs_hidden(request.args.get('hidden')) + obj_to_hide = request.args.get('hide') + if obj_to_hide: + objs_hidden.add(obj_to_hide) related_btc = bool(request.args.get('related_btc', False)) @@ -136,17 +153,24 @@ def show_correlation(): if not ail_objects.exists_obj(obj_type, subtype, obj_id): return abort(404) # object exist - else: - dict_object = {"object_type": obj_type, - "correlation_id": obj_id, + else: # TODO remove old dict key + dict_object = {"type": obj_type, + "id": obj_id, + "object_type": obj_type, "max_nodes": max_nodes, "mode": mode, "level": level, "filter": filter_types, "filter_str": ",".join(filter_types), + "hidden": objs_hidden, "hidden_str": ",".join(objs_hidden), + + "correlation_id": obj_id, "metadata": ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags'}, flask_context=True), "nb_correl": ail_objects.get_obj_nb_correlations(obj_type, subtype, obj_id) } if subtype: + dict_object["subtype"] = subtype dict_object["metadata"]['type_id'] = subtype + else: + dict_object["subtype"] = '' dict_object["metadata_card"] = ail_objects.get_object_card_meta(obj_type, subtype, obj_id, related_btc=related_btc) return render_template("show_correlation.html", dict_object=dict_object, bootstrap_label=bootstrap_label, tags_selector_data=Tag.get_tags_selector_data()) @@ -156,26 +180,15 @@ def show_correlation(): @login_read_only def get_description(): object_id = request.args.get('object_id') - object_id = object_id.split(':') - # unpack object_id # # TODO: put me in lib - if len(object_id) == 3: - object_type = object_id[0] - type_id = object_id[1] - correlation_id = object_id[2] - elif len(object_id) == 2: - object_type = object_id[0] - type_id = None - correlation_id = object_id[1] - else: - return jsonify({}) + obj_type, subtype, obj_id = ail_objects.get_obj_type_subtype_id_from_global_id(object_id) - # check if correlation_id exist + # check if obj exist # # TODO: return error json - if not ail_objects.exists_obj(object_type, type_id, correlation_id): + if not ail_objects.exists_obj(obj_type, subtype, obj_id): return Response(json.dumps({"status": "error", 
"reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404 # object exist else: - res = ail_objects.get_object_meta(object_type, type_id, correlation_id, options={'tags', 'tags_safe'}, + res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'}, flask_context=True) if 'tags' in res: res['tags'] = list(res['tags']) @@ -191,9 +204,15 @@ def graph_node_json(): max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) level = sanitise_level(request.args.get('level')) + hidden = request.args.get('hidden') + if hidden: + hidden = set(hidden.split(',')) + else: + hidden = set() + filter_types = ail_objects.sanitize_objs_types(request.args.get('filter', '').split(',')) - json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, flask_context=True) + json_graph = ail_objects.get_correlations_graph_node(obj_type, subtype, obj_id, filter_types=filter_types, max_nodes=max_nodes, level=level, objs_hidden=hidden, flask_context=True) #json_graph = Correlate_object.get_graph_node_object_correlation(obj_type, obj_id, 'union', correlation_names, correlation_objects, requested_correl_type=subtype, max_nodes=max_nodes) return jsonify(json_graph) @@ -221,6 +240,7 @@ def correlation_tags_add(): nb_max = sanitise_nb_max_nodes(request.form.get('tag_nb_max')) level = sanitise_level(request.form.get('tag_level')) filter_types = ail_objects.sanitize_objs_types(request.form.get('tag_filter', '').split(',')) + hidden = sanitise_objs_hidden(request.form.get('tag_hidden')) if not ail_objects.exists_obj(obj_type, subtype, obj_id): return abort(404) @@ -249,9 +269,11 @@ def correlation_tags_add(): if tags: ail_objects.obj_correlations_objs_add_tags(obj_type, subtype, obj_id, tags, filter_types=filter_types, + objs_hidden=hidden, lvl=level + 1, nb_max=nb_max) return redirect(url_for('correlation.show_correlation', type=obj_type, subtype=subtype, id=obj_id, level=level, max_nodes=nb_max, + hidden=hidden, hidden_str=",".join(hidden), filter=",".join(filter_types))) diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index 39d84971..1b7f4454 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -272,6 +272,7 @@ def crawlers_last_domains(): domain, epoch = domain_row.split(':', 1) dom = Domains.Domain(domain) meta = dom.get_meta() + meta['last'] = datetime.fromtimestamp(int(epoch)).strftime("%Y/%m/%d %H:%M.%S") meta['epoch'] = epoch meta['status_epoch'] = dom.is_up_by_epoch(epoch) domains.append(meta) diff --git a/var/www/blueprints/hunters.py b/var/www/blueprints/hunters.py index b2a2e30b..9a2b6c3e 100644 --- a/var/www/blueprints/hunters.py +++ b/var/www/blueprints/hunters.py @@ -172,7 +172,7 @@ def show_tracker(): typo_squatting = set() if date_from: - date_from, date_to = Date.sanitise_daterange(meta['first_seen'], meta['last_seen']) + date_from, date_to = Date.sanitise_daterange(date_from, date_to) objs = tracker.get_objs_by_daterange(date_from, date_to) meta['objs'] = ail_objects.get_objects_meta(objs, flask_context=True) else: diff --git a/var/www/blueprints/import_export.py b/var/www/blueprints/import_export.py index 312fe0be..bb28d080 100644 --- a/var/www/blueprints/import_export.py +++ b/var/www/blueprints/import_export.py @@ -163,6 +163,7 @@ def objects_misp_export_post(): MISPExporter.delete_user_misp_objects_to_export(user_id) if not export: event_uuid = event[10:46] + event = 
f'{{"Event": {event}}}' # TODO ADD JAVASCRIPT REFRESH PAGE IF RESP == 200 return send_file(io.BytesIO(event.encode()), as_attachment=True, download_name=f'ail_export_{event_uuid}.json') diff --git a/var/www/blueprints/investigations_b.py b/var/www/blueprints/investigations_b.py index 8c1d592b..cf3cf688 100644 --- a/var/www/blueprints/investigations_b.py +++ b/var/www/blueprints/investigations_b.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* -''' +""" Blueprint Flask: ail_investigations -''' +""" import os import sys @@ -54,7 +54,13 @@ def show_investigation(): investigation_uuid = request.args.get("uuid") investigation = Investigations.Investigation(investigation_uuid) metadata = investigation.get_metadata(r_str=True) - objs = ail_objects.get_objects_meta(investigation.get_objects(), flask_context=True) + objs = [] + for obj in investigation.get_objects(): + obj_meta = ail_objects.get_object_meta(obj["type"], obj["subtype"], obj["id"], flask_context=True) + comment = investigation.get_objects_comment(f'{obj["type"]}:{obj["subtype"]}:{obj["id"]}') + if comment: + obj_meta['comment'] = comment + objs.append(obj_meta) return render_template("view_investigation.html", bootstrap_label=bootstrap_label, metadata=metadata, investigation_objs=objs) @@ -169,10 +175,13 @@ def register_investigation(): object_type = request.args.get('type') object_subtype = request.args.get('subtype') object_id = request.args.get('id') + comment = request.args.get('comment') for investigation_uuid in investigations_uuid: input_dict = {"uuid": investigation_uuid, "id": object_id, "type": object_type, "subtype": object_subtype} + if comment: + input_dict["comment"] = comment res = Investigations.api_register_object(input_dict) if res[1] != 200: return create_json_response(res[0], res[1]) diff --git a/var/www/blueprints/objects_chat.py b/var/www/blueprints/objects_chat.py new file mode 100644 index 00000000..8a1db11f --- /dev/null +++ b/var/www/blueprints/objects_chat.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
+''' + +import os +import sys +import json + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib import ail_core +from lib.objects import abstract_subtype_object +from lib.objects import ail_objects +from lib.objects import Chats +from packages import Date + +# ============ BLUEPRINT ============ +objects_chat = Blueprint('objects_chat', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/chat')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + +def create_json_response(data, status_code): + return Response(json.dumps(data, indent=2, sort_keys=True), mimetype='application/json'), status_code + +# ============ FUNCTIONS ============ + +# ============= ROUTES ============== + + +@objects_chat.route("/objects/chat/messages", methods=['GET']) +@login_required +@login_read_only +def objects_dashboard_chat(): + chat = request.args.get('id') + subtype = request.args.get('subtype') + chat = Chats.Chat(chat, subtype) + if chat.exists(): + messages = chat.get_messages() + meta = chat.get_meta({'icon'}) + return render_template('ChatMessages.html', meta=meta, messages=messages, bootstrap_label=bootstrap_label) + else: + return abort(404) + + + diff --git a/var/www/blueprints/objects_cookie_name.py b/var/www/blueprints/objects_cookie_name.py index ab111ff2..06d6743a 100644 --- a/var/www/blueprints/objects_cookie_name.py +++ b/var/www/blueprints/objects_cookie_name.py @@ -45,8 +45,6 @@ def objects_cookies_names(): else: dict_objects = {} - print(dict_objects) - return render_template("CookieNameDaterange.html", date_from=date_from, date_to=date_to, dict_objects=dict_objects, show_objects=show_objects) diff --git a/var/www/blueprints/objects_etag.py b/var/www/blueprints/objects_etag.py new file mode 100644 index 00000000..ad2b24fd --- /dev/null +++ b/var/www/blueprints/objects_etag.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: etag objects: daterange dashboard 
+''' + +import os +import sys + +from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import Etags +from packages import Date + +# ============ BLUEPRINT ============ +objects_etag = Blueprint('objects_etag', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/etag')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + + +# ============ FUNCTIONS ============ +@objects_etag.route("/objects/etags", methods=['GET']) +@login_required +@login_read_only +def objects_etags(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + show_objects = request.args.get('show_objects') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + + if show_objects: + dict_objects = Etags.Etags().api_get_meta_by_daterange(date_from, date_to) + else: + dict_objects = {} + + return render_template("EtagDaterange.html", date_from=date_from, date_to=date_to, + dict_objects=dict_objects, show_objects=show_objects) + +@objects_etag.route("/objects/etag/post", methods=['POST']) +@login_required +@login_read_only +def objects_etags_post(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + show_objects = request.form.get('show_objects') + return redirect(url_for('objects_etag.objects_etags', date_from=date_from, date_to=date_to, show_objects=show_objects)) + +@objects_etag.route("/objects/etag/range/json", methods=['GET']) +@login_required +@login_read_only +def objects_etag_range_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + return jsonify(Etags.Etags().api_get_chart_nb_by_daterange(date_from, date_to)) + +# @objects_etag.route("/objects/etag/search", methods=['POST']) +# @login_required +# @login_read_only +# def objects_etags_names_search(): +# to_search = request.form.get('object_id') +# +# # TODO SANITIZE ID +# # TODO Search all +# cve = Cves.Cve(to_search) +# if not cve.exists(): +# abort(404) +# else: +# return redirect(cve.get_link(flask_context=True)) + +# ============= ROUTES ============== + diff --git a/var/www/blueprints/objects_hhhash.py b/var/www/blueprints/objects_hhhash.py new file mode 100644 index 00000000..9d5bd320 --- /dev/null +++ b/var/www/blueprints/objects_hhhash.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: hhhash objects: daterange dashboard 
+''' + +import os +import sys + +from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import HHHashs +from packages import Date + +# ============ BLUEPRINT ============ +objects_hhhash = Blueprint('objects_hhhash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/hhhash')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + + +# ============ FUNCTIONS ============ +@objects_hhhash.route("/objects/hhhashs", methods=['GET']) +@login_required +@login_read_only +def objects_hhhashs(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + show_objects = request.args.get('show_objects') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + + if show_objects: + dict_objects = HHHashs.HHHashs().api_get_meta_by_daterange(date_from, date_to) + else: + dict_objects = {} + + return render_template("HHHashDaterange.html", date_from=date_from, date_to=date_to, + dict_objects=dict_objects, show_objects=show_objects) + +@objects_hhhash.route("/objects/hhhash/post", methods=['POST']) +@login_required +@login_read_only +def objects_hhhashs_post(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + show_objects = request.form.get('show_objects') + return redirect(url_for('objects_hhhash.objects_hhhashs', date_from=date_from, date_to=date_to, show_objects=show_objects)) + +@objects_hhhash.route("/objects/hhhash/range/json", methods=['GET']) +@login_required +@login_read_only +def objects_hhhash_range_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + return jsonify(HHHashs.HHHashs().api_get_chart_nb_by_daterange(date_from, date_to)) + +# @objects_hhhash.route("/objects/hhhash/search", methods=['POST']) +# @login_required +# @login_read_only +# def objects_hhhashs_names_search(): +# to_search = request.form.get('object_id') +# +# # TODO SANITIZE ID +# # TODO Search all +# cve = Cves.Cve(to_search) +# if not cve.exists(): +# abort(404) +# else: +# return redirect(cve.get_link(flask_context=True)) + +# ============= ROUTES ============== + diff --git a/var/www/blueprints/objects_subtypes.py b/var/www/blueprints/objects_subtypes.py index dc97ffa8..a41066a4 100644 --- a/var/www/blueprints/objects_subtypes.py +++ b/var/www/blueprints/objects_subtypes.py @@ -91,6 +91,12 @@ def subtypes_objects_dashboard(obj_type, f_request): # ============= ROUTES ============== +@objects_subtypes.route("/objects/chats", methods=['GET']) +@login_required +@login_read_only +def objects_dashboard_chat(): + return subtypes_objects_dashboard('chat', request) + @objects_subtypes.route("/objects/cryptocurrencies", methods=['GET']) @login_required @login_read_only diff --git a/var/www/blueprints/settings_b.py b/var/www/blueprints/settings_b.py index 518f6fb9..b154e196 100644 --- a/var/www/blueprints/settings_b.py +++ b/var/www/blueprints/settings_b.py @@ -48,7 +48,7 @@ def settings_page(): @login_required 
@login_read_only def get_background_update_metadata_json(): - return jsonify(ail_updates.get_update_background_metadata()) + return jsonify(ail_updates.get_update_background_meta(options={})) @settings_b.route("/settings/modules", methods=['GET']) @login_required diff --git a/var/www/modules/dashboard/Flask_dashboard.py b/var/www/modules/dashboard/Flask_dashboard.py index 923390dd..3f0df03f 100644 --- a/var/www/modules/dashboard/Flask_dashboard.py +++ b/var/www/modules/dashboard/Flask_dashboard.py @@ -167,10 +167,9 @@ def index(): # Check if update in progress background_update = False update_message = '' - if ail_updates.get_current_background_update(): + if ail_updates.is_update_background_running(): background_update = True - # update_message = ail_updates.get_update_background_message() - update_message = None + update_message = ail_updates.AILBackgroundUpdate(ail_updates.get_update_background_version()).get_message() return render_template("index.html", default_minute = default_minute, threshold_stucked_module=threshold_stucked_module, diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index e233bb00..0c95d0a0 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 # -*-coding:UTF-8 -* -''' +""" Flask functions and routes for the rest api -''' +""" import os import re @@ -508,6 +508,7 @@ def get_item_cryptocurrency_bitcoin(): # # # # # # # # # # # # # # CRAWLER # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # TODO: ADD RESULT JSON Response +# @restApi.route("api/v1/crawler/task/add", methods=['POST']) @restApi.route("api/v1/add/crawler/task", methods=['POST']) @token_required('analyst') def add_crawler_task(): diff --git a/var/www/modules/settings/Flask_settings.py b/var/www/modules/settings/Flask_settings.py index 4316d490..2b1b8826 100644 --- a/var/www/modules/settings/Flask_settings.py +++ b/var/www/modules/settings/Flask_settings.py @@ -19,7 +19,6 @@ sys.path.append(os.environ['AIL_BIN']) from lib import d4 from lib import Users - # ============ VARIABLES ============ import Flask_config @@ -33,7 +32,6 @@ email_regex = Flask_config.email_regex settings = Blueprint('settings', __name__, template_folder='templates') - # ============ FUNCTIONS ============ def check_email(email): @@ -43,6 +41,7 @@ def check_email(email): else: return False + # ============= ROUTES ============== @settings.route("/settings/edit_profile", methods=['GET']) @@ -52,7 +51,8 @@ def edit_profile(): user_metadata = Users.get_user_metadata(current_user.get_id()) admin_level = current_user.is_in_role('admin') return render_template("edit_profile.html", user_metadata=user_metadata, - admin_level=admin_level) + admin_level=admin_level) + @settings.route("/settings/new_token", methods=['GET']) @login_required @@ -61,6 +61,7 @@ def new_token(): Users.generate_new_token(current_user.get_id()) return redirect(url_for('settings.edit_profile')) + @settings.route("/settings/new_token_user", methods=['POST']) @login_required @login_admin @@ -70,6 +71,7 @@ def new_token_user(): Users.generate_new_token(user_id) return redirect(url_for('settings.users_list')) + @settings.route("/settings/create_user", methods=['GET']) @login_required @login_admin @@ -78,14 +80,15 @@ def create_user(): error = request.args.get('error') error_mail = request.args.get('error_mail') role = None - if 
r_serv_db.exists('user_metadata:{}'.format(user_id)): - role = r_serv_db.hget('user_metadata:{}'.format(user_id), 'role') - else: - user_id = None + if user_id: + user = Users.User(user_id) + if user.exists(): + role = user.get_role() all_roles = Users.get_all_roles() return render_template("create_user.html", all_roles=all_roles, user_id=user_id, user_role=role, - error=error, error_mail=error_mail, - admin_level=True) + error=error, error_mail=error_mail, + admin_level=True) + @settings.route("/settings/create_user_post", methods=['POST']) @login_required @@ -98,17 +101,19 @@ def create_user_post(): all_roles = Users.get_all_roles() - if email and len(email)< 300 and check_email(email) and role: + if email and len(email) < 300 and check_email(email) and role: if role in all_roles: # password set if password1 and password2: - if password1==password2: + if password1 == password2: if Users.check_password_strength(password1): password = password1 else: - return render_template("create_user.html", all_roles=all_roles, error="Incorrect Password", admin_level=True) + return render_template("create_user.html", all_roles=all_roles, error="Incorrect Password", + admin_level=True) else: - return render_template("create_user.html", all_roles=all_roles, error="Passwords don't match", admin_level=True) + return render_template("create_user.html", all_roles=all_roles, error="Passwords don't match", + admin_level=True) # generate password else: password = Users.gen_password() @@ -127,6 +132,7 @@ def create_user_post(): else: return render_template("create_user.html", all_roles=all_roles, error_mail=True, admin_level=True) + @settings.route("/settings/users_list", methods=['GET']) @login_required @login_admin @@ -140,6 +146,7 @@ def users_list(): new_user_dict['password'] = request.args.get('new_user_password') return render_template("users_list.html", all_users=all_users, new_user=new_user_dict, admin_level=True) + @settings.route("/settings/edit_user", methods=['POST']) @login_required @login_admin @@ -147,6 +154,7 @@ def edit_user(): user_id = request.form.get('user_id') return redirect(url_for('settings.create_user', user_id=user_id)) + @settings.route("/settings/delete_user", methods=['POST']) @login_required @login_admin @@ -163,6 +171,7 @@ def passive_dns(): passivedns_enabled = d4.is_passive_dns_enabled() return render_template("passive_dns.html", passivedns_enabled=passivedns_enabled) + @settings.route("/settings/passivedns/change_state", methods=['GET']) @login_required @login_admin @@ -171,11 +180,13 @@ def passive_dns_change_state(): passivedns_enabled = d4.change_passive_dns_state(new_state) return redirect(url_for('settings.passive_dns')) + @settings.route("/settings/ail", methods=['GET']) @login_required @login_admin def ail_configs(): return render_template("ail_configs.html", passivedns_enabled=None) + # ========= REGISTRATION ========= app.register_blueprint(settings, url_prefix=baseUrl) diff --git a/var/www/templates/correlation/metadata_card_decoded.html b/var/www/templates/correlation/metadata_card_decoded.html index da57cb21..1e292a80 100644 --- a/var/www/templates/correlation/metadata_card_decoded.html +++ b/var/www/templates/correlation/metadata_card_decoded.html @@ -35,12 +35,12 @@ {{ dict_object["metadata_card"]["icon"]["icon"] }} - {{ dict_object["metadata"]["file_type"] }} + {{ dict_object["metadata_card"]["mimetype"] }} - {{ dict_object["metadata"]['first_seen'] }} - {{ dict_object["metadata"]['last_seen'] }} - {{ dict_object["metadata"]['size'] }} - {{ 
dict_object["metadata"]['nb_seen'] }} + {{ dict_object["metadata_card"]['first_seen'] }} + {{ dict_object["metadata_card"]['last_seen'] }} + {{ dict_object["metadata_card"]['size'] }} + {{ dict_object["metadata_card"]['nb_seen'] }} diff --git a/var/www/templates/correlation/metadata_card_etag.html b/var/www/templates/correlation/metadata_card_etag.html new file mode 100644 index 00000000..cc599227 --- /dev/null +++ b/var/www/templates/correlation/metadata_card_etag.html @@ -0,0 +1,173 @@ + + + +{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%} + {% include 'modals/add_tags.html' %} +{% endwith %} + +{% include 'modals/edit_tag.html' %} + +
+
+

{{ dict_object["metadata"]["content"] }}

+
{{ dict_object["correlation_id"] }}
+ + + {% with obj_type='etag', obj_id=dict_object['correlation_id'], obj_subtype='' %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ + + + + + diff --git a/var/www/templates/correlation/metadata_card_hhhash.html b/var/www/templates/correlation/metadata_card_hhhash.html new file mode 100644 index 00000000..c1474605 --- /dev/null +++ b/var/www/templates/correlation/metadata_card_hhhash.html @@ -0,0 +1,173 @@ + + + +{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%} + {% include 'modals/add_tags.html' %} +{% endwith %} + +{% include 'modals/edit_tag.html' %} + +
+
+

{{ dict_object["metadata"]["content"] }}

+
{{ dict_object["correlation_id"] }}
+ + + {% with obj_type='hhhash', obj_id=dict_object['correlation_id'], obj_subtype='' %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ + + + + + diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index 95aa922c..11a85cd7 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -117,6 +117,10 @@ {% include 'correlation/metadata_card_title.html' %} {% elif dict_object["object_type"] == "cookie-name" %} {% include 'correlation/metadata_card_cookie_name.html' %} + {% elif dict_object["object_type"] == "etag" %} + {% include 'correlation/metadata_card_etag.html' %} + {% elif dict_object["object_type"] == "hhhash" %} + {% include 'correlation/metadata_card_hhhash.html' %} {% elif dict_object["object_type"] == "item" %} {% include 'correlation/metadata_card_item.html' %} {% endif %} @@ -162,6 +166,11 @@  Resize Graph + + +
 Graph Incomplete, Max Nodes Reached.
@@ -178,6 +187,14 @@ +

Press H on an object / node to hide it.

+ {% if dict_object["hidden"] %} +
Hidden objects:
+ {% for obj_hidden in dict_object["hidden"] %} + {{ obj_hidden }}
+ {% endfor %} + {% endif %} +
@@ -211,6 +228,14 @@
+
+ + +
+
+ + +
@@ -337,6 +362,7 @@ + {% include 'tags/block_tags_selector.html' %}
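Note on the correlation changes above: show_correlation and graph_node_json now take a comma-separated hidden parameter (split into a set by sanitise_objs_hidden() and forwarded to ail_objects.get_correlations_graph_node() as objs_hidden), plus a single hide parameter to add one object, which is how the "Press H on an object / node to hide it" UI round-trips hidden nodes. Below is a minimal sketch of exercising this from a script, assuming a local instance with self-signed TLS; the base URL, authentication step, endpoint path and object ids are placeholders, not values taken from this changeset.

#!/usr/bin/env python3
# Hypothetical smoke test for the hidden-objects graph filtering (sketch only).
import requests

AIL_URL = 'https://127.0.0.1:7000'  # assumed default local instance
session = requests.Session()
session.verify = False  # default installs ship a self-signed certificate

# ... authenticate `session` here (login form or API token) before the call ...

params = {
    'type': 'domain',
    'id': 'example.onion',            # hypothetical object id
    'max_nodes': 300,
    'level': 2,
    'filter': 'etag,hhhash,cookie-name',
    # comma-separated global ids; format assumed to match what
    # ail_objects.get_obj_type_subtype_id_from_global_id() parses
    'hidden': 'etag::0e1f2a3b,hhhash::4c5d6e7f',
}
r = session.get(f'{AIL_URL}/correlation/graph_node_json', params=params)
r.raise_for_status()
print(r.json())  # node/link JSON with the hidden objects filtered out

If the filtering behaves, the two hidden ids should be absent from the returned nodes even when they correlate with the queried domain.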