diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index adb1444f..808afdba 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -142,13 +142,7 @@ function launching_logs { screen -dmS "Logging_AIL" sleep 0.1 echo -e $GREEN"\t* Launching logging process"$DEFAULT - screen -S "Logging_AIL" -X screen -t "LogQueue" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Queuing -l ../logs/ ${syslog_cmd}; read x" - sleep 0.1 screen -S "Logging_AIL" -X screen -t "LogScript" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Script -l ../logs/ ${syslog_cmd}; read x" - sleep 0.1 - screen -S "Logging_AIL" -X screen -t "LogSync" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Sync -l ../logs/ ${syslog_cmd}; read x" - sleep 0.1 - screen -S "Logging_AIL" -X screen -t "LogCrawler" bash -c "cd ${AIL_BIN}; ${AIL_VENV}/bin/log_subscriber -p 6380 -c Crawler -l ../logs/ ${syslog_cmd}; read x" } function checking_configuration { @@ -197,8 +191,6 @@ function launching_scripts { sleep 0.1 screen -S "Script_AIL" -X screen -t "D4_client" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./D4_client.py; read x" sleep 0.1 - screen -S "Script_AIL" -X screen -t "DbCleaner" bash -c "cd ${AIL_BIN}/core; ${ENV_PY} ./DbCleaner.py; read x" - sleep 0.1 screen -S "Script_AIL" -X screen -t "UpdateBackground" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./update-background.py; read x" sleep 0.1 diff --git a/bin/NotificationHelper.py b/bin/NotificationHelper.py index 932c4bf0..0505483f 100755 --- a/bin/NotificationHelper.py +++ b/bin/NotificationHelper.py @@ -5,13 +5,14 @@ import os import sys import argparse +import logging.config import traceback import smtplib -from pubsublogger import publisher from email.mime.multipart import MIMEMultipart from email.mime.text import MIMEText sys.path.append(os.environ['AIL_BIN']) +from lib import ail_logger from lib import ConfigLoader """ @@ -20,8 +21,8 @@ This module allows the global configuration and management of notification setti config_loader = ConfigLoader.ConfigLoader() -publisher.port = 6380 -publisher.channel = "Script" +logging.config.dictConfig(ail_logger.get_config()) +logger = logging.getLogger() def sendEmailNotification(recipient, mail_subject, mail_body): @@ -34,10 +35,7 @@ def sendEmailNotification(recipient, mail_subject, mail_body): sender_pw = None # raise an exception if any of these is None - if (sender is None or - sender_host is None or - sender_port is None - ): + if sender is None or sender_host is None or sender_port is None: raise Exception('SMTP configuration (host, port, sender) is missing or incomplete!') try: @@ -70,7 +68,7 @@ def sendEmailNotification(recipient, mail_subject, mail_body): except Exception as err: traceback.print_tb(err.__traceback__) - publisher.warning(err) + logger.warning(err) if __name__ == '__main__': diff --git a/bin/core/D4_client.py b/bin/core/D4_client.py index 2b48e138..9c452912 100755 --- a/bin/core/D4_client.py +++ b/bin/core/D4_client.py @@ -36,7 +36,7 @@ class D4Client(AbstractModule): self.last_refresh = time.time() # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def compute(self, dns_record): # Refresh D4 Client diff --git a/bin/core/DbCleaner.py b/bin/core/DbCleaner.py deleted file mode 100755 index 721e38cc..00000000 --- a/bin/core/DbCleaner.py +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -The DbCleaner Module -=================== - -""" -import os -import sys 
-import time -import datetime - -sys.path.append(os.environ['AIL_BIN']) -################################## -# Import Project packages -################################## - -from pubsublogger import publisher - -if __name__ == "__main__": - - publisher.port = 6380 - publisher.channel = "Script" - publisher.info("DbCleaner started") - - # low priority - time.sleep(180) - - daily_cleaner = True - current_date = datetime.datetime.now().strftime("%Y%m%d") - - while True: - - if daily_cleaner: - - daily_cleaner = False - else: - sys.exit(0) - time.sleep(600) - - new_date = datetime.datetime.now().strftime("%Y%m%d") - if new_date != current_date: - current_date = new_date - daily_cleaner = True diff --git a/bin/core/Sync_importer.py b/bin/core/Sync_importer.py index d1f37ee5..bf70b67e 100755 --- a/bin/core/Sync_importer.py +++ b/bin/core/Sync_importer.py @@ -46,7 +46,7 @@ class Sync_importer(AbstractModule): # self.last_refresh = time.time() # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} Launched') + self.logger.info(f'Module {self.module_name} Launched') def run(self): while self.proceed: @@ -63,7 +63,7 @@ class Sync_importer(AbstractModule): else: self.computeNone() # Wait before next process - self.redis_logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s") + self.logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s") time.sleep(self.pending_seconds) def compute(self, ail_stream): diff --git a/bin/core/Sync_module.py b/bin/core/Sync_module.py index 7212c727..e37d48d6 100755 --- a/bin/core/Sync_module.py +++ b/bin/core/Sync_module.py @@ -42,7 +42,7 @@ class Sync_module(AbstractModule): print(self.dict_sync_queues) # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} Launched') + self.logger.info(f'Module {self.module_name} Launched') def compute(self, message): diff --git a/bin/core/ail_2_ail.py b/bin/core/ail_2_ail.py index da388611..530cfa87 100755 --- a/bin/core/ail_2_ail.py +++ b/bin/core/ail_2_ail.py @@ -12,7 +12,6 @@ import uuid import subprocess from flask import escape -from pubsublogger import publisher sys.path.append(os.environ['AIL_BIN']) ################################## @@ -47,11 +46,6 @@ WEBSOCKETS_CLOSE_CODES = { 1015: 'TLS Handshake', } -#### LOGS #### -# redis_logger = publisher -# redis_logger.port = 6380 -# redis_logger.channel = 'Sync' - def get_websockets_close_message(code): if code in WEBSOCKETS_CLOSE_CODES: msg = f'{code} {WEBSOCKETS_CLOSE_CODES[code]}' diff --git a/bin/core/ail_2_ail_client.py b/bin/core/ail_2_ail_client.py index 8f23b053..f14b7868 100755 --- a/bin/core/ail_2_ail_client.py +++ b/bin/core/ail_2_ail_client.py @@ -4,10 +4,10 @@ import argparse import json import os +import logging.config import sys import time import traceback -from pubsublogger import publisher from urllib.parse import urljoin import asyncio @@ -19,9 +19,15 @@ sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## +from lib import ail_logger from core import ail_2_ail from lib.ConfigLoader import ConfigLoader +#### LOGS #### +logging.config.dictConfig(ail_logger.get_config(name='syncs')) +logger = logging.getLogger() + + config_loader = ConfigLoader() local_addr = config_loader.get_config_str('AIL_2_AIL', 'local_addr') if not local_addr or local_addr == None: @@ -30,13 +36,6 @@ else: local_addr = (local_addr, 0) config_loader = None - -#### LOGS #### -redis_logger = 
publisher -redis_logger.port = 6380 -redis_logger.channel = 'Sync' -##-- LOGS --## - #################################################################### class AIL2AILClient(object): @@ -154,29 +153,29 @@ async def ail_to_ail_client(ail_uuid, sync_mode, api, ail_key=None, client_id=No error_message = str(e) if error_message: sys.stderr.write(error_message) - redis_logger.warning(f'{ail_uuid}: {error_message}') + logger.warning(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) except websockets.exceptions.ConnectionClosedError as e: error_message = ail_2_ail.get_websockets_close_message(e.code) sys.stderr.write(error_message) - redis_logger.info(f'{ail_uuid}: {error_message}') + logger.info(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) except websockets.exceptions.InvalidURI as e: error_message = f'Invalid AIL url: {e.uri}' sys.stderr.write(error_message) - redis_logger.warning(f'{ail_uuid}: {error_message}') + logger.warning(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) except ConnectionError as e: error_message = str(e) sys.stderr.write(error_message) - redis_logger.info(f'{ail_uuid}: {error_message}') + logger.info(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) # OSError: Multiple exceptions except OSError as e: # # TODO: check if we need to check if is connection error error_message = str(e) sys.stderr.write(error_message) - redis_logger.info(f'{ail_uuid}: {error_message}') + logger.info(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) except websockets.exceptions.ConnectionClosedOK as e: print('connection closed') @@ -186,7 +185,7 @@ async def ail_to_ail_client(ail_uuid, sync_mode, api, ail_key=None, client_id=No trace = str(trace) error_message = f'{trace}\n{str(err)}' sys.stderr.write(error_message) - redis_logger.critical(f'{ail_uuid}: {error_message}') + logger.critical(f'{ail_uuid}: {error_message}') ail_2_ail.save_ail_server_error(ail_uuid, error_message) ail_2_ail.delete_sync_client_cache(client_id) diff --git a/bin/core/ail_2_ail_server.py b/bin/core/ail_2_ail_server.py index 6a49b7d9..3cc6e789 100755 --- a/bin/core/ail_2_ail_server.py +++ b/bin/core/ail_2_ail_server.py @@ -3,6 +3,7 @@ import json import os +import logging.config import sys import uuid @@ -15,21 +16,20 @@ sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from pubsublogger import publisher +from lib import ail_logger from core import ail_2_ail from lib.ConfigLoader import ConfigLoader + +logging.config.dictConfig(ail_logger.get_config(name='syncs')) +logger = logging.getLogger() + + config_loader = ConfigLoader() host = config_loader.get_config_str('AIL_2_AIL', 'server_host') port = config_loader.get_config_int('AIL_2_AIL', 'server_port') config_loader = None -# # TODO: refactor logging -#### LOGS #### -redis_logger = publisher -redis_logger.port = 6380 -redis_logger.channel = 'Sync' - ############################# CONNECTED_CLIENTS = {} @@ -80,7 +80,7 @@ async def server_controller(): connected_clients = CONNECTED_CLIENTS[ail_uuid].copy() for c_websocket in connected_clients: await c_websocket.close(code=1000) - redis_logger.info(f'Server Command Connection closed: {ail_uuid}') + logger.info(f'Server Command Connection closed: {ail_uuid}') print(f'Server Command Connection closed: {ail_uuid}') await asyncio.sleep(10) @@ 
-91,7 +91,7 @@ async def register(websocket): ail_uuid = websocket.ail_uuid remote_address = websocket.remote_address sync_mode = websocket.sync_mode - redis_logger.info(f'Client Connected: {ail_uuid} {remote_address}') + logger.info(f'Client Connected: {ail_uuid} {remote_address}') print(f'Client Connected: {ail_uuid} {remote_address}') if not ail_uuid in CONNECTED_CLIENTS: @@ -201,7 +201,7 @@ async def ail_to_ail_serv(websocket, path): if sync_mode == 'pull': await pull(websocket, websocket.ail_uuid) await websocket.close() - redis_logger.info(f'Connection closed: {ail_uuid} {remote_address}') + logger.info(f'Connection closed: {ail_uuid} {remote_address}') print(f'Connection closed: {ail_uuid} {remote_address}') elif sync_mode == 'push': @@ -210,7 +210,7 @@ async def ail_to_ail_serv(websocket, path): elif sync_mode == 'api': await api(websocket, websocket.ail_uuid, path['api']) await websocket.close() - redis_logger.info(f'Connection closed: {ail_uuid} {remote_address}') + logger.info(f'Connection closed: {ail_uuid} {remote_address}') print(f'Connection closed: {ail_uuid} {remote_address}') finally: @@ -234,12 +234,12 @@ class AIL_2_AIL_Protocol(websockets.WebSocketServerProtocol): # API TOKEN api_key = request_headers.get('Authorization', '') if api_key is None: - redis_logger.warning(f'Missing token: {self.remote_address}') + logger.warning(f'Missing token: {self.remote_address}') print(f'Missing token: {self.remote_address}') return http.HTTPStatus.UNAUTHORIZED, [], b"Missing token\n" if not ail_2_ail.is_allowed_ail_instance_key(api_key): - redis_logger.warning(f'Invalid token: {self.remote_address}') + logger.warning(f'Invalid token: {self.remote_address}') print(f'Invalid token: {self.remote_address}') return http.HTTPStatus.UNAUTHORIZED, [], b"Invalid token\n" @@ -247,20 +247,20 @@ class AIL_2_AIL_Protocol(websockets.WebSocketServerProtocol): try: dict_path = unpack_path(path) except Exception as e: - redis_logger.warning(f'Invalid path: {self.remote_address}') + logger.warning(f'Invalid path: {self.remote_address}') print(f'Invalid path: {self.remote_address}') return http.HTTPStatus.BAD_REQUEST, [], b"Invalid path\n" ail_uuid = ail_2_ail.get_ail_instance_by_key(api_key) if ail_uuid != dict_path['ail_uuid']: - redis_logger.warning(f'Invalid token: {self.remote_address} {ail_uuid}') + logger.warning(f'Invalid token: {self.remote_address} {ail_uuid}') print(f'Invalid token: {self.remote_address} {ail_uuid}') return http.HTTPStatus.UNAUTHORIZED, [], b"Invalid token\n" if not api_key != ail_2_ail.get_ail_instance_key(api_key): - redis_logger.warning(f'Invalid token: {self.remote_address} {ail_uuid}') + logger.warning(f'Invalid token: {self.remote_address} {ail_uuid}') print(f'Invalid token: {self.remote_address} {ail_uuid}') return http.HTTPStatus.UNAUTHORIZED, [], b"Invalid token\n" @@ -288,7 +288,7 @@ class AIL_2_AIL_Protocol(websockets.WebSocketServerProtocol): # SYNC MODE if not ail_2_ail.is_ail_instance_sync_enabled(self.ail_uuid, sync_mode=self.sync_mode): sync_mode = self.sync_mode - redis_logger.warning(f'SYNC mode disabled: {self.remote_address} {ail_uuid} {sync_mode}') + logger.warning(f'SYNC mode disabled: {self.remote_address} {ail_uuid} {sync_mode}') print(f'SYNC mode disabled: {self.remote_address} {ail_uuid} {sync_mode}') return http.HTTPStatus.FORBIDDEN, [], b"SYNC mode disabled\n" @@ -298,7 +298,7 @@ class AIL_2_AIL_Protocol(websockets.WebSocketServerProtocol): else: print(f'Invalid path: {self.remote_address}') - redis_logger.info(f'Invalid path: 
{self.remote_address}') + logger.info(f'Invalid path: {self.remote_address}') return http.HTTPStatus.BAD_REQUEST, [], b"Invalid path\n" ########################################### @@ -310,7 +310,7 @@ class AIL_2_AIL_Protocol(websockets.WebSocketServerProtocol): if __name__ == '__main__': print('Launching Server...') - redis_logger.info('Launching Server...') + logger.info('Launching Server...') ail_2_ail.clear_server_connected_clients() @@ -321,7 +321,7 @@ if __name__ == '__main__': start_server = websockets.serve(ail_to_ail_serv, host, port, ssl=ssl_context, create_protocol=AIL_2_AIL_Protocol, max_size=None) print(f'Server Launched: wss://{host}:{port}') - redis_logger.info(f'Server Launched: wss://{host}:{port}') + logger.info(f'Server Launched: wss://{host}:{port}') loop = asyncio.get_event_loop() # server command diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index d3f0acb3..fd8a758f 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -2,6 +2,7 @@ # -*-coding:UTF-8 -* import os +import logging.config import sys import time @@ -12,17 +13,21 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from modules.abstract_module import AbstractModule +from lib import ail_logger from lib import crawlers from lib.ConfigLoader import ConfigLoader from lib.objects.Domains import Domain from lib.objects.Items import Item from lib.objects import Screenshots +logging.config.dictConfig(ail_logger.get_config(name='crawlers')) class Crawler(AbstractModule): def __init__(self): - super(Crawler, self, ).__init__(logger_channel='Crawler') + super(Crawler, self, ).__init__() + + self.logger = logging.getLogger(f'{self.__class__.__name__}') # Waiting time in seconds between to message processed self.pending_seconds = 1 @@ -58,7 +63,7 @@ class Crawler(AbstractModule): self.placeholder_screenshots = {'27e14ace10b0f96acd2bd919aaa98a964597532c35b6409dff6cc8eec8214748'} # Send module state to logs - self.redis_logger.info('Crawler initialized') + self.logger.info('Crawler initialized') def refresh_lacus_status(self): try: @@ -209,7 +214,7 @@ class Crawler(AbstractModule): print(entries.keys()) if 'error' in entries: # TODO IMPROVE ERROR MESSAGE - self.redis_logger.warning(str(entries['error'])) + self.logger.warning(str(entries['error'])) print(entries['error']) if entries.get('html'): print('retrieved content') @@ -221,7 +226,7 @@ class Crawler(AbstractModule): current_domain = unpacked_last_url['domain'] # REDIRECTION TODO CHECK IF TYPE CHANGE if current_domain != self.domain.id and not self.root_item: - self.redis_logger.warning(f'External redirection {self.domain.id} -> {current_domain}') + self.logger.warning(f'External redirection {self.domain.id} -> {current_domain}') print(f'External redirection {self.domain.id} -> {current_domain}') if not self.root_item: self.domain = Domain(current_domain) diff --git a/bin/exporter/MailExporter.py b/bin/exporter/MailExporter.py index 4a5ab6c2..4b2d4a3a 100755 --- a/bin/exporter/MailExporter.py +++ b/bin/exporter/MailExporter.py @@ -80,7 +80,7 @@ class MailExporter(AbstractExporter, ABC): return smtp_server # except Exception as err: # traceback.print_tb(err.__traceback__) - # publisher.warning(err) + # logger.warning(err) def _export(self, recipient, subject, body): mime_msg = MIMEMultipart() @@ -95,7 +95,7 @@ class MailExporter(AbstractExporter, ABC): smtp_client.quit() # except Exception as err: # traceback.print_tb(err.__traceback__) - # publisher.warning(err) + # logger.warning(err) 
         print(f'Send notification: {subject} to {recipient}')
 
 class MailExporterTracker(MailExporter):
diff --git a/bin/lib/ail_logger.py b/bin/lib/ail_logger.py
new file mode 100755
index 00000000..05192ade
--- /dev/null
+++ b/bin/lib/ail_logger.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import json
+import sys
+import logging
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+
+config_loader = ConfigLoader()
+r_db = config_loader.get_db_conn("Kvrocks_DB")
+config_loader = None
+
+LOGGING_CONFIG = os.path.join(os.environ['AIL_HOME'], 'configs', 'logging.json')
+
+def get_config(name=None):
+    if not name:
+        name = 'ail_warning.log'
+    else:
+        name = f'{name}.log'
+    with open(LOGGING_CONFIG, 'r') as f:
+        config = json.load(f)
+    config['handlers']['file']['filename'] = os.path.join(os.environ['AIL_HOME'], 'logs', name)
+    return config
+
+
+if __name__ == '__main__':
+    pass
diff --git a/bin/lib/objects/Decodeds.py b/bin/lib/objects/Decodeds.py
index 39c8443f..ae776a1f 100755
--- a/bin/lib/objects/Decodeds.py
+++ b/bin/lib/objects/Decodeds.py
@@ -116,15 +116,20 @@ class Decoded(AbstractDaterangeObject):
     def get_filepath(self, mimetype=None):
         return os.path.join(os.environ['AIL_HOME'], self.get_rel_path(mimetype=mimetype))
 
-    def get_content(self, mimetype=None, r_str=False):
+    def get_content(self, mimetype=None, r_type='str'):
         filepath = self.get_filepath(mimetype=mimetype)
-        if r_str:
+        if r_type == 'str':
             with open(filepath, 'r') as f:
                 content = f.read()
-        else:
+            return content
+        elif r_type == 'bytes':
+            with open(filepath, 'rb') as f:
+                content = f.read()
+            return content
+        elif r_type == 'bytesio':
             with open(filepath, 'rb') as f:
                 content = BytesIO(f.read())
-        return content
+            return content
 
     def get_zip_content(self):
         # mimetype = self.get_estimated_type()
diff --git a/bin/lib/objects/Items.py b/bin/lib/objects/Items.py
index 2175b3f0..e20fa501 100755
--- a/bin/lib/objects/Items.py
+++ b/bin/lib/objects/Items.py
@@ -80,19 +80,19 @@ class Item(AbstractObject):
         else:
             return filename
 
-    def get_content(self, r_str=True, binary=False):
+    def get_content(self, r_type='str'):
         """
         Returns Item content
         """
-        if binary:
-            return item_basic.get_item_content_binary(self.id)
-        else:
+        if r_type == 'str':
             return item_basic.get_item_content(self.id)
+        elif r_type == 'bytes':
+            return item_basic.get_item_content_binary(self.id)
 
     def get_raw_content(self, decompress=False):
         filepath = self.get_filename()
         if decompress:
-            raw_content = BytesIO(self.get_content(binary=True))
+            raw_content = BytesIO(self.get_content(r_type='bytes'))
         else:
             with open(filepath, 'rb') as f:
                 raw_content = BytesIO(f.read())
diff --git a/bin/lib/regex_helper.py b/bin/lib/regex_helper.py
index 507a52ad..d1722516 100755
--- a/bin/lib/regex_helper.py
+++ b/bin/lib/regex_helper.py
@@ -6,6 +6,7 @@ Regex Helper
 """
 import os
+import logging.config
 import re
 import sys
 import uuid
@@ -13,24 +14,24 @@ import uuid
 from multiprocessing import Process as Proc
 
 sys.path.append(os.environ['AIL_BIN'])
-from pubsublogger import publisher
 
 sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
+from lib import ail_logger
 from lib import ConfigLoader
 # from lib import Statistics
 
+logging.config.dictConfig(ail_logger.get_config())
+logger = logging.getLogger()
+
 ## LOAD CONFIG ##
 config_loader = 
ConfigLoader.ConfigLoader() r_serv_cache = config_loader.get_redis_conn("Redis_Cache") config_loader = None ## -- ## -publisher.port = 6380 -publisher.channel = "Script" - def generate_redis_cache_key(module_name): new_uuid = str(uuid.uuid4()) return f'{module_name}_extracted:{new_uuid}' @@ -65,7 +66,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time # Statistics.incr_module_timeout_statistic(module_name) err_mess = f"{module_name}: processing timeout: {item_id}" print(err_mess) - publisher.info(err_mess) + logger.info(err_mess) return [] else: if r_set: @@ -99,7 +100,7 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30): # Statistics.incr_module_timeout_statistic(r_key) err_mess = f"{r_key}: processing timeout: {item_id}" print(err_mess) - publisher.info(err_mess) + logger.info(err_mess) return [] else: res = r_serv_cache.lrange(r_key, 0, -1) @@ -130,7 +131,7 @@ def regex_search(r_key, regex, item_id, content, max_time=30): # Statistics.incr_module_timeout_statistic(r_key) err_mess = f"{r_key}: processing timeout: {item_id}" print(err_mess) - publisher.info(err_mess) + logger.info(err_mess) return False else: if r_serv_cache.exists(r_key): diff --git a/bin/modules/ApiKey.py b/bin/modules/ApiKey.py index 92e0d2a5..3501ebb5 100755 --- a/bin/modules/ApiKey.py +++ b/bin/modules/ApiKey.py @@ -44,7 +44,7 @@ class ApiKey(AbstractModule): re.compile(self.re_google_api_key) # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def compute(self, message, r_result=False): item_id, score = message.split() diff --git a/bin/modules/Credential.py b/bin/modules/Credential.py index 3c1efd9c..7fcbafb0 100755 --- a/bin/modules/Credential.py +++ b/bin/modules/Credential.py @@ -148,18 +148,18 @@ class Credential(AbstractModule): discovered_sites = ', '.join(all_sites) print(f"=======> Probably on : {discovered_sites}") - date = datetime.now().strftime("%Y%m") - nb_tlds = {} - for cred in all_credentials: - maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] - self.faup.decode(maildomains) - tld = self.faup.get()['tld'] - # # TODO: # FIXME: remove me - try: - tld = tld.decode() - except: - pass - nb_tlds[tld] = nb_tlds.get(tld, 0) + 1 + # date = datetime.now().strftime("%Y%m") + # nb_tlds = {} + # for cred in all_credentials: + # maildomains = re.findall(r"@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0] + # self.faup.decode(maildomains) + # tld = self.faup.get()['tld'] + # # # TODO: # FIXME: remove me + # try: + # tld = tld.decode() + # except: + # pass + # nb_tlds[tld] = nb_tlds.get(tld, 0) + 1 # for tld in nb_tlds: # Statistics.add_module_tld_stats_by_date('credential', date, tld, nb_tlds[tld]) else: diff --git a/bin/modules/CreditCards.py b/bin/modules/CreditCards.py index 5757e1b2..e34890b4 100755 --- a/bin/modules/CreditCards.py +++ b/bin/modules/CreditCards.py @@ -51,7 +51,7 @@ class CreditCards(AbstractModule): self.pending_seconds = 10 # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def get_valid_card(self, card): clean_card = re.sub(self.re_clean_card, '', card) @@ -74,7 +74,7 @@ class CreditCards(AbstractModule): all_cards = self.regex_findall(self.regex, item.id, content) if len(all_cards) > 0: - # self.redis_logger.debug(f'All matching {all_cards}') + # self.logger.debug(f'All matching {all_cards}') creditcard_set 
= set() for card in all_cards: print(card) diff --git a/bin/modules/Cryptocurrencies.py b/bin/modules/Cryptocurrencies.py index dbb5d637..6197f8a1 100755 --- a/bin/modules/Cryptocurrencies.py +++ b/bin/modules/Cryptocurrencies.py @@ -111,7 +111,7 @@ class Cryptocurrencies(AbstractModule, ABC): self.pending_seconds = 1 # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def compute(self, message): item = Item(message) diff --git a/bin/modules/CveModule.py b/bin/modules/CveModule.py index a3f9b436..8df75c6a 100755 --- a/bin/modules/CveModule.py +++ b/bin/modules/CveModule.py @@ -41,7 +41,7 @@ class CveModule(AbstractModule): self.pending_seconds = 1 # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def compute(self, message): diff --git a/bin/modules/Decoder.py b/bin/modules/Decoder.py index e94f192e..c7627480 100755 --- a/bin/modules/Decoder.py +++ b/bin/modules/Decoder.py @@ -84,7 +84,7 @@ class Decoder(AbstractModule): self.tracker_yara = Tracker_Yara(queue=False) # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def compute(self, message): @@ -122,13 +122,11 @@ class Decoder(AbstractModule): mimetype = decoded.get_mimetype() decoded.add(dname, date, item.id, mimetype=mimetype) - # DEBUG - self.redis_logger.debug(f'{item.id} : {dname} - {decoded.id} - {mimetype}') - print(f'{item.id} : {dname} - {decoded.id} - {mimetype}') + # new_decodeds.append(decoded.id) + self.logger.info(f'{item.id} : {dname} - {decoded.id} - {mimetype}') if find: - self.redis_logger.info(f'{item.id} - {dname}') - print(f'{item.id} - {dname}') + self.logger.info(f'{item.id} - {dname}') # Send to Tags msg = f'infoleak:automatic-detection="{dname}";{item.id}' @@ -137,8 +135,11 @@ class Decoder(AbstractModule): #################### # TRACKERS DECODED for decoded_id in new_decodeds: - self.tracker_term.compute(decoded_id, obj_type='decoded') - self.tracker_regex.compute(decoded_id, obj_type='decoded') + try: + self.tracker_term.compute(decoded_id, obj_type='decoded') + self.tracker_regex.compute(decoded_id, obj_type='decoded') + except UnicodeDecodeError: + pass self.tracker_yara.compute(decoded_id, obj_type='decoded') diff --git a/bin/modules/DomClassifier.py b/bin/modules/DomClassifier.py index 57d7633c..fd16939d 100755 --- a/bin/modules/DomClassifier.py +++ b/bin/modules/DomClassifier.py @@ -48,7 +48,7 @@ class DomClassifier(AbstractModule): self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld") # Send module state to logs - self.redis_logger.info(f"Module: {self.module_name} Launched") + self.logger.info(f"Module: {self.module_name} Launched") def compute(self, message, r_result=False): host, item_id = message.split() @@ -62,7 +62,7 @@ class DomClassifier(AbstractModule): self.c.text(rawtext=host) print(self.c.domain) self.c.validdomain(passive_dns=True, extended=False) - # self.redis_logger.debug(self.c.vdomain) + # self.logger.debug(self.c.vdomain) print(self.c.vdomain) print() diff --git a/bin/modules/Duplicates.py b/bin/modules/Duplicates.py index 1d0d2dae..d0a39c0d 100755 --- a/bin/modules/Duplicates.py +++ b/bin/modules/Duplicates.py @@ -47,7 +47,7 @@ class Duplicates(AbstractModule): "tlsh": {"threshold": THRESHOLD_TLSH} } - self.redis_logger.info(f"Module: {self.module_name} 
Launched") + self.logger.info(f"Module: {self.module_name} Launched") def compute(self, message): # IOError: "CRC Checksum Failed on : {id}" @@ -65,7 +65,7 @@ class Duplicates(AbstractModule): x = time.time() # Get Hashs - content = item.get_content(binary=True) + content = item.get_content(r_type='bytes') self.algos['ssdeep']['hash'] = Duplicate.get_ssdeep_hash(content) self.algos['tlsh']['hash'] = Duplicate.get_tlsh_hash(content) diff --git a/bin/modules/Global.py b/bin/modules/Global.py index 77d27086..4cc676df 100755 --- a/bin/modules/Global.py +++ b/bin/modules/Global.py @@ -67,8 +67,8 @@ class Global(AbstractModule): self.pending_seconds = 0.5 # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") - # Send module state to logs + self.logger.info(f"Module {self.module_name} initialized") + # Send module state to logs # TODO MOVE ME IN INIT SCRIPT self.redis_logger.critical(f"AIL {get_ail_uuid()} started") def computeNone(self): @@ -103,7 +103,7 @@ class Global(AbstractModule): # Incorrect filename if not os.path.commonprefix([filename, self.ITEMS_FOLDER]) == self.ITEMS_FOLDER: - self.redis_logger.warning(f'Global; Path traversal detected {filename}') + self.logger.warning(f'Global; Path traversal detected {filename}') print(f'Global; Path traversal detected {filename}') else: @@ -146,7 +146,7 @@ class Global(AbstractModule): return item_id else: - self.redis_logger.debug(f"Empty Item: {message} not processed") + self.logger.debug(f"Empty Item: {message} not processed") print(f"Empty Item: {message} not processed") def check_filename(self, filename, new_file_content): @@ -157,7 +157,7 @@ class Global(AbstractModule): # check if file exist if os.path.isfile(filename): - self.redis_logger.warning(f'File already exist {filename}') + self.logger.warning(f'File already exist {filename}') print(f'File already exist {filename}') # Check that file already exists but content differs @@ -174,17 +174,17 @@ class Global(AbstractModule): filename = f'{filename[:-3]}_{new_file_md5}.gz' else: filename = f'{filename}_{new_file_md5}' - self.redis_logger.debug(f'new file to check: {filename}') + self.logger.debug(f'new file to check: {filename}') if os.path.isfile(filename): # Ignore duplicate - self.redis_logger.debug(f'ignore duplicated file {filename}') + self.logger.debug(f'ignore duplicated file {filename}') print(f'ignore duplicated file {filename}') filename = None else: # Ignore duplicate checksum equals - self.redis_logger.debug(f'ignore duplicated file {filename}') + self.logger.debug(f'ignore duplicated file {filename}') print(f'ignore duplicated file {filename}') filename = None @@ -205,13 +205,13 @@ class Global(AbstractModule): with gzip.open(filename, 'rb') as f: curr_file_content = f.read() except EOFError: - self.redis_logger.warning(f'Global; Incomplete file: {filename}') + self.logger.warning(f'Global; Incomplete file: {filename}') print(f'Global; Incomplete file: {filename}') # save daily stats # self.r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d')) # Statistics. 
except OSError: - self.redis_logger.warning(f'Global; Not a gzipped file: {filename}') + self.logger.warning(f'Global; Not a gzipped file: {filename}') print(f'Global; Not a gzipped file: {filename}') # save daily stats # self.r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d')) @@ -229,7 +229,7 @@ class Global(AbstractModule): with gzip.GzipFile(fileobj=in_, mode='rb') as fo: gunzipped_bytes_obj = fo.read() except Exception as e: - self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') + self.logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') print(f'Global; Invalid Gzip file: {filename}, {e}') return gunzipped_bytes_obj diff --git a/bin/modules/Hosts.py b/bin/modules/Hosts.py index 3509b5ea..37f278a5 100755 --- a/bin/modules/Hosts.py +++ b/bin/modules/Hosts.py @@ -46,7 +46,7 @@ class Hosts(AbstractModule): self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b' re.compile(self.host_regex) - self.redis_logger.info(f"Module: {self.module_name} Launched") + self.logger.info(f"Module: {self.module_name} Launched") def compute(self, message): item = Item(message) diff --git a/bin/modules/Indexer.py b/bin/modules/Indexer.py index 75052329..03fededa 100755 --- a/bin/modules/Indexer.py +++ b/bin/modules/Indexer.py @@ -63,12 +63,11 @@ class Indexer(AbstractModule): # create the index register if not present time_now = int(time.time()) if not os.path.isfile(self.indexRegister_path): # index are not organised - self.redis_logger.debug("Indexes are not organized") - self.redis_logger.debug( - "moving all files in folder 'old_index' ") + self.logger.debug("Indexes are not organized") + self.logger.debug("moving all files in folder 'old_index' ") # move all files to old_index folder self.move_index_into_old_index_folder() - self.redis_logger.debug("Creating new index") + self.logger.debug("Creating new index") # create all_index.txt with open(self.indexRegister_path, 'w') as f: f.write(str(time_now)) @@ -100,7 +99,7 @@ class Indexer(AbstractModule): item_id = item.get_id() item_content = item.get_content() - self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}") + self.logger.debug(f"Indexing - {self.indexname}: {docpath}") print(f"Indexing - {self.indexname}: {docpath}") try: @@ -109,7 +108,7 @@ class Indexer(AbstractModule): self.last_refresh = time.time() if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000): timestamp = int(time.time()) - self.redis_logger.debug(f"Creating new index {timestamp}") + self.logger.debug(f"Creating new index {timestamp}") print(f"Creating new index {timestamp}") self.indexpath = join(self.baseindexpath, str(timestamp)) self.indexname = str(timestamp) @@ -129,9 +128,9 @@ class Indexer(AbstractModule): indexwriter.commit() except IOError: - self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}") + self.logger.debug(f"CRC Checksum Failed on: {item_id}") print(f"CRC Checksum Failed on: {item_id}") - self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed') + self.logger.error(f'{item_id} CRC Checksum Failed') def check_index_size(self): """ diff --git a/bin/modules/Keys.py b/bin/modules/Keys.py index 7ed1845c..f86b6223 100755 --- a/bin/modules/Keys.py +++ b/bin/modules/Keys.py @@ -170,7 +170,7 @@ class Keys(AbstractModule): # if find : # # Send to duplicate # self.add_message_to_queue(item.get_id(), 'Duplicate') - # self.redis_logger.debug(f'{item.get_id()} has key(s)') + # 
self.logger.debug(f'{item.get_id()} has key(s)')
     #     print(f'{item.get_id()} has key(s)')
 
diff --git a/bin/modules/Languages.py b/bin/modules/Languages.py
index f775547e..4924b379 100755
--- a/bin/modules/Languages.py
+++ b/bin/modules/Languages.py
@@ -22,7 +22,7 @@ class Languages(AbstractModule):
         super(Languages, self).__init__()
 
         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')
 
     def compute(self, message):
         item = Item(message)
diff --git a/bin/modules/Mixer.py b/bin/modules/Mixer.py
index 2fdb5228..5e5d744e 100755
--- a/bin/modules/Mixer.py
+++ b/bin/modules/Mixer.py
@@ -71,7 +71,7 @@ class Mixer(AbstractModule):
         self.feeders_processed = {}
         self.feeders_duplicate = {}
 
-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")
 
     # TODO Save stats in cache
     # def get_feeders(self):
@@ -154,7 +154,7 @@ class Mixer(AbstractModule):
             feeder_name, item_id, gzip64encoded = splitted
         else:
             print('Invalid message: not processed')
-            self.redis_logger.debug('Invalid Item: {message} not processed')
+            self.logger.debug(f'Invalid Item: {message} not processed')
             return None
 
         # remove absolute path
diff --git a/bin/modules/Onion.py b/bin/modules/Onion.py
index d5b25326..5e76d0fa 100755
--- a/bin/modules/Onion.py
+++ b/bin/modules/Onion.py
@@ -49,7 +49,7 @@ class Onion(AbstractModule):
         re.compile(self.onion_regex)
         # re.compile(self.i2p_regex)
 
-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")
 
         # TEMP var: SAVE I2P Domain (future I2P crawler)
         # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
diff --git a/bin/modules/PgpDump.py b/bin/modules/PgpDump.py
index aa4b629b..0647e897 100755
--- a/bin/modules/PgpDump.py
+++ b/bin/modules/PgpDump.py
@@ -69,7 +69,7 @@ class PgpDump(AbstractModule):
         self.symmetrically_encrypted = False
 
         # Send module state to logs
-        self.redis_logger.info(f'Module {self.module_name} initialized')
+        self.logger.info(f'Module {self.module_name} initialized')
 
     def remove_html(self, pgp_block):
         try:
@@ -130,7 +130,7 @@ class PgpDump(AbstractModule):
             try:
                 output = output.decode()
             except UnicodeDecodeError:
-                self.redis_logger.error(f'Error PgpDump UnicodeDecodeError: {self.item_id}')
+                self.logger.error(f'Error PgpDump UnicodeDecodeError: {self.item_id}')
                 output = ''
             return output
diff --git a/bin/modules/Phone.py b/bin/modules/Phone.py
index 997b5826..0e86d1d3 100755
--- a/bin/modules/Phone.py
+++ b/bin/modules/Phone.py
@@ -50,7 +50,7 @@ class Phone(AbstractModule):
 
         # If the list is greater than 4, we consider the Item may contain a list of phone numbers
         if len(results) > 4:
-            self.redis_logger.debug(results)
+            self.logger.debug(results)
             self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')
 
             msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
diff --git a/bin/modules/SQLInjectionDetection.py b/bin/modules/SQLInjectionDetection.py
index e0827ce6..ed6dc89d 100755
--- a/bin/modules/SQLInjectionDetection.py
+++ b/bin/modules/SQLInjectionDetection.py
@@ -41,7 +41,7 @@ class SQLInjectionDetection(AbstractModule):
 
         self.faup = Faup()
 
-        self.redis_logger.info(f"Module: {self.module_name} Launched")
+        self.logger.info(f"Module: {self.module_name} Launched")
 
     def compute(self, message):
         url, item_id = message.split()
diff --git a/bin/modules/SentimentAnalysis.py b/bin/modules/SentimentAnalysis.py
index 
5aea7e7f..a5afc3f7 100755 --- a/bin/modules/SentimentAnalysis.py +++ b/bin/modules/SentimentAnalysis.py @@ -70,7 +70,7 @@ class SentimentAnalysis(AbstractModule): self.pending_seconds = 1 # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def compute(self, message): # Max time to compute one entry @@ -78,7 +78,7 @@ class SentimentAnalysis(AbstractModule): try: self.analyse(message) except TimeoutException: - self.redis_logger.debug(f"{message} processing timeout") + self.logger.debug(f"{message} processing timeout") else: signal.alarm(0) @@ -114,7 +114,7 @@ class SentimentAnalysis(AbstractModule): p_MimeType = "JSON" if p_MimeType in SentimentAnalysis.accepted_Mime_type: - self.redis_logger.debug(f'Accepted :{p_MimeType}') + self.logger.debug(f'Accepted :{p_MimeType}') the_date = datetime.date(int(p_date[0:4]), int(p_date[4:6]), int(p_date[6:8])) the_time = datetime.datetime.now() diff --git a/bin/modules/SubmitPaste.py b/bin/modules/SubmitPaste.py index 92e1fcc9..782323ef 100755 --- a/bin/modules/SubmitPaste.py +++ b/bin/modules/SubmitPaste.py @@ -60,7 +60,7 @@ class SubmitPaste(AbstractModule): """ Main method of the Module to implement """ - self.redis_logger.debug(f'compute UUID {uuid}') + self.logger.debug(f'compute UUID {uuid}') # get temp value save on disk ltags = self.r_serv_db.smembers(f'{uuid}:ltags') @@ -73,9 +73,9 @@ class SubmitPaste(AbstractModule): if source in ['crawled', 'tests']: source = 'submitted' - self.redis_logger.debug(f'isfile UUID {isfile}') - self.redis_logger.debug(f'source UUID {source}') - self.redis_logger.debug(f'paste_content UUID {paste_content}') + self.logger.debug(f'isfile UUID {isfile}') + self.logger.debug(f'source UUID {source}') + self.logger.debug(f'paste_content UUID {paste_content}') # needed if redis is restarted self.r_serv_log_submit.set(f'{uuid}:end', 0) @@ -114,15 +114,15 @@ class SubmitPaste(AbstractModule): if isinstance(uuid, list): uuid = uuid[0] # Module processing with the message from the queue - self.redis_logger.debug(uuid) + self.logger.debug(uuid) self.compute(uuid) except Exception as err: - self.redis_logger.error(f'Error in module {self.module_name}: {err}') + self.logger.critical(err) # Remove uuid ref self.remove_submit_uuid(uuid) else: # Wait before next process - self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s') + self.logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s') time.sleep(self.pending_seconds) def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source): diff --git a/bin/modules/Tags.py b/bin/modules/Tags.py index eb8a4e8d..6c2fb0fb 100755 --- a/bin/modules/Tags.py +++ b/bin/modules/Tags.py @@ -36,7 +36,7 @@ class Tags(AbstractModule): self.pending_seconds = 10 # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def compute(self, message): # Extract item ID and tag from message diff --git a/bin/modules/Telegram.py b/bin/modules/Telegram.py index 9cf4a2b6..35a6ef98 100755 --- a/bin/modules/Telegram.py +++ b/bin/modules/Telegram.py @@ -38,7 +38,7 @@ class Telegram(AbstractModule): self.max_execution_time = 60 # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def compute(self, message, 
r_result=False): item = Item(message) diff --git a/bin/modules/Tools.py b/bin/modules/Tools.py index 7401c2be..06b2d53a 100755 --- a/bin/modules/Tools.py +++ b/bin/modules/Tools.py @@ -402,7 +402,7 @@ class Tools(AbstractModule): # Waiting time in seconds between to message processed self.pending_seconds = 10 # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def get_tools(self): return TOOLS.keys() diff --git a/bin/modules/Urls.py b/bin/modules/Urls.py index 33bd8250..2dc97226 100755 --- a/bin/modules/Urls.py +++ b/bin/modules/Urls.py @@ -56,7 +56,7 @@ class Urls(AbstractModule): ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:[a-zA-Z]{2,15}))(?:\:[0-9]+)*(?:/?(?:[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*)" # Send module state to logs - self.redis_logger.info(f"Module {self.module_name} initialized") + self.logger.info(f"Module {self.module_name} initialized") def compute(self, message): """ @@ -82,7 +82,7 @@ class Urls(AbstractModule): to_send = f"{url} {item.get_id()}" print(to_send) self.add_message_to_queue(to_send, 'Url') - self.redis_logger.debug(f"url_parsed: {to_send}") + self.logger.debug(f"url_parsed: {to_send}") if len(l_urls) > 0: to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};' diff --git a/bin/modules/Zerobins.py b/bin/modules/Zerobins.py index 000975a5..f3fcea5a 100755 --- a/bin/modules/Zerobins.py +++ b/bin/modules/Zerobins.py @@ -39,13 +39,13 @@ class Zerobins(AbstractModule): self.pending_seconds = 10 # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') def computeNone(self): """ Compute when no message in queue """ - self.redis_logger.debug("No message in queue") + self.logger.debug("No message in queue") def compute(self, message): """ @@ -63,7 +63,7 @@ class Zerobins(AbstractModule): crawlers.create_task(bin_url, depth=0, har=False, screenshot=False, proxy='force_tor', parent='manual', priority=60) - self.redis_logger.debug("Compute message in queue") + self.logger.debug("Compute message in queue") if __name__ == '__main__': diff --git a/bin/modules/abstract_module.py b/bin/modules/abstract_module.py index ea419ef5..7e1f8220 100644 --- a/bin/modules/abstract_module.py +++ b/bin/modules/abstract_module.py @@ -8,6 +8,8 @@ Base Class for AIL Modules ################################## from abc import ABC, abstractmethod import os +import logging +import logging.config import sys import time import traceback @@ -17,22 +19,27 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from pubsublogger import publisher +from lib import ail_logger from lib.ail_queues import AILQueue from lib import regex_helper from lib.exceptions import ModuleQueueError +logging.config.dictConfig(ail_logger.get_config(name='modules')) + class AbstractModule(ABC): """ Abstract Module class """ - def __init__(self, module_name=None, logger_channel='Script', queue=True): + def __init__(self, module_name=None, queue=True): """ Init Module module_name: str; set the module name if different from the instance 
ClassName
         queue_name: str; set the queue name if different from the instance ClassName
-        logger_channel: str; set the logger channel name, 'Script' by default
+        queue: bool; create the module AILQueue if True (default)
         """
+        self.logger = logging.getLogger(f'{self.__class__.__name__}')
+
         # Module name if provided else instance className
         self.module_name = module_name if module_name else self._module_name()
@@ -44,14 +51,12 @@
 
         # Init Redis Logger
         self.redis_logger = publisher
-        # Port of the redis instance used by pubsublogger
         self.redis_logger.port = 6380
-        # Channel name to publish logs
         # # TODO: refactor logging
         # If provided could be a namespaced channel like script:
-        self.redis_logger.channel = logger_channel
+        self.redis_logger.channel = 'Script'
 
         # Cache key
         self.r_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
@@ -127,14 +132,9 @@
             # LOG ERROR
             trace = traceback.format_tb(err.__traceback__)
             trace = ''.join(trace)
-            self.redis_logger.critical(f"Error in module {self.module_name}: {err}")
-            self.redis_logger.critical(f"Module {self.module_name} input message: {message}")
-            self.redis_logger.critical(trace)
-            print()
-            print(f"ERROR: {err}")
-            print(f'MESSAGE: {message}')
-            print('TRACEBACK:')
-            print(trace)
+            self.logger.critical(f"Error in module {self.module_name}: {__name__} : {err}")
+            self.logger.critical(f"Module {self.module_name} input message: {message}")
+            self.logger.critical(trace)
 
             if isinstance(err, ModuleQueueError):
                 self.queue.error()
@@ -145,7 +145,7 @@
             else:
                 self.computeNone()
                 # Wait before next process
-                self.redis_logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
+                self.logger.debug(f"{self.module_name}, waiting for new message, Idling {self.pending_seconds}s")
                 time.sleep(self.pending_seconds)
 
     def _module_name(self):
diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index f26723bc..deb25a96 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -4,8 +4,7 @@ import os
 import sys
 import dns.resolver
 import dns.exception
-
-from pubsublogger import publisher
+import logging.config
 
 from datetime import timedelta
 
@@ -13,8 +12,13 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
+from lib import ail_logger
 from lib import ConfigLoader
 
+
+logging.config.dictConfig(ail_logger.get_config(name='modules'))
+logger = logging.getLogger()
+
 config_loader = ConfigLoader.ConfigLoader()
 dns_server = config_loader.get_config_str("DomClassifier", "dns")
 config_loader = None
@@ -73,24 +77,24 @@ def checking_MX_record(r_serv, MXdomains, addr_dns):
             pass
 
         except dns.resolver.NoNameservers:
-            publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
+            logger.debug('NoNameserver, No non-broken nameservers are available to answer the query.')
             print('NoNameserver, No non-broken nameservers are available to answer the query.')
 
         except dns.resolver.NoAnswer:
-            publisher.debug('NoAnswer, The response did not contain an answer to the question.')
+            logger.debug('NoAnswer, The response did not contain an answer to the question.')
             print('NoAnswer, The response did not contain an answer to the question.')
 
         except dns.name.EmptyLabel:
-            publisher.debug('SyntaxError: EmptyLabel')
+            logger.debug('SyntaxError: EmptyLabel')
             print('SyntaxError: EmptyLabel')
 
         except dns.resolver.NXDOMAIN:
             r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
-            publisher.debug('The query name 
does not exist.') + logger.debug('The query name does not exist.') print('The query name does not exist.') except dns.name.LabelTooLong: - publisher.debug('The Label is too long') + logger.debug('The Label is too long') print('The Label is too long') except dns.exception.Timeout: @@ -100,7 +104,7 @@ def checking_MX_record(r_serv, MXdomains, addr_dns): except Exception as e: print(e) - publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) + logger.debug("emails before: {0} after: {1} (valid)".format(num, score)) #return (num, WalidMX) return (num, validMX) @@ -133,23 +137,23 @@ def checking_A_record(r_serv, domains_set): pass except dns.resolver.NoNameservers: - publisher.debug('NoNameserver, No non-broken nameservers are available to answer the query.') + logger.debug('NoNameserver, No non-broken nameservers are available to answer the query.') except dns.resolver.NoAnswer: - publisher.debug('NoAnswer, The response did not contain an answer to the question.') + logger.debug('NoAnswer, The response did not contain an answer to the question.') except dns.name.EmptyLabel: - publisher.debug('SyntaxError: EmptyLabel') + logger.debug('SyntaxError: EmptyLabel') except dns.resolver.NXDOMAIN: r_serv.setex(Adomain[1:], 1, timedelta(days=1)) - publisher.debug('The query name does not exist.') + logger.debug('The query name does not exist.') except dns.name.LabelTooLong: - publisher.debug('The Label is too long') + logger.debug('The Label is too long') except Exception as e: print(e) - publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score)) + logger.debug("URLs before: {0} after: {1} (valid)".format(num, score)) return (num, WalidA) diff --git a/bin/template.py b/bin/template.py index bb66e86a..5cb2105b 100755 --- a/bin/template.py +++ b/bin/template.py @@ -34,19 +34,19 @@ class Template(AbstractModule): self.pending_seconds = 10 # Send module state to logs - self.redis_logger.info(f'Module {self.module_name} initialized') + self.logger.info(f'Module {self.module_name} initialized') # def computeNone(self): # """ # Do something when there is no message in the queue # """ - # self.redis_logger.debug("No message in queue") + # self.logger.debug("No message in queue") def compute(self, message): """ Compute a message in queue / process the message (item_id, ...) 
""" - self.redis_logger.debug("Compute message in queue") + self.logger.debug("Compute message in queue") # # if message is an item_id: # item = Item(message) # content = item.get_content() diff --git a/bin/trackers/Retro_Hunt.py b/bin/trackers/Retro_Hunt.py index b6c00c8e..a280e3a1 100755 --- a/bin/trackers/Retro_Hunt.py +++ b/bin/trackers/Retro_Hunt.py @@ -87,7 +87,7 @@ class Retro_Hunt_Module(AbstractModule): last_obj_type = None for obj in ail_objects.obj_iterator(obj_type, filters): self.obj = obj - content = obj.get_content(r_str=True) + content = obj.get_content(r_type='bytes') rule.match(data=content, callback=self.yara_rules_match, which_callbacks=yara.CALLBACK_MATCHES, timeout=timeout) diff --git a/bin/trackers/Tracker_Regex.py b/bin/trackers/Tracker_Regex.py index 091140bd..5cc06410 100755 --- a/bin/trackers/Tracker_Regex.py +++ b/bin/trackers/Tracker_Regex.py @@ -63,7 +63,7 @@ class Tracker_Regex(AbstractModule): if obj_type not in self.tracked_regexs: return None - content = obj.get_content(r_str=True) + content = obj.get_content() for dict_regex in self.tracked_regexs[obj_type]: matched = self.regex_findall(dict_regex['regex'], obj_id, content) diff --git a/bin/trackers/Tracker_Term.py b/bin/trackers/Tracker_Term.py index c79d308e..3e0ba43b 100755 --- a/bin/trackers/Tracker_Term.py +++ b/bin/trackers/Tracker_Term.py @@ -85,7 +85,7 @@ class Tracker_Term(AbstractModule): if obj_type not in self.tracked_words and obj_type not in self.tracked_sets: return None - content = obj.get_content(r_str=True) + content = obj.get_content() signal.alarm(self.max_execution_time) diff --git a/bin/trackers/Tracker_Yara.py b/bin/trackers/Tracker_Yara.py index 7ffea007..fab397d1 100755 --- a/bin/trackers/Tracker_Yara.py +++ b/bin/trackers/Tracker_Yara.py @@ -61,7 +61,7 @@ class Tracker_Yara(AbstractModule): if obj_type not in self.rules: return None - content = self.obj.get_content(r_str=True) + content = self.obj.get_content(r_type='bytes') try: yara_match = self.rules[obj_type].match(data=content, callback=self.yara_rules_match, @@ -109,4 +109,3 @@ class Tracker_Yara(AbstractModule): if __name__ == '__main__': module = Tracker_Yara() module.run() - # module.compute('archive/gist.github.com/2023/04/13/chipzoller_d8d6d2d737d02ad4fe9d30a897170761.gz') diff --git a/configs/logging.json b/configs/logging.json new file mode 100755 index 00000000..7c3cc39d --- /dev/null +++ b/configs/logging.json @@ -0,0 +1,33 @@ +{ + "version": 1, + "disable_existing_loggers": false, + "formatters": { + "simple": { + "format": "%(asctime)s %(name)s %(levelname)s:%(message)s" + } + }, + "handlers": { + "stdout": { + "class": "logging.StreamHandler", + "level": "INFO", + "stream": "ext://sys.stdout" + }, + "file": { + "class": "logging.handlers.RotatingFileHandler", + "level": "WARNING", + "formatter": "simple", + "filename": "logs/flask_warning.log", + "mode": "a", + "maxBytes": 1000000, + "backupCount": 5 + } + }, + "root": { + "level": "DEBUG", + "handlers": [ + "stdout", + "file" + ] + } +} + diff --git a/tools/empty_queue.py b/tools/empty_queue.py deleted file mode 100755 index 51f1b633..00000000 --- a/tools/empty_queue.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* - -""" -The Empty queue module -==================== - -This simple module can be used to clean all queues. 
- -Requirements: -------------- - - -""" -import os -import time -from pubsublogger import publisher -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = ['Global', 'Duplicates', 'Indexer', 'Attributes', 'Lines', 'DomClassifier', 'Tokenize', 'Curve', 'Categ', 'CreditCards', 'Mail', 'Onion', 'DumpValidOnion', 'Web', 'WebStats', 'Release', 'Credential', 'Cve', 'Phone', 'SourceCode', 'Keys'] - config_section = ['Curve'] - - for queue in config_section: - print('dropping: ' + queue) - p = Process(queue) - while True: - message = p.get_from_set() - if message is None: - break diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index a2d491a6..c6669ef6 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -7,10 +7,9 @@ import ssl import json import time import uuid - import random import logging -import logging.handlers +import logging.config from flask import Flask, render_template, jsonify, request, Request, Response, session, redirect, url_for from flask_login import LoginManager, current_user, login_user, logout_user, login_required @@ -30,6 +29,7 @@ sys.path.append(os.environ['AIL_BIN']) from lib.ConfigLoader import ConfigLoader from lib.Users import User from lib import Tag +from lib import ail_logger # Import config import Flask_config @@ -50,7 +50,6 @@ from blueprints.objects_cve import objects_cve from blueprints.objects_decoded import objects_decoded from blueprints.objects_subtypes import objects_subtypes - Flask_dir = os.environ['AIL_FLASK'] # CONFIG # @@ -74,15 +73,7 @@ log_dir = os.path.join(os.environ['AIL_HOME'], 'logs') if not os.path.isdir(log_dir): os.makedirs(log_dir) -# log_filename = os.path.join(log_dir, 'flask_server.logs') -# logger = logging.getLogger() -# formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') -# handler_log = logging.handlers.TimedRotatingFileHandler(log_filename, when="midnight", interval=1) -# handler_log.suffix = '%Y-%m-%d.log' -# handler_log.setFormatter(formatter) -# handler_log.setLevel(30) -# logger.addHandler(handler_log) -# logger.setLevel(30) +logging.config.dictConfig(ail_logger.get_config(name='flask')) # ========= =========#
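
Reviewer note on the new logging pattern: every entry point touched above now calls logging.config.dictConfig(ail_logger.get_config(name=...)), and get_config() only rewrites the file handler's filename from configs/logging.json to ${AIL_HOME}/logs/<name>.log. A minimal sketch of how a standalone script is expected to opt in, assuming AIL_BIN and AIL_HOME are exported as for any AIL process (the log name 'myscript' is hypothetical, not part of the patch):

#!/usr/bin/env python3
# -*-coding:UTF-8 -*

# Minimal sketch, not part of the patch: 'myscript' is a hypothetical log name.
import os
import sys
import logging
import logging.config

sys.path.append(os.environ['AIL_BIN'])
from lib import ail_logger

# Load configs/logging.json and point the rotating file handler
# (WARNING and above, 5 x 1 MB) at ${AIL_HOME}/logs/myscript.log;
# with no name, get_config() falls back to ail_warning.log.
logging.config.dictConfig(ail_logger.get_config(name='myscript'))
logger = logging.getLogger()

logger.info('myscript started')     # stdout handler only (INFO < WARNING)
logger.warning('myscript warning')  # stdout + logs/myscript.log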
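
A similar note on the get_content() refactor in Items.py and Decodeds.py: the old r_str/binary boolean flags are replaced by a single r_type selector ('str' or 'bytes' on Item; 'str', 'bytes' or 'bytesio' on Decoded). A short sketch of the new call contract, assuming the usual lib.objects.Items import path (the item id is hypothetical):

from lib.objects.Items import Item

item = Item('submitted/2023/05/02/example.gz')  # hypothetical item id
text = item.get_content()                       # str, the default (r_type='str')
blob = item.get_content(r_type='bytes')         # bytes, as Duplicates and Tracker_Yara now request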