From 3d8d18bbe15a8b8f676a190863114374a26771ed Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 19 Jan 2022 16:20:18 +0100 Subject: [PATCH] chg: [Telegram module] refactor module + fix str format --- bin/Telegram.py | 175 --------------------------------- bin/lib/regex_helper.py | 7 +- bin/lib/telegram.py | 66 ++++++++++++- bin/modules/Telegram.py | 86 ++++++++++++++++ bin/modules/abstract_module.py | 12 +++ tests/test_modules.py | 11 +++ 6 files changed, 178 insertions(+), 179 deletions(-) delete mode 100755 bin/Telegram.py create mode 100755 bin/modules/Telegram.py diff --git a/bin/Telegram.py b/bin/Telegram.py deleted file mode 100755 index 8b9b5439..00000000 --- a/bin/Telegram.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/env python3 -# -*-coding:UTF-8 -* -""" -Tools Module -============================ - -Search tools outpout - -""" - -from Helper import Process -from pubsublogger import publisher - -import os -import re -import sys -import time -import redis -import signal - -from urllib.parse import urlparse - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) -import telegram - -class TimeoutException(Exception): - pass - -def timeout_handler(signum, frame): - raise TimeoutException - -signal.signal(signal.SIGALRM, timeout_handler) - -# https://github.com/LonamiWebs/Telethon/wiki/Special-links -regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)' -regex_tg_link = re.compile(r'tg://.+') - -regex_username = re.compile(r'[0-9a-zA-z_]+') -regex_join_hash = re.compile(r'[0-9a-zA-z-]+') - -max_execution_time = 60 - -def extract_data_from_telegram_url(item_id, item_date, base_url, url_path): - invite_code_found = False - - #url = urlparse(url_path) - url_path = url_path.split('/') - # username len > 5, a-z A-Z _ - if len(url_path) == 1: - username = url_path[0].lower() - username = regex_username.search(username) - if username: - username = username[0].replace('\\', '') - if len(username) > 5: - print('username: {}'.format(username)) - telegram.save_item_correlation(username, item_id, item_date) - elif url_path[0] == 'joinchat': - invite_hash = regex_join_hash.search(url_path[1]) - if invite_hash: - invite_hash = invite_hash[0] - telegram.save_telegram_invite_hash(invite_hash, item_id) - print('invite code: {}'.format(invite_hash)) - invite_code_found = True - return invite_code_found - - -# # TODO: -# Add openmessafe -# Add passport ? -# Add confirmphone -# Add user -def extract_data_from_tg_url(item_id, item_date, tg_link): - invite_code_found = False - - url = urlparse(tg_link) - # username len > 5, a-z A-Z _ - if url.netloc == 'resolve' and len(url.query) > 7: - if url.query[:7] == 'domain=': - # remove domain= - username = url.query[7:] - username = regex_username.search(username) - if username: - username = username[0].replace('\\', '') - if len(username) > 5: - print('username: {}'.format(username)) - telegram.save_item_correlation(username, item_id, item_date) - elif url.netloc == 'join' and len(url.query) > 7: - if url.query[:7] == 'invite=': - invite_hash = url.query[7:] - invite_hash = regex_join_hash.search(invite_hash) - if invite_hash: - invite_hash = invite_hash[0] - telegram.save_telegram_invite_hash(invite_hash, item_id) - print('invite code: {}'.format(invite_hash)) - invite_code_found = True - - elif url.netloc == 'login' and len(url.query) > 5: - login_code = url.query[5:] - print('login code: {}').format(login_code) - - else: - print(url) - - return invite_code_found - -def search_telegram(item_id, item_date, item_content): - # telegram links - signal.alarm(max_execution_time) - try: - telegram_links = re.findall(regex_telegram_link, item_content) - except TimeoutException: - telegram_links = [] - p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(item_id)) - else: - signal.alarm(0) - - invite_code_found = False - - for telegram_link in telegram_links: - res = extract_data_from_telegram_url(item_id, item_date, telegram_link[0], telegram_link[1]) - if res: - invite_code_found = True - - # tg links - signal.alarm(max_execution_time) - try: - tg_links = re.findall(regex_tg_link, item_content) - except TimeoutException: - tg_links = [] - p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(item_id)) - else: - signal.alarm(0) - - for tg_link in tg_links: - res = extract_data_from_tg_url(item_id, item_date, tg_link) - if res: - invite_code_found = True - - if invite_code_found: - #tags - msg = 'infoleak:automatic-detection="telegram-invite-hash";{}'.format(item_id) - p.populate_set_out(msg, 'Tags') - - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Telegram' - # # TODO: add duplicate - - # Setup the I/O queues - p = Process(config_section) - - # Sent to the logging a description of the module - publisher.info("Run Telegram module ") - - # Endless loop getting messages from the input queue - while True: - # Get one message from the input queue - item_id = p.get_from_set() - if item_id is None: - publisher.debug("{} queue is empty, waiting".format(config_section)) - time.sleep(1) - continue - - # Do something with the message from the queue - item_content = Item.get_item_content(item_id) - item_date = Item.get_item_date(item_id) - search_telegram(item_id, item_date, item_content) diff --git a/bin/lib/regex_helper.py b/bin/lib/regex_helper.py index f3c4379d..796dd7bb 100755 --- a/bin/lib/regex_helper.py +++ b/bin/lib/regex_helper.py @@ -29,7 +29,8 @@ publisher.port = 6380 publisher.channel = "Script" def generate_redis_cache_key(module_name): - return '{}_extracted:{}'.format(module_name, str(uuid.uuid4())) + new_uuid = str(uuid.uuid4()) + return f'{module_name}_extracted:{new_uuid}' def _regex_findall(redis_key, regex, item_content, r_set): all_items = re.findall(regex, item_content) @@ -57,7 +58,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time if proc.is_alive(): proc.terminate() Statistics.incr_module_timeout_statistic(module_name) - err_mess = "{}: processing timeout: {}".format(module_name, item_id) + err_mess = f"{module_name}: processing timeout: {item_id}" print(err_mess) publisher.info(err_mess) return [] @@ -87,7 +88,7 @@ def regex_search(module_name, redis_key, regex, item_id, item_content, max_time= if proc.is_alive(): proc.terminate() Statistics.incr_module_timeout_statistic(module_name) - err_mess = "{}: processing timeout: {}".format(module_name, item_id) + err_mess = f"{module_name}: processing timeout: {item_id}" print(err_mess) publisher.info(err_mess) return None diff --git a/bin/lib/telegram.py b/bin/lib/telegram.py index cfad88ed..db04699d 100755 --- a/bin/lib/telegram.py +++ b/bin/lib/telegram.py @@ -2,8 +2,10 @@ # -*-coding:UTF-8 -* import os +import re import sys -import redis + +from urllib.parse import urlparse sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader @@ -13,8 +15,70 @@ config_loader = ConfigLoader.ConfigLoader() r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion") config_loader = None +REGEX_USERNAME = re.compile(r'[0-9a-zA-z_]+') +REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-z-]+') + +## ## + def save_item_correlation(username, item_id, item_date): Username.save_item_correlation('telegram', username, item_id, item_date) def save_telegram_invite_hash(invite_hash, item_id): r_serv_crawler.sadd('telegram:invite_code', '{};{}'.format(invite_hash, item_id)) + +def get_data_from_telegram_url(base_url, url_path): + dict_url = {} + url_path = url_path.split('/') + + # username len > 5, a-z A-Z _ + if len(url_path) == 1: + username = url_path[0].lower() + username = REGEX_USERNAME.search(username) + if username: + username = username[0].replace('\\', '') + if len(username) > 5: + dict_url['username'] = username + elif url_path[0] == 'joinchat': + invite_hash = REGEX_JOIN_HASH.search(url_path[1]) + if invite_hash: + invite_hash = invite_hash[0] + dict_url['invite_hash'] = invite_hash + return dict_url + +# # TODO: +# Add openmessafe +# Add passport ? +# Add confirmphone +# Add user +def get_data_from_tg_url(tg_link): + dict_url = {} + + url = urlparse(tg_link) + # username len > 5, a-z A-Z _ + if url.netloc == 'resolve' and len(url.query) > 7: + if url.query[:7] == 'domain=': + # remove domain= + username = url.query[7:] + username = REGEX_USERNAME.search(username) + if username: + username = username[0].replace('\\', '') + if len(username) > 5: + dict_url['username'] = username + + elif url.netloc == 'join' and len(url.query) > 7: + if url.query[:7] == 'invite=': + invite_hash = url.query[7:] + invite_hash = REGEX_JOIN_HASH.search(invite_hash) + if invite_hash: + invite_hash = invite_hash[0] + dict_url['invite_hash'] = invite_hash + + elif url.netloc == 'login' and len(url.query) > 5: + login_code = url.query[5:] + if login_code: + dict_url['login_code'] = login_code + else: + # # TODO: log invalid URL ??????? + print(url) + + return dict_url diff --git a/bin/modules/Telegram.py b/bin/modules/Telegram.py new file mode 100755 index 00000000..0973eb14 --- /dev/null +++ b/bin/modules/Telegram.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* +""" +Telegram Module +============================ + +Search telegram username,channel and invite code + +""" +import os +import re +import sys + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from modules.abstract_module import AbstractModule +from packages.Item import Item +from lib import regex_helper +from lib import telegram + +class Telegram(AbstractModule): + """Telegram module for AIL framework""" + + def __init__(self): + super(Telegram, self).__init__() + + # https://github.com/LonamiWebs/Telethon/wiki/Special-links + self.re_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)' + self.re_tg_link = r'tg://.+' + + re.compile(self.re_telegram_link) + re.compile(self.re_tg_link) + + self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name) + self.max_execution_time = 60 + + # Send module state to logs + self.redis_logger.info(f"Module {self.module_name} initialized") + + def compute(self, message, r_result=False): + # messsage = item_id + item = Item(message) + item_content = item.get_content() + item_date = item.get_date() + + invite_code_found = False + + # extract telegram links + telegram_links = self.regex_findall(self.re_telegram_link, item.get_id(), item_content) + for telegram_link_tuple in telegram_links: + base_url, url_path = telegram_link_tuple[2:-2].split("', '", 1) + dict_url = telegram.get_data_from_telegram_url(base_url, url_path) + if dict_url.get('username'): + telegram.save_item_correlation(dict_url['username'], item.get_id(), item_date) + print(f'username: {dict_url["username"]}') + if dict_url.get('invite_hash'): + telegram.save_telegram_invite_hash(dict_url['invite_hash'], item.get_id()) + print(f'invite code: {dict_url["invite_hash"]}') + invite_code_found = True + + # extract tg links + tg_links = self.regex_findall(self.re_tg_link, item.get_id(), item_content) + for tg_link in tg_links: + dict_url = telegram.get_data_from_tg_url(tg_link) + if dict_url.get('username'): + telegram.save_item_correlation(dict_url['username'], item.get_id(), item_date) + print(f'username: {dict_url["username"]}') + if dict_url.get('invite_hash'): + telegram.save_telegram_invite_hash(dict_url['invite_hash'], item.get_id()) + print(f'invite code: {dict_url["invite_hash"]}') + invite_code_found = True + if dict_url.get('login_code'): + print(f'login code: {dict_url["login_code"]}') + + # CREATE TAG + if invite_code_found: + #tags + msg = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}' + self.send_message_to_queue(msg, 'Tags') + + +if __name__ == "__main__": + module = Telegram() + module.run() diff --git a/bin/modules/abstract_module.py b/bin/modules/abstract_module.py index 632f8e43..555a6bc9 100644 --- a/bin/modules/abstract_module.py +++ b/bin/modules/abstract_module.py @@ -15,6 +15,7 @@ import traceback ################################## from pubsublogger import publisher from Helper import Process +from lib import regex_helper class AbstractModule(ABC): """ @@ -74,6 +75,17 @@ class AbstractModule(ABC): self.process.populate_set_out(message, queue_name) # add to new set_module + def regex_findall(self, regex, id, content): + """ + regex findall helper (force timeout) + :param regex: compiled regex + :param id: object id + :param content: object content + + ex: send_to_queue(item_id, 'Global') + """ + return regex_helper.regex_findall(self.module_name, self.redis_cache_key, regex, id, content, max_time=self.max_execution_time) + def run(self): """ Run Module endless process diff --git a/tests/test_modules.py b/tests/test_modules.py index bb69397f..728294fe 100644 --- a/tests/test_modules.py +++ b/tests/test_modules.py @@ -19,6 +19,7 @@ from modules.DomClassifier import DomClassifier from modules.Global import Global from modules.Keys import Keys from modules.Onion import Onion +from modules.Telegram import Telegram # project packages from lib.ConfigLoader import ConfigLoader @@ -169,5 +170,15 @@ class Test_Module_Onion(unittest.TestCase): # # TODO: check warning logs pass +class Test_Module_Telegram(unittest.TestCase): + + def setUp(self): + self.module_obj = Telegram() + + def test_module(self): + item_id = 'tests/2021/01/01/keys.gz' + # # TODO: check results + result = self.module_obj.compute(item_id) + if __name__ == '__main__': unittest.main()