#!/usr/bin/env python3
# -*-coding:UTF-8 -*
# Recovered from git patch 927a8c96c5322a66408c17d62c8cb73ec6ddbb6c
# (Terrtia, Fri, 10 Apr 2020 10:50:55 +0200):
#   chg [telegram + correlation] new module: telegram
#   (username + login code + join_chat) + add simple_correlation backend
# The patch creates bin/Telegram.py (mode 100755).
"""
Telegram Module
============================

Search item content for Telegram links (web links such as ``t.me/<name>`` and
``tg://`` deep links), extract usernames, invite hashes and login codes from
them, and save username <-> item correlations.

"""

from Helper import Process
from pubsublogger import publisher

import os
import re
import sys
import time
import redis
import signal

from urllib.parse import urlparse

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item

sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import simple_correlation

telegram = simple_correlation.SimpleCorrelation('telegram')


class TimeoutException(Exception):
    """Raised by the SIGALRM handler when a regex search runs too long."""
    pass


def timeout_handler(signum, frame):
    """SIGALRM handler: abort the current operation with TimeoutException."""
    raise TimeoutException


signal.signal(signal.SIGALRM, timeout_handler)

# https://github.com/LonamiWebs/Telethon/wiki/Special-links
regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
regex_tg_link = re.compile(r'tg://.+')

# The character ranges used to be written 'A-z', which also matches the
# ASCII punctuation between 'Z' and 'a' ('[', '\', ']', '^', '_', '`').
regex_username = re.compile(r'[0-9a-zA-Z_]+')
# '_' is listed explicitly: the old 'A-z' range matched it only by accident.
regex_join_hash = re.compile(r'[0-9a-zA-Z_-]+')

# Maximum number of seconds a single regex search may run.
max_execution_time = 60


def extract_data_from_telegram_url(item_id, base_url, url_path):
    """Extract a username or an invite hash from a Telegram web link.

    :param item_id: id of the item the link was found in
    :param base_url: matched host part of the link (currently unused)
    :param url_path: everything the link regex captured after the host's '/'

    A single-segment path is treated as a username and saved as a
    correlation with the item. A ``joinchat/<hash>`` path only has its
    invite hash printed.
    """
    url_path = url_path.split('/')
    # username len > 5, a-z A-Z _
    if len(url_path) == 1:
        username = regex_username.search(url_path[0].lower())
        if username:
            username = username[0]
            print('username: {}'.format(username))
            telegram.save_item_correlation(username, item_id, Item.get_item_date(item_id))
    elif url_path[0] == 'joinchat':
        # len(url_path) >= 2 here, so url_path[1] always exists.
        invite_hash = regex_join_hash.search(url_path[1])
        if invite_hash:
            invite_hash = invite_hash[0]
            print(invite_hash)


# # TODO:
# Add openmessage
# Add passport ?
# Add confirmphone
# Add user
def extract_data_from_tg_url(item_id, tg_link):
    """Extract a username, invite hash or login code from a ``tg://`` link.

    :param item_id: id of the item the link was found in
    :param tg_link: the matched ``tg://...`` text

    Handles ``tg://resolve?domain=<username>`` (saved as a correlation),
    ``tg://join?invite=<hash>`` and ``tg://login?code=<code>`` (both only
    printed). Any other link has its parsed form printed.
    """
    url = urlparse(tg_link)
    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(url.query) > 7:
        if url.query.startswith('domain='):
            # remove domain=
            username = regex_username.search(url.query[7:])
            if username:
                username = username[0]
                print('username: {}'.format(username))
                telegram.save_item_correlation(username, item_id, Item.get_item_date(item_id))
    elif url.netloc == 'join' and len(url.query) > 7:
        if url.query.startswith('invite='):
            invite_hash = regex_join_hash.search(url.query[7:])
            if invite_hash:
                invite_hash = invite_hash[0]
                print('invite code: {}'.format(invite_hash))
    elif url.netloc == 'login' and len(url.query) > 5:
        # Check the 'code=' prefix like the other branches do, instead of
        # blindly dropping the first five characters of the query.
        if url.query.startswith('code='):
            login_code = url.query[5:]
            # .format() used to be called on print()'s return value (None),
            # which raised AttributeError for every login link.
            print('login code: {}'.format(login_code))
    else:
        print(url)


def _findall_with_timeout(regex, item_id, item_content):
    """Run ``re.findall`` guarded by a SIGALRM timeout.

    Returns the list of matches, or an empty list if the search took longer
    than ``max_execution_time`` seconds. On timeout the module timeout
    statistic is incremented through the module-level ``p`` created in the
    ``__main__`` section.
    """
    signal.alarm(max_execution_time)
    try:
        return re.findall(regex, item_content)
    except TimeoutException:
        p.incr_module_timeout_statistic() # add encoder type
        print ("{0} processing timeout".format(item_id))
        return []
    finally:
        # Always disarm the alarm so it cannot fire later in unrelated code,
        # including when findall fails with a non-timeout exception.
        signal.alarm(0)


def search_telegram(item_id, item_content):
    """Search an item's content for Telegram links and process each of them.

    :param item_id: id of the item being analysed
    :param item_content: text content of the item
    """
    # telegram links
    for telegram_link in _findall_with_timeout(regex_telegram_link, item_id, item_content):
        extract_data_from_telegram_url(item_id, telegram_link[0], telegram_link[1])

    # tg links
    for tg_link in _findall_with_timeout(regex_tg_link, item_id, item_content):
        extract_data_from_tg_url(item_id, tg_link)


if __name__ == "__main__":
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Telegram'
    # # TODO: add duplicate

    # Setup the I/O queues
    p = Process(config_section)

    # Sent to the logging a description of the module
    publisher.info("Run Telegram module ")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        item_id = p.get_from_set()
        if item_id is None:
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Do something with the message from the queue
        item_content = Item.get_item_content(item_id)
        search_telegram(item_id, item_content)

    # NOTE(review): the indentation of this call was lost in the recovered
    # patch. It is placed after the endless loop (so it is unreachable) to
    # match the "Endless loop" comment above - confirm against the original.
    sys.exit(0)