chg: [Telegram module] refactor module + fix str format

pull/586/head
Terrtia 2022-01-19 16:20:18 +01:00
parent 9c561d4827
commit 3d8d18bbe1
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 178 additions and 179 deletions

View File

@ -1,175 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Telegram Module
============================
Search Telegram usernames and invite codes
"""
from Helper import Process
from pubsublogger import publisher
import os
import re
import sys
import time
import redis
import signal
from urllib.parse import urlparse
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import telegram
class TimeoutException(Exception):
    """Raised from the SIGALRM handler when a regex search runs too long."""
def timeout_handler(signum, frame):
    """Signal handler that interrupts the running code with TimeoutException.

    :param signum: signal number (unused)
    :param frame: interrupted stack frame (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
signal.signal(signal.SIGALRM, timeout_handler)
# https://github.com/LonamiWebs/Telethon/wiki/Special-links
# Web links: group 1 is the host, group 2 is the path up to the first
# '.', '"', ',' or whitespace character.
regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
# Custom-scheme links (tg://...), matched up to the end of the line.
regex_tg_link = re.compile(r'tg://.+')
# The classes below spell out A-Z explicitly: the range `A-z` also covers
# the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which let brackets,
# carets and backticks leak into extracted values.
# Backslash stays allowed in usernames because callers strip it afterwards
# (e.g. an escaped underscore "\_").
regex_username = re.compile(r'[0-9a-zA-Z_\\]+')
# Invite hashes: letters, digits, '_' and '-'.
regex_join_hash = re.compile(r'[0-9a-zA-Z_-]+')
# Maximum number of seconds a single regex search may run (used with signal.alarm).
max_execution_time = 60
def extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
    """Save the username or invite hash carried by a Telegram web link.

    :param item_id: id of the item the link was found in
    :param item_date: date of that item
    :param base_url: host part of the link (unused)
    :param url_path: path part of the link, e.g. 'name' or 'joinchat/<hash>'
    :return: True if an invite hash was found and saved, False otherwise
    """
    segments = url_path.split('/')

    # Single segment: treat it as a username (kept only if longer than 5 chars).
    if len(segments) == 1:
        match = regex_username.search(segments[0].lower())
        if match:
            username = match[0].replace('\\', '')
            if len(username) > 5:
                print('username: {}'.format(username))
                telegram.save_item_correlation(username, item_id, item_date)
        return False

    # Several segments: only 'joinchat/<hash>' links are handled.
    if segments[0] != 'joinchat':
        return False

    match = regex_join_hash.search(segments[1])
    if not match:
        return False

    invite_hash = match[0]
    telegram.save_telegram_invite_hash(invite_hash, item_id)
    print('invite code: {}'.format(invite_hash))
    return True
# # TODO:
# Add openmessage
# Add passport ?
# Add confirmphone
# Add user
def extract_data_from_tg_url(item_id, item_date, tg_link):
    """Save the username or invite hash carried by a ``tg://`` link.

    Handled forms:
      * ``tg://resolve?domain=<username>``: username saved if longer than 5 chars
      * ``tg://join?invite=<hash>``: invite hash saved
      * ``tg://login?code=<code>``: login code printed only

    Any other link is printed for inspection.

    :param item_id: id of the item the link was found in
    :param item_date: date of that item
    :param tg_link: the full ``tg://`` link
    :return: True if an invite hash was found and saved, False otherwise
    """
    invite_code_found = False
    url = urlparse(tg_link)
    query = url.query

    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # remove domain=
            username = regex_username.search(query[7:])
            if username:
                username = username[0].replace('\\', '')
                if len(username) > 5:
                    print('username: {}'.format(username))
                    telegram.save_item_correlation(username, item_id, item_date)
    elif url.netloc == 'join' and len(query) > 7:
        if query.startswith('invite='):
            invite_hash = regex_join_hash.search(query[7:])
            if invite_hash:
                invite_hash = invite_hash[0]
                telegram.save_telegram_invite_hash(invite_hash, item_id)
                print('invite code: {}'.format(invite_hash))
                invite_code_found = True
    elif url.netloc == 'login' and len(query) > 5:
        # Check the prefix like the other branches do, so that e.g.
        # 'token=...' is not sliced into a bogus code.
        if query.startswith('code='):
            login_code = query[5:]
            # .format() must be applied to the string, not to print()'s
            # return value (None), which raised AttributeError.
            print('login code: {}'.format(login_code))
    else:
        print(url)
    return invite_code_found
def search_telegram(item_id, item_date, item_content):
    """Extract Telegram usernames and invite hashes from an item.

    Web links are processed first, then ``tg://`` links. Each regex search is
    bounded by ``max_execution_time`` seconds through ``signal.alarm``. If at
    least one invite hash is found, a tag message is sent to the 'Tags' queue.

    :param item_id: id of the item being processed
    :param item_date: date of the item
    :param item_content: text content of the item
    """
    def _findall_with_timeout(pattern):
        # Arm the alarm; the SIGALRM handler raises TimeoutException.
        signal.alarm(max_execution_time)
        try:
            matches = re.findall(pattern, item_content)
        except TimeoutException:
            p.incr_module_timeout_statistic()  # add encoder type
            print("{0} processing timeout".format(item_id))
            return []
        # Search finished in time: disarm the alarm.
        signal.alarm(0)
        return matches

    has_invite_code = False

    # telegram links
    for base_url, url_path in _findall_with_timeout(regex_telegram_link):
        if extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
            has_invite_code = True

    # tg links
    for tg_link in _findall_with_timeout(regex_tg_link):
        if extract_data_from_tg_url(item_id, item_date, tg_link):
            has_invite_code = True

    if has_invite_code:
        # tags
        tag_message = 'infoleak:automatic-detection="telegram-invite-hash";{}'.format(item_id)
        p.populate_set_out(tag_message, 'Tags')
if __name__ == "__main__":
    config_section = 'Telegram'

    publisher.port = 6380
    publisher.channel = "Script"

    # # TODO: add duplicate

    # Set up the I/O queues (module-level `p` is also used by search_telegram)
    p = Process(config_section)

    # Send a description of the module to the logs
    publisher.info("Run Telegram module ")

    # Endless loop consuming item ids from the input queue
    while True:
        item_id = p.get_from_set()
        if item_id is not None:
            # Load the item and look for Telegram data in it
            item_content = Item.get_item_content(item_id)
            item_date = Item.get_item_date(item_id)
            search_telegram(item_id, item_date, item_content)
        else:
            # Nothing to process yet: wait before polling again
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)

View File

@ -29,7 +29,8 @@ publisher.port = 6380
publisher.channel = "Script"
def generate_redis_cache_key(module_name):
    """Build a unique cache key for a module.

    :param module_name: name of the calling module, used as key prefix
    :return: a string of the form '<module_name>_extracted:<uuid4>'
    """
    # A fresh random UUID makes every generated key distinct.
    new_uuid = str(uuid.uuid4())
    return f'{module_name}_extracted:{new_uuid}'
def _regex_findall(redis_key, regex, item_content, r_set):
all_items = re.findall(regex, item_content)
@ -57,7 +58,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
if proc.is_alive():
proc.terminate()
Statistics.incr_module_timeout_statistic(module_name)
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess)
publisher.info(err_mess)
return []
@ -87,7 +88,7 @@ def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=
if proc.is_alive():
proc.terminate()
Statistics.incr_module_timeout_statistic(module_name)
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess)
publisher.info(err_mess)
return None

View File

@ -2,8 +2,10 @@
# -*-coding:UTF-8 -*
import os
import re
import sys
import redis
from urllib.parse import urlparse
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
@ -13,8 +15,70 @@ config_loader = ConfigLoader.ConfigLoader()
r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
# The classes below spell out A-Z explicitly: the range `A-z` also covers
# the ASCII punctuation between 'Z' and 'a' ([ \ ] ^ _ `), which let brackets,
# carets and backticks leak into extracted values.
# Backslash stays allowed in usernames because callers strip it afterwards
# (e.g. an escaped underscore "\_").
REGEX_USERNAME = re.compile(r'[0-9a-zA-Z_\\]+')
# Invite hashes: letters, digits, '_' and '-'.
REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-Z_-]+')
## ##
def save_item_correlation(username, item_id, item_date):
    """Save the link between a Telegram username and an item.

    Delegates to Username.save_item_correlation, passing 'telegram' as its
    first argument.

    :param username: extracted Telegram username
    :param item_id: id of the item the username was found in
    :param item_date: date of that item
    """
    Username.save_item_correlation('telegram', username, item_id, item_date)
def save_telegram_invite_hash(invite_hash, item_id):
    """Add '<invite_hash>;<item_id>' to the 'telegram:invite_code' set.

    :param invite_hash: extracted Telegram invite hash
    :param item_id: id of the item the hash was found in
    """
    member = '{};{}'.format(invite_hash, item_id)
    r_serv_crawler.sadd('telegram:invite_code', member)
def get_data_from_telegram_url(base_url, url_path):
    """Extract a username or an invite hash from a Telegram web link.

    :param base_url: host part of the link (unused)
    :param url_path: path part of the link, e.g. 'name' or 'joinchat/<hash>'
    :return: dict with at most one key: 'username' (lowercased, backslashes
             removed, longer than 5 chars) or 'invite_hash'; empty if nothing
             was extracted
    """
    segments = url_path.split('/')

    # Single segment: treat it as a username.
    # username len > 5, a-z A-Z _
    if len(segments) == 1:
        found = REGEX_USERNAME.search(segments[0].lower())
        if found:
            username = found[0].replace('\\', '')
            if len(username) > 5:
                return {'username': username}
        return {}

    # Several segments: only 'joinchat/<hash>' links carry data.
    if segments[0] == 'joinchat':
        found = REGEX_JOIN_HASH.search(segments[1])
        if found:
            return {'invite_hash': found[0]}

    return {}
# # TODO:
# Add openmessage
# Add passport ?
# Add confirmphone
# Add user
def get_data_from_tg_url(tg_link):
    """Extract the data carried by a ``tg://`` link.

    Handled forms:
      * ``tg://resolve?domain=<username>`` -> ``{'username': ...}``
        (backslashes removed, kept only if longer than 5 chars)
      * ``tg://join?invite=<hash>``        -> ``{'invite_hash': ...}``
      * ``tg://login?code=<code>``         -> ``{'login_code': ...}``

    A link with any other host part is printed and yields an empty dict.

    :param tg_link: the full ``tg://`` link
    :return: dict with at most one of the keys above; empty if nothing was
             extracted
    """
    dict_url = {}
    url = urlparse(tg_link)
    query = url.query

    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # remove domain=
            username = REGEX_USERNAME.search(query[7:])
            if username:
                username = username[0].replace('\\', '')
                if len(username) > 5:
                    dict_url['username'] = username
    elif url.netloc == 'join' and len(query) > 7:
        if query.startswith('invite='):
            invite_hash = REGEX_JOIN_HASH.search(query[7:])
            if invite_hash:
                dict_url['invite_hash'] = invite_hash[0]
    elif url.netloc == 'login' and len(query) > 5:
        # Check the prefix like the other branches do; otherwise a query
        # such as 'token=...' would be sliced into a bogus login code.
        if query.startswith('code='):
            dict_url['login_code'] = query[5:]
    else:
        # # TODO: log invalid URL ???????
        print(url)
    return dict_url

86
bin/modules/Telegram.py Executable file
View File

@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Telegram Module
============================
Search Telegram usernames, channels and invite codes
"""
import os
import re
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from packages.Item import Item
from lib import regex_helper
from lib import telegram
class Telegram(AbstractModule):
    """Telegram module for AIL framework.

    Looks for Telegram web links and ``tg://`` links in an item, saves the
    usernames and invite hashes they carry, and tags items in which an invite
    hash was found.
    """

    def __init__(self):
        super().__init__()

        # https://github.com/LonamiWebs/Telethon/wiki/Special-links
        self.re_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
        self.re_tg_link = r'tg://.+'
        # The compiled objects are discarded; the patterns are passed on as strings.
        for pattern in (self.re_telegram_link, self.re_tg_link):
            re.compile(pattern)

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
        self.max_execution_time = 60

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def _save_url_data(self, dict_url, item, item_date):
        """Save the username / invite hash found in dict_url.

        :return: True if dict_url carried an invite hash, False otherwise
        """
        username = dict_url.get('username')
        if username:
            telegram.save_item_correlation(username, item.get_id(), item_date)
            print(f'username: {username}')

        invite_hash = dict_url.get('invite_hash')
        if not invite_hash:
            return False
        telegram.save_telegram_invite_hash(invite_hash, item.get_id())
        print(f'invite code: {invite_hash}')
        return True

    def compute(self, message, r_result=False):
        """Process one item.

        :param message: item id
        :param r_result: unused
        """
        item = Item(message)
        item_content = item.get_content()
        item_date = item.get_date()

        invite_code_found = False

        # extract telegram links
        for raw_match in self.regex_findall(self.re_telegram_link, item.get_id(), item_content):
            # Each match is a string shaped like "('host', 'path')"
            # (presumably the str() of the findall tuple; confirm in regex_helper).
            base_url, url_path = raw_match[2:-2].split("', '", 1)
            dict_url = telegram.get_data_from_telegram_url(base_url, url_path)
            if self._save_url_data(dict_url, item, item_date):
                invite_code_found = True

        # extract tg links
        for tg_link in self.regex_findall(self.re_tg_link, item.get_id(), item_content):
            dict_url = telegram.get_data_from_tg_url(tg_link)
            if self._save_url_data(dict_url, item, item_date):
                invite_code_found = True
            login_code = dict_url.get('login_code')
            if login_code:
                print(f'login code: {login_code}')

        # CREATE TAG
        if invite_code_found:
            tag_message = f'infoleak:automatic-detection="telegram-invite-hash";{item.get_id()}'
            self.send_message_to_queue(tag_message, 'Tags')
if __name__ == "__main__":
    # Instantiate the module and start its processing loop.
    Telegram().run()

View File

@ -15,6 +15,7 @@ import traceback
##################################
from pubsublogger import publisher
from Helper import Process
from lib import regex_helper
class AbstractModule(ABC):
"""
@ -74,6 +75,17 @@ class AbstractModule(ABC):
self.process.populate_set_out(message, queue_name)
# add to new set_module
def regex_findall(self, regex, id, content):
    """
    regex findall helper (force timeout)

    Forwards the call to regex_helper.regex_findall together with this
    module's name, cache key and maximum execution time.

    :param regex: regex pattern to search for
    :param id: object id
    :param content: object content

    :return: the result of regex_helper.regex_findall

    ex: regex_findall(r'tg://.+', item_id, item_content)
    """
    return regex_helper.regex_findall(
        self.module_name,
        self.redis_cache_key,
        regex,
        id,
        content,
        max_time=self.max_execution_time,
    )
def run(self):
"""
Run Module endless process

View File

@ -19,6 +19,7 @@ from modules.DomClassifier import DomClassifier
from modules.Global import Global
from modules.Keys import Keys
from modules.Onion import Onion
from modules.Telegram import Telegram
# project packages
from lib.ConfigLoader import ConfigLoader
@ -169,5 +170,15 @@ class Test_Module_Onion(unittest.TestCase):
# # TODO: check warning logs
pass
class Test_Module_Telegram(unittest.TestCase):
    """Smoke test: run the Telegram module on a sample item."""

    def setUp(self):
        # Fresh module instance for every test
        self.module_obj = Telegram()

    def test_module(self):
        # # TODO: check results
        self.module_obj.compute('tests/2021/01/01/keys.gz')
if __name__ == '__main__':
unittest.main()