mirror of https://github.com/CIRCL/AIL-framework
chg: [Telegram module] refactor module + fix str format
parent
9c561d4827
commit
3d8d18bbe1
175
bin/Telegram.py
175
bin/Telegram.py
|
@ -1,175 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*-coding:UTF-8 -*
|
|
||||||
"""
|
|
||||||
Tools Module
|
|
||||||
============================
|
|
||||||
|
|
||||||
Search tools output
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
from Helper import Process
|
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
import time
|
|
||||||
import redis
|
|
||||||
import signal
|
|
||||||
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
|
||||||
import Item
|
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
|
||||||
import telegram
|
|
||||||
|
|
||||||
class TimeoutException(Exception):
    """Raised from the SIGALRM handler to abort a computation that ran too long."""


def timeout_handler(signum, frame):
    """
    Signal handler: turn the received signal into a TimeoutException.

    :param signum: signal number (unused)
    :param frame: interrupted stack frame (unused)

    :raises TimeoutException: always
    """
    raise TimeoutException()


# Every SIGALRM delivered to this process now raises TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
|
|
||||||
|
|
||||||
# https://github.com/LonamiWebs/Telethon/wiki/Special-links
# Web links: captures (domain, path) for telegram.me / t.me / telegram.dog / telesco.pe
regex_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
# tg:// links: everything up to the end of the line
regex_tg_link = re.compile(r'tg://.+')

# NOTE: the ranges are spelled A-Z on purpose. "A-z" also spans the ASCII
# characters between 'Z' and 'a' ([ \ ] ^ _ and the backtick), which made
# both patterns swallow punctuation that follows a username or hash.
regex_username = re.compile(r'[0-9a-zA-Z_]+')
# '_' is listed explicitly: it used to be matched only through the A-z range
regex_join_hash = re.compile(r'[0-9a-zA-Z_-]+')

# Maximum number of seconds a regex search may run before SIGALRM aborts it
max_execution_time = 60
|
|
||||||
|
|
||||||
def extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
    """
    Extract and save the data carried by the path of a telegram web link.

    A single-segment path is treated as a username, a path starting with
    'joinchat' as an invite link. Any other path is ignored.

    :param item_id: id of the item the link was found in
    :param item_date: date of the item
    :param base_url: domain part of the link (unused)
    :param url_path: path part of the link, without the leading '/'

    :return: True if an invite hash was found and saved, False otherwise
    """
    segments = url_path.split('/')

    # username len > 5, a-z A-Z _
    if len(segments) == 1:
        match = regex_username.search(segments[0].lower())
        if match:
            # drop any backslash captured by the regex
            username = match[0].replace('\\', '')
            if len(username) > 5:
                print('username: {}'.format(username))
                telegram.save_item_correlation(username, item_id, item_date)
        return False

    if segments[0] != 'joinchat':
        return False

    match = regex_join_hash.search(segments[1])
    if not match:
        return False

    invite_hash = match[0]
    telegram.save_telegram_invite_hash(invite_hash, item_id)
    print('invite code: {}'.format(invite_hash))
    return True
|
|
||||||
|
|
||||||
|
|
||||||
# # TODO:
#   Add openmessage
#   Add passport ?
#   Add confirmphone
#   Add user
def extract_data_from_tg_url(item_id, item_date, tg_link):
    """
    Extract and save the data carried by a ``tg://`` link.

    Handled forms:
      - ``tg://resolve?domain=USERNAME``: the username is correlated to the item
      - ``tg://join?invite=HASH``: the invite hash is saved
      - ``tg://login?code=CODE``: the login code is only printed

    Links with any other host are printed as-is.

    :param item_id: id of the item the link was found in
    :param item_date: date of the item
    :param tg_link: raw ``tg://...`` link

    :return: True if an invite hash was found and saved, False otherwise
    """
    invite_code_found = False

    url = urlparse(tg_link)
    query = url.query

    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # remove domain=
            username = regex_username.search(query[7:])
            if username:
                # drop any backslash captured by the regex
                username = username[0].replace('\\', '')
                if len(username) > 5:
                    print('username: {}'.format(username))
                    telegram.save_item_correlation(username, item_id, item_date)

    elif url.netloc == 'join' and len(query) > 7:
        if query.startswith('invite='):
            # remove invite=
            invite_hash = regex_join_hash.search(query[7:])
            if invite_hash:
                invite_hash = invite_hash[0]
                telegram.save_telegram_invite_hash(invite_hash, item_id)
                print('invite code: {}'.format(invite_hash))
                invite_code_found = True

    elif url.netloc == 'login' and len(query) > 5:
        # Only the "code=" parameter carries a login code; without this check
        # e.g. "token=abc" would be reported as the login code "n=abc".
        if query.startswith('code='):
            login_code = query[5:]
            # format the string itself: print() returns None, so calling
            # .format() on its result raised AttributeError
            print('login code: {}'.format(login_code))

    else:
        print(url)

    return invite_code_found
|
|
||||||
|
|
||||||
def _findall_with_timeout(regex, item_id, item_content):
    """Run re.findall under the SIGALRM watchdog; return [] if it times out."""
    signal.alarm(max_execution_time)
    try:
        return re.findall(regex, item_content)
    except TimeoutException:
        p.incr_module_timeout_statistic() # add encoder type
        print ("{0} processing timeout".format(item_id))
        return []
    finally:
        # Disarm on every exit path so a pending alarm can never fire later
        # in unrelated code (previously only the success path disarmed it).
        signal.alarm(0)


def search_telegram(item_id, item_date, item_content):
    """
    Search an item for telegram web links and ``tg://`` links.

    Data found in the links is saved by the extract_data_from_* helpers. If at
    least one invite hash was found, the item is sent to the 'Tags' queue.
    Uses the module-level ``p`` created in the ``__main__`` block.

    :param item_id: id of the item
    :param item_date: date of the item
    :param item_content: text content of the item
    """
    invite_code_found = False

    # telegram links: findall yields (base_url, url_path) tuples
    telegram_links = _findall_with_timeout(regex_telegram_link, item_id, item_content)
    for base_url, url_path in telegram_links:
        if extract_data_from_telegram_url(item_id, item_date, base_url, url_path):
            invite_code_found = True

    # tg links
    tg_links = _findall_with_timeout(regex_tg_link, item_id, item_content)
    for tg_link in tg_links:
        if extract_data_from_tg_url(item_id, item_date, tg_link):
            invite_code_found = True

    if invite_code_found:
        #tags
        msg = 'infoleak:automatic-detection="telegram-invite-hash";{}'.format(item_id)
        p.populate_set_out(msg, 'Tags')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Logger configuration
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'Telegram'
    # # TODO: add duplicate

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logs
    publisher.info("Run Telegram module ")

    # Endless loop consuming item ids from the input queue
    while True:
        item_id = p.get_from_set()

        if item_id is not None:
            # Load the item and search it for telegram data
            item_content = Item.get_item_content(item_id)
            item_date = Item.get_item_date(item_id)
            search_telegram(item_id, item_date, item_content)
        else:
            # Nothing queued: wait a little before polling again
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
|
|
|
@ -29,7 +29,8 @@ publisher.port = 6380
|
||||||
publisher.channel = "Script"
|
publisher.channel = "Script"
|
||||||
|
|
||||||
def generate_redis_cache_key(module_name):
    """
    Build a unique cache key for a module.

    :param module_name: name of the module requesting the key

    :return: '{module_name}_extracted:{uuid4}' with a freshly generated UUID
    """
    return f'{module_name}_extracted:{uuid.uuid4()}'
|
||||||
|
|
||||||
def _regex_findall(redis_key, regex, item_content, r_set):
|
def _regex_findall(redis_key, regex, item_content, r_set):
|
||||||
all_items = re.findall(regex, item_content)
|
all_items = re.findall(regex, item_content)
|
||||||
|
@ -57,7 +58,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
|
||||||
if proc.is_alive():
|
if proc.is_alive():
|
||||||
proc.terminate()
|
proc.terminate()
|
||||||
Statistics.incr_module_timeout_statistic(module_name)
|
Statistics.incr_module_timeout_statistic(module_name)
|
||||||
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
|
err_mess = f"{module_name}: processing timeout: {item_id}"
|
||||||
print(err_mess)
|
print(err_mess)
|
||||||
publisher.info(err_mess)
|
publisher.info(err_mess)
|
||||||
return []
|
return []
|
||||||
|
@ -87,7 +88,7 @@ def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=
|
||||||
if proc.is_alive():
|
if proc.is_alive():
|
||||||
proc.terminate()
|
proc.terminate()
|
||||||
Statistics.incr_module_timeout_statistic(module_name)
|
Statistics.incr_module_timeout_statistic(module_name)
|
||||||
err_mess = "{}: processing timeout: {}".format(module_name, item_id)
|
err_mess = f"{module_name}: processing timeout: {item_id}"
|
||||||
print(err_mess)
|
print(err_mess)
|
||||||
publisher.info(err_mess)
|
publisher.info(err_mess)
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -2,8 +2,10 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import sys
|
import sys
|
||||||
import redis
|
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
|
@ -13,8 +15,70 @@ config_loader = ConfigLoader.ConfigLoader()
|
||||||
r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion")
|
r_serv_crawler = config_loader.get_redis_conn("ARDB_Onion")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
|
# NOTE: the ranges are spelled A-Z on purpose. "A-z" also spans the ASCII
# characters between 'Z' and 'a' ([ \ ] ^ _ and the backtick), which made
# both patterns swallow punctuation that follows a username or hash.
REGEX_USERNAME = re.compile(r'[0-9a-zA-Z_]+')
# '_' is listed explicitly: it used to be matched only through the A-z range
REGEX_JOIN_HASH = re.compile(r'[0-9a-zA-Z_-]+')
|
||||||
|
|
||||||
|
## ##
|
||||||
|
|
||||||
def save_item_correlation(username, item_id, item_date):
    """
    Save the correlation between a telegram username and an item.

    Delegates to Username.save_item_correlation with 'telegram' as first argument.

    :param username: telegram username
    :param item_id: id of the item the username was found in
    :param item_date: date of the item
    """
    Username.save_item_correlation('telegram', username, item_id, item_date)
|
||||||
|
|
||||||
def save_telegram_invite_hash(invite_hash, item_id):
    """
    Add an invite hash to the 'telegram:invite_code' set.

    :param invite_hash: telegram invite hash
    :param item_id: id of the item the hash was found in
    """
    # set member layout: "{invite_hash};{item_id}"
    member = f'{invite_hash};{item_id}'
    r_serv_crawler.sadd('telegram:invite_code', member)
|
||||||
|
|
||||||
|
def get_data_from_telegram_url(base_url, url_path):
    """
    Extract the data carried by the path of a telegram web link.

    A single-segment path is treated as a username, a path starting with
    'joinchat' as an invite link. Any other path yields nothing.

    :param base_url: domain part of the link (unused)
    :param url_path: path part of the link, without the leading '/'

    :return: dict with at most one of the keys 'username' or 'invite_hash';
             empty if nothing was extracted
    """
    parts = url_path.split('/')

    # username len > 5, a-z A-Z _
    if len(parts) == 1:
        found = REGEX_USERNAME.search(parts[0].lower())
        if found:
            # drop any backslash captured by the regex
            name = found[0].replace('\\', '')
            if len(name) > 5:
                return {'username': name}
        return {}

    if parts[0] == 'joinchat':
        found = REGEX_JOIN_HASH.search(parts[1])
        if found:
            return {'invite_hash': found[0]}

    return {}
|
||||||
|
|
||||||
|
# # TODO:
#   Add openmessage
#   Add passport ?
#   Add confirmphone
#   Add user
def get_data_from_tg_url(tg_link):
    """
    Extract the data carried by a ``tg://`` link.

    Handled forms:
      - ``tg://resolve?domain=USERNAME`` gives key 'username'
      - ``tg://join?invite=HASH`` gives key 'invite_hash'
      - ``tg://login?code=CODE`` gives key 'login_code'

    Links with any other host are printed and yield an empty dict.

    :param tg_link: raw ``tg://...`` link

    :return: dict with at most one of the keys above; empty if nothing was extracted
    """
    dict_url = {}

    url = urlparse(tg_link)
    query = url.query

    # username len > 5, a-z A-Z _
    if url.netloc == 'resolve' and len(query) > 7:
        if query.startswith('domain='):
            # remove domain=
            username = REGEX_USERNAME.search(query[7:])
            if username:
                # drop any backslash captured by the regex
                username = username[0].replace('\\', '')
                if len(username) > 5:
                    dict_url['username'] = username

    elif url.netloc == 'join' and len(query) > 7:
        if query.startswith('invite='):
            # remove invite=
            invite_hash = REGEX_JOIN_HASH.search(query[7:])
            if invite_hash:
                dict_url['invite_hash'] = invite_hash[0]

    elif url.netloc == 'login' and len(query) > 5:
        # Only the "code=" parameter carries a login code; without this check
        # e.g. "token=abc" would be returned as the login code "n=abc".
        if query.startswith('code='):
            # len(query) > 5 guarantees a non-empty code here
            dict_url['login_code'] = query[5:]

    else:
        # # TODO: log invalid URL ???????
        print(url)

    return dict_url
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
"""
|
||||||
|
Telegram Module
|
||||||
|
============================
|
||||||
|
|
||||||
|
Search telegram usernames, channels and invite codes
|
||||||
|
|
||||||
|
"""
|
||||||
|
import ast
import os
import re
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from modules.abstract_module import AbstractModule
|
||||||
|
from packages.Item import Item
|
||||||
|
from lib import regex_helper
|
||||||
|
from lib import telegram
|
||||||
|
|
||||||
|
class Telegram(AbstractModule):
    """
    Telegram module for AIL framework.

    Searches item content for telegram web links and ``tg://`` links, saves
    the usernames and invite hashes they carry and tags the item when an
    invite hash was found.
    """

    def __init__(self):
        super(Telegram, self).__init__()

        # https://github.com/LonamiWebs/Telethon/wiki/Special-links
        self.re_telegram_link = r'(telegram\.me|t\.me|telegram\.dog|telesco\.pe)/([^\.\",\s]+)'
        self.re_tg_link = r'tg://.+'

        # The compiled objects are discarded: the patterns are passed around
        # as strings. re.compile raises on an invalid pattern, so this only
        # checks them when the module starts.
        re.compile(self.re_telegram_link)
        re.compile(self.re_tg_link)

        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
        # max seconds allowed for one regex search (read by regex_findall)
        self.max_execution_time = 60

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    @staticmethod
    def _unpack_link(link):
        """Return (base_url, url_path) for one telegram link match, or None if it cannot be parsed."""
        if not isinstance(link, (tuple, list)):
            # Matches arrive as the text form of a 2-tuple, e.g. "('t.me', 'name')".
            # literal_eval also copes with the double-quoted form Python uses
            # when a value contains "'", which broke the former split on "', '".
            try:
                link = ast.literal_eval(link)
            except (ValueError, TypeError, SyntaxError):
                return None
        if isinstance(link, (tuple, list)) and len(link) == 2:
            return link[0], link[1]
        return None

    @staticmethod
    def _save_url_data(dict_url, item_id, item_date):
        """Save the username / invite hash found in dict_url; return True if an invite hash was saved."""
        if dict_url.get('username'):
            telegram.save_item_correlation(dict_url['username'], item_id, item_date)
            print(f'username: {dict_url["username"]}')
        if dict_url.get('invite_hash'):
            telegram.save_telegram_invite_hash(dict_url['invite_hash'], item_id)
            print(f'invite code: {dict_url["invite_hash"]}')
            return True
        return False

    def compute(self, message, r_result=False):
        """
        Search one item for telegram data.

        :param message: item id
        :param r_result: unused
        """
        # message = item_id
        item = Item(message)
        item_content = item.get_content()
        item_date = item.get_date()
        item_id = item.get_id()

        invite_code_found = False

        # extract telegram links
        telegram_links = self.regex_findall(self.re_telegram_link, item_id, item_content)
        for telegram_link_tuple in telegram_links:
            link_parts = self._unpack_link(telegram_link_tuple)
            if link_parts is None:
                # skip a malformed match instead of aborting the whole item
                print(f'invalid telegram link: {telegram_link_tuple}')
                continue
            base_url, url_path = link_parts
            dict_url = telegram.get_data_from_telegram_url(base_url, url_path)
            if self._save_url_data(dict_url, item_id, item_date):
                invite_code_found = True

        # extract tg links
        tg_links = self.regex_findall(self.re_tg_link, item_id, item_content)
        for tg_link in tg_links:
            dict_url = telegram.get_data_from_tg_url(tg_link)
            if self._save_url_data(dict_url, item_id, item_date):
                invite_code_found = True
            if dict_url.get('login_code'):
                print(f'login code: {dict_url["login_code"]}')

        # CREATE TAG
        if invite_code_found:
            #tags
            msg = f'infoleak:automatic-detection="telegram-invite-hash";{item_id}'
            self.send_message_to_queue(msg, 'Tags')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Instantiate the module and start its processing loop
    Telegram().run()
|
|
@ -15,6 +15,7 @@ import traceback
|
||||||
##################################
|
##################################
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
from lib import regex_helper
|
||||||
|
|
||||||
class AbstractModule(ABC):
|
class AbstractModule(ABC):
|
||||||
"""
|
"""
|
||||||
|
@ -74,6 +75,17 @@ class AbstractModule(ABC):
|
||||||
self.process.populate_set_out(message, queue_name)
|
self.process.populate_set_out(message, queue_name)
|
||||||
# add to new set_module
|
# add to new set_module
|
||||||
|
|
||||||
|
def regex_findall(self, regex, id, content):
    """
    regex findall helper (force timeout)

    Delegates to regex_helper.regex_findall using this module's name,
    redis cache key and max execution time.

    :param regex: regex to search with
    :param id: object id
    :param content: object content

    :return: the result of regex_helper.regex_findall

    ex: regex_findall(self.re_tg_link, item_id, item_content)
    """
    return regex_helper.regex_findall(
        self.module_name,
        self.redis_cache_key,
        regex,
        id,
        content,
        max_time=self.max_execution_time,
    )
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
"""
|
"""
|
||||||
Run Module endless process
|
Run Module endless process
|
||||||
|
|
|
@ -19,6 +19,7 @@ from modules.DomClassifier import DomClassifier
|
||||||
from modules.Global import Global
|
from modules.Global import Global
|
||||||
from modules.Keys import Keys
|
from modules.Keys import Keys
|
||||||
from modules.Onion import Onion
|
from modules.Onion import Onion
|
||||||
|
from modules.Telegram import Telegram
|
||||||
|
|
||||||
# project packages
|
# project packages
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
|
@ -169,5 +170,15 @@ class Test_Module_Onion(unittest.TestCase):
|
||||||
# # TODO: check warning logs
|
# # TODO: check warning logs
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class Test_Module_Telegram(unittest.TestCase):
    """Smoke test: run the Telegram module on a test item."""

    def setUp(self):
        self.module_obj = Telegram()

    def test_module(self):
        # # TODO: check results
        self.module_obj.compute('tests/2021/01/01/keys.gz')
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
Loading…
Reference in New Issue