chg: [Phone module] Filter Invalid Phone numbers + UI Show extracted

pull/594/head
Terrtia 2023-05-24 10:48:29 +02:00
parent 7a52aec884
commit 353b290899
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 101 additions and 36 deletions

View File

@ -249,6 +249,8 @@ function launching_scripts {
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x" screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x"
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x" screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x"
sleep 0.1 sleep 0.1
screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Tools.py; read x" screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Tools.py; read x"
@ -290,8 +292,6 @@ function launching_scripts {
################################## ##################################
# DISABLED MODULES # # DISABLED MODULES #
################################## ##################################
# screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
# sleep 0.1
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x" # screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
# sleep 0.1 # sleep 0.1
# screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x" # screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"

View File

@ -25,6 +25,7 @@ from modules.CreditCards import CreditCards
from modules.Iban import Iban from modules.Iban import Iban
from modules.Mail import Mail from modules.Mail import Mail
from modules.Onion import Onion from modules.Onion import Onion
from modules.Phone import Phone
from modules.Tools import Tools from modules.Tools import Tools
config_loader = ConfigLoader() config_loader = ConfigLoader()
@ -40,6 +41,7 @@ MODULES = {
'infoleak:automatic-detection="iban"': Iban(queue=False), 'infoleak:automatic-detection="iban"': Iban(queue=False),
'infoleak:automatic-detection="mail"': Mail(queue=False), 'infoleak:automatic-detection="mail"': Mail(queue=False),
'infoleak:automatic-detection="onion"': Onion(queue=False), 'infoleak:automatic-detection="onion"': Onion(queue=False),
'infoleak:automatic-detection="phone-number"': Phone(queue=False),
# APIkey ??? # APIkey ???
# Credentials # Credentials
# Zerobins # Zerobins

View File

@ -7,14 +7,13 @@ Regex Helper
import os import os
import logging.config import logging.config
import phonenumbers
import re import re
import sys import sys
import uuid import uuid
from multiprocessing import Process as Proc from multiprocessing import Process as Proc
sys.path.append(os.environ['AIL_BIN'])
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
@ -65,7 +64,6 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
proc.terminate() proc.terminate()
# Statistics.incr_module_timeout_statistic(module_name) # Statistics.incr_module_timeout_statistic(module_name)
err_mess = f"{module_name}: processing timeout: {item_id}" err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess) logger.info(err_mess)
return [] return []
else: else:
@ -99,7 +97,6 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30):
proc.terminate() proc.terminate()
# Statistics.incr_module_timeout_statistic(r_key) # Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}" err_mess = f"{r_key}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess) logger.info(err_mess)
return [] return []
else: else:
@ -130,7 +127,6 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
proc.terminate() proc.terminate()
# Statistics.incr_module_timeout_statistic(r_key) # Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}" err_mess = f"{r_key}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess) logger.info(err_mess)
return False return False
else: else:
@ -144,3 +140,40 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
print("Caught KeyboardInterrupt, terminating regex worker") print("Caught KeyboardInterrupt, terminating regex worker")
proc.terminate() proc.terminate()
sys.exit(0) sys.exit(0)
## Phone Regexs ##
def _regex_phone_iter(r_key, country_code, content):
    """
    Worker: store every phone number match found in content in the cache list r_key.

    Each match is pushed as the string 'start:end:raw_string'.

    :param r_key: cache key of the list receiving the matches
    :param country_code: second argument given to phonenumbers.PhoneNumberMatcher
    :param content: text to scan
    """
    for found in phonenumbers.PhoneNumberMatcher(content, country_code):
        # The raw matched text is stored as-is; a normalised alternative would be
        # phonenumbers.format_number(found.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
        entry = f'{found.start}:{found.end}:{found.raw_string}'
        r_serv_cache.rpush(r_key, entry)
    r_serv_cache.expire(r_key, 360)
def regex_phone_iter(r_key, country_code, item_id, content, max_time=30):
    """
    Find phone numbers in content using a worker process bounded by a timeout.

    :param r_key: cache key used by the worker to hand its matches back
    :param country_code: second argument given to phonenumbers.PhoneNumberMatcher
    :param item_id: id of the processed object, only used in the timeout log message
    :param content: text to scan
    :param max_time: maximum time, in seconds, to wait for the worker
    :return: list of (start, end, value) tuples; empty list on timeout
    """
    proc = Proc(target=_regex_phone_iter, args=(r_key, country_code, content))
    try:
        proc.start()
        proc.join(max_time)
        if proc.is_alive():
            proc.terminate()
            # Give the worker a moment to exit so it cannot push after the cleanup below.
            proc.join(1)
            # Drop the partial results: callers presumably reuse the same r_key for
            # every object, so leftovers would be returned with the next object's matches.
            r_serv_cache.delete(r_key)
            # Statistics.incr_module_timeout_statistic(r_key)
            err_mess = f"{r_key}: processing timeout: {item_id}"
            logger.info(err_mess)
            return []

        res = r_serv_cache.lrange(r_key, 0, -1)
        r_serv_cache.delete(r_key)
        proc.terminate()
        all_match = []
        for match in res:
            # maxsplit=2 keeps any ':' that is part of the matched text in value
            start, end, value = match.split(':', 2)
            all_match.append((int(start), int(end), value))
        return all_match

    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating regex worker")
        proc.terminate()
        sys.exit(0)

View File

@ -15,7 +15,6 @@ It apply phone number regexes on item content and warn if above a threshold.
# Import External packages # Import External packages
################################## ##################################
import os import os
import re
import sys import sys
import phonenumbers import phonenumbers
@ -34,44 +33,65 @@ class Phone(AbstractModule):
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required) # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})') # reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})') # REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
def __init__(self): def __init__(self, queue=True):
super(Phone, self).__init__() super(Phone, self).__init__(queue=queue)
# Waiting time in seconds between to message processed # Waiting time in seconds between to message processed
self.pending_seconds = 1 self.pending_seconds = 1
def extract(self, obj_id, content, tag):
    """
    Return the phone numbers found in content, each labelled with the given tag.

    :param obj_id: object id, forwarded to self.regex_phone_iter
    :param content: object content to scan
    :param tag: tag name, added to every entry as 'tag:<tag>'
    :return: list of [start, end, value, 'tag:<tag>'] entries
    """
    label = f'tag:{tag}'
    return [[found[0], found[1], found[2], label]
            for found in self.regex_phone_iter('US', obj_id, content)]
def compute(self, message):
    """
    Search an item for phone numbers and tag the item when at least one is found.

    :param message: queue message identifying the item (given to Item())
    """
    item = Item(message)
    item_id = item.get_id()
    content = item.get_content()

    # TODO use language detection to choose the country code ?
    results = self.regex_phone_iter('US', item_id, content)
    if not results:
        return

    # TAGS
    msg = f'infoleak:automatic-detection="phone-number";{item_id}'
    self.add_message_to_queue(msg, 'Tags')

    # Log the number of matches, not the length of a single (start, end, value) match.
    self.redis_logger.warning(f'{item_id} contains {len(results)} Phone numbers')
if __name__ == '__main__':
    module = Phone()
    # Start the module's processing loop; a hard-coded compute() call here would
    # process a single fixed item and exit when the module is launched as a script.
    module.run()

View File

@ -110,6 +110,17 @@ class AbstractModule(ABC):
return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, obj_id, content, return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, obj_id, content,
max_time=self.max_execution_time, r_set=r_set) max_time=self.max_execution_time, r_set=r_set)
def regex_phone_iter(self, country_code, obj_id, content):
    """
    phone number matcher helper (force timeout)
    :param country_code: country code forwarded to the phone number matcher (e.g. 'US')
    :param obj_id: object id
    :param content: object content
    :return: result of regex_helper.regex_phone_iter
    """
    return regex_helper.regex_phone_iter(self.r_cache_key, country_code, obj_id, content,
                                         max_time=self.max_execution_time)
def run(self): def run(self):
""" """
Run Module endless process Run Module endless process

View File

@ -128,10 +128,9 @@ publish = Duplicate,Tags
subscribe = Cve subscribe = Cve
publish = Tags publish = Tags
# Disabled [Phone]
#[Phone] subscribe = Item
#subscribe = Item publish = Tags
#publish = Tags
[Keys] [Keys]
subscribe = Item subscribe = Item