chg: [Phone module] Filter Invalid Phone numbers + UI Show extracted

pull/594/head
Terrtia 2023-05-24 10:48:29 +02:00
parent 7a52aec884
commit 353b290899
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 101 additions and 36 deletions

View File

@ -249,6 +249,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Tools.py; read x"
@ -290,8 +292,6 @@ function launching_scripts {
##################################
# DISABLED MODULES #
##################################
# screen -S "Script_AIL" -X screen -t "Phone" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Phone.py; read x"
# sleep 0.1
# screen -S "Script_AIL" -X screen -t "SentimentAnalysis" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./SentimentAnalysis.py; read x"
# sleep 0.1
# screen -S "Script_AIL" -X screen -t "Release" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Release.py; read x"

View File

@ -25,6 +25,7 @@ from modules.CreditCards import CreditCards
from modules.Iban import Iban
from modules.Mail import Mail
from modules.Onion import Onion
from modules.Phone import Phone
from modules.Tools import Tools
config_loader = ConfigLoader()
@ -40,6 +41,7 @@ MODULES = {
'infoleak:automatic-detection="iban"': Iban(queue=False),
'infoleak:automatic-detection="mail"': Mail(queue=False),
'infoleak:automatic-detection="onion"': Onion(queue=False),
'infoleak:automatic-detection="phone-number"': Phone(queue=False),
# APIkey ???
# Credentials
# Zerobins

View File

@ -7,14 +7,13 @@ Regex Helper
import os
import logging.config
import phonenumbers
import re
import sys
import uuid
from multiprocessing import Process as Proc
sys.path.append(os.environ['AIL_BIN'])
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
@ -65,7 +64,6 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
proc.terminate()
# Statistics.incr_module_timeout_statistic(module_name)
err_mess = f"{module_name}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess)
return []
else:
@ -99,7 +97,6 @@ def regex_finditer(r_key, regex, item_id, content, max_time=30):
proc.terminate()
# Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess)
return []
else:
@ -130,7 +127,6 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
proc.terminate()
# Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}"
print(err_mess)
logger.info(err_mess)
return False
else:
@ -144,3 +140,40 @@ def regex_search(r_key, regex, item_id, content, max_time=30):
print("Caught KeyboardInterrupt, terminating regex worker")
proc.terminate()
sys.exit(0)
## Phone Regexs ##
def _regex_phone_iter(r_key, country_code, content):
iterator = phonenumbers.PhoneNumberMatcher(content, country_code)
for match in iterator:
value = match.raw_string
# PhoneNumberFormat.E164
# value = phonenumbers.format_number(match.number, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
start = match.start
end = match.end
r_serv_cache.rpush(r_key, f'{start}:{end}:{value}')
r_serv_cache.expire(r_key, 360)
def regex_phone_iter(r_key, country_code, item_id, content, max_time=30):
proc = Proc(target=_regex_phone_iter, args=(r_key, country_code, content))
try:
proc.start()
proc.join(max_time)
if proc.is_alive():
proc.terminate()
# Statistics.incr_module_timeout_statistic(r_key)
err_mess = f"{r_key}: processing timeout: {item_id}"
logger.info(err_mess)
return []
else:
res = r_serv_cache.lrange(r_key, 0, -1)
r_serv_cache.delete(r_key)
proc.terminate()
all_match = []
for match in res:
start, end, value = match.split(':', 2)
all_match.append((int(start), int(end), value))
return all_match
except KeyboardInterrupt:
print("Caught KeyboardInterrupt, terminating regex worker")
proc.terminate()
sys.exit(0)

View File

@ -15,7 +15,6 @@ It apply phone number regexes on item content and warn if above a threshold.
# Import External packages
##################################
import os
import re
import sys
import phonenumbers
@ -34,44 +33,65 @@ class Phone(AbstractModule):
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
# reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
# REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
def __init__(self):
super(Phone, self).__init__()
def __init__(self, queue=True):
super(Phone, self).__init__(queue=queue)
# Waiting time in seconds between to message processed
self.pending_seconds = 1
def extract(self, obj_id, content, tag):
extracted = []
phones = self.regex_phone_iter('US', obj_id, content)
for phone in phones:
extracted.append([phone[0], phone[1], phone[2], f'tag:{tag}'])
return extracted
def compute(self, message):
item = Item(message)
content = item.get_content()
# List of the regex results in the Item, may be null
results = self.REG_PHONE.findall(content)
# If the list is greater than 4, we consider the Item may contain a list of phone numbers
if len(results) > 4:
self.logger.debug(results)
self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')
# TODO use language detection to choose the country code ?
results = self.regex_phone_iter('US', item.id, content)
for phone in results:
print(phone[2])
if results:
# TAGS
msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
self.add_message_to_queue(msg, 'Tags')
stats = {}
for phone_number in results:
try:
x = phonenumbers.parse(phone_number, None)
country_code = x.country_code
if stats.get(country_code) is None:
stats[country_code] = 1
else:
stats[country_code] = stats[country_code] + 1
except:
pass
for country_code in stats:
if stats[country_code] > 4:
self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}')
self.redis_logger.warning(f'{item.get_id()} contains {len(phone)} Phone numbers')
# # List of the regex results in the Item, may be null
# results = self.REG_PHONE.findall(content)
#
# # If the list is greater than 4, we consider the Item may contain a list of phone numbers
# if len(results) > 4:
# self.logger.debug(results)
# self.redis_logger.warning(f'{item.get_id()} contains PID (phone numbers)')
#
# msg = f'infoleak:automatic-detection="phone-number";{item.get_id()}'
# self.add_message_to_queue(msg, 'Tags')
#
# stats = {}
# for phone_number in results:
# try:
# x = phonenumbers.parse(phone_number, None)
# country_code = x.country_code
# if stats.get(country_code) is None:
# stats[country_code] = 1
# else:
# stats[country_code] = stats[country_code] + 1
# except:
# pass
# for country_code in stats:
# if stats[country_code] > 4:
# self.redis_logger.warning(f'{item.get_id()} contains Phone numbers with country code {country_code}')
if __name__ == '__main__':
module = Phone()
module.run()
# module.run()
module.compute('crawled/2023/02/21/circl.luc90be694-a559-4d77-bfa4-9c54ea8bc2f7')

View File

@ -110,6 +110,17 @@ class AbstractModule(ABC):
return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, obj_id, content,
max_time=self.max_execution_time, r_set=r_set)
def regex_phone_iter(self, country_code, obj_id, content):
"""
regex findall helper (force timeout)
:param regex: compiled regex
:param obj_id: object id
:param content: object content
:param r_set: return result as set
"""
return regex_helper.regex_phone_iter(self.r_cache_key, country_code, obj_id, content,
max_time=self.max_execution_time)
def run(self):
"""
Run Module endless process

View File

@ -128,10 +128,9 @@ publish = Duplicate,Tags
subscribe = Cve
publish = Tags
# Disabled
#[Phone]
#subscribe = Item
#publish = Tags
[Phone]
subscribe = Item
publish = Tags
[Keys]
subscribe = Item