2018-05-04 13:53:29 +02:00
|
|
|
#!/usr/bin/env python3
|
2016-02-05 19:58:21 +01:00
|
|
|
# -*-coding:UTF-8 -*
|
2017-05-09 11:13:16 +02:00
|
|
|
|
2016-02-05 19:58:21 +01:00
|
|
|
"""
|
2017-05-09 11:13:16 +02:00
|
|
|
The Phone Module
|
|
|
|
================
|
|
|
|
|
|
|
|
This module is consuming the Redis-list created by the Categ module.
|
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
It applies phone number regexes to the item content and warns when the number of matches is above a threshold.
|
2017-05-09 11:13:16 +02:00
|
|
|
|
2016-02-05 19:58:21 +01:00
|
|
|
"""
|
|
|
|
|
2021-04-02 09:52:05 +02:00
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
2021-06-02 16:04:52 +02:00
|
|
|
import os
|
2016-02-05 19:58:21 +01:00
|
|
|
import re
|
2021-06-02 16:04:52 +02:00
|
|
|
import sys
|
|
|
|
import time
|
2017-05-03 14:25:18 +02:00
|
|
|
import phonenumbers
|
2021-04-02 09:52:05 +02:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2021-04-02 09:52:05 +02:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2021-06-02 16:04:52 +02:00
|
|
|
from modules.abstract_module import AbstractModule
|
|
|
|
from packages.Item import Item
|
2016-02-05 19:58:21 +01:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
# # TODO: # FIXME: improve regex / filter false positives
|
2021-04-02 09:52:05 +02:00
|
|
|
class Phone(AbstractModule):
    """
    Phone module for AIL framework.

    Scans the content of each incoming item with ``REG_PHONE``. When more
    than four candidate phone numbers are found, the item is tagged, sent
    to the 'Duplicate' queue, and a warning is logged for every country
    code that appears more than four times.

    NOTE(review): ``redis_logger`` and ``send_message_to_queue`` are not
    defined in this class; presumably they are provided by
    ``AbstractModule`` -- confirm against that base class.
    """

    # Regex to find phone numbers; may raise many false positives
    # (improving it is the way to reduce them).
    # Earlier variant, kept for reference:
    # reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
    REG_PHONE = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')

    def __init__(self):
        """Initialise the module and set the waiting time between messages."""
        super().__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

    def compute(self, message):
        """
        Search one item for phone numbers and report the findings.

        :param message: value passed to ``Item(...)`` to build the item
            (presumably the item id -- confirm against the caller).
        """
        item = Item(message)
        content = item.get_content()

        # REG_PHONE contains capture groups, so ``findall`` would return
        # tuples of groups instead of the matched text. ``phonenumbers.parse``
        # needs the full matched string, hence ``finditer`` + ``group(0)``.
        results = [match.group(0) for match in self.REG_PHONE.finditer(content)]

        # More than 4 matches: we consider that the item may contain a list
        # of phone numbers. Otherwise there is nothing to report.
        if len(results) <= 4:
            return

        item_id = item.get_id()
        self.redis_logger.debug(results)
        self.redis_logger.warning(f'{item_id} contains PID (phone numbers)')

        msg = f'infoleak:automatic-detection="phone-number";{item_id}'
        self.send_message_to_queue(msg, 'Tags')

        # Send to duplicate
        self.send_message_to_queue(item_id, 'Duplicate')

        # Count how many of the matched numbers belong to each country code.
        stats = {}
        for phone_number in results:
            try:
                # No default region is given, so only numbers written in
                # international form ('+...') can be parsed; the others
                # raise NumberParseException and are skipped.
                parsed = phonenumbers.parse(phone_number, None)
            except phonenumbers.NumberParseException:
                continue
            country_code = parsed.country_code
            stats[country_code] = stats.get(country_code, 0) + 1

        for country_code, count in stats.items():
            if count > 4:
                self.redis_logger.warning(f'{item_id} contains Phone numbers with country code {country_code}')
|
2021-04-02 09:52:05 +02:00
|
|
|
|
2016-02-05 19:58:21 +01:00
|
|
|
|
2016-02-10 16:39:06 +01:00
|
|
|
if __name__ == '__main__':
    # Script entry point: build the Phone module and start it.
    # NOTE(review): run() is not defined in Phone; presumably it is
    # inherited from AbstractModule -- confirm.
    Phone().run()
|