mirror of https://github.com/CIRCL/AIL-framework
81 lines
2.8 KiB
Python
Executable File
81 lines
2.8 KiB
Python
Executable File
#!/usr/bin/env python2
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
The Phone Module
|
|
================
|
|
|
|
This module is consuming the Redis-list created by the Categ module.
|
|
|
|
It apply phone number regexes on paste content and warn if above a threshold.
|
|
|
|
"""
|
|
|
|
import time
|
|
import re
|
|
import phonenumbers
|
|
from packages import Paste
|
|
from pubsublogger import publisher
|
|
from Helper import Process
|
|
|
|
|
|
def search_phone(message):
|
|
paste = Paste.Paste(message)
|
|
content = paste.get_p_content()
|
|
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
|
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
|
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
|
# list of the regex results in the Paste, may be null
|
|
results = reg_phone.findall(content)
|
|
|
|
# if the list is greater than 4, we consider the Paste may contain a list of phone numbers
|
|
if len(results) > 4:
|
|
print results
|
|
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
|
#send to Browse_warning_paste
|
|
p.populate_set_out('phone;{}'.format(message), 'BrowseWarningPaste')
|
|
#Send to duplicate
|
|
p.populate_set_out(message, 'Duplicate')
|
|
stats = {}
|
|
for phone_number in results:
|
|
try:
|
|
x = phonenumbers.parse(phone_number, None)
|
|
country_code = x.country_code
|
|
if stats.get(country_code) is None:
|
|
stats[country_code] = 1
|
|
else:
|
|
stats[country_code] = stats[country_code] + 1
|
|
except:
|
|
pass
|
|
for country_code in stats:
|
|
if stats[country_code] > 4:
|
|
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
|
|
|
if __name__ == '__main__':
|
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
|
# Port of the redis instance used by pubsublogger
|
|
publisher.port = 6380
|
|
# Script is the default channel used for the modules.
|
|
publisher.channel = 'Script'
|
|
|
|
# Section name in bin/packages/modules.cfg
|
|
config_section = 'Phone'
|
|
|
|
# Setup the I/O queues
|
|
p = Process(config_section)
|
|
|
|
# Sent to the logging a description of the module
|
|
publisher.info("Run Phone module")
|
|
|
|
# Endless loop getting messages from the input queue
|
|
while True:
|
|
# Get one message from the input queue
|
|
message = p.get_from_set()
|
|
if message is None:
|
|
publisher.debug("{} queue is empty, waiting".format(config_section))
|
|
time.sleep(1)
|
|
continue
|
|
|
|
# Do something with the message from the queue
|
|
search_phone(message)
|