#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""
    Module for finding phone numbers in pastes.

    Messages are read from the module's input queue; each one is loaded as a
    Paste and scanned with a permissive regular expression. When a paste holds
    more than PHONE_COUNT_THRESHOLD candidate numbers a warning is published.
"""

import time
import pprint
import re
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
from Helper import Process


# Regex to find phone numbers. It is deliberately loose and may raise many
# false positives; tighten it if precision matters.
#   - optional international prefix: "+" 1-3 digits, "(" 1-2 digits ")",
#     then at most one extra digit
#   - followed by 4 to 6 chunks of 2-4 digits, each optionally followed by one
#     separator character ([\W\D\s] matches any single non-digit character)
# Groups are non-capturing so that findall() returns the whole matched number
# instead of a tuple holding only the prefix and the last chunk.
PHONE_REGEX = re.compile(
    r'(?:\+\d{1,3}\(\d{1,2}\)\d?)?(?:\d{2,4}[\W\D\s]?){4,6}')

# A paste is reported only when it holds strictly more matches than this.
PHONE_COUNT_THRESHOLD = 4


def _find_phone_numbers(content):
    """Return the list of phone-number-like substrings found in content."""
    return PHONE_REGEX.findall(content)


def search_phone(message):
    """Scan one paste for phone numbers and publish a warning if it has many.

    message is handed unchanged to Paste.Paste(); presumably it is the paste
    path taken from the input queue (confirm against Paste).

    Nothing is returned. When more than PHONE_COUNT_THRESHOLD matches are
    found, the matches are printed and a warning naming the paste is sent
    through the publisher.
    """
    paste = Paste.Paste(message)
    content = paste.get_p_content()

    # List of the regex matches in the Paste, may be empty.
    results = _find_phone_numbers(content)

    # With more than the threshold number of matches, we consider the Paste
    # may contain a list of phone numbers.
    if len(results) > PHONE_COUNT_THRESHOLD:
        # Parenthesised single argument: same output under the Python 2 print
        # statement and the Python 3 print function.
        print(results)
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))


def main():
    """Configure logging and process messages from the input queue forever."""
    # If you wish to use another port or channel, do not forget to run a
    # subscriber accordingly (see launch_logs.sh).
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Setup the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging channel
    publisher.info("Run Phone module")

    # Endless loop getting messages from the input queue
    while True:
        # Get one message from the input queue
        message = p.get_from_set()
        if message is None:
            # Nothing to process yet: back off for a second before polling again
            publisher.debug("{} queue is empty, waiting".format(config_section))
            time.sleep(1)
            continue

        # Look for phone numbers in the paste referenced by the message
        search_phone(message)


if __name__ == '__main__':
    main()