diff --git a/.gitignore b/.gitignore
index 8fac5792..264aaa5f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,3 +19,6 @@ var/www/static/
 
 # Local config
 bin/packages/config.cfg
+
+# installed files
+nltk_data/
diff --git a/bin/Phone.py b/bin/Phone.py
new file mode 100644
index 00000000..384040cf
--- /dev/null
+++ b/bin/Phone.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+"""
+    module for finding phone numbers
+"""
+
+import time
+import pprint
+import re
+from packages import Paste
+from packages import lib_refine
+from pubsublogger import publisher
+from Helper import Process
+
+
+def search_phone(message):
+    """Publish a warning when a paste appears to list phone numbers.
+
+    message is handed to Paste.Paste; the paste content is scanned with a
+    permissive phone-number regex, and a warning is published when more
+    than 4 matches are found. Returns None.
+    """
+    paste = Paste.Paste(message)
+    content = paste.get_p_content()
+    # regex to find phone numbers; it may produce many false positives and would need refining to be more precise
+    reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
+    # list of the regex results in the Paste, may be empty
+    results = reg_phone.findall(content)
+
+    # if there are more than 4 matches, we consider the Paste may contain a list of phone numbers
+    if len(results) > 4:
+        print results
+        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
+
+
+if __name__ == '__main__':
+    # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
+    # Port of the redis instance used by pubsublogger
+    publisher.port = 6380
+    # Script is the default channel used for the modules.
+    publisher.channel = 'Script'
+
+    # Section name in bin/packages/modules.cfg
+    config_section = 'Phone'
+
+    # Set up the I/O queues
+    p = Process(config_section)
+
+    # Send a description of the module to the logger
+    publisher.info("Run Phone module")
+
+    # Endless loop getting messages from the input queue
+    while True:
+        # Get one message from the input queue
+        message = p.get_from_set()
+        if message is None:
+            publisher.debug("{} queue is empty, waiting".format(config_section))
+            time.sleep(1)
+            continue
+
+        # Do something with the message from the queue
+        search_phone(message)
+
diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg
index b7456465..d8f49da2 100644
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@@ -52,3 +52,9 @@ subscribe = Redis_Global
 
 [Credential]
 subscribe = Redis_Global
+
+[Keys]
+subscribe = Redis_Global
+
+[Phone]
+subscribe = Redis_Global
diff --git a/installing_deps_archlinux.sh b/installing_deps_archlinux.sh
new file mode 100644
index 00000000..b921d3c0
--- /dev/null
+++ b/installing_deps_archlinux.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+set -e
+set -x
+
+sudo pacman -Syu
+
+sudo pacman -S python2-pip screen gcc unzip freetype2 python2 git --needed
+sudo yaourt -S snappy --needed
+sudo pip2 install virtualenv
+
+# Needed for bloom filters
+sudo pacman -S openssl python2-numpy --needed
+
+# DNS deps
+sudo pacman -S adns --needed
+
+# Needed for redis-leveldb
+sudo pacman -S libev gmp --needed
+
+# Needed for matplotlib
+test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
+sudo easy_install-2.7 -U distribute
+
+# REDIS #
+test ! -d redis/ && git clone https://github.com/antirez/redis.git
+pushd redis/
+git checkout 2.8
+make
+popd
+
+# REDIS LEVEL DB #
+test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git
+pushd redis-leveldb/
+git submodule init
+git submodule update
+make
+popd
+
+if [ ! -f bin/packages/config.cfg ]; then
+    cp bin/packages/config.cfg.sample bin/packages/config.cfg
+fi
+
+virtualenv AILENV
+
+echo "export AIL_HOME=\"$(pwd)\"" >> ./AILENV/bin/activate
+echo "export AIL_BIN=\"$(pwd)/bin/\"" >> ./AILENV/bin/activate
+echo "export AIL_FLASK=\"$(pwd)/var/www/\"" >> ./AILENV/bin/activate
+echo "export AIL_REDIS=\"$(pwd)/redis/src/\"" >> ./AILENV/bin/activate
+echo "export AIL_LEVELDB=\"$(pwd)/redis-leveldb/\"" >> ./AILENV/bin/activate
+
+. ./AILENV/bin/activate
+
+mkdir -p "$AIL_HOME"/{PASTES,Blooms,dumps}
+mkdir -p "$AIL_HOME"/LEVEL_DB_DATA/{2014,2013}
+
+pip install -r pip_packages_requirement.txt
+
+# Download the necessary NLTK corpora
+HOME="$(pwd)" python -m textblob.download_corpora