mirror of https://github.com/CIRCL/AIL-framework
Merge branch 'master' of https://github.com/CIRCL/AIL-framework into module
commit
b7d2b64a86
|
@ -19,3 +19,6 @@ var/www/static/
|
||||||
|
|
||||||
# Local config
|
# Local config
|
||||||
bin/packages/config.cfg
|
bin/packages/config.cfg
|
||||||
|
|
||||||
|
# installed files
|
||||||
|
nltk_data/
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
#!/usr/bin/env python2
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
"""
|
||||||
|
module for finding phone numbers
|
||||||
|
"""
|
||||||
|
|
||||||
|
import time
|
||||||
|
import pprint
|
||||||
|
import re
|
||||||
|
from packages import Paste
|
||||||
|
from packages import lib_refine
|
||||||
|
from pubsublogger import publisher
|
||||||
|
from Helper import Process
|
||||||
|
|
||||||
|
|
||||||
|
# Regex to find phone numbers; may raise many false positives
# (shalt thou seek optimization, upgrading is required).
# Compiled once at import time rather than once per processed message.
PHONE_REGEX = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')


def search_phone(message):
    """Warn when the paste referenced by *message* seems to list phone numbers.

    The paste content is scanned with ``PHONE_REGEX``.  When more than four
    matches are found, the matched numbers are printed and a warning naming
    the paste is sent through ``publisher``.

    :param message: value handed to ``Paste.Paste`` to load the paste
        (presumably the paste path taken from the input queue -- confirm
        against the queue producer).
    :returns: None
    """
    paste = Paste.Paste(message)
    content = paste.get_p_content()

    # Collect the full text of every match.  ``findall`` would return tuples
    # of the pattern's capture groups instead of the numbers themselves; the
    # number of matches is the same either way.
    results = [match.group(0) for match in PHONE_REGEX.finditer(content)]

    # More than 4 hits: we consider the paste may contain a list of phone numbers.
    if len(results) > 4:
        # Parenthesised single-argument form prints identically on Python 2
        # and is also valid Python 3 syntax.
        print(results)
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # pubsublogger settings.  If you wish to use another port or channel, do
    # not forget to run a subscriber accordingly (see launch_logs.sh).
    publisher.port = 6380          # port of the redis instance used by pubsublogger
    publisher.channel = 'Script'   # default channel used for the modules

    # Section name in bin/packages/modules.cfg
    config_section = 'Phone'

    # Set up the I/O queues
    p = Process(config_section)

    # Send a description of the module to the logging
    publisher.info("Run Phone module")

    # Endless loop: take one message at a time from the input queue
    while True:
        message = p.get_from_set()

        if message is not None:
            # Do something with the message from the queue
            search_phone(message)
            continue

        # Nothing queued: report it and wait a second before polling again
        publisher.debug("{} queue is empty, waiting".format(config_section))
        time.sleep(1)
|
||||||
|
|
|
@ -52,3 +52,9 @@ subscribe = Redis_Global
|
||||||
|
|
||||||
[Credential]
|
[Credential]
|
||||||
subscribe = Redis_Global
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
[Keys]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
||||||
|
[Phone]
|
||||||
|
subscribe = Redis_Global
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
#!/bin/bash
#
# Install the dependencies of the framework on a pacman-based system, build
# redis and redis-leveldb from source, create the AILENV virtualenv and
# install the Python requirements into it.
# Must be run from the root of the repository checkout: every path below is
# relative to the current directory.

# Abort on the first failing command and trace every command that is run.
set -e
set -x

sudo pacman -Syu

sudo pacman -S python2-pip screen gcc unzip freetype2 python2 git --needed
# NOTE(review): yaourt is normally run as an unprivileged user -- confirm
# that invoking it through sudo is intended.
sudo yaourt -S snappy --needed
sudo pip2 install virtualenv

# Needed for bloom filters
sudo pacman -S openssl python2-numpy --needed

# DNS deps
sudo pacman -S adns --needed

# Needed for redis-lvlDB
sudo pacman -S libev gmp --needed

# Needed for matplotlib
test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
sudo easy_install-2.7 -U distribute

# REDIS #
# Clone only when the directory is not there yet, then build the 2.8 branch.
test ! -d redis/ && git clone https://github.com/antirez/redis.git
pushd redis/
git checkout 2.8
make
popd

# REDIS LEVEL DB #
test ! -d redis-leveldb/ && git clone https://github.com/KDr2/redis-leveldb.git
pushd redis-leveldb/
git submodule init
git submodule update
make
popd

# Create the local configuration from the sample on first install only.
if [ ! -f bin/packages/config.cfg ]; then
    cp bin/packages/config.cfg.sample bin/packages/config.cfg
fi

virtualenv AILENV

# Append the AIL_* environment variables to the virtualenv activation script.
# %q shell-escapes the path, so a checkout directory containing whitespace
# still yields a valid `export` line; ordinary paths are written unchanged.
# The lines are appended on every run of this script.
printf 'export AIL_HOME=%q\n' "$(pwd)" >> ./AILENV/bin/activate
printf 'export AIL_BIN=%q\n' "$(pwd)/bin/" >> ./AILENV/bin/activate
printf 'export AIL_FLASK=%q\n' "$(pwd)/var/www/" >> ./AILENV/bin/activate
printf 'export AIL_REDIS=%q\n' "$(pwd)/redis/src/" >> ./AILENV/bin/activate
printf 'export AIL_LEVELDB=%q\n' "$(pwd)/redis-leveldb/" >> ./AILENV/bin/activate

. ./AILENV/bin/activate

# The variable is quoted and the braces are not, so brace expansion still
# happens while a path with spaces is kept as a single word.
mkdir -p "$AIL_HOME"/{PASTES,Blooms,dumps}
mkdir -p "$AIL_HOME"/LEVEL_DB_DATA/{2014,2013}

pip install -r pip_packages_requirement.txt

# Download the necessary NLTK corpora
# HOME is overridden for this one command only.
HOME="$(pwd)" python -m textblob.download_corpora
|
Loading…
Reference in New Issue