Cleanup (remove unused imports, more pep8 compatible)

pull/14/head
Raphaël Vinot 2014-08-14 14:11:07 +02:00
parent 04a8f1bdf2
commit 4a1f300a1a
11 changed files with 129 additions and 138 deletions
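
For context, three pycodestyle fixes recur throughout the diff below: one import per line (E401), no spaces around '=' in keyword arguments (E251), and identity comparison with None (E711). A minimal before/after sketch, illustrative only and not taken from the repository:

# Before the cleanup: E401, E251, E711
import os, sys

def connect(host = "localhost", port = 6379):
    return (host, port)

message = None
if message != None:
    print("got a message")

# After the cleanup: one import per line, key=value, "is not None"
import os
import sys

def connect(host="localhost", port=6379):
    return (host, port)

message = None
if message is not None:
    print("got a message")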

View File

@@ -3,8 +3,8 @@
 import argparse
 import redis
-from bin.pubsublogger import publisher
-from bin.packages.lib_words import create_dirfile
+from pubsublogger import publisher
+from packages.lib_words import create_dirfile
 import ConfigParser

View File

@@ -20,11 +20,12 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 """
-import redis, ConfigParser
 from pubsublogger import publisher
+import redis
+import ConfigParser
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -32,13 +33,12 @@ def main():
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)
-    #REDIS
-    r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+    # REDIS
+    r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
+                               port=cfg.getint("Redis_Queues", "port"),
+                               db=cfg.getint("Redis_Queues", "db"))
-    #### SCRIPTS ####
+    # ### SCRIPTS ####
     r_serv.sadd("SHUTDOWN_FLAGS", "Feed")
     r_serv.sadd("SHUTDOWN_FLAGS", "Categ")
     r_serv.sadd("SHUTDOWN_FLAGS", "Lines")
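
The SHUTDOWN_FLAGS set written above is how this script asks the running modules to stop: each module polls the set for its own flag and removes it before exiting, as the loops later in this diff show. A minimal consumer-side sketch, assuming a local Redis instance (the processing step is a hypothetical placeholder):

import time

import redis

# Connection parameters the config file would normally provide; local Redis assumed.
r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)

while True:
    # ... process one unit of work here (placeholder) ...
    if r_serv.sismember("SHUTDOWN_FLAGS", "Feed"):
        r_serv.srem("SHUTDOWN_FLAGS", "Feed")
        print("Shutdown flag consumed, exiting.")
        break
    time.sleep(1)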

View File

@@ -20,13 +20,17 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 """
-import redis, zmq, ConfigParser, sys, base64, gzip, os, time
-#import zlib
+import redis
+import ConfigParser
+import base64
+import os
+import time
 from pubsublogger import publisher
 from packages import ZMQ_PubSub
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -34,19 +38,19 @@ def main():
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)
-    #REDIS
+    # REDIS
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # ZMQ #
     channel = cfg.get("Feed", "topicfilter")
-    #Subscriber
+    # Subscriber
     subscriber_name = "feed"
     subscriber_config_section = "Feed"
-    #Publisher
+    # Publisher
     publisher_name = "pubfed"
     publisher_config_section = "PubSub_Global"
@@ -60,13 +64,13 @@ def main():
     while True:
         message = Sub.get_msg_from_queue(r_serv)
-        #Recovering the streamed message informations.
-        if message != None:
+        # Recovering the streamed message informations.
+        if message is not None:
             if len(message.split()) == 3:
                 topic, paste, gzip64encoded = message.split()
                 print paste
             else:
-                #TODO Store the name of the empty paste inside a Redis-list.
+                # TODO Store the name of the empty paste inside a Redis-list.
                 print "Empty Paste: not processed"
                 publisher.debug("Empty Paste: {0} not processed".format(paste))
                 continue
@@ -79,17 +83,17 @@ def main():
            print "Empty Queues: Waiting..."
            time.sleep(10)
            continue
-        #Creating the full filepath
+        # Creating the full filepath
        filename = cfg.get("Directories", "pastes") + paste
        if not os.path.exists(filename.rsplit("/", 1)[0]):
            os.makedirs(filename.rsplit("/", 1)[0])
        else:
-            #Path already existing
+            # Path already existing
            pass
        decoded_gzip = base64.standard_b64decode(gzip64encoded)
-        #paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
+        # paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
        with open(filename, 'wb') as F:
            F.write(decoded_gzip)
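
The three-field message handled above ("topic paste gzip64encoded") can be reproduced in isolation: the payload is just the base64 of a gzipped paste, and writing the decoded bytes yields a valid .gz file. A self-contained sketch under that assumption (Python 2, like the codebase; the topic value and paths are made up):

import base64
import gzip
import StringIO

# Build a fake feed message in the expected format.
buf = StringIO.StringIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
    f.write("some paste content")
gzip64encoded = base64.standard_b64encode(buf.getvalue())
message = "102 2014/08/14/example.gz " + gzip64encoded

# What the loop above does with such a message.
topic, paste, payload = message.split()
decoded_gzip = base64.standard_b64decode(payload)  # raw gzip bytes
with open("/tmp/example.gz", 'wb') as F:
    F.write(decoded_gzip)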

View File

@@ -20,12 +20,14 @@ Requirements
     "channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"
 """
-import redis, zmq, ConfigParser
+import redis
+import ConfigParser
 from pubsublogger import publisher
 from packages import ZMQ_PubSub
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -35,24 +37,22 @@ def main():
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
-    p_serv = r_serv.pipeline(False)
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Queuing"
     # ZMQ #
     channel = cfg.get("Feed", "topicfilter")
-    Sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed")
+    sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed")
     # FUNCTIONS #
     publisher.info("""Suscribed to channel {0}""".format(channel))
     while True:
-        Sub.get_and_lpush(r_serv)
+        sub.get_and_lpush(r_serv)
         if r_serv.sismember("SHUTDOWN_FLAGS", "Feed_Q"):
             r_serv.srem("SHUTDOWN_FLAGS", "Feed_Q")
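
This _Q module illustrates the queueing convention used across the tree: the subscriber side lpushes every received frame onto a Redis list (get_and_lpush), and the worker module rpops from the same list (get_msg_from_queue), so bursts on the ZMQ socket are buffered rather than dropped. Stripped of the wrapper classes, the list operations amount to this sketch (local Redis assumed; the queue name and message contents are illustrative):

import redis

r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)
queue = "102feed"  # channel + subscriber name, as ZMQSub concatenates them

# Producer side: what get_and_lpush does with each received frame.
r_serv.lpush(queue, "102 2014/08/14/example.gz QUFBQQ==")

# Consumer side: what get_msg_from_queue returns to the worker.
message = r_serv.rpop(queue)  # None when the queue is empty
if message is not None:
    topic, paste, payload = message.split()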

View File

@@ -4,7 +4,8 @@
 The ZMQ_PubSub_Categ Module
 ============================
-This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module.
+This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
+Module.
 Each words files created under /files/ are representing categories.
 This modules take these files and compare them to
@@ -21,7 +22,8 @@ this word will be pushed to this specific channel.
 ..note:: The channel will have the name of the file created.
 Implementing modules can start here, create your own category file,
-and then create your own module to treat the specific paste matching this category.
+and then create your own module to treat the specific paste matching this
+category.
 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.
@@ -34,13 +36,17 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 """
-import redis, argparse, zmq, ConfigParser, time
-from packages import Paste as P
+import redis
+import argparse
+import ConfigParser
+import time
 from packages import ZMQ_PubSub
 from pubsublogger import publisher
+from packages import Paste
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -50,23 +56,21 @@ def main():
     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
-        description = '''This script is a part of the Analysis Information
-        Leak framework.''',
-        epilog = '''''')
+        description='''This script is a part of the Analysis Information Leak framework.''',
+        epilog='''''')
-    parser.add_argument('-l',
-        type = str,
-        default = "../files/list_categ_files",
-        help = 'Path to the list_categ_files (../files/list_categ_files)',
-        action = 'store')
+    parser.add_argument(
+        '-l', type=str, default="../files/list_categ_files",
+        help='Path to the list_categ_files (../files/list_categ_files)',
+        action='store')
     args = parser.parse_args()
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Script"
@@ -79,17 +83,20 @@ def main():
     publisher_name = "pubcateg"
     publisher_config_section = "PubSub_Categ"
-    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
+    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
+                            subscriber_name)
+    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section,
+                            publisher_name)
     # FUNCTIONS #
-    publisher.info("Script Categ subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
+    publisher.info("Script Categ subscribed to channel {0}".format(
+        cfg.get("PubSub_Words", "channel_0")))
     with open(args.l, 'rb') as L:
         tmp_dict = {}
         for num, fname in enumerate(L):
-            #keywords temp list
+            # keywords temp list
             tmp_list = []
             with open(fname[:-1], 'rb') as LS:
@@ -99,16 +106,15 @@ def main():
                 tmp_dict[fname.split('/')[-1][:-1]] = tmp_list
     paste_words = []
-    message = Sub.get_msg_from_queue(r_serv)
+    message = sub.get_msg_from_queue(r_serv)
     prec_filename = None
     while True:
-        if message != None:
+        if message is not None:
             channel, filename, word, score = message.split()
-            if prec_filename == None or filename != prec_filename:
-                PST = P.Paste(filename)
+            if prec_filename is None or filename != prec_filename:
+                PST = Paste.Paste(filename)
             prec_filename = filename
@@ -117,10 +123,12 @@ def main():
                if word.lower() in list:
                    channel = categ
                    msg = channel+" "+PST.p_path+" "+word+" "+score
-                    Pub.send_message(msg)
-                    #dico_categ.add(categ)
+                    pub.send_message(msg)
+                    # dico_categ.add(categ)
-                    publisher.info('{0};{1};{2};{3};{4}'.format("Categ", PST.p_source, PST.p_date, PST.p_name,"Detected "+score+" "+"\""+word+"\""))
+                    publisher.info(
+                        'Categ;{};{};{};Detected {} "{}"'.format(
+                            PST.p_source, PST.p_date, PST.p_name, score, word))
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"):
@@ -131,7 +139,7 @@ def main():
            publisher.debug("Script Categ is Idling 10s")
            time.sleep(10)
-        message = Sub.get_msg_from_queue(r_serv)
+        message = sub.get_msg_from_queue(r_serv)
 if __name__ == "__main__":
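
The rewritten publisher.info() call above also switches from explicit positional fields ({0};{1};...) to auto-numbered {} fields, which .format() fills left to right (supported from Python 2.7 onward). The two spellings produce identical output, shown standalone with made-up values:

source, p_date, name, score, word = "pastebin", "20140814", "abc.gz", "3", "visa"

old_style = '{0};{1};{2};{3};{4}'.format(
    "Categ", source, p_date, name, 'Detected ' + score + ' "' + word + '"')
new_style = 'Categ;{};{};{};Detected {} "{}"'.format(
    source, p_date, name, score, word)

assert old_style == new_style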

View File

@@ -8,7 +8,8 @@ This module is consuming the Redis-list created by the script ./Dir.py.
 This module is as the same level of the ZMQ tree than the Module ZMQ_Feed
 Whereas the ZMQ_Feed is poping the list created in redis by ZMQ_Feed_Q which is
-listening a stream, ZMQ_Pub_Global is poping the list created in redis by ./Dir.py.
+listening a stream, ZMQ_Pub_Global is poping the list created in redis by
+./Dir.py.
 Thanks to this Module there is now two way to Feed the ZMQ tree:
 *By a continuous stream ..seealso:: ZMQ_Feed Module
@@ -20,13 +21,15 @@ Requirements
 *Need running Redis instances. (Redis)
 """
-import redis, zmq, ConfigParser, time
-from packages import Paste as P
+import redis
+import ConfigParser
+import time
 from packages import ZMQ_PubSub
 from pubsublogger import publisher
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -36,15 +39,15 @@ def main():
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Global"
     # ZMQ #
-    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
+    pub_glob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
     # FONCTIONS #
     publisher.info("Starting to publish.")
@@ -52,10 +55,10 @@ def main():
     while True:
         filename = r_serv.lpop("filelist")
-        if filename != None:
+        if filename is not None:
             msg = cfg.get("PubSub_Global", "channel")+" "+filename
-            PubGlob.send_message(msg)
+            pub_glob.send_message(msg)
             publisher.debug("{0} Published".format(msg))
         else:
             time.sleep(10)
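
ZMQ_Pub_Global's main loop is a plain polling consumer of the filelist list that ./Dir.py fills; lpop returns None once the list is empty, which is why the PEP 8 change to "is not None" is a safe, purely stylistic one here. The pattern in isolation (a sketch, local Redis assumed, filename made up):

import redis

r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)
r_serv.rpush("filelist", "2014/08/14/example.gz")  # what ./Dir.py would queue

while True:
    filename = r_serv.lpop("filelist")
    if filename is not None:
        print("would publish: " + filename)
    else:
        break  # the real module sleeps 10s and retries instead of exiting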

View File

@@ -30,5 +30,3 @@ class Date(object):
     def _set_day(self, day):
         self.day = day
-
-

View File

@@ -1,4 +1,7 @@
-import hashlib, crcmod, mmh3
+import hashlib
+import crcmod
+import mmh3
 class Hash(object):
     """docstring for Hash"""
@@ -29,4 +32,4 @@ class Hash(object):
         elif self.name == "murmur":
             hash = mmh3.hash(string)
-        return hash
\ No newline at end of file
+        return hash
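
The Hash class whose imports are split above dispatches on a name string (md5, sha1, crc, murmur), and the same three libraries can be exercised directly. A standalone sketch of those calls; the exact CRC variant the class uses is not visible in this hunk, so 'crc-32' below is an assumption (Python 2 byte strings assumed):

import hashlib

import crcmod.predefined
import mmh3

data = "example paste"

md5_digest = hashlib.md5(data).hexdigest()
sha1_digest = hashlib.sha1(data).hexdigest()
crc32_fun = crcmod.predefined.mkCrcFun('crc-32')  # assumed variant
crc_digest = crc32_fun(data)     # an integer
murmur_digest = mmh3.hash(data)  # a signed 32-bit integer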

View File

@@ -5,8 +5,9 @@ The ``ZMQ PubSub`` Modules
 """
-import zmq, ConfigParser, redis, pprint, os, sys
-#from pubsublogger import publisher
+import zmq
+import ConfigParser
 class PubSub(object):
     """
@@ -29,14 +30,14 @@ class PubSub(object):
     def __init__(self, file_conf, log_channel, ps_name):
         self._ps_name = ps_name
         self._config_parser = ConfigParser.ConfigParser()
-        self._config_file = file_conf # "./packages/config.cfg"
+        self._config_file = file_conf  # "./packages/config.cfg"
         self._config_parser.read(self._config_file)
         self._context_zmq = zmq.Context()
-        #self._logging_publisher_channel = log_channel # "Default"
-        #publisher.channel(self._logging_publisher_channel)
+        # self._logging_publisher_channel = log_channel # "Default"
+        # publisher.channel(self._logging_publisher_channel)
 class ZMQPub(PubSub):
@@ -146,7 +147,7 @@ class ZMQSub(PubSub):
         ..note:: This function also create a set named "queue" for monitoring needs
         """
-        r_serv.sadd("queues",self._channel+self._ps_name)
+        r_serv.sadd("queues", self._channel+self._ps_name)
         r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
     def get_msg_from_queue(self, r_serv):
@@ -156,4 +157,4 @@ class ZMQSub(PubSub):
         :return: (str) Message from Publisher
         """
-        return r_serv.rpop(self._channel+self._ps_name)
\ No newline at end of file
+        return r_serv.rpop(self._channel+self._ps_name)
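
ZMQPub and ZMQSub, whose internals are tidied above, wrap a standard ZMQ PUB/SUB pair in which every message begins with its channel name and SUBSCRIBE acts as a prefix filter on that name. A minimal standalone pair; the endpoint and channel value are made up:

import zmq

context = zmq.Context()

sub = context.socket(zmq.SUB)
sub.bind("tcp://127.0.0.1:5556")
sub.setsockopt(zmq.SUBSCRIBE, "filelist")  # prefix filter on the channel name

pub = context.socket(zmq.PUB)
pub.connect("tcp://127.0.0.1:5556")
pub.send("filelist 2014/08/14/example.gz")  # channel name comes first

# message = sub.recv()  # would block until a matching frame arrives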

View File

@@ -1,18 +1,9 @@
-import gzip, string, sys, os, redis, re
+import re
 import dns.resolver
-from pubsublogger import publisher
-from lib_jobs import *
-from operator import itemgetter
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-import calendar as cal
-from datetime import date, timedelta
-from dateutil.rrule import rrule, DAILY
+from datetime import timedelta
 def is_luhn_valid(card_number):
@@ -23,9 +14,7 @@ def is_luhn_valid(card_number):
     """
     r = [int(ch) for ch in str(card_number)][::-1]
-    return (sum(r[0::2]) + sum(sum(divmod(d*2,10)) for d in r[1::2])) % 10 == 0
+    return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
 def checking_MX_record(r_serv, adress_set):
@@ -49,16 +38,16 @@ def checking_MX_record(r_serv, adress_set):
     for MXdomain in set(MXdomains):
         try:
-            #Already in Redis living.
+            # Already in Redis living.
             if r_serv.exists(MXdomain[1:]):
                 score += 1
                 WalidMX.add(MXdomain[1:])
             # Not already in Redis
             else:
                 # If I'm Walid MX domain
-                if dns.resolver.query(MXdomain[1:], rdtype = dns.rdatatype.MX):
+                if dns.resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                     # Gonna be added in redis.
-                    r_serv.setex(MXdomain[1:],timedelta(days=1),1)
+                    r_serv.setex(MXdomain[1:], timedelta(days=1), 1)
                     score += 1
                     WalidMX.add(MXdomain[1:])
                 else:
@@ -86,8 +75,6 @@ def checking_MX_record(r_serv, adress_set):
     return (num, WalidMX)
 def checking_A_record(r_serv, domains_set):
     score = 0
     num = len(domains_set)
@@ -95,16 +82,16 @@ def checking_A_record(r_serv, domains_set):
     for Adomain in domains_set:
         try:
-            #Already in Redis living.
+            # Already in Redis living.
             if r_serv.exists(Adomain):
                 score += 1
                 WalidA.add(Adomain)
             # Not already in Redis
             else:
                 # If I'm Walid domain
-                if dns.resolver.query(Adomain, rdtype = dns.rdatatype.A):
+                if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A):
                     # Gonna be added in redis.
-                    r_serv.setex(Adomain,timedelta(days=1),1)
+                    r_serv.setex(Adomain, timedelta(days=1), 1)
                     score += 1
                     WalidA.add(Adomain)
                 else:
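
For reference, is_luhn_valid() (reformatted above) is the standard Luhn checksum: reverse the digits, take the digit sum of every doubled second digit via divmod(d*2, 10), and check the total mod 10. A quick self-check with the classic test number:

def is_luhn_valid(card_number):
    r = [int(ch) for ch in str(card_number)][::-1]
    return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0

assert is_luhn_valid(49927398716)      # valid Luhn checksum
assert not is_luhn_valid(49927398717)  # last digit off by one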

View File

@@ -1,24 +1,12 @@
-import redis, gzip
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-from textblob import TextBlob
-from nltk.corpus import stopwords
-from nltk.tokenize import RegexpTokenizer
-from lib_redis_insert import clean, listdirectory
-from lib_jobs import *
 import os
 import string
 from pubsublogger import publisher
-import calendar as cal
-from datetime import date, timedelta
+import calendar
+from datetime import date
 from dateutil.rrule import rrule, DAILY
-from packages import *
 def listdirectory(path):
     """Path Traversing Function.
@@ -29,7 +17,7 @@ def listdirectory(path):
     the argument directory.
     """
-    fichier=[]
+    fichier = []
     for root, dirs, files in os.walk(path):
         for i in files:
@@ -38,15 +26,10 @@ def listdirectory(path):
     return fichier
-clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty))
-"""It filters out non-printable characters from the string it receives."""
 def create_dirfile(r_serv, directory, overwrite):
     """Create a file of path.
@@ -62,7 +45,7 @@ def create_dirfile(r_serv, directory, overwrite):
         r_serv.delete("filelist")
         for x in listdirectory(directory):
-            r_serv.rpush("filelist",x)
+            r_serv.rpush("filelist", x)
         publisher.info("The list was overwritten")
@@ -70,19 +53,17 @@ def create_dirfile(r_serv, directory, overwrite):
         if r_serv.llen("filelist") == 0:
             for x in listdirectory(directory):
-                r_serv.rpush("filelist",x)
+                r_serv.rpush("filelist", x)
             publisher.info("New list created")
         else:
             for x in listdirectory(directory):
-                r_serv.rpush("filelist",x)
+                r_serv.rpush("filelist", x)
             publisher.info("The list was updated with new elements")
 def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
     """Create a csv file used with dygraph.
@@ -100,23 +81,29 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     """
     a = date(year, month, 01)
-    b = date(year, month, cal.monthrange(year,month)[1])
+    b = date(year, month, calendar.monthrange(year, month)[1])
     days = {}
     words = []
     with open(feederfilename, 'rb') as F:
-        for word in F: # words of the files
-            words.append(word[:-1]) # list of words (sorted as in the file)
+        # words of the files
+        for word in F:
+            # list of words (sorted as in the file)
+            words.append(word[:-1])
-    for dt in rrule(DAILY, dtstart = a, until = b): # for each days
+    # for each days
+    for dt in rrule(DAILY, dtstart=a, until=b):
         mot = []
         mot1 = []
         mot2 = []
         days[dt.strftime("%Y%m%d")] = ''
-        for word in sorted(words): # from the 1srt day to the last of the list
-            if r_serv.hexists(word, dt.strftime("%Y%m%d")): # if the word have a value for the day
+        # from the 1srt day to the last of the list
+        for word in sorted(words):
+            # if the word have a value for the day
+            if r_serv.hexists(word, dt.strftime("%Y%m%d")):
                 mot1.append(str(word))
                 mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d")))
@@ -144,9 +131,9 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     with open(csvfilename+".csv", 'rb') as F:
         h = F.read()
-        h = h.replace("[","")
-        h = h.replace("]","")
-        h = h.replace('\'',"")
+        h = h.replace("[", "")
+        h = h.replace("]", "")
+        h = h.replace('\'', "")
     with open(csvfilename+".csv", 'wb') as F:
         F.write(h)
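
Dropping the cal alias above changes nothing functionally: calendar.monthrange(year, month) still returns (weekday_of_first_day, days_in_month), and the rrule(DAILY, ...) call then yields one datetime per day of that month, producing the %Y%m%d keys the CSV rows are built from. A standalone check:

import calendar
from datetime import date

from dateutil.rrule import rrule, DAILY

year, month = 2014, 8
a = date(year, month, 1)
b = date(year, month, calendar.monthrange(year, month)[1])

day_keys = [dt.strftime("%Y%m%d") for dt in rrule(DAILY, dtstart=a, until=b)]
assert day_keys[0] == "20140801"
assert day_keys[-1] == "20140831"
assert len(day_keys) == 31  # August has 31 days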