From 4a1f300a1acc85632ea078840c5e61a6a44c2fc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 14 Aug 2014 14:11:07 +0200 Subject: [PATCH] Cleanup (remove unused imports, more pep8 compatible) --- bin/Dir.py | 4 +-- bin/Shutdown.py | 16 +++++----- bin/ZMQ_Feed.py | 32 ++++++++++--------- bin/ZMQ_Feed_Q.py | 16 +++++----- bin/ZMQ_PubSub_Categ.py | 64 +++++++++++++++++++++----------------- bin/ZMQ_Pub_Global.py | 21 +++++++------ bin/packages/Date.py | 2 -- bin/packages/Hash.py | 7 +++-- bin/packages/ZMQ_PubSub.py | 15 ++++----- bin/packages/lib_refine.py | 31 ++++++------------ bin/packages/lib_words.py | 59 ++++++++++++++--------------------- 11 files changed, 129 insertions(+), 138 deletions(-) diff --git a/bin/Dir.py b/bin/Dir.py index 353beeb3..37354f6e 100755 --- a/bin/Dir.py +++ b/bin/Dir.py @@ -3,8 +3,8 @@ import argparse import redis -from bin.pubsublogger import publisher -from bin.packages.lib_words import create_dirfile +from pubsublogger import publisher +from packages.lib_words import create_dirfile import ConfigParser diff --git a/bin/Shutdown.py b/bin/Shutdown.py index ae0e3024..e2474c32 100755 --- a/bin/Shutdown.py +++ b/bin/Shutdown.py @@ -20,11 +20,12 @@ Requirements *Need the ZMQ_Feed_Q Module running to be able to work properly. """ -import redis, ConfigParser -from pubsublogger import publisher +import redis +import ConfigParser configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -32,13 +33,12 @@ def main(): cfg = ConfigParser.ConfigParser() cfg.read(configfile) - #REDIS - r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + # REDIS + r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) -#### SCRIPTS #### + # ### SCRIPTS #### r_serv.sadd("SHUTDOWN_FLAGS", "Feed") r_serv.sadd("SHUTDOWN_FLAGS", "Categ") r_serv.sadd("SHUTDOWN_FLAGS", "Lines") diff --git a/bin/ZMQ_Feed.py b/bin/ZMQ_Feed.py index 710088f4..be1e3eff 100755 --- a/bin/ZMQ_Feed.py +++ b/bin/ZMQ_Feed.py @@ -20,13 +20,17 @@ Requirements *Need the ZMQ_Feed_Q Module running to be able to work properly. """ -import redis, zmq, ConfigParser, sys, base64, gzip, os, time -#import zlib +import redis +import ConfigParser +import base64 +import os +import time from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -34,19 +38,19 @@ def main(): cfg = ConfigParser.ConfigParser() cfg.read(configfile) - #REDIS + # REDIS r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # ZMQ # channel = cfg.get("Feed", "topicfilter") - #Subscriber + # Subscriber subscriber_name = "feed" subscriber_config_section = "Feed" - #Publisher + # Publisher publisher_name = "pubfed" publisher_config_section = "PubSub_Global" @@ -60,13 +64,13 @@ def main(): while True: message = Sub.get_msg_from_queue(r_serv) - #Recovering the streamed message informations. - if message != None: + # Recovering the streamed message informations. 
+ if message is not None: if len(message.split()) == 3: topic, paste, gzip64encoded = message.split() print paste else: - #TODO Store the name of the empty paste inside a Redis-list. + # TODO Store the name of the empty paste inside a Redis-list. print "Empty Paste: not processed" publisher.debug("Empty Paste: {0} not processed".format(paste)) continue @@ -79,17 +83,17 @@ def main(): print "Empty Queues: Waiting..." time.sleep(10) continue - #Creating the full filepath + # Creating the full filepath filename = cfg.get("Directories", "pastes") + paste if not os.path.exists(filename.rsplit("/", 1)[0]): os.makedirs(filename.rsplit("/", 1)[0]) else: - #Path already existing + # Path already existing pass decoded_gzip = base64.standard_b64decode(gzip64encoded) - #paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16) + # paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16) with open(filename, 'wb') as F: F.write(decoded_gzip) diff --git a/bin/ZMQ_Feed_Q.py b/bin/ZMQ_Feed_Q.py index 4f267552..ab9ed09a 100755 --- a/bin/ZMQ_Feed_Q.py +++ b/bin/ZMQ_Feed_Q.py @@ -20,12 +20,14 @@ Requirements "channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste" """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -35,24 +37,22 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) - - p_serv = r_serv.pipeline(False) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # channel = cfg.get("Feed", "topicfilter") - Sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed") + sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed") # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Feed_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Feed_Q") diff --git a/bin/ZMQ_PubSub_Categ.py b/bin/ZMQ_PubSub_Categ.py index d5189423..71477b9a 100755 --- a/bin/ZMQ_PubSub_Categ.py +++ b/bin/ZMQ_PubSub_Categ.py @@ -4,7 +4,8 @@ The ZMQ_PubSub_Categ Module ============================ -This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module. +This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q +Module. Each words files created under /files/ are representing categories. This modules take these files and compare them to @@ -21,7 +22,8 @@ this word will be pushed to this specific channel. ..note:: The channel will have the name of the file created. Implementing modules can start here, create your own category file, -and then create your own module to treat the specific paste matching this category. +and then create your own module to treat the specific paste matching this +category. ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put the same Subscriber name in both of them. @@ -34,13 +36,17 @@ Requirements *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. 
""" -import redis, argparse, zmq, ConfigParser, time -from packages import Paste as P +import redis +import argparse +import ConfigParser +import time from packages import ZMQ_PubSub from pubsublogger import publisher +from packages import Paste configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -50,23 +56,21 @@ def main(): # SCRIPT PARSER # parser = argparse.ArgumentParser( - description = '''This script is a part of the Analysis Information - Leak framework.''', - epilog = '''''') + description='''This script is a part of the Analysis Information Leak framework.''', + epilog='''''') - parser.add_argument('-l', - type = str, - default = "../files/list_categ_files", - help = 'Path to the list_categ_files (../files/list_categ_files)', - action = 'store') + parser.add_argument( + '-l', type=str, default="../files/list_categ_files", + help='Path to the list_categ_files (../files/list_categ_files)', + action='store') args = parser.parse_args() # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" @@ -79,17 +83,20 @@ def main(): publisher_name = "pubcateg" publisher_config_section = "PubSub_Categ" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) - Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, + subscriber_name) + pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, + publisher_name) # FUNCTIONS # - publisher.info("Script Categ subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0"))) + publisher.info("Script Categ subscribed to channel {0}".format( + cfg.get("PubSub_Words", "channel_0"))) with open(args.l, 'rb') as L: tmp_dict = {} for num, fname in enumerate(L): - #keywords temp list + # keywords temp list tmp_list = [] with open(fname[:-1], 'rb') as LS: @@ -99,16 +106,15 @@ def main(): tmp_dict[fname.split('/')[-1][:-1]] = tmp_list - paste_words = [] - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) prec_filename = None while True: - if message != None: + if message is not None: channel, filename, word, score = message.split() - if prec_filename == None or filename != prec_filename: - PST = P.Paste(filename) + if prec_filename is None or filename != prec_filename: + PST = Paste.Paste(filename) prec_filename = filename @@ -117,10 +123,12 @@ def main(): if word.lower() in list: channel = categ msg = channel+" "+PST.p_path+" "+word+" "+score - Pub.send_message(msg) - #dico_categ.add(categ) + pub.send_message(msg) + # dico_categ.add(categ) - publisher.info('{0};{1};{2};{3};{4}'.format("Categ", PST.p_source, PST.p_date, PST.p_name,"Detected "+score+" "+"\""+word+"\"")) + publisher.info( + 'Categ;{};{};{};Detected {} "{}"'.format( + PST.p_source, PST.p_date, PST.p_name, score, word)) else: if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"): @@ -131,7 +139,7 @@ def main(): publisher.debug("Script Categ is Idling 10s") time.sleep(10) - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) if __name__ == "__main__": diff --git a/bin/ZMQ_Pub_Global.py b/bin/ZMQ_Pub_Global.py index 7b52e01d..d47ee730 100755 --- a/bin/ZMQ_Pub_Global.py +++ b/bin/ZMQ_Pub_Global.py @@ -8,7 +8,8 @@ 
 This module is consuming the Redis-list created by the script ./Dir.py.
 This module is as the same level of the ZMQ tree than the Module ZMQ_Feed
 Whereas the ZMQ_Feed is poping the list created in redis by ZMQ_Feed_Q which is
-listening a stream, ZMQ_Pub_Global is poping the list created in redis by ./Dir.py.
+listening a stream, ZMQ_Pub_Global is poping the list created in redis by
+./Dir.py.
 
 Thanks to this Module there is now two way to Feed the ZMQ tree:
 *By a continuous stream ..seealso:: ZMQ_Feed Module
@@ -20,13 +21,15 @@ Requirements
 *Need running Redis instances. (Redis)
 
 """
-import redis, zmq, ConfigParser, time
-from packages import Paste as P
+import redis
+import ConfigParser
+import time
 from packages import ZMQ_PubSub
 from pubsublogger import publisher
 
 configfile = './packages/config.cfg'
 
+
 def main():
     """Main Function"""
 
@@ -36,15 +39,15 @@ def main():
 
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
 
     # LOGGING #
     publisher.channel = "Global"
 
     # ZMQ #
-    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
+    pub_glob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
 
     # FONCTIONS #
     publisher.info("Starting to publish.")
@@ -52,10 +55,10 @@ def main():
     while True:
         filename = r_serv.lpop("filelist")
 
-        if filename != None:
+        if filename is not None:
             msg = cfg.get("PubSub_Global", "channel")+" "+filename
-            PubGlob.send_message(msg)
+            pub_glob.send_message(msg)
             publisher.debug("{0} Published".format(msg))
         else:
             time.sleep(10)
diff --git a/bin/packages/Date.py b/bin/packages/Date.py
index eca18de2..4abb0910 100644
--- a/bin/packages/Date.py
+++ b/bin/packages/Date.py
@@ -30,5 +30,3 @@ class Date(object):
 
     def _set_day(self, day):
         self.day = day
-
-
diff --git a/bin/packages/Hash.py b/bin/packages/Hash.py
index 0640e48c..f8dcac0f 100644
--- a/bin/packages/Hash.py
+++ b/bin/packages/Hash.py
@@ -1,4 +1,7 @@
-import hashlib, crcmod, mmh3
+import hashlib
+import crcmod
+import mmh3
+
 
 class Hash(object):
     """docstring for Hash"""
@@ -29,4 +32,4 @@ class Hash(object):
         elif self.name == "murmur":
             hash = mmh3.hash(string)
 
-        return hash
\ No newline at end of file
+        return hash
diff --git a/bin/packages/ZMQ_PubSub.py b/bin/packages/ZMQ_PubSub.py
index 644be4c4..b7c65231 100755
--- a/bin/packages/ZMQ_PubSub.py
+++ b/bin/packages/ZMQ_PubSub.py
@@ -5,8 +5,9 @@ The ``ZMQ PubSub`` Modules
 
 """
-import zmq, ConfigParser, redis, pprint, os, sys
-#from pubsublogger import publisher
+import zmq
+import ConfigParser
+
 
 class PubSub(object):
     """
@@ -29,14 +30,14 @@ class PubSub(object):
     def __init__(self, file_conf, log_channel, ps_name):
         self._ps_name = ps_name
         self._config_parser = ConfigParser.ConfigParser()
-        self._config_file = file_conf # "./packages/config.cfg"
+        self._config_file = file_conf  # "./packages/config.cfg"
         self._config_parser.read(self._config_file)
 
         self._context_zmq = zmq.Context()
 
-        #self._logging_publisher_channel = log_channel # "Default"
-        #publisher.channel(self._logging_publisher_channel)
+        # self._logging_publisher_channel = log_channel # "Default"
+        # publisher.channel(self._logging_publisher_channel)
 
 
 class ZMQPub(PubSub):
@@ -146,7 +147,7 @@ class ZMQSub(PubSub):
 
         ..note:: This function also create a set named "queue" for monitoring needs
 
        """
-        r_serv.sadd("queues",self._channel+self._ps_name)
+        r_serv.sadd("queues", self._channel+self._ps_name)
         r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
 
     def get_msg_from_queue(self, r_serv):
@@ -156,4 +157,4 @@ class ZMQSub(PubSub):
 
         :return: (str) Message from Publisher
 
-        return r_serv.rpop(self._channel+self._ps_name)
\ No newline at end of file
+        return r_serv.rpop(self._channel+self._ps_name)
diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py
index 1fe458b8..45b1d009 100644
--- a/bin/packages/lib_refine.py
+++ b/bin/packages/lib_refine.py
@@ -1,18 +1,9 @@
-import gzip, string, sys, os, redis, re
+import re
 
 import dns.resolver
 
 from pubsublogger import publisher
 
-from lib_jobs import *
-from operator import itemgetter
-
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-
-import calendar as cal
-from datetime import date, timedelta
-from dateutil.rrule import rrule, DAILY
+from datetime import timedelta
 
 
 def is_luhn_valid(card_number):
@@ -23,9 +14,7 @@ def is_luhn_valid(card_number):
 
     """
     r = [int(ch) for ch in str(card_number)][::-1]
-    return (sum(r[0::2]) + sum(sum(divmod(d*2,10)) for d in r[1::2])) % 10 == 0
-
-
+    return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
 
 
 def checking_MX_record(r_serv, adress_set):
@@ -49,16 +38,16 @@ def checking_MX_record(r_serv, adress_set):
 
         for MXdomain in set(MXdomains):
             try:
-                #Already in Redis living.
+                # Already in Redis living.
                 if r_serv.exists(MXdomain[1:]):
                     score += 1
                     WalidMX.add(MXdomain[1:])
                 # Not already in Redis
                 else:
                     # If I'm Walid MX domain
-                    if dns.resolver.query(MXdomain[1:], rdtype = dns.rdatatype.MX):
+                    if dns.resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                         # Gonna be added in redis.
-                        r_serv.setex(MXdomain[1:],timedelta(days=1),1)
+                        r_serv.setex(MXdomain[1:], timedelta(days=1), 1)
                         score += 1
                         WalidMX.add(MXdomain[1:])
                     else:
@@ -86,8 +75,6 @@ def checking_MX_record(r_serv, adress_set):
 
     return (num, WalidMX)
 
-
-
 def checking_A_record(r_serv, domains_set):
     score = 0
     num = len(domains_set)
@@ -95,16 +82,16 @@ def checking_A_record(r_serv, domains_set):
 
     for Adomain in domains_set:
         try:
-            #Already in Redis living.
+            # Already in Redis living.
            if r_serv.exists(Adomain):
                 score += 1
                 WalidA.add(Adomain)
             # Not already in Redis
             else:
                 # If I'm Walid domain
-                if dns.resolver.query(Adomain, rdtype = dns.rdatatype.A):
+                if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A):
                     # Gonna be added in redis.
-                    r_serv.setex(Adomain,timedelta(days=1),1)
+                    r_serv.setex(Adomain, timedelta(days=1), 1)
                     score += 1
                     WalidA.add(Adomain)
                 else:
diff --git a/bin/packages/lib_words.py b/bin/packages/lib_words.py
index e72fef52..0acea7c8 100644
--- a/bin/packages/lib_words.py
+++ b/bin/packages/lib_words.py
@@ -1,24 +1,12 @@
-import redis, gzip
-
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-
-from textblob import TextBlob
-from nltk.corpus import stopwords
-from nltk.tokenize import RegexpTokenizer
-
-from lib_redis_insert import clean, listdirectory
-from lib_jobs import *
+import os
+import string
 
 from pubsublogger import publisher
 
-import calendar as cal
-from datetime import date, timedelta
+import calendar
+from datetime import date
 from dateutil.rrule import rrule, DAILY
 
-from packages import *
-
 
 def listdirectory(path):
     """Path Traversing Function.
@@ -29,7 +17,7 @@ def listdirectory(path):
     the argument directory.
""" - fichier=[] + fichier = [] for root, dirs, files in os.walk(path): for i in files: @@ -38,15 +26,10 @@ def listdirectory(path): return fichier - - - clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty)) """It filters out non-printable characters from the string it receives.""" - - def create_dirfile(r_serv, directory, overwrite): """Create a file of path. @@ -62,7 +45,7 @@ def create_dirfile(r_serv, directory, overwrite): r_serv.delete("filelist") for x in listdirectory(directory): - r_serv.rpush("filelist",x) + r_serv.rpush("filelist", x) publisher.info("The list was overwritten") @@ -70,19 +53,17 @@ def create_dirfile(r_serv, directory, overwrite): if r_serv.llen("filelist") == 0: for x in listdirectory(directory): - r_serv.rpush("filelist",x) + r_serv.rpush("filelist", x) publisher.info("New list created") else: for x in listdirectory(directory): - r_serv.rpush("filelist",x) + r_serv.rpush("filelist", x) publisher.info("The list was updated with new elements") - - def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month): """Create a csv file used with dygraph. @@ -100,23 +81,29 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month """ a = date(year, month, 01) - b = date(year, month, cal.monthrange(year,month)[1]) + b = date(year, month, calendar.monthrange(year, month)[1]) days = {} words = [] with open(feederfilename, 'rb') as F: - for word in F: # words of the files - words.append(word[:-1]) # list of words (sorted as in the file) + # words of the files + for word in F: + # list of words (sorted as in the file) + words.append(word[:-1]) - for dt in rrule(DAILY, dtstart = a, until = b): # for each days + # for each days + for dt in rrule(DAILY, dtstart=a, until=b): mot = [] mot1 = [] mot2 = [] days[dt.strftime("%Y%m%d")] = '' - for word in sorted(words): # from the 1srt day to the last of the list - if r_serv.hexists(word, dt.strftime("%Y%m%d")): # if the word have a value for the day + # from the 1srt day to the last of the list + for word in sorted(words): + + # if the word have a value for the day + if r_serv.hexists(word, dt.strftime("%Y%m%d")): mot1.append(str(word)) mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d"))) @@ -144,9 +131,9 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month with open(csvfilename+".csv", 'rb') as F: h = F.read() - h = h.replace("[","") - h = h.replace("]","") - h = h.replace('\'',"") + h = h.replace("[", "") + h = h.replace("]", "") + h = h.replace('\'', "") with open(csvfilename+".csv", 'wb') as F: F.write(h)