Cleanup (remove unused imports, more pep8 compatible)

pull/14/head
Raphaël Vinot 2014-08-14 14:11:07 +02:00
parent 04a8f1bdf2
commit 4a1f300a1a
11 changed files with 129 additions and 138 deletions
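
For context, three pycodestyle fixes recur throughout the diff below: one import per line (E401), no spaces around '=' in keyword arguments (E251), and identity comparison with None (E711). A minimal before/after sketch, illustrative only and not taken from the repository:

# Before the cleanup: E401, E251, E711
import os, sys

def connect(host = "localhost", port = 6379):
    return (host, port)

message = None
if message != None:
    print("got a message")

# After the cleanup: one import per line, key=value, "is not None"
import os
import sys

def connect(host="localhost", port=6379):
    return (host, port)

message = None
if message is not None:
    print("got a message")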

View File

@@ -3,8 +3,8 @@
 import argparse
 import redis
-from bin.pubsublogger import publisher
-from bin.packages.lib_words import create_dirfile
+from pubsublogger import publisher
+from packages.lib_words import create_dirfile
 import ConfigParser

View File

@@ -20,11 +20,12 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 """
-import redis, ConfigParser
 from pubsublogger import publisher
+import redis
+import ConfigParser
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -32,13 +33,12 @@ def main():
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)
-    #REDIS
-    r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+    # REDIS
+    r_serv = redis.StrictRedis(host=cfg.get("Redis_Queues", "host"),
+                               port=cfg.getint("Redis_Queues", "port"),
+                               db=cfg.getint("Redis_Queues", "db"))
-    #### SCRIPTS ####
+    # ### SCRIPTS ####
     r_serv.sadd("SHUTDOWN_FLAGS", "Feed")
     r_serv.sadd("SHUTDOWN_FLAGS", "Categ")
     r_serv.sadd("SHUTDOWN_FLAGS", "Lines")
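
The SHUTDOWN_FLAGS set written above is how this script asks the running modules to stop: each module polls the set for its own flag and removes it before exiting, as the loops later in this diff show. A minimal consumer-side sketch, assuming a local Redis instance (the processing step is a hypothetical placeholder):

import time

import redis

# Connection parameters the config file would normally provide; local Redis assumed.
r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)

while True:
    # ... process one unit of work here (placeholder) ...
    if r_serv.sismember("SHUTDOWN_FLAGS", "Feed"):
        r_serv.srem("SHUTDOWN_FLAGS", "Feed")
        print("Shutdown flag consumed, exiting.")
        break
    time.sleep(1)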

View File

@@ -20,13 +20,17 @@ Requirements
 *Need the ZMQ_Feed_Q Module running to be able to work properly.
 """
-import redis, zmq, ConfigParser, sys, base64, gzip, os, time
-#import zlib
+import redis
+import ConfigParser
+import base64
+import os
+import time
 from pubsublogger import publisher
 from packages import ZMQ_PubSub
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -34,19 +38,19 @@ def main():
     cfg = ConfigParser.ConfigParser()
     cfg.read(configfile)
-    #REDIS
+    # REDIS
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # ZMQ #
     channel = cfg.get("Feed", "topicfilter")
-    #Subscriber
+    # Subscriber
     subscriber_name = "feed"
     subscriber_config_section = "Feed"
-    #Publisher
+    # Publisher
     publisher_name = "pubfed"
     publisher_config_section = "PubSub_Global"
@@ -60,13 +64,13 @@ def main():
     while True:
         message = Sub.get_msg_from_queue(r_serv)
-        #Recovering the streamed message informations.
-        if message != None:
+        # Recovering the streamed message informations.
+        if message is not None:
             if len(message.split()) == 3:
                 topic, paste, gzip64encoded = message.split()
                 print paste
             else:
-                #TODO Store the name of the empty paste inside a Redis-list.
+                # TODO Store the name of the empty paste inside a Redis-list.
                 print "Empty Paste: not processed"
                 publisher.debug("Empty Paste: {0} not processed".format(paste))
                 continue
@@ -79,17 +83,17 @@ def main():
            print "Empty Queues: Waiting..."
            time.sleep(10)
            continue
-        #Creating the full filepath
+        # Creating the full filepath
        filename = cfg.get("Directories", "pastes") + paste
        if not os.path.exists(filename.rsplit("/", 1)[0]):
            os.makedirs(filename.rsplit("/", 1)[0])
        else:
-            #Path already existing
+            # Path already existing
            pass
        decoded_gzip = base64.standard_b64decode(gzip64encoded)
-        #paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
+        # paste, zlib.decompress(decoded_gzip, zlib.MAX_WBITS|16)
        with open(filename, 'wb') as F:
            F.write(decoded_gzip)
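
The three-field message handled above ("topic paste gzip64encoded") can be reproduced in isolation: the payload is just the base64 of a gzipped paste, and writing the decoded bytes yields a valid .gz file. A self-contained sketch under that assumption (Python 2, like the codebase; the topic value and paths are made up):

import base64
import gzip
import StringIO

# Build a fake feed message in the expected format.
buf = StringIO.StringIO()
with gzip.GzipFile(fileobj=buf, mode='wb') as f:
    f.write("some paste content")
gzip64encoded = base64.standard_b64encode(buf.getvalue())
message = "102 2014/08/14/example.gz " + gzip64encoded

# What the loop above does with such a message.
topic, paste, payload = message.split()
decoded_gzip = base64.standard_b64decode(payload)  # raw gzip bytes
with open("/tmp/example.gz", 'wb') as F:
    F.write(decoded_gzip)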

View File

@@ -20,12 +20,14 @@ Requirements
     "channel_name"+" "+/path/to/the/paste.gz+" "base64_data_encoded_paste"
 """
-import redis, zmq, ConfigParser
+import redis
+import ConfigParser
 from pubsublogger import publisher
 from packages import ZMQ_PubSub
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -35,24 +37,22 @@ def main():
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
-    p_serv = r_serv.pipeline(False)
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Queuing"
     # ZMQ #
     channel = cfg.get("Feed", "topicfilter")
-    Sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed")
+    sub = ZMQ_PubSub.ZMQSub(configfile, "Feed", channel, "feed")
     # FUNCTIONS #
     publisher.info("""Suscribed to channel {0}""".format(channel))
     while True:
-        Sub.get_and_lpush(r_serv)
+        sub.get_and_lpush(r_serv)
         if r_serv.sismember("SHUTDOWN_FLAGS", "Feed_Q"):
             r_serv.srem("SHUTDOWN_FLAGS", "Feed_Q")
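
This _Q module illustrates the queueing convention used across the tree: the subscriber side lpushes every received frame onto a Redis list (get_and_lpush), and the worker module rpops from the same list (get_msg_from_queue), so bursts on the ZMQ socket are buffered rather than dropped. Stripped of the wrapper classes, the list operations amount to this sketch (local Redis assumed; the queue name and message contents are illustrative):

import redis

r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)
queue = "102feed"  # channel + subscriber name, as ZMQSub concatenates them

# Producer side: what get_and_lpush does with each received frame.
r_serv.lpush(queue, "102 2014/08/14/example.gz QUFBQQ==")

# Consumer side: what get_msg_from_queue returns to the worker.
message = r_serv.rpop(queue)  # None when the queue is empty
if message is not None:
    topic, paste, payload = message.split()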

View File

@@ -4,7 +4,8 @@
 The ZMQ_PubSub_Categ Module
 ============================
-This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q Module.
+This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
+Module.
 Each words files created under /files/ are representing categories.
 This modules take these files and compare them to
@@ -21,7 +22,8 @@ this word will be pushed to this specific channel.
 ..note:: The channel will have the name of the file created.
 Implementing modules can start here, create your own category file,
-and then create your own module to treat the specific paste matching this category.
+and then create your own module to treat the specific paste matching this
+category.
 ..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
 the same Subscriber name in both of them.
@@ -34,13 +36,17 @@ Requirements
 *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
 """
-import redis, argparse, zmq, ConfigParser, time
-from packages import Paste as P
+import redis
+import argparse
+import ConfigParser
+import time
 from packages import ZMQ_PubSub
 from pubsublogger import publisher
+from packages import Paste
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -50,23 +56,21 @@ def main():
     # SCRIPT PARSER #
     parser = argparse.ArgumentParser(
-        description = '''This script is a part of the Analysis Information
-        Leak framework.''',
-        epilog = '''''')
+        description='''This script is a part of the Analysis Information Leak framework.''',
+        epilog='''''')
-    parser.add_argument('-l',
-        type = str,
-        default = "../files/list_categ_files",
-        help = 'Path to the list_categ_files (../files/list_categ_files)',
-        action = 'store')
+    parser.add_argument(
+        '-l', type=str, default="../files/list_categ_files",
+        help='Path to the list_categ_files (../files/list_categ_files)',
+        action='store')
     args = parser.parse_args()
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Script"
@@ -79,17 +83,20 @@ def main():
     publisher_name = "pubcateg"
     publisher_config_section = "PubSub_Categ"
-    Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name)
-    Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name)
+    sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel,
+                            subscriber_name)
+    pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section,
+                            publisher_name)
     # FUNCTIONS #
-    publisher.info("Script Categ subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0")))
+    publisher.info("Script Categ subscribed to channel {0}".format(
+        cfg.get("PubSub_Words", "channel_0")))
     with open(args.l, 'rb') as L:
         tmp_dict = {}
         for num, fname in enumerate(L):
-            #keywords temp list
+            # keywords temp list
             tmp_list = []
             with open(fname[:-1], 'rb') as LS:
@@ -99,16 +106,15 @@ def main():
                 tmp_dict[fname.split('/')[-1][:-1]] = tmp_list
     paste_words = []
-    message = Sub.get_msg_from_queue(r_serv)
+    message = sub.get_msg_from_queue(r_serv)
     prec_filename = None
     while True:
-        if message != None:
+        if message is not None:
             channel, filename, word, score = message.split()
-            if prec_filename == None or filename != prec_filename:
-                PST = P.Paste(filename)
+            if prec_filename is None or filename != prec_filename:
+                PST = Paste.Paste(filename)
             prec_filename = filename
@@ -117,10 +123,12 @@ def main():
                if word.lower() in list:
                    channel = categ
                    msg = channel+" "+PST.p_path+" "+word+" "+score
-                    Pub.send_message(msg)
-                    #dico_categ.add(categ)
+                    pub.send_message(msg)
+                    # dico_categ.add(categ)
-                    publisher.info('{0};{1};{2};{3};{4}'.format("Categ", PST.p_source, PST.p_date, PST.p_name,"Detected "+score+" "+"\""+word+"\""))
+                    publisher.info(
+                        'Categ;{};{};{};Detected {} "{}"'.format(
+                            PST.p_source, PST.p_date, PST.p_name, score, word))
        else:
            if r_serv.sismember("SHUTDOWN_FLAGS", "Categ"):
@@ -131,7 +139,7 @@ def main():
            publisher.debug("Script Categ is Idling 10s")
            time.sleep(10)
-        message = Sub.get_msg_from_queue(r_serv)
+        message = sub.get_msg_from_queue(r_serv)
 if __name__ == "__main__":
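
The rewritten publisher.info() call above also switches from explicit positional fields ({0};{1};...) to auto-numbered {} fields, which .format() fills left to right (supported from Python 2.7 onward). The two spellings produce identical output, shown standalone with made-up values:

source, p_date, name, score, word = "pastebin", "20140814", "abc.gz", "3", "visa"

old_style = '{0};{1};{2};{3};{4}'.format(
    "Categ", source, p_date, name, 'Detected ' + score + ' "' + word + '"')
new_style = 'Categ;{};{};{};Detected {} "{}"'.format(
    source, p_date, name, score, word)

assert old_style == new_style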

View File

@@ -8,7 +8,8 @@ This module is consuming the Redis-list created by the script ./Dir.py.
 This module is as the same level of the ZMQ tree than the Module ZMQ_Feed
 Whereas the ZMQ_Feed is poping the list created in redis by ZMQ_Feed_Q which is
-listening a stream, ZMQ_Pub_Global is poping the list created in redis by ./Dir.py.
+listening a stream, ZMQ_Pub_Global is poping the list created in redis by
+./Dir.py.
 Thanks to this Module there is now two way to Feed the ZMQ tree:
 *By a continuous stream ..seealso:: ZMQ_Feed Module
@@ -20,13 +21,15 @@ Requirements
 *Need running Redis instances. (Redis)
 """
-import redis, zmq, ConfigParser, time
-from packages import Paste as P
+import redis
+import ConfigParser
+import time
 from packages import ZMQ_PubSub
 from pubsublogger import publisher
 configfile = './packages/config.cfg'
 def main():
     """Main Function"""
@@ -36,15 +39,15 @@ def main():
     # REDIS #
     r_serv = redis.StrictRedis(
-        host = cfg.get("Redis_Queues", "host"),
-        port = cfg.getint("Redis_Queues", "port"),
-        db = cfg.getint("Redis_Queues", "db"))
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
     # LOGGING #
     publisher.channel = "Global"
     # ZMQ #
-    PubGlob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
+    pub_glob = ZMQ_PubSub.ZMQPub(configfile, "PubSub_Global", "global")
     # FONCTIONS #
     publisher.info("Starting to publish.")
@@ -52,10 +55,10 @@ def main():
     while True:
         filename = r_serv.lpop("filelist")
-        if filename != None:
+        if filename is not None:
             msg = cfg.get("PubSub_Global", "channel")+" "+filename
-            PubGlob.send_message(msg)
+            pub_glob.send_message(msg)
             publisher.debug("{0} Published".format(msg))
         else:
             time.sleep(10)
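
ZMQ_Pub_Global's main loop is a plain polling consumer of the filelist list that ./Dir.py fills; lpop returns None once the list is empty, which is why the PEP 8 change to "is not None" is a safe, purely stylistic one here. The pattern in isolation (a sketch, local Redis assumed, filename made up):

import redis

r_serv = redis.StrictRedis(host="localhost", port=6379, db=0)
r_serv.rpush("filelist", "2014/08/14/example.gz")  # what ./Dir.py would queue

while True:
    filename = r_serv.lpop("filelist")
    if filename is not None:
        print("would publish: " + filename)
    else:
        break  # the real module sleeps 10s and retries instead of exiting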

View File

@@ -30,5 +30,3 @@ class Date(object):
     def _set_day(self, day):
         self.day = day
-
-

View File

@@ -1,4 +1,7 @@
-import hashlib, crcmod, mmh3
+import hashlib
+import crcmod
+import mmh3
 class Hash(object):
     """docstring for Hash"""
@@ -29,4 +32,4 @@ class Hash(object):
         elif self.name == "murmur":
             hash = mmh3.hash(string)
-        return hash
\ No newline at end of file
+        return hash
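
The Hash class whose imports are split above dispatches on a name string (md5, sha1, crc, murmur), and the same three libraries can be exercised directly. A standalone sketch of those calls; the exact CRC variant the class uses is not visible in this hunk, so 'crc-32' below is an assumption (Python 2 byte strings assumed):

import hashlib

import crcmod.predefined
import mmh3

data = "example paste"

md5_digest = hashlib.md5(data).hexdigest()
sha1_digest = hashlib.sha1(data).hexdigest()
crc32_fun = crcmod.predefined.mkCrcFun('crc-32')  # assumed variant
crc_digest = crc32_fun(data)     # an integer
murmur_digest = mmh3.hash(data)  # a signed 32-bit integer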

View File

@@ -5,8 +5,9 @@ The ``ZMQ PubSub`` Modules
 """
-import zmq, ConfigParser, redis, pprint, os, sys
-#from pubsublogger import publisher
+import zmq
+import ConfigParser
 class PubSub(object):
     """
@@ -29,14 +30,14 @@ class PubSub(object):
     def __init__(self, file_conf, log_channel, ps_name):
         self._ps_name = ps_name
         self._config_parser = ConfigParser.ConfigParser()
-        self._config_file = file_conf # "./packages/config.cfg"
+        self._config_file = file_conf  # "./packages/config.cfg"
         self._config_parser.read(self._config_file)
         self._context_zmq = zmq.Context()
-        #self._logging_publisher_channel = log_channel # "Default"
-        #publisher.channel(self._logging_publisher_channel)
+        # self._logging_publisher_channel = log_channel # "Default"
+        # publisher.channel(self._logging_publisher_channel)
 class ZMQPub(PubSub):
@@ -146,7 +147,7 @@ class ZMQSub(PubSub):
         ..note:: This function also create a set named "queue" for monitoring needs
         """
-        r_serv.sadd("queues",self._channel+self._ps_name)
+        r_serv.sadd("queues", self._channel+self._ps_name)
         r_serv.lpush(self._channel+self._ps_name, self._subsocket.recv())
     def get_msg_from_queue(self, r_serv):
@@ -156,4 +157,4 @@ class ZMQSub(PubSub):
         :return: (str) Message from Publisher
         """
-        return r_serv.rpop(self._channel+self._ps_name)
\ No newline at end of file
+        return r_serv.rpop(self._channel+self._ps_name)
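
ZMQPub and ZMQSub, whose internals are tidied above, wrap a standard ZMQ PUB/SUB pair in which every message begins with its channel name and SUBSCRIBE acts as a prefix filter on that name. A minimal standalone pair; the endpoint and channel value are made up:

import zmq

context = zmq.Context()

sub = context.socket(zmq.SUB)
sub.bind("tcp://127.0.0.1:5556")
sub.setsockopt(zmq.SUBSCRIBE, "filelist")  # prefix filter on the channel name

pub = context.socket(zmq.PUB)
pub.connect("tcp://127.0.0.1:5556")
pub.send("filelist 2014/08/14/example.gz")  # channel name comes first

# message = sub.recv()  # would block until a matching frame arrives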

View File

@@ -1,18 +1,9 @@
-import gzip, string, sys, os, redis, re
+import re
 import dns.resolver
-from pubsublogger import publisher
-from lib_jobs import *
-from operator import itemgetter
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-import calendar as cal
-from datetime import date, timedelta
-from dateutil.rrule import rrule, DAILY
+from datetime import timedelta
 def is_luhn_valid(card_number):
@@ -23,9 +14,7 @@ def is_luhn_valid(card_number):
     """
     r = [int(ch) for ch in str(card_number)][::-1]
-    return (sum(r[0::2]) + sum(sum(divmod(d*2,10)) for d in r[1::2])) % 10 == 0
+    return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
 def checking_MX_record(r_serv, adress_set):
@@ -49,16 +38,16 @@ def checking_MX_record(r_serv, adress_set):
     for MXdomain in set(MXdomains):
         try:
-            #Already in Redis living.
+            # Already in Redis living.
             if r_serv.exists(MXdomain[1:]):
                 score += 1
                 WalidMX.add(MXdomain[1:])
             # Not already in Redis
             else:
                 # If I'm Walid MX domain
-                if dns.resolver.query(MXdomain[1:], rdtype = dns.rdatatype.MX):
+                if dns.resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
                     # Gonna be added in redis.
-                    r_serv.setex(MXdomain[1:],timedelta(days=1),1)
+                    r_serv.setex(MXdomain[1:], timedelta(days=1), 1)
                     score += 1
                     WalidMX.add(MXdomain[1:])
                 else:
@@ -86,8 +75,6 @@ def checking_MX_record(r_serv, adress_set):
     return (num, WalidMX)
 def checking_A_record(r_serv, domains_set):
     score = 0
     num = len(domains_set)
@@ -95,16 +82,16 @@ def checking_A_record(r_serv, domains_set):
     for Adomain in domains_set:
         try:
-            #Already in Redis living.
+            # Already in Redis living.
             if r_serv.exists(Adomain):
                 score += 1
                 WalidA.add(Adomain)
             # Not already in Redis
             else:
                 # If I'm Walid domain
-                if dns.resolver.query(Adomain, rdtype = dns.rdatatype.A):
+                if dns.resolver.query(Adomain, rdtype=dns.rdatatype.A):
                     # Gonna be added in redis.
-                    r_serv.setex(Adomain,timedelta(days=1),1)
+                    r_serv.setex(Adomain, timedelta(days=1), 1)
                     score += 1
                     WalidA.add(Adomain)
                 else:
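
For reference, is_luhn_valid() (reformatted above) is the standard Luhn checksum: reverse the digits, take the digit sum of every doubled second digit via divmod(d*2, 10), and check the total mod 10. A quick self-check with the classic test number:

def is_luhn_valid(card_number):
    r = [int(ch) for ch in str(card_number)][::-1]
    return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0

assert is_luhn_valid(49927398716)      # valid Luhn checksum
assert not is_luhn_valid(49927398717)  # last digit off by one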

View File

@@ -1,24 +1,12 @@
-import redis, gzip
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-from textblob import TextBlob
-from nltk.corpus import stopwords
-from nltk.tokenize import RegexpTokenizer
-from lib_redis_insert import clean, listdirectory
-from lib_jobs import *
 import os
 import string
 from pubsublogger import publisher
-import calendar as cal
-from datetime import date, timedelta
+import calendar
+from datetime import date
 from dateutil.rrule import rrule, DAILY
-from packages import *
 def listdirectory(path):
     """Path Traversing Function.
@@ -29,7 +17,7 @@ def listdirectory(path):
     the argument directory.
     """
-    fichier=[]
+    fichier = []
     for root, dirs, files in os.walk(path):
         for i in files:
@@ -38,15 +26,10 @@ def listdirectory(path):
     return fichier
-clean = lambda dirty: ''.join(filter(string.printable.__contains__, dirty))
-"""It filters out non-printable characters from the string it receives."""
 def create_dirfile(r_serv, directory, overwrite):
     """Create a file of path.
@@ -62,7 +45,7 @@ def create_dirfile(r_serv, directory, overwrite):
         r_serv.delete("filelist")
         for x in listdirectory(directory):
-            r_serv.rpush("filelist",x)
+            r_serv.rpush("filelist", x)
         publisher.info("The list was overwritten")
@@ -70,19 +53,17 @@ def create_dirfile(r_serv, directory, overwrite):
         if r_serv.llen("filelist") == 0:
             for x in listdirectory(directory):
-                r_serv.rpush("filelist",x)
+                r_serv.rpush("filelist", x)
             publisher.info("New list created")
         else:
             for x in listdirectory(directory):
-                r_serv.rpush("filelist",x)
+                r_serv.rpush("filelist", x)
             publisher.info("The list was updated with new elements")
 def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
     """Create a csv file used with dygraph.
@@ -100,23 +81,29 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     """
     a = date(year, month, 01)
-    b = date(year, month, cal.monthrange(year,month)[1])
+    b = date(year, month, calendar.monthrange(year, month)[1])
     days = {}
     words = []
     with open(feederfilename, 'rb') as F:
-        for word in F: # words of the files
-            words.append(word[:-1]) # list of words (sorted as in the file)
+        # words of the files
+        for word in F:
+            # list of words (sorted as in the file)
+            words.append(word[:-1])
-    for dt in rrule(DAILY, dtstart = a, until = b): # for each days
+    # for each days
+    for dt in rrule(DAILY, dtstart=a, until=b):
         mot = []
         mot1 = []
         mot2 = []
         days[dt.strftime("%Y%m%d")] = ''
-        for word in sorted(words): # from the 1srt day to the last of the list
-            if r_serv.hexists(word, dt.strftime("%Y%m%d")): # if the word have a value for the day
+        # from the 1srt day to the last of the list
+        for word in sorted(words):
+            # if the word have a value for the day
+            if r_serv.hexists(word, dt.strftime("%Y%m%d")):
                 mot1.append(str(word))
                 mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d")))
@@ -144,9 +131,9 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
     with open(csvfilename+".csv", 'rb') as F:
         h = F.read()
-        h = h.replace("[","")
-        h = h.replace("]","")
-        h = h.replace('\'',"")
+        h = h.replace("[", "")
+        h = h.replace("]", "")
+        h = h.replace('\'', "")
     with open(csvfilename+".csv", 'wb') as F:
         F.write(h)
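
Dropping the cal alias above changes nothing functionally: calendar.monthrange(year, month) still returns (weekday_of_first_day, days_in_month), and the rrule(DAILY, ...) call then yields one datetime per day of that month, producing the %Y%m%d keys the CSV rows are built from. A standalone check:

import calendar
from datetime import date

from dateutil.rrule import rrule, DAILY

year, month = 2014, 8
a = date(year, month, 1)
b = date(year, month, calendar.monthrange(year, month)[1])

day_keys = [dt.strftime("%Y%m%d") for dt in rrule(DAILY, dtstart=a, until=b)]
assert day_keys[0] == "20140801"
assert day_keys[-1] == "20140831"
assert len(day_keys) == 31  # August has 31 days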