diff --git a/bin/Repartition_graph.py b/bin/Repartition_graph.py index 09889310..89a661c7 100755 --- a/bin/Repartition_graph.py +++ b/bin/Repartition_graph.py @@ -1,14 +1,13 @@ #!/usr/bin/python2.7 # -*-coding:UTF-8 -* -import redis, argparse, zmq, ConfigParser, time, os +import redis +import argparse +import ConfigParser from pubsublogger import publisher -from packages import ZMQ_PubSub -import texttable -import numpy as np import matplotlib.pyplot as plt -from pylab import * + def main(): """Main Function""" @@ -19,24 +18,20 @@ def main(): # SCRIPT PARSER # parser = argparse.ArgumentParser( - description = '''This script is a part of the Analysis Information - Leak framework.''', - epilog = '''''') + description='''This script is a part of the Analysis Information Leak framework.''', + epilog='''''') - parser.add_argument('-f', - type = str, - metavar = "filename", - default = "figure", - help = 'The absolute path name of the "figure.png"', - action = 'store') + parser.add_argument('-f', type=str, metavar="filename", default="figure", + help='The absolute path name of the "figure.png"', + action='store') args = parser.parse_args() # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Level_DB_Hashs", "host"), - port = cfg.getint("Redis_Level_DB_Hashs", "port"), - db = cfg.getint("Redis_Level_DB_Hashs", "db")) + host=cfg.get("Redis_Level_DB_Hashs", "host"), + port=cfg.getint("Redis_Level_DB_Hashs", "port"), + db=cfg.getint("Redis_Level_DB_Hashs", "db")) # LOGGING # publisher.channel = "Graph" @@ -68,12 +63,11 @@ def main(): pastie_list.append(pastie) pastebin_list.append(pastebin) - codepad_list.sort(reverse = True) - pastie_list.sort(reverse = True) - pastebin_list.sort(reverse = True) - - total_list.sort(reverse = True) + codepad_list.sort(reverse=True) + pastie_list.sort(reverse=True) + pastebin_list.sort(reverse=True) + total_list.sort(reverse=True) plt.plot(codepad_list, 'b', label='Codepad.org') plt.plot(pastebin_list, 'g', label='Pastebin.org') @@ -89,9 +83,9 @@ def main(): plt.tight_layout() plt.savefig(args.f+".png", dpi=None, facecolor='w', edgecolor='b', - orientation='portrait', papertype=None, format="png", - transparent=False, bbox_inches=None, pad_inches=0.1, - frameon=True) + orientation='portrait', papertype=None, format="png", + transparent=False, bbox_inches=None, pad_inches=0.1, + frameon=True) if __name__ == "__main__": main() diff --git a/bin/ZMQ_PubSub_Categ_Q.py b/bin/ZMQ_PubSub_Categ_Q.py index f823a9f8..45e0b563 100755 --- a/bin/ZMQ_PubSub_Categ_Q.py +++ b/bin/ZMQ_PubSub_Categ_Q.py @@ -17,12 +17,14 @@ Requirements *Should register to the Publisher "ZMQ_PubSub_Tokenize" """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -32,9 +34,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -44,12 +46,12 @@ def main(): subscriber_name = "categ" subscriber_config_section = "PubSub_Words" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""Suscribed to 
channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Categ_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Categ_Q") diff --git a/bin/ZMQ_PubSub_Lines.py b/bin/ZMQ_PubSub_Lines.py index 8f440f6f..d13103c6 100755 --- a/bin/ZMQ_PubSub_Lines.py +++ b/bin/ZMQ_PubSub_Lines.py @@ -26,13 +26,17 @@ Requirements *Need the ZMQ_PubSub_Line_Q Module running to be able to work properly. """ -import redis, argparse, zmq, ConfigParser, time -from packages import Paste as P +import redis +import argparse +import ConfigParser +import time +from packages import Paste from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -42,62 +46,58 @@ def main(): # SCRIPT PARSER # parser = argparse.ArgumentParser( - description = '''This script is a part of the Analysis Information - Leak framework.''', - epilog = '''''') + description='''This script is a part of the Analysis Information Leak framework.''', + epilog='''''') - parser.add_argument('-max', - type = int, - default = 500, - help = 'The limit between "short lines" and "long lines" (500)', - action = 'store') + parser.add_argument('-max', type=int, default=500, + help='The limit between "short lines" and "long lines" (500)', + action='store') args = parser.parse_args() # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) - - p_serv = r_serv.pipeline(False) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # - #Subscriber + # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "line" subscriber_config_section = "PubSub_Global" - #Publisher + # Publisher publisher_config_section = "PubSub_Longlines" publisher_name = "publine" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) - Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) + pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) channel_0 = cfg.get("PubSub_Longlines", "channel_0") channel_1 = cfg.get("PubSub_Longlines", "channel_1") # FUNCTIONS # - publisher.info("""Lines script Subscribed to channel {0} and Start to publish - on channel {1}, {2}""".format(cfg.get("PubSub_Global", "channel"), - cfg.get("PubSub_Longlines", "channel_0"), - cfg.get("PubSub_Longlines", "channel_1"))) + tmp_string = "Lines script Subscribed to channel {} and Start to publish on channel {}, {}" + publisher.info(tmp_string.format( + cfg.get("PubSub_Global", "channel"), + cfg.get("PubSub_Longlines", "channel_0"), + cfg.get("PubSub_Longlines", "channel_1"))) while True: try: - message = Sub.get_msg_from_queue(r_serv1) - if message != None: - PST = P.Paste(message.split(" ",-1)[-1]) + message = sub.get_msg_from_queue(r_serv1) + if message is not None: + PST = Paste.Paste(message.split(" ", -1)[-1]) else: if 
r_serv1.sismember("SHUTDOWN_FLAGS", "Lines"): r_serv1.srem("SHUTDOWN_FLAGS", "Lines") @@ -113,13 +113,13 @@ def main(): PST.save_attribute_redis(r_serv, "p_nb_lines", lines_infos[0]) PST.save_attribute_redis(r_serv, "p_max_length_line", lines_infos[1]) - r_serv.sadd("Pastes_Objects",PST.p_path) + r_serv.sadd("Pastes_Objects", PST.p_path) if lines_infos[1] >= args.max: msg = channel_0+" "+PST.p_path else: msg = channel_1+" "+PST.p_path - Pub.send_message(msg) + pub.send_message(msg) except IOError: print "CRC Checksum Error on : ", PST.p_path pass diff --git a/bin/ZMQ_PubSub_Lines_Q.py b/bin/ZMQ_PubSub_Lines_Q.py index 722fdcf5..61f29443 100755 --- a/bin/ZMQ_PubSub_Lines_Q.py +++ b/bin/ZMQ_PubSub_Lines_Q.py @@ -18,12 +18,14 @@ Requirements """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -33,9 +35,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -44,13 +46,13 @@ def main(): channel = cfg.get("PubSub_Global", "channel") subscriber_name = "line" - Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Global", channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Lines_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Lines_Q") diff --git a/bin/ZMQ_PubSub_Tokenize.py b/bin/ZMQ_PubSub_Tokenize.py index 88f01e01..701968d0 100755 --- a/bin/ZMQ_PubSub_Tokenize.py +++ b/bin/ZMQ_PubSub_Tokenize.py @@ -21,13 +21,16 @@ Requirements *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. 
""" -import redis, zmq, ConfigParser, time -from packages import Paste as P +import redis +import ConfigParser +import time +from packages import Paste from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -37,9 +40,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" @@ -49,12 +52,12 @@ def main(): subscriber_name = "tokenize" subscriber_config_section = "PubSub_Longlines" - #Publisher + # Publisher publisher_config_section = "PubSub_Words" publisher_name = "pubtokenize" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) - Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) channel_0 = cfg.get("PubSub_Words", "channel_0") @@ -62,10 +65,10 @@ def main(): publisher.info("Tokeniser subscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_1"))) while True: - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) print message - if message != None: - PST = P.Paste(message.split(" ",-1)[-1]) + if message is not None: + PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize"): r_serv.srem("SHUTDOWN_FLAGS", "Tokenize") @@ -80,7 +83,7 @@ def main(): for word, score in PST._get_top_words().items(): if len(word) >= 4: msg = channel_0+' '+PST.p_path+' '+str(word)+' '+str(score) - Pub.send_message(msg) + pub.send_message(msg) print msg else: pass diff --git a/bin/ZMQ_PubSub_Tokenize_Q.py b/bin/ZMQ_PubSub_Tokenize_Q.py index 60204daa..c5a5791f 100755 --- a/bin/ZMQ_PubSub_Tokenize_Q.py +++ b/bin/ZMQ_PubSub_Tokenize_Q.py @@ -17,12 +17,14 @@ Requirements *Should register to the Publisher "ZMQ_PubSub_Line" channel 1 """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -32,9 +34,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -44,13 +46,13 @@ def main(): subscriber_name = "tokenize" subscriber_config_section = "PubSub_Longlines" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Tokenize_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Tokenize_Q") diff --git a/bin/ZMQ_Sub_Attributes.py b/bin/ZMQ_Sub_Attributes.py index 081e26ff..e8a59071 100755 --- a/bin/ZMQ_Sub_Attributes.py +++ b/bin/ZMQ_Sub_Attributes.py @@ -26,13 +26,16 @@ Requirements *Need the 
ZMQ_PubSub_Line_Q Module running to be able to work properly. """ -import redis, zmq, ConfigParser, time -from packages import Paste as P +import redis +import ConfigParser +import time +from packages import Paste from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -42,37 +45,35 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) - - p_serv = r_serv.pipeline(False) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # - #Subscriber + # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "attributes" subscriber_config_section = "PubSub_Global" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ Attribute is Running""") while True: - try: - message = Sub.get_msg_from_queue(r_serv1) + try: + message = sub.get_msg_from_queue(r_serv1) - if message != None: - PST = P.Paste(message.split(" ",-1)[-1]) + if message is not None: + PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Attributes"): r_serv1.srem("SHUTDOWN_FLAGS", "Attributes") @@ -89,13 +90,13 @@ def main(): PST.save_attribute_redis(r_serv, "p_encoding", encoding) PST.save_attribute_redis(r_serv, "p_language", language) - r_serv.sadd("Pastes_Objects",PST.p_path) + r_serv.sadd("Pastes_Objects", PST.p_path) PST.save_all_attributes_redis(r_serv) except IOError: - print "CRC Checksum Failed on :", PST.p_path - publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) - pass + print "CRC Checksum Failed on :", PST.p_path + publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed")) + pass if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_Attributes_Q.py b/bin/ZMQ_Sub_Attributes_Q.py index 07b494ee..4396a6bc 100755 --- a/bin/ZMQ_Sub_Attributes_Q.py +++ b/bin/ZMQ_Sub_Attributes_Q.py @@ -18,12 +18,14 @@ Requirements """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -33,9 +35,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -44,13 +46,13 @@ def main(): channel = cfg.get("PubSub_Global", "channel") subscriber_name = "attributes" - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) # FUNCTIONS # 
publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Attributes_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Attributes_Q") diff --git a/bin/ZMQ_Sub_CreditCards.py b/bin/ZMQ_Sub_CreditCards.py index f2790392..54021144 100755 --- a/bin/ZMQ_Sub_CreditCards.py +++ b/bin/ZMQ_Sub_CreditCards.py @@ -1,7 +1,10 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser, json, pprint, time -from packages import Paste as P +import redis +import ConfigParser +import pprint +import time +from packages import Paste from packages import lib_refine from packages import ZMQ_PubSub from pubsublogger import publisher @@ -9,6 +12,7 @@ from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -18,60 +22,58 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) - - p_serv = r_serv.pipeline(False) + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") # FUNCTIONS # publisher.info("Creditcard script subscribed to channel creditcard_categ") - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) prec_filename = None creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?" 
- mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}" - visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}" - discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}" - jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}" - amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}" - chinaUP_regex = "62[0-5]\d{13,16}" - maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}" + # mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}" + # visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}" + # discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|[3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}" + # jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}" + # amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}" + # chinaUP_regex = "62[0-5]\d{13,16}" + # maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}" while True: - if message != None: - channel, filename, word, score = message.split() + if message is not None: + channel, filename, word, score = message.split() - if prec_filename == None or filename != prec_filename: - Creditcard_set = set([]) - PST = P.Paste(filename) + if prec_filename is None or filename != prec_filename: + creditcard_set = set([]) + PST = Paste.Paste(filename) for x in PST.get_regex(creditcard_regex): if lib_refine.is_luhn_valid(x): - Creditcard_set.add(x) + creditcard_set.add(x) + PST.__setattr__(channel, creditcard_set) + PST.save_attribute_redis(r_serv1, channel, creditcard_set) - PST.__setattr__(channel, Creditcard_set) - PST.save_attribute_redis(r_serv1, channel, Creditcard_set) - - pprint.pprint(Creditcard_set) - if (len(Creditcard_set) > 0): - publisher.critical('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name,"Checked " + str(len(Creditcard_set))+" valid number(s)" )) + pprint.pprint(creditcard_set) + to_print = 'CreditCard;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) + if (len(creditcard_set) > 0): + publisher.critical('{}Checked {} valid number(s)'.format(to_print, len(creditcard_set))) else: - publisher.info('{0};{1};{2};{3};{4}'.format("CreditCard", PST.p_source, PST.p_date, PST.p_name, "CreditCard related" )) + publisher.info('{}CreditCard related'.format(to_print)) prec_filename = filename @@ -84,7 +86,7 @@ def main(): publisher.debug("Script creditcard is idling 1m") time.sleep(60) - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_CreditCards_Q.py b/bin/ZMQ_Sub_CreditCards_Q.py index 392ee22c..7ef4a9b9 100755 --- a/bin/ZMQ_Sub_CreditCards_Q.py +++ b/bin/ZMQ_Sub_CreditCards_Q.py @@ -1,12 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser +import redis +import ConfigParser from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -16,21 +18,21 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "creditcard_categ", "cards") # FUNCTIONS # publisher.info("""Suscribed to channel 
{0}""".format("creditcard_categ")) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Creditcards_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Creditcards_Q") diff --git a/bin/ZMQ_Sub_Curve.py b/bin/ZMQ_Sub_Curve.py index 824726e0..7a065afb 100755 --- a/bin/ZMQ_Sub_Curve.py +++ b/bin/ZMQ_Sub_Curve.py @@ -21,7 +21,9 @@ Requirements *Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly. """ -import redis, argparse, zmq, ConfigParser, time +import redis +import ConfigParser +import time from packages import Paste as P from packages import ZMQ_PubSub from pubsublogger import publisher @@ -29,6 +31,7 @@ from packages import lib_words configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -36,30 +39,16 @@ def main(): cfg = ConfigParser.ConfigParser() cfg.read(configfile) - # SCRIPT PARSER # - parser = argparse.ArgumentParser( - description = '''This script is a part of the Analysis Information - Leak framework.''', - epilog = '''''') - - parser.add_argument('-l', - type = str, - default = "../files/list_categ_files", - help = 'Path to the list_categ_files (../files/list_categ_files)', - action = 'store') - - args = parser.parse_args() - # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Level_DB", "host"), - port = cfg.get("Redis_Level_DB", "port"), - db = 0) + host=cfg.get("Redis_Level_DB", "host"), + port=cfg.get("Redis_Level_DB", "port"), + db=0) # LOGGING # publisher.channel = "Script" @@ -69,7 +58,7 @@ def main(): subscriber_name = "curve" subscriber_config_section = "PubSub_Words" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("Script Curve subscribed to channel {0}".format(cfg.get("PubSub_Words", "channel_0"))) @@ -78,24 +67,23 @@ def main(): csv_path = cfg.get("Directories", "wordtrending_csv") wordfile_path = cfg.get("Directories", "wordsfile") - paste_words = [] - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) prec_filename = None while True: - if message != None: + if message is not None: channel, filename, word, score = message.split() - if prec_filename == None or filename != prec_filename: + if prec_filename is None or filename != prec_filename: PST = P.Paste(filename) lib_words.create_curve_with_word_file(r_serv1, csv_path, wordfile_path, int(PST.p_date.year), int(PST.p_date.month)) prec_filename = filename prev_score = r_serv1.hget(word.lower(), PST.p_date) print prev_score - if prev_score != None: + if prev_score is not None: r_serv1.hset(word.lower(), PST.p_date, int(prev_score) + int(score)) else: r_serv1.hset(word.lower(), PST.p_date, score) - #r_serv.expire(word,86400) #1day + # r_serv.expire(word,86400) #1day else: if r_serv.sismember("SHUTDOWN_FLAGS", "Curve"): @@ -107,7 +95,7 @@ def main(): print "sleepin" time.sleep(1) - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_Curve_Q.py b/bin/ZMQ_Sub_Curve_Q.py index 54737e3e..ba6aa67c 100755 --- a/bin/ZMQ_Sub_Curve_Q.py +++ b/bin/ZMQ_Sub_Curve_Q.py @@ -17,12 +17,14 @@ 
Requirements *Should register to the Publisher "ZMQ_PubSub_Tokenize" """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -32,9 +34,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -44,12 +46,12 @@ def main(): subscriber_name = "curve" subscriber_config_section = "PubSub_Words" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Curve_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Curve_Q") diff --git a/bin/ZMQ_Sub_Duplicate.py b/bin/ZMQ_Sub_Duplicate.py index 5b711ef3..5bcaf4c7 100755 --- a/bin/ZMQ_Sub_Duplicate.py +++ b/bin/ZMQ_Sub_Duplicate.py @@ -12,15 +12,18 @@ Requirements: """ -import redis, zmq, ConfigParser, time, datetime, pprint, time, os -from packages import Paste as P +import redis +import ConfigParser +import os +import time +from packages import Paste from packages import ZMQ_PubSub -from datetime import date from pubsublogger import publisher from pybloomfilter import BloomFilter configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -31,25 +34,24 @@ def main(): # REDIS # # DB QUEUE ( MEMORY ) r_Q_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv_merge = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) - + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) # REDIS # # DB OBJECT & HASHS ( DISK ) dico_redis = {} for year in xrange(2013, 2015): - for month in xrange(0,16): + for month in xrange(0, 16): dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( - host = cfg.get("Redis_Level_DB", "host"), - port = year, - db = month) + host=cfg.get("Redis_Level_DB", "host"), + port=year, + db=month) # LOGGING # publisher.channel = "Script" @@ -59,7 +61,7 @@ def main(): subscriber_name = "duplicate" subscriber_config_section = "PubSub_Global" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""Script duplicate subscribed to channel {0}""".format(cfg.get("PubSub_Global", "channel"))) @@ -75,10 +77,10 @@ def main(): x = time.time() - message = Sub.get_msg_from_queue(r_Q_serv) - if message != None: - path = message.split(" ",-1)[-1] - PST = P.Paste(path) + message = sub.get_msg_from_queue(r_Q_serv) + if message is not None: + path = message.split(" ", -1)[-1] + PST = Paste.Paste(path) else: publisher.debug("Script Attribute 
is idling 10s") time.sleep(10) @@ -91,17 +93,17 @@ def main(): PST._set_p_hash_kind("md5") - #Assignate the correct redis connexion + # Assignate the correct redis connexion r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] - #Creating the bloom filter name: bloomyyyymm + # Creating the bloom filter name: bloomyyyymm bloomname = 'bloom' + PST.p_date.year + PST.p_date.month bloompath = cfg.get("Directories", "bloomfilters") filebloompath = bloompath + bloomname - #datetime.date(int(PST.p_date.year),int(PST.p_date.month),int(PST.p_date.day)).timetuple().tm_yday % 7 + # datetime.date(int(PST.p_date.year),int(PST.p_date.month),int(PST.p_date.day)).timetuple().tm_yday % 7 if os.path.exists(filebloompath): bloom = BloomFilter.open(filebloompath) @@ -117,40 +119,40 @@ def main(): r_serv1.set(index, PST.p_path) r_serv1.sadd("INDEX", index) - #For each bloom filter + # For each bloom filter opened_bloom = [] for bloo in r_Q_serv.smembers("bloomlist"): - #Opening blooms + # Opening blooms opened_bloom.append(BloomFilter.open(bloo)) # For each hash of the paste - for hash in PST._get_hash_lines(min = 5, start = 1, jump = 0): + for hash in PST._get_hash_lines(min=5, start=1, jump=0): nb_hash_current += 1 - #Adding the hash in Redis & limiting the set + # Adding the hash in Redis & limiting the set if r_serv1.scard(hash) <= set_limit: r_serv1.sadd(hash, index) r_serv1.sadd("HASHS", hash) - #Adding the hash in the bloom of the month + # Adding the hash in the bloom of the month bloom.add(hash) - #Go throught the Database of the bloom filter (of the month) + # Go throught the Database of the bloom filter (of the month) for bloo in opened_bloom: if hash in bloo: db = bloo.name[-6:] - #Go throught the Database of the bloom filter (of the month) + # Go throught the Database of the bloom filter (of the month) r_serv_bloom = dico_redis[db] - #set of index paste: set([1,2,4,65]) + # set of index paste: set([1,2,4,65]) hash_current = r_serv_bloom.smembers(hash) - #removing itself from the list + # removing itself from the list hash_current = hash_current - set([index]) # if the hash is present at least in 1 files (already processed) if len(hash_current) != 0: hash_dico[hash] = hash_current - #if there is data in this dictionnary + # if there is data in this dictionnary if len(hash_dico) != 0: super_dico[index] = hash_dico else: @@ -159,12 +161,11 @@ def main(): ########################################################################################### - #if there is data in this dictionnary + # if there is data in this dictionnary if len(super_dico) != 0: # current = current paste, phash_dico = {hash: set, ...} occur_dico = {} for current, phash_dico in super_dico.items(): - nb_similar_hash = len(phash_dico) # phash = hash, pset = set([ pastes ...]) for phash, pset in hash_dico.items(): @@ -180,17 +181,18 @@ def main(): dupl.append((paste, percentage)) # Creating the object attribute and save it. 
+ to_print = 'Duplicate;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) if dupl != []: PST.__setattr__("p_duplicate", dupl) PST.save_attribute_redis(r_serv_merge, "p_duplicate", dupl) - publisher.info('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(dupl)))) + publisher.info('{}Detected {}'.format(to_print, len(dupl))) y = time.time() - publisher.debug('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "Processed in "+str(y-x)+ " sec" )) + publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) except IOError: print "CRC Checksum Failed on :", PST.p_path - publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) + publisher.error('{}CRC Checksum Failed'.format(to_print)) pass if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_Duplicate_Q.py b/bin/ZMQ_Sub_Duplicate_Q.py index d3e8d3bf..1f4b8ef6 100755 --- a/bin/ZMQ_Sub_Duplicate_Q.py +++ b/bin/ZMQ_Sub_Duplicate_Q.py @@ -1,12 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser +import redis +import ConfigParser from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -16,22 +18,22 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # channel = cfg.get("PubSub_Global", "channel") - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, "duplicate") # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format(channel)) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Duplicate_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Duplicate_Q") diff --git a/bin/ZMQ_Sub_Indexer.py b/bin/ZMQ_Sub_Indexer.py index 2fd91a88..3fc3f56e 100755 --- a/bin/ZMQ_Sub_Indexer.py +++ b/bin/ZMQ_Sub_Indexer.py @@ -9,17 +9,20 @@ The ZMQ_Sub_Indexer modules is fetching the list of files to be processed and index each file with a full-text indexer (Whoosh until now). 
""" -import redis, zmq, ConfigParser, time -from packages import Paste as P +import redis +import ConfigParser +import time +from packages import Paste from packages import ZMQ_PubSub from pubsublogger import publisher from whoosh.index import create_in, exists_in, open_dir -from whoosh.fields import * +from whoosh.fields import Schema, TEXT, ID import os configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -29,19 +32,17 @@ def main(): # Redis r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # Indexer configuration - index dir and schema setup indexpath = cfg.get("Indexer", "path") indexertype = cfg.get("Indexer", "type") if indexertype == "whoosh": - schema = Schema(title=TEXT(stored=True), path=ID(stored=True,unique=True), content=TEXT) - + schema = Schema(title=TEXT(stored=True), path=ID(stored=True, unique=True), content=TEXT) if not os.path.exists(indexpath): os.mkdir(indexpath) - if not exists_in(indexpath): ix = create_in(indexpath, schema) else: @@ -51,22 +52,22 @@ def main(): publisher.channel = "Script" # ZMQ # - #Subscriber + # Subscriber channel = cfg.get("PubSub_Global", "channel") subscriber_name = "indexer" subscriber_config_section = "PubSub_Global" - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, channel, subscriber_name) # FUNCTIONS # publisher.info("""ZMQ Indexer is Running""") while True: - try: - message = Sub.get_msg_from_queue(r_serv1) + try: + message = sub.get_msg_from_queue(r_serv1) - if message != None: - PST = P.Paste(message.split(" ",-1)[-1]) + if message is not None: + PST = Paste.Paste(message.split(" ", -1)[-1]) else: if r_serv1.sismember("SHUTDOWN_FLAGS", "Indexer"): r_serv1.srem("SHUTDOWN_FLAGS", "Indexer") @@ -75,17 +76,20 @@ def main(): publisher.debug("Script Indexer is idling 10s") time.sleep(1) continue - docpath = message.split(" ",-1)[-1] + docpath = message.split(" ", -1)[-1] paste = PST.get_p_content() print "Indexing :", docpath if indexertype == "whoosh": indexwriter = ix.writer() - indexwriter.update_document(title=unicode(docpath, errors='ignore'),path=unicode(docpath, errors='ignore'),content=unicode(paste, errors='ignore')) + indexwriter.update_document( + title=unicode(docpath, errors='ignore'), + path=unicode(docpath, errors='ignore'), + content=unicode(paste, errors='ignore')) indexwriter.commit() except IOError: - print "CRC Checksum Failed on :", PST.p_path - publisher.error('{0};{1};{2};{3};{4}'.format("Duplicate", PST.p_source, PST.p_date, PST.p_name, "CRC Checksum Failed" )) - pass + print "CRC Checksum Failed on :", PST.p_path + publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(PST.p_source, PST.p_date, PST.p_name)) + pass if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_Indexer_Q.py b/bin/ZMQ_Sub_Indexer_Q.py index a23f8ad0..004c0d57 100755 --- a/bin/ZMQ_Sub_Indexer_Q.py +++ b/bin/ZMQ_Sub_Indexer_Q.py @@ -12,12 +12,14 @@ handling the indexing process of the files seen. 
""" -import redis, zmq, ConfigParser +import redis +import ConfigParser from pubsublogger import publisher from packages import ZMQ_PubSub configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -27,9 +29,9 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" @@ -38,7 +40,7 @@ def main(): channel = cfg.get("PubSub_Global", "channel") subscriber_name = "indexer" - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Global", channel, subscriber_name) publisher.info("""Suscribed to channel {0}""".format(channel)) @@ -46,7 +48,7 @@ def main(): # will get the data from the global ZMQ queue and buffer it in Redis. while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Indexer_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Indexer_Q") diff --git a/bin/ZMQ_Sub_Longlines.py b/bin/ZMQ_Sub_Longlines.py deleted file mode 100755 index 86f7176d..00000000 --- a/bin/ZMQ_Sub_Longlines.py +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env python2 -# -*-coding:UTF-8 -* - -import redis, zmq, ConfigParser -from packages import Paste as P -from packages import ZMQ_PubSub -from pubsublogger import publisher - -configfile = './packages/config.cfg' - -def main(): - """Main Function""" - - # CONFIG # - cfg = ConfigParser.ConfigParser() - cfg.read(configfile) - - # REDIS # - r_serv = redis.StrictRedis( - host = cfg.get("Redis_default", "host"), - port = cfg.getint("Redis_default", "port"), - db = args.db) - - p_serv = r_serv.pipeline(False) - - # LOGGING # - publisher.channel = "Script" - - # ZMQ # - channel = cfg.get("PubSub_Longlines", "channel_0") - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Longlines", channel) - - # FUNCTIONS # - publisher.info("Longlines ubscribed to channel {0}".format(cfg.get("PubSub_Longlines", "channel_0"))) - - while True: - PST = P.Paste(Sub.get_message().split(" ", -1)[-1]) - r_serv.sadd("Longlines", PST.p_mime) - PST.save_in_redis(r_serv, PST.p_mime) - - -if __name__ == "__main__": - main() diff --git a/bin/ZMQ_Sub_Mails.py b/bin/ZMQ_Sub_Mails.py index a663ecc3..b406a152 100755 --- a/bin/ZMQ_Sub_Mails.py +++ b/bin/ZMQ_Sub_Mails.py @@ -1,7 +1,10 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser, json, pprint, time +import redis +import ConfigParser +import pprint +import time import dns.exception from packages import Paste as P from packages import lib_refine @@ -10,6 +13,7 @@ from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -19,40 +23,40 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) r_serv2 = redis.StrictRedis( - host = 
cfg.get("Redis_Cache", "host"), - port = cfg.getint("Redis_Cache", "port"), - db = cfg.getint("Redis_Cache", "db")) + host=cfg.get("Redis_Cache", "host"), + port=cfg.getint("Redis_Cache", "port"), + db=cfg.getint("Redis_Cache", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails") # FUNCTIONS # publisher.info("Suscribed to channel mails_categ") - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) prec_filename = None email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}" while True: try: - if message != None: - channel, filename, word, score = message.split() + if message is not None: + channel, filename, word, score = message.split() - if prec_filename == None or filename != prec_filename: + if prec_filename is None or filename != prec_filename: PST = P.Paste(filename) MX_values = lib_refine.checking_MX_record(r_serv2, PST.get_regex(email_regex)) @@ -62,10 +66,11 @@ def main(): PST.save_attribute_redis(r_serv1, channel, (MX_values[0], list(MX_values[1]))) pprint.pprint(MX_values) + to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.format(PST.p_source, PST.p_date, PST.p_name, MX_values[0]) if MX_values[0] > 10: - publisher.warning('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mails" )) + publisher.warning(to_print) else: - publisher.info('{0};{1};{2};{3};{4}'.format("Mails", PST.p_source, PST.p_date, PST.p_name,"Checked "+ str(MX_values[0])+ " e-mail(s)" )) + publisher.info(to_print) prec_filename = filename else: @@ -77,7 +82,7 @@ def main(): publisher.debug("Script Mails is Idling 10s") time.sleep(10) - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) except dns.exception.Timeout: print "dns.exception.Timeout" pass diff --git a/bin/ZMQ_Sub_Mails_Q.py b/bin/ZMQ_Sub_Mails_Q.py index 5d725672..d57e63c6 100755 --- a/bin/ZMQ_Sub_Mails_Q.py +++ b/bin/ZMQ_Sub_Mails_Q.py @@ -1,12 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser +import redis +import ConfigParser from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -16,21 +18,21 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "mails_categ", "emails") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "mails_categ", "emails") # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format("mails_categ")) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Mails_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Mails_Q") diff --git a/bin/ZMQ_Sub_Onion.py b/bin/ZMQ_Sub_Onion.py index 67366bae..57d8e17f 100755 --- a/bin/ZMQ_Sub_Onion.py +++ b/bin/ZMQ_Sub_Onion.py @@ -21,13 +21,17 @@ Requirements *Need the ZMQ_Sub_Onion_Q Module running to be able to work properly. 
""" -import redis, zmq, ConfigParser, json, pprint, time -from packages import Paste as P +import redis +import ConfigParser +import pprint +import time +from packages import Paste from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -37,20 +41,14 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) - - r_serv2 = redis.StrictRedis( - host = cfg.get("Redis_Cache", "host"), - port = cfg.getint("Redis_Cache", "port"), - db = cfg.getint("Redis_Cache", "db")) - + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) # LOGGING # publisher.channel = "Script" @@ -61,40 +59,40 @@ def main(): # FUNCTIONS # publisher.info("Script subscribed to channel onion_categ") - - #Getting the first message from redis. + # Getting the first message from redis. message = Sub.get_msg_from_queue(r_serv) prec_filename = None - #Thanks to Faup project for this regex + # Thanks to Faup project for this regex # https://github.com/stricaud/faup url_regex = "([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|onion|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*" while True: - if message != None: - channel, filename, word, score = message.split() + if message is not None: + channel, filename, word, score = message.split() # "For each new paste" - if prec_filename == None or filename != prec_filename: + if prec_filename is None or filename != prec_filename: domains_list = [] - PST = P.Paste(filename) + PST = Paste.Paste(filename) for x in PST.get_regex(url_regex): - #Extracting url with regex + # Extracting url with regex credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x if f1 == "onion": domains_list.append(domain) - #Saving the list of extracted onion domains. + # Saving the list of extracted onion domains. 
PST.__setattr__(channel, domains_list) PST.save_attribute_redis(r_serv1, channel, domains_list) pprint.pprint(domains_list) print PST.p_path + to_print = 'Onion;{};{};{};'.format(PST.p_source, PST.p_date, PST.p_name) if len(domains_list) > 0: - publisher.warning('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name,"Detected " + str(len(domains_list))+" .onion(s)" )) + publisher.warning('{}Detected {} .onion(s)'.format(to_print, len(domains_list))) else: - publisher.info('{0};{1};{2};{3};{4}'.format("Onion", PST.p_source, PST.p_date, PST.p_name, "Onion related" )) + publisher.info('{}Onion related'.format(to_print)) prec_filename = filename diff --git a/bin/ZMQ_Sub_Onion_Q.py b/bin/ZMQ_Sub_Onion_Q.py index 6b822d9f..c1f559da 100755 --- a/bin/ZMQ_Sub_Onion_Q.py +++ b/bin/ZMQ_Sub_Onion_Q.py @@ -17,12 +17,14 @@ Requirements *Should register to the Publisher "ZMQ_PubSub_Categ" """ -import redis, zmq, ConfigParser +import redis +import ConfigParser from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -32,21 +34,21 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile,"PubSub_Categ", "onion_categ", "tor") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "onion_categ", "tor") # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format("onion_categ")) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Onion_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Onion_Q") diff --git a/bin/ZMQ_Sub_Urls.py b/bin/ZMQ_Sub_Urls.py index 1f4bac27..1ba8f892 100755 --- a/bin/ZMQ_Sub_Urls.py +++ b/bin/ZMQ_Sub_Urls.py @@ -1,8 +1,11 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser, json, pprint, time +import redis +import ConfigParser +import pprint +import time import dns.exception -from packages import Paste as P +from packages import Paste from packages import lib_refine from packages import ZMQ_PubSub from pubsublogger import publisher @@ -15,6 +18,7 @@ import ipaddress configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -24,62 +28,61 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv1 = redis.StrictRedis( - host = cfg.get("Redis_Data_Merging", "host"), - port = cfg.getint("Redis_Data_Merging", "port"), - db = cfg.getint("Redis_Data_Merging", "db")) + host=cfg.get("Redis_Data_Merging", "host"), + port=cfg.getint("Redis_Data_Merging", "port"), + db=cfg.getint("Redis_Data_Merging", "db")) r_serv2 = redis.StrictRedis( - host = cfg.get("Redis_Cache", "host"), - port = cfg.getint("Redis_Cache", "port"), - db = cfg.getint("Redis_Cache", "db")) - + host=cfg.get("Redis_Cache", "host"), + port=cfg.getint("Redis_Cache", "port"), + db=cfg.getint("Redis_Cache", "db")) # LOGGING # publisher.channel = "Script" # ZMQ # - #Subscriber + # Subscriber subscriber_name = "urls" subscriber_config_section = "PubSub_Categ" - #Publisher + 
# Publisher publisher_config_section = "PubSub_Url" publisher_name = "adress" pubchannel = cfg.get("PubSub_Url", "channel") - #Country to log as critical + # Country to log as critical cc_critical = cfg.get("PubSub_Url", "cc_critical") - Sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name) - Pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) + sub = ZMQ_PubSub.ZMQSub(configfile, subscriber_config_section, "web_categ", subscriber_name) + pub = ZMQ_PubSub.ZMQPub(configfile, publisher_config_section, publisher_name) # FUNCTIONS # publisher.info("Script URL subscribed to channel web_categ") - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) prec_filename = None url_regex = "(http|https|ftp)\://([a-zA-Z0-9\.\-]+(\:[a-zA-Z0-9\.&%\$\-]+)*@)*((25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|([a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(\:[0-9]+)*(/($|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*" while True: try: - if message != None: - channel, filename, word, score = message.split() + if message is not None: + channel, filename, word, score = message.split() - if prec_filename == None or filename != prec_filename: + if prec_filename is None or filename != prec_filename: domains_list = [] - PST = P.Paste(filename) + PST = Paste.Paste(filename) client = ip2asn() for x in PST.get_regex(url_regex): scheme, credential, subdomain, domain, host, tld, port, resource_path, query_string, f1, f2, f3, f4 = x domains_list.append(domain) msg = pubchannel + " " + str(x) - Pub.send_message(msg) + pub.send_message(msg) publisher.debug('{0} Published'.format(x)) if f1 == "onion": @@ -95,28 +98,29 @@ def main(): continue try: - l = client.lookup(socket.inet_aton(ip),qType='IP') + l = client.lookup(socket.inet_aton(ip), qType='IP') except ipaddress.AddressValueError: continue - cc = getattr(l,'cc') - asn = getattr(l,'asn') + cc = getattr(l, 'cc') + asn = getattr(l, 'asn') # EU is not an official ISO 3166 code (but used by RIPE # IP allocation) if cc is not None and cc != "EU": - print hostl,asn,cc,pycountry.countries.get(alpha2=cc).name + print hostl, asn, cc, pycountry.countries.get(alpha2=cc).name if cc == cc_critical: + # FIXME: That's going to fail. 
publisher.warning('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Detected " + str(A_values[0]) + " " + hostl + " " + cc)) else: - print hostl,asn,cc + print hostl, asn, cc A_values = lib_refine.checking_A_record(r_serv2, domains_list) if A_values[0] >= 1: PST.__setattr__(channel, A_values) - PST.save_attribute_redis(r_serv1, channel, (A_values[0],list(A_values[1]))) + PST.save_attribute_redis(r_serv1, channel, (A_values[0], list(A_values[1]))) pprint.pprint(A_values) - publisher.info('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Checked " + str(A_values[0]) + " URL" )) + publisher.info('{0};{1};{2};{3};{4}'.format("Url", PST.p_source, PST.p_date, PST.p_name, "Checked " + str(A_values[0]) + " URL")) prec_filename = filename else: @@ -128,9 +132,9 @@ def main(): publisher.debug("Script url is Idling 10s") time.sleep(10) - message = Sub.get_msg_from_queue(r_serv) + message = sub.get_msg_from_queue(r_serv) except dns.exception.Timeout: - print "dns.exception.Timeout", A_values + print "dns.exception.Timeout", A_values pass if __name__ == "__main__": diff --git a/bin/ZMQ_Sub_Urls_Q.py b/bin/ZMQ_Sub_Urls_Q.py index bb6f1ee5..4d4e2931 100755 --- a/bin/ZMQ_Sub_Urls_Q.py +++ b/bin/ZMQ_Sub_Urls_Q.py @@ -1,12 +1,14 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, zmq, ConfigParser +import redis +import ConfigParser from packages import ZMQ_PubSub from pubsublogger import publisher configfile = './packages/config.cfg' + def main(): """Main Function""" @@ -16,21 +18,21 @@ def main(): # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) # LOGGING # publisher.channel = "Queuing" # ZMQ # - Sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls") + sub = ZMQ_PubSub.ZMQSub(configfile, "PubSub_Categ", "web_categ", "urls") # FUNCTIONS # publisher.info("""Suscribed to channel {0}""".format("web_categ")) while True: - Sub.get_and_lpush(r_serv) + sub.get_and_lpush(r_serv) if r_serv.sismember("SHUTDOWN_FLAGS", "Urls_Q"): r_serv.srem("SHUTDOWN_FLAGS", "Urls_Q") diff --git a/bin/indexer_lookup.py b/bin/indexer_lookup.py index 8e0e49fc..1c019c72 100644 --- a/bin/indexer_lookup.py +++ b/bin/indexer_lookup.py @@ -12,13 +12,13 @@ import ConfigParser import argparse -import sys import gzip + def readdoc(path=None): if path is None: return False - f = gzip.open (path, 'r') + f = gzip.open(path, 'r') return f.read() configfile = '../packages/config.cfg' @@ -40,8 +40,8 @@ argParser.add_argument('-s', action='append', help='search similar documents') args = argParser.parse_args() from whoosh import index -from whoosh.fields import * -import whoosh +from whoosh.fields import Schema, TEXT, ID + schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) ix = index.open_dir(indexpath) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 38649590..74debf2a 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -1,9 +1,10 @@ #!/usr/bin/env python2 # -*-coding:UTF-8 -* -import redis, ConfigParser, json -from datetime import date, datetime, time -from flask import Flask, request, render_template, jsonify +import redis +import ConfigParser +import json +from flask import Flask, render_template, jsonify import flask # CONFIG # @@ -12,18 +13,19 @@ 
cfg.read('../../bin/packages/config.cfg') # REDIS # r_serv = redis.StrictRedis( - host = cfg.get("Redis_Queues", "host"), - port = cfg.getint("Redis_Queues", "port"), - db = cfg.getint("Redis_Queues", "db")) + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) r_serv_log = redis.StrictRedis( - host = cfg.get("Redis_Log", "host"), - port = cfg.getint("Redis_Log", "port"), - db = cfg.getint("Redis_Log", "db")) + host=cfg.get("Redis_Log", "host"), + port=cfg.getint("Redis_Log", "port"), + db=cfg.getint("Redis_Log", "db")) app = Flask(__name__, static_url_path='/static/') + def event_stream(): pubsub = r_serv_log.pubsub() pubsub.psubscribe("Script" + '.*') @@ -32,30 +34,34 @@ def event_stream(): if msg['type'] == 'pmessage' and level != "DEBUG": yield 'data: %s\n\n' % json.dumps(msg) + @app.route("/_logs") def logs(): return flask.Response(event_stream(), mimetype="text/event-stream") -@app.route("/_stuff", methods = ['GET']) +@app.route("/_stuff", methods=['GET']) def stuff(): row1 = [] for queue in r_serv.smembers("queues"): row1.append((queue, r_serv.llen(queue))) return jsonify(row1=row1) + @app.route("/") def index(): row = [] for queue in r_serv.smembers("queues"): row.append((queue, r_serv.llen(queue))) - return render_template("index.html", queues_name = row) + return render_template("index.html", queues_name=row) + @app.route("/monitoring/") def monitoring(): for queue in r_serv.smembers("queues"): - return render_template("Queue_live_Monitoring.html",last_value = queue) + return render_template("Queue_live_Monitoring.html", last_value=queue) + @app.route("/wordstrending/") def wordstrending(): @@ -63,4 +69,4 @@ def wordstrending(): if __name__ == "__main__": - app.run(host='0.0.0.0' ,port=7000, threaded=True) + app.run(host='0.0.0.0', port=7000, threaded=True)
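Two helpers recur throughout this patch but live in modules the diff does not touch, so a short sketch of each may help when reviewing.

First, every *_Q module drains a ZMQ SUB socket into a Redis list so a slow worker never blocks the publisher; the matching worker then pops from that list. The real class is ZMQ_PubSub.ZMQSub in packages/ZMQ_PubSub.py (not shown here), so the attribute names below (subsocket, subscriber_name) are assumptions about its behaviour, not a copy of it:

    import redis
    import zmq

    class QueueBridge(object):
        # Minimal sketch of the *_Q buffering pattern (Python 2).
        def __init__(self, zmq_address, channel, name):
            context = zmq.Context()
            self.subsocket = context.socket(zmq.SUB)
            self.subsocket.connect(zmq_address)
            self.subsocket.setsockopt(zmq.SUBSCRIBE, channel)
            self.subscriber_name = name

        def get_and_lpush(self, r_serv):
            # Blocking receive on the SUB socket, buffered into a Redis list.
            r_serv.lpush(self.subscriber_name, self.subsocket.recv())

        def get_msg_from_queue(self, r_serv):
            # Non-blocking pop; returns None when the buffer is empty, which
            # is why the worker loops above test "if message is not None".
            return r_serv.rpop(self.subscriber_name)

Second, ZMQ_Sub_CreditCards.py keeps only the regex matches that pass lib_refine.is_luhn_valid(). That helper is also outside this diff; the standard Luhn mod-10 check it presumably performs looks like this:

    def is_luhn_valid(card_number):
        # Double every second digit from the right; a doubled digit > 9
        # loses 9 (sum(divmod(2 * d, 10)) == 2 * d - 9 when 2 * d > 9).
        digits = [int(c) for c in str(card_number) if c.isdigit()]
        odd = sum(digits[-1::-2])
        even = sum(sum(divmod(2 * d, 10)) for d in digits[-2::-2])
        return (odd + even) % 10 == 0

    # e.g. is_luhn_valid("4111111111111111") -> True (well-known test number)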