Categ now listens to the Global queue

2014-09-05 17:05:45 +02:00 · 2014-09-05 17:05:45 +02:00 · e983c839ad
parent 46f27ada4e
commit e983c839ad
6 changed files with 49 additions and 44 deletions
--- a/bin/Categ.py
+++ b/bin/Categ.py
@ -39,6 +39,7 @@ Requirements
 import os
 import argparse
 import time
 import re
 from pubsublogger import publisher
 from packages import Paste
@ -73,29 +74,29 @@ if __name__ == "__main__":
        bname = os.path.basename(filename)
        tmp_dict[bname] = []
        with open(os.path.join(args.d, filename), 'r') as f:
-            for l in f:
+            patterns = [r'%s' % re.escape(s.strip()) for s in f]
-                tmp_dict[bname].append(l.strip())
+            tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
    prec_filename = None
    while True:
-        message = p.get_from_set()
+        filename = p.get_from_set()
-        if message is not None:
+        if filename is not None:
            filename, word, score = message.split()
-            if prec_filename is None or filename != prec_filename:
+            paste = Paste.Paste(filename)
-                PST = Paste.Paste(filename)
+            content = paste.get_p_content()
                prec_filename = filename
-            for categ, words_list in tmp_dict.items():
+            for categ, pattern in tmp_dict.items():
-
+                found = re.findall(pattern, content)
-                if word.lower() in words_list:
+                if len(found) > 0:
-                    msg = '{} {} {}'.format(PST.p_path, word, score)
+                    msg = '{} {}'.format(paste.p_path, len(found))
                    print msg, categ
                    p.populate_set_out(msg, categ)
                    publisher.info(
-                        'Categ;{};{};{};Detected {} "{}"'.format(
+                        'Categ;{};{};{};Detected {} as {}'.format(
-                            PST.p_source, PST.p_date, PST.p_name, score, word))
+                            paste.p_source, paste.p_date, paste.p_name,
                            len(found), categ))
        else:
            publisher.debug("Script Categ is Idling 10s")
--- a/bin/CreditCard.py
+++ b/bin/CreditCard.py
@ -5,6 +5,8 @@ import time
 from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher
 import re
 from Helper import Process
@ -19,52 +21,54 @@ if __name__ == "__main__":
    # FUNCTIONS #
    publisher.info("Creditcard script subscribed to channel creditcard_categ")
    message = p.get_from_set()
    prec_filename = None
    creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
    # FIXME For retro compatibility
    channel = 'creditcard_categ'
-    # mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
+    # Source: http://www.richardsramblings.com/regex/credit-card-numbers/
-    # visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
+    cards = [
-    # discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
+        r'4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}',  # 16-digit VISA, with separators
-    #                   [3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
+        r'5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}',  # 16 digits MasterCard
-    # jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
+        r'6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}',  # Discover Card
-    # amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
+        r'35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}',  # Japan Credit Bureau (JCB)
-    # chinaUP_regex = "62[0-5]\d{13,16}"
+        r'3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}',  # American Express
-    # maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}"
+        r'(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}',  # Maestro
        ]
    regex = re.compile('|'.join(cards))
    while True:
        message = p.get_from_set()
        if message is not None:
-            filename, word, score = message.split()
+            filename, score = message.split()
-
+            paste = Paste.Paste(filename)
-            if prec_filename is None or filename != prec_filename:
+            content = paste.get_p_content()
            all_cards = re.findall(regex, content)
            if len(all_cards) > 0:
                print 'All matching', all_cards
                creditcard_set = set([])
                PST = Paste.Paste(filename)
-                for x in PST.get_regex(creditcard_regex):
+                for card in all_cards:
-                    if lib_refine.is_luhn_valid(x):
+                    clean_card = re.sub('[^0-9]', '', card)
-                        creditcard_set.add(x)
+                    if lib_refine.is_luhn_valid(clean_card):
                        print clean_card, 'is valid'
                        creditcard_set.add(clean_card)
-                PST.__setattr__(channel, creditcard_set)
+                paste.__setattr__(channel, creditcard_set)
-                PST.save_attribute_redis(channel, creditcard_set)
+                paste.save_attribute_redis(channel, creditcard_set)
                pprint.pprint(creditcard_set)
                to_print = 'CreditCard;{};{};{};'.format(
-                    PST.p_source, PST.p_date, PST.p_name)
+                    paste.p_source, paste.p_date, paste.p_name)
                if (len(creditcard_set) > 0):
-                    publisher.critical('{}Checked {} valid number(s)'.format(
+                    publisher.warning('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                else:
                    publisher.info('{}CreditCard related'.format(to_print))
            prec_filename = filename
        else:
            publisher.debug("Script creditcard is idling 1m")
            print 'Sleeping'
-            time.sleep(60)
+            time.sleep(10)
        message = p.get_from_set()
--- a/bin/Mail.py
+++ b/bin/Mail.py
@ -41,7 +41,7 @@ if __name__ == "__main__":
    MX_values = None
    while True:
        if message is not None:
-            filename, word, score = message.split()
+            filename, score = message.split()
            if prec_filename is None or filename != prec_filename:
                PST = Paste.Paste(filename)
--- a/bin/Onion.py
+++ b/bin/Onion.py
@ -102,7 +102,7 @@ if __name__ == "__main__":
    while True:
        if message is not None:
            print message
-            filename, word, score = message.split()
+            filename, score = message.split()
            # "For each new paste"
            if prec_filename is None or filename != prec_filename:
--- a/bin/Url.py
+++ b/bin/Url.py
@ -46,7 +46,7 @@ if __name__ == "__main__":
    while True:
        if message is not None:
-            filename, word, score = message.split()
+            filename, score = message.split()
            if prec_filename is None or filename != prec_filename:
                domains_list = []
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@ -26,7 +26,7 @@ publish = Redis_Words
 subscribe = Redis_Words
 [Categ]
-subscribe = Redis_Words
+subscribe = Redis_Global
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web
 [CreditCards]