Categ now listens to the Global queue

pull/38/head
Raphaël Vinot 2014-09-05 17:05:45 +02:00
parent 46f27ada4e
commit e983c839ad
6 changed files with 49 additions and 44 deletions

View File

@ -39,6 +39,7 @@ Requirements
import os
import argparse
import time
import re
from pubsublogger import publisher
from packages import Paste
@ -73,29 +74,29 @@ if __name__ == "__main__":
bname = os.path.basename(filename)
tmp_dict[bname] = []
with open(os.path.join(args.d, filename), 'r') as f:
for l in f:
tmp_dict[bname].append(l.strip())
patterns = [r'%s' % re.escape(s.strip()) for s in f]
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
prec_filename = None
while True:
message = p.get_from_set()
if message is not None:
filename, word, score = message.split()
filename = p.get_from_set()
if filename is not None:
if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename)
prec_filename = filename
paste = Paste.Paste(filename)
content = paste.get_p_content()
for categ, words_list in tmp_dict.items():
if word.lower() in words_list:
msg = '{} {} {}'.format(PST.p_path, word, score)
for categ, pattern in tmp_dict.items():
found = re.findall(pattern, content)
if len(found) > 0:
msg = '{} {}'.format(paste.p_path, len(found))
print msg, categ
p.populate_set_out(msg, categ)
publisher.info(
'Categ;{};{};{};Detected {} "{}"'.format(
PST.p_source, PST.p_date, PST.p_name, score, word))
'Categ;{};{};{};Detected {} as {}'.format(
paste.p_source, paste.p_date, paste.p_name,
len(found), categ))
else:
publisher.debug("Script Categ is Idling 10s")

View File

@ -5,6 +5,8 @@ import time
from packages import Paste
from packages import lib_refine
from pubsublogger import publisher
import re
from Helper import Process
@ -19,52 +21,54 @@ if __name__ == "__main__":
# FUNCTIONS #
publisher.info("Creditcard script subscribed to channel creditcard_categ")
message = p.get_from_set()
prec_filename = None
creditcard_regex = "4[0-9]{12}(?:[0-9]{3})?"
# FIXME: for backward compatibility
channel = 'creditcard_categ'
# mastercard_regex = "5[1-5]\d{2}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
# visa_regex = "4\d{3}([\ \-]?)\d{4}\1\d{4}\1\d{4}"
# discover_regex = "6(?:011\d\d|5\d{4}|4[4-9]\d{3}|22(?:1(?:2[6-9]|
# [3-9]\d)|[2-8]\d\d|9(?:[01]\d|2[0-5])))\d{10}"
# jcb_regex = "35(?:2[89]|[3-8]\d)([\ \-]?)\d{4}\1\d{4}\1\d{4}"
# amex_regex = "3[47]\d\d([\ \-]?)\d{6}\1\d{5}"
# chinaUP_regex = "62[0-5]\d{13,16}"
# maestro_regex = "(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}"
# Source: http://www.richardsramblings.com/regex/credit-card-numbers/
cards = [
r'4\d{3}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16-digit VISA, with separators
r'5[1-5]\d{2}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # 16 digits MasterCard
r'6(?:011|22(?:(?=[\ \-]?(?:2[6-9]|[3-9]))|[2-8]|9(?=[\ \-]?(?:[01]|2[0-5])))|4[4-9]\d|5\d\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Discover Card
r'35(?:2[89]|[3-8]\d)(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}(?:[\ \-]?)\d{4}', # Japan Credit Bureau (JCB)
r'3[47]\d\d(?:[\ \-]?)\d{6}(?:[\ \-]?)\d{5}', # American Express
r'(?:5[0678]\d\d|6304|6390|67\d\d)\d{8,15}', # Maestro
]
regex = re.compile('|'.join(cards))
while True:
message = p.get_from_set()
if message is not None:
filename, word, score = message.split()
if prec_filename is None or filename != prec_filename:
filename, score = message.split()
paste = Paste.Paste(filename)
content = paste.get_p_content()
all_cards = re.findall(regex, content)
if len(all_cards) > 0:
print 'All matching', all_cards
creditcard_set = set([])
PST = Paste.Paste(filename)
for x in PST.get_regex(creditcard_regex):
if lib_refine.is_luhn_valid(x):
creditcard_set.add(x)
for card in all_cards:
clean_card = re.sub('[^0-9]', '', card)
if lib_refine.is_luhn_valid(clean_card):
print clean_card, 'is valid'
creditcard_set.add(clean_card)
PST.__setattr__(channel, creditcard_set)
PST.save_attribute_redis(channel, creditcard_set)
paste.__setattr__(channel, creditcard_set)
paste.save_attribute_redis(channel, creditcard_set)
pprint.pprint(creditcard_set)
to_print = 'CreditCard;{};{};{};'.format(
PST.p_source, PST.p_date, PST.p_name)
paste.p_source, paste.p_date, paste.p_name)
if (len(creditcard_set) > 0):
publisher.critical('{}Checked {} valid number(s)'.format(
publisher.warning('{}Checked {} valid number(s)'.format(
to_print, len(creditcard_set)))
else:
publisher.info('{}CreditCard related'.format(to_print))
prec_filename = filename
else:
publisher.debug("Script creditcard is idling 1m")
print 'Sleeping'
time.sleep(60)
time.sleep(10)
message = p.get_from_set()

View File

@ -41,7 +41,7 @@ if __name__ == "__main__":
MX_values = None
while True:
if message is not None:
filename, word, score = message.split()
filename, score = message.split()
if prec_filename is None or filename != prec_filename:
PST = Paste.Paste(filename)

View File

@ -102,7 +102,7 @@ if __name__ == "__main__":
while True:
if message is not None:
print message
filename, word, score = message.split()
filename, score = message.split()
# "For each new paste"
if prec_filename is None or filename != prec_filename:

View File

@ -46,7 +46,7 @@ if __name__ == "__main__":
while True:
if message is not None:
filename, word, score = message.split()
filename, score = message.split()
if prec_filename is None or filename != prec_filename:
domains_list = []

View File

@ -26,7 +26,7 @@ publish = Redis_Words
subscribe = Redis_Words
[Categ]
subscribe = Redis_Words
subscribe = Redis_Global
publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web
[CreditCards]