Refactoring on Credential, Phone and Release

pull/52/head
Raphaël Vinot 2016-02-10 16:39:06 +01:00
parent 837efb4592
commit 1da8675750
3 changed files with 70 additions and 82 deletions

View File

@ -2,46 +2,49 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
import re import re
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
config_section = "Credential" config_section = "Credential"
p = Process(config_section) p = Process(config_section)
publisher.info("Find credentials") publisher.info("Find credentials")
critical = 10
regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/" critical = 10
regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
while True:
message = p.get_from_set()
if message is not None:
paste = Paste.Paste(message)
content = paste.get_p_content()
all_cred = re.findall(regex_cred, content)
if len(all_cred) > 0:
cred_set = set([])
for cred in all_cred:
cred_set.add(cred)
to_print = 'Cred;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
if len(cred_set) > 0: regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
publisher.info(to_print) while True:
for cred in set(cred_set): filepath = p.get_from_set()
print(cred) if filepath is None:
publisher.debug("Script Credential is Idling 10s")
print('Sleeping')
time.sleep(10)
continue
if len(cred_set) > critical: paste = Paste.Paste(filepath)
print("========> Found more than 10 credentials on this file : {}".format(message)) content = paste.get_p_content()
site = re.findall(regex_web, content) creds = set(re.findall(regex_cred, content))
publisher.warning(to_print) if len(creds) == 0:
if len(site) > 0: continue
print("=======> Probably on : {}".format(iter(site).next()))
else: sites = set(re.findall(regex_web, content))
publisher.debug("Script Credential is Idling 10s")
print 'Sleeping' message = '{} credentials found.'.format(len(creds))
time.sleep(10) if sites:
message += ' Related websites: {}'.format(', '.join(sites))
to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
print('\n '.join(creds))
if len(creds) > critical:
print("========> Found more than 10 credentials on this file : {}".format(filepath))
publisher.warning(to_print)
if sites:
print("=======> Probably on : {}".format(', '.join(sites)))
else:
publisher.info(to_print)

7
bin/Phone.py Normal file → Executable file
View File

@ -5,10 +5,8 @@
""" """
import time import time
import pprint
import re import re
from packages import Paste from packages import Paste
from packages import lib_refine
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
@ -22,11 +20,11 @@ def search_phone(message):
results = reg_phone.findall(content) results = reg_phone.findall(content)
# if the list has more than 4 entries, we consider that the Paste may contain a list of phone numbers # if the list has more than 4 entries, we consider that the Paste may contain a list of phone numbers
if len(results) > 4 : if len(results) > 4:
print results print results
publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name)) publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger # Port of the redis instance used by pubsublogger
publisher.port = 6380 publisher.port = 6380
@ -53,4 +51,3 @@ def search_phone(message):
# Do something with the message from the queue # Do something with the message from the queue
search_phone(message) search_phone(message)

View File

@ -2,52 +2,40 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import time import time
from packages import Paste from packages import Paste
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
import re import re
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
config_section = "Release" config_section = "Release"
p = Process(config_section) p = Process(config_section)
publisher.info("Release scripts to find release names") publisher.info("Release scripts to find release names")
#REGEX : movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+" xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
regexs = [movie,tv,xxx]
regex = re.compile('|'.join(regexs)) regexs = [movie, tv, xxx]
while True:
message = p.get_from_set()
if message is not None:
paste = Paste.Paste(message)
content = paste.get_p_content()
all_release = re.findall(regex, content)
if len(all_release) > 0:
release_set = set([])
for rlz in all_release:
release_set.add(rlz)
to_print = 'Release;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) regex = '|'.join(regexs)
if (len(release_set) > 0): while True:
publisher.warning('{}Checked {} valids'.format(to_print, len(release_set))) filepath = p.get_from_set()
for rl in set(release_set): if filepath is None:
#publisher.warning('{}'.format(rl)) publisher.debug("Script Release is Idling 10s")
print(rl) print 'Sleeping'
if (len(release_set) > 10): time.sleep(10)
print("----------------------------------- Found more than 10 releases on this file : {}".format(message)) continue
else: paste = Paste.Paste(filepath)
publisher.info('{}Release related'.format(to_print)) content = paste.get_p_content()
releases = set(re.findall(regex, content))
if len(releases) == 0:
continue
to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases))
if len(releases) > 30:
else: publisher.warning(to_print)
publisher.debug("Script Release is Idling 10s") else:
print 'Sleeping' publisher.info(to_print)
time.sleep(10)