Refactoring on Credential, Phone and Release

2016-02-10 16:39:06 +01:00 · 2016-02-10 16:39:06 +01:00 · 1da8675750
parent 837efb4592
commit 1da8675750
3 changed files with 70 additions and 82 deletions
--- a/bin/Credential.py
+++ b/bin/Credential.py
@@ -2,46 +2,49 @@
 # -*-coding:UTF-8 -*
 import time
 from packages import Paste
-from pubsublogger import publisher 
+from pubsublogger import publisher
 from Helper import Process
 import re
-if __name__ == "__main__": 
+if __name__ == "__main__":
-	publisher.port = 6380 
+    publisher.port = 6380
-	publisher.channel = "Script"
+    publisher.channel = "Script"
-	config_section = "Credential"
+    config_section = "Credential"
-	p = Process(config_section) 
+    p = Process(config_section)
-	publisher.info("Find credentials")
+    publisher.info("Find credentials")
 	critical = 10
-	regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
+    critical = 10
 	regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
 	while True:
 		message = p.get_from_set() 
 		if message is not None:	
 			paste = Paste.Paste(message)
            		content = paste.get_p_content()
            		all_cred = re.findall(regex_cred, content)
            		if len(all_cred) > 0:
                		cred_set = set([])
 				for cred in all_cred:
 					cred_set.add(cred)
-                		to_print = 'Cred;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
+    regex_web = "/^(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$/"
-                		if len(cred_set) > 0:
+    regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+"
-                    			publisher.info(to_print)
+    while True:
-					for cred in set(cred_set):
+        filepath = p.get_from_set()
-						print(cred)
+        if filepath is None:
            publisher.debug("Script Credential is Idling 10s")
            print('Sleeping')
            time.sleep(10)
            continue
-					if len(cred_set) > critical:
+        paste = Paste.Paste(filepath)
-						print("========> Found more than 10 credentials on this file : {}".format(message))
+        content = paste.get_p_content()
-						site = re.findall(regex_web, content)
+        creds = set(re.findall(regex_cred, content))
-						publisher.warning(to_print)
+        if len(creds) == 0:
-						if len(site) > 0:
+            continue
 							print("=======> Probably on : {}".format(iter(site).next()))
-		else:
+        sites = set(re.findall(regex_web, content))
-            		publisher.debug("Script Credential is Idling 10s")
+
-            		print 'Sleeping'
+        message = '{} credentials found.'.format(len(creds))
-            		time.sleep(10)
+        if sites:
            message += ' Related websites: {}'.format(', '.join(sites))
        to_print = 'Credential;{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message)
        print('\n '.join(creds))
        if len(creds) > critical:
            print("========> Found more than 10 credentials on this file : {}".format(filepath))
            publisher.warning(to_print)
            if sites:
                print("=======> Probably on : {}".format(', '.join(sites)))
        else:
            publisher.info(to_print)
--- a/bin/Phone.py
+++ b/bin/Phone.py
@@ -5,10 +5,8 @@
 """
 import time
 import pprint
 import re
 from packages import Paste
 from packages import lib_refine
 from pubsublogger import publisher
 from Helper import Process
@@ -22,11 +20,11 @@ def search_phone(message):
    results = reg_phone.findall(content)
    # if the list is greater than 4, we consider the Paste may contain a list of phone numbers
-    if len(results) > 4 :
+    if len(results) > 4:
        print results
        publisher.warning('{} contains PID (phone numbers)'.format(paste.p_name))
-	if __name__ == '__main__':
+if __name__ == '__main__':
    # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
    # Port of the redis instance used by pubsublogger
    publisher.port = 6380
@@ -53,4 +51,3 @@ def search_phone(message):
        # Do something with the message from the queue
        search_phone(message)
--- a/bin/Release.py
+++ b/bin/Release.py
@@ -2,52 +2,40 @@
 # -*-coding:UTF-8 -*
 import time
 from packages import Paste
-from pubsublogger import publisher 
+from pubsublogger import publisher
 from Helper import Process
 import re
-if __name__ == "__main__": 
+if __name__ == "__main__":
-	publisher.port = 6380 
+    publisher.port = 6380
-	publisher.channel = "Script"
+    publisher.channel = "Script"
-	config_section = "Release"
+    config_section = "Release"
-	p = Process(config_section) 
+    p = Process(config_section)
-	publisher.info("Release scripts to find release names")
+    publisher.info("Release scripts to find release names")
-	#REGEX :
+    movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
-	
+    tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
-	movie = "[a-zA-Z0-9.]+\.[0-9]{4}.[a-zA-Z0-9.]+\-[a-zA-Z]+"
+    xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
 	tv = "[a-zA-Z0-9.]+\.S[0-9]{2}E[0-9]{2}.[a-zA-Z0-9.]+\.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
 	xxx = "[a-zA-Z0-9._]+.XXX.[a-zA-Z0-9.]+\-[a-zA-Z0-9]+"
 	regexs = [movie,tv,xxx]
-	regex = re.compile('|'.join(regexs))
+    regexs = [movie, tv, xxx]
 	while True:
 		message = p.get_from_set() 
 		if message is not None:		
 			paste = Paste.Paste(message)
            		content = paste.get_p_content()
            		all_release = re.findall(regex, content)
            		if len(all_release) > 0:
                		release_set = set([])
 				for rlz in all_release:
 					release_set.add(rlz)
-                		to_print = 'Release;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
+    regex = '|'.join(regexs)
-                		if (len(release_set) > 0):
+    while True:
-                    			publisher.warning('{}Checked {} valids'.format(to_print, len(release_set)))
+        filepath = p.get_from_set()
-					for rl in set(release_set):
+        if filepath is None:
-        					#publisher.warning('{}'.format(rl))
+            publisher.debug("Script Release is Idling 10s")
-						print(rl)
+            print 'Sleeping'
-					if (len(release_set) > 10):
+            time.sleep(10)
-						print("----------------------------------- Found more than 10 releases on this file : {}".format(message))
+            continue
-                		else:
+        paste = Paste.Paste(filepath)
-                    			publisher.info('{}Release related'.format(to_print))
+        content = paste.get_p_content()
        releases = set(re.findall(regex, content))
        if len(releases) == 0:
            continue
-				
+        to_print = 'Release;{};{};{};{} releases'.format(paste.p_source, paste.p_date, paste.p_name, len(releases))
-
+        if len(releases) > 30:
-		else:
+            publisher.warning(to_print)
-            		publisher.debug("Script Release is Idling 10s")
+        else:
-            		print 'Sleeping'
+            publisher.info(to_print)
            		time.sleep(10)