mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			Merge pull request #266 from kovacsbalu/fix-regexp-timeout
Stop regexp processing after timeout (60sec)
pull/267/head^2
						commit
						d9bc2d6f44
					
				|  | @ -9,35 +9,43 @@ supplied in  the term webpage. | |||
| import redis | ||||
| import time | ||||
| from pubsublogger import publisher | ||||
| from packages import lib_words | ||||
| from packages import Paste | ||||
| import os | ||||
| from os import environ | ||||
| import datetime | ||||
| import calendar | ||||
| import re | ||||
| import signal | ||||
| import time | ||||
| from Helper import Process | ||||
| 
 | ||||
| # Email notifications | ||||
| from NotificationHelper import * | ||||
| 
 | ||||
| 
 | ||||
| class TimeoutException(Exception): | ||||
|     pass | ||||
| 
 | ||||
| 
 | ||||
| def timeout_handler(signum, frame): | ||||
|     raise TimeoutException | ||||
| 
 | ||||
| signal.signal(signal.SIGALRM, timeout_handler) | ||||
| 
 | ||||
| # Config Variables | ||||
| DICO_REFRESH_TIME = 60 #s | ||||
| DICO_REFRESH_TIME = 60  # s | ||||
| 
 | ||||
| BlackListTermsSet_Name = "BlackListSetTermSet" | ||||
| TrackedTermsSet_Name = "TrackedSetTermSet" | ||||
| TrackedRegexSet_Name = "TrackedRegexSet" | ||||
| 
 | ||||
| top_term_freq_max_set_cardinality = 20 # Max cardinality of the term frequencies set | ||||
| top_term_freq_max_set_cardinality = 20  # Max cardinality of the term frequencies set | ||||
| oneDay = 60*60*24 | ||||
| top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] | ||||
| top_termFreq_setName_week = ["TopTermFreq_set_week", 7] | ||||
| top_termFreq_setName_month = ["TopTermFreq_set_month", 31] | ||||
| top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] | ||||
| top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] | ||||
| 
 | ||||
| # create direct link in mail | ||||
| full_paste_url = "/showsavedpaste/?paste=" | ||||
| 
 | ||||
| 
 | ||||
| def refresh_dicos(): | ||||
|     dico_regex = {} | ||||
|     dico_regexname_to_redis = {} | ||||
|  | @ -53,6 +61,7 @@ if __name__ == "__main__": | |||
| 
 | ||||
|     config_section = 'RegexForTermsFrequency' | ||||
|     p = Process(config_section) | ||||
|     max_execution_time = p.config.getint(config_section, "max_execution_time") | ||||
| 
 | ||||
|     # REDIS # | ||||
|     server_term = redis.StrictRedis( | ||||
|  | @ -67,7 +76,7 @@ if __name__ == "__main__": | |||
|     # create direct link in mail | ||||
|     full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url | ||||
| 
 | ||||
|     #compile the regex | ||||
|     # compile the regex | ||||
|     dico_refresh_cooldown = time.time() | ||||
|     dico_regex, dico_regexname_to_redis = refresh_dicos() | ||||
| 
 | ||||
|  | @ -87,13 +96,22 @@ if __name__ == "__main__": | |||
|             timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) | ||||
| 
 | ||||
|             curr_set = top_termFreq_setName_day[0] + str(timestamp) | ||||
|             content = Paste.Paste(filename).get_p_content() | ||||
|             paste = Paste.Paste(filename) | ||||
|             content = paste.get_p_content() | ||||
| 
 | ||||
|             #iterate the word with the regex | ||||
|             # iterate the word with the regex | ||||
|             for regex_str, compiled_regex in dico_regex.items(): | ||||
|                 matched = compiled_regex.search(content) | ||||
| 
 | ||||
|                 if matched is not None: #there is a match | ||||
|                 signal.alarm(max_execution_time) | ||||
|                 try: | ||||
|                     matched = compiled_regex.search(content) | ||||
|                 except TimeoutException: | ||||
|                     print ("{0} processing timeout".format(paste.p_path)) | ||||
|                     continue | ||||
|                 else: | ||||
|                     signal.alarm(0) | ||||
| 
 | ||||
|                 if matched is not None:  # there is a match | ||||
|                     print('regex matched {}'.format(regex_str)) | ||||
|                     matched = matched.group(0) | ||||
|                     regex_str_complete = "/" + regex_str + "/" | ||||
|  | @ -104,8 +122,8 @@ if __name__ == "__main__": | |||
| 
 | ||||
|                             # create mail body | ||||
|                             mail_body = ("AIL Framework,\n" | ||||
|                                         "New occurrence for regex: " + regex_str + "\n" | ||||
|                                         ''+full_paste_url + filename) | ||||
|                                          "New occurrence for regex: " + regex_str + "\n" | ||||
|                                          ''+full_paste_url + filename) | ||||
| 
 | ||||
|                             # Send to every associated email address | ||||
|                             for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete): | ||||
|  | @ -115,9 +133,9 @@ if __name__ == "__main__": | |||
|                         new_to_the_set = server_term.sadd(set_name, filename) | ||||
|                         new_to_the_set = True if new_to_the_set == 1 else False | ||||
| 
 | ||||
|                         #consider the number of occurrences of this term | ||||
|                         # consider the number of occurrences of this term | ||||
|                         regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1))) | ||||
|                         #1 term per paste | ||||
|                         # 1 term per paste | ||||
|                         if new_to_the_set: | ||||
|                             regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) | ||||
|                             server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1)) | ||||
|  |  | |||
|  | @ -99,6 +99,9 @@ operation_mode = 3 | |||
| #Define how long a paste will be considered a duplicate, in seconds (1 day = 86400) | ||||
| ttl_duplicate = 86400 | ||||
| 
 | ||||
| [RegexForTermsFrequency] | ||||
| max_execution_time = 60 | ||||
| 
 | ||||
| ##### Redis ##### | ||||
| [Redis_Cache] | ||||
| host = localhost | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Sami Mokaddem
						Sami Mokaddem