From 6d199f0150cb0550d6cadf0fb61120925effb0ac Mon Sep 17 00:00:00 2001 From: kovacsbalu Date: Mon, 8 Oct 2018 11:25:32 +0200 Subject: [PATCH] Stop regexp processing after timeput (60sec) Minor pep8 fixes --- bin/RegexForTermsFrequency.py | 52 ++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index ecca8e4d..627f8db9 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -9,35 +9,45 @@ supplied in the term webpage. import redis import time from pubsublogger import publisher -from packages import lib_words from packages import Paste -import os -from os import environ -import datetime import calendar import re +import signal +import time from Helper import Process - # Email notifications from NotificationHelper import * + +class TimeoutException(Exception): + pass + + +def timeout_handler(signum, frame): + raise TimeoutException + +signal.signal(signal.SIGALRM, timeout_handler) + + # Config Variables -DICO_REFRESH_TIME = 60 #s +DICO_REFRESH_TIME = 60 # s +PROCESS_TIMEOUT = 60 BlackListTermsSet_Name = "BlackListSetTermSet" TrackedTermsSet_Name = "TrackedSetTermSet" TrackedRegexSet_Name = "TrackedRegexSet" -top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set +top_term_freq_max_set_cardinality = 20 # Max cardinality of the terms frequences set oneDay = 60*60*24 top_termFreq_setName_day = ["TopTermFreq_set_day_", 1] top_termFreq_setName_week = ["TopTermFreq_set_week", 7] top_termFreq_setName_month = ["TopTermFreq_set_month", 31] -top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] +top_termFreq_set_array = [top_termFreq_setName_day, top_termFreq_setName_week, top_termFreq_setName_month] # create direct link in mail full_paste_url = "/showsavedpaste/?paste=" + def refresh_dicos(): dico_regex = {} dico_regexname_to_redis = {} @@ -67,7 +77,7 @@ if __name__ == "__main__": # create direct link in mail full_paste_url = p.config.get("Notifications", "ail_domain") + full_paste_url - #compile the regex + # compile the regex dico_refresh_cooldown = time.time() dico_regex, dico_regexname_to_redis = refresh_dicos() @@ -89,11 +99,21 @@ if __name__ == "__main__": curr_set = top_termFreq_setName_day[0] + str(timestamp) content = Paste.Paste(filename).get_p_content() - #iterate the word with the regex + # iterate the word with the regex for regex_str, compiled_regex in dico_regex.items(): - matched = compiled_regex.search(content) - if matched is not None: #there is a match + signal.alarm(PROCESS_TIMEOUT) + try: + matched = compiled_regex.search(content) + except TimeoutException: + log_msg = "{0} processing timeout".format(filename) + print (log_msg) + publisher.critical(log_msg) + continue + else: + signal.alarm(0) + + if matched is not None: # there is a match print('regex matched {}'.format(regex_str)) matched = matched.group(0) regex_str_complete = "/" + regex_str + "/" @@ -104,8 +124,8 @@ if __name__ == "__main__": # create mail body mail_body = ("AIL Framework,\n" - "New occurrence for regex: " + regex_str + "\n" - ''+full_paste_url + filename) + "New occurrence for regex: " + regex_str + "\n" + ''+full_paste_url + filename) # Send to every associated email adress for email in server_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + regex_str_complete): @@ -115,9 +135,9 @@ if __name__ == "__main__": new_to_the_set = server_term.sadd(set_name, filename) new_to_the_set = True if new_to_the_set == 1 else False - #consider the num of occurence of this term + # consider the num of occurence of this term regex_value = int(server_term.hincrby(timestamp, dico_regexname_to_redis[regex_str], int(1))) - #1 term per paste + # 1 term per paste if new_to_the_set: regex_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), dico_regexname_to_redis[regex_str], int(1))) server_term.zincrby("per_paste_" + curr_set, dico_regexname_to_redis[regex_str], float(1))