From 8738b7cf7531fbcd7b3b639d5af6ba2f957d1131 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 24 Apr 2018 16:44:37 +0200 Subject: [PATCH] fix track term and avoid duplicate mail address --- bin/Curve.py | 4 ++-- bin/packages/lib_refine.py | 6 ++++++ var/www/modules/terms/Flask_terms.py | 14 ++++++++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/bin/Curve.py b/bin/Curve.py index 330987f2..5691fcee 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -49,7 +49,7 @@ top_termFreq_setName_month = ["TopTermFreq_set_month", 31] top_termFreq_set_array = [top_termFreq_setName_day,top_termFreq_setName_week, top_termFreq_setName_month] def check_if_tracked_term(term, path): - if term in server_term.smembers(TrackedTermsSet_Name): + if term.encode('utf8') in server_term.smembers(TrackedTermsSet_Name): #add_paste to tracked_word_set set_name = "tracked_" + term server_term.sadd(set_name, path) @@ -132,7 +132,7 @@ if __name__ == "__main__": curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1))) # Add in set only if term is not in the blacklist - if low_word not in server_term.smembers(BlackListTermsSet_Name): + if low_word.encode('utf8') not in server_term.smembers(BlackListTermsSet_Name): #consider the num of occurence of this term server_term.zincrby(curr_set, low_word, float(score)) #1 term per paste diff --git a/bin/packages/lib_refine.py b/bin/packages/lib_refine.py index f02119ad..6c0bcd9b 100644 --- a/bin/packages/lib_refine.py +++ b/bin/packages/lib_refine.py @@ -28,6 +28,12 @@ def checking_MX_record(r_serv, adress_set): names: on example@gmail.com it will try to resolve gmail.com """ + print('mails:') + print(adress_set) + + #remove duplicate + adress_set = list(set(adress_set)) + score = 0 num = len(adress_set) WalidMX = set([]) diff --git a/var/www/modules/terms/Flask_terms.py b/var/www/modules/terms/Flask_terms.py index d354c33d..db230ee6 100644 --- a/var/www/modules/terms/Flask_terms.py +++ b/var/www/modules/terms/Flask_terms.py @@ -158,6 +158,7 @@ def terms_management(): trackReg_list_num_of_paste = [] for tracked_regex in r_serv_term.smembers(TrackedRegexSet_Name): tracked_regex = tracked_regex.decode('utf8') + print(tracked_regex) notificationEMailTermMapping[tracked_regex] = "\n".join( (r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_regex)).decode('utf8') ) @@ -211,6 +212,8 @@ def terms_management(): track_list_num_of_paste = [] for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): tracked_term = tracked_term.decode('utf8') + print('tracked_term : .') + print(tracked_term) #print(TrackedTermsNotificationEmailsPrefix_Name) print(r_serv_term.smembers(TrackedTermsNotificationEmailsPrefix_Name + tracked_term)) @@ -226,7 +229,11 @@ def terms_management(): term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) set_paste_name = "tracked_" + tracked_term + print('set_paste_name : .') + print(set_paste_name) track_list_num_of_paste.append( r_serv_term.scard(set_paste_name) ) + print('track_list_num_of_paste : .') + print(track_list_num_of_paste) term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" value_range.append(term_date) track_list_values.append(value_range) @@ -252,6 +259,8 @@ def terms_management(): @terms.route("/terms_management_query_paste/") def terms_management_query_paste(): term = request.args.get('term') + print('term :') + print(term) paste_info = [] # check if regex or not @@ -263,10 +272,10 @@ def terms_management_query_paste(): track_list_path = r_serv_term.smembers(set_paste_name) else: set_paste_name = "tracked_" + term - print(r_serv_term.smembers(set_paste_name)) track_list_path = r_serv_term.smembers(set_paste_name) for path in track_list_path: + path = path.decode('utf8') paste = Paste.Paste(path) p_date = str(paste._get_p_date()) p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] @@ -523,6 +532,7 @@ def credentials_management_query_paste(): paste_info = [] for pathNum in allPath: path = r_serv_cred.hget(REDIS_KEY_ALL_PATH_SET_REV, pathNum) + path = path.decode('utf8') paste = Paste.Paste(path) p_date = str(paste._get_p_date()) p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] @@ -531,7 +541,7 @@ def credentials_management_query_paste(): p_size = paste.p_size p_mime = paste.p_mime p_lineinfo = paste.get_lines_info() - p_content = paste.get_p_content().decode('utf-8', 'ignore') + p_content = paste.get_p_content() if p_content != 0: p_content = p_content[0:400] paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content})