From 003c72bd7c810326e1575f5dcb2e9ca75eb4fbf5 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Mon, 22 Aug 2016 16:01:42 +0200 Subject: [PATCH] Fixed bug with redis where the inserted key was too long, causing a crash + fixed bug taking min instead of max in terms-top-set --- bin/Curve.py | 8 ++++---- bin/Curve_manage_top_sets.py | 16 ++++++---------- var/www/Flask_server.py | 2 +- 3 files changed, 11 insertions(+), 15 deletions(-) diff --git a/bin/Curve.py b/bin/Curve.py index 641480eb..0a533a35 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -55,7 +55,7 @@ def check_if_tracked_term(term, path): if term in TrackedTermsSet_Name: #add_paste to tracked_word_set set_name = "tracked_" + term - server.sadd(set_name, path) + server_term.sadd(set_name, path) p.populate_set_out("New Term added", 'CurveManageTopSets') @@ -113,7 +113,7 @@ if __name__ == "__main__": temp = filename.split('/') date = temp[-4] + temp[-3] + temp[-2] timestamp = calendar.timegm((int(temp[-4]), int(temp[-3]), int(temp[-2]), 0, 0, 0)) - top_termFreq_setName_day[0] += str(timestamp) + curr_set = top_termFreq_setName_day[0] + str(timestamp) low_word = word.lower() @@ -123,8 +123,8 @@ if __name__ == "__main__": # Update redis curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) - if low_word not in server.smembers(BlackListTermsSet_Name): - server.zincrby(top_termFreq_setName_day[0], int(score), low_word) + if low_word not in server_term.smembers(BlackListTermsSet_Name): + server_term.zincrby(curr_set, low_word, float(score)) #Add more info for tracked terms check_if_tracked_term(low_word, filename) diff --git a/bin/Curve_manage_top_sets.py b/bin/Curve_manage_top_sets.py index 167aa261..92d63b33 100755 --- a/bin/Curve_manage_top_sets.py +++ b/bin/Curve_manage_top_sets.py @@ -52,15 +52,14 @@ def manage_top_set(): # Retreive top data (2*max_card) from days sets for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): curr_set = top_termFreq_setName_day[0] + str(timestamp) - print top_termFreq_setName_day[0] - array_top_day = server_term.zrangebyscore(curr_set, '-inf', '+inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2) + array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2) - print array_top_day for word, value in array_top_day: - if word in dico.keys(): - dico[word] += value - else: - dico[word] = value + if word not in server_term.smembers(BlackListTermsSet_Name): + if word in dico.keys(): + dico[word] += value + else: + dico[word] = value if timestamp == startDate - num_day_week*oneDay: dico_week = copy.deepcopy(dico) @@ -78,9 +77,6 @@ def manage_top_set(): array_week.sort(key=lambda tup: -tup[1]) array_week = array_week[0:20] - print array_month - print array_week - # suppress every terms in top sets for curr_set, curr_num_day in top_termFreq_set_array[1:3]: for w in server_term.zrange(curr_set, 0, -1): diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index d605c118..07001f9b 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -760,7 +760,7 @@ def terms_plot_top_data(): if the_set == "TopTermFreq_set_day": the_set += "_" + str(today_timestamp) - for term, tot_value in r_serv_term.zrangebyscore(the_set, '-inf', '+inf', withscores=True, start=0, num=20): + for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): value_range = [] for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): value = r_serv_term.hget(timestamp, term)