diff --git a/bin/Curve.py b/bin/Curve.py index 59557881..e6c0bb05 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -117,12 +117,18 @@ if __name__ == "__main__": r_serv1.hincrby(low_word, date, int(score)) # Update redis + #consider the num of occurrence of this term curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) + #1 term per paste + curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1))) # Add in set only if term is not in the blacklist if low_word not in server_term.smembers(BlackListTermsSet_Name): + #consider the num of occurrence of this term server_term.zincrby(curr_set, low_word, float(score)) - + #1 term per paste + server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) + #Add more info for tracked terms check_if_tracked_term(low_word, filename) diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 03ea8f96..562705cf 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -48,11 +48,13 @@ def manage_top_set(): blacklist_size = int(server_term.scard(BlackListTermsSet_Name)) dico = {} + dico_per_paste = {} # Retreive top data (max_card + blacklist_size) from days sets for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): curr_set = top_termFreq_setName_day[0] + str(timestamp) array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) + array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) for word, value in array_top_day: if word not in server_term.smembers(BlackListTermsSet_Name): @@ -61,8 +63,16 @@ def manage_top_set(): else: dico[word] = value + for word, value in array_top_day_per_paste: + if word not in server_term.smembers(BlackListTermsSet_Name): + if word in dico_per_paste.keys(): + 
dico_per_paste[word] += value + else: + dico_per_paste[word] = value + if timestamp == startDate - num_day_week*oneDay: dico_week = copy.deepcopy(dico) + dico_week_per_paste = copy.deepcopy(dico_per_paste) # convert dico into sorted array array_month = [] @@ -77,17 +87,37 @@ def manage_top_set(): array_week.sort(key=lambda tup: -tup[1]) array_week = array_week[0:20] + # convert dico_per_paste into sorted array + array_month_per_paste = [] + for w, v in dico_per_paste.iteritems(): + array_month_per_paste.append((w, v)) + array_month_per_paste.sort(key=lambda tup: -tup[1]) + array_month_per_paste = array_month_per_paste[0:20] + + array_week_per_paste = [] + for w, v in dico_week_per_paste.iteritems(): + array_week_per_paste.append((w, v)) + array_week_per_paste.sort(key=lambda tup: -tup[1]) + array_week_per_paste = array_week_per_paste[0:20] + + # suppress every terms in top sets for curr_set, curr_num_day in top_termFreq_set_array[1:3]: for w in server_term.zrange(curr_set, 0, -1): server_term.zrem(curr_set, w) + for w in server_term.zrange("per_paste_" + curr_set, 0, -1): + server_term.zrem("per_paste_" + curr_set, w) # Add top term from sorted array in their respective sorted sets for elem in array_week: server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) + for elem in array_week_per_paste: + server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) for elem in array_month: server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) + for elem in array_month_per_paste: + server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0]) timestamp = int(time.mktime(datetime.datetime.now().timetuple())) value = str(timestamp) + ", " + "-" diff --git a/doc/all_modules.txt b/doc/all_modules.txt deleted file mode 100644 index fabdf4e9..00000000 --- a/doc/all_modules.txt +++ /dev/null @@ -1,31 +0,0 @@ -Attributes -BrowseWarningPaste -Categ -Credential -CreditCards -Curve 
-CurveManageTopSets -Cve -DomClassifier -Duplicates -Global -Indexer -Keys -Lines -Mail -Mixer -ModuleInformation -Keys -Lines -Mail -Mixer -ModuleInformation -ModuleStats -Onion -Phone -Release -SentimentAnalysis -SQLInjectionDetection -Tokenize -Web -WebStats diff --git a/var/www/Flasks/Flask_terms.py b/var/www/Flasks/Flask_terms.py index f5416ddc..fad048bd 100644 --- a/var/www/Flasks/Flask_terms.py +++ b/var/www/Flasks/Flask_terms.py @@ -20,13 +20,13 @@ cfg = Flask_config.cfg r_serv_term = Flask_config.r_serv_term # ============ FUNCTIONS ============ -def Term_getValueOverRange(word, startDate, num_day): +def Term_getValueOverRange(word, startDate, num_day, per_paste=""): passed_days = 0 oneDay = 60*60*24 to_return = [] curr_to_return = 0 for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay): - value = r_serv_term.hget(timestamp, word) + value = r_serv_term.hget(per_paste+str(timestamp), word) curr_to_return += int(value) if value is not None else 0 for i in num_day: if passed_days == i-1: @@ -39,6 +39,14 @@ def Term_getValueOverRange(word, startDate, num_day): @app.route("/terms_management/") def terms_management(): + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: + per_paste_text = "per_paste_" + per_paste = 1 + else: + per_paste_text = "" + per_paste = 0 + TrackedTermsSet_Name = "TrackedSetTermSet" BlackListTermsSet_Name = "BlackListSetTermSet" TrackedTermsDate_Name = "TrackedTermDate" @@ -53,7 +61,7 @@ def terms_management(): track_list_num_of_paste = [] for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): track_list.append(tracked_term) - value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) + value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text) term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) @@ -70,7 +78,7 @@ def terms_management(): term_date = 
datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" black_list.append([blacked_term, term_date]) - return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste, per_paste=per_paste) @app.route("/terms_management_query_paste/") @@ -182,12 +190,19 @@ def terms_plot_tool_data(): range_end = calendar.timegm(range_end.timetuple()) term = request.args.get('term') + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: + per_paste = "per_paste_" + else: + per_paste = "" + + if term is None: return "None" else: value_range = [] for timestamp in range(range_start, range_end+oneDay, oneDay): - value = r_serv_term.hget(timestamp, term) + value = r_serv_term.hget(per_paste+str(timestamp), term) curr_value_range = int(value) if value is not None else 0 value_range.append([timestamp, curr_value_range]) value_range.insert(0,term) @@ -196,7 +211,9 @@ def terms_plot_tool_data(): @app.route("/terms_plot_top/") def terms_plot_top(): - return render_template("terms_plot_top.html") + per_paste = request.args.get('per_paste') + per_paste = per_paste if per_paste is not None else 1 + return render_template("terms_plot_top.html", per_paste=per_paste) @app.route("/terms_plot_top_data/") @@ -206,17 +223,24 @@ def terms_plot_top_data(): today = today.replace(hour=0, minute=0, second=0, microsecond=0) today_timestamp = calendar.timegm(today.timetuple()) - set_day = "TopTermFreq_set_day_" + str(today_timestamp) - set_week = "TopTermFreq_set_week"; - set_month = "TopTermFreq_set_month"; + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: + per_paste = "per_paste_" + else: + per_paste = 
"" - the_set = request.args.get('set') + set_day = per_paste + "TopTermFreq_set_day_" + str(today_timestamp) + set_week = per_paste + "TopTermFreq_set_week"; + set_month = per_paste + "TopTermFreq_set_month"; + + the_set = per_paste + request.args.get('set') num_day = int(request.args.get('num_day')) + if the_set is None: return "None" else: to_return = [] - if the_set == "TopTermFreq_set_day": + if "TopTermFreq_set_day" in the_set: the_set += "_" + str(today_timestamp) for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): @@ -229,7 +253,7 @@ def terms_plot_top_data(): position['month'] = position['month']+1 if position['month'] is not None else "<20" value_range = [] for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): - value = r_serv_term.hget(timestamp, term) + value = r_serv_term.hget(per_paste+str(timestamp), term) curr_value_range = int(value) if value is not None else 0 value_range.append([timestamp, curr_value_range]) diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html index 3d4f2f77..22cbbc51 100644 --- a/var/www/templates/terms_management.html +++ b/var/www/templates/terms_management.html @@ -12,6 +12,7 @@ + @@ -91,6 +92,13 @@
Termx | +Term | Added date | Action | |
---|---|---|---|---|
{{ black_list[i][0] }} | -{{ black_list[i][1] }} | +{{ term }} | +{{ date }} | - + |