From b9ceb4ec795a20787e8fae91bb62a71b1fd392a3 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 14 Feb 2017 09:35:19 +0100 Subject: [PATCH 1/5] Updated feeding AIL with pystemon --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index df1b8218..1e4c7b6d 100644 --- a/README.md +++ b/README.md @@ -127,7 +127,7 @@ Here are the steps to setup pystemon and feed data to AIL: 3. Launch pystemon ``` ./pystemon ``` -4. Edit the file ```bin/feeder/pystemon-feeder.py``` and modify the pystemonpath path accordingly +4. Edit your configuration file ```bin/packages/config.cfg``` and modify the pystemonpath path accordingly 5. Launch pystemon-feeder ``` ./pystemon-feeder.py ``` From 7b2e716d0b6db99f49c5714d0dea8331eb01a298 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 15 Feb 2017 11:49:58 +0100 Subject: [PATCH 2/5] Deleted all_modules.txt added by error --- doc/all_modules.txt | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 doc/all_modules.txt diff --git a/doc/all_modules.txt b/doc/all_modules.txt deleted file mode 100644 index fabdf4e9..00000000 --- a/doc/all_modules.txt +++ /dev/null @@ -1,31 +0,0 @@ -Attributes -BrowseWarningPaste -Categ -Credential -CreditCards -Curve -CurveManageTopSets -Cve -DomClassifier -Duplicates -Global -Indexer -Keys -Lines -Mail -Mixer -ModuleInformation -Keys -Lines -Mail -Mixer -ModuleInformation -ModuleStats -Onion -Phone -Release -SentimentAnalysis -SQLInjectionDetection -Tokenize -Web -WebStats From 25adae0104aa264d84f4226f36350e4cef29af06 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Wed, 15 Feb 2017 16:29:02 +0100 Subject: [PATCH 3/5] Added possibility to select in top_terms the data per_paste or not (need module testing) --- bin/Curve.py | 8 ++++++- bin/CurveManageTopSets.py | 30 +++++++++++++++++++++++++++ var/www/Flasks/Flask_terms.py | 10 +++++++-- var/www/templates/terms_plot_top.html | 18 +++++++++++++--- 4 files changed, 60 insertions(+), 6 deletions(-) diff --git a/bin/Curve.py b/bin/Curve.py index 59557881..61afc81a 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -117,12 +117,18 @@ if __name__ == "__main__": r_serv1.hincrby(low_word, date, int(score)) # Update redis + #consider the num of occurence of this term curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) + #1 term per paste + curr_word_value_perPaste = int(server_term.hincrby(timestamp, low_word, int(1))) # Add in set only if term is not in the blacklist if low_word not in server_term.smembers(BlackListTermsSet_Name): + #consider the num of occurence of this term server_term.zincrby(curr_set, low_word, float(score)) - + #1 term per paste + server_term.zincrby("per_paste_" + curr_set, low_word, float(score)) + #Add more info for tracked terms check_if_tracked_term(low_word, filename) diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 03ea8f96..8fd5b9bf 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -48,11 +48,13 @@ def manage_top_set(): blacklist_size = int(server_term.scard(BlackListTermsSet_Name)) dico = {} + dico_per_paste = {} # Retreive top data (max_card + blacklist_size) from days sets for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): curr_set = top_termFreq_setName_day[0] + str(timestamp) array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) + array_top_day_per_paste = server_term.zrevrangebyscore("per_paste_" + curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) for word, value in array_top_day: if word not in server_term.smembers(BlackListTermsSet_Name): @@ -61,8 +63,16 @@ def manage_top_set(): else: dico[word] = value + for word, value in "per_paste_" + array_top_day: + if word not in server_term.smembers(BlackListTermsSet_Name): + if word in dico_per_paste.keys(): + dico_per_paste[word] += value + else: + dico_per_paste[word] = value + if timestamp == startDate - num_day_week*oneDay: dico_week = copy.deepcopy(dico) + dico_week_per_paste = copy.deepcopy(dico_per_paste) # convert dico into sorted array array_month = [] @@ -77,17 +87,37 @@ def manage_top_set(): array_week.sort(key=lambda tup: -tup[1]) array_week = array_week[0:20] + # convert dico_per_paste into sorted array + array_month_per_paste = [] + for w, v in dico_per_paste.iteritems(): + array_month_per_paste.append((w, v)) + array_month_per_paste.sort(key=lambda tup: -tup[1]) + array_month_per_paste = array_month_per_paste[0:20] + + array_week_per_paste = [] + for w, v in dico_week_per_paste.iteritems(): + array_week_per_paste.append((w, v)) + array_week_per_paste.sort(key=lambda tup: -tup[1]) + array_week_per_paste = array_week_per_paste[0:20] + + # suppress every terms in top sets for curr_set, curr_num_day in top_termFreq_set_array[1:3]: for w in server_term.zrange(curr_set, 0, -1): server_term.zrem(curr_set, w) + for w in server_term.zrange("per_paste_" + curr_set, 0, -1): + server_term.zrem("per_paste_" + curr_set, w) # Add top term from sorted array in their respective sorted sets for elem in array_week: server_term.zadd(top_termFreq_setName_week[0], float(elem[1]), elem[0]) + for elem in array_week_per_paste: + server_term.zadd("per_paste_" + top_termFreq_setName_week[0], float(elem[1]), elem[0]) for elem in array_month: server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) + for elem in array_month_per_paste: + server_term.zadd("per_paste_" + top_termFreq_setName_month[0], float(elem[1]), elem[0]) timestamp = int(time.mktime(datetime.datetime.now().timetuple())) value = str(timestamp) + ", " + "-" diff --git a/var/www/Flasks/Flask_terms.py b/var/www/Flasks/Flask_terms.py index f5416ddc..3ccf525d 100644 --- a/var/www/Flasks/Flask_terms.py +++ b/var/www/Flasks/Flask_terms.py @@ -196,7 +196,9 @@ def terms_plot_tool_data(): @app.route("/terms_plot_top/") def terms_plot_top(): - return render_template("terms_plot_top.html") + per_paste = request.args.get('per_paste') + per_paste = per_paste if per_paste is not None else 1 + return render_template("terms_plot_top.html", per_paste=per_paste) @app.route("/terms_plot_top_data/") @@ -212,11 +214,15 @@ def terms_plot_top_data(): the_set = request.args.get('set') num_day = int(request.args.get('num_day')) + per_paste = int(request.args.get('per_paste')) + if per_paste == 1: + the_set = "per_paste_" + the_set + if the_set is None: return "None" else: to_return = [] - if the_set == "TopTermFreq_set_day": + if "TopTermFreq_set_day" in the_set: the_set += "_" + str(today_timestamp) for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): diff --git a/var/www/templates/terms_plot_top.html b/var/www/templates/terms_plot_top.html index cf11bc88..af62c33f 100644 --- a/var/www/templates/terms_plot_top.html +++ b/var/www/templates/terms_plot_top.html @@ -67,6 +67,9 @@
+
+ 1 term per paste +
Today @@ -281,7 +284,15 @@ $(document).ready(function(){ activePage = $('h1.page-header').attr('data-page'); $("#"+activePage).addClass("active"); + if({{ per_paste }} == 1) { + $("#per_paste").attr('checked', true) + } }); + + function reload_per_paste() { + var checked = $("#per_paste").prop( "checked" ) ? 1 : 0; + window.location.href = {{ url_for('terms_plot_top') }}+"?per_paste="+checked; + } @@ -332,6 +343,7 @@ set_today = "TopTermFreq_set_day"; set_week = "TopTermFreq_set_week"; set_month = "TopTermFreq_set_month"; default_num_curves = 8; +per_paste = {{ per_paste }} var plot_today; var plot_week; @@ -339,7 +351,7 @@ var plot_month; var promises = []; // Used to know when everything has been received -promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5 }, function(data, status){ +promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today, num_day: 5, per_paste: per_paste }, function(data, status){ data.sort(function(a, b){return b[2]-a[2];}); // Sort data var table_today = $("#table-today") @@ -380,7 +392,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_today })); -promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7 }, function(data, status){ +promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, num_day: 7, per_paste: per_paste }, function(data, status){ data.sort(function(a, b){return b[2]-a[2];}); // Sort data var table = $("#table-week") @@ -420,7 +432,7 @@ promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_week, }); })); -promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31 }, function(data, status){ +promises.push($.getJSON("{{ url_for('terms_plot_top_data') }}", { set: set_month, num_day: 31, per_paste: per_paste }, function(data, status){ data.sort(function(a, b){return b[2]-a[2];}); // Sort data var table = $("#table-month") From 4071bf2e9116a0b35334037881a29db269c2ade1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 28 Feb 2017 15:01:48 +0100 Subject: [PATCH 4/5] fixed a bug in manage_term (usage of loop.index0) + fixed bug in top_set where graph was not displaying the correct per_paste value --- bin/Curve.py | 4 ++-- bin/CurveManageTopSets.py | 2 +- var/www/Flasks/Flask_terms.py | 21 +++++++++++++-------- var/www/templates/terms_management.html | 24 ++++++++++-------------- var/www/templates/terms_plot_top.html | 12 +++++++++--- 5 files changed, 35 insertions(+), 28 deletions(-) diff --git a/bin/Curve.py b/bin/Curve.py index 61afc81a..e6c0bb05 100755 --- a/bin/Curve.py +++ b/bin/Curve.py @@ -120,14 +120,14 @@ if __name__ == "__main__": #consider the num of occurence of this term curr_word_value = int(server_term.hincrby(timestamp, low_word, int(score))) #1 term per paste - curr_word_value_perPaste = int(server_term.hincrby(timestamp, low_word, int(1))) + curr_word_value_perPaste = int(server_term.hincrby("per_paste_" + str(timestamp), low_word, int(1))) # Add in set only if term is not in the blacklist if low_word not in server_term.smembers(BlackListTermsSet_Name): #consider the num of occurence of this term server_term.zincrby(curr_set, low_word, float(score)) #1 term per paste - server_term.zincrby("per_paste_" + curr_set, low_word, float(score)) + server_term.zincrby("per_paste_" + curr_set, low_word, float(1)) #Add more info for tracked terms check_if_tracked_term(low_word, filename) diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 8fd5b9bf..562705cf 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -63,7 +63,7 @@ def manage_top_set(): else: dico[word] = value - for word, value in "per_paste_" + array_top_day: + for word, value in array_top_day_per_paste: if word not in server_term.smembers(BlackListTermsSet_Name): if word in dico_per_paste.keys(): dico_per_paste[word] += value diff --git a/var/www/Flasks/Flask_terms.py b/var/www/Flasks/Flask_terms.py index 3ccf525d..b56375e7 100644 --- a/var/www/Flasks/Flask_terms.py +++ b/var/www/Flasks/Flask_terms.py @@ -208,16 +208,21 @@ def terms_plot_top_data(): today = today.replace(hour=0, minute=0, second=0, microsecond=0) today_timestamp = calendar.timegm(today.timetuple()) - set_day = "TopTermFreq_set_day_" + str(today_timestamp) - set_week = "TopTermFreq_set_week"; - set_month = "TopTermFreq_set_month"; - - the_set = request.args.get('set') - num_day = int(request.args.get('num_day')) per_paste = int(request.args.get('per_paste')) if per_paste == 1: - the_set = "per_paste_" + the_set + per_paste = "per_paste_" + else: + per_paste = "" + set_day = per_paste + "TopTermFreq_set_day_" + str(today_timestamp) + set_week = per_paste + "TopTermFreq_set_week"; + set_month = per_paste + "TopTermFreq_set_month"; + + the_set = per_paste + request.args.get('set') + num_day = int(request.args.get('num_day')) + + print(set_day) + print(per_paste) if the_set is None: return "None" else: @@ -235,7 +240,7 @@ def terms_plot_top_data(): position['month'] = position['month']+1 if position['month'] is not None else "<20" value_range = [] for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): - value = r_serv_term.hget(timestamp, term) + value = r_serv_term.hget(per_paste+str(timestamp), term) curr_value_range = int(value) if value is not None else 0 value_range.append([timestamp, curr_value_range]) diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html index 3d4f2f77..1cbfb67c 100644 --- a/var/www/templates/terms_management.html +++ b/var/www/templates/terms_management.html @@ -116,21 +116,19 @@ - {% set i = 0 %} {% for term in track_list %} {{ term }} - {{ track_list_values[i][3] }} - {{ track_list_values[i][0] }} - {{ track_list_values[i][1] }} - {{ track_list_values[i][2] }} - {{ track_list_num_of_paste[i] }} + {{ track_list_values[loop.index0][3] }} + {{ track_list_values[loop.index0][0] }} + {{ track_list_values[loop.index0][1] }} + {{ track_list_values[loop.index0][2] }} + {{ track_list_num_of_paste[loop.index0] }}

- {% set i = i + 1 %} {% endfor %} @@ -162,22 +160,20 @@ - + - {% set i = 0 %} - {% for term in black_list %} + {% for term, date in black_list %} - - + + - {% set i = i + 1 %} {% endfor %}
TermxTerm Added date Action
{{ black_list[i][0] }}{{ black_list[i][1] }}{{ term }}{{ date }}

- +

diff --git a/var/www/templates/terms_plot_top.html b/var/www/templates/terms_plot_top.html index af62c33f..77caa0fe 100644 --- a/var/www/templates/terms_plot_top.html +++ b/var/www/templates/terms_plot_top.html @@ -12,6 +12,7 @@ + @@ -67,9 +68,14 @@
-
- 1 term per paste -
+
+ + 1 term per paste +
Today From 41132fe0bcfb54ec7beebdb43d791c47641f7b38 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Tue, 28 Feb 2017 15:54:39 +0100 Subject: [PATCH 5/5] Added support of per_paste for term-management and plot-term --- var/www/Flasks/Flask_terms.py | 31 ++++++++++++++++++------- var/www/templates/terms_management.html | 18 +++++++++++++- var/www/templates/terms_plot_tool.html | 21 +++++++++++++---- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/var/www/Flasks/Flask_terms.py b/var/www/Flasks/Flask_terms.py index b56375e7..fad048bd 100644 --- a/var/www/Flasks/Flask_terms.py +++ b/var/www/Flasks/Flask_terms.py @@ -20,13 +20,13 @@ cfg = Flask_config.cfg r_serv_term = Flask_config.r_serv_term # ============ FUNCTIONS ============ -def Term_getValueOverRange(word, startDate, num_day): +def Term_getValueOverRange(word, startDate, num_day, per_paste=""): passed_days = 0 oneDay = 60*60*24 to_return = [] curr_to_return = 0 for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay): - value = r_serv_term.hget(timestamp, word) + value = r_serv_term.hget(per_paste+str(timestamp), word) curr_to_return += int(value) if value is not None else 0 for i in num_day: if passed_days == i-1: @@ -39,6 +39,14 @@ def Term_getValueOverRange(word, startDate, num_day): @app.route("/terms_management/") def terms_management(): + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: + per_paste_text = "per_paste_" + per_paste = 1 + else: + per_paste_text = "" + per_paste = 0 + TrackedTermsSet_Name = "TrackedSetTermSet" BlackListTermsSet_Name = "BlackListSetTermSet" TrackedTermsDate_Name = "TrackedTermDate" @@ -53,7 +61,7 @@ def terms_management(): track_list_num_of_paste = [] for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): track_list.append(tracked_term) - value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) + value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31], per_paste=per_paste_text) term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) @@ -70,7 +78,7 @@ def terms_management(): term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" black_list.append([blacked_term, term_date]) - return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste, per_paste=per_paste) @app.route("/terms_management_query_paste/") @@ -182,12 +190,19 @@ def terms_plot_tool_data(): range_end = calendar.timegm(range_end.timetuple()) term = request.args.get('term') + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: + per_paste = "per_paste_" + else: + per_paste = "" + + if term is None: return "None" else: value_range = [] for timestamp in range(range_start, range_end+oneDay, oneDay): - value = r_serv_term.hget(timestamp, term) + value = r_serv_term.hget(per_paste+str(timestamp), term) curr_value_range = int(value) if value is not None else 0 value_range.append([timestamp, curr_value_range]) value_range.insert(0,term) @@ -208,8 +223,8 @@ def terms_plot_top_data(): today = today.replace(hour=0, minute=0, second=0, microsecond=0) today_timestamp = calendar.timegm(today.timetuple()) - per_paste = int(request.args.get('per_paste')) - if per_paste == 1: + per_paste = request.args.get('per_paste') + if per_paste == "1" or per_paste is None: per_paste = "per_paste_" else: per_paste = "" @@ -221,8 +236,6 @@ def terms_plot_top_data(): the_set = per_paste + request.args.get('set') num_day = int(request.args.get('num_day')) - print(set_day) - print(per_paste) if the_set is None: return "None" else: diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html index 1cbfb67c..22cbbc51 100644 --- a/var/www/templates/terms_management.html +++ b/var/www/templates/terms_management.html @@ -12,6 +12,7 @@ + @@ -91,6 +92,13 @@
+ + 1 term per paste +
Manage tracked terms @@ -194,13 +202,21 @@ @@ -81,6 +82,15 @@
+
+ + 1 term per paste +
+
@@ -202,8 +212,9 @@ function plotData() { plotted_terms = [term] var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000; var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000; + var checked = $("#per_paste").prop( "checked" ) ? 1 : 0; - $.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){ + $.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term, per_paste: checked }, function(data, status){ graph_data = []; var to_plot = []; var curr_data = []; @@ -239,8 +250,9 @@ function addData() { plotted_terms.push(term) var range_start = new Date($( ".sliderRange" ).slider( "values", 0 )).getTime() / 1000; var range_end = new Date($( ".sliderRange" ).slider( "values", 1 )).getTime() / 1000; + var checked = $("#per_paste").prop( "checked" ) ? 1 : 0; - $.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term }, function(data, status){ + $.getJSON("{{ url_for('terms_plot_tool_data') }}", { range_start: range_start, range_end: range_end, term: term, per_paste: checked }, function(data, status){ var curr_data = []; for(i=1; i - -