From 91678179fd72cd303aafc7bacf80204d7729f981 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 14 Oct 2016 16:33:54 +0200 Subject: [PATCH 01/37] Added msgs when queues or feed is not running/coming --- var/www/static/js/indexjavascript.js | 54 +++++++++++++++++----------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index e527aafa..90e6e46c 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -219,28 +219,42 @@ function create_queue_table() { tr.appendChild(th); } - for(i = 0; i < (glob_tabvar.row1).length;i++){ - var tr = document.createElement('TR') - for(j = 0; j < 2; j++){ - var td = document.createElement('TD') - var moduleNum = j == 0 ? "." + glob_tabvar.row1[i][3] : ""; - td.appendChild(document.createTextNode(glob_tabvar.row1[i][j] + moduleNum)); - tr.appendChild(td) - } - // Used to decide the color of the row - // We have glob_tabvar.row1[][j] with: - // - j=0: ModuleName - // - j=1: queueLength - // - j=2: LastProcessedPasteTime - // - j=3: Number of the module belonging in the same category - if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) - tr.className += " danger"; - else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) - tr.className += " warning"; - else - tr.className += " success"; + if ((glob_tabvar.row1).length == 0) { + var tr = document.createElement('TR'); + var td = document.createElement('TD'); + var td2 = document.createElement('TD'); + td.appendChild(document.createTextNode("No running queues")); + td2.appendChild(document.createTextNode("Or no feed")); + td.className += " danger"; + td2.className += " danger"; + tr.appendChild(td); + tr.appendChild(td2); tableBody.appendChild(tr); } + else { + for(i = 0; i < (glob_tabvar.row1).length;i++){ + var tr = document.createElement('TR') + for(j = 0; j < 2; j++){ + var td = document.createElement('TD') + var moduleNum = j == 0 
? "." + glob_tabvar.row1[i][3] : ""; + td.appendChild(document.createTextNode(glob_tabvar.row1[i][j] + moduleNum)); + tr.appendChild(td) + } + // Used to decide the color of the row + // We have glob_tabvar.row1[][j] with: + // - j=0: ModuleName + // - j=1: queueLength + // - j=2: LastProcessedPasteTime + // - j=3: Number of the module belonging in the same category + if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) + tr.className += " danger"; + else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) + tr.className += " warning"; + else + tr.className += " success"; + tableBody.appendChild(tr); + } + } Tablediv.appendChild(table); } From 7c8d414948452cbfc0456cf8ab011bac965fa8a6 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 14 Oct 2016 16:47:22 +0200 Subject: [PATCH 02/37] modified provider name into no data when there is no data --- .../sentiment_analysis_trending.html | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/var/www/templates/sentiment_analysis_trending.html b/var/www/templates/sentiment_analysis_trending.html index e1788f35..b20c3696 100644 --- a/var/www/templates/sentiment_analysis_trending.html +++ b/var/www/templates/sentiment_analysis_trending.html @@ -205,24 +205,24 @@ - worst1 - best1 + no data + no data - worst2 - best2 + no data + no data - worst3 - best3 + no data + no data - worst4 - best4 + no data + no data - worst5 - best5 + no data + no data From 815104f2f4d950e8bf45804fd18446f4a1f35aa3 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 27 Oct 2016 09:05:56 +0200 Subject: [PATCH 03/37] Fixed missing dependency in sentimentAnalysis --- installing_deps.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/installing_deps.sh b/installing_deps.sh index 74a1b000..94e8f936 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -105,6 +105,7 @@ python setup.py install # Download the necessary NLTK corpora and sentiment vader HOME=$(pwd) python -m 
textblob.download_corpora python -m nltk.downloader vader_lexicon +python -m nltk.downloader punkt #Create the file all_module and update the graph in doc $AIL_HOME/doc/generate_modules_data_flow_graph.sh From 1826b170ec508fc6554ef433a09906fd62d01936 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 27 Oct 2016 11:27:26 +0200 Subject: [PATCH 04/37] Added support of local paste view in dashboard for Mails event only --- bin/Mail.py | 4 ++-- var/www/static/js/indexjavascript.js | 17 +++++++++++++++-- var/www/templates/index.html | 2 ++ 3 files changed, 19 insertions(+), 4 deletions(-) diff --git a/bin/Mail.py b/bin/Mail.py index 2b3ed5fc..161082b0 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -55,9 +55,9 @@ if __name__ == "__main__": list(MX_values[1]))) pprint.pprint(MX_values) - to_print = 'Mails;{};{};{};Checked {} e-mail(s)'.\ + to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, - MX_values[0]) + MX_values[0], PST.p_path) if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 90e6e46c..19980a69 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -109,11 +109,11 @@ function create_log_table(obj_json) { var pdate = document.createElement('TD') var nam = document.createElement('TD') var msage = document.createElement('TD') + var inspect = document.createElement('TD') var chansplit = obj_json.channel.split('.'); var parsedmess = obj_json.data.split(';'); - if (parsedmess[0] == "Global"){ var paste_processed = parsedmess[4].split(" ")[2]; window.paste_num_tabvar = paste_processed; @@ -139,7 +139,7 @@ function create_log_table(obj_json) { source_url = "http://"+parsedmess[1]+"/"+parsedmess[3].split(".")[0]; } source_link.setAttribute("HREF",source_url); - source_link.setAttribute("TARGET", "_blank") + source_link.setAttribute("TARGET", "_blank"); 
source_link.appendChild(document.createTextNode(parsedmess[1])); src.appendChild(source_link); @@ -169,6 +169,18 @@ function create_log_table(obj_json) { msage.appendChild(document.createTextNode(message.join(" "))); + var paste_path = parsedmess[5]; + var url_to_saved_paste = url_showSavedPath+"?paste="+paste_path+"&num=0"; + + var action_icon_a = document.createElement("A"); + action_icon_a.setAttribute("TARGET", "_blank"); + action_icon_a.setAttribute("HREF", url_to_saved_paste); + var action_icon_span = document.createElement('SPAN'); + action_icon_span.className = "fa fa-search-plus"; + action_icon_a.appendChild(action_icon_span); + + inspect.appendChild(action_icon_a); + tr.appendChild(time) tr.appendChild(chan); tr.appendChild(level); @@ -177,6 +189,7 @@ function create_log_table(obj_json) { tr.appendChild(pdate); tr.appendChild(nam); tr.appendChild(msage); + tr.appendChild(inspect); if (tr.className == document.getElementById("checkbox_log_info").value && document.getElementById("checkbox_log_info").checked == true) { tableBody.appendChild(tr); diff --git a/var/www/templates/index.html b/var/www/templates/index.html index 5d8639cf..66c38a2c 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -140,6 +140,7 @@ Date Paste name Message + Actions @@ -153,6 +154,7 @@ + + + + + + + + +
+
+
+

Paste: {{ request.args.get('paste') }}

+ +
+ +
+ -

Paste: {{ request.args.get('num') }}

-

{{ request.args.get('paste') }}

- -

+
@@ -46,11 +65,16 @@

No Duplicate

{% else %}

Duplicate list:

- +
{% set i = 0 %} + - + + + + + {% for dup_path in duplicate_list %} @@ -59,6 +83,7 @@ {% set i = i + 1 %} {% endfor %} +
Hash typePaste infoHash typePaste infoPath
{{ hashtype_list[i] }}
{% endif %}

Content:

@@ -66,6 +91,9 @@
- +
+ From 7e7e679ab685bcec1912f6df7b2e489b05c8fbcf Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 27 Oct 2016 15:53:45 +0200 Subject: [PATCH 07/37] Added dataTable for duplicate in show saved paste --- var/www/templates/show_saved_paste.html | 133 ++++++++++++------------ 1 file changed, 65 insertions(+), 68 deletions(-) diff --git a/var/www/templates/show_saved_paste.html b/var/www/templates/show_saved_paste.html index c1393aa6..b164bcb1 100644 --- a/var/www/templates/show_saved_paste.html +++ b/var/www/templates/show_saved_paste.html @@ -19,79 +19,76 @@ -
+
+
+
+

Paste: {{ request.args.get('paste') }}

+ +
+
+
-
-

Paste: {{ request.args.get('paste') }}

- +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
DateSourceEncodingLanguageSize (Kb)MimeNumber of linesMax line length
{{ date }}{{ source }}{{ encoding }}{{ language }}{{ size }}{{ mime }}{{ lineinfo.0 }}{{ lineinfo.1 }}
- -
- - -
- -
-
- - - - - - - - - - - - - - - - - - - - - - - - - -
DateSourceEncodingLanguageSize (Kb)MimeNumber of linesMax line length
{{ date }}{{ source }}{{ encoding }}{{ language }}{{ size }}{{ mime }}{{ lineinfo.0 }}{{ lineinfo.1 }}
-
-
- {% if duplicate_list|length == 0 %} -

No Duplicate

- {% else %} -

Duplicate list:

- - {% set i = 0 %} - - - - - - - - - {% for dup_path in duplicate_list %} +
+ {% if duplicate_list|length == 0 %} +

No Duplicate

+ {% else %} +

Duplicate list:

+
Hash typePaste infoPath
+ {% set i = 0 %} + - - - + + + - {% set i = i + 1 %} - {% endfor %} - -
{{ hashtype_list[i] }}Similarity: {{ simil_list[i] }}%{{ dup_path }}Hash typePaste infoPath
- {% endif %} -

Content:

-

{{ content }}

+ + + {% for dup_path in duplicate_list %} + + {{ hashtype_list[i] }} + Similarity: {{ simil_list[i] }}% + {{ dup_path }} + + {% set i = i + 1 %} + {% endfor %} + + + {% endif %} +

Content:

+

{{ content }}

+
+
+
-
- -
- - - - - - - + + + + + + + + + + + -
-
-
-

Paste: {{ request.args.get('paste') }}

- -
-
- -
+

Paste: {{ request.args.get('paste') }}

+ + @@ -88,7 +81,6 @@ - + + From f51808d91491855bbb1038ca92b6b619902e59e9 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 28 Oct 2016 09:48:22 +0200 Subject: [PATCH 10/37] Added support of html formatting in search result dynamic loading table --- var/www/templates/search.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/var/www/templates/search.html b/var/www/templates/search.html index 8f393302..b984ecdc 100644 --- a/var/www/templates/search.html +++ b/var/www/templates/search.html @@ -153,10 +153,10 @@ for(i=0; i "+ data.path_array[i] +"", data.date_array[i], data.size_array[i], - data.preview_array[i] + "

" ] ).draw( false ); } if (data.moreData == true) From c95000866db2ab371d7ad02f8932c2d06e1c562b Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 28 Oct 2016 14:21:08 +0200 Subject: [PATCH 11/37] Dynamic table in search now load all the data + fixed bugs where tooltip where not shown on other than the first page displayed and tooltip interpret html (not supposed to...) --- var/www/Flask_server.py | 13 ++++++++----- var/www/templates/search.html | 34 ++++++++++++++++++++++++---------- 2 files changed, 32 insertions(+), 15 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 73306ac6..a193fc16 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -328,7 +328,6 @@ def search(): paste_size = [] # Search filename - print r_serv_pasteName.smembers(q[0]) for path in r_serv_pasteName.smembers(q[0]): print path r.append(path) @@ -351,7 +350,7 @@ def search(): from whoosh.qparser import QueryParser with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, 1, pagelen=20) + results = searcher.search_page(query, 1, pagelen=10) for x in results: r.append(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1]) @@ -370,7 +369,7 @@ def get_more_search_result(): query = request.form['query'] q = [] q.append(query) - offset = request.form['offset'] + offset = int(request.form['offset']) path_array = [] preview_array = [] @@ -386,7 +385,7 @@ def get_more_search_result(): from whoosh.qparser import QueryParser with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, offset, pagelen=20) + results = searcher.search_page(query, offset, pagelen=10) for x in results: path_array.append(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1]) @@ -402,7 +401,11 @@ def get_more_search_result(): to_return["preview_array"] = preview_array to_return["date_array"] = date_array to_return["size_array"] = 
size_array - to_return["moreData"] = False + if len(path_array) < 10: #pagelength + to_return["moreData"] = False + else: + to_return["moreData"] = True + return jsonify(to_return) diff --git a/var/www/templates/search.html b/var/www/templates/search.html index b984ecdc..b1f8cbab 100644 --- a/var/www/templates/search.html +++ b/var/www/templates/search.html @@ -84,7 +84,8 @@
- {{ r|length }} Results for "{{ query }}" + {{ r|length }} Results for "{{ query }} +
@@ -130,12 +131,16 @@ @@ -145,25 +150,34 @@ // Loop to recover all the data from get_more_search_results // And add it dynamically top the dataTable - function load_search_data(search_table, prev_query, offset) { - $.post( "{{ url_for('get_more_search_result') }}", { query: prev_query, offset: offset }).done(function( data ) { - console.log( "Data Loaded: " ) - console.log( data ); + function load_search_data(init_num_of_elements_in_table, search_table, prev_query, offset) { + var options = { query: prev_query, offset: offset }; + console.log(options); + $.post( "{{ url_for('get_more_search_result') }}", options).done(function( data ) { for(i=0; i "+ data.path_array[i] +"", data.date_array[i], data.size_array[i], - "

" + "

" ] ).draw( false ); } + $("#numberOfRes").text(parseInt($("#numberOfRes").text()) + data.path_array.length); if (data.moreData == true) - load_search_data(prev_query, offset+i); + load_search_data(init_num_of_elements_in_table, search_table, prev_query, offset+10); + else { + $("#loading_gif_search").hide(); + } }); } + $('#myTable').on( 'page.dt', function () { + setTimeout(function(){ $('[data-toggle="tooltip"]').tooltip(); }, 300); + } ); + From acdd1367a36ce8b1c1985b6d9bc7a61c4344205f Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 28 Oct 2016 16:55:56 +0200 Subject: [PATCH 12/37] Added event source for getImportantPaste --- var/www/Flask_server.py | 37 +++++++++++++++- .../templates/important_paste_by_module.html | 42 ++++++++++++++++++- var/www/templates/search.html | 2 - 3 files changed, 75 insertions(+), 6 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index a193fc16..7a630ccc 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -78,6 +78,28 @@ def event_stream(): if msg['type'] == 'pmessage' and level != "DEBUG": yield 'data: %s\n\n' % json.dumps(msg) +def event_stream_getImportantPasteByModule(module_name): + index = 0 + all_pastes_list = getPastebyType(r_serv_db, module_name) + for path in all_pastes_list: + index += 1 + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + data = {} + data["module"] = module_name + data["index"] = index + data["path"] = path + data["content"] = content[0:content_range] + data["linenum"] = paste.get_lines_info()[0] + data["date"] = curr_date + data["char_to_display"] = max_preview_modal + data["finished"] = True if index == len(all_pastes_list) else False + print index + yield 'data: %s\n\n' % json.dumps(data) + def get_queues(r): # We may want to 
put the llen in a pipeline to do only one query. @@ -452,8 +474,9 @@ def importantPasteByModule(): paste_date = [] paste_linenum = [] all_path = [] + allPastes = getPastebyType(r_serv_db, module_name) - for path in getPastebyType(r_serv_db, module_name): + for path in allPastes[0:10]: all_path.append(path) paste = Paste.Paste(path) content = paste.get_p_content().decode('utf8', 'ignore') @@ -464,7 +487,17 @@ def importantPasteByModule(): paste_date.append(curr_date) paste_linenum.append(paste.get_lines_info()[0]) - return render_template("important_paste_by_module.html", all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal) + if len(allPastes) > 10: + finished = "" + else: + finished = "display: none;" + + return render_template("important_paste_by_module.html", moduleName=module_name, all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal, finished=finished) + +@app.route("/_getImportantPasteByModule") +def getImportantPasteByModule(): + module_name = request.args.get('moduleName') + return flask.Response(event_stream_getImportantPasteByModule(module_name), mimetype="text/event-stream") @app.route("/moduletrending/") def moduletrending(): diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index 061648c4..c1664283 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -1,4 +1,5 @@ -
+ +
@@ -25,10 +26,47 @@

+ + + + diff --git a/var/www/templates/search.html b/var/www/templates/search.html index b1f8cbab..ed50ba89 100644 --- a/var/www/templates/search.html +++ b/var/www/templates/search.html @@ -131,8 +131,6 @@ @@ -186,37 +182,6 @@ var char_to_display = {{ char_to_display }}; var start_index = 0; - // On click, get html content from url and update the corresponding modal - $("[data-toggle='modal']").on("click", function (event) { - event.preventDefault(); - var modal=$(this); - var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); - $.get(url, function (data) { - $("#mymodalbody").html(data); - var button = $(''); - button.tooltip(); - $("#mymodalbody").children(".panel-default").append(button); - - $("#button_show_path").attr('href', $(modal).attr('data-url')); - $("#button_show_path").show('fast'); - $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF - if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed - nothing_to_display(); - } - // On click, donwload all paste's content - $("#load-more-button").on("click", function (event) { - if (complete_paste == null) { //Donwload only once - $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ - complete_paste = data; - update_preview(); - }); - } else { - update_preview(); - } - }); - }); - }); - // When the modal goes out, refresh it to normal content $("#mymodal").on('hidden.bs.modal', function () { $("#mymodalbody").html("

Loading paste information...

"); @@ -254,5 +219,45 @@ new_content.show('fast'); $("#load-more-button").hide(); } + + + $('#myTable').on( 'draw.dt', function () { + // Bind tooltip each time we draw a new page + $('[data-toggle="tooltip"]').tooltip(); + // On click, get html content from url and update the corresponding modal + $("[data-toggle='modal']").off('click.openmodal').on("click.openmodal", function (event) { + var modal=$(this); + var url = " {{ url_for('showpreviewpaste') }}?paste=" + $(this).attr('data-path') + "&num=" + $(this).attr('data-num'); + $.get(url, function (data) { + + // clear data by removing html, body, head tags. prevent dark modal background stack bug. + var cleared_data = data.split("")[1].split("")[0]; + $("#mymodalbody").html(cleared_data); + + var button = $(''); + button.tooltip(); + $("#mymodalbody").children(".panel-default").append(button); + + $("#button_show_path").attr('href', $(modal).attr('data-url')); + $("#button_show_path").show('fast'); + $("#loading-gif-modal").css("visibility", "hidden"); // Hide the loading GIF + if ($("[data-initsize]").attr('data-initsize') < char_to_display) { // All the content is displayed + nothing_to_display(); + } + // On click, donwload all paste's content + $("#load-more-button").off('click.download').on("click.download", function (event) { + if (complete_paste == null) { //Donwload only once + $.get("{{ url_for('getmoredata') }}"+"?paste="+$(modal).attr('data-path'), function(data, status){ + complete_paste = data; + update_preview(); + }); + } else { + update_preview(); + } + }); + }); + }); + } ); + From 7763bfb4c79c8a466268fc418cbe0a1cde951bba Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 10 Nov 2016 15:39:45 +0100 Subject: [PATCH 16/37] Pastes dataTable now display only the latest clicked paste even if there were discarded queries --- var/www/Flask_server.py | 3 +- .../templates/important_paste_by_module.html | 59 +++++++++++-------- var/www/templates/search.html | 59 +++++++++++-------- 3 files changed, 73 
insertions(+), 48 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 79474155..e45bc407 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -811,7 +811,8 @@ def showsavedpaste(): @app.route("/showpreviewpaste/") def showpreviewpaste(): - return showpaste(max_preview_modal) + num = request.args.get('num', '') + return "|num|"+num+"|num|"+showpaste(max_preview_modal) @app.route("/getmoredata/") diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index 1f3b1bae..5b4bd8c1 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -59,7 +59,10 @@ function deploy_source() { From df8d9780880351984555256787d494ed1077bdc1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 24 Nov 2016 13:31:31 +0100 Subject: [PATCH 21/37] Added dynamic data loading in dataTable in search.html --- var/www/Flask_server.py | 16 ++++-- .../templates/important_paste_by_module.html | 2 +- var/www/templates/search.html | 51 ++++++++++++++----- 3 files changed, 50 insertions(+), 19 deletions(-) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 2d3aa196..2b01b6fa 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -347,6 +347,7 @@ def search(): c = [] #preview of the paste content paste_date = [] paste_size = [] + num_elem_to_get = 50 # Search filename for path in r_serv_pasteName.smembers(q[0]): @@ -370,7 +371,7 @@ def search(): from whoosh.qparser import QueryParser with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, 1, pagelen=10) + results = searcher.search_page(query, 1, pagelen=num_elem_to_get) for x in results: r.append(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1]) @@ -381,7 +382,10 @@ def search(): curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] paste_date.append(curr_date) 
paste_size.append(paste._get_p_size()) - return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal) + results = searcher.search(query) + num_res = len(results) + + return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal, num_res=num_res) @app.route("/get_more_search_result", methods=['POST']) @@ -389,7 +393,8 @@ def get_more_search_result(): query = request.form['query'] q = [] q.append(query) - offset = int(request.form['offset']) + page_offset = int(request.form['page_offset']) + num_elem_to_get = 50 path_array = [] preview_array = [] @@ -405,7 +410,7 @@ def get_more_search_result(): from whoosh.qparser import QueryParser with ix.searcher() as searcher: query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, offset, pagelen=10) + results = searcher.search_page(query, page_offset, num_elem_to_get) for x in results: path_array.append(x.items()[0][1]) paste = Paste.Paste(x.items()[0][1]) @@ -421,7 +426,8 @@ def get_more_search_result(): to_return["preview_array"] = preview_array to_return["date_array"] = date_array to_return["size_array"] = size_array - if len(path_array) < 10: #pagelength + print "len(path_array)="+str(len(path_array)) + if len(path_array) < num_elem_to_get: #pagelength to_return["moreData"] = False else: to_return["moreData"] = True diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index 535925a1..ca98ce59 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -28,7 +28,7 @@
- +

diff --git a/var/www/templates/search.html b/var/www/templates/search.html index 22bb31d2..e7351ca1 100644 --- a/var/www/templates/search.html +++ b/var/www/templates/search.html @@ -83,7 +83,6 @@
{{ r|length }} Results for "{{ query }} -
@@ -114,7 +113,11 @@ {% endfor %}
#
+
+ + Totalling {{ num_res }} items
+
@@ -123,6 +126,7 @@
+
@@ -130,17 +134,29 @@ var search_table; var last_clicked_paste; var can_change_modal_content = true; + var page_offset; + var offset; + var all_loaded; + var init_num_of_elements_in_table; + var query; + var pagelen = 50; $(document).ready(function(){ $('[data-toggle="tooltip"]').tooltip(); $("#button_show_path").hide(); - var search_table = $('#myTable').DataTable(); + search_table = $('#myTable').DataTable(); - var prev_query = "{{ query }}"; - var offset = 2; - var init_num_of_elements_in_table = parseInt("{{ r|length }}"); // Comes from the file search - load_search_data(init_num_of_elements_in_table, search_table, prev_query, offset); + query = "{{ query }}"; + offset = 0; + page_offset = 2; //page 1 already loaded + all_loaded = false; + init_num_of_elements_in_table = parseInt("{{ r|length }}"); // Comes from the file search + + + if (init_num_of_elements_in_table == pagelen) { + $("#load_more_json_button1").show(); + } }); @@ -149,27 +165,36 @@ // Loop to recover all the data from get_more_search_results // And add it dynamically top the dataTable + function add_entries() { //Used to disable the button before going to the big loop + $("#load_more_json_button1").attr('disabled','disabled'); + setTimeout(function() { load_search_50_data();}, 50); + } - function load_search_data(init_num_of_elements_in_table, search_table, prev_query, offset) { - var options = { query: prev_query, offset: offset }; + function load_search_50_data() { + var options = { query: query, page_offset: page_offset }; $.post( "{{ url_for('get_more_search_result') }}", options).done(function( data ) { for(i=0; i "+ data.path_array[i] +"", data.date_array[i], data.size_array[i], "

" ] ).draw( false ); } + offset = offset + data.path_array.length; + page_offset = page_offset+1; $("#numberOfRes").text(parseInt($("#numberOfRes").text()) + data.path_array.length); - if (data.moreData == true) - load_search_data(init_num_of_elements_in_table, search_table, prev_query, offset+10); - else { - $("#loading_gif_search").hide(); + if (data.moreData == true) { + //continue + } else { + all_loaded = true; + $("#load_more_json_button1").hide(); } + $("#load_more_json_button1").removeAttr('disabled'); + return data.path_array.length; }); } From 5d269ea1eebcacf75e9cf58f8d5d84592e23335f Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 24 Nov 2016 15:05:29 +0100 Subject: [PATCH 22/37] Added date in Duplicate for better distinguish duplicate. Also, added a timeout for the dataTable in duplicate inside the modal. --- bin/Duplicates.py | 7 +++++-- var/www/Flask_server.py | 16 +++++++++++++--- var/www/templates/search.html | 3 ++- var/www/templates/show_saved_paste.html | 2 ++ 4 files changed, 22 insertions(+), 6 deletions(-) diff --git a/bin/Duplicates.py b/bin/Duplicates.py index cd9e1f97..50def29f 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -131,8 +131,10 @@ if __name__ == "__main__": # index of paste index_current = r_serv_dico.get(dico_hash) paste_path = r_serv_dico.get(index_current) + paste_date = r_serv_dico.get(index_current+'_date') + paste_date = paste_date if paste_date != None else "No date available" if paste_path != None: - hash_dico[dico_hash] = (hash_type, paste_path, percent) + hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) print '['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent) except Exception,e: @@ -142,6 +144,7 @@ if __name__ == "__main__": # Add paste in DB after checking to prevent its analysis twice # hash_type_i -> index_i AND index_i -> PST.PATH r_serv1.set(index, PST.p_path) + r_serv1.set(index+'_date', PST._get_p_date()) 
r_serv1.sadd("INDEX", index) # Adding hashes in Redis for hash_type, paste_hash in paste_hashes.iteritems(): @@ -152,7 +155,7 @@ if __name__ == "__main__": # if there is data in this dictionnary if len(hash_dico) != 0: - # paste_tuple = (paste_path, percent) + # paste_tuple = (hash_type, date, paste_path, percent) for dico_hash, paste_tuple in hash_dico.items(): dupl.append(paste_tuple) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 2b01b6fa..fcd67a21 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -143,6 +143,7 @@ def showpaste(content_range): p_duplicate_full_list = json.loads(paste._get_p_duplicate()) p_duplicate_list = [] p_simil_list = [] + p_date_list = [] p_hashtype_list = [] @@ -170,20 +171,29 @@ def showpaste(content_range): hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types) comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) - new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals]) + if len(p_duplicate_full_list[dup_list_index]) > 3: + try: + date_paste = str(int(p_duplicate_full_list[dup_list_index][3])) + date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8] + except ValueError: + date_paste = str(p_duplicate_full_list[dup_list_index][3]) + else: + date_paste = "No date available" + new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste]) # Create the list to pass to the webpage for dup_list in new_dup_list: - hash_type, path, simil_percent = dup_list + hash_type, path, simil_percent, date_paste = dup_list p_duplicate_list.append(path) p_simil_list.append(simil_percent) p_hashtype_list.append(hash_type) + p_date_list.append(date_paste) if content_range != 0: p_content = p_content[0:content_range] - return render_template("show_saved_paste.html", date=p_date, source=p_source, 
encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list) + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list) def getPastebyType(server, module_name): all_path = [] diff --git a/var/www/templates/search.html b/var/www/templates/search.html index e7351ca1..9b43967d 100644 --- a/var/www/templates/search.html +++ b/var/www/templates/search.html @@ -115,7 +115,7 @@
- Totalling {{ num_res }} items + Totalling {{ num_res }} results related to paste content
@@ -266,6 +266,7 @@ // clear data by removing html, body, head tags. prevent dark modal background stack bug. var cleared_data = data.split("")[1].split("")[0]; $("#mymodalbody").html(cleared_data); + setTimeout(function() { $('#tableDup').DataTable(); }, 150); var button = $(''); button.tooltip(); diff --git a/var/www/templates/show_saved_paste.html b/var/www/templates/show_saved_paste.html index 6ab209ca..ef955bfe 100644 --- a/var/www/templates/show_saved_paste.html +++ b/var/www/templates/show_saved_paste.html @@ -61,6 +61,7 @@ Hash type Paste info + Date Path @@ -69,6 +70,7 @@ {{ hashtype_list[i] }} Similarity: {{ simil_list[i] }}% + {{ date_list[i] }} {{ dup_path }} {% set i = i + 1 %} From 224fbc8084735a444e10f5edb832e3b242eea24d Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 24 Nov 2016 16:58:32 +0100 Subject: [PATCH 23/37] Better handle stuck modules. Differentiate between not running and no info and tries to restart stuck ones. --- bin/ModuleInformation.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py index df07bf14..7520f695 100755 --- a/bin/ModuleInformation.py +++ b/bin/ModuleInformation.py @@ -36,11 +36,11 @@ command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{ def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) for line in p.stdout: + print line splittedLine = line.split() if 'python2' in splittedLine: return int(splittedLine[0]) - else: - return None + return None def clearRedisModuleInfo(): for k in server.keys("MODULE_*"): @@ -87,7 +87,9 @@ def kill_module(module): p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) else: print 'killing failed!' 
- time.sleep(7) + else: + print 'Module does not exist' + time.sleep(5) if __name__ == "__main__": @@ -120,6 +122,7 @@ if __name__ == "__main__": lastTime = datetime.datetime.now() module_file_array = set() + no_info_modules = {} path_allmod = os.path.join(os.environ['AIL_HOME'], 'doc/all_modules.txt') with open(path_allmod, 'r') as module_file: for line in module_file: @@ -158,7 +161,19 @@ if __name__ == "__main__": for curr_queue in module_file_array: if curr_queue not in all_queue: - printarray3.append([curr_queue, "Not running"]) + printarray3.append([curr_queue, "Not running"]) + else: + if len(list(server.smembers('MODULE_TYPE_'+curr_queue))) == 0: + if curr_queue not in no_info_modules: + no_info_modules[curr_queue] = int(time.time()) + printarray3.append([curr_queue, "No data"]) + else: + #If no info since long time, try to kill + if int(time.time()) - no_info_modules[curr_queue] > threshold_stucked_module: + kill_module(curr_queue) + no_info_modules[curr_queue] = int(time.time()) + printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(threshold_stucked_module - (int(time.time()) - no_info_modules[curr_queue])) + "s"]) + printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) From a3255d168cdbb66c3569bbc0801ac57a66398eeb Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 25 Nov 2016 11:54:16 +0100 Subject: [PATCH 24/37] ModuleInfo correctly handle CurveManageTopSets, Changed queue coloring in dashboard, ModuleInformation now have a history of executed command and better handle module killing. 
--- bin/CurveManageTopSets.py | 18 ++++++ bin/ModuleInformation.py | 89 ++++++++++++++++++++++++---- var/www/Flask_server.py | 3 +- var/www/static/js/indexjavascript.js | 4 +- var/www/templates/index.html | 1 + 5 files changed, 100 insertions(+), 15 deletions(-) diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 8f316333..f4aacd94 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -17,6 +17,7 @@ Requirements import redis import time +import datetime import copy from pubsublogger import publisher from packages import lib_words @@ -87,6 +88,11 @@ def manage_top_set(): for elem in array_month: server_term.zadd(top_termFreq_setName_month[0], float(elem[1]), elem[0]) + timestamp = int(time.mktime(datetime.datetime.now().timetuple())) + value = str(timestamp) + ", " + "-" + r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) + print "refreshed module" + if __name__ == '__main__': @@ -105,6 +111,18 @@ if __name__ == '__main__': cfg = ConfigParser.ConfigParser() cfg.read(configfile) + + # For Module Manager + r_temp = redis.StrictRedis( + host=cfg.get('RedisPubSub', 'host'), + port=cfg.getint('RedisPubSub', 'port'), + db=cfg.getint('RedisPubSub', 'db')) + + timestamp = int(time.mktime(datetime.datetime.now().timetuple())) + value = str(timestamp) + ", " + "-" + r_temp.set("MODULE_"+ "CurveManageTopSets" + "_" + str(os.getpid()), value) + r_temp.sadd("MODULE_TYPE_"+ "CurveManageTopSets" , str(os.getpid())) + server_term = redis.StrictRedis( host=cfg.get("Redis_Level_DB_TermFreq", "host"), port=cfg.getint("Redis_Level_DB_TermFreq", "port"), diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py index 7520f695..d874898b 100755 --- a/bin/ModuleInformation.py +++ b/bin/ModuleInformation.py @@ -26,12 +26,16 @@ from terminaltables import AsciiTable import textwrap # CONFIG VARIABLES -threshold_stucked_module = 60*60*1 #1 hour +threshold_stucked_module = 60*10*1 #1 hour +kill_retry_threshold = 60 #1m 
log_filename = "../logs/moduleInfo.log" command_search_pid = "ps a -o pid,cmd | grep {}" command_search_name = "ps a -o pid,cmd | grep {}" command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" +printarrayGlob = [None]*14 +printarrayGlob.insert(0, ["Time", "Module", "PID", "Action"]) +lastTimeKillCommand = {} def getPid(module): p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) @@ -45,6 +49,9 @@ def getPid(module): def clearRedisModuleInfo(): for k in server.keys("MODULE_*"): server.delete(k) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, "*", "-", "Cleared redis module info"]) + printarrayGlob.pop() def cleanRedis(): for k in server.keys("MODULE_TYPE_*"): @@ -60,36 +67,76 @@ def cleanRedis(): if not flag_pid_valid: print flag_pid_valid, 'cleaning', pid, 'in', k server.srem(k, pid) - time.sleep(5) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, moduleName, pid, "Cleared invalid pid in " + k]) + printarrayGlob.pop() + #time.sleep(5) -def kill_module(module): +def kill_module(module, pid): print '' print '-> trying to kill module:', module - pid = getPid(module) + if pid is None: + print 'pid was None' + printarrayGlob.insert(1, [0, module, pid, "PID was None"]) + printarrayGlob.pop() + pid = getPid(module) + else: #Verify that the pid is at least in redis + if server.exists("MODULE_"+module+"_"+str(pid)) == 0: + return + + lastTimeKillCommand[pid] = int(time.time()) if pid is not None: - os.kill(pid, signal.SIGUSR1) + try: + os.kill(pid, signal.SIGUSR1) + except OSError: + print pid, 'already killed' + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Already killed"]) + printarrayGlob.pop() + return time.sleep(1) if getPid(module) is None: print module, 'has been killed' print 'restarting', module, 
'...' p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) + printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"]) + printarrayGlob.pop() + printarrayGlob.pop() else: print 'killing failed, retrying...' - time.sleep(3) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Killing #1 failed."]) + printarrayGlob.pop() + + time.sleep(1) os.kill(pid, signal.SIGUSR1) time.sleep(1) if getPid(module) is None: print module, 'has been killed' print 'restarting', module, '...' p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Killed"]) + printarrayGlob.insert(1, [inst_time, module, "?", "Restarted"]) + printarrayGlob.pop() + printarrayGlob.pop() else: print 'killing failed!' 
+ inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed!"]) + printarrayGlob.pop() else: print 'Module does not exist' - time.sleep(5) + inst_time = datetime.datetime.fromtimestamp(int(time.time())) + printarrayGlob.insert(1, [inst_time, module, pid, "Killing failed, module not found"]) + printarrayGlob.pop() + #time.sleep(5) + cleanRedis() if __name__ == "__main__": @@ -110,6 +157,8 @@ if __name__ == "__main__": cfg = ConfigParser.ConfigParser() cfg.read(configfile) + threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") + # REDIS # server = redis.StrictRedis( host=cfg.get("Redis_Queues", "host"), @@ -128,6 +177,8 @@ if __name__ == "__main__": for line in module_file: module_file_array.add(line[:-1]) + cleanRedis() + while True: all_queue = set() @@ -151,8 +202,12 @@ if __name__ == "__main__": if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module: log = open(log_filename, 'a') log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n") - if args.autokill == 1: - kill_module(queue) + try: + last_kill_try = time.time() - lastTimeKillCommand[moduleNum] + except KeyError: + last_kill_try = kill_retry_threshold+1 + if args.autokill == 1 and last_kill_try > kill_retry_threshold : + kill_module(queue, int(moduleNum)) printarray1.append([str(queue), str(moduleNum), str(card), str(startTime_readable), str(processed_time_readable), str(path)]) @@ -170,13 +225,13 @@ if __name__ == "__main__": else: #If no info since long time, try to kill if int(time.time()) - no_info_modules[curr_queue] > threshold_stucked_module: - kill_module(curr_queue) + kill_module(curr_queue, None) no_info_modules[curr_queue] = int(time.time()) printarray3.append([curr_queue, "Stuck or idle, restarting in " + str(threshold_stucked_module - (int(time.time()) - no_info_modules[curr_queue])) + "s"]) - 
printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) - printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) + printarray1.sort(lambda x,y: cmp(x[0], y[0]), reverse=False) + printarray2.sort(lambda x,y: cmp(x[0], y[0]), reverse=False) printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) printarray3.insert(0,["Queue", "State"]) @@ -219,11 +274,21 @@ if __name__ == "__main__": t3 = AsciiTable(printarray3, title="Not running queues") t3.column_max_width(1) + printarray4 = [] + for elem in printarrayGlob: + if elem is not None: + printarray4.append(elem) + + t4 = AsciiTable(printarray4, title="Last actions") + t4.column_max_width(1) + print t1.table print '\n' print t2.table print '\n' print t3.table + print '\n' + print t4.table if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: lastTime = datetime.datetime.now() diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index fcd67a21..4746117e 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -448,7 +448,8 @@ def get_more_search_result(): @app.route("/") def index(): default_minute = cfg.get("Flask", "minute_processed_paste") - return render_template("index.html", default_minute = default_minute) + threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") + return render_template("index.html", default_minute = default_minute, threshold_stucked_module=threshold_stucked_module) @app.route("/monitoring/") diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index a289f5ae..8d50ea9d 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -259,9 +259,9 @@ function create_queue_table() { // - j=1: queueLength // - j=2: LastProcessedPasteTime // - j=3: Number of the 
module belonging in the same category - if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) + if (parseInt(glob_tabvar.row1[i][2]) > window.threshold_stucked_module && parseInt(glob_tabvar.row1[i][1]) > 2) tr.className += " danger"; - else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) + else if (parseInt(glob_tabvar.row1[i][1]) == 0) tr.className += " warning"; else tr.className += " success"; diff --git a/var/www/templates/index.html b/var/www/templates/index.html index 66c38a2c..74b45c01 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -20,6 +20,7 @@ From 1abba4dcf98ad26b776d4d515d2c00c3dc46907c Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 8 Dec 2016 08:44:10 +0100 Subject: [PATCH 29/37] Added support of re-plotting the plotted terms --- var/www/templates/terms_plot_tool.html | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/var/www/templates/terms_plot_tool.html b/var/www/templates/terms_plot_tool.html index fc5ab017..0205a89f 100644 --- a/var/www/templates/terms_plot_tool.html +++ b/var/www/templates/terms_plot_tool.html @@ -72,7 +72,7 @@
- Date: + Date:
@@ -252,26 +252,28 @@ function addData() { } -function replot(duration) { - console.log(plotted_terms); +function replot() { graph_data = []; + promises = []; for(i=0; i From 570324060e7e02e49027a670e44ef58d89a104dc Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 8 Dec 2016 09:13:31 +0100 Subject: [PATCH 30/37] terms top_sets correctly supports blacklisted terms --- bin/CurveManageTopSets.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/bin/CurveManageTopSets.py b/bin/CurveManageTopSets.py index 8f316333..979df7ca 100755 --- a/bin/CurveManageTopSets.py +++ b/bin/CurveManageTopSets.py @@ -44,13 +44,14 @@ def manage_top_set(): startDate = datetime.datetime.now() startDate = startDate.replace(hour=0, minute=0, second=0, microsecond=0) startDate = calendar.timegm(startDate.timetuple()) + blacklist_size = int(server_term.scard(BlackListTermsSet_Name)) dico = {} - # Retreive top data (2*max_card) from days sets + # Retreive top data (max_card + blacklist_size) from days sets for timestamp in range(startDate, startDate - top_termFreq_setName_month[1]*oneDay, -oneDay): curr_set = top_termFreq_setName_day[0] + str(timestamp) - array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality*2) + array_top_day = server_term.zrevrangebyscore(curr_set, '+inf', '-inf', withscores=True, start=0, num=top_term_freq_max_set_cardinality+blacklist_size) for word, value in array_top_day: if word not in server_term.smembers(BlackListTermsSet_Name): From 73d4f9e082ba000893a183cc546e9c74496f3d9a Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 8 Dec 2016 10:05:07 +0100 Subject: [PATCH 31/37] Webstats should correctly updates top_progression_zset (Not fully tested because not enough data. 
Will be tested latter) --- bin/WebStats.py | 62 ++++++++++++++++++++++++++++++++----------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/bin/WebStats.py b/bin/WebStats.py index d8ff0876..1c41b64d 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -38,35 +38,55 @@ def get_date_range(num_day): date_list.append(date.substract_day(i)) return date_list +# Compute the progression for one keyword +def compute_progression_word(keyword): + date_range = get_date_range(num_day) + # check if this keyword is eligible for progression + keyword_total_sum = 0 + value_list = [] + for date in date_range: # get value up to date_range + curr_value = server.hget(keyword, date) + value_list.append(int(curr_value if curr_value is not None else 0)) + keyword_total_sum += int(curr_value) if curr_value is not None else 0 + oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division + + # The progression is based on the ratio: value[i] / value[i-1] + keyword_increase = 0 + value_list_reversed = value_list[:] + value_list_reversed.reverse() + for i in range(1, len(value_list_reversed)): + divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1 + keyword_increase += value_list_reversed[i] / divisor + + return (keyword_increase, keyword_total_sum) + + +''' + recompute the set top_progression zset + - Compute the current field progression + - re-compute the current progression for each first 2*max_set_cardinality fields in the top_progression_zset +''' def compute_progression(server, field_name, num_day, url_parsed): - redis_progression_name = 'top_progression_'+field_name - redis_progression_name_set = 'top_progression_'+field_name+'_set' + redis_progression_name_set = "z_top_progression_"+field_name keyword = url_parsed[field_name] if keyword is not None: - date_range = get_date_range(num_day) - # check if this keyword is eligible for progression - keyword_total_sum = 0 - value_list = [] - for date in date_range: # get value up 
to date_range - curr_value = server.hget(keyword, date) - value_list.append(int(curr_value if curr_value is not None else 0)) - keyword_total_sum += int(curr_value) if curr_value is not None else 0 - oldest_value = value_list[-1] if value_list[-1] != 0 else 1 #Avoid zero division + #compute the progression of the current word + keyword_increase, keyword_total_sum = compute_progression_word(keyword) - # The progression is based on the ratio: value[i] / value[i-1] - keyword_increase = 0 - value_list_reversed = value_list[:] - value_list_reversed.reverse() - for i in range(1, len(value_list_reversed)): - divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1 - keyword_increase += value_list_reversed[i] / divisor + #re-compute the progression of 2*max_set_cardinality + current_top = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*max_set_cardinality) + for word, value in array_top_day: + word_inc, word_tot_sum = compute_progression_word(word) + server.zrem(redis_progression_name_set, word) + if (word_tot_sum > threshold_total_sum) and (word_inc > threshold_increase): + server.zadd(redis_progression_name_set, float(word_inc), word) - # filter + # filter before adding if (keyword_total_sum > threshold_total_sum) and (keyword_increase > threshold_increase): - - server.zadd("z_top_progression_"+field_name, float(keyword_increase), keyword) + server.zadd(redis_progression_name_set, float(keyword_increase), keyword) + if __name__ == '__main__': From 8daa72789e09549ba567688b528b7cd85ded52a7 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 9 Dec 2016 08:46:37 +0100 Subject: [PATCH 32/37] Splitted Flask_server into module related to website sections --- var/www/Flask_browsepastes.py | 100 ++++ var/www/Flask_config.py | 65 +++ var/www/Flask_dashboard.py | 65 +++ var/www/Flask_search.py | 124 +++++ var/www/Flask_sentiment.py | 137 ++++++ var/www/Flask_server.py | 821 +------------------------------ 
var/www/Flask_showpaste.py | 114 +++++ var/www/Flask_terms.py | 240 +++++++++ var/www/Flask_trendingcharts.py | 73 +++ var/www/Flask_trendingmodules.py | 111 +++++ 10 files changed, 1045 insertions(+), 805 deletions(-) create mode 100644 var/www/Flask_browsepastes.py create mode 100644 var/www/Flask_config.py create mode 100644 var/www/Flask_dashboard.py create mode 100644 var/www/Flask_search.py create mode 100644 var/www/Flask_sentiment.py create mode 100644 var/www/Flask_showpaste.py create mode 100644 var/www/Flask_terms.py create mode 100644 var/www/Flask_trendingcharts.py create mode 100644 var/www/Flask_trendingmodules.py diff --git a/var/www/Flask_browsepastes.py b/var/www/Flask_browsepastes.py new file mode 100644 index 00000000..e5aa5b84 --- /dev/null +++ b/var/www/Flask_browsepastes.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import json +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +max_preview_char = Flask_config.max_preview_char +max_preview_modal = Flask_config.max_preview_modal +r_serv_db = Flask_config.r_serv_db +# ============ FUNCTIONS ============ + +def getPastebyType(server, module_name): + all_path = [] + for path in server.smembers('WARNING_'+module_name): + all_path.append(path) + return all_path + + +def event_stream_getImportantPasteByModule(module_name): + index = 0 + all_pastes_list = getPastebyType(r_serv_db, module_name) + for path in all_pastes_list: + index += 1 + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + data = {} + data["module"] = 
module_name + data["index"] = index + data["path"] = path + data["content"] = content[0:content_range] + data["linenum"] = paste.get_lines_info()[0] + data["date"] = curr_date + data["char_to_display"] = max_preview_modal + data["finished"] = True if index == len(all_pastes_list) else False + yield 'retry: 100000\ndata: %s\n\n' % json.dumps(data) #retry to avoid reconnection of the browser + +# ============ ROUTES ============ + +@app.route("/browseImportantPaste/", methods=['GET']) +def browseImportantPaste(): + module_name = request.args.get('moduleName') + return render_template("browse_important_paste.html") + + +@app.route("/importantPasteByModule/", methods=['GET']) +def importantPasteByModule(): + module_name = request.args.get('moduleName') + + all_content = [] + paste_date = [] + paste_linenum = [] + all_path = [] + allPastes = getPastebyType(r_serv_db, module_name) + + for path in allPastes[0:10]: + all_path.append(path) + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " ")) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_linenum.append(paste.get_lines_info()[0]) + + if len(allPastes) > 10: + finished = "" + else: + finished = "display: none;" + + return render_template("important_paste_by_module.html", + moduleName=module_name, + all_path=all_path, + content=all_content, + paste_date=paste_date, + paste_linenum=paste_linenum, + char_to_display=max_preview_modal, + finished=finished) + +@app.route("/_getImportantPasteByModule") +def getImportantPasteByModule(): + module_name = request.args.get('moduleName') + return flask.Response(event_stream_getImportantPasteByModule(module_name), mimetype="text/event-stream") + + diff --git 
a/var/www/Flask_config.py b/var/www/Flask_config.py new file mode 100644 index 00000000..c15e4dca --- /dev/null +++ b/var/www/Flask_config.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask global variables shared accross modules +''' +import ConfigParser +import redis +import os + +# FLASK # +app = None + +# CONFIG # +configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') +if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + +cfg = ConfigParser.ConfigParser() +cfg.read(configfile) + + +# REDIS # +r_serv = redis.StrictRedis( + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) + +r_serv_log = redis.StrictRedis( + host=cfg.get("Redis_Log", "host"), + port=cfg.getint("Redis_Log", "port"), + db=cfg.getint("Redis_Log", "db")) + +r_serv_charts = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_Trending", "host"), + port=cfg.getint("Redis_Level_DB_Trending", "port"), + db=cfg.getint("Redis_Level_DB_Trending", "db")) + +r_serv_db = redis.StrictRedis( + host=cfg.get("Redis_Level_DB", "host"), + port=cfg.getint("Redis_Level_DB", "port"), + db=cfg.getint("Redis_Level_DB", "db")) + +r_serv_sentiment = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_Sentiment", "host"), + port=cfg.getint("Redis_Level_DB_Sentiment", "port"), + db=cfg.getint("Redis_Level_DB_Sentiment", "db")) + +r_serv_term = redis.StrictRedis( + host=cfg.get("Redis_Level_DB_TermFreq", "host"), + port=cfg.getint("Redis_Level_DB_TermFreq", "port"), + db=cfg.getint("Redis_Level_DB_TermFreq", "db")) + +r_serv_pasteName = redis.StrictRedis( + host=cfg.get("Redis_Paste_Name", "host"), + port=cfg.getint("Redis_Paste_Name", "port"), + db=cfg.getint("Redis_Paste_Name", "db")) + +# VARIABLES # +max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to 
display in the tooltip +max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal + +tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value diff --git a/var/www/Flask_dashboard.py b/var/www/Flask_dashboard.py new file mode 100644 index 00000000..b6bcb219 --- /dev/null +++ b/var/www/Flask_dashboard.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the dashboard page +''' +import flask +from flask import Flask, render_template, jsonify, request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv = Flask_config.r_serv +r_serv_log = Flask_config.r_serv_log +# ============ FUNCTIONS ============ + +def event_stream(): + pubsub = r_serv_log.pubsub() + pubsub.psubscribe("Script" + '.*') + for msg in pubsub.listen(): + level = msg['channel'].split('.')[1] + if msg['type'] == 'pmessage' and level != "DEBUG": + yield 'data: %s\n\n' % json.dumps(msg) + +def get_queues(r): + # We may want to put the llen in a pipeline to do only one query. 
+ newData = [] + for queue, card in r.hgetall("queues").iteritems(): + key = "MODULE_" + queue + "_" + keySet = "MODULE_TYPE_" + queue + + for moduleNum in r.smembers(keySet): + + value = r.get(key + str(moduleNum)) + if value is not None: + timestamp, path = value.split(", ") + if timestamp is not None: + startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) + processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] + seconds = int((datetime.datetime.now() - startTime_readable).total_seconds()) + newData.append( (queue, card, seconds, moduleNum) ) + else: + newData.append( (queue, cards, 0, moduleNum) ) + + return newData + +# ============ ROUTES ============ + +@app.route("/_logs") +def logs(): + return flask.Response(event_stream(), mimetype="text/event-stream") + + +@app.route("/_stuff", methods=['GET']) +def stuff(): + return jsonify(row1=get_queues(r_serv)) + + +@app.route("/") +def index(): + default_minute = cfg.get("Flask", "minute_processed_paste") + threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") + return render_template("index.html", default_minute = default_minute, threshold_stucked_module=threshold_stucked_module) diff --git a/var/www/Flask_search.py b/var/www/Flask_search.py new file mode 100644 index 00000000..b5c60898 --- /dev/null +++ b/var/www/Flask_search.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import json +import os +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_pasteName = Flask_config.r_serv_pasteName +max_preview_char = Flask_config.max_preview_char +max_preview_modal = Flask_config.max_preview_modal +# ============ FUNCTIONS ============ + + +# ============ ROUTES 
============ + +@app.route("/search", methods=['POST']) +def search(): + query = request.form['query'] + q = [] + q.append(query) + r = [] #complete path + c = [] #preview of the paste content + paste_date = [] + paste_size = [] + num_elem_to_get = 50 + + # Search filename + for path in r_serv_pasteName.smembers(q[0]): + r.append(path) + paste = Paste.Paste(path) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + + # Search full line + from whoosh import index + from whoosh.fields import Schema, TEXT, ID + schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) + + indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) + ix = index.open_dir(indexpath) + from whoosh.qparser import QueryParser + with ix.searcher() as searcher: + query = QueryParser("content", ix.schema).parse(" ".join(q)) + results = searcher.search_page(query, 1, pagelen=num_elem_to_get) + for x in results: + r.append(x.items()[0][1]) + paste = Paste.Paste(x.items()[0][1]) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + c.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + paste_date.append(curr_date) + paste_size.append(paste._get_p_size()) + results = searcher.search(query) + num_res = len(results) + + return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal, num_res=num_res) + + +@app.route("/get_more_search_result", methods=['POST']) +def get_more_search_result(): 
+ query = request.form['query'] + q = [] + q.append(query) + page_offset = int(request.form['page_offset']) + num_elem_to_get = 50 + + path_array = [] + preview_array = [] + date_array = [] + size_array = [] + + from whoosh import index + from whoosh.fields import Schema, TEXT, ID + schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) + + indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) + ix = index.open_dir(indexpath) + from whoosh.qparser import QueryParser + with ix.searcher() as searcher: + query = QueryParser("content", ix.schema).parse(" ".join(q)) + results = searcher.search_page(query, page_offset, num_elem_to_get) + for x in results: + path_array.append(x.items()[0][1]) + paste = Paste.Paste(x.items()[0][1]) + content = paste.get_p_content().decode('utf8', 'ignore') + content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 + preview_array.append(content[0:content_range]) + curr_date = str(paste._get_p_date()) + curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] + date_array.append(curr_date) + size_array.append(paste._get_p_size()) + to_return = {} + to_return["path_array"] = path_array + to_return["preview_array"] = preview_array + to_return["date_array"] = date_array + to_return["size_array"] = size_array + print "len(path_array)="+str(len(path_array)) + if len(path_array) < num_elem_to_get: #pagelength + to_return["moreData"] = False + else: + to_return["moreData"] = True + + return jsonify(to_return) + + diff --git a/var/www/Flask_sentiment.py b/var/www/Flask_sentiment.py new file mode 100644 index 00000000..275cce39 --- /dev/null +++ b/var/www/Flask_sentiment.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import calendar +from Date import Date +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# 
============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +r_serv_sentiment = Flask_config.r_serv_sentiment +# ============ FUNCTIONS ============ + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + +# ============ ROUTES ============ + +@app.route("/sentiment_analysis_trending/") +def sentiment_analysis_trending(): + return render_template("sentiment_analysis_trending.html") + + +@app.route("/sentiment_analysis_getplotdata/", methods=['GET']) +def sentiment_analysis_getplotdata(): + # Get the top providers based on number of pastes + oneHour = 60*60 + sevenDays = oneHour*24*7 + dateStart = datetime.datetime.now() + dateStart = dateStart.replace(minute=0, second=0, microsecond=0) + dateStart_timestamp = calendar.timegm(dateStart.timetuple()) + + getAllProviders = request.args.get('getProviders') + provider = request.args.get('provider') + allProvider = request.args.get('all') + if getAllProviders == 'True': + if allProvider == "True": + range_providers = r_serv_charts.smembers('all_provider_set') + return jsonify(list(range_providers)) + else: + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) + # if empty, get yesterday top providers + range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers + # if still empty, takes from all providers + if range_providers == []: + print 'today provider empty' + range_providers = r_serv_charts.smembers('all_provider_set') + return jsonify(list(range_providers)) + + elif provider is not None: + to_return = {} + + cur_provider_name = provider 
+ '_' + list_date = {} + for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[provider] = list_date + + return jsonify(to_return) + return "Bad request" + + + +@app.route("/sentiment_analysis_plot_tool/") +def sentiment_analysis_plot_tool(): + return render_template("sentiment_analysis_plot_tool.html") + + + +@app.route("/sentiment_analysis_plot_tool_getdata/", methods=['GET']) +def sentiment_analysis_plot_tool_getdata(): + getProviders = request.args.get('getProviders') + + if getProviders == 'True': + providers = [] + for cur_provider in r_serv_charts.smembers('all_provider_set'): + providers.append(cur_provider) + return jsonify(providers) + + else: + query = request.args.get('query') + query = query.split(',') + Qdate = request.args.get('Qdate') + + date1 = (Qdate.split('-')[0]).split('.') + date1 = datetime.date(int(date1[2]), int(date1[1]), int(date1[0])) + + date2 = (Qdate.split('-')[1]).split('.') + date2 = datetime.date(int(date2[2]), int(date2[1]), int(date2[0])) + + timestamp1 = calendar.timegm(date1.timetuple()) + timestamp2 = calendar.timegm(date2.timetuple()) + + oneHour = 60*60 + oneDay = oneHour*24 + + to_return = {} + for cur_provider in query: + list_date = {} + cur_provider_name = cur_provider + '_' + for cur_timestamp in range(int(timestamp1), int(timestamp2)+oneDay, oneHour): + cur_set_name = cur_provider_name + str(cur_timestamp) + + list_value = [] + for cur_id in r_serv_sentiment.smembers(cur_set_name): + cur_value = r_serv_sentiment.get(cur_id) + list_value.append(cur_value) + list_date[cur_timestamp] = list_value + to_return[cur_provider] = list_date + + return jsonify(to_return) + + diff --git a/var/www/Flask_server.py 
b/var/www/Flask_server.py index 0eb04b47..4ece0c75 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -15,231 +15,30 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) import Paste from Date import Date +# Import config +import Flask_config + # CONFIG # -tlsh_to_percent = 1000.0 #Use to display the estimated percentage instead of a raw value +cfg = Flask_config.cfg -configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') -if not os.path.exists(configfile): - raise Exception('Unable to find the configuration file. \ - Did you set environment variables? \ - Or activate the virtualenv.') - -cfg = ConfigParser.ConfigParser() -cfg.read(configfile) - -max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip -max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal - -# REDIS # -r_serv = redis.StrictRedis( - host=cfg.get("Redis_Queues", "host"), - port=cfg.getint("Redis_Queues", "port"), - db=cfg.getint("Redis_Queues", "db")) - -r_serv_log = redis.StrictRedis( - host=cfg.get("Redis_Log", "host"), - port=cfg.getint("Redis_Log", "port"), - db=cfg.getint("Redis_Log", "db")) - -r_serv_charts = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_Trending", "host"), - port=cfg.getint("Redis_Level_DB_Trending", "port"), - db=cfg.getint("Redis_Level_DB_Trending", "db")) - -r_serv_db = redis.StrictRedis( - host=cfg.get("Redis_Level_DB", "host"), - port=cfg.getint("Redis_Level_DB", "port"), - db=cfg.getint("Redis_Level_DB", "db")) - -r_serv_sentiment = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_Sentiment", "host"), - port=cfg.getint("Redis_Level_DB_Sentiment", "port"), - db=cfg.getint("Redis_Level_DB_Sentiment", "db")) - -r_serv_term = redis.StrictRedis( - host=cfg.get("Redis_Level_DB_TermFreq", "host"), - port=cfg.getint("Redis_Level_DB_TermFreq", "port"), - db=cfg.getint("Redis_Level_DB_TermFreq", "db")) 
- -r_serv_pasteName = redis.StrictRedis( - host=cfg.get("Redis_Paste_Name", "host"), - port=cfg.getint("Redis_Paste_Name", "port"), - db=cfg.getint("Redis_Paste_Name", "db")) - - -app = Flask(__name__, static_url_path='/static/') - - -def event_stream(): - pubsub = r_serv_log.pubsub() - pubsub.psubscribe("Script" + '.*') - for msg in pubsub.listen(): - level = msg['channel'].split('.')[1] - if msg['type'] == 'pmessage' and level != "DEBUG": - yield 'data: %s\n\n' % json.dumps(msg) - -def event_stream_getImportantPasteByModule(module_name): - index = 0 - all_pastes_list = getPastebyType(r_serv_db, module_name) - for path in all_pastes_list: - index += 1 - paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - data = {} - data["module"] = module_name - data["index"] = index - data["path"] = path - data["content"] = content[0:content_range] - data["linenum"] = paste.get_lines_info()[0] - data["date"] = curr_date - data["char_to_display"] = max_preview_modal - data["finished"] = True if index == len(all_pastes_list) else False - yield 'retry: 100000\ndata: %s\n\n' % json.dumps(data) #retry to avoid reconnection of the browser - - -def get_queues(r): - # We may want to put the llen in a pipeline to do only one query. 
- newData = [] - for queue, card in r.hgetall("queues").iteritems(): - key = "MODULE_" + queue + "_" - keySet = "MODULE_TYPE_" + queue - - for moduleNum in r.smembers(keySet): - - value = r.get(key + str(moduleNum)) - if value is not None: - timestamp, path = value.split(", ") - if timestamp is not None: - startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) - processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] - seconds = int((datetime.datetime.now() - startTime_readable).total_seconds()) - newData.append( (queue, card, seconds, moduleNum) ) - else: - newData.append( (queue, cards, 0, moduleNum) ) - - return newData +Flask_config.app = Flask(__name__, static_url_path='/static/') +app = Flask_config.app +# import routes and functions from modules +import Flask_dashboard +import Flask_trendingcharts +import Flask_trendingmodules +import Flask_browsepastes +import Flask_sentiment +import Flask_terms +import Flask_search +import Flask_showpaste def list_len(s): return len(s) app.jinja_env.filters['list_len'] = list_len -def showpaste(content_range): - requested_path = request.args.get('paste', '') - paste = Paste.Paste(requested_path) - p_date = str(paste._get_p_date()) - p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] - p_source = paste.p_source - p_encoding = paste._get_p_encoding() - p_language = paste._get_p_language() - p_size = paste.p_size - p_mime = paste.p_mime - p_lineinfo = paste.get_lines_info() - p_content = paste.get_p_content().decode('utf-8', 'ignore') - p_duplicate_full_list = json.loads(paste._get_p_duplicate()) - p_duplicate_list = [] - p_simil_list = [] - p_date_list = [] - p_hashtype_list = [] - - - for dup_list in p_duplicate_full_list: - if dup_list[0] == "tlsh": - dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100) - else: - dup_list[2] = int(dup_list[2]) - - p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) - - # Combine multiple 
duplicate paste name and format for display - new_dup_list = [] - dup_list_removed = [] - for dup_list_index in range(0, len(p_duplicate_full_list)): - if dup_list_index in dup_list_removed: - continue - indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]] - hash_types = [] - comp_vals = [] - for i in indices: - hash_types.append(p_duplicate_full_list[i][0].encode('utf8')) - comp_vals.append(p_duplicate_full_list[i][2]) - dup_list_removed.append(i) - - hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types) - comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) - if len(p_duplicate_full_list[dup_list_index]) > 3: - try: - date_paste = str(int(p_duplicate_full_list[dup_list_index][3])) - date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8] - except ValueError: - date_paste = str(p_duplicate_full_list[dup_list_index][3]) - else: - date_paste = "No date available" - new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste]) - - # Create the list to pass to the webpage - for dup_list in new_dup_list: - hash_type, path, simil_percent, date_paste = dup_list - p_duplicate_list.append(path) - p_simil_list.append(simil_percent) - p_hashtype_list.append(hash_type) - p_date_list.append(date_paste) - - if content_range != 0: - p_content = p_content[0:content_range] - - - return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list) - -def getPastebyType(server, module_name): - all_path = [] - for path in server.smembers('WARNING_'+module_name): - all_path.append(path) - return all_path - - 
-def get_date_range(num_day): - curr_date = datetime.date.today() - date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) - date_list = [] - - for i in range(0, num_day+1): - date_list.append(date.substract_day(i)) - return date_list - -# Iterate over elements in the module provided and return the today data or the last data -# return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]] -def get_top_relevant_data(server, module_name): - days = 0 - for date in get_date_range(15): - redis_progression_name_set = 'top_'+ module_name +'_set_' + date - member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True) - if len(member_set) == 0: #No data for this date - days += 1 - else: - member_set.insert(0, ("passed_days", days)) - return member_set - - -def Term_getValueOverRange(word, startDate, num_day): - passed_days = 0 - oneDay = 60*60*24 - to_return = [] - curr_to_return = 0 - for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay): - value = r_serv_term.hget(timestamp, word) - curr_to_return += int(value) if value is not None else 0 - for i in num_day: - if passed_days == i-1: - to_return.append(curr_to_return) - passed_days += 1 - return to_return - - # ========= CACHE CONTROL ======== @app.after_request def add_header(response): @@ -251,595 +50,7 @@ def add_header(response): response.headers['Cache-Control'] = 'public, max-age=0' return response -# ============ ROUTES ============ - -@app.route("/_logs") -def logs(): - return flask.Response(event_stream(), mimetype="text/event-stream") - - -@app.route("/_stuff", methods=['GET']) -def stuff(): - return jsonify(row1=get_queues(r_serv)) - -@app.route("/_progressionCharts", methods=['GET']) -def progressionCharts(): - attribute_name = request.args.get('attributeName') - trending_name = request.args.get('trendingName') - bar_requested = True if request.args.get('bar') == 
"true" else False - - if (bar_requested): - num_day = int(request.args.get('days')) - bar_values = [] - - date_range = get_date_range(num_day) - # Retreive all data from the last num_day - for date in date_range: - curr_value = r_serv_charts.hget(attribute_name, date) - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) - bar_values.insert(0, attribute_name) - return jsonify(bar_values) - - else: - redis_progression_name = "z_top_progression_" + trending_name - keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10) - return jsonify(keyw_value) - -@app.route("/_moduleCharts", methods=['GET']) -def modulesCharts(): - keyword_name = request.args.get('keywordName') - module_name = request.args.get('moduleName') - bar_requested = True if request.args.get('bar') == "true" else False - - if (bar_requested): - num_day = int(request.args.get('days')) - bar_values = [] - - date_range = get_date_range(num_day) - # Retreive all data from the last num_day - for date in date_range: - curr_value = r_serv_charts.hget(date, module_name+'-'+keyword_name) - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) - bar_values.insert(0, keyword_name) - return jsonify(bar_values) - - else: - member_set = get_top_relevant_data(r_serv_charts, module_name) - if len(member_set) == 0: - member_set.append(("No relevant data", int(100))) - return jsonify(member_set) - - -@app.route("/_providersChart", methods=['GET']) -def providersChart(): - keyword_name = request.args.get('keywordName') - module_name = request.args.get('moduleName') - bar_requested = True if request.args.get('bar') == "true" else False - - if (bar_requested): - num_day = int(request.args.get('days')) - bar_values = [] - - date_range = get_date_range(num_day) - # Retreive all data from the last num_day - for date in date_range: - curr_value_size = 
r_serv_charts.hget(keyword_name+'_'+'size', date) - curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) - curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) - if module_name == "size": - curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) - else: - curr_value = float(curr_value_num if curr_value_num is not None else 0.0) - - bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) - bar_values.insert(0, keyword_name) - return jsonify(bar_values) - - else: - #redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' - redis_provider_name_set = 'top_avg_size_set_' if module_name == "size" else 'providers_set_' - redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] - - member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) - # Member set is a list of (value, score) pairs - if len(member_set) == 0: - member_set.append(("No relevant data", float(100))) - return jsonify(member_set) - - - -@app.route("/search", methods=['POST']) -def search(): - query = request.form['query'] - q = [] - q.append(query) - r = [] #complete path - c = [] #preview of the paste content - paste_date = [] - paste_size = [] - num_elem_to_get = 50 - - # Search filename - for path in r_serv_pasteName.smembers(q[0]): - r.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) - - # Search full line - from whoosh import index - from whoosh.fields import Schema, TEXT, ID - schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) - - indexpath = 
os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) - ix = index.open_dir(indexpath) - from whoosh.qparser import QueryParser - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, 1, pagelen=num_elem_to_get) - for x in results: - r.append(x.items()[0][1]) - paste = Paste.Paste(x.items()[0][1]) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - c.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_size.append(paste._get_p_size()) - results = searcher.search(query) - num_res = len(results) - - return render_template("search.html", r=r, c=c, query=request.form['query'], paste_date=paste_date, paste_size=paste_size, char_to_display=max_preview_modal, num_res=num_res) - - -@app.route("/get_more_search_result", methods=['POST']) -def get_more_search_result(): - query = request.form['query'] - q = [] - q.append(query) - page_offset = int(request.form['page_offset']) - num_elem_to_get = 50 - - path_array = [] - preview_array = [] - date_array = [] - size_array = [] - - from whoosh import index - from whoosh.fields import Schema, TEXT, ID - schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT) - - indexpath = os.path.join(os.environ['AIL_HOME'], cfg.get("Indexer", "path")) - ix = index.open_dir(indexpath) - from whoosh.qparser import QueryParser - with ix.searcher() as searcher: - query = QueryParser("content", ix.schema).parse(" ".join(q)) - results = searcher.search_page(query, page_offset, num_elem_to_get) - for x in results: - path_array.append(x.items()[0][1]) - paste = Paste.Paste(x.items()[0][1]) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else 
len(content)-1 - preview_array.append(content[0:content_range]) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - date_array.append(curr_date) - size_array.append(paste._get_p_size()) - to_return = {} - to_return["path_array"] = path_array - to_return["preview_array"] = preview_array - to_return["date_array"] = date_array - to_return["size_array"] = size_array - print "len(path_array)="+str(len(path_array)) - if len(path_array) < num_elem_to_get: #pagelength - to_return["moreData"] = False - else: - to_return["moreData"] = True - - return jsonify(to_return) - - -@app.route("/") -def index(): - default_minute = cfg.get("Flask", "minute_processed_paste") - threshold_stucked_module = cfg.getint("Module_ModuleInformation", "threshold_stucked_module") - return render_template("index.html", default_minute = default_minute, threshold_stucked_module=threshold_stucked_module) - - -@app.route("/monitoring/") -def monitoring(): - for queue in r_serv.smembers("queues"): - return render_template("Queue_live_Monitoring.html", last_value=queue) - - -@app.route("/wordstrending/") -def wordstrending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Wordstrending.html", default_display = default_display) - - -@app.route("/protocolstrending/") -def protocolstrending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Protocolstrending.html", default_display = default_display) - - -@app.route("/trending/") -def trending(): - default_display = cfg.get("Flask", "default_display") - return render_template("Trending.html", default_display = default_display) - -@app.route("/browseImportantPaste/", methods=['GET']) -def browseImportantPaste(): - module_name = request.args.get('moduleName') - return render_template("browse_important_paste.html") - - -@app.route("/importantPasteByModule/", methods=['GET']) -def importantPasteByModule(): - module_name = 
request.args.get('moduleName') - - all_content = [] - paste_date = [] - paste_linenum = [] - all_path = [] - allPastes = getPastebyType(r_serv_db, module_name) - - for path in allPastes[0:10]: - all_path.append(path) - paste = Paste.Paste(path) - content = paste.get_p_content().decode('utf8', 'ignore') - content_range = max_preview_char if len(content)>max_preview_char else len(content)-1 - all_content.append(content[0:content_range].replace("\"", "\'").replace("\r", " ").replace("\n", " ")) - curr_date = str(paste._get_p_date()) - curr_date = curr_date[0:4]+'/'+curr_date[4:6]+'/'+curr_date[6:] - paste_date.append(curr_date) - paste_linenum.append(paste.get_lines_info()[0]) - - if len(allPastes) > 10: - finished = "" - else: - finished = "display: none;" - - return render_template("important_paste_by_module.html", moduleName=module_name, all_path=all_path, content=all_content, paste_date=paste_date, paste_linenum=paste_linenum, char_to_display=max_preview_modal, finished=finished) - -@app.route("/_getImportantPasteByModule") -def getImportantPasteByModule(): - module_name = request.args.get('moduleName') - return flask.Response(event_stream_getImportantPasteByModule(module_name), mimetype="text/event-stream") - -@app.route("/moduletrending/") -def moduletrending(): - return render_template("Moduletrending.html") - -@app.route("/sentiment_analysis_trending/") -def sentiment_analysis_trending(): - return render_template("sentiment_analysis_trending.html") - - -@app.route("/sentiment_analysis_getplotdata/", methods=['GET']) -def sentiment_analysis_getplotdata(): - # Get the top providers based on number of pastes - oneHour = 60*60 - sevenDays = oneHour*24*7 - dateStart = datetime.datetime.now() - dateStart = dateStart.replace(minute=0, second=0, microsecond=0) - dateStart_timestamp = calendar.timegm(dateStart.timetuple()) - - getAllProviders = request.args.get('getProviders') - provider = request.args.get('provider') - allProvider = request.args.get('all') - if 
getAllProviders == 'True': - if allProvider == "True": - range_providers = r_serv_charts.smembers('all_provider_set') - return jsonify(list(range_providers)) - else: - range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(0)[0], '+inf', '-inf', start=0, num=8) - # if empty, get yesterday top providers - range_providers = r_serv_charts.zrevrangebyscore('providers_set_'+ get_date_range(1)[1], '+inf', '-inf', start=0, num=8) if range_providers == [] else range_providers - # if still empty, takes from all providers - if range_providers == []: - print 'today provider empty' - range_providers = r_serv_charts.smembers('all_provider_set') - return jsonify(range_providers) - - elif provider is not None: - to_return = {} - - cur_provider_name = provider + '_' - list_date = {} - for cur_timestamp in range(int(dateStart_timestamp), int(dateStart_timestamp)-sevenDays-oneHour, -oneHour): - cur_set_name = cur_provider_name + str(cur_timestamp) - - list_value = [] - for cur_id in r_serv_sentiment.smembers(cur_set_name): - cur_value = r_serv_sentiment.get(cur_id) - list_value.append(cur_value) - list_date[cur_timestamp] = list_value - to_return[provider] = list_date - - return jsonify(to_return) - return "Bad request" - - - -@app.route("/sentiment_analysis_plot_tool/") -def sentiment_analysis_plot_tool(): - return render_template("sentiment_analysis_plot_tool.html") - - - -@app.route("/sentiment_analysis_plot_tool_getdata/", methods=['GET']) -def sentiment_analysis_plot_tool_getdata(): - getProviders = request.args.get('getProviders') - - if getProviders == 'True': - providers = [] - for cur_provider in r_serv_charts.smembers('all_provider_set'): - providers.append(cur_provider) - return jsonify(providers) - - else: - query = request.args.get('query') - query = query.split(',') - Qdate = request.args.get('Qdate') - - date1 = (Qdate.split('-')[0]).split('.') - date1 = datetime.date(int(date1[2]), int(date1[1]), int(date1[0])) - - date2 = 
(Qdate.split('-')[1]).split('.') - date2 = datetime.date(int(date2[2]), int(date2[1]), int(date2[0])) - - timestamp1 = calendar.timegm(date1.timetuple()) - timestamp2 = calendar.timegm(date2.timetuple()) - - oneHour = 60*60 - oneDay = oneHour*24 - - to_return = {} - for cur_provider in query: - list_date = {} - cur_provider_name = cur_provider + '_' - for cur_timestamp in range(int(timestamp1), int(timestamp2)+oneDay, oneHour): - cur_set_name = cur_provider_name + str(cur_timestamp) - - list_value = [] - for cur_id in r_serv_sentiment.smembers(cur_set_name): - cur_value = r_serv_sentiment.get(cur_id) - list_value.append(cur_value) - list_date[cur_timestamp] = list_value - to_return[cur_provider] = list_date - - return jsonify(to_return) - - -@app.route("/terms_management/") -def terms_management(): - TrackedTermsSet_Name = "TrackedSetTermSet" - BlackListTermsSet_Name = "BlackListSetTermSet" - TrackedTermsDate_Name = "TrackedTermDate" - BlackListTermsDate_Name = "BlackListTermDate" - - today = datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - track_list = [] - track_list_values = [] - track_list_num_of_paste = [] - for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): - track_list.append(tracked_term) - value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) - - term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) - - set_paste_name = "tracked_" + tracked_term - track_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - value_range.append(term_date) - track_list_values.append(value_range) - - - black_list = [] - for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): - term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) - term_date = 
datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" - black_list.append([blacked_term, term_date]) - - return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) - - -@app.route("/terms_management_query_paste/") -def terms_management_query_paste(): - term = request.args.get('term') - TrackedTermsSet_Name = "TrackedSetTermSet" - paste_info = [] - - set_paste_name = "tracked_" + term - track_list_path = r_serv_term.smembers(set_paste_name) - - for path in track_list_path: - paste = Paste.Paste(path) - p_date = str(paste._get_p_date()) - p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] - p_source = paste.p_source - p_encoding = paste._get_p_encoding() - p_size = paste.p_size - p_mime = paste.p_mime - p_lineinfo = paste.get_lines_info() - p_content = paste.get_p_content().decode('utf-8', 'ignore') - if p_content != 0: - p_content = p_content[0:400] - paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) - - return jsonify(paste_info) - - -@app.route("/terms_management_query/") -def terms_management_query(): - TrackedTermsDate_Name = "TrackedTermDate" - BlackListTermsDate_Name = "BlackListTermDate" - term = request.args.get('term') - section = request.args.get('section') - - today = datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) - - if section == "followTerm": - term_date = r_serv_term.hget(TrackedTermsDate_Name, term) - elif section == "blacklistTerm": - term_date = r_serv_term.hget(BlackListTermsDate_Name, term) - - term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No 
date recorded" - value_range.append(str(term_date)) - return jsonify(value_range) - - -@app.route("/terms_management_action/", methods=['GET']) -def terms_management_action(): - TrackedTermsSet_Name = "TrackedSetTermSet" - TrackedTermsDate_Name = "TrackedTermDate" - BlackListTermsDate_Name = "BlackListTermDate" - BlackListTermsSet_Name = "BlackListSetTermSet" - - today = datetime.datetime.now() - today = today.replace(microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - - section = request.args.get('section') - action = request.args.get('action') - term = request.args.get('term') - if action is None or term is None: - return "None" - else: - if section == "followTerm": - if action == "add": - r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) - r_serv_term.hset(TrackedTermsDate_Name, term, today_timestamp) - else: - r_serv_term.srem(TrackedTermsSet_Name, term.lower()) - elif section == "blacklistTerm": - if action == "add": - r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) - r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) - else: - r_serv_term.srem(BlackListTermsSet_Name, term.lower()) - else: - return "None" - - to_return = {} - to_return["section"] = section - to_return["action"] = action - to_return["term"] = term - return jsonify(to_return) - - - -@app.route("/terms_plot_tool/") -def terms_plot_tool(): - term = request.args.get('term') - if term is not None: - return render_template("terms_plot_tool.html", term=term) - else: - return render_template("terms_plot_tool.html", term="") - - -@app.route("/terms_plot_tool_data/") -def terms_plot_tool_data(): - oneDay = 60*60*24 - range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0; - range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0) - range_start = calendar.timegm(range_start.timetuple()) - range_end = 
datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; - range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) - range_end = calendar.timegm(range_end.timetuple()) - term = request.args.get('term') - - if term is None: - return "None" - else: - value_range = [] - for timestamp in range(range_start, range_end+oneDay, oneDay): - value = r_serv_term.hget(timestamp, term) - curr_value_range = int(value) if value is not None else 0 - value_range.append([timestamp, curr_value_range]) - value_range.insert(0,term) - return jsonify(value_range) - - -@app.route("/terms_plot_top/") -def terms_plot_top(): - return render_template("terms_plot_top.html") - - -@app.route("/terms_plot_top_data/") -def terms_plot_top_data(): - oneDay = 60*60*24 - today = datetime.datetime.now() - today = today.replace(hour=0, minute=0, second=0, microsecond=0) - today_timestamp = calendar.timegm(today.timetuple()) - - set_day = "TopTermFreq_set_day_" + str(today_timestamp) - set_week = "TopTermFreq_set_week"; - set_month = "TopTermFreq_set_month"; - - the_set = request.args.get('set') - num_day = int(request.args.get('num_day')) - if the_set is None: - return "None" - else: - to_return = [] - if the_set == "TopTermFreq_set_day": - the_set += "_" + str(today_timestamp) - - for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): - position = {} - position['day'] = r_serv_term.zrevrank(set_day, term) - position['day'] = position['day']+1 if position['day'] is not None else "<20" - position['week'] = r_serv_term.zrevrank(set_week, term) - position['week'] = position['week']+1 if position['week'] is not None else "<20" - position['month'] = r_serv_term.zrevrank(set_month, term) - position['month'] = position['month']+1 if position['month'] is not None else "<20" - value_range = [] - for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, 
-oneDay): - value = r_serv_term.hget(timestamp, term) - curr_value_range = int(value) if value is not None else 0 - value_range.append([timestamp, curr_value_range]) - - to_return.append([term, value_range, tot_value, position]) - - return jsonify(to_return) - - - -@app.route("/showsavedpaste/") #completely shows the paste in a new tab -def showsavedpaste(): - return showpaste(0) - - -@app.route("/showpreviewpaste/") -def showpreviewpaste(): - num = request.args.get('num', '') - return "|num|"+num+"|num|"+showpaste(max_preview_modal) - - -@app.route("/getmoredata/") -def getmoredata(): - requested_path = request.args.get('paste', '') - paste = Paste.Paste(requested_path) - p_content = paste.get_p_content().decode('utf-8', 'ignore') - to_return = p_content[max_preview_modal-1:] - return to_return - +# ============ MAIN ============ if __name__ == "__main__": app.run(host='0.0.0.0', port=7000, threaded=True) diff --git a/var/www/Flask_showpaste.py b/var/www/Flask_showpaste.py new file mode 100644 index 00000000..71e2a4e2 --- /dev/null +++ b/var/www/Flask_showpaste.py @@ -0,0 +1,114 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import json +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_pasteName = Flask_config.r_serv_pasteName +max_preview_char = Flask_config.max_preview_char +max_preview_modal = Flask_config.max_preview_modal +tlsh_to_percent = Flask_config.tlsh_to_percent +# ============ FUNCTIONS ============ + +def showpaste(content_range): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_language = 
paste._get_p_language() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + p_duplicate_full_list = json.loads(paste._get_p_duplicate()) + p_duplicate_list = [] + p_simil_list = [] + p_date_list = [] + p_hashtype_list = [] + + + for dup_list in p_duplicate_full_list: + if dup_list[0] == "tlsh": + dup_list[2] = int(((tlsh_to_percent - float(dup_list[2])) / tlsh_to_percent)*100) + else: + dup_list[2] = int(dup_list[2]) + + p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True) + + # Combine multiple duplicate paste name and format for display + new_dup_list = [] + dup_list_removed = [] + for dup_list_index in range(0, len(p_duplicate_full_list)): + if dup_list_index in dup_list_removed: + continue + indices = [i for i, x in enumerate(p_duplicate_full_list) if x[1] == p_duplicate_full_list[dup_list_index][1]] + hash_types = [] + comp_vals = [] + for i in indices: + hash_types.append(p_duplicate_full_list[i][0].encode('utf8')) + comp_vals.append(p_duplicate_full_list[i][2]) + dup_list_removed.append(i) + + hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types) + comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals) + if len(p_duplicate_full_list[dup_list_index]) > 3: + try: + date_paste = str(int(p_duplicate_full_list[dup_list_index][3])) + date_paste = date_paste[0:4]+"-"+date_paste[4:6]+"-"+date_paste[6:8] + except ValueError: + date_paste = str(p_duplicate_full_list[dup_list_index][3]) + else: + date_paste = "No date available" + new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste]) + + # Create the list to pass to the webpage + for dup_list in new_dup_list: + hash_type, path, simil_percent, date_paste = dup_list + p_duplicate_list.append(path) + p_simil_list.append(simil_percent) + 
p_hashtype_list.append(hash_type) + p_date_list.append(date_paste) + + if content_range != 0: + p_content = p_content[0:content_range] + + + return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list) + + + +# ============ ROUTES ============ + +@app.route("/showsavedpaste/") #completely shows the paste in a new tab +def showsavedpaste(): + return showpaste(0) + + +@app.route("/showpreviewpaste/") +def showpreviewpaste(): + num = request.args.get('num', '') + return "|num|"+num+"|num|"+showpaste(max_preview_modal) + + +@app.route("/getmoredata/") +def getmoredata(): + requested_path = request.args.get('paste', '') + paste = Paste.Paste(requested_path) + p_content = paste.get_p_content().decode('utf-8', 'ignore') + to_return = p_content[max_preview_modal-1:] + return to_return + diff --git a/var/www/Flask_terms.py b/var/www/Flask_terms.py new file mode 100644 index 00000000..f5416ddc --- /dev/null +++ b/var/www/Flask_terms.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import calendar +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_term = Flask_config.r_serv_term +# ============ FUNCTIONS ============ + +def Term_getValueOverRange(word, startDate, num_day): + passed_days = 0 + oneDay = 60*60*24 + to_return = [] + curr_to_return = 0 + for timestamp in range(startDate, startDate - max(num_day)*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, word) + curr_to_return += int(value) if value is not None else 0 + for i in 
num_day: + if passed_days == i-1: + to_return.append(curr_to_return) + passed_days += 1 + return to_return + + +# ============ ROUTES ============ + +@app.route("/terms_management/") +def terms_management(): + TrackedTermsSet_Name = "TrackedSetTermSet" + BlackListTermsSet_Name = "BlackListSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + track_list = [] + track_list_values = [] + track_list_num_of_paste = [] + for tracked_term in r_serv_term.smembers(TrackedTermsSet_Name): + track_list.append(tracked_term) + value_range = Term_getValueOverRange(tracked_term, today_timestamp, [1, 7, 31]) + + term_date = r_serv_term.hget(TrackedTermsDate_Name, tracked_term) + + set_paste_name = "tracked_" + tracked_term + track_list_num_of_paste.append(r_serv_term.scard(set_paste_name)) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(term_date) + track_list_values.append(value_range) + + + black_list = [] + for blacked_term in r_serv_term.smembers(BlackListTermsSet_Name): + term_date = r_serv_term.hget(BlackListTermsDate_Name, blacked_term) + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + black_list.append([blacked_term, term_date]) + + return render_template("terms_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + + +@app.route("/terms_management_query_paste/") +def terms_management_query_paste(): + term = request.args.get('term') + TrackedTermsSet_Name = "TrackedSetTermSet" + paste_info = [] + + set_paste_name = "tracked_" + term + track_list_path = r_serv_term.smembers(set_paste_name) + + for path in track_list_path: 
+ paste = Paste.Paste(path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_size = paste.p_size + p_mime = paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + if p_content != 0: + p_content = p_content[0:400] + paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) + + return jsonify(paste_info) + + +@app.route("/terms_management_query/") +def terms_management_query(): + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + term = request.args.get('term') + section = request.args.get('section') + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + value_range = Term_getValueOverRange(term, today_timestamp, [1, 7, 31]) + + if section == "followTerm": + term_date = r_serv_term.hget(TrackedTermsDate_Name, term) + elif section == "blacklistTerm": + term_date = r_serv_term.hget(BlackListTermsDate_Name, term) + + term_date = datetime.datetime.utcfromtimestamp(int(term_date)) if term_date is not None else "No date recorded" + value_range.append(str(term_date)) + return jsonify(value_range) + + +@app.route("/terms_management_action/", methods=['GET']) +def terms_management_action(): + TrackedTermsSet_Name = "TrackedSetTermSet" + TrackedTermsDate_Name = "TrackedTermDate" + BlackListTermsDate_Name = "BlackListTermDate" + BlackListTermsSet_Name = "BlackListSetTermSet" + + today = datetime.datetime.now() + today = today.replace(microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + + section = request.args.get('section') + action = request.args.get('action') + term = request.args.get('term') + if action is None or term is None: + 
return "None" + else: + if section == "followTerm": + if action == "add": + r_serv_term.sadd(TrackedTermsSet_Name, term.lower()) + r_serv_term.hset(TrackedTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(TrackedTermsSet_Name, term.lower()) + elif section == "blacklistTerm": + if action == "add": + r_serv_term.sadd(BlackListTermsSet_Name, term.lower()) + r_serv_term.hset(BlackListTermsDate_Name, term, today_timestamp) + else: + r_serv_term.srem(BlackListTermsSet_Name, term.lower()) + else: + return "None" + + to_return = {} + to_return["section"] = section + to_return["action"] = action + to_return["term"] = term + return jsonify(to_return) + + + +@app.route("/terms_plot_tool/") +def terms_plot_tool(): + term = request.args.get('term') + if term is not None: + return render_template("terms_plot_tool.html", term=term) + else: + return render_template("terms_plot_tool.html", term="") + + +@app.route("/terms_plot_tool_data/") +def terms_plot_tool_data(): + oneDay = 60*60*24 + range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0; + range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0) + range_start = calendar.timegm(range_start.timetuple()) + range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; + range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) + range_end = calendar.timegm(range_end.timetuple()) + term = request.args.get('term') + + if term is None: + return "None" + else: + value_range = [] + for timestamp in range(range_start, range_end+oneDay, oneDay): + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + value_range.insert(0,term) + return jsonify(value_range) + + +@app.route("/terms_plot_top/") +def terms_plot_top(): 
+ return render_template("terms_plot_top.html") + + +@app.route("/terms_plot_top_data/") +def terms_plot_top_data(): + oneDay = 60*60*24 + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + set_day = "TopTermFreq_set_day_" + str(today_timestamp) + set_week = "TopTermFreq_set_week"; + set_month = "TopTermFreq_set_month"; + + the_set = request.args.get('set') + num_day = int(request.args.get('num_day')) + if the_set is None: + return "None" + else: + to_return = [] + if the_set == "TopTermFreq_set_day": + the_set += "_" + str(today_timestamp) + + for term, tot_value in r_serv_term.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): + position = {} + position['day'] = r_serv_term.zrevrank(set_day, term) + position['day'] = position['day']+1 if position['day'] is not None else "<20" + position['week'] = r_serv_term.zrevrank(set_week, term) + position['week'] = position['week']+1 if position['week'] is not None else "<20" + position['month'] = r_serv_term.zrevrank(set_month, term) + position['month'] = position['month']+1 if position['month'] is not None else "<20" + value_range = [] + for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): + value = r_serv_term.hget(timestamp, term) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + + to_return.append([term, value_range, tot_value, position]) + + return jsonify(to_return) + + diff --git a/var/www/Flask_trendingcharts.py b/var/www/Flask_trendingcharts.py new file mode 100644 index 00000000..fdb1a3d4 --- /dev/null +++ b/var/www/Flask_trendingcharts.py @@ -0,0 +1,73 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending charts page +''' +import redis +import datetime +from Date import Date +import flask +from flask import Flask, render_template, jsonify, 
request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +# ============ FUNCTIONS ============ + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + + +# ============ ROUTES ============ + +@app.route("/_progressionCharts", methods=['GET']) +def progressionCharts(): + attribute_name = request.args.get('attributeName') + trending_name = request.args.get('trendingName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value = r_serv_charts.hget(attribute_name, date) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + bar_values.insert(0, attribute_name) + return jsonify(bar_values) + + else: + redis_progression_name = "z_top_progression_" + trending_name + keyw_value = r_serv_charts.zrevrangebyscore(redis_progression_name, '+inf', '-inf', withscores=True, start=0, num=10) + return jsonify(keyw_value) + +@app.route("/wordstrending/") +def wordstrending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Wordstrending.html", default_display = default_display) + + +@app.route("/protocolstrending/") +def protocolstrending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Protocolstrending.html", default_display = default_display) + + +@app.route("/trending/") +def trending(): + default_display = cfg.get("Flask", "default_display") + return render_template("Trending.html", default_display = 
default_display) + + diff --git a/var/www/Flask_trendingmodules.py b/var/www/Flask_trendingmodules.py new file mode 100644 index 00000000..73cef7f5 --- /dev/null +++ b/var/www/Flask_trendingmodules.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import flask +from flask import Flask, render_template, jsonify, request + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_charts = Flask_config.r_serv_charts +# ============ FUNCTIONS ============ + +# Iterate over elements in the module provided and return the today data or the last data +# return format: [('passed_days', num_of_passed_days), ('elem_name1', elem_value1), ('elem_name2', elem_value2)]] +def get_top_relevant_data(server, module_name): + days = 0 + for date in get_date_range(15): + redis_progression_name_set = 'top_'+ module_name +'_set_' + date + member_set = server.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True) + if len(member_set) == 0: #No data for this date + days += 1 + else: + member_set.insert(0, ("passed_days", days)) + return member_set + + +def get_date_range(num_day): + curr_date = datetime.date.today() + date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2)) + date_list = [] + + for i in range(0, num_day+1): + date_list.append(date.substract_day(i)) + return date_list + +# ============ ROUTES ============ + +@app.route("/_moduleCharts", methods=['GET']) +def modulesCharts(): + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value 
= r_serv_charts.hget(date, module_name+'-'+keyword_name) + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)]) + bar_values.insert(0, keyword_name) + return jsonify(bar_values) + + else: + member_set = get_top_relevant_data(r_serv_charts, module_name) + if len(member_set) == 0: + member_set.append(("No relevant data", int(100))) + return jsonify(member_set) + + +@app.route("/_providersChart", methods=['GET']) +def providersChart(): + keyword_name = request.args.get('keywordName') + module_name = request.args.get('moduleName') + bar_requested = True if request.args.get('bar') == "true" else False + + if (bar_requested): + num_day = int(request.args.get('days')) + bar_values = [] + + date_range = get_date_range(num_day) + # Retreive all data from the last num_day + for date in date_range: + curr_value_size = r_serv_charts.hget(keyword_name+'_'+'size', date) + curr_value_num = r_serv_charts.hget(keyword_name+'_'+'num', date) + curr_value_size_avg = r_serv_charts.hget(keyword_name+'_'+'avg', date) + if module_name == "size": + curr_value = float(curr_value_size_avg if curr_value_size_avg is not None else 0) + else: + curr_value = float(curr_value_num if curr_value_num is not None else 0.0) + + bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], curr_value]) + bar_values.insert(0, keyword_name) + return jsonify(bar_values) + + else: + #redis_provider_name_set = 'top_size_set' if module_name == "size" else 'providers_set' + redis_provider_name_set = 'top_avg_size_set_' if module_name == "size" else 'providers_set_' + redis_provider_name_set = redis_provider_name_set + get_date_range(0)[0] + + member_set = r_serv_charts.zrevrangebyscore(redis_provider_name_set, '+inf', '-inf', withscores=True, start=0, num=8) + # Member set is a list of (value, score) pairs + if len(member_set) == 0: + member_set.append(("No relevant data", float(100))) + return jsonify(member_set) + + +@app.route("/moduletrending/") +def 
moduletrending(): + return render_template("Moduletrending.html") + + From 0054be5bab79856627f4d6b6cf31e406ad3410a1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 9 Dec 2016 08:50:36 +0100 Subject: [PATCH 33/37] Added missing json dependency in dashboard --- var/www/Flask_dashboard.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/var/www/Flask_dashboard.py b/var/www/Flask_dashboard.py index b6bcb219..0c975f47 100644 --- a/var/www/Flask_dashboard.py +++ b/var/www/Flask_dashboard.py @@ -4,6 +4,8 @@ ''' Flask functions and routes for the dashboard page ''' +import json + import flask from flask import Flask, render_template, jsonify, request From d30f3ca6c7828b937a3ce86df0ef8270c1652ec1 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Fri, 9 Dec 2016 13:53:57 +0100 Subject: [PATCH 34/37] Added missing dependency datetime in dashboard --- var/www/Flask_dashboard.py | 1 + 1 file changed, 1 insertion(+) diff --git a/var/www/Flask_dashboard.py b/var/www/Flask_dashboard.py index 0c975f47..79307f9c 100644 --- a/var/www/Flask_dashboard.py +++ b/var/www/Flask_dashboard.py @@ -6,6 +6,7 @@ ''' import json +import datetime import flask from flask import Flask, render_template, jsonify, request From 9df12cdae828133e4e3efd08219764932daa09af Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 22 Dec 2016 08:57:45 +0100 Subject: [PATCH 35/37] Added sub-flask in a directory --- var/www/Flask_server.py | 1 + var/www/{ => Flasks}/Flask_browsepastes.py | 0 var/www/{ => Flasks}/Flask_config.py | 0 var/www/Flasks/Flask_corpus.py | 219 ++++++++++++++++++ var/www/{ => Flasks}/Flask_dashboard.py | 0 var/www/{ => Flasks}/Flask_search.py | 0 var/www/{ => Flasks}/Flask_sentiment.py | 0 var/www/{ => Flasks}/Flask_showpaste.py | 0 var/www/{ => Flasks}/Flask_terms.py | 0 var/www/{ => Flasks}/Flask_trendingcharts.py | 0 var/www/{ => Flasks}/Flask_trendingmodules.py | 0 11 files changed, 220 insertions(+) rename var/www/{ => 
Flasks}/Flask_config.py (100%) create mode 100644 var/www/Flasks/Flask_corpus.py rename var/www/{ => Flasks}/Flask_dashboard.py (100%) rename var/www/{ => Flasks}/Flask_search.py (100%) rename var/www/{ => Flasks}/Flask_sentiment.py (100%) rename var/www/{ => Flasks}/Flask_showpaste.py (100%) rename var/www/{ => Flasks}/Flask_terms.py (100%) rename var/www/{ => Flasks}/Flask_trendingcharts.py (100%) rename var/www/{ => Flasks}/Flask_trendingmodules.py (100%) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 4ece0c75..2ee00295 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -12,6 +12,7 @@ import flask import os import sys sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/')) +sys.path.append('./Flasks/') import Paste from Date import Date diff --git a/var/www/Flask_browsepastes.py b/var/www/Flasks/Flask_browsepastes.py similarity index 100% rename from var/www/Flask_browsepastes.py rename to var/www/Flasks/Flask_browsepastes.py diff --git a/var/www/Flask_config.py b/var/www/Flasks/Flask_config.py similarity index 100% rename from var/www/Flask_config.py rename to var/www/Flasks/Flask_config.py diff --git a/var/www/Flasks/Flask_corpus.py b/var/www/Flasks/Flask_corpus.py new file mode 100644 index 00000000..7805e66e --- /dev/null +++ b/var/www/Flasks/Flask_corpus.py @@ -0,0 +1,219 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +''' + Flask functions and routes for the trending modules page +''' +import redis +import datetime +import calendar +import flask +from flask import Flask, render_template, jsonify, request + +import Paste + +# ============ VARIABLES ============ +import Flask_config + +app = Flask_config.app +cfg = Flask_config.cfg +r_serv_corpus = Flask_config.r_serv_corpus +# ============ FUNCTIONS ============ + +def Corpus_getValueOverRange(word, startDate, num_day): + passed_days = 0 + oneDay = 60*60*24 + to_return = [] + curr_to_return = 0 + for timestamp in range(startDate, startDate - 
max(num_day)*oneDay, -oneDay): + value = r_serv_corpus.hget(timestamp, word) + curr_to_return += int(value) if value is not None else 0 + for i in num_day: + if passed_days == i-1: + to_return.append(curr_to_return) + passed_days += 1 + return to_return + + +# ============ ROUTES ============ + +@app.route("/corpus_management/") +def corpus_management(): + TrackedCorpusSet_Name = "TrackedSetCorpusSet" + TrackedCorpusDate_Name = "TrackedCorpusDate" + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + track_list = [] + track_list_values = [] + track_list_num_of_paste = [] + for tracked_corpus in r_serv_corpus.smembers(TrackedCorpusSet_Name): + track_list.append(tracked_corpus) + value_range = Corpus_getValueOverRange(tracked_corpus, today_timestamp, [1, 7, 31]) + + corpus_date = r_serv_corpus.hget(TrackedCorpusDate_Name, tracked_corpus) + + set_paste_name = "tracked_" + tracked_corpus + track_list_num_of_paste.append(r_serv_corpus.scard(set_paste_name)) + corpus_date = datetime.datetime.utcfromtimestamp(int(corpus_date)) if corpus_date is not None else "No date recorded" + value_range.append(corpus_date) + track_list_values.append(value_range) + + + return render_template("corpus_management.html", black_list=black_list, track_list=track_list, track_list_values=track_list_values, track_list_num_of_paste=track_list_num_of_paste) + + +@app.route("/corpus_management_query_paste/") +def corpus_management_query_paste(): + corpus = request.args.get('corpus') + TrackedCorpusSet_Name = "TrackedSetCorpusSet" + paste_info = [] + + set_paste_name = "tracked_" + corpus + track_list_path = r_serv_corpus.smembers(set_paste_name) + + for path in track_list_path: + paste = Paste.Paste(path) + p_date = str(paste._get_p_date()) + p_date = p_date[6:]+'/'+p_date[4:6]+'/'+p_date[0:4] + p_source = paste.p_source + p_encoding = paste._get_p_encoding() + p_size = paste.p_size + p_mime 
= paste.p_mime + p_lineinfo = paste.get_lines_info() + p_content = paste.get_p_content().decode('utf-8', 'ignore') + if p_content != 0: + p_content = p_content[0:400] + paste_info.append({"path": path, "date": p_date, "source": p_source, "encoding": p_encoding, "size": p_size, "mime": p_mime, "lineinfo": p_lineinfo, "content": p_content}) + + return jsonify(paste_info) + + +@app.route("/corpus_management_query/") +def corpus_management_query(): + TrackedCorpusDate_Name = "TrackedCorpusDate" + corpus = request.args.get('corpus') + + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + value_range = Corpus_getValueOverRange(corpus, today_timestamp, [1, 7, 31]) + + corpus_date = r_serv_corpus.hget(TrackedCorpusDate_Name, corpus) + + corpus_date = datetime.datetime.utcfromtimestamp(int(corpus_date)) if corpus_date is not None else "No date recorded" + value_range.append(str(corpus_date)) + return jsonify(value_range) + + +@app.route("/corpus_management_action/", methods=['GET']) +def corpus_management_action(): + TrackedCorpusSet_Name = "TrackedSetCorpusSet" + TrackedCorpusDate_Name = "TrackedCorpusDate" + + today = datetime.datetime.now() + today = today.replace(microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + + section = request.args.get('section') + action = request.args.get('action') + corpus = request.args.get('corpus') + if action is None or corpus is None: + return "None" + else: + if section == "followCorpus": + if action == "add": + r_serv_corpus.sadd(TrackedCorpusSet_Name, corpus.lower()) + r_serv_corpus.hset(TrackedCorpusDate_Name, corpus, today_timestamp) + else: + r_serv_corpus.srem(TrackedCorpusSet_Name, corpus.lower()) + else: + return "None" + + to_return = {} + to_return["section"] = section + to_return["action"] = action + to_return["corpus"] = corpus + return jsonify(to_return) + + + +@app.route("/corpus_plot_tool/") +def 
corpus_plot_tool(): + corpus = request.args.get('corpus') + if corpus is not None: + return render_template("corpus_plot_tool.html", corpus=corpus) + else: + return render_template("corpus_plot_tool.html", corpus="") + + +@app.route("/corpus_plot_tool_data/") +def corpus_plot_tool_data(): + oneDay = 60*60*24 + range_start = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_start')))) if request.args.get('range_start') is not None else 0; + range_start = range_start.replace(hour=0, minute=0, second=0, microsecond=0) + range_start = calendar.timegm(range_start.timetuple()) + range_end = datetime.datetime.utcfromtimestamp(int(float(request.args.get('range_end')))) if request.args.get('range_end') is not None else 0; + range_end = range_end.replace(hour=0, minute=0, second=0, microsecond=0) + range_end = calendar.timegm(range_end.timetuple()) + corpus = request.args.get('corpus') + + if corpus is None: + return "None" + else: + value_range = [] + for timestamp in range(range_start, range_end+oneDay, oneDay): + value = r_serv_corpus.hget(timestamp, corpus) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + value_range.insert(0,corpus) + return jsonify(value_range) + + +@app.route("/corpus_plot_top/") +def corpus_plot_top(): + return render_template("corpus_plot_top.html") + + +@app.route("/corpus_plot_top_data/") +def corpus_plot_top_data(): + oneDay = 60*60*24 + today = datetime.datetime.now() + today = today.replace(hour=0, minute=0, second=0, microsecond=0) + today_timestamp = calendar.timegm(today.timetuple()) + + set_day = "TopCorpusFreq_set_day_" + str(today_timestamp) + set_week = "TopCorpusFreq_set_week"; + set_month = "TopCorpusFreq_set_month"; + + the_set = request.args.get('set') + num_day = int(request.args.get('num_day')) + if the_set is None: + return "None" + else: + to_return = [] + if the_set == "TopCorpusFreq_set_day": + the_set += "_" + str(today_timestamp) + + for 
corpus, tot_value in r_serv_corpus.zrevrangebyscore(the_set, '+inf', '-inf', withscores=True, start=0, num=20): + position = {} + position['day'] = r_serv_corpus.zrevrank(set_day, corpus) + position['day'] = position['day']+1 if position['day'] is not None else "<20" + position['week'] = r_serv_corpus.zrevrank(set_week, corpus) + position['week'] = position['week']+1 if position['week'] is not None else "<20" + position['month'] = r_serv_corpus.zrevrank(set_month, corpus) + position['month'] = position['month']+1 if position['month'] is not None else "<20" + value_range = [] + for timestamp in range(today_timestamp, today_timestamp - num_day*oneDay, -oneDay): + value = r_serv_corpus.hget(timestamp, corpus) + curr_value_range = int(value) if value is not None else 0 + value_range.append([timestamp, curr_value_range]) + + to_return.append([corpus, value_range, tot_value, position]) + + return jsonify(to_return) + + diff --git a/var/www/Flask_dashboard.py b/var/www/Flasks/Flask_dashboard.py similarity index 100% rename from var/www/Flask_dashboard.py rename to var/www/Flasks/Flask_dashboard.py diff --git a/var/www/Flask_search.py b/var/www/Flasks/Flask_search.py similarity index 100% rename from var/www/Flask_search.py rename to var/www/Flasks/Flask_search.py diff --git a/var/www/Flask_sentiment.py b/var/www/Flasks/Flask_sentiment.py similarity index 100% rename from var/www/Flask_sentiment.py rename to var/www/Flasks/Flask_sentiment.py diff --git a/var/www/Flask_showpaste.py b/var/www/Flasks/Flask_showpaste.py similarity index 100% rename from var/www/Flask_showpaste.py rename to var/www/Flasks/Flask_showpaste.py diff --git a/var/www/Flask_terms.py b/var/www/Flasks/Flask_terms.py similarity index 100% rename from var/www/Flask_terms.py rename to var/www/Flasks/Flask_terms.py diff --git a/var/www/Flask_trendingcharts.py b/var/www/Flasks/Flask_trendingcharts.py similarity index 100% rename from var/www/Flask_trendingcharts.py rename to 
var/www/Flasks/Flask_trendingcharts.py diff --git a/var/www/Flask_trendingmodules.py b/var/www/Flasks/Flask_trendingmodules.py similarity index 100% rename from var/www/Flask_trendingmodules.py rename to var/www/Flasks/Flask_trendingmodules.py From faa8f8302de31eaeb8fa680fa7767193e8cfe066 Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 22 Dec 2016 09:28:55 +0100 Subject: [PATCH 36/37] Removed typo in the number of remaining pastes to display in browseImportantPaste --- var/www/Flasks/Flask_browsepastes.py | 4 ++-- .../templates/important_paste_by_module.html | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/var/www/Flasks/Flask_browsepastes.py b/var/www/Flasks/Flask_browsepastes.py index e5aa5b84..b393ab9e 100644 --- a/var/www/Flasks/Flask_browsepastes.py +++ b/var/www/Flasks/Flask_browsepastes.py @@ -79,9 +79,9 @@ def importantPasteByModule(): paste_linenum.append(paste.get_lines_info()[0]) if len(allPastes) > 10: - finished = "" + finished = False else: - finished = "display: none;" + finished = True return render_template("important_paste_by_module.html", moduleName=module_name, diff --git a/var/www/templates/important_paste_by_module.html b/var/www/templates/important_paste_by_module.html index ca98ce59..3a1c1c64 100644 --- a/var/www/templates/important_paste_by_module.html +++ b/var/www/templates/important_paste_by_module.html @@ -29,7 +29,7 @@
-
+

@@ -38,13 +38,15 @@ var json_array = []; var all_data_received = false; var curr_numElem; -var elem_added = 0; -var tot_num_entry = 0; +var elem_added = 10; //10 elements are added by default in the page loading +var tot_num_entry = 10; //10 elements are added by default in the page loading function deploy_source() { var button_load_more_displayed = false; if(typeof(EventSource) !== "undefined" && typeof(source) !== "") { + $("#load_more_json_button1").show(); + $("#load_more_json_button2").show(); var source = new EventSource("{{ url_for('getImportantPasteByModule') }}"+"?moduleName="+moduleName); source.onmessage = function(event) { var feed = jQuery.parseJSON( event.data ); @@ -52,8 +54,6 @@ function deploy_source() { if (feed.index > curr_numElem & feed.module == moduleName) { // Avoid doubling the pastes json_array.push(feed); tot_num_entry++; - $("#load_more_json_button1").show(); - $("#load_more_json_button2").show(); $("#nbr_entry").text(tot_num_entry + " entries available, " + (tot_num_entry - elem_added) + " not displayed"); $("#myTable_"+moduleName).attr('data-numElem', curr_numElem+1); @@ -125,7 +125,14 @@ $(document).ready(function(){ search_table = $('#myTable_'+moduleName).DataTable({ "order": [[ 2, "desc" ]] }); - deploy_source(); + if( "{{ finished }}" == "True"){ + $("#load_more_json_button1").hide(); + $("#load_more_json_button2").hide(); + $("#nbr_entry").hide(); + $("#loading_gif_browse").hide(); + } else { + deploy_source(); + } }); From 7438f16c631184c631e9a5ad4e4eac861850cb8b Mon Sep 17 00:00:00 2001 From: Mokaddem Date: Thu, 22 Dec 2016 10:06:35 +0100 Subject: [PATCH 37/37] Added new config --- bin/packages/config.cfg.sample | 3 +++ 1 file changed, 3 insertions(+) diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index e74b5da2..566cf22c 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -36,6 +36,9 @@ threshold_duplicate_tlsh = 100 #Minimum size of the paste considered 
min_paste_size = 0.3 +[Module_ModuleInformation] +#Threshold to deduce if a module is stuck or not, in seconds. +threshold_stucked_module=600 ##### Redis ##### [Redis_Cache]