Merge branch 'statistics-v2' into uptodate-statistics-v2

pull/64/head
Mokaddem 2016-07-21 15:16:03 +02:00
commit 7c4f4a2f79
8 changed files with 300 additions and 70 deletions

View File

@ -15,24 +15,11 @@ from Helper import Process
from pyfaup.faup import Faup
# Config Var
threshold_need_to_look = 50
range_to_look = 10
threshold_to_plot = 1 # 500%
to_plot = set()
clean_frequency = 10 # minutes
threshold_total_sum = 200 # Above this summed count, a keyword is eligible for a progression
threshold_increase = 1.0 # Minimum ratio (most recent count / oldest count in the window) a keyword must exceed
max_set_cardinality = 10 # Maximum number of keywords kept in a progression set
num_day_to_look = 5 # The progression is measured starting num_day_to_look days in the past
def analyse(server, field_name):
    """Increment the daily counter of the URL part named field_name.

    Reads the module-level ``url_parsed`` and ``date`` set by the main loop.
    The counter lives in the redis hash named after the field value, under
    the key ``date``.

    :param server: -- connexion to redis database
    :param field_name: -- (str) key of url_parsed to count

    """
    field = url_parsed[field_name]
    if field is None:
        # Nothing to count for this URL
        return

    prev_score = server.hget(field, date)
    # First occurrence of the day starts at 1, otherwise add one
    new_score = 1 if prev_score is None else int(prev_score) + 1
    server.hset(field, date, new_score)
def analyse_and_progression(server, field_name):
field = url_parsed[field_name]
if field is not None:
prev_score = server.hget(field, date)
@ -44,28 +31,56 @@ def analyse_and_progression(server, field_name):
to_plot.add(field)
else:
server.hset(field, date, 1)
if field_name == "domain": #save domain in a set for the monthly plot
domain_set_name = "domain_set_" + date[0:6]
server.sadd(domain_set_name, field)
print "added in " + domain_set_name +": "+ field
def get_date_range(num_day):
curr_date = datetime.date.today()
date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
date_list = []
def check_for_progression(server, field, date):
previous_data = set()
tot_sum = 0
for i in range(0, range_to_look):
curr_value = server.hget(field, Date(date).substract_day(i))
if curr_value is None: # no further data
break
else:
curr_value = int(curr_value)
previous_data.add(curr_value)
tot_sum += curr_value
if i == 0:
today_val = curr_value
for i in range(0, num_day+1):
date_list.append(date.substract_day(i))
return date_list
print 'totsum=' + str(tot_sum)
print 'div=' + str(tot_sum / today_val)
if tot_sum / today_val >= threshold_to_plot:
return True
else:
return False
def compute_progression(server, field_name, num_day, url_parsed):
    """Update the top-progression ranking for the keyword of a parsed URL.

    The keyword is ``url_parsed[field_name]``. Its daily counters are read
    from the redis hash named after the keyword, one per date returned by
    ``get_date_range(num_day)``. When both the summed count and the increase
    exceed the module-level thresholds, the keyword is recorded in the set
    ``top_progression_<field_name>_set`` and its increase is stored in the
    hash ``top_progression_<field_name>``. The set holds at most
    ``max_set_cardinality`` keywords: once full, a new keyword only enters
    by replacing the member with the lowest stored increase.

    :param server: -- connexion to redis database
    :param field_name: -- (str) key of url_parsed to analyse
    :param num_day: -- (integer) number of days to look back
    :param url_parsed: -- mapping of URL parts indexed by field name

    """
    redis_progression_name = 'top_progression_' + field_name
    redis_progression_name_set = 'top_progression_' + field_name + '_set'

    keyword = url_parsed[field_name]
    if keyword is None:
        return

    # One counter per day; get_date_range starts at today, so index 0 is the
    # most recent value and the last index is the oldest one.
    value_list = []
    for day in get_date_range(num_day):
        curr_value = server.hget(keyword, day)
        value_list.append(int(curr_value) if curr_value is not None else 0)
    if not value_list:
        # Empty date range (negative num_day): nothing to compare
        return
    keyword_total_sum = sum(value_list)

    oldest_value = value_list[-1] if value_list[-1] != 0 else 1  # Avoid zero division
    # Integer division on purpose: the stored increase is read back with int()
    keyword_increase = value_list[0] // oldest_value

    # filter: check if this keyword is eligible for progression
    if keyword_total_sum <= threshold_total_sum or keyword_increase <= threshold_increase:
        return

    if server.sismember(redis_progression_name_set, keyword):
        # Already ranked: only refresh its value
        server.hset(redis_progression_name, keyword, keyword_increase)
        return

    if server.scard(redis_progression_name_set) >= max_set_cardinality:
        # Set is full: look for the member with the lowest stored increase
        weakest_keyword = None
        weakest_score = None
        for keyw in server.smembers(redis_progression_name_set):
            stored = server.hget(redis_progression_name, keyw)
            score = int(stored) if stored is not None else 0
            if weakest_score is None or score < weakest_score:
                weakest_keyword = keyw
                weakest_score = score

        if weakest_keyword is None or weakest_score >= keyword_increase:
            # No member to evict, or the new keyword does not beat the minimum
            return
        # remove min from set before adding the new one
        server.srem(redis_progression_name_set, weakest_keyword)

    server.sadd(redis_progression_name_set, keyword)
    # Store the value with the membership so later rankings can read it
    server.hset(redis_progression_name, keyword, keyword_increase)
if __name__ == '__main__':
@ -89,18 +104,18 @@ if __name__ == '__main__':
host=p.config.get("Redis_Level_DB", "host"),
port=p.config.get("Redis_Level_DB", "port"),
db=p.config.get("Redis_Level_DB", "db"))
r_serv2 = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Domain", "host"),
port=p.config.get("Redis_Level_DB_Domain", "port"),
db=p.config.get("Redis_Level_DB_Domain", "db"))
r_serv_trend = redis.StrictRedis(
host=p.config.get("Redis_Level_DB_Trending", "host"),
port=p.config.get("Redis_Level_DB_Trending", "port"),
db=p.config.get("Redis_Level_DB_Trending", "db"))
# FILE CURVE SECTION #
csv_path_proto = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "protocolstrending_csv"))
protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "protocolsfile"))
p.config.get("Directories", "protocolsfile"))
csv_path_tld = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "tldstrending_csv"))
tldsfile_path = os.path.join(os.environ['AIL_HOME'],
@ -119,26 +134,29 @@ if __name__ == '__main__':
if message is None:
if generate_new_graph:
generate_new_graph = False
print 'Building graph'
today = datetime.date.today()
year = today.year
month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path_proto,
print 'Building protocol graph'
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_proto,
protocolsfile_path, year,
month)
lib_words.create_curve_with_word_file(r_serv1, csv_path_tld,
print 'Building tld graph'
lib_words.create_curve_with_word_file(r_serv_trend, csv_path_tld,
tldsfile_path, year,
month)
lib_words.create_curve_with_list(r_serv2, csv_path_domain,
to_plot, year, month)
print 'Building domain graph'
lib_words.create_curve_from_redis_set(r_serv_trend, csv_path_domain,
"domain", year,
month)
print 'end building'
publisher.debug("{} queue is empty, waiting".format(config_section))
print 'sleeping'
time.sleep(5)
time.sleep(5*60)
continue
else:
@ -147,7 +165,10 @@ if __name__ == '__main__':
url, date = message.split()
faup.decode(url)
url_parsed = faup.get()
analyse(r_serv1, 'scheme') # Scheme analysis
analyse(r_serv1, 'tld') # Tld analysis
analyse_and_progression(r_serv2, 'domain') # Domain analysis
analyse(r_serv_trend, 'scheme', date, url_parsed) #Scheme analysis
analyse(r_serv_trend, 'tld', date, url_parsed) #Tld analysis
analyse(r_serv_trend, 'domain', date, url_parsed) #Domain analysis
compute_progression(r_serv_trend, 'scheme', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'tld', num_day_to_look, url_parsed)
compute_progression(r_serv_trend, 'domain', num_day_to_look, url_parsed)

View File

@ -62,10 +62,10 @@ host = localhost
port = 2016
db = 0
[Redis_Level_DB_Domain]
[Redis_Level_DB_Trending]
host = localhost
port = 2016
db = 3
db = 0
[Redis_Level_DB_Hashs]
host = localhost

View File

@ -88,7 +88,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
with open(feederfilename, 'rb') as f:
# words of the files
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' ])
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' and word.strip()!='' ])
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
@ -112,7 +112,7 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
row.append(value)
writer.writerow(row)
def create_curve_with_list(server, csvfilename, to_plot, year, month):
def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
"""Create a csv file used with dygraph.
:param r_serv: -- connexion to redis database
@ -122,15 +122,17 @@ def create_curve_with_list(server, csvfilename, to_plot, year, month):
:param month: -- (integer) The month to process
This function create a .csv file using datas in redis.
It's checking if the words contained in to_plot and
It's checking if the words contained in set_to_plot and
their respectives values by days exists.
"""
first_day = date(year, month, 01)
last_day = date(year, month, calendar.monthrange(year, month)[1])
words = sorted(to_plot)
redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
words = list(server.smembers(redis_set_name))
headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
writer = csv.writer(f)

View File

@ -4,12 +4,14 @@
import redis
import ConfigParser
import json
import datetime
from flask import Flask, render_template, jsonify, request
import flask
import os
import sys
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Paste
from Date import Date
# CONFIG #
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
@ -35,6 +37,11 @@ r_serv_log = redis.StrictRedis(
port=cfg.getint("Redis_Log", "port"),
db=cfg.getint("Redis_Log", "db"))
r_serv_charts = redis.StrictRedis(
host=cfg.get("Redis_Level_DB_Trending", "host"),
port=cfg.getint("Redis_Level_DB_Trending", "port"),
db=cfg.getint("Redis_Level_DB_Trending", "db"))
app = Flask(__name__, static_url_path='/static/')
@ -100,6 +107,20 @@ def showpaste(content_range):
return render_template("show_saved_paste.html", date=p_date, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list)
def get_date_range(num_day):
    """Return the dates from today back to num_day days ago.

    Today's date is formatted as YYYYMMDD and wrapped in a Date object; each
    entry of the result is ``substract_day(i)`` of that object for i going
    from 0 to num_day included.

    :param num_day: -- (integer) number of days to go back

    """
    today = Date(datetime.date.today().strftime('%Y%m%d'))
    return [today.substract_day(offset) for offset in range(num_day + 1)]
# ============ ROUTES ============
@app.route("/_logs")
def logs():
@ -110,6 +131,38 @@ def logs():
def stuff():
return jsonify(row1=get_queues(r_serv))
@app.route("/_progressionCharts", methods=['GET'])
def progressionCharts():
    """Serve the data of the progression charts as JSON.

    Query parameters:
      - bar: "true" to request the per-day values of one keyword
      - attributeName: redis hash read for the bar values
      - days: (integer) number of days to return for the bar values
      - trendingName: field whose top-progression ranking is returned
        when bar is not "true"

    Answers HTTP 400 when ``days`` is missing or not an integer (bar mode),
    or when ``trendingName`` is missing (ranking mode).
    """
    attribute_name = request.args.get('attributeName')
    trending_name = request.args.get('trendingName')
    bar_requested = request.args.get('bar') == "true"

    if bar_requested:
        try:
            num_day = int(request.args.get('days'))
        except (TypeError, ValueError):
            # Missing or non-numeric 'days' is a client error, not a crash
            flask.abort(400)

        bar_values = []
        # Retrieve all data from the last num_day
        for date in get_date_range(num_day):
            curr_value = r_serv_charts.hget(attribute_name, date)
            bar_values.append([date[0:4]+'/'+date[4:6]+'/'+date[6:8], int(curr_value if curr_value is not None else 0)])
        return jsonify(bar_values)

    if trending_name is None:
        # The redis key names cannot be built without it
        flask.abort(400)

    redis_progression_name = 'top_progression_'+trending_name
    redis_progression_name_set = 'top_progression_'+trending_name+'_set'

    member_set = []
    for keyw in r_serv_charts.smembers(redis_progression_name_set):
        keyw_value = r_serv_charts.hget(redis_progression_name, keyw)
        keyw_value = keyw_value if keyw_value is not None else 0
        # Go through float so a non-integer stored value does not raise
        member_set.append((keyw, int(float(keyw_value))))
    member_set.sort(key=lambda tup: tup[1], reverse=True)
    if len(member_set) == 0:
        member_set.append(("No relevant data", int(100)))
    return jsonify(member_set)
@app.route("/search", methods=['POST'])
def search():

View File

@ -15,7 +15,10 @@
<!-- JS -->
<script type="text/javascript" src="{{ url_for('static', filename='js/dygraph-combined.js') }}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script>
<script src="{{ url_for('static', filename='js/jquery.flot.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.flot.pie.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.flot.time.js') }}"></script>
<script>
var default_display = {{ default_display }};
var current_displayed_graph;
</script>
@ -58,26 +61,34 @@
<!-- /.nav-tabs -->
<ul class="nav nav-tabs">
<li class="active"><a data-toggle="tab" href="#tld-tab" data-pannel="TldTrending" data-path="../static//csv/tldstrendingdata.csv">Top level domains</a></li>
<li><a data-toggle="tab" href="#domain-tab" data-pannel="DomainTrending" data-path="../static//csv/domainstrendingdata.csv">Domains</a></li>
<li class="active"><a data-toggle="tab" href="#tld-tab" data-attribute-name="tld" data-pannel="TldTrending" data-path="../static//csv/tldstrendingdata.csv">Top level domains</a></li>
<li><a data-toggle="tab" href="#domain-tab" data-attribute-name="domain" data-pannel="DomainTrending" data-path="../static//csv/domainstrendingdata.csv">Domains</a></li>
<li><a data-toggle="tab" href="#protocol-tab" data-attribute-name="scheme" data-pannel="ProtocolTrending" data-path="../static//csv/protocolstrendingdata.csv">Protocols</a></li>
<li><a data-toggle="tab" href="#words-tab" data-pannel="WordTrending" data-path="../static//csv/wordstrendingdata.csv">Words</a></li>
<li><a data-toggle="tab" href="#protocol-tab" data-pannel="ProtocolTrending" data-path="../static//csv/protocolstrendingdata.csv">Protocols</a></li>
</ul>
</br>
<script>
var chart_1_num_day = 5;
var chart_2_num_day = 15;
$SCRIPT_ROOT = {{ request.script_root|tojson|safe }};
</script>
<div class="tab-content">
<script type="text/javascript" src="{{ url_for('static', filename='js/trendingchart.js')}}"></script>
<div class="tab-content">
<div class="col-lg-12 tab-pane fade in active" id="tld-tab" >
{% include 'trending_graphs/Tldstrending.html' %}
</div>
<div class="col-lg-12 tab-pane fade" id="domain-tab">
{% include 'trending_graphs/Domainstrending.html' %}
</div>
<div class="col-lg-12 tab-pane fade" id="words-tab">
{% include 'trending_graphs/Wordstrending.html' %}
</div>
<div class="col-lg-12 tab-pane fade" id="protocol-tab">
{% include 'trending_graphs/Protocolstrending.html' %}
</div>
<div class="col-lg-12 tab-pane fade" id="words-tab">
{% include 'trending_graphs/Wordstrending.html' %}
</div>
</div> <!-- tab-content -->
<!-- /.row -->
</div>
@ -86,6 +97,24 @@
<!-- import graph function -->
<script src="{{ url_for('static', filename='js/plot-graph.js') }}"></script>
<script type="text/javascript">
var refresh_interval = 1000*60*2; //number of milliseconds between each call
var launched_refresher = []; //Avoid launching multiple refreshers
var active_tab_name = "tld"; //Avoid a redraw of the graph if the tab is not active
// Periodically refresh the top progression chart of attr_name.
// When immediate is true the chart is also plotted right away. Every
// refresh_interval the elements carrying a flash-<attr_name> attribute are
// highlighted for one second, the next refresh is scheduled, and the chart
// is redrawn only if its tab is the active one.
function refresh_top_chart(attr_name, immediate){
    var flash_selector = "[flash-" + attr_name + "]";

    if (immediate) {
        plot_top_graph(attr_name);
    }

    setTimeout(function() {
        $(flash_selector).css('color', '#fece00');
        setTimeout(function() {
            $(flash_selector).css('color', 'black');
        }, 1000);

        // Schedule the next cycle before redrawing
        refresh_top_chart(attr_name, false);
        if (active_tab_name == attr_name) {
            plot_top_graph(attr_name);
        }
    }, refresh_interval);
}
</script>
<!-- instanciate and plot graphs -->
<script type="text/javascript">
// Create, plot and set the limit of displayed headers
@ -102,14 +131,22 @@
// When a pannel is shown, create_and_plot.
$('.nav-tabs a').on('shown.bs.tab', function(event){
create_and_plot($(event.target).attr('data-pannel'), $(event.target).attr('data-path'));
create_and_plot($(event.target).attr('data-pannel'), $(event.target).attr('data-path'));
active_tab_name = $(event.target).attr('data-attribute-name')
//Top progression chart
if(launched_refresher.indexOf($(event.target).attr('data-attribute-name')) == -1){
launched_refresher.push($(event.target).attr('data-attribute-name'));
refresh_top_chart($(event.target).attr('data-attribute-name'), true);
}
});
</script>
<script>
$(document).ready(function(){
// Create the graph when the page has just loaded
create_and_plot("TldTrending", '../static//csv/tldstrendingdata.csv')
create_and_plot("TldTrending", '../static//csv/tldstrendingdata.csv')
//Top progression chart
refresh_top_chart("tld", true);
});
// Used when we modify the number of displayed curves

View File

@ -1,3 +1,42 @@
<div class="row">
<div class="col-lg-12">
<div class="row">
<div class="col-lg-6">
<div class="panel panel-default">
<div class="panel-heading">
<i id="flash-domain" class="glyphicon glyphicon-flash " flash-domain=""></i> Top Progression for the last 5 days
</div>
<div class="panel-body">
<div class="">
<div class="flot-chart-content col-lg-3" id="flot-pie-chart1-domain" style="height:250px; width:48%;"></div>
<div class="flot-chart-content col-lg-3" id="flot-bar-chart1-domain" style="height:250px; width:48%;"><div class="alert alert-info">Click on a part</div></div>
</div>
<!-- /.row -->
</div>
<!-- /.panel-body -->
</div>
</div>
<!-- /.panel -->
<div class="col-lg-6">
    <div class="panel panel-default">
        <div class="panel-heading">
            <!-- No id here: "flash-domain" is already used by the 5-days panel icon; the flash-domain attribute is what the refresher selects -->
            <i class="glyphicon glyphicon-flash " flash-domain=""></i> Top Progression for the last 15 days
        </div>
        <div class="panel-body">
            <div class="">
                <div class="flot-chart-content col-lg-3" id="flot-bar-chart2-domain" style="height:250px; width:100%;"><div class="alert alert-info">Click on a part</div></div>
            </div>
            <!-- /.row -->
        </div>
        <!-- /.panel-body -->
    </div>
</div>
</div>
<!-- /.panel -->
</div>
</div>
<div class="panel panel-default">
<div class="panel-heading">
<i class="fa fa-bar-chart-o fa-fw"></i> Top Domain Trending

View File

@ -1,3 +1,42 @@
<div class="row">
<div class="col-lg-12">
<div class="row">
<div class="col-lg-6">
<div class="panel panel-default">
<div class="panel-heading">
<i id="flash-scheme" class="glyphicon glyphicon-flash " flash-scheme=""></i> Top Progression for the last 5 days
</div>
<div class="panel-body">
<div class="">
<div class="flot-chart-content col-lg-3" id="flot-pie-chart1-scheme" style="height:250px; width:48%;"></div>
<div class="flot-chart-content col-lg-3" id="flot-bar-chart1-scheme" style="height:250px; width:48%;"><div class="alert alert-info">Click on a part</div></div>
</div>
<!-- /.row -->
</div>
<!-- /.panel-body -->
</div>
</div>
<!-- /.panel -->
<div class="col-lg-6">
    <div class="panel panel-default">
        <div class="panel-heading">
            <!-- No id here: "flash-scheme" is already used by the 5-days panel icon; the flash-scheme attribute is what the refresher selects -->
            <i class="glyphicon glyphicon-flash " flash-scheme=""></i> Top Progression for the last 15 days
        </div>
        <div class="panel-body">
            <div class="">
                <div class="flot-chart-content col-lg-3" id="flot-bar-chart2-scheme" style="height:250px; width:100%;"><div class="alert alert-info">Click on a part</div></div>
            </div>
            <!-- /.row -->
        </div>
        <!-- /.panel-body -->
    </div>
</div>
</div>
<!-- /.panel -->
</div>
</div>
<div class="panel panel-default">
<div class="panel-heading">
<i class="fa fa-bar-chart-o fa-fw"></i> Protocols Trend

View File

@ -1,3 +1,42 @@
<div class="row">
<div class="col-lg-12">
<div class="row">
<div class="col-lg-6">
<div class="panel panel-default">
<div class="panel-heading">
<i id="flash-tld" class="glyphicon glyphicon-flash " flash-tld=""></i> Top Progression for the last 5 days
</div>
<div class="panel-body">
<div class="">
<div class="flot-chart-content col-lg-3" id="flot-pie-chart1-tld" style="height:250px; width:48%;"></div>
<div class="flot-chart-content col-lg-3" id="flot-bar-chart1-tld" style="height:250px; width:48%;"><div class="alert alert-info">Click on a part</div></div>
</div>
<!-- /.row -->
</div>
<!-- /.panel-body -->
</div>
</div>
<!-- /.panel -->
<div class="col-lg-6">
    <div class="panel panel-default">
        <div class="panel-heading">
            <i class="glyphicon glyphicon-flash " flash-tld=""></i> Top Progression for the last 15 days
        </div>
        <!-- The panel body carries no id: "flash-tld" is already used by the 5-days panel icon -->
        <div class="panel-body">
            <div class="">
                <div class="flot-chart-content col-lg-3" id="flot-bar-chart2-tld" style="height:250px; width:100%;"><div class="alert alert-info">Click on a part</div></div>
            </div>
            <!-- /.row -->
        </div>
        <!-- /.panel-body -->
    </div>
</div>
</div>
<!-- /.panel -->
</div>
</div>
<div class="panel panel-default">
<div class="panel-heading">
<i class="fa fa-bar-chart-o fa-fw"></i> Top Level Domain Trending