mirror of https://github.com/CIRCL/AIL-framework
Added DomainTrending; it seems to be working.
Started search features with related HTML pages; not finished yet.
pull/57/head
parent
8c1eeea6e6
commit
7ff9b9a583
|
@ -10,19 +10,72 @@ import re
|
|||
import redis
|
||||
import os
|
||||
from packages import lib_words
|
||||
from packages.Date import Date
|
||||
from pubsublogger import publisher
|
||||
from packages import Paste
|
||||
from Helper import Process
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
def analyse(field_name):
|
||||
# Config Var
|
||||
threshold_need_to_look = 50
|
||||
range_to_look = 10
|
||||
threshold_to_plot = 1 #500%
|
||||
to_plot = set()
|
||||
clean_frequency = 10 #minutes
|
||||
|
||||
def analyse(server, field_name):
|
||||
field = url_parsed[field_name]
|
||||
if field is not None:
|
||||
prev_score = r_serv1.hget(field, date)
|
||||
prev_score = server.hget(field, date)
|
||||
if prev_score is not None:
|
||||
r_serv1.hset(field, date, int(prev_score) + 1)
|
||||
server.hset(field, date, int(prev_score) + 1)
|
||||
else:
|
||||
r_serv1.hset(field, date, 1)
|
||||
server.hset(field, date, 1)
|
||||
|
||||
def analyse_and_progression(server, field_name):
    """Count one occurrence of a URL field and flag it when it is trending.

    Reads the module-level ``url_parsed`` mapping and ``date`` key, bumps the
    per-day counter kept in the redis hash named after the field value and,
    once that counter exceeds ``threshold_need_to_look``, adds the value to
    the module-level ``to_plot`` set if ``check_for_progression`` accepts it.

    :param server: -- redis connection holding the per-day counters
    :param field_name: -- key of ``url_parsed`` to analyse (e.g. 'domain')

    """
    field = url_parsed[field_name]
    if field is None:
        return

    # HINCRBY starts a missing hash field at 0 before incrementing, so it
    # covers both the "first hit of the day" and the "increment" cases in a
    # single atomic command instead of a separate hget followed by hset.
    score = server.hincrby(field, date, 1)

    # Threshold against false positives: low counts are never inspected.
    if score > threshold_need_to_look:
        if check_for_progression(server, field, date):
            to_plot.add(field)
|
||||
|
||||
def check_for_progression(server, field, date):
    """Tell whether ``field`` shows enough recent activity to be plotted.

    Sums the daily counters of ``field`` over at most ``range_to_look`` days,
    walking backwards from ``date`` and stopping at the first day that has no
    counter, then compares that sum with the counter of ``date`` itself.

    :param server: -- redis connection holding the per-day counters
    :param field: -- name of the redis hash to inspect
    :param date: -- reference day, handed unchanged to ``Date``
    :return: -- True when (sum // value of ``date``) >= ``threshold_to_plot``;
        False otherwise, including when ``date`` has no counter or a zero one.

    """
    reference_day = Date(date)  # built once; substract_day() does not mutate it
    today_val = None
    tot_sum = 0
    for i in range(range_to_look):
        curr_value = server.hget(field, reference_day.substract_day(i))
        if curr_value is None:  # no further data
            break
        curr_value = int(curr_value)
        tot_sum += curr_value
        if i == 0:
            today_val = curr_value

    # Without a non-zero counter for the reference day the ratio is undefined:
    # previously this path failed with an unbound ``today_val`` or a division
    # by zero.
    if not today_val:
        return False

    # ``//`` keeps the integer division the Python 2 ``/`` performed on ints.
    ratio = tot_sum // today_val
    print('totsum=' + str(tot_sum))
    print('div=' + str(ratio))
    return ratio >= threshold_to_plot
|
||||
|
||||
def clean_to_plot(server=None):
    """Drop from ``to_plot`` every entry that no longer shows a progression.

    :param server: -- redis connection holding the per-day counters; when
        omitted, the module-level ``r_serv2`` connection is used.

    """
    if server is None:
        server = r_serv2

    # Zero-padded YYYYMMDD, the same key format substract_day() returns;
    # concatenating year/month/day without padding yields keys like "201681".
    today = datetime.date.today().strftime("%Y%m%d")

    still_trending = set(
        elem for elem in to_plot
        if check_for_progression(server, elem, today))

    # Update the shared set in place: rebinding ``to_plot`` here would turn
    # it into a local name and make the read above fail.
    to_plot.intersection_update(still_trending)
|
||||
|
||||
if __name__ == '__main__':
|
||||
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||
|
@ -46,6 +99,11 @@ if __name__ == '__main__':
|
|||
port=p.config.get("Redis_Level_DB", "port"),
|
||||
db=p.config.get("Redis_Level_DB", "db"))
|
||||
|
||||
r_serv2 = redis.StrictRedis(
|
||||
host=p.config.get("Redis_Level_DB_Domain", "host"),
|
||||
port=p.config.get("Redis_Level_DB_Domain", "port"),
|
||||
db=p.config.get("Redis_Level_DB_Domain", "db"))
|
||||
|
||||
# FILE CURVE SECTION #
|
||||
csv_path_proto = os.path.join(os.environ['AIL_HOME'],
|
||||
p.config.get("Directories", "protocolstrending_csv"))
|
||||
|
@ -57,6 +115,10 @@ if __name__ == '__main__':
|
|||
tldsfile_path = os.path.join(os.environ['AIL_HOME'],
|
||||
p.config.get("Directories", "tldsfile"))
|
||||
|
||||
csv_path_domain = os.path.join(os.environ['AIL_HOME'],
|
||||
p.config.get("Directories", "domainstrending_csv"))
|
||||
|
||||
|
||||
faup = Faup()
|
||||
generate_new_graph = False
|
||||
# Endless loop getting messages from the input queue
|
||||
|
@ -71,17 +133,22 @@ if __name__ == '__main__':
|
|||
today = datetime.date.today()
|
||||
year = today.year
|
||||
month = today.month
|
||||
|
||||
print 'b1'
|
||||
lib_words.create_curve_with_word_file(r_serv1, csv_path_proto,
|
||||
protocolsfile_path, year,
|
||||
month)
|
||||
|
||||
print 'b2'
|
||||
lib_words.create_curve_with_word_file(r_serv1, csv_path_tld,
|
||||
tldsfile_path, year,
|
||||
month)
|
||||
|
||||
print 'b3'
|
||||
lib_words.create_curve_with_list(r_serv2, csv_path_domain,
|
||||
to_plot, year,
|
||||
month)
|
||||
print 'end building'
|
||||
publisher.debug("{} queue is empty, waiting".format(config_section))
|
||||
time.sleep(1)
|
||||
print 'sleeping'
|
||||
time.sleep(5)
|
||||
continue
|
||||
|
||||
else:
|
||||
|
@ -91,5 +158,8 @@ if __name__ == '__main__':
|
|||
faup.decode(url)
|
||||
url_parsed = faup.get()
|
||||
|
||||
analyse('scheme') #Scheme analysis
|
||||
analyse('tld') #Tld analysis
|
||||
analyse(r_serv1, 'scheme') #Scheme analysis
|
||||
analyse(r_serv1, 'tld') #Tld analysis
|
||||
analyse_and_progression(r_serv2, 'domain') #Domain analysis
|
||||
print "to_plot:"
|
||||
print to_plot
|
||||
|
|
|
@ -30,3 +30,12 @@ class Date(object):
|
|||
|
||||
def _set_day(self, day):
|
||||
self.day = day
|
||||
|
||||
def substract_day(self, numDay):
    """Return the day ``numDay`` days before this date as a 'YYYYMMDD' string.

    :param numDay: -- (integer) number of days to go back
    :return: -- year followed by two-digit month and two-digit day

    """
    from datetime import date, timedelta

    start = date(int(self.year), int(self.month), int(self.day))
    shifted = start - timedelta(numDay)
    # Month and day are zero-padded to two digits; the year is written as-is.
    return '{0}{1:02d}{2:02d}'.format(shifted.year, shifted.month, shifted.day)
|
||||
|
||||
|
|
|
@ -186,6 +186,8 @@ class Paste(object):
|
|||
if the paste doesn't contain any human dictionnary words
|
||||
..seealso: git@github.com:saffsd/langid.py.git
|
||||
|
||||
FIXME: This procedure is using more than 20% of CPU
|
||||
|
||||
"""
|
||||
identifier = LanguageIdentifier.from_modelstring(model, norm_probs=True)
|
||||
return identifier.classify(self.get_p_content())
|
||||
|
|
|
@ -81,13 +81,14 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
|
|||
to keep the timeline of the curve correct.
|
||||
|
||||
"""
|
||||
threshold = 50
|
||||
first_day = date(year, month, 01)
|
||||
last_day = date(year, month, calendar.monthrange(year, month)[1])
|
||||
words = []
|
||||
|
||||
with open(feederfilename, 'rb') as f:
|
||||
# words of the files
|
||||
words = sorted([word.strip() for word in f])
|
||||
words = sorted([word.strip() for word in f if word.strip()[0:2]!='//' ])
|
||||
|
||||
headers = ['Date'] + words
|
||||
with open(csvfilename+'.csv', 'wb') as f:
|
||||
|
@ -102,6 +103,47 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
|
|||
# from the 1srt day to the last of the list
|
||||
for word in words:
|
||||
value = r_serv.hget(word, curdate)
|
||||
if value is None:
|
||||
row.append(0)
|
||||
else:
|
||||
# if the word have a value for the day
|
||||
# FIXME Due to performance issues (too many tlds, leads to more than 7s to perform this procedure), I added a threshold
|
||||
if value >= threshold:
|
||||
row.append(value)
|
||||
writer.writerow(row)
|
||||
|
||||
def create_curve_with_list(server, csvfilename, to_plot, year, month):
|
||||
"""Create a csv file used with dygraph.
|
||||
|
||||
:param r_serv: -- connexion to redis database
|
||||
:param csvfilename: -- the path to the .csv file created
|
||||
:param to_plot: -- the list which contain a words to plot.
|
||||
:param year: -- (integer) The year to process
|
||||
:param month: -- (integer) The month to process
|
||||
|
||||
This function create a .csv file using datas in redis.
|
||||
It's checking if the words contained in to_plot and
|
||||
their respectives values by days exists.
|
||||
|
||||
"""
|
||||
|
||||
first_day = date(year, month, 01)
|
||||
last_day = date(year, month, calendar.monthrange(year, month)[1])
|
||||
words = sorted(to_plot)
|
||||
|
||||
headers = ['Date'] + words
|
||||
with open(csvfilename+'.csv', 'wb') as f:
|
||||
writer = csv.writer(f)
|
||||
writer.writerow(headers)
|
||||
|
||||
# for each days
|
||||
for dt in rrule(DAILY, dtstart=first_day, until=last_day):
|
||||
row = []
|
||||
curdate = dt.strftime("%Y%m%d")
|
||||
row.append(curdate)
|
||||
# from the 1srt day to the last of the list
|
||||
for word in words:
|
||||
value = server.hget(word, curdate)
|
||||
if value is None:
|
||||
row.append(0)
|
||||
else:
|
||||
|
|
|
@ -7,7 +7,9 @@ import json
|
|||
from flask import Flask, render_template, jsonify, request
|
||||
import flask
|
||||
import os
|
||||
|
||||
import sys
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
|
||||
import Paste
|
||||
|
||||
# CONFIG #
|
||||
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
||||
|
@ -18,6 +20,7 @@ if not os.path.exists(configfile):
|
|||
|
||||
cfg = ConfigParser.ConfigParser()
|
||||
cfg.read(configfile)
|
||||
max_preview_char = 500
|
||||
|
||||
# REDIS #
|
||||
r_serv = redis.StrictRedis(
|
||||
|
@ -49,6 +52,10 @@ def get_queues(r):
|
|||
r.hgetall("queues").iteritems()]
|
||||
|
||||
|
||||
def list_len(s):
    """Return the number of items in ``s`` (registered as a Jinja filter)."""
    size = len(s)
    return size
|
||||
app.jinja_env.filters['list_len'] = list_len
|
||||
|
||||
@app.route("/_logs")
|
||||
def logs():
|
||||
return flask.Response(event_stream(), mimetype="text/event-stream")
|
||||
|
@ -65,6 +72,7 @@ def search():
|
|||
q = []
|
||||
q.append(query)
|
||||
r = []
|
||||
c = []
|
||||
# Search
|
||||
from whoosh import index
|
||||
from whoosh.fields import Schema, TEXT, ID
|
||||
|
@ -78,7 +86,10 @@ def search():
|
|||
results = searcher.search(query, limit=None)
|
||||
for x in results:
|
||||
r.append(x.items()[0][1])
|
||||
return render_template("search.html", r=r)
|
||||
content = Paste.Paste(x.items()[0][1]).get_p_content()
|
||||
content_range = max_preview_char if len(content)>max_preview_char else len(content)-1
|
||||
c.append(content[0:content_range])
|
||||
return render_template("search.html", r=r, c=c)
|
||||
|
||||
@app.route("/")
|
||||
def index():
|
||||
|
@ -104,6 +115,10 @@ def protocolstrending():
|
|||
def tldstrending():
|
||||
return render_template("Tldstrending.html")
|
||||
|
||||
@app.route("/showsavedpaste/")
def showsavedpaste():
    """Serve the saved-paste page; the template reads ``?paste=`` itself."""
    template_name = "show_saved_paste.html"
    return render_template(template_name)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(host='0.0.0.0', port=7000, threaded=True)
|
||||
|
|
|
@ -130,7 +130,7 @@
|
|||
<!-- instantiate and plot graphs -->
|
||||
<script type="text/javascript">
|
||||
var graph_tld = new Graph("TldsTrending", "../static//csv/tldstrendingdata.csv");
|
||||
var graph_domain = new Graph("DomainTrending", "../static//csv/tldstrendingdata.csv");
|
||||
var graph_domain = new Graph("DomainTrending", "../static//csv/domainstrendingdata.csv");
|
||||
</script>
|
||||
</div>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap.min.js') }}"></script>
|
||||
|
|
|
@ -16,6 +16,16 @@
|
|||
<script type="text/javascript" src="{{ url_for('static', filename='js/dygraph-combined.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery-1.4.2.js') }}"></script>
|
||||
<script language="javascript" src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<style>
|
||||
.tooltip-inner {
|
||||
text-align: left;
|
||||
height: 200%;
|
||||
width: 200%;
|
||||
max-width: 500px;
|
||||
max-height: 500px;
|
||||
font-size: 13px;
|
||||
}
|
||||
</style>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
@ -39,6 +49,26 @@
|
|||
</div>
|
||||
<!-- /.navbar-static-side -->
|
||||
</nav>
|
||||
|
||||
|
||||
<!-- Modal -->
|
||||
<div id="mymodal" class="modal fade" role="dialog">
|
||||
<div class="modal-dialog modal-lg">
|
||||
|
||||
<!-- Modal content-->
|
||||
<div id="mymodalcontent" class="modal-content">
|
||||
<div id="mymodalbody" class="modal-body">
|
||||
<p>Some text in the modal.</p>
|
||||
</div>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-default" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div id="page-wrapper">
|
||||
<!-- /.row -->
|
||||
<div class="row"> </div>
|
||||
|
@ -53,10 +83,26 @@
|
|||
</div>
|
||||
<!-- /.panel-heading -->
|
||||
<div class="panel-body">
|
||||
<table class="table">
|
||||
{% for result in r %}
|
||||
<tr><td>{{ result }}</td></tr>
|
||||
<table class="table table-hover">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>#</th>
|
||||
<th>Path</th>
|
||||
<th>Action</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% set i = 0 %}
|
||||
{% for path in r %}
|
||||
{% set prev_content = c[i] %}
|
||||
<tr>
|
||||
<td>{{ i + 1 }}</td>
|
||||
<td><a target="_blank" href="{{ url_for('showsavedpaste') }}?paste={{ path }}"> {{ path }}</a></td>
|
||||
<td><p><span class="glyphicon glyphicon-info-sign" data-toggle="tooltip" data-placement="left" title="{{ prev_content }}"></span> <button type="button" class="btn-link" data-toggle="modal" data-target="#mymodal" data-url="{{ url_for('showsavedpaste') }}?paste={{ path }}"><span class="fa fa-search-plus"></span></button></p></td>
|
||||
</tr>
|
||||
{% set i = i + 1 %}
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<!-- /.panel-body -->
|
||||
|
@ -69,4 +115,23 @@
|
|||
<script src="{{ url_for('static', filename='js/bootstrap.min.js') }}"></script>
|
||||
</body>
|
||||
|
||||
<!-- enable tooltip -->
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$('[data-toggle="tooltip"]').tooltip();
|
||||
});
|
||||
</script>
|
||||
|
||||
<!-- Dynamically update the modal -->
|
||||
<script type="text/javascript">
|
||||
// On click, get html content from url and update the corresponding modal
|
||||
$("[data-toggle='modal']").on("click", function (event) {
|
||||
event.preventDefault();
|
||||
var url = $(this).attr('data-url');
|
||||
var modal_id = $(this).attr('data-target');
|
||||
$.get(url, function (data) {
|
||||
$("#mymodalbody").html(data);
|
||||
});
|
||||
});
|
||||
</script>
|
||||
</html>
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<title>Paste information</title>
|
||||
<meta charset="utf-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<h2> Paste: </h2>
|
||||
<h3> {{ request.args.get('paste') }} </h3>
|
||||
|
||||
<hr></br>
|
||||
|
||||
<table class="table table-condensed">
|
||||
<thead>
|
||||
<tr>
|
||||
<th>Date</th>
|
||||
<th>Source</th>
|
||||
<th>Encoding</th>
|
||||
<th>Language</th>
|
||||
<th>Size</th>
|
||||
<th>Mime</th>
|
||||
<th>Number of line</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>John</td>
|
||||
<td>Doe</td>
|
||||
<td>john@example.com</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>Mary</td>
|
||||
<td>Moe</td>
|
||||
<td>mary@example.com</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td>July</td>
|
||||
<td>Dooley</td>
|
||||
<td>july@example.com</td>
|
||||
</tr>
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
Loading…
Reference in New Issue