pull/64/merge
Raphaël Vinot 2016-07-20 14:12:18 +02:00
parent 34e23998b1
commit 62eef44ca8
1 changed files with 27 additions and 38 deletions

View File

@ -6,22 +6,21 @@
import time
import datetime
import re
import redis
import os
from packages import lib_words
from packages.Date import Date
from pubsublogger import publisher
from packages import Paste
from Helper import Process
from pyfaup.faup import Faup
# Config Var
threshold_need_to_look = 50
range_to_look = 10
threshold_to_plot = 1 #500%
threshold_to_plot = 1 # 500%
to_plot = set()
clean_frequency = 10 #minutes
clean_frequency = 10 # minutes
def analyse(server, field_name):
field = url_parsed[field_name]
@ -32,6 +31,7 @@ def analyse(server, field_name):
else:
server.hset(field, date, 1)
def analyse_and_progression(server, field_name):
field = url_parsed[field_name]
if field is not None:
@ -39,18 +39,19 @@ def analyse_and_progression(server, field_name):
if prev_score is not None:
print field + ' prev_score:' + prev_score
server.hset(field, date, int(prev_score) + 1)
if int(prev_score) + 1 > threshold_need_to_look: #threshold for false positive
if int(prev_score) + 1 > threshold_need_to_look: # threshold for false positive
if(check_for_progression(server, field, date)):
to_plot.add(field)
else:
server.hset(field, date, 1)
def check_for_progression(server, field, date):
previous_data = set()
tot_sum = 0
for i in range(0, range_to_look):
curr_value = server.hget(field, Date(date).substract_day(i))
if curr_value is None: #no further data
if curr_value is None: # no further data
break
else:
curr_value = int(curr_value)
@ -59,23 +60,13 @@ def check_for_progression(server, field, date):
if i == 0:
today_val = curr_value
print 'totsum='+str(tot_sum)
print 'div='+str(tot_sum/today_val)
if tot_sum/today_val >= threshold_to_plot:
print 'totsum=' + str(tot_sum)
print 'div=' + str(tot_sum / today_val)
if tot_sum / today_val >= threshold_to_plot:
return True
else:
return False
def clean_to_plot():
temp_to_plot = set()
curr_date = datetime.date.today()
date = Date(str(curr_date.year)+str(curr_date.month)+str(curr_date.day))
for elem in to_plot:
if(check_for_progression(field, date)):
temp_to_plot.add(elem)
to_plot = temp_to_plot
if __name__ == '__main__':
# If you wish to use another port or channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
@ -118,7 +109,6 @@ if __name__ == '__main__':
csv_path_domain = os.path.join(os.environ['AIL_HOME'],
p.config.get("Directories", "domainstrending_csv"))
faup = Faup()
generate_new_graph = False
# Endless loop getting messages from the input queue
@ -143,8 +133,7 @@ if __name__ == '__main__':
month)
lib_words.create_curve_with_list(r_serv2, csv_path_domain,
to_plot, year,
month)
to_plot, year, month)
print 'end building'
publisher.debug("{} queue is empty, waiting".format(config_section))
@ -159,6 +148,6 @@ if __name__ == '__main__':
faup.decode(url)
url_parsed = faup.get()
analyse(r_serv1, 'scheme') #Scheme analysis
analyse(r_serv1, 'tld') #Tld analysis
analyse_and_progression(r_serv2, 'domain') #Domain analysis
analyse(r_serv1, 'scheme') # Scheme analysis
analyse(r_serv1, 'tld') # Tld analysis
analyse_and_progression(r_serv2, 'domain') # Domain analysis