mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			107 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			107 lines
		
	
	
		
			3.8 KiB
		
	
	
	
		
			Python
		
	
	
#!/usr/bin/python3
 | 
						|
 | 
						|
import os
 | 
						|
import string
 | 
						|
 | 
						|
import calendar
 | 
						|
from datetime import date
 | 
						|
from dateutil.rrule import rrule, DAILY
 | 
						|
import csv
 | 
						|
 | 
						|
 | 
						|
def clean(dirty):
    """Return *dirty* with every non-printable character removed.

    A character is kept only if it appears in ``string.printable``
    (ASCII digits, letters, punctuation and whitespace); everything else,
    including non-ASCII characters, is dropped.

    :param dirty: -- (str) the string to filter
    :return: -- (str) the filtered string
    """
    return ''.join(filter(string.printable.__contains__, dirty))
 | 
						|
 | 
						|
 | 
						|
def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
    """Create a csv file used with dygraph.

    :param r_serv: -- connexion to redis database (only ``hget`` is used)
    :param csvfilename: -- the path to the .csv file created, without the
        ``.csv`` extension (it is appended here)
    :param feederfilename: -- the path to the file which contains a list of words.
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process

    This function creates a .csv file using data stored in redis.
    The header is ``Date`` followed by the sorted words of feederfilename
    (blank lines and lines starting with ``//`` are ignored). One row is
    written per day of the month; each cell is the value of
    ``hget(word, YYYYMMDD)``. If the value is missing (word not present
    during that day) or below the threshold, a 0 is written so that every
    row has exactly one cell per header column and the timeline of the
    curve stays correct.

    """
    # FIXME Due to performance issues (too many tlds, leads to more than 7s
    # to perform this procedure), values under this threshold are flattened to 0.
    threshold = 30
    days_in_month = calendar.monthrange(year, month)[1]

    with open(feederfilename, 'r') as f:
        # words of the file, skipping blank lines and '//' comments
        stripped_lines = (line.strip() for line in f)
        words = sorted(w for w in stripped_lines if w and not w.startswith('//'))

    # newline='' lets the csv module control line endings itself
    with open(csvfilename + '.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Date'] + words)

        # for each day, from the first to the last of the month
        for day in range(1, days_in_month + 1):
            curdate = date(year, month, day).strftime("%Y%m%d")
            row = [curdate]
            for word in words:
                value = r_serv.hget(word, curdate)
                count = 0 if value is None else int(value)
                # Always emit a cell: skipping it would shift the following
                # columns under the wrong header.
                row.append(count if count >= threshold else 0)
            writer.writerow(row)
 | 
						|
 | 
						|
def create_curve_from_redis_set(server, csvfilename, set_to_plot, year, month):
    """Create a csv file used with dygraph.

    :param server: -- connexion to redis database (``smembers`` and ``hget``
        are used)
    :param csvfilename: -- the path to the .csv file created, without the
        ``.csv`` extension (it is appended here)
    :param set_to_plot: -- prefix of the redis set holding the words to plot;
        the set read is ``<set_to_plot>_set_<YYYY><MM>``
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process

    This function creates a .csv file using data stored in redis.
    The header is ``Date`` followed by the members of the redis set, in
    sorted order so the column layout is reproducible between runs. One row
    is written per day of the month; each cell is the value returned by
    ``hget(word, YYYYMMDD)`` written as-is, or 0 when no value exists.

    """
    days_in_month = calendar.monthrange(year, month)[1]

    redis_set_name = set_to_plot + "_set_" + str(year) + str(month).zfill(2)
    # smembers returns an unordered set: sort it to get a stable header
    words = sorted(server.smembers(redis_set_name))

    # newline='' lets the csv module control line endings itself
    with open(csvfilename + '.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Date'] + words)

        # for each day, from the first to the last of the month
        for day in range(1, days_in_month + 1):
            curdate = date(year, month, day).strftime("%Y%m%d")
            row = [curdate]
            for word in words:
                value = server.hget(word, curdate)
                # a missing value becomes 0 to keep the row aligned
                row.append(0 if value is None else value)
            writer.writerow(row)
 |