mirror of https://github.com/CIRCL/AIL-framework
stop killing the disk when creating the word curve
parent
7542eaf739
commit
9e8611a42d
34
bin/Curve.py
34
bin/Curve.py
|
@ -24,10 +24,10 @@ Requirements
|
||||||
"""
|
"""
|
||||||
import redis
|
import redis
|
||||||
import time
|
import time
|
||||||
from packages import Paste
|
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
from packages import lib_words
|
from packages import lib_words
|
||||||
import os
|
import os
|
||||||
|
import datetime
|
||||||
|
|
||||||
from Helper import Process
|
from Helper import Process
|
||||||
|
|
||||||
|
@ -57,23 +57,31 @@ if __name__ == "__main__":
|
||||||
prec_filename = None
|
prec_filename = None
|
||||||
while True:
|
while True:
|
||||||
if message is not None:
|
if message is not None:
|
||||||
filename, word, score = message.split()
|
generate_new_graph = True
|
||||||
if prec_filename is None or filename != prec_filename:
|
|
||||||
PST = Paste.Paste(filename)
|
|
||||||
lib_words.create_curve_with_word_file(
|
|
||||||
r_serv1, csv_path, wordfile_path, int(PST.p_date.year),
|
|
||||||
int(PST.p_date.month))
|
|
||||||
|
|
||||||
prec_filename = filename
|
filename, word, score = message.split()
|
||||||
prev_score = r_serv1.hget(word.lower(), PST.p_date)
|
temp = filename.split('/')
|
||||||
|
date = temp[-4] + temp[-3] + temp[-2]
|
||||||
|
|
||||||
|
low_word = word.lower()
|
||||||
|
prev_score = r_serv1.hget(low_word, date)
|
||||||
if prev_score is not None:
|
if prev_score is not None:
|
||||||
r_serv1.hset(word.lower(), PST.p_date,
|
r_serv1.hset(low_word, date, int(prev_score) + int(score))
|
||||||
int(prev_score) + int(score))
|
|
||||||
else:
|
else:
|
||||||
r_serv1.hset(word.lower(), PST.p_date, score)
|
r_serv1.hset(low_word, date, score)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
if generate_new_graph:
|
||||||
|
generate_new_graph = False
|
||||||
|
print 'Building graph'
|
||||||
|
today = datetime.date.today()
|
||||||
|
year = today.year
|
||||||
|
month = today.month
|
||||||
|
lib_words.create_curve_with_word_file(r_serv1, csv_path,
|
||||||
|
wordfile_path, year,
|
||||||
|
month)
|
||||||
|
|
||||||
publisher.debug("Script Curve is Idling")
|
publisher.debug("Script Curve is Idling")
|
||||||
print "sleeping"
|
print "sleeping"
|
||||||
time.sleep(1)
|
time.sleep(10)
|
||||||
message = p.get_from_set()
|
message = p.get_from_set()
|
||||||
|
|
|
@ -6,6 +6,7 @@ from pubsublogger import publisher
|
||||||
import calendar
|
import calendar
|
||||||
from datetime import date
|
from datetime import date
|
||||||
from dateutil.rrule import rrule, DAILY
|
from dateutil.rrule import rrule, DAILY
|
||||||
|
import csv
|
||||||
|
|
||||||
|
|
||||||
def listdirectory(path):
|
def listdirectory(path):
|
||||||
|
@ -80,60 +81,30 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
|
||||||
to keep the timeline of the curve correct.
|
to keep the timeline of the curve correct.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
a = date(year, month, 01)
|
first_day = date(year, month, 01)
|
||||||
b = date(year, month, calendar.monthrange(year, month)[1])
|
last_day = date(year, month, calendar.monthrange(year, month)[1])
|
||||||
days = {}
|
|
||||||
words = []
|
words = []
|
||||||
|
|
||||||
with open(feederfilename, 'rb') as F:
|
with open(feederfilename, 'rb') as f:
|
||||||
# words of the files
|
# words of the files
|
||||||
for word in F:
|
words = sorted([word.strip() for word in f])
|
||||||
# list of words (sorted as in the file)
|
|
||||||
words.append(word[:-1])
|
headers = ['Date'] + words
|
||||||
|
with open(csvfilename+'.csv', 'wb') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
|
writer.writerow(headers)
|
||||||
|
|
||||||
# for each days
|
# for each days
|
||||||
for dt in rrule(DAILY, dtstart=a, until=b):
|
for dt in rrule(DAILY, dtstart=first_day, until=last_day):
|
||||||
|
row = []
|
||||||
mot = []
|
curdate = dt.strftime("%Y%m%d")
|
||||||
mot1 = []
|
row.append(curdate)
|
||||||
mot2 = []
|
|
||||||
|
|
||||||
days[dt.strftime("%Y%m%d")] = ''
|
|
||||||
# from the 1srt day to the last of the list
|
# from the 1srt day to the last of the list
|
||||||
for word in sorted(words):
|
for word in words:
|
||||||
|
value = r_serv.hget(word, curdate)
|
||||||
# if the word have a value for the day
|
if value is None:
|
||||||
if r_serv.hexists(word, dt.strftime("%Y%m%d")):
|
row.append(0)
|
||||||
mot1.append(str(word))
|
|
||||||
mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d")))
|
|
||||||
|
|
||||||
mot = zip(mot1, mot2)
|
|
||||||
|
|
||||||
days[dt.strftime("%Y%m%d")] = mot
|
|
||||||
else:
|
else:
|
||||||
|
# if the word have a value for the day
|
||||||
mot1.append(str(word))
|
row.append(value)
|
||||||
mot2.append(0)
|
writer.writerow(row)
|
||||||
|
|
||||||
mot = zip(mot1, mot2)
|
|
||||||
|
|
||||||
days[dt.strftime("%Y%m%d")] = mot
|
|
||||||
|
|
||||||
with open(csvfilename+".csv", 'wb') as F:
|
|
||||||
F.write("Date," + ",".join(sorted(words)) + '\n')
|
|
||||||
|
|
||||||
for x, s in days.items():
|
|
||||||
val = []
|
|
||||||
for y in s:
|
|
||||||
val.append(y[1])
|
|
||||||
|
|
||||||
F.write(x + ',' + str(val) + '\n')
|
|
||||||
|
|
||||||
with open(csvfilename+".csv", 'rb') as F:
|
|
||||||
h = F.read()
|
|
||||||
h = h.replace("[", "")
|
|
||||||
h = h.replace("]", "")
|
|
||||||
h = h.replace('\'', "")
|
|
||||||
|
|
||||||
with open(csvfilename+".csv", 'wb') as F:
|
|
||||||
F.write(h)
|
|
||||||
|
|
Loading…
Reference in New Issue