stop killing the disk when creating the word curve

pull/38/head
Raphaël Vinot 2014-09-02 18:20:28 +02:00
parent 7542eaf739
commit 9e8611a42d
2 changed files with 42 additions and 63 deletions

View File

@ -24,10 +24,10 @@ Requirements
""" """
import redis import redis
import time import time
from packages import Paste
from pubsublogger import publisher from pubsublogger import publisher
from packages import lib_words from packages import lib_words
import os import os
import datetime
from Helper import Process from Helper import Process
@ -57,23 +57,31 @@ if __name__ == "__main__":
prec_filename = None prec_filename = None
while True: while True:
if message is not None: if message is not None:
generate_new_graph = True
filename, word, score = message.split() filename, word, score = message.split()
if prec_filename is None or filename != prec_filename: temp = filename.split('/')
PST = Paste.Paste(filename) date = temp[-4] + temp[-3] + temp[-2]
lib_words.create_curve_with_word_file(
r_serv1, csv_path, wordfile_path, int(PST.p_date.year),
int(PST.p_date.month))
prec_filename = filename low_word = word.lower()
prev_score = r_serv1.hget(word.lower(), PST.p_date) prev_score = r_serv1.hget(low_word, date)
if prev_score is not None: if prev_score is not None:
r_serv1.hset(word.lower(), PST.p_date, r_serv1.hset(low_word, date, int(prev_score) + int(score))
int(prev_score) + int(score))
else: else:
r_serv1.hset(word.lower(), PST.p_date, score) r_serv1.hset(low_word, date, score)
else: else:
if generate_new_graph:
generate_new_graph = False
print 'Building graph'
today = datetime.date.today()
year = today.year
month = today.month
lib_words.create_curve_with_word_file(r_serv1, csv_path,
wordfile_path, year,
month)
publisher.debug("Script Curve is Idling") publisher.debug("Script Curve is Idling")
print "sleeping" print "sleeping"
time.sleep(1) time.sleep(10)
message = p.get_from_set() message = p.get_from_set()

View File

@ -6,6 +6,7 @@ from pubsublogger import publisher
import calendar import calendar
from datetime import date from datetime import date
from dateutil.rrule import rrule, DAILY from dateutil.rrule import rrule, DAILY
import csv
def listdirectory(path): def listdirectory(path):
@ -80,60 +81,30 @@ def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month
to keep the timeline of the curve correct. to keep the timeline of the curve correct.
""" """
a = date(year, month, 01) first_day = date(year, month, 01)
b = date(year, month, calendar.monthrange(year, month)[1]) last_day = date(year, month, calendar.monthrange(year, month)[1])
days = {}
words = [] words = []
with open(feederfilename, 'rb') as F: with open(feederfilename, 'rb') as f:
# words of the files # words of the files
for word in F: words = sorted([word.strip() for word in f])
# list of words (sorted as in the file)
words.append(word[:-1]) headers = ['Date'] + words
with open(csvfilename+'.csv', 'wb') as f:
writer = csv.writer(f)
writer.writerow(headers)
# for each days # for each days
for dt in rrule(DAILY, dtstart=a, until=b): for dt in rrule(DAILY, dtstart=first_day, until=last_day):
row = []
mot = [] curdate = dt.strftime("%Y%m%d")
mot1 = [] row.append(curdate)
mot2 = []
days[dt.strftime("%Y%m%d")] = ''
# from the 1srt day to the last of the list # from the 1srt day to the last of the list
for word in sorted(words): for word in words:
value = r_serv.hget(word, curdate)
# if the word have a value for the day if value is None:
if r_serv.hexists(word, dt.strftime("%Y%m%d")): row.append(0)
mot1.append(str(word))
mot2.append(r_serv.hget(word, dt.strftime("%Y%m%d")))
mot = zip(mot1, mot2)
days[dt.strftime("%Y%m%d")] = mot
else: else:
# if the word have a value for the day
mot1.append(str(word)) row.append(value)
mot2.append(0) writer.writerow(row)
mot = zip(mot1, mot2)
days[dt.strftime("%Y%m%d")] = mot
with open(csvfilename+".csv", 'wb') as F:
F.write("Date," + ",".join(sorted(words)) + '\n')
for x, s in days.items():
val = []
for y in s:
val.append(y[1])
F.write(x + ',' + str(val) + '\n')
with open(csvfilename+".csv", 'rb') as F:
h = F.read()
h = h.replace("[", "")
h = h.replace("]", "")
h = h.replace('\'', "")
with open(csvfilename+".csv", 'wb') as F:
F.write(h)