new: [trending] Better algorithm to detect trending item

pull/43/merge
AIL 2018-09-24 09:21:02 +02:00
parent 13b3ac456f
commit a7065c1171
2 changed files with 36 additions and 4 deletions

View File

@ -76,15 +76,16 @@ class Trendings_helper:
''' GETTER '''
# Build, for each day of the requested period, the list of [label, score]
# pairs stored for `trendingType`, then keep only the best-scored labels.
#
# Parameters:
#   trendingType -- prefix of the per-day sorted-set key ("<trendingType>:<date>")
#   dateS, dateE -- start and end of the period; only their difference in whole
#                   days and dateE itself are used here
#   topNum       -- forwarded to util.sortByTrendingScore as the number of
#                   labels to keep
# Returns: whatever util.sortByTrendingScore returns for the collected
#          [timestamp, data] entries.
#
# NOTE(review): this span comes from a diff rendering with indentation removed.
# The two consecutive `def` lines and the two consecutive `zrange` lines look
# like before/after versions of the same statement (second of each pair being
# the new one) -- confirm against the repository.
def getGenericTrending(self, trendingType, dateS, dateE, topNum=0):
def getGenericTrending(self, trendingType, dateS, dateE, topNum=10):
to_ret = []
prev_days = (dateE - dateS).days
for curDate in util.getXPrevDaysSpan(dateE, prev_days):
# one sorted set per day, named "<trendingType>:<formatted date>"
keyname = "{}:{}".format(trendingType, util.getDateStrFormat(curDate))
data = self.serv_redis_db.zrange(keyname, 0, topNum-1, desc=True, withscores=True)
# range 0..-1 reads the whole sorted set; the top-N cut is applied after the
# loop by util.sortByTrendingScore instead of per day
data = self.serv_redis_db.zrange(keyname, 0, -1, desc=True, withscores=True)
# labels are decoded from UTF-8 bytes; each record becomes [label, score]
data = [ [record[0].decode('utf8'), record[1]] for record in data ]
# NOTE(review): the comprehension above always produces a list, so the
# `else []` branch below can never be taken
data = data if data is not None else []
to_ret.append([util.getTimestamp(curDate), data])
# filter every day's data down to the topNum labels with the best overall score
to_ret = util.sortByTrendingScore(to_ret, topNum=topNum)
return to_ret
def getSpecificTrending(self, trendingType, dateS, dateE, specificLabel=''):
@ -97,9 +98,9 @@ class Trendings_helper:
to_ret.append([util.getTimestamp(curDate), data])
return to_ret
# Return trending data for events (key prefix self.keyEvent) between dateS and
# dateE: the generic top-label view when no label is given, otherwise the data
# for the one requested label.
#
# NOTE(review): this span comes from a diff rendering with indentation removed.
# The two consecutive `def` lines and the two consecutive `return
# self.getGenericTrending(...)` lines look like before/after versions of the
# same statement (second of each pair being the new one) -- confirm against the
# repository.
def getTrendingEvents(self, dateS, dateE, specificLabel=None):
def getTrendingEvents(self, dateS, dateE, specificLabel=None, topNum=None):
if specificLabel is None:
return self.getGenericTrending(self.keyEvent, dateS, dateE)
# topNum is always passed explicitly, so the default of None declared here
# replaces getGenericTrending's own default when the caller gives no value
return self.getGenericTrending(self.keyEvent, dateS, dateE, topNum=topNum)
else:
specificLabel = specificLabel.replace('\\n', '\n'); # turn each escaped backslash-n (two characters) in the label back into a real newline character
return self.getSpecificTrending(self.keyEvent, dateS, dateE, specificLabel)

31
util.py
View File

@ -1,3 +1,4 @@
from collections import defaultdict
import datetime, time
ONE_DAY = 60*60*24
@ -71,3 +72,33 @@ def getDateHoursStrFormat(date):
def getTimestamp(date):
    """Return the Unix timestamp of *date* as an integer number of seconds.

    The value is obtained by passing ``date.timetuple()`` to ``time.mktime``,
    which interprets the fields as local time; the fractional part, if any,
    is truncated.
    """
    local_fields = date.timetuple()
    epoch_seconds = time.mktime(local_fields)
    return int(epoch_seconds)
def sortByTrendingScore(toSort, topNum=5):
    """Filter per-day data down to the labels with the best overall score.

    Every label gets one score for the whole period: the sum of its daily
    occurrence counts, each multiplied by a weight that decreases linearly
    with the day's position in ``toSort`` (the first entry weighs 1.0, the
    last one 1/len(toSort)).

    Args:
        toSort: list of ``[timestamp, dailyData]`` entries, where
            ``dailyData`` is a list of ``[label, occurrences]`` records.
        topNum: number of best-scored labels to keep. It is used as a slice
            bound, so ``None`` keeps every label and ``0`` keeps none.

    Returns:
        A new list with one ``[timestamp, filteredDailyData]`` entry per input
        entry, in the same order; each ``filteredDailyData`` holds the original
        records whose label is among the kept ones, in their original order.
    """
    numDay = len(toSort)
    baseDecay = 1.0

    scoredLabels = defaultdict(float)
    for i, arr in enumerate(toSort):
        # Linear decay: computed once per day instead of once per record.
        weight = baseDecay * ((numDay - i) / numDay)
        for item in arr[1]:
            scoredLabels[item[0]] += item[1] * weight

    # sorted() is stable, so equally scored labels keep first-seen order.
    ranked = sorted(scoredLabels.items(), key=lambda pair: pair[1], reverse=True)
    # A real set gives constant-time membership tests in the filter below.
    topLabels = {label for label, _ in ranked[:topNum]}

    # Drop the records of poorly scored labels, day by day.
    return [
        [arr[0], [item for item in arr[1] if item[0] in topLabels]]
        for arr in toSort
    ]