def getGenericTrending(self, trendingType, dateS, dateE, topNum=10):
    """Return per-day trending data for ``trendingType``, reduced to the top labels.

    For every date yielded by ``util.getXPrevDaysSpan(dateE, prev_days)`` (where
    ``prev_days`` is the number of days between ``dateS`` and ``dateE``), the
    sorted set stored under ``"<trendingType>:<date string>"`` is read in full
    and then handed to ``util.sortByTrendingScore`` so that the selection of
    the top labels is made across the whole period rather than day by day.

    Args:
        trendingType: key prefix of the sorted sets to read.
        dateS: start of the period (only used to compute the day span).
        dateE: end of the period.
        topNum: number of labels kept by ``util.sortByTrendingScore``.

    Returns:
        Whatever ``util.sortByTrendingScore`` returns for a list of
        ``[timestamp, [[label, score], ...]]`` entries, one entry per day.
    """
    to_ret = []
    prev_days = (dateE - dateS).days
    for curDate in util.getXPrevDaysSpan(dateE, prev_days):
        keyname = "{}:{}".format(trendingType, util.getDateStrFormat(curDate))
        # Read the complete set: the cut to ``topNum`` is done after scoring
        # over all days, so no per-day truncation must happen here.
        # The ``or []`` guard has to come *before* the comprehension, otherwise
        # a missing reply would fail while iterating instead of yielding [].
        records = self.serv_redis_db.zrange(keyname, 0, -1, desc=True, withscores=True) or []
        # Members are decoded from UTF-8 bytes; scores are kept as returned.
        data = [[record[0].decode('utf8'), record[1]] for record in records]
        to_ret.append([util.getTimestamp(curDate), data])
    return util.sortByTrendingScore(to_ret, topNum=topNum)


def getTrendingEvents(self, dateS, dateE, specificLabel=None, topNum=None):
    """Return trending data for events, either generic or for one label.

    Args:
        dateS: start of the period.
        dateE: end of the period.
        specificLabel: when ``None``, the generic trending of ``self.keyEvent``
            is returned; otherwise only this label is looked up through
            ``getSpecificTrending``.
        topNum: forwarded unchanged to ``getGenericTrending`` (ignored when
            ``specificLabel`` is given). Note that the default here is
            ``None``, which overrides ``getGenericTrending``'s own default.

    Returns:
        The result of ``getGenericTrending`` or ``getSpecificTrending``.
    """
    if specificLabel is None:
        return self.getGenericTrending(self.keyEvent, dateS, dateE, topNum=topNum)

    # Labels arrive with a literal backslash followed by 'n'; turn those
    # two characters back into a real newline before the lookup.
    specificLabel = specificLabel.replace('\\n', '\n')
    return self.getSpecificTrending(self.keyEvent, dateS, dateE, specificLabel)
def sortByTrendingScore(toSort, topNum=5):
    """Keep, in every day of ``toSort``, only the ``topNum`` best-scoring labels.

    A label's score is the sum of its daily occurrences, each weighted by a
    linear decay on the day's position in ``toSort``: the entry at index 0
    has weight 1.0 and the last entry has weight ``1 / len(toSort)``.

    Args:
        toSort: sequence of ``[timestamp, dailyData]`` entries, where
            ``dailyData`` is a sequence of ``[label, occurrences]`` items.
            Labels must be hashable. The input is not modified.
        topNum: how many labels to keep. It is used as a slice bound, so
            ``None`` keeps every label and ``0`` keeps none.

    Returns:
        A new list of ``[timestamp, filteredDailyData]`` entries in the same
        order as ``toSort``; each ``filteredDailyData`` holds the original
        items (in their original order) whose label is among the top ones.
        An empty ``toSort`` yields an empty list.
    """
    numDay = len(toSort)
    baseDecay = 1.0

    # Accumulate the decayed score of every label over the whole period.
    # The division is only evaluated inside the loop, hence never with
    # numDay == 0.
    scoredLabels = defaultdict(float)
    for i, arr in enumerate(toSort):
        weight = baseDecay * ((numDay - i) / numDay)
        for item in arr[1]:
            scoredLabels[item[0]] += item[1] * weight

    # sorted() is stable, so labels with equal scores keep first-seen order.
    ranked = sorted(scoredLabels.items(), key=lambda pair: pair[1], reverse=True)
    # A set gives constant-time membership checks in the filtering pass below.
    topLabels = {label for label, _ in ranked[:topNum]}

    # Now that the top labels are known, drop every other label from each day.
    return [
        [arr[0], [item for item in arr[1] if item[0] in topLabels]]
        for arr in toSort
    ]