mirror of https://github.com/MISP/misp-dashboard
new: [trending] Better algorithm to detect trending item
parent
13b3ac456f
commit
a7065c1171
|
@ -76,15 +76,16 @@ class Trendings_helper:
|
|||
|
||||
''' GETTER '''
|
||||
|
||||
def getGenericTrending(self, trendingType, dateS, dateE, topNum=10):
    """Return per-day trending data for ``trendingType``, reduced to the top labels.

    For each date yielded by ``util.getXPrevDaysSpan(dateE, prev_days)`` the
    sorted set stored under ``"<trendingType>:<date string>"`` is read in full
    and turned into ``[label, score]`` pairs. The per-day lists are then handed
    to ``util.sortByTrendingScore``, which decides what is kept.

    Args:
        trendingType: Key prefix identifying the kind of trending data.
        dateS: Start date; only used to compute the number of days in the span.
        dateE: End date passed to ``util.getXPrevDaysSpan``.
        topNum: Forwarded unchanged to ``util.sortByTrendingScore``.

    Returns:
        Whatever ``util.sortByTrendingScore`` returns for the list of
        ``[timestamp, [[label, score], ...]]`` entries built here.
    """
    to_ret = []
    prev_days = (dateE - dateS).days
    for curDate in util.getXPrevDaysSpan(dateE, prev_days):
        keyname = "{}:{}".format(trendingType, util.getDateStrFormat(curDate))
        # Read the whole sorted set (0..-1): the top-N cut is made afterwards on
        # the score aggregated over all days, not per day.
        records = self.serv_redis_db.zrange(keyname, 0, -1, desc=True, withscores=True)
        # Guard before iterating: checking for None after the comprehension
        # (as was done previously) could never take effect.
        if records is None:
            records = []
        # Members are decoded from bytes; assumes the client does not already
        # decode responses -- TODO confirm against the connection settings.
        data = [[record[0].decode('utf8'), record[1]] for record in records]
        to_ret.append([util.getTimestamp(curDate), data])
    return util.sortByTrendingScore(to_ret, topNum=topNum)
|
||||
|
||||
def getSpecificTrending(self, trendingType, dateS, dateE, specificLabel=''):
|
||||
|
@ -97,9 +98,9 @@ class Trendings_helper:
|
|||
to_ret.append([util.getTimestamp(curDate), data])
|
||||
return to_ret
|
||||
|
||||
def getTrendingEvents(self, dateS, dateE, specificLabel=None, topNum=None):
    """Return trending event data between ``dateS`` and ``dateE``.

    Without ``specificLabel`` the generic trending for ``self.keyEvent`` is
    returned and ``topNum`` is forwarded as given (including ``None``).
    With ``specificLabel`` the data for that single label is returned and
    ``topNum`` is not used.
    """
    if specificLabel is not None:
        # Labels arrive with the two-character sequence backslash + 'n';
        # turn it back into a real newline before the lookup.
        label = specificLabel.replace('\\n', '\n')
        return self.getSpecificTrending(self.keyEvent, dateS, dateE, label)

    return self.getGenericTrending(self.keyEvent, dateS, dateE, topNum=topNum)
|
||||
|
|
31
util.py
31
util.py
|
@ -1,3 +1,4 @@
|
|||
from collections import defaultdict
|
||||
import datetime, time
|
||||
|
||||
# 60 * 60 * 24 = 86400; presumably the length of one day in seconds.
ONE_DAY = 60*60*24
|
||||
|
@ -71,3 +72,33 @@ def getDateHoursStrFormat(date):
|
|||
|
||||
def getTimestamp(date):
    """Return the Unix timestamp of ``date`` as a whole number of seconds.

    ``date`` must provide ``timetuple()``; the resulting struct is
    interpreted by ``time.mktime`` as local time.
    """
    local_struct = date.timetuple()
    seconds = time.mktime(local_struct)
    return int(seconds)
|
||||
|
||||
|
||||
def sortByTrendingScore(toSort, topNum=5):
    """Keep, for every day, only the labels with the best overall trending score.

    Each label's score is the sum of its daily occurrences, weighted by a
    linear decay on the day's position in ``toSort``: the first entry has
    weight 1.0 and the last has weight ``1 / len(toSort)``.

    Args:
        toSort: List of ``[timestamp, dailyData]`` entries, where ``dailyData``
            is a list of ``[label, occurrences]`` records.
        topNum: Number of best-scored labels to keep. It is used as a slice
            bound, so ``None`` keeps every label and ``0`` keeps none.

    Returns:
        A new list of ``[timestamp, filteredDailyData]`` in the same order as
        ``toSort``. Each day's records keep their original order, restricted
        to the selected labels. ``toSort`` itself is not modified.
    """
    numDay = len(toSort)
    baseDecay = 1.0

    # Accumulate the decayed score of every label over the whole period.
    scoredLabels = defaultdict(float)
    for dayIndex, dayEntry in enumerate(toSort):
        weight = baseDecay * ((numDay - dayIndex) / numDay)
        for record in dayEntry[1]:
            scoredLabels[record[0]] += record[1] * weight

    # Stable descending sort: labels with equal scores keep first-seen order.
    ranked = sorted(scoredLabels.items(), key=lambda pair: pair[1], reverse=True)
    # A set gives O(1) membership tests in the filtering pass below.
    topLabels = {label for label, _score in ranked[:topNum]}

    # Now that we have the top labels, drop the poorly scored ones day by day.
    return [
        [dayEntry[0], [record for record in dayEntry[1] if record[0] in topLabels]]
        for dayEntry in toSort
    ]
|
||||
|
|
Loading…
Reference in New Issue