2016-07-13 15:24:36 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
from json import JSONDecoder
|
|
|
|
import random
|
|
|
|
import pygal
|
|
|
|
from pygal.style import Style
|
2016-07-26 16:35:46 +02:00
|
|
|
import pandas
|
2016-07-13 15:24:36 +02:00
|
|
|
from datetime import datetime
|
|
|
|
from datetime import timedelta
|
|
|
|
from dateutil.parser import parse
|
2016-07-26 11:05:20 +02:00
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
# ############### Errors ################
|
|
|
|
|
2016-07-26 11:05:20 +02:00
|
|
|
|
|
|
|
class DateError(Exception):
    """Error raised by the date helpers when a date window is inconsistent."""

    def __init__(self, value):
        # Keep the payload (normally the message) available to callers.
        self.value = value

    def __str__(self):
        # Render the payload through repr() so string quoting stays visible.
        return '{!r}'.format(self.value)
|
2016-07-13 15:24:36 +02:00
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
# ############### Tools ################
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def buildDoubleIndex(index1, index2, datatype):
    """Build a two-level MultiIndex named ``('event', datatype)``.

    ``index2`` is read in order; each time a ``0`` is met the next entry of
    ``index1`` becomes the outer key for the following rows.

    Args:
        index1: Outer keys, one per group of rows.
        index2: Inner keys; a value of ``0`` marks the start of a new group.
        datatype: Name given to the inner index level.

    Returns:
        A ``pandas.MultiIndex`` pairing every inner key with its outer key.
    """
    position = -1
    owners = []
    for inner in index2:
        if inner == 0:
            # A zero marks the first row of the next group.
            position = position + 1
        owners.append(index1[position])
    pairs = list(zip(owners, index2))
    return pandas.MultiIndex.from_tuples(pairs, names=['event', datatype])
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def buildNewColumn(index2, column):
    """Repeat the entries of ``column`` so they line up with ``index2``.

    Each ``0`` found in ``index2`` moves on to the next entry of ``column``;
    that entry is then emitted once per element of ``index2`` until the next
    ``0``.

    Args:
        index2: Sequence of inner keys, where ``0`` starts a new group.
        column: One value per group.

    Returns:
        A list with one value per element of ``index2``.
    """
    expanded = []
    cursor = -1
    for marker in index2:
        if marker == 0:
            cursor = cursor + 1
        expanded.append(column[cursor])
    return expanded
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
def dateInRange(datetimeTested, begin=None, end=None):
    """Tell whether ``datetimeTested`` lies within ``[begin, end]`` (inclusive).

    Args:
        datetimeTested: The datetime to check.
        begin: Lower bound; defaults to 1970-01-01 when ``None``.
        end: Upper bound; defaults to the current local time when ``None``.

    Returns:
        ``True`` if the tested datetime is inside the window, else ``False``.
    """
    lower = datetime(1970, 1, 1) if begin is None else begin
    upper = datetime.now() if end is None else end
    return lower <= datetimeTested and datetimeTested <= upper
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
def addColumn(dataframe, columnList, columnName):
    """Attach ``columnList`` to ``dataframe`` in place as column ``columnName``.

    The values are wrapped in a Series that reuses the frame's own index
    before being assigned.
    """
    values = pandas.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = values
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
def toDatetime(date):
    """Convert a date string to a datetime using ``dateutil``'s ``parse``."""
    parsed = parse(date)
    return parsed
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-26 11:05:20 +02:00
|
|
|
def checkDateConsistancy(begindate, enddate, lastdate):
    """Validate the requested date window and raise on inconsistencies.

    Both dates are parsed once and every check compares datetimes, so the
    begin/end ordering check no longer depends on how the raw inputs happen
    to compare with each other.

    Args:
        begindate: Start of the window as a date string, or ``None``.
        enddate: End of the window as a date string, or ``None``.
        lastdate: Datetime the end date must not precede.

    Raises:
        DateError: If ``begindate`` is after ``enddate``, if ``enddate`` is
            before ``lastdate``, or if ``begindate`` is in the future.
    """
    begin = toDatetime(begindate) if begindate is not None else None
    end = toDatetime(enddate) if enddate is not None else None

    if begin is not None and end is not None and begin > end:
        raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))

    if end is not None and end < lastdate:
        raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))

    if begin is not None:
        # Read the clock once so the comparison and the message agree.
        now = datetime.now()
        if begin > now:
            raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, now.date()))
|
|
|
|
|
2016-07-26 11:05:20 +02:00
|
|
|
|
|
|
|
def setBegindate(begindate, lastdate):
    """Return the later of ``begindate`` and ``lastdate``."""
    if lastdate > begindate:
        return lastdate
    return begindate
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-26 11:05:20 +02:00
|
|
|
def setEnddate(enddate):
    """Return ``enddate``, capped at the current local time."""
    now = datetime.now()
    if now < enddate:
        return now
    return enddate
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-26 11:05:20 +02:00
|
|
|
def getLastdate(last):
    """Return midnight of the day ``last`` days before now.

    Args:
        last: Number of days to go back; anything accepted by ``int()``.

    Returns:
        A datetime with hour, minute, second and microsecond set to zero.
    """
    shifted = datetime.now() - timedelta(days=int(last))
    return shifted.replace(microsecond=0, second=0, minute=0, hour=0)
|
2016-07-13 15:24:36 +02:00
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
# ############### Formatting ################
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def eventsListBuildFromList(filename):
    """Load events from a file holding one or more concatenated JSON documents.

    Each decoded document is turned into a DataFrame with
    ``DataFrame.from_dict(..., orient='index')``; the frames are concatenated
    and the ``attribute_count`` column is normalised to integers, with missing
    counts becoming ``0``.

    Args:
        filename: Path of the file to read.

    Returns:
        A DataFrame of all events, indexed by their ``id`` column.

    Raises:
        ValueError: If the file contains invalid JSON, or no document at all
            (``pandas.concat`` rejects an empty list).
    """
    with open(filename, 'r') as myfile:
        s = myfile.read().replace('\n', '')

    decoder = JSONDecoder()
    s_len = len(s)
    data = []
    end = 0
    while True:
        # raw_decode() does not skip leading whitespace, so step over any
        # padding between documents and at the end of the file ourselves.
        while end < s_len and s[end].isspace():
            end += 1
        if end >= s_len:
            break
        event, end = decoder.raw_decode(s, idx=end)
        data.append(pandas.DataFrame.from_dict(event, orient='index'))

    Events = pandas.concat(data)
    # Rebuild the whole column at once instead of chained per-cell assignment,
    # and use the integer 0 (not the string '0') for missing counts so the
    # column holds a single type.
    Events['attribute_count'] = [
        0 if pandas.isnull(count) else int(count)
        for count in Events['attribute_count']
    ]
    return Events.set_index('id')
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
|
|
|
def eventsListBuildFromArray(jdata):
    '''
    Build one DataFrame from the entries of jdata['response'].

    Every entry is converted with DataFrame.from_dict(orient='index'); the
    resulting frames are concatenated and indexed by their 'id' column.
    '''
    frames = []
    for entry in jdata['response']:
        frames.append(pandas.DataFrame.from_dict(entry, orient='index'))
    combined = pandas.concat(frames)
    return combined.set_index(['id'])
|
|
|
|
|
|
|
|
|
|
|
|
def attributesListBuild(events):
    """Concatenate the per-event 'Attribute' entries into one DataFrame.

    Each value of ``events['Attribute']`` is passed to ``pandas.DataFrame``
    and the resulting frames are stacked in order.
    """
    frames = []
    for attribute in events['Attribute']:
        frames.append(pandas.DataFrame(attribute))
    return pandas.concat(frames)
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
|
|
|
|
def tagsListBuild(Events):
    """Build a DataFrame of all tags, indexed by ``(event, tag)``.

    Only events whose 'Tag' value is a non-empty list contribute rows. The
    event id and date of each contributing event are collected alongside its
    tags, so tags stay attached to the right event even when earlier events
    carry no tags.

    Args:
        Events: DataFrame with 'Tag' and 'date' columns, indexed by event id.

    Returns:
        A DataFrame with one row per tag, a 'date' column copied from the
        owning event, and a two-level ``('event', 'tag')`` index.

    Raises:
        ValueError: From ``pandas.concat`` when no event carries any tag.
    """
    frames = []
    owners = []
    dates = []
    for event_id, tag_list, date in zip(Events.index, Events['Tag'], Events['date']):
        # Skip events without tags; an empty list would add no rows and
        # would otherwise shift every later event by one.
        if not isinstance(tag_list, list) or not tag_list:
            continue
        frames.append(pandas.DataFrame(tag_list))
        owners.append(event_id)
        dates.append(date)

    Tags = pandas.concat(frames)
    columnDate = buildNewColumn(Tags.index, dates)
    addColumn(Tags, columnDate, 'date')
    index = buildDoubleIndex(owners, Tags.index, 'tag')
    return Tags.set_index(index)
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
def selectInRange(Events, begin=None, end=None):
    """Return the events whose 'date' falls within ``[begin, end]``.

    Args:
        Events: DataFrame with a 'date' column of date strings.
        begin: Lower bound passed to ``dateInRange`` (``None`` for its default).
        end: Upper bound passed to ``dateInRange`` (``None`` for its default).

    Returns:
        A new DataFrame with the same columns as ``Events`` and a fresh
        positional index. It is empty, but still has those columns, when no
        event matches.
    """
    inRange = [
        Event.tolist()
        for _, Event in Events.iterrows()
        if dateInRange(parse(Event['date']), begin, end)
    ]
    # Passing the column names to the constructor keeps the empty case valid;
    # assigning them afterwards fails on a frame that has zero columns.
    return pandas.DataFrame(inRange, columns=Events.columns.tolist())
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
def isTagIn(dataframe, tag):
    """List the distinct first-level index keys of rows whose name matches ``tag``.

    Args:
        dataframe: Frame with a 'name' column whose index entries are
            tuples/sequences (the first element is what gets returned).
        tag: Pattern handed to ``Series.str.contains``.

    Returns:
        The first index element of every matching row, without duplicates,
        in order of first appearance.
    """
    matches = dataframe['name'].str.contains(tag)
    hits = dataframe[matches].index.tolist()
    events = []
    for entry in hits:
        event = entry[0]
        if event in events:
            continue
        events.append(event)
    return events
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
# ############### Basic Stats ################
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def getNbitems(dataframe):
    """Return how many entries ``dataframe``'s index holds."""
    rows = dataframe.index
    return len(rows)
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
|
|
|
def getNbAttributePerEventCategoryType(attributes):
    """Count attributes for each (event_id, category, type) combination.

    Returns:
        The 'id' column of the grouped counts, indexed by the three keys.
    """
    keys = ['event_id', 'category', 'type']
    counts = attributes.groupby(keys).count()
    return counts['id']
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def getNbOccurenceTags(Tags):
    """Count how many rows of ``Tags`` share each tag name.

    Returns:
        The 'id' column of the counts grouped by 'name'.
    """
    counts = Tags.groupby('name').count()
    return counts['id']
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
# ############### Charts ################
|
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
|
|
|
|
def createStyle(indexlevels):
    """Create a pygal style with one random colour per first-level entry.

    Args:
        indexlevels: Index levels; only the length of the first level is used.

    Returns:
        A ``(style, colorsList)`` pair, where ``colorsList`` holds the
        generated ``#RRGGBB`` strings that were also given to the style.
    """
    colorsList = [
        "#%06X" % random.randint(0, 0xFFFFFF)
        for _ in range(len(indexlevels[0]))
    ]
    palette = tuple(colorsList)
    style = Style(
        background='transparent',
        plot_background='#FFFFFF',
        foreground='#111111',
        foreground_strong='#111111',
        foreground_subtle='#111111',
        opacity='.6',
        opacity_hover='.9',
        transition='400ms ease-in',
        colors=palette,
    )
    return style, colorsList
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
|
2016-07-13 15:24:36 +02:00
|
|
|
def createLabelsTreemap(indexlevels, indexlabels):
    """Group second-level names under their first-level category.

    The first-level codes are walked in order; whenever a code differs from
    the running category counter, the current group is closed, a new one is
    started and the counter is incremented by one.

    Args:
        indexlevels: Pair ``(category_names, type_names)``.
        indexlabels: Pair ``(category_codes, type_codes)`` of equal length;
            the type codes index into ``type_names``.

    Returns:
        ``(category_names, groups)`` where ``groups`` is a list of lists of
        type names.
    """
    category_names, type_names = indexlevels[0], indexlevels[1]
    category_codes, type_codes = indexlabels[0], indexlabels[1]

    groups = []
    current = []
    expected = 0
    for pos, code in enumerate(category_codes):
        if code != expected:
            # Category changed: close the running group and open a new one.
            groups.append(current)
            current = []
            expected += 1
        current.append(type_names[type_codes[pos]])
    groups.append(current)

    return category_names, groups
|
|
|
|
|
|
|
|
|
|
|
|
def createTable(data, title, tablename, colorsList):
    """Write an HTML page with one pygal table per category.

    Args:
        data: Series with a two-level (category, type) MultiIndex.
        title: Unused; kept so existing callers keep working.
        tablename: Output path; ``None`` selects 'attribute_table.html'.
        colorsList: One colour string per category, used for the headings.
    """
    if tablename is None:
        tablename = 'attribute_table.html'

    index = data.index
    # pandas renamed MultiIndex.labels to MultiIndex.codes; prefer the new
    # attribute and fall back to the old one on older pandas versions.
    codes = getattr(index, 'codes', None)
    if codes is None:
        codes = index.labels
    categories, types = createLabelsTreemap(index.levels, codes)

    # The context manager closes the file even if rendering fails; mode 'w'
    # already truncates, so no explicit truncate() is needed.
    with open(tablename, 'w') as target:
        target.write('<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>')
        it = 0
        for i, category in enumerate(categories):
            table = pygal.Treemap(pretty_print=True)
            target.write('\n <h1 style="color:{};">{}</h1>\n'.format(colorsList[i], category))
            for typ in types[i]:
                # Values are consumed strictly by position, hence iloc.
                table.add(typ, data.iloc[it])
                it += 1
            target.write(table.render_table(transpose=True))
        target.write('\n</body>\n</html>')
|
|
|
|
|
|
|
|
|
2016-07-26 16:35:46 +02:00
|
|
|
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    """Render a pygal treemap of ``data`` and the companion HTML table.

    Args:
        data: Series with a two-level (category, type) MultiIndex.
        title: Title shown on the treemap.
        treename: Output path of the SVG; ``None`` selects
            'attribute_treemap.svg'.
        tablename: Output path handed to ``createTable``.
    """
    index = data.index
    style, colorsList = createStyle(index.levels)
    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    # pandas renamed MultiIndex.labels to MultiIndex.codes; prefer the new
    # attribute and fall back to the old one on older pandas versions.
    codes = getattr(index, 'codes', None)
    if codes is None:
        codes = index.labels
    categories, types = createLabelsTreemap(index.levels, codes)

    it = 0
    for i, category in enumerate(categories):
        types_labels = []
        for typ in types[i]:
            # Values are consumed strictly by position, hence iloc.
            types_labels.append({'label': typ, 'value': data.iloc[it]})
            it += 1
        treemap.add(category, types_labels)

    createTable(data, 'Attribute Distribution', tablename, colorsList)

    if treename is None:
        treename = 'attribute_treemap.svg'
    treemap.render_to_file(treename)
|