#!/usr/bin/env python # -*- coding: utf-8 -*- from json import JSONDecoder import pygal from pygal.style import Style import pandas import numpy from scipy import stats from pytaxonomies import Taxonomies import re import matplotlib.pyplot as plt from matplotlib import pylab import os import date_tools from dateutil.parser import parse # ############### Tools ################ def selectInRange(Events, begin=None, end=None): inRange = [] for i, Event in Events.iterrows(): if date_tools.dateInRange(parse(Event['date']), begin, end): inRange.append(Event.tolist()) inRange = pandas.DataFrame(inRange) temp = Events.columns.tolist() if inRange.empty: return None inRange.columns = temp return inRange def getTaxonomies(dataframe): taxonomies = Taxonomies() taxonomies = list(taxonomies.keys()) notInTaxo = [] count = 0 for taxonomy in taxonomies: empty = True for it in dataframe.iterrows(): if it[0].startswith(taxonomy): empty = False dataframe = dataframe.drop([it[0]]) count = count + 1 if empty is True: notInTaxo.append(taxonomy) if dataframe.empty: emptyOther = True else: emptyOther = False for taxonomy in notInTaxo: taxonomies.remove(taxonomy) return taxonomies, emptyOther def buildDoubleIndex(index1, index2, datatype): it = -1 newindex1 = [] for index in index2: if index == 0: it += 1 newindex1.append(index1[it]) arrays = [newindex1, index2] tuples = list(zip(*arrays)) return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype]) def buildNewColumn(index2, column): it = -1 newcolumn = [] for index in index2: if index == 0: it += 1 newcolumn.append(column[it]) return newcolumn def addColumn(dataframe, columnList, columnName): dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index) def concat(data): return pandas.concat(data, axis=1) def createFakeEmptyTagsSeries(): return pandas.Series({'Faketag': 0}) def removeFaketagRow(dataframe): return dataframe.drop(['Faketag']) def getCopyDataframe(dataframe): return dataframe.copy() def createDictTagsColour(colourDict, tags): temp = tags.groupby(['name', 'colour']).count()['id'] levels_name = temp.index.levels[0] levels_colour = temp.index.levels[1] labels_name = temp.index.labels[0] labels_colour = temp.index.labels[1] for i in range(len(labels_name)): colourDict[levels_name[labels_name[i]]] = levels_colour[labels_colour[i]] def createTagsPlotStyle(dataframe, colourDict, taxonomy=None): colours = [] if taxonomy is not None: for it in dataframe.iterrows(): if it[0].startswith(taxonomy): colours.append(colourDict[it[0]]) else: for it in dataframe.iterrows(): colours.append(colourDict[it[0]]) style = Style(background='transparent', plot_background='#eeeeee', foreground='#111111', foreground_strong='#111111', foreground_subtle='#111111', opacity='.6', opacity_hover='.9', transition='400ms ease-in', colors=tuple(colours)) return style # ############### Formatting ################ def eventsListBuildFromList(filename): with open(filename, 'r') as myfile: s = myfile.read().replace('\n', '') decoder = JSONDecoder() s_len = len(s) Events = [] end = 0 while end != s_len: Event, end = decoder.raw_decode(s, idx=end) Events.append(Event) data = [] for e in Events: data.append(pandas.DataFrame.from_dict(e, orient='index')) Events = pandas.concat(data) for it in range(Events['attribute_count'].size): if Events['attribute_count'][it] is None: Events['attribute_count'][it] = '0' else: Events['attribute_count'][it] = int(Events['attribute_count'][it]) Events = Events.set_index('id') return Events def eventsListBuildFromArray(jdata): ''' returns a structure listing all primary events in the sample ''' data = [pandas.DataFrame.from_dict(e, orient='index') for e in jdata['response']] events = pandas.concat(data) events = events.set_index(['id']) return events def attributesListBuild(events): attributes = [pandas.DataFrame(attribute) for attribute in events['Attribute']] return pandas.concat(attributes) def tagsListBuild(Events): Tags = [] if 'Tag' in Events.columns: for Tag in Events['Tag']: if type(Tag) is not list: continue Tags.append(pandas.DataFrame(Tag)) if Tags: Tags = pandas.concat(Tags) columnDate = buildNewColumn(Tags.index, Events['date']) addColumn(Tags, columnDate, 'date') index = buildDoubleIndex(Events.index, Tags.index, 'tag') Tags = Tags.set_index(index) else: Tags = None return Tags def isTagIn(dataframe, tag): temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist() index = [] for i in range(len(temp)): if temp[i][0] not in index: index.append(temp[i][0]) return index def renameColumns(dataframe, namelist): dataframe.columns = namelist return dataframe def replaceNaN(dataframe, value): return dataframe.fillna(value) # ############### Basic Stats ################ def getNbitems(dataframe): return len(dataframe.index) def getNbAttributePerEventCategoryType(attributes): return attributes.groupby(['event_id', 'category', 'type']).count()['id'] def getNbOccurenceTags(Tags): return Tags.groupby('name').count()['id'] # ############### Charts ################ def tagsToLineChart(dataframe, title, dates, colourDict): style = createTagsPlotStyle(dataframe, colourDict) line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False) line_chart.title = title line_chart.x_labels = dates for it in dataframe.iterrows(): line_chart.add(it[0], it[1].tolist()) line_chart.render_to_file('tags_repartition_plot.svg') def tagstrendToLineChart(dataframe, title, dates, split, colourDict): style = createTagsPlotStyle(dataframe, colourDict) line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False) line_chart.title = title line_chart.x_labels = dates xi = numpy.arange(split) for it in dataframe.iterrows(): slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) line = slope * xi + intercept line_chart.add(it[0], line, show_dots=False) line_chart.render_to_file('tags_repartition_trend_plot.svg') def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther): style = createTagsPlotStyle(dataframe, colourDict) line_chart = pygal.Line(x_label_rotation=20, style=style) line_chart.title = title line_chart.x_labels = dates for taxonomy in taxonomies: taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy) taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) taxo_line_chart.title = title + ': ' + taxonomy taxo_line_chart.x_labels = dates for it in dataframe.iterrows(): if it[0].startswith(taxonomy): taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), it[1].tolist()) dataframe = dataframe.drop([it[0]]) taxo_line_chart.render_to_file('plot/' + taxonomy + '.svg') if not emptyOther: taxoStyle = createTagsPlotStyle(dataframe, colourDict) taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) taxo_line_chart.title = title + ': other' taxo_line_chart.x_labels = dates for it in dataframe.iterrows(): taxo_line_chart.add(it[0], it[1].tolist()) taxo_line_chart.render_to_file('plot/other.svg') def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther): style = createTagsPlotStyle(dataframe, colourDict) line_chart = pygal.Line(x_label_rotation=20, style=style) line_chart.title = title line_chart.x_labels = dates xi = numpy.arange(split) for taxonomy in taxonomies: taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy) taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) taxo_line_chart.title = title + ': ' + taxonomy taxo_line_chart.x_labels = dates for it in dataframe.iterrows(): if it[0].startswith(taxonomy): slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) line = slope * xi + intercept taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False) dataframe = dataframe.drop([it[0]]) taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg') if not emptyOther: taxoStyle = createTagsPlotStyle(dataframe, colourDict) taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) taxo_line_chart.title = title + ': other' taxo_line_chart.x_labels = dates for it in dataframe.iterrows(): slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) line = slope * xi + intercept taxo_line_chart.add(it[0], line, show_dots=False) taxo_line_chart.render_to_file('plot/other_trend.svg') def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order): for taxonomy in taxonomies: for it in dataframe.iterrows(): if it[0].startswith(taxonomy): points = [] for i in range(split): points.append((i, it[1][i])) color = colourDict[it[0]] label = re.sub(taxonomy + ':', '', it[0]) points = numpy.array(points) dataframe = dataframe.drop([it[0]]) # get x and y vectors x = points[:, 0] y = points[:, 1] # calculate polynomial z = numpy.polyfit(x, y, order) f = numpy.poly1d(z) # calculate new x's and y's x_new = numpy.linspace(x[0], x[-1], 50) y_new = f(x_new) plt.plot(x, y, '.', color=color) plt.plot(x_new, y_new, color=color, label=label + 'trend') pylab.title('Polynomial Fit with Matplotlib: ' + taxonomy) pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5)) ax = plt.gca() # ax.set_facecolor((0.898, 0.898, 0.898)) box = ax.get_position() ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height]) fig = plt.gcf() fig.set_size_inches(20, 15) fig.savefig('plotlib/' + taxonomy + '.png') fig.clf() if not emptyOther: for it in dataframe.iterrows(): points = [] for i in range(split): points.append((i, it[1][i])) color = colourDict[it[0]] label = it[0] points = numpy.array(points) # get x and y vectors x = points[:, 0] y = points[:, 1] # calculate polynomial z = numpy.polyfit(x, y, order) f = numpy.poly1d(z) # calculate new x's and y's x_new = numpy.linspace(x[0], x[-1], 50) y_new = f(x_new) plt.plot(x, y, '.', color=color, label=label) plt.plot(x_new, y_new, color=color, label=label + 'trend') pylab.title('Polynomial Fit with Matplotlib: other') pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5)) ax = plt.gca() #cax.set_facecolor((0.898, 0.898, 0.898)) box = ax.get_position() ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height]) fig = plt.gcf() fig.set_size_inches(20, 15) fig.savefig('plotlib/other.png') def createVisualisation(taxonomies): chain = '\n\n\t\n\t\t\n\t\n\t' chain = chain + '' for taxonomy in taxonomies: chain = chain + '\n' chain = chain + '\n' chain = chain + '
graph
graph
' chain = chain + '\n\t\n' with open('test_tags_trend.html', 'w') as target: target.write(chain)