mirror of https://github.com/MISP/PyMISP
add tags_to_graphs.py in examples/situational-awareness
parent
95654e083c
commit
fa66c77cd1
|
@ -20,10 +20,10 @@
|
||||||
* tags_repartition_plot that present the raw data
|
* tags_repartition_plot that present the raw data
|
||||||
* tags_repartition_trend_plot that present the general evolution for each tag
|
* tags_repartition_trend_plot that present the general evolution for each tag
|
||||||
* Then each taxonomies will be represented in three plots:
|
* Then each taxonomies will be represented in three plots:
|
||||||
* Raw datas: in plot folder, named with the name of the corresponding taxonomy
|
* Raw data: in the "plot" folder, named after the corresponding taxonomy
|
||||||
* Trend: in plot folder, named _taxonomy_\_trend. general evolution of the data (linear fitting, curve fitting at order 1)
|
* Trend: in "plot" folder, named _taxonomy_\_trend. general evolution of the data (linear fitting, curve fitting at order 1)
|
||||||
* Curve fitting: in plotlib folder, name as the taxonomy it presents.
|
* Curve fitting: in the "plotlib" folder, named after the taxonomy it presents.
|
||||||
|
* In order to visualize the last plots, an HTML file is also generated automatically (might be improved in the future)
|
||||||
|
|
||||||
:warning: These scripts are not time optimised
|
:warning: These scripts are not time optimised
|
||||||
|
|
||||||
|
|
|
@ -29,11 +29,15 @@ table td
|
||||||
{
|
{
|
||||||
border-left: 1px solid #cbcbcb;
|
border-left: 1px solid #cbcbcb;
|
||||||
border-width: 0 0 0 1px;
|
border-width: 0 0 0 1px;
|
||||||
width: 150px;
|
width: 500px;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
padding: 0.5em 1em;
|
padding: 0.5em 1em;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Elements carrying the "test" class get a fixed width of 500px. */
.test
{
    width: 500px;
}
|
||||||
|
|
||||||
table tr:nth-child(2n-1) td
|
table tr:nth-child(2n-1) td
|
||||||
{
|
{
|
||||||
|
|
|
@ -0,0 +1,41 @@
|
||||||
|
/* Page-wide defaults: everything is rendered in a monospace font stack. */
body
{
    /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
    font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
}

/* Small, centred heading constrained to a 290px wide box. */
h1
{
    font-size: 16px;
    width: 290px;
    text-align:center;
}

/*** Stats Tables ***/

/* Fixed-layout table with collapsed borders; the 6000px width makes the
   page scroll horizontally rather than shrinking the cells. */
table
{
    border-collapse: collapse;
    border-spacing: 0;
    table-layout: fixed;
    width: 6000px;
    border: 1px solid #cbcbcb;
}

tbody
{
    font-size:12px;
}

/* Cells only draw their left border (border-width zeroes the other sides). */
td
{
    border-left: 1px solid #cbcbcb;
    border-width: 0 0 0 1px;
    margin: 0;
    padding: 0.5em 1em;
}

/* Emphasise the first cell of every row. */
table tr td:first-child
{
    font-weight: bold;
}
|
|
@ -0,0 +1,91 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
import argparse
|
||||||
|
import tools
|
||||||
|
|
||||||
|
|
||||||
|
def formattingDataframe(dataframe, dates, NanValue):
    """Merge the per-period results into one table with one column per date.

    Both ``dataframe`` (list of per-period results accepted by
    ``tools.concat``) and ``dates`` (list of matching column labels) are
    reversed IN PLACE before being combined, so a caller that keeps using
    ``dates`` afterwards sees it in the reversed order as well.

    :param dataframe: list of per-period results, one entry per date
    :param dates: list of labels, same length and order as ``dataframe``
    :param NanValue: value substituted for every missing cell
    :return: the combined table, columns renamed to ``dates``
    """
    dataframe.reverse()
    dates.reverse()
    table = tools.concat(dataframe)
    table = tools.renameColumns(table, dates)
    # Use the filler requested by the caller; it used to be hard-coded to 0,
    # which silently ignored the NanValue argument.
    return tools.replaceNaN(table, NanValue)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Command-line options: studied period, granularity of the splits and
    # degree of the polynomial used for the curve fitting.
    parser = argparse.ArgumentParser(description='Show the evolution of trend of tags.')
    parser.add_argument("-p", "--period", help='Define the studied period. Can be the past year (y), month (m) or week (w). Week is the default value if no valid value is given.')
    parser.add_argument("-a", "--accuracy", help='Define the accuracy of the splits on the studied period. Can be per month (m) -for year only-, week (w) -month only- or day (d). The default value is always the biggest available.')
    parser.add_argument("-o", "--order", type=int, help='Define the accuracy of the curve fitting. Default value is 3')

    args = parser.parse_args()

    misp = PyMISP(misp_url, misp_key, misp_verifycert)

    # split = number of buckets, size = length of one bucket in days,
    # last = window string handed to download_last, title = chart title.
    # A "year" is 360 days and a "month" 28 days so that they divide evenly.
    if args.period == "y":
        if args.accuracy == "d":
            split = 360
            size = 1
        else:
            split = 12
            size = 30
        last = '360d'
        title = 'Tags repartition over the last 360 days'
    elif args.period == "m":
        if args.accuracy == "d":
            split = 28
            size = 1
        else:
            split = 4
            size = 7
        last = '28d'
        title = 'Tags repartition over the last 28 days'
    else:
        # Any other (or missing) period falls back to one week, split per day.
        split = 7
        size = 1
        last = '7d'
        title = 'Tags repartition over the last 7 days'

    # NOTE(review): presumably returns the events of the last `last` period;
    # confirm against the PyMISP API.
    result = misp.download_last(last)
    events = tools.eventsListBuildFromArray(result)
    result = []
    dates = []
    enddate = tools.getToday()
    colourDict = {}
    faketag = False

    # Walk backwards in time, one bucket of `size` days per iteration.
    for i in range(split):
        begindate = tools.getNDaysBefore(enddate, size)
        dates.append(str(enddate.date()))
        eventstemp = tools.selectInRange(events, begin=begindate, end=enddate)
        if eventstemp is not None:
            tags = tools.tagsListBuild(eventstemp)
            if tags is not None:
                tools.createDictTagsColour(colourDict, tags)
                result.append(tools.getNbOccurenceTags(tags))
            else:
                # No tag in this bucket: add a placeholder so the bucket
                # still yields a column once the results are concatenated.
                result.append(tools.createFakeEmptyTagsSeries())
                faketag = True
        else:
            # No event in this bucket: same placeholder as above.
            result.append(tools.createFakeEmptyTagsSeries())
            faketag = True
        enddate = begindate

    # Reverses `result` and `dates` in place before combining them.
    result = formattingDataframe(result, dates, 0)
    if faketag:
        # Drop the placeholder row introduced by the empty buckets.
        result = tools.removeFaketagRow(result)

    taxonomies, emptyOther = tools.getTaxonomies(tools.getCopyDataframe(result))


    # Every plotting helper receives its own copy of the table.
    tools.tagsToLineChart(tools.getCopyDataframe(result), title, dates, colourDict)
    tools.tagstrendToLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict)
    tools.tagsToTaxoLineChart(tools.getCopyDataframe(result), title, dates, colourDict, taxonomies, emptyOther)
    tools.tagstrendToTaxoLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict, taxonomies, emptyOther)
    if args.order is None:
        # Documented default degree of the fitted polynomial.
        args.order = 3
    tools.tagsToPolyChart(tools.getCopyDataframe(result), split, colourDict, taxonomies, emptyOther, args.order)
    tools.createVisualisation(taxonomies)
|
|
@ -9,8 +9,13 @@ import pandas
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from dateutil.parser import parse
|
from dateutil.parser import parse
|
||||||
|
import numpy
|
||||||
# ############### Errors ################
|
from scipy import stats
|
||||||
|
from pytaxonomies import Taxonomies
|
||||||
|
import re
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from matplotlib import pylab
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
class DateError(Exception):
|
class DateError(Exception):
|
||||||
|
@ -20,30 +25,8 @@ class DateError(Exception):
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return repr(self.value)
|
return repr(self.value)
|
||||||
|
|
||||||
# ############### Tools ################
|
|
||||||
|
|
||||||
|
|
||||||
def buildDoubleIndex(index1, index2, datatype):
|
|
||||||
it = -1
|
|
||||||
newindex1 = []
|
|
||||||
for index in index2:
|
|
||||||
if index == 0:
|
|
||||||
it += 1
|
|
||||||
newindex1.append(index1[it])
|
|
||||||
arrays = [newindex1, index2]
|
|
||||||
tuples = list(zip(*arrays))
|
|
||||||
return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])
|
|
||||||
|
|
||||||
|
|
||||||
def buildNewColumn(index2, column):
|
|
||||||
it = -1
|
|
||||||
newcolumn = []
|
|
||||||
for index in index2:
|
|
||||||
if index == 0:
|
|
||||||
it += 1
|
|
||||||
newcolumn.append(column[it])
|
|
||||||
return newcolumn
|
|
||||||
|
|
||||||
|
# ############### Date Tools ################
|
||||||
|
|
||||||
def dateInRange(datetimeTested, begin=None, end=None):
|
def dateInRange(datetimeTested, begin=None, end=None):
|
||||||
if begin is None:
|
if begin is None:
|
||||||
|
@ -53,10 +36,6 @@ def dateInRange(datetimeTested, begin=None, end=None):
|
||||||
return begin <= datetimeTested <= end
|
return begin <= datetimeTested <= end
|
||||||
|
|
||||||
|
|
||||||
def addColumn(dataframe, columnList, columnName):
|
|
||||||
dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)
|
|
||||||
|
|
||||||
|
|
||||||
def toDatetime(date):
|
def toDatetime(date):
|
||||||
return parse(date)
|
return parse(date)
|
||||||
|
|
||||||
|
@ -86,6 +65,115 @@ def setEnddate(enddate):
|
||||||
def getLastdate(last):
|
def getLastdate(last):
|
||||||
return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)
|
return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
|
|
||||||
|
def getNDaysBefore(date, days):
    """Return midnight (00:00:00.000000) of the day ``days`` days before ``date``."""
    shifted = date - timedelta(days=days)
    return shifted.replace(hour=0, minute=0, second=0, microsecond=0)
|
||||||
|
|
||||||
|
|
||||||
|
def getToday():
    """Return the current local day as a naive datetime truncated to midnight."""
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return datetime.now().replace(**midnight)
|
||||||
|
|
||||||
|
|
||||||
|
# ############### Tools ################
|
||||||
|
|
||||||
|
|
||||||
|
def getTaxonomies(dataframe):
    """List the known taxonomies that own at least one row of ``dataframe``.

    Taxonomy names are tried in the order given by ``Taxonomies().keys()``.
    A row belongs to the first taxonomy whose name is a prefix of the row
    label; claimed rows are removed before the next taxonomy is tried.

    :param dataframe: table whose index holds the tag names
    :return: ``(taxonomies, emptyOther)`` - the names that matched at least
             one row, and True when no unclaimed row is left over
    """
    remaining = dataframe
    present = []
    for name in list(Taxonomies().keys()):
        claimed = [label for label in remaining.index if label.startswith(name)]
        # Rows are dropped one label at a time (drop returns a new frame).
        for label in claimed:
            remaining = remaining.drop([label])
        if claimed:
            present.append(name)
    return present, bool(remaining.empty)
|
||||||
|
|
||||||
|
|
||||||
|
def buildDoubleIndex(index1, index2, datatype):
    """Build a two-level index pairing every sub-row with its owning event.

    ``index2`` is the concatenation of several 0-based sub-indexes; each 0
    marks the start of the next entry of ``index1``.

    :param index1: labels of the owning events
    :param index2: concatenated sub-indexes (0, 1, ..., 0, 1, ...)
    :param datatype: name given to the second level
    :return: ``pandas.MultiIndex`` with levels named ``'event'`` and ``datatype``
    """
    cursor = -1
    pairs = []
    for position in index2:
        # A 0 means we moved on to the next owner.
        cursor = cursor + 1 if position == 0 else cursor
        pairs.append((index1[cursor], position))
    return pandas.MultiIndex.from_tuples(pairs, names=['event', datatype])
|
||||||
|
|
||||||
|
|
||||||
|
def buildNewColumn(index2, column):
    """Repeat the values of ``column`` so that they line up with ``index2``.

    ``index2`` is a concatenation of 0-based sub-indexes; every 0 advances to
    the next value of ``column``, which is then repeated for the following
    non-zero positions.

    :return: list with one value of ``column`` per element of ``index2``
    """
    def _expand():
        cursor = -1
        for position in index2:
            cursor += 1 if position == 0 else 0
            yield column[cursor]

    return list(_expand())
|
||||||
|
|
||||||
|
|
||||||
|
def addColumn(dataframe, columnList, columnName):
    """Store ``columnList`` in ``dataframe`` (in place) as column ``columnName``.

    The values are matched to the rows positionally by wrapping them in a
    Series that reuses the frame's own index.
    """
    values = pandas.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = values
|
||||||
|
|
||||||
|
|
||||||
|
def concat(data):
    """Join the objects of ``data`` side by side (one column each), aligned on their index."""
    return pandas.concat(data, axis='columns')
|
||||||
|
|
||||||
|
|
||||||
|
def createFakeEmptyTagsSeries():
    """Return a one-element Series holding a zero count for the placeholder tag ``'Faketag'``."""
    return pandas.Series([0], index=['Faketag'])
|
||||||
|
|
||||||
|
|
||||||
|
def removeFaketagRow(dataframe):
    """Return a copy of ``dataframe`` without the row labelled ``'Faketag'``.

    Raises ``KeyError`` when no such row exists.
    """
    return dataframe.drop(labels=['Faketag'], axis=0)
|
||||||
|
|
||||||
|
|
||||||
|
def getCopyDataframe(dataframe):
    """Return an independent (deep) copy of ``dataframe``."""
    return dataframe.copy(deep=True)
|
||||||
|
|
||||||
|
|
||||||
|
def createDictTagsColour(colourDict, tags):
    """Record the colour of every tag of ``tags`` in ``colourDict`` (in place).

    :param colourDict: dict mapping tag name -> colour, updated in place;
                       existing entries for other tags are kept
    :param tags: DataFrame with at least the columns ``name`` and ``colour``
    :return: None

    When one name appears with several colours, the pair that sorts last
    wins, because the grouped keys are visited in sorted order.
    """
    # Grouping yields each distinct (name, colour) pair once. Iterating the
    # resulting MultiIndex gives the pairs directly, which avoids
    # MultiIndex.labels (removed in pandas 1.0) and the dependency on an
    # 'id' column that was only used for counting.
    pairs = tags.groupby(['name', 'colour']).size().index
    for name, colour in pairs:
        colourDict[name] = colour
|
||||||
|
|
||||||
|
|
||||||
|
def createTagsPlotStyle(dataframe, colourDict, taxonomy=None):
    """Build the chart ``Style`` whose colour cycle follows the rows of ``dataframe``.

    :param dataframe: table whose index holds the tag names
    :param colourDict: dict mapping tag name -> colour
    :param taxonomy: when given, only rows whose label starts with this
                     string contribute a colour
    :return: a ``Style`` whose ``colors`` are listed in row order
    """
    if taxonomy is None:
        wanted = list(dataframe.index)
    else:
        wanted = [label for label in dataframe.index if label.startswith(taxonomy)]
    palette = tuple(colourDict[label] for label in wanted)

    return Style(background='transparent',
                 plot_background='#eeeeee',
                 foreground='#111111',
                 foreground_strong='#111111',
                 foreground_subtle='#111111',
                 opacity='.6',
                 opacity_hover='.9',
                 transition='400ms ease-in',
                 colors=palette)
|
||||||
|
|
||||||
# ############### Formatting ################
|
# ############### Formatting ################
|
||||||
|
|
||||||
|
|
||||||
|
@ -129,15 +217,19 @@ def attributesListBuild(events):
|
||||||
|
|
||||||
def tagsListBuild(Events):
|
def tagsListBuild(Events):
|
||||||
Tags = []
|
Tags = []
|
||||||
|
if 'Tag' in Events.columns:
|
||||||
for Tag in Events['Tag']:
|
for Tag in Events['Tag']:
|
||||||
if type(Tag) is not list:
|
if type(Tag) is not list:
|
||||||
continue
|
continue
|
||||||
Tags.append(pandas.DataFrame(Tag))
|
Tags.append(pandas.DataFrame(Tag))
|
||||||
|
if Tags:
|
||||||
Tags = pandas.concat(Tags)
|
Tags = pandas.concat(Tags)
|
||||||
columnDate = buildNewColumn(Tags.index, Events['date'])
|
columnDate = buildNewColumn(Tags.index, Events['date'])
|
||||||
addColumn(Tags, columnDate, 'date')
|
addColumn(Tags, columnDate, 'date')
|
||||||
index = buildDoubleIndex(Events.index, Tags.index, 'tag')
|
index = buildDoubleIndex(Events.index, Tags.index, 'tag')
|
||||||
Tags = Tags.set_index(index)
|
Tags = Tags.set_index(index)
|
||||||
|
else:
|
||||||
|
Tags = None
|
||||||
return Tags
|
return Tags
|
||||||
|
|
||||||
|
|
||||||
|
@ -148,6 +240,8 @@ def selectInRange(Events, begin=None, end=None):
|
||||||
inRange.append(Event.tolist())
|
inRange.append(Event.tolist())
|
||||||
inRange = pandas.DataFrame(inRange)
|
inRange = pandas.DataFrame(inRange)
|
||||||
temp = Events.columns.tolist()
|
temp = Events.columns.tolist()
|
||||||
|
if inRange.empty:
|
||||||
|
return None
|
||||||
inRange.columns = temp
|
inRange.columns = temp
|
||||||
return inRange
|
return inRange
|
||||||
|
|
||||||
|
@ -160,6 +254,15 @@ def isTagIn(dataframe, tag):
|
||||||
index.append(temp[i][0])
|
index.append(temp[i][0])
|
||||||
return index
|
return index
|
||||||
|
|
||||||
|
|
||||||
|
def renameColumns(dataframe, namelist):
    """Relabel the columns of ``dataframe`` with ``namelist``.

    The frame is modified in place and the same object is returned.
    """
    dataframe.columns = namelist
    return dataframe
|
||||||
|
|
||||||
|
|
||||||
|
def replaceNaN(dataframe, value):
    """Return a copy of ``dataframe`` in which every missing cell is replaced by ``value``."""
    return dataframe.fillna(value=value)
|
||||||
|
|
||||||
# ############### Basic Stats ################
|
# ############### Basic Stats ################
|
||||||
|
|
||||||
|
|
||||||
|
@ -212,7 +315,7 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr
|
||||||
transition='400ms ease-in',
|
transition='400ms ease-in',
|
||||||
colors=tuple(colors.values()))
|
colors=tuple(colors.values()))
|
||||||
|
|
||||||
treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style, explicit_size=True, width=2048, height=2048)
|
treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
|
||||||
treemap.title = title
|
treemap.title = title
|
||||||
treemap.print_values = True
|
treemap.print_values = True
|
||||||
treemap.print_labels = True
|
treemap.print_labels = True
|
||||||
|
@ -222,3 +325,171 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr
|
||||||
|
|
||||||
createTable(colors, categ_types_hash)
|
createTable(colors, categ_types_hash)
|
||||||
treemap.render_to_file(treename)
|
treemap.render_to_file(treename)
|
||||||
|
|
||||||
|
|
||||||
|
def tagsToLineChart(dataframe, title, dates, colourDict):
    """Render the raw values of every row as a line chart in ``tags_repartition_plot.svg``.

    :param dataframe: one row per tag, one column per date
    :param title: chart title
    :param dates: labels of the x axis
    :param colourDict: dict mapping tag name -> colour
    """
    chart = pygal.Line(x_label_rotation=20,
                       style=createTagsPlotStyle(dataframe, colourDict),
                       show_legend=False)
    chart.title = title
    chart.x_labels = dates
    for tag, counts in dataframe.iterrows():
        chart.add(tag, counts.tolist())
    chart.render_to_file('tags_repartition_plot.svg')
|
||||||
|
|
||||||
|
|
||||||
|
def tagstrendToLineChart(dataframe, title, dates, split, colourDict):
    """Render the linear trend of every row in ``tags_repartition_trend_plot.svg``.

    Each row is fitted with a straight line (least squares over the
    abscissas ``0 .. split-1``) and only that line is drawn, without dots.

    :param dataframe: one row per tag, one column per date
    :param title: chart title
    :param dates: labels of the x axis
    :param split: number of columns / abscissas
    :param colourDict: dict mapping tag name -> colour
    """
    chart = pygal.Line(x_label_rotation=20,
                       style=createTagsPlotStyle(dataframe, colourDict),
                       show_legend=False)
    chart.title = title
    chart.x_labels = dates
    abscissa = numpy.arange(split)
    for tag, counts in dataframe.iterrows():
        slope, intercept = stats.linregress(abscissa, counts)[:2]
        chart.add(tag, slope * abscissa + intercept, show_dots=False)
    chart.render_to_file('tags_repartition_trend_plot.svg')
|
||||||
|
|
||||||
|
|
||||||
|
def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther):
    """Render one raw-data line chart per taxonomy into the ``plot`` folder.

    For every taxonomy, the rows whose label starts with the taxonomy name
    are drawn in ``plot/<taxonomy>.svg`` and then removed from the working
    table. Whatever is left is drawn in ``plot/other.svg`` unless
    ``emptyOther`` is true. ``DataFrame.drop`` returns a new frame, so the
    caller's table is not modified.

    :param dataframe: one row per tag, one column per date
    :param title: common title prefix of the charts
    :param dates: labels of the x axis
    :param colourDict: dict mapping tag name -> colour
    :param taxonomies: taxonomy names to plot, in order
    :param emptyOther: True when no tag lies outside the taxonomies
    """
    # Make sure the output folder exists before the first render.
    if not os.path.isdir('plot'):
        os.makedirs('plot')

    for taxonomy in taxonomies:
        prefix = taxonomy + ':'
        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': ' + taxonomy
        taxo_line_chart.x_labels = dates
        for tag, counts in dataframe.iterrows():
            if tag.startswith(taxonomy):
                # Strip only the leading "<taxonomy>:"; the former re.sub
                # treated the name as a regex and removed it anywhere.
                label = tag[len(prefix):] if tag.startswith(prefix) else tag
                taxo_line_chart.add(label, counts.tolist())
                dataframe = dataframe.drop([tag])
        taxo_line_chart.render_to_file('plot/' + taxonomy + '.svg')

    if not emptyOther:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': other'
        taxo_line_chart.x_labels = dates
        for tag, counts in dataframe.iterrows():
            taxo_line_chart.add(tag, counts.tolist())
        taxo_line_chart.render_to_file('plot/other.svg')
|
||||||
|
|
||||||
|
|
||||||
|
def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):
    """Render one linear-trend chart per taxonomy into the ``plot`` folder.

    For every taxonomy, each row whose label starts with the taxonomy name
    is fitted with a straight line (least squares over ``0 .. split-1``),
    drawn in ``plot/<taxonomy>_trend.svg`` and removed from the working
    table. Whatever is left goes to ``plot/other_trend.svg`` unless
    ``emptyOther`` is true. ``DataFrame.drop`` returns a new frame, so the
    caller's table is not modified.

    :param dataframe: one row per tag, one column per date
    :param title: common title prefix of the charts
    :param dates: labels of the x axis
    :param split: number of columns / abscissas
    :param colourDict: dict mapping tag name -> colour
    :param taxonomies: taxonomy names to plot, in order
    :param emptyOther: True when no tag lies outside the taxonomies
    """
    # Make sure the output folder exists before the first render.
    if not os.path.isdir('plot'):
        os.makedirs('plot')

    xi = numpy.arange(split)
    for taxonomy in taxonomies:
        prefix = taxonomy + ':'
        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': ' + taxonomy
        taxo_line_chart.x_labels = dates
        for tag, counts in dataframe.iterrows():
            if tag.startswith(taxonomy):
                slope, intercept = stats.linregress(xi, counts)[:2]
                # Strip only the leading "<taxonomy>:"; the former re.sub
                # treated the name as a regex and removed it anywhere.
                label = tag[len(prefix):] if tag.startswith(prefix) else tag
                taxo_line_chart.add(label, slope * xi + intercept, show_dots=False)
                dataframe = dataframe.drop([tag])
        taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg')

    if not emptyOther:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': other'
        taxo_line_chart.x_labels = dates
        for tag, counts in dataframe.iterrows():
            slope, intercept = stats.linregress(xi, counts)[:2]
            taxo_line_chart.add(tag, slope * xi + intercept, show_dots=False)
        taxo_line_chart.render_to_file('plot/other_trend.svg')
|
||||||
|
|
||||||
|
|
||||||
|
def _drawPolyFit(counts, split, colour, legend, order, labelDots):
    """Plot one tag on the current figure: its raw points and the fitted polynomial."""
    x = numpy.arange(split)
    # Positional access: the row is labelled by dates, so integer keys must
    # not be used as labels.
    y = numpy.asarray(counts.iloc[:split])
    fitted = numpy.poly1d(numpy.polyfit(x, y, order))
    x_new = numpy.linspace(x[0], x[-1], 50)
    if labelDots:
        plt.plot(x, y, '.', color=colour, label=legend)
    else:
        plt.plot(x, y, '.', color=colour)
    plt.plot(x_new, fitted(x_new), color=colour, label=legend + 'trend')


def _savePolyFigure(name):
    """Decorate the current figure, save it as ``plotlib/<name>.png`` and clear it."""
    pylab.title('Polynomial Fit with Matplotlib: ' + name)
    pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax = plt.gca()
    ax.set_facecolor((0.898, 0.898, 0.898))
    # Shrink the axes so the legend placed outside on the right stays visible.
    box = ax.get_position()
    ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])
    fig = plt.gcf()
    fig.set_size_inches(20, 15)
    fig.savefig('plotlib/' + name + '.png')
    # Always clear, so the next figure does not inherit these curves.
    fig.clf()


def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order):
    """Save one polynomial-fit figure per taxonomy into the ``plotlib`` folder.

    For every taxonomy, each row whose label starts with the taxonomy name
    is plotted (raw points plus a least-squares polynomial of degree
    ``order``) in ``plotlib/<taxonomy>.png`` and removed from the working
    table. Whatever is left goes to ``plotlib/other.png`` unless
    ``emptyOther`` is true. ``DataFrame.drop`` returns a new frame, so the
    caller's table is not modified.

    :param dataframe: one row per tag, one column per date
    :param split: number of columns / abscissas (``0 .. split-1``)
    :param colourDict: dict mapping tag name -> colour
    :param taxonomies: taxonomy names to plot, in order
    :param emptyOther: True when no tag lies outside the taxonomies
    :param order: degree of the fitted polynomial
    """
    # Make sure the output folder exists before the first save.
    if not os.path.isdir('plotlib'):
        os.makedirs('plotlib')

    for taxonomy in taxonomies:
        prefix = taxonomy + ':'
        for tag, counts in dataframe.iterrows():
            if tag.startswith(taxonomy):
                # Strip only the leading "<taxonomy>:"; the former re.sub
                # treated the name as a regex and removed it anywhere.
                label = tag[len(prefix):] if tag.startswith(prefix) else tag
                dataframe = dataframe.drop([tag])
                # Raw points carry no legend entry in the taxonomy figures.
                _drawPolyFit(counts, split, colourDict[tag], label, order, False)
        _savePolyFigure(taxonomy)

    if not emptyOther:
        for tag, counts in dataframe.iterrows():
            _drawPolyFit(counts, split, colourDict[tag], tag, order, True)
        _savePolyFigure('other')
|
||||||
|
|
||||||
|
|
||||||
|
def createVisualisation(taxonomies):
    """Write ``test_tags_trend.html``, a table that shows the generated plots.

    One row is written per taxonomy plus a final row for ``other``. Each row
    embeds ``plot/<name>.svg``, ``plotlib/<name>.png`` and
    ``plot/<name>_trend.svg``. The page links the ``style2.css`` stylesheet.

    :param taxonomies: iterable of taxonomy names
    """
    # Paths are URLs, so they use forward slashes. The previous backslashes
    # were Windows-specific and relied on invalid escape sequences such as
    # "\o" in the "other" row.
    row = ('<tr><td><object type="image/svg+xml" data="plot/{0}.svg"></object></td>'
           '<td><img src="plotlib/{0}.png" alt="graph" /></td>'
           '<td><object type="image/svg+xml" data="plot/{0}_trend.svg"></object></td></tr>\n')

    parts = ['<!DOCTYPE html>\n<html>\n\t<head>\n\t\t<link rel="stylesheet" href="style2.css">\n\t</head>\n\t<body>',
             '<table>']
    parts.extend(row.format(taxonomy) for taxonomy in taxonomies)
    parts.append(row.format('other'))
    parts.append('</table>')
    parts.append('\n\t</body>\n</html>')

    with open('test_tags_trend.html', 'w') as target:
        target.write(''.join(parts))
|
||||||
|
|
Loading…
Reference in New Issue