PyMISP/examples/situational-awareness/tools.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from json import JSONDecoder
import pygal
from pygal.style import Style
import pandas
import numpy
from scipy import stats
from pytaxonomies import Taxonomies
import re
import matplotlib.pyplot as plt
from matplotlib import pylab
import os
import date_tools
from dateutil.parser import parse

# ############### Tools ################


def selectInRange(Events, begin=None, end=None):
    """Return the events whose ``date`` passes the range check.

    Every row's ``date`` value is parsed with ``dateutil`` and handed to
    ``date_tools.dateInRange`` together with *begin* and *end*; rows for
    which that call is truthy are kept.

    Returns a new DataFrame with the same columns as *Events* (and a fresh
    default index), or ``None`` when no row is selected.
    """
    selected_rows = [
        row.tolist()
        for _, row in Events.iterrows()
        if date_tools.dateInRange(parse(row['date']), begin, end)
    ]
    selected = pandas.DataFrame(selected_rows)
    column_names = Events.columns.tolist()
    if selected.empty:
        return None
    selected.columns = column_names
    return selected


def getTaxonomies(dataframe):
    """List the known taxonomies that occur in *dataframe*'s row labels.

    A taxonomy counts as present when at least one row label starts with
    its name.  Matched rows are dropped from a local copy of the frame so
    that what is left afterwards are the labels matching no taxonomy.

    Returns a ``(taxonomies, emptyOther)`` tuple: the names of the
    taxonomies that are present (in the order ``Taxonomies().keys()`` yields
    them) and a boolean that is ``True`` when no unmatched row remains.
    """
    known = list(Taxonomies().keys())
    present = []
    for name in known:
        found = False
        # iterrows() walks the frame as it was when the loop started, so
        # rebinding ``dataframe`` inside the loop is safe.
        for label, _ in dataframe.iterrows():
            if label.startswith(name):
                found = True
                dataframe = dataframe.drop([label])
        if found:
            present.append(name)
    return present, dataframe.empty


def buildDoubleIndex(index1, index2, datatype):
    """Build a two-level ``(event, <datatype>)`` MultiIndex.

    *index2* is a flat sequence of inner positions that restarts at ``0``
    for every new owner; each ``0`` advances to the next entry of *index1*.
    The result pairs every inner position with its owner.
    """
    def _owners():
        cursor = -1
        for inner in index2:
            if inner == 0:
                cursor += 1
            yield index1[cursor]

    outer = list(_owners())
    pairs = list(zip(outer, index2))
    return pandas.MultiIndex.from_tuples(pairs, names=['event', datatype])


def buildNewColumn(index2, column):
    """Repeat the values of *column* to line up with *index2*.

    *index2* is a flat sequence of inner positions that restarts at ``0``
    for every new owner; each ``0`` advances to the next value of *column*.
    Returns a list with one value per entry of *index2*.
    """
    def _owner_positions():
        cursor = -1
        for inner in index2:
            if inner == 0:
                cursor += 1
            yield cursor

    return [column[owner] for owner in _owner_positions()]


def addColumn(dataframe, columnList, columnName):
    """Add (or overwrite) column *columnName* of *dataframe* in place.

    *columnList* supplies one value per row, in row order.
    """
    new_values = pandas.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = new_values


def concat(data):
    """Concatenate the pandas objects in *data* side by side (``axis=1``)."""
    side_by_side = pandas.concat(data, axis=1)
    return side_by_side


def createFakeEmptyTagsSeries():
    """Return a one-element Series holding the placeholder ``'Faketag'`` with value 0."""
    placeholder = {'Faketag': 0}
    return pandas.Series(placeholder)


def removeFaketagRow(dataframe):
    """Return a copy of *dataframe* without the ``'Faketag'`` row.

    Raises ``KeyError`` (from pandas) when that row label is absent.
    """
    placeholder_labels = ['Faketag']
    return dataframe.drop(placeholder_labels)


def getCopyDataframe(dataframe):
    """Return a copy of *dataframe* (via its ``copy()`` method)."""
    duplicate = dataframe.copy()
    return duplicate


def createDictTagsColour(colourDict, tags):
    """Fill *colourDict* in place with a ``tag name -> colour`` mapping.

    :param colourDict: dict to update; existing entries for the same tag
        names are overwritten.
    :param tags: DataFrame with at least the columns ``name`` and ``colour``.
    :returns: ``None`` (the result is written into *colourDict*).

    The distinct ``(name, colour)`` pairs are taken from a group-by, which
    yields them sorted; when a name appears with several colours the last
    one in that order wins.
    """
    # Iterate the MultiIndex tuples directly instead of going through
    # ``index.levels`` / ``index.labels``: ``MultiIndex.labels`` was renamed
    # to ``codes`` and no longer exists in current pandas releases.
    pairs = tags.groupby(['name', 'colour']).size().index
    for name, colour in pairs:
        colourDict[name] = colour


def createTagsPlotStyle(dataframe, colourDict, taxonomy=None):
    """Build the pygal ``Style`` used by the tag charts.

    The series colours are looked up in *colourDict* by row label, in row
    order.  When *taxonomy* is given, only the labels starting with it
    contribute a colour; otherwise every row does.
    """
    palette = tuple(
        colourDict[tag]
        for tag, _ in dataframe.iterrows()
        if taxonomy is None or tag.startswith(taxonomy)
    )
    options = dict(
        background='transparent',
        plot_background='#eeeeee',
        foreground='#111111',
        foreground_strong='#111111',
        foreground_subtle='#111111',
        opacity='.6',
        opacity_hover='.9',
        transition='400ms ease-in',
        colors=palette,
    )
    return Style(**options)

# ############### Formatting  ################


def eventsListBuildFromList(filename):
    """Load events from a file of concatenated JSON documents.

    :param filename: path of a text file holding one JSON object after the
        other (optionally separated by whitespace).  Each object is turned
        into rows with ``DataFrame.from_dict(..., orient='index')``.
    :returns: DataFrame of all events, indexed by their ``id`` column, with
        ``attribute_count`` converted to integers (missing counts become 0).
    :raises ValueError: from ``pandas.concat`` when the file holds no
        document, or from the JSON decoder on malformed input.
    :raises KeyError: when the ``attribute_count`` or ``id`` column is absent.
    """
    with open(filename, 'r') as myfile:
        s = myfile.read().replace('\n', '')
    decoder = JSONDecoder()
    s_len = len(s)
    data = []
    end = 0
    while True:
        # raw_decode() does not skip leading whitespace, so step over any
        # blanks between (or after) the documents ourselves.
        while end < s_len and s[end].isspace():
            end += 1
        if end >= s_len:
            break
        Event, end = decoder.raw_decode(s, idx=end)
        data.append(pandas.DataFrame.from_dict(Event, orient='index'))
    Events = pandas.concat(data)
    # Replace the whole column in one assignment: element-wise chained
    # assignment (Events[col][i] = ...) may act on a temporary copy, and it
    # relied on positional integer lookups on a non-integer index.
    # Missing counts (None/NaN) become the integer 0 so the column has a
    # single numeric type.
    Events['attribute_count'] = [
        0 if pandas.isnull(value) else int(value)
        for value in Events['attribute_count']
    ]
    return Events.set_index('id')


def eventsListBuildFromArray(jdata):
    """Return a DataFrame of all events found in ``jdata['response']``.

    Each response entry is expanded with
    ``DataFrame.from_dict(..., orient='index')``; the pieces are
    concatenated and indexed by their ``id`` column.
    """
    frames = []
    for entry in jdata['response']:
        frames.append(pandas.DataFrame.from_dict(entry, orient='index'))
    combined = pandas.concat(frames)
    return combined.set_index(['id'])


def attributesListBuild(events):
    """Concatenate the per-event ``Attribute`` entries into one DataFrame.

    Every value of ``events['Attribute']`` is passed to the DataFrame
    constructor; the resulting frames are stacked in event order, each
    keeping its own row numbering.
    """
    frames = []
    for attribute in events['Attribute']:
        frames.append(pandas.DataFrame(attribute))
    return pandas.concat(frames)


def tagsListBuild(Events):
    """Build one DataFrame holding the tags of every event.

    :param Events: DataFrame of events indexed by event id.  Its ``Tag``
        column holds, per event, a list of tag dicts (anything that is not
        a non-empty list is treated as "no tags"); its ``date`` column
        holds the event date.
    :returns: DataFrame with one row per tag, a ``date`` column copied from
        the owning event, and a two-level ``(event, tag)`` index made of the
        event id and the tag's position within that event.  ``None`` when
        there is no ``Tag`` column or no event carries any tag.
    """
    if 'Tag' not in Events.columns:
        return None

    frames = []
    event_ids = []
    positions = []
    for position, (event_id, tag_list) in enumerate(zip(Events.index, Events['Tag'])):
        if not isinstance(tag_list, list) or not tag_list:
            continue
        frames.append(pandas.DataFrame(tag_list))
        # Remember which event each frame came from.  Pairing the tags with
        # a running counter over *all* events would shift ids and dates as
        # soon as one event has no tags.
        event_ids.append(event_id)
        positions.append(position)

    if not frames:
        return None

    Tags = pandas.concat(frames, keys=event_ids, names=['event', 'tag'])
    # Positional lookup so the event index dtype does not matter.
    dates = Events['date'].iloc[positions]
    Tags['date'] = [date for date, frame in zip(dates, frames) for _ in range(len(frame))]
    return Tags


def isTagIn(dataframe, tag):
    """Return the first-level index values of the rows whose ``name`` matches *tag*.

    Matching uses ``Series.str.contains``.  Each value is reported once,
    in order of first appearance.
    """
    matching = dataframe[dataframe['name'].str.contains(tag)]
    owners = []
    for entry in matching.index.tolist():
        owner = entry[0]
        if owner in owners:
            continue
        owners.append(owner)
    return owners


def renameColumns(dataframe, namelist):
    """Replace the column labels of *dataframe* in place and return it.

    *namelist* must provide one label per existing column.
    """
    new_labels = namelist
    dataframe.columns = new_labels
    return dataframe


def replaceNaN(dataframe, value):
    """Return the result of ``dataframe.fillna(value)`` (missing entries replaced by *value*)."""
    filled = dataframe.fillna(value)
    return filled

# ############### Basic Stats ################


def getNbitems(dataframe):
    """Return the number of rows (index entries) of *dataframe*."""
    row_labels = dataframe.index
    return len(row_labels)


def getNbAttributePerEventCategoryType(attributes):
    """Count attributes per ``(event_id, category, type)`` combination.

    Returns the ``id`` column of the grouped counts, i.e. a Series indexed
    by the three grouping keys.
    """
    grouped = attributes.groupby(['event_id', 'category', 'type'])
    counts = grouped.count()
    return counts['id']


def getNbOccurenceTags(Tags):
    """Count how many times each tag ``name`` occurs.

    Returns the ``id`` column of the per-name counts, i.e. a Series
    indexed by tag name.
    """
    per_name = Tags.groupby('name')
    counts = per_name.count()
    return counts['id']


# ############### Charts ################


def tagsToLineChart(dataframe, title, dates, colourDict):
    """Render one line per row of *dataframe* to ``tags_repartition_plot.svg``.

    Row labels name the series, row values are the data points, *dates*
    labels the x axis and *colourDict* supplies the colour of each series.
    """
    chart = pygal.Line(x_label_rotation=20,
                       style=createTagsPlotStyle(dataframe, colourDict),
                       show_legend=False)
    chart.title = title
    chart.x_labels = dates
    for tag, counts in dataframe.iterrows():
        chart.add(tag, counts.tolist())
    chart.render_to_file('tags_repartition_plot.svg')


def tagstrendToLineChart(dataframe, title, dates, split, colourDict):
    """Render the linear trend of every row to ``tags_repartition_trend_plot.svg``.

    For each row a least-squares line is fitted over the x positions
    ``0 .. split - 1`` and that fitted line (not the raw values) is drawn.
    """
    chart = pygal.Line(x_label_rotation=20,
                       style=createTagsPlotStyle(dataframe, colourDict),
                       show_legend=False)
    chart.title = title
    chart.x_labels = dates
    xi = numpy.arange(split)
    for tag, counts in dataframe.iterrows():
        # linregress returns (slope, intercept, ...); only those two are used.
        slope, intercept = stats.linregress(xi, counts)[:2]
        chart.add(tag, slope * xi + intercept, show_dots=False)
    chart.render_to_file('tags_repartition_trend_plot.svg')


def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther):
    """Render one line chart per taxonomy, plus one for the remaining tags.

    For every name in *taxonomies* the rows whose label starts with it are
    drawn to ``plot/<taxonomy>.svg`` (series names have ``<taxonomy>:``
    removed) and then dropped from the local frame.  Unless *emptyOther*
    is true, the rows left over are drawn to ``plot/other.svg``.
    """
    def _new_chart(chart_style, chart_title):
        chart = pygal.Line(x_label_rotation=20, style=chart_style)
        chart.title = chart_title
        chart.x_labels = dates
        return chart

    # NOTE(review): this overall chart is configured but never filled or
    # rendered; it is kept so the function behaves exactly as before.
    _new_chart(createTagsPlotStyle(dataframe, colourDict), title)

    for taxonomy in taxonomies:
        chart = _new_chart(createTagsPlotStyle(dataframe, colourDict, taxonomy),
                           title + ': ' + taxonomy)
        for tag, counts in dataframe.iterrows():
            if not tag.startswith(taxonomy):
                continue
            chart.add(re.sub(taxonomy + ':', '', tag), counts.tolist())
            dataframe = dataframe.drop([tag])
        chart.render_to_file('plot/' + taxonomy + '.svg')

    if emptyOther:
        return

    chart = _new_chart(createTagsPlotStyle(dataframe, colourDict), title + ': other')
    for tag, counts in dataframe.iterrows():
        chart.add(tag, counts.tolist())
    chart.render_to_file('plot/other.svg')


def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):
    """Render one linear-trend chart per taxonomy, plus one for the remaining tags.

    For every name in *taxonomies* the rows whose label starts with it get
    a least-squares line fitted over the x positions ``0 .. split - 1``;
    the fitted lines are drawn to ``plot/<taxonomy>_trend.svg`` and the
    rows are dropped from the local frame.  Unless *emptyOther* is true,
    the rows left over are drawn to ``plot/other_trend.svg``.
    """
    def _new_chart(chart_style, chart_title):
        chart = pygal.Line(x_label_rotation=20, style=chart_style)
        chart.title = chart_title
        chart.x_labels = dates
        return chart

    def _trend_line(counts):
        # linregress returns (slope, intercept, ...); only those two are used.
        slope, intercept = stats.linregress(xi, counts)[:2]
        return slope * xi + intercept

    # NOTE(review): this overall chart is configured but never filled or
    # rendered; it is kept so the function behaves exactly as before.
    _new_chart(createTagsPlotStyle(dataframe, colourDict), title)
    xi = numpy.arange(split)

    for taxonomy in taxonomies:
        chart = _new_chart(createTagsPlotStyle(dataframe, colourDict, taxonomy),
                           title + ': ' + taxonomy)
        for tag, counts in dataframe.iterrows():
            if not tag.startswith(taxonomy):
                continue
            line = _trend_line(counts)
            chart.add(re.sub(taxonomy + ':', '', tag), line, show_dots=False)
            dataframe = dataframe.drop([tag])
        chart.render_to_file('plot/' + taxonomy + '_trend.svg')

    if emptyOther:
        return

    chart = _new_chart(createTagsPlotStyle(dataframe, colourDict), title + ': other')
    for tag, counts in dataframe.iterrows():
        chart.add(tag, _trend_line(counts), show_dots=False)
    chart.render_to_file('plot/other_trend.svg')


def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order):
    """Plot raw points and a polynomial fit per tag with matplotlib.

    For every name in *taxonomies*, each row whose label starts with it is
    plotted (points at x = 0 .. split - 1 plus a fitted polynomial of
    degree *order*) on the current figure, which is then saved to
    ``plotlib/<taxonomy>.png`` and cleared.  Plotted rows are dropped from
    the local frame; unless *emptyOther* is true, the rows left over are
    plotted the same way and saved to ``plotlib/other.png``.

    Colours are looked up in *colourDict* by row label.
    """
    for taxonomy in taxonomies:
        for it in dataframe.iterrows():
            if it[0].startswith(taxonomy):
                # (x, y) pairs: x is the position 0 .. split - 1, y the row value
                points = []
                for i in range(split):
                    points.append((i, it[1][i]))
                color = colourDict[it[0]]
                # legend label: the tag with '<taxonomy>:' removed
                label = re.sub(taxonomy + ':', '', it[0])
                points = numpy.array(points)
                # rebinding is safe: iterrows() keeps walking the original frame
                dataframe = dataframe.drop([it[0]])

                # get x and y vectors
                x = points[:, 0]
                y = points[:, 1]

                # calculate polynomial
                z = numpy.polyfit(x, y, order)
                f = numpy.poly1d(z)

                # calculate new x's and y's
                x_new = numpy.linspace(x[0], x[-1], 50)
                y_new = f(x_new)

                # raw points carry no label here, so only the fitted curve
                # shows up in the legend
                plt.plot(x, y, '.', color=color)
                plt.plot(x_new, y_new, color=color, label=label + 'trend')

        pylab.title('Polynomial Fit with Matplotlib: ' + taxonomy)
        pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax = plt.gca()
        # ax.set_facecolor((0.898, 0.898, 0.898))
        # narrow the axes so the legend placed to the right of them fits
        box = ax.get_position()
        ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])
        fig = plt.gcf()
        fig.set_size_inches(20, 15)
        fig.savefig('plotlib/' + taxonomy + '.png')
        # clear the figure so the next taxonomy starts from an empty plot
        fig.clf()

    if not emptyOther:
        # whatever was not dropped above belongs to no listed taxonomy
        for it in dataframe.iterrows():
            points = []
            for i in range(split):
                points.append((i, it[1][i]))

            color = colourDict[it[0]]
            label = it[0]
            points = numpy.array(points)

            # get x and y vectors
            x = points[:, 0]
            y = points[:, 1]

            # calculate polynomial
            z = numpy.polyfit(x, y, order)
            f = numpy.poly1d(z)

            # calculate new x's and y's
            x_new = numpy.linspace(x[0], x[-1], 50)
            y_new = f(x_new)

            # unlike the per-taxonomy plots, the raw points are labelled too
            plt.plot(x, y, '.', color=color, label=label)
            plt.plot(x_new, y_new, color=color, label=label + 'trend')

        pylab.title('Polynomial Fit with Matplotlib: other')
        pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        ax = plt.gca()
        # ax.set_facecolor((0.898, 0.898, 0.898))
        # narrow the axes so the legend placed to the right of them fits
        box = ax.get_position()
        ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])
        fig = plt.gcf()
        fig.set_size_inches(20, 15)
        # NOTE(review): the figure is not cleared after this last save
        fig.savefig('plotlib/other.png')


def createVisualisation(taxonomies):
    """Write ``test_tags_trend.html``, an overview page of the generated charts.

    :param taxonomies: iterable of taxonomy names.  One table row is
        written per name, followed by a final row for ``other``.
    :returns: ``None``; the page is written to the current directory.

    Each row references ``plot/<name>.svg``, ``plotlib/<name>.png`` and
    ``plot/<name>_trend.svg``.  The references use forward slashes: they
    are URLs, and the previous backslash form also relied on the invalid
    string escape ``\\o``.
    """
    row = ('<tr><td><object type="image/svg+xml" data="plot/{0}.svg"></object></td>'
           '<td><img src="plotlib/{0}.png" alt="graph" /></td>'
           '<td><object type="image/svg+xml" data="plot/{0}_trend.svg"></object></td></tr>\n')

    parts = [
        '<!DOCTYPE html>\n<html>\n\t<head>\n\t\t<link rel="stylesheet" href="style2.css">\n\t</head>\n\t<body>',
        '<table>',
    ]
    parts.extend(row.format(taxonomy) for taxonomy in taxonomies)
    parts.append(row.format('other'))
    parts.append('</table>')
    parts.append('\n\t</body>\n</html>')

    with open('test_tags_trend.html', 'w') as target:
        target.write(''.join(parts))
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`#!/usr/bin/env python`
			`# -- coding: utf-8 --`

			`from json import JSONDecoder`
			`import pygal`
			`from pygal.style import Style`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`import pandas`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`import numpy`
			`from scipy import stats`
			`from pytaxonomies import Taxonomies`
			`import re`
			`import matplotlib.pyplot as plt`
			`from matplotlib import pylab`
			`import os`
add ta_scatter.py script & reorganise tools 2017-02-03 16:12:02 +01:00			`import date_tools`
			`from dateutil.parser import parse`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
add ta_scatter.py script & reorganise tools 2017-02-03 16:12:02 +01:00			`# ############### Tools ################`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00

add ta_scatter.py script & reorganise tools 2017-02-03 16:12:02 +01:00			`def selectInRange(Events, begin=None, end=None):`
			`inRange = []`
			`for i, Event in Events.iterrows():`
			`if date_tools.dateInRange(parse(Event['date']), begin, end):`
			`inRange.append(Event.tolist())`
			`inRange = pandas.DataFrame(inRange)`
			`temp = Events.columns.tolist()`
			`if inRange.empty:`
			`return None`
			`inRange.columns = temp`
			`return inRange`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def getTaxonomies(dataframe):`
			`taxonomies = Taxonomies()`
			`taxonomies = list(taxonomies.keys())`
			`notInTaxo = []`
			`count = 0`
			`for taxonomy in taxonomies:`
			`empty = True`
			`for it in dataframe.iterrows():`
			`if it[0].startswith(taxonomy):`
			`empty = False`
			`dataframe = dataframe.drop([it[0]])`
			`count = count + 1`
			`if empty is True:`
			`notInTaxo.append(taxonomy)`
			`if dataframe.empty:`
			`emptyOther = True`
			`else:`
			`emptyOther = False`
			`for taxonomy in notInTaxo:`
			`taxonomies.remove(taxonomy)`
			`return taxonomies, emptyOther`


move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`def buildDoubleIndex(index1, index2, datatype):`
			`it = -1`
			`newindex1 = []`
			`for index in index2:`
			`if index == 0:`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`it += 1`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`newindex1.append(index1[it])`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`arrays = [newindex1, index2]`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`tuples = list(zip(*arrays))`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])`

move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
			`def buildNewColumn(index2, column):`
			`it = -1`
			`newcolumn = []`
			`for index in index2:`
			`if index == 0:`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`it += 1`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`newcolumn.append(column[it])`
			`return newcolumn`

Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`def addColumn(dataframe, columnList, columnName):`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00

add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def concat(data):`
			`return pandas.concat(data, axis=1)`
Make printed date more consistent + update README.md 2016-07-26 11:05:20 +02:00
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def createFakeEmptyTagsSeries():`
			`return pandas.Series({'Faketag': 0})`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00

add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def removeFaketagRow(dataframe):`
			`return dataframe.drop(['Faketag'])`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
Make printed date more consistent + update README.md 2016-07-26 11:05:20 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def getCopyDataframe(dataframe):`
			`return dataframe.copy()`
Make printed date more consistent + update README.md 2016-07-26 11:05:20 +02:00
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`def createDictTagsColour(colourDict, tags):`
			`temp = tags.groupby(['name', 'colour']).count()['id']`
			`levels_name = temp.index.levels[0]`
			`levels_colour = temp.index.levels[1]`
			`labels_name = temp.index.labels[0]`
			`labels_colour = temp.index.labels[1]`
Make printed date more consistent + update README.md 2016-07-26 11:05:20 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`for i in range(len(labels_name)):`
			`colourDict[levels_name[labels_name[i]]] = levels_colour[labels_colour[i]]`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00
			`def createTagsPlotStyle(dataframe, colourDict, taxonomy=None):`
			`colours = []`
			`if taxonomy is not None:`
			`for it in dataframe.iterrows():`
			`if it[0].startswith(taxonomy):`
			`colours.append(colourDict[it[0]])`
			`else:`
			`for it in dataframe.iterrows():`
			`colours.append(colourDict[it[0]])`

			`style = Style(background='transparent',`
			`plot_background='#eeeeee',`
			`foreground='#111111',`
			`foreground_strong='#111111',`
			`foreground_subtle='#111111',`
			`opacity='.6',`
			`opacity_hover='.9',`
			`transition='400ms ease-in',`
			`colors=tuple(colours))`
			`return style`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`# ############### Formatting ################`

move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
			`def eventsListBuildFromList(filename):`
handling some NaN exceptions 2016-07-21 10:06:47 +02:00			`with open(filename, 'r') as myfile:`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`s = myfile.read().replace('\n', '')`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`decoder = JSONDecoder()`
			`s_len = len(s)`
			`Events = []`
			`end = 0`
			`while end != s_len:`
			`Event, end = decoder.raw_decode(s, idx=end)`
			`Events.append(Event)`
			`data = []`
			`for e in Events:`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`data.append(pandas.DataFrame.from_dict(e, orient='index'))`
			`Events = pandas.concat(data)`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`for it in range(Events['attribute_count'].size):`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`if Events['attribute_count'][it] is None:`
			`Events['attribute_count'][it] = '0'`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`else:`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`Events['attribute_count'][it] = int(Events['attribute_count'][it])`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`Events = Events.set_index('id')`
			`return Events`

Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
			`def eventsListBuildFromArray(jdata):`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`'''`
			`returns a structure listing all primary events in the sample`
			`'''`
Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`data = [pandas.DataFrame.from_dict(e, orient='index') for e in jdata['response']]`
			`events = pandas.concat(data)`
			`events = events.set_index(['id'])`
			`return events`


			`def attributesListBuild(events):`
			`attributes = [pandas.DataFrame(attribute) for attribute in events['Attribute']]`
			`return pandas.concat(attributes)`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00

			`def tagsListBuild(Events):`
			`Tags = []`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`if 'Tag' in Events.columns:`
			`for Tag in Events['Tag']:`
			`if type(Tag) is not list:`
			`continue`
			`Tags.append(pandas.DataFrame(Tag))`
			`if Tags:`
			`Tags = pandas.concat(Tags)`
			`columnDate = buildNewColumn(Tags.index, Events['date'])`
			`addColumn(Tags, columnDate, 'date')`
			`index = buildDoubleIndex(Events.index, Tags.index, 'tag')`
			`Tags = Tags.set_index(index)`
			`else:`
			`Tags = None`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`return Tags`

Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`def isTagIn(dataframe, tag):`
handling some NaN exceptions 2016-07-21 10:06:47 +02:00			`temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()`
move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00			`index = []`
			`for i in range(len(temp)):`
			`if temp[i][0] not in index:`
			`index.append(temp[i][0])`
			`return index`

add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00
			`def renameColumns(dataframe, namelist):`
			`dataframe.columns = namelist`
			`return dataframe`


			`def replaceNaN(dataframe, value):`
			`return dataframe.fillna(value)`

Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00			`# ############### Basic Stats ################`

move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
			`def getNbitems(dataframe):`
			`return len(dataframe.index)`

Initial refactoring, PEP8 and cleanup 2016-07-26 16:35:46 +02:00
			`def getNbAttributePerEventCategoryType(attributes):`
			`return attributes.groupby(['event_id', 'category', 'type']).count()['id']`

move files from examples/treemap to examples/situational-awareness/ 2016-07-13 15:24:36 +02:00
			`def getNbOccurenceTags(Tags):`
			`return Tags.groupby('name').count()['id']`


add ta_scatter.py script & reorganise tools 2017-02-03 16:12:02 +01:00			`# ############### Charts ################`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00

			`def tagsToLineChart(dataframe, title, dates, colourDict):`
			`style = createTagsPlotStyle(dataframe, colourDict)`
			`line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)`
			`line_chart.title = title`
			`line_chart.x_labels = dates`
			`for it in dataframe.iterrows():`
			`line_chart.add(it[0], it[1].tolist())`
			`line_chart.render_to_file('tags_repartition_plot.svg')`


			`def tagstrendToLineChart(dataframe, title, dates, split, colourDict):`
			`style = createTagsPlotStyle(dataframe, colourDict)`
			`line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)`
			`line_chart.title = title`
			`line_chart.x_labels = dates`
			`xi = numpy.arange(split)`
			`for it in dataframe.iterrows():`
			`slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])`
			`line = slope * xi + intercept`
			`line_chart.add(it[0], line, show_dots=False)`
			`line_chart.render_to_file('tags_repartition_trend_plot.svg')`


			`def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther):`
			`style = createTagsPlotStyle(dataframe, colourDict)`
			`line_chart = pygal.Line(x_label_rotation=20, style=style)`
			`line_chart.title = title`
			`line_chart.x_labels = dates`
			`for taxonomy in taxonomies:`
			`taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)`
			`taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)`
			`taxo_line_chart.title = title + ': ' + taxonomy`
			`taxo_line_chart.x_labels = dates`
			`for it in dataframe.iterrows():`
			`if it[0].startswith(taxonomy):`
			`taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), it[1].tolist())`
			`dataframe = dataframe.drop([it[0]])`
			`taxo_line_chart.render_to_file('plot/' + taxonomy + '.svg')`

			`if not emptyOther:`
			`taxoStyle = createTagsPlotStyle(dataframe, colourDict)`
			`taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)`
			`taxo_line_chart.title = title + ': other'`
			`taxo_line_chart.x_labels = dates`
			`for it in dataframe.iterrows():`
			`taxo_line_chart.add(it[0], it[1].tolist())`
			`taxo_line_chart.render_to_file('plot/other.svg')`


			`def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):`
			`style = createTagsPlotStyle(dataframe, colourDict)`
			`line_chart = pygal.Line(x_label_rotation=20, style=style)`
			`line_chart.title = title`
			`line_chart.x_labels = dates`
			`xi = numpy.arange(split)`
			`for taxonomy in taxonomies:`
			`taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)`
			`taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)`
			`taxo_line_chart.title = title + ': ' + taxonomy`
			`taxo_line_chart.x_labels = dates`
			`for it in dataframe.iterrows():`
			`if it[0].startswith(taxonomy):`
			`slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])`
			`line = slope * xi + intercept`
			`taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False)`
			`dataframe = dataframe.drop([it[0]])`
			`taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg')`

			`if not emptyOther:`
			`taxoStyle = createTagsPlotStyle(dataframe, colourDict)`
			`taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)`
			`taxo_line_chart.title = title + ': other'`
			`taxo_line_chart.x_labels = dates`
			`for it in dataframe.iterrows():`
			`slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])`
			`line = slope * xi + intercept`
			`taxo_line_chart.add(it[0], line, show_dots=False)`
			`taxo_line_chart.render_to_file('plot/other_trend.svg')`


			`def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order):`
			`for taxonomy in taxonomies:`
			`for it in dataframe.iterrows():`
			`if it[0].startswith(taxonomy):`
			`points = []`
			`for i in range(split):`
			`points.append((i, it[1][i]))`
			`color = colourDict[it[0]]`
			`label = re.sub(taxonomy + ':', '', it[0])`
			`points = numpy.array(points)`
			`dataframe = dataframe.drop([it[0]])`

			`# get x and y vectors`
			`x = points[:, 0]`
			`y = points[:, 1]`

			`# calculate polynomial`
			`z = numpy.polyfit(x, y, order)`
			`f = numpy.poly1d(z)`

			`# calculate new x's and y's`
			`x_new = numpy.linspace(x[0], x[-1], 50)`
			`y_new = f(x_new)`

			`plt.plot(x, y, '.', color=color)`
			`plt.plot(x_new, y_new, color=color, label=label + 'trend')`

			`pylab.title('Polynomial Fit with Matplotlib: ' + taxonomy)`
			`pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))`
			`ax = plt.gca()`
fix situational-awareness examples 2016-10-12 12:33:42 +02:00			`# ax.set_facecolor((0.898, 0.898, 0.898))`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`box = ax.get_position()`
			`ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])`
			`fig = plt.gcf()`
			`fig.set_size_inches(20, 15)`
			`fig.savefig('plotlib/' + taxonomy + '.png')`
			`fig.clf()`

			`if not emptyOther:`
			`for it in dataframe.iterrows():`
			`points = []`
			`for i in range(split):`
			`points.append((i, it[1][i]))`

			`color = colourDict[it[0]]`
			`label = it[0]`
			`points = numpy.array(points)`

			`# get x and y vectors`
			`x = points[:, 0]`
			`y = points[:, 1]`

			`# calculate polynomial`
			`z = numpy.polyfit(x, y, order)`
			`f = numpy.poly1d(z)`

			`# calculate new x's and y's`
			`x_new = numpy.linspace(x[0], x[-1], 50)`
			`y_new = f(x_new)`

			`plt.plot(x, y, '.', color=color, label=label)`
			`plt.plot(x_new, y_new, color=color, label=label + 'trend')`

			`pylab.title('Polynomial Fit with Matplotlib: other')`
			`pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))`
			`ax = plt.gca()`
fix situational-awareness examples 2016-10-12 12:33:42 +02:00			`#cax.set_facecolor((0.898, 0.898, 0.898))`
add tags_to_graphs.py in ecamples/situational-awareness 2016-09-05 14:14:29 +02:00			`box = ax.get_position()`
			`ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])`
			`fig = plt.gcf()`
			`fig.set_size_inches(20, 15)`
			`fig.savefig('plotlib/other.png')`


			`def createVisualisation(taxonomies):`
			`chain = '<!DOCTYPE html>\n<html>\n\t<head>\n\t\t<link rel="stylesheet" href="style2.css">\n\t</head>\n\t<body>'`
			`chain = chain + '<table>'`
			`for taxonomy in taxonomies:`
			`chain = chain + '<tr><td><object type="image/svg+xml" data="plot\\' + taxonomy + '.svg"></object></td><td><img src="plotlib\\' + taxonomy + '.png" alt="graph" /></td><td><object type="image/svg+xml" data="plot\\' + taxonomy + '_trend.svg"></object></td></tr>\n'`

			`chain = chain + '<tr><td><object type="image/svg+xml" data="plot\other.svg"></object></td><td><img src="plotlib\other.png" alt="graph" /></td><td><object type="image/svg+xml" data="plot\other_trend.svg"></object></td></tr>\n'`
			`chain = chain + '</table>'`
			`chain = chain + '\n\t</body>\n</html>'`

			`with open('test_tags_trend.html', 'w') as target:`
			`target.write(chain)`