add tags_to_graphs.py in examples/situational-awareness

2016-09-05 14:14:29 +02:00 · 2016-09-05 14:14:29 +02:00 · fa66c77cd1
parent 95654e083c
commit fa66c77cd1
5 changed files with 451 additions and 44 deletions
--- a/examples/situational-awareness/README.md
+++ b/examples/situational-awareness/README.md
@ -20,10 +20,10 @@
 		* tags_repartition_plot that present the raw data
 		* tags_repartition_trend_plot that present the general evolution for each tag
 	* Then each taxonomies will be represented in three plots:
-        * Raw datas: in plot folder, named with the name of the corresponding taxonomy
-        * Trend: in plot folder, named _taxonomy_\_trend. general evolution of the data (linear fitting, curve fitting at order 1)
-        * Curve fitting: in plotlib folder, name as the taxonomy it presents.
-
+        * Raw data: in the "plot" folder, named after the corresponding taxonomy
+        * Trend: in the "plot" folder, named _taxonomy_\_trend. General evolution of the data (linear fitting, i.e. curve fitting at order 1)
+        * Curve fitting: in the "plotlib" folder, named after the taxonomy it presents.
+	* In order to visualize the last plots, an HTML file is also generated automatically (might be improved in the future)

 :warning: These scripts are not time optimised

--- a/examples/situational-awareness/style.css
+++ b/examples/situational-awareness/style.css
@ -29,11 +29,15 @@ table td
 {
 	border-left: 1px solid #cbcbcb;
 	border-width: 0 0 0 1px;
-	width: 150px;
+	width: 500px;
 	margin: 0;
 	padding: 0.5em 1em;
 }

+/* Elements carrying the "test" class get a fixed width of 500px. */
+.test
+{
+	width: 500px;
+}

 table tr:nth-child(2n-1) td
 {
--- a/examples/situational-awareness/style2.css
+++ b/examples/situational-awareness/style2.css
@ -0,0 +1,41 @@
+/* Page-wide font: monospace stack (the sans-serif stack is kept commented out). */
+body
+{
+    /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
+	font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
+}
+
+/* Headings: small, centred text inside a 290px wide box. */
+h1
+{
+	font-size: 16px;
+	width: 290px;
+	text-align:center;
+}
+
+/*** Stats Tables ***/
+
+/* Fixed-layout table with collapsed borders and a fixed total width of 6000px. */
+table
+{
+	border-collapse: collapse;
+	border-spacing: 0;
+    table-layout: fixed;
+	width: 6000px;
+    border: 1px solid #cbcbcb;
+}
+
+tbody
+{
+	font-size:12px;
+}
+
+/* Cells: only the left border is drawn (border-width zeroes the three other sides). */
+td
+{
+	border-left: 1px solid #cbcbcb;
+	border-width: 0 0 0 1px;
+	margin: 0;
+	padding: 0.5em 1em;
+}
+
+/* First cell of every row is rendered in bold. */
+table tr td:first-child
+{
+	font-weight: bold;
+}
--- a/examples/situational-awareness/tags_to_graphs.py
+++ b/examples/situational-awareness/tags_to_graphs.py
@ -0,0 +1,91 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pymisp import PyMISP
+from keys import misp_url, misp_key, misp_verifycert
+import argparse
+import tools
+
+
+def formattingDataframe(dataframe, dates, NanValue):
+    dataframe.reverse()
+    dates.reverse()
+    dataframe = tools.concat(dataframe)
+    dataframe = tools.renameColumns(dataframe, dates)
+    dataframe = tools.replaceNaN(dataframe, 0)
+    return dataframe
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Show the evolution of trend of tags.')
+    parser.add_argument("-p", "--period", help='Define the studied period. Can be the past year (y), month (m) or week (w). Week is the default value if no valid value is given.')
+    parser.add_argument("-a", "--accuracy", help='Define the accuracy of the splits on the studied period. Can be per month (m) -for year only-, week (w) -month only- or day (d). The default value is always the biggest available.')
+    parser.add_argument("-o", "--order", type=int, help='Define the accuracy of the curve fitting. Default value is 3')
+
+    args = parser.parse_args()
+
+    misp = PyMISP(misp_url, misp_key, misp_verifycert)
+
+    if args.period == "y":
+        if args.accuracy == "d":
+            split = 360
+            size = 1
+        else:
+            split = 12
+            size = 30
+        last = '360d'
+        title = 'Tags repartition over the last 360 days'
+    elif args.period == "m":
+        if args.accuracy == "d":
+            split = 28
+            size = 1
+        else:
+            split = 4
+            size = 7
+        last = '28d'
+        title = 'Tags repartition over the last 28 days'
+    else:
+        split = 7
+        size = 1
+        last = '7d'
+        title = 'Tags repartition over the last 7 days'
+
+    result = misp.download_last(last)
+    events = tools.eventsListBuildFromArray(result)
+    result = []
+    dates = []
+    enddate = tools.getToday()
+    colourDict = {}
+    faketag = False
+
+    for i in range(split):
+        begindate = tools.getNDaysBefore(enddate, size)
+        dates.append(str(enddate.date()))
+        eventstemp = tools.selectInRange(events, begin=begindate, end=enddate)
+        if eventstemp is not None:
+            tags = tools.tagsListBuild(eventstemp)
+            if tags is not None:
+                tools.createDictTagsColour(colourDict, tags)
+                result.append(tools.getNbOccurenceTags(tags))
+            else:
+                result.append(tools.createFakeEmptyTagsSeries())
+                faketag = True
+        else:
+            result.append(tools.createFakeEmptyTagsSeries())
+            faketag = True
+        enddate = begindate
+
+    result = formattingDataframe(result, dates, 0)
+    if faketag:
+        result = tools.removeFaketagRow(result)
+
+    taxonomies, emptyOther = tools.getTaxonomies(tools.getCopyDataframe(result))
+
+
+    tools.tagsToLineChart(tools.getCopyDataframe(result), title, dates, colourDict)
+    tools.tagstrendToLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict)
+    tools.tagsToTaxoLineChart(tools.getCopyDataframe(result), title, dates, colourDict, taxonomies, emptyOther)
+    tools.tagstrendToTaxoLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict, taxonomies, emptyOther)
+    if args.order is None:
+        args.order = 3
+    tools.tagsToPolyChart(tools.getCopyDataframe(result), split, colourDict, taxonomies, emptyOther, args.order)
+    tools.createVisualisation(taxonomies)
--- a/examples/situational-awareness/tools.py
+++ b/examples/situational-awareness/tools.py
@ -9,8 +9,13 @@ import pandas
 from datetime import datetime
 from datetime import timedelta
 from dateutil.parser import parse
-
-# ############### Errors ################
+import numpy
+from scipy import stats
+from pytaxonomies import Taxonomies
+import re
+import matplotlib.pyplot as plt
+from matplotlib import pylab
+import os


 class DateError(Exception):
@ -20,30 +25,8 @@ class DateError(Exception):
    def __str__(self):
        return repr(self.value)

-# ############### Tools ################
-
-
-def buildDoubleIndex(index1, index2, datatype):
-    it = -1
-    newindex1 = []
-    for index in index2:
-        if index == 0:
-            it += 1
-        newindex1.append(index1[it])
-    arrays = [newindex1, index2]
-    tuples = list(zip(*arrays))
-    return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])
-
-
-def buildNewColumn(index2, column):
-    it = -1
-    newcolumn = []
-    for index in index2:
-        if index == 0:
-            it += 1
-        newcolumn.append(column[it])
-    return newcolumn

+# ############### Date Tools ################

 def dateInRange(datetimeTested, begin=None, end=None):
    if begin is None:
@ -53,10 +36,6 @@ def dateInRange(datetimeTested, begin=None, end=None):
    return begin <= datetimeTested <= end


-def addColumn(dataframe, columnList, columnName):
-    dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)
-
-
 def toDatetime(date):
    # Thin wrapper: delegates the conversion to dateutil.parser.parse.
    return parse(date)

@ -86,6 +65,115 @@ def setEnddate(enddate):
 def getLastdate(last):
    # Midnight of the day lying `last` days before now; `last` is converted with int().
    return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)

+
+def getNDaysBefore(date, days):
+    return (date - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0)
+
+
+def getToday():
+    return (datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0)
+
+
+# ############### Tools ################
+
+
+def getTaxonomies(dataframe):
+    taxonomies = Taxonomies()
+    taxonomies = list(taxonomies.keys())
+    notInTaxo = []
+    count = 0
+    for taxonomy in taxonomies:
+        empty = True
+        for it in dataframe.iterrows():
+            if it[0].startswith(taxonomy):
+                empty = False
+                dataframe = dataframe.drop([it[0]])
+                count = count + 1
+        if empty is True:
+            notInTaxo.append(taxonomy)
+    if dataframe.empty:
+        emptyOther = True
+    else:
+        emptyOther = False
+    for taxonomy in notInTaxo:
+        taxonomies.remove(taxonomy)
+    return taxonomies, emptyOther
+
+
+def buildDoubleIndex(index1, index2, datatype):
+    it = -1
+    newindex1 = []
+    for index in index2:
+        if index == 0:
+            it += 1
+        newindex1.append(index1[it])
+    arrays = [newindex1, index2]
+    tuples = list(zip(*arrays))
+    return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])
+
+
+def buildNewColumn(index2, column):
+    it = -1
+    newcolumn = []
+    for index in index2:
+        if index == 0:
+            it += 1
+        newcolumn.append(column[it])
+    return newcolumn
+
+
+def addColumn(dataframe, columnList, columnName):
+    dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)
+
+
+def concat(data):
+    return pandas.concat(data, axis=1)
+
+
+def createFakeEmptyTagsSeries():
+    return pandas.Series({'Faketag': 0})
+
+
+def removeFaketagRow(dataframe):
+    return dataframe.drop(['Faketag'])
+
+
+def getCopyDataframe(dataframe):
+    return dataframe.copy()
+
+
+def createDictTagsColour(colourDict, tags):
+    temp = tags.groupby(['name', 'colour']).count()['id']
+    levels_name = temp.index.levels[0]
+    levels_colour = temp.index.levels[1]
+    labels_name = temp.index.labels[0]
+    labels_colour = temp.index.labels[1]
+
+    for i in range(len(labels_name)):
+        colourDict[levels_name[labels_name[i]]] = levels_colour[labels_colour[i]]
+
+
+def createTagsPlotStyle(dataframe, colourDict, taxonomy=None):
+    colours = []
+    if taxonomy is not None:
+        for it in dataframe.iterrows():
+            if it[0].startswith(taxonomy):
+                colours.append(colourDict[it[0]])
+    else:
+        for it in dataframe.iterrows():
+            colours.append(colourDict[it[0]])
+
+    style = Style(background='transparent',
+                  plot_background='#eeeeee',
+                  foreground='#111111',
+                  foreground_strong='#111111',
+                  foreground_subtle='#111111',
+                  opacity='.6',
+                  opacity_hover='.9',
+                  transition='400ms ease-in',
+                  colors=tuple(colours))
+    return style
+
 # ############### Formatting  ################


@ -129,15 +217,19 @@ def attributesListBuild(events):

 def tagsListBuild(Events):
+    # Gather the tags of all events into one dataframe indexed by (event, tag),
+    # with the event date added as a 'date' column. Returns None when no tag
+    # could be collected.
    Tags = []
+    # Events without any 'Tag' column simply contribute nothing.
+    if 'Tag' in Events.columns:
        for Tag in Events['Tag']:
            # Entries that are not a list are skipped.
            if type(Tag) is not list:
                continue
            Tags.append(pandas.DataFrame(Tag))
+    if Tags:
        Tags = pandas.concat(Tags)
        columnDate = buildNewColumn(Tags.index, Events['date'])
        addColumn(Tags, columnDate, 'date')
        index = buildDoubleIndex(Events.index, Tags.index, 'tag')
        Tags = Tags.set_index(index)
+    else:
+        Tags = None
    return Tags


@ -148,6 +240,8 @@ def selectInRange(Events, begin=None, end=None):
            inRange.append(Event.tolist())
    inRange = pandas.DataFrame(inRange)
    temp = Events.columns.tolist()
+    if inRange.empty:
+        return None
    inRange.columns = temp
    return inRange

@ -160,6 +254,15 @@ def isTagIn(dataframe, tag):
            index.append(temp[i][0])
    return index

+
+def renameColumns(dataframe, namelist):
+    dataframe.columns = namelist
+    return dataframe
+
+
+def replaceNaN(dataframe, value):
+    return dataframe.fillna(value)
+
 # ############### Basic Stats ################


@ -212,7 +315,7 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr
                  transition='400ms ease-in',
                  colors=tuple(colors.values()))

-    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style, explicit_size=True, width=2048, height=2048)
+    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True
@ -222,3 +325,171 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr

    createTable(colors, categ_types_hash)
    treemap.render_to_file(treename)
+
+
+def tagsToLineChart(dataframe, title, dates, colourDict):
+    style = createTagsPlotStyle(dataframe, colourDict)
+    line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)
+    line_chart.title = title
+    line_chart.x_labels = dates
+    for it in dataframe.iterrows():
+        line_chart.add(it[0], it[1].tolist())
+    line_chart.render_to_file('tags_repartition_plot.svg')
+
+
+def tagstrendToLineChart(dataframe, title, dates, split, colourDict):
+    style = createTagsPlotStyle(dataframe, colourDict)
+    line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)
+    line_chart.title = title
+    line_chart.x_labels = dates
+    xi = numpy.arange(split)
+    for it in dataframe.iterrows():
+        slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
+        line = slope * xi + intercept
+        line_chart.add(it[0], line, show_dots=False)
+    line_chart.render_to_file('tags_repartition_trend_plot.svg')
+
+
+def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther):
+    style = createTagsPlotStyle(dataframe, colourDict)
+    line_chart = pygal.Line(x_label_rotation=20, style=style)
+    line_chart.title = title
+    line_chart.x_labels = dates
+    for taxonomy in taxonomies:
+        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
+        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
+        taxo_line_chart.title = title + ': ' + taxonomy
+        taxo_line_chart.x_labels = dates
+        for it in dataframe.iterrows():
+            if it[0].startswith(taxonomy):
+                taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), it[1].tolist())
+                dataframe = dataframe.drop([it[0]])
+        taxo_line_chart.render_to_file('plot/' + taxonomy + '.svg')
+
+    if not emptyOther:
+        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
+        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
+        taxo_line_chart.title = title + ': other'
+        taxo_line_chart.x_labels = dates
+        for it in dataframe.iterrows():
+            taxo_line_chart.add(it[0], it[1].tolist())
+        taxo_line_chart.render_to_file('plot/other.svg')
+
+
+def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):
+    style = createTagsPlotStyle(dataframe, colourDict)
+    line_chart = pygal.Line(x_label_rotation=20, style=style)
+    line_chart.title = title
+    line_chart.x_labels = dates
+    xi = numpy.arange(split)
+    for taxonomy in taxonomies:
+        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
+        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
+        taxo_line_chart.title = title + ': ' + taxonomy
+        taxo_line_chart.x_labels = dates
+        for it in dataframe.iterrows():
+            if it[0].startswith(taxonomy):
+                slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
+                line = slope * xi + intercept
+                taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False)
+                dataframe = dataframe.drop([it[0]])
+        taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg')
+
+    if not emptyOther:
+        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
+        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
+        taxo_line_chart.title = title + ': other'
+        taxo_line_chart.x_labels = dates
+        for it in dataframe.iterrows():
+            slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
+            line = slope * xi + intercept
+            taxo_line_chart.add(it[0], line, show_dots=False)
+        taxo_line_chart.render_to_file('plot/other_trend.svg')
+
+
+def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order):
+    for taxonomy in taxonomies:
+        for it in dataframe.iterrows():
+            if it[0].startswith(taxonomy):
+                points = []
+                for i in range(split):
+                    points.append((i, it[1][i]))
+                color = colourDict[it[0]]
+                label = re.sub(taxonomy + ':', '', it[0])
+                points = numpy.array(points)
+                dataframe = dataframe.drop([it[0]])
+
+                # get x and y vectors
+                x = points[:, 0]
+                y = points[:, 1]
+
+                # calculate polynomial
+                z = numpy.polyfit(x, y, order)
+                f = numpy.poly1d(z)
+
+                # calculate new x's and y's
+                x_new = numpy.linspace(x[0], x[-1], 50)
+                y_new = f(x_new)
+
+                plt.plot(x, y, '.', color=color)
+                plt.plot(x_new, y_new, color=color, label=label + 'trend')
+
+        pylab.title('Polynomial Fit with Matplotlib: ' + taxonomy)
+        pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+        ax = plt.gca()
+        ax.set_facecolor((0.898, 0.898, 0.898))
+        box = ax.get_position()
+        ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])
+        fig = plt.gcf()
+        fig.set_size_inches(20, 15)
+        fig.savefig('plotlib/' + taxonomy + '.png')
+        fig.clf()
+
+    if not emptyOther:
+        for it in dataframe.iterrows():
+            points = []
+            for i in range(split):
+                points.append((i, it[1][i]))
+
+            color = colourDict[it[0]]
+            label = it[0]
+            points = numpy.array(points)
+
+            # get x and y vectors
+            x = points[:, 0]
+            y = points[:, 1]
+
+            # calculate polynomial
+            z = numpy.polyfit(x, y, order)
+            f = numpy.poly1d(z)
+
+            # calculate new x's and y's
+            x_new = numpy.linspace(x[0], x[-1], 50)
+            y_new = f(x_new)
+
+            plt.plot(x, y, '.', color=color, label=label)
+            plt.plot(x_new, y_new, color=color, label=label + 'trend')
+
+        pylab.title('Polynomial Fit with Matplotlib: other')
+        pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5))
+        ax = plt.gca()
+        ax.set_facecolor((0.898, 0.898, 0.898))
+        box = ax.get_position()
+        ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height])
+        fig = plt.gcf()
+        fig.set_size_inches(20, 15)
+        fig.savefig('plotlib/other.png')
+
+
+def createVisualisation(taxonomies):
+    chain = '<!DOCTYPE html>\n<html>\n\t<head>\n\t\t<link rel="stylesheet" href="style2.css">\n\t</head>\n\t<body>'
+    chain = chain + '<table>'
+    for taxonomy in taxonomies:
+        chain = chain + '<tr><td><object type="image/svg+xml" data="plot\\' + taxonomy + '.svg"></object></td><td><img src="plotlib\\' + taxonomy + '.png" alt="graph" /></td><td><object type="image/svg+xml" data="plot\\' + taxonomy + '_trend.svg"></object></td></tr>\n'
+
+    chain = chain + '<tr><td><object type="image/svg+xml" data="plot\other.svg"></object></td><td><img src="plotlib\other.png" alt="graph" /></td><td><object type="image/svg+xml" data="plot\other_trend.svg"></object></td></tr>\n'
+    chain = chain + '</table>'
+    chain = chain + '\n\t</body>\n</html>'
+
+    with open('test_tags_trend.html', 'w') as target:
+        target.write(chain)