diff --git a/examples/sharing_groups.py b/examples/sharing_groups.py new file mode 100644 index 0000000..3bf4fa9 --- /dev/null +++ b/examples/sharing_groups.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key +import argparse + +# For python2 & 3 compat, a bit dirty, but it seems to be the least bad one +try: + input = raw_input +except NameError: + pass + + +def init(url, key): + return PyMISP(url, key, True, 'json') + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get a list of the sharing groups from the MISP instance.') + + misp = init(misp_url, misp_key) + + sharing_groups = misp.get_sharing_groups() + print sharing_groups + diff --git a/examples/situational-awareness/README.md b/examples/situational-awareness/README.md index 86f06bd..d481f76 100644 --- a/examples/situational-awareness/README.md +++ b/examples/situational-awareness/README.md @@ -12,8 +12,22 @@ * tag\_search.py allows research for multiple tags is possible by separating each tag by the | symbol. * Partial research is also possible with tag\_search.py. For instance, search for "ransom" will also return tags containin "ransomware". +* tags\_to\_graphs.py is a script that will generate several plots to visualise tags distribution. + * The studied _period_ can be either the 7, 28 or 360 last days + * _accuracy_ allows to get smallers splits of data instead of the default values + * _order_ define the accuracy of the curve fitting. Default value is 3 + * It will generate two plots comparing all the tags: + * tags_repartition_plot that present the raw data + * tags_repartition_trend_plot that present the general evolution for each tag + * Then each taxonomies will be represented in three plots: + * Raw datas: in "plot" folder, named with the name of the corresponding taxonomy + * Trend: in "plot" folder, named _taxonomy_\_trend. general evolution of the data (linear fitting, curve fitting at order 1) + * Curve fitting: in "plotlib" folder, name as the taxonomy it presents. + * In order to visualize the last plots, a html file is also generated automaticaly (might be improved in the future) + :warning: These scripts are not time optimised ## Requierements * [Pygal](https://github.com/Kozea/pygal/) +* [Matplotlib](https://github.com/matplotlib/matplotlib) diff --git a/examples/situational-awareness/style.css b/examples/situational-awareness/style.css index 5afdf7f..ce23448 100644 --- a/examples/situational-awareness/style.css +++ b/examples/situational-awareness/style.css @@ -29,11 +29,15 @@ table td { border-left: 1px solid #cbcbcb; border-width: 0 0 0 1px; - width: 150px; + width: 500px; margin: 0; padding: 0.5em 1em; } +.test +{ + width: 500px; +} table tr:nth-child(2n-1) td { diff --git a/examples/situational-awareness/style2.css b/examples/situational-awareness/style2.css new file mode 100644 index 0000000..6fcec41 --- /dev/null +++ b/examples/situational-awareness/style2.css @@ -0,0 +1,41 @@ +body +{ + /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ + font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; +} + +h1 +{ + font-size: 16px; + width: 290px; + text-align:center; +} + +/*** Stats Tables ***/ + +table +{ + border-collapse: collapse; + border-spacing: 0; + table-layout: fixed; + width: 6000px; + border: 1px solid #cbcbcb; +} + +tbody +{ + font-size:12px; +} + +td +{ + border-left: 1px solid #cbcbcb; + border-width: 0 0 0 1px; + margin: 0; + padding: 0.5em 1em; +} + +table tr td:first-child +{ + font-weight: bold; +} diff --git a/examples/situational-awareness/tags_to_graphs.py b/examples/situational-awareness/tags_to_graphs.py new file mode 100644 index 0000000..7280165 --- /dev/null +++ b/examples/situational-awareness/tags_to_graphs.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +import argparse +import tools + + +def formattingDataframe(dataframe, dates, NanValue): + dataframe.reverse() + dates.reverse() + dataframe = tools.concat(dataframe) + dataframe = tools.renameColumns(dataframe, dates) + dataframe = tools.replaceNaN(dataframe, 0) + return dataframe + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Show the evolution of trend of tags.') + parser.add_argument("-p", "--period", help='Define the studied period. Can be the past year (y), month (m) or week (w). Week is the default value if no valid value is given.') + parser.add_argument("-a", "--accuracy", help='Define the accuracy of the splits on the studied period. Can be per month (m) -for year only-, week (w) -month only- or day (d). The default value is always the biggest available.') + parser.add_argument("-o", "--order", type=int, help='Define the accuracy of the curve fitting. Default value is 3') + + args = parser.parse_args() + + misp = PyMISP(misp_url, misp_key, misp_verifycert) + + if args.period == "y": + if args.accuracy == "d": + split = 360 + size = 1 + else: + split = 12 + size = 30 + last = '360d' + title = 'Tags repartition over the last 360 days' + elif args.period == "m": + if args.accuracy == "d": + split = 28 + size = 1 + else: + split = 4 + size = 7 + last = '28d' + title = 'Tags repartition over the last 28 days' + else: + split = 7 + size = 1 + last = '7d' + title = 'Tags repartition over the last 7 days' + + result = misp.download_last(last) + events = tools.eventsListBuildFromArray(result) + result = [] + dates = [] + enddate = tools.getToday() + colourDict = {} + faketag = False + + for i in range(split): + begindate = tools.getNDaysBefore(enddate, size) + dates.append(str(enddate.date())) + eventstemp = tools.selectInRange(events, begin=begindate, end=enddate) + if eventstemp is not None: + tags = tools.tagsListBuild(eventstemp) + if tags is not None: + tools.createDictTagsColour(colourDict, tags) + result.append(tools.getNbOccurenceTags(tags)) + else: + result.append(tools.createFakeEmptyTagsSeries()) + faketag = True + else: + result.append(tools.createFakeEmptyTagsSeries()) + faketag = True + enddate = begindate + + result = formattingDataframe(result, dates, 0) + if faketag: + result = tools.removeFaketagRow(result) + + taxonomies, emptyOther = tools.getTaxonomies(tools.getCopyDataframe(result)) + + + tools.tagsToLineChart(tools.getCopyDataframe(result), title, dates, colourDict) + tools.tagstrendToLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict) + tools.tagsToTaxoLineChart(tools.getCopyDataframe(result), title, dates, colourDict, taxonomies, emptyOther) + tools.tagstrendToTaxoLineChart(tools.getCopyDataframe(result), title, dates, split, colourDict, taxonomies, emptyOther) + if args.order is None: + args.order = 3 + tools.tagsToPolyChart(tools.getCopyDataframe(result), split, colourDict, taxonomies, emptyOther, args.order) + tools.createVisualisation(taxonomies) diff --git a/examples/situational-awareness/tools.py b/examples/situational-awareness/tools.py index 6cff510..d4b9bea 100644 --- a/examples/situational-awareness/tools.py +++ b/examples/situational-awareness/tools.py @@ -9,8 +9,13 @@ import pandas from datetime import datetime from datetime import timedelta from dateutil.parser import parse - -# ############### Errors ################ +import numpy +from scipy import stats +from pytaxonomies import Taxonomies +import re +import matplotlib.pyplot as plt +from matplotlib import pylab +import os class DateError(Exception): @@ -20,30 +25,8 @@ class DateError(Exception): def __str__(self): return repr(self.value) -# ############### Tools ################ - - -def buildDoubleIndex(index1, index2, datatype): - it = -1 - newindex1 = [] - for index in index2: - if index == 0: - it += 1 - newindex1.append(index1[it]) - arrays = [newindex1, index2] - tuples = list(zip(*arrays)) - return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype]) - - -def buildNewColumn(index2, column): - it = -1 - newcolumn = [] - for index in index2: - if index == 0: - it += 1 - newcolumn.append(column[it]) - return newcolumn +# ############### Date Tools ################ def dateInRange(datetimeTested, begin=None, end=None): if begin is None: @@ -53,10 +36,6 @@ def dateInRange(datetimeTested, begin=None, end=None): return begin <= datetimeTested <= end -def addColumn(dataframe, columnList, columnName): - dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index) - - def toDatetime(date): return parse(date) @@ -86,6 +65,115 @@ def setEnddate(enddate): def getLastdate(last): return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0) + +def getNDaysBefore(date, days): + return (date - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0) + + +def getToday(): + return (datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) + + +# ############### Tools ################ + + +def getTaxonomies(dataframe): + taxonomies = Taxonomies() + taxonomies = list(taxonomies.keys()) + notInTaxo = [] + count = 0 + for taxonomy in taxonomies: + empty = True + for it in dataframe.iterrows(): + if it[0].startswith(taxonomy): + empty = False + dataframe = dataframe.drop([it[0]]) + count = count + 1 + if empty is True: + notInTaxo.append(taxonomy) + if dataframe.empty: + emptyOther = True + else: + emptyOther = False + for taxonomy in notInTaxo: + taxonomies.remove(taxonomy) + return taxonomies, emptyOther + + +def buildDoubleIndex(index1, index2, datatype): + it = -1 + newindex1 = [] + for index in index2: + if index == 0: + it += 1 + newindex1.append(index1[it]) + arrays = [newindex1, index2] + tuples = list(zip(*arrays)) + return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype]) + + +def buildNewColumn(index2, column): + it = -1 + newcolumn = [] + for index in index2: + if index == 0: + it += 1 + newcolumn.append(column[it]) + return newcolumn + + +def addColumn(dataframe, columnList, columnName): + dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index) + + +def concat(data): + return pandas.concat(data, axis=1) + + +def createFakeEmptyTagsSeries(): + return pandas.Series({'Faketag': 0}) + + +def removeFaketagRow(dataframe): + return dataframe.drop(['Faketag']) + + +def getCopyDataframe(dataframe): + return dataframe.copy() + + +def createDictTagsColour(colourDict, tags): + temp = tags.groupby(['name', 'colour']).count()['id'] + levels_name = temp.index.levels[0] + levels_colour = temp.index.levels[1] + labels_name = temp.index.labels[0] + labels_colour = temp.index.labels[1] + + for i in range(len(labels_name)): + colourDict[levels_name[labels_name[i]]] = levels_colour[labels_colour[i]] + + +def createTagsPlotStyle(dataframe, colourDict, taxonomy=None): + colours = [] + if taxonomy is not None: + for it in dataframe.iterrows(): + if it[0].startswith(taxonomy): + colours.append(colourDict[it[0]]) + else: + for it in dataframe.iterrows(): + colours.append(colourDict[it[0]]) + + style = Style(background='transparent', + plot_background='#eeeeee', + foreground='#111111', + foreground_strong='#111111', + foreground_subtle='#111111', + opacity='.6', + opacity_hover='.9', + transition='400ms ease-in', + colors=tuple(colours)) + return style + # ############### Formatting ################ @@ -129,15 +217,19 @@ def attributesListBuild(events): def tagsListBuild(Events): Tags = [] - for Tag in Events['Tag']: - if type(Tag) is not list: - continue - Tags.append(pandas.DataFrame(Tag)) - Tags = pandas.concat(Tags) - columnDate = buildNewColumn(Tags.index, Events['date']) - addColumn(Tags, columnDate, 'date') - index = buildDoubleIndex(Events.index, Tags.index, 'tag') - Tags = Tags.set_index(index) + if 'Tag' in Events.columns: + for Tag in Events['Tag']: + if type(Tag) is not list: + continue + Tags.append(pandas.DataFrame(Tag)) + if Tags: + Tags = pandas.concat(Tags) + columnDate = buildNewColumn(Tags.index, Events['date']) + addColumn(Tags, columnDate, 'date') + index = buildDoubleIndex(Events.index, Tags.index, 'tag') + Tags = Tags.set_index(index) + else: + Tags = None return Tags @@ -148,6 +240,8 @@ def selectInRange(Events, begin=None, end=None): inRange.append(Event.tolist()) inRange = pandas.DataFrame(inRange) temp = Events.columns.tolist() + if inRange.empty: + return None inRange.columns = temp return inRange @@ -160,6 +254,15 @@ def isTagIn(dataframe, tag): index.append(temp[i][0]) return index + +def renameColumns(dataframe, namelist): + dataframe.columns = namelist + return dataframe + + +def replaceNaN(dataframe, value): + return dataframe.fillna(value) + # ############### Basic Stats ################ @@ -212,7 +315,7 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr transition='400ms ease-in', colors=tuple(colors.values())) - treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style, explicit_size=True, width=2048, height=2048) + treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style) treemap.title = title treemap.print_values = True treemap.print_labels = True @@ -222,3 +325,171 @@ def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attr createTable(colors, categ_types_hash) treemap.render_to_file(treename) + + +def tagsToLineChart(dataframe, title, dates, colourDict): + style = createTagsPlotStyle(dataframe, colourDict) + line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False) + line_chart.title = title + line_chart.x_labels = dates + for it in dataframe.iterrows(): + line_chart.add(it[0], it[1].tolist()) + line_chart.render_to_file('tags_repartition_plot.svg') + + +def tagstrendToLineChart(dataframe, title, dates, split, colourDict): + style = createTagsPlotStyle(dataframe, colourDict) + line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False) + line_chart.title = title + line_chart.x_labels = dates + xi = numpy.arange(split) + for it in dataframe.iterrows(): + slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) + line = slope * xi + intercept + line_chart.add(it[0], line, show_dots=False) + line_chart.render_to_file('tags_repartition_trend_plot.svg') + + +def tagsToTaxoLineChart(dataframe, title, dates, colourDict, taxonomies, emptyOther): + style = createTagsPlotStyle(dataframe, colourDict) + line_chart = pygal.Line(x_label_rotation=20, style=style) + line_chart.title = title + line_chart.x_labels = dates + for taxonomy in taxonomies: + taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy) + taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) + taxo_line_chart.title = title + ': ' + taxonomy + taxo_line_chart.x_labels = dates + for it in dataframe.iterrows(): + if it[0].startswith(taxonomy): + taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), it[1].tolist()) + dataframe = dataframe.drop([it[0]]) + taxo_line_chart.render_to_file('plot/' + taxonomy + '.svg') + + if not emptyOther: + taxoStyle = createTagsPlotStyle(dataframe, colourDict) + taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) + taxo_line_chart.title = title + ': other' + taxo_line_chart.x_labels = dates + for it in dataframe.iterrows(): + taxo_line_chart.add(it[0], it[1].tolist()) + taxo_line_chart.render_to_file('plot/other.svg') + + +def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther): + style = createTagsPlotStyle(dataframe, colourDict) + line_chart = pygal.Line(x_label_rotation=20, style=style) + line_chart.title = title + line_chart.x_labels = dates + xi = numpy.arange(split) + for taxonomy in taxonomies: + taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy) + taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) + taxo_line_chart.title = title + ': ' + taxonomy + taxo_line_chart.x_labels = dates + for it in dataframe.iterrows(): + if it[0].startswith(taxonomy): + slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) + line = slope * xi + intercept + taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False) + dataframe = dataframe.drop([it[0]]) + taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg') + + if not emptyOther: + taxoStyle = createTagsPlotStyle(dataframe, colourDict) + taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle) + taxo_line_chart.title = title + ': other' + taxo_line_chart.x_labels = dates + for it in dataframe.iterrows(): + slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1]) + line = slope * xi + intercept + taxo_line_chart.add(it[0], line, show_dots=False) + taxo_line_chart.render_to_file('plot/other_trend.svg') + + +def tagsToPolyChart(dataframe, split, colourDict, taxonomies, emptyOther, order): + for taxonomy in taxonomies: + for it in dataframe.iterrows(): + if it[0].startswith(taxonomy): + points = [] + for i in range(split): + points.append((i, it[1][i])) + color = colourDict[it[0]] + label = re.sub(taxonomy + ':', '', it[0]) + points = numpy.array(points) + dataframe = dataframe.drop([it[0]]) + + # get x and y vectors + x = points[:, 0] + y = points[:, 1] + + # calculate polynomial + z = numpy.polyfit(x, y, order) + f = numpy.poly1d(z) + + # calculate new x's and y's + x_new = numpy.linspace(x[0], x[-1], 50) + y_new = f(x_new) + + plt.plot(x, y, '.', color=color) + plt.plot(x_new, y_new, color=color, label=label + 'trend') + + pylab.title('Polynomial Fit with Matplotlib: ' + taxonomy) + pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5)) + ax = plt.gca() + ax.set_facecolor((0.898, 0.898, 0.898)) + box = ax.get_position() + ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height]) + fig = plt.gcf() + fig.set_size_inches(20, 15) + fig.savefig('plotlib/' + taxonomy + '.png') + fig.clf() + + if not emptyOther: + for it in dataframe.iterrows(): + points = [] + for i in range(split): + points.append((i, it[1][i])) + + color = colourDict[it[0]] + label = it[0] + points = numpy.array(points) + + # get x and y vectors + x = points[:, 0] + y = points[:, 1] + + # calculate polynomial + z = numpy.polyfit(x, y, order) + f = numpy.poly1d(z) + + # calculate new x's and y's + x_new = numpy.linspace(x[0], x[-1], 50) + y_new = f(x_new) + + plt.plot(x, y, '.', color=color, label=label) + plt.plot(x_new, y_new, color=color, label=label + 'trend') + + pylab.title('Polynomial Fit with Matplotlib: other') + pylab.legend(loc='center left', bbox_to_anchor=(1, 0.5)) + ax = plt.gca() + ax.set_facecolor((0.898, 0.898, 0.898)) + box = ax.get_position() + ax.set_position([box.x0 - 0.01, box.y0, box.width * 0.78, box.height]) + fig = plt.gcf() + fig.set_size_inches(20, 15) + fig.savefig('plotlib/other.png') + + +def createVisualisation(taxonomies): + chain = '\n\n\t\n\t\t\n\t\n\t' + chain = chain + '' + for taxonomy in taxonomies: + chain = chain + '\n' + + chain = chain + '\n' + chain = chain + '
graph
graph
' + chain = chain + '\n\t\n' + + with open('test_tags_trend.html', 'w') as target: + target.write(chain) diff --git a/examples/tagstatistics.py b/examples/tagstatistics.py new file mode 100644 index 0000000..9357c8e --- /dev/null +++ b/examples/tagstatistics.py @@ -0,0 +1,28 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +import argparse +import json + +# For python2 & 3 compat, a bit dirty, but it seems to be the least bad one +try: + input = raw_input +except NameError: + pass + + +def init(url, key): + return PyMISP(url, key, misp_verifycert, 'json') + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Get statistics from tags.') + parser.add_argument("-p", "--percentage", help="An optional field, if set, it will return the results in percentages, otherwise it returns exact count.") + parser.add_argument("-n", "--namesort", help="An optional field, if set, values are sort by the namespace, otherwise the sorting will happen on the value.") + args = parser.parse_args() + + misp = init(misp_url, misp_key) + + stats = misp.get_tags_statistics(args.percentage, args.namesort) + print json.dumps(stats)