From f8be16a905274930c8a7bd2a6da12600633ebc89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9borah=20Servili?= Date: Fri, 3 Feb 2017 16:12:02 +0100 Subject: [PATCH] add ta_scatter.py script & reorganise tools --- .../attribute_treemap.py | 4 +- examples/situational-awareness/bokeh_tools.py | 33 +++++ examples/situational-awareness/date_tools.py | 70 +++++++++ examples/situational-awareness/pygal_tools.py | 54 +++++++ examples/situational-awareness/tag_scatter.py | 71 +++++++++ examples/situational-awareness/tag_search.py | 9 +- examples/situational-awareness/tags_count.py | 9 +- .../situational-awareness/tags_to_graphs.py | 6 +- .../test_attribute_treemap.html | 26 ---- examples/situational-awareness/tools.py | 137 ++---------------- 10 files changed, 260 insertions(+), 159 deletions(-) create mode 100644 examples/situational-awareness/bokeh_tools.py create mode 100644 examples/situational-awareness/date_tools.py create mode 100644 examples/situational-awareness/pygal_tools.py create mode 100644 examples/situational-awareness/tag_scatter.py delete mode 100644 examples/situational-awareness/test_attribute_treemap.html diff --git a/examples/situational-awareness/attribute_treemap.py b/examples/situational-awareness/attribute_treemap.py index 33ab6b5..d0b0ed4 100755 --- a/examples/situational-awareness/attribute_treemap.py +++ b/examples/situational-awareness/attribute_treemap.py @@ -5,7 +5,7 @@ from pymisp import PyMISP from keys import misp_url, misp_key, misp_verifycert import argparse import tools - +import pygal_tools if __name__ == '__main__': parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.') @@ -26,6 +26,6 @@ if __name__ == '__main__': attributes = tools.attributesListBuild(events) temp = tools.getNbAttributePerEventCategoryType(attributes) temp = temp.groupby(level=['category', 'type']).sum() - tools.createTreemap(temp, 
'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html') + pygal_tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html') else: print ('There is no event answering the research criteria') diff --git a/examples/situational-awareness/bokeh_tools.py b/examples/situational-awareness/bokeh_tools.py new file mode 100644 index 0000000..7a0d485 --- /dev/null +++ b/examples/situational-awareness/bokeh_tools.py @@ -0,0 +1,33 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from bokeh.plotting import figure, output_file, show, ColumnDataSource +from bokeh.models import HoverTool +import date_tools + + +def tagsDistributionScatterPlot(NbTags, dates, plotname='Tags Distribution Plot'): + + output_file(plotname + ".html") + + counts = {} + glyphs = {} + desc = {} + hover = HoverTool() + plot = figure(plot_width=800, plot_height=800, x_axis_type="datetime", tools=[hover]) + + for name in NbTags.keys(): + desc[name] = [] + for date in dates[name]: + desc[name].append(date_tools.datetimeToString(date, "%Y-%m-%d")) + counts[name] = plot.circle(dates[name], NbTags[name], source=ColumnDataSource( + data=dict( + desc=desc[name] + ) + )) + glyphs[name] = counts[name].glyph + glyphs[name].size = int(name) * 2 + hover.tooltips = [("date", "@desc")] + if int(name) != 0: + glyphs[name].fill_alpha = 1/int(name) + show(plot) diff --git a/examples/situational-awareness/date_tools.py b/examples/situational-awareness/date_tools.py new file mode 100644 index 0000000..43b0634 --- /dev/null +++ b/examples/situational-awareness/date_tools.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from datetime import datetime +from datetime import timedelta +from dateutil.parser import parse + + +class DateError(Exception): + def __init__(self, value): + self.value = value + + def __str__(self): + return repr(self.value) + + +# ############### Date Tools ################ + +def dateInRange(datetimeTested, begin=None, 
end=None): + if begin is None: + begin = datetime(1970, 1, 1) + if end is None: + end = datetime.now() + return begin <= datetimeTested <= end + + +def toDatetime(date): + return parse(date) + + +def datetimeToString(datetime, formatstring): + return datetime.strftime(formatstring) + + +def checkDateConsistancy(begindate, enddate, lastdate): + if begindate is not None and enddate is not None: + if begindate > enddate: + raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate)) + + if enddate is not None: + if toDatetime(enddate) < lastdate: + raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate)) + + if begindate is not None: + if toDatetime(begindate) > datetime.now(): + raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date())) + + +def setBegindate(begindate, lastdate): + return max(begindate, lastdate) + + +def setEnddate(enddate): + return min(enddate, datetime.now()) + + +def getLastdate(last): + return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0) + + +def getNDaysBefore(date, days): + return (date - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0) + + +def getToday(): + return (datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) + + +def days_between(date_1, date_2): + return abs((date_2 - date_1).days) diff --git a/examples/situational-awareness/pygal_tools.py b/examples/situational-awareness/pygal_tools.py new file mode 100644 index 0000000..57379bc --- /dev/null +++ b/examples/situational-awareness/pygal_tools.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import pygal +from pygal.style import Style +import pandas +import random + + +def createTable(colors, categ_types_hash, tablename='attribute_table.html'): + with open(tablename, 'w') as target: + target.write('\n\n\n\n\n') + for categ_name, types in categ_types_hash.items(): + table 
= pygal.Treemap(pretty_print=True) + target.write('\n

{}

\n'.format(colors[categ_name], categ_name)) + for d in types: + table.add(d['label'], d['value']) + target.write(table.render_table(transpose=True)) + target.write('\n\n') + + +def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'): + labels_categ = data.index.labels[0] + labels_types = data.index.labels[1] + names_categ = data.index.levels[0] + names_types = data.index.levels[1] + categ_types_hash = {} + for categ_id, type_val, total in zip(labels_categ, labels_types, data): + if not categ_types_hash.get(names_categ[categ_id]): + categ_types_hash[names_categ[categ_id]] = [] + dict_to_print = {'label': names_types[type_val], 'value': total} + categ_types_hash[names_categ[categ_id]].append(dict_to_print) + + colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash.keys()} + style = Style(background='transparent', + plot_background='#FFFFFF', + foreground='#111111', + foreground_strong='#111111', + foreground_subtle='#111111', + opacity='.6', + opacity_hover='.9', + transition='400ms ease-in', + colors=tuple(colors.values())) + + treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style) + treemap.title = title + treemap.print_values = True + treemap.print_labels = True + + for categ_name, types in categ_types_hash.items(): + treemap.add(categ_name, types) + + createTable(colors, categ_types_hash) + treemap.render_to_file(treename) diff --git a/examples/situational-awareness/tag_scatter.py b/examples/situational-awareness/tag_scatter.py new file mode 100644 index 0000000..68a27de --- /dev/null +++ b/examples/situational-awareness/tag_scatter.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +import argparse +import numpy +import tools +import date_tools +import bokeh_tools + +import time + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Show the 
evolution of trend of tags.') + parser.add_argument("-d", "--days", type=int, required=True, help='') + parser.add_argument("-s", "--begindate", required=True, help='format yyyy-mm-dd') + parser.add_argument("-e", "--enddate", required=True, help='format yyyy-mm-dd') + + args = parser.parse_args() + + misp = PyMISP(misp_url, misp_key, misp_verifycert) + + result = misp.search(date_from=args.begindate, date_to=args.enddate, metadata=False) + + # Getting data + + if 'response' in result: + events = tools.eventsListBuildFromArray(result) + NbTags = [] + dates = [] + enddate = date_tools.toDatetime(args.enddate) + begindate = date_tools.toDatetime(args.begindate) + + for i in range(round(date_tools.days_between(enddate, begindate)/args.days)): + begindate = date_tools.getNDaysBefore(enddate, args.days) + eventstemp = tools.selectInRange(events, begindate, enddate) + if eventstemp is not None: + for event in eventstemp.iterrows(): + if 'Tag' in event[1]: + dates.append(enddate) + if isinstance(event[1]['Tag'], list): + NbTags.append(len(event[1]['Tag'])) + else: + NbTags.append(0) + enddate = begindate + + # Prepare plot + + NbTagsPlot = {} + datesPlot = {} + + for i in range(len(NbTags)): + if NbTags[i] == -1: + continue + count = 1 + for j in range(i+1, len(NbTags)): + if NbTags[i] == NbTags[j] and dates[i] == dates[j]: + count = count + 1 + NbTags[j] = -1 + if str(count) in NbTagsPlot: + NbTagsPlot[str(count)].append(NbTags[i]) + datesPlot[str(count)].append(dates[i]) + else: + NbTagsPlot[str(count)] = [NbTags[i]] + datesPlot[str(count)] = [dates[i]] + NbTags[i] = -1 + + # Plot + + bokeh_tools.tagsDistributionScatterPlot(NbTagsPlot, datesPlot) diff --git a/examples/situational-awareness/tag_search.py b/examples/situational-awareness/tag_search.py index 20d422d..989a404 100644 --- a/examples/situational-awareness/tag_search.py +++ b/examples/situational-awareness/tag_search.py @@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert from datetime import 
datetime import argparse import tools +import date_tools def init(url, key): @@ -29,17 +30,17 @@ if __name__ == '__main__': args.days = 7 result = misp.search(last='{}d'.format(args.days), metadata=True) - tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days)) + date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days)) if args.begindate is None: - args.begindate = tools.getLastdate(args.days) + args.begindate = date_tools.getLastdate(args.days) else: - args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days)) + args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days)) if args.enddate is None: args.enddate = datetime.now() else: - args.enddate = tools.setEnddate(tools.toDatetime(args.enddate)) + args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate)) if 'response' in result: events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate) diff --git a/examples/situational-awareness/tags_count.py b/examples/situational-awareness/tags_count.py index c58ca5b..acddc23 100644 --- a/examples/situational-awareness/tags_count.py +++ b/examples/situational-awareness/tags_count.py @@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert from datetime import datetime import argparse import tools +import date_tools def init(url, key): @@ -28,17 +29,17 @@ if __name__ == '__main__': args.days = 7 result = misp.search(last='{}d'.format(args.days), metadata=True) - tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days)) + date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days)) if args.begindate is None: - args.begindate = tools.getLastdate(args.days) + args.begindate = date_tools.getLastdate(args.days) else: - args.begindate = 
tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days)) + args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days)) if args.enddate is None: args.enddate = datetime.now() else: - args.enddate = tools.setEnddate(tools.toDatetime(args.enddate)) + args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate)) if 'response' in result: events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate) diff --git a/examples/situational-awareness/tags_to_graphs.py b/examples/situational-awareness/tags_to_graphs.py index 76464a4..f153961 100644 --- a/examples/situational-awareness/tags_to_graphs.py +++ b/examples/situational-awareness/tags_to_graphs.py @@ -5,6 +5,8 @@ from pymisp import PyMISP from keys import misp_url, misp_key, misp_verifycert import argparse import tools +import date_tools +import bokeh_tools def formattingDataframe(dataframe, dates, NanValue): @@ -54,12 +56,12 @@ if __name__ == '__main__': events = tools.eventsListBuildFromArray(result) result = [] dates = [] - enddate = tools.getToday() + enddate = date_tools.getToday() colourDict = {} faketag = False for i in range(split): - begindate = tools.getNDaysBefore(enddate, size) + begindate = date_tools.getNDaysBefore(enddate, size) dates.append(str(enddate.date())) eventstemp = tools.selectInRange(events, begin=begindate, end=enddate) if eventstemp is not None: diff --git a/examples/situational-awareness/test_attribute_treemap.html b/examples/situational-awareness/test_attribute_treemap.html deleted file mode 100644 index 0bc9c72..0000000 --- a/examples/situational-awareness/test_attribute_treemap.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - -
- - - diff --git a/examples/situational-awareness/tools.py b/examples/situational-awareness/tools.py index 694eb2b..5ef7cf4 100644 --- a/examples/situational-awareness/tools.py +++ b/examples/situational-awareness/tools.py @@ -2,13 +2,9 @@ # -*- coding: utf-8 -*- from json import JSONDecoder -import random import pygal from pygal.style import Style import pandas -from datetime import datetime -from datetime import timedelta -from dateutil.parser import parse import numpy from scipy import stats from pytaxonomies import Taxonomies @@ -16,67 +12,25 @@ import re import matplotlib.pyplot as plt from matplotlib import pylab import os - - -class DateError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) - - -# ############### Date Tools ################ - -def dateInRange(datetimeTested, begin=None, end=None): - if begin is None: - begin = datetime(1970, 1, 1) - if end is None: - end = datetime.now() - return begin <= datetimeTested <= end - - -def toDatetime(date): - return parse(date) - - -def checkDateConsistancy(begindate, enddate, lastdate): - if begindate is not None and enddate is not None: - if begindate > enddate: - raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate)) - - if enddate is not None: - if toDatetime(enddate) < lastdate: - raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate)) - - if begindate is not None: - if toDatetime(begindate) > datetime.now(): - raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date())) - - -def setBegindate(begindate, lastdate): - return max(begindate, lastdate) - - -def setEnddate(enddate): - return min(enddate, datetime.now()) - - -def getLastdate(last): - return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0) - - -def getNDaysBefore(date, days): - return (date - timedelta(days=days)).replace(hour=0, 
minute=0, second=0, microsecond=0) - - -def getToday(): - return (datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0) - +import date_tools +from dateutil.parser import parse # ############### Tools ################ +def selectInRange(Events, begin=None, end=None): + inRange = [] + for i, Event in Events.iterrows(): + if date_tools.dateInRange(parse(Event['date']), begin, end): + inRange.append(Event.tolist()) + inRange = pandas.DataFrame(inRange) + temp = Events.columns.tolist() + if inRange.empty: + return None + inRange.columns = temp + return inRange + + def getTaxonomies(dataframe): taxonomies = Taxonomies() taxonomies = list(taxonomies.keys()) @@ -233,19 +187,6 @@ def tagsListBuild(Events): return Tags -def selectInRange(Events, begin=None, end=None): - inRange = [] - for i, Event in Events.iterrows(): - if dateInRange(parse(Event['date']), begin, end): - inRange.append(Event.tolist()) - inRange = pandas.DataFrame(inRange) - temp = Events.columns.tolist() - if inRange.empty: - return None - inRange.columns = temp - return inRange - - def isTagIn(dataframe, tag): temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist() index = [] @@ -277,56 +218,10 @@ def getNbAttributePerEventCategoryType(attributes): def getNbOccurenceTags(Tags): return Tags.groupby('name').count()['id'] + # ############### Charts ################ -def createTable(colors, categ_types_hash, tablename='attribute_table.html'): - with open(tablename, 'w') as target: - target.write('\n\n\n\n\n') - for categ_name, types in categ_types_hash.items(): - table = pygal.Treemap(pretty_print=True) - target.write('\n

{}

\n'.format(colors[categ_name], categ_name)) - for d in types: - table.add(d['label'], d['value']) - target.write(table.render_table(transpose=True)) - target.write('\n\n') - - -def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'): - labels_categ = data.index.labels[0] - labels_types = data.index.labels[1] - names_categ = data.index.levels[0] - names_types = data.index.levels[1] - categ_types_hash = {} - for categ_id, type_val, total in zip(labels_categ, labels_types, data): - if not categ_types_hash.get(names_categ[categ_id]): - categ_types_hash[names_categ[categ_id]] = [] - dict_to_print = {'label': names_types[type_val], 'value': total} - categ_types_hash[names_categ[categ_id]].append(dict_to_print) - - colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash.keys()} - style = Style(background='transparent', - plot_background='#FFFFFF', - foreground='#111111', - foreground_strong='#111111', - foreground_subtle='#111111', - opacity='.6', - opacity_hover='.9', - transition='400ms ease-in', - colors=tuple(colors.values())) - - treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style) - treemap.title = title - treemap.print_values = True - treemap.print_labels = True - - for categ_name, types in categ_types_hash.items(): - treemap.add(categ_name, types) - - createTable(colors, categ_types_hash) - treemap.render_to_file(treename) - - def tagsToLineChart(dataframe, title, dates, colourDict): style = createTagsPlotStyle(dataframe, colourDict) line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)