From caa8b963ec7727bfa05ce6375a8f69858e3a4598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A9borah=20Servili?= Date: Wed, 13 Jul 2016 15:24:36 +0200 Subject: [PATCH] move files from examples/treemap to examples/situational-awareness/ --- examples/situational-awareness/README.md | 9 + .../attribute_treemap.py | 0 examples/situational-awareness/style.css | 46 ++++ examples/situational-awareness/tag_search.py | 69 +++++ examples/situational-awareness/tags_count.py | 70 +++++ .../test_attribute_treemap.html | 26 ++ examples/situational-awareness/tools.py | 245 ++++++++++++++++++ 7 files changed, 465 insertions(+) create mode 100644 examples/situational-awareness/README.md rename examples/{statistics => situational-awareness}/attribute_treemap.py (100%) create mode 100644 examples/situational-awareness/style.css create mode 100644 examples/situational-awareness/tag_search.py create mode 100644 examples/situational-awareness/tags_count.py create mode 100644 examples/situational-awareness/test_attribute_treemap.html create mode 100644 examples/situational-awareness/tools.py diff --git a/examples/situational-awareness/README.md b/examples/situational-awareness/README.md new file mode 100644 index 0000000..f0e4b19 --- /dev/null +++ b/examples/situational-awareness/README.md @@ -0,0 +1,9 @@ +## Explanation + +* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treepmap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples. +* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute. +* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time. 
+ +## Requierements + +* [Pygal](https://github.com/Kozea/pygal/) diff --git a/examples/statistics/attribute_treemap.py b/examples/situational-awareness/attribute_treemap.py similarity index 100% rename from examples/statistics/attribute_treemap.py rename to examples/situational-awareness/attribute_treemap.py diff --git a/examples/situational-awareness/style.css b/examples/situational-awareness/style.css new file mode 100644 index 0000000..8c5313b --- /dev/null +++ b/examples/situational-awareness/style.css @@ -0,0 +1,46 @@ +body +{ + /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ + font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; +} + +h1 +{ + font-size: 16px; + width: 290px; + text-align:center; +} + +/*** Stats Tables ***/ + +table +{ + border-collapse: collapse; + border-spacing: 0; + border: 1px solid #cbcbcb; +} + +tbody +{ + font-size:12px; +} + +table td +{ + border-left: 1px solid #cbcbcb; + border-width: 0 0 0 1px; + width: 150px; + margin: 0; + padding: 0.5em 1em; +} + + +table tr:nth-child(2n-1) td +{ + background-color: #f2f2f2; +} + +table tr td:first-child +{ + font-weight: bold; +} diff --git a/examples/situational-awareness/tag_search.py b/examples/situational-awareness/tag_search.py new file mode 100644 index 0000000..a04f54a --- /dev/null +++ b/examples/situational-awareness/tag_search.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +from datetime import datetime +import argparse +import json +import tools + +def init(url, key): + return PyMISP(url, key, misp_verifycert, 'json') + +########## fetch data ########## + +def searchall(m, search, url): + result = m.search_all(search) + with open('data', 'w') as f: + f.write(json.dumps(result)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting 
the distribution of attributes in this sample.') + parser.add_argument("-s", "--search", help="string to search") + parser.add_argument("-t", "--tag", required=True, help="String to search in tags, can be composed. Example: \"ransomware|Ransomware\"") + parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)") + parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()") + + args = parser.parse_args() + + misp = init(misp_url, misp_key) + + searchall(misp, args.search, misp_url) + + if args.begindate is not None: + args.begindate = tools.toDatetime(args.begindate) + if args.enddate is not None: + args.enddate = tools.toDatetime(args.enddate) + + Events = tools.eventsListBuildFromArray('data') + TotalEvents = tools.getNbitems(Events) + Tags = tools.tagsListBuild(Events) + result = tools.isTagIn(Tags, args.tag) + TotalTags = len(result) + + Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate) + TotalPeriodEvents = tools.getNbitems(Events) + Tags = tools.tagsListBuild(Events) + result = tools.isTagIn(Tags, args.tag) + TotalPeriodTags = len(result) + + text = 'Studied pediod: from ' + if args.begindate is None: + text = text + '1970-01-01' + else: + text = text + str(args.begindate.date()) + text = text + ' to ' + if args.enddate is None: + text = text + str(datetime.now().date()) + else: + text = text + str(args.enddate.date()) + + print '\n========================================================' + print text + print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.' 
+ if TotalTags != 0: + print 'It represents ' + str(round(100*TotalPeriodTags/TotalTags, 3)) + '% of the fetched events (' + str(TotalTags) + ') including this tag.' + if TotalEvents != 0: + print 'It also represents ' + str(round(100*TotalPeriodTags/TotalEvents, 3)) + '% of all the fetched events (' + str(TotalEvents) + ').' + diff --git a/examples/situational-awareness/tags_count.py b/examples/situational-awareness/tags_count.py new file mode 100644 index 0000000..cff5d9b --- /dev/null +++ b/examples/situational-awareness/tags_count.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +from pymisp import PyMISP +from keys import misp_url, misp_key, misp_verifycert +from datetime import datetime +import argparse +import json +import tools + +def init(url, key): + return PyMISP(url, key, misp_verifycert, 'json') + +########## fetch data ########## + +def searchall(m, search, url): + result = m.search_all(search) + with open('data', 'w') as f: + f.write(json.dumps(result)) + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.') + parser.add_argument("-s", "--search", help="string to search") + parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)") + parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()") + + args = parser.parse_args() + + misp = init(misp_url, misp_key) + + if args.search is None: + args.search = '' + searchall(misp, args.search, misp_url) + + if args.begindate is not None: + args.begindate = tools.toDatetime(args.begindate) + if args.enddate is not None: + 
args.enddate = tools.toDatetime(args.enddate) + + Events = tools.eventsListBuildFromArray('data') + TotalEvents = tools.getNbitems(Events) + Tags = tools.tagsListBuild(Events) + result = tools.getNbOccurenceTags(Tags) + TotalTags = tools.getNbitems(Tags) + + Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate) + TotalPeriodEvents = tools.getNbitems(Events) + Tags = tools.tagsListBuild(Events) + result = tools.getNbOccurenceTags(Tags) + TotalPeriodTags = tools.getNbitems(Tags) + + text = 'Studied pediod: from ' + if args.begindate is None: + text = text + '1970-01-01' + else: + text = text + str(args.begindate.date()) + text = text + ' to ' + if args.enddate is None: + text = text + str(datetime.now().date()) + else: + text = text + str(args.enddate.date()) + + print '\n========================================================' + print text + print result + ''' + print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.' + print 'It represents ' + str(round(100*TotalPeriodTags/TotalTags,3)) + '% of the fetched events (' + str(TotalTags) + ') including this tag.' + print 'It also represents ' + str(round(100*TotalPeriodTags/TotalEvents,3)) + '% of all the fetched events (' + str(TotalEvents) + ').' + ''' diff --git a/examples/situational-awareness/test_attribute_treemap.html b/examples/situational-awareness/test_attribute_treemap.html new file mode 100644 index 0000000..d6e8fc4 --- /dev/null +++ b/examples/situational-awareness/test_attribute_treemap.html @@ -0,0 +1,26 @@ + + + + + + + + +
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Helpers shared by the situational-awareness examples
(examples/situational-awareness/tools.py).

The module turns a dump of events into pandas structures ("Formatting"),
computes simple counts on them ("Basic Stats") and renders pygal
treemaps/tables ("Charts").
'''

import json
from json import JSONDecoder
import random
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse

try:
    import pygal
    from pygal.style import Style
except ImportError:
    # Only the chart helpers need pygal; the pandas-based helpers used by
    # tag_search.py / tags_count.py must stay importable without it.
    pygal = None
    Style = None

################ Tools ################

def buildNewColumn(index2, column):
    '''
    Repeat column[k] for every entry of the k-th group of index2.

    index2 is a flat sequence of per-group positions (0, 1, 2, 0, 1, 0, ...);
    every 0 marks the beginning of a new group. The result has one value per
    entry of index2.
    '''
    group = -1
    newcolumn = []
    for position in index2:
        if position == 0:
            group += 1
        newcolumn.append(column[group])
    return newcolumn

def buildDoubleIndex(index1, index2, datatype):
    '''
    Build a two-level MultiIndex named ('event', datatype).

    The first level repeats index1[k] for every entry of the k-th group of
    index2 (same grouping rule as buildNewColumn), the second level is index2
    itself.
    '''
    tuples = list(zip(buildNewColumn(index2, index1), index2))
    return pd.MultiIndex.from_tuples(tuples, names=['event', datatype])

def dateInRange(datetimeTested, begin=None, end=None):
    '''
    Return True when begin <= datetimeTested <= end.

    begin defaults to 1970-01-01 and end to the current local time.
    '''
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end

def addColumn(dataframe, columnList, columnName):
    '''
    Add (in place) the column columnName holding columnList, row by row.

    Values are assigned by position, so the frame may carry duplicate index
    labels. Raises ValueError when the lengths differ.
    '''
    dataframe[columnName] = list(columnList)

def toDatetime(date):
    '''
    Convert a 'yyyy-mm-dd' string into a datetime (midnight).
    '''
    parts = date.split('-')
    return datetime(int(parts[0]), int(parts[1]), int(parts[2]))

################ Formatting ################

def _toCount(value):
    '''
    Normalise one attribute_count cell to an int (missing values become 0).
    '''
    if value is None or pd.isnull(value):
        return 0
    return int(value)

def _eventsToDataFrame(events):
    '''
    Concatenate decoded events into one DataFrame indexed by the 'id' column.

    Each element is passed to DataFrame.from_dict(..., orient='index'); the
    callers presumably feed {'Event': {...}} mappings -- verify against the
    data source. An empty input yields an empty DataFrame.
    '''
    if not events:
        return pd.DataFrame()
    frames = [pd.DataFrame.from_dict(event, orient='index') for event in events]
    Events = pd.concat(frames)
    # Whole-column assignment: per-cell chained assignment is unreliable on
    # this frame (every row carries the same index label at this point).
    Events['attribute_count'] = [_toCount(v) for v in Events['attribute_count']]
    return Events.set_index('id')

def eventsListBuildFromList(filename):
    '''
    returns a structure listing all primary events stored in filename as a
    sequence of concatenated JSON documents
    '''
    with open(filename, 'r') as myfile:
        text = myfile.read().replace('\n', '')
    decoder = JSONDecoder()
    length = len(text)
    events = []
    pos = 0
    while pos < length:
        # raw_decode does not accept leading whitespace, skip it ourselves.
        if text[pos].isspace():
            pos += 1
            continue
        event, pos = decoder.raw_decode(text, idx=pos)
        events.append(event)
    return _eventsToDataFrame(events)

def eventsListBuildFromArray(filename):
    '''
    returns a structure listing all primary events in the sample

    filename must contain one JSON object whose 'response' key holds the
    array of events.
    '''
    with open(filename) as myfile:
        jdata = json.load(myfile)
    return _eventsToDataFrame(list(jdata['response']))

def attributesListBuild(Events):
    '''
    returns one DataFrame concatenating the 'Attribute' entries of all events
    '''
    return pd.concat([pd.DataFrame(Attribute) for Attribute in Events['Attribute']])

def tagsListBuild(Events):
    '''
    returns a DataFrame of all tags, indexed by (event, position of the tag
    inside the event), with the 'date' of the owning event added as a column

    Events whose 'Tag' cell is not a non-empty list are ignored. When no
    event carries a tag an empty frame with the columns 'id', 'name' and
    'date' is returned.
    '''
    frames = []
    eventIds = []
    dates = []
    if 'Tag' in Events.columns:
        for eventId, Tag, date in zip(Events.index, Events['Tag'], Events['date']):
            if not isinstance(Tag, list) or not Tag:
                continue
            frames.append(pd.DataFrame(Tag))
            # Keep id/date of the events actually kept, so that the k-th tag
            # group is paired with its own event even when others are skipped.
            eventIds.append(eventId)
            dates.append(date)
    if not frames:
        return pd.DataFrame(columns=['id', 'name', 'date'])
    Tags = pd.concat(frames)
    addColumn(Tags, buildNewColumn(Tags.index, dates), 'date')
    return Tags.set_index(buildDoubleIndex(eventIds, Tags.index, 'tag'))

def selectInRange(Events, begin=None, end=None):
    '''
    returns the events whose 'date' lies in [begin, end] (see dateInRange)

    The index and the columns of Events are preserved; an empty input or an
    empty selection gives an empty frame instead of an error.
    '''
    if len(Events.index) == 0:
        return Events
    positions = [pos for pos, date in enumerate(Events['date'])
                 if dateInRange(parse(date), begin, end)]
    return Events.iloc[positions]

def isTagIn(dataframe, tag):
    '''
    returns the list of events (first index level of dataframe, each one
    once, in order of appearance) having a tag whose name matches tag

    tag is used as a regular expression, so alternatives such as
    "ransomware|Ransomware" are possible. dataframe is a structure as built
    by tagsListBuild.
    '''
    if len(dataframe.index) == 0:
        return []
    mask = dataframe['name'].str.contains(tag, na=False)
    events = []
    seen = set()
    for entry in dataframe[mask.values].index.tolist():
        event = entry[0]
        if event not in seen:
            seen.add(event)
            events.append(event)
    return events

################ Basic Stats ################

def getNbitems(dataframe):
    '''
    returns the number of rows of dataframe
    '''
    return len(dataframe.index)

def getNbAttributePerEventCategoryType(Attributes):
    '''
    returns the number of attributes per (event_id, category, type)
    '''
    return Attributes.groupby(['event_id', 'category', 'type']).count()['id']

def getNbOccurenceTags(Tags):
    '''
    returns the number of occurrences of each tag name
    '''
    return Tags.groupby('name').count()['id']

################ Charts ################

def _requirePygal():
    '''
    Fail with an explicit message when pygal could not be imported.
    '''
    if pygal is None:
        raise ImportError('pygal is required to build charts (https://github.com/Kozea/pygal/)')

def _indexCodes(index):
    '''
    Return the integer codes of a MultiIndex ('codes', or 'labels' on the
    pandas versions that predate the rename).
    '''
    codes = getattr(index, 'codes', None)
    if codes is None:
        codes = index.labels
    return codes

def createStyle(indexlevels):
    '''
    returns a pygal Style using one random colour per entry of
    indexlevels[0], together with the list of these colours
    '''
    _requirePygal()
    colorsList = ["#%06X" % random.randint(0, 0xFFFFFF) for _ in range(len(indexlevels[0]))]
    style = Style(
        background='transparent',
        plot_background='#FFFFFF',
        foreground='#111111',
        foreground_strong='#111111',
        foreground_subtle='#111111',
        opacity='.6',
        opacity_hover='.9',
        transition='400ms ease-in',
        colors=tuple(colorsList))
    return style, colorsList

def createLabelsTreemap(indexlevels, indexlabels):
    '''
    returns (categories, types) where categories is indexlevels[0] and
    types[i] lists, in row order, the type names of the rows belonging to
    categories[i]

    indexlevels / indexlabels are the levels and the integer codes of a
    two-level index. Rows are grouped by their category code, so a category
    without rows gets an empty list and negative (missing) codes are ignored.
    '''
    categories_levels = indexlevels[0]
    types_levels = indexlevels[1]
    cattypes = [[] for _ in range(len(categories_levels))]
    for cat_code, type_code in zip(indexlabels[0], indexlabels[1]):
        if cat_code < 0 or type_code < 0:
            continue
        cattypes[cat_code].append(types_levels[type_code])
    return categories_levels, cattypes


def createTable(data, title, tablename, colorsList):
    '''
    Write to tablename (default 'attribute_table.html') one pygal table per
    category of data.

    title and colorsList are accepted but not used.
    '''
    _requirePygal()
    if tablename is None:
        tablename = 'attribute_table.html'
    categories, types = createLabelsTreemap(data.index.levels, _indexCodes(data.index))
    it = 0
    # NOTE(review): the string literals written below look like HTML whose
    # tags were stripped from the copy under review (page header, category
    # heading, page footer); only the escape sequences still visible are
    # kept here -- restore the markup from upstream before relying on it.
    with open(tablename, 'w') as target:
        target.write('\n\n\n\n\n')
        for i in range(len(categories)):
            table = pygal.Treemap(pretty_print=True)
            target.write('\n' + categories[i] + '\n')
            for typ in types[i]:
                table.add(typ, data[it])
                it += 1
            target.write(table.render_table(transpose=True))
        target.write('\n\n')


def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = 'attribute_table.html'):
    '''
    Render data as a treemap in treename (default 'attribute_treemap.svg')
    and write the companion table with createTable.

    data is indexed by a two-level index (category, type); its values are
    read by position, in row order.
    '''
    style, colorsList = createStyle(data.index.levels)
    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style = style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    categories, types = createLabelsTreemap(data.index.levels, _indexCodes(data.index))
    it = 0

    for i in range(len(categories)):
        types_labels = []
        for typ in types[i]:
            types_labels.append({'label': typ, 'value': data[it]})
            it += 1
        treemap.add(categories[i], types_labels)

    createTable(data, 'Attribute Distribution', tablename, colorsList)
    if treename is None:
        treename = 'attribute_treemap.svg'
    treemap.render_to_file(treename)