move files from examples/treemap to examples/situational-awareness/

2016-07-13 15:24:36 +02:00 · 2016-07-13 15:24:36 +02:00 · caa8b963ec
parent e53f59bcbf
commit caa8b963ec
7 changed files with 465 additions and 0 deletions
--- a/examples/situational-awareness/README.md
+++ b/examples/situational-awareness/README.md
@ -0,0 +1,9 @@
+## Explanation
+
+* treemap.py is a script that generates an interactive SVG (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using the "last" or "searchall" examples.
+* It also generates an HTML document with a table (attribute\_table.html) containing the count for each type of attribute.
+* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
+
+## Requirements
+
+* [Pygal](https://github.com/Kozea/pygal/)
--- a/examples/situational-awareness/attribute_treemap.py
+++ b/examples/situational-awareness/attribute_treemap.py
--- a/examples/situational-awareness/style.css
+++ b/examples/situational-awareness/style.css
@ -0,0 +1,46 @@
+body 
+{
+    /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
+	font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
+}
+
+h1
+{
+	font-size: 16px;
+	width: 290px;
+	text-align:center;
+}
+
+/*** Stats Tables ***/
+
+table
+{
+	border-collapse: collapse;
+	border-spacing: 0;
+    border: 1px solid #cbcbcb;
+}
+
+tbody 
+{
+	font-size:12px;
+}
+
+table td 
+{
+	border-left: 1px solid #cbcbcb;
+	border-width: 0 0 0 1px;
+	width: 150px;	
+	margin: 0;
+	padding: 0.5em 1em;
+}
+
+
+table tr:nth-child(2n-1) td 
+{
+	background-color: #f2f2f2;
+}
+
+table tr td:first-child 
+{
+	font-weight: bold;
+}
--- a/examples/situational-awareness/tag_search.py
+++ b/examples/situational-awareness/tag_search.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pymisp import PyMISP
+from keys import misp_url, misp_key, misp_verifycert
+from datetime import datetime
+import argparse
+import json
+import tools
+
+def init(url, key):
+    '''
+    Build the PyMISP client for the given instance URL and API key.
+
+    The certificate-verification flag is misp_verifycert, imported from
+    keys, and 'json' is passed as the fourth PyMISP argument.
+    '''
+    client = PyMISP(url, key, misp_verifycert, 'json')
+    return client
+
+########## fetch data ##########
+
+def searchall(m, search, url):
+    '''
+    Call m.search_all(search) and write the JSON-encoded result to the
+    file 'data' in the current working directory.
+
+    url is accepted but not used by this function.
+    '''
+    found = m.search_all(search)
+    with open('data', 'w') as output:
+        payload = json.dumps(found)
+        output.write(payload)
+
+if __name__ == '__main__':
+    # Script entry point: fetch the events matching --search, then report how many
+    # of them carry a tag matching --tag, over the whole sample and over the
+    # period delimited by --begindate / --enddate.
+    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py or searchall.py) and count the events carrying a tag matching the given string, over the whole sample and over a given period.')
+    parser.add_argument("-s", "--search", help="string to search")
+    parser.add_argument("-t", "--tag", required=True, help="String to search in tags, can be composed. Example: \"ransomware|Ransomware\"")
+    parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
+    parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
+
+    args = parser.parse_args()
+
+    misp = init(misp_url, misp_key)
+
+    # An absent --search would otherwise be forwarded as None; search for the
+    # empty string instead.
+    if args.search is None:
+        args.search = ''
+    searchall(misp, args.search, misp_url)
+
+    if args.begindate is not None:
+        args.begindate = tools.toDatetime(args.begindate)
+    if args.enddate is not None:
+        args.enddate = tools.toDatetime(args.enddate)
+
+    # Statistics over every fetched event.
+    Events = tools.eventsListBuildFromArray('data')
+    TotalEvents = tools.getNbitems(Events)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.isTagIn(Tags, args.tag)
+    TotalTags = len(result)
+
+    # Same statistics restricted to the studied period.
+    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
+    TotalPeriodEvents = tools.getNbitems(Events)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.isTagIn(Tags, args.tag)
+    TotalPeriodTags = len(result)
+
+    if args.begindate is None:
+        beginText = '1970-01-01'
+    else:
+        beginText = str(args.begindate.date())
+    if args.enddate is None:
+        endText = str(datetime.now().date())
+    else:
+        endText = str(args.enddate.date())
+    text = 'Studied period: from ' + beginText + ' to ' + endText
+
+    print '\n========================================================'
+    print text
+    print 'During the studied period, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contain at least one tag with ' + args.tag + '.'
+    # 100.0 forces float division; with Python 2 integer operands the ratio
+    # would be truncated before round() ever sees it.
+    if TotalTags != 0:
+        print 'It represents ' + str(round(100.0 * TotalPeriodTags / TotalTags, 3)) + '% of the fetched events (' + str(TotalTags) + ') including this tag.'
+    if TotalEvents != 0:
+        print 'It also represents ' + str(round(100.0 * TotalPeriodTags / TotalEvents, 3)) + '% of all the fetched events (' + str(TotalEvents) + ').'
+
--- a/examples/situational-awareness/tags_count.py
+++ b/examples/situational-awareness/tags_count.py
@ -0,0 +1,70 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pymisp import PyMISP
+from keys import misp_url, misp_key, misp_verifycert
+from datetime import datetime
+import argparse
+import json
+import tools
+
+def init(url, key):
+    '''
+    Build the PyMISP client for the given instance URL and API key.
+
+    The certificate-verification flag is misp_verifycert, imported from
+    keys, and 'json' is passed as the fourth PyMISP argument.
+    '''
+    client = PyMISP(url, key, misp_verifycert, 'json')
+    return client
+
+########## fetch data ##########
+
+def searchall(m, search, url):
+    '''
+    Call m.search_all(search) and write the JSON-encoded result to the
+    file 'data' in the current working directory.
+
+    url is accepted but not used by this function.
+    '''
+    found = m.search_all(search)
+    with open('data', 'w') as output:
+        payload = json.dumps(found)
+        output.write(payload)
+
+if __name__ == '__main__':
+    # Script entry point: fetch the events matching --search and print, for the
+    # period delimited by --begindate / --enddate, the number of occurrences of
+    # each tag name.
+    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py or searchall.py) and count the occurrences of each tag attached to the events of a given period.')
+    parser.add_argument("-s", "--search", help="string to search")
+    parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
+    parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
+
+    args = parser.parse_args()
+
+    misp = init(misp_url, misp_key)
+
+    if args.search is None:
+        args.search = ''
+    searchall(misp, args.search, misp_url)
+
+    if args.begindate is not None:
+        args.begindate = tools.toDatetime(args.begindate)
+    if args.enddate is not None:
+        args.enddate = tools.toDatetime(args.enddate)
+
+    # Only the per-period counts are printed, so the tag list is built once,
+    # on the events of the studied period.
+    Events = tools.eventsListBuildFromArray('data')
+    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.getNbOccurenceTags(Tags)
+
+    if args.begindate is None:
+        beginText = '1970-01-01'
+    else:
+        beginText = str(args.begindate.date())
+    if args.enddate is None:
+        endText = str(datetime.now().date())
+    else:
+        endText = str(args.enddate.date())
+    text = 'Studied period: from ' + beginText + ' to ' + endText
+
+    print '\n========================================================'
+    print text
+    print result
--- a/examples/situational-awareness/test_attribute_treemap.html
+++ b/examples/situational-awareness/test_attribute_treemap.html
@ -0,0 +1,26 @@
+<html>
+	<head>
+		<style>
+		#stats
+		{
+			height: 746px;
+			margin-top: 100px;
+		}
+
+		#treemap
+		{
+			width: 1000px;
+		}
+		</style>
+	</head>
+	<body>
+		<table><tr>
+		<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td> 
+		<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
+		</tr></table>
+	<!--
+		<div id="stats"><iframe src="table.html"></iframe></div> 
+		<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
+	-->
+	</body>
+</html>
--- a/examples/situational-awareness/tools.py
+++ b/examples/situational-awareness/tools.py
@ -0,0 +1,245 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import json
+from json import JSONDecoder
+import random
+import pygal
+from pygal.style import Style
+import pandas as pd
+from datetime import datetime
+from datetime import timedelta
+from dateutil.parser import parse
+
+################ Tools ################
+
+def buildDoubleIndex(index1, index2, datatype):
+    '''
+    Build a two-level MultiIndex named ('event', datatype).
+
+    index2 is walked in order; every time one of its values equals 0 the
+    next entry of index1 is selected, and each value of index2 is paired
+    with the currently selected entry of index1.
+    '''
+    position = -1
+    outer = []
+    for inner in index2:
+        position += 1 if inner == 0 else 0
+        outer.append(index1[position])
+    pairs = list(zip(outer, index2))
+    return pd.MultiIndex.from_tuples(pairs, names=['event', datatype])
+
+def buildNewColumn(index2, column):
+    '''
+    Expand column so that it has one value per entry of index2.
+
+    index2 is walked in order; every time one of its values equals 0 the
+    next entry of column is selected, and the currently selected entry is
+    emitted once per value of index2.
+    '''
+    position = -1
+    expanded = []
+    for inner in index2:
+        position += 1 if inner == 0 else 0
+        expanded.append(column[position])
+    return expanded
+
+def dateInRange(datetimeTested, begin=None, end=None):
+    '''
+    Tell whether datetimeTested lies in the closed interval [begin, end].
+
+    begin defaults to 1970-01-01 and end defaults to datetime.now() when
+    they are None.
+    '''
+    if begin is None:
+        begin = datetime(1970,1,1)
+    if end is None:
+        end = datetime.now()
+    return begin <= datetimeTested <= end
+
+def addColumn(dataframe, columnList, columnName):
+    '''
+    Add (or overwrite) the column columnName of dataframe, in place, with
+    the values of columnList laid out on the dataframe's own index.
+    '''
+    dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index)
+
+def toDatetime(date):
+    '''
+    Convert a 'yyyy-mm-dd' string into a datetime at midnight.
+
+    The string is split on '-' and its first three fields are read as
+    year, month and day.
+    '''
+    fields = date.split('-')
+    year, month, day = int(fields[0]), int(fields[1]), int(fields[2])
+    return datetime(year, month, day)
+
+################ Formatting  ################
+
+def eventsListBuildFromList(filename):
+    '''
+    returns a structure listing all primary events stored in filename as
+    a sequence of JSON documents written one after the other
+
+    Each document becomes the row(s) of a DataFrame (one row per top-level
+    key), the 'attribute_count' column is converted to integers (missing
+    counts become 0) and the result is indexed by the 'id' column.
+    '''
+    with open(filename, 'r') as myfile:
+        s = myfile.read().replace('\n', '')
+    decoder = JSONDecoder()
+    s_len = len(s)
+    data = []
+    end = 0
+    while end < s_len:
+        # raw_decode does not skip leading whitespace, so step over any
+        # blanks left between (or after) the documents.
+        if s[end].isspace():
+            end += 1
+            continue
+        Event, end = decoder.raw_decode(s, idx=end)
+        data.append(pd.DataFrame.from_dict(Event, orient='index'))
+    Events = pd.concat(data)
+    # Rebuild the whole column at once: item-by-item chained assignment may
+    # write to a temporary copy, and missing counts must be the integer 0
+    # like every other value of the column.
+    Events['attribute_count'] = [0 if pd.isnull(count) else int(count) for count in Events['attribute_count']]
+    Events = Events.set_index('id')
+    return Events
+
+def eventsListBuildFromArray(filename):
+    '''
+    returns a structure listing all primary events in the sample
+
+    filename must hold a JSON object whose 'response' key is the list of
+    events. Each entry becomes the row(s) of a DataFrame (one row per
+    top-level key), the 'attribute_count' column is converted to integers
+    (missing counts become 0) and the result is indexed by the 'id' column.
+    '''
+    # The with-block closes the file even if the JSON is malformed.
+    with open(filename) as myfile:
+        jdata = json.load(myfile)
+    data = [pd.DataFrame.from_dict(e, orient='index') for e in jdata['response']]
+    Events = pd.concat(data)
+    # Rebuild the whole column at once: item-by-item chained assignment may
+    # write to a temporary copy, and missing counts must be the integer 0
+    # like every other value of the column.
+    Events['attribute_count'] = [0 if pd.isnull(count) else int(count) for count in Events['attribute_count']]
+    Events = Events.set_index('id')
+    return Events
+
+def attributesListBuild(Events):
+    '''
+    returns one DataFrame made of the concatenation of the DataFrames
+    built from each entry of the 'Attribute' column of Events
+    '''
+    frames = [pd.DataFrame(entry) for entry in Events['Attribute']]
+    return pd.concat(frames)
+
+def tagsListBuild(Events):
+    '''
+    returns a DataFrame listing every tag of Events, with the date of the
+    owning event in a 'date' column and an ('event', 'tag') MultiIndex
+
+    Events whose 'Tag' entry is not a list, or is an empty list, carry no
+    tag and are left out. pd.concat raises ValueError when no event at all
+    carries a tag.
+    '''
+    Tags = []
+    taggedIds = []
+    taggedDates = []
+    for eventId, Tag, date in zip(Events.index, Events['Tag'], Events['date']):
+        if not isinstance(Tag, list) or not Tag:
+            continue
+        Tags.append(pd.DataFrame(Tag))
+        # Remember which event each frame came from: the helpers below move
+        # on to the next event every time the per-event row number goes back
+        # to 0, so they must only be given the events that contributed rows.
+        taggedIds.append(eventId)
+        taggedDates.append(date)
+    Tags = pd.concat(Tags)
+    columnDate = buildNewColumn(Tags.index, taggedDates)
+    addColumn(Tags, columnDate, 'date')
+    index = buildDoubleIndex(taggedIds, Tags.index, 'tag')
+    Tags = Tags.set_index(index)
+    return Tags
+
+def selectInRange(Events, begin=None, end=None):
+    '''
+    returns the rows of Events whose 'date' (parsed with dateutil) lies
+    between begin and end, bounds included
+
+    begin and end are forwarded to dateInRange, which supplies the
+    defaults when they are None. The selected rows keep their original
+    index and columns, and an empty selection yields an empty DataFrame
+    instead of failing on a column-count mismatch.
+    '''
+    keep = [dateInRange(parse(date), begin, end) for date in Events['date']]
+    # An explicit boolean Series keeps the selection row-wise even when the
+    # list is empty (a bare empty list would select zero columns instead).
+    mask = pd.Series(keep, index=Events.index, dtype=bool)
+    return Events[mask]
+def isTagIn(dataframe, tag):
+    '''
+    returns the list of first-level index values (events) of dataframe
+    having at least one row whose 'name' matches tag
+
+    tag is used as a regular expression by Series.str.contains, so
+    alternatives such as "ransomware|Ransomware" are supported. Rows with
+    a missing name never match. Each event appears once, in order of first
+    appearance.
+    '''
+    matching = dataframe[dataframe['name'].str.contains(tag, na=False)]
+    index = []
+    seen = set()
+    for entry in matching.index.tolist():
+        event = entry[0]
+        if event not in seen:
+            seen.add(event)
+            index.append(event)
+    return index
+
+################ Basic Stats ################
+
+def getNbitems(dataframe):
+    '''
+    returns the number of entries in the index of dataframe
+    '''
+    rows = dataframe.index
+    return len(rows)
+
+def getNbAttributePerEventCategoryType(Attributes):
+    '''
+    returns, for each (event_id, category, type) group of Attributes, the
+    count of its 'id' column
+    '''
+    grouped = Attributes.groupby(['event_id', 'category', 'type'])
+    counts = grouped.count()
+    return counts['id']
+
+def getNbOccurenceTags(Tags):
+    '''
+    returns, for each distinct 'name' of Tags, the count of its 'id' column
+    '''
+    perName = Tags.groupby('name')
+    counts = perName.count()
+    return counts['id']
+
+################ Charts ################
+
+def createStyle(indexlevels):
+    '''
+    returns a pygal Style together with the list of colors it uses
+
+    One random '#RRGGBB' color is drawn for each entry of indexlevels[0].
+    '''
+    colorsList = ["#%06X" % random.randint(0, 0xFFFFFF) for _ in range(len(indexlevels[0]))]
+    settings = {
+        'background': 'transparent',
+        'plot_background': '#FFFFFF',
+        'foreground': '#111111',
+        'foreground_strong': '#111111',
+        'foreground_subtle': '#111111',
+        'opacity': '.6',
+        'opacity_hover': '.9',
+        'transition': '400ms ease-in',
+        'colors': tuple(colorsList),
+    }
+    return Style(**settings), colorsList
+
+def createLabelsTreemap(indexlevels, indexlabels):
+    '''
+    returns (categories, types per category) from the levels and the
+    integer labels of a two-level index
+
+    The category labels are walked in order; a new group of type names is
+    started every time the label differs from the running category counter
+    (which starts at 0 and is incremented at each new group).
+    '''
+    categories_levels, types_levels = indexlevels[0], indexlevels[1]
+    categories_labels, types_labels = indexlabels[0], indexlabels[1]
+
+    cattypes = []
+    current = []
+    expected = 0
+    for position in range(len(categories_labels)):
+        if categories_labels[position] != expected:
+            cattypes.append(current)
+            current = []
+            expected += 1
+        current.append(types_levels[types_labels[position]])
+    cattypes.append(current)
+
+    return categories_levels, cattypes
+
+
+def createTable(data, title, tablename, colorsList):
+    '''
+    Write an HTML page holding one table per category of data.
+
+    data is expected to be indexed by a two-level (category, type)
+    MultiIndex; each category gets a heading colored with the matching
+    entry of colorsList followed by the pygal table of its types. The page
+    is written to tablename, or to 'attribute_table.html' when tablename
+    is None. title is accepted but not used.
+    '''
+    if tablename is None:
+        tablename = 'attribute_table.html'
+    index = data.index
+    # MultiIndex.labels was renamed to MultiIndex.codes in newer pandas.
+    codes = index.codes if hasattr(index, 'codes') else index.labels
+    categories, types = createLabelsTreemap(index.levels, codes)
+    it = 0
+
+    # The with-block closes the file even if rendering fails half-way.
+    with open(tablename, 'w') as target:
+        target.write('<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>')
+        for i in range(len(categories)):
+            table = pygal.Treemap(pretty_print=True)
+            target.write('\n <h1 style="color:'+ colorsList[i]+ ';">' + categories[i] + '</h1>\n')
+            for typ in types[i]:
+                table.add(typ, data[it])
+                it += 1
+            target.write(table.render_table(transpose=True))
+        target.write('\n</body>\n</html>')
+
+
+def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = 'attribute_table.html'):
+    '''
+    Render data as a pygal treemap (SVG file) and as an HTML table.
+
+    data is expected to be indexed by a two-level (category, type)
+    MultiIndex: each category becomes a series of the treemap and each
+    type a labelled value of that series. The SVG is written to treename
+    ('attribute_treemap.svg' when None) and the table is produced by
+    createTable with the same colors as the treemap.
+    '''
+    style, colorsList = createStyle(data.index.levels)
+    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style = style)
+    treemap.title = title
+    treemap.print_values = True
+    treemap.print_labels = True
+
+    index = data.index
+    # MultiIndex.labels was renamed to MultiIndex.codes in newer pandas.
+    codes = index.codes if hasattr(index, 'codes') else index.labels
+    categories, types = createLabelsTreemap(index.levels, codes)
+    it = 0
+
+    for i in range(len(categories)):
+        types_labels = []
+        for typ in types[i]:
+            types_labels.append({'label': typ, 'value': data[it]})
+            it += 1
+        treemap.add(categories[i], types_labels)
+
+    createTable(data, 'Attribute Distribution', tablename, colorsList)
+    if treename is None:
+        treename = 'attribute_treemap.svg'
+    treemap.render_to_file(treename)