move files from examples/treemap to examples/situational-awareness/

pull/23/head
Déborah Servili 2016-07-13 15:24:36 +02:00
parent e53f59bcbf
commit caa8b963ec
7 changed files with 465 additions and 0 deletions

View File

@ -0,0 +1,9 @@
## Explanation
* treemap.py is a script that generates an interactive SVG (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using the "last" or "searchall" examples.
* It also generates an HTML document with a table (attribute\_table.html) containing the count for each type of attribute.
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
## Requirements
* [Pygal](https://github.com/Kozea/pygal/)

View File

@ -0,0 +1,46 @@
/* Page-wide monospace font stack; the sans-serif alternative below is kept disabled. */
body
{
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
}
/* Headings: small, fixed-width and centered. */
h1
{
font-size: 16px;
width: 290px;
text-align:center;
}
/*** Stats Tables ***/
/* Collapsed borders with a single 1px grey outline around each table. */
table
{
border-collapse: collapse;
border-spacing: 0;
border: 1px solid #cbcbcb;
}
tbody
{
font-size:12px;
}
/* Cells: border-width zeroes every side except the left one, so only a
   vertical separator is drawn between columns. */
table td
{
border-left: 1px solid #cbcbcb;
border-width: 0 0 0 1px;
width: 150px;
margin: 0;
padding: 0.5em 1em;
}
/* 2n-1 selects the odd rows (1st, 3rd, ...), which get a light grey background. */
table tr:nth-child(2n-1) td
{
background-color: #f2f2f2;
}
/* The first cell of every row is rendered in bold. */
table tr td:first-child
{
font-weight: bold;
}

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import json
import tools
def init(url, key):
    '''
    Build the PyMISP client used by this script.

    url -- base URL of the instance
    key -- API key used to authenticate

    Certificate verification follows the imported misp_verifycert setting
    and the 'json' output type is always requested.
    '''
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
########## fetch data ##########
def searchall(m, search, url):
    '''
    Run search_all(search) on the client m and save the response, serialised
    as JSON, into a file named 'data' in the current directory.

    url is accepted but not used by this function.
    '''
    response = m.search_all(search)
    with open('data', 'w') as dump_file:
        serialized = json.dumps(response)
        dump_file.write(serialized)
if __name__ == '__main__':
    # Entry point: fetch the events matching --search, then report how many
    # of them carry a tag matching --tag, both over the whole fetched sample
    # and inside the optional [--begindate, --enddate] window.
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap representing the distribution of attributes in this sample.')
    parser.add_argument("-s", "--search", help="string to search")
    parser.add_argument("-t", "--tag", required=True, help="String to search in tags, can be composed. Example: \"ransomware|Ransomware\"")
    parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # No --search given: search with an empty string rather than None.
    if args.search is None:
        args.search = ''

    # searchall() writes the raw response to the file 'data', read back below.
    searchall(misp, args.search, misp_url)

    if args.begindate is not None:
        args.begindate = tools.toDatetime(args.begindate)
    if args.enddate is not None:
        args.enddate = tools.toDatetime(args.enddate)

    # Figures for the whole fetched sample.
    Events = tools.eventsListBuildFromArray('data')
    TotalEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.isTagIn(Tags, args.tag)
    TotalTags = len(result)

    # Same figures restricted to the requested period.
    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.isTagIn(Tags, args.tag)
    TotalPeriodTags = len(result)

    # Missing bounds are displayed as the defaults announced in the help text.
    if args.begindate is None:
        begin_label = '1970-01-01'
    else:
        begin_label = str(args.begindate.date())
    if args.enddate is None:
        end_label = str(datetime.now().date())
    else:
        end_label = str(args.enddate.date())
    text = 'Studied period: from ' + begin_label + ' to ' + end_label

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('\n========================================================')
    print(text)
    print('During the studied period, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
    # 100.0 forces float division; with plain ints Python 2 would truncate
    # the ratio to a whole number before round() ever sees it.
    if TotalTags != 0:
        print('It represents ' + str(round(100.0 * TotalPeriodTags / TotalTags, 3)) + '% of the fetched events (' + str(TotalTags) + ') including this tag.')
    if TotalEvents != 0:
        print('It also represents ' + str(round(100.0 * TotalPeriodTags / TotalEvents, 3)) + '% of all the fetched events (' + str(TotalEvents) + ').')

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import json
import tools
def init(url, key):
    '''
    Build the PyMISP client used by this script.

    url -- base URL of the instance
    key -- API key used to authenticate

    Certificate verification follows the imported misp_verifycert setting
    and the 'json' output type is always requested.
    '''
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
########## fetch data ##########
def searchall(m, search, url):
    '''
    Run search_all(search) on the client m and save the response, serialised
    as JSON, into a file named 'data' in the current directory.

    url is accepted but not used by this function.
    '''
    response = m.search_all(search)
    with open('data', 'w') as dump_file:
        serialized = json.dumps(response)
        dump_file.write(serialized)
if __name__ == '__main__':
    # Entry point: fetch the events matching --search and print, for the
    # optional [--begindate, --enddate] window, how often each tag name occurs.
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap representing the distribution of attributes in this sample.')
    parser.add_argument("-s", "--search", help="string to search")
    parser.add_argument("-b", "--begindate", help="The research will look for Tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for Tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # No --search given: search with an empty string rather than None.
    if args.search is None:
        args.search = ''

    # searchall() writes the raw response to the file 'data', read back below.
    searchall(misp, args.search, misp_url)

    if args.begindate is not None:
        args.begindate = tools.toDatetime(args.begindate)
    if args.enddate is not None:
        args.enddate = tools.toDatetime(args.enddate)

    # Only the events inside the requested period contribute to the output.
    Events = tools.eventsListBuildFromArray('data')
    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)

    # Missing bounds are displayed as the defaults announced in the help text.
    if args.begindate is None:
        begin_label = '1970-01-01'
    else:
        begin_label = str(args.begindate.date())
    if args.enddate is None:
        end_label = str(datetime.now().date())
    else:
        end_label = str(args.enddate.date())
    text = 'Studied period: from ' + begin_label + ' to ' + end_label

    # Single-argument print(...) behaves the same under Python 2 and 3.
    print('\n========================================================')
    print(text)
    print(result)

View File

@ -0,0 +1,26 @@
<html>
<!-- Preview page: shows attribute_table.html and attribute_treemap.svg side by side. -->
<head>
<style>
/* Frame holding the statistics table. */
#stats
{
height: 746px;
margin-top: 100px;
}
/* Cell holding the SVG treemap. */
#treemap
{
width: 1000px;
}
</style>
</head>
<body>
<!-- One-row layout table: table frame in the first cell, treemap in the second. -->
<table><tr>
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
</tr></table>
<!-- Disabled alternative layout using divs: -->
<!--
<div id="stats"><iframe src="table.html"></iframe></div>
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
-->
</body>
</html>

View File

@ -0,0 +1,245 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
from json import JSONDecoder
import random
import pygal
from pygal.style import Style
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
################ Tools ################
def buildDoubleIndex(index1, index2, datatype):
    '''
    Build a two-level MultiIndex named ('event', datatype).

    index2 is a flat sequence of inner labels in which every 0 marks the
    start of a new group; each group is paired with the next entry of
    index1, taken in order.
    '''
    cursor = -1
    outer_labels = []
    for inner_label in index2:
        # A 0 in the inner labels means the next outer entry begins here.
        if inner_label == 0:
            cursor += 1
        outer_labels.append(index1[cursor])
    pairs = list(zip(outer_labels, index2))
    return pd.MultiIndex.from_tuples(pairs, names=['event', datatype])
def buildNewColumn(index2, column):
    '''
    Repeat the values of column so that they line up with index2.

    index2 is a flat sequence of inner labels in which every 0 marks the
    start of a new group; all rows of a group receive the same value, taken
    from column in order. Returns a plain list as long as index2.
    '''
    cursor = -1
    expanded = []
    for inner_label in index2:
        # A 0 in the inner labels means the next value of column applies.
        if inner_label == 0:
            cursor += 1
        expanded.append(column[cursor])
    return expanded
def dateInRange(datetimeTested, begin=None, end=None):
    '''
    Tell whether datetimeTested lies in the closed interval [begin, end].

    begin -- lower bound; None means 1970-01-01
    end   -- upper bound; None means the current local time
    '''
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end


def addColumn(dataframe, columnList, columnName):
    '''
    Add (or overwrite) the column columnName of dataframe, in place.

    columnList supplies one value per row, in row order; it is wrapped in a
    Series carrying the dataframe's own index before being assigned.
    '''
    dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index)
def toDatetime(date):
    '''
    Convert a 'yyyy-mm-dd' string into a datetime at midnight.

    Only the first three dash-separated fields are read.
    '''
    fields = date.split('-')
    year, month, day = int(fields[0]), int(fields[1]), int(fields[2])
    return datetime(year, month, day)
################ Formatting ################
def eventsListBuildFromList(filename):
    '''
    Load events from a file holding several JSON documents written one after
    another (not wrapped in an array) and return them as one DataFrame
    indexed by event 'id'.

    filename -- path of the file to read

    Each decoded document is turned into rows with
    DataFrame.from_dict(..., orient='index'). The 'attribute_count' column is
    normalised to integers, missing values becoming 0.

    Raises ValueError when the file holds no document (nothing to
    concatenate) or when its content is not valid JSON.
    '''
    with open(filename, 'r') as myfile:
        s = myfile.read().replace('\n', '').strip()

    decoder = JSONDecoder()
    s_len = len(s)
    documents = []
    end = 0
    while end < s_len:
        document, end = decoder.raw_decode(s, idx=end)
        documents.append(document)
        # raw_decode does not accept leading whitespace, so skip any blanks
        # separating two documents before decoding the next one.
        while end < s_len and s[end].isspace():
            end += 1

    frames = [pd.DataFrame.from_dict(document, orient='index') for document in documents]
    Events = pd.concat(frames)

    # Whole-column assignment instead of element-wise chained assignment,
    # which may silently write to a temporary copy.
    Events['attribute_count'] = [0 if pd.isnull(count) else int(count)
                                 for count in Events['attribute_count']]
    return Events.set_index('id')
def eventsListBuildFromArray(filename):
    '''
    Returns a structure listing all primary events in the sample.

    filename -- path of a JSON file whose top-level 'response' entry is the
                list of events

    Each entry of the list is turned into rows with
    DataFrame.from_dict(..., orient='index'); the rows are concatenated and
    indexed by event 'id'. The 'attribute_count' column is normalised to
    integers, missing values becoming 0.

    Raises ValueError when the 'response' list is empty (nothing to
    concatenate).
    '''
    # The with-block guarantees the file is closed once parsed.
    with open(filename) as source:
        jdata = json.load(source)

    frames = [pd.DataFrame.from_dict(entry, orient='index') for entry in jdata['response']]
    Events = pd.concat(frames)

    # Whole-column assignment instead of element-wise chained assignment,
    # which may silently write to a temporary copy.
    Events['attribute_count'] = [0 if pd.isnull(count) else int(count)
                                 for count in Events['attribute_count']]
    return Events.set_index('id')
def attributesListBuild(Events):
    '''
    Turn every entry of the 'Attribute' column of Events into its own
    DataFrame and return the concatenation of all of them.
    '''
    frames = [pd.DataFrame(attributes) for attributes in Events['Attribute']]
    return pd.concat(frames)
def tagsListBuild(Events):
    '''
    Build one DataFrame listing every tag of every event.

    Events -- DataFrame with a 'Tag' column (a list of tag records per
              event, anything else when the event has no tag) and a 'date'
              column

    The result has one row per tag, the columns of the tag records plus a
    'date' column holding the date of the owning event, and a two-level
    index ('event', 'tag'): the owning event's index label and the position
    of the tag inside that event.

    Events whose 'Tag' entry is not a list, or is an empty list, contribute
    no row. The event label and date are recorded together with each tag, so
    skipped events cannot shift the labels of the events that follow them.

    Raises ValueError when no event carries any tag (nothing to concatenate).
    '''
    frames = []
    event_labels = []
    dates = []
    for event_label, tags, date in zip(Events.index, Events['Tag'], Events['date']):
        if not isinstance(tags, list) or not tags:
            continue
        frame = pd.DataFrame(tags)
        frames.append(frame)
        # One label and one date per tag row of this event.
        event_labels.extend([event_label] * len(frame))
        dates.extend([date] * len(frame))

    Tags = pd.concat(frames)
    Tags['date'] = dates
    # Tags.index still holds the per-event positions (0, 1, ...) at this point.
    Tags.index = pd.MultiIndex.from_arrays([event_labels, Tags.index], names=['event', 'tag'])
    return Tags
def selectInRange(Events, begin=None, end=None):
    '''
    Return the events whose 'date' falls between begin and end.

    Events -- DataFrame with a 'date' column of date strings
    begin  -- lower bound passed to dateInRange (None: its default)
    end    -- upper bound passed to dateInRange (None: its default)

    The returned frame keeps every column of Events and is re-indexed from 0;
    the original index labels are not kept. When no event matches, an empty
    frame with the same columns is returned.
    '''
    positions = [position
                 for position, date in enumerate(Events['date'])
                 if dateInRange(parse(date), begin, end)]
    # iloc with an empty list still yields a frame carrying all the columns.
    return Events.iloc[positions].reset_index(drop=True)
def isTagIn(dataframe, tag):
    '''
    List the events having at least one tag whose name matches tag.

    dataframe -- tags DataFrame with a 'name' column and a multi-level index
                 whose first level identifies the event
    tag       -- pattern looked up in the tag names; it is interpreted as a
                 regular expression, so alternatives such as
                 "ransomware|Ransomware" are supported

    Returns the matching event labels, each listed once, in order of first
    appearance.
    '''
    # na=False: a missing name counts as "no match" instead of poisoning the mask.
    matching = dataframe[dataframe['name'].str.contains(tag, na=False)].index.tolist()
    index = []
    seen = set()
    for entry in matching:
        event = entry[0]
        if event not in seen:
            seen.add(event)
            index.append(event)
    return index
################ Basic Stats ################
def getNbitems(dataframe):
    '''
    Return the number of entries in the index of dataframe.
    '''
    row_labels = dataframe.index
    return len(row_labels)
def getNbAttributePerEventCategoryType(Attributes):
    '''
    Count attributes per (event_id, category, type) combination.

    Returns the 'id' column of the grouped counts, i.e. the number of
    non-missing ids in each group.
    '''
    grouped = Attributes.groupby(['event_id', 'category', 'type'])
    counts = grouped.count()
    return counts['id']
def getNbOccurenceTags(Tags):
    '''
    Count how many times each tag name occurs.

    Returns the 'id' column of the counts grouped by 'name', i.e. the number
    of non-missing ids for every name.
    '''
    by_name = Tags.groupby('name')
    counts = by_name.count()
    return counts['id']
################ Charts ################
def createStyle(indexlevels):
    '''
    Build a pygal Style with one random colour per entry of indexlevels[0].

    Returns a tuple (style, colours) where colours is the list of "#RRGGBB"
    strings that was handed to the style.
    '''
    palette = ["#%06X" % random.randint(0, 0xFFFFFF)
               for _ in range(len(indexlevels[0]))]
    chart_style = Style(
        background='transparent',
        plot_background='#FFFFFF',
        foreground='#111111',
        foreground_strong='#111111',
        foreground_subtle='#111111',
        opacity='.6',
        opacity_hover='.9',
        transition='400ms ease-in',
        colors=tuple(palette))
    return chart_style, palette
def createLabelsTreemap(indexlevels, indexlabels):
    '''
    Group type names by category from the levels and integer codes of a
    two-level index.

    indexlevels -- [category names, type names]
    indexlabels -- [category code per row, type code per row]

    Returns (category names, list of type-name lists). A new list is started
    every time the category code of a row differs from the running counter,
    which starts at 0 and is incremented by one at each change.
    '''
    category_names, type_names = indexlevels[0], indexlevels[1]
    category_codes, type_codes = indexlabels[0], indexlabels[1]

    grouped = []
    bucket = []
    expected = 0
    for row, code in enumerate(category_codes):
        if code != expected:
            # Category changed: close the running bucket and open a new one.
            grouped.append(bucket)
            bucket = []
            expected += 1
        bucket.append(type_names[type_codes[row]])
    grouped.append(bucket)
    return category_names, grouped
def createTable(data, title, tablename, colorsList):
    '''
    Write an HTML page holding one table of counts per category.

    data       -- Series of counts carrying a two-level (category, type)
                  MultiIndex, read in row order
    title      -- accepted but not used by this function
    tablename  -- output path; None means 'attribute_table.html'
    colorsList -- one CSS colour per category, applied to its heading
    '''
    if tablename is None:
        tablename = 'attribute_table.html'

    index = data.index
    # MultiIndex.labels was renamed to .codes in newer pandas releases;
    # fall back to the old attribute when .codes does not exist.
    codes = index.codes if hasattr(index, 'codes') else index.labels
    categories, types = createLabelsTreemap(index.levels, codes)

    # Opening with 'w' already empties the file; the with-block closes it
    # even if rendering fails half-way.
    with open(tablename, 'w') as target:
        target.write('<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>')
        it = 0
        for i, category in enumerate(categories):
            table = pygal.Treemap(pretty_print=True)
            target.write('\n <h1 style="color:' + colorsList[i] + ';">' + category + '</h1>\n')
            for typ in types[i]:
                # Explicit positional access: `it` is a running row number.
                table.add(typ, data.iloc[it])
                it += 1
            target.write(table.render_table(transpose=True))
        target.write('\n</body>\n</html>')
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    '''
    Render the treemap of data to an SVG file and write the companion HTML
    table through createTable.

    data      -- Series of counts carrying a two-level (category, type)
                 MultiIndex, read in row order
    title     -- title of the treemap
    treename  -- SVG output path; None means 'attribute_treemap.svg'
    tablename -- HTML table output path, forwarded to createTable
    '''
    style, colorsList = createStyle(data.index.levels)
    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    index = data.index
    # MultiIndex.labels was renamed to .codes in newer pandas releases;
    # fall back to the old attribute when .codes does not exist.
    codes = index.codes if hasattr(index, 'codes') else index.labels
    categories, types = createLabelsTreemap(index.levels, codes)

    it = 0
    for i, category in enumerate(categories):
        types_labels = []
        for typ in types[i]:
            # Explicit positional access: `it` is a running row number.
            types_labels.append({'label': typ, 'value': data.iloc[it]})
            it += 1
        treemap.add(category, types_labels)

    # The table reuses the colours picked for the treemap.
    createTable(data, 'Attribute Distribution', tablename, colorsList)

    if treename is None:
        treename = 'attribute_treemap.svg'
    treemap.render_to_file(treename)