Initial refactoring, PEP8 and cleanup

pull/24/head
Raphaël Vinot 2016-07-26 16:35:46 +02:00
parent 68f270dd4b
commit 24d131aa32
6 changed files with 108 additions and 139 deletions

36
examples/situational-awareness/attribute_treemap.py Normal file → Executable file
View File

@ -4,45 +4,25 @@
from pymisp import PyMISP from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert from keys import misp_url, misp_key, misp_verifycert
import argparse import argparse
import os
import json
import tools import tools
def init(url, key):
return PyMISP(url, key, misp_verifycert, 'json')
########## fetch data ##########
def searchall(m, search, url):
result = m.search_all(search)
with open('data', 'w') as f:
f.write(json.dumps(result))
def download_last(m, last):
result = m.download_last(last)
with open('data', 'w') as f:
f.write(json.dumps(result))
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.') parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.')
parser.add_argument("-f", "--function", required=True, help="The parameter can be either set to \"last\" or \"searchall\". If the parameter is not valid, \"last\" will be the default setting.") parser.add_argument("-f", "--function", required=True, help='The parameter can be either set to "last" or "searchall". If the parameter is not valid, "last" will be the default setting.')
parser.add_argument("-a", "--argument", required=True, help="if function is \"last\", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search") parser.add_argument("-a", "--argument", required=True, help='if function is "last", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search')
args = parser.parse_args() args = parser.parse_args()
misp = init(misp_url, misp_key) misp = PyMISP(misp_url, misp_key, misp_verifycert, 'json')
if args.function == "searchall": if args.function == "searchall":
searchall(misp, args.argument, misp_url) result = misp.search_all(args.argument)
else: else:
download_last(misp, args.argument) result = misp.download_last(args.argument)
Events = tools.eventsListBuildFromArray('data') events = tools.eventsListBuildFromArray(result)
attributes = tools.attributesListBuild(events)
Attributes = tools.attributesListBuild(Events) temp = tools.getNbAttributePerEventCategoryType(attributes)
temp = tools.getNbAttributePerEventCategoryType(Attributes)
temp = temp.groupby(level=['category', 'type']).sum() temp = temp.groupby(level=['category', 'type']).sum()
tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html') tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')

View File

@ -1,4 +1,4 @@
body body
{ {
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
@ -20,27 +20,27 @@ table
border: 1px solid #cbcbcb; border: 1px solid #cbcbcb;
} }
tbody tbody
{ {
font-size:12px; font-size:12px;
} }
table td table td
{ {
border-left: 1px solid #cbcbcb; border-left: 1px solid #cbcbcb;
border-width: 0 0 0 1px; border-width: 0 0 0 1px;
width: 150px; width: 150px;
margin: 0; margin: 0;
padding: 0.5em 1em; padding: 0.5em 1em;
} }
table tr:nth-child(2n-1) td table tr:nth-child(2n-1) td
{ {
background-color: #f2f2f2; background-color: #f2f2f2;
} }
table tr td:first-child table tr td:first-child
{ {
font-weight: bold; font-weight: bold;
} }

View File

@ -8,10 +8,12 @@ import argparse
import json import json
import tools import tools
def init(url, key): def init(url, key):
return PyMISP(url, key, misp_verifycert, 'json') return PyMISP(url, key, misp_verifycert, 'json')
########## fetch data ########## # ######### fetch data ##########
def download_last(m, last): def download_last(m, last):
result = m.download_last(last) result = m.download_last(last)
@ -62,8 +64,8 @@ if __name__ == '__main__':
else: else:
text = text + str(args.enddate.date()) text = text + str(args.enddate.date())
print '\n========================================================' print('\n========================================================')
print text print(text)
print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.' print('During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
if TotalPeriodEvents != 0: if TotalPeriodEvents != 0:
print 'It represents ' + str(round(100*TotalPeriodTags/TotalPeriodEvents, 3)) + '% of the events in this period.' print('It represents {}% of the events in this period.'.format(round(100 * TotalPeriodTags / TotalPeriodEvents, 3)))

View File

@ -8,10 +8,12 @@ import argparse
import json import json
import tools import tools
def init(url, key): def init(url, key):
return PyMISP(url, key, misp_verifycert, 'json') return PyMISP(url, key, misp_verifycert, 'json')
########## fetch data ########## # ######### fetch data ##########
def download_last(m, last): def download_last(m, last):
result = m.download_last(last) result = m.download_last(last)
@ -21,7 +23,7 @@ def download_last(m, last):
if __name__ == '__main__': if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.') parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7") parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)") parser.add_argument("-b", "--begindate", default='1970-01-01', help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()") parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
args = parser.parse_args() args = parser.parse_args()
@ -30,7 +32,7 @@ if __name__ == '__main__':
if args.days is None: if args.days is None:
args.days = 7 args.days = 7
download_last(misp, str(args.days) + 'd') download_last(misp, '{}d'.format(args.days))
tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days)) tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
@ -67,6 +69,6 @@ if __name__ == '__main__':
else: else:
text = text + str(args.enddate.date()) text = text + str(args.enddate.date())
print '\n========================================================' print('\n========================================================')
print text print(text)
print result print(result)

View File

@ -15,11 +15,11 @@
</head> </head>
<body> <body>
<table><tr> <table><tr>
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td> <td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td> <td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
</tr></table> </tr></table>
<!-- <!--
<div id="stats"><iframe src="table.html"></iframe></div> <div id="stats"><iframe src="table.html"></iframe></div>
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div> <div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
--> -->
</body> </body>

View File

@ -1,107 +1,97 @@
#!/usr/bin/env python #!/usr/bin/env python
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import json
from json import JSONDecoder from json import JSONDecoder
import math
import random import random
import pygal import pygal
from pygal.style import Style from pygal.style import Style
import pandas as pd import pandas
from datetime import datetime from datetime import datetime
from datetime import timedelta from datetime import timedelta
from dateutil.parser import parse from dateutil.parser import parse
import sys
################ Errors ################ # ############### Errors ################
class DateError(Exception): class DateError(Exception):
def __init__(self, value): def __init__(self, value):
self.value = value self.value = value
def __str__(self): def __str__(self):
return repr(self.value) return repr(self.value)
################ Tools ################ # ############### Tools ################
def buildDoubleIndex(index1, index2, datatype): def buildDoubleIndex(index1, index2, datatype):
it = -1 it = -1
newindex1 = [] newindex1 = []
for index in index2: for index in index2:
if index == 0: if index == 0:
it+=1 it += 1
newindex1.append(index1[it]) newindex1.append(index1[it])
arrays = [newindex1, index2] arrays = [newindex1, index2]
tuples = list(zip(*arrays)) tuples = list(zip(*arrays))
return pd.MultiIndex.from_tuples(tuples, names=['event', datatype]) return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])
def buildNewColumn(index2, column): def buildNewColumn(index2, column):
it = -1 it = -1
newcolumn = [] newcolumn = []
for index in index2: for index in index2:
if index == 0: if index == 0:
it+=1 it += 1
newcolumn.append(column[it]) newcolumn.append(column[it])
return newcolumn return newcolumn
def dateInRange(datetimeTested, begin=None, end=None): def dateInRange(datetimeTested, begin=None, end=None):
if begin == None: if begin is None:
begin = datetime(1970,1,1) begin = datetime(1970, 1, 1)
if end == None: if end is None:
end = datetime.now() end = datetime.now()
return begin <= datetimeTested <= end return begin <= datetimeTested <= end
def addColumn(dataframe, columnList, columnName): def addColumn(dataframe, columnList, columnName):
dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index) dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)
def dateInRange(datetimeTested, begin=None, end=None):
if begin == None:
begin = datetime(1970,1,1)
if end == None:
end = datetime.now()
return begin <= datetimeTested <= end
def toDatetime(date): def toDatetime(date):
return parse(date) return parse(date)
def checkDateConsistancy(begindate, enddate, lastdate): def checkDateConsistancy(begindate, enddate, lastdate):
try: if begindate is not None and enddate is not None:
if begindate is not None and enddate is not None: if begindate > enddate:
if begindate > enddate: raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))
raise DateError('begindate (' + begindate + ') cannot be after enddate (' + enddate + ')')
except DateError as e:
print('DateError: ' + e.value)
sys.exit(1)
try: if enddate is not None:
if enddate is not None: if toDatetime(enddate) < lastdate:
if toDatetime(enddate) < lastdate: raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))
raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' )
except DateError as e: if begindate is not None:
print('DateError: ' + e.value) if toDatetime(begindate) > datetime.now():
sys.exit(1) raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
try:
if begindate is not None:
if toDatetime(begindate) > datetime.now():
raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
except DateError as e:
print('DateError: ' + e.value)
sys.exit(1)
def setBegindate(begindate, lastdate): def setBegindate(begindate, lastdate):
return max(begindate, lastdate) return max(begindate, lastdate)
def setEnddate(enddate): def setEnddate(enddate):
return min(enddate, datetime.now()) return min(enddate, datetime.now())
def getLastdate(last): def getLastdate(last):
return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0) return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)
################ Formatting ################ # ############### Formatting ################
def eventsListBuildFromList(filename): def eventsListBuildFromList(filename):
with open(filename, 'r') as myfile: with open(filename, 'r') as myfile:
s=myfile.read().replace('\n', '') s = myfile.read().replace('\n', '')
decoder = JSONDecoder() decoder = JSONDecoder()
s_len = len(s) s_len = len(s)
Events = [] Events = []
@ -111,66 +101,57 @@ def eventsListBuildFromList(filename):
Events.append(Event) Events.append(Event)
data = [] data = []
for e in Events: for e in Events:
data.append(pd.DataFrame.from_dict(e, orient='index')) data.append(pandas.DataFrame.from_dict(e, orient='index'))
Events = pd.concat(data) Events = pandas.concat(data)
for it in range(Events['attribute_count'].size): for it in range(Events['attribute_count'].size):
if Events['attribute_count'][it] == None: if Events['attribute_count'][it] is None:
Events['attribute_count'][it]='0' Events['attribute_count'][it] = '0'
else: else:
Events['attribute_count'][it]=int(Events['attribute_count'][it]) Events['attribute_count'][it] = int(Events['attribute_count'][it])
Events = Events.set_index('id') Events = Events.set_index('id')
return Events return Events
def eventsListBuildFromArray(filename):
def eventsListBuildFromArray(jdata):
''' '''
returns a structure listing all primary events in the sample returns a structure listing all primary events in the sample
''' '''
jdata = json.load(open(filename)) data = [pandas.DataFrame.from_dict(e, orient='index') for e in jdata['response']]
jdata = jdata['response'] events = pandas.concat(data)
Events = [] events = events.set_index(['id'])
for e in jdata: return events
Events.append(e)
data = []
for e in Events: def attributesListBuild(events):
data.append(pd.DataFrame.from_dict(e, orient='index')) attributes = [pandas.DataFrame(attribute) for attribute in events['Attribute']]
Events = pd.concat(data) return pandas.concat(attributes)
for it in range(Events['attribute_count'].size):
if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
Events['attribute_count'][it]='0'
else:
Events['attribute_count'][it]=int(Events['attribute_count'][it])
Events = Events.set_index('id')
return Events
def attributesListBuild(Events):
Attributes = []
for Attribute in Events['Attribute']:
Attributes.append(pd.DataFrame(Attribute))
return pd.concat(Attributes)
def tagsListBuild(Events): def tagsListBuild(Events):
Tags = [] Tags = []
for Tag in Events['Tag']: for Tag in Events['Tag']:
if type(Tag) is not list: if type(Tag) is not list:
continue continue
Tags.append(pd.DataFrame(Tag)) Tags.append(pandas.DataFrame(Tag))
Tags = pd.concat(Tags) Tags = pandas.concat(Tags)
columnDate = buildNewColumn(Tags.index, Events['date']) columnDate = buildNewColumn(Tags.index, Events['date'])
addColumn(Tags, columnDate, 'date') addColumn(Tags, columnDate, 'date')
index = buildDoubleIndex(Events.index, Tags.index, 'tag') index = buildDoubleIndex(Events.index, Tags.index, 'tag')
Tags = Tags.set_index(index) Tags = Tags.set_index(index)
return Tags return Tags
def selectInRange(Events, begin=None, end=None): def selectInRange(Events, begin=None, end=None):
inRange = [] inRange = []
for i, Event in Events.iterrows(): for i, Event in Events.iterrows():
if dateInRange(parse(Event['date']), begin, end): if dateInRange(parse(Event['date']), begin, end):
inRange.append(Event.tolist()) inRange.append(Event.tolist())
inRange = pd.DataFrame(inRange) inRange = pandas.DataFrame(inRange)
temp = Events.columns.tolist() temp = Events.columns.tolist()
inRange.columns = temp inRange.columns = temp
return inRange return inRange
def isTagIn(dataframe, tag): def isTagIn(dataframe, tag):
temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist() temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
index = [] index = []
@ -179,35 +160,39 @@ def isTagIn(dataframe, tag):
index.append(temp[i][0]) index.append(temp[i][0])
return index return index
################ Basic Stats ################ # ############### Basic Stats ################
def getNbitems(dataframe): def getNbitems(dataframe):
return len(dataframe.index) return len(dataframe.index)
def getNbAttributePerEventCategoryType(Attributes):
return Attributes.groupby(['event_id', 'category', 'type']).count()['id'] def getNbAttributePerEventCategoryType(attributes):
return attributes.groupby(['event_id', 'category', 'type']).count()['id']
def getNbOccurenceTags(Tags): def getNbOccurenceTags(Tags):
return Tags.groupby('name').count()['id'] return Tags.groupby('name').count()['id']
################ Charts ################ # ############### Charts ################
def createStyle(indexlevels): def createStyle(indexlevels):
colorsList = [] colorsList = []
for i in range(len(indexlevels[0])): for i in range(len(indexlevels[0])):
colorsList.append("#%06X" % random.randint(0, 0xFFFFFF)) colorsList.append("#%06X" % random.randint(0, 0xFFFFFF))
style = Style( style = Style(background='transparent',
background='transparent', plot_background='#FFFFFF',
plot_background='#FFFFFF', foreground='#111111',
foreground='#111111', foreground_strong='#111111',
foreground_strong='#111111', foreground_subtle='#111111',
foreground_subtle='#111111', opacity='.6',
opacity='.6', opacity_hover='.9',
opacity_hover='.9', transition='400ms ease-in',
transition='400ms ease-in', colors=tuple(colorsList))
colors=tuple(colorsList))
return style, colorsList return style, colorsList
def createLabelsTreemap(indexlevels, indexlabels): def createLabelsTreemap(indexlevels, indexlabels):
categories_levels = indexlevels[0] categories_levels = indexlevels[0]
cat = 0 cat = 0
@ -230,7 +215,7 @@ def createLabelsTreemap(indexlevels, indexlabels):
def createTable(data, title, tablename, colorsList): def createTable(data, title, tablename, colorsList):
if tablename == None: if tablename is None:
target = open('attribute_table.html', 'w') target = open('attribute_table.html', 'w')
else: else:
target = open(tablename, 'w') target = open(tablename, 'w')
@ -241,7 +226,7 @@ def createTable(data, title, tablename, colorsList):
for i in range(len(categories)): for i in range(len(categories)):
table = pygal.Treemap(pretty_print=True) table = pygal.Treemap(pretty_print=True)
target.write('\n <h1 style="color:'+ colorsList[i]+ ';">' + categories[i] + '</h1>\n') target.write('\n <h1 style="color:{};">{}</h1>\n'.format(colorsList[i], categories[i]))
for typ in types[i]: for typ in types[i]:
table.add(typ, data[it]) table.add(typ, data[it])
it += 1 it += 1
@ -250,9 +235,9 @@ def createTable(data, title, tablename, colorsList):
target.close() target.close()
def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = 'attribute_table.html'): def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
style, colorsList = createStyle(data.index.levels) style, colorsList = createStyle(data.index.levels)
treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style = style) treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
treemap.title = title treemap.title = title
treemap.print_values = True treemap.print_values = True
treemap.print_labels = True treemap.print_labels = True
@ -271,7 +256,7 @@ def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = '
treemap.add(categories[i], types_labels) treemap.add(categories[i], types_labels)
createTable(data, 'Attribute Distribution', tablename, colorsList) createTable(data, 'Attribute Distribution', tablename, colorsList)
if treename == None: if treename is None:
treemap.render_to_file('attribute_treemap.svg') treemap.render_to_file('attribute_treemap.svg')
else: else:
treemap.render_to_file(treename) treemap.render_to_file(treename)