From 24d131aa32f15f85359996003f3f319985f50193 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 26 Jul 2016 16:35:46 +0200 Subject: [PATCH] Initial refactoring, PEP8 and cleanup --- .../attribute_treemap.py | 36 +--- examples/situational-awareness/style.css | 12 +- examples/situational-awareness/tag_search.py | 12 +- examples/situational-awareness/tags_count.py | 14 +- .../test_attribute_treemap.html | 4 +- examples/situational-awareness/tools.py | 169 ++++++++---------- 6 files changed, 108 insertions(+), 139 deletions(-) mode change 100644 => 100755 examples/situational-awareness/attribute_treemap.py diff --git a/examples/situational-awareness/attribute_treemap.py b/examples/situational-awareness/attribute_treemap.py old mode 100644 new mode 100755 index 1e7ef63..0536590 --- a/examples/situational-awareness/attribute_treemap.py +++ b/examples/situational-awareness/attribute_treemap.py @@ -4,45 +4,25 @@ from pymisp import PyMISP from keys import misp_url, misp_key, misp_verifycert import argparse -import os -import json import tools -def init(url, key): - return PyMISP(url, key, misp_verifycert, 'json') - -########## fetch data ########## - -def searchall(m, search, url): - result = m.search_all(search) - with open('data', 'w') as f: - f.write(json.dumps(result)) - -def download_last(m, last): - result = m.download_last(last) - with open('data', 'w') as f: - f.write(json.dumps(result)) - - if __name__ == '__main__': parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.') - parser.add_argument("-f", "--function", required=True, help="The parameter can be either set to \"last\" or \"searchall\". If the parameter is not valid, \"last\" will be the default setting.") - parser.add_argument("-a", "--argument", required=True, help="if function is \"last\", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search") + parser.add_argument("-f", "--function", required=True, help='The parameter can be either set to "last" or "searchall". If the parameter is not valid, "last" will be the default setting.') + parser.add_argument("-a", "--argument", required=True, help='if function is "last", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search') args = parser.parse_args() - misp = init(misp_url, misp_key) + misp = PyMISP(misp_url, misp_key, misp_verifycert, 'json') if args.function == "searchall": - searchall(misp, args.argument, misp_url) + result = misp.search_all(args.argument) else: - download_last(misp, args.argument) + result = misp.download_last(args.argument) - Events = tools.eventsListBuildFromArray('data') - - Attributes = tools.attributesListBuild(Events) - temp = tools.getNbAttributePerEventCategoryType(Attributes) + events = tools.eventsListBuildFromArray(result) + attributes = tools.attributesListBuild(events) + temp = tools.getNbAttributePerEventCategoryType(attributes) temp = temp.groupby(level=['category', 'type']).sum() tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html') - diff --git a/examples/situational-awareness/style.css b/examples/situational-awareness/style.css index 8c5313b..5afdf7f 100644 --- a/examples/situational-awareness/style.css +++ b/examples/situational-awareness/style.css @@ -1,4 +1,4 @@ -body +body { /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/ font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace; @@ -20,27 +20,27 @@ table border: 1px solid #cbcbcb; } -tbody +tbody { font-size:12px; } -table td +table td { border-left: 1px solid #cbcbcb; border-width: 0 0 0 1px; - width: 150px; + width: 150px; margin: 0; padding: 0.5em 1em; } -table tr:nth-child(2n-1) td +table tr:nth-child(2n-1) td { background-color: #f2f2f2; } -table tr td:first-child +table tr td:first-child { font-weight: bold; } diff --git a/examples/situational-awareness/tag_search.py b/examples/situational-awareness/tag_search.py index d695a00..5a7c648 100644 --- a/examples/situational-awareness/tag_search.py +++ b/examples/situational-awareness/tag_search.py @@ -8,10 +8,12 @@ import argparse import json import tools + def init(url, key): return PyMISP(url, key, misp_verifycert, 'json') -########## fetch data ########## +# ######### fetch data ########## + def download_last(m, last): result = m.download_last(last) @@ -62,8 +64,8 @@ if __name__ == '__main__': else: text = text + str(args.enddate.date()) - print '\n========================================================' - print text - print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.' + print('\n========================================================') + print(text) + print('During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.') if TotalPeriodEvents != 0: - print 'It represents ' + str(round(100*TotalPeriodTags/TotalPeriodEvents, 3)) + '% of the events in this period.' + print('It represents {}% of the events in this period.'.format(round(100 * TotalPeriodTags / TotalPeriodEvents, 3))) diff --git a/examples/situational-awareness/tags_count.py b/examples/situational-awareness/tags_count.py index 58e6194..8e9ce29 100644 --- a/examples/situational-awareness/tags_count.py +++ b/examples/situational-awareness/tags_count.py @@ -8,10 +8,12 @@ import argparse import json import tools + def init(url, key): return PyMISP(url, key, misp_verifycert, 'json') -########## fetch data ########## +# ######### fetch data ########## + def download_last(m, last): result = m.download_last(last) @@ -21,7 +23,7 @@ def download_last(m, last): if __name__ == '__main__': parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.') parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7") - parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)") + parser.add_argument("-b", "--begindate", default='1970-01-01', help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)") parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()") args = parser.parse_args() @@ -30,7 +32,7 @@ if __name__ == '__main__': if args.days is None: args.days = 7 - download_last(misp, str(args.days) + 'd') + download_last(misp, '{}d'.format(args.days)) tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days)) @@ -67,6 +69,6 @@ if __name__ == '__main__': else: text = text + str(args.enddate.date()) - print '\n========================================================' - print text - print result + print('\n========================================================') + print(text) + print(result) diff --git a/examples/situational-awareness/test_attribute_treemap.html b/examples/situational-awareness/test_attribute_treemap.html index d6e8fc4..0bc9c72 100644 --- a/examples/situational-awareness/test_attribute_treemap.html +++ b/examples/situational-awareness/test_attribute_treemap.html @@ -15,11 +15,11 @@ - +
diff --git a/examples/situational-awareness/tools.py b/examples/situational-awareness/tools.py index 098d035..232ce8e 100644 --- a/examples/situational-awareness/tools.py +++ b/examples/situational-awareness/tools.py @@ -1,107 +1,97 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import json from json import JSONDecoder -import math import random import pygal from pygal.style import Style -import pandas as pd +import pandas from datetime import datetime from datetime import timedelta from dateutil.parser import parse -import sys -################ Errors ################ +# ############### Errors ################ + class DateError(Exception): def __init__(self, value): self.value = value + def __str__(self): return repr(self.value) -################ Tools ################ +# ############### Tools ################ + def buildDoubleIndex(index1, index2, datatype): it = -1 newindex1 = [] for index in index2: if index == 0: - it+=1 + it += 1 newindex1.append(index1[it]) - arrays = [newindex1, index2] + arrays = [newindex1, index2] tuples = list(zip(*arrays)) - return pd.MultiIndex.from_tuples(tuples, names=['event', datatype]) + return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype]) + def buildNewColumn(index2, column): it = -1 newcolumn = [] for index in index2: if index == 0: - it+=1 + it += 1 newcolumn.append(column[it]) return newcolumn + def dateInRange(datetimeTested, begin=None, end=None): - if begin == None: - begin = datetime(1970,1,1) - if end == None: + if begin is None: + begin = datetime(1970, 1, 1) + if end is None: end = datetime.now() return begin <= datetimeTested <= end + def addColumn(dataframe, columnList, columnName): - dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index) + dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index) -def dateInRange(datetimeTested, begin=None, end=None): - if begin == None: - begin = datetime(1970,1,1) - if end == None: - end = datetime.now() - return begin <= datetimeTested <= end def toDatetime(date): return parse(date) + def checkDateConsistancy(begindate, enddate, lastdate): - try: - if begindate is not None and enddate is not None: - if begindate > enddate: - raise DateError('begindate (' + begindate + ') cannot be after enddate (' + enddate + ')') - except DateError as e: - print('DateError: ' + e.value) - sys.exit(1) + if begindate is not None and enddate is not None: + if begindate > enddate: + raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate)) - try: - if enddate is not None: - if toDatetime(enddate) < lastdate: - raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' ) - except DateError as e: - print('DateError: ' + e.value) - sys.exit(1) + if enddate is not None: + if toDatetime(enddate) < lastdate: + raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate)) + + if begindate is not None: + if toDatetime(begindate) > datetime.now(): + raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date())) - try: - if begindate is not None: - if toDatetime(begindate) > datetime.now(): - raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')') - except DateError as e: - print('DateError: ' + e.value) - sys.exit(1) def setBegindate(begindate, lastdate): return max(begindate, lastdate) + def setEnddate(enddate): return min(enddate, datetime.now()) + def getLastdate(last): return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0) -################ Formatting ################ +# ############### Formatting ################ + def eventsListBuildFromList(filename): with open(filename, 'r') as myfile: - s=myfile.read().replace('\n', '') + s = myfile.read().replace('\n', '') decoder = JSONDecoder() s_len = len(s) Events = [] @@ -111,66 +101,57 @@ def eventsListBuildFromList(filename): Events.append(Event) data = [] for e in Events: - data.append(pd.DataFrame.from_dict(e, orient='index')) - Events = pd.concat(data) + data.append(pandas.DataFrame.from_dict(e, orient='index')) + Events = pandas.concat(data) for it in range(Events['attribute_count'].size): - if Events['attribute_count'][it] == None: - Events['attribute_count'][it]='0' + if Events['attribute_count'][it] is None: + Events['attribute_count'][it] = '0' else: - Events['attribute_count'][it]=int(Events['attribute_count'][it]) + Events['attribute_count'][it] = int(Events['attribute_count'][it]) Events = Events.set_index('id') return Events -def eventsListBuildFromArray(filename): + +def eventsListBuildFromArray(jdata): ''' returns a structure listing all primary events in the sample ''' - jdata = json.load(open(filename)) - jdata = jdata['response'] - Events = [] - for e in jdata: - Events.append(e) - data = [] - for e in Events: - data.append(pd.DataFrame.from_dict(e, orient='index')) - Events = pd.concat(data) - for it in range(Events['attribute_count'].size): - if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])): - Events['attribute_count'][it]='0' - else: - Events['attribute_count'][it]=int(Events['attribute_count'][it]) - Events = Events.set_index('id') - return Events + data = [pandas.DataFrame.from_dict(e, orient='index') for e in jdata['response']] + events = pandas.concat(data) + events = events.set_index(['id']) + return events + + +def attributesListBuild(events): + attributes = [pandas.DataFrame(attribute) for attribute in events['Attribute']] + return pandas.concat(attributes) -def attributesListBuild(Events): - Attributes = [] - for Attribute in Events['Attribute']: - Attributes.append(pd.DataFrame(Attribute)) - return pd.concat(Attributes) def tagsListBuild(Events): Tags = [] for Tag in Events['Tag']: if type(Tag) is not list: continue - Tags.append(pd.DataFrame(Tag)) - Tags = pd.concat(Tags) + Tags.append(pandas.DataFrame(Tag)) + Tags = pandas.concat(Tags) columnDate = buildNewColumn(Tags.index, Events['date']) addColumn(Tags, columnDate, 'date') index = buildDoubleIndex(Events.index, Tags.index, 'tag') Tags = Tags.set_index(index) return Tags + def selectInRange(Events, begin=None, end=None): inRange = [] for i, Event in Events.iterrows(): if dateInRange(parse(Event['date']), begin, end): inRange.append(Event.tolist()) - inRange = pd.DataFrame(inRange) + inRange = pandas.DataFrame(inRange) temp = Events.columns.tolist() inRange.columns = temp return inRange + def isTagIn(dataframe, tag): temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist() index = [] @@ -179,35 +160,39 @@ def isTagIn(dataframe, tag): index.append(temp[i][0]) return index -################ Basic Stats ################ +# ############### Basic Stats ################ + def getNbitems(dataframe): return len(dataframe.index) -def getNbAttributePerEventCategoryType(Attributes): - return Attributes.groupby(['event_id', 'category', 'type']).count()['id'] + +def getNbAttributePerEventCategoryType(attributes): + return attributes.groupby(['event_id', 'category', 'type']).count()['id'] + def getNbOccurenceTags(Tags): return Tags.groupby('name').count()['id'] -################ Charts ################ +# ############### Charts ################ + def createStyle(indexlevels): colorsList = [] for i in range(len(indexlevels[0])): colorsList.append("#%06X" % random.randint(0, 0xFFFFFF)) - style = Style( - background='transparent', - plot_background='#FFFFFF', - foreground='#111111', - foreground_strong='#111111', - foreground_subtle='#111111', - opacity='.6', - opacity_hover='.9', - transition='400ms ease-in', - colors=tuple(colorsList)) + style = Style(background='transparent', + plot_background='#FFFFFF', + foreground='#111111', + foreground_strong='#111111', + foreground_subtle='#111111', + opacity='.6', + opacity_hover='.9', + transition='400ms ease-in', + colors=tuple(colorsList)) return style, colorsList + def createLabelsTreemap(indexlevels, indexlabels): categories_levels = indexlevels[0] cat = 0 @@ -230,7 +215,7 @@ def createLabelsTreemap(indexlevels, indexlabels): def createTable(data, title, tablename, colorsList): - if tablename == None: + if tablename is None: target = open('attribute_table.html', 'w') else: target = open(tablename, 'w') @@ -241,7 +226,7 @@ def createTable(data, title, tablename, colorsList): for i in range(len(categories)): table = pygal.Treemap(pretty_print=True) - target.write('\n

' + categories[i] + '

\n') + target.write('\n

{}

\n'.format(colorsList[i], categories[i])) for typ in types[i]: table.add(typ, data[it]) it += 1 @@ -250,9 +235,9 @@ def createTable(data, title, tablename, colorsList): target.close() -def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = 'attribute_table.html'): +def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'): style, colorsList = createStyle(data.index.levels) - treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style = style) + treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style) treemap.title = title treemap.print_values = True treemap.print_labels = True @@ -271,7 +256,7 @@ def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = ' treemap.add(categories[i], types_labels) createTable(data, 'Attribute Distribution', tablename, colorsList) - if treename == None: + if treename is None: treemap.render_to_file('attribute_treemap.svg') else: treemap.render_to_file(treename)