Initial refactoring, PEP8 and cleanup

2016-07-26 16:35:46 +02:00 · 2016-07-26 16:35:46 +02:00 · 24d131aa32
parent 68f270dd4b
commit 24d131aa32
6 changed files with 108 additions and 139 deletions
--- a/examples/situational-awareness/attribute_treemap.py
+++ b/examples/situational-awareness/attribute_treemap.py
@@ -4,45 +4,25 @@
 from pymisp import PyMISP
 from keys import misp_url, misp_key, misp_verifycert
 import argparse
 import os
 import json
 import tools
-def init(url, key):
-    return PyMISP(url, key, misp_verifycert, 'json')
-########## fetch data ##########
-def searchall(m, search, url):
-    result = m.search_all(search)
-    with open('data', 'w') as f:
-        f.write(json.dumps(result))
-def download_last(m, last):
-    result = m.download_last(last)
-    with open('data', 'w') as f:
-        f.write(json.dumps(result))
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.')
-    parser.add_argument("-f", "--function", required=True, help="The parameter can be either set to \"last\" or \"searchall\". If the parameter is not valid, \"last\" will be the default setting.")
+    parser.add_argument("-f", "--function", required=True, help='The parameter can be either set to "last" or "searchall". If the parameter is not valid, "last" will be the default setting.')
-    parser.add_argument("-a", "--argument", required=True, help="if function is \"last\", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search")
+    parser.add_argument("-a", "--argument", required=True, help='if function is "last", time can be defined in days, hours, minutes (for example 5d or 12h or 30m). Otherwise, this argument is the string to search')
    args = parser.parse_args()
-    misp = init(misp_url, misp_key)
+    misp = PyMISP(misp_url, misp_key, misp_verifycert, 'json')
    if args.function == "searchall":
-        searchall(misp, args.argument, misp_url)
+        result = misp.search_all(args.argument)
    else:
-        download_last(misp, args.argument)
+        result = misp.download_last(args.argument)
-    Events = tools.eventsListBuildFromArray('data')
+    events = tools.eventsListBuildFromArray(result)
-
+    attributes = tools.attributesListBuild(events)
-    Attributes = tools.attributesListBuild(Events)
+    temp = tools.getNbAttributePerEventCategoryType(attributes)
-    temp = tools.getNbAttributePerEventCategoryType(Attributes)
    temp = temp.groupby(level=['category', 'type']).sum()
    tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')
--- a/examples/situational-awareness/style.css
+++ b/examples/situational-awareness/style.css
@@ -1,4 +1,4 @@
-body 
+body
 {
    /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
 	font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
@@ -20,27 +20,27 @@ table
    border: 1px solid #cbcbcb;
 }
-tbody 
+tbody
 {
 	font-size:12px;
 }
-table td 
+table td
 {
 	border-left: 1px solid #cbcbcb;
 	border-width: 0 0 0 1px;
-	width: 150px;	
+	width: 150px;
 	margin: 0;
 	padding: 0.5em 1em;
 }
-table tr:nth-child(2n-1) td 
+table tr:nth-child(2n-1) td
 {
 	background-color: #f2f2f2;
 }
-table tr td:first-child 
+table tr td:first-child
 {
 	font-weight: bold;
 }
--- a/examples/situational-awareness/tag_search.py
+++ b/examples/situational-awareness/tag_search.py
@@ -8,10 +8,12 @@ import argparse
 import json
 import tools
 def init(url, key):
    return PyMISP(url, key, misp_verifycert, 'json')
-########## fetch data ##########
+# ######### fetch data ##########
 def download_last(m, last):
    result = m.download_last(last)
@@ -62,8 +64,8 @@ if __name__ == '__main__':
    else:
        text = text + str(args.enddate.date())
-    print '\n========================================================'
+    print('\n========================================================')
-    print text
+    print(text)
-    print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.'
+    print('During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
    if TotalPeriodEvents != 0:
-        print 'It represents ' + str(round(100*TotalPeriodTags/TotalPeriodEvents, 3)) + '% of the events in this period.'
+        print('It represents {}% of the events in this period.'.format(round(100 * TotalPeriodTags / TotalPeriodEvents, 3)))
--- a/examples/situational-awareness/tags_count.py
+++ b/examples/situational-awareness/tags_count.py
@@ -8,10 +8,12 @@ import argparse
 import json
 import tools
 def init(url, key):
    return PyMISP(url, key, misp_verifycert, 'json')
-########## fetch data ##########
+# ######### fetch data ##########
 def download_last(m, last):
    result = m.download_last(last)
@@ -21,7 +23,7 @@ def download_last(m, last):
 if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
-    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
+    parser.add_argument("-b", "--begindate", default='1970-01-01', help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
    args = parser.parse_args()
@@ -30,7 +32,7 @@ if __name__ == '__main__':
    if args.days is None:
        args.days = 7
-    download_last(misp, str(args.days) + 'd')
+    download_last(misp, '{}d'.format(args.days))
    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
@@ -67,6 +69,6 @@ if __name__ == '__main__':
    else:
        text = text + str(args.enddate.date())
-    print '\n========================================================'
+    print('\n========================================================')
-    print text
+    print(text)
-    print result
+    print(result)
--- a/examples/situational-awareness/test_attribute_treemap.html
+++ b/examples/situational-awareness/test_attribute_treemap.html
@@ -15,11 +15,11 @@
 	</head>
 	<body>
 		<table><tr>
-		<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td> 
+		<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
 		<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
 		</tr></table>
 	<!--
-		<div id="stats"><iframe src="table.html"></iframe></div> 
+		<div id="stats"><iframe src="table.html"></iframe></div>
 		<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
 	-->
 	</body>
--- a/examples/situational-awareness/tools.py
+++ b/examples/situational-awareness/tools.py
@@ -1,107 +1,97 @@
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 import json
 from json import JSONDecoder
 import math
 import random
 import pygal
 from pygal.style import Style
-import pandas as pd
+import pandas
 from datetime import datetime
 from datetime import timedelta
 from dateutil.parser import parse
 import sys
-################ Errors ################
+# ############### Errors ################
 class DateError(Exception):
    def __init__(self, value):
        self.value = value
    def __str__(self):
        return repr(self.value)
-################ Tools ################
+# ############### Tools ################
 def buildDoubleIndex(index1, index2, datatype):
    it = -1
    newindex1 = []
    for index in index2:
        if index == 0:
-            it+=1
+            it += 1
        newindex1.append(index1[it])
-    arrays =  [newindex1, index2]
+    arrays = [newindex1, index2]
    tuples = list(zip(*arrays))
-    return pd.MultiIndex.from_tuples(tuples, names=['event', datatype])
+    return pandas.MultiIndex.from_tuples(tuples, names=['event', datatype])
 def buildNewColumn(index2, column):
    it = -1
    newcolumn = []
    for index in index2:
        if index == 0:
-            it+=1
+            it += 1
        newcolumn.append(column[it])
    return newcolumn
 def dateInRange(datetimeTested, begin=None, end=None):
-    if begin == None:
+    if begin is None:
-        begin = datetime(1970,1,1)
+        begin = datetime(1970, 1, 1)
-    if end == None:
+    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
 def addColumn(dataframe, columnList, columnName):
-        dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index)
+    dataframe.loc[:, columnName] = pandas.Series(columnList, index=dataframe.index)
-def dateInRange(datetimeTested, begin=None, end=None):
-    if begin == None:
-        begin = datetime(1970,1,1)
-    if end == None:
-        end = datetime.now()
-    return begin <= datetimeTested <= end
 def toDatetime(date):
    return parse(date)
 def checkDateConsistancy(begindate, enddate, lastdate):
-    try:
+    if begindate is not None and enddate is not None:
-        if begindate is not None and enddate is not None:
+        if begindate > enddate:
-            if begindate > enddate:
+            raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))
-                raise DateError('begindate (' + begindate +  ') cannot be after enddate (' + enddate + ')')
-    except DateError as e:
-        print('DateError: ' + e.value)
-        sys.exit(1)
-    try:
+    if enddate is not None:
-        if enddate is not None:
+        if toDatetime(enddate) < lastdate:
-            if toDatetime(enddate) < lastdate:
+            raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))
-                raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' )
+
-    except DateError as e:
+    if begindate is not None:
-        print('DateError: ' + e.value)
+        if toDatetime(begindate) > datetime.now():
-        sys.exit(1)
+            raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
-    try:
-        if begindate is not None:
-            if toDatetime(begindate) > datetime.now():
-                raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
-    except DateError as e:
-        print('DateError: ' + e.value)
-        sys.exit(1)
 def setBegindate(begindate, lastdate):
    return max(begindate, lastdate)
 def setEnddate(enddate):
    return min(enddate, datetime.now())
 def getLastdate(last):
    return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)
-################ Formatting  ################
+# ############### Formatting  ################
 def eventsListBuildFromList(filename):
    with open(filename, 'r') as myfile:
-        s=myfile.read().replace('\n', '')
+        s = myfile.read().replace('\n', '')
    decoder = JSONDecoder()
    s_len = len(s)
    Events = []
@@ -111,66 +101,57 @@ def eventsListBuildFromList(filename):
        Events.append(Event)
    data = []
    for e in Events:
-        data.append(pd.DataFrame.from_dict(e, orient='index'))
+        data.append(pandas.DataFrame.from_dict(e, orient='index'))
-    Events = pd.concat(data)
+    Events = pandas.concat(data)
    for it in range(Events['attribute_count'].size):
-        if Events['attribute_count'][it] == None:
+        if Events['attribute_count'][it] is None:
-            Events['attribute_count'][it]='0'
+            Events['attribute_count'][it] = '0'
        else:
-            Events['attribute_count'][it]=int(Events['attribute_count'][it])
+            Events['attribute_count'][it] = int(Events['attribute_count'][it])
    Events = Events.set_index('id')
    return Events
-def eventsListBuildFromArray(filename):
+
+def eventsListBuildFromArray(jdata):
    '''
    returns a structure listing all primary events in the sample
    '''
-    jdata = json.load(open(filename))
+    data = [pandas.DataFrame.from_dict(e, orient='index') for e in jdata['response']]
-    jdata = jdata['response']
+    events = pandas.concat(data)
-    Events = []
+    events = events.set_index(['id'])
-    for e in jdata:
+    return events
-        Events.append(e)
+
-    data = []
+
-    for e in Events:
+def attributesListBuild(events):
-        data.append(pd.DataFrame.from_dict(e, orient='index'))
+    attributes = [pandas.DataFrame(attribute) for attribute in events['Attribute']]
-    Events = pd.concat(data)
+    return pandas.concat(attributes)
-    for it in range(Events['attribute_count'].size):
-        if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
-            Events['attribute_count'][it]='0'
-        else:
-            Events['attribute_count'][it]=int(Events['attribute_count'][it])
-    Events = Events.set_index('id')
-    return Events
-def attributesListBuild(Events):
-    Attributes = []
-    for Attribute in Events['Attribute']:
-        Attributes.append(pd.DataFrame(Attribute))
-    return pd.concat(Attributes)
 def tagsListBuild(Events):
    Tags = []
    for Tag in Events['Tag']:
        if type(Tag) is not list:
            continue
-        Tags.append(pd.DataFrame(Tag))
+        Tags.append(pandas.DataFrame(Tag))
-    Tags = pd.concat(Tags)
+    Tags = pandas.concat(Tags)
    columnDate = buildNewColumn(Tags.index, Events['date'])
    addColumn(Tags, columnDate, 'date')
    index = buildDoubleIndex(Events.index, Tags.index, 'tag')
    Tags = Tags.set_index(index)
    return Tags
 def selectInRange(Events, begin=None, end=None):
    inRange = []
    for i, Event in Events.iterrows():
        if dateInRange(parse(Event['date']), begin, end):
            inRange.append(Event.tolist())
-    inRange = pd.DataFrame(inRange)
+    inRange = pandas.DataFrame(inRange)
    temp = Events.columns.tolist()
    inRange.columns = temp
    return inRange
 def isTagIn(dataframe, tag):
    temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
    index = []
@@ -179,35 +160,39 @@ def isTagIn(dataframe, tag):
            index.append(temp[i][0])
    return index
-################ Basic Stats ################
+# ############### Basic Stats ################
 def getNbitems(dataframe):
        return len(dataframe.index)
-def getNbAttributePerEventCategoryType(Attributes):
+
-    return Attributes.groupby(['event_id', 'category', 'type']).count()['id']
+def getNbAttributePerEventCategoryType(attributes):
+    return attributes.groupby(['event_id', 'category', 'type']).count()['id']
 def getNbOccurenceTags(Tags):
        return Tags.groupby('name').count()['id']
-################ Charts ################
+# ############### Charts ################
 def createStyle(indexlevels):
    colorsList = []
    for i in range(len(indexlevels[0])):
        colorsList.append("#%06X" % random.randint(0, 0xFFFFFF))
-    style = Style(
+    style = Style(background='transparent',
-                background='transparent',
+                  plot_background='#FFFFFF',
-                plot_background='#FFFFFF',
+                  foreground='#111111',
-                foreground='#111111',
+                  foreground_strong='#111111',
-                foreground_strong='#111111',
+                  foreground_subtle='#111111',
-                foreground_subtle='#111111',
+                  opacity='.6',
-                opacity='.6',
+                  opacity_hover='.9',
-                opacity_hover='.9',
+                  transition='400ms ease-in',
-                transition='400ms ease-in',
+                  colors=tuple(colorsList))
-                colors=tuple(colorsList))
    return style, colorsList
 def createLabelsTreemap(indexlevels, indexlabels):
    categories_levels = indexlevels[0]
    cat = 0
@@ -230,7 +215,7 @@ def createLabelsTreemap(indexlevels, indexlabels):
 def createTable(data, title, tablename, colorsList):
-    if tablename == None:
+    if tablename is None:
        target = open('attribute_table.html', 'w')
    else:
        target = open(tablename, 'w')
@@ -241,7 +226,7 @@ def createTable(data, title, tablename, colorsList):
    for i in range(len(categories)):
        table = pygal.Treemap(pretty_print=True)
-        target.write('\n <h1 style="color:'+ colorsList[i]+ ';">' + categories[i] + '</h1>\n')
+        target.write('\n <h1 style="color:{};">{}</h1>\n'.format(colorsList[i], categories[i]))
        for typ in types[i]:
            table.add(typ, data[it])
            it += 1
@@ -250,9 +235,9 @@ def createTable(data, title, tablename, colorsList):
    target.close()
-def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = 'attribute_table.html'):
+def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    style, colorsList = createStyle(data.index.levels)
-    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style = style)
+    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True
@@ -271,7 +256,7 @@ def createTreemap(data, title, treename = 'attribute_treemap.svg', tablename = '
        treemap.add(categories[i], types_labels)
    createTable(data, 'Attribute Distribution', tablename, colorsList)
-    if treename == None:
+    if treename is None:
        treemap.render_to_file('attribute_treemap.svg')
    else:
        treemap.render_to_file(treename)