Merge pull request #23 from Delta-Sierra/master

Add tags_count and tag_search script in examples/situational-awareness/
2016-07-26 14:28:52 +02:00 · 2016-07-26 14:28:52 +02:00 · 68f270dd4b
parent 328e3b7a92 f8dbcde607
commit 68f270dd4b
8 changed files with 293 additions and 13 deletions
--- a/examples/situational-awareness/README.md
+++ b/examples/situational-awareness/README.md
@ -0,0 +1,19 @@
+## Explanation
+
+* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
+* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
+* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
+
+* tags\_count.py is a script that counts the number of occurrences of every tag in a fetched sample of Events in a given period of time.
+* tag\_search.py is a script that counts the number of occurrences of a given tag in a fetched sample of Events in a given period of time.
+    * Events will be fetched from _days_ days ago to today.
+    * _begindate_ is the beginning of the studied period. If it is later than today, an error will be raised.
+    * _enddate_ is the end of the studied period. If it is earlier than _begindate_, an error will be raised.
+    * tag\_search.py allows searching for multiple tags at once by separating the tags with the | symbol.
+    * Partial matching is also possible with tag\_search.py. For instance, searching for "ransom" will also match tags containing "ransomware".
+
+:warning: These scripts are not time optimised
+
+## Requirements
+
+* [Pygal](https://github.com/Kozea/pygal/)
--- a/examples/situational-awareness/attribute_treemap.py
+++ b/examples/situational-awareness/attribute_treemap.py
--- a/examples/situational-awareness/style.css
+++ b/examples/situational-awareness/style.css
@ -1,5 +1,6 @@
 body 
 {
+    /*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
 	font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
 }

@ -10,6 +11,8 @@ h1
 	text-align:center;
 }

+/*** Stats Tables ***/
+
 table
 {
 	border-collapse: collapse;
--- a/examples/situational-awareness/tag_search.py
+++ b/examples/situational-awareness/tag_search.py
@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pymisp import PyMISP
+from keys import misp_url, misp_key, misp_verifycert
+from datetime import datetime
+import argparse
+import json
+import tools
+
+def init(url, key):
+    return PyMISP(url, key, misp_verifycert, 'json')
+
+########## fetch data ##########
+
+def download_last(m, last):
+    result = m.download_last(last)
+    with open('data', 'w') as f:
+        f.write(json.dumps(result))
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the number of occurrence of the given tag in this sample.')
+    parser.add_argument("-t", "--tag", required=True, help="tag to search (search for multiple tags is possible by using |. example : \"osint|OSINT\")")
+    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
+    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
+    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
+
+    args = parser.parse_args()
+
+    misp = init(misp_url, misp_key)
+
+    if args.days is None:
+        args.days = 7
+    download_last(misp, str(args.days) + 'd')
+
+    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
+
+    if args.begindate is None:
+        args.begindate = tools.getLastdate(args.days)
+    else:
+        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
+
+    if args.enddate is None:
+        args.enddate = datetime.now()
+    else:
+        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
+
+    Events = tools.selectInRange(tools.eventsListBuildFromArray('data'), begin=args.begindate, end=args.enddate)
+    TotalPeriodEvents = tools.getNbitems(Events)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.isTagIn(Tags, args.tag)
+    TotalPeriodTags = len(result)
+
+    text = 'Studied pediod: from '
+    if args.begindate is None:
+        text = text + '1970-01-01'
+    else:
+        text = text + str(args.begindate.date())
+    text = text + ' to '
+    if args.enddate is None:
+        text = text + str(datetime.now().date())
+    else:
+        text = text + str(args.enddate.date())
+
+    print '\n========================================================'
+    print text
+    print 'During the studied pediod, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.'
+    if TotalPeriodEvents != 0:
+        print 'It represents ' + str(round(100*TotalPeriodTags/TotalPeriodEvents, 3)) + '% of the events in this period.'
--- a/examples/situational-awareness/tags_count.py
+++ b/examples/situational-awareness/tags_count.py
@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from pymisp import PyMISP
+from keys import misp_url, misp_key, misp_verifycert
+from datetime import datetime
+import argparse
+import json
+import tools
+
+def init(url, key):
+    return PyMISP(url, key, misp_verifycert, 'json')
+
+########## fetch data ##########
+
+def download_last(m, last):
+    result = m.download_last(last)
+    with open('data', 'w') as f:
+        f.write(json.dumps(result))
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
+    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
+    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
+    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")
+
+    args = parser.parse_args()
+
+    misp = init(misp_url, misp_key)
+
+    if args.days is None:
+        args.days = 7
+    download_last(misp, str(args.days) + 'd')
+
+    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
+
+    if args.begindate is None:
+        args.begindate = tools.getLastdate(args.days)
+    else:
+        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
+
+    if args.enddate is None:
+        args.enddate = datetime.now()
+    else:
+        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
+
+    Events = tools.eventsListBuildFromArray('data')
+    TotalEvents = tools.getNbitems(Events)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.getNbOccurenceTags(Tags)
+    TotalTags = tools.getNbitems(Tags)
+
+    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
+    TotalPeriodEvents = tools.getNbitems(Events)
+    Tags = tools.tagsListBuild(Events)
+    result = tools.getNbOccurenceTags(Tags)
+    TotalPeriodTags = tools.getNbitems(Tags)
+
+    text = 'Studied pediod: from '
+    if args.begindate is None:
+        text = text + '1970-01-01'
+    else:
+        text = text + str(args.begindate.date())
+    text = text + ' to '
+    if args.enddate is None:
+        text = text + str(datetime.now().date())
+    else:
+        text = text + str(args.enddate.date())
+
+    print '\n========================================================'
+    print text
+    print result
--- a/examples/situational-awareness/test_attribute_treemap.html
+++ b/examples/situational-awareness/test_attribute_treemap.html
@ -18,5 +18,9 @@
 		<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td> 
 		<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
 		</tr></table>
+	<!--
+		<div id="stats"><iframe src="table.html"></iframe></div> 
+		<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
+	-->
 	</body>
 </html>
--- a/examples/situational-awareness/tools.py
+++ b/examples/situational-awareness/tools.py
@ -3,15 +3,104 @@

 import json
 from json import JSONDecoder
+import math
 import random
 import pygal
 from pygal.style import Style
 import pandas as pd
+from datetime import datetime
+from datetime import timedelta
+from dateutil.parser import parse
+import sys
+
+################ Errors ################
+
+class DateError(Exception):
+    def __init__(self, value):
+        self.value = value
+    def __str__(self):
+        return repr(self.value)
+
+################ Tools ################
+
+def buildDoubleIndex(index1, index2, datatype):
+    it = -1
+    newindex1 = []
+    for index in index2:
+        if index == 0:
+            it+=1
+        newindex1.append(index1[it])
+    arrays =  [newindex1, index2]
+    tuples = list(zip(*arrays))
+    return pd.MultiIndex.from_tuples(tuples, names=['event', datatype])
+
+def buildNewColumn(index2, column):
+    it = -1
+    newcolumn = []
+    for index in index2:
+        if index == 0:
+            it+=1
+        newcolumn.append(column[it])
+    return newcolumn
+
+def dateInRange(datetimeTested, begin=None, end=None):
+    if begin == None:
+        begin = datetime(1970,1,1)
+    if end == None:
+        end = datetime.now()
+    return begin <= datetimeTested <= end
+
+def addColumn(dataframe, columnList, columnName):
+        dataframe.loc[:, columnName] = pd.Series(columnList, index=dataframe.index)
+
+def dateInRange(datetimeTested, begin=None, end=None):
+    if begin == None:
+        begin = datetime(1970,1,1)
+    if end == None:
+        end = datetime.now()
+    return begin <= datetimeTested <= end
+
+def toDatetime(date):
+    return parse(date)
+
+def checkDateConsistancy(begindate, enddate, lastdate):
+    try:
+        if begindate is not None and enddate is not None:
+            if begindate > enddate:
+                raise DateError('begindate (' + begindate +  ') cannot be after enddate (' + enddate + ')')
+    except DateError as e:
+        print('DateError: ' + e.value)
+        sys.exit(1)
+
+    try:
+        if enddate is not None:
+            if toDatetime(enddate) < lastdate:
+                raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' )
+    except DateError as e:
+        print('DateError: ' + e.value)
+        sys.exit(1)
+
+    try:
+        if begindate is not None:
+            if toDatetime(begindate) > datetime.now():
+                raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
+    except DateError as e:
+        print('DateError: ' + e.value)
+        sys.exit(1)
+
+def setBegindate(begindate, lastdate):
+    return max(begindate, lastdate)
+
+def setEnddate(enddate):
+    return min(enddate, datetime.now())
+
+def getLastdate(last):
+    return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)

 ################ Formatting  ################

 def eventsListBuildFromList(filename):
-    with open('testt', 'r') as myfile:
+    with open(filename, 'r') as myfile:
        s=myfile.read().replace('\n', '')
    decoder = JSONDecoder()
    s_len = len(s)
@ -46,7 +135,7 @@ def eventsListBuildFromArray(filename):
        data.append(pd.DataFrame.from_dict(e, orient='index'))
    Events = pd.concat(data)
    for it in range(Events['attribute_count'].size):
-        if Events['attribute_count'][it] == None:
+        if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
            Events['attribute_count'][it]='0'
        else:
            Events['attribute_count'][it]=int(Events['attribute_count'][it])
@ -59,12 +148,47 @@ def attributesListBuild(Events):
        Attributes.append(pd.DataFrame(Attribute))
    return pd.concat(Attributes)

+def tagsListBuild(Events):
+    Tags = []
+    for Tag in Events['Tag']:
+        if type(Tag) is not list:
+            continue
+        Tags.append(pd.DataFrame(Tag))
+    Tags = pd.concat(Tags)
+    columnDate = buildNewColumn(Tags.index, Events['date'])
+    addColumn(Tags, columnDate, 'date')
+    index = buildDoubleIndex(Events.index, Tags.index, 'tag')
+    Tags = Tags.set_index(index)
+    return Tags
+
+def selectInRange(Events, begin=None, end=None):
+    inRange = []
+    for i, Event in Events.iterrows():
+        if dateInRange(parse(Event['date']), begin, end):
+            inRange.append(Event.tolist())
+    inRange = pd.DataFrame(inRange)
+    temp = Events.columns.tolist()
+    inRange.columns = temp
+    return inRange
+
+def isTagIn(dataframe, tag):
+    temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
+    index = []
+    for i in range(len(temp)):
+        if temp[i][0] not in index:
+            index.append(temp[i][0])
+    return index

 ################ Basic Stats ################

+def getNbitems(dataframe):
+        return len(dataframe.index)
+
 def getNbAttributePerEventCategoryType(Attributes):
     """Count the 'id' values for each (event_id, category, type) combination."""
     grouped = Attributes.groupby(['event_id', 'category', 'type'])
     counts = grouped.count()
     return counts['id']

+def getNbOccurenceTags(Tags):
+        return Tags.groupby('name').count()['id']

 ################ Charts ################

--- a/examples/treemap/README.md
+++ b/examples/treemap/README.md
@ -1,11 +0,0 @@
-## Explanation
-
-* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treepmap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
-* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
-* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
-
-## Requierements
-
-* [Pygal](https://github.com/Kozea/pygal/)
-
-