mirror of https://github.com/MISP/PyMISP
Merge pull request #23 from Delta-Sierra/master
Add tags_count and tag_search script in examples/situational-awareness/pull/24/head
commit
68f270dd4b
|
@ -0,0 +1,19 @@
|
||||||
|
## Explanation
|
||||||
|
|
||||||
|
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
|
||||||
|
* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
|
||||||
|
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
|
||||||
|
|
||||||
|
* tags\_count.py is a script that counts the number of occurrences of every tag in a fetched sample of Events in a given period of time.
|
||||||
|
* tag\_search.py is a script that counts the number of occurrences of a given tag in a fetched sample of Events in a given period of time.
|
||||||
|
* Events will be fetched from _days_ days ago to today.
|
||||||
|
* _begindate_ is the beginning of the studied period. If it is later than today, an error will be raised.
|
||||||
|
* _enddate_ is the end of the studied period. If it is earlier than _begindate_, an error will be raised.
|
||||||
|
* tag\_search.py allows searching for multiple tags at once by separating each tag with the | symbol.
|
||||||
|
* Partial search is also possible with tag\_search.py. For instance, searching for "ransom" will also return tags containing "ransomware".
|
||||||
|
|
||||||
|
:warning: These scripts are not time optimised
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
* [Pygal](https://github.com/Kozea/pygal/)
|
|
@ -1,5 +1,6 @@
|
||||||
body
|
body
|
||||||
{
|
{
|
||||||
|
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
|
||||||
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
|
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,6 +11,8 @@ h1
|
||||||
text-align:center;
|
text-align:center;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*** Stats Tables ***/
|
||||||
|
|
||||||
table
|
table
|
||||||
{
|
{
|
||||||
border-collapse: collapse;
|
border-collapse: collapse;
|
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import tools
|
||||||
|
|
||||||
|
def init(url, key):
    """Return a PyMISP client for the given MISP instance URL and API key.

    misp_verifycert comes from the local keys.py; 'json' selects the
    client's output format.
    """
    return PyMISP(url, key, misp_verifycert, 'json')
|
||||||
|
|
||||||
|
########## fetch data ##########
|
||||||
|
|
||||||
|
def download_last(m, last):
    """Fetch events published in the given timeframe (e.g. '7d') and cache
    them as JSON in the local file './data'."""
    fetched = m.download_last(last)
    with open('data', 'w') as out:
        json.dump(fetched, out)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the number of occurrence of the given tag in this sample.')
    parser.add_argument("-t", "--tag", required=True, help="tag to search (search for multiple tags is possible by using |. example : \"osint|OSINT\")")
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # Default sampling window is one week.
    if args.days is None:
        args.days = 7
    download_last(misp, str(args.days) + 'd')

    # Abort early if the date arguments contradict each other or the window.
    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))

    # Normalise begindate: default to the start of the sampling window,
    # otherwise clamp the user-supplied date into that window.
    if args.begindate is None:
        args.begindate = tools.getLastdate(args.days)
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))

    # Normalise enddate: default to now, otherwise clamp to now at the latest.
    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    Events = tools.selectInRange(tools.eventsListBuildFromArray('data'), begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.isTagIn(Tags, args.tag)
    TotalPeriodTags = len(result)

    # NOTE(review): begindate/enddate are always set by the normalisation
    # above, so the None branches below are defensive only.
    text = 'Studied period: from '
    if args.begindate is None:
        text = text + '1970-01-01'
    else:
        text = text + str(args.begindate.date())
    text = text + ' to '
    if args.enddate is None:
        text = text + str(datetime.now().date())
    else:
        text = text + str(args.enddate.date())

    print('\n========================================================')
    print(text)
    print('During the studied period, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
    if TotalPeriodEvents != 0:
        # Use a float numerator: under Python 2 an int/int division truncates,
        # which made the original round(..., 3) pointless and the percentage wrong.
        print('It represents ' + str(round(100.0 * TotalPeriodTags / TotalPeriodEvents, 3)) + '% of the events in this period.')
|
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import tools
|
||||||
|
|
||||||
|
def init(url, key):
    """Return a PyMISP client for the given MISP instance URL and API key.

    misp_verifycert comes from the local keys.py; 'json' selects the
    client's output format.
    """
    return PyMISP(url, key, misp_verifycert, 'json')
|
||||||
|
|
||||||
|
########## fetch data ##########
|
||||||
|
|
||||||
|
def download_last(m, last):
    """Fetch events published in the given timeframe (e.g. '7d') and cache
    them as JSON in the local file './data'."""
    fetched = m.download_last(last)
    with open('data', 'w') as out:
        json.dump(fetched, out)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # Default sampling window is one week.
    if args.days is None:
        args.days = 7
    download_last(misp, str(args.days) + 'd')

    # Abort early if the date arguments contradict each other or the window.
    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))

    # Normalise begindate: default to the start of the sampling window,
    # otherwise clamp the user-supplied date into that window.
    if args.begindate is None:
        args.begindate = tools.getLastdate(args.days)
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))

    # Normalise enddate: default to now, otherwise clamp to now at the latest.
    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    # Whole-sample statistics. NOTE(review): TotalEvents/TotalTags and this
    # first `result` are immediately recomputed below and never printed; kept
    # for backward compatibility but effectively dead.
    Events = tools.eventsListBuildFromArray('data')
    TotalEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)
    TotalTags = tools.getNbitems(Tags)

    # Restrict to the studied period and recompute the tag repartition.
    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)
    TotalPeriodTags = tools.getNbitems(Tags)

    # NOTE(review): begindate/enddate are always set by the normalisation
    # above, so the None branches below are defensive only.
    text = 'Studied period: from '
    if args.begindate is None:
        text = text + '1970-01-01'
    else:
        text = text + str(args.begindate.date())
    text = text + ' to '
    if args.enddate is None:
        text = text + str(datetime.now().date())
    else:
        text = text + str(args.enddate.date())

    print('\n========================================================')
    print(text)
    print(result)
|
|
@ -18,5 +18,9 @@
|
||||||
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
|
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
|
||||||
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
|
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
|
||||||
</tr></table>
|
</tr></table>
|
||||||
|
<!--
|
||||||
|
<div id="stats"><iframe src="table.html"></iframe></div>
|
||||||
|
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
|
||||||
|
-->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
|
@ -3,15 +3,104 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from json import JSONDecoder
|
from json import JSONDecoder
|
||||||
|
import math
|
||||||
import random
|
import random
|
||||||
import pygal
|
import pygal
|
||||||
from pygal.style import Style
|
from pygal.style import Style
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
from datetime import timedelta
|
||||||
|
from dateutil.parser import parse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
################ Errors ################
|
||||||
|
|
||||||
|
class DateError(Exception):
    """Raised when CLI date arguments are inconsistent (e.g. begindate after enddate)."""
    def __init__(self, value):
        # Human-readable description of the problem; callers read it back
        # via e.value, so the attribute name is part of the public contract.
        self.value = value
    def __str__(self):
        return repr(self.value)
|
||||||
|
|
||||||
|
################ Tools ################
|
||||||
|
|
||||||
|
def buildDoubleIndex(index1, index2, datatype):
    """Build a two-level MultiIndex pairing each sub-index with its parent.

    index2 is a flat run of per-group positions restarting at 0 for each
    group; every 0 advances to the next entry of index1. The result is a
    pandas MultiIndex named ['event', datatype].
    """
    outer = []
    pos = -1
    for sub in index2:
        # A 0 marks the first row of the next parent group.
        if sub == 0:
            pos += 1
        outer.append(index1[pos])
    pairs = list(zip(outer, index2))
    return pd.MultiIndex.from_tuples(pairs, names=['event', datatype])
|
||||||
|
|
||||||
|
def buildNewColumn(index2, column):
    """Expand a per-group column so each flat row gets its group's value.

    index2 is a flat run of per-group positions restarting at 0; every 0
    advances to the next entry of column. Returns a plain list aligned
    with index2.
    """
    expanded = []
    pos = -1
    for sub in index2:
        # A 0 marks the first row of the next parent group.
        if sub == 0:
            pos += 1
        expanded.append(column[pos])
    return expanded
|
||||||
|
|
||||||
|
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True if datetimeTested falls within [begin, end], inclusive.

    Defaults: begin -> Unix epoch (1970-01-01), end -> now.
    """
    # 'is None' instead of '== None': identity is the correct idiom and
    # avoids surprises with objects overriding __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
|
||||||
|
|
||||||
|
def addColumn(dataframe, columnList, columnName):
    """Attach columnList to dataframe in place as a new column columnName."""
    new_column = pd.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = new_column
|
||||||
|
|
||||||
|
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True if datetimeTested falls within [begin, end], inclusive.

    Defaults: begin -> Unix epoch (1970-01-01), end -> now.
    NOTE(review): this duplicates an identical dateInRange defined earlier in
    this module; this later definition is the one that takes effect.
    """
    # 'is None' instead of '== None': identity is the correct idiom and
    # avoids surprises with objects overriding __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
|
||||||
|
|
||||||
|
def toDatetime(date):
    """Parse a date string (e.g. 'yyyy-mm-dd') into a datetime.

    Delegates to dateutil.parser.parse, so any format dateutil accepts works.
    """
    return parse(date)
|
||||||
|
|
||||||
|
def checkDateConsistancy(begindate, enddate, lastdate):
    """Exit the process with an error message if the date arguments conflict.

    begindate/enddate are the raw 'yyyy-mm-dd' strings from argparse (or
    None); lastdate is a datetime marking the start of the sampling window.
    Each violation prints a DateError message and calls sys.exit(1).
    NOTE(review): the misspelling 'Consistancy' is kept for API compatibility.
    """
    # begindate must not be after enddate. This compares the raw strings
    # lexicographically -- safe only for zero-padded yyyy-mm-dd input; TODO
    # confirm callers always pass that format.
    try:
        if begindate is not None and enddate is not None:
            if begindate > enddate:
                raise DateError('begindate (' + begindate + ') cannot be after enddate (' + enddate + ')')
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)

    # enddate must not fall before the start of the sampling window.
    try:
        if enddate is not None:
            if toDatetime(enddate) < lastdate:
                raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' )
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)

    # begindate must not be in the future.
    try:
        if begindate is not None:
            if toDatetime(begindate) > datetime.now():
                raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)
|
||||||
|
|
||||||
|
def setBegindate(begindate, lastdate):
    """Clamp begindate so it is never earlier than the sampling-window start."""
    if begindate > lastdate:
        return begindate
    return lastdate
|
||||||
|
|
||||||
|
def setEnddate(enddate):
    """Clamp enddate so it is never later than the current moment."""
    now = datetime.now()
    return enddate if enddate < now else now
|
||||||
|
|
||||||
|
def getLastdate(last):
    """Return midnight of the day `last` days before today."""
    midnight_today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    return midnight_today - timedelta(days=int(last))
|
||||||
|
|
||||||
################ Formatting ################
|
################ Formatting ################
|
||||||
|
|
||||||
def eventsListBuildFromList(filename):
|
def eventsListBuildFromList(filename):
|
||||||
with open('testt', 'r') as myfile:
|
with open(filename, 'r') as myfile:
|
||||||
s=myfile.read().replace('\n', '')
|
s=myfile.read().replace('\n', '')
|
||||||
decoder = JSONDecoder()
|
decoder = JSONDecoder()
|
||||||
s_len = len(s)
|
s_len = len(s)
|
||||||
|
@ -46,7 +135,7 @@ def eventsListBuildFromArray(filename):
|
||||||
data.append(pd.DataFrame.from_dict(e, orient='index'))
|
data.append(pd.DataFrame.from_dict(e, orient='index'))
|
||||||
Events = pd.concat(data)
|
Events = pd.concat(data)
|
||||||
for it in range(Events['attribute_count'].size):
|
for it in range(Events['attribute_count'].size):
|
||||||
if Events['attribute_count'][it] == None:
|
if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
|
||||||
Events['attribute_count'][it]='0'
|
Events['attribute_count'][it]='0'
|
||||||
else:
|
else:
|
||||||
Events['attribute_count'][it]=int(Events['attribute_count'][it])
|
Events['attribute_count'][it]=int(Events['attribute_count'][it])
|
||||||
|
@ -59,12 +148,47 @@ def attributesListBuild(Events):
|
||||||
Attributes.append(pd.DataFrame(Attribute))
|
Attributes.append(pd.DataFrame(Attribute))
|
||||||
return pd.concat(Attributes)
|
return pd.concat(Attributes)
|
||||||
|
|
||||||
|
def tagsListBuild(Events):
    """Flatten the per-event 'Tag' lists into a single tags DataFrame.

    Each tag row is given the date of its parent event and the result is
    indexed by a two-level (event, tag) MultiIndex.
    """
    Tags = []
    for Tag in Events['Tag']:
        # Skip events whose 'Tag' cell is not a list (presumably untagged
        # events -- TODO confirm against the fetched JSON shape).
        if type(Tag) is not list:
            continue
        Tags.append(pd.DataFrame(Tag))
    Tags = pd.concat(Tags)
    # Propagate each parent event's date onto its tag rows.
    columnDate = buildNewColumn(Tags.index, Events['date'])
    addColumn(Tags, columnDate, 'date')
    # Two-level index: (event position, tag position within the event).
    index = buildDoubleIndex(Events.index, Tags.index, 'tag')
    Tags = Tags.set_index(index)
    return Tags
|
||||||
|
|
||||||
|
def selectInRange(Events, begin=None, end=None):
    """Return a new DataFrame containing only the events whose 'date' lies
    within [begin, end] (defaults handled by dateInRange)."""
    inRange = []
    for i, Event in Events.iterrows():
        if dateInRange(parse(Event['date']), begin, end):
            inRange.append(Event.tolist())
    inRange = pd.DataFrame(inRange)
    # Restore the original column labels lost when rebuilding from plain lists.
    temp = Events.columns.tolist()
    inRange.columns = temp
    return inRange
|
||||||
|
|
||||||
|
def isTagIn(dataframe, tag):
    """Return the event-level index values whose tag 'name' contains `tag`.

    The match is a substring/regex search (pandas str.contains), so
    'ransom' also matches 'ransomware'. Duplicate event indices are
    removed while preserving first-seen order.
    """
    matches = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
    seen = []
    for entry in matches:
        # entry is an (event, tag) tuple from the MultiIndex; keep the
        # event level only, once per event.
        event_id = entry[0]
        if event_id not in seen:
            seen.append(event_id)
    return seen
|
||||||
|
|
||||||
################ Basic Stats ################
|
################ Basic Stats ################
|
||||||
|
|
||||||
|
def getNbitems(dataframe):
    """Return the number of rows in the given DataFrame."""
    return dataframe.shape[0]
|
||||||
|
|
||||||
def getNbAttributePerEventCategoryType(Attributes):
    """Count attributes grouped by (event_id, category, type)."""
    grouped = Attributes.groupby(['event_id', 'category', 'type'])
    return grouped['id'].count()
|
||||||
|
|
||||||
|
def getNbOccurenceTags(Tags):
    """Count how many times each tag name occurs in the tags DataFrame."""
    return Tags.groupby('name')['id'].count()
|
||||||
|
|
||||||
################ Charts ################
|
################ Charts ################
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
## Explanation
|
|
||||||
|
|
||||||
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treepmap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
|
|
||||||
* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
|
|
||||||
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
|
|
||||||
|
|
||||||
## Requierements
|
|
||||||
|
|
||||||
* [Pygal](https://github.com/Kozea/pygal/)
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue