Merge pull request #23 from Delta-Sierra/master

Add tags_count and tag_search script in examples/situational-awareness/
pull/24/head
Raphaël Vinot 2016-07-26 14:28:52 +02:00 committed by GitHub
commit 68f270dd4b
8 changed files with 293 additions and 13 deletions

View File

@ -0,0 +1,19 @@
## Explanation
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
* It will also generate an HTML document with a table (attribute\_table.html) containing the count for each type of attribute.
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
* tags\_count.py is a script that counts the number of occurrences of every tag in a fetched sample of Events in a given period of time.
* tag\_search.py is a script that counts the number of occurrences of a given tag in a fetched sample of Events in a given period of time.
* Events will be fetched from _days_ days ago to today.
* _begindate_ is the beginning of the studied period. If it is later than today, an error will be raised.
* _enddate_ is the end of the studied period. If it is earlier than _begindate_, an error will be raised.
* tag\_search.py allows searching for multiple tags at once by separating each tag with the | symbol.
* Partial search is also possible with tag\_search.py. For instance, a search for "ransom" will also return tags containing "ransomware".
:warning: These scripts are not time optimised
## Requirements
* [Pygal](https://github.com/Kozea/pygal/)

View File

@ -1,5 +1,6 @@
/* Page-wide font: a monospace stack is active; the sans-serif stack is kept commented out. */
body
{
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
}
@ -10,6 +11,8 @@ h1
text-align:center;
}
/*** Stats Tables ***/
table
{
border-collapse: collapse;

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import json
import tools
def init(url, key):
    """Build the PyMISP client used by this script.

    The certificate-verification flag comes from ``keys.misp_verifycert`` and
    the output format is fixed to ``'json'``.
    """
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
########## fetch data ##########
def download_last(m, last):
    """Fetch the events of the last ``last`` period and dump them as JSON.

    ``m`` is the client whose ``download_last`` method is called; the answer
    is written to the file ``data`` in the current working directory,
    replacing any previous content.
    """
    fetched = m.download_last(last)
    with open('data', 'w') as output:
        serialized = json.dumps(fetched)
        output.write(serialized)
if __name__ == '__main__':
    # Command-line entry point: fetch the events of the last N days, restrict
    # them to the requested period and report how many events carry a tag
    # matching the given pattern.
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the number of occurrence of the given tag in this sample.')
    parser.add_argument("-t", "--tag", required=True, help="tag to search (search for multiple tags is possible by using |. example : \"osint|OSINT\")")
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    if args.days is None:
        args.days = 7
    # Computed once so that validation and clamping use the same reference.
    lastdate = tools.getLastdate(args.days)

    # Validate the dates before the (slow) download: inconsistent arguments
    # make the script exit without touching the server or the 'data' file.
    tools.checkDateConsistancy(args.begindate, args.enddate, lastdate)
    download_last(misp, str(args.days) + 'd')

    if args.begindate is None:
        args.begindate = lastdate
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), lastdate)

    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    Events = tools.selectInRange(tools.eventsListBuildFromArray('data'), begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.isTagIn(Tags, args.tag)
    TotalPeriodTags = len(result)

    # Both bounds are always datetimes at this point, so no None fallback is needed.
    text = 'Studied period: from ' + str(args.begindate.date()) + ' to ' + str(args.enddate.date())

    # print(<single string>) behaves the same under Python 2 and Python 3.
    print('\n========================================================')
    print(text)
    print('During the studied period, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
    if TotalPeriodEvents != 0:
        # 100.0 forces float division; under Python 2 the integer division
        # truncated the percentage before round() was applied.
        print('It represents ' + str(round(100.0 * TotalPeriodTags / TotalPeriodEvents, 3)) + '% of the events in this period.')

View File

@ -0,0 +1,72 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import json
import tools
def init(url, key):
    """Build the PyMISP client used by this script.

    The certificate-verification flag comes from ``keys.misp_verifycert`` and
    the output format is fixed to ``'json'``.
    """
    client = PyMISP(url, key, misp_verifycert, 'json')
    return client
########## fetch data ##########
def download_last(m, last):
    """Fetch the events of the last ``last`` period and dump them as JSON.

    ``m`` is the client whose ``download_last`` method is called; the answer
    is written to the file ``data`` in the current working directory,
    replacing any previous content.
    """
    fetched = m.download_last(last)
    with open('data', 'w') as output:
        serialized = json.dumps(fetched)
        output.write(serialized)
if __name__ == '__main__':
    # Command-line entry point: fetch the events of the last N days, restrict
    # them to the requested period and print the number of occurrences of
    # every tag found in that period.
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    if args.days is None:
        args.days = 7
    # Computed once so that validation and clamping use the same reference.
    lastdate = tools.getLastdate(args.days)

    # Validate the dates before the (slow) download: inconsistent arguments
    # make the script exit without touching the server or the 'data' file.
    tools.checkDateConsistancy(args.begindate, args.enddate, lastdate)
    download_last(misp, str(args.days) + 'd')

    if args.begindate is None:
        args.begindate = lastdate
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), lastdate)

    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    # Only the statistics of the studied period are printed, so the
    # whole-sample counts that used to be computed and discarded are gone.
    Events = tools.selectInRange(tools.eventsListBuildFromArray('data'), begin=args.begindate, end=args.enddate)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)

    # Both bounds are always datetimes at this point, so no None fallback is needed.
    text = 'Studied period: from ' + str(args.begindate.date()) + ' to ' + str(args.enddate.date())

    # print(<single value>) behaves the same under Python 2 and Python 3.
    print('\n========================================================')
    print(text)
    print(result)

View File

@ -18,5 +18,9 @@
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
</tr></table>
<!--
<div id="stats"><iframe src="table.html"></iframe></div>
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
-->
</body>
</html>

View File

@ -3,15 +3,104 @@
import json
from json import JSONDecoder
import math
import random
import pygal
from pygal.style import Style
import pandas as pd
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import sys
################ Errors ################
class DateError(Exception):
    """Error raised when the requested dates are inconsistent.

    The message is stored in ``value``; ``str()`` of the exception returns
    the ``repr`` of that message.
    """

    def __init__(self, value):
        # Forward the message to Exception so that ``args`` is populated
        # (needed for a meaningful default repr and for pickling).
        super(DateError, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
################ Tools ################
def buildDoubleIndex(index1, index2, datatype):
    """Pair every entry of ``index2`` with an entry of ``index1`` in a MultiIndex.

    ``index2`` is walked in order; each time a 0 is met the cursor moves to
    the next entry of ``index1``, and the entry under the cursor is paired
    with the current ``index2`` value. The two levels of the result are
    named ``'event'`` and ``datatype``.
    """
    cursor = -1
    outerLabels = []
    for innerLabel in index2:
        if innerLabel == 0:
            cursor += 1
        outerLabels.append(index1[cursor])
    pairs = list(zip(outerLabels, index2))
    return pd.MultiIndex.from_tuples(pairs, names=['event', datatype])
def buildNewColumn(index2, column):
    """Repeat the values of ``column`` so that they line up with ``index2``.

    ``index2`` is walked in order; each time a 0 is met the cursor moves to
    the next value of ``column``, and the value under the cursor is emitted
    for the current position. Returns the resulting list.
    """
    cursor = -1
    expanded = []
    for innerLabel in index2:
        if innerLabel == 0:
            cursor += 1
        expanded.append(column[cursor])
    return expanded
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True when ``begin <= datetimeTested <= end`` (bounds included).

    A missing ``begin`` defaults to 1970-01-01 and a missing ``end`` to the
    current time.
    """
    # Identity test: ``== None`` would go through the operand's __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
def addColumn(dataframe, columnList, columnName):
    """Store ``columnList`` in ``dataframe`` under ``columnName``, in place.

    The values are wrapped in a Series that reuses the dataframe's own index
    before being assigned. Nothing is returned.
    """
    newColumn = pd.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = newColumn
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True when ``begin <= datetimeTested <= end`` (bounds included).

    A missing ``begin`` defaults to 1970-01-01 and a missing ``end`` to the
    current time.
    """
    # NOTE: dateInRange is also defined earlier in this module; being the
    # later definition, this is the one callers actually get.
    # Identity test: ``== None`` would go through the operand's __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
def toDatetime(date):
    """Convert ``date`` with ``dateutil.parser.parse`` and return the result."""
    parsed = parse(date)
    return parsed
def checkDateConsistancy(begindate, enddate, lastdate):
    """Validate the requested period; print an error and exit(1) if it is invalid.

    ``begindate`` and ``enddate`` are date strings or None; ``lastdate`` is a
    datetime. The checks, in order:

    * ``begindate`` must not be after ``enddate``;
    * ``enddate`` must not be before ``lastdate``;
    * ``begindate`` must not be after the current time.

    On the first failed check a ``DateError: ...`` line is printed and the
    process exits with status 1. Returns None when every check passes.
    """
    try:
        # Parse once and compare datetimes everywhere. The begin/end check
        # used to compare the raw strings, which mis-orders dates that are
        # not zero-padded (e.g. '2016-7-1' versus '2016-07-02').
        begin = toDatetime(begindate) if begindate is not None else None
        end = toDatetime(enddate) if enddate is not None else None

        if begin is not None and end is not None and begin > end:
            raise DateError('begindate (' + begindate + ') cannot be after enddate (' + enddate + ')')
        if end is not None and end < lastdate:
            raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')')
        if begin is not None and begin > datetime.now():
            raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)
def setBegindate(begindate, lastdate):
    """Return the later of ``begindate`` and ``lastdate``."""
    if lastdate > begindate:
        return lastdate
    return begindate
def setEnddate(enddate):
    """Return ``enddate``, or the current time when ``enddate`` lies in the future."""
    now = datetime.now()
    if now < enddate:
        return now
    return enddate
def getLastdate(last):
    """Return midnight of the day ``last`` days before now.

    ``last`` is converted with ``int()`` before being used as a day count.
    """
    shifted = datetime.now() - timedelta(days=int(last))
    return shifted.replace(hour=0, minute=0, second=0, microsecond=0)
################ Formatting ################
def eventsListBuildFromList(filename):
with open('testt', 'r') as myfile:
with open(filename, 'r') as myfile:
s=myfile.read().replace('\n', '')
decoder = JSONDecoder()
s_len = len(s)
@ -46,7 +135,7 @@ def eventsListBuildFromArray(filename):
data.append(pd.DataFrame.from_dict(e, orient='index'))
Events = pd.concat(data)
for it in range(Events['attribute_count'].size):
if Events['attribute_count'][it] == None:
if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
Events['attribute_count'][it]='0'
else:
Events['attribute_count'][it]=int(Events['attribute_count'][it])
@ -59,12 +148,47 @@ def attributesListBuild(Events):
Attributes.append(pd.DataFrame(Attribute))
return pd.concat(Attributes)
def tagsListBuild(Events):
    """Flatten the tag lists of ``Events`` into one DataFrame, one row per tag.

    Reads the ``'Tag'`` and ``'date'`` columns of ``Events``. Entries of
    ``'Tag'`` that are not lists (or are empty lists) contribute no row.
    Each row receives the ``'date'`` of the event it belongs to, and the
    result is indexed by a two-level MultiIndex named ``('event', 'tag')``:
    the event's label in ``Events.index`` and the tag's position in that
    event's list.

    When no event carries any tag, an empty DataFrame with the columns
    ``id``, ``name`` and ``date`` is returned instead of failing.
    """
    frames = []
    eventLabels = []
    dates = []
    for position, Tag in enumerate(Events['Tag']):
        if not isinstance(Tag, list) or not Tag:
            continue
        frame = pd.DataFrame(Tag)
        frames.append(frame)
        # Record the owner of every row explicitly. Deriving it afterwards
        # by counting the zeros of the concatenated index shifts dates and
        # labels as soon as one event has no tag list.
        eventLabels.extend([Events.index[position]] * len(frame))
        dates.extend([Events['date'].iloc[position]] * len(frame))

    if not frames:
        # pd.concat([]) raises; hand back an empty frame exposing the
        # columns the helpers of this module read ('id', 'name', 'date').
        Tags = pd.DataFrame(columns=['id', 'name', 'date'])
        Tags.index = pd.MultiIndex.from_arrays([[], []], names=['event', 'tag'])
        return Tags

    Tags = pd.concat(frames)
    Tags['date'] = dates
    Tags.index = pd.MultiIndex.from_arrays([eventLabels, Tags.index], names=['event', 'tag'])
    return Tags
def selectInRange(Events, begin=None, end=None):
    """Return the rows of ``Events`` whose ``'date'`` lies between ``begin`` and ``end``.

    Each row's ``'date'`` is parsed and tested with ``dateInRange``, so a
    missing bound gets that helper's default. The result is a new DataFrame
    with the same columns as ``Events`` and a fresh 0..n-1 index; it is empty
    (but keeps the columns) when no row matches.
    """
    inRange = [
        Event.tolist()
        for _, Event in Events.iterrows()
        if dateInRange(parse(Event['date']), begin, end)
    ]
    # Passing the columns to the constructor also works for an empty
    # selection; assigning them afterwards fails with a length mismatch.
    return pd.DataFrame(inRange, columns=Events.columns.tolist())
def isTagIn(dataframe, tag):
    """Return the events owning at least one tag whose name matches ``tag``.

    ``tag`` is handed to ``Series.str.contains`` and is therefore treated as
    a regular expression searched anywhere in the name (``'a|b'`` matches
    either alternative). ``dataframe`` needs a ``'name'`` column and an index
    whose entries are tuples with the event label first. Each event appears
    once in the returned list, in order of first appearance.
    """
    # na=False: a missing tag name counts as "no match" instead of putting
    # NaN in the boolean mask, which cannot be used for indexing.
    matches = dataframe[dataframe['name'].str.contains(tag, na=False)].index.tolist()
    seen = set()
    index = []
    for labels in matches:
        event = labels[0]
        # Set membership keeps the de-duplication linear.
        if event not in seen:
            seen.add(event)
            index.append(event)
    return index
################ Basic Stats ################
def getNbitems(dataframe):
    """Return the number of entries in the index of ``dataframe``."""
    rowLabels = dataframe.index
    return len(rowLabels)
def getNbAttributePerEventCategoryType(Attributes):
    """Count the ``id`` values per (``event_id``, ``category``, ``type``) group.

    Returns the ``'id'`` column of the grouped count, indexed by the three
    grouping keys.
    """
    grouped = Attributes.groupby(['event_id', 'category', 'type'])
    counts = grouped.count()
    return counts['id']
def getNbOccurenceTags(Tags):
    """Count the ``id`` values per tag ``name``.

    Returns the ``'id'`` column of the grouped count, indexed by tag name.
    """
    grouped = Tags.groupby('name')
    counts = grouped.count()
    return counts['id']
################ Charts ################

View File

@ -1,11 +0,0 @@
## Explanation
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
* It will also generate an HTML document with a table (attribute\_table.html) containing the count for each type of attribute.
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
## Requirements
* [Pygal](https://github.com/Kozea/pygal/)