mirror of https://github.com/MISP/PyMISP
Merge pull request #23 from Delta-Sierra/master
Add tags_count and tag_search script in examples/situational-awareness/pull/24/head
commit
68f270dd4b
|
@ -0,0 +1,19 @@
|
||||||
|
## Explanation
|
||||||
|
|
||||||
|
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treemap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
|
||||||
|
* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
|
||||||
|
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
|
||||||
|
|
||||||
|
* tags\_count.py is a script that counts the number of occurrences of every tag in a fetched sample of Events in a given period of time.
|
||||||
|
* tag\_search.py is a script that counts the number of occurrences of a given tag in a fetched sample of Events in a given period of time.
|
||||||
|
* Events will be fetched from _days_ days ago to today.
|
||||||
|
* _begindate_ is the beginning of the studied period. If it is later than today, an error will be raised.
|
||||||
|
* _enddate_ is the end of the studied period. If it is earlier than _begindate_, an error will be raised.
|
||||||
|
* tag\_search.py allows searching for multiple tags at once by separating each tag with the | symbol.
|
||||||
|
* Partial search is also possible with tag\_search.py. For instance, searching for "ransom" will also return tags containing "ransomware".
|
||||||
|
|
||||||
|
:warning: These scripts are not time optimised
|
||||||
|
|
||||||
|
## Requirements
|
||||||
|
|
||||||
|
* [Pygal](https://github.com/Kozea/pygal/)
|
|
@ -1,5 +1,6 @@
|
||||||
body
|
body
|
||||||
{
|
{
|
||||||
|
/*font-family: "Helvetica Neue", Helvetica, Arial, sans-serif;*/
|
||||||
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
|
font-family: Consolas, "Liberation Mono", Menlo, Courier, monospace;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,6 +11,8 @@ h1
|
||||||
text-align:center;
|
text-align:center;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*** Stats Tables ***/
|
||||||
|
|
||||||
table
|
table
|
||||||
{
|
{
|
||||||
border-collapse: collapse;
|
border-collapse: collapse;
|
|
@ -0,0 +1,69 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import tools
|
||||||
|
|
||||||
|
def init(url, key):
    """Return a PyMISP client for the given MISP instance URL and API key.

    misp_verifycert comes from the local keys.py; 'json' selects the
    client's output format.
    """
    return PyMISP(url, key, misp_verifycert, 'json')
|
||||||
|
|
||||||
|
########## fetch data ##########
|
||||||
|
|
||||||
|
def download_last(m, last):
    """Fetch events published in the given timeframe (e.g. '7d') and cache
    them as JSON in the local file './data'."""
    fetched = m.download_last(last)
    with open('data', 'w') as out:
        json.dump(fetched, out)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the number of occurrence of the given tag in this sample.')
    parser.add_argument("-t", "--tag", required=True, help="tag to search (search for multiple tags is possible by using |. example : \"osint|OSINT\")")
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # Default sampling window is one week.
    if args.days is None:
        args.days = 7
    download_last(misp, str(args.days) + 'd')

    # Abort early if the date arguments contradict each other or the window.
    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))

    # Normalise begindate: default to the start of the sampling window,
    # otherwise clamp the user-supplied date into that window.
    if args.begindate is None:
        args.begindate = tools.getLastdate(args.days)
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))

    # Normalise enddate: default to now, otherwise clamp to now at the latest.
    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    Events = tools.selectInRange(tools.eventsListBuildFromArray('data'), begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.isTagIn(Tags, args.tag)
    TotalPeriodTags = len(result)

    # NOTE(review): begindate/enddate are always set by the normalisation
    # above, so the None branches below are defensive only.
    text = 'Studied period: from '
    if args.begindate is None:
        text = text + '1970-01-01'
    else:
        text = text + str(args.begindate.date())
    text = text + ' to '
    if args.enddate is None:
        text = text + str(datetime.now().date())
    else:
        text = text + str(args.enddate.date())

    print('\n========================================================')
    print(text)
    print('During the studied period, ' + str(TotalPeriodTags) + ' events out of ' + str(TotalPeriodEvents) + ' contains at least one tag with ' + args.tag + '.')
    if TotalPeriodEvents != 0:
        # Use a float numerator: under Python 2 an int/int division truncates,
        # which made the original round(..., 3) pointless and the percentage wrong.
        print('It represents ' + str(round(100.0 * TotalPeriodTags / TotalPeriodEvents, 3)) + '% of the events in this period.')
|
|
@ -0,0 +1,72 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from pymisp import PyMISP
|
||||||
|
from keys import misp_url, misp_key, misp_verifycert
|
||||||
|
from datetime import datetime
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import tools
|
||||||
|
|
||||||
|
def init(url, key):
    """Return a PyMISP client for the given MISP instance URL and API key.

    misp_verifycert comes from the local keys.py; 'json' selects the
    client's output format.
    """
    return PyMISP(url, key, misp_verifycert, 'json')
|
||||||
|
|
||||||
|
########## fetch data ##########
|
||||||
|
|
||||||
|
def download_last(m, last):
    """Fetch events published in the given timeframe (e.g. '7d') and cache
    them as JSON in the local file './data'."""
    fetched = m.download_last(last)
    with open('data', 'w') as out:
        json.dump(fetched, out)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py) and give the repartition of tags in this sample.')
    parser.add_argument("-d", "--days", type=int, help="number of days before today to search. If not define, default value is 7")
    parser.add_argument("-b", "--begindate", help="The research will look for tags attached to events posted at or after the given startdate (format: yyyy-mm-dd): If no date is given, default time is epoch time (1970-1-1)")
    parser.add_argument("-e", "--enddate", help="The research will look for tags attached to events posted at or before the given enddate (format: yyyy-mm-dd): If no date is given, default time is now()")

    args = parser.parse_args()

    misp = init(misp_url, misp_key)

    # Default sampling window is one week.
    if args.days is None:
        args.days = 7
    download_last(misp, str(args.days) + 'd')

    # Abort early if the date arguments contradict each other or the window.
    tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))

    # Normalise begindate: default to the start of the sampling window,
    # otherwise clamp the user-supplied date into that window.
    if args.begindate is None:
        args.begindate = tools.getLastdate(args.days)
    else:
        args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))

    # Normalise enddate: default to now, otherwise clamp to now at the latest.
    if args.enddate is None:
        args.enddate = datetime.now()
    else:
        args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))

    # Whole-sample statistics. NOTE(review): TotalEvents/TotalTags and this
    # first `result` are immediately recomputed below and never printed; kept
    # for backward compatibility but effectively dead.
    Events = tools.eventsListBuildFromArray('data')
    TotalEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)
    TotalTags = tools.getNbitems(Tags)

    # Restrict to the studied period and recompute the tag repartition.
    Events = tools.selectInRange(Events, begin=args.begindate, end=args.enddate)
    TotalPeriodEvents = tools.getNbitems(Events)
    Tags = tools.tagsListBuild(Events)
    result = tools.getNbOccurenceTags(Tags)
    TotalPeriodTags = tools.getNbitems(Tags)

    # NOTE(review): begindate/enddate are always set by the normalisation
    # above, so the None branches below are defensive only.
    text = 'Studied period: from '
    if args.begindate is None:
        text = text + '1970-01-01'
    else:
        text = text + str(args.begindate.date())
    text = text + ' to '
    if args.enddate is None:
        text = text + str(datetime.now().date())
    else:
        text = text + str(args.enddate.date())

    print('\n========================================================')
    print(text)
    print(result)
|
|
@ -18,5 +18,9 @@
|
||||||
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
|
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
|
||||||
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
|
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
|
||||||
</tr></table>
|
</tr></table>
|
||||||
|
<!--
|
||||||
|
<div id="stats"><iframe src="table.html"></iframe></div>
|
||||||
|
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
|
||||||
|
-->
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
|
@ -3,15 +3,104 @@
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from json import JSONDecoder
|
from json import JSONDecoder
|
||||||
|
import math
|
||||||
import random
|
import random
|
||||||
import pygal
|
import pygal
|
||||||
from pygal.style import Style
|
from pygal.style import Style
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
from datetime import datetime
|
||||||
|
from datetime import timedelta
|
||||||
|
from dateutil.parser import parse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
################ Errors ################
|
||||||
|
|
||||||
|
class DateError(Exception):
    """Raised when CLI date arguments are inconsistent (e.g. begindate after enddate)."""
    def __init__(self, value):
        # Human-readable description of the problem; callers read it back
        # via e.value, so the attribute name is part of the public contract.
        self.value = value
    def __str__(self):
        return repr(self.value)
|
||||||
|
|
||||||
|
################ Tools ################
|
||||||
|
|
||||||
|
def buildDoubleIndex(index1, index2, datatype):
    """Build a two-level MultiIndex pairing each sub-index with its parent.

    index2 is a flat run of per-group positions restarting at 0 for each
    group; every 0 advances to the next entry of index1. The result is a
    pandas MultiIndex named ['event', datatype].
    """
    outer = []
    pos = -1
    for sub in index2:
        # A 0 marks the first row of the next parent group.
        if sub == 0:
            pos += 1
        outer.append(index1[pos])
    pairs = list(zip(outer, index2))
    return pd.MultiIndex.from_tuples(pairs, names=['event', datatype])
|
||||||
|
|
||||||
|
def buildNewColumn(index2, column):
    """Expand a per-group column so each flat row gets its group's value.

    index2 is a flat run of per-group positions restarting at 0; every 0
    advances to the next entry of column. Returns a plain list aligned
    with index2.
    """
    expanded = []
    pos = -1
    for sub in index2:
        # A 0 marks the first row of the next parent group.
        if sub == 0:
            pos += 1
        expanded.append(column[pos])
    return expanded
|
||||||
|
|
||||||
|
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True if datetimeTested falls within [begin, end], inclusive.

    Defaults: begin -> Unix epoch (1970-01-01), end -> now.
    """
    # 'is None' instead of '== None': identity is the correct idiom and
    # avoids surprises with objects overriding __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
|
||||||
|
|
||||||
|
def addColumn(dataframe, columnList, columnName):
    """Attach columnList to dataframe in place as a new column columnName."""
    new_column = pd.Series(columnList, index=dataframe.index)
    dataframe.loc[:, columnName] = new_column
|
||||||
|
|
||||||
|
def dateInRange(datetimeTested, begin=None, end=None):
    """Return True if datetimeTested falls within [begin, end], inclusive.

    Defaults: begin -> Unix epoch (1970-01-01), end -> now.
    NOTE(review): this duplicates an identical dateInRange defined earlier in
    this module; this later definition is the one that takes effect.
    """
    # 'is None' instead of '== None': identity is the correct idiom and
    # avoids surprises with objects overriding __eq__.
    if begin is None:
        begin = datetime(1970, 1, 1)
    if end is None:
        end = datetime.now()
    return begin <= datetimeTested <= end
|
||||||
|
|
||||||
|
def toDatetime(date):
    """Parse a date string (e.g. 'yyyy-mm-dd') into a datetime.

    Delegates to dateutil.parser.parse, so any format dateutil accepts works.
    """
    return parse(date)
|
||||||
|
|
||||||
|
def checkDateConsistancy(begindate, enddate, lastdate):
    """Exit the process with an error message if the date arguments conflict.

    begindate/enddate are the raw 'yyyy-mm-dd' strings from argparse (or
    None); lastdate is a datetime marking the start of the sampling window.
    Each violation prints a DateError message and calls sys.exit(1).
    NOTE(review): the misspelling 'Consistancy' is kept for API compatibility.
    """
    # begindate must not be after enddate. This compares the raw strings
    # lexicographically -- safe only for zero-padded yyyy-mm-dd input; TODO
    # confirm callers always pass that format.
    try:
        if begindate is not None and enddate is not None:
            if begindate > enddate:
                raise DateError('begindate (' + begindate + ') cannot be after enddate (' + enddate + ')')
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)

    # enddate must not fall before the start of the sampling window.
    try:
        if enddate is not None:
            if toDatetime(enddate) < lastdate:
                raise DateError('enddate (' + enddate + ') cannot be before lastdate (' + str(lastdate) + ')' )
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)

    # begindate must not be in the future.
    try:
        if begindate is not None:
            if toDatetime(begindate) > datetime.now():
                raise DateError('begindate (' + begindate + ') cannot be after today (' + str(datetime.now().date()) + ')')
    except DateError as e:
        print('DateError: ' + e.value)
        sys.exit(1)
|
||||||
|
|
||||||
|
def setBegindate(begindate, lastdate):
    """Clamp begindate so it is never earlier than the sampling-window start."""
    if begindate > lastdate:
        return begindate
    return lastdate
|
||||||
|
|
||||||
|
def setEnddate(enddate):
    """Clamp enddate so it is never later than the current moment."""
    now = datetime.now()
    return enddate if enddate < now else now
|
||||||
|
|
||||||
|
def getLastdate(last):
    """Return midnight of the day `last` days before today."""
    midnight_today = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
    return midnight_today - timedelta(days=int(last))
|
||||||
|
|
||||||
################ Formatting ################
|
################ Formatting ################
|
||||||
|
|
||||||
def eventsListBuildFromList(filename):
|
def eventsListBuildFromList(filename):
|
||||||
with open('testt', 'r') as myfile:
|
with open(filename, 'r') as myfile:
|
||||||
s=myfile.read().replace('\n', '')
|
s=myfile.read().replace('\n', '')
|
||||||
decoder = JSONDecoder()
|
decoder = JSONDecoder()
|
||||||
s_len = len(s)
|
s_len = len(s)
|
||||||
|
@ -46,7 +135,7 @@ def eventsListBuildFromArray(filename):
|
||||||
data.append(pd.DataFrame.from_dict(e, orient='index'))
|
data.append(pd.DataFrame.from_dict(e, orient='index'))
|
||||||
Events = pd.concat(data)
|
Events = pd.concat(data)
|
||||||
for it in range(Events['attribute_count'].size):
|
for it in range(Events['attribute_count'].size):
|
||||||
if Events['attribute_count'][it] == None:
|
if Events['attribute_count'][it] == None or (isinstance(Events['attribute_count'][it], float) and math.isnan(Events['attribute_count'][it])):
|
||||||
Events['attribute_count'][it]='0'
|
Events['attribute_count'][it]='0'
|
||||||
else:
|
else:
|
||||||
Events['attribute_count'][it]=int(Events['attribute_count'][it])
|
Events['attribute_count'][it]=int(Events['attribute_count'][it])
|
||||||
|
@ -59,12 +148,47 @@ def attributesListBuild(Events):
|
||||||
Attributes.append(pd.DataFrame(Attribute))
|
Attributes.append(pd.DataFrame(Attribute))
|
||||||
return pd.concat(Attributes)
|
return pd.concat(Attributes)
|
||||||
|
|
||||||
|
def tagsListBuild(Events):
    """Flatten the per-event 'Tag' lists into a single tags DataFrame.

    Each tag row is given the date of its parent event and the result is
    indexed by a two-level (event, tag) MultiIndex.
    """
    Tags = []
    for Tag in Events['Tag']:
        # Skip events whose 'Tag' cell is not a list (presumably untagged
        # events -- TODO confirm against the fetched JSON shape).
        if type(Tag) is not list:
            continue
        Tags.append(pd.DataFrame(Tag))
    Tags = pd.concat(Tags)
    # Propagate each parent event's date onto its tag rows.
    columnDate = buildNewColumn(Tags.index, Events['date'])
    addColumn(Tags, columnDate, 'date')
    # Two-level index: (event position, tag position within the event).
    index = buildDoubleIndex(Events.index, Tags.index, 'tag')
    Tags = Tags.set_index(index)
    return Tags
|
||||||
|
|
||||||
|
def selectInRange(Events, begin=None, end=None):
    """Return a new DataFrame containing only the events whose 'date' lies
    within [begin, end] (defaults handled by dateInRange)."""
    inRange = []
    for i, Event in Events.iterrows():
        if dateInRange(parse(Event['date']), begin, end):
            inRange.append(Event.tolist())
    inRange = pd.DataFrame(inRange)
    # Restore the original column labels lost when rebuilding from plain lists.
    temp = Events.columns.tolist()
    inRange.columns = temp
    return inRange
|
||||||
|
|
||||||
|
def isTagIn(dataframe, tag):
    """Return the event-level index values whose tag 'name' contains `tag`.

    The match is a substring/regex search (pandas str.contains), so
    'ransom' also matches 'ransomware'. Duplicate event indices are
    removed while preserving first-seen order.
    """
    matches = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
    seen = []
    for entry in matches:
        # entry is an (event, tag) tuple from the MultiIndex; keep the
        # event level only, once per event.
        event_id = entry[0]
        if event_id not in seen:
            seen.append(event_id)
    return seen
|
||||||
|
|
||||||
################ Basic Stats ################
|
################ Basic Stats ################
|
||||||
|
|
||||||
|
def getNbitems(dataframe):
    """Return the number of rows in the given DataFrame."""
    return dataframe.shape[0]
|
||||||
|
|
||||||
def getNbAttributePerEventCategoryType(Attributes):
    """Count attributes grouped by (event_id, category, type)."""
    grouped = Attributes.groupby(['event_id', 'category', 'type'])
    return grouped['id'].count()
|
||||||
|
|
||||||
|
def getNbOccurenceTags(Tags):
    """Count how many times each tag name occurs in the tags DataFrame."""
    return Tags.groupby('name')['id'].count()
|
||||||
|
|
||||||
################ Charts ################
|
################ Charts ################
|
||||||
|
|
|
@ -1,11 +0,0 @@
|
||||||
## Explanation
|
|
||||||
|
|
||||||
* treemap.py is a script that will generate an interactive svg (attribute\_treemap.svg) containing a treepmap representing the distribution of attributes in a sample (data) fetched from the instance using "last" or "searchall" examples.
|
|
||||||
* It will also generate a html document with a table (attribute\_table.html) containing count for each type of attribute.
|
|
||||||
* test\_attribute\_treemap.html is a quick page made to visualize both treemap and table at the same time.
|
|
||||||
|
|
||||||
## Requierements
|
|
||||||
|
|
||||||
* [Pygal](https://github.com/Kozea/pygal/)
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue