add ta_scatter.py script & reorganise tools

pull/51/head
Déborah Servili 2017-02-03 16:12:02 +01:00
parent aadae9b20e
commit f8be16a905
10 changed files with 260 additions and 159 deletions

View File

@ -5,7 +5,7 @@ from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import tools
import pygal_tools
if __name__ == '__main__':
# Description shown by --help; keep it in sync with what the script renders.
parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py or searchall.py) and create a treemap representing the distribution of attributes in this sample.')
@ -26,6 +26,6 @@ if __name__ == '__main__':
attributes = tools.attributesListBuild(events)
temp = tools.getNbAttributePerEventCategoryType(attributes)
temp = temp.groupby(level=['category', 'type']).sum()
tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')
pygal_tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')
else:
print ('There is no event answering the research criteria')

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
import date_tools
def tagsDistributionScatterPlot(NbTags, dates, plotname='Tags Distribution Plot'):
    """Draw one circle series per key of ``NbTags`` and open the result.

    ``NbTags`` and ``dates`` are mappings sharing the same keys. Each key must
    be convertible with ``int()``: that integer sets the marker size (twice
    its value) and, when non-zero, the fill opacity (its inverse).
    ``dates[key]`` supplies the x values and ``NbTags[key]`` the y values.

    The plot is written to ``<plotname>.html`` and shown with ``show()``.
    """
    output_file(plotname + ".html")

    # The tooltip template is identical for every series, so set it once
    # instead of re-assigning it on each loop iteration.
    hover = HoverTool(tooltips=[("date", "@desc")])
    plot = figure(plot_width=800, plot_height=800, x_axis_type="datetime", tools=[hover])

    for name in NbTags:
        weight = int(name)
        # Keep x, y and the tooltip text in the same data source: Bokeh does
        # not accept literal sequences alongside an explicit ``source``.
        source = ColumnDataSource(data=dict(
            x=dates[name],
            y=NbTags[name],
            desc=[date_tools.datetimeToString(date, "%Y-%m-%d") for date in dates[name]],
        ))
        renderer = plot.circle('x', 'y', source=source)
        renderer.glyph.size = weight * 2
        if weight != 0:
            # Larger markers are drawn more transparent.
            renderer.glyph.fill_alpha = 1 / weight
    show(plot)

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
class DateError(Exception):
    """Error raised when the dates given to a script are inconsistent."""

    def __init__(self, value):
        # Keep the message so __str__ can render it.
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
# ############### Date Tools ################
def dateInRange(datetimeTested, begin=None, end=None):
    """Tell whether ``datetimeTested`` lies in ``[begin, end]`` (inclusive).

    A missing ``begin`` defaults to 1970-01-01 and a missing ``end`` to the
    current time.
    """
    lower = datetime(1970, 1, 1) if begin is None else begin
    upper = datetime.now() if end is None else end
    return lower <= datetimeTested <= upper
def toDatetime(date):
    """Parse ``date`` with dateutil's ``parse`` and return the result."""
    parsed = parse(date)
    return parsed
def datetimeToString(datetime, formatstring):
    """Format ``datetime`` with the strftime pattern ``formatstring``."""
    # The parameter name shadows the imported ``datetime`` class, but only
    # inside this function.
    render = datetime.strftime
    return render(formatstring)
def checkDateConsistancy(begindate, enddate, lastdate):
    """Validate the optional begin/end dates against each other and ``lastdate``.

    ``begindate`` and ``enddate`` are date strings accepted by ``toDatetime``,
    or ``None`` when not supplied. ``lastdate`` is a datetime that the parsed
    end date must not precede.

    Raises:
        DateError: if the begin date is after the end date, the end date is
            before ``lastdate``, or the begin date is in the future.
    """
    begin = toDatetime(begindate) if begindate is not None else None
    end = toDatetime(enddate) if enddate is not None else None

    # Compare parsed datetimes rather than the raw strings: string ordering
    # only matches chronological order for zero-padded ISO dates.
    if begin is not None and end is not None and begin > end:
        raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))

    if end is not None and end < lastdate:
        raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))

    if begin is not None and begin > datetime.now():
        raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
def setBegindate(begindate, lastdate):
    """Return the later of ``begindate`` and ``lastdate``."""
    if lastdate > begindate:
        return lastdate
    return begindate
def setEnddate(enddate):
    """Return ``enddate``, capped at the current time."""
    now = datetime.now()
    return now if now < enddate else enddate
def getLastdate(last):
    """Return midnight of the day ``last`` days before now.

    ``last`` may be anything ``int()`` accepts.
    """
    shifted = datetime.now() - timedelta(days=int(last))
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return shifted.replace(**midnight)
def getNDaysBefore(date, days):
    """Return midnight of the day ``days`` days before ``date``."""
    shifted = date - timedelta(days=days)
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return shifted.replace(**midnight)
def getToday():
    """Return today's date as a datetime at midnight."""
    now = datetime.now()
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return now.replace(**midnight)
def days_between(date_1, date_2):
    """Return the number of whole days separating the two dates.

    The result does not depend on argument order.
    """
    # Take abs() of the timedelta itself. A negative timedelta floors its
    # ``.days`` (-1.5 days is stored as days=-2), so abs(delta.days) would
    # over-count by one whenever date_2 is earlier and the gap is fractional.
    return abs(date_2 - date_1).days

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pygal
from pygal.style import Style
import pandas
import random
def createTable(colors, categ_types_hash, tablename='attribute_table.html'):
    """Write an HTML page with one pygal table per category to ``tablename``.

    ``categ_types_hash`` maps a category name to a list of dicts carrying
    ``'label'`` and ``'value'`` keys. ``colors`` maps the same category names
    to the colour used for the category heading.
    """
    page_head = '<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>'
    heading = '\n <h1 style="color:{};">{}</h1>\n'
    page_tail = '\n</body>\n</html>'

    with open(tablename, 'w') as target:
        target.write(page_head)
        for categ_name in categ_types_hash:
            entries = categ_types_hash[categ_name]
            table = pygal.Treemap(pretty_print=True)
            target.write(heading.format(colors[categ_name], categ_name))
            for entry in entries:
                table.add(entry['label'], entry['value'])
            target.write(table.render_table(transpose=True))
        target.write(page_tail)
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    """Render ``data`` as a pygal treemap plus a companion HTML table.

    ``data`` is iterated for its totals and must carry a two-level index
    whose entries are ``(category, type)`` pairs. The treemap is written to
    ``treename`` and the table (via ``createTable``) to ``tablename``. Each
    category gets a random colour shared by the treemap and the table.
    """
    categ_types_hash = {}
    # Iterating the index yields (category, type) tuples directly, which
    # avoids the MultiIndex ``labels`` attribute removed from recent pandas.
    for (categ_name, type_name), total in zip(data.index, data):
        categ_types_hash.setdefault(categ_name, []).append({'label': type_name, 'value': total})

    colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash}
    style = Style(background='transparent',
                  plot_background='#FFFFFF',
                  foreground='#111111',
                  foreground_strong='#111111',
                  foreground_subtle='#111111',
                  opacity='.6',
                  opacity_hover='.9',
                  transition='400ms ease-in',
                  colors=tuple(colors.values()))

    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    for categ_name, types in categ_types_hash.items():
        treemap.add(categ_name, types)

    # Forward the caller's table path; it used to be silently ignored.
    createTable(colors, categ_types_hash, tablename)
    treemap.render_to_file(treename)

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import numpy
import tools
import date_tools
import bokeh_tools
import time
if __name__ == '__main__':
    # Plot, per time window, how many tags the events of that window carry.
    parser = argparse.ArgumentParser(description='Show the evolution of trend of tags.')
    parser.add_argument("-d", "--days", type=int, required=True, help='')
    parser.add_argument("-s", "--begindate", required=True, help='format yyyy-mm-dd')
    parser.add_argument("-e", "--enddate", required=True, help='format yyyy-mm-dd')

    args = parser.parse_args()
    # The window size is used as a divisor and as a backwards step below.
    if args.days <= 0:
        parser.error('--days must be a positive number of days')

    misp = PyMISP(misp_url, misp_key, misp_verifycert)

    result = misp.search(date_from=args.begindate, date_to=args.enddate, metadata=False)

    # Getting data

    if 'response' in result:
        events = tools.eventsListBuildFromArray(result)
        NbTags = []
        dates = []
        enddate = date_tools.toDatetime(args.enddate)
        begindate = date_tools.toDatetime(args.begindate)

        # Walk backwards from the end date, one window of args.days at a time.
        for _ in range(round(date_tools.days_between(enddate, begindate) / args.days)):
            begindate = date_tools.getNDaysBefore(enddate, args.days)
            eventstemp = tools.selectInRange(events, begindate, enddate)
            if eventstemp is not None:
                for _, event in eventstemp.iterrows():
                    if 'Tag' in event:
                        # Every event of a window is dated at the window's end.
                        dates.append(enddate)
                        tags = event['Tag']
                        # A non-list value means the event has no tags.
                        NbTags.append(len(tags) if isinstance(tags, list) else 0)
            enddate = begindate

        # Prepare plot

        # Count how many events share the same (number of tags, date) point.
        occurrences = {}
        for point in zip(NbTags, dates):
            occurrences[point] = occurrences.get(point, 0) + 1

        # Group the distinct points by their multiplicity; the plotting helper
        # expects the multiplicity as a string key.
        NbTagsPlot = {}
        datesPlot = {}
        for (nb_tags, date), count in occurrences.items():
            key = str(count)
            NbTagsPlot.setdefault(key, []).append(nb_tags)
            datesPlot.setdefault(key, []).append(date)

        # Plot
        bokeh_tools.tagsDistributionScatterPlot(NbTagsPlot, datesPlot)
    else:
        print('There is no event answering the research criteria')

View File

@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import tools
import date_tools
def init(url, key):
@ -29,17 +30,17 @@ if __name__ == '__main__':
args.days = 7
result = misp.search(last='{}d'.format(args.days), metadata=True)
tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days))
if args.begindate is None:
args.begindate = tools.getLastdate(args.days)
args.begindate = date_tools.getLastdate(args.days)
else:
args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
# getLastdate lives in date_tools; tools no longer defines it.
args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days))
if args.enddate is None:
args.enddate = datetime.now()
else:
args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate))
if 'response' in result:
events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate)

View File

@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import tools
import date_tools
def init(url, key):
@ -28,17 +29,17 @@ if __name__ == '__main__':
args.days = 7
result = misp.search(last='{}d'.format(args.days), metadata=True)
tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days))
if args.begindate is None:
args.begindate = tools.getLastdate(args.days)
args.begindate = date_tools.getLastdate(args.days)
else:
args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days))
if args.enddate is None:
args.enddate = datetime.now()
else:
args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate))
if 'response' in result:
events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate)

View File

@ -5,6 +5,8 @@ from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import tools
import date_tools
import bokeh_tools
def formattingDataframe(dataframe, dates, NanValue):
@ -54,12 +56,12 @@ if __name__ == '__main__':
events = tools.eventsListBuildFromArray(result)
result = []
dates = []
enddate = tools.getToday()
enddate = date_tools.getToday()
colourDict = {}
faketag = False
for i in range(split):
begindate = tools.getNDaysBefore(enddate, size)
begindate = date_tools.getNDaysBefore(enddate, size)
dates.append(str(enddate.date()))
eventstemp = tools.selectInRange(events, begin=begindate, end=enddate)
if eventstemp is not None:

View File

@ -1,26 +0,0 @@
<html>
<head>
<style>
#stats
{
height: 746px;
margin-top: 100px;
}
#treemap
{
width: 1000px;
}
</style>
</head>
<body>
<table><tr>
<td><iframe id="stats" src="attribute_table.html" frameBorder="0"></iframe></td>
<td id="treemap"><object type="image/svg+xml" data="attribute_treemap.svg"></object></td>
</tr></table>
<!--
<div id="stats"><iframe src="table.html"></iframe></div>
<div id="treemap"><object type="image/svg+xml" data="test.svg"></object></div>
-->
</body>
</html>

View File

@ -2,13 +2,9 @@
# -*- coding: utf-8 -*-
from json import JSONDecoder
import random
import pygal
from pygal.style import Style
import pandas
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import numpy
from scipy import stats
from pytaxonomies import Taxonomies
@ -16,67 +12,25 @@ import re
import matplotlib.pyplot as plt
from matplotlib import pylab
import os
class DateError(Exception):
    """Error raised when the dates given to a script are inconsistent."""

    def __init__(self, value):
        # Keep the message so __str__ can render it.
        self.value = value

    def __str__(self):
        return '{!r}'.format(self.value)
# ############### Date Tools ################
def dateInRange(datetimeTested, begin=None, end=None):
    """Tell whether ``datetimeTested`` lies in ``[begin, end]`` (inclusive).

    A missing ``begin`` defaults to 1970-01-01 and a missing ``end`` to the
    current time.
    """
    lower = datetime(1970, 1, 1) if begin is None else begin
    upper = datetime.now() if end is None else end
    return lower <= datetimeTested <= upper
def toDatetime(date):
    """Parse ``date`` with dateutil's ``parse`` and return the result."""
    parsed = parse(date)
    return parsed
def checkDateConsistancy(begindate, enddate, lastdate):
    """Validate the optional begin/end dates against each other and ``lastdate``.

    ``begindate`` and ``enddate`` are date strings accepted by ``toDatetime``,
    or ``None`` when not supplied. ``lastdate`` is a datetime that the parsed
    end date must not precede.

    Raises:
        DateError: if the begin date is after the end date, the end date is
            before ``lastdate``, or the begin date is in the future.
    """
    begin = toDatetime(begindate) if begindate is not None else None
    end = toDatetime(enddate) if enddate is not None else None

    # Compare parsed datetimes rather than the raw strings: string ordering
    # only matches chronological order for zero-padded ISO dates.
    if begin is not None and end is not None and begin > end:
        raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))

    if end is not None and end < lastdate:
        raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))

    if begin is not None and begin > datetime.now():
        raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
def setBegindate(begindate, lastdate):
    """Return the later of ``begindate`` and ``lastdate``."""
    if lastdate > begindate:
        return lastdate
    return begindate
def setEnddate(enddate):
    """Return ``enddate``, capped at the current time."""
    now = datetime.now()
    return now if now < enddate else enddate
def getLastdate(last):
    """Return midnight of the day ``last`` days before now.

    ``last`` may be anything ``int()`` accepts.
    """
    shifted = datetime.now() - timedelta(days=int(last))
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return shifted.replace(**midnight)
def getNDaysBefore(date, days):
    """Return midnight of the day ``days`` days before ``date``."""
    shifted = date - timedelta(days=days)
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return shifted.replace(**midnight)
def getToday():
    """Return today's date as a datetime at midnight."""
    now = datetime.now()
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    return now.replace(**midnight)
import date_tools
from dateutil.parser import parse
# ############### Tools ################
def selectInRange(Events, begin=None, end=None):
    """Return the rows of ``Events`` whose ``'date'`` falls within the range.

    Each row's ``'date'`` value is parsed and tested with
    ``date_tools.dateInRange``. The matching rows are returned as a new
    DataFrame with the same column names, or ``None`` when nothing matches.
    """
    rows = [
        row.tolist()
        for _, row in Events.iterrows()
        if date_tools.dateInRange(parse(row['date']), begin, end)
    ]
    selected = pandas.DataFrame(rows)
    column_names = Events.columns.tolist()
    if selected.empty:
        return None
    selected.columns = column_names
    return selected
def getTaxonomies(dataframe):
taxonomies = Taxonomies()
taxonomies = list(taxonomies.keys())
@ -233,19 +187,6 @@ def tagsListBuild(Events):
return Tags
def selectInRange(Events, begin=None, end=None):
    """Return the rows of ``Events`` whose ``'date'`` falls within the range.

    Each row's ``'date'`` value is parsed and tested with ``dateInRange``.
    The matching rows are returned as a new DataFrame with the same column
    names, or ``None`` when nothing matches.
    """
    rows = [
        row.tolist()
        for _, row in Events.iterrows()
        if dateInRange(parse(row['date']), begin, end)
    ]
    selected = pandas.DataFrame(rows)
    column_names = Events.columns.tolist()
    if selected.empty:
        return None
    selected.columns = column_names
    return selected
def isTagIn(dataframe, tag):
temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
index = []
@ -277,56 +218,10 @@ def getNbAttributePerEventCategoryType(attributes):
def getNbOccurenceTags(Tags):
    """Count, for each distinct ``'name'`` in ``Tags``, its non-null ``'id'`` values."""
    ids_by_name = Tags.groupby('name')['id']
    return ids_by_name.count()
# ############### Charts ################
def createTable(colors, categ_types_hash, tablename='attribute_table.html'):
    """Write an HTML page with one pygal table per category to ``tablename``.

    ``categ_types_hash`` maps a category name to a list of dicts carrying
    ``'label'`` and ``'value'`` keys. ``colors`` maps the same category names
    to the colour used for the category heading.
    """
    page_head = '<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>'
    heading = '\n <h1 style="color:{};">{}</h1>\n'
    page_tail = '\n</body>\n</html>'

    with open(tablename, 'w') as target:
        target.write(page_head)
        for categ_name in categ_types_hash:
            entries = categ_types_hash[categ_name]
            table = pygal.Treemap(pretty_print=True)
            target.write(heading.format(colors[categ_name], categ_name))
            for entry in entries:
                table.add(entry['label'], entry['value'])
            target.write(table.render_table(transpose=True))
        target.write(page_tail)
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    """Render ``data`` as a pygal treemap plus a companion HTML table.

    ``data`` is iterated for its totals and must carry a two-level index
    whose entries are ``(category, type)`` pairs. The treemap is written to
    ``treename`` and the table (via ``createTable``) to ``tablename``. Each
    category gets a random colour shared by the treemap and the table.
    """
    categ_types_hash = {}
    # Iterating the index yields (category, type) tuples directly, which
    # avoids the MultiIndex ``labels`` attribute removed from recent pandas.
    for (categ_name, type_name), total in zip(data.index, data):
        categ_types_hash.setdefault(categ_name, []).append({'label': type_name, 'value': total})

    colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash}
    style = Style(background='transparent',
                  plot_background='#FFFFFF',
                  foreground='#111111',
                  foreground_strong='#111111',
                  foreground_subtle='#111111',
                  opacity='.6',
                  opacity_hover='.9',
                  transition='400ms ease-in',
                  colors=tuple(colors.values()))

    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    for categ_name, types in categ_types_hash.items():
        treemap.add(categ_name, types)

    # Forward the caller's table path; it used to be silently ignored.
    createTable(colors, categ_types_hash, tablename)
    treemap.render_to_file(treename)
def tagsToLineChart(dataframe, title, dates, colourDict):
style = createTagsPlotStyle(dataframe, colourDict)
line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)