Merge branch 'master' of github.com:MISP/PyMISP

pull/51/head
Raphaël Vinot 2017-02-07 14:03:22 +01:00
commit 6b5f347423
13 changed files with 358 additions and 137 deletions

View File

@ -5,7 +5,7 @@ from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import tools
import pygal_tools
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Take a sample of events (based on last.py of searchall.py) and create a treemap epresenting the distribution of attributes in this sample.')
@ -26,6 +26,6 @@ if __name__ == '__main__':
attributes = tools.attributesListBuild(events)
temp = tools.getNbAttributePerEventCategoryType(attributes)
temp = temp.groupby(level=['category', 'type']).sum()
tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')
pygal_tools.createTreemap(temp, 'Attributes Distribution', 'attribute_treemap.svg', 'attribute_table.html')
else:
print ('There is no event answering the research criteria')

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import HoverTool
import date_tools
def tagsDistributionScatterPlot(NbTags, dates, plotname='Tags Distribution Plot'):
    """Draw one circle series per key of ``NbTags`` and show the plot.

    Each key is converted with ``int()``; the resulting number drives the
    marker size (twice the number) and the marker opacity (its inverse).

    :param NbTags: dict mapping a key convertible with ``int()`` to the list
        of y values of that series.
    :param dates: dict with the same keys as ``NbTags`` mapping to the list
        of x values of that series; each value is also formatted as
        ``%Y-%m-%d`` for the hover tooltip.
    :param plotname: base name of the generated HTML file (``.html`` is
        appended).
    """
    output_file(plotname + ".html")

    # The tooltip is the same for every series, so configure it once instead
    # of reassigning it on each loop iteration.
    hover = HoverTool()
    hover.tooltips = [("date", "@desc")]
    plot = figure(plot_width=800, plot_height=800, x_axis_type="datetime", x_axis_label='Date', y_axis_label='Number of tags', tools=[hover])

    for name in NbTags.keys():
        weight = int(name)
        # Human readable dates, exposed to the hover tool as the "desc" column.
        desc = [date_tools.datetimeToString(date, "%Y-%m-%d") for date in dates[name]]
        renderer = plot.circle(dates[name], NbTags[name], legend="Number of events with y tags", source=ColumnDataSource(
            data=dict(
                desc=desc
            )
        ))
        glyph = renderer.glyph
        glyph.size = weight * 2
        if weight != 0:
            # Float literal keeps this a true division on Python 2 too, where
            # 1/int(name) would truncate to 0 for every weight above 1.
            glyph.fill_alpha = 1.0 / weight
    show(plot)

View File

@ -0,0 +1,70 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
class DateError(Exception):
    """Error carrying a value that describes an invalid date argument.

    :param value: message (or any object) describing the problem; it is
        available as ``value`` and rendered with ``repr()`` by ``str()``.
    """

    def __init__(self, value):
        # Forward the value to Exception so that ``args`` is populated;
        # otherwise ``args`` stays empty, which breaks pickling and generic
        # handlers that inspect it.
        super(DateError, self).__init__(value)
        self.value = value

    def __str__(self):
        return repr(self.value)
# ############### Date Tools ################
def dateInRange(datetimeTested, begin=None, end=None):
    """Tell whether ``datetimeTested`` lies in ``[begin, end]`` (inclusive).

    :param datetimeTested: datetime to test.
    :param begin: lower bound; 1970-01-01 when ``None``.
    :param end: upper bound; the current time when ``None``.
    :returns: ``True`` when the datetime is within the bounds.
    """
    lower = datetime(1970, 1, 1) if begin is None else begin
    upper = datetime.now() if end is None else end
    return lower <= datetimeTested <= upper
def toDatetime(date):
    """Parse ``date`` with ``dateutil.parser.parse`` and return the result."""
    parsed = parse(date)
    return parsed
def datetimeToString(datetime, formatstring):
    """Format ``datetime`` through its ``strftime`` method.

    Inside this function the ``datetime`` parameter shadows the ``datetime``
    class imported at module level.

    :param datetime: object providing ``strftime``.
    :param formatstring: ``strftime`` format, e.g. ``"%Y-%m-%d"``.
    :returns: the formatted string.
    """
    formatted = datetime.strftime(formatstring)
    return formatted
def checkDateConsistancy(begindate, enddate, lastdate):
    """Validate a begin/end date pair against each other and ``lastdate``.

    :param begindate: start of the range as a date string, or ``None``.
    :param enddate: end of the range as a date string, or ``None``.
    :param lastdate: datetime that ``enddate`` must not precede.
    :raises DateError: when ``begindate`` is after ``enddate``, when
        ``enddate`` is before ``lastdate``, or when ``begindate`` is later
        than the current time.
    """
    # Parse once so every check compares datetimes. Comparing the raw strings
    # only orders correctly for formats that happen to sort lexicographically.
    begin = toDatetime(begindate) if begindate is not None else None
    end = toDatetime(enddate) if enddate is not None else None

    if begin is not None and end is not None and begin > end:
        raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))

    if end is not None and end < lastdate:
        raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))

    if begin is not None and begin > datetime.now():
        raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
def setBegindate(begindate, lastdate):
    """Return the later of ``begindate`` and ``lastdate``.

    When both compare equal, ``begindate`` is returned.
    """
    if lastdate > begindate:
        return lastdate
    return begindate
def setEnddate(enddate):
    """Return ``enddate`` capped at the current time."""
    now = datetime.now()
    if now < enddate:
        return now
    return enddate
def getLastdate(last):
    """Return midnight of the day that lies ``last`` days before now.

    :param last: number of days, as an int or anything ``int()`` accepts.
    """
    shifted = datetime.now() - timedelta(days=int(last))
    return shifted.replace(hour=0, minute=0, second=0, microsecond=0)
def getNDaysBefore(date, days):
    """Return midnight of the day that lies ``days`` days before ``date``."""
    earlier = date - timedelta(days=days)
    return earlier.replace(hour=0, minute=0, second=0, microsecond=0)
def getToday():
    """Return the current day at midnight."""
    now = datetime.now()
    return now.replace(hour=0, minute=0, second=0, microsecond=0)
def days_between(date_1, date_2):
    """Return the absolute value of the ``days`` field of ``date_2 - date_1``."""
    delta = date_2 - date_1
    return abs(delta.days)

View File

@ -0,0 +1,54 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pygal
from pygal.style import Style
import pandas
import random
def createTable(colors, categ_types_hash, tablename='attribute_table.html'):
    """Write an HTML page containing one pygal table per category.

    :param colors: dict mapping a category name to the colour used for its
        heading.
    :param categ_types_hash: dict mapping a category name to a list of dicts
        with ``label`` and ``value`` keys.
    :param tablename: path of the HTML file to write.
    """
    page_header = '<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>'
    heading = '\n <h1 style="color:{};">{}</h1>\n'
    page_footer = '\n</body>\n</html>'

    with open(tablename, 'w') as target:
        target.write(page_header)
        for categ_name in categ_types_hash:
            table = pygal.Treemap(pretty_print=True)
            target.write(heading.format(colors[categ_name], categ_name))
            for entry in categ_types_hash[categ_name]:
                table.add(entry['label'], entry['value'])
            target.write(table.render_table(transpose=True))
        target.write(page_footer)
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
    """Render a treemap (SVG) and its companion HTML table.

    :param data: pandas object whose index is a two-level ``MultiIndex``
        (level 0 is used as category, level 1 as type); iterating it must
        yield one total per index entry — presumably a Series, confirm
        against the caller.
    :param title: title of the treemap.
    :param treename: path of the SVG file to write.
    :param tablename: path of the HTML table to write.
    """
    index = data.index
    # pandas renamed MultiIndex.labels to MultiIndex.codes; use whichever the
    # installed version provides.
    codes = getattr(index, 'codes', None)
    if codes is None:
        codes = index.labels
    names_categ = index.levels[0]
    names_types = index.levels[1]

    # Group the {label, value} entries by category name.
    categ_types_hash = {}
    for categ_id, type_id, total in zip(codes[0], codes[1], data):
        entry = {'label': names_types[type_id], 'value': total}
        categ_types_hash.setdefault(names_categ[categ_id], []).append(entry)

    # One random colour per category, shared by the treemap and the table.
    colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash.keys()}
    style = Style(background='transparent',
                  plot_background='#FFFFFF',
                  foreground='#111111',
                  foreground_strong='#111111',
                  foreground_subtle='#111111',
                  opacity='.6',
                  opacity_hover='.9',
                  transition='400ms ease-in',
                  colors=tuple(colors.values()))

    treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
    treemap.title = title
    treemap.print_values = True
    treemap.print_labels = True

    for categ_name, types in categ_types_hash.items():
        treemap.add(categ_name, types)

    # Forward tablename: it used to be ignored, so the table always went to
    # the default path whatever the caller asked for.
    createTable(colors, categ_types_hash, tablename)
    treemap.render_to_file(treename)

View File

@ -0,0 +1,71 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import numpy
import tools
import date_tools
import bokeh_tools
import time
if __name__ == '__main__':
    # Script-local import: only this entry point needs it.
    from collections import Counter

    parser = argparse.ArgumentParser(description='Show the evolution of trend of tags.')
    parser.add_argument("-d", "--days", type=int, required=True, help='')
    parser.add_argument("-s", "--begindate", required=True, help='format yyyy-mm-dd')
    parser.add_argument("-e", "--enddate", required=True, help='format yyyy-mm-dd')

    args = parser.parse_args()

    misp = PyMISP(misp_url, misp_key, misp_verifycert)

    result = misp.search(date_from=args.begindate, date_to=args.enddate, metadata=False)

    # Getting data

    if 'response' in result:
        events = tools.eventsListBuildFromArray(result)
        # One (number of tags, window end date) pair per event that has a
        # 'Tag' entry.
        samples = []
        enddate = date_tools.toDatetime(args.enddate)
        begindate = date_tools.toDatetime(args.begindate)

        # Walk backwards from enddate in windows of args.days days.
        for _ in range(round(date_tools.days_between(enddate, begindate) / args.days)):
            begindate = date_tools.getNDaysBefore(enddate, args.days)
            eventstemp = tools.selectInRange(events, begindate, enddate)
            if eventstemp is not None:
                for _index, event in eventstemp.iterrows():
                    if 'Tag' in event:
                        tags = event['Tag']
                        # Anything that is not a list counts as "no tags".
                        samples.append((len(tags) if isinstance(tags, list) else 0, enddate))
            enddate = begindate

        # Prepare plot

        # Group the distinct (number of tags, date) pairs by how many events
        # share them; the keys are the stringified counts. Counter replaces
        # the former quadratic scan that overwrote entries with -1.
        NbTagsPlot = {}
        datesPlot = {}
        for (nb_tags, date), count in Counter(samples).items():
            NbTagsPlot.setdefault(str(count), []).append(nb_tags)
            datesPlot.setdefault(str(count), []).append(date)

        # Plot
        bokeh_tools.tagsDistributionScatterPlot(NbTagsPlot, datesPlot)

View File

@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import tools
import date_tools
def init(url, key):
@ -29,17 +30,17 @@ if __name__ == '__main__':
args.days = 7
result = misp.search(last='{}d'.format(args.days), metadata=True)
tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days))
if args.begindate is None:
args.begindate = tools.getLastdate(args.days)
args.begindate = date_tools.getLastdate(args.days)
else:
args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
# getLastdate lives in date_tools (the same module used for setBegindate and
# toDatetime here); tools no longer defines it.
args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days))
if args.enddate is None:
args.enddate = datetime.now()
else:
args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate))
if 'response' in result:
events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate)

View File

@ -6,6 +6,7 @@ from keys import misp_url, misp_key, misp_verifycert
from datetime import datetime
import argparse
import tools
import date_tools
def init(url, key):
@ -28,17 +29,17 @@ if __name__ == '__main__':
args.days = 7
result = misp.search(last='{}d'.format(args.days), metadata=True)
tools.checkDateConsistancy(args.begindate, args.enddate, tools.getLastdate(args.days))
date_tools.checkDateConsistancy(args.begindate, args.enddate, date_tools.getLastdate(args.days))
if args.begindate is None:
args.begindate = tools.getLastdate(args.days)
args.begindate = date_tools.getLastdate(args.days)
else:
args.begindate = tools.setBegindate(tools.toDatetime(args.begindate), tools.getLastdate(args.days))
args.begindate = date_tools.setBegindate(date_tools.toDatetime(args.begindate), date_tools.getLastdate(args.days))
if args.enddate is None:
args.enddate = datetime.now()
else:
args.enddate = tools.setEnddate(tools.toDatetime(args.enddate))
args.enddate = date_tools.setEnddate(date_tools.toDatetime(args.enddate))
if 'response' in result:
events = tools.selectInRange(tools.eventsListBuildFromArray(result), begin=args.begindate, end=args.enddate)

View File

@ -5,6 +5,8 @@ from pymisp import PyMISP
from keys import misp_url, misp_key, misp_verifycert
import argparse
import tools
import date_tools
import bokeh_tools
def formattingDataframe(dataframe, dates, NanValue):
@ -54,12 +56,12 @@ if __name__ == '__main__':
events = tools.eventsListBuildFromArray(result)
result = []
dates = []
enddate = tools.getToday()
enddate = date_tools.getToday()
colourDict = {}
faketag = False
for i in range(split):
begindate = tools.getNDaysBefore(enddate, size)
begindate = date_tools.getNDaysBefore(enddate, size)
dates.append(str(enddate.date()))
eventstemp = tools.selectInRange(events, begin=begindate, end=enddate)
if eventstemp is not None:

View File

@ -6,7 +6,6 @@
height: 746px;
margin-top: 100px;
}
#treemap
{
width: 1000px;

View File

@ -2,13 +2,9 @@
# -*- coding: utf-8 -*-
from json import JSONDecoder
import random
import pygal
from pygal.style import Style
import pandas
from datetime import datetime
from datetime import timedelta
from dateutil.parser import parse
import numpy
from scipy import stats
from pytaxonomies import Taxonomies
@ -16,67 +12,25 @@ import re
import matplotlib.pyplot as plt
from matplotlib import pylab
import os
class DateError(Exception):
def __init__(self, value):
self.value = value
def __str__(self):
return repr(self.value)
# ############### Date Tools ################
def dateInRange(datetimeTested, begin=None, end=None):
if begin is None:
begin = datetime(1970, 1, 1)
if end is None:
end = datetime.now()
return begin <= datetimeTested <= end
def toDatetime(date):
return parse(date)
def checkDateConsistancy(begindate, enddate, lastdate):
if begindate is not None and enddate is not None:
if begindate > enddate:
raise DateError('begindate ({}) cannot be after enddate ({})'.format(begindate, enddate))
if enddate is not None:
if toDatetime(enddate) < lastdate:
raise DateError('enddate ({}) cannot be before lastdate ({})'.format(enddate, lastdate))
if begindate is not None:
if toDatetime(begindate) > datetime.now():
raise DateError('begindate ({}) cannot be after today ({})'.format(begindate, datetime.now().date()))
def setBegindate(begindate, lastdate):
return max(begindate, lastdate)
def setEnddate(enddate):
return min(enddate, datetime.now())
def getLastdate(last):
return (datetime.now() - timedelta(days=int(last))).replace(hour=0, minute=0, second=0, microsecond=0)
def getNDaysBefore(date, days):
return (date - timedelta(days=days)).replace(hour=0, minute=0, second=0, microsecond=0)
def getToday():
return (datetime.now()).replace(hour=0, minute=0, second=0, microsecond=0)
import date_tools
from dateutil.parser import parse
# ############### Tools ################
def selectInRange(Events, begin=None, end=None):
inRange = []
for i, Event in Events.iterrows():
if date_tools.dateInRange(parse(Event['date']), begin, end):
inRange.append(Event.tolist())
inRange = pandas.DataFrame(inRange)
temp = Events.columns.tolist()
if inRange.empty:
return None
inRange.columns = temp
return inRange
def getTaxonomies(dataframe):
taxonomies = Taxonomies()
taxonomies = list(taxonomies.keys())
@ -233,19 +187,6 @@ def tagsListBuild(Events):
return Tags
def selectInRange(Events, begin=None, end=None):
inRange = []
for i, Event in Events.iterrows():
if dateInRange(parse(Event['date']), begin, end):
inRange.append(Event.tolist())
inRange = pandas.DataFrame(inRange)
temp = Events.columns.tolist()
if inRange.empty:
return None
inRange.columns = temp
return inRange
def isTagIn(dataframe, tag):
temp = dataframe[dataframe['name'].str.contains(tag)].index.tolist()
index = []
@ -277,56 +218,10 @@ def getNbAttributePerEventCategoryType(attributes):
def getNbOccurenceTags(Tags):
return Tags.groupby('name').count()['id']
# ############### Charts ################
def createTable(colors, categ_types_hash, tablename='attribute_table.html'):
with open(tablename, 'w') as target:
target.write('<!DOCTYPE html>\n<html>\n<head>\n<link rel="stylesheet" href="style.css">\n</head>\n<body>')
for categ_name, types in categ_types_hash.items():
table = pygal.Treemap(pretty_print=True)
target.write('\n <h1 style="color:{};">{}</h1>\n'.format(colors[categ_name], categ_name))
for d in types:
table.add(d['label'], d['value'])
target.write(table.render_table(transpose=True))
target.write('\n</body>\n</html>')
def createTreemap(data, title, treename='attribute_treemap.svg', tablename='attribute_table.html'):
labels_categ = data.index.labels[0]
labels_types = data.index.labels[1]
names_categ = data.index.levels[0]
names_types = data.index.levels[1]
categ_types_hash = {}
for categ_id, type_val, total in zip(labels_categ, labels_types, data):
if not categ_types_hash.get(names_categ[categ_id]):
categ_types_hash[names_categ[categ_id]] = []
dict_to_print = {'label': names_types[type_val], 'value': total}
categ_types_hash[names_categ[categ_id]].append(dict_to_print)
colors = {categ: "#%06X" % random.randint(0, 0xFFFFFF) for categ in categ_types_hash.keys()}
style = Style(background='transparent',
plot_background='#FFFFFF',
foreground='#111111',
foreground_strong='#111111',
foreground_subtle='#111111',
opacity='.6',
opacity_hover='.9',
transition='400ms ease-in',
colors=tuple(colors.values()))
treemap = pygal.Treemap(pretty_print=True, legend_at_bottom=True, style=style)
treemap.title = title
treemap.print_values = True
treemap.print_labels = True
for categ_name, types in categ_types_hash.items():
treemap.add(categ_name, types)
createTable(colors, categ_types_hash)
treemap.render_to_file(treename)
def tagsToLineChart(dataframe, title, dates, colourDict):
style = createTagsPlotStyle(dataframe, colourDict)
line_chart = pygal.Line(x_label_rotation=20, style=style, show_legend=False)

93
examples/yara_dump.py Executable file
View File

@ -0,0 +1,93 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
YARA dumper for MISP
by Christophe Vandeplas
'''
import keys
from pymisp import PyMISP
import yara
import re
def dirty_cleanup(value):
    """Replace characters that are known to break YARA rules.

    :param value: text of the rule.
    :returns: tuple ``(cleaned_value, changed)`` where ``changed`` is ``True``
        when at least one substitution was applied.
    """
    changed = False
    # NOTE(review): the first three patterns had been reduced to empty
    # strings, presumably typographic double quotes lost to an encoding
    # problem - confirm against upstream. An empty pattern matches everywhere
    # and made replace() insert a quote between every character. They are
    # written as escapes so they survive re-encoding of this file.
    substitutions = (('\u201d', '"'),  # right double quotation mark
                     ('\u201c', '"'),  # left double quotation mark
                     ('\u2033', '"'),  # double prime
                     ('`', "'"),
                     ('\r', '')
                     # ('$ ', '$'),      # this breaks rules
                     # ('\t\t', '\n'),   # this breaks rules
                     )
    for pattern, replacement in substitutions:
        if pattern in value:
            changed = True
            value = value.replace(pattern, replacement)
    return value, changed
# NOTE(review): other example scripts import `misp_verifycert` from keys;
# confirm that `misp_verify` is the attribute this keys module defines.
misp = PyMISP(keys.misp_url, keys.misp_key, keys.misp_verify, 'json')
result = misp.search(controller='attributes', type_attribute='yara')

attr_cnt = 0
attr_cnt_invalid = 0
attr_cnt_duplicate = 0
attr_cnt_changed = 0
yara_rules = []
if 'response' in result and 'Attribute' in result['response']:
    for attribute in result['response']['Attribute']:
        value = attribute['value']
        event_id = attribute['event_id']
        attribute_id = attribute['id']

        # Prefix every rule name with the event id.
        value = re.sub(r'^[ \t]*rule ', 'rule misp_e{}_'.format(event_id), value, flags=re.MULTILINE)
        value, changed = dirty_cleanup(value)
        if changed:
            attr_cnt_changed += 1
        if 'global rule' in value:  # refuse any global rules as they might disable everything
            continue

        # compile the yara rule to confirm its validity
        # if valid, ignore duplicate rules
        attr_cnt += 1
        try:
            yara.compile(source=value)
            yara_rules.append(value)
            # print("Rule e{} a{} OK".format(event_id, attribute_id))
        except yara.SyntaxError as e:
            attr_cnt_invalid += 1
            # print("Rule e{} a{} NOK - {}".format(event_id, attribute_id, e))
        except yara.Error as e:
            attr_cnt_invalid += 1
            print(e)
            import traceback
            print(traceback.format_exc())

# remove duplicates - process the full yara rule list and process errors to eliminate duplicate rule names
all_yara_rules = '\n'.join(yara_rules)
while True:
    try:
        yara.compile(source=all_yara_rules)
    except yara.SyntaxError as e:
        if 'duplicated identifier' in e.args[0]:
            duplicate_rule_names = re.findall('duplicated identifier "(.*)"', e.args[0])
            renamed = False
            for item in duplicate_rule_names:
                declaration = 'rule {}'.format(item)
                if declaration in all_yara_rules:
                    # Rename only the first declaration; the next pass
                    # reports any remaining clash.
                    all_yara_rules = all_yara_rules.replace(declaration, 'rule duplicate_{}'.format(item), 1)
                    attr_cnt_duplicate += 1
                    renamed = True
            if renamed:
                continue
            # Nothing was renamed: retrying would compile the same text and
            # loop forever, so stop here. SystemExit behaves like exit()
            # without relying on the site module.
            raise SystemExit("ERROR unable to rename duplicated rule: {}".format(e.args))
        else:
            # This should never happen as all rules were processed before separately. So logically we should only have duplicates.
            raise SystemExit("ERROR SyntaxError in rules: {}".format(e.args))
    break

# save to a file
fname = 'misp.yara'
with open(fname, 'w') as f_out:
    f_out.write(all_yara_rules)

print("")
print("MISP attributes with YARA rules: total={} valid={} invalid={} duplicate={} changed={}.".format(attr_cnt, attr_cnt - attr_cnt_invalid, attr_cnt_invalid, attr_cnt_duplicate, attr_cnt_changed))
print("Valid YARA rule file save to file '{}'. Invalid rules/attributes were ignored.".format(fname))

View File

@ -1071,10 +1071,10 @@ class PyMISP(object):
# ############## Export Attributes in text ####################################
def get_all_attributes_txt(self, type_attr):
"""Get all attributes from a specific type as plain text. Only published and IDS flagged attributes are exported."""
def get_all_attributes_txt(self, type_attr, tags=False, eventId=False, allowNonIDS=False, date_from=False, date_to=False, last=False, enforceWarninglist=False, allowNotPublished=False):
"""Get all attributes from a specific type as plain text. Only published and IDS flagged attributes are exported, except if stated otherwise."""
session = self.__prepare_session('txt')
url = urljoin(self.root_url, 'attributes/text/download/%s' % type_attr)
url = urljoin(self.root_url, 'attributes/text/download/%s/%s/%s/%s/%s/%s/%s/%s/%s' % (type_attr, tags, eventId, allowNonIDS, date_from, date_to, last, enforceWarninglist, allowNotPublished))
response = session.get(url)
return response

View File

@ -440,6 +440,8 @@ class MISPEvent(object):
if self.analysis not in [0, 1, 2]:
raise NewEventError('{} is invalid, the analysis has to be in 0, 1, 2'.format(self.analysis))
if kwargs.get('published') is not None:
self.unpublish()
if kwargs.get("published") == True:
self.publish()
if kwargs.get('date'):
self.set_date(kwargs['date'])