2017-10-29 01:40:41 +02:00
|
|
|
#!/usr/bin/env python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import collections
import json
import os
import sys
from glob import glob
from ipaddress import ip_address, ip_network
from json import JSONEncoder

try:
    from collections.abc import Mapping
except ImportError:  # Python 2: the ABCs live directly in ``collections``
    from collections import Mapping
|
|
|
|
|
2017-10-29 01:40:41 +02:00
|
|
|
|
|
|
|
try:
|
|
|
|
import jsonschema
|
|
|
|
HAS_JSONSCHEMA = True
|
|
|
|
except ImportError:
|
|
|
|
HAS_JSONSCHEMA = False
|
|
|
|
|
|
|
|
|
2017-11-01 00:06:50 +01:00
|
|
|
class EncodeWarningList(JSONEncoder):
    """JSON encoder that can serialise ``WarningList`` instances."""

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        ``WarningList`` objects are replaced by the result of their
        ``to_dict()`` method; every other object is handed to the stock
        ``JSONEncoder.default`` implementation.
        """
        if not isinstance(obj, WarningList):
            return JSONEncoder.default(self, obj)
        return obj.to_dict()
|
2017-10-29 01:40:41 +02:00
|
|
|
|
|
|
|
|
|
|
|
class PyMISPWarningListsError(Exception):
    """Exception type declared by this module.

    The text passed to the constructor is forwarded to ``Exception`` and
    also kept on the instance as ``message``.
    """

    def __init__(self, message):
        # Explicit base-class call; same effect as the cooperative super() form
        # for this single-inheritance class.
        Exception.__init__(self, message)
        self.message = message
|
|
|
|
|
|
|
|
|
|
|
|
class WarningList():
    """A single warning list, wrapping the dictionary it was loaded from.

    Membership tests (``value in warninglist``) use either an exact match
    against the entries (the default) or, when ``slow_search`` is enabled, a
    strategy that depends on the list's ``type``.
    """

    # Values accepted for the 'type' key of a warning list.
    expected_types = ['string', 'substring', 'hostname', 'cidr']

    def __init__(self, warninglist, slow_search=False):
        """Build the object from a warning list dictionary.

        :warninglist: Dictionary with the keys 'list', 'description',
                      'version', 'name' and 'type'; 'matching_attributes'
                      is optional.
        :slow_search: If true, ``in`` uses the type-aware search instead of
                      the exact match.

        Raises PyMISPWarningListsError (a subclass of Exception) when the
        'type' is not one of ``expected_types``, and KeyError when a
        mandatory key is missing.
        """
        self.warninglist = warninglist
        self.list = self.warninglist['list']
        self.description = self.warninglist['description']
        self.version = int(self.warninglist['version'])
        self.name = self.warninglist['name']
        if self.warninglist['type'] not in self.expected_types:
            # Use the module's own exception type; it derives from Exception,
            # so callers catching Exception are unaffected.
            raise PyMISPWarningListsError('Unexpected type, please update the expected_type list')
        self.type = self.warninglist['type']
        # The attribute is only created when the key is present and non-empty;
        # to_dict() relies on hasattr() to detect that.
        if self.warninglist.get('matching_attributes'):
            self.matching_attributes = self.warninglist['matching_attributes']

        self.slow_search = slow_search
        self._network_objects = []

        if self.slow_search and self.type == 'cidr':
            self._network_objects = self._network_index()
            # If network objects is empty, reverting to default anyway
            if not self._network_objects:
                self.slow_search = False

    def __contains__(self, value):
        """Return whether *value* matches this list, honouring ``slow_search``."""
        if self.slow_search:
            return self._slow_search(value)
        return self._fast_search(value)

    def to_dict(self):
        """Return the list as a plain dictionary; entries are converted to str."""
        to_return = {'list': [str(e) for e in self.list], 'name': self.name,
                     'description': self.description, 'version': self.version,
                     'type': self.type}
        if hasattr(self, 'matching_attributes'):
            to_return['matching_attributes'] = self.matching_attributes
        return to_return

    def to_json(self):
        """Return the list serialised as a JSON string (via EncodeWarningList)."""
        return json.dumps(self, cls=EncodeWarningList)

    def _fast_search(self, value):
        """Exact match: is *value* one of the entries of the list."""
        return value in self.list

    def _network_index(self):
        """Return the entries of the list that parse as IP networks.

        Entries that are not valid networks or addresses are skipped.
        """
        to_return = []
        for entry in self.list:
            try:
                # Try if the entry is a network bloc or an IP.
                # strict=False: an entry with host bits set (e.g. '10.0.0.1/8')
                # is still indexed, as the network it belongs to, instead of
                # raising ValueError and being silently dropped.
                to_return.append(ip_network(entry, strict=False))
            except ValueError:
                pass
        return to_return

    def _slow_search(self, value):
        """Type-aware search, used by ``in`` when ``slow_search`` is enabled."""
        if self.type == 'string':
            # Exact match only, using fast search
            return self._fast_search(value)
        elif self.type == 'substring':
            # Expected to match on a part of the value
            # i.e.: value = 'blah.de' self.list == ['.fr', '.de']
            return any(v in value for v in self.list)
        elif self.type == 'hostname':
            # Expected to match on hostnames in URLs (i.e. the search query is a URL)
            # So we do a reverse search if any of the entries in the list are present in the URL
            # i.e.: value = 'http://foo.blah.de/meh' self.list == ['blah.de', 'blah.fr']
            # NOTE: this is a plain substring test, so 'blah.de' also matches
            # 'http://notblah.de/'.
            return any(v in value for v in self.list)
        elif self.type == 'cidr':
            try:
                value = ip_address(value)
            except ValueError:
                # The value to search isn't an IP address, falling back to default
                return self._fast_search(value)
            return any((value == obj or value in obj) for obj in self._network_objects)
|
2017-10-31 03:11:37 +01:00
|
|
|
|
2017-10-29 01:40:41 +02:00
|
|
|
|
|
|
|
class WarningLists(Mapping):
    """Read-only mapping of warning list name to ``WarningList`` object."""

    def __init__(self, slow_search=False, lists=False):
        """Load all the warning lists from the package.

        :slow_search: If true, uses the most appropriate search method. Can be slower. Default: exact match.
        :lists: A list of warning lists (typically fetched from a MISP instance)

        When *lists* is empty or not given, every ``list.json`` found under
        the ``data/misp-warninglists/lists`` directory of the installed
        ``pymispwarninglists`` package is loaded instead.
        """
        if not lists:
            lists = []
            self.root_dir_warninglists = os.path.join(
                os.path.abspath(os.path.dirname(sys.modules['pymispwarninglists'].__file__)),
                'data', 'misp-warninglists', 'lists')
            for warninglist_file in glob(os.path.join(self.root_dir_warninglists, '*', 'list.json')):
                with open(warninglist_file, 'r') as f:
                    lists.append(json.load(f))
        # Keyed by the 'name' of each list; a later list with the same name
        # replaces an earlier one.
        self.warninglists = {}
        for warninglist in lists:
            self.warninglists[warninglist['name']] = WarningList(warninglist, slow_search)

    def validate_with_schema(self):
        """Validate every loaded warning list against the packaged schema.json.

        Raises ImportError when the optional jsonschema module is not
        installed; validation failures are raised by ``jsonschema.validate``.
        """
        if not HAS_JSONSCHEMA:
            raise ImportError('jsonschema is required: pip install jsonschema')
        schema = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pymispwarninglists'].__file__)),
                              'data', 'misp-warninglists', 'schema.json')
        with open(schema, 'r') as f:
            loaded_schema = json.load(f)
        for w in self.warninglists.values():
            jsonschema.validate(w.warninglist, loaded_schema)

    def __getitem__(self, name):
        """Return the warning list called *name* (KeyError if unknown)."""
        return self.warninglists[name]

    def __iter__(self):
        """Iterate over the names of the loaded warning lists."""
        return iter(self.warninglists)

    def search(self, value):
        """Return the warning lists that contain *value* (empty list if none)."""
        return [wl for wl in self.warninglists.values() if value in wl]

    def __len__(self):
        """Return the number of loaded warning lists."""
        return len(self.warninglists)
|