PyMISPWarningLists/pymispwarninglists/api.py

160 lines
6.1 KiB
Python
Raw Normal View History

2017-10-29 01:40:41 +02:00
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import collections
import json
import sys
from collections.abc import Mapping
from glob import glob
from ipaddress import ip_address, ip_network
from pathlib import Path
from typing import Union, Dict, Any, List, Optional
from urllib.parse import urlparse

try:
    import jsonschema  # type: ignore
    HAS_JSONSCHEMA = True
except ImportError:
    HAS_JSONSCHEMA = False
2021-01-21 15:23:15 +01:00
def json_default(obj: Any) -> Union[Dict, str]:
    """``default`` hook for ``json.dumps`` that knows how to encode a WarningList.

    :param obj: Object the JSON encoder could not serialize on its own.
    :return: The result of ``obj.to_dict()`` when ``obj`` is a ``WarningList``.
    :raises TypeError: for any other object, so that unsupported values are
        reported by ``json.dumps`` instead of being silently written as ``null``.
    """
    if isinstance(obj, WarningList):
        return obj.to_dict()
    # Falling off the end would return None, which the encoder serializes as
    # ``null``; raising is what the ``default`` hook is expected to do.
    raise TypeError(f'Object of type {type(obj).__name__} is not JSON serializable')
2017-10-29 01:40:41 +02:00
class PyMISPWarningListsError(Exception):
    """Exception raised by this module; the text is also kept on ``message``."""

    def __init__(self, message: str):
        """Store ``message`` on the instance and pass it on to ``Exception``."""
        self.message = message
        super().__init__(message)
class WarningList():
    """One warning list, searchable with the ``in`` operator.

    With ``slow_search`` disabled, ``value in warninglist`` is an exact
    membership test against the raw entries. With ``slow_search`` enabled the
    test depends on the list type (see ``_slow_search``).
    """

    expected_types = ['string', 'substring', 'hostname', 'cidr', 'regex']

    def __init__(self, warninglist: Dict[str, Any], slow_search: bool = False):
        """Build the object from a decoded warning list.

        :param warninglist: Dictionary providing the keys ``list``,
            ``description``, ``version``, ``name`` and ``type``;
            ``matching_attributes`` is optional.
        :param slow_search: If true, use the type-aware search instead of the
            exact match.
        :raises PyMISPWarningListsError: if ``type`` is not one of
            ``expected_types``.
        """
        self.warninglist = warninglist
        self.list = self.warninglist['list']
        self.description = self.warninglist['description']
        self.version = int(self.warninglist['version'])
        self.name = self.warninglist['name']
        if self.warninglist['type'] not in self.expected_types:
            raise PyMISPWarningListsError(f'Unexpected type ({self.warninglist["type"]}), please update the expected_type list')
        self.type = self.warninglist['type']
        if self.warninglist.get('matching_attributes'):
            # Left unset when missing or empty: to_dict() checks with hasattr().
            self.matching_attributes = self.warninglist['matching_attributes']

        self.slow_search = slow_search
        self._network_objects = []

        if self.slow_search and self.type == 'cidr':
            self._network_objects = self._network_index()
            # If network objects is empty, reverting to default anyway
            if not self._network_objects:
                self.slow_search = False

    def __repr__(self) -> str:
        # The "type" label carries the list name; kept as-is, only the missing
        # closing ">" is added.
        return f'<{self.__class__.__name__}(type="{self.name}", version="{self.version}", description="{self.description}")>'

    def __contains__(self, value: str) -> bool:
        """Return whether ``value`` matches this list, honouring ``slow_search``."""
        if self.slow_search:
            return self._slow_search(value)
        return self._fast_search(value)

    def to_dict(self) -> Dict:
        """Return the list as a plain dictionary (entries converted to ``str``).

        ``matching_attributes`` is only included when it was set at
        construction time.
        """
        to_return = {'list': [str(e) for e in self.list], 'name': self.name,
                     'description': self.description, 'version': self.version,
                     'type': self.type}
        if hasattr(self, 'matching_attributes'):
            to_return['matching_attributes'] = self.matching_attributes
        return to_return

    def to_json(self) -> str:
        """Return the JSON encoding of ``to_dict()`` (via ``json_default``)."""
        return json.dumps(self, default=json_default)

    def _fast_search(self, value) -> bool:
        """Exact match: ``value`` must be equal to one of the entries."""
        return value in self.list

    def _network_index(self) -> List:
        """Parse the entries into ``ipaddress`` network objects.

        Entries that ``ip_network`` rejects are skipped.
        """
        to_return = []
        for entry in self.list:
            try:
                # Try if the entry is a network bloc or an IP
                to_return.append(ip_network(entry))
            except ValueError:
                pass
        return to_return

    def _slow_search(self, value: str) -> bool:
        """Type-aware search.

        * ``string``: exact match.
        * ``substring``: any entry is contained in ``value``.
        * ``hostname``: the hostname of ``value`` (parsed as a URL when
          possible) equals an entry or is a subdomain of it.
        * ``cidr``: ``value`` is an IP address inside one of the networks;
          values that are not IP addresses fall back to the exact match.
        * any other type (``regex``): no match is attempted, returns False.
        """
        if self.type == 'string':
            # Exact match only, using fast search
            return self._fast_search(value)
        elif self.type == 'substring':
            # Expected to match on a part of the value
            # i.e.: value = 'blah.de' self.list == ['.fr', '.de']
            return any(v in value for v in self.list)
        elif self.type == 'hostname':
            # Expected to match on hostnames in URLs (i.e. the search query is a URL)
            # So we do a reverse search if any of the entries in the list are present in the URL
            # i.e.: value = 'http://foo.blah.de/meh' self.list == ['blah.de', 'blah.fr']
            try:
                hostname = urlparse(value).hostname
            except ValueError:
                # urlparse rejects some malformed inputs (e.g. unbalanced IPv6
                # brackets). A membership test should answer, not raise, so
                # the raw value is compared instead.
                hostname = None
            if hostname:
                value = hostname
            return any(value == v or value.endswith("." + v.lstrip(".")) for v in self.list)
        elif self.type == 'cidr':
            try:
                address = ip_address(value)
            except ValueError:
                # The value to search isn't an IP address, falling back to default
                return self._fast_search(value)
            # Single IPs in the list were indexed as one-address networks, so
            # the containment test covers them too.
            return any(address in network for network in self._network_objects)
        return False
2017-10-31 03:11:37 +01:00
2017-10-29 01:40:41 +02:00
class WarningLists(Mapping):
    """Read-only mapping from warning list name to ``WarningList`` object."""

    def __init__(self, slow_search: bool = False, lists: Optional[List] = None):
        """Load all the warning lists from the package.

        :param slow_search: If true, uses the most appropriate search method. Can be slower. Default: exact match.
        :param lists: A list of warning lists (typically fetched from a MISP instance).
            When empty or omitted, the ``list.json`` files shipped in the package
            data directory are loaded instead.
        :raises PyMISPWarningListsError: if no list could be loaded.
        """
        if not lists:
            lists = []
            self.root_dir_warninglists = Path(sys.modules['pymispwarninglists'].__file__).parent / 'data' / 'misp-warninglists' / 'lists'
            for warninglist_file in glob(str(self.root_dir_warninglists / '*' / 'list.json')):
                # JSON is read as UTF-8 rather than the platform default encoding.
                with open(warninglist_file, 'r', encoding='utf-8') as f:
                    lists.append(json.load(f))
        if not lists:
            raise PyMISPWarningListsError('Unable to load the lists. Do not forget to initialize the submodule (git submodule update --init).')
        self.warninglists = {}
        for warninglist in lists:
            # Lists sharing a name overwrite each other; the last one wins.
            self.warninglists[warninglist['name']] = WarningList(warninglist, slow_search)

    def validate_with_schema(self):
        """Validate every loaded list against the ``schema.json`` of the package data.

        :raises ImportError: if ``jsonschema`` is not installed.
        """
        if not HAS_JSONSCHEMA:
            raise ImportError('jsonschema is required: pip install jsonschema')
        schema = Path(sys.modules['pymispwarninglists'].__file__).parent / 'data' / 'misp-warninglists' / 'schema.json'
        with open(schema, 'r', encoding='utf-8') as f:
            loaded_schema = json.load(f)
        for w in self.warninglists.values():
            jsonschema.validate(w.warninglist, loaded_schema)

    def __getitem__(self, name):
        return self.warninglists[name]

    def __iter__(self):
        return iter(self.warninglists)

    def search(self, value) -> List:
        """Return every loaded list for which ``value in warninglist`` is true."""
        return [wl for wl in self.warninglists.values() if value in wl]

    def __len__(self):
        return len(self.warninglists)

    def get_loaded_lists(self):
        """Return the internal name -> list dictionary (not a copy)."""
        return self.warninglists