mirror of https://github.com/MISP/misp-modules
				
				
				
			
		
			
				
	
	
		
			374 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			374 lines
		
	
	
		
			10 KiB
		
	
	
	
		
			Python
		
	
	
| """
 | |
| Import content from a TAXII 2.1 server.
 | |
| """
 | |
| import collections
 | |
| import itertools
 | |
| import json
 | |
| import misp_modules.lib.stix2misp
 | |
| from pathlib import Path
 | |
| import re
 | |
| import stix2.v20
 | |
| import taxii2client
 | |
| import taxii2client.exceptions
 | |
| import requests
 | |
| 
 | |
| 
 | |
class ConfigError(Exception):
    """
    Raised when the config settings supplied for one invocation of this
    module are missing or invalid.
    """
 | |
| 
 | |
| 
 | |
# Base error response; handler() reports a specific message under the
# "error" key.
misperrors = dict(error='Error')

# Module metadata, returned by version().
moduleinfo = {
    'version': '0.1',
    'author': 'Abc',
    'description': 'Import content from a TAXII 2.1 server',
    'module-type': ['import'],
}

# Module attributes, returned by introspection().
mispattributes = dict(
    inputSource=[],
    output=['MISP objects'],
    format='misp_standard',
)
 | |
| 
 | |
| 
 | |
# Per-invocation (user-level) settings, exposed through introspection().
# Each row below is (setting name, value type, prompt shown to the user).
userConfig = {
    name: {"type": kind, "message": prompt}
    for name, kind, prompt in (
        ("url", "String", "A TAXII 2.1 collection URL"),
        (
            "added_after", "String",
            "Lower bound on time the object was uploaded to the TAXII server",
        ),
        ("stix_id", "String", "STIX ID(s) of objects"),
        # TAXII 2.1 specific
        ("spec_version", "String", "STIX version(s) of objects"),
        ("type", "String", "STIX type(s) of objects"),
        (
            "version", "String",
            'Version timestamp(s), or "first"/"last"/"all"',
        ),
        # Should we give some user control over this?  It will not be
        # allowed to exceed the admin setting.
        (
            "STIX object limit", "Integer",
            "Maximum number of STIX objects to process",
        ),
        (
            "username", "String",
            "Username for TAXII server authentication, if necessary",
        ),
        (
            "password", "String",
            "Password for TAXII server authentication, if necessary",
        ),
    )
}
 | |
| 
 | |
| # Paging will be handled transparently by this module, so user-defined
 | |
| # paging-related filtering parameters will not be supported.
 | |
| 
 | |
| 
 | |
# Admin-level settings.  "stix_object_limit" caps the total number of STIX
# objects processed from a TAXII server in one module invocation (across all
# pages), to limit resource consumption.
moduleconfig = ["stix_object_limit"]


# Fallback cap used when no admin limit is configured; a user-supplied limit
# can still lower it.
_DEFAULT_STIX_OBJECT_LIMIT = 1000


# Number of objects requested per TAXII page.  A larger page means fewer
# requests hammering the TAXII server but more resources consumed per page.
# (Should be an admin setting too?)
_PAGE_SIZE = 100


# Location of the synonyms-to-tag-names mapping handed to the STIX->MISP
# converter, resolved relative to this file.
_synonymsToTagNames_path = Path(__file__).parent.joinpath(
    "../../lib/synonymsToTagNames.json"
)
 | |
| 
 | |
| 
 | |
# Immutable bundle of everything needed to run one TAXII query: the
# collection URL, the TAXII filters, the effective object cap, and the
# (optional) credentials.
Config = collections.namedtuple(
    "Config",
    field_names=(
        # Where to query
        "url",
        # TAXII filters
        "added_after", "id", "spec_version", "type", "version",
        # Maximum number of STIX objects to process
        "stix_object_limit",
        # Authentication
        "username", "password",
    ),
)
 | |
| 
 | |
| 
 | |
def _pymisp_to_json_serializable(obj):
    """
    Produce a value that the default JSON encoder can serialize from a PyMISP
    object.

    This sidesteps a possible bug in PyMISP's
    AbstractMisp.to_dict(json_format=True), which doesn't always return a
    JSON-serializable value.

    :param obj: A PyMISP object
    :return: A JSON-serializable version of the object
    """

    # Round trip: let the object render itself as JSON text, then parse that
    # text back into plain Python values.
    return json.loads(obj.to_json())
 | |
| 
 | |
| 
 | |
def _normalize_multi_values(value):
    """
    Normalize a possibly multi-valued setting to the format TAXII filters
    require: values separated by commas, with no spaces around the commas.

    To give MISP users some flexibility, the input may delimit its values
    with commas (optionally surrounded by whitespace) or, if it contains no
    comma at all, with whitespace.  Whitespace and stray commas at either end
    of the string are discarded.

    :param value: A MISP config value (a string)
    :return: The normalized, comma-delimited value
    """

    # Drop surrounding whitespace up front; otherwise it would survive the
    # comma branch below (e.g. " a , b " would come out as " a,b ").
    value = value.strip()

    if "," in value:
        value = re.sub(r"\s*,\s*", ",", value)
    else:
        # Assume space delimiting; replace runs of whitespace with commas.
        # Spaces embedded in individual values are not expected.
        value = re.sub(r"\s+", ",", value)

    # Remove any leading/trailing delimiters left over (e.g. from "a,b,").
    return value.strip(",")
 | |
| 
 | |
| 
 | |
def _parse_object_limit(raw_value, description):
    """
    Convert one raw object-limit setting to an int.

    :param raw_value: The raw setting value (e.g. a numeric string)
    :param description: Human-readable name of the setting, used in the
        error message
    :return: The setting as an int
    :raises ConfigError: if the value can't be interpreted as an integer
    """
    try:
        return int(raw_value)
    except (TypeError, ValueError):
        # Report bad input as a config problem rather than letting a bare
        # ValueError/TypeError escape to the caller.
        raise ConfigError(
            "Invalid {}: must be an integer: {!r}".format(
                description, raw_value
            )
        ) from None


def _get_config(config):
    """
    Combine user, admin, and default config settings to produce a config
    object with all settings together.

    The effective object limit is the admin limit (or the built-in default
    if no admin limit is set), lowered to the user limit if the user gave a
    smaller one.  The given dict is not modified.

    :param config: The misp-modules request's "config" value.
    :return: A Config object
    :raises ConfigError: if any config errors are detected
    """

    # Work on a whitespace-stripped copy so the caller's dict is left
    # untouched.  The password is deliberately left as-is, since surrounding
    # whitespace could be significant there.
    config = {
        key: val.strip() if isinstance(val, str) and key != "password" else val
        for key, val in config.items()
    }

    url = config.get("url")
    added_after = config.get("added_after")
    id_ = config.get("stix_id")
    spec_version = config.get("spec_version")
    type_ = config.get("type")
    version_ = config.get("version")
    username = config.get("username")
    password = config.get("password")
    admin_stix_object_limit = config.get("stix_object_limit")
    user_stix_object_limit = config.get("STIX object limit")

    if admin_stix_object_limit:
        admin_stix_object_limit = _parse_object_limit(
            admin_stix_object_limit, "admin object limit"
        )
    else:
        admin_stix_object_limit = _DEFAULT_STIX_OBJECT_LIMIT

    if user_stix_object_limit:
        user_stix_object_limit = _parse_object_limit(
            user_stix_object_limit, "object limit"
        )
        # The user may lower the limit, but never exceed the admin setting.
        stix_object_limit = min(user_stix_object_limit, admin_stix_object_limit)
    else:
        stix_object_limit = admin_stix_object_limit

    # How much of this should we sanity-check here before passing it off to the
    # TAXII client (and thence, to the TAXII server)?

    if not url:
        raise ConfigError("A TAXII 2.1 collection URL is required.")

    if admin_stix_object_limit < 1:
        raise ConfigError(
            "Invalid admin object limit: must be positive: "
            + str(admin_stix_object_limit)
        )

    if stix_object_limit < 1:
        raise ConfigError(
            "Invalid object limit: must be positive: "
            + str(stix_object_limit)
        )

    # Multi-valued filters are normalized to comma-delimited form.
    if id_:
        id_ = _normalize_multi_values(id_)
    if spec_version:
        spec_version = _normalize_multi_values(spec_version)
    if type_:
        type_ = _normalize_multi_values(type_)
    if version_:
        version_ = _normalize_multi_values(version_)

    # STIX->MISP converter currently only supports STIX 2.0, so let's force
    # spec_version="2.0".
    if not spec_version:
        spec_version = "2.0"
    elif spec_version != "2.0":
        raise ConfigError('Only spec_version="2.0" is supported for now.')

    # Credentials only make sense as a pair.
    if bool(username) != bool(password):
        raise ConfigError(
            'Both or neither of "username" and "password" are required.'
        )

    return Config(
        url, added_after, id_, spec_version, type_, version_, stix_object_limit,
        username, password
    )
 | |
| 
 | |
| 
 | |
def _query_taxii(config):
    """
    Run the TAXII query described by the given config, convert the returned
    STIX objects to MISP, and package them as a standard misp-modules
    response.

    :param config: Module config information as a Config object
    :return: A dict containing a misp-modules response
    """

    collection = taxii2client.Collection(
        config.url, user=config.username, password=config.password
    )

    # No point in asking for more per page than our overall limit.
    query_args = {
        "per_request": min(_PAGE_SIZE, config.stix_object_limit)
    }

    # Only pass along the filters which were actually given.
    for filter_name in ("spec_version", "version", "id", "type", "added_after"):
        filter_value = getattr(config, filter_name)
        if filter_value:
            query_args[filter_name] = filter_value

    envelopes = taxii2client.as_pages(collection.get_objects, **query_args)

    # Lazily walk the objects of every page, in order...
    object_stream = (
        stix_object
        for envelope in envelopes
        for stix_object in envelope.get("objects", [])
    )

    # ...and keep only the first N of them.  The result is collected into a
    # list because the converter is handed all objects at once (in a bundle);
    # that may also help it resolve cross-references, at the cost of
    # increased memory usage.
    stix_objects = list(
        itertools.islice(object_stream, config.stix_object_limit)
    )

    # The STIX 2.0 converter wants a 2.0 bundle.  (Hope the TAXII server isn't
    # returning 2.1 objects!)
    bundle20 = stix2.v20.Bundle(stix_objects, allow_custom=True)

    converter = misp_modules.lib.stix2misp.ExternalStixParser()
    converter.handler(
        bundle20, None, [0, "event", str(_synonymsToTagNames_path)]
    )

    return {
        "results": {
            "Attribute": [
                _pymisp_to_json_serializable(attribute)
                for attribute in converter.misp_event.attributes
            ],
            "Object": [
                _pymisp_to_json_serializable(misp_object)
                for misp_object in converter.misp_event.objects
            ],
            "Tag": [
                _pymisp_to_json_serializable(misp_tag)
                for misp_tag in converter.misp_event.tags
            ]
        }
    }
 | |
| 
 | |
| 
 | |
def handler(q=False):
    """
    misp-modules entry point: run a TAXII import for one request.

    :param q: The request as a JSON string, or False if there is no request
    :return: False if q is False; otherwise a dict which is either the query
        results or an error response with the message under the "error" key
    :raises requests.HTTPError: for HTTP errors other than 401/403
    """
    if q is False:
        return False
    request = json.loads(q)

    # Error responses are built as fresh copies of misperrors, so the shared
    # module-level dict is never modified by a request.
    try:
        # A request without a "config" section is treated as an empty config,
        # which is then reported as a config error instead of a KeyError.
        config = _get_config(request.get("config") or {})
    except ConfigError as e:
        return dict(misperrors, error=str(e))

    try:
        return _query_taxii(config)
    except taxii2client.exceptions.TAXIIServiceException as e:
        return dict(misperrors, error=str(e))
    except requests.HTTPError as e:
        # Let's give a better error message for auth issues.  The exception
        # may carry no response object, hence the guarded lookup.
        status_code = getattr(e.response, "status_code", None)
        if status_code in (401, 403):
            return dict(misperrors, error="Access was denied.")
        raise
 | |
| 
 | |
| 
 | |
def introspection():
    """
    Describe this module's attributes, including its user-level settings.

    :return: The mispattributes dict, with the user settings stored under
        the "userConfig" key
    """
    mispattributes.update(userConfig=userConfig)
    return mispattributes
 | |
| 
 | |
| 
 | |
def version():
    """
    Describe this module's metadata, including its admin-level settings.

    :return: The moduleinfo dict, with the admin setting names stored under
        the "config" key
    """
    moduleinfo.update(config=moduleconfig)
    return moduleinfo
 |