2021-09-16 11:22:02 +02:00
#!/usr/bin/env python3
import re
2023-07-30 17:48:16 +02:00
from io import BytesIO
2021-09-16 11:22:02 +02:00
from collections import defaultdict
2023-08-28 17:25:55 +02:00
from collections . abc import Mapping
2024-01-12 17:15:41 +01:00
from typing import Any , Dict , List , Optional , Set , Union , TYPE_CHECKING , Iterator
2021-09-16 11:22:02 +02:00
import requests
2024-01-12 17:15:41 +01:00
from har2tree import HostNode , URLNode , Har2TreeError # type: ignore[attr-defined]
from pymisp import MISPAttribute , MISPEvent , PyMISP , MISPTag # type: ignore[attr-defined]
from pymisp . tools import FileObject , URLObject # type: ignore[attr-defined]
2021-09-16 11:22:02 +02:00
2021-10-18 13:06:43 +02:00
from . . default import get_config , get_homedir
from . . helpers import get_public_suffix_list
2022-12-07 13:03:15 +01:00
2023-10-11 14:57:36 +02:00
from . abstractmodule import AbstractModule
2021-09-27 16:04:00 +02:00
if TYPE_CHECKING :
from . . capturecache import CaptureCache
2021-09-16 11:22:02 +02:00
2024-01-12 17:15:41 +01:00
class MISPs(Mapping, AbstractModule):  # type: ignore[type-arg]
    '''Read-only mapping of instance name -> `MISP` connector.

    The connectors are built from the `instances` entry of the module
    configuration; `default` names the instance returned by `default_misp`.
    '''

    def module_init(self) -> bool:
        '''Create one connector per configured instance.

        Returns False (module disabled) when no default or no instances are
        configured, when the default instance is not among the configured
        ones, or when the connector to the default instance is unavailable.
        '''
        # Created first so the Mapping API (len / iter / lookup) keeps working
        # on a disabled module instead of raising AttributeError.
        self.__misps: Dict[str, 'MISP'] = {}

        if not self.config.get('default'):
            self.logger.info('No default instance configured, disabling MISP.')
            return False
        if not self.config.get('instances'):
            self.logger.warning('No MISP instances configured, disabling MISP.')
            return False

        self.default_instance = self.config['default']

        if self.default_instance not in self.config['instances']:
            self.logger.warning(f"The default MISP instance ({self.default_instance}) is missing in the instances ({', '.join(self.config['instances'].keys())}), disabling MISP.")
            return False

        for instance_name, instance_config in self.config['instances'].items():
            if misp_connector := MISP(config=instance_config):
                if misp_connector.available:
                    self.__misps[instance_name] = misp_connector
                else:
                    self.logger.warning(f"MISP '{instance_name}' isn't available.")
            else:
                self.logger.warning(f"Unable to initialize the connector to '{instance_name}'. It won't be available.")

        default_connector = self.__misps.get(self.default_instance)
        if not default_connector or not default_connector.available:
            self.logger.warning("Unable to initialize the connector to the default MISP instance, disabling MISP.")
            return False

        return True

    def __getitem__(self, name: str) -> 'MISP':
        '''Return the connector registered under `name` (KeyError if unknown).'''
        return self.__misps[name]

    def __iter__(self) -> Iterator[str]:
        '''Iterate over the names of the available instances.'''
        return iter(self.__misps)

    def __len__(self) -> int:
        '''Number of available instances.'''
        return len(self.__misps)

    @property
    def default_misp(self) -> 'MISP':
        '''Connector to the instance named by the `default` config entry.'''
        return self.__misps[self.default_instance]

    def export(self, cache: 'CaptureCache', is_public_instance: bool = False,
               submitted_filename: Optional[str] = None,
               submitted_file: Optional[BytesIO] = None) -> MISPEvent:
        '''Export a capture in MISP format. You can POST the return of this method
        directly to a MISP instance and it will create an event.

        :param cache: The capture to export.
        :param is_public_instance: When False, the link back to the capture gets distribution 0.
        :param submitted_filename: Name of the submitted file, used for `file://` captures
                                   when the file content is not available in the tree.
        :param submitted_file: Content of the submitted file (same use as above).
        :raises Exception: For a `file://` capture when neither the tree nor the
                           parameters provide the file.
        '''
        public_domain = get_config('generic', 'public_domain')
        event = MISPEvent()

        if cache.url.startswith('file://'):
            filename = cache.url.rsplit('/', 1)[-1]
            event.info = f'Lookyloo Capture ({filename})'
            # Create file object as initial
            if hasattr(cache.tree.root_hartree.url_tree, 'body'):
                # The file could be viewed in the browser
                filename = cache.tree.root_hartree.url_tree.name
                pseudofile = cache.tree.root_hartree.url_tree.body
            elif submitted_filename:
                # Impossible to get the file from the HAR.
                filename = submitted_filename
                pseudofile = submitted_file
            else:
                raise Exception('We must have a file here.')

            initial_file = FileObject(pseudofile=pseudofile, filename=filename)
            initial_file.comment = 'This is a capture of a file, rendered in the browser'
            initial_obj = event.add_object(initial_file)
        else:
            event.info = f'Lookyloo Capture ({cache.url})'
            initial_url = URLObject(cache.url)
            initial_url.comment = 'Submitted URL'
            self.__misp_add_ips_to_URLObject(initial_url, cache.tree.root_hartree.hostname_tree)
            initial_obj = event.add_object(initial_url)

        lookyloo_link: MISPAttribute = event.add_attribute('link', f'https://{public_domain}/tree/{cache.uuid}')  # type: ignore
        if not is_public_instance:
            lookyloo_link.distribution = 0
        initial_obj.add_reference(lookyloo_link, 'captured-by', 'Capture on lookyloo')

        redirects: List[URLObject] = []
        for nb, url in enumerate(cache.redirects):
            if url == cache.url:
                continue
            obj = URLObject(url)
            obj.comment = f'Redirect {nb}'
            self.__misp_add_ips_to_URLObject(obj, cache.tree.root_hartree.hostname_tree)
            redirects.append(obj)

        if redirects:
            redirects[-1].comment = f'Last redirect ({nb})'

            # Chain the objects: initial -> redirect 1 -> ... -> last redirect.
            # Start from `initial_obj`, which is bound in both branches above;
            # `initial_url` only exists for non-file captures.
            prec_object = initial_obj
            for u_object in redirects:
                prec_object.add_reference(u_object, 'redirects-to')
                prec_object = u_object

        for u_object in redirects:
            event.add_object(u_object)

        # Last object added so far: the last redirect, or the initial object
        # when there was no redirect.
        final_redirect = event.objects[-1]

        try:
            fo = FileObject(pseudofile=cache.tree.root_hartree.rendered_node.body, filename=cache.tree.root_hartree.rendered_node.filename)
            fo.comment = 'Content received for the final redirect (before rendering)'
            fo.add_reference(final_redirect, 'loaded-by', 'URL loading that content')
            event.add_object(fo)
        except Har2TreeError:
            # Best effort: the event is still returned without the content.
            pass
        except AttributeError:
            # No `body` in rendered node
            pass
        return event

    def __misp_add_ips_to_URLObject(self, obj: URLObject, hostname_tree: HostNode) -> None:
        '''Add the IPs resolved for the host of `obj` as `ip` attributes, if known.'''
        hosts = obj.get_attributes_by_relation('host')
        if not hosts:
            return
        hostnodes = hostname_tree.search_nodes(name=hosts[0].value)
        if not hostnodes or not hasattr(hostnodes[0], 'resolved_ips'):
            return

        resolved_ips = hostnodes[0].resolved_ips
        if isinstance(resolved_ips, Mapping):
            # MISP.lookup in this file reads resolved_ips as a mapping keyed by
            # 'v4' / 'v6'. Unpacking such a mapping directly would add the keys
            # as IPs, so flatten the values instead.
            # NOTE(review): confirm the exact shape against the har2tree version in use.
            ips = [ip for family in ('v4', 'v6') for ip in resolved_ips.get(family, ())]
        else:
            ips = list(resolved_ips)
        if ips:
            obj.add_attributes('ip', *ips)
2023-10-11 14:57:36 +02:00
class MISP(AbstractModule):
    '''Connector to a single MISP instance, built from one `instances` config entry.'''

    def module_init(self) -> bool:
        '''Create the PyMISP client and load the per-instance settings.

        Returns False (connector disabled) when no API key is configured or
        when the client cannot be created.
        '''
        if not self.config.get('apikey'):
            self.logger.info(f'No API key: {self.config}.')
            return False

        try:
            self.client = PyMISP(url=self.config['url'], key=self.config['apikey'],
                                 ssl=self.config['verify_tls_cert'], timeout=self.config['timeout'])
        except Exception as e:
            self.logger.warning(f'Unable to connect to MISP: {e}')
            return False

        self.enable_lookup = bool(self.config.get('enable_lookup', False))
        self.enable_push = bool(self.config.get('enable_push', False))
        self.allow_auto_trigger = bool(self.config.get('allow_auto_trigger', False))
        # Fall back to an empty list so _prepare_push can always iterate over it.
        self.default_tags: List[str] = self.config.get('default_tags') or []  # type: ignore
        self.auto_publish = bool(self.config.get('auto_publish', False))

        self.storage_dir_misp = get_homedir() / 'misp'
        self.storage_dir_misp.mkdir(parents=True, exist_ok=True)
        self.psl = get_public_suffix_list()
        return True

    def get_fav_tags(self) -> dict[Any, Any] | list[MISPTag]:
        '''Return the result of the tags query restricted to favourites.'''
        return self.client.tags(pythonify=True, favouritesOnly=1)

    def _prepare_push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool = False, auto_publish: Optional[bool] = False) -> Union[List[MISPEvent], Dict[str, str]]:
        '''Adds the pre-configured information as required by the instance.
        If duplicates aren't allowed, they will be automatically skipped and the
        extends_uuid key in the next element in the list updated'''
        events = [to_push] if isinstance(to_push, MISPEvent) else to_push

        events_to_push = []
        existing_uuid_to_extend = None
        for event in events:
            # The duplicate check is keyed on the value of the first attribute;
            # an event without attributes cannot be checked and is pushed as is.
            if not allow_duplicates and event.attributes:
                existing_event = self.get_existing_event(event.attributes[0].value)
                if existing_event:
                    # Skip it, and make the next pushed event extend the existing one.
                    existing_uuid_to_extend = existing_event.uuid
                    continue
            if existing_uuid_to_extend:
                event.extends_uuid = existing_uuid_to_extend
                existing_uuid_to_extend = None

            for tag in self.default_tags:
                event.add_tag(tag)
            if auto_publish:
                event.publish()  # type: ignore[no-untyped-call]
            events_to_push.append(event)
        return events_to_push

    def push(self, to_push: Union[List[MISPEvent], MISPEvent], allow_duplicates: bool = False, auto_publish: Optional[bool] = None) -> Union[List[MISPEvent], Dict[Any, Any]]:
        '''Push one or more events to the instance.

        :param to_push: Event(s) to create on the instance.
        :param allow_duplicates: When False, events already on the instance are skipped.
        :param auto_publish: Publish the events; defaults to the `auto_publish` setting when None.
        :return: The list of created events, or a dict with an `error` key.
        '''
        if auto_publish is None:
            auto_publish = self.auto_publish

        if not (self.available and self.enable_push):
            return {'error': 'Module not available or push not enabled.'}

        events = self._prepare_push(to_push, allow_duplicates, auto_publish)
        if not events:
            return {'error': 'All the events are already on the MISP instance.'}
        if isinstance(events, dict):
            return {'error': events}

        to_return = []
        for event in events:
            try:
                # NOTE: POST the event as published publishes inline, which can take a long time.
                # Here, we POST as not published, and trigger the publishing in a second call.
                background_publish = getattr(event, 'published', False)
                if background_publish:
                    event.published = False
                new_event = self.client.add_event(event, pythonify=True)
                if background_publish and isinstance(new_event, MISPEvent):
                    self.client.publish(new_event)
            except requests.exceptions.ReadTimeout:
                return {'error': 'The connection to MISP timed out, try increasing the timeout in the config.'}
            if not isinstance(new_event, MISPEvent):
                # Anything else is handed back to the caller as the error payload.
                return {'error': new_event}
            to_return.append(new_event)
        return to_return

    def _first_matching_attribute(self, permaurl: str) -> Optional[MISPAttribute]:
        '''Return the first attribute whose value is `permaurl`, or None.'''
        attributes = self.client.search('attributes', value=permaurl, limit=1, page=1, pythonify=True)
        # Only a non-empty list can be indexed safely; any other response
        # (lookup() below treats a non-list as an error) counts as "no match".
        if not isinstance(attributes, list) or not attributes:
            return None
        first = attributes[0]
        return first if isinstance(first, MISPAttribute) else None

    def get_existing_event_url(self, permaurl: str) -> Optional[str]:
        '''Return the URL of the event holding `permaurl` on the instance, or None.'''
        attribute = self._first_matching_attribute(permaurl)
        if attribute is None:
            return None
        return f'{self.client.root_url}/events/{attribute.event_id}'

    def get_existing_event(self, permaurl: str) -> Optional[MISPEvent]:
        '''Return the event holding `permaurl` on the instance, or None.'''
        attribute = self._first_matching_attribute(permaurl)
        if attribute is None:
            return None
        event = self.client.get_event(attribute.event_id, pythonify=True)
        if isinstance(event, MISPEvent):
            return event
        return None

    def lookup(self, node: URLNode, hostnode: HostNode) -> Union[Dict[str, Set[str]], Dict[str, Any]]:
        '''Search the instance for indicators related to a URL node and its host node.

        The values searched are the URL, the hostname, the domain directly
        under the public suffix, the resolved IPs, the CNAMEs and, when the
        response is not empty, the body hash.

        :return: A mapping event ID -> matching values, `{'info': 'No hits.'}` when
                 nothing matches, the raw response when it is not a list, or a dict
                 with an `error` key when the lookup is disabled.
        '''
        if not (self.available and self.enable_lookup):
            return {'error': 'Module not available or lookup not enabled.'}

        to_lookup = [node.name, hostnode.name]

        tld = self.psl.publicsuffix(hostnode.name)
        if tld and hostnode.name != tld:
            # Strip the literal ".<suffix>" from the end: the suffix contains
            # dots, so it must be escaped before being used in a pattern.
            domain = re.sub(rf'\.{re.escape(tld)}$', '', hostnode.name).split('.')[-1]
            to_lookup.append(f'{domain}.{tld}')

        # resolved_ips may be missing on a host node (MISPs guards it with hasattr).
        resolved_ips = getattr(hostnode, 'resolved_ips', {})
        for family in ('v4', 'v6'):
            if family in resolved_ips:
                to_lookup.extend(resolved_ips[family])
        if hasattr(hostnode, 'cnames'):
            to_lookup.extend(hostnode.cnames)
        if not node.empty_response:
            to_lookup.append(node.body_hash)

        attributes = self.client.search(controller='attributes', value=to_lookup,
                                        enforce_warninglist=True, pythonify=True)
        if not attributes:
            return {'info': 'No hits.'}
        if not isinstance(attributes, list):
            # The request returned an error
            return attributes  # type: ignore

        to_return: Dict[str, Set[str]] = defaultdict(set)
        # NOTE: We have MISPAttribute in that list
        for a in attributes:
            to_return[a.event_id].add(a.value)  # type: ignore
        return to_return