'''
ThreatAnalyzer sandbox import module for MISP.
'''
import json
import base64
import re
import zipfile
import ipaddress
import io
import logging

misperrors = {'error': 'Error'}
userConfig = {}
inputSource = ['file']

moduleinfo = {
    'version': '0.10',
    'author': 'Christophe Vandeplas',
    'description': 'Module to import ThreatAnalyzer archive.zip / analysis.json files.',
    'module-type': ['import'],
    'name': 'ThreatAnalyzer Sandbox Import',
    'logo': '',
    'requirements': [],
    'features': 'The module imports MISP Attributes from a ThreatAnalyzer format file. This file can be either in ZIP or JSON format.\n'
                'No special configuration is required for users to make the module work.',
    'references': ['https://www.threattrack.com/malware-analysis.aspx'],
    'input': 'ThreatAnalyzer format file',
    'output': 'MISP Event attributes',
}
moduleconfig = []

log = logging.getLogger('misp-modules')

# FIXME - many hardcoded filters should be migrated to import regexes. See also https://github.com/MISP/MISP/issues/2712
# DISCLAIMER - This module is to be considered as experimental and needs much fine-tuning.
# more can be done with what's in the ThreatAnalyzer archive.zip
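
# An import request arriving in handler() is a JSON string shaped roughly as below
# (a sketch: 'data' carries the base64-encoded upload, the values are illustrative):
# {
#     "module": "threatanalyzer_import",
#     "data": "<base64 of archive.zip or analysis.json>"
# }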
def handler(q=False):
    if q is False:
        return False
    results = []
    zip_starts = 'PK'
    request = json.loads(q)
    data = base64.b64decode(request['data'])

    if data[:len(zip_starts)].decode() == zip_starts:
        with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
            # unzipped_files = []
            modified_files_mapping = {}
            # pre-process some of the files in the zip
            for zip_file_name in zf.namelist():  # Get all files in the zip file
                # find the filenames of the modified_files
                if re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                        file_data = fp.read()
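                        # Each mapping.log line is pipe-separated; the layouts handled below are
                        # (inferred from the splits, so a best-effort reading of the TA format):
                        #   TA 6.0: <filepath>|<size>|<md5>|<created>
                        #   TA 6.1: <filepath>|<size>|<md5>|<sha256>|<created>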
                        for line in file_data.decode("utf-8", 'ignore').split('\n'):
                            if not line:
                                continue
                            if line.count('|') == 3:
                                l_fname, l_size, l_md5, l_created = line.split('|')
                            elif line.count('|') == 4:
                                l_fname, l_size, l_md5, l_sha256, l_created = line.split('|')
                            else:
                                continue  # unexpected line layout, skip it
                            l_fname = cleanup_filepath(l_fname)
                            if l_fname:
                                if l_size == '0':  # fields come from the split, so compare as strings
                                    results.append({'values': l_fname, 'type': 'filename', 'to_ids': True,
                                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                                else:
                                    # file is a non empty sample, upload the sample later
                                    modified_files_mapping[l_md5] = l_fname

            # now really process the data
            for zip_file_name in zf.namelist():  # Get all files in the zip file
                # print('Processing file: {}'.format(zip_file_name))
                if re.match(r"Analysis/proc_\d+/modified_files/.+\.", zip_file_name) and "mapping.log" not in zip_file_name:
                    sample_md5 = zip_file_name.split('/')[-1].split('.')[0]
                    if sample_md5 in modified_files_mapping:
                        current_sample_filename = modified_files_mapping[sample_md5]
                        # print("{} maps to {}".format(sample_md5, current_sample_filename))
                        with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                            file_data = fp.read()
                            results.append({
                                'values': current_sample_filename,
                                'data': base64.b64encode(file_data).decode(),
                                'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})

                if 'Analysis/analysis.json' in zip_file_name:
                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                        file_data = fp.read()
                        analysis_json = json.loads(file_data.decode('utf-8'))
                        results += process_analysis_json(analysis_json)
                        try:
                            sample_filename = analysis_json.get('analysis').get('@filename')
                            if sample_filename:
                                with zf.open('sample', mode='r', pwd=None) as fp:
                                    file_data = fp.read()
                                    results.append({
                                        'values': sample_filename,
                                        'data': base64.b64encode(file_data).decode(),
                                        'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
                        except Exception:
                            # no 'sample' in archive, might be an url analysis, just ignore
                            pass

    else:
        try:
            results = process_analysis_json(json.loads(data.decode('utf-8')))
        except ValueError:
            log.warning('MISP modules {0} failed: uploaded file is not a zip or json file.'.format(request['module']))
            return {'error': 'Uploaded file is not a zip or json file.'}

    # keep only unique entries based on the value field
    results = list({v['values']: v for v in results}.values())
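    # the returned structure follows the MISP import-module shape, e.g. (illustrative values):
    # {'results': [{'values': 'dropped.exe', 'type': 'filename', 'to_ids': True,
    #               'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''}]}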
    r = {'results': results}
    return r


def process_analysis_json(analysis_json):
    if 'analysis' in analysis_json and 'processes' in analysis_json['analysis'] and 'process' in analysis_json['analysis']['processes']:
        # if 'analysis' in analysis_json and '@filename' in analysis_json['analysis']:
        #     sample['values'] = analysis_json['analysis']['@filename']
        for process in analysis_json['analysis']['processes']['process']:
            # print_json(process)
            if 'connection_section' in process and 'connection' in process['connection_section']:
                # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
                # this will always create a list, even with only one item
                if isinstance(process['connection_section']['connection'], dict):
                    process['connection_section']['connection'] = [process['connection_section']['connection']]

                # iterate over each entry
                for connection_section_connection in process['connection_section']['connection']:
                    # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
                    # this will always create a list, even with only one item
                    for subsection in ['http_command', 'http_header']:
                        if isinstance(connection_section_connection.get(subsection), dict):  # .get(): the subsection may be absent
                            connection_section_connection[subsection] = [connection_section_connection[subsection]]
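                    # e.g. a single entry parses as {'http_command': {...}} while several parse
                    # as {'http_command': [{...}, {...}]}; after this loop both are lists of dicts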

                    if 'name_to_ip' in connection_section_connection:  # TA 6.1 data format
                        connection_section_connection['@remote_ip'] = connection_section_connection['name_to_ip']['@result_addresses']
                        connection_section_connection['@remote_hostname'] = connection_section_connection['name_to_ip']['@request_name']

                    connection_section_connection['@remote_ip'] = cleanup_ip(connection_section_connection['@remote_ip'])
                    connection_section_connection['@remote_hostname'] = cleanup_hostname(connection_section_connection['@remote_hostname'])
                    if connection_section_connection['@remote_ip'] and connection_section_connection['@remote_hostname']:
                        val = '{}|{}'.format(connection_section_connection['@remote_hostname'],
                                             connection_section_connection['@remote_ip'])
                        # print("connection_section_connection hostname|ip: {}|{} IDS:yes".format(
                        #     connection_section_connection['@remote_hostname'],
                        #     connection_section_connection['@remote_ip'])
                        # )
                        yield ({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif connection_section_connection['@remote_ip']:
                        # print("connection_section_connection ip-dst: {} IDS:yes".format(
                        #     connection_section_connection['@remote_ip'])
                        # )
                        yield ({'values': connection_section_connection['@remote_ip'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})
                    elif connection_section_connection['@remote_hostname']:
                        # print("connection_section_connection hostname: {} IDS:yes".format(
                        #     connection_section_connection['@remote_hostname'])
                        # )
                        yield ({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})

                    if 'http_command' in connection_section_connection:
                        for http_command in connection_section_connection['http_command']:
                            # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
                            #     connection_section_connection['http_command']['@method'],  # comment
                            #     connection_section_connection['http_command']['@url'])  # url
                            # )
                            val = cleanup_url(http_command['@url'])
                            if val:
                                yield ({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': http_command['@method']})

                    if 'http_header' in connection_section_connection:
                        for http_header in connection_section_connection['http_header']:
                            if 'User-Agent: ' in http_header['@header']:
                                val = http_header['@header'][len('User-Agent: '):]
                                yield ({'values': val, 'type': 'user-agent', 'categories': ['Network activity'], 'to_ids': False, 'comment': ''})
                            elif 'Host: ' in http_header['@header']:
                                val = http_header['@header'][len('Host: '):]
                                if ':' in val:
                                    try:
                                        val_port = int(val.split(':')[1])
                                    except ValueError:
                                        val_port = False
                                    val_hostname = cleanup_hostname(val.split(':')[0])
                                    val_ip = cleanup_ip(val.split(':')[0])
                                    if val_hostname and val_port:
                                        val_combined = '{}|{}'.format(val_hostname, val_port)
                                        # print({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
                                        yield ({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                                        continue  # handled as hostname|port, don't also emit a plain hostname below
                                    elif val_ip and val_port:
                                        val_combined = '{}|{}'.format(val_ip, val_port)
                                        # print({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
                                        yield ({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
                                        continue  # handled as ip-dst|port
                                    else:
                                        continue
                                val_hostname = cleanup_hostname(val)
                                if val_hostname:
                                    # print({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
                                    yield ({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
                            else:
                                # LATER header not processed
                                pass

            if 'filesystem_section' in process and 'create_file' in process['filesystem_section']:
                for filesystem_section_create_file in process['filesystem_section']['create_file']:
                    # first skip some items
                    if filesystem_section_create_file['@create_disposition'] in {'FILE_OPEN_IF'}:
                        continue
                    # FIXME - this section is probably not needed considering the 'stored_files stored_created_file' section we process later.
                    # print('CREATE FILE: {}\t{}'.format(
                    #     filesystem_section_create_file['@srcfile'],  # filename
                    #     filesystem_section_create_file['@create_disposition'])  # comment - use this to filter out cases
                    # )

            if 'networkoperation_section' in process and 'dns_request_by_addr' in process['networkoperation_section']:
                for networkoperation_section_dns_request_by_addr in process['networkoperation_section']['dns_request_by_addr']:
                    # FIXME - it's unclear what this section is for.
                    # TODO filter this
                    # print('DNS REQUEST: {}\t{}'.format(
                    #     networkoperation_section_dns_request_by_addr['@request_address'],  # ip-dst
                    #     networkoperation_section_dns_request_by_addr['@result_name'])  # hostname
                    # )  # => NOT hostname|ip
                    pass

            if 'networkoperation_section' in process and 'dns_request_by_name' in process['networkoperation_section']:
                for networkoperation_section_dns_request_by_name in process['networkoperation_section']['dns_request_by_name']:
                    networkoperation_section_dns_request_by_name['@request_name'] = cleanup_hostname(networkoperation_section_dns_request_by_name['@request_name'].rstrip('.'))
                    networkoperation_section_dns_request_by_name['@result_addresses'] = cleanup_ip(networkoperation_section_dns_request_by_name['@result_addresses'])
                    if networkoperation_section_dns_request_by_name['@request_name'] and networkoperation_section_dns_request_by_name['@result_addresses']:
                        val = '{}|{}'.format(networkoperation_section_dns_request_by_name['@request_name'],
                                             networkoperation_section_dns_request_by_name['@result_addresses'])
                        # print("networkoperation_section_dns_request_by_name hostname|ip: {}|{} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@request_name'],
                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
                        # )
                        yield ({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif networkoperation_section_dns_request_by_name['@request_name']:
                        # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@request_name'])
                        # )
                        yield ({'values': networkoperation_section_dns_request_by_name['@request_name'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                    elif networkoperation_section_dns_request_by_name['@result_addresses']:
                        # this happens when the IP is both in the request_name and result_address.
                        # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
                        # )
                        yield ({'values': networkoperation_section_dns_request_by_name['@result_addresses'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})

            if 'networkpacket_section' in process and 'connect_to_computer' in process['networkpacket_section']:
                for networkpacket_section_connect_to_computer in process['networkpacket_section']['connect_to_computer']:
                    networkpacket_section_connect_to_computer['@remote_hostname'] = cleanup_hostname(networkpacket_section_connect_to_computer['@remote_hostname'])
                    networkpacket_section_connect_to_computer['@remote_ip'] = cleanup_ip(networkpacket_section_connect_to_computer['@remote_ip'])
                    if networkpacket_section_connect_to_computer['@remote_hostname'] and networkpacket_section_connect_to_computer['@remote_ip']:
                        # print("networkpacket_section_connect_to_computer hostname|ip: {}|{} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
                        #     networkpacket_section_connect_to_computer['@remote_ip'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_ip'])
                        yield ({'values': val_combined, 'type': 'domain|ip', 'to_ids': True, 'comment': ''})
                    elif networkpacket_section_connect_to_computer['@remote_hostname']:
                        # print("networkpacket_section_connect_to_computer hostname: {} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_port'])
                        yield ({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif networkpacket_section_connect_to_computer['@remote_ip']:
                        # print("networkpacket_section_connect_to_computer ip-dst: {} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_ip'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_ip'], networkpacket_section_connect_to_computer['@remote_port'])
                        yield ({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})

            if 'registry_section' in process and 'create_key' in process['registry_section']:
                # FIXME this is a complicated section, together with the 'set_value'.
                # it looks like this section is not ONLY about creating registry keys,
                # more about accessing a handle to keys (with specific permissions)
                # maybe we don't want to keep this, in favor of 'set_value'
                for create_key in process['registry_section']['create_key']:
                    # print('REG CREATE: {}\t{}'.format(
                    #     create_key['@desired_access'],
                    #     create_key['@key_name']))
                    pass

            if 'registry_section' in process and 'delete_key' in process['registry_section']:
                # LATER we probably don't want to keep this. Much pollution.
                # Maybe for later once we have filtered out this.
                for delete_key in process['registry_section']['delete_key']:
                    # print('REG DELETE: {}'.format(
                    #     delete_key['@key_name'])
                    # )
                    pass

            if 'registry_section' in process and 'set_value' in process['registry_section']:
                # FIXME this is a complicated section, together with the 'create_key'.
                for set_value in process['registry_section']['set_value']:
                    # '@data_type' == 'REG_BINARY',
                    # '@data_type' == 'REG_DWORD',
                    # '@data_type' == 'REG_EXPAND_SZ',
                    # '@data_type' == 'REG_MULTI_SZ',
                    # '@data_type' == 'REG_NONE',
                    # '@data_type' == 'REG_QWORD',
                    # '@data_type' == 'REG_SZ',
                    regkey = cleanup_regkey("{}\\{}".format(set_value['@key_name'], set_value['@value_name']))
                    regdata = cleanup_regdata(set_value.get('@data'))
                    if not regkey:
                        continue
                    if set_value['@data_size'] == '0' or not regdata:
                        # print('registry_section set_value REG SET: {}\t{}\t{}'.format(
                        #     set_value['@data_type'],
                        #     set_value['@key_name'],
                        #     set_value['@value_name'])
                        # )
                        yield ({'values': regkey, 'type': 'regkey', 'to_ids': True,
                                'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
                    else:
                        try:
                            # unicode fun...
                            # print('registry_section set_value REG SET: {}\t{}\t{}\t{}'.format(
                            #     set_value['@data_type'],
                            #     set_value['@key_name'],
                            #     set_value['@value_name'],
                            #     set_value['@data'])
                            # )
                            val = "{}|{}".format(regkey, regdata)
                            yield ({'values': val, 'type': 'regkey|value', 'to_ids': True,
                                    'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
                        except Exception as e:
                            # use the module logger instead of a bare print()
                            log.warning("EXCEPTION registry_section {}".format(e))
                            # TODO - maybe we want to handle these later, or not...

            if 'stored_files' in process and 'stored_created_file' in process['stored_files']:
                for stored_created_file in process['stored_files']['stored_created_file']:
                    stored_created_file['@filename'] = cleanup_filepath(stored_created_file['@filename'])
                    if stored_created_file['@filename']:
                        if stored_created_file['@filesize'] != '0':
                            val = '{}|{}'.format(stored_created_file['@filename'], stored_created_file['@md5'])
                            # print("stored_created_file filename|md5: {}|{} IDS:yes".format(
                            #     stored_created_file['@filename'],  # filename
                            #     stored_created_file['@md5'])  # md5
                            # )  # => filename|md5
                            yield ({'values': val, 'type': 'filename|md5', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                        else:
                            # print("stored_created_file filename: {} IDS:yes".format(
                            #     stored_created_file['@filename'])  # filename
                            # )  # => filename
                            yield ({'values': stored_created_file['@filename'],
                                    'type': 'filename', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})

            if 'stored_files' in process and 'stored_modified_file' in process['stored_files']:
                for stored_modified_file in process['stored_files']['stored_modified_file']:
                    stored_modified_file['@filename'] = cleanup_filepath(stored_modified_file['@filename'])
                    if stored_modified_file['@filename']:
                        if stored_modified_file['@filesize'] != '0':
                            val = '{}|{}'.format(stored_modified_file['@filename'], stored_modified_file['@md5'])
                            # print("stored_modified_file MODIFY FILE: {}\t{}".format(
                            #     stored_modified_file['@filename'],  # filename
                            #     stored_modified_file['@md5'])  # md5
                            # )  # => filename|md5
                            yield ({'values': val, 'type': 'filename|md5', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'],
                                    'comment': 'modified'})
                        else:
                            # print("stored_modified_file MODIFY FILE: {}".format(
                            #     stored_modified_file['@filename'])  # filename
                            # )  # => filename
                            yield ({'values': stored_modified_file['@filename'], 'type': 'filename', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'],
                                    'comment': 'modified'})
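

# A minimal sketch of the analysis.json input consumed by process_analysis_json()
# (keys inferred from this module; real ThreatAnalyzer output has many more sections):
# {"analysis": {"@filename": "sample.exe",
#               "processes": {"process": [
#                   {"connection_section": {"connection": [...]},
#                    "networkoperation_section": {"dns_request_by_name": [...]},
#                    "registry_section": {"set_value": [...]},
#                    "stored_files": {"stored_created_file": [...]}}]}}}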


def add_file(filename, results, hash, index, filedata=None):
    pass
    # results.append({'values': filename, 'data': "{}|{}".format(filename, filedata.decode()), 'type': 'malware-sample',
    #                 'categories': ['Artifacts dropped', 'Payload delivery']})


def add_file_zip():
    # if 'malware-sample' in request:
    #     sample_filename = request.get("malware-sample").split("|", 1)[0]
    #     data = base64.b64decode(data)
    #     fl = io.BytesIO(data)
    #     zf = zipfile.ZipFile(fl)
    #     sample_hashname = zf.namelist()[0]
    #     data = zf.read(sample_hashname, b"infected")
    #     zf.close()
    pass


def print_json(data):
    print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')))


def list_in_string(lst, data, regex=False):
    for item in lst:
        if regex:
            if re.search(item, data, flags=re.IGNORECASE):
                return True
        else:
            if item in data:
                return True
    return False  # be explicit instead of implicitly returning None


def cleanup_ip(item):
    # you should exclude private IP ranges via import regexes
    noise_substrings = {
        '224.0.0.',
        '127.0.0.',
        '8.8.8.8',
        '8.8.4.4',
        '0.0.0.0',
        'NONE'
    }
    if list_in_string(noise_substrings, item):
        return None
    try:
        ipaddress.ip_address(item)
        return item
    except ValueError:
        return None


def cleanup_hostname(item):
    noise_substrings = {
        'wpad',
        'teredo.ipv6.microsoft.com',
        'WIN7SP1-x64-UNP'
    }
    # take away common known bad
    if list_in_string(noise_substrings, item):
        return None
    # eliminate IP addresses
    try:
        ipaddress.ip_address(item)
    except ValueError:
        # this is not an IP, so continue
        return item
    return None


def cleanup_url(item):
    if item in ['/']:
        return None
    return item


def cleanup_filepath(item):
    noise_substrings = {
        '\\AppData\\Local\\GDIPFONTCACHEV1.DAT',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\DOMStore\\',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Caches\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Explorer\\thumbcache',
        '\\AppData\\Local\\Microsoft\\Windows\\History\\History.',
        '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.',
        '\\AppData\\Local\\Microsoft\\Windows\\WebCache\\',
        '\\AppData\\Local\\Temp\\.*tmp$',
        '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\',
        '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_',
        '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\',
        'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat',
        'C:\\Windows\\Prefetch\\',
        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\SharedDataEvents-journal',
        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',
        '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
        '\\AppData\\Roaming\\Adobe\\Flash Player\\NativeCache\\',
        'C:\\Windows\\AppCompat\\Programs\\',
        'C:\\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
    }
    if list_in_string(noise_substrings, item):
        return None
    return item


def cleanup_regkey(item):
    noise_substrings = {
        r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith',
        r'\\CurrentVersion\\Explorer\\RecentDocs\\',
        r'\\CurrentVersion\\Explorer\\UserAssist\\',
        r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag',
        r'\\Software\\Classes\\CLSID\\',
        r'\\Software\\Classes\\Local Settings\\MuiCache\\',
        r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch',
        r'\\Software\\Microsoft\\Office\\[0-9\.]+\\',
        r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\',
        r'\\Software\\Microsoft\\OfficeSoftwareProtectionPlatform\\',
        r'\\Software\\Microsoft\\Shared Tools\\Panose\\',
        r'\\Software\\Microsoft\\Tracing\\',
        r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\',
        r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Action Center\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\',
        r'\\System\\CurrentControlSet\\Services\\RdyBoost\\',
        r'\\Usage\\SpellingAndGrammarFiles'
    }
    if list_in_string(noise_substrings, item, regex=True):
        return None
    return item


def cleanup_regdata(item):
    if not item:
        return None
    item = item.replace('(UNICODE_0x00000000)', '')
    return item


def get_zipped_contents(filename, data, password=None):
    with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
        unzipped_files = []
        if password is not None:
            password = str.encode(password)  # Byte encoded password required
        for zip_file_name in zf.namelist():  # Get all files in the zip file
            # print(zip_file_name)
            with zf.open(zip_file_name, mode='r', pwd=password) as fp:
                file_data = fp.read()
                unzipped_files.append({'values': zip_file_name,
                                       'data': file_data,
                                       'comment': 'Extracted from {0}'.format(filename)})
            # print("{} : {}".format(zip_file_name, len(file_data)))
        return unzipped_files
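
# Illustrative use (the filename and the 'infected' password are assumptions):
#     for f in get_zipped_contents('archive.zip', raw_bytes, password='infected'):
#         print(f['values'], len(f['data']))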


def introspection():
    modulesetup = {}
    try:
        userConfig
        modulesetup['userConfig'] = userConfig
    except NameError:
        pass
    try:
        inputSource
        modulesetup['inputSource'] = inputSource
    except NameError:
        pass
    return modulesetup


def version():
    moduleinfo['config'] = moduleconfig
    return moduleinfo
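

# Minimal local smoke test (a sketch, not part of the MISP module API; the path
# 'analysis.json' below is a hypothetical ThreatAnalyzer export next to this script):
if __name__ == '__main__':
    with open('analysis.json', 'rb') as f:
        payload = json.dumps({'module': 'threatanalyzer_import',
                              'data': base64.b64encode(f.read()).decode()})
    print_json(handler(payload))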