From 46975f4f16e6ebfee4127809401131549d76d257 Mon Sep 17 00:00:00 2001 From: Christophe Vandeplas Date: Tue, 16 Jan 2018 11:05:26 +0100 Subject: [PATCH] Added ThreatAnalyzer sandbox import Experimental module - some parts should be migrated to --- README.md | 9 +- misp_modules/modules/import_mod/__init__.py | 2 +- .../import_mod/threatanalyzer_import.py | 507 ++++++++++++++++++ 3 files changed, 513 insertions(+), 5 deletions(-) create mode 100755 misp_modules/modules/import_mod/threatanalyzer_import.py diff --git a/README.md b/README.md index 6dd06ce..339c56f 100644 --- a/README.md +++ b/README.md @@ -18,8 +18,9 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ ### Expansion modules * [ASN History](misp_modules/modules/expansion/asn_history.py) - a hover and expansion module to expand an AS number with the ASN description and its history. -* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen. * [CIRCL Passive DNS](misp_modules/modules/expansion/circl_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information. +* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen. +* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to. * [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE). * [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP addresses attributes. * [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois. 
@@ -28,13 +29,12 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ * [GeoIP](misp_modules/modules/expansion/geoip_country.py) - a hover and expansion module to get GeoIP information from geolite/maxmind. * [IPASN](misp_modules/modules/expansion/ipasn.py) - a hover and expansion to get the BGP ASN of an IP address. * [iprep](misp-modules/modules/expansion/iprep.py) - an expansion module to get IP reputation from packetmail.net. +* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/). * [passivetotal](misp_modules/modules/expansion/passivetotal.py) - a [passivetotal](https://www.passivetotal.org/) module that queries a number of different PassiveTotal datasets. * [shodan](misp_modules/modules/expansion/shodan.py) - a minimal [shodan](https://www.shodan.io/) expansion module. * [sourcecache](misp_modules/modules/expansion/sourcecache.py) - a module to cache a specific link from a MISP instance. * [ThreatCrowd](misp_modules/modules/expansion/threatcrowd.py) - an expansion module for [ThreatCrowd](https://www.threatcrowd.org/). -* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/). * [threatminer](misp_modules/modules/expansion/threatminer.py) - an expansion module to expand from [ThreatMiner](https://www.threatminer.org/). -* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to. * [virustotal](misp_modules/modules/expansion/virustotal.py) - an expansion module to pull known resolutions and malware samples related with an IP/Domain from virusTotal (this modules require a VirusTotal private API key) * [wikidata](misp_modules/modules/expansion/wiki.py) - a [wikidata](https://www.wikidata.org) expansion module. * [xforce](misp_modules/modules/expansion/xforceexchange.py) - an IBM X-Force Exchange expansion module. 
@@ -50,10 +50,11 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/ ### Import modules * [Cuckoo JSON](misp_modules/modules/import_mod/cuckooimport.py) Cuckoo JSON import. +* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata. * [OCR](misp_modules/modules/import_mod/ocr.py) Optical Character Recognition (OCR) module for MISP to import attributes from images, scan or faxes. * [OpenIOC](misp_modules/modules/import_mod/openiocimport.py) OpenIOC import based on PyMISP library. * [stiximport](misp_modules/modules/import_mod/stiximport.py) - An import module to process STIX xml/json. -* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata. +* [ThreatAnalyzer](misp_modules/modules/import_mod/threatanalyzer_import.py) - An import module to process ThreatAnalyzer archive.zip/analysis.json sandbox exports. * [VMRay](misp_modules/modules/import_mod/vmray_import.py) - An import module to process VMRay export. ## How to install and start MISP modules? diff --git a/misp_modules/modules/import_mod/__init__.py b/misp_modules/modules/import_mod/__init__.py index 6beeaa2..e2941d7 100644 --- a/misp_modules/modules/import_mod/__init__.py +++ b/misp_modules/modules/import_mod/__init__.py @@ -1,4 +1,4 @@ from . 
import _vmray __all__ = ['vmray_import', 'testimport', 'ocr', 'stiximport', 'cuckooimport', - 'email_import', 'mispjson', 'openiocimport'] + 'email_import', 'mispjson', 'openiocimport', 'threatanalyzer_import'] diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py new file mode 100755 index 0000000..fded508 --- /dev/null +++ b/misp_modules/modules/import_mod/threatanalyzer_import.py @@ -0,0 +1,507 @@ +''' +import +define mandatory + +''' +import json +import base64 +import re +import zipfile +import ipaddress +import io +import logging + +misperrors = {'error': 'Error'} +userConfig = {} +inputSource = ['file'] + +moduleinfo = {'version': '0.6', 'author': 'Christophe Vandeplas', + 'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files', + 'module-type': ['import']} + +moduleconfig = [] +log = logging.getLogger('misp-modules') + +# FIXME - many hardcoded filters should be migrated to import regexes. See also https://github.com/MISP/MISP/issues/2712 +# DISCLAIMER - This module is to be considered as experimental and needs much fine-tuning. 
# more can be done with what's in the ThreatAnalyzer archive.zip

_UPLOAD_ERROR = 'Uploaded file is not a zip or json file.'
_DROPPED_FILE_CATEGORIES = ('Artifacts dropped', 'Payload delivery')
_REGISTRY_CATEGORIES = ('External analysis', 'Persistence mechanism', 'Artifacts dropped')


def handler(q=False):
    """Entry point of the import module.

    :param q: JSON string of the request; ``request['data']`` holds the
        base64-encoded upload (a ThreatAnalyzer ``archive.zip`` or a bare
        ``analysis.json``).
    :return: ``False`` when called without a query, ``{'error': ...}`` when
        the upload is neither a readable zip nor JSON, otherwise
        ``{'results': [...]}`` with one attribute dict per unique value.
    """
    if q is False:
        return False
    request = json.loads(q)
    data = base64.b64decode(request['data'])

    # Compare raw bytes: decoding the first two bytes would raise
    # UnicodeDecodeError on arbitrary binary uploads.
    if data.startswith(b'PK'):
        try:
            archive = zipfile.ZipFile(io.BytesIO(data), 'r')
        except zipfile.BadZipFile:
            return _upload_error(request)
        with archive:
            results = _process_archive(archive)
    else:
        try:
            analysis_json = json.loads(data.decode('utf-8'))
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
            return _upload_error(request)
        results = process_analysis_json(analysis_json)

    # keep only unique entries based on the value field (last one wins)
    results = list({v['values']: v for v in results}.values())
    return {'results': results}


def _upload_error(request):
    """Log and build the error response for an unreadable upload."""
    log.warning('MISP modules %s failed: uploaded file is not a zip or json file.',
                request.get('module'))
    return {'error': _UPLOAD_ERROR}


def _read_modified_files_mapping(zf):
    """Return ``{md5: cleaned file path}`` for the non-empty files listed in mapping.log."""
    mapping = {}
    for zip_file_name in zf.namelist():
        if not re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
            continue
        with zf.open(zip_file_name, mode='r', pwd=None) as fp:
            file_data = fp.read()
        for line in file_data.decode().split('\n'):
            line = line.rstrip('\r')
            if not line:
                continue
            # format: filename|size|md5|created
            fields = line.rsplit('|', 3)
            if len(fields) != 4:
                continue  # malformed line, nothing usable
            l_fname, l_size, l_md5, _l_created = fields
            l_fname = cleanup_filepath(l_fname)
            if not l_fname:
                continue
            # l_size is text, so it has to be compared with '0', not 0
            if l_size.strip() == '0':
                continue  # FIXME create an attribute for the filename/path
            # file is a non empty sample, upload the sample later
            mapping[l_md5] = l_fname
    return mapping


def _process_archive(zf):
    """Extract samples and analysis.json attributes from an opened archive.zip."""
    results = []
    # pre-process some of the files in the zip
    modified_files_mapping = _read_modified_files_mapping(zf)

    # now really process the data
    for zip_file_name in zf.namelist():
        if re.match(r"Analysis/proc_\d+/modified_files/.+\.", zip_file_name) and "mapping.log" not in zip_file_name:
            # stored samples are named <md5>.<ext>
            sample_md5 = zip_file_name.split('/')[-1].split('.')[0]
            if sample_md5 in modified_files_mapping:
                with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                    file_data = fp.read()
                results.append({
                    'values': modified_files_mapping[sample_md5],
                    'data': base64.b64encode(file_data).decode(),
                    'type': 'malware-sample',
                    'categories': list(_DROPPED_FILE_CATEGORIES),
                    'to_ids': True,
                    'comment': ''})

        if 'Analysis/analysis.json' in zip_file_name:
            with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                file_data = fp.read()
            results += process_analysis_json(json.loads(file_data.decode('utf-8')))
    return results


def process_analysis_json(analysis_json):
    """Yield attribute dicts for every process found in a parsed analysis.json.

    Nothing is yielded when ``analysis.processes.process`` is absent.
    The input structure is not modified.
    """
    if 'analysis' in analysis_json and 'processes' in analysis_json['analysis'] and 'process' in analysis_json['analysis']['processes']:
        for process in analysis_json['analysis']['processes']['process']:
            yield from _connection_attributes(process)
            # FIXME - 'filesystem_section'/'create_file' is not processed: probably not
            #         needed considering the 'stored_files' sections handled below.
            # FIXME - 'networkoperation_section'/'dns_request_by_addr' is not processed:
            #         it's unclear what this section is for.
            yield from _dns_request_attributes(process)
            yield from _network_packet_attributes(process)
            # FIXME - 'registry_section' 'create_key'/'delete_key' are not processed: they
            #         are more about handles to keys than real changes. Much pollution.
            yield from _registry_attributes(process)
            yield from _stored_file_attributes(process, 'stored_created_file', '')
            yield from _stored_file_attributes(process, 'stored_modified_file', 'modified')


def _section_items(process, section, key):
    """Return ``process[section][key]``, or an empty list when either level is missing."""
    if section in process and key in process[section]:
        return process[section][key]
    return []


def _connection_attributes(process):
    """Yield attributes for the connections, HTTP commands and HTTP headers of a process."""
    for connection in _section_items(process, 'connection_section', 'connection'):
        remote_ip = cleanup_ip(connection['@remote_ip'])
        remote_hostname = cleanup_hostname(connection['@remote_hostname'])
        if remote_ip and remote_hostname:
            yield {'values': '{}|{}'.format(remote_hostname, remote_ip), 'type': 'domain|ip',
                   'categories': ['Network activity'], 'to_ids': True, 'comment': ''}
        elif remote_ip:
            yield {'values': remote_ip, 'type': 'ip-dst', 'to_ids': True, 'comment': ''}
        elif remote_hostname:
            yield {'values': remote_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''}

        for http_command in connection.get('http_command', []):
            val = cleanup_url(http_command['@url'])
            if val:
                yield {'values': val, 'type': 'url', 'categories': ['Network activity'],
                       'to_ids': True, 'comment': http_command['@method']}

        for http_header in connection.get('http_header', []):
            header = http_header['@header']
            if 'User-Agent:' in header:
                yield {'values': header[len('User-Agent: '):], 'type': 'user-agent',
                       'categories': ['Network activity'], 'to_ids': False, 'comment': ''}
            elif 'Host:' in header:
                yield from _host_header_attributes(header[len('Host: '):])
            # LATER other headers are not processed


def _host_header_attributes(val):
    """Yield the attribute for the value of a ``Host:`` header, if any."""
    if ':' not in val:
        val_hostname = cleanup_hostname(val)
        if val_hostname:
            yield {'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''}
        return
    # host:port - never emit the raw 'host:port' text as a hostname attribute
    host, port = val.split(':')[0], val.split(':')[1]
    try:
        val_port = int(port)
    except ValueError:
        return
    if not val_port:
        return
    val_hostname = cleanup_hostname(host)
    val_ip = cleanup_ip(host)
    if val_hostname:
        yield {'values': '{}|{}'.format(val_hostname, val_port), 'type': 'hostname|port',
               'to_ids': True, 'comment': ''}
    elif val_ip:
        yield {'values': '{}|{}'.format(val_ip, val_port), 'type': 'ip-dst|port',
               'to_ids': True, 'comment': ''}


def _dns_request_attributes(process):
    """Yield attributes for the DNS lookups by name performed by a process."""
    for dns_request in _section_items(process, 'networkoperation_section', 'dns_request_by_name'):
        request_name = cleanup_hostname(dns_request['@request_name'].rstrip('.'))
        result_addresses = cleanup_ip(dns_request['@result_addresses'])
        if request_name and result_addresses:
            yield {'values': '{}|{}'.format(request_name, result_addresses), 'type': 'domain|ip',
                   'categories': ['Network activity'], 'to_ids': True, 'comment': ''}
        elif request_name:
            yield {'values': request_name, 'type': 'hostname', 'to_ids': True, 'comment': ''}
        elif result_addresses:
            # this happens when the IP is both in the request_name and result_address.
            yield {'values': result_addresses, 'type': 'ip-dst', 'to_ids': True, 'comment': ''}


def _network_packet_attributes(process):
    """Yield attributes for the 'connect_to_computer' entries of a process."""
    for connect in _section_items(process, 'networkpacket_section', 'connect_to_computer'):
        remote_hostname = cleanup_hostname(connect['@remote_hostname'])
        remote_ip = cleanup_ip(connect['@remote_ip'])
        if remote_hostname and remote_ip:
            # 'domain|ip' is the type used for hostname+ip pairs elsewhere in this module
            yield {'values': "{}|{}".format(remote_hostname, remote_ip), 'type': 'domain|ip',
                   'to_ids': True, 'comment': ''}
        elif remote_hostname:
            yield {'values': "{}|{}".format(remote_hostname, connect['@remote_port']),
                   'type': 'hostname|port', 'to_ids': True, 'comment': ''}
        elif remote_ip:
            yield {'values': "{}|{}".format(remote_ip, connect['@remote_port']),
                   'type': 'ip-dst|port', 'to_ids': True, 'comment': ''}


def _registry_attributes(process):
    """Yield regkey / regkey|value attributes for the registry values set by a process."""
    # FIXME this is a complicated section, together with the 'create_key'.
    for set_value in _section_items(process, 'registry_section', 'set_value'):
        # '@data_type' is one of REG_BINARY, REG_DWORD, REG_EXPAND_SZ, REG_MULTI_SZ,
        # REG_NONE, REG_QWORD, REG_SZ and ends up in the comment
        regkey = cleanup_regkey("{}\\{}".format(set_value['@key_name'], set_value['@value_name']))
        if not regkey:
            continue
        regdata = cleanup_regdata(set_value.get('@data'))
        if set_value['@data_size'] == '0' or not regdata:
            yield {'values': regkey, 'type': 'regkey', 'to_ids': True,
                   'categories': list(_REGISTRY_CATEGORIES), 'comment': set_value['@data_type']}
        else:
            yield {'values': "{}|{}".format(regkey, regdata), 'type': 'regkey|value', 'to_ids': True,
                   'categories': list(_REGISTRY_CATEGORIES), 'comment': set_value['@data_type']}


def _stored_file_attributes(process, key, comment):
    """Yield filename or filename|md5 attributes for ``process['stored_files'][key]``."""
    for stored_file in _section_items(process, 'stored_files', key):
        filename = cleanup_filepath(stored_file['@filename'])
        if not filename:
            continue
        # value comparison: 'is not' on a str literal only tests object identity
        if stored_file['@filesize'] != '0':
            yield {'values': '{}|{}'.format(filename, stored_file['@md5']), 'type': 'filename|md5',
                   'to_ids': True, 'categories': list(_DROPPED_FILE_CATEGORIES), 'comment': comment}
        else:
            yield {'values': filename, 'type': 'filename', 'to_ids': True,
                   'categories': list(_DROPPED_FILE_CATEGORIES), 'comment': comment}


def add_file(filename, results, hash, index, filedata=None):
    """Placeholder, currently does nothing."""
    pass
    # results.append({'values': filename, 'data': "{}|{}".format(filename, filedata.decode()), 'type': 'malware-sample',
    #                 'categories': ['Artifacts dropped', 'Payload delivery']})


def add_file_zip():
    """Placeholder, currently does nothing."""
    # if 'malware-sample' in request:
    #     sample_filename = request.get("malware-sample").split("|", 1)[0]
    #     data = base64.b64decode(data)
    #     fl = io.BytesIO(data)
    #     zf = zipfile.ZipFile(fl)
    #     sample_hashname = zf.namelist()[0]
    #     data = zf.read(sample_hashname, b"infected")
    #     zf.close()
    pass


def print_json(data):
    """Debug helper: pretty-print ``data`` as sorted, indented JSON on stdout."""
    print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')))


def list_in_string(lst, data, regex=False):
    """Tell whether any item of ``lst`` occurs in ``data``.

    With ``regex=True`` the items are regular expressions searched
    case-insensitively; otherwise they are plain, case-sensitive substrings.
    """
    if regex:
        return any(re.search(item, data, flags=re.IGNORECASE) for item in lst)
    return any(item in data for item in lst)


def cleanup_ip(item):
    """Return ``item`` when it is a valid, non-noise IP address, else ``None``."""
    # you should exclude private IP ranges via import regexes
    noise_substrings = {
        '224.0.0.',
        '127.0.0.',
        '8.8.8.8',
        '8.8.4.4',
        '0.0.0.0',
        'NONE'
    }
    if not item or list_in_string(noise_substrings, item):
        return None
    try:
        ipaddress.ip_address(item)
    except ValueError:
        return None
    return item


def cleanup_hostname(item):
    """Return ``item`` unless it is empty, known noise or actually an IP address."""
    noise_substrings = {
        'wpad',
        'teredo.ipv6.microsoft.com',
        'WIN7SP1-x64-UNP'
    }
    # take away common known bad
    if not item or list_in_string(noise_substrings, item):
        return None
    # eliminate IP addresses
    try:
        ipaddress.ip_address(item)
    except ValueError:
        # this is not an IP, so keep it
        return item
    return None


def cleanup_url(item):
    """Return ``item`` unless it is the bare root path ``/``."""
    if item in ['/']:
        return None
    return item


def cleanup_filepath(item):
    """Return ``item`` unless it is empty or contains a known-noisy Windows path."""
    noise_substrings = {
        'C:\\Windows\\Prefetch\\',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\',
        '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\',
        'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat',
        '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\DOMStore\\',
        '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_',
        '\\AppData\\Local\\Microsoft\\Windows\\History\\History.',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\',
        '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Caches\\',
        '\\AppData\\Local\\Microsoft\\Windows\\WebCache\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Explorer\\thumbcache',

        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\SharedDataEvents-journal',
        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',

        '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
        '\\AppData\\Roaming\\Adobe\\Flash Player\\NativeCache\\',
        'C:\\Windows\\AppCompat\\Programs\\',
        'C:\\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
    }
    if not item or list_in_string(noise_substrings, item):
        return None
    return item


def cleanup_regkey(item):
    """Shorten the hive prefix to HKLM/HKCU and return the key, or ``None`` for noisy keys."""
    # regular expressions, matched case-insensitively
    noise_substrings = {
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\',
        r'\\CurrentVersion\\Explorer\\RecentDocs\\',
        r'\\CurrentVersion\\Explorer\\UserAssist\\',
        r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith',
        r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch',
        r'\\Software\\Microsoft\\Office\\[0-9\.]+\\',
        r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\',
        r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\',
        r'\\Usage\\SpellingAndGrammarFiles',
        r'^HKLM\\Software\\Microsoft\\Tracing\\',
        r'\\Software\\Classes\\CLSID\\',
        r'\\Software\\Classes\\Local Settings\\MuiCache\\',
        r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\'
    }
    item = item.replace('\\REGISTRY\\MACHINE\\', 'HKLM\\')
    item = item.replace('\\REGISTRY\\USER\\', 'HKCU\\')
    if list_in_string(noise_substrings, item, regex=True):
        return None
    return item


def cleanup_regdata(item):
    """Strip the ``(UNICODE_0x00000000)`` marker from registry data; ``None`` for empty input."""
    if not item:
        return None
    return item.replace('(UNICODE_0x00000000)', '')


def get_zipped_contents(filename, data, password=None):
    """Read every member of a zip archive held in memory.

    :param filename: name of the archive, only used in the comment field
    :param data: raw bytes of the zip archive
    :param password: optional password as ``str``
    :return: list of ``{'values': member name, 'data': bytes, 'comment': ...}``
    """
    with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
        unzipped_files = []
        if password is not None:
            password = password.encode()  # Byte encoded password required
        for zip_file_name in zf.namelist():  # Get all files in the zip file
            with zf.open(zip_file_name, mode='r', pwd=password) as fp:
                file_data = fp.read()
            unzipped_files.append({'values': zip_file_name,
                                   'data': file_data,
                                   'comment': 'Extracted from {0}'.format(filename)})
    return unzipped_files


def introspection():
    """Return the module setup, holding ``userConfig`` and ``inputSource`` when they are defined."""
    modulesetup = {}
    try:
        userConfig
        modulesetup['userConfig'] = userConfig
    except NameError:
        pass
    try:
        inputSource
        modulesetup['inputSource'] = inputSource
    except NameError:
        pass
    return modulesetup


def version():
    """Return ``moduleinfo`` with the module configuration stored under ``'config'``."""
    moduleinfo['config'] = moduleconfig
    return moduleinfo