'''
ThreatAnalyzer sandbox import module for MISP.
'''
import json
import base64
import re
import zipfile
import ipaddress
import io
import logging

misperrors = {'error': 'Error'}
userConfig = {}
inputSource = ['file']

moduleinfo = {
    'version': '0.10',
    'author': 'Christophe Vandeplas',
    'description': 'Module to import ThreatAnalyzer archive.zip / analysis.json files.',
    'module-type': ['import'],
    'name': 'ThreatAnalyzer Sandbox Import',
    'logo': '',
    'requirements': [],
    'features': 'The module imports MISP Attributes from a ThreatAnalyzer format file. This file can be either in ZIP or JSON format.\n'
                'No special configuration is required for users to make the module work.',
    'references': ['https://www.threattrack.com/malware-analysis.aspx'],
    'input': 'ThreatAnalyzer format file',
    'output': 'MISP Event attributes',
}
moduleconfig = []

log = logging.getLogger('misp-modules')

# FIXME - many hardcoded filters should be migrated to import regexes. See also https://github.com/MISP/MISP/issues/2712
# DISCLAIMER - This module is to be considered as experimental and needs much fine-tuning.
# more can be done with what's in the ThreatAnalyzer archive.zip
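
# An import request arriving in handler() is a JSON string shaped roughly as below
# (a sketch: 'data' carries the base64-encoded upload, the values are illustrative):
# {
#     "module": "threatanalyzer_import",
#     "data": "<base64 of archive.zip or analysis.json>"
# }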
def handler(q=False):
    if q is False:
        return False
    results = []
    zip_starts = 'PK'
    request = json.loads(q)
    data = base64.b64decode(request['data'])

    if data[:len(zip_starts)].decode() == zip_starts:
        with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
            # unzipped_files = []
            modified_files_mapping = {}
            # pre-process some of the files in the zip
            for zip_file_name in zf.namelist():  # Get all files in the zip file
                # find the filenames of the modified_files
                if re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                        file_data = fp.read()
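                        # Each mapping.log line is pipe-separated; the layouts handled below are
                        # (inferred from the splits, so a best-effort reading of the TA format):
                        #   TA 6.0: <filepath>|<size>|<md5>|<created>
                        #   TA 6.1: <filepath>|<size>|<md5>|<sha256>|<created>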
                        for line in file_data.decode("utf-8", 'ignore').split('\n'):
                            if not line:
                                continue
                            if line.count('|') == 3:
                                l_fname, l_size, l_md5, l_created = line.split('|')
                            elif line.count('|') == 4:
                                l_fname, l_size, l_md5, l_sha256, l_created = line.split('|')
                            else:
                                continue  # unexpected line layout, skip it
                            l_fname = cleanup_filepath(l_fname)
                            if l_fname:
                                if l_size == '0':  # fields come from the split, so compare as strings
                                    results.append({'values': l_fname, 'type': 'filename', 'to_ids': True,
                                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                                else:
                                    # file is a non empty sample, upload the sample later
                                    modified_files_mapping[l_md5] = l_fname

            # now really process the data
            for zip_file_name in zf.namelist():  # Get all files in the zip file
                # print('Processing file: {}'.format(zip_file_name))
                if re.match(r"Analysis/proc_\d+/modified_files/.+\.", zip_file_name) and "mapping.log" not in zip_file_name:
                    sample_md5 = zip_file_name.split('/')[-1].split('.')[0]
                    if sample_md5 in modified_files_mapping:
                        current_sample_filename = modified_files_mapping[sample_md5]
                        # print("{} maps to {}".format(sample_md5, current_sample_filename))
                        with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                            file_data = fp.read()
                            results.append({
                                'values': current_sample_filename,
                                'data': base64.b64encode(file_data).decode(),
                                'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})

                if 'Analysis/analysis.json' in zip_file_name:
                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                        file_data = fp.read()
                        analysis_json = json.loads(file_data.decode('utf-8'))
                        results += process_analysis_json(analysis_json)
                        try:
                            sample_filename = analysis_json.get('analysis').get('@filename')
                            if sample_filename:
                                with zf.open('sample', mode='r', pwd=None) as fp:
                                    file_data = fp.read()
                                    results.append({
                                        'values': sample_filename,
                                        'data': base64.b64encode(file_data).decode(),
                                        'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
                        except Exception:
                            # no 'sample' in archive, might be an url analysis, just ignore
                            pass

    else:
        try:
            results = process_analysis_json(json.loads(data.decode('utf-8')))
        except ValueError:
            log.warning('MISP modules {0} failed: uploaded file is not a zip or json file.'.format(request['module']))
            return {'error': 'Uploaded file is not a zip or json file.'}

    # keep only unique entries based on the value field
    results = list({v['values']: v for v in results}.values())
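    # the returned structure follows the MISP import-module shape, e.g. (illustrative values):
    # {'results': [{'values': 'dropped.exe', 'type': 'filename', 'to_ids': True,
    #               'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''}]}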
    r = {'results': results}
    return r


def process_analysis_json(analysis_json):
    if 'analysis' in analysis_json and 'processes' in analysis_json['analysis'] and 'process' in analysis_json['analysis']['processes']:
        # if 'analysis' in analysis_json and '@filename' in analysis_json['analysis']:
        #     sample['values'] = analysis_json['analysis']['@filename']
        for process in analysis_json['analysis']['processes']['process']:
            # print_json(process)
            if 'connection_section' in process and 'connection' in process['connection_section']:
                # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
                # this will always create a list, even with only one item
                if isinstance(process['connection_section']['connection'], dict):
                    process['connection_section']['connection'] = [process['connection_section']['connection']]

                # iterate over each entry
                for connection_section_connection in process['connection_section']['connection']:
                    # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
                    # this will always create a list, even with only one item
                    for subsection in ['http_command', 'http_header']:
                        if isinstance(connection_section_connection.get(subsection), dict):  # .get(): the subsection may be absent
                            connection_section_connection[subsection] = [connection_section_connection[subsection]]
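                    # e.g. a single entry parses as {'http_command': {...}} while several parse
                    # as {'http_command': [{...}, {...}]}; after this loop both are lists of dicts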

                    if 'name_to_ip' in connection_section_connection:  # TA 6.1 data format
                        connection_section_connection['@remote_ip'] = connection_section_connection['name_to_ip']['@result_addresses']
                        connection_section_connection['@remote_hostname'] = connection_section_connection['name_to_ip']['@request_name']

                    connection_section_connection['@remote_ip'] = cleanup_ip(connection_section_connection['@remote_ip'])
                    connection_section_connection['@remote_hostname'] = cleanup_hostname(connection_section_connection['@remote_hostname'])
                    if connection_section_connection['@remote_ip'] and connection_section_connection['@remote_hostname']:
                        val = '{}|{}'.format(connection_section_connection['@remote_hostname'],
                                             connection_section_connection['@remote_ip'])
                        # print("connection_section_connection hostname|ip: {}|{} IDS:yes".format(
                        #     connection_section_connection['@remote_hostname'],
                        #     connection_section_connection['@remote_ip'])
                        # )
                        yield ({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif connection_section_connection['@remote_ip']:
                        # print("connection_section_connection ip-dst: {} IDS:yes".format(
                        #     connection_section_connection['@remote_ip'])
                        # )
                        yield ({'values': connection_section_connection['@remote_ip'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})
                    elif connection_section_connection['@remote_hostname']:
                        # print("connection_section_connection hostname: {} IDS:yes".format(
                        #     connection_section_connection['@remote_hostname'])
                        # )
                        yield ({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})

                    if 'http_command' in connection_section_connection:
                        for http_command in connection_section_connection['http_command']:
                            # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
                            #     connection_section_connection['http_command']['@method'],  # comment
                            #     connection_section_connection['http_command']['@url'])  # url
                            # )
                            val = cleanup_url(http_command['@url'])
                            if val:
                                yield ({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': http_command['@method']})

                    if 'http_header' in connection_section_connection:
                        for http_header in connection_section_connection['http_header']:
                            if 'User-Agent: ' in http_header['@header']:
                                val = http_header['@header'][len('User-Agent: '):]
                                yield ({'values': val, 'type': 'user-agent', 'categories': ['Network activity'], 'to_ids': False, 'comment': ''})
                            elif 'Host: ' in http_header['@header']:
                                val = http_header['@header'][len('Host: '):]
                                if ':' in val:
                                    try:
                                        val_port = int(val.split(':')[1])
                                    except ValueError:
                                        val_port = False
                                    val_hostname = cleanup_hostname(val.split(':')[0])
                                    val_ip = cleanup_ip(val.split(':')[0])
                                    if val_hostname and val_port:
                                        val_combined = '{}|{}'.format(val_hostname, val_port)
                                        # print({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
                                        yield ({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                                        continue  # handled as hostname|port, don't also emit a plain hostname below
                                    elif val_ip and val_port:
                                        val_combined = '{}|{}'.format(val_ip, val_port)
                                        # print({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
                                        yield ({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
                                        continue  # handled as ip-dst|port
                                    else:
                                        continue
                                val_hostname = cleanup_hostname(val)
                                if val_hostname:
                                    # print({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
                                    yield ({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
                            else:
                                # LATER header not processed
                                pass

            if 'filesystem_section' in process and 'create_file' in process['filesystem_section']:
                for filesystem_section_create_file in process['filesystem_section']['create_file']:
                    # first skip some items
                    if filesystem_section_create_file['@create_disposition'] in {'FILE_OPEN_IF'}:
                        continue
                    # FIXME - this section is probably not needed considering the 'stored_files stored_created_file' section we process later.
                    # print('CREATE FILE: {}\t{}'.format(
                    #     filesystem_section_create_file['@srcfile'],  # filename
                    #     filesystem_section_create_file['@create_disposition'])  # comment - use this to filter out cases
                    # )

            if 'networkoperation_section' in process and 'dns_request_by_addr' in process['networkoperation_section']:
                for networkoperation_section_dns_request_by_addr in process['networkoperation_section']['dns_request_by_addr']:
                    # FIXME - it's unclear what this section is for.
                    # TODO filter this
                    # print('DNS REQUEST: {}\t{}'.format(
                    #     networkoperation_section_dns_request_by_addr['@request_address'],  # ip-dst
                    #     networkoperation_section_dns_request_by_addr['@result_name'])  # hostname
                    # )  # => NOT hostname|ip
                    pass

            if 'networkoperation_section' in process and 'dns_request_by_name' in process['networkoperation_section']:
                for networkoperation_section_dns_request_by_name in process['networkoperation_section']['dns_request_by_name']:
                    networkoperation_section_dns_request_by_name['@request_name'] = cleanup_hostname(networkoperation_section_dns_request_by_name['@request_name'].rstrip('.'))
                    networkoperation_section_dns_request_by_name['@result_addresses'] = cleanup_ip(networkoperation_section_dns_request_by_name['@result_addresses'])
                    if networkoperation_section_dns_request_by_name['@request_name'] and networkoperation_section_dns_request_by_name['@result_addresses']:
                        val = '{}|{}'.format(networkoperation_section_dns_request_by_name['@request_name'],
                                             networkoperation_section_dns_request_by_name['@result_addresses'])
                        # print("networkoperation_section_dns_request_by_name hostname|ip: {}|{} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@request_name'],
                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
                        # )
                        yield ({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif networkoperation_section_dns_request_by_name['@request_name']:
                        # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@request_name'])
                        # )
                        yield ({'values': networkoperation_section_dns_request_by_name['@request_name'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                    elif networkoperation_section_dns_request_by_name['@result_addresses']:
                        # this happens when the IP is both in the request_name and result_address.
                        # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
                        # )
                        yield ({'values': networkoperation_section_dns_request_by_name['@result_addresses'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})

            if 'networkpacket_section' in process and 'connect_to_computer' in process['networkpacket_section']:
                for networkpacket_section_connect_to_computer in process['networkpacket_section']['connect_to_computer']:
                    networkpacket_section_connect_to_computer['@remote_hostname'] = cleanup_hostname(networkpacket_section_connect_to_computer['@remote_hostname'])
                    networkpacket_section_connect_to_computer['@remote_ip'] = cleanup_ip(networkpacket_section_connect_to_computer['@remote_ip'])
                    if networkpacket_section_connect_to_computer['@remote_hostname'] and networkpacket_section_connect_to_computer['@remote_ip']:
                        # print("networkpacket_section_connect_to_computer hostname|ip: {}|{} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
                        #     networkpacket_section_connect_to_computer['@remote_ip'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_ip'])
                        yield ({'values': val_combined, 'type': 'domain|ip', 'to_ids': True, 'comment': ''})
                    elif networkpacket_section_connect_to_computer['@remote_hostname']:
                        # print("networkpacket_section_connect_to_computer hostname: {} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_port'])
                        yield ({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                    elif networkpacket_section_connect_to_computer['@remote_ip']:
                        # print("networkpacket_section_connect_to_computer ip-dst: {} IDS:yes COMMENT:port {}".format(
                        #     networkpacket_section_connect_to_computer['@remote_ip'],
                        #     networkpacket_section_connect_to_computer['@remote_port'])
                        # )
                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_ip'], networkpacket_section_connect_to_computer['@remote_port'])
                        yield ({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})

            if 'registry_section' in process and 'create_key' in process['registry_section']:
                # FIXME this is a complicated section, together with the 'set_value'.
                # it looks like this section is not ONLY about creating registry keys,
                # more about accessing a handle to keys (with specific permissions)
                # maybe we don't want to keep this, in favor of 'set_value'
                for create_key in process['registry_section']['create_key']:
                    # print('REG CREATE: {}\t{}'.format(
                    #     create_key['@desired_access'],
                    #     create_key['@key_name']))
                    pass

            if 'registry_section' in process and 'delete_key' in process['registry_section']:
                # LATER we probably don't want to keep this. Much pollution.
                # Maybe for later once we have filtered out this.
                for delete_key in process['registry_section']['delete_key']:
                    # print('REG DELETE: {}'.format(
                    #     delete_key['@key_name'])
                    # )
                    pass

            if 'registry_section' in process and 'set_value' in process['registry_section']:
                # FIXME this is a complicated section, together with the 'create_key'.
                for set_value in process['registry_section']['set_value']:
                    # '@data_type' == 'REG_BINARY',
                    # '@data_type' == 'REG_DWORD',
                    # '@data_type' == 'REG_EXPAND_SZ',
                    # '@data_type' == 'REG_MULTI_SZ',
                    # '@data_type' == 'REG_NONE',
                    # '@data_type' == 'REG_QWORD',
                    # '@data_type' == 'REG_SZ',
                    regkey = cleanup_regkey("{}\\{}".format(set_value['@key_name'], set_value['@value_name']))
                    regdata = cleanup_regdata(set_value.get('@data'))
                    if not regkey:
                        continue
                    if set_value['@data_size'] == '0' or not regdata:
                        # print('registry_section set_value REG SET: {}\t{}\t{}'.format(
                        #     set_value['@data_type'],
                        #     set_value['@key_name'],
                        #     set_value['@value_name'])
                        # )
                        yield ({'values': regkey, 'type': 'regkey', 'to_ids': True,
                                'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
                    else:
                        try:
                            # unicode fun...
                            # print('registry_section set_value REG SET: {}\t{}\t{}\t{}'.format(
                            #     set_value['@data_type'],
                            #     set_value['@key_name'],
                            #     set_value['@value_name'],
                            #     set_value['@data'])
                            # )
                            val = "{}|{}".format(regkey, regdata)
                            yield ({'values': val, 'type': 'regkey|value', 'to_ids': True,
                                    'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
                        except Exception as e:
                            # use the module logger instead of a bare print()
                            log.warning("EXCEPTION registry_section {}".format(e))
                            # TODO - maybe we want to handle these later, or not...

            if 'stored_files' in process and 'stored_created_file' in process['stored_files']:
                for stored_created_file in process['stored_files']['stored_created_file']:
                    stored_created_file['@filename'] = cleanup_filepath(stored_created_file['@filename'])
                    if stored_created_file['@filename']:
                        if stored_created_file['@filesize'] != '0':
                            val = '{}|{}'.format(stored_created_file['@filename'], stored_created_file['@md5'])
                            # print("stored_created_file filename|md5: {}|{} IDS:yes".format(
                            #     stored_created_file['@filename'],  # filename
                            #     stored_created_file['@md5'])  # md5
                            # )  # => filename|md5
                            yield ({'values': val, 'type': 'filename|md5', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                        else:
                            # print("stored_created_file filename: {} IDS:yes".format(
                            #     stored_created_file['@filename'])  # filename
                            # )  # => filename
                            yield ({'values': stored_created_file['@filename'],
                                    'type': 'filename', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})

            if 'stored_files' in process and 'stored_modified_file' in process['stored_files']:
                for stored_modified_file in process['stored_files']['stored_modified_file']:
                    stored_modified_file['@filename'] = cleanup_filepath(stored_modified_file['@filename'])
                    if stored_modified_file['@filename']:
                        if stored_modified_file['@filesize'] != '0':
                            val = '{}|{}'.format(stored_modified_file['@filename'], stored_modified_file['@md5'])
                            # print("stored_modified_file MODIFY FILE: {}\t{}".format(
                            #     stored_modified_file['@filename'],  # filename
                            #     stored_modified_file['@md5'])  # md5
                            # )  # => filename|md5
                            yield ({'values': val, 'type': 'filename|md5', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'],
                                    'comment': 'modified'})
                        else:
                            # print("stored_modified_file MODIFY FILE: {}".format(
                            #     stored_modified_file['@filename'])  # filename
                            # )  # => filename
                            yield ({'values': stored_modified_file['@filename'], 'type': 'filename', 'to_ids': True,
                                    'categories': ['Artifacts dropped', 'Payload delivery'],
                                    'comment': 'modified'})
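

# A minimal sketch of the analysis.json input consumed by process_analysis_json()
# (keys inferred from this module; real ThreatAnalyzer output has many more sections):
# {"analysis": {"@filename": "sample.exe",
#               "processes": {"process": [
#                   {"connection_section": {"connection": [...]},
#                    "networkoperation_section": {"dns_request_by_name": [...]},
#                    "registry_section": {"set_value": [...]},
#                    "stored_files": {"stored_created_file": [...]}}]}}}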


def add_file(filename, results, hash, index, filedata=None):
    pass
    # results.append({'values': filename, 'data': "{}|{}".format(filename, filedata.decode()), 'type': 'malware-sample',
    #                 'categories': ['Artifacts dropped', 'Payload delivery']})


def add_file_zip():
    # if 'malware-sample' in request:
    #     sample_filename = request.get("malware-sample").split("|", 1)[0]
    #     data = base64.b64decode(data)
    #     fl = io.BytesIO(data)
    #     zf = zipfile.ZipFile(fl)
    #     sample_hashname = zf.namelist()[0]
    #     data = zf.read(sample_hashname, b"infected")
    #     zf.close()
    pass


def print_json(data):
    print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')))


def list_in_string(lst, data, regex=False):
    for item in lst:
        if regex:
            if re.search(item, data, flags=re.IGNORECASE):
                return True
        else:
            if item in data:
                return True
    return False  # be explicit instead of implicitly returning None


def cleanup_ip(item):
    # you should exclude private IP ranges via import regexes
    noise_substrings = {
        '224.0.0.',
        '127.0.0.',
        '8.8.8.8',
        '8.8.4.4',
        '0.0.0.0',
        'NONE'
    }
    if list_in_string(noise_substrings, item):
        return None
    try:
        ipaddress.ip_address(item)
        return item
    except ValueError:
        return None


def cleanup_hostname(item):
    noise_substrings = {
        'wpad',
        'teredo.ipv6.microsoft.com',
        'WIN7SP1-x64-UNP'
    }
    # take away common known bad
    if list_in_string(noise_substrings, item):
        return None
    # eliminate IP addresses
    try:
        ipaddress.ip_address(item)
    except ValueError:
        # this is not an IP, so continue
        return item
    return None


def cleanup_url(item):
    if item in ['/']:
        return None
    return item


def cleanup_filepath(item):
    noise_substrings = {
        '\\AppData\\Local\\GDIPFONTCACHEV1.DAT',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\DOMStore\\',
        '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Caches\\',
        '\\AppData\\Local\\Microsoft\\Windows\\Explorer\\thumbcache',
        '\\AppData\\Local\\Microsoft\\Windows\\History\\History.',
        '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.',
        '\\AppData\\Local\\Microsoft\\Windows\\WebCache\\',
        '\\AppData\\Local\\Temp\\.*tmp$',
        '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\',
        '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_',
        '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\',
        '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\',
        'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat',
        'C:\\Windows\\Prefetch\\',
        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\SharedDataEvents-journal',
        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',
        '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
        '\\AppData\\Roaming\\Adobe\\Flash Player\\NativeCache\\',
        'C:\\Windows\\AppCompat\\Programs\\',
        'C:\\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
    }
    if list_in_string(noise_substrings, item):
        return None
    return item


def cleanup_regkey(item):
    noise_substrings = {
        r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith',
        r'\\CurrentVersion\\Explorer\\RecentDocs\\',
        r'\\CurrentVersion\\Explorer\\UserAssist\\',
        r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag',
        r'\\Software\\Classes\\CLSID\\',
        r'\\Software\\Classes\\Local Settings\\MuiCache\\',
        r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch',
        r'\\Software\\Microsoft\\Office\\[0-9\.]+\\',
        r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\',
        r'\\Software\\Microsoft\\OfficeSoftwareProtectionPlatform\\',
        r'\\Software\\Microsoft\\Shared Tools\\Panose\\',
        r'\\Software\\Microsoft\\Tracing\\',
        r'\\Software\\Microsoft\\Tracing\\powershell_RASAPI32\\',
        r'\\Software\\Microsoft\\Tracing\\powershell_RASMANCS\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Action Center\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\',
        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\',
        r'\\System\\CurrentControlSet\\Services\\RdyBoost\\',
        r'\\Usage\\SpellingAndGrammarFiles'
    }
    if list_in_string(noise_substrings, item, regex=True):
        return None
    return item


def cleanup_regdata(item):
    if not item:
        return None
    item = item.replace('(UNICODE_0x00000000)', '')
    return item


def get_zipped_contents(filename, data, password=None):
    with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
        unzipped_files = []
        if password is not None:
            password = str.encode(password)  # Byte encoded password required
        for zip_file_name in zf.namelist():  # Get all files in the zip file
            # print(zip_file_name)
            with zf.open(zip_file_name, mode='r', pwd=password) as fp:
                file_data = fp.read()
                unzipped_files.append({'values': zip_file_name,
                                       'data': file_data,
                                       'comment': 'Extracted from {0}'.format(filename)})
            # print("{} : {}".format(zip_file_name, len(file_data)))
        return unzipped_files
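
# Illustrative use (the filename and the 'infected' password are assumptions):
#     for f in get_zipped_contents('archive.zip', raw_bytes, password='infected'):
#         print(f['values'], len(f['data']))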


def introspection():
    modulesetup = {}
    try:
        userConfig
        modulesetup['userConfig'] = userConfig
    except NameError:
        pass
    try:
        inputSource
        modulesetup['inputSource'] = inputSource
    except NameError:
        pass
    return modulesetup


def version():
    moduleinfo['config'] = moduleconfig
    return moduleinfo
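

# Minimal local smoke test (a sketch, not part of the MISP module API; the path
# 'analysis.json' below is a hypothetical ThreatAnalyzer export next to this script):
if __name__ == '__main__':
    with open('analysis.json', 'rb') as f:
        payload = json.dumps({'module': 'threatanalyzer_import',
                              'data': base64.b64encode(f.read()).decode()})
    print_json(handler(payload))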