From 46975f4f16e6ebfee4127809401131549d76d257 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas <christophe@vandeplas.com>
Date: Tue, 16 Jan 2018 11:05:26 +0100
Subject: [PATCH] Added ThreatAnalyzer sandbox import

Experimental module - some parts should be migrated to
---
 README.md                                     |   9 +-
 misp_modules/modules/import_mod/__init__.py   |   2 +-
 .../import_mod/threatanalyzer_import.py       | 507 ++++++++++++++++++
 3 files changed, 513 insertions(+), 5 deletions(-)
 create mode 100755 misp_modules/modules/import_mod/threatanalyzer_import.py

diff --git a/README.md b/README.md
index 6dd06ce..339c56f 100644
--- a/README.md
+++ b/README.md
@@ -18,8 +18,9 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/
 ### Expansion modules
 
 * [ASN History](misp_modules/modules/expansion/asn_history.py) - a hover and expansion module to expand an AS number with the ASN description and its history.
-* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen.
 * [CIRCL Passive DNS](misp_modules/modules/expansion/circl_passivedns.py) - a hover and expansion module to expand hostname and IP addresses with passive DNS information.
+* [CIRCL Passive SSL](misp_modules/modules/expansion/circl_passivessl.py) - a hover and expansion module to expand IP addresses with the X.509 certificate seen.
+* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to.
 * [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE).
 * [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP addresses attributes.
 * [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois.
@@ -28,13 +29,12 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/
 * [GeoIP](misp_modules/modules/expansion/geoip_country.py) - a hover and expansion module to get GeoIP information from geolite/maxmind.
 * [IPASN](misp_modules/modules/expansion/ipasn.py) - a hover and expansion to get the BGP ASN of an IP address.
 * [iprep](misp-modules/modules/expansion/iprep.py) - an expansion module to get IP reputation from packetmail.net.
+* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/).
 * [passivetotal](misp_modules/modules/expansion/passivetotal.py) - a [passivetotal](https://www.passivetotal.org/) module that queries a number of different PassiveTotal datasets.
 * [shodan](misp_modules/modules/expansion/shodan.py) - a minimal [shodan](https://www.shodan.io/) expansion module.
 * [sourcecache](misp_modules/modules/expansion/sourcecache.py) - a module to cache a specific link from a MISP instance.
 * [ThreatCrowd](misp_modules/modules/expansion/threatcrowd.py) - an expansion module for [ThreatCrowd](https://www.threatcrowd.org/).
-* [OTX](misp_modules/modules/expansion/otx.py) - an expansion module for [OTX](https://otx.alienvault.com/).
 * [threatminer](misp_modules/modules/expansion/threatminer.py) - an expansion module to expand from [ThreatMiner](https://www.threatminer.org/).
-* [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to.
 * [virustotal](misp_modules/modules/expansion/virustotal.py) - an expansion module to pull known resolutions and malware samples related with an IP/Domain from virusTotal (this modules require a VirusTotal private API key)
 * [wikidata](misp_modules/modules/expansion/wiki.py) - a [wikidata](https://www.wikidata.org) expansion module.
 * [xforce](misp_modules/modules/expansion/xforceexchange.py) - an IBM X-Force Exchange expansion module.
@@ -50,10 +50,11 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/
 ### Import modules
 
 * [Cuckoo JSON](misp_modules/modules/import_mod/cuckooimport.py) Cuckoo JSON import.
+* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata.
 * [OCR](misp_modules/modules/import_mod/ocr.py) Optical Character Recognition (OCR) module for MISP to import attributes from images, scan or faxes.
 * [OpenIOC](misp_modules/modules/import_mod/openiocimport.py) OpenIOC import based on PyMISP library.
 * [stiximport](misp_modules/modules/import_mod/stiximport.py) - An import module to process STIX xml/json.
-* [Email Import](misp_modules/modules/import_mod/email_import.py) Email import module for MISP to import basic metadata.
+* [ThreatAnalyzer](misp_modules/modules/import_mod/threatanalyzer_import.py) - An import module to process ThreatAnalyzer archive.zip/analysis.json sandbox exports.
 * [VMRay](misp_modules/modules/import_mod/vmray_import.py) - An import module to process VMRay export.
 
 ## How to install and start MISP modules?
diff --git a/misp_modules/modules/import_mod/__init__.py b/misp_modules/modules/import_mod/__init__.py
index 6beeaa2..e2941d7 100644
--- a/misp_modules/modules/import_mod/__init__.py
+++ b/misp_modules/modules/import_mod/__init__.py
@@ -1,4 +1,4 @@
 from . import _vmray
 
 __all__ = ['vmray_import', 'testimport', 'ocr', 'stiximport', 'cuckooimport',
-           'email_import', 'mispjson', 'openiocimport']
+           'email_import', 'mispjson', 'openiocimport', 'threatanalyzer_import']
diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py
new file mode 100755
index 0000000..fded508
--- /dev/null
+++ b/misp_modules/modules/import_mod/threatanalyzer_import.py
@@ -0,0 +1,507 @@
+'''
+import
+define mandatory
+
+'''
+import json
+import base64
+import re
+import zipfile
+import ipaddress
+import io
+import logging
+
+misperrors = {'error': 'Error'}
+userConfig = {}
+inputSource = ['file']
+
+moduleinfo = {'version': '0.6', 'author': 'Christophe Vandeplas',
+              'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files',
+              'module-type': ['import']}
+
+moduleconfig = []
+log = logging.getLogger('misp-modules')
+
+# FIXME - many hardcoded filters should be migrated to import regexes. See also https://github.com/MISP/MISP/issues/2712
+# DISCLAIMER - This module is to be considered as experimental and needs much fine-tuning.
+# more can be done with what's in the ThreatAnalyzer archive.zip
+
+
+def handler(q=False):
+    if q is False:
+        return False
+    results = []
+    zip_starts = 'PK'
+    request = json.loads(q)
+    data = base64.b64decode(request['data'])
+
+    if data[:len(zip_starts)].decode() == zip_starts:
+        with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
+            # unzipped_files = []
+            modified_files_mapping = {}
+            # pre-process some of the files in the zip
+            for zip_file_name in zf.namelist():  # Get all files in the zip file
+                # find the filenames of the modified_files
+                if re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
+                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
+                        file_data = fp.read()
+                        for line in file_data.decode().split('\n'):
+                            if line:
+                                l_fname, l_size, l_md5, l_created = line.split('|')
+                                l_fname = cleanup_filepath(l_fname)
+                                if l_fname:
+                                    if l_size == 0:
+                                        pass  # FIXME create an attribute for the filename/path
+                                    else:
+                                        # file is a non empty sample, upload the sample later
+                                        modified_files_mapping[l_md5] = l_fname
+
+            # now really process the data
+            for zip_file_name in zf.namelist():  # Get all files in the zip file
+                # print('Processing file: {}'.format(zip_file_name))
+                if re.match(r"Analysis/proc_\d+/modified_files/.+\.", zip_file_name) and "mapping.log" not in zip_file_name:
+                    sample_md5 = zip_file_name.split('/')[-1].split('.')[0]
+                    if sample_md5 in modified_files_mapping:
+                        sample_filename = modified_files_mapping[sample_md5]
+                        # print("{} maps to {}".format(sample_md5, sample_filename))
+                        with zf.open(zip_file_name, mode='r', pwd=None) as fp:
+                            file_data = fp.read()
+                            results.append({
+                                'values': sample_filename,
+                                'data': base64.b64encode(file_data).decode(),
+                                'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})
+
+                if 'Analysis/analysis.json' in zip_file_name:
+                    with zf.open(zip_file_name, mode='r', pwd=None) as fp:
+                        file_data = fp.read()
+                        analysis_json = json.loads(file_data.decode('utf-8'))
+                    results += process_analysis_json(analysis_json)
+                # if 'sample' in zip_file_name:
+                #     sample['data'] = base64.b64encode(file_data).decode()
+
+    else:
+        try:
+            results = process_analysis_json(json.loads(data.decode('utf-8')))
+        except ValueError:
+            log.warning('MISP modules {0} failed: uploaded file is not a zip or json file.'.format(request['module']))
+            return {'error': 'Uploaded file is not a zip or json file.'.format(request['module'])}
+            pass
+    # keep only unique entries based on the value field
+    results = list({v['values']: v for v in results}.values())
+    r = {'results': results}
+    return r
+
+
+def process_analysis_json(analysis_json):
+    if 'analysis' in analysis_json and 'processes' in analysis_json['analysis'] and 'process' in analysis_json['analysis']['processes']:
+        # if 'analysis' in analysis_json and '@filename' in analysis_json['analysis']:
+        #     sample['values'] = analysis_json['analysis']['@filename']
+        for process in analysis_json['analysis']['processes']['process']:
+            # print_json(process)
+            if 'connection_section' in process and 'connection' in process['connection_section']:
+                for connection_section_connection in process['connection_section']['connection']:
+
+                    connection_section_connection['@remote_ip'] = cleanup_ip(connection_section_connection['@remote_ip'])
+                    connection_section_connection['@remote_hostname'] = cleanup_hostname(connection_section_connection['@remote_hostname'])
+                    if connection_section_connection['@remote_ip'] and connection_section_connection['@remote_hostname']:
+                        val = '{}|{}'.format(connection_section_connection['@remote_hostname'],
+                                             connection_section_connection['@remote_ip'])
+                        # print("connection_section_connection hostname|ip: {}|{}  IDS:yes".format(
+                        #     connection_section_connection['@remote_hostname'],
+                        #     connection_section_connection['@remote_ip'])
+                        # )
+                        yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    elif connection_section_connection['@remote_ip']:
+                        # print("connection_section_connection ip-dst: {}  IDS:yes".format(
+                        #     connection_section_connection['@remote_ip'])
+                        # )
+                        yield({'values': connection_section_connection['@remote_ip'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})
+                    elif connection_section_connection['@remote_hostname']:
+                        # print("connection_section_connection hostname: {}  IDS:yes".format(
+                        #     connection_section_connection['@remote_hostname'])
+                        # )
+                        yield({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
+                    if 'http_command' in connection_section_connection:
+                        for http_command in connection_section_connection['http_command']:
+                            # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
+                            #     http_command['@method'],                    # comment
+                            #     http_command['@url'])                       # url
+                            # )
+                            val = cleanup_url(http_command['@url'])
+                            if val:
+                                yield({'values': val, 'type': 'url', 'categories': 'Network activity', 'to_ids': True, 'comment': http_command['@method']})
+
+                    if 'http_header' in connection_section_connection:
+                        for http_header in connection_section_connection['http_header']:
+                            if 'User-Agent:' in http_header['@header']:
+                                val = http_header['@header'][len('User-Agent: '):]
+                                yield({'values': val, 'type': 'user-agent', 'categories': 'Network activity', 'to_ids': False, 'comment': ''})
+                            elif 'Host:' in http_header['@header']:
+                                val = http_header['@header'][len('Host: '):]
+                                if ':' in val:
+                                    try:
+                                        val_port = int(val.split(':')[1])
+                                    except ValueError as e:
+                                        val_port = False
+                                    val_hostname = cleanup_hostname(val.split(':')[0])
+                                    val_ip = cleanup_ip(val.split(':')[0])
+                                    if val_hostname and val_port:
+                                        val_combined = '{}|{}'.format(val_hostname, val_port)
+                                        # print({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                                        yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                                    elif val_ip and val_port:
+                                        val_combined = '{}|{}'.format(val_ip, val_port)
+                                        # print({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
+                                        yield({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
+                                    else:
+                                        continue
+                                val_hostname = cleanup_hostname(val)
+                                if val_hostname:
+                                    # print({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
+                                    yield({'values': val_hostname, 'type': 'hostname', 'to_ids': True, 'comment': ''})
+                            else:
+                                # LATER header not processed
+                                pass
+            if 'filesystem_section' in process and 'create_file' in process['filesystem_section']:
+                for filesystem_section_create_file in process['filesystem_section']['create_file']:
+                    # first skip some items
+                    if filesystem_section_create_file['@create_disposition'] in {'FILE_OPEN_IF'}:
+                        continue
+                        # FIXME - this section is probably not needed considering the 'stored_files stored_created_file' section we process later.
+                        # print('CREATE FILE: {}\t{}'.format(
+                        #     filesystem_section_create_file['@srcfile'],             # filename
+                        #     filesystem_section_create_file['@create_disposition'])  # comment - use this to filter out cases
+                        # )
+
+            if 'networkoperation_section' in process and 'dns_request_by_addr' in process['networkoperation_section']:
+                for networkoperation_section_dns_request_by_addr in process['networkoperation_section']['dns_request_by_addr']:
+                    # FIXME - it's unclear what this section is for.
+                    # TODO filter this
+                    # print('DNS REQUEST: {}\t{}'.format(
+                    #     networkoperation_section_dns_request_by_addr['@request_address'],       # ip-dst
+                    #     networkoperation_section_dns_request_by_addr['@result_name'])           # hostname
+                    # )                                                                           # => NOT hostname|ip
+                    pass
+            if 'networkoperation_section' in process and 'dns_request_by_name' in process['networkoperation_section']:
+                for networkoperation_section_dns_request_by_name in process['networkoperation_section']['dns_request_by_name']:
+                    networkoperation_section_dns_request_by_name['@request_name'] = cleanup_hostname(networkoperation_section_dns_request_by_name['@request_name'].rstrip('.'))
+                    networkoperation_section_dns_request_by_name['@result_addresses'] = cleanup_ip(networkoperation_section_dns_request_by_name['@result_addresses'])
+                    if networkoperation_section_dns_request_by_name['@request_name'] and networkoperation_section_dns_request_by_name['@result_addresses']:
+                        val = '{}|{}'.format(networkoperation_section_dns_request_by_name['@request_name'],
+                                             networkoperation_section_dns_request_by_name['@result_addresses'])
+                        # print("networkoperation_section_dns_request_by_name hostname|ip: {}|{}  IDS:yes".format(
+                        #     networkoperation_section_dns_request_by_name['@request_name'],
+                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
+                        # )
+                        yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    elif networkoperation_section_dns_request_by_name['@request_name']:
+                        # print("networkoperation_section_dns_request_by_name hostname: {}  IDS:yes".format(
+                        #     networkoperation_section_dns_request_by_name['@request_name'])
+                        # )
+                        yield({'values': networkoperation_section_dns_request_by_name['@request_name'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
+                    elif networkoperation_section_dns_request_by_name['@result_addresses']:
+                        # this happens when the IP is both in the request_name and result_address.
+                        # print("networkoperation_section_dns_request_by_name hostname: {}  IDS:yes".format(
+                        #     networkoperation_section_dns_request_by_name['@result_addresses'])
+                        # )
+                        yield({'values': networkoperation_section_dns_request_by_name['@result_addresses'], 'type': 'ip-dst', 'to_ids': True, 'comment': ''})
+
+            if 'networkpacket_section' in process and 'connect_to_computer' in process['networkpacket_section']:
+                for networkpacket_section_connect_to_computer in process['networkpacket_section']['connect_to_computer']:
+                    networkpacket_section_connect_to_computer['@remote_hostname'] = cleanup_hostname(networkpacket_section_connect_to_computer['@remote_hostname'])
+                    networkpacket_section_connect_to_computer['@remote_ip'] = cleanup_ip(networkpacket_section_connect_to_computer['@remote_ip'])
+                    if networkpacket_section_connect_to_computer['@remote_hostname'] and networkpacket_section_connect_to_computer['@remote_ip']:
+                        # print("networkpacket_section_connect_to_computer hostname|ip: {}|{}  IDS:yes COMMENT:port {}".format(
+                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
+                        #     networkpacket_section_connect_to_computer['@remote_ip'],
+                        #     networkpacket_section_connect_to_computer['@remote_port'])
+                        # )
+                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_ip'])
+                        yield({'values': val_combined, 'type': 'hostname|ip', 'to_ids': True, 'comment': ''})
+                    elif networkpacket_section_connect_to_computer['@remote_hostname']:
+                        # print("networkpacket_section_connect_to_computer hostname: {}  IDS:yes COMMENT:port {}".format(
+                        #     networkpacket_section_connect_to_computer['@remote_hostname'],
+                        #     networkpacket_section_connect_to_computer['@remote_port'])
+                        # )
+                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_port'])
+                        yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                    elif networkpacket_section_connect_to_computer['@remote_ip']:
+                        # print("networkpacket_section_connect_to_computer ip-dst: {}  IDS:yes COMMENT:port {}".format(
+                        #     networkpacket_section_connect_to_computer['@remote_ip'],
+                        #     networkpacket_section_connect_to_computer['@remote_port'])
+                        # )
+                        val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_ip'], networkpacket_section_connect_to_computer['@remote_port'])
+                        yield({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
+
+            if 'registry_section' in process and 'create_key' in process['registry_section']:
+                # FIXME this is a complicated section, together with the 'set_value'.
+                # it looks like this section is not ONLY about creating registry keys,
+                # more about accessing a handle to keys (with specific permissions)
+                # maybe we don't want to keep this, in favor of 'set_value'
+                for create_key in process['registry_section']['create_key']:
+                    # print('REG CREATE: {}\t{}'.format(
+                    #     create_key['@desired_access'],
+                    #     create_key['@key_name']))
+                    pass
+            if 'registry_section' in process and 'delete_key' in process['registry_section']:
+                # LATER we probably don't want to keep this. Much pollution.
+                # Maybe for later once we have filtered out this.
+                for delete_key in process['registry_section']['delete_key']:
+                    # print('REG DELETE: {}'.format(
+                    #     delete_key['@key_name'])
+                    # )
+                    pass
+            if 'registry_section' in process and 'set_value' in process['registry_section']:
+                # FIXME this is a complicated section, together with the 'create_key'.
+                for set_value in process['registry_section']['set_value']:
+                    # '@data_type' == 'REG_BINARY',
+                    # '@data_type' == 'REG_DWORD',
+                    # '@data_type' == 'REG_EXPAND_SZ',
+                    # '@data_type' == 'REG_MULTI_SZ',
+                    # '@data_type' == 'REG_NONE',
+                    # '@data_type' == 'REG_QWORD',
+                    # '@data_type' == 'REG_SZ',
+                    regkey = cleanup_regkey("{}\\{}".format(set_value['@key_name'], set_value['@value_name']))
+                    regdata = cleanup_regdata(set_value.get('@data'))
+                    if not regkey:
+                        continue
+                    if set_value['@data_size'] == '0' or not regdata:
+                        # print('registry_section set_value REG SET: {}\t{}\t{}'.format(
+                        #     set_value['@data_type'],
+                        #     set_value['@key_name'],
+                        #     set_value['@value_name'])
+                        # )
+                        yield({'values': regkey, 'type': 'regkey', 'to_ids': True,
+                               'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
+                    else:
+                        try:
+                            # unicode fun...
+                            # print('registry_section set_value REG SET: {}\t{}\t{}\t{}'.format(
+                            #     set_value['@data_type'],
+                            #     set_value['@key_name'],
+                            #     set_value['@value_name'],
+                            #     set_value['@data'])
+                            # )
+                            val = "{}|{}".format(regkey, regdata)
+                            yield({'values': val, 'type': 'regkey|value', 'to_ids': True,
+                                   'categories': ['External analysis', 'Persistence mechanism', 'Artifacts dropped'], 'comment': set_value['@data_type']})
+                        except Exception as e:
+                            print("EXCEPTION registry_section {}".format(e))
+                            # TODO - maybe we want to handle these later, or not...
+                        pass
+                    pass
+
+            if 'stored_files' in process and 'stored_created_file' in process['stored_files']:
+                for stored_created_file in process['stored_files']['stored_created_file']:
+                    stored_created_file['@filename'] = cleanup_filepath(stored_created_file['@filename'])
+                    if stored_created_file['@filename']:
+                        if stored_created_file['@filesize'] is not '0':
+                            val = '{}|{}'.format(stored_created_file['@filename'], stored_created_file['@md5'])
+                            # print("stored_created_file filename|md5: {}|{}  IDS:yes".format(
+                            #     stored_created_file['@filename'],                       # filename
+                            #     stored_created_file['@md5'])                            # md5
+                            # )                                                           # => filename|md5
+                            yield({'values': val, 'type': 'filename|md5', 'to_ids': True,
+                                   'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
+
+                        else:
+                            # print("stored_created_file filename: {}  IDS:yes".format(
+                            #     stored_created_file['@filename'])                        # filename
+                            # )                                                           # => filename
+                            yield({'values': stored_created_file['@filename'],
+                                   'type': 'filename', 'to_ids': True,
+                                   'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
+
+            if 'stored_files' in process and 'stored_modified_file' in process['stored_files']:
+                for stored_modified_file in process['stored_files']['stored_modified_file']:
+                    stored_modified_file['@filename'] = cleanup_filepath(stored_modified_file['@filename'])
+                    if stored_modified_file['@filename']:
+                        if stored_modified_file['@filesize'] is not '0':
+                            val = '{}|{}'.format(stored_modified_file['@filename'], stored_modified_file['@md5'])
+                            # print("stored_modified_file MODIFY FILE: {}\t{}".format(
+                            #     stored_modified_file['@filename'],                       # filename
+                            #     stored_modified_file['@md5'])                            # md5
+                            # )                                                            # => filename|md5
+                            yield({'values': val, 'type': 'filename|md5', 'to_ids': True,
+                                   'categories': ['Artifacts dropped', 'Payload delivery'],
+                                   'comment': 'modified'})
+                        else:
+                            # print("stored_modified_file MODIFY FILE: {}\t{}".format(
+                            #     stored_modified_file['@filename'])                       # filename
+                            # )                                                            # => filename
+                            yield({'values': stored_modified_file['@filename'], 'type': 'filename', 'to_ids': True,
+                                   'categories': ['Artifacts dropped', 'Payload delivery'],
+                                   'comment': 'modified'})
+
+
+def add_file(filename, results, hash, index, filedata=None):
+    pass
+    # results.append({'values': filename, 'data': "{}|{}".format(filename, filedata.decode()), 'type': 'malware-sample',
+    #                 'categories': ['Artifacts dropped', 'Payload delivery']})
+
+
+def add_file_zip():
+    # if 'malware-sample' in request:
+    # sample_filename = request.get("malware-sample").split("|", 1)[0]
+    #            data = base64.b64decode(data)
+    #            fl = io.BytesIO(data)
+    #            zf = zipfile.ZipFile(fl)
+    #            sample_hashname = zf.namelist()[0]
+    #            data = zf.read(sample_hashname, b"infected")
+    #            zf.close()
+    pass
+
+
+def print_json(data):
+    print(json.dumps(data, sort_keys=True, indent=4, separators=(',', ': ')))
+
+
+def list_in_string(lst, data, regex=False):
+    for item in lst:
+        if regex:
+            if re.search(item, data, flags=re.IGNORECASE):
+                return True
+        else:
+            if item in data:
+                return True
+
+
+def cleanup_ip(item):
+    # you should exclude private IP ranges via import regexes
+    noise_substrings = {
+        '224.0.0.',
+        '127.0.0.',
+        '8.8.8.8',
+        '8.8.4.4',
+        '0.0.0.0',
+        'NONE'
+    }
+    if list_in_string(noise_substrings, item):
+        return None
+    try:
+        ipaddress.ip_address(item)
+        return item
+    except ValueError:
+        return None
+
+
+def cleanup_hostname(item):
+    noise_substrings = {
+        'wpad',
+        'teredo.ipv6.microsoft.com',
+        'WIN7SP1-x64-UNP'
+    }
+    # take away common known bad
+    if list_in_string(noise_substrings, item):
+        return None
+    # eliminate IP addresses
+    try:
+        ipaddress.ip_address(item)
+    except ValueError:
+        # this is not an IP, so continue
+        return item
+    return None
+
+
+def cleanup_url(item):
+    if item in ['/']:
+        return None
+    return item
+
+
+def cleanup_filepath(item):
+    noise_substrings = {
+        'C:\\Windows\\Prefetch\\',
+        '\\AppData\\Roaming\\Microsoft\\Windows\\Recent\\',
+        '\\AppData\\Roaming\\Microsoft\\Office\\Recent\\',
+        'C:\\ProgramData\\Microsoft\\OfficeSoftwareProtectionPlatform\\Cache\\cache.dat',
+        '\\AppData\\Local\\Microsoft\\Windows\\Temporary Internet Files\\Content.',
+        '\\AppData\\Local\\Microsoft\\Internet Explorer\\Recovery\\High\\',
+        '\\AppData\\Local\\Microsoft\\Internet Explorer\\DOMStore\\',
+        '\\AppData\\LocalLow\\Microsoft\\Internet Explorer\\Services\\search_',
+        '\\AppData\\Local\\Microsoft\\Windows\\History\\History.',
+        '\\AppData\\Roaming\\Microsoft\\Windows\\Cookies\\',
+        '\\AppData\\LocalLow\\Microsoft\\CryptnetUrlCache\\',
+        '\\AppData\\Local\\Microsoft\\Windows\\Caches\\',
+        '\\AppData\\Local\\Microsoft\\Windows\WebCache\\',
+        '\\AppData\\Local\\Microsoft\\Windows\\Explorer\\thumbcache',
+
+        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\SharedDataEvents-journal',
+        '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',
+
+        '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
+        '\\AppData\\Roaming\Adobe\\Flash Player\\NativeCache\\',
+        'C:\\Windows\\AppCompat\\Programs\\',
+        'C:\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
+    }
+    if list_in_string(noise_substrings, item):
+        return None
+    return item
+
+
+def cleanup_regkey(item):
+    noise_substrings = {
+        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Installer\\UserData\\',
+        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Internet Settings\\',
+        r'\\CurrentVersion\\Explorer\\RecentDocs\\',
+        r'\\CurrentVersion\\Explorer\\UserAssist\\',
+        r'\\CurrentVersion\\Explorer\\FileExts\\[a-z\.]+\\OpenWith',
+        r'\\Software\\Microsoft\\Internet Explorer\\Main\\WindowsSearch',
+        r'\\Software\\Microsoft\\Office\\[0-9\.]+\\',
+        r'\\SOFTWARE\\Microsoft\\OfficeSoftwareProtectionPlatform\\',
+        r'\\Software\\Microsoft\\Office\\Common\\Smart Tag\\',
+        r'\\Usage\\SpellingAndGrammarFiles',
+        r'^HKLM\\Software\\Microsoft\\Tracing\\',
+        r'\\Software\\Classes\\CLSID\\',
+        r'\\Software\\Classes\\Local Settings\\MuiCache\\',
+        r'\\Local Settings\\Software\\Microsoft\\Windows\\Shell\\Bag',
+        r'\\Software\\Microsoft\\Windows\\CurrentVersion\\Explorer\\RunMRU\\'
+    }
+    item = item.replace('\\REGISTRY\\MACHINE\\', 'HKLM\\')
+    item = item.replace('\\REGISTRY\\USER\\', 'HKCU\\')
+    if list_in_string(noise_substrings, item, regex=True):
+        return None
+    return item
+
+
+def cleanup_regdata(item):
+    if not item:
+        return None
+    item = item.replace('(UNICODE_0x00000000)', '')
+    return item
+
+
+def get_zipped_contents(filename, data, password=None):
+    with zipfile.ZipFile(io.BytesIO(data), 'r') as zf:
+        unzipped_files = []
+        if password is not None:
+            password = str.encode(password)  # Byte encoded password required
+        for zip_file_name in zf.namelist():  # Get all files in the zip file
+            # print(zip_file_name)
+            with zf.open(zip_file_name, mode='r', pwd=password) as fp:
+                file_data = fp.read()
+            unzipped_files.append({'values': zip_file_name,
+                                   'data': file_data,
+                                   'comment': 'Extracted from {0}'.format(filename)})
+            # print("{} : {}".format(zip_file_name, len(file_data)))
+    return unzipped_files
+
+
+def introspection():
+    modulesetup = {}
+    try:
+        userConfig
+        modulesetup['userConfig'] = userConfig
+    except NameError:
+        pass
+    try:
+        inputSource
+        modulesetup['inputSource'] = inputSource
+    except NameError:
+        pass
+    return modulesetup
+
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo