From 8d4e2025f760279eca24a640cd732291bb0c88af Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Fri, 3 Aug 2018 13:58:53 +0200
Subject: [PATCH 1/7] ta_import - bugfixes for TA 6.1

---
 .../import_mod/threatanalyzer_import.py       | 42 +++++++++++--------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py
index a6358ab..916628e 100755
--- a/misp_modules/modules/import_mod/threatanalyzer_import.py
+++ b/misp_modules/modules/import_mod/threatanalyzer_import.py
@@ -15,7 +15,7 @@ misperrors = {'error': 'Error'}
 userConfig = {}
 inputSource = ['file']
 
-moduleinfo = {'version': '0.7', 'author': 'Christophe Vandeplas',
+moduleinfo = {'version': '0.8', 'author': 'Christophe Vandeplas',
               'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files',
               'module-type': ['import']}
 
@@ -73,7 +73,7 @@ def handler(q=False):
                             results.append({
                                 'values': current_sample_filename,
                                 'data': base64.b64encode(file_data).decode(),
-                                'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
+                                'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})
 
                     if 'Analysis/analysis.json' in zip_file_name:
                         with zf.open(zip_file_name, mode='r', pwd=None) as fp:
@@ -88,7 +88,7 @@ def handler(q=False):
                 results.append({
                     'values': sample_filename,
                     'data': base64.b64encode(file_data).decode(),
-                    'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})
+                    'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
             except Exception as e:
                 # no 'sample' in archive, might be an url analysis, just ignore
                 pass
@@ -113,7 +113,15 @@ def process_analysis_json(analysis_json):
     for process in analysis_json['analysis']['processes']['process']:
         # print_json(process)
         if 'connection_section' in process and 'connection' in process['connection_section']:
+            # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
+            # this will always create a list, even with only one item
+            if isinstance(process['connection_section']['connection'], dict):
+                process['connection_section']['connection'] = [process['connection_section']['connection']]
+            # iterate over each entry
             for connection_section_connection in process['connection_section']['connection']:
+                if 'name_to_ip' in connection_section_connection:  # TA 6.1 data format
+                    connection_section_connection['@remote_ip'] = connection_section_connection['name_to_ip']['@result_addresses']
+                    connection_section_connection['@remote_hostname'] = connection_section_connection['name_to_ip']['@request_name']
                 connection_section_connection['@remote_ip'] = cleanup_ip(connection_section_connection['@remote_ip'])
                 connection_section_connection['@remote_hostname'] = cleanup_hostname(connection_section_connection['@remote_hostname'])
 
@@ -124,7 +132,7 @@ def process_analysis_json(analysis_json):
                     #     connection_section_connection['@remote_hostname'],
                     #     connection_section_connection['@remote_ip'])
                     # )
-                    yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    yield({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif connection_section_connection['@remote_ip']:
                     # print("connection_section_connection ip-dst: {} IDS:yes".format(
                     #     connection_section_connection['@remote_ip'])
                     # )
@@ -136,20 +144,19 @@ def process_analysis_json(analysis_json):
                     # )
                     yield({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                 if 'http_command' in connection_section_connection:
-                    for http_command in connection_section_connection['http_command']:
-                        # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
-                        #     http_command['@method'],  # comment
-                        #     http_command['@url'])  # url
-                        # )
-                        val = cleanup_url(http_command['@url'])
-                        if val:
-                            yield({'values': val, 'type': 'url', 'categories': 'Network activity', 'to_ids': True, 'comment': http_command['@method']})
+                    # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
+                    #     connection_section_connection['http_command']['@method'],  # comment
+                    #     connection_section_connection['http_command']['@url'])  # url
+                    # )
+                    val = cleanup_url(connection_section_connection['http_command']['@url'])
+                    if val:
+                        yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': connection_section_connection['http_command']['@method']})
 
                 if 'http_header' in connection_section_connection:
                     for http_header in connection_section_connection['http_header']:
                         if 'User-Agent:' in http_header['@header']:
                             val = http_header['@header'][len('User-Agent: '):]
-                            yield({'values': val, 'type': 'user-agent', 'categories': 'Network activity', 'to_ids': False, 'comment': ''})
+                            yield({'values': val, 'type': 'user-agent', 'categories': ['Network activity'], 'to_ids': False, 'comment': ''})
                         elif 'Host:' in http_header['@header']:
                             val = http_header['@header'][len('Host: '):]
                             if ':' in val:
@@ -162,7 +169,7 @@ def process_analysis_json(analysis_json):
                             if val_hostname and val_port:
                                 val_combined = '{}|{}'.format(val_hostname, val_port)
                                 # print({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
-                                yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                                yield({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                             elif val_ip and val_port:
                                 val_combined = '{}|{}'.format(val_ip, val_port)
                                 # print({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
@@ -207,7 +214,7 @@ def process_analysis_json(analysis_json):
                     #     networkoperation_section_dns_request_by_name['@request_name'],
                     #     networkoperation_section_dns_request_by_name['@result_addresses'])
                     # )
-                    yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    yield({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif networkoperation_section_dns_request_by_name['@request_name']:
                     # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                     #     networkoperation_section_dns_request_by_name['@request_name'])
                     # )
@@ -231,14 +238,14 @@ def process_analysis_json(analysis_json):
                     #     networkpacket_section_connect_to_computer['@remote_port'])
                     # )
                     val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_ip'])
-                    yield({'values': val_combined, 'type': 'hostname|ip', 'to_ids': True, 'comment': ''})
+                    yield({'values': val_combined, 'type': 'domain|ip', 'to_ids': True, 'comment': ''})
                 elif networkpacket_section_connect_to_computer['@remote_hostname']:
                     # print("networkpacket_section_connect_to_computer hostname: {} IDS:yes COMMENT:port {}".format(
                     #     networkpacket_section_connect_to_computer['@remote_hostname'],
                     #     networkpacket_section_connect_to_computer['@remote_port'])
                     # )
                     val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_port'])
-                    yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                    yield({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif networkpacket_section_connect_to_computer['@remote_ip']:
                     # print("networkpacket_section_connect_to_computer ip-dst: {} IDS:yes COMMENT:port {}".format(
                     #     networkpacket_section_connect_to_computer['@remote_ip'],
@@ -524,3 +531,4 @@ def introspection():
 def version():
     moduleinfo['config'] = moduleconfig
     return moduleinfo
+
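
Note on the shape-normalization fix in PATCH 1/7: ThreatAnalyzer reports are XML converted to JSON, so a section with a single child arrives as a bare dict while a section with several children arrives as a list of dicts; the patch coerces the single-entry case into a list before iterating. A minimal standalone sketch of the pattern — the sample record is illustrative, not taken from a real TA 6.1 report:

    def as_list(node):
        # xmltodict-style data: one child is a dict, several children are a list of dicts
        return [node] if isinstance(node, dict) else node

    # illustrative only: a single connection arrives as a bare dict
    process = {'connection_section': {'connection': {'@remote_ip': '198.51.100.7'}}}
    for connection in as_list(process['connection_section']['connection']):
        print(connection['@remote_ip'])  # works for both shapes
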
From 61232ad93e5bda5dd98796a964c74b8a8d179d2d Mon Sep 17 00:00:00 2001
From: chrisr3d
Date: Wed, 8 Aug 2018 17:00:10 +0200
Subject: [PATCH 2/7] new: Expansion hover module to check spamhaus DBL for a domain name

---
 misp_modules/modules/expansion/dbl_spamhaus.py | 61 ++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 misp_modules/modules/expansion/dbl_spamhaus.py

diff --git a/misp_modules/modules/expansion/dbl_spamhaus.py b/misp_modules/modules/expansion/dbl_spamhaus.py
new file mode 100644
index 0000000..f78cb74
--- /dev/null
+++ b/misp_modules/modules/expansion/dbl_spamhaus.py
@@ -0,0 +1,61 @@
+import json
+import datetime
+import sys
+from collections import defaultdict
+
+try:
+    import dns.resolver
+    resolver = dns.resolver.Resolver()
+    resolver.timeout = 0.2
+    resolver.lifetime = 0.2
+except ModuleNotFoundError:
+    print("dnspython3 is missing, use 'pip install dnspython3' to install it.")
+    sys.exit(0)
+
+misperrors = {'error': 'Error'}
+mispattributes = {'input': ['domain', 'domain|ip', 'hostname', 'hostname|port'], 'output': ['text']}
+moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
+              'description': 'Checks Spamhaus DBL for a domain name.',
+              'module-type': ['expansion', 'hover']}
+moduleconfig = []
+
+dbl = 'dbl.spamhaus.org'
+dbl_mapping = {'127.0.1.2': 'spam domain',
+               '127.0.1.4': 'phish domain',
+               '127.0.1.5': 'malware domain',
+               '127.0.1.6': 'botnet C&C domain',
+               '127.0.1.102': 'abused legit spam',
+               '127.0.1.103': 'abused spammed redirector domain',
+               '127.0.1.104': 'abused legit phish',
+               '127.0.1.105': 'abused legit malware',
+               '127.0.1.106': 'abused legit botnet C&C',
+               '127.0.1.255': 'IP queries prohibited!'}
+
+def fetch_requested_value(request):
+    for attribute_type in mispattributes['input']:
+        if request.get(attribute_type):
+            return request[attribute_type].split('|')[0]
+    return None
+
+def handler(q=False):
+    if q is False:
+        return False
+    request = json.loads(q)
+    requested_value = fetch_requested_value(request)
+    if requested_value is None:
+        misperrors['error'] = "Unsupported attributes type"
+        return misperrors
+    query = "{}.{}".format(requested_value, dbl)
+    try:
+        query_result = resolver.query(query, 'A')[0]
+        result = "{} - {}".format(requested_value, dbl_mapping[str(query_result)])
+    except Exception as e:
+        result = e
+    return {'results': [{'types': mispattributes.get('output'), 'values': result}]}
+
+def introspection():
+    return mispattributes
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo
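
Note on how the new module's DNSBL query works: the domain to check is prepended to the dbl.spamhaus.org zone and the special-purpose A record returned by Spamhaus encodes the listing category (an NXDOMAIN answer means the domain is not listed). A rough standalone equivalent, assuming dnspython is installed; the queried domain and the two-entry code table are placeholders, not the module itself:

    import dns.resolver

    DBL_ZONE = 'dbl.spamhaus.org'
    DBL_CODES = {'127.0.1.2': 'spam domain', '127.0.1.5': 'malware domain'}  # excerpt of the module's table

    def check_dbl(domain):
        # A listed domain resolves to a 127.0.1.x code; NXDOMAIN means "not listed"
        try:
            answer = dns.resolver.Resolver().query('{}.{}'.format(domain, DBL_ZONE), 'A')
        except dns.resolver.NXDOMAIN:
            return '{} is not listed'.format(domain)
        code = str(answer[0])
        return '{} - {}'.format(domain, DBL_CODES.get(code, 'unknown return code {}'.format(code)))

    print(check_dbl('example.com'))  # placeholder domain
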
From 90baa1dd5a1c931e02a1a91dda351dde7983c3b6 Mon Sep 17 00:00:00 2001
From: chrisr3d
Date: Wed, 8 Aug 2018 17:05:22 +0200
Subject: [PATCH 3/7] add: Added DBL spamhaus module documentation and in expansion init file

---
 README.md                                  | 1 +
 misp_modules/modules/expansion/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cbf9550..ad4b098 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/
 * [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to.
 * [CrowdStrike Falcon](misp_modules/modules/expansion/crowdstrike_falcon.py) - an expansion module to expand using CrowdStrike Falcon Intel Indicator API.
 * [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE).
+* [DBL Spamhaus](misp_modules/modules/expansion/dbl_spamhaus.py) - a hover module to check Spamhaus DBL for a domain name.
 * [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP addresses attributes.
 * [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois.
 * [EUPI](misp_modules/modules/expansion/eupi.py) - a hover and expansion module to get information about an URL from the [Phishing Initiative project](https://phishing-initiative.eu/?lang=en).
diff --git a/misp_modules/modules/expansion/__init__.py b/misp_modules/modules/expansion/__init__.py
index cda3af5..c6e81a7 100644
--- a/misp_modules/modules/expansion/__init__.py
+++ b/misp_modules/modules/expansion/__init__.py
@@ -1,3 +1,3 @@
 from . import _vmray
 
-__all__ = ['vmray_submit', 'asn_history', 'circl_passivedns', 'circl_passivessl', 'countrycode', 'cve', 'dns', 'domaintools', 'eupi', 'farsight_passivedns', 'ipasn', 'passivetotal', 'sourcecache', 'virustotal', 'whois', 'shodan', 'reversedns', 'geoip_country', 'wiki', 'iprep', 'threatminer', 'otx', 'threatcrowd', 'vulndb', 'crowdstrike_falcon', 'yara_syntax_validator', 'hashdd', 'onyphe', 'onyphe_full', 'rbl', 'xforceexchange', 'sigma_syntax_validator', 'stix2_pattern_syntax_validator', 'sigma_queries']
+__all__ = ['vmray_submit', 'asn_history', 'circl_passivedns', 'circl_passivessl', 'countrycode', 'cve', 'dns', 'domaintools', 'eupi', 'farsight_passivedns', 'ipasn', 'passivetotal', 'sourcecache', 'virustotal', 'whois', 'shodan', 'reversedns', 'geoip_country', 'wiki', 'iprep', 'threatminer', 'otx', 'threatcrowd', 'vulndb', 'crowdstrike_falcon', 'yara_syntax_validator', 'hashdd', 'onyphe', 'onyphe_full', 'rbl', 'xforceexchange', 'sigma_syntax_validator', 'stix2_pattern_syntax_validator', 'sigma_queries', 'dbl_spamhaus']
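
Note for future module authors: registration is what makes misp-modules pick a module up — the module name must appear in the expansion package's __all__ list, as the second hunk does for dbl_spamhaus. Schematically (an excerpt, not the full list):

    # misp_modules/modules/expansion/__init__.py (schematic excerpt)
    __all__ = [
        # ... existing modules ...
        'dbl_spamhaus',  # newly registered; must match the module's file name
    ]
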
From bdbf5388934478d79fee2c5c3ae21b0642d3cd78 Mon Sep 17 00:00:00 2001
From: David J
Date: Fri, 10 Aug 2018 16:00:01 -0500
Subject: [PATCH 4/7] Create urlscan.py

---
 misp_modules/modules/expansion/urlscan.py | 269 ++++++++++++++++++++++
 1 file changed, 269 insertions(+)
 create mode 100644 misp_modules/modules/expansion/urlscan.py

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
new file mode 100644
index 0000000..8f4067c
--- /dev/null
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -0,0 +1,269 @@
+import json
+import requests
+import logging
+import sys
+import time
+# Need base64 if encoding data for attachments, but disabled for now
+# import base64
+
+log = logging.getLogger('urlscan')
+log.setLevel(logging.DEBUG)
+ch = logging.StreamHandler(sys.stdout)
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+log.addHandler(ch)
+
+moduleinfo = {
+        'version': '0.1',
+        'author': 'Dave Johnson',
+        'description': 'Module to query urlscan.io',
+        'module-type': ['expansion']
+    }
+
+moduleconfig = ['apikey']
+misperrors = {'error': 'Error'}
+mispattributes = {
+        'input': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url'],
+        'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+    }
+
+def handler(q=False):
+    if q is False:
+        return False
+    request = json.loads(q)
+    if (request.get('config')):
+        if (request['config'].get('apikey') is None):
+            misperrors['error'] = 'urlscan apikey is missing'
+            return misperrors
+    client = urlscanAPI(request['config']['apikey'])
+
+    r = {'results': []}
+
+    if 'ip-src' in request:
+        r['results'] += lookup_indicator(client, request['ip-src'])
+    if 'ip-dst' in request:
+        r['results'] += lookup_indicator(client, request['ip-dst'])
+    if 'domain' in request:
+        r['results'] += lookup_indicator(client, request['domain'])
+    if 'hostname' in request:
+        r['results'] += lookup_indicator(client, request['hostname'])
+    if 'url' in request:
+        r['results'] += lookup_indicator(client, request['url'])
+
+    uniq = []
+    for item in r['results']:
+        if item not in uniq:
+            uniq.append(item)
+    r['results'] = uniq
+    return r
+
+
+def lookup_indicator(client, query):
+    result = client.search_url(query)
+    log.debug('RESULTS: ' + json.dumps(result))
+    r = []
+    if result.get('page'):
+        if result['page'].get('domain'):
+            misp_val = result['page']['domain']
+            misp_comment = "Domain associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'domain',
+                      'categories': ['Network activity'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('ip'):
+            misp_val = result['page']['ip']
+            misp_comment = "IP associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'ip-dst',
+                      'categories': ['Network activity'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('country'):
+            misp_val = 'Country: ' + result['page']['country']
+            if result['page'].get('city'):
+                misp_val += ', City: ' + result['page']['city']
+            misp_comment = "Location associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'text',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('asn'):
+            misp_val = result['page']['asn']
+            misp_comment = "ASN associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'AS', 'categories': ['Network activity'], 'values': misp_val, 'comment': misp_comment})
+
+        if result['page'].get('asnname'):
+            misp_val = result['page']['asnname']
+            misp_comment = "ASN name associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'text',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+    if result.get('stats'):
+        if result['stats'].get('malicious'):
+            log.debug('There is something in results > stats > malicious')
+            threat_list = set()
+
+            if 'matches' in result['meta']['processors']['gsb']['data']:
+                for item in result['meta']['processors']['gsb']['data']['matches']:
+                    if item['threatType']:
+                        threat_list.add(item['threatType'])
+
+            threat_list = ', '.join(threat_list)
+            log.debug('threat_list values are: \'' + threat_list + '\'')
+
+            if threat_list:
+                misp_val = '{} threat(s) detected'.format(threat_list)
+                misp_comment = '{} malicious indicator(s) were present on ' \
+                               '{} (source: urlscan.io)'.format(result['stats']['malicious'], query, threat_list)
+                r.append({'types': 'text',
+                          'categories': ['External analysis'],
+                          'values': misp_val,
+                          'comment': misp_comment})
+
+    if result.get('lists'):
+        if result['lists'].get('urls'):
+            for url in result['lists']['urls']:
+                url = url.lower()
+                if 'office' in url:
+                    misp_val = 'Possible Microsoft Office themed phishing page'
+                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                elif 'o365' in url or '0365' in url:
+                    misp_val = 'Possible Microsoft O365 themed phishing page'
+                    misp_comment = 'There was resource containing an \'O365\' string in the URL.'
+                elif 'microsoft' in url:
+                    misp_val = 'Possible Microsoft themed phishing page'
+                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                elif 'paypal' in url:
+                    misp_val = 'Possible PayPal themed phishing page'
+                    misp_comment = 'There was resource containing a \'PayPal\' string in the URL.'
+                elif 'onedrive' in url:
+                    misp_val = 'Possible OneDrive themed phishing page'
+                    misp_comment = 'There was resource containing a \'OneDrive\' string in the URL.'
+                elif 'docusign' in url:
+                    misp_val = 'Possible DocuSign themed phishing page'
+                    misp_comment = 'There was resource containing a \'DocuSign\' string in the URL'
+                r.append({'types': 'text',
+                          'categories': ['External analysis'],
+                          'values': misp_val,
+                          'comment': misp_comment})
+
+    if result.get('task'):
+        if result['task'].get('reportURL'):
+            misp_val = result['task']['reportURL']
+            misp_comment = 'Link to full report (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['task'].get('screenshotURL'):
+            image_url = result['task']['screenshotURL']
+            misp_comment = 'Link to screenshot (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': image_url,
+                      'comment': misp_comment})
+            ### TO DO ###
+            ### Add ability to add an in-line screenshot of the target website into an attribute
+            # screenshot = requests.get(image_url).content
+            # r.append({'types': ['attachment'],
+            #           'categories': ['External analysis'],
+            #           'values': image_url,
+            #           'image': str(base64.b64encode(screenshot), 'utf-8'),
+            #           'comment': 'Screenshot of website'})
+
+        if result['task'].get('domURL'):
+            misp_val = result['task']['domURL']
+            misp_comment = 'Link to DOM (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+    return r
+
+
+def introspection():
+    return mispattributes
+
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo
+
+
+class urlscanAPI():
+    def __init__(self, apikey=None, uuid=None):
+        self.key = apikey
+        self.uuid = uuid
+
+    def request(self, query):
+        log.debug('From request function with the parameter: ' + query)
+        payload = {'url': query}
+        headers = {'API-Key': self.key,
+                   'Content-Type': "application/json",
+                   'Cache-Control': "no-cache"}
+
+        # Troubleshooting problems with initial search request
+        log.debug('PAYLOAD: ' + json.dumps(payload))
+        log.debug('HEADERS: ' + json.dumps(headers))
+
+        search_url_string = "https://urlscan.io/api/v1/scan/"
+        response = requests.request("POST",
+                                    search_url_string,
+                                    data=json.dumps(payload),
+                                    headers=headers)
+
+        # HTTP 400 - Bad Request
+        if response.status_code == 400:
+            raise Exception('HTTP Error 400 - Bad Request')
+
+        # HTTP 404 - Not found
+        if response.status_code == 404:
+            raise Exception('HTTP Error 404 - These are not the droids you\'re looking for')
+
+        # Any other status code
+        if response.status_code != 200:
+            raise Exception('HTTP Error ' + str(response.status_code))
+
+        if response.text:
+            response = json.loads(response.content.decode("utf-8"))
+            time.sleep(3)
+            self.uuid = response['uuid']
+
+            # Strings for to check for errors on the results page
+            # Null response string for any unavailable resources
+            null_response_string = '"status": 404'
+            # Redirect string accounting for 301/302/303/307/308 status codes
+            redirect_string = '"status": 30'
+            # Normal response string with 200 status code
+            normal_response_string = '"status": 200'
+
+            results_url_string = "https://urlscan.io/api/v1/result/" + self.uuid
+            log.debug('Results URL: ' + results_url_string)
+
+            # Need to wait for results to process and check if they are valid
+            tries = 10
+            while tries >= 0:
+                results = requests.request("GET", results_url_string)
+                log.debug('Made a GET request')
+                results = results.content.decode("utf-8")
+                # checking if there is a 404 status code and no available resources
+                if null_response_string in results and \
+                        redirect_string not in results and \
+                        normal_response_string not in results:
+                    log.debug('Results not processed. Please check again later.')
+                    time.sleep(3)
+                    tries -= 1
+                else:
+                    return json.loads(results)
+            raise Exception('Results contained a 404 status error and could not be processed.')
+
+    def search_url(self, query):
+        log.debug('From search_url with parameter: ' + query)
+        return self.request(query)
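
Note on the class at the bottom of the new file: it implements a submit-then-poll flow — POST the URL to /api/v1/scan/, read the scan uuid from the response, then poll /api/v1/result/<uuid> until the scan has finished processing (urlscan.io answers 404 while a scan is still in flight). A condensed sketch of that flow under the same assumptions as the module; the retry count and sleep interval mirror it, and the example URL and key are placeholders:

    import json
    import time
    import requests

    def scan_and_wait(url, api_key, tries=10):
        # Submit the URL for scanning; the response body contains the scan's uuid
        resp = requests.post('https://urlscan.io/api/v1/scan/',
                             headers={'API-Key': api_key, 'Content-Type': 'application/json'},
                             data=json.dumps({'url': url}))
        resp.raise_for_status()
        uuid = resp.json()['uuid']
        # Poll until the result is ready; a 404 here means "still processing"
        while tries > 0:
            result = requests.get('https://urlscan.io/api/v1/result/' + uuid)
            if result.status_code == 200:
                return result.json()
            time.sleep(3)
            tries -= 1
        raise Exception('Scan results were not ready in time')

    # report = scan_and_wait('http://example.com', 'YOUR_API_KEY')  # placeholder URL and key
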
From a697f653822b893d95a4142a4d88cbf66821f208 Mon Sep 17 00:00:00 2001
From: David J
Date: Tue, 14 Aug 2018 10:51:15 -0500
Subject: [PATCH 5/7] Add error handling for DNS failures, reduce imports, and simplify misp_comments

---
 misp_modules/modules/expansion/urlscan.py | 78 +++++++++++------------
 1 file changed, 36 insertions(+), 42 deletions(-)

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
index 8f4067c..a0adc25 100644
--- a/misp_modules/modules/expansion/urlscan.py
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -3,8 +3,6 @@ import requests
 import logging
 import sys
 import time
-# Need base64 if encoding data for attachments, but disabled for now
-# import base64
 
 log = logging.getLogger('urlscan')
 log.setLevel(logging.DEBUG)
@@ -15,18 +13,19 @@ ch.setFormatter(formatter)
 log.addHandler(ch)
 
 moduleinfo = {
-        'version': '0.1',
-        'author': 'Dave Johnson',
-        'description': 'Module to query urlscan.io',
-        'module-type': ['expansion']
-    }
+    'version': '0.1',
+    'author': 'Dave Johnson',
+    'description': 'Module to query urlscan.io',
+    'module-type': ['expansion']
+}
 
 moduleconfig = ['apikey']
 misperrors = {'error': 'Error'}
 mispattributes = {
-        'input': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url'],
-        'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
-    }
+    'input': ['hostname', 'domain', 'url'],
+    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+}
+
 
 def handler(q=False):
     if q is False:
@@ -51,8 +50,15 @@ def handler(q=False):
     if 'url' in request:
         r['results'] += lookup_indicator(client, request['url'])
 
+    # Return any errors generated from lookup to the UI and remove duplicates
+
     uniq = []
+    log.debug(r['results'])
     for item in r['results']:
+        log.debug(item)
+        if 'error' in item:
+            misperrors['error'] = item['error']
+            return misperrors
         if item not in uniq:
             uniq.append(item)
     r['results'] = uniq
     return r
@@ -63,10 +69,19 @@ def lookup_indicator(client, query):
     result = client.search_url(query)
     log.debug('RESULTS: ' + json.dumps(result))
     r = []
+    misp_comment = "{}: Enriched via the urlscan module".format(query)
+
+    # Determine if the page is reachable
+    for request in result['data']['requests']:
+        if request['response'].get('failed'):
+            if request['response']['failed']['errorText']:
+                log.debug('The page could not load')
+                r.append(
+                    {'error': 'Domain could not be resolved: {}'.format(request['response']['failed']['errorText'])})
+
     if result.get('page'):
         if result['page'].get('domain'):
             misp_val = result['page']['domain']
-            misp_comment = "Domain associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'domain',
                       'categories': ['Network activity'],
                       'values': misp_val,
@@ -74,17 +89,15 @@ def lookup_indicator(client, query):
 
         if result['page'].get('ip'):
             misp_val = result['page']['ip']
-            misp_comment = "IP associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'ip-dst',
                       'categories': ['Network activity'],
                       'values': misp_val,
                       'comment': misp_comment})
 
         if result['page'].get('country'):
-            misp_val = 'Country: ' + result['page']['country']
+            misp_val = 'country: ' + result['page']['country']
             if result['page'].get('city'):
-                misp_val += ', City: ' + result['page']['city']
-            misp_comment = "Location associated with {} (source: urlscan.io)".format(query)
+                misp_val += ', city: ' + result['page']['city']
             r.append({'types': 'text',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -92,12 +105,10 @@ def lookup_indicator(client, query):
 
         if result['page'].get('asn'):
             misp_val = result['page']['asn']
-            misp_comment = "ASN associated with {} (source: urlscan.io)".format(query)
-            r.append({'types': 'AS', 'categories': ['Network activity'], 'values': misp_val, 'comment': misp_comment})
+            r.append({'types': 'AS', 'categories': ['External analysis'], 'values': misp_val, 'comment': misp_comment})
 
         if result['page'].get('asnname'):
             misp_val = result['page']['asnname']
-            misp_comment = "ASN name associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'text',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -118,8 +129,6 @@ def lookup_indicator(client, query):
 
             if threat_list:
                 misp_val = '{} threat(s) detected'.format(threat_list)
-                misp_comment = '{} malicious indicator(s) were present on ' \
-                               '{} (source: urlscan.io)'.format(result['stats']['malicious'], query, threat_list)
                 r.append({'types': 'text',
                           'categories': ['External analysis'],
                           'values': misp_val,
@@ -130,23 +139,17 @@ def lookup_indicator(client, query):
             for url in result['lists']['urls']:
                 url = url.lower()
                 if 'office' in url:
-                    misp_val = 'Possible Microsoft Office themed phishing page'
-                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                    misp_val = "Possible Office-themed phishing"
                 elif 'o365' in url or '0365' in url:
-                    misp_val = 'Possible Microsoft O365 themed phishing page'
-                    misp_comment = 'There was resource containing an \'O365\' string in the URL.'
+                    misp_val = "Possible O365-themed phishing"
                 elif 'microsoft' in url:
-                    misp_val = 'Possible Microsoft themed phishing page'
-                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                    misp_val = "Possible Microsoft-themed phishing"
                 elif 'paypal' in url:
-                    misp_val = 'Possible PayPal themed phishing page'
-                    misp_comment = 'There was resource containing a \'PayPal\' string in the URL.'
+                    misp_val = "Possible PayPal-themed phishing"
                 elif 'onedrive' in url:
-                    misp_val = 'Possible OneDrive themed phishing page'
-                    misp_comment = 'There was resource containing a \'OneDrive\' string in the URL.'
+                    misp_val = "Possible OneDrive-themed phishing"
                 elif 'docusign' in url:
-                    misp_val = 'Possible DocuSign themed phishing page'
-                    misp_comment = 'There was resource containing a \'DocuSign\' string in the URL'
+                    misp_val = "Possible DocuSign-themed phishing"
                 r.append({'types': 'text',
                           'categories': ['External analysis'],
                           'values': misp_val,
@@ -155,7 +158,6 @@ def lookup_indicator(client, query):
     if result.get('task'):
         if result['task'].get('reportURL'):
             misp_val = result['task']['reportURL']
-            misp_comment = 'Link to full report (source: urlscan.io)'
             r.append({'types': 'link',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -163,7 +165,6 @@ def lookup_indicator(client, query):
 
         if result['task'].get('screenshotURL'):
             image_url = result['task']['screenshotURL']
-            misp_comment = 'Link to screenshot (source: urlscan.io)'
             r.append({'types': 'link',
                       'categories': ['External analysis'],
                       'values': image_url,
@@ -177,14 +178,6 @@ def lookup_indicator(client, query):
             #           'image': str(base64.b64encode(screenshot), 'utf-8'),
             #           'comment': 'Screenshot of website'})
 
-        if result['task'].get('domURL'):
-            misp_val = result['task']['domURL']
-            misp_comment = 'Link to DOM (source: urlscan.io)'
-            r.append({'types': 'link',
-                      'categories': ['External analysis'],
-                      'values': misp_val,
-                      'comment': misp_comment})
-
     return r
 
 
@@ -262,6 +255,7 @@ class urlscanAPI():
                     tries -= 1
                 else:
                     return json.loads(results)
+
             raise Exception('Results contained a 404 status error and could not be processed.')
 
     def search_url(self, query):
From 7deeb95820e1e4099208496025807e6ae6ef1164 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 21 Aug 2018 11:13:08 +0200
Subject: [PATCH 6/7] fix: ta_import - bugfixes

---
 .../import_mod/threatanalyzer_import.py       | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py
index 916628e..2e3a507 100755
--- a/misp_modules/modules/import_mod/threatanalyzer_import.py
+++ b/misp_modules/modules/import_mod/threatanalyzer_import.py
@@ -15,7 +15,7 @@ misperrors = {'error': 'Error'}
 userConfig = {}
 inputSource = ['file']
 
-moduleinfo = {'version': '0.8', 'author': 'Christophe Vandeplas',
+moduleinfo = {'version': '0.9', 'author': 'Christophe Vandeplas',
               'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files',
               'module-type': ['import']}
 
@@ -45,7 +45,7 @@ def handler(q=False):
                 if re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
                     with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                         file_data = fp.read()
-                        for line in file_data.decode().split('\n'):
+                        for line in file_data.decode("utf-8", 'ignore').split('\n'):
                             if not line:
                                 continue
                             if line.count('|') == 3:
@@ -55,7 +55,8 @@ def handler(q=False):
                                 l_fname = cleanup_filepath(l_fname)
                                 if l_fname:
                                     if l_size == 0:
-                                        pass  # FIXME create an attribute for the filename/path
+                                        results.append({'values': l_fname, 'type': 'filename', 'to_ids': True,
+                                                        'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                                     else:
                                         # file is a non empty sample, upload the sample later
                                         modified_files_mapping[l_md5] = l_fname
@@ -144,13 +145,14 @@ def process_analysis_json(analysis_json):
                     # )
                     yield({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                 if 'http_command' in connection_section_connection:
-                    # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
-                    #     connection_section_connection['http_command']['@method'],  # comment
-                    #     connection_section_connection['http_command']['@url'])  # url
-                    # )
-                    val = cleanup_url(connection_section_connection['http_command']['@url'])
-                    if val:
-                        yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': connection_section_connection['http_command']['@method']})
+                    for http_command in connection_section_connection['http_command']:
+                        # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
+                        #     connection_section_connection['http_command']['@method'],  # comment
+                        #     connection_section_connection['http_command']['@url'])  # url
+                        # )
+                        val = cleanup_url(http_command['@url'])
+                        if val:
+                            yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': http_command['@method']})
 
                 if 'http_header' in connection_section_connection:
                     for http_header in connection_section_connection['http_header']:
@@ -453,9 +455,9 @@ def cleanup_filepath(item):
         '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',
         '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
-        '\\AppData\\Roaming\Adobe\\Flash Player\\NativeCache\\',
+        '\\AppData\\Roaming\\Adobe\\Flash Player\\NativeCache\\',
         'C:\\Windows\\AppCompat\\Programs\\',
-        'C:\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
+        'C:\\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
     }
     if list_in_string(noise_substrings, item):
         return None
@@ -531,4 +533,3 @@ def introspection():
 def version():
     moduleinfo['config'] = moduleconfig
     return moduleinfo
-

From b0be965e576a1fb8e02d36bc4a34ecde5d967e4b Mon Sep 17 00:00:00 2001
From: SuRb0 <1809870+surbo@users.noreply.github.com>
Date: Thu, 30 Aug 2018 19:41:34 -0500
Subject: [PATCH 7/7] Update urlscan.py

Added hash to the search so you can take advantage of the new file download
function on urlscan.io. You can use this to pivot on file hashes and find
domains hosting the same malicious file.
---
 misp_modules/modules/expansion/urlscan.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
index a0adc25..ecd1a50 100644
--- a/misp_modules/modules/expansion/urlscan.py
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -22,8 +22,8 @@ moduleinfo = {
 moduleconfig = ['apikey']
 misperrors = {'error': 'Error'}
 mispattributes = {
-    'input': ['hostname', 'domain', 'url'],
-    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+    'input': ['hostname', 'domain', 'url', 'hash'],
+    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link', 'hash']
 }
 
 
@@ -49,6 +49,8 @@ def handler(q=False):
         r['results'] += lookup_indicator(client, request['hostname'])
     if 'url' in request:
         r['results'] += lookup_indicator(client, request['url'])
+    if 'hash' in request:
+        r['results'] += lookup_indicator(client, request['hash'])
 
     # Return any errors generated from lookup to the UI and remove duplicates
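
Note on the final patch: with 'hash' accepted as an input attribute, an analyst can pivot from a file hash to the domains urlscan.io saw serving that file. A hypothetical query showing the shape of the request the module now accepts (the API key is a placeholder; the hash is the well-known EICAR test file SHA-256):

    import json

    # Hypothetical module invocation: the MISP server passes the query as JSON
    query = json.dumps({
        'config': {'apikey': 'YOUR_API_KEY'},  # placeholder key
        'hash': '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f'  # EICAR SHA-256
    })
    # handler(q=query) would then search urlscan.io for scans involving this file hash
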