From 8d4e2025f760279eca24a640cd732291bb0c88af Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Fri, 3 Aug 2018 13:58:53 +0200
Subject: [PATCH 1/7] ta_import - bugfixes for TA 6.1

---
 .../import_mod/threatanalyzer_import.py       | 42 +++++++++++--------
 1 file changed, 25 insertions(+), 17 deletions(-)

diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py
index a6358ab..916628e 100755
--- a/misp_modules/modules/import_mod/threatanalyzer_import.py
+++ b/misp_modules/modules/import_mod/threatanalyzer_import.py
@@ -15,7 +15,7 @@ misperrors = {'error': 'Error'}
 userConfig = {}
 inputSource = ['file']
 
-moduleinfo = {'version': '0.7', 'author': 'Christophe Vandeplas',
+moduleinfo = {'version': '0.8', 'author': 'Christophe Vandeplas',
               'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files',
               'module-type': ['import']}
 
@@ -73,7 +73,7 @@ def handler(q=False):
                             results.append({
                                 'values': current_sample_filename,
                                 'data': base64.b64encode(file_data).decode(),
-                                'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
+                                'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})
 
                     if 'Analysis/analysis.json' in zip_file_name:
                         with zf.open(zip_file_name, mode='r', pwd=None) as fp:
@@ -88,7 +88,7 @@ def handler(q=False):
                 results.append({
                     'values': sample_filename,
                     'data': base64.b64encode(file_data).decode(),
-                    'type': 'malware-sample', 'categories': ['Artifacts dropped', 'Payload delivery'], 'to_ids': True, 'comment': ''})
+                    'type': 'malware-sample', 'categories': ['Payload delivery', 'Artifacts dropped'], 'to_ids': True, 'comment': ''})
             except Exception as e:
                 # no 'sample' in archive, might be an url analysis, just ignore
                 pass
@@ -113,7 +113,15 @@ def process_analysis_json(analysis_json):
     for process in analysis_json['analysis']['processes']['process']:
         # print_json(process)
         if 'connection_section' in process and 'connection' in process['connection_section']:
+            # compensate for absurd behavior of the data format: if one entry = immediately the dict, if multiple entries = list containing dicts
+            # this will always create a list, even with only one item
+            if isinstance(process['connection_section']['connection'], dict):
+                process['connection_section']['connection'] = [process['connection_section']['connection']]
+            # iterate over each entry
             for connection_section_connection in process['connection_section']['connection']:
+                if 'name_to_ip' in connection_section_connection:  # TA 6.1 data format
+                    connection_section_connection['@remote_ip'] = connection_section_connection['name_to_ip']['@result_addresses']
+                    connection_section_connection['@remote_hostname'] = connection_section_connection['name_to_ip']['@request_name']
                 connection_section_connection['@remote_ip'] = cleanup_ip(connection_section_connection['@remote_ip'])
                 connection_section_connection['@remote_hostname'] = cleanup_hostname(connection_section_connection['@remote_hostname'])
 
@@ -124,7 +132,7 @@ def process_analysis_json(analysis_json):
                     #     connection_section_connection['@remote_hostname'],
                     #     connection_section_connection['@remote_ip'])
                     # )
-                    yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    yield({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif connection_section_connection['@remote_ip']:
                     # print("connection_section_connection ip-dst: {} IDS:yes".format(
                     #     connection_section_connection['@remote_ip'])
                     # )
@@ -136,20 +144,19 @@ def process_analysis_json(analysis_json):
                     # )
                     yield({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                 if 'http_command' in connection_section_connection:
-                    for http_command in connection_section_connection['http_command']:
-                        # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
-                        #     http_command['@method'],  # comment
-                        #     http_command['@url'])  # url
-                        # )
-                        val = cleanup_url(http_command['@url'])
-                        if val:
-                            yield({'values': val, 'type': 'url', 'categories': 'Network activity', 'to_ids': True, 'comment': http_command['@method']})
+                    # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
+                    #     connection_section_connection['http_command']['@method'],  # comment
+                    #     connection_section_connection['http_command']['@url'])  # url
+                    # )
+                    val = cleanup_url(connection_section_connection['http_command']['@url'])
+                    if val:
+                        yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': connection_section_connection['http_command']['@method']})
 
                 if 'http_header' in connection_section_connection:
                     for http_header in connection_section_connection['http_header']:
                         if 'User-Agent:' in http_header['@header']:
                             val = http_header['@header'][len('User-Agent: '):]
-                            yield({'values': val, 'type': 'user-agent', 'categories': 'Network activity', 'to_ids': False, 'comment': ''})
+                            yield({'values': val, 'type': 'user-agent', 'categories': ['Network activity'], 'to_ids': False, 'comment': ''})
                         elif 'Host:' in http_header['@header']:
                             val = http_header['@header'][len('Host: '):]
                             if ':' in val:
@@ -162,7 +169,7 @@ def process_analysis_json(analysis_json):
                             if val_hostname and val_port:
                                 val_combined = '{}|{}'.format(val_hostname, val_port)
                                 # print({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
-                                yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                                yield({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                             elif val_ip and val_port:
                                 val_combined = '{}|{}'.format(val_ip, val_port)
                                 # print({'values': val_combined, 'type': 'ip-dst|port', 'to_ids': True, 'comment': ''})
@@ -207,7 +214,7 @@ def process_analysis_json(analysis_json):
                     #     networkoperation_section_dns_request_by_name['@request_name'],
                     #     networkoperation_section_dns_request_by_name['@result_addresses'])
                     # )
-                    yield({'values': val, 'type': 'domain|ip', 'categories': 'Network activity', 'to_ids': True, 'comment': ''})
+                    yield({'values': val, 'type': 'domain|ip', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif networkoperation_section_dns_request_by_name['@request_name']:
                     # print("networkoperation_section_dns_request_by_name hostname: {} IDS:yes".format(
                     #     networkoperation_section_dns_request_by_name['@request_name'])
                     # )
@@ -231,14 +238,14 @@ def process_analysis_json(analysis_json):
                     #     networkpacket_section_connect_to_computer['@remote_port'])
                     # )
                     val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_ip'])
-                    yield({'values': val_combined, 'type': 'hostname|ip', 'to_ids': True, 'comment': ''})
+                    yield({'values': val_combined, 'type': 'domain|ip', 'to_ids': True, 'comment': ''})
                 elif networkpacket_section_connect_to_computer['@remote_hostname']:
                     # print("networkpacket_section_connect_to_computer hostname: {} IDS:yes COMMENT:port {}".format(
                     #     networkpacket_section_connect_to_computer['@remote_hostname'],
                     #     networkpacket_section_connect_to_computer['@remote_port'])
                     # )
                     val_combined = "{}|{}".format(networkpacket_section_connect_to_computer['@remote_hostname'], networkpacket_section_connect_to_computer['@remote_port'])
-                    yield({'values': val_combined, 'type': 'hostname|port', 'to_ids': True, 'comment': ''})
+                    yield({'values': val_combined, 'type': 'hostname|port', 'categories': ['Network activity'], 'to_ids': True, 'comment': ''})
                 elif networkpacket_section_connect_to_computer['@remote_ip']:
                     # print("networkpacket_section_connect_to_computer ip-dst: {} IDS:yes COMMENT:port {}".format(
                     #     networkpacket_section_connect_to_computer['@remote_ip'],
@@ -524,3 +531,4 @@ def introspection():
 def version():
     moduleinfo['config'] = moduleconfig
     return moduleinfo
+
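
Note on the shape-normalization fix in PATCH 1/7: ThreatAnalyzer reports are XML converted to JSON, so a section with a single child arrives as a bare dict while a section with several children arrives as a list of dicts; the patch coerces the single-entry case into a list before iterating. A minimal standalone sketch of the pattern — the sample record is illustrative, not taken from a real TA 6.1 report:

    def as_list(node):
        # xmltodict-style data: one child is a dict, several children are a list of dicts
        return [node] if isinstance(node, dict) else node

    # illustrative only: a single connection arrives as a bare dict
    process = {'connection_section': {'connection': {'@remote_ip': '198.51.100.7'}}}
    for connection in as_list(process['connection_section']['connection']):
        print(connection['@remote_ip'])  # works for both shapes
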
From 61232ad93e5bda5dd98796a964c74b8a8d179d2d Mon Sep 17 00:00:00 2001
From: chrisr3d
Date: Wed, 8 Aug 2018 17:00:10 +0200
Subject: [PATCH 2/7] new: Expansion hover module to check spamhaus DBL for a domain name

---
 misp_modules/modules/expansion/dbl_spamhaus.py | 61 ++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 misp_modules/modules/expansion/dbl_spamhaus.py

diff --git a/misp_modules/modules/expansion/dbl_spamhaus.py b/misp_modules/modules/expansion/dbl_spamhaus.py
new file mode 100644
index 0000000..f78cb74
--- /dev/null
+++ b/misp_modules/modules/expansion/dbl_spamhaus.py
@@ -0,0 +1,61 @@
+import json
+import datetime
+import sys
+from collections import defaultdict
+
+try:
+    import dns.resolver
+    resolver = dns.resolver.Resolver()
+    resolver.timeout = 0.2
+    resolver.lifetime = 0.2
+except ModuleNotFoundError:
+    print("dnspython3 is missing, use 'pip install dnspython3' to install it.")
+    sys.exit(0)
+
+misperrors = {'error': 'Error'}
+mispattributes = {'input': ['domain', 'domain|ip', 'hostname', 'hostname|port'], 'output': ['text']}
+moduleinfo = {'version': '0.1', 'author': 'Christian Studer',
+              'description': 'Checks Spamhaus DBL for a domain name.',
+              'module-type': ['expansion', 'hover']}
+moduleconfig = []
+
+dbl = 'dbl.spamhaus.org'
+dbl_mapping = {'127.0.1.2': 'spam domain',
+               '127.0.1.4': 'phish domain',
+               '127.0.1.5': 'malware domain',
+               '127.0.1.6': 'botnet C&C domain',
+               '127.0.1.102': 'abused legit spam',
+               '127.0.1.103': 'abused spammed redirector domain',
+               '127.0.1.104': 'abused legit phish',
+               '127.0.1.105': 'abused legit malware',
+               '127.0.1.106': 'abused legit botnet C&C',
+               '127.0.1.255': 'IP queries prohibited!'}
+
+def fetch_requested_value(request):
+    for attribute_type in mispattributes['input']:
+        if request.get(attribute_type):
+            return request[attribute_type].split('|')[0]
+    return None
+
+def handler(q=False):
+    if q is False:
+        return False
+    request = json.loads(q)
+    requested_value = fetch_requested_value(request)
+    if requested_value is None:
+        misperrors['error'] = "Unsupported attributes type"
+        return misperrors
+    query = "{}.{}".format(requested_value, dbl)
+    try:
+        query_result = resolver.query(query, 'A')[0]
+        result = "{} - {}".format(requested_value, dbl_mapping[str(query_result)])
+    except Exception as e:
+        result = e
+    return {'results': [{'types': mispattributes.get('output'), 'values': result}]}
+
+def introspection():
+    return mispattributes
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo
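
Note on how the new module's DNSBL query works: the domain to check is prepended to the dbl.spamhaus.org zone and the special-purpose A record returned by Spamhaus encodes the listing category (an NXDOMAIN answer means the domain is not listed). A rough standalone equivalent, assuming dnspython is installed; the queried domain and the two-entry code table are placeholders, not the module itself:

    import dns.resolver

    DBL_ZONE = 'dbl.spamhaus.org'
    DBL_CODES = {'127.0.1.2': 'spam domain', '127.0.1.5': 'malware domain'}  # excerpt of the module's table

    def check_dbl(domain):
        # A listed domain resolves to a 127.0.1.x code; NXDOMAIN means "not listed"
        try:
            answer = dns.resolver.Resolver().query('{}.{}'.format(domain, DBL_ZONE), 'A')
        except dns.resolver.NXDOMAIN:
            return '{} is not listed'.format(domain)
        code = str(answer[0])
        return '{} - {}'.format(domain, DBL_CODES.get(code, 'unknown return code {}'.format(code)))

    print(check_dbl('example.com'))  # placeholder domain
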
From 90baa1dd5a1c931e02a1a91dda351dde7983c3b6 Mon Sep 17 00:00:00 2001
From: chrisr3d
Date: Wed, 8 Aug 2018 17:05:22 +0200
Subject: [PATCH 3/7] add: Added DBL spamhaus module documentation and in expansion init file

---
 README.md                                  | 1 +
 misp_modules/modules/expansion/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index cbf9550..ad4b098 100644
--- a/README.md
+++ b/README.md
@@ -23,6 +23,7 @@ For more information: [Extending MISP with Python modules](https://www.circl.lu/
 * [countrycode](misp_modules/modules/expansion/countrycode.py) - a hover module to tell you what country a URL belongs to.
 * [CrowdStrike Falcon](misp_modules/modules/expansion/crowdstrike_falcon.py) - an expansion module to expand using CrowdStrike Falcon Intel Indicator API.
 * [CVE](misp_modules/modules/expansion/cve.py) - a hover module to give more information about a vulnerability (CVE).
+* [DBL Spamhaus](misp_modules/modules/expansion/dbl_spamhaus.py) - a hover module to check Spamhaus DBL for a domain name.
 * [DNS](misp_modules/modules/expansion/dns.py) - a simple module to resolve MISP attributes like hostname and domain to expand IP addresses attributes.
 * [DomainTools](misp_modules/modules/expansion/domaintools.py) - a hover and expansion module to get information from [DomainTools](http://www.domaintools.com/) whois.
 * [EUPI](misp_modules/modules/expansion/eupi.py) - a hover and expansion module to get information about an URL from the [Phishing Initiative project](https://phishing-initiative.eu/?lang=en).
diff --git a/misp_modules/modules/expansion/__init__.py b/misp_modules/modules/expansion/__init__.py
index cda3af5..c6e81a7 100644
--- a/misp_modules/modules/expansion/__init__.py
+++ b/misp_modules/modules/expansion/__init__.py
@@ -1,3 +1,3 @@
 from . import _vmray
 
-__all__ = ['vmray_submit', 'asn_history', 'circl_passivedns', 'circl_passivessl', 'countrycode', 'cve', 'dns', 'domaintools', 'eupi', 'farsight_passivedns', 'ipasn', 'passivetotal', 'sourcecache', 'virustotal', 'whois', 'shodan', 'reversedns', 'geoip_country', 'wiki', 'iprep', 'threatminer', 'otx', 'threatcrowd', 'vulndb', 'crowdstrike_falcon', 'yara_syntax_validator', 'hashdd', 'onyphe', 'onyphe_full', 'rbl', 'xforceexchange', 'sigma_syntax_validator', 'stix2_pattern_syntax_validator', 'sigma_queries']
+__all__ = ['vmray_submit', 'asn_history', 'circl_passivedns', 'circl_passivessl', 'countrycode', 'cve', 'dns', 'domaintools', 'eupi', 'farsight_passivedns', 'ipasn', 'passivetotal', 'sourcecache', 'virustotal', 'whois', 'shodan', 'reversedns', 'geoip_country', 'wiki', 'iprep', 'threatminer', 'otx', 'threatcrowd', 'vulndb', 'crowdstrike_falcon', 'yara_syntax_validator', 'hashdd', 'onyphe', 'onyphe_full', 'rbl', 'xforceexchange', 'sigma_syntax_validator', 'stix2_pattern_syntax_validator', 'sigma_queries', 'dbl_spamhaus']
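
Note for future module authors: registration is what makes misp-modules pick a module up — the module name must appear in the expansion package's __all__ list, as the second hunk does for dbl_spamhaus. Schematically (an excerpt, not the full list):

    # misp_modules/modules/expansion/__init__.py (schematic excerpt)
    __all__ = [
        # ... existing modules ...
        'dbl_spamhaus',  # newly registered; must match the module's file name
    ]
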
From bdbf5388934478d79fee2c5c3ae21b0642d3cd78 Mon Sep 17 00:00:00 2001
From: David J
Date: Fri, 10 Aug 2018 16:00:01 -0500
Subject: [PATCH 4/7] Create urlscan.py

---
 misp_modules/modules/expansion/urlscan.py | 269 ++++++++++++++++++++++
 1 file changed, 269 insertions(+)
 create mode 100644 misp_modules/modules/expansion/urlscan.py

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
new file mode 100644
index 0000000..8f4067c
--- /dev/null
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -0,0 +1,269 @@
+import json
+import requests
+import logging
+import sys
+import time
+# Need base64 if encoding data for attachments, but disabled for now
+# import base64
+
+log = logging.getLogger('urlscan')
+log.setLevel(logging.DEBUG)
+ch = logging.StreamHandler(sys.stdout)
+ch.setLevel(logging.DEBUG)
+formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+ch.setFormatter(formatter)
+log.addHandler(ch)
+
+moduleinfo = {
+        'version': '0.1',
+        'author': 'Dave Johnson',
+        'description': 'Module to query urlscan.io',
+        'module-type': ['expansion']
+    }
+
+moduleconfig = ['apikey']
+misperrors = {'error': 'Error'}
+mispattributes = {
+        'input': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url'],
+        'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+    }
+
+def handler(q=False):
+    if q is False:
+        return False
+    request = json.loads(q)
+    if (request.get('config')):
+        if (request['config'].get('apikey') is None):
+            misperrors['error'] = 'urlscan apikey is missing'
+            return misperrors
+    client = urlscanAPI(request['config']['apikey'])
+
+    r = {'results': []}
+
+    if 'ip-src' in request:
+        r['results'] += lookup_indicator(client, request['ip-src'])
+    if 'ip-dst' in request:
+        r['results'] += lookup_indicator(client, request['ip-dst'])
+    if 'domain' in request:
+        r['results'] += lookup_indicator(client, request['domain'])
+    if 'hostname' in request:
+        r['results'] += lookup_indicator(client, request['hostname'])
+    if 'url' in request:
+        r['results'] += lookup_indicator(client, request['url'])
+
+    uniq = []
+    for item in r['results']:
+        if item not in uniq:
+            uniq.append(item)
+    r['results'] = uniq
+    return r
+
+
+def lookup_indicator(client, query):
+    result = client.search_url(query)
+    log.debug('RESULTS: ' + json.dumps(result))
+    r = []
+    if result.get('page'):
+        if result['page'].get('domain'):
+            misp_val = result['page']['domain']
+            misp_comment = "Domain associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'domain',
+                      'categories': ['Network activity'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('ip'):
+            misp_val = result['page']['ip']
+            misp_comment = "IP associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'ip-dst',
+                      'categories': ['Network activity'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('country'):
+            misp_val = 'Country: ' + result['page']['country']
+            if result['page'].get('city'):
+                misp_val += ', City: ' + result['page']['city']
+            misp_comment = "Location associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'text',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['page'].get('asn'):
+            misp_val = result['page']['asn']
+            misp_comment = "ASN associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'AS', 'categories': ['Network activity'], 'values': misp_val, 'comment': misp_comment})
+
+        if result['page'].get('asnname'):
+            misp_val = result['page']['asnname']
+            misp_comment = "ASN name associated with {} (source: urlscan.io)".format(query)
+            r.append({'types': 'text',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+    if result.get('stats'):
+        if result['stats'].get('malicious'):
+            log.debug('There is something in results > stats > malicious')
+            threat_list = set()
+
+            if 'matches' in result['meta']['processors']['gsb']['data']:
+                for item in result['meta']['processors']['gsb']['data']['matches']:
+                    if item['threatType']:
+                        threat_list.add(item['threatType'])
+
+            threat_list = ', '.join(threat_list)
+            log.debug('threat_list values are: \'' + threat_list + '\'')
+
+            if threat_list:
+                misp_val = '{} threat(s) detected'.format(threat_list)
+                misp_comment = '{} malicious indicator(s) were present on ' \
+                               '{} (source: urlscan.io)'.format(result['stats']['malicious'], query, threat_list)
+                r.append({'types': 'text',
+                          'categories': ['External analysis'],
+                          'values': misp_val,
+                          'comment': misp_comment})
+
+    if result.get('lists'):
+        if result['lists'].get('urls'):
+            for url in result['lists']['urls']:
+                url = url.lower()
+                if 'office' in url:
+                    misp_val = 'Possible Microsoft Office themed phishing page'
+                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                elif 'o365' in url or '0365' in url:
+                    misp_val = 'Possible Microsoft O365 themed phishing page'
+                    misp_comment = 'There was resource containing an \'O365\' string in the URL.'
+                elif 'microsoft' in url:
+                    misp_val = 'Possible Microsoft themed phishing page'
+                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                elif 'paypal' in url:
+                    misp_val = 'Possible PayPal themed phishing page'
+                    misp_comment = 'There was resource containing a \'PayPal\' string in the URL.'
+                elif 'onedrive' in url:
+                    misp_val = 'Possible OneDrive themed phishing page'
+                    misp_comment = 'There was resource containing a \'OneDrive\' string in the URL.'
+                elif 'docusign' in url:
+                    misp_val = 'Possible DocuSign themed phishing page'
+                    misp_comment = 'There was resource containing a \'DocuSign\' string in the URL'
+                r.append({'types': 'text',
+                          'categories': ['External analysis'],
+                          'values': misp_val,
+                          'comment': misp_comment})
+
+    if result.get('task'):
+        if result['task'].get('reportURL'):
+            misp_val = result['task']['reportURL']
+            misp_comment = 'Link to full report (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+        if result['task'].get('screenshotURL'):
+            image_url = result['task']['screenshotURL']
+            misp_comment = 'Link to screenshot (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': image_url,
+                      'comment': misp_comment})
+            ### TO DO ###
+            ### Add ability to add an in-line screenshot of the target website into an attribute
+            # screenshot = requests.get(image_url).content
+            # r.append({'types': ['attachment'],
+            #           'categories': ['External analysis'],
+            #           'values': image_url,
+            #           'image': str(base64.b64encode(screenshot), 'utf-8'),
+            #           'comment': 'Screenshot of website'})
+
+        if result['task'].get('domURL'):
+            misp_val = result['task']['domURL']
+            misp_comment = 'Link to DOM (source: urlscan.io)'
+            r.append({'types': 'link',
+                      'categories': ['External analysis'],
+                      'values': misp_val,
+                      'comment': misp_comment})
+
+    return r
+
+
+def introspection():
+    return mispattributes
+
+
+def version():
+    moduleinfo['config'] = moduleconfig
+    return moduleinfo
+
+
+class urlscanAPI():
+    def __init__(self, apikey=None, uuid=None):
+        self.key = apikey
+        self.uuid = uuid
+
+    def request(self, query):
+        log.debug('From request function with the parameter: ' + query)
+        payload = {'url': query}
+        headers = {'API-Key': self.key,
+                   'Content-Type': "application/json",
+                   'Cache-Control': "no-cache"}
+
+        # Troubleshooting problems with initial search request
+        log.debug('PAYLOAD: ' + json.dumps(payload))
+        log.debug('HEADERS: ' + json.dumps(headers))
+
+        search_url_string = "https://urlscan.io/api/v1/scan/"
+        response = requests.request("POST",
+                                    search_url_string,
+                                    data=json.dumps(payload),
+                                    headers=headers)
+
+        # HTTP 400 - Bad Request
+        if response.status_code == 400:
+            raise Exception('HTTP Error 400 - Bad Request')
+
+        # HTTP 404 - Not found
+        if response.status_code == 404:
+            raise Exception('HTTP Error 404 - These are not the droids you\'re looking for')
+
+        # Any other status code
+        if response.status_code != 200:
+            raise Exception('HTTP Error ' + str(response.status_code))
+
+        if response.text:
+            response = json.loads(response.content.decode("utf-8"))
+            time.sleep(3)
+            self.uuid = response['uuid']
+
+            # Strings for to check for errors on the results page
+            # Null response string for any unavailable resources
+            null_response_string = '"status": 404'
+            # Redirect string accounting for 301/302/303/307/308 status codes
+            redirect_string = '"status": 30'
+            # Normal response string with 200 status code
+            normal_response_string = '"status": 200'
+
+            results_url_string = "https://urlscan.io/api/v1/result/" + self.uuid
+            log.debug('Results URL: ' + results_url_string)
+
+            # Need to wait for results to process and check if they are valid
+            tries = 10
+            while tries >= 0:
+                results = requests.request("GET", results_url_string)
+                log.debug('Made a GET request')
+                results = results.content.decode("utf-8")
+                # checking if there is a 404 status code and no available resources
+                if null_response_string in results and \
+                        redirect_string not in results and \
+                        normal_response_string not in results:
+                    log.debug('Results not processed. Please check again later.')
+                    time.sleep(3)
+                    tries -= 1
+                else:
+                    return json.loads(results)
+            raise Exception('Results contained a 404 status error and could not be processed.')
+
+    def search_url(self, query):
+        log.debug('From search_url with parameter: ' + query)
+        return self.request(query)
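
Note on the class at the bottom of the new file: it implements a submit-then-poll flow — POST the URL to /api/v1/scan/, read the scan uuid from the response, then poll /api/v1/result/<uuid> until the scan has finished processing (urlscan.io answers 404 while a scan is still in flight). A condensed sketch of that flow under the same assumptions as the module; the retry count and sleep interval mirror it, and the example URL and key are placeholders:

    import json
    import time
    import requests

    def scan_and_wait(url, api_key, tries=10):
        # Submit the URL for scanning; the response body contains the scan's uuid
        resp = requests.post('https://urlscan.io/api/v1/scan/',
                             headers={'API-Key': api_key, 'Content-Type': 'application/json'},
                             data=json.dumps({'url': url}))
        resp.raise_for_status()
        uuid = resp.json()['uuid']
        # Poll until the result is ready; a 404 here means "still processing"
        while tries > 0:
            result = requests.get('https://urlscan.io/api/v1/result/' + uuid)
            if result.status_code == 200:
                return result.json()
            time.sleep(3)
            tries -= 1
        raise Exception('Scan results were not ready in time')

    # report = scan_and_wait('http://example.com', 'YOUR_API_KEY')  # placeholder URL and key
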
From a697f653822b893d95a4142a4d88cbf66821f208 Mon Sep 17 00:00:00 2001
From: David J
Date: Tue, 14 Aug 2018 10:51:15 -0500
Subject: [PATCH 5/7] Add error handling for DNS failures, reduce imports, and simplify misp_comments

---
 misp_modules/modules/expansion/urlscan.py | 78 +++++++++++------------
 1 file changed, 36 insertions(+), 42 deletions(-)

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
index 8f4067c..a0adc25 100644
--- a/misp_modules/modules/expansion/urlscan.py
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -3,8 +3,6 @@ import requests
 import logging
 import sys
 import time
-# Need base64 if encoding data for attachments, but disabled for now
-# import base64
 
 log = logging.getLogger('urlscan')
 log.setLevel(logging.DEBUG)
@@ -15,18 +13,19 @@ ch.setFormatter(formatter)
 log.addHandler(ch)
 
 moduleinfo = {
-        'version': '0.1',
-        'author': 'Dave Johnson',
-        'description': 'Module to query urlscan.io',
-        'module-type': ['expansion']
-    }
+    'version': '0.1',
+    'author': 'Dave Johnson',
+    'description': 'Module to query urlscan.io',
+    'module-type': ['expansion']
+}
 
 moduleconfig = ['apikey']
 misperrors = {'error': 'Error'}
 mispattributes = {
-        'input': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url'],
-        'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
-    }
+    'input': ['hostname', 'domain', 'url'],
+    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+}
+
 
 def handler(q=False):
     if q is False:
@@ -51,8 +50,15 @@ def handler(q=False):
     if 'url' in request:
         r['results'] += lookup_indicator(client, request['url'])
 
+    # Return any errors generated from lookup to the UI and remove duplicates
+
     uniq = []
+    log.debug(r['results'])
     for item in r['results']:
+        log.debug(item)
+        if 'error' in item:
+            misperrors['error'] = item['error']
+            return misperrors
         if item not in uniq:
             uniq.append(item)
     r['results'] = uniq
     return r
@@ -63,10 +69,19 @@ def lookup_indicator(client, query):
     result = client.search_url(query)
     log.debug('RESULTS: ' + json.dumps(result))
     r = []
+    misp_comment = "{}: Enriched via the urlscan module".format(query)
+
+    # Determine if the page is reachable
+    for request in result['data']['requests']:
+        if request['response'].get('failed'):
+            if request['response']['failed']['errorText']:
+                log.debug('The page could not load')
+                r.append(
+                    {'error': 'Domain could not be resolved: {}'.format(request['response']['failed']['errorText'])})
+
     if result.get('page'):
         if result['page'].get('domain'):
             misp_val = result['page']['domain']
-            misp_comment = "Domain associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'domain',
                       'categories': ['Network activity'],
                       'values': misp_val,
@@ -74,17 +89,15 @@ def lookup_indicator(client, query):
 
         if result['page'].get('ip'):
             misp_val = result['page']['ip']
-            misp_comment = "IP associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'ip-dst',
                       'categories': ['Network activity'],
                       'values': misp_val,
                       'comment': misp_comment})
 
         if result['page'].get('country'):
-            misp_val = 'Country: ' + result['page']['country']
+            misp_val = 'country: ' + result['page']['country']
             if result['page'].get('city'):
-                misp_val += ', City: ' + result['page']['city']
-            misp_comment = "Location associated with {} (source: urlscan.io)".format(query)
+                misp_val += ', city: ' + result['page']['city']
             r.append({'types': 'text',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -92,12 +105,10 @@ def lookup_indicator(client, query):
 
         if result['page'].get('asn'):
             misp_val = result['page']['asn']
-            misp_comment = "ASN associated with {} (source: urlscan.io)".format(query)
-            r.append({'types': 'AS', 'categories': ['Network activity'], 'values': misp_val, 'comment': misp_comment})
+            r.append({'types': 'AS', 'categories': ['External analysis'], 'values': misp_val, 'comment': misp_comment})
 
         if result['page'].get('asnname'):
             misp_val = result['page']['asnname']
-            misp_comment = "ASN name associated with {} (source: urlscan.io)".format(query)
             r.append({'types': 'text',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -118,8 +129,6 @@ def lookup_indicator(client, query):
 
             if threat_list:
                 misp_val = '{} threat(s) detected'.format(threat_list)
-                misp_comment = '{} malicious indicator(s) were present on ' \
-                               '{} (source: urlscan.io)'.format(result['stats']['malicious'], query, threat_list)
                 r.append({'types': 'text',
                           'categories': ['External analysis'],
                           'values': misp_val,
@@ -130,23 +139,17 @@ def lookup_indicator(client, query):
             for url in result['lists']['urls']:
                 url = url.lower()
                 if 'office' in url:
-                    misp_val = 'Possible Microsoft Office themed phishing page'
-                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                    misp_val = "Possible Office-themed phishing"
                 elif 'o365' in url or '0365' in url:
-                    misp_val = 'Possible Microsoft O365 themed phishing page'
-                    misp_comment = 'There was resource containing an \'O365\' string in the URL.'
+                    misp_val = "Possible O365-themed phishing"
                 elif 'microsoft' in url:
-                    misp_val = 'Possible Microsoft themed phishing page'
-                    misp_comment = 'There was resource containing an \'Office\' string in the URL.'
+                    misp_val = "Possible Microsoft-themed phishing"
                 elif 'paypal' in url:
-                    misp_val = 'Possible PayPal themed phishing page'
-                    misp_comment = 'There was resource containing a \'PayPal\' string in the URL.'
+                    misp_val = "Possible PayPal-themed phishing"
                 elif 'onedrive' in url:
-                    misp_val = 'Possible OneDrive themed phishing page'
-                    misp_comment = 'There was resource containing a \'OneDrive\' string in the URL.'
+                    misp_val = "Possible OneDrive-themed phishing"
                 elif 'docusign' in url:
-                    misp_val = 'Possible DocuSign themed phishing page'
-                    misp_comment = 'There was resource containing a \'DocuSign\' string in the URL'
+                    misp_val = "Possible DocuSign-themed phishing"
                 r.append({'types': 'text',
                           'categories': ['External analysis'],
                           'values': misp_val,
@@ -155,7 +158,6 @@ def lookup_indicator(client, query):
     if result.get('task'):
         if result['task'].get('reportURL'):
             misp_val = result['task']['reportURL']
-            misp_comment = 'Link to full report (source: urlscan.io)'
             r.append({'types': 'link',
                       'categories': ['External analysis'],
                       'values': misp_val,
@@ -163,7 +165,6 @@ def lookup_indicator(client, query):
 
         if result['task'].get('screenshotURL'):
             image_url = result['task']['screenshotURL']
-            misp_comment = 'Link to screenshot (source: urlscan.io)'
             r.append({'types': 'link',
                       'categories': ['External analysis'],
                       'values': image_url,
@@ -177,14 +178,6 @@ def lookup_indicator(client, query):
             #           'image': str(base64.b64encode(screenshot), 'utf-8'),
             #           'comment': 'Screenshot of website'})
 
-        if result['task'].get('domURL'):
-            misp_val = result['task']['domURL']
-            misp_comment = 'Link to DOM (source: urlscan.io)'
-            r.append({'types': 'link',
-                      'categories': ['External analysis'],
-                      'values': misp_val,
-                      'comment': misp_comment})
-
     return r
 
 
@@ -262,6 +255,7 @@ class urlscanAPI():
                     tries -= 1
                 else:
                     return json.loads(results)
+
             raise Exception('Results contained a 404 status error and could not be processed.')
 
     def search_url(self, query):
From 7deeb95820e1e4099208496025807e6ae6ef1164 Mon Sep 17 00:00:00 2001
From: Christophe Vandeplas
Date: Tue, 21 Aug 2018 11:13:08 +0200
Subject: [PATCH 6/7] fix: ta_import - bugfixes

---
 .../import_mod/threatanalyzer_import.py       | 27 ++++++++++---------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/misp_modules/modules/import_mod/threatanalyzer_import.py b/misp_modules/modules/import_mod/threatanalyzer_import.py
index 916628e..2e3a507 100755
--- a/misp_modules/modules/import_mod/threatanalyzer_import.py
+++ b/misp_modules/modules/import_mod/threatanalyzer_import.py
@@ -15,7 +15,7 @@ misperrors = {'error': 'Error'}
 userConfig = {}
 inputSource = ['file']
 
-moduleinfo = {'version': '0.8', 'author': 'Christophe Vandeplas',
+moduleinfo = {'version': '0.9', 'author': 'Christophe Vandeplas',
               'description': 'Import for ThreatAnalyzer archive.zip/analysis.json files',
               'module-type': ['import']}
 
@@ -45,7 +45,7 @@ def handler(q=False):
                 if re.match(r"Analysis/proc_\d+/modified_files/mapping\.log", zip_file_name):
                     with zf.open(zip_file_name, mode='r', pwd=None) as fp:
                         file_data = fp.read()
-                        for line in file_data.decode().split('\n'):
+                        for line in file_data.decode("utf-8", 'ignore').split('\n'):
                             if not line:
                                 continue
                             if line.count('|') == 3:
@@ -55,7 +55,8 @@ def handler(q=False):
                                 l_fname = cleanup_filepath(l_fname)
                                 if l_fname:
                                     if l_size == 0:
-                                        pass  # FIXME create an attribute for the filename/path
+                                        results.append({'values': l_fname, 'type': 'filename', 'to_ids': True,
+                                                        'categories': ['Artifacts dropped', 'Payload delivery'], 'comment': ''})
                                     else:
                                         # file is a non empty sample, upload the sample later
                                         modified_files_mapping[l_md5] = l_fname
@@ -144,13 +145,14 @@ def process_analysis_json(analysis_json):
                     # )
                     yield({'values': connection_section_connection['@remote_hostname'], 'type': 'hostname', 'to_ids': True, 'comment': ''})
                 if 'http_command' in connection_section_connection:
-                    # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
-                    #     connection_section_connection['http_command']['@method'],  # comment
-                    #     connection_section_connection['http_command']['@url'])  # url
-                    # )
-                    val = cleanup_url(connection_section_connection['http_command']['@url'])
-                    if val:
-                        yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': connection_section_connection['http_command']['@method']})
+                    for http_command in connection_section_connection['http_command']:
+                        # print('connection_section_connection HTTP COMMAND: {}\t{}'.format(
+                        #     connection_section_connection['http_command']['@method'],  # comment
+                        #     connection_section_connection['http_command']['@url'])  # url
+                        # )
+                        val = cleanup_url(http_command['@url'])
+                        if val:
+                            yield({'values': val, 'type': 'url', 'categories': ['Network activity'], 'to_ids': True, 'comment': http_command['@method']})
 
                 if 'http_header' in connection_section_connection:
                     for http_header in connection_section_connection['http_header']:
@@ -453,9 +455,9 @@ def cleanup_filepath(item):
         '\\AppData\\Roaming\\Adobe\\Acrobat\\9.0\\UserCache.bin',
         '\\AppData\\Roaming\\Macromedia\\Flash Player\\macromedia.com\\support\\flashplayer\\sys\\settings.sol',
-        '\\AppData\\Roaming\Adobe\\Flash Player\\NativeCache\\',
+        '\\AppData\\Roaming\\Adobe\\Flash Player\\NativeCache\\',
         'C:\\Windows\\AppCompat\\Programs\\',
-        'C:\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
+        'C:\\~'  # caused by temp file created by MS Office when opening malicious doc/xls/...
     }
     if list_in_string(noise_substrings, item):
         return None
@@ -531,4 +533,3 @@ def introspection():
 def version():
     moduleinfo['config'] = moduleconfig
     return moduleinfo
-

From b0be965e576a1fb8e02d36bc4a34ecde5d967e4b Mon Sep 17 00:00:00 2001
From: SuRb0 <1809870+surbo@users.noreply.github.com>
Date: Thu, 30 Aug 2018 19:41:34 -0500
Subject: [PATCH 7/7] Update urlscan.py

Added hash to the search so you can take advantage of the new file download
function on urlscan.io. You can use this to pivot on file hashes and find
domains hosting the same malicious file.
---
 misp_modules/modules/expansion/urlscan.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/misp_modules/modules/expansion/urlscan.py b/misp_modules/modules/expansion/urlscan.py
index a0adc25..ecd1a50 100644
--- a/misp_modules/modules/expansion/urlscan.py
+++ b/misp_modules/modules/expansion/urlscan.py
@@ -22,8 +22,8 @@ moduleinfo = {
 moduleconfig = ['apikey']
 misperrors = {'error': 'Error'}
 mispattributes = {
-    'input': ['hostname', 'domain', 'url'],
-    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link']
+    'input': ['hostname', 'domain', 'url', 'hash'],
+    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link', 'hash']
 }
 
 
@@ -49,6 +49,8 @@ def handler(q=False):
         r['results'] += lookup_indicator(client, request['hostname'])
     if 'url' in request:
         r['results'] += lookup_indicator(client, request['url'])
+    if 'hash' in request:
+        r['results'] += lookup_indicator(client, request['hash'])
 
     # Return any errors generated from lookup to the UI and remove duplicates
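
Note on the final patch: with 'hash' accepted as an input attribute, an analyst can pivot from a file hash to the domains urlscan.io saw serving that file. A hypothetical query showing the shape of the request the module now accepts (the API key is a placeholder; the hash is the well-known EICAR test file SHA-256):

    import json

    # Hypothetical module invocation: the MISP server passes the query as JSON
    query = json.dumps({
        'config': {'apikey': 'YOUR_API_KEY'},  # placeholder key
        'hash': '275a021bbfb6489e54d471899f7db9d1663fc695ec2fe2a2c4538aabf651fd0f'  # EICAR SHA-256
    })
    # handler(q=query) would then search urlscan.io for scans involving this file hash
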