import json
import logging
import sys
import time

import requests

log = logging.getLogger('urlscan')
log.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
log.addHandler(ch)

moduleinfo = {
    'version': '0.1',
    'author': 'Dave Johnson',
    'description': 'Module to query urlscan.io',
    'module-type': ['expansion']
}

moduleconfig = ['apikey']
misperrors = {'error': 'Error'}
mispattributes = {
    'input': ['hostname', 'domain', 'url', 'hash'],
    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link', 'hash']
}

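# Illustrative sketch only (not part of the module contract): the handler below
# expects a JSON string carrying one of the attribute types listed in
# mispattributes['input'] plus the module config. The attribute value and the
# API key shown here are placeholders.
#
#   query = json.dumps({
#       'domain': 'example.com',
#       'config': {'apikey': 'YOUR-URLSCAN-API-KEY'}
#   })
#   handler(q=query)

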
def handler(q=False):
    if q is False:
        return False
    request = json.loads(q)
    if not request.get('config') or not request['config'].get('apikey'):
        misperrors['error'] = 'urlscan apikey is missing'
        return misperrors
    client = urlscanAPI(request['config']['apikey'])

    r = {'results': []}

    if 'ip-src' in request:
        r['results'] += lookup_indicator(client, request['ip-src'])
    if 'ip-dst' in request:
        r['results'] += lookup_indicator(client, request['ip-dst'])
    if 'domain' in request:
        r['results'] += lookup_indicator(client, request['domain'])
    if 'hostname' in request:
        r['results'] += lookup_indicator(client, request['hostname'])
    if 'url' in request:
        r['results'] += lookup_indicator(client, request['url'])
    if 'hash' in request:
        r['results'] += lookup_indicator(client, request['hash'])

    # Return any errors generated by the lookup to the UI and remove duplicates
    uniq = []
    log.debug(r['results'])
    for item in r['results']:
        log.debug(item)
        if 'error' in item:
            misperrors['error'] = item['error']
            return misperrors
        if item not in uniq:
            uniq.append(item)
    r['results'] = uniq
    return r


def lookup_indicator(client, query):
    result = client.search_url(query)
    log.debug('RESULTS: ' + json.dumps(result))
    r = []
    misp_comment = "{}: Enriched via the urlscan module".format(query)

    # Determine if the page is reachable
    for request in result['data']['requests']:
        if request['response'].get('failed'):
            if request['response']['failed']['errorText']:
                log.debug('The page could not load')
                r.append(
                    {'error': 'Domain could not be resolved: {}'.format(request['response']['failed']['errorText'])})

    if result.get('page'):
        if result['page'].get('domain'):
            misp_val = result['page']['domain']
            r.append({'types': 'domain',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('ip'):
            misp_val = result['page']['ip']
            r.append({'types': 'ip-dst',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('country'):
            misp_val = 'country: ' + result['page']['country']
            if result['page'].get('city'):
                misp_val += ', city: ' + result['page']['city']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('asn'):
            misp_val = result['page']['asn']
            r.append({'types': 'AS', 'categories': ['External analysis'], 'values': misp_val, 'comment': misp_comment})

        if result['page'].get('asnname'):
            misp_val = result['page']['asnname']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

    if result.get('stats'):
        if result['stats'].get('malicious'):
            log.debug('There is something in results > stats > malicious')
            threat_list = set()

            # The Google Safe Browsing processor may be absent, so walk the path defensively
            gsb_data = result.get('meta', {}).get('processors', {}).get('gsb', {}).get('data', {})
            if 'matches' in gsb_data:
                for item in gsb_data['matches']:
                    if item['threatType']:
                        threat_list.add(item['threatType'])

            threat_list = ', '.join(threat_list)
            log.debug("threat_list values are: '" + threat_list + "'")

            if threat_list:
                misp_val = '{} threat(s) detected'.format(threat_list)
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('lists'):
        if result['lists'].get('urls'):
            for url in result['lists']['urls']:
                url = url.lower()
                if 'office' in url:
                    misp_val = "Possible Office-themed phishing"
                elif 'o365' in url or '0365' in url:
                    misp_val = "Possible O365-themed phishing"
                elif 'microsoft' in url:
                    misp_val = "Possible Microsoft-themed phishing"
                elif 'paypal' in url:
                    misp_val = "Possible PayPal-themed phishing"
                elif 'onedrive' in url:
                    misp_val = "Possible OneDrive-themed phishing"
                elif 'docusign' in url:
                    misp_val = "Possible DocuSign-themed phishing"
                else:
                    # No phishing keyword matched; skip this URL instead of
                    # re-appending a stale (or undefined) misp_val
                    continue
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('task'):
        if result['task'].get('reportURL'):
            misp_val = result['task']['reportURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['task'].get('screenshotURL'):
            image_url = result['task']['screenshotURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': image_url,
                      'comment': misp_comment})
            # TODO: add the ability to attach an in-line screenshot of the target
            # website to an attribute, e.g.:
            # screenshot = requests.get(image_url).content
            # r.append({'types': ['attachment'],
            #           'categories': ['External analysis'],
            #           'values': image_url,
            #           'image': str(base64.b64encode(screenshot), 'utf-8'),
            #           'comment': 'Screenshot of website'})

    return r


def introspection():
    return mispattributes


def version():
    moduleinfo['config'] = moduleconfig
    return moduleinfo


class urlscanAPI():
    def __init__(self, apikey=None, uuid=None):
        self.key = apikey
        self.uuid = uuid

    def request(self, query):
        log.debug('From request function with the parameter: ' + query)
        payload = {'url': query}
        headers = {'API-Key': self.key,
                   'Content-Type': "application/json",
                   'Cache-Control': "no-cache"}

        # Troubleshooting problems with the initial search request
        log.debug('PAYLOAD: ' + json.dumps(payload))
        log.debug('HEADERS: ' + json.dumps(headers))

        search_url_string = "https://urlscan.io/api/v1/scan/"
        response = requests.request("POST",
                                    search_url_string,
                                    data=json.dumps(payload),
                                    headers=headers)

        # HTTP 400 - Bad Request
        if response.status_code == 400:
            raise Exception('HTTP Error 400 - Bad Request')

        # HTTP 404 - Not found
        if response.status_code == 404:
            raise Exception('HTTP Error 404 - These are not the droids you\'re looking for')

        # Any other status code
        if response.status_code != 200:
            raise Exception('HTTP Error ' + str(response.status_code))

        if response.text:
            response = json.loads(response.content.decode("utf-8"))
            time.sleep(3)
            self.uuid = response['uuid']

        # Strings to check for errors on the results page
        # Null response string for any unavailable resources
        null_response_string = '"status": 404'
        # Redirect string accounting for 301/302/303/307/308 status codes
        redirect_string = '"status": 30'
        # Normal response string with 200 status code
        normal_response_string = '"status": 200'

        results_url_string = "https://urlscan.io/api/v1/result/" + self.uuid
        log.debug('Results URL: ' + results_url_string)

        # Need to wait for the results to process and check that they are valid
        tries = 10
        while tries >= 0:
            results = requests.request("GET", results_url_string)
            log.debug('Made a GET request')
            results = results.content.decode("utf-8")
            # Check whether there is a 404 status code and no available resources
            if null_response_string in results and \
                    redirect_string not in results and \
                    normal_response_string not in results:
                log.debug('Results not processed. Please check again later.')
                time.sleep(3)
                tries -= 1
            else:
                return json.loads(results)

        raise Exception('Results contained a 404 status error and could not be processed.')

    def search_url(self, query):
        log.debug('From search_url with parameter: ' + query)
        return self.request(query)
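

# Minimal local test sketch, assuming this file is run directly with a valid
# urlscan.io API key exported as URLSCAN_APIKEY; the attribute value and the
# environment variable name are illustrative, not part of the module contract.
if __name__ == '__main__':
    import os

    _example_query = json.dumps({
        'domain': 'example.com',
        'config': {'apikey': os.environ.get('URLSCAN_APIKEY', '')}
    })
    print(json.dumps(handler(q=_example_query), indent=2))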