# Expansion module for MISP to query urlscan.io
# (mirrored from https://github.com/MISP/misp-modules)
import json
import logging
import sys
import time

import requests

log = logging.getLogger('urlscan')
log.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
log.addHandler(ch)

moduleinfo = {
    'version': '0.1',
    'author': 'Dave Johnson',
    'description': 'An expansion module to query urlscan.io.',
    'module-type': ['expansion'],
    'name': 'URLScan Lookup',
    'logo': 'urlscan.jpg',
    'requirements': ['Access to the urlscan.io API'],
    'features': 'This module takes a MISP attribute as input and queries urlscan.io with it.\n\nThe result of this query is then parsed and some data is mapped into MISP attributes in order to enrich the input attribute.',
    'references': ['https://urlscan.io/'],
    'input': 'A domain, hostname or url attribute.',
    'output': 'MISP attributes mapped from the result of the query on urlscan.io.',
}

moduleconfig = ['apikey']
misperrors = {'error': 'Error'}
mispattributes = {
    'input': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url'],
    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link', 'hash']
}

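# A minimal sketch of the request payload handler() receives from MISP,
# assuming a 'url' attribute is being enriched (the value and API key are
# hypothetical placeholders):
#
# {
#     "url": "https://example.com",
#     "config": {"apikey": "YOUR_URLSCAN_API_KEY"}
# }
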
def handler(q=False):
    if q is False:
        return False
    request = json.loads(q)
    if not request.get('config') or not request['config'].get('apikey'):
        misperrors['error'] = 'Urlscan apikey is missing'
        return misperrors
    client = urlscanAPI(request['config']['apikey'])

    r = {'results': []}

    if 'ip-src' in request:
        r['results'] += lookup_indicator(client, request['ip-src'])
    if 'ip-dst' in request:
        r['results'] += lookup_indicator(client, request['ip-dst'])
    if 'domain' in request:
        r['results'] += lookup_indicator(client, request['domain'])
    if 'hostname' in request:
        r['results'] += lookup_indicator(client, request['hostname'])
    if 'url' in request:
        r['results'] += lookup_indicator(client, request['url'])

    # Return any errors generated from the lookup to the UI and remove duplicates
    uniq = []
    log.debug(r['results'])
    for item in r['results']:
        log.debug(item)
        if 'error' in item:
            misperrors['error'] = item['error']
            return misperrors
        if item not in uniq:
            uniq.append(item)
    r['results'] = uniq
    return r


def lookup_indicator(client, query):
    result = client.search_url(query)
    log.debug('RESULTS: ' + json.dumps(result))
    r = []
    misp_comment = "{}: Enriched via the urlscan module".format(query)

    # Determine if the page is reachable
    for request in result['data']['requests']:
        if request['response'].get('failed'):
            if request['response']['failed']['errorText']:
                log.debug('The page could not load')
                r.append(
                    {'error': 'Domain could not be resolved: {}'.format(request['response']['failed']['errorText'])})

    if result.get('page'):
        if result['page'].get('domain'):
            misp_val = result['page']['domain']
            r.append({'types': 'domain',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('ip'):
            misp_val = result['page']['ip']
            r.append({'types': 'ip-dst',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('country'):
            misp_val = 'country: ' + result['page']['country']
            if result['page'].get('city'):
                misp_val += ', city: ' + result['page']['city']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('asn'):
            misp_val = result['page']['asn']
            r.append({'types': 'AS', 'categories': ['External analysis'], 'values': misp_val, 'comment': misp_comment})

        if result['page'].get('asnname'):
            misp_val = result['page']['asnname']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

    if result.get('stats'):
        if result['stats'].get('malicious'):
            log.debug('There is something in results > stats > malicious')
            threat_list = set()

            if 'matches' in result['meta']['processors']['gsb']['data']:
                for item in result['meta']['processors']['gsb']['data']['matches']:
                    if item['threatType']:
                        threat_list.add(item['threatType'])

            threat_list = ', '.join(threat_list)
            log.debug('threat_list values are: \'' + threat_list + '\'')

            if threat_list:
                misp_val = '{} threat(s) detected'.format(threat_list)
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('lists'):
        if result['lists'].get('urls'):
            for url in result['lists']['urls']:
                url = url.lower()
                if 'office' in url:
                    misp_val = "Possible Office-themed phishing"
                elif 'o365' in url or '0365' in url:
                    misp_val = "Possible O365-themed phishing"
                elif 'microsoft' in url:
                    misp_val = "Possible Microsoft-themed phishing"
                elif 'paypal' in url:
                    misp_val = "Possible PayPal-themed phishing"
                elif 'onedrive' in url:
                    misp_val = "Possible OneDrive-themed phishing"
                elif 'docusign' in url:
                    misp_val = "Possible DocuSign-themed phishing"
                else:
                    # Skip URLs that match no known phishing theme; otherwise a
                    # stale misp_val from a previous iteration (or an earlier
                    # section) would be appended.
                    continue
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('task'):
        if result['task'].get('reportURL'):
            misp_val = result['task']['reportURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['task'].get('screenshotURL'):
            image_url = result['task']['screenshotURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': image_url,
                      'comment': misp_comment})
            # ## TO DO ###
            # ## Add ability to add an in-line screenshot of the target website
            # ## into an attribute (would also require `import base64`)
            # screenshot = requests.get(image_url).content
            # r.append({'types': ['attachment'],
            #           'categories': ['External analysis'],
            #           'values': image_url,
            #           'image': str(base64.b64encode(screenshot), 'utf-8'),
            #           'comment': 'Screenshot of website'})

    return r


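# A trimmed sketch of the urlscan.io result fields consumed above; the values
# are illustrative, not real scan output:
#
# {
#     "data": {"requests": [{"response": {"failed": {"errorText": "..."}}}]},
#     "page": {"domain": "example.com", "ip": "93.184.216.34", "country": "US",
#              "city": "...", "asn": "AS15133", "asnname": "..."},
#     "stats": {"malicious": 1},
#     "meta": {"processors": {"gsb": {"data": {"matches": []}}}},
#     "lists": {"urls": ["https://example.com/login"]},
#     "task": {"reportURL": "https://urlscan.io/result/<uuid>/",
#              "screenshotURL": "https://urlscan.io/screenshots/<uuid>.png"}
# }
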
def introspection():
    return mispattributes


def version():
    moduleinfo['config'] = moduleconfig
    return moduleinfo


class urlscanAPI:
    def __init__(self, apikey=None, uuid=None):
        self.key = apikey
        self.uuid = uuid

    def request(self, query):
        log.debug('From request function with the parameter: ' + query)
        payload = {'url': query}
        headers = {'API-Key': self.key,
                   'Content-Type': "application/json",
                   'Cache-Control': "no-cache"}

        # Troubleshooting problems with the initial search request
        log.debug('PAYLOAD: ' + json.dumps(payload))
        log.debug('HEADERS: ' + json.dumps(headers))

        search_url_string = "https://urlscan.io/api/v1/scan/"
        response = requests.request("POST",
                                    search_url_string,
                                    data=json.dumps(payload),
                                    headers=headers)

        # HTTP 400 - Bad Request
        if response.status_code == 400:
            raise Exception('HTTP Error 400 - Bad Request')

        # HTTP 404 - Not found
        if response.status_code == 404:
            raise Exception('HTTP Error 404 - These are not the droids you\'re looking for')

        # Any other status code
        if response.status_code != 200:
            raise Exception('HTTP Error ' + str(response.status_code))

        if response.text:
            response = json.loads(response.content.decode("utf-8"))
            time.sleep(3)
            self.uuid = response['uuid']
        else:
            # Without a scan UUID there is no result to poll for
            raise Exception('Empty response received from the scan submission')

        # Strings to check for errors on the results page
        # Null response string for any unavailable resources
        null_response_string = '"status": 404'
        # Redirect string accounting for 301/302/303/307/308 status codes
        redirect_string = '"status": 30'
        # Normal response string with 200 status code
        normal_response_string = '"status": 200'

        results_url_string = "https://urlscan.io/api/v1/result/" + self.uuid
        log.debug('Results URL: ' + results_url_string)

        # Need to wait for results to process and check if they are valid
        tries = 10
        while tries >= 0:
            results = requests.request("GET", results_url_string)
            log.debug('Made a GET request')
            results = results.content.decode("utf-8")
            # Checking if there is a 404 status code and no available resources
            if null_response_string in results and \
                    redirect_string not in results and \
                    normal_response_string not in results:
                log.debug('Results not processed. Please check again later.')
                time.sleep(3)
                tries -= 1
            else:
                return json.loads(results)

        raise Exception('Results contained a 404 status error and could not be processed.')

    def search_url(self, query):
        log.debug('From search_url with parameter: ' + query)
        return self.request(query)
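

if __name__ == '__main__':
    # A minimal local test harness, not part of the MISP module contract: it
    # enriches one hypothetical URL attribute. Requires a valid urlscan.io API
    # key in place of the placeholder, and makes live requests to urlscan.io.
    test_query = json.dumps({
        'url': 'https://example.com',
        'config': {'apikey': 'YOUR_URLSCAN_API_KEY'}
    })
    print(json.dumps(handler(test_query), indent=2))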