import json
import logging
import sys
import time

import requests

log = logging.getLogger('urlscan')
log.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter)
log.addHandler(ch)

moduleinfo = {
    'version': '0.1',
    'author': 'Dave Johnson',
    'description': 'Module to query urlscan.io',
    'module-type': ['expansion']
}
moduleconfig = ['apikey']
misperrors = {'error': 'Error'}
mispattributes = {
    'input': ['hostname', 'domain', 'url'],
    'output': ['hostname', 'domain', 'ip-src', 'ip-dst', 'url', 'text', 'link', 'hash']
}


def handler(q=False):
    if q is False:
        return False
    request = json.loads(q)
    # Fail early if no config or no API key was supplied, so the client is
    # never referenced while undefined
    if request.get('config') is None or request['config'].get('apikey') is None:
        misperrors['error'] = 'urlscan apikey is missing'
        return misperrors
    client = urlscanAPI(request['config']['apikey'])

    r = {'results': []}

    if 'ip-src' in request:
        r['results'] += lookup_indicator(client, request['ip-src'])
    if 'ip-dst' in request:
        r['results'] += lookup_indicator(client, request['ip-dst'])
    if 'domain' in request:
        r['results'] += lookup_indicator(client, request['domain'])
    if 'hostname' in request:
        r['results'] += lookup_indicator(client, request['hostname'])
    if 'url' in request:
        r['results'] += lookup_indicator(client, request['url'])

    # Return any errors generated from lookup to the UI and remove duplicates
    uniq = []
    log.debug(r['results'])
    for item in r['results']:
        log.debug(item)
        if 'error' in item:
            misperrors['error'] = item['error']
            return misperrors
        if item not in uniq:
            uniq.append(item)
    r['results'] = uniq
    return r


def lookup_indicator(client, query):
    result = client.search_url(query)
    log.debug('RESULTS: ' + json.dumps(result))
    r = []
    misp_comment = "{}: Enriched via the urlscan module".format(query)

    # Determine if the page is reachable
    for request in result['data']['requests']:
        if request['response'].get('failed'):
            if request['response']['failed']['errorText']:
                log.debug('The page could not load')
                r.append(
                    {'error': 'Domain could not be resolved: {}'.format(request['response']['failed']['errorText'])})

    if result.get('page'):
        if result['page'].get('domain'):
            misp_val = result['page']['domain']
            r.append({'types': 'domain',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('ip'):
            misp_val = result['page']['ip']
            r.append({'types': 'ip-dst',
                      'categories': ['Network activity'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('country'):
            misp_val = 'country: ' + result['page']['country']
            if result['page'].get('city'):
                misp_val += ', city: ' + result['page']['city']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('asn'):
            misp_val = result['page']['asn']
            r.append({'types': 'AS',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['page'].get('asnname'):
            misp_val = result['page']['asnname']
            r.append({'types': 'text',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

    if result.get('stats'):
        if result['stats'].get('malicious'):
            log.debug('There is something in results > stats > malicious')
            threat_list = set()

            if 'matches' in result['meta']['processors']['gsb']['data']:
                for item in result['meta']['processors']['gsb']['data']['matches']:
                    if item['threatType']:
                        threat_list.add(item['threatType'])

            threat_list = ', '.join(threat_list)
            log.debug('threat_list values are: \'' + threat_list + '\'')
            if threat_list:
                misp_val = '{} threat(s) detected'.format(threat_list)
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('lists'):
        if result['lists'].get('urls'):
            for url in result['lists']['urls']:
                url = url.lower()
                if 'office' in url:
                    misp_val = "Possible Office-themed phishing"
                elif 'o365' in url or '0365' in url:
                    misp_val = "Possible O365-themed phishing"
                elif 'microsoft' in url:
                    misp_val = "Possible Microsoft-themed phishing"
                elif 'paypal' in url:
                    misp_val = "Possible PayPal-themed phishing"
                elif 'onedrive' in url:
                    misp_val = "Possible OneDrive-themed phishing"
                elif 'docusign' in url:
                    misp_val = "Possible DocuSign-themed phishing"
                else:
                    # No known phishing theme matched: skip this URL rather
                    # than append a stale or undefined misp_val
                    continue
                r.append({'types': 'text',
                          'categories': ['External analysis'],
                          'values': misp_val,
                          'comment': misp_comment})

    if result.get('task'):
        if result['task'].get('reportURL'):
            misp_val = result['task']['reportURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': misp_val,
                      'comment': misp_comment})

        if result['task'].get('screenshotURL'):
            image_url = result['task']['screenshotURL']
            r.append({'types': 'link',
                      'categories': ['External analysis'],
                      'values': image_url,
                      'comment': misp_comment})
            # ## TO DO ###
            # ## Add ability to add an in-line screenshot of the target website into an attribute
            # screenshot = requests.get(image_url).content
            # r.append({'types': ['attachment'],
            #           'categories': ['External analysis'],
            #           'values': image_url,
            #           'image': str(base64.b64encode(screenshot), 'utf-8'),
            #           'comment': 'Screenshot of website'})

    return r


def introspection():
    return mispattributes


def version():
    moduleinfo['config'] = moduleconfig
    return moduleinfo


class urlscanAPI():
    def __init__(self, apikey=None, uuid=None):
        self.key = apikey
        self.uuid = uuid

    def request(self, query):
        log.debug('From request function with the parameter: ' + query)
        payload = {'url': query}
        headers = {'API-Key': self.key,
                   'Content-Type': "application/json",
                   'Cache-Control': "no-cache"}

        # Troubleshooting problems with the initial search request
        log.debug('PAYLOAD: ' + json.dumps(payload))
        log.debug('HEADERS: ' + json.dumps(headers))

        search_url_string = "https://urlscan.io/api/v1/scan/"
        response = requests.request("POST",
                                    search_url_string,
                                    data=json.dumps(payload),
                                    headers=headers)

        # HTTP 400 - Bad Request
        if response.status_code == 400:
            raise Exception('HTTP Error 400 - Bad Request')

        # HTTP 404 - Not found
        if response.status_code == 404:
            raise Exception('HTTP Error 404 - These are not the droids you\'re looking for')

        # Any other status code
        if response.status_code != 200:
            raise Exception('HTTP Error ' + str(response.status_code))

        if response.text:
            response = json.loads(response.content.decode("utf-8"))
            time.sleep(3)
            self.uuid = response['uuid']

        # Strings to check for errors on the results page
        # Null response string for any unavailable resources
        null_response_string = '"status": 404'
        # Redirect string accounting for 301/302/303/307/308 status codes
        redirect_string = '"status": 30'
        # Normal response string with 200 status code
        normal_response_string = '"status": 200'

        results_url_string = "https://urlscan.io/api/v1/result/" + self.uuid
        log.debug('Results URL: ' + results_url_string)

        # Need to wait for results to process and check if they are valid
        tries = 10
        while tries >= 0:
            results = requests.request("GET", results_url_string)
            log.debug('Made a GET request')
            results = results.content.decode("utf-8")
            # Checking if there is a 404 status code and no available resources
            if null_response_string in results and \
                    redirect_string not in results and \
                    normal_response_string not in results:
                log.debug('Results not processed. Please check again later.')
                time.sleep(3)
                tries -= 1
            else:
                return json.loads(results)

        raise Exception('Results contained a 404 status error and could not be processed.')

    def search_url(self, query):
        log.debug('From search_url with parameter: ' + query)
        return self.request(query)
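
# A minimal local test sketch, not part of the MISP module contract: it builds
# the same JSON envelope MISP would send to handler() and submits a single URL.
# The URLSCAN_APIKEY environment variable is a hypothetical name chosen here
# for convenience; any valid urlscan.io API key will do.
if __name__ == '__main__':
    import os
    test_query = json.dumps({
        'config': {'apikey': os.environ.get('URLSCAN_APIKEY', '')},
        'url': 'https://example.com'
    })
    # Prints either {'results': [...]} or a misperrors dict on failure
    print(json.dumps(handler(test_query), indent=2))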