mirror of https://github.com/CIRCL/lookyloo
chg: Improve urlscan support, get results.
parent
7933670941
commit
3436f5bd4e
|
@ -33,7 +33,8 @@
|
||||||
"UrlScan": {
|
"UrlScan": {
|
||||||
"apikey": null,
|
"apikey": null,
|
||||||
"autosubmit": false,
|
"autosubmit": false,
|
||||||
"allow_auto_trigger": false
|
"allow_auto_trigger": false,
|
||||||
|
"force_visibility": false
|
||||||
},
|
},
|
||||||
"_notes": {
|
"_notes": {
|
||||||
"apikey": "null disables the module. Pass a string otherwise.",
|
"apikey": "null disables the module. Pass a string otherwise.",
|
||||||
|
|
|
@ -398,10 +398,15 @@ class Lookyloo():
|
||||||
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_uuid}) is cached.')
|
self.logger.warning(f'Unable to trigger the modules unless the tree ({capture_uuid}) is cached.')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
capture_cache = self.capture_cache(capture_uuid)
|
||||||
|
|
||||||
self.pi.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
self.pi.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
||||||
self.vt.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
self.vt.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
||||||
self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
self.uwhois.capture_default_trigger(ct, force=force, auto_trigger=auto_trigger)
|
||||||
self.urlscan.capture_default_trigger(self.get_info(capture_uuid), force=force, auto_trigger=auto_trigger)
|
self.urlscan.capture_default_trigger(
|
||||||
|
self.get_info(capture_uuid),
|
||||||
|
visibility='unlisted' if (capture_cache and capture_cache.no_index) else 'public',
|
||||||
|
force=force, auto_trigger=auto_trigger)
|
||||||
|
|
||||||
def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
|
def get_modules_responses(self, capture_uuid: str, /) -> Optional[Dict[str, Any]]:
|
||||||
'''Get the responses of the modules from the cached responses on the disk'''
|
'''Get the responses of the modules from the cached responses on the disk'''
|
||||||
|
@ -426,8 +431,14 @@ class Lookyloo():
|
||||||
else:
|
else:
|
||||||
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
|
to_return['pi'][ct.root_hartree.har.root_url] = self.pi.get_url_lookup(ct.root_hartree.har.root_url)
|
||||||
if self.urlscan.available:
|
if self.urlscan.available:
|
||||||
|
info = self.get_info(capture_uuid)
|
||||||
to_return['urlscan'] = {'submission': {}, 'result': {}}
|
to_return['urlscan'] = {'submission': {}, 'result': {}}
|
||||||
to_return['urlscan']['submission'] = self.urlscan.url_submit(self.get_info(capture_uuid))
|
to_return['urlscan']['submission'] = self.urlscan.get_url_submission(info)
|
||||||
|
if to_return['urlscan']['submission'] and 'uuid' in to_return['urlscan']['submission']:
|
||||||
|
# The submission was done, try to get the results
|
||||||
|
result = self.urlscan.url_result(info)
|
||||||
|
if 'error' not in result:
|
||||||
|
to_return['urlscan']['result'] = result
|
||||||
return to_return
|
return to_return
|
||||||
|
|
||||||
def get_misp_occurrences(self, capture_uuid: str, /) -> Optional[Dict[str, Set[str]]]:
|
def get_misp_occurrences(self, capture_uuid: str, /) -> Optional[Dict[str, Set[str]]]:
|
||||||
|
@ -593,7 +604,9 @@ class Lookyloo():
|
||||||
self.logger.warning(f'No cache available for {capture_dir}.')
|
self.logger.warning(f'No cache available for {capture_dir}.')
|
||||||
return None
|
return None
|
||||||
try:
|
try:
|
||||||
return CaptureCache(cached)
|
cc = CaptureCache(cached)
|
||||||
|
self._captures_index[cc.uuid] = cc
|
||||||
|
return cc
|
||||||
except LookylooException as e:
|
except LookylooException as e:
|
||||||
self.logger.warning(f'Cache ({capture_dir}) is invalid ({e}): {json.dumps(cached, indent=2)}')
|
self.logger.warning(f'Cache ({capture_dir}) is invalid ({e}): {json.dumps(cached, indent=2)}')
|
||||||
return None
|
return None
|
||||||
|
|
|
@ -445,7 +445,6 @@ class VirusTotal():
|
||||||
if not self.available:
|
if not self.available:
|
||||||
raise ConfigError('VirusTotal not available, probably no API key')
|
raise ConfigError('VirusTotal not available, probably no API key')
|
||||||
|
|
||||||
url_id = vt.url_id(url)
|
|
||||||
url_storage_dir = self.__get_cache_directory(url)
|
url_storage_dir = self.__get_cache_directory(url)
|
||||||
url_storage_dir.mkdir(parents=True, exist_ok=True)
|
url_storage_dir.mkdir(parents=True, exist_ok=True)
|
||||||
vt_file = url_storage_dir / date.today().isoformat()
|
vt_file = url_storage_dir / date.today().isoformat()
|
||||||
|
@ -458,6 +457,7 @@ class VirusTotal():
|
||||||
if not force and vt_file.exists():
|
if not force and vt_file.exists():
|
||||||
return
|
return
|
||||||
|
|
||||||
|
url_id = vt.url_id(url)
|
||||||
for _ in range(3):
|
for _ in range(3):
|
||||||
try:
|
try:
|
||||||
url_information = self.client.get_object(f"/urls/{url_id}")
|
url_information = self.client.get_object(f"/urls/{url_id}")
|
||||||
|
@ -476,6 +476,8 @@ class VirusTotal():
|
||||||
class UrlScan():
|
class UrlScan():
|
||||||
|
|
||||||
def __init__(self, config: Dict[str, Any]):
|
def __init__(self, config: Dict[str, Any]):
|
||||||
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
|
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||||
if not config.get('apikey'):
|
if not config.get('apikey'):
|
||||||
self.available = False
|
self.available = False
|
||||||
return
|
return
|
||||||
|
@ -494,6 +496,19 @@ class UrlScan():
|
||||||
if config.get('autosubmit'):
|
if config.get('autosubmit'):
|
||||||
self.autosubmit = True
|
self.autosubmit = True
|
||||||
|
|
||||||
|
if config.get('force_visibility'):
|
||||||
|
# Cases:
|
||||||
|
# 1. False: unlisted for hidden captures / public for others
|
||||||
|
# 2. "key": default visibility defined on urlscan.io
|
||||||
|
# 3. "public", "unlisted", "private": is set for all submissions
|
||||||
|
self.force_visibility = config['force_visibility']
|
||||||
|
else:
|
||||||
|
self.force_visibility = False
|
||||||
|
|
||||||
|
if self.force_visibility not in [False, 'key', 'public', 'unlisted', 'private']:
|
||||||
|
self.logger.warning("Invalid value for force_visibility, default to False (unlisted for hidden captures / public for others).")
|
||||||
|
self.force_visibility = False
|
||||||
|
|
||||||
self.storage_dir_urlscan = get_homedir() / 'urlscan'
|
self.storage_dir_urlscan = get_homedir() / 'urlscan'
|
||||||
self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
|
self.storage_dir_urlscan.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
@ -503,8 +518,10 @@ class UrlScan():
|
||||||
m.update(to_hash.encode())
|
m.update(to_hash.encode())
|
||||||
return self.storage_dir_urlscan / m.hexdigest()
|
return self.storage_dir_urlscan / m.hexdigest()
|
||||||
|
|
||||||
def get_url_submission(self, url: str, useragent: str, referer: str) -> Optional[Dict[str, Any]]:
|
def get_url_submission(self, capture_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||||||
url_storage_dir = self.__get_cache_directory(url, useragent, referer)
|
url_storage_dir = self.__get_cache_directory(capture_info['url'],
|
||||||
|
capture_info['user_agent'],
|
||||||
|
capture_info['referer']) / 'submit'
|
||||||
if not url_storage_dir.exists():
|
if not url_storage_dir.exists():
|
||||||
return None
|
return None
|
||||||
cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
|
cached_entries = sorted(url_storage_dir.glob('*'), reverse=True)
|
||||||
|
@ -514,18 +531,28 @@ class UrlScan():
|
||||||
with cached_entries[0].open() as f:
|
with cached_entries[0].open() as f:
|
||||||
return json.load(f)
|
return json.load(f)
|
||||||
|
|
||||||
def capture_default_trigger(self, capture_info: Dict[str, Any], /, *, force: bool=False, auto_trigger: bool=False) -> None:
|
def capture_default_trigger(self, capture_info: Dict[str, Any], /, visibility: str, *, force: bool=False, auto_trigger: bool=False) -> None:
|
||||||
'''Run the module on the initial URL'''
|
'''Run the module on the initial URL'''
|
||||||
if not self.available:
|
if not self.available:
|
||||||
return None
|
return None
|
||||||
if auto_trigger and not self.allow_auto_trigger:
|
if auto_trigger and not self.allow_auto_trigger:
|
||||||
|
# NOTE: if auto_trigger is true, it means the request comes from the
|
||||||
|
# auto trigger feature (disabled by default)
|
||||||
|
# Each module can disable auto-trigger to avoid depleting the
|
||||||
|
# API limits.
|
||||||
return None
|
return None
|
||||||
|
|
||||||
self.url_submit(capture_info, force)
|
self.url_submit(capture_info, visibility, force)
|
||||||
|
|
||||||
def __submit_url(self, url: str, useragent: str, referer: str) -> Dict:
|
def __submit_url(self, url: str, useragent: str, referer: str, visibility: str) -> Dict:
|
||||||
data = {"url": url, "visibility": "unlisted",
|
data = {"url": url, 'customagent': useragent, 'referer': referer}
|
||||||
'customagent': useragent, 'referer': referer}
|
if self.force_visibility is False:
|
||||||
|
data["visibility"] = visibility
|
||||||
|
elif self.force_visibility in ["public", "unlisted", "private"]:
|
||||||
|
data["visibility"] = self.force_visibility
|
||||||
|
else:
|
||||||
|
# default to key config on urlscan.io website
|
||||||
|
pass
|
||||||
response = self.client.post('https://urlscan.io/api/v1/scan/', json=data)
|
response = self.client.post('https://urlscan.io/api/v1/scan/', json=data)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json()
|
return response.json()
|
||||||
|
@ -535,7 +562,7 @@ class UrlScan():
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
return response.json()
|
return response.json()
|
||||||
|
|
||||||
def url_submit(self, capture_info: Dict[str, Any], force: bool=False) -> Dict:
|
def url_submit(self, capture_info: Dict[str, Any], visibility: str, force: bool=False) -> Dict:
|
||||||
'''Lookup an URL on urlscan.io
|
'''Lookup an URL on urlscan.io
|
||||||
Note: force means 2 things:
|
Note: force means 2 things:
|
||||||
* (re)scan of the URL
|
* (re)scan of the URL
|
||||||
|
@ -561,7 +588,8 @@ class UrlScan():
|
||||||
try:
|
try:
|
||||||
response = self.__submit_url(capture_info['url'],
|
response = self.__submit_url(capture_info['url'],
|
||||||
capture_info['user_agent'],
|
capture_info['user_agent'],
|
||||||
capture_info['referer'])
|
capture_info['referer'],
|
||||||
|
visibility)
|
||||||
except requests.exceptions.HTTPError as e:
|
except requests.exceptions.HTTPError as e:
|
||||||
return {'error': e}
|
return {'error': e}
|
||||||
with urlscan_file_submit.open('w') as _f:
|
with urlscan_file_submit.open('w') as _f:
|
||||||
|
@ -569,9 +597,9 @@ class UrlScan():
|
||||||
return response
|
return response
|
||||||
return {'error': 'Submitting is not allowed by the configuration'}
|
return {'error': 'Submitting is not allowed by the configuration'}
|
||||||
|
|
||||||
def url_result(self, url: str, useragent: str, referer: str):
|
def url_result(self, capture_info: Dict[str, Any]):
|
||||||
'''Get the result from a submission.'''
|
'''Get the result from a submission.'''
|
||||||
submission = self.get_url_submission(url, useragent, referer)
|
submission = self.get_url_submission(capture_info)
|
||||||
if submission and 'uuid' in submission:
|
if submission and 'uuid' in submission:
|
||||||
uuid = submission['uuid']
|
uuid = submission['uuid']
|
||||||
if (self.storage_dir_urlscan / f'{uuid}.json').exists():
|
if (self.storage_dir_urlscan / f'{uuid}.json').exists():
|
||||||
|
|
|
@ -233,8 +233,8 @@ def hostnode_popup(tree_uuid: str, node_uuid: str):
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
|
||||||
def trigger_modules(tree_uuid: str):
|
def trigger_modules(tree_uuid: str):
|
||||||
force = True if request.args.get('force') else False
|
force = True if (request.args.get('force') and request.args.get('force') == 'True') else False
|
||||||
auto_trigger = True if request.args.get('auto_trigger') else False
|
auto_trigger = True if (request.args.get('auto_trigger') and request.args.get('auto_trigger') == 'True') else False
|
||||||
lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
|
lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
|
||||||
return redirect(url_for('modules', tree_uuid=tree_uuid))
|
return redirect(url_for('modules', tree_uuid=tree_uuid))
|
||||||
|
|
||||||
|
@ -392,11 +392,24 @@ def modules(tree_uuid: str):
|
||||||
continue
|
continue
|
||||||
pi_short_result[url] = full_report['results'][0]['tag_label']
|
pi_short_result[url] = full_report['results'][0]['tag_label']
|
||||||
|
|
||||||
urlscan_permaurl: str = ''
|
urlscan_to_display: Dict = {}
|
||||||
if 'urlscan' in modules_responses:
|
if 'urlscan' in modules_responses:
|
||||||
urlscan = modules_responses.pop('urlscan')
|
urlscan = modules_responses.pop('urlscan')
|
||||||
urlscan_permaurl = urlscan['submission']['result']
|
urlscan_to_display = {'permaurl': '', 'malicious': False, 'tags': []}
|
||||||
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result, urlscan=urlscan_permaurl)
|
if urlscan['submission'].get('result'):
|
||||||
|
urlscan_to_display['permaurl'] = urlscan['submission']['result']
|
||||||
|
if urlscan['result']:
|
||||||
|
# We have a result available, get the verdicts
|
||||||
|
if (urlscan['result'].get('verdicts')
|
||||||
|
and urlscan['result']['verdicts'].get('overall')):
|
||||||
|
if urlscan['result']['verdicts']['overall'].get('malicious') is not None:
|
||||||
|
urlscan_to_display['malicious'] = urlscan['result']['verdicts']['overall']['malicious']
|
||||||
|
if urlscan['result']['verdicts']['overall'].get('tags'):
|
||||||
|
urlscan_to_display['tags'] = urlscan['result']['verdicts']['overall']['tags']
|
||||||
|
else:
|
||||||
|
# unable to run the query, probably an invalid key
|
||||||
|
pass
|
||||||
|
return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result, urlscan=urlscan_to_display)
|
||||||
|
|
||||||
|
|
||||||
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
|
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
|
||||||
|
@ -648,7 +661,7 @@ def get_index_params(request):
|
||||||
show_error: bool = True
|
show_error: bool = True
|
||||||
category: str = ''
|
category: str = ''
|
||||||
if hide_captures_with_error:
|
if hide_captures_with_error:
|
||||||
show_error = True if request.args.get('show_error') else False
|
show_error = True if (request.args.get('show_error') and request.args.get('show_error') == 'True') else False
|
||||||
|
|
||||||
if enable_categorization:
|
if enable_categorization:
|
||||||
category = request.args['category'] if request.args.get('category') else ''
|
category = request.args['category'] if request.args.get('category') else ''
|
||||||
|
@ -805,7 +818,7 @@ def cookies_name_detail(cookie_name: str):
|
||||||
|
|
||||||
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
|
||||||
def body_hash_details(body_hash: str):
|
def body_hash_details(body_hash: str):
|
||||||
from_popup = request.args.get('from_popup')
|
from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
|
||||||
captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip())
|
captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip())
|
||||||
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup)
|
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup)
|
||||||
|
|
||||||
|
|
|
@ -1,30 +1,48 @@
|
||||||
{% from "macros.html" import shorten_string %}
|
{% from "macros.html" import shorten_string %}
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
|
{% if urlscan %}
|
||||||
|
<hr>
|
||||||
|
<center>
|
||||||
|
<h1 class="display-4">urlscan.io</h1>
|
||||||
|
<div>
|
||||||
|
<p>A scan was triggered for this capture,
|
||||||
|
<a href="{{urlscan['permaurl']}}">click to view it</a> on urlscan.io.</p>
|
||||||
|
{% if urlscan['malicious']%}
|
||||||
|
<p>It is considered malicious.</p>
|
||||||
|
{% endif%}
|
||||||
|
{% if urlscan['tags'] %}
|
||||||
|
<p>It is tagged as {{ ','.join(urlscan['tags']) }}.</p>
|
||||||
|
{% endif%}
|
||||||
|
|
||||||
|
</div>
|
||||||
|
</center>
|
||||||
|
{% endif%}
|
||||||
{% if vt %}
|
{% if vt %}
|
||||||
<center><h1 class="display-4">Virus Total</h1></center>
|
<hr>
|
||||||
{% for url, entries in vt.items() %}
|
<center><h1 class="display-4">Virus Total</h1></center>
|
||||||
<div class="border-top my-3"></div>
|
{% for url, entries in vt.items() %}
|
||||||
<center>
|
<div class="border-top my-3"></div>
|
||||||
<h3><small class="text-muted">URL</small>
|
<center>
|
||||||
{{ shorten_string(url, 50, with_title=True) }}
|
<h3><small class="text-muted">URL</small>
|
||||||
</h3>
|
{{ shorten_string(url, 50, with_title=True) }}
|
||||||
</center>
|
</h3>
|
||||||
{% if entries['malicious'] %}
|
</center>
|
||||||
<center>
|
{% if entries['malicious'] %}
|
||||||
<p class="lead">Detected as malicious by the following vendors</p>
|
<center>
|
||||||
<dl class="row">
|
<p class="lead">Detected as malicious by the following vendors</p>
|
||||||
{% for e in entries['malicious'] %}
|
<dl class="row">
|
||||||
<dt class="col-sm-3">{{ e[0] }}</dt>
|
{% for e in entries['malicious'] %}
|
||||||
<dd class="col-sm-3">{{ e[1] }}</dd>
|
<dt class="col-sm-3">{{ e[0] }}</dt>
|
||||||
{% endfor %}
|
<dd class="col-sm-3">{{ e[1] }}</dd>
|
||||||
</center>
|
{% endfor %}
|
||||||
</dl>
|
</center>
|
||||||
{% else %}
|
</dl>
|
||||||
<p class="lead">No vendors consider this URL as malicious.</p>
|
{% else %}
|
||||||
{% endif%}
|
<p class="lead">No vendors consider this URL as malicious.</p>
|
||||||
<h5 class="text-right"><a href="{{ entries['permaurl'] }}">Full report on VirusTotal</a></h5>
|
{% endif%}
|
||||||
{% endfor %}
|
<h5 class="text-right"><a href="{{ entries['permaurl'] }}">Full report on VirusTotal</a></h5>
|
||||||
|
{% endfor %}
|
||||||
{% endif%}
|
{% endif%}
|
||||||
{% if pi%}
|
{% if pi%}
|
||||||
<center><h1 class="display-4">Phishing Initiative</h1></center>
|
<center><h1 class="display-4">Phishing Initiative</h1></center>
|
||||||
|
@ -37,10 +55,4 @@
|
||||||
</center>
|
</center>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endif%}
|
{% endif%}
|
||||||
{% if urlscan %}
|
</div>
|
||||||
<center><h1 class="display-4">urlscan.io</h1></center>
|
|
||||||
<div>
|
|
||||||
<p>A scan was triggered for this capture, <a href="{{urlscan}}">click see it</a> on urlscan.io.</p>
|
|
||||||
<p>Note that if you get a 404, it probably means the capture is still ongoing.</p>
|
|
||||||
</div>
|
|
||||||
{% endif%}
|
|
||||||
|
|
|
@ -471,11 +471,18 @@
|
||||||
<div class="modal-dialog modal-xl" role="document">
|
<div class="modal-dialog modal-xl" role="document">
|
||||||
<div class="modal-content">
|
<div class="modal-content">
|
||||||
<div class="modal-header">
|
<div class="modal-header">
|
||||||
<h5 class="modal-title" id="modulesModalLabel">Reports from 3rd party services</h5>
|
<h4 class="modal-title" id="modulesModalLabel">
|
||||||
|
Reports from 3rd party services
|
||||||
|
</h4>
|
||||||
|
</br>
|
||||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||||
<span aria-hidden="true">×</span>
|
<span aria-hidden="true">×</span>
|
||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
|
</br>
|
||||||
|
<center><h5>Note that if you get an error when you click on a
|
||||||
|
link below, it probably means the capture is still ongoing.
|
||||||
|
Try reloading the page after a few seconds.</h5></center>
|
||||||
<div class="modal-body">
|
<div class="modal-body">
|
||||||
... loading results from 3rd party modules ...
|
... loading results from 3rd party modules ...
|
||||||
</div>
|
</div>
|
||||||
|
|
Loading…
Reference in New Issue