lookyloo/website/web/__init__.py

762 lines
30 KiB
Python
Raw Normal View History

2019-01-30 14:30:01 +01:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
2020-10-23 20:51:15 +02:00
import base64
2019-01-30 14:30:01 +01:00
from zipfile import ZipFile, ZIP_DEFLATED
2021-01-08 13:03:23 +01:00
from io import BytesIO, StringIO
2019-01-30 14:30:01 +01:00
import os
2020-01-06 15:32:38 +01:00
from pathlib import Path
from datetime import datetime, timedelta, timezone
2020-05-23 03:37:24 +02:00
import json
2020-08-07 13:11:16 +02:00
import http
2020-11-25 15:27:34 +01:00
import calendar
2019-01-30 14:30:01 +01:00
2020-06-26 12:07:25 +02:00
from flask import Flask, render_template, request, send_file, redirect, url_for, Response, flash, jsonify
2020-01-06 15:32:38 +01:00
from flask_bootstrap import Bootstrap # type: ignore
2020-04-01 17:44:06 +02:00
from flask_httpauth import HTTPDigestAuth # type: ignore
2019-01-30 14:30:01 +01:00
2020-10-28 18:49:15 +01:00
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
2020-04-22 14:58:01 +02:00
from .proxied import ReverseProxied
2019-01-30 14:30:01 +01:00
2020-10-01 11:48:00 +02:00
from typing import Optional, Dict, Any, Union
2019-01-30 14:30:01 +01:00
2020-04-03 17:51:58 +02:00
import logging
2020-01-06 15:32:38 +01:00
# The Flask application; its WSGI callable is wrapped with ReverseProxied.
app: Flask = Flask(__name__)
app.wsgi_app = ReverseProxied(app.wsgi_app)  # type: ignore

# The session secret lives in a file in the home directory. It is (re)generated
# whenever the file is missing or holds fewer than 64 bytes.
secret_file_path: Path = get_homedir() / 'secret_key'
if not (secret_file_path.exists() and secret_file_path.stat().st_size >= 64):
    secret_file_path.write_bytes(os.urandom(64))
app.config['SECRET_KEY'] = secret_file_path.read_bytes()

Bootstrap(app)
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
app.debug = False
2020-04-01 17:44:06 +02:00
# Digest authentication handler used by the @auth.login_required routes.
auth = HTTPDigestAuth()

# Single Lookyloo instance shared by every view of this module.
lookyloo: Lookyloo = Lookyloo()

# Settings read once, at import time, from the 'generic' configuration section.
user = get_config('generic', 'cache_clean_user')
time_delta_on_index = get_config('generic', 'time_delta_on_index')
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
max_depth = get_config('generic', 'max_depth')

# Feature switches forwarded to the templates / checked by the views.
enable_mail_notification = get_config('generic', 'enable_mail_notification')
enable_context_by_users = get_config('generic', 'enable_context_by_users')
enable_categorization = get_config('generic', 'enable_categorization')
enable_bookmark = get_config('generic', 'enable_bookmark')
auto_trigger_modules = get_config('generic', 'auto_trigger_modules')

logging.basicConfig(level=get_config('generic', 'loglevel'))
2020-04-03 17:51:58 +02:00
2020-04-01 17:44:06 +02:00
2020-10-09 18:05:04 +02:00
# ##### Global methods passed to jinja
2020-05-23 03:37:24 +02:00
# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num, suffix='B'):
    """Return *num* formatted with a binary prefix (Ki, Mi, ... Yi) and *suffix*.

    The value is divided by 1024 until its absolute value drops below 1024
    and is rendered with one decimal, e.g. ``1536`` -> ``'1.5KiB'``.
    Anything too large for the 'Zi' prefix is expressed in 'Yi'.
    """
    for unit in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        if abs(num) < 1024.0:
            return f'{num:3.1f}{unit}{suffix}'
        num /= 1024.0
    return f'{num:.1f}Yi{suffix}'.strip()
2020-05-23 03:37:24 +02:00
app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)
2020-08-07 13:11:16 +02:00
def http_status_description(code: int) -> str:
    """Return the standard reason phrase for the HTTP status *code*.

    Unknown codes yield the string ``'Invalid code: <code>'``.
    """
    # http.HTTPStatus is available from a plain `import http`, whereas
    # `http.client` is a submodule that is only reachable if some other
    # module happened to import it first.
    try:
        return http.HTTPStatus(code).phrase
    except ValueError:
        return f'Invalid code: {code}'
2020-08-07 13:11:16 +02:00
app.jinja_env.globals.update(http_status_description=http_status_description)
2020-11-25 15:27:34 +01:00
def month_name(month: int) -> str:
    """Return the name of month number *month* (1 = January ... 12 = December).

    ``calendar.month_name[0]`` is the empty string and is returned as is.

    Raises:
        IndexError: if *month* is negative or greater than 12.
    """
    if month < 0:
        # A negative index would silently wrap around (-1 -> December).
        raise IndexError(f'month out of range: {month}')
    return calendar.month_name[month]
app.jinja_env.globals.update(month_name=month_name)
2020-10-09 18:05:04 +02:00
# ##### Generic/configuration methods #####
@app.after_request
def after_request(response):
    """Cache the client's User-Agent (if any), then return *response* untouched.

    The address passed along is the X-Real-IP header when it is set and
    non-empty, otherwise the remote address of the request.
    """
    user_agent = request.headers.get('User-Agent')
    if user_agent:
        client_ip = request.headers.get('X-Real-IP') or request.remote_addr
        lookyloo.cache_user_agents(user_agent, client_ip)
    return response
2020-04-01 17:44:06 +02:00
@auth.get_password
def get_pw(username: str) -> Optional[str]:
    """Return the password configured for *username*, or None if there is none."""
    # `user` is the mapping read from the 'cache_clean_user' setting.
    return user.get(username)
2020-10-09 18:05:04 +02:00
# ##### Hostnode level methods #####
2019-01-30 14:30:01 +01:00
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/hashes', methods=['GET'])
def hashes_hostnode(tree_uuid: str, node_uuid: str):
    """Send the hashes of a host node as a text attachment, one hash per line."""
    hashes = lookyloo.get_hashes(tree_uuid, hostnode_uuid=node_uuid)
    # The media type used to be the typo 'test/plain'.
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename=f'hashes.{node_uuid}.txt')
2019-01-30 14:30:01 +01:00
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/text', methods=['GET'])
def urls_hostnode(tree_uuid: str, node_uuid: str):
    """Send the names of the URLs of a host node as a text attachment, one per line."""
    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
    url_names = '\n'.join(url.name for url in hostnode.urls)
    # The media type used to be the typo 'test/plain'.
    return send_file(BytesIO(url_names.encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename=f'urls.{node_uuid}.txt')
2019-01-30 14:30:01 +01:00
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>', methods=['GET'])
def hostnode_popup(tree_uuid: str, node_uuid: str):
    """Render the popup describing one host node of a capture.

    Two tables mapping a key to an icon file name and a tooltip are handed
    to the template: one for response-related keys, one for request-related
    keys.
    """
    def entry(icon: str, tooltip: str) -> Dict[str, str]:
        # Shape of one row of the tables handed to the template.
        return {'icon': icon, 'tooltip': tooltip}

    keys_response = {
        'js': entry("javascript.png", 'The content of the response is a javascript'),
        'exe': entry("exe.png", 'The content of the response is an executable'),
        'css': entry("css.png", 'The content of the response is a CSS'),
        'font': entry("font.png", 'The content of the response is a font'),
        'html': entry("html.png", 'The content of the response is a HTML document'),
        'json': entry("json.png", 'The content of the response is a Json'),
        'text': entry("json.png", 'The content of the response is a text'),  # FIXME: Need new icon
        'iframe': entry("ifr.png", 'This content is loaded from an Iframe'),
        'image': entry("img.png", 'The content of the response is an image'),
        'unset_mimetype': entry("wtf.png", 'The type of content of the response is not set'),
        'octet-stream': entry("wtf.png", 'The type of content of the response is a binary blob'),
        'unknown_mimetype': entry("wtf.png", 'The type of content of the response is of an unknown type'),
        'video': entry("video.png", 'The content of the response is a video'),
        'livestream': entry("video.png", 'The content of the response is a livestream'),
        'response_cookie': entry("cookie_received.png", 'There are cookies in the response'),
        # redirect has to be last
        'redirect': entry("redirect.png", 'The request is redirected'),
        'redirect_to_nothing': entry("cookie_in_url.png", 'The request is redirected to an URL we do not have in the capture'),
    }
    keys_request = {
        'request_cookie': entry("cookie_read.png", 'There are cookies in the request'),
    }

    hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid)
    return render_template(
        'hostname_popup.html',
        tree_uuid=tree_uuid,
        hostnode_uuid=node_uuid,
        hostnode=hostnode,
        urls=urls,
        keys_response=keys_response,
        keys_request=keys_request,
        enable_context_by_users=enable_context_by_users,
    )
2020-05-23 03:37:24 +02:00
2020-10-09 18:05:04 +02:00
# ##### Tree level Methods #####
2020-05-26 17:45:04 +02:00
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/rebuild')
@auth.login_required
def rebuild_tree(tree_uuid: str):
    """Remove the pickle of a capture and redirect to its tree view.

    Any failure sends the user back to the index instead; the error is
    logged rather than silently discarded.
    """
    try:
        lookyloo.remove_pickle(tree_uuid)
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    except Exception:
        # Deliberately best-effort, but keep a trace of what went wrong.
        logging.getLogger(__name__).exception('Unable to rebuild the tree of %s', tree_uuid)
        return redirect(url_for('index'))
2019-01-30 14:30:01 +01:00
@app.route('/tree/<string:tree_uuid>/trigger_modules/', defaults={'force': False})
@app.route('/tree/<string:tree_uuid>/trigger_modules/<int:force>', methods=['GET'])
def trigger_modules(tree_uuid: str, force: int):
    """Trigger the modules on a capture, then redirect to the modules page.

    *force* comes from the URL as an integer; any non-zero value is passed
    on as True.
    """
    lookyloo.trigger_modules(tree_uuid, bool(force))
    return redirect(url_for('modules', tree_uuid=tree_uuid))
2020-10-28 18:49:15 +01:00
@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
def categories_capture(tree_uuid: str, query: str):
    """Render the categories of a capture and, optionally, the matches of a search.

    Without *query* the template receives ``matching_categories=None``;
    with a query it receives a dict (possibly empty) built from the
    taxonomies search results. Redirects to the tree view when
    categorization is disabled.
    """
    if not enable_categorization:
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    current_categories = lookyloo.categories_capture(tree_uuid)

    matching_categories = None
    if query:
        taxonomies = get_taxonomies()
        found = taxonomies.search(query)
        if found:
            matching_categories = {tag: taxonomies.revert_machinetag(tag) for tag in found}
        else:
            matching_categories = {}

    return render_template('categories_capture.html', tree_uuid=tree_uuid,
                           current_categories=current_categories,
                           matching_categories=matching_categories)
@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
def uncategorize_capture(tree_uuid: str, category: str):
    """Remove *category* from a capture and report the outcome as JSON."""
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.uncategorize_capture(tree_uuid, category)
    # This message used to claim the category had been *added*.
    return jsonify({'response': f'{category} successfully removed from {tree_uuid}'})
@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
def categorize_capture(tree_uuid: str, category: str):
    """Add *category* to a capture and report the outcome as JSON."""
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.categorize_capture(tree_uuid, category)
    # This message used to claim the category had been *removed*.
    return jsonify({'response': f'{category} successfully added to {tree_uuid}'})
2020-05-13 17:31:27 +02:00
@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
def stats(tree_uuid: str):
    """Render the statistics page of one capture."""
    capture_stats = lookyloo.get_statistics(tree_uuid)
    return render_template('statistics.html', uuid=tree_uuid, stats=capture_stats)
@app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
def modules(tree_uuid: str):
    """Render a condensed view of the module responses of a capture.

    For the 'vt' entry, each URL with a report is mapped to a permalink and
    the list of (vendor, result) pairs whose category is 'malicious'. For
    the 'pi' entry, each URL with a report is mapped to the tag label of
    its first result. Redirects to the tree view when there is no module
    response at all.
    """
    modules_responses = lookyloo.get_modules_responses(tree_uuid)
    if not modules_responses:
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    # VirusTotal cleanup: keep only the malicious verdicts.
    vt_short_result: Dict[str, Dict[str, Any]] = {}
    for url, full_report in modules_responses.pop('vt', {}).items():
        if not full_report:
            continue
        permaurl = f'https://www.virustotal.com/gui/url/{full_report["id"]}/detection'
        analysis = full_report['attributes']['last_analysis_results']
        flagged = [(vendor, result['result'])
                   for vendor, result in analysis.items()
                   if result['category'] == 'malicious']
        vt_short_result[url] = {'permaurl': permaurl, 'malicious': flagged}

    pi_short_result: Dict[str, str] = {
        url: full_report['results'][0]['tag_label']
        for url, full_report in modules_responses.pop('pi', {}).items()
        if full_report
    }

    return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result, pi=pi_short_result)
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
def redirects(tree_uuid: str):
    """Send the redirect chain of a capture as a text attachment, one URL per line.

    The capture URL is put first unless the chain already starts with it.
    A short plain-text message is returned when the cache entry or the
    redirects are missing.
    """
    cache = lookyloo.capture_cache(tree_uuid)
    # 'text/text' is not a valid media type; 'text/plain' is what is meant.
    if not cache:
        return Response('Not available.', mimetype='text/plain')
    if not cache.redirects:
        return Response('No redirects.', mimetype='text/plain')

    chain = list(cache.redirects)
    if cache.url != chain[0]:
        chain.insert(0, cache.url)
    return send_file(BytesIO('\n'.join(chain).encode()), mimetype='text/plain',
                     as_attachment=True, attachment_filename='redirects.txt')
2019-01-30 14:30:01 +01:00
@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
def image(tree_uuid: str):
    """Send the screenshot of a capture as the PNG attachment 'image.png'."""
    screenshot = lookyloo.get_screenshot(tree_uuid)
    return send_file(screenshot,
                     mimetype='image/png',
                     as_attachment=True,
                     attachment_filename='image.png')
@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
def thumbnail(tree_uuid: str, width: int):
    """Send a PNG thumbnail of the screenshot; *width* defaults to 64 via the route."""
    small_screenshot = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
    return send_file(small_screenshot, mimetype='image/png')
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
def html(tree_uuid: str):
    """Send the HTML of a capture as the attachment 'page.html'."""
    page = lookyloo.get_html(tree_uuid)
    return send_file(page,
                     mimetype='text/html',
                     as_attachment=True,
                     attachment_filename='page.html')
2020-05-26 17:45:04 +02:00
@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
def cookies(tree_uuid: str):
    """Send the cookies of a capture as the JSON attachment 'cookies.json'."""
    cookies_file = lookyloo.get_cookies(tree_uuid)
    return send_file(cookies_file,
                     mimetype='application/json',
                     as_attachment=True,
                     attachment_filename='cookies.json')
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
def hashes_tree(tree_uuid: str):
    """Send the hashes of a whole capture as a text attachment, one hash per line."""
    hashes = lookyloo.get_hashes(tree_uuid)
    # The media type used to be the typo 'test/plain'.
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename='hashes.txt')
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
def export(tree_uuid: str):
    """Send a capture as the zip attachment 'capture.zip'."""
    archive = lookyloo.get_capture(tree_uuid)
    return send_file(archive,
                     mimetype='application/zip',
                     as_attachment=True,
                     attachment_filename='capture.zip')
@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
@auth.login_required
def hide_capture(tree_uuid: str):
    """Hide a capture (authenticated users only), then go back to its tree view."""
    lookyloo.hide_capture(tree_uuid)
    tree_url = url_for('tree', tree_uuid=tree_uuid)
    return redirect(tree_url)
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/cache', methods=['GET'])
def cache_tree(tree_uuid: str):
    """Request the cache entry of a capture, then redirect to the index.

    The value returned by ``capture_cache`` is not used here.
    """
    lookyloo.capture_cache(tree_uuid)
    index_url = url_for('index')
    return redirect(index_url)
2020-05-11 19:58:46 +02:00
@app.route('/tree/<string:tree_uuid>/send_mail', methods=['POST', 'GET'])
def send_mail(tree_uuid: str):
    """Send a mail notification about a capture, then go back to its tree view.

    The 'email' and 'comment' form fields are optional and default to an
    empty string. Nothing is sent when mail notification is disabled.
    """
    tree_view = redirect(url_for('tree', tree_uuid=tree_uuid))
    if not enable_mail_notification:
        return tree_view

    email: str = request.form.get('email') or ''
    if '@' not in email:
        # skip clearly incorrect emails
        email = ''
    comment: str = request.form.get('comment') or ''

    lookyloo.send_mail(tree_uuid, email, comment)
    return tree_view
2019-01-30 14:30:01 +01:00
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/<string:urlnode_uuid>', methods=['GET'])
def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
    """Render the tree view of a capture.

    Redirects to the index (with a flashed error) when the UUID is the
    literal 'False', is unknown, or has no usable cache entry. An error
    stored in the cache entry is flashed but does not stop the rendering.
    A NoValidHarFile raised while preparing the page renders the error
    template instead.
    """
    if tree_uuid == 'False':
        flash("Unable to process your request. The domain may not exist, or splash isn't started", 'error')
        return redirect(url_for('index'))

    try:
        cache = lookyloo.capture_cache(tree_uuid)
    except MissingUUID:
        flash(f'Unable to find this UUID ({tree_uuid}). The capture may still be ongoing, try again later.', 'error')
        return redirect(url_for('index'))

    if not cache:
        flash('Invalid cache.', 'error')
        return redirect(url_for('index'))

    if cache.error:
        flash(cache.error, 'error')

    try:
        crawled_tree = lookyloo.context.contextualize_tree(lookyloo.get_crawled_tree(tree_uuid))
        b64_thumbnail = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=True)
        meta = lookyloo.get_meta(tree_uuid)
        template_args = {
            'tree_json': crawled_tree.to_json(),
            'start_time': crawled_tree.start_time.isoformat(),
            'user_agent': crawled_tree.user_agent,
            'root_url': crawled_tree.root_url,
            'tree_uuid': tree_uuid,
            'public_domain': lookyloo.public_domain,
            'screenshot_thumbnail': b64_thumbnail,
            'page_title': cache.title,
            'meta': meta,
            'enable_mail_notification': enable_mail_notification,
            'enable_context_by_users': enable_context_by_users,
            'enable_categorization': enable_categorization,
            'enable_bookmark': enable_bookmark,
            'blur_screenshot': blur_screenshot,
            'urlnode_uuid': urlnode_uuid,
            'auto_trigger_modules': auto_trigger_modules,
            'has_redirects': bool(cache.redirects),
        }
        return render_template('tree.html', **template_args)
    except NoValidHarFile as e:
        return render_template('error.html', error_message=e)
2019-01-30 14:30:01 +01:00
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/mark_as_legitimate', methods=['POST'])
@auth.login_required
def mark_as_legitimate(tree_uuid: str):
    """Mark a capture as legitimate (authenticated users only).

    When the request has a body, it is parsed as JSON and its members are
    forwarded as keyword arguments to ``add_to_legitimate``.
    """
    legitimate_entries = request.get_json(force=True) if request.data else {}
    lookyloo.add_to_legitimate(tree_uuid, **legitimate_entries)
    return jsonify({'message': 'Legitimate entry added.'})
# ##### helpers #####
def index_generic(show_hidden: bool=False, category: Optional[str]=None):
    """Render the index page for the selected cached captures.

    Args:
        show_hidden: when True, list *only* the captures flagged ``no_index``;
            otherwise list only the ones that are not flagged.
        category: when set, keep only the captures having this category.

    Captures older than the configured ``time_delta_on_index`` (if any) are
    left out. Entries are sorted by timestamp then URL, most recent first.
    """
    cut_time: Optional[datetime] = None
    if time_delta_on_index:
        # We want to filter the captures on the index.
        # Take "now" directly in UTC: relabelling the naive local time as UTC
        # shifts the cut-off by the server's UTC offset.
        cut_time = datetime.now(timezone.utc) - timedelta(**time_delta_on_index)

    titles = []
    for cached in lookyloo.sorted_cache:
        if not cached:
            continue
        if cut_time and cached.timestamp < cut_time:
            continue
        if category and (not cached.categories or category not in cached.categories):
            continue
        # Hidden view: only hidden captures. Normal view: only visible ones.
        if bool(show_hidden) != bool(cached.no_index):
            continue
        titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
                       cached.redirects, cached.incomplete_redirects))

    titles.sort(key=lambda entry: (entry[2], entry[3]), reverse=True)
    return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain)
2020-10-09 18:05:04 +02:00
# ##### Index level methods #####
@app.route('/', methods=['GET'])
def index():
    """Render the main index; a HEAD request only gets a short acknowledgement."""
    if request.method != 'HEAD':
        update_user_agents()
        return index_generic()
    # Just returns ack if the webserver is running
    return 'Ack'
@app.route('/hidden', methods=['GET'])
@auth.login_required
def index_hidden():
    """Render the index restricted to hidden captures (authenticated users only)."""
    only_hidden = True
    return index_generic(show_hidden=only_hidden)
2020-06-26 12:07:25 +02:00
@app.route('/category/<string:category>', methods=['GET'])
def index_category(category: str):
    """Render the index restricted to the captures having *category*."""
    wanted_category = category
    return index_generic(category=wanted_category)
@app.route('/cookies', methods=['GET'])
def cookies_lookup():
    """Render the list of indexed cookie names.

    Each row is (name, frequency, value of ``cookies_names_number_domains``
    for that name).
    """
    indexing = Indexing()
    cookies_names = []
    for name, freq in indexing.cookies_names:
        cookies_names.append((name, freq, indexing.cookies_names_number_domains(name)))
    return render_template('cookies.html', cookies_names=cookies_names)
@app.route('/ressources', methods=['GET'])
def ressources():
    """Render the list of indexed resources.

    Each row holds the hash, its frequency, its per-domain count, the known
    context for that hash (if any) and the capture / URL / host node UUIDs
    returned by ``get_hash_uuids``.
    """
    indexing = Indexing()

    def build_row(body_hash, freq):
        # Same call order as a plain loop body: domains, context, then UUIDs.
        domain_freq = indexing.ressources_number_domains(body_hash)
        known = lookyloo.context.find_known_content(body_hash)
        capture_uuid, url_uuid, hostnode_uuid = indexing.get_hash_uuids(body_hash)
        return (body_hash, freq, domain_freq, known.get(body_hash),
                capture_uuid, url_uuid, hostnode_uuid)

    rows = [build_row(body_hash, freq) for body_hash, freq in indexing.ressources]
    return render_template('ressources.html', ressources=rows)
@app.route('/categories', methods=['GET'])
def categories():
    """Render the list of indexed categories."""
    # A leftover debug print of the categories to stdout was removed here.
    indexing = Indexing()
    return render_template('categories.html', categories=indexing.categories)
2020-10-09 18:05:04 +02:00
@app.route('/rebuild_all')
@auth.login_required
def rebuild_all():
    """Call ``lookyloo.rebuild_all`` (authenticated users only), then go to the index."""
    lookyloo.rebuild_all()
    index_url = url_for('index')
    return redirect(index_url)
@app.route('/rebuild_cache')
@auth.login_required
def rebuild_cache():
    """Call ``lookyloo.rebuild_cache`` (authenticated users only), then go to the index."""
    lookyloo.rebuild_cache()
    index_url = url_for('index')
    return redirect(index_url)
@app.route('/submit', methods=['POST', 'GET'])
def submit():
    """Enqueue a capture described by the JSON body and return its UUID as plain text."""
    to_query = request.get_json(force=True)
    perma_uuid = lookyloo.enqueue_capture(to_query)
    # 'text/text' is not a valid media type; 'text/plain' is what is meant.
    return Response(perma_uuid, mimetype='text/plain')
@app.route('/capture', methods=['GET', 'POST'])
def capture_web():
    """Start a capture from the submitted form, or render the capture form.

    When the form carries a 'url', a capture is triggered and the user is
    redirected to its tree view. Otherwise the capture form is rendered
    with the available user agents.
    """
    url = request.form.get('url')
    if url:
        # check if the post request has the file part
        cookies_upload = request.files.get('cookies')
        if cookies_upload is not None and cookies_upload.filename:
            cookie_file = cookies_upload.stream
        else:
            cookie_file = None

        own_user_agent = request.headers.get('User-Agent')
        if request.form.get('personal_ua') and own_user_agent:
            # Capture with the submitter's own User-Agent: no OS / browser selection.
            user_agent = own_user_agent
            ua_os = None
            ua_browser = None
        else:
            user_agent = request.form.get('user_agent')
            ua_os = request.form.get('os')
            ua_browser = request.form.get('browser')

        # Form values are strings: convert them to the types capture() is
        # annotated with instead of silencing the type checker.
        try:
            depth = int(request.form.get('depth') or 1)
        except ValueError:
            depth = 1
        listing = bool(request.form.get('listing'))

        perma_uuid = lookyloo.capture(url=url, cookies_pseudofile=cookie_file,
                                      depth=depth, listing=listing,
                                      user_agent=user_agent,
                                      referer=request.form.get('referer'),  # type: ignore
                                      os=ua_os, browser=ua_browser)
        return redirect(url_for('tree', tree_uuid=perma_uuid))

    if get_config('generic', 'use_user_agents_users'):
        # NOTE: For now, just generate the file, so we have an idea of the size
        lookyloo.build_ua_file()
    user_agents: Dict[str, Any] = get_user_agents()
    # Tolerate a mapping that no longer has (or never had) this key.
    user_agents.pop('by_frequency', None)
    return render_template('capture.html', user_agents=user_agents,
                           max_depth=max_depth, personal_ua=request.headers.get('User-Agent'))
2020-10-09 18:05:04 +02:00
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
def cookies_name_detail(cookie_name: str):
    """Render the captures and domains associated with a cookie name."""
    investigation = lookyloo.get_cookie_name_investigator(cookie_name)
    captures, domains = investigation
    return render_template('cookie_name.html',
                           cookie_name=cookie_name,
                           domains=domains,
                           captures=captures)
@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
def body_hash_details(body_hash: str):
    """Render the captures and domains associated with a body hash."""
    investigation = lookyloo.get_body_hash_investigator(body_hash)
    captures, domains = investigation
    return render_template('body_hash.html',
                           body_hash=body_hash,
                           domains=domains,
                           captures=captures)
2020-06-26 12:07:25 +02:00
2020-11-25 12:07:01 +01:00
@app.route('/stats', methods=['GET'])
def statsfull():
    """Render the global statistics page."""
    global_stats = lookyloo.get_stats()
    return render_template('stats.html', stats=global_stats)
2020-10-09 18:05:04 +02:00
# ##### Methods related to a specific URLNode #####
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
def urlnode_request_cookies(tree_uuid: str, node_uuid: str):
    """Send the request cookies of a URL node as an indented JSON text attachment.

    Responds with a 404 when the node has no request cookie.
    """
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.request_cookie:
        # A Flask view must not return None (it raises and yields a 500).
        return Response('No cookies in the request.', status=404, mimetype='text/plain')
    return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename='request_cookies.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
    """Send the response cookies of a URL node as an indented JSON text attachment.

    Responds with a 404 when the node has no response cookie.
    """
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.response_cookie:
        # A Flask view must not return None (it raises and yields a 500).
        return Response('No cookies in the response.', status=404, mimetype='text/plain')
    return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename='response_cookies.txt')
2021-01-08 13:03:23 +01:00
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
    """Send the URLs found in the rendered page that were not requested.

    The list is the sorted difference between the URLs in the rendered
    page of the node and the keys of ``all_url_requests`` of the tree, one
    URL per line. Responds with a 404 when the node has no rendered HTML.
    """
    ct = lookyloo.get_crawled_tree(tree_uuid)
    urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
    if not urlnode.rendered_html:
        # A Flask view must not return None (it raises and yields a 500).
        return Response('No rendered content.', status=404, mimetype='text/plain')

    not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
                             - set(ct.root_hartree.all_url_requests.keys()))
    content = ''.join(f'{u}\n' for u in not_loaded_urls)
    return send_file(BytesIO(content.encode()), mimetype='text/plain',
                     as_attachment=True, attachment_filename='urls_in_rendered_content.txt')
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET'])
def urlnode_rendered_content(tree_uuid: str, node_uuid: str):
    """Send the rendered HTML of a URL node as a text attachment.

    Responds with a 404 when the node has no rendered HTML.
    """
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.rendered_html:
        # A Flask view must not return None (it raises and yields a 500).
        return Response('No rendered content.', status=404, mimetype='text/plain')
    return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain',
                     as_attachment=True, attachment_filename='rendered_content.txt')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
def urlnode_post_request(tree_uuid: str, node_uuid: str):
    """Send the data posted by a URL node as an attachment.

    dict / list payloads are pretty-printed as JSON. Textual payloads are
    sent as 'posted_data.txt' (text/plain), anything else as
    'posted_data.bin' (application/octet-stream). Responds with a 404 when
    the node has no posted data.
    """
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.posted_data:
        # A Flask view must not return None (it raises and yields a 500).
        return Response('No posted data.', status=404, mimetype='text/plain')

    posted: Union[str, bytes]
    if isinstance(urlnode.posted_data, (dict, list)):
        # JSON blob, pretty print.
        posted = json.dumps(urlnode.posted_data, indent=2)
    else:
        posted = urlnode.posted_data

    if isinstance(posted, str):
        return send_file(BytesIO(posted.encode()), mimetype='text/plain',
                         as_attachment=True, attachment_filename='posted_data.txt')
    return send_file(BytesIO(posted), mimetype='application/octet-stream',
                     as_attachment=True, attachment_filename='posted_data.bin')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
def get_ressource(tree_uuid: str, node_uuid: str):
    """Send a resource of a URL node wrapped in the zip attachment 'file.zip'.

    On POST the 'ressource_hash' form field selects the resource. When no
    resource is found, the archive contains a 'file.txt' saying so.
    """
    h_request = request.form.get('ressource_hash') if request.method == 'POST' else None
    ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_request)

    if ressource:
        filename, content, _mimetype = ressource
        member_name, member_data = filename, content.getvalue()
    else:
        member_name, member_data = 'file.txt', b'Unknown Hash'

    archive = BytesIO()
    with ZipFile(archive, 'w', ZIP_DEFLATED) as zfile:
        zfile.writestr(member_name, member_data)
    archive.seek(0)
    return send_file(archive, mimetype='application/zip',
                     as_attachment=True, attachment_filename='file.zip')
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['POST', 'GET'])
def get_ressource_preview(tree_uuid: str, node_uuid: str):
    """Send a resource of a URL node for preview, only if it is an image.

    On POST the 'ressource_hash' form field selects the resource. Responds
    with a 404 when the resource is unknown or its mimetype does not start
    with 'image'.
    """
    h_request = request.form.get('ressource_hash') if request.method == 'POST' else None
    ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_request)
    # A Flask view must not return None (it raises and yields a 500), so the
    # "nothing to preview" cases get an explicit response.
    if not ressource:
        return Response('Unknown Hash', status=404, mimetype='text/plain')
    filename, r, mimetype = ressource
    if not mimetype.startswith('image'):
        return Response('No preview available.', status=404, mimetype='text/plain')
    return send_file(r, mimetype=mimetype,
                     as_attachment=True, attachment_filename=filename)
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET'])
def hashes_urlnode(tree_uuid: str, node_uuid: str):
    """Send the hashes of a URL node as a text attachment, one hash per line."""
    hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid)
    # The media type used to be the typo 'test/plain'.
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='text/plain', as_attachment=True,
                     attachment_filename='hashes.txt')
2020-10-09 18:05:04 +02:00
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST'])
@auth.login_required
def add_context(tree_uuid: str, node_uuid: str):
    """Attach user-provided context to a resource (authenticated users only).

    The form may flag the resource as 'legitimate' and/or 'malicious' and
    give optional details for each flag. After storing the context, the
    user is redirected to the host node popup when 'callback_str' is
    'hostnode_popup', and to the resources page otherwise.
    """
    if not enable_context_by_users:
        return redirect(url_for('ressources'))

    context_data = request.form
    ressource_hash: str = context_data.get('hash_to_contextualize')  # type: ignore
    hostnode_uuid: str = context_data.get('hostnode_uuid')  # type: ignore
    callback_str: str = context_data.get('callback_str')  # type: ignore
    legitimate: bool = bool(context_data.get('legitimate'))
    malicious: bool = bool(context_data.get('malicious'))

    def collect(prefix: str, fields) -> Dict[str, str]:
        # Keep only the '<prefix>_<field>' form values that are set and non-empty.
        return {field: context_data[f'{prefix}_{field}']
                for field in fields if context_data.get(f'{prefix}_{field}')}

    details: Dict[str, Dict] = {'malicious': {}, 'legitimate': {}}
    if malicious:
        details['malicious'] = collect('malicious', ('type', 'target'))
    if legitimate:
        details['legitimate'] = collect('legitimate', ('domain', 'description'))

    lookyloo.add_context(tree_uuid, node_uuid, ressource_hash, legitimate, malicious, details)

    if callback_str == 'hostnode_popup':
        return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
    # 'ressources', and any missing/unknown callback: the view used to fall
    # through and return None here, which Flask turns into a 500.
    return redirect(url_for('ressources'))
2020-06-29 17:23:01 +02:00
# Query API
2020-06-26 12:07:25 +02:00
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
def json_redirects(tree_uuid: str):
    """Return the URL and redirects of a capture as JSON.

    When the cached redirects are flagged incomplete, the crawled tree is
    requested first and the cache entry is read again.
    """
    cache = lookyloo.capture_cache(tree_uuid)
    if not cache:
        return {'error': 'UUID missing in cache, try again later.'}

    response: Dict[str, Any] = {'url': cache.url, 'redirects': []}
    to_return: Dict[str, Any] = {'response': response}

    if not cache.redirects:
        response['info'] = 'No redirects'
        return to_return

    if cache.incomplete_redirects:
        # Trigger tree build, get all redirects
        lookyloo.get_crawled_tree(tree_uuid)
        cache = lookyloo.capture_cache(tree_uuid)

    # After a rebuild the cache entry may be gone; the list then stays empty.
    if cache:
        response['redirects'] = cache.redirects

    return jsonify(to_return)
2020-10-23 20:51:15 +02:00
@app.route('/json/<string:tree_uuid>/misp_export', methods=['GET'])
def misp_export(tree_uuid: str):
    """Return the MISP export of a capture as JSON.

    A dict result is sent through ``jsonify``; any other result is
    serialised with its own ``to_json`` method.
    """
    event = lookyloo.misp_export(tree_uuid)
    if not isinstance(event, dict):
        return Response(event.to_json(indent=2), mimetype='application/json')
    return jsonify(event)
2020-10-23 20:51:15 +02:00
@app.route('/json/hash_info/<h>', methods=['GET'])
def json_hash_info(h: str):
    """Return the details and the base64-encoded body known for hash *h* as JSON."""
    details, body = lookyloo.get_body_hash_full(h)
    if not details:
        return {'error': 'Unknown Hash.'}

    encoded_body = base64.b64encode(body.getvalue()).decode()
    to_return: Dict[str, Any] = {
        'response': {
            'hash': h,
            'details': details,
            'body': encoded_body,
        }
    }
    return jsonify(to_return)
2020-10-27 00:02:18 +01:00
@app.route('/json/url_info', methods=['POST'])
def json_url_info():
    """Return the occurrences of a URL as JSON.

    The members of the JSON request body are forwarded as keyword
    arguments to ``get_url_occurrences``.
    """
    query_args = request.get_json(force=True)
    return jsonify(lookyloo.get_url_occurrences(**query_args))
@app.route('/json/hostname_info', methods=['POST'])
def json_hostname_info():
    """Return the occurrences of a hostname as JSON.

    The members of the JSON request body are forwarded as keyword
    arguments to ``get_hostname_occurrences``.
    """
    query_args = request.get_json(force=True)
    return jsonify(lookyloo.get_hostname_occurrences(**query_args))
2020-11-24 16:46:57 +01:00
@app.route('/json/stats', methods=['GET'])
def json_stats():
    """Return the global statistics serialised with ``json.dumps``."""
    payload = json.dumps(lookyloo.get_stats())
    return Response(payload, mimetype='application/json')