#!/usr/bin/env python3

from __future__ import annotations

import base64
import calendar
import functools
import hashlib
import http
import http.client  # explicit import so http.client.responses is available below
import json
import logging
import logging.config
import os
import sys
import time

import filetype  # type: ignore[import-untyped]

from collections import defaultdict
from datetime import date, datetime, timedelta, timezone
from importlib.metadata import version
from io import BytesIO, StringIO
from typing import Any, TypedDict, Iterable
from urllib.parse import quote_plus, unquote_plus, urlparse
from uuid import uuid4
from zipfile import ZipFile

from har2tree import HostNode, URLNode

import flask_login  # type: ignore[import-untyped]
from flask import (Flask, Response, Request, flash, jsonify, redirect, render_template,
                   request, send_file, url_for)
from flask_bootstrap import Bootstrap5  # type: ignore[import-untyped]
from flask_cors import CORS  # type: ignore[import-untyped]
from flask_restx import Api  # type: ignore[import-untyped]
from flask_talisman import Talisman  # type: ignore[import-untyped]
from lacuscore import CaptureStatus, CaptureSettingsError
from puremagic import from_string
from pymisp import MISPEvent, MISPServerError  # type: ignore[attr-defined]
from werkzeug.security import check_password_hash
from werkzeug.wrappers.response import Response as WerkzeugResponse

from lookyloo import Lookyloo, CaptureSettings
from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile, LacusUnreachable
from lookyloo.helpers import (UserAgents, load_cookies,
                              load_user_config,
                              get_taxonomies
                              )

if sys.version_info < (3, 9):
    from pytz import all_timezones_set
else:
    from zoneinfo import available_timezones
    all_timezones_set = available_timezones()

from .genericapi import api as generic_api
from .helpers import (User, build_users_table, get_secret_key,
                      load_user_from_request, src_request_ip, sri_load,
                      get_lookyloo_instance, get_indexing, build_keys_table)
from .proxied import ReverseProxied

logging.config.dictConfig(get_config('logging'))

app: Flask = Flask(__name__)
app.wsgi_app = ReverseProxied(app.wsgi_app)  # type: ignore[method-assign]

app.config['SECRET_KEY'] = get_secret_key()

Bootstrap5(app)
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
app.config['SESSION_COOKIE_SAMESITE'] = 'Strict'
app.debug = bool(os.environ.get('DEBUG', False))

SELF = "'self'"

Talisman(app,
         force_https=False,
         content_security_policy_nonce_in=['script-src',
                                           # Cannot enable that because https://github.com/python-restx/flask-restx/issues/252
                                           # 'script-src-elem'
                                           ],
         content_security_policy={
             'default-src': SELF,
             'base-uri': SELF,
             'img-src': [
                 SELF,
                 "data:",
                 "blob:",
                 "'unsafe-inline'"
             ],
             'script-src': [
                 SELF,
                 "'strict-dynamic'",
                 "'unsafe-inline'",
                 "http:",
                 "https:"
             ],
             'script-src-elem': [
                 SELF,
                 # Cannot enable that because https://github.com/python-restx/flask-restx/issues/252
                 # "'strict-dynamic'",
                 "'unsafe-inline'",
             ],
             'style-src': [
                 SELF,
                 "'unsafe-inline'"
             ],
             'media-src': [
                 SELF,
                 "data:",
                 "blob:",
                 "'unsafe-inline'"
             ],
             'frame-ancestors': [
                 SELF,
             ],
         })
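
# With content_security_policy_nonce_in set for 'script-src' above, flask-talisman
# injects a per-request nonce, so inline scripts in the templates are expected to
# carry it, e.g. <script nonce="{{ csp_nonce() }}"> (csp_nonce() being the helper
# flask-talisman exposes to Jinja).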

pkg_version = version('lookyloo')

# Auth stuff
login_manager = flask_login.LoginManager()
login_manager.init_app(app)
build_keys_table()

# User agents manager
user_agents = UserAgents()


if get_config('generic', 'index_is_capture'):
    @app.route('/', methods=['GET'])
    def landing_page() -> WerkzeugResponse | str:
        if request.method == 'HEAD':
            # Just returns ack if the webserver is running
            return 'Ack'
        return redirect(url_for('capture_web'))
else:
    @app.route('/', methods=['GET'])
    def landing_page() -> WerkzeugResponse | str:
        if request.method == 'HEAD':
            # Just returns ack if the webserver is running
            return 'Ack'
        return redirect(url_for('index'))


@login_manager.user_loader  # type: ignore[misc]
def user_loader(username: str) -> User | None:
    if username not in build_users_table():
        return None
    user = User()
    user.id = username
    return user


@login_manager.request_loader  # type: ignore[misc]
def _load_user_from_request(request: Request) -> User | None:
    return load_user_from_request(request)


@app.route('/login', methods=['GET', 'POST'])
def login() -> WerkzeugResponse | str | Response:
    if request.method == 'GET':
        return '''
               <form action='login' method='POST'>
                <input type='text' name='username' id='username' placeholder='username'/>
                <input type='password' name='password' id='password' placeholder='password'/>
                <input type='submit' name='submit'/>
               </form>
               '''

    username = request.form['username']
    users_table = build_users_table()
    if username in users_table and check_password_hash(users_table[username]['password'], request.form['password']):
        user = User()
        user.id = username
        flask_login.login_user(user)
        flash(f'Logged in as: {flask_login.current_user.id}', 'success')
    else:
        flash(f'Unable to login as: {username}', 'error')

    return redirect(url_for('index'))


@app.route('/logout')
@flask_login.login_required  # type: ignore[misc]
def logout() -> WerkzeugResponse:
    flask_login.logout_user()
    flash('Successfully logged out.', 'success')
    return redirect(url_for('index'))


# Config

lookyloo: Lookyloo = get_lookyloo_instance()

time_delta_on_index = get_config('generic', 'time_delta_on_index')
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')

use_own_ua = get_config('generic', 'use_user_agents_users')
enable_mail_notification = get_config('generic', 'enable_mail_notification')
ignore_sri = get_config('generic', 'ignore_sri')
if enable_mail_notification:
    confirm_message = get_config('generic', 'email').get('confirm_message')
else:
    confirm_message = ''
enable_context_by_users = get_config('generic', 'enable_context_by_users')
enable_categorization = get_config('generic', 'enable_categorization')
enable_bookmark = get_config('generic', 'enable_bookmark')
auto_trigger_modules = get_config('generic', 'auto_trigger_modules')
hide_captures_with_error = get_config('generic', 'hide_captures_with_error')


# ##### Global methods passed to jinja #####

# Method to make sizes in bytes human readable
# Source: https://stackoverflow.com/questions/1094841/reusable-library-to-get-human-readable-version-of-file-size
def sizeof_fmt(num: float, suffix: str = 'B') -> str:
    for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']:
        if abs(num) < 1024.0:
            return f"{num:3.1f}{unit}{suffix}"
        num /= 1024.0
    return ("{:.1f}{}{}".format(num, 'Yi', suffix)).strip()


app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt)
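
# A couple of hand-checked examples of the helper above:
#   sizeof_fmt(1536)      -> '1.5KiB'
#   sizeof_fmt(123456789) -> '117.7MiB'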


def http_status_description(code: int) -> str:
    if code in http.client.responses:
        return http.client.responses[code]
    return f'Invalid code: {code}'


app.jinja_env.globals.update(http_status_description=http_status_description)
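
# e.g. http_status_description(404) -> 'Not Found',
#      http_status_description(999) -> 'Invalid code: 999'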


def month_name(month: int) -> str:
    return calendar.month_name[month]


app.jinja_env.globals.update(month_name=month_name)
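
# e.g. month_name(3) -> 'March' (calendar.month_name is 1-indexed, index 0 is '')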


def get_sri(directory: str, filename: str) -> str:
    if ignore_sri:
        return ""
    sha512 = sri_load()[directory][filename]
    return f'integrity=sha512-{sha512}'


app.jinja_env.globals.update(get_sri=get_sri)
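
# e.g. get_sri('css', 'bootstrap.min.css') -> 'integrity=sha512-<b64 digest>'
# (directory/filename here are illustrative; the valid pairs come from the SRI
# table loaded by sri_load). When ignore_sri is set, templates get an empty string.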


class Icon(TypedDict):
    icon: str
    tooltip: str


def get_icon(icon_id: str) -> Icon | None:
    available_icons: dict[str, Icon] = {
        'js': {'icon': "javascript.png", 'tooltip': 'The content of the response is JavaScript'},
        'exe': {'icon': "exe.png", 'tooltip': 'The content of the response is an executable'},
        'css': {'icon': "css.png", 'tooltip': 'The content of the response is CSS'},
        'font': {'icon': "font.png", 'tooltip': 'The content of the response is a font'},
        'html': {'icon': "html.png", 'tooltip': 'The content of the response is an HTML document'},
        'json': {'icon': "json.png", 'tooltip': 'The content of the response is JSON'},
        'text': {'icon': "json.png", 'tooltip': 'The content of the response is text'},  # FIXME: Need new icon
        'iframe': {'icon': "ifr.png", 'tooltip': 'This content is loaded from an Iframe'},
        'image': {'icon': "img.png", 'tooltip': 'The content of the response is an image'},
        'unset_mimetype': {'icon': "wtf.png", 'tooltip': 'The type of content of the response is not set'},
        'octet-stream': {'icon': "wtf.png", 'tooltip': 'The type of content of the response is a binary blob'},
        'unknown_mimetype': {'icon': "wtf.png", 'tooltip': 'The type of content of the response is of an unknown type'},
        'video': {'icon': "video.png", 'tooltip': 'The content of the response is a video'},
        'livestream': {'icon': "video.png", 'tooltip': 'The content of the response is a livestream'},
        'response_cookie': {'icon': "cookie_received.png", 'tooltip': 'There are cookies in the response'},
        'request_cookie': {'icon': "cookie_read.png", 'tooltip': 'There are cookies in the request'},
        'redirect': {'icon': "redirect.png", 'tooltip': 'The request is redirected'},
        'redirect_to_nothing': {'icon': "cookie_in_url.png", 'tooltip': 'The request is redirected to a URL we do not have in the capture'}
    }
    return available_icons.get(icon_id)


app.jinja_env.globals.update(get_icon=get_icon)
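
# e.g. get_icon('image') -> {'icon': 'img.png', 'tooltip': 'The content of the response is an image'};
# unknown identifiers return None.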


def get_tz_info() -> tuple[str | None, str, set[str]]:
    now = datetime.now().astimezone()
    local_TZ = now.tzname()
    local_UTC_offset = f'UTC{now.strftime("%z")}'
    return local_TZ, local_UTC_offset, all_timezones_set


app.jinja_env.globals.update(tz_info=get_tz_info)
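
# On a host configured for Central European Time, this would return something
# like ('CET', 'UTC+0100', {...all known timezone names...}).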


# ##### Generic/configuration methods #####

@app.after_request
def after_request(response: Response) -> Response:
    if use_own_ua:
        # We keep a list of user agents in order to build a list to use in the capture
        # interface: this is the easiest way to have something up to date.
        # The reason we also get the IP address of the client is because we
        # count the frequency of each user agent and use it to sort them on the
        # capture page, and we want to avoid counting the same user (same IP)
        # multiple times in a day.
        # The cache of IPs is deleted after the UA file is generated once a day.
        # See bin/background_processing.py
        ua = request.headers.get('User-Agent')
        real_ip = src_request_ip(request)
        if ua:
            today = date.today().isoformat()
            lookyloo.redis.zincrby(f'user_agents|{today}', 1, f'{real_ip}|{ua}')
    # Opt out of FLoC
    response.headers.set('Permissions-Policy', 'interest-cohort=()')
    return response


def file_response(func):  # type: ignore[no-untyped-def]
    @functools.wraps(func)
    def wrapper(*args, **kwargs) -> Response:  # type: ignore[no-untyped-def]
        try:
            return func(*args, **kwargs)
        except NoValidHarFile:
            return send_file(BytesIO(b'The capture is broken and does not contain any HAR files.'),
                             mimetype='text/plain', as_attachment=True, download_name='error.txt')
        except MissingUUID as e:
            return send_file(BytesIO(str(e).encode()),
                             mimetype='text/plain', as_attachment=True, download_name='error.txt')

    return wrapper
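

# Usage sketch for the decorator above: wrap any file-serving route so a broken
# capture yields a downloadable error.txt instead of a 500, e.g.:
#
#   @app.route('/tree/<string:tree_uuid>/some_file', methods=['GET'])
#   @file_response  # type: ignore[misc]
#   def some_file(tree_uuid: str) -> Response:
#       ...
#
# ('some_file' is a made-up endpoint; the real routes below follow this pattern.)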


@app.errorhandler(CaptureSettingsError)
def handle_pydantic_validation_exception(error: CaptureSettingsError) -> Response | str | WerkzeugResponse:
    '''Flash the validation error message and redirect to the landing page'''
    if error.pydantic_validation_errors:
        flash(f'Unable to validate capture settings: {error.pydantic_validation_errors.errors()}')
    else:
        flash(str(error))
    return redirect(url_for('landing_page'))


# ##### Methods querying the indexes #####

def _get_body_hash_investigator(body_hash: str, /) -> list[tuple[str, str, datetime, str, str]]:
    '''Returns all the captures related to a hash (sha512), used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(body_hash)],
        cached_captures_only=True)
    captures = []
    for cache in cached_captures:
        if not cache:
            continue
        for urlnode_uuid in get_indexing(flask_login.current_user).get_capture_body_hash_nodes(cache.uuid, body_hash):
            try:
                urlnode = lookyloo.get_urlnode_from_tree(cache.uuid, urlnode_uuid)
            except IndexError:
                continue
            captures.append((cache.uuid, cache.title, cache.timestamp, urlnode.hostnode_uuid, urlnode.name))
    return captures


def get_all_body_hashes(capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]:
    ct = lookyloo.get_crawled_tree(capture_uuid)
    to_return: dict[str, dict[str, URLNode | int]] = defaultdict()
    for node in ct.root_hartree.url_tree.traverse():
        if node.empty_response or node.body_hash in to_return:
            # If we have the same hash more than once, skip
            continue
        total_captures = get_indexing(flask_login.current_user).get_captures_body_hash_count(node.body_hash)
        to_return[node.body_hash] = {'node': node, 'total_captures': total_captures}
    return to_return
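

# Shape of the returned mapping, for reference:
#   {<body_hash>: {'node': <URLNode>, 'total_captures': <int>}, ...}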


def get_all_hostnames(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode]]]:
    ct = lookyloo.get_crawled_tree(capture_uuid)
    to_return: dict[str, dict[str, list[URLNode] | int]] = defaultdict()
    for node in ct.root_hartree.url_tree.traverse():
        if not node.hostname:
            continue
        captures_count = get_indexing(flask_login.current_user).get_captures_hostname_count(node.hostname)
        # Note for future: maybe get url, capture title, something better than just the hash to show to the user
        if node.hostname not in to_return:
            to_return[node.hostname] = {'total_captures': captures_count, 'nodes': []}
        to_return[node.hostname]['nodes'].append(node)  # type: ignore[union-attr]
    return to_return


def get_all_urls(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode] | str]]:
    ct = lookyloo.get_crawled_tree(capture_uuid)
    to_return: dict[str, dict[str, list[URLNode] | int | str]] = defaultdict()
    for node in ct.root_hartree.url_tree.traverse():
        if not node.name:
            continue
        captures_count = get_indexing(flask_login.current_user).get_captures_url_count(node.name)
        # Note for future: maybe get url, capture title, something better than just the hash to show to the user
        if node.name not in to_return:  # the dict is keyed by URL (node.name), so check the URL, not the hostname
            to_return[node.name] = {'total_captures': captures_count, 'nodes': [],
                                    'quoted_url': quote_plus(node.name)}
        to_return[node.name]['nodes'].append(node)  # type: ignore[union-attr]
    return to_return


def get_hostname_investigator(hostname: str) -> list[tuple[str, str, str, datetime, set[str]]]:
    '''Returns all the captures loading content from that hostname, used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hostname(hostname=hostname)],
        cached_captures_only=True)
    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp,
             get_indexing(flask_login.current_user).get_capture_hostname_nodes(cache.uuid, hostname)
             ) for cache in cached_captures]


def get_url_investigator(url: str) -> list[tuple[str, str, str, datetime, set[str]]]:
    '''Returns all the captures loading content from that url, used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_url(url=url)],
        cached_captures_only=True)
    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp,
             get_indexing(flask_login.current_user).get_capture_url_nodes(cache.uuid, url)
             ) for cache in cached_captures]


def get_cookie_name_investigator(cookie_name: str, /) -> list[tuple[str, str, datetime, set[str]]]:
    '''Returns all the captures related to a cookie name entry, used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_cookies_name(cookie_name=cookie_name)],
        cached_captures_only=True)
    captures = [(cache.uuid, cache.title, cache.timestamp, get_indexing(flask_login.current_user).get_capture_cookie_name_nodes(cache.uuid, cookie_name)) for cache in cached_captures]
    return captures


def get_identifier_investigator(identifier_type: str, identifier: str) -> list[tuple[str, str, str, datetime]]:
    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_identifier(identifier_type=identifier_type, identifier=identifier)])
    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]


def get_capture_hash_investigator(hash_type: str, h: str) -> list[tuple[str, str, str, datetime]]:
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hash_type(hash_type=hash_type, h=h)],
        cached_captures_only=True)
    return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]


def get_favicon_investigator(favicon_sha512: str,
                             /,
                             get_probabilistic: bool = False) -> tuple[list[tuple[str, str, str, datetime]],
                                                                       tuple[str, str, str],
                                                                       dict[str, dict[str, dict[str, tuple[str, str]]]]]:
    '''Returns all the captures related to a favicon hash, used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)])
    captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
    favicon = get_indexing(flask_login.current_user).get_favicon(favicon_sha512)
    if favicon:
        mimetype = from_string(favicon, mime=True)
        b64_favicon = base64.b64encode(favicon).decode()
        mmh3_shodan = lookyloo.compute_mmh3_shodan(favicon)
    else:
        mimetype = ''
        b64_favicon = ''
        mmh3_shodan = ''

    # For now, there is only one probabilistic hash algo for favicons, keeping it simple
    probabilistic_hash_algos = ['mmh3-shodan']
    probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {}
    if get_probabilistic:
        for algo in probabilistic_hash_algos:
            probabilistic_favicons[algo] = {}
            for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512):
                probabilistic_favicons[algo][mm3hash] = {}
                for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash):
                    if sha512 == favicon_sha512:
                        # Skip entry if it is the same as the favicon we are investigating
                        continue
                    favicon = get_indexing(flask_login.current_user).get_favicon(sha512)
                    if favicon:
                        mimetype = from_string(favicon, mime=True)
                        b64_favicon = base64.b64encode(favicon).decode()
                        probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon)
                if not probabilistic_favicons[algo][mm3hash]:
                    # remove entry if it has no favicon
                    probabilistic_favicons[algo].pop(mm3hash)
            if not probabilistic_favicons[algo]:
                # remove entry if it has no hash
                probabilistic_favicons.pop(algo)
    return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons


def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]:
    '''Returns all the captures related to a HTTP headers hash (HHH), used in the web interface.'''
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_hhhash(hhh)],
        cached_captures_only=True)
    captures = []
    headers: list[tuple[str, str]] = []
    for cache in cached_captures:
        if not cache:
            continue
        for urlnode_uuid in get_indexing(flask_login.current_user).get_capture_hhhash_nodes(cache.uuid, hhh):
            try:
                urlnode = lookyloo.get_urlnode_from_tree(cache.uuid, urlnode_uuid)
            except IndexError:
                continue
            captures.append((cache.uuid, urlnode.hostnode_uuid, urlnode.name, cache.title))
            if not headers:
                # Just do that once.
                headers = [(header["name"], header["value"]) for header in urlnode.response['headers']]
    return captures, headers


def hash_lookup(blob_hash: str, url: str, current_capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]:
    '''Search all the captures where a specific hash was seen.
    If a URL is given, it splits the results depending on whether the hash was seen on the same URL or another one.
    The capture UUID avoids duplicates from the capture currently being displayed.'''
    captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
    cached_captures = lookyloo.sorted_capture_cache(
        [uuid for uuid, _ in get_indexing(flask_login.current_user).get_captures_body_hash(blob_hash,
                                                                                           oldest_capture=datetime.now() - timedelta(**time_delta_on_index))],
        cached_captures_only=True)
    for cache in cached_captures:
        if cache.uuid == current_capture_uuid:
            continue
        urlnodes = get_indexing(flask_login.current_user).get_capture_body_hash_nodes(cache.uuid, blob_hash)
        for urlnode_uuid in urlnodes:
            try:
                urlnode = cache.tree.root_hartree.get_url_node_by_uuid(urlnode_uuid)
            except IndexError:
                continue
            if url == urlnode.name:
                captures_list['same_url'].append((cache.uuid, urlnode_uuid, cache.title, cache.timestamp.isoformat(), urlnode.hostname))
            else:
                captures_list['different_url'].append((cache.uuid, urlnode_uuid, cache.title, cache.timestamp.isoformat(), urlnode.hostname))
    # Sort by timestamp by default
    captures_list['same_url'].sort(key=lambda y: y[3])
    captures_list['different_url'].sort(key=lambda y: y[3])
    total_captures = get_indexing(flask_login.current_user).get_captures_body_hash_count(blob_hash)
    return total_captures, captures_list
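

# Shape of the returned value, for reference (values illustrative):
#   (42, {'same_url': [(capture_uuid, urlnode_uuid, title, iso_timestamp, hostname), ...],
#         'different_url': [...]})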


def get_hostnode_investigator(capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]:
    '''Gather all the information needed to display the Hostnode investigator popup.'''

    def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]:
        '''There are a few different sources to figure out known vs. legitimate content,
        this method normalizes it for the web interface.'''
        known: str | list[Any] | None = None
        legitimate: tuple[bool, Any] | None = None
        if h not in known_content:
            return known, legitimate

        if known_content[h]['type'] in ['generic', 'sanejs']:
            known = known_content[h]['details']
        elif known_content[h]['type'] == 'legitimate_on_domain':
            legit = False
            if url.hostname in known_content[h]['details']:
                legit = True
            legitimate = (legit, known_content[h]['details'])
        elif known_content[h]['type'] == 'malicious':
            legitimate = (False, known_content[h]['details'])

        return known, legitimate

    ct = lookyloo.get_crawled_tree(capture_uuid)
    hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)

    known_content = lookyloo.context.find_known_content(hostnode)
    lookyloo.uwhois.query_whois_hostnode(hostnode)

    urls: list[dict[str, Any]] = []
    for url in hostnode.urls:
        # For the popup, we need:
        # * https vs http
        # * everything after the domain
        # * the full URL
        to_append: dict[str, Any] = {
            'encrypted': url.name.startswith('https'),
            'url_path': url.name.split('/', 3)[-1],
            'url_object': url,
        }

        if not url.empty_response:
            # Index lookup
            # %%% Full body %%%
            if freq := get_indexing(flask_login.current_user).get_captures_body_hash_count(url.body_hash):
                to_append['body_hash_details'] = {'hash_freq': freq, 'other_captures': (freq, {'same_url': [], 'different_url': []})}
                if freq > 1:
                    to_append['body_hash_details']['other_captures'] = hash_lookup(url.body_hash, url.name, capture_uuid)

            # %%% Embedded ressources %%%
            if hasattr(url, 'embedded_ressources') and url.embedded_ressources:
                to_append['embedded_ressources'] = {}
                for mimetype, blobs in url.embedded_ressources.items():
                    for h, blob in blobs:
                        if h in to_append['embedded_ressources']:
                            # Skip duplicates
                            continue
                        to_append['embedded_ressources'][h] = {'body_size': blob.getbuffer().nbytes,
                                                               'type': mimetype}
                        if freq := get_indexing(flask_login.current_user).get_captures_body_hash_count(h):
                            to_append['embedded_ressources'][h]['hash_freq'] = freq
                            to_append['embedded_ressources'][h]['other_captures'] = (freq, {'same_url': [], 'different_url': []})
                            if freq > 1:
                                to_append['embedded_ressources'][h]['other_captures'] = hash_lookup(h, url.name, capture_uuid)
                for h in to_append['embedded_ressources'].keys():
                    known, legitimate = normalize_known_content(h, known_content, url)
                    if known:
                        to_append['embedded_ressources'][h]['known_content'] = known
                    elif legitimate:
                        to_append['embedded_ressources'][h]['legitimacy'] = legitimate

            known, legitimate = normalize_known_content(url.body_hash, known_content, url)
            if known:
                to_append['known_content'] = known
            elif legitimate:
                to_append['legitimacy'] = legitimate

        # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response
        if hasattr(url, 'cookies_sent'):
            to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set)
            for cookie, contexts in url.cookies_sent.items():
                if not contexts:
                    # Locally created?
                    to_display_sent[cookie].add(('Unknown origin', ))
                    continue
                for context in contexts:
                    to_display_sent[cookie].add((context['setter'].hostname, context['setter'].hostnode_uuid))
            to_append['cookies_sent'] = to_display_sent

        # Optional: Cookies received from server in response -> map to nodes who send the cookie in request
        if hasattr(url, 'cookies_received'):
            to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)}
            for domain, c_received, is_3rd_party in url.cookies_received:
                if c_received not in ct.root_hartree.cookies_sent:
                    # This cookie is never sent.
                    if is_3rd_party:
                        to_display_received['3rd_party'][c_received].add((domain, ))
                    else:
                        to_display_received['not_sent'][c_received].add((domain, ))
                    continue

                for url_node in ct.root_hartree.cookies_sent[c_received]:
                    if is_3rd_party:
                        to_display_received['3rd_party'][c_received].add((url_node.hostname, url_node.hostnode_uuid))
                    else:
                        to_display_received['sent'][c_received].add((url_node.hostname, url_node.hostnode_uuid))
            to_append['cookies_received'] = to_display_received

        urls.append(to_append)
    return hostnode, urls


# ##### Hostnode level methods #####

@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/hashes', methods=['GET'])
@file_response  # type: ignore[misc]
def hashes_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid, hostnode_uuid=node_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='text/plain', as_attachment=True, download_name=f'hashes.{node_uuid}.txt')


@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>/text', methods=['GET'])
@file_response  # type: ignore[misc]
def urls_hostnode(tree_uuid: str, node_uuid: str) -> Response:
    hostnode = lookyloo.get_hostnode_from_tree(tree_uuid, node_uuid)
    return send_file(BytesIO('\n'.join(url.name for url in hostnode.urls).encode()),
                     mimetype='text/plain', as_attachment=True, download_name=f'urls.{node_uuid}.txt')


@app.route('/tree/<string:tree_uuid>/host/<string:node_uuid>', methods=['GET'])
def hostnode_popup(tree_uuid: str, node_uuid: str) -> str | WerkzeugResponse | Response:
    try:
        hostnode, urls = get_hostnode_investigator(tree_uuid, node_uuid)
    except IndexError:
        return render_template('error.html', error_message='Sorry, this one is on us. The tree was rebuilt, please reload the tree and try again.')

    return render_template('hostname_popup.html',
                           tree_uuid=tree_uuid,
                           hostnode_uuid=node_uuid,
                           hostnode=hostnode,
                           urls=urls,
                           has_pandora=lookyloo.pandora.available,
                           enable_context_by_users=enable_context_by_users,
                           uwhois_available=lookyloo.uwhois.available)


# ##### Tree level methods #####

@app.route('/tree/<string:tree_uuid>/trigger_modules', methods=['GET'])
def trigger_modules(tree_uuid: str) -> WerkzeugResponse | str | Response:
    force = request.args.get('force') == 'True'
    auto_trigger = request.args.get('auto_trigger') == 'True'
    lookyloo.trigger_modules(tree_uuid, force=force, auto_trigger=auto_trigger)
    return redirect(url_for('modules', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/historical_lookups', methods=['GET'])
def historical_lookups(tree_uuid: str) -> str | WerkzeugResponse | Response:
    force = request.args.get('force') == 'True'
    data = lookyloo.get_historical_lookups(tree_uuid, force)
    return render_template('historical_lookups.html', tree_uuid=tree_uuid,
                           riskiq=data.get('riskiq'),
                           circl_pdns=data.get('circl_pdns'))


@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''}, methods=['GET', 'POST'])
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def categories_capture(tree_uuid: str, query: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    matching_categories: dict[str, Any] = {}
    if 'verification-status' in request.form:
        status = request.form.get('verification-status')
        # fast categories
        categories = []
        possible_ctgs = {
            'legitimate': ['parking-page', 'default-page', 'institution', 'captcha', 'authentication-form', 'adult-content', 'shop'],
            'malicious': ['clone', 'phishing', 'captcha', 'authentication-form', 'adult-content', 'shop'],
            'unclear': ['captcha', 'authentication-form', 'adult-content', 'shop']
        }
        if status in possible_ctgs.keys():
            lookyloo.categorize_capture(tree_uuid, status)
            for category in possible_ctgs[status]:
                if category in request.form:
                    categories.append(category)
            for category in categories:
                lookyloo.categorize_capture(tree_uuid, category)
    if 'query' in request.form and request.form.get('query', '').strip():
        t = get_taxonomies()
        entries = t.search(query)
        if entries:
            matching_categories = {e: t.revert_machinetag(e) for e in entries}
    current_categories = lookyloo.categories_capture(tree_uuid)
    return render_template('categories_capture.html', tree_uuid=tree_uuid,
                           current_categories=current_categories,
                           matching_categories=matching_categories)


@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def uncategorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.uncategorize_capture(tree_uuid, category)
    return jsonify({'response': f'{category} successfully removed from {tree_uuid}'})


@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def categorize_capture(tree_uuid: str, category: str) -> str | WerkzeugResponse | Response:
    if not enable_categorization:
        return jsonify({'response': 'Categorization not enabled.'})
    lookyloo.categorize_capture(tree_uuid, category)
    return jsonify({'response': f'{category} successfully added to {tree_uuid}'})


@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
def stats(tree_uuid: str) -> str:
    stats = lookyloo.get_statistics(tree_uuid)
    return render_template('statistics.html', uuid=tree_uuid, stats=stats)


@app.route('/tree/<string:tree_uuid>/misp_lookup', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def web_misp_lookup_view(tree_uuid: str) -> str | WerkzeugResponse | Response:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    misps_occurrences = {}
    for instance_name in lookyloo.misps.keys():
        if occurrences := lookyloo.get_misp_occurrences(tree_uuid, instance_name=instance_name):
            misps_occurrences[instance_name] = occurrences
    return render_template('misp_lookup.html', uuid=tree_uuid,
                           current_misp=lookyloo.misps.default_instance,
                           misps_occurrences=misps_occurrences)


@app.route('/tree/<string:tree_uuid>/misp_push', methods=['GET', 'POST'])
@flask_login.login_required  # type: ignore[misc]
def web_misp_push_view(tree_uuid: str) -> str | WerkzeugResponse | Response | None:
    if not lookyloo.misps.available:
        flash('There are no MISP instances available.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    event = lookyloo.misp_export(tree_uuid)
    if isinstance(event, dict):
        flash(f'Unable to generate the MISP export: {event}', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    if request.method == 'GET':
        # Initialize settings that will be displayed on the template
        misp_instances_settings = {}
        for name, instance in lookyloo.misps.items():
            # the 1st attribute in the event is the link to lookyloo
            misp_instances_settings[name] = {
                'default_tags': instance.default_tags,
                'fav_tags': [tag.name for tag in instance.get_fav_tags()],
                'auto_publish': instance.auto_publish
            }
            if existing_misp_url := instance.get_existing_event_url(event[-1].attributes[0].value):
                misp_instances_settings[name]['existing_event'] = existing_misp_url

        cache = lookyloo.capture_cache(tree_uuid)
        return render_template('misp_push_view.html',
                               current_misp=lookyloo.misps.default_instance,
                               tree_uuid=tree_uuid,
                               event=event[0],
                               misp_instances_settings=misp_instances_settings,
                               has_parent=True if cache and cache.parent else False)

    elif request.method == 'POST':
        # event is a MISPEvent at this point
        misp_instance_name = request.form.get('misp_instance_name')
        if not misp_instance_name or misp_instance_name not in lookyloo.misps:
            flash(f'MISP instance {misp_instance_name} is unknown.', 'error')
            return redirect(url_for('tree', tree_uuid=tree_uuid))
        misp = lookyloo.misps[misp_instance_name]
        if not misp.enable_push:
            flash('Push not enabled in MISP module.', 'error')
            return redirect(url_for('tree', tree_uuid=tree_uuid))
        # Submit the event
        tags = request.form.getlist('tags')
        error = False
        events: list[MISPEvent] = []
        with_parents = request.form.get('with_parents')
        if with_parents:
            exports = lookyloo.misp_export(tree_uuid, True)
            if isinstance(exports, dict):
                flash(f'Unable to create event: {exports}', 'error')
                error = True
            else:
                events = exports
        else:
            events = event

        if error:
            return redirect(url_for('tree', tree_uuid=tree_uuid))

        for e in events:
            for tag in tags:
                e.add_tag(tag)

        # Change the event info field of the last event in the chain
        events[-1].info = request.form.get('event_info')

        try:
            new_events = misp.push(events, True if request.form.get('force_push') else False,
                                   True if request.form.get('auto_publish') else False)
        except MISPServerError:
            flash(f'MISP returned an error, the event(s) might still have been created on {misp.client.root_url}', 'error')
        else:
            if isinstance(new_events, dict):
                flash(f'Unable to create event(s): {new_events}', 'error')
            else:
                for e in new_events:
                    flash(f'MISP event {e.id} created on {misp.client.root_url}', 'success')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    return None


@app.route('/tree/<string:tree_uuid>/modules', methods=['GET'])
def modules(tree_uuid: str) -> str | WerkzeugResponse | Response:
    modules_responses = lookyloo.get_modules_responses(tree_uuid)
    if not modules_responses:
        return redirect(url_for('tree', tree_uuid=tree_uuid))

    vt_short_result: dict[str, dict[str, Any]] = {}
    if 'vt' in modules_responses:
        # VirusTotal cleanup
        vt = modules_responses.pop('vt')
        # Get malicious entries
        for url, full_report in vt.items():
            if not full_report:
                continue
            vt_short_result[url] = {
                'permaurl': f'https://www.virustotal.com/gui/url/{full_report["id"]}/detection',
                'malicious': []
            }
            for vendor, result in full_report['attributes']['last_analysis_results'].items():
                if result['category'] == 'malicious':
                    vt_short_result[url]['malicious'].append((vendor, result['result']))

    pi_short_result: dict[str, str] = {}
    if 'pi' in modules_responses:
        pi = modules_responses.pop('pi')
        for url, full_report in pi.items():
            if not full_report:
                continue
            pi_short_result[url] = full_report['results'][0]['tag_label']

    phishtank_short_result: dict[str, dict[str, Any]] = {'urls': {}, 'ips_hits': {}}
    if 'phishtank' in modules_responses:
        pt = modules_responses.pop('phishtank')
        for url, full_report in pt['urls'].items():
            if not full_report:
                continue
            phishtank_short_result['urls'][url] = full_report['phish_detail_url']

        for ip, entries in pt['ips_hits'].items():
            if not entries:
                continue
            # Add an entry for this IP without clobbering the ones already collected
            phishtank_short_result['ips_hits'][ip] = []
            for full_report in entries:
                phishtank_short_result['ips_hits'][ip].append((
                    full_report['url'],
                    full_report['phish_detail_url']))

    urlhaus_short_result: dict[str, list[Any]] = {'urls': []}
    if 'urlhaus' in modules_responses:
        # TODO: make a short result
        uh = modules_responses.pop('urlhaus')
        for url, results in uh['urls'].items():
            if results:
                urlhaus_short_result['urls'].append(results)

    urlscan_to_display: dict[str, Any] = {}
    if 'urlscan' in modules_responses and modules_responses.get('urlscan'):
        urlscan = modules_responses.pop('urlscan')
        if 'error' in urlscan['submission']:
            if 'description' in urlscan['submission']['error']:
                urlscan_to_display = {'error_message': urlscan['submission']['error']['description']}
            else:
                urlscan_to_display = {'error_message': urlscan['submission']['error']}
        else:
            urlscan_to_display = {'permaurl': '', 'malicious': False, 'tags': []}
            if urlscan['submission'] and urlscan['submission'].get('result'):
                urlscan_to_display['permaurl'] = urlscan['submission']['result']
            if urlscan['result']:
                # We have a result available, get the verdicts
                if (urlscan['result'].get('verdicts')
                        and urlscan['result']['verdicts'].get('overall')):
                    if urlscan['result']['verdicts']['overall'].get('malicious') is not None:
                        urlscan_to_display['malicious'] = urlscan['result']['verdicts']['overall']['malicious']
                    if urlscan['result']['verdicts']['overall'].get('tags'):
                        urlscan_to_display['tags'] = urlscan['result']['verdicts']['overall']['tags']
            else:
                # unable to run the query, probably an invalid key
                pass
    return render_template('modules.html', uuid=tree_uuid, vt=vt_short_result,
                           pi=pi_short_result, urlscan=urlscan_to_display,
                           phishtank=phishtank_short_result,
                           urlhaus=urlhaus_short_result)


@app.route('/tree/<string:tree_uuid>/redirects', methods=['GET'])
@file_response  # type: ignore[misc]
def redirects(tree_uuid: str) -> Response:
    cache = lookyloo.capture_cache(tree_uuid)
    if not cache or not hasattr(cache, 'redirects'):
        return Response('Not available.', mimetype='text/plain')
    if not cache.redirects:
        return Response('No redirects.', mimetype='text/plain')
    if cache.url == cache.redirects[0]:
        to_return = BytesIO('\n'.join(cache.redirects).encode())
    else:
        to_return = BytesIO('\n'.join([cache.url] + cache.redirects).encode())
    return send_file(to_return, mimetype='text/plain',
                     as_attachment=True, download_name='redirects.txt')


@app.route('/tree/<string:tree_uuid>/image', methods=['GET'])
@file_response  # type: ignore[misc]
def image(tree_uuid: str) -> Response:
    max_width = request.args.get('width')
    if max_width and max_width.isdigit():
        to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, width=int(max_width))
    else:
        to_return = lookyloo.get_screenshot(tree_uuid)
    return send_file(to_return, mimetype='image/png',
                     as_attachment=True, download_name='image.png')


@app.route('/tree/<string:tree_uuid>/data', methods=['GET'])
@file_response  # type: ignore[misc]
def data(tree_uuid: str) -> Response:
    filename, data = lookyloo.get_data(tree_uuid)
    if len(filename) == 0:
        return Response('No files.', mimetype='text/plain')

    # Guess the mimetype once, and fall back to a generic binary type
    mime = filetype.guess_mime(data.getvalue())
    if mime is None:
        mime = 'application/octet-stream'
    return send_file(data, mimetype=mime,
                     as_attachment=True, download_name=filename)


@app.route('/tree/<string:tree_uuid>/thumbnail/', defaults={'width': 64}, methods=['GET'])
@app.route('/tree/<string:tree_uuid>/thumbnail/<int:width>', methods=['GET'])
@file_response  # type: ignore[misc]
def thumbnail(tree_uuid: str, width: int) -> Response:
    to_return = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=False, width=width)
    return send_file(to_return, mimetype='image/png')
|
|
|
|
|
|
|
|
|
2020-05-12 16:53:10 +02:00
|
|
|
@app.route('/tree/<string:tree_uuid>/html', methods=['GET'])
|
2024-01-12 17:15:41 +01:00
|
|
|
@file_response # type: ignore[misc]
|
|
|
|
def html(tree_uuid: str) -> Response:
|
2020-06-29 11:59:01 +02:00
|
|
|
to_return = lookyloo.get_html(tree_uuid)
|
2020-05-12 16:53:10 +02:00
|
|
|
return send_file(to_return, mimetype='text/html',
|
2022-11-02 12:23:41 +01:00
|
|
|
as_attachment=True, download_name='page.html')
|
2020-05-12 16:53:10 +02:00
|
|
|
|
|
|
|
|
2020-05-26 17:45:04 +02:00
|
|
|
@app.route('/tree/<string:tree_uuid>/cookies', methods=['GET'])
|
2024-01-12 17:15:41 +01:00
|
|
|
@file_response # type: ignore[misc]
|
|
|
|
def cookies(tree_uuid: str) -> Response:
|
2020-06-29 11:59:01 +02:00
|
|
|
to_return = lookyloo.get_cookies(tree_uuid)
|
2020-05-26 17:45:04 +02:00
|
|
|
return send_file(to_return, mimetype='application/json',
|
2022-11-02 12:23:41 +01:00
|
|
|
as_attachment=True, download_name='cookies.json')
|
2020-05-26 17:45:04 +02:00
|
|
|
|
|
|
|
|
2020-10-09 18:05:04 +02:00
|
|
|
@app.route('/tree/<string:tree_uuid>/hashes', methods=['GET'])
|
2024-01-12 17:15:41 +01:00
|
|
|
@file_response # type: ignore[misc]
|
|
|
|
def hashes_tree(tree_uuid: str) -> Response:
|
2020-10-09 18:05:04 +02:00
|
|
|
hashes = lookyloo.get_hashes(tree_uuid)
|
|
|
|
return send_file(BytesIO('\n'.join(hashes).encode()),
|
2022-11-02 12:23:41 +01:00
|
|
|
mimetype='test/plain', as_attachment=True, download_name='hashes.txt')
|
2020-10-09 18:05:04 +02:00
|
|
|
|
|
|
|
|
2020-05-12 16:53:10 +02:00
|
|
|
@app.route('/tree/<string:tree_uuid>/export', methods=['GET'])
|
2024-01-12 17:15:41 +01:00
|
|
|
@file_response # type: ignore[misc]
|
|
|
|
def export(tree_uuid: str) -> Response:
|
2020-06-29 11:59:01 +02:00
|
|
|
to_return = lookyloo.get_capture(tree_uuid)
|
2020-05-12 16:53:10 +02:00
|
|
|
return send_file(to_return, mimetype='application/zip',
|
2022-11-02 12:23:41 +01:00
|
|
|
as_attachment=True, download_name='capture.zip')
|
2020-05-12 16:53:10 +02:00
|
|
|
|
|
|
|
|
2021-03-19 17:51:25 +01:00
|
|
|
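# List the URLs found in the rendered page, so the user can select some of them for a bulk capture.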
@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
def urls_rendered_page(tree_uuid: str) -> WerkzeugResponse | str | Response:
    try:
        urls = lookyloo.get_urls_rendered_page(tree_uuid)
        return render_template('urls_rendered.html', base_tree_uuid=tree_uuid, urls=urls)
    except Exception:
        flash('Unable to find the rendered node in this capture, cannot get the URLs.', 'error')
        return redirect(url_for('tree', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/hashlookup', methods=['GET'])
def hashlookup(tree_uuid: str) -> str | WerkzeugResponse | Response:
    try:
        merged, total_ressources = lookyloo.merge_hashlookup_tree(tree_uuid)
        # We only want unique URLs for the template
        for sha1, entries in merged.items():
            entries['nodes'] = {node.name for node in entries['nodes']}
    except Exception:  # error or module not enabled
        merged = {}
        total_ressources = 0
    return render_template('hashlookup.html', base_tree_uuid=tree_uuid, merged=merged, total_ressources=total_ressources)


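# Enqueue a capture for every URL selected on the urls_rendered_page view, reusing the
# settings of the original capture when they are available.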
@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
def bulk_captures(base_tree_uuid: str) -> WerkzeugResponse | str | Response:
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
    else:
        user = src_request_ip(request)
    selected_urls = request.form.getlist('url')
    urls = lookyloo.get_urls_rendered_page(base_tree_uuid)
    cache = lookyloo.capture_cache(base_tree_uuid)
    if not cache:
        flash(f'Unable to find capture {base_tree_uuid} in cache.', 'error')
        return redirect(url_for('tree', tree_uuid=base_tree_uuid))
    cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
    original_capture_settings = lookyloo.get_capture_settings(base_tree_uuid)
    bulk_captures = []
    for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]:
        if original_capture_settings:
            capture = original_capture_settings.model_copy(
                update={
                    'url': url,
                    'cookies': cookies,
                    'referer': cache.redirects[-1] if cache.redirects else cache.url,
                    'user_agent': cache.user_agent,
                    'parent': base_tree_uuid,
                    'listing': False if cache and cache.no_index else True
                })
        else:
            _capture: dict[str, Any] = {
                'url': url,
                'cookies': cookies,
                'referer': cache.redirects[-1] if cache.redirects else cache.url,
                'user_agent': cache.user_agent,
                'parent': base_tree_uuid,
                'listing': False if cache and cache.no_index else True
            }
            capture = CaptureSettings(**_capture)
        new_capture_uuid = lookyloo.enqueue_capture(capture, source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
        bulk_captures.append((new_capture_uuid, url))

    return render_template('bulk_captures.html', uuid=base_tree_uuid, bulk_captures=bulk_captures)


@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def hide_capture(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.hide_capture(tree_uuid)
    flash('Successfully hidden.', 'success')
    return redirect(url_for('tree', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/remove', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def remove_capture(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.remove_capture(tree_uuid)
    flash(f'{tree_uuid} successfully removed.', 'success')
    return redirect(url_for('index'))


@app.route('/tree/<string:tree_uuid>/rebuild')
@flask_login.login_required  # type: ignore[misc]
def rebuild_tree(tree_uuid: str) -> WerkzeugResponse:
    try:
        lookyloo.remove_pickle(tree_uuid)
        flash('Successfully rebuilt.', 'success')
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    except Exception:
        return redirect(url_for('index'))


@app.route('/tree/<string:tree_uuid>/cache', methods=['GET'])
def cache_tree(tree_uuid: str) -> WerkzeugResponse:
    lookyloo.capture_cache(tree_uuid)
    return redirect(url_for('index'))


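# Push the capture to the monitoring instance (if enabled). The name/confirm form fields
# act as a honeypot to filter out bots.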
@app.route('/tree/<string:tree_uuid>/monitor', methods=['POST', 'GET'])
def monitor(tree_uuid: str) -> WerkzeugResponse:
    if not lookyloo.monitoring_enabled:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):
        # got a bot.
        logging.info(f'{src_request_ip(request)} is a bot - {request.headers.get("User-Agent")}.')
        return redirect('https://www.youtube.com/watch?v=iwGFalTRHDA')

    collection: str = request.form['collection'] if request.form.get('collection') else ''
    notification_email: str = request.form['notification'] if request.form.get('notification') else ''
    frequency: str = request.form['frequency'] if request.form.get('frequency') else 'daily'
    expire_at: float | None = datetime.fromisoformat(request.form['expire_at']).timestamp() if request.form.get('expire_at') else None
    cache = lookyloo.capture_cache(tree_uuid)
    if cache:
        monitoring_uuid = lookyloo.monitoring.monitor({'url': cache.url, 'user_agent': cache.user_agent, 'listing': False},
                                                      frequency=frequency, collection=collection, expire_at=expire_at,
                                                      notification={'email': notification_email})
        flash(f"Sent to monitoring ({monitoring_uuid}).", 'success')
        if collection:
            flash(f"See monitored captures in the same collection here: {lookyloo.monitoring.root_url}/monitored/{collection}.", 'success')
        else:
            flash(f"Comparison available as soon as we have more than one capture: {lookyloo.monitoring.root_url}/changes_tracking/{monitoring_uuid}.", 'success')
    else:
        flash(f"Unable to send to monitoring, uuid {tree_uuid} not found in cache.", 'error')
    return redirect(url_for('tree', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/send_mail', methods=['POST', 'GET'])
def send_mail(tree_uuid: str) -> WerkzeugResponse:
    if not enable_mail_notification:
        return redirect(url_for('tree', tree_uuid=tree_uuid))
    if request.form.get('name') or not request.form.get('confirm'):
        # got a bot.
        logging.info(f'{src_request_ip(request)} is a bot - {request.headers.get("User-Agent")}.')
        return redirect('https://www.youtube.com/watch?v=iwGFalTRHDA')

    email: str = request.form['email'] if request.form.get('email') else ''
    if '@' not in email:
        # skip clearly incorrect emails
        email = ''
    comment: str = request.form['comment'] if request.form.get('comment') else ''
    lookyloo.send_mail(tree_uuid, email, comment)
    flash("Email notification sent", 'success')
    return redirect(url_for('tree', tree_uuid=tree_uuid))


@app.route('/tree/<string:tree_uuid>/trigger_indexing', methods=['POST', 'GET'])
def trigger_indexing(tree_uuid: str) -> WerkzeugResponse:
    cache = lookyloo.capture_cache(tree_uuid)
    if cache and hasattr(cache, 'capture_dir'):
        get_indexing(flask_login.current_user).index_capture(tree_uuid, cache.capture_dir)
    return redirect(url_for('tree', tree_uuid=tree_uuid))


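# Main capture view: waits for the capture if it is still running, then renders the tree
# with all the metadata we have for it.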
@app.route('/tree/<string:tree_uuid>', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/<string:node_uuid>', methods=['GET'])
def tree(tree_uuid: str, node_uuid: str | None=None) -> Response | str | WerkzeugResponse:
    if tree_uuid == 'False':
        flash("Unable to process your request.", 'warning')
        return redirect(url_for('index'))
    try:
        cache = lookyloo.capture_cache(tree_uuid, force_update=True)
        if not cache:
            status = lookyloo.get_capture_status(tree_uuid)
            if status == CaptureStatus.UNKNOWN:
                flash(f'Unable to find this UUID ({tree_uuid}).', 'warning')
                return index_generic()
            elif status == CaptureStatus.QUEUED:
                message = "The capture is queued, but didn't start yet."
            elif status in [CaptureStatus.ONGOING, CaptureStatus.DONE]:
                # If CaptureStatus.DONE, the capture finished between the query to the cache and
                # the request for a status. Give it an extra few seconds.
                message = "The capture is ongoing."
            return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)
    except LacusUnreachable:
        message = "Unable to connect to the Lacus backend, the capture will start as soon as the administrator wakes up."
        return render_template('tree_wait.html', message=message, tree_uuid=tree_uuid)

    try:
        ct = lookyloo.get_crawled_tree(tree_uuid)
        b64_thumbnail = lookyloo.get_screenshot_thumbnail(tree_uuid, for_datauri=True)
        screenshot_size = lookyloo.get_screenshot(tree_uuid).getbuffer().nbytes
        meta = lookyloo.get_meta(tree_uuid)
        capture_settings = lookyloo.get_capture_settings(tree_uuid)
        # Get a potential favicon, if it exists
        mime_favicon, b64_potential_favicon = lookyloo.get_potential_favicons(tree_uuid, all_favicons=False, for_datauri=True)
        hostnode_to_highlight = None
        if node_uuid:
            try:
                urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
                if urlnode:
                    hostnode_to_highlight = urlnode.hostnode_uuid
            except IndexError:
                # node_uuid is not a urlnode, trying a hostnode
                try:
                    hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
                    if hostnode:
                        hostnode_to_highlight = hostnode.uuid
                except IndexError as e:
                    logging.info(f'Invalid uuid ({e}): {node_uuid}')
        if cache.error:
            flash(cache.error, 'warning')

        if lookyloo.monitoring_enabled:
            try:
                monitoring_collections = lookyloo.monitoring.collections()
            except Exception as e:
                monitoring_collections = []
                flash(f'Unable to get existing collections from the monitoring: {e}', 'warning')

        # Check if the capture has been indexed yet. Print a warning if not.
        capture_indexed = all(get_indexing(flask_login.current_user).capture_indexed(tree_uuid))
        if not capture_indexed:
            flash('The capture has not been indexed yet. Some correlations will be missing.', 'warning')

        return render_template('tree.html', tree_json=ct.to_json(),
                               info=cache,
                               tree_uuid=tree_uuid, public_domain=lookyloo.public_domain,
                               screenshot_thumbnail=b64_thumbnail, page_title=cache.title if hasattr(cache, 'title') else '',
                               favicon=b64_potential_favicon,
                               mime_favicon=mime_favicon,
                               screenshot_size=screenshot_size,
                               meta=meta, enable_mail_notification=enable_mail_notification,
                               enable_monitoring=lookyloo.monitoring_enabled,
                               ignore_sri=ignore_sri,
                               monitoring_settings=lookyloo.monitoring_settings if lookyloo.monitoring_enabled else None,
                               monitoring_collections=monitoring_collections if lookyloo.monitoring_enabled else [],
                               enable_context_by_users=enable_context_by_users,
                               enable_categorization=enable_categorization,
                               enable_bookmark=enable_bookmark,
                               misp_push=lookyloo.misps.available and lookyloo.misps.default_misp.enable_push,
                               misp_lookup=lookyloo.misps.available and lookyloo.misps.default_misp.enable_lookup,
                               blur_screenshot=blur_screenshot, urlnode_uuid=hostnode_to_highlight,
                               auto_trigger_modules=auto_trigger_modules,
                               confirm_message=confirm_message if confirm_message else 'Tick to confirm.',
                               parent_uuid=cache.parent,
                               has_redirects=True if cache.redirects else False,
                               capture_indexed=capture_indexed,
                               capture_settings=capture_settings.model_dump(exclude_none=True) if capture_settings else {})

    except NoValidHarFile:
        flash(f'Unable to build a tree for {tree_uuid}: {cache.error}.', 'warning')
        return index_generic()
    finally:
        lookyloo.update_tree_cache_info(os.getpid(), 'website')


@app.route('/tree/<string:tree_uuid>/mark_as_legitimate', methods=['POST'])
@flask_login.login_required  # type: ignore[misc]
def mark_as_legitimate(tree_uuid: str) -> Response:
    if request.data:
        legitimate_entries: dict[str, Any] = request.get_json(force=True)
        lookyloo.add_to_legitimate(tree_uuid, **legitimate_entries)
    else:
        lookyloo.add_to_legitimate(tree_uuid)
    return jsonify({'message': 'Legitimate entry added.'})


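# List the identifiers indexed for this capture, with the number of captures sharing each of them.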
@app.route('/tree/<string:tree_uuid>/identifiers', methods=['GET'])
def tree_identifiers(tree_uuid: str) -> str:
    to_return: list[tuple[int, str, str]] = []

    for id_type, identifiers in get_indexing(flask_login.current_user).get_identifiers_capture(tree_uuid).items():
        for identifier in identifiers:
            nb_captures = get_indexing(flask_login.current_user).get_captures_identifier_count(id_type, identifier)
            to_return.append((nb_captures, id_type, identifier))
    return render_template('tree_identifiers.html', tree_uuid=tree_uuid, identifiers=to_return)


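# List the favicons of the capture, with their hashes, frequency, and Shodan MMH3 value.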
@app.route('/tree/<string:tree_uuid>/favicons', methods=['GET'])
def tree_favicons(tree_uuid: str) -> str:
    favicons = []
    favicons_zip = lookyloo.get_potential_favicons(tree_uuid, all_favicons=True, for_datauri=False)
    with ZipFile(favicons_zip, 'r') as myzip:
        for name in myzip.namelist():
            if not name.endswith('.ico'):
                continue
            favicon = myzip.read(name)
            if not favicon:
                continue
            mimetype = from_string(favicon, mime=True)
            favicon_sha512 = hashlib.sha512(favicon).hexdigest()
            frequency = get_indexing(flask_login.current_user).favicon_frequency(favicon_sha512)
            number_captures = get_indexing(flask_login.current_user).favicon_number_captures(favicon_sha512)
            b64_favicon = base64.b64encode(favicon).decode()
            mmh3_shodan = lookyloo.compute_mmh3_shodan(favicon)
            favicons.append((favicon_sha512, frequency, number_captures, mimetype, b64_favicon, mmh3_shodan))
    return render_template('tree_favicons.html', tree_uuid=tree_uuid, favicons=favicons)


@app.route('/tree/<string:tree_uuid>/hashes_types', methods=['GET'])
def tree_capture_hashes_types(tree_uuid: str) -> str:
    to_return: list[tuple[int, str, str]] = []

    for hash_type, h in get_indexing(flask_login.current_user).get_hashes_types_capture(tree_uuid).items():
        nb_captures = get_indexing(flask_login.current_user).get_captures_hash_type_count(hash_type, h)
        to_return.append((nb_captures, hash_type, h))
    return render_template('tree_hashes_types.html', tree_uuid=tree_uuid, hashes=to_return)


@app.route('/tree/<string:tree_uuid>/body_hashes', methods=['GET'])
def tree_body_hashes(tree_uuid: str) -> str:
    body_hashes = get_all_body_hashes(tree_uuid)
    return render_template('tree_body_hashes.html', tree_uuid=tree_uuid, body_hashes=body_hashes)


@app.route('/tree/<string:tree_uuid>/hostnames', methods=['GET'])
def tree_hostnames(tree_uuid: str) -> str:
    hostnames = get_all_hostnames(tree_uuid)
    return render_template('tree_hostnames.html', tree_uuid=tree_uuid, hostnames=hostnames)


@app.route('/tree/<string:tree_uuid>/urls', methods=['GET'])
def tree_urls(tree_uuid: str) -> str:
    urls = get_all_urls(tree_uuid)
    return render_template('tree_urls.html', tree_uuid=tree_uuid, urls=urls)


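# Submit a resource of the capture (or the downloaded file itself) to a Pandora instance for analysis.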
@app.route('/tree/<string:tree_uuid>/pandora', methods=['GET', 'POST'])
def pandora_submit(tree_uuid: str) -> dict[str, Any] | Response:
    node_uuid = None
    if request.method == 'POST':
        input_json = request.get_json(force=True)
        node_uuid = input_json.get('node_uuid')
        h_request = input_json.get('ressource_hash')
    if node_uuid:
        ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_request)
        if ressource:
            filename, content, mimetype = ressource
        elif h_request:
            return {'error': f'Unable to find resource {h_request} in node {node_uuid} of tree {tree_uuid}'}
        else:
            return {'error': f'Unable to find resource in node {node_uuid} of tree {tree_uuid}'}
    else:
        filename, content = lookyloo.get_data(tree_uuid)

    response = lookyloo.pandora.submit_file(content, filename)
    return jsonify(response)


# ##### helpers #####

def index_generic(show_hidden: bool=False, show_error: bool=True, category: str | None=None) -> str:
    """This method is used to generate the index page. It is possible that some of the captures
    do not have their pickle yet.

    We must assume that calling cached.tree will fail, and handle it gracefully.
    """
    titles = []
    cut_time: datetime | None = None
    if time_delta_on_index:
        # We want to filter the captures on the index
        cut_time = (datetime.now() - timedelta(**time_delta_on_index))
        cut_time_with_tz = cut_time.replace(tzinfo=timezone.utc)

    lookyloo.update_cache_index()
    for cached in lookyloo.sorted_capture_cache(index_cut_time=cut_time):
        if cut_time and cached.timestamp < cut_time_with_tz:
            continue

        if category and not get_indexing(flask_login.current_user).capture_in_category(cached.uuid, category):
            continue

        if show_hidden:
            # Only display the hidden ones
            if not cached.no_index:
                continue
        elif cached.no_index:
            continue

        if not show_error and cached.error:
            continue

        titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
                       cached.redirects))
    titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
    return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
                           show_hidden=show_hidden,
                           show_project_page=get_config('generic', 'show_project_page'),
                           enable_takedown_form=get_config('generic', 'enable_takedown_form'),
                           version=pkg_version)


def get_index_params(request: Request) -> tuple[bool, str]:
    show_error: bool = True
    category: str = ''
    if hide_captures_with_error:
        show_error = True if (request.args.get('show_error') and request.args.get('show_error') == 'True') else False

    if enable_categorization:
        category = request.args['category'] if request.args.get('category') else ''
    return show_error, category


# ##### Index level methods #####

@app.route('/index', methods=['GET'])
def index() -> str:
    show_error, category = get_index_params(request)
    return index_generic(show_error=show_error, category=category)


@app.route('/hidden', methods=['GET'])
@flask_login.login_required  # type: ignore[misc]
def index_hidden() -> str:
    show_error, category = get_index_params(request)
    return index_generic(show_hidden=True, show_error=show_error, category=category)


@app.route('/cookies', methods=['GET'])
def cookies_lookup() -> str:
    cookies_names = []
    for name in get_indexing(flask_login.current_user).cookies_names:
        cookies_names.append((name, get_indexing(flask_login.current_user).get_captures_cookie_name_count(name)))
    return render_template('cookies.html', cookies_names=cookies_names)


@app.route('/hhhashes', methods=['GET'])
def hhhashes_lookup() -> str:
    hhhashes = []
    for hhh in get_indexing(flask_login.current_user).http_headers_hashes:
        hhhashes.append((hhh, get_indexing(flask_login.current_user).get_captures_hhhash_count(hhh)))
    return render_template('hhhashes.html', hhhashes=hhhashes)


@app.route('/favicons', methods=['GET'])
def favicons_lookup() -> str:
    favicons = []
    for sha512, freq in get_indexing(flask_login.current_user).favicons:
        favicon = get_indexing(flask_login.current_user).get_favicon(sha512)
        if not favicon:
            continue
        favicon_b64 = base64.b64encode(favicon).decode()
        nb_captures = get_indexing(flask_login.current_user).favicon_number_captures(sha512)
        favicons.append((sha512, freq, nb_captures, favicon_b64))
    return render_template('favicons.html', favicons=favicons)


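# Overview of the indexed resources (by body hash), with known-content context and a
# sample capture for each of them.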
@app.route('/ressources', methods=['GET'])
def ressources() -> str:
    ressources = []
    for h in get_indexing(flask_login.current_user).ressources:
        freq = get_indexing(flask_login.current_user).get_captures_body_hash_count(h)
        context = lookyloo.context.find_known_content(h)
        # Only get the recent captures
        for capture_uuid, capture_ts in get_indexing(flask_login.current_user).get_captures_body_hash(h):
            url_nodes = get_indexing(flask_login.current_user).get_capture_body_hash_nodes(capture_uuid, h)
            url_node = url_nodes.pop()
            ressource = lookyloo.get_ressource(capture_uuid, url_node, h)
            if not ressource:
                continue
            ressources.append((h, freq, context.get(h), capture_uuid, url_node, ressource[0], ressource[2]))
    return render_template('ressources.html', ressources=ressources)


@app.route('/categories', methods=['GET'])
def categories() -> str:
    return render_template('categories.html', categories=get_indexing(flask_login.current_user).categories)


@app.route('/rebuild_all')
@flask_login.login_required  # type: ignore[misc]
def rebuild_all() -> WerkzeugResponse:
    lookyloo.rebuild_all()
    return redirect(url_for('index'))


@app.route('/rebuild_cache')
@flask_login.login_required  # type: ignore[misc]
def rebuild_cache() -> WerkzeugResponse:
    lookyloo.rebuild_cache()
    return redirect(url_for('index'))


@app.route('/search', methods=['GET', 'POST'])
def search() -> str | Response | WerkzeugResponse:
    if request.form.get('url'):
        quoted_url: str = quote_plus(request.form['url'])
        return redirect(url_for('url_details', url=quoted_url))
    if request.form.get('hostname'):
        return redirect(url_for('hostname_details', hostname=request.form.get('hostname')))
    if request.form.get('ressource'):
        return redirect(url_for('body_hash_details', body_hash=request.form.get('ressource')))
    if request.form.get('cookie'):
        return redirect(url_for('cookies_name_detail', cookie_name=request.form.get('cookie')))
    return render_template('search.html')


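# Render the capture form, optionally pre-filled with settings from an earlier capture
# or with the user's own configuration.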
def _prepare_capture_template(user_ua: str | None, predefined_settings: dict[str, Any] | None=None, *,
                              user_config: dict[str, Any] | None=None) -> str:
    return render_template('capture.html', user_agents=user_agents.user_agents,
                           default=user_agents.default,
                           personal_ua=user_ua,
                           default_public=get_config('generic', 'default_public'),
                           devices=lookyloo.get_playwright_devices(),
                           predefined_settings=predefined_settings if predefined_settings else {},
                           user_config=user_config,
                           show_project_page=get_config('generic', 'show_project_page'),
                           version=pkg_version,
                           has_global_proxy=True if lookyloo.global_proxy else False)


@app.route('/recapture/<string:tree_uuid>', methods=['GET'])
def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse:
    cache = lookyloo.capture_cache(tree_uuid)
    if cache and hasattr(cache, 'capture_dir'):
        if capture_settings := lookyloo.get_capture_settings(tree_uuid):
            return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),
                                             predefined_settings=capture_settings.model_dump(exclude_none=True))
    flash(f'Unable to find the capture {tree_uuid} in the cache.', 'error')
    return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))


@app.route('/ressource_by_hash/<string:sha512>', methods=['GET'])
@file_response  # type: ignore[misc]
def ressource_by_hash(sha512: str) -> Response:
    content_fallback = f'Unable to find "{sha512}"'
    if uuids := get_indexing(flask_login.current_user).get_hash_uuids(sha512):
        # got UUIDs for this hash
        capture_uuid, urlnode_uuid = uuids
        content_fallback += f' in capture "{capture_uuid}" and node "{urlnode_uuid}"'
        if ressource := lookyloo.get_ressource(capture_uuid, urlnode_uuid, sha512):
            filename, body, mimetype = ressource
            return send_file(body, as_attachment=True, download_name=filename)

    return send_file(BytesIO(content_fallback.encode()), as_attachment=True, download_name='Unknown_Hash.txt')


# ################## Submit existing capture ##################

@app.route('/submit_capture', methods=['GET', 'POST'])
def submit_capture() -> str | Response | WerkzeugResponse:

    if request.method == 'POST':
        listing = True if request.form.get('listing') else False
        har: dict[str, Any] | None = None
        html: str | None = None
        last_redirected_url: str | None = None
        screenshot: bytes | None = None
        if 'har_file' in request.files and request.files['har_file']:
            uuid = str(uuid4())
            har = json.loads(request.files['har_file'].stream.read())
            last_redirected_url = request.form.get('landing_page')
            if 'screenshot_file' in request.files:
                screenshot = request.files['screenshot_file'].stream.read()
            if 'html_file' in request.files:
                html = request.files['html_file'].stream.read().decode()
            lookyloo.store_capture(uuid, is_public=listing, har=har,
                                   last_redirected_url=last_redirected_url,
                                   png=screenshot, html=html)
            return redirect(url_for('tree', tree_uuid=uuid))
        elif 'full_capture' in request.files and request.files['full_capture']:
            # it *only* accepts a lookyloo export.
            full_capture_file = BytesIO(request.files['full_capture'].stream.read())
            uuid, messages = lookyloo.unpack_full_capture_archive(full_capture_file, listing)
            if 'errors' in messages and messages['errors']:
                for error in messages['errors']:
                    flash(error, 'error')
            else:
                if 'warnings' in messages:
                    for warning in messages['warnings']:
                        flash(warning, 'warning')
                return redirect(url_for('tree', tree_uuid=uuid))
        else:
            flash('Invalid submission: please submit at least a HAR file.', 'error')

    return render_template('submit_capture.html',
                           default_public=get_config('generic', 'default_public'),
                           public_domain=lookyloo.public_domain)


# #############################################################

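# Main capture endpoint: builds the capture settings from the submitted form and
# enqueues the capture(s) via Lacus.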
@app.route('/capture', methods=['GET', 'POST'])
def capture_web() -> str | Response | WerkzeugResponse:
    user_config: dict[str, Any] | None = None
    if flask_login.current_user.is_authenticated:
        user = flask_login.current_user.get_id()
        user_config = load_user_config(user)
    else:
        user = src_request_ip(request)

    if request.method == 'POST':
        if not (request.form.get('url') or request.form.get('urls') or 'document' in request.files):
            flash('Invalid submission: please submit at least a URL or a document.', 'error')
            return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))

        capture_query: dict[str, Any] = {}
        # check if the post request has the file part
        if 'cookies' in request.files and request.files['cookies'].filename:
            capture_query['cookies'] = load_cookies(request.files['cookies'].stream.read())

        if request.form.get('device_name'):
            capture_query['device_name'] = request.form['device_name']
        elif request.form.get('freetext_ua'):
            capture_query['user_agent'] = request.form['freetext_ua']
        elif request.form.get('personal_ua') and request.headers.get('User-Agent'):
            capture_query['user_agent'] = request.headers['User-Agent']
        else:
            capture_query['user_agent'] = request.form['user_agent']
            capture_query['os'] = request.form['os']
            browser = request.form['browser']
            if browser in ['chromium', 'firefox', 'webkit']:
                # Will be guessed otherwise.
                capture_query['browser'] = browser

        capture_query['listing'] = True if request.form.get('listing') else False
        capture_query['allow_tracking'] = True if request.form.get('allow_tracking') else False
        capture_query['java_script_enabled'] = True if request.form.get('java_script_enabled') else False
        if request.form.get('referer'):
            capture_query['referer'] = request.form['referer']

        if request.form.get('dnt'):
            capture_query['dnt'] = request.form['dnt']

        if request.form.get('headers'):
            capture_query['headers'] = request.form['headers']

        if request.form.get('timezone_id'):
            capture_query['timezone_id'] = request.form['timezone_id']

        if request.form.get('locale'):
            capture_query['locale'] = request.form['locale']

        if request.form.get('geo_longitude') and request.form.get('geo_latitude'):
            capture_query['geolocation'] = {'longitude': float(request.form['geo_longitude']),
                                            'latitude': float(request.form['geo_latitude'])}

        if request.form.get('http_auth_username') and request.form.get('http_auth_password'):
            capture_query['http_credentials'] = {'username': request.form['http_auth_username'],
                                                 'password': request.form['http_auth_password']}

        if request.form.get('color_scheme'):
            capture_query['color_scheme'] = request.form['color_scheme']

        if request.form.get('proxy'):
            parsed_proxy = urlparse(request.form['proxy'])
            if parsed_proxy.scheme and parsed_proxy.hostname and parsed_proxy.port:
                if parsed_proxy.scheme in ['http', 'https', 'socks5', 'socks5h']:
                    if (parsed_proxy.username and parsed_proxy.password) or (not parsed_proxy.username and not parsed_proxy.password):
                        capture_query['proxy'] = request.form['proxy']
                    else:
                        flash('You need to enter a username AND a password for your proxy.', 'error')
                else:
                    flash('Proxy scheme not supported: must be http(s) or socks5(h).', 'error')
            else:
                flash('Invalid proxy: Check that you entered a scheme, a hostname and a port.', 'error')

        # auto report
        if flask_login.current_user.is_authenticated:
            if request.form.get('auto-report'):
                capture_query['auto_report'] = {
                    'email': request.form.get('email', ""),
                    'comment': request.form.get('comment', ""),
                    'recipient_mail': request.form.get('recipient-mail', "")
                }

        if request.form.get('url'):
            capture_query['url'] = request.form['url']
            perma_uuid = lookyloo.enqueue_capture(CaptureSettings(**capture_query), source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
            time.sleep(2)
            return redirect(url_for('tree', tree_uuid=perma_uuid))
        elif request.form.get('urls'):
            # bulk query
            bulk_captures = []
            for url in request.form['urls'].strip().split('\n'):
                if not url:
                    continue
                query = capture_query.copy()
                query['url'] = url
                new_capture_uuid = lookyloo.enqueue_capture(CaptureSettings(**query), source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
                bulk_captures.append((new_capture_uuid, url))

            return render_template('bulk_captures.html', bulk_captures=bulk_captures)
        elif 'document' in request.files:
            # File upload
            capture_query['document'] = base64.b64encode(request.files['document'].stream.read()).decode()
            if request.files['document'].filename:
                capture_query['document_name'] = request.files['document'].filename
            else:
                capture_query['document_name'] = 'unknown_name.bin'
            perma_uuid = lookyloo.enqueue_capture(CaptureSettings(**capture_query), source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
            time.sleep(2)
            return redirect(url_for('tree', tree_uuid=perma_uuid))
        else:
            flash('Invalid submission: please submit at least a URL or a document.', 'error')
    elif request.method == 'GET' and request.args.get('url'):
        url = unquote_plus(request.args['url']).strip()
        capture_query = {'url': url}
        perma_uuid = lookyloo.enqueue_capture(CaptureSettings(**capture_query), source='web', user=user, authenticated=flask_login.current_user.is_authenticated)
        return redirect(url_for('tree', tree_uuid=perma_uuid))

    # render template
    return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),
                                     user_config=user_config)


@app.route('/simple_capture', methods=['GET', 'POST'])
@flask_login.login_required  # type: ignore[misc]
def simple_capture() -> str | Response | WerkzeugResponse:
    user = flask_login.current_user.get_id()
    if request.method == 'POST':
        if not (request.form.get('url') or request.form.get('urls')):
            flash('Invalid submission: please submit at least a URL.', 'error')
            return render_template('simple_capture.html')
        capture_query: dict[str, Any] = {}
        if request.form.get('url'):
            capture_query['url'] = request.form['url']
            perma_uuid = lookyloo.enqueue_capture(CaptureSettings(**capture_query), source='web', user=user,
                                                  authenticated=flask_login.current_user.is_authenticated)
            time.sleep(2)
            if perma_uuid:
                flash('Recording is in progress and is reported automatically.', 'success')
            return redirect(url_for('simple_capture'))
        elif request.form.get('urls'):
            for url in request.form['urls'].strip().split('\n'):
                if not url:
                    continue
                query = capture_query.copy()
                query['url'] = url
                new_capture_uuid = lookyloo.enqueue_capture(CaptureSettings(**query), source='web', user=user,
                                                            authenticated=flask_login.current_user.is_authenticated)
                if new_capture_uuid:
                    flash('Recording is in progress and is reported automatically.', 'success')
            return redirect(url_for('simple_capture'))
    # render template
    return render_template('simple_capture.html')


@app.route('/cookies/<string:cookie_name>', methods=['GET'])
def cookies_name_detail(cookie_name: str) -> str:
    captures = get_cookie_name_investigator(cookie_name.strip())
    return render_template('cookie_name.html', cookie_name=cookie_name, captures=captures)


@app.route('/hhhdetails/<string:hhh>', methods=['GET'])
def hhh_detail(hhh: str) -> str:
    from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
    captures, headers = get_hhh_investigator(hhh.strip())
    return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers, from_popup=from_popup)


@app.route('/identifier_details/<string:identifier_type>/<string:identifier>', methods=['GET'])
def identifier_details(identifier_type: str, identifier: str) -> str:
    captures = get_identifier_investigator(identifier_type, identifier)
    return render_template('identifier_details.html', identifier_type=identifier_type,
                           identifier=identifier,
                           captures=captures)


@app.route('/capture_hash_details/<string:hash_type>/<string:h>', methods=['GET'])
def capture_hash_details(hash_type: str, h: str) -> str:
    captures = get_capture_hash_investigator(hash_type, h)
    return render_template('hash_type_details.html', hash_type=hash_type,
                           h=h,
                           captures=captures)


@app.route('/favicon_details/<string:favicon_sha512>', methods=['GET'])
@app.route('/favicon_details/<string:favicon_sha512>/<int:get_probabilistic>', methods=['GET'])
def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str:
    _get_prob = bool(get_probabilistic)
    captures, favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob)
    mimetype, b64_favicon, mmh3_shodan = favicon
    return render_template('favicon_details.html', favicon_sha512=favicon_sha512,
                           captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan,
                           probabilistic_favicons=probabilistic_favicons)


@app.route('/body_hashes/<string:body_hash>', methods=['GET'])
def body_hash_details(body_hash: str) -> str:
    from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False
    captures = _get_body_hash_investigator(body_hash.strip())
    return render_template('body_hash.html', body_hash=body_hash, captures=captures, from_popup=from_popup)


@app.route('/urls/<string:url>', methods=['GET'])
def url_details(url: str) -> str:
    url = unquote_plus(url).strip()
    captures = get_url_investigator(url)
    return render_template('url.html', url=url, captures=captures)


@app.route('/hostnames/<string:hostname>', methods=['GET'])
def hostname_details(hostname: str) -> str:
    captures = get_hostname_investigator(hostname.strip())
    return render_template('hostname.html', hostname=hostname, captures=captures)


@app.route('/stats', methods=['GET'])
def statsfull() -> str:
    stats = lookyloo.get_stats()
    return render_template('stats.html', stats=stats)


@app.route('/whois/<string:query>', methods=['GET'])
@app.route('/whois/<string:query>/<int:email_only>', methods=['GET'])
@file_response  # type: ignore[misc]
def whois(query: str, email_only: int=0) -> Response:
    to_return = lookyloo.uwhois.whois(query, bool(email_only))
    if isinstance(to_return, str):
        return send_file(BytesIO(to_return.encode()),
                         mimetype='text/plain', as_attachment=True, download_name=f'whois.{query}.txt')
    return jsonify(to_return)


# ##### Methods related to a specific URLNode #####

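# The endpoints below return per-node artifacts (cookies, rendered URLs) as downloadable text files.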
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/request_cookies', methods=['GET'])
@file_response  # type: ignore[misc]
def urlnode_request_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.request_cookie:
        return None

    return send_file(BytesIO(json.dumps(urlnode.request_cookie, indent=2).encode()),
                     mimetype='text/plain', as_attachment=True, download_name='request_cookies.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/response_cookies', methods=['GET'])
@file_response  # type: ignore[misc]
def urlnode_response_cookies(tree_uuid: str, node_uuid: str) -> Response | None:
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.response_cookie:
        return None

    return send_file(BytesIO(json.dumps(urlnode.response_cookie, indent=2).encode()),
                     mimetype='text/plain', as_attachment=True, download_name='response_cookies.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
@file_response  # type: ignore[misc]
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
    # Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at some point
    # we have multiple pages rendered on one tree, it will be a problem.
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html:
        return None

    ct = lookyloo.get_crawled_tree(tree_uuid)
    not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
                             - set(ct.root_hartree.all_url_requests.keys()))
    to_return = StringIO()
    to_return.writelines([f'{u}\n' for u in not_loaded_urls])
    return send_file(BytesIO(to_return.getvalue().encode()), mimetype='text/plain',
                     as_attachment=True, download_name='urls_in_rendered_content.txt')


2020-10-09 18:05:04 +02:00
|
|
|
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/rendered_content', methods=['GET'])
|
2024-01-12 17:15:41 +01:00
|
|
|
@file_response # type: ignore[misc]
|
|
|
|
def urlnode_rendered_content(tree_uuid: str, node_uuid: str) -> Response | None:
|
2020-10-09 18:05:04 +02:00
|
|
|
urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
|
|
|
|
if not urlnode.rendered_html:
|
2024-01-12 17:15:41 +01:00
|
|
|
return None
|
2020-10-09 18:05:04 +02:00
|
|
|
return send_file(BytesIO(urlnode.rendered_html.getvalue()), mimetype='text/plain',
|
2022-11-02 12:23:41 +01:00
|
|
|
as_attachment=True, download_name='rendered_content.txt')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/posted_data', methods=['GET'])
@file_response  # type: ignore[misc]
def urlnode_post_request(tree_uuid: str, node_uuid: str) -> Response | None:
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.posted_data:
        return None
    posted: str | bytes
    if isinstance(urlnode.posted_data, (dict, list)):
        # JSON blob, pretty print.
        posted = json.dumps(urlnode.posted_data, indent=2)
    else:
        posted = urlnode.posted_data

    if isinstance(posted, str):
        to_return = BytesIO(posted.encode())
        is_blob = False
    else:
        to_return = BytesIO(posted)
        is_blob = True
    to_return.seek(0)

    if is_blob:
        return send_file(to_return, mimetype='application/octet-stream',
                         as_attachment=True, download_name='posted_data.bin')
    else:
        return send_file(to_return, mimetype='text/plain',
                         as_attachment=True, download_name='posted_data.txt')
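
# Sketch of the dispatch above, with illustrative values: dict/list payloads are
# pretty-printed and served as text, anything already bytes is served verbatim
# as an octet-stream.
#
#   json.dumps({'user': 'jdoe'}, indent=2)  # -> str, downloaded as posted_data.txt
#   b'\x00\x01\x02'                         # -> bytes, downloaded as posted_data.bin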


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource', methods=['POST', 'GET'])
@file_response  # type: ignore[misc]
def get_ressource(tree_uuid: str, node_uuid: str) -> Response:
    if request.method == 'POST':
        h_request = request.form.get('ressource_hash')
    else:
        h_request = None
    ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_request)
    if ressource:
        filename, to_return, mimetype = ressource
        if not mimetype.startswith('image'):
            # Force a .txt extension
            filename += '.txt'
    else:
        to_return = BytesIO(b'Unknown Hash')
        filename = 'file.txt'
        mimetype = 'text/plain'
    return send_file(to_return, mimetype=mimetype, as_attachment=True, download_name=filename)
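
# Hypothetical usage of the endpoint above to fetch a specific body by hash
# (assumes a local instance and that `body_hash` is a body hash shown in the
# tree, e.g. in the hostnode popup):
#
#   import requests
#   base = 'http://127.0.0.1:5100'  # assumed local instance
#   url = f'{base}/tree/{tree_uuid}/url/{node_uuid}/ressource'
#   r = requests.post(url, data={'ressource_hash': body_hash})
#   with open('ressource.txt', 'wb') as f:
#       f.write(r.content)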


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview', methods=['GET'])
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/ressource_preview/<string:h_ressource>', methods=['GET'])
@file_response  # type: ignore[misc]
def get_ressource_preview(tree_uuid: str, node_uuid: str, h_ressource: str | None = None) -> Response:
    ressource = lookyloo.get_ressource(tree_uuid, node_uuid, h_ressource)
    if not ressource:
        return Response('No preview available.', mimetype='text/plain')
    filename, r, mimetype = ressource
    if mimetype.startswith('image'):
        return send_file(r, mimetype=mimetype,
                         as_attachment=True, download_name=filename)
    return Response('No preview available.', mimetype='text/plain')


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/hashes', methods=['GET'])
@file_response  # type: ignore[misc]
def hashes_urlnode(tree_uuid: str, node_uuid: str) -> Response:
    hashes = lookyloo.get_hashes(tree_uuid, urlnode_uuid=node_uuid)
    return send_file(BytesIO('\n'.join(hashes).encode()),
                     mimetype='text/plain', as_attachment=True, download_name='hashes.txt')
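
# The endpoint above returns one hash per line; a sketch of verifying a
# downloaded body against it (assuming the hashes are SHA512 hex digests of the
# raw body, and that `r` is the response from the endpoint):
#
#   import hashlib
#   hashlib.sha512(body_bytes).hexdigest() in r.text.splitlines()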


@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/add_context', methods=['POST'])
@flask_login.login_required  # type: ignore[misc]
def add_context(tree_uuid: str, node_uuid: str) -> WerkzeugResponse | None:
    if not enable_context_by_users:
        return redirect(url_for('ressources'))

    context_data = request.form
    ressource_hash: str = context_data['hash_to_contextualize']
    callback_str: str = context_data['callback_str']
    legitimate: bool = bool(context_data.get('legitimate'))
    malicious: bool = bool(context_data.get('malicious'))
    details: dict[str, dict[str, Any]] = {'malicious': {}, 'legitimate': {}}
    if malicious:
        malicious_details = {}
        if context_data.get('malicious_type'):
            malicious_details['type'] = context_data['malicious_type']
        if context_data.get('malicious_target'):
            malicious_details['target'] = context_data['malicious_target']
        details['malicious'] = malicious_details
    if legitimate:
        legitimate_details = {}
        if context_data.get('legitimate_domain'):
            legitimate_details['domain'] = context_data['legitimate_domain']
        if context_data.get('legitimate_description'):
            legitimate_details['description'] = context_data['legitimate_description']
        details['legitimate'] = legitimate_details
    lookyloo.add_context(tree_uuid, urlnode_uuid=node_uuid, ressource_hash=ressource_hash,
                         legitimate=legitimate, malicious=malicious, details=details)
    if callback_str == 'hostnode_popup':
        hostnode_uuid = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid).hostnode_uuid
        return redirect(url_for('hostnode_popup', tree_uuid=tree_uuid, node_uuid=hostnode_uuid))
    elif callback_str == 'ressources':
        return redirect(url_for('ressources'))
    return None
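
# The form fields consumed by add_context, as a hypothetical request payload
# (field names match the handler above; the login session and instance URL are
# assumed, not shown):
#
#   payload = {
#       'hash_to_contextualize': body_hash,
#       'callback_str': 'ressources',
#       'malicious': 'on',                  # any truthy value marks it malicious
#       'malicious_type': 'phishing',       # illustrative values
#       'malicious_target': 'example bank',
#   }
#   # session.post(f'{base}/tree/{tree_uuid}/url/{node_uuid}/add_context', data=payload)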


# Query API
authorizations = {
    'apikey': {
        'type': 'apiKey',
        'in': 'header',
        'name': 'Authorization'
    }
}
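
# With this scheme, authenticated API calls carry the key in the
# 'Authorization' header. A hedged sketch (the endpoint path and `api_key` are
# placeholders, not part of this module):
#
#   import requests
#   headers = {'Authorization': api_key}
#   # requests.get(f'{base}/some/authenticated/endpoint', headers=headers)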

CORS(app, resources={r"/submit": {"origins": "*"}})

api = Api(app, title='Lookyloo API',
          description='API to submit captures and query a lookyloo instance.',
          doc='/doc/',
          authorizations=authorizations,
          version=pkg_version)

api.add_namespace(generic_api)