From bf7ee0508b4f161c1745a0bf6ed8094b7822b3c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= <raphael@vinot.info>
Date: Tue, 14 May 2024 18:52:26 +0200
Subject: [PATCH] new: find related captures by hostname and URL

---
 .gitignore                                  |  29 ++++-
 website/web/__init__.py                     | 107 +++++-------------
 website/web/genericapi.py                   |  55 +++++++++-
 website/web/helpers.py                      |  21 +++-
 website/web/templates/body_hash.html        |   4 +-
 website/web/templates/hostname.html         |  38 +------
 website/web/templates/tree.html             |  65 ++++++++++-
 website/web/templates/tree_body_hashes.html |   2 +-
 website/web/templates/tree_hostnames.html   |  64 +++++++++++
 website/web/templates/tree_urls.html        |  38 +++++++
 website/web/templates/url.html              | 115 ++++++++------------
 11 files changed, 342 insertions(+), 196 deletions(-)
 create mode 100644 website/web/templates/tree_hostnames.html
 create mode 100644 website/web/templates/tree_urls.html

diff --git a/.gitignore b/.gitignore
index e652d89..3b2b988 100644
--- a/.gitignore
+++ b/.gitignore
@@ -112,11 +112,14 @@ FileSaver.js
 d3.v5.min.js
 d3.v5.js
 
-cache.pid
-dump.rdb
+*.pid
+*.rdb
+*log*
+full_index/db
 
 # Local config files
 config/*.json
+config/users/*.json
 config/*.json.bkp
 config/takedown_filters.ini
 
@@ -126,3 +129,25 @@ known_content_user/
 user_agents/
 
 .DS_Store
+
+.idea
+
+archived_captures
+discarded_captures
+removed_captures
+
+website/web/static/d3.min.js
+website/web/static/datatables.min.css
+website/web/static/datatables.min.js
+website/web/static/jquery.min.js
+
+# Modules
+circl_pypdns
+eupi
+own_user_agents
+phishtank
+riskiq
+sanejs
+urlhaus
+urlscan
+vt_url
diff --git a/website/web/__init__.py b/website/web/__init__.py
index afb26d6..bd3357b 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -39,8 +39,7 @@ from pymisp import MISPEvent, MISPServerError  # type: ignore[attr-defined]
 from werkzeug.security import check_password_hash
 from werkzeug.wrappers.response import Response as WerkzeugResponse
 
-from lookyloo import Lookyloo, CaptureSettings, Indexing
-from lookyloo.capturecache import CaptureCache
+from lookyloo import Lookyloo, CaptureSettings
 from lookyloo.default import get_config
 from lookyloo.exceptions import MissingUUID, NoValidHarFile
 from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
@@ -54,7 +53,7 @@ else:
 from .genericapi import api as generic_api
 from .helpers import (User, build_users_table, get_secret_key,
                       load_user_from_request, src_request_ip, sri_load,
-                      get_lookyloo_instance)
+                      get_lookyloo_instance, get_indexing)
 from .proxied import ReverseProxied
 
 logging.config.dictConfig(get_config('logging'))
@@ -270,23 +269,6 @@ def file_response(func):  # type: ignore[no-untyped-def]
 
 # ##### Methods querying the indexes #####
 
-@functools.cache
-def get_indexing(user: User | None) -> Indexing:
-    '''Depending if we're logged in or not, we (can) get different indexes:
-        if index_everything is enabled, we have an index in kvrocks that contains all
-        the indexes for all the captures.
-        It is only accessible to the admin user.
-    '''
-    if not get_config('generic', 'index_everything'):
-        return Indexing()
-
-    if not user or not user.is_authenticated:
-        # No user or anonymous
-        return Indexing()
-    # Logged in user
-    return Indexing(full_index=True)
-
-
 def _get_body_hash_investigator(body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]:
     '''Returns all the captures related to a hash (sha512), used in the web interface.'''
     total_captures, details = get_indexing(flask_login.current_user).get_body_hash_captures(body_hash, limit=-1)
@@ -365,70 +347,33 @@ def get_all_hostnames(capture_uuid: str, /) -> dict[str, dict[str, int | list[UR
     return to_return
 
 
-def get_latest_url_capture(url: str, /) -> CaptureCache | None:
-    '''Get the most recent capture with this URL'''
-    captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_url(url))
-    if captures:
-        return captures[0]
-    return None
-
-
-def get_url_occurrences(url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
-    '''Get the most recent captures and URL nodes where the URL has been seen.'''
-    captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_url(url), cached_captures_only=cached_captures_only)
-
-    to_return: list[dict[str, Any]] = []
-    for capture in captures[:limit]:
-        ct = lookyloo.get_crawled_tree(capture.uuid)
-        to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid,
-                                                      'start_timestamp': capture.timestamp.isoformat(),
-                                                      'title': capture.title}
-        urlnodes: dict[str, dict[str, str]] = {}
-        for urlnode in ct.root_hartree.url_tree.search_nodes(name=url):
-            urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
-                                      'hostnode_uuid': urlnode.hostnode_uuid}
-            if hasattr(urlnode, 'body_hash'):
-                urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash
-        to_append['urlnodes'] = urlnodes
-        to_return.append(to_append)
-    return to_return
-
-
-def get_hostname_occurrences(hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
-    '''Get the most recent captures and URL nodes where the hostname has been seen.'''
-    captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_hostname(hostname), cached_captures_only=cached_captures_only)
-
-    to_return: list[dict[str, Any]] = []
-    for capture in captures[:limit]:
-        ct = lookyloo.get_crawled_tree(capture.uuid)
-        to_append: dict[str, str | list[Any] | dict[str, Any]] = {
-            'capture_uuid': capture.uuid,
-            'start_timestamp': capture.timestamp.isoformat(),
-            'title': capture.title}
-        hostnodes: list[str] = []
-        if with_urls_occurrences:
-            urlnodes: dict[str, dict[str, str]] = {}
-        for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname):
-            hostnodes.append(hostnode.uuid)
-            if with_urls_occurrences:
-                for urlnode in hostnode.urls:
-                    urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
-                                              'url': urlnode.name,
-                                              'hostnode_uuid': urlnode.hostnode_uuid}
-                    if hasattr(urlnode, 'body_hash'):
-                        urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash
-            to_append['hostnodes'] = hostnodes
-            if with_urls_occurrences:
-                to_append['urlnodes'] = urlnodes
-            to_return.append(to_append)
+def get_all_urls(capture_uuid: str, /) -> dict[str, dict[str, int | list[URLNode] | str]]:
+    '''For each URL in the tree of the capture: number of indexed captures with that URL, its nodes in this tree, and the URL quoted for use in a link.'''
+    ct = lookyloo.get_crawled_tree(capture_uuid)
+    to_return: dict[str, dict[str, list[URLNode] | int | str]] = {}
+    for node in ct.root_hartree.url_tree.traverse():
+        if not node.name:
+            continue
+        # Note for future: maybe get url, capture title, something better than just the hash to show to the user
+        if node.name not in to_return:  # keyed by URL, so a URL seen several times keeps all its nodes
+            captures = get_indexing(flask_login.current_user).get_captures_url(node.name)
+            to_return[node.name] = {'total_captures': len(captures), 'nodes': [], 'quoted_url': quote_plus(node.name)}
+        to_return[node.name]['nodes'].append(node)  # type: ignore[union-attr]
+    return to_return
 
 
 def get_hostname_investigator(hostname: str) -> list[tuple[str, str, str, datetime]]:
+    '''Returns all the captures loading content from that hostname, used in the web interface.'''
     cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_hostname(hostname=hostname)])
     return [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures]
 
 
+def get_url_investigator(url: str) -> list[tuple[str, str, str, datetime]]:
+    '''Returns (uuid, title, last redirect, timestamp) for every capture indexed with that url, used in the web interface.'''
+    capture_uuids = list(get_indexing(flask_login.current_user).get_captures_url(url=url))
+    return [(entry.uuid, entry.title, entry.redirects[-1], entry.timestamp) for entry in lookyloo.sorted_capture_cache(capture_uuids)]
+
+
 def get_cookie_name_investigator(cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]:
     '''Returns all the captures related to a cookie name entry, used in the web interface.'''
     cached_captures = lookyloo.sorted_capture_cache([entry[0] for entry in get_indexing(flask_login.current_user).get_cookies_names_captures(cookie_name)])
@@ -1282,6 +1227,12 @@ def tree_hostnames(tree_uuid: str) -> str:
     return render_template('tree_hostnames.html', tree_uuid=tree_uuid, hostnames=hostnames)
 
 
+@app.route('/tree/<string:tree_uuid>/urls', methods=['GET'])
+def tree_urls(tree_uuid: str) -> str:
+    '''Render the table of all the URLs of the capture (the tree page loads it in its URLs modal).'''
+    return render_template('tree_urls.html', tree_uuid=tree_uuid, urls=get_all_urls(tree_uuid))
+
+
 @app.route('/tree/<string:tree_uuid>/pandora', methods=['GET', 'POST'])
 def pandora_submit(tree_uuid: str) -> dict[str, Any] | Response:
     node_uuid = None
@@ -1752,8 +1703,8 @@ def body_hash_details(body_hash: str) -> str:
 @app.route('/urls/<string:url>', methods=['GET'])
 def url_details(url: str) -> str:
     url = unquote_plus(url).strip()
-    hits = get_url_occurrences(url, limit=50)
-    return render_template('url.html', url=url, hits=hits)
+    captures = get_url_investigator(url)
+    return render_template('url.html', url=url, captures=captures)
 
 
 @app.route('/hostnames/<string:hostname>', methods=['GET'])
diff --git a/website/web/genericapi.py b/website/web/genericapi.py
index 9e51bf7..56332bd 100644
--- a/website/web/genericapi.py
+++ b/website/web/genericapi.py
@@ -21,7 +21,7 @@ from lookyloo import CaptureSettings, Lookyloo
 from lookyloo.comparator import Comparator
 from lookyloo.exceptions import MissingUUID, NoValidHarFile
 
-from .helpers import build_users_table, load_user_from_request, src_request_ip, get_lookyloo_instance
+from .helpers import build_users_table, load_user_from_request, src_request_ip, get_lookyloo_instance, get_indexing
 
 api = Namespace('GenericAPI', description='Generic Lookyloo API', path='/')
 
@@ -305,6 +305,27 @@ class HashInfo(Resource):  # type: ignore[misc]
         return to_return
 
 
+def get_url_occurrences(url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
+    '''Get the first `limit` captures of the sorted capture cache where the URL has been seen, each with its matching URL nodes.'''
+    indexed_uuids = get_indexing(flask_login.current_user).get_captures_url(url)
+    sorted_captures = lookyloo.sorted_capture_cache(indexed_uuids, cached_captures_only=cached_captures_only)
+
+    occurrences: list[dict[str, Any]] = []
+    for cached in sorted_captures[:limit]:
+        tree = lookyloo.get_crawled_tree(cached.uuid)
+        nodes_details: dict[str, dict[str, str]] = {}
+        for node in tree.root_hartree.url_tree.search_nodes(name=url):
+            details = {'start_time': node.start_time.isoformat(), 'hostnode_uuid': node.hostnode_uuid}
+            if hasattr(node, 'body_hash'):  # only present on some nodes, hence the optional 'hash' key
+                details['hash'] = node.body_hash
+            nodes_details[node.uuid] = details
+        occurrences.append({'capture_uuid': cached.uuid,
+                            'start_timestamp': cached.timestamp.isoformat(),
+                            'title': cached.title,
+                            'urlnodes': nodes_details})
+    return occurrences
+
+
 url_info_fields = api.model('URLInfoFields', {
     'url': fields.String(description="The URL to search", required=True),
     'limit': fields.Integer(description="The maximal amount of captures to return", example=20),
@@ -318,12 +339,41 @@ class URLInfo(Resource):  # type: ignore[misc]
 
     @api.doc(body=url_info_fields)  # type: ignore[misc]
     def post(self) -> list[dict[str, Any]]:
-        from . import get_url_occurrences
         to_query: dict[str, Any] = request.get_json(force=True)
         occurrences = get_url_occurrences(to_query.pop('url'), **to_query)
         return occurrences
 
 
+def get_hostname_occurrences(hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]:
+    '''Get the most recent captures (at most `limit`) where the hostname has been seen, with the matching host nodes and, if with_urls_occurrences is set, their URL nodes.'''
+    captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_hostname(hostname), cached_captures_only=cached_captures_only)
+
+    to_return: list[dict[str, Any]] = []
+    for capture in captures[:limit]:
+        ct = lookyloo.get_crawled_tree(capture.uuid)
+        to_append: dict[str, str | list[Any] | dict[str, Any]] = {
+            'capture_uuid': capture.uuid,
+            'start_timestamp': capture.timestamp.isoformat(),
+            'title': capture.title}
+        hostnodes: list[str] = []
+        if with_urls_occurrences:
+            urlnodes: dict[str, dict[str, str]] = {}
+        for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname):
+            hostnodes.append(hostnode.uuid)
+            if with_urls_occurrences:
+                for urlnode in hostnode.urls:
+                    urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(),
+                                              'url': urlnode.name,
+                                              'hostnode_uuid': urlnode.hostnode_uuid}
+                    if hasattr(urlnode, 'body_hash'):
+                        urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash
+        to_append['hostnodes'] = hostnodes
+        if with_urls_occurrences:
+            to_append['urlnodes'] = urlnodes
+        to_return.append(to_append)  # once per capture, after all its host nodes have been collected
+    return to_return
+
+
 hostname_info_fields = api.model('HostnameInfoFields', {
     'hostname': fields.String(description="The hostname to search", required=True),
     'limit': fields.Integer(description="The maximal amount of captures to return", example=20),
@@ -337,7 +387,6 @@ class HostnameInfo(Resource):  # type: ignore[misc]
 
     @api.doc(body=hostname_info_fields)  # type: ignore[misc]
     def post(self) -> list[dict[str, Any]]:
-        from . import get_hostname_occurrences
         to_query: dict[str, Any] = request.get_json(force=True)
         return get_hostname_occurrences(to_query.pop('hostname'), **to_query)
 
diff --git a/website/web/helpers.py b/website/web/helpers.py
index 6464895..6d15aa3 100644
--- a/website/web/helpers.py
+++ b/website/web/helpers.py
@@ -6,14 +6,14 @@ import hashlib
 import json
 import os
 import re
-from functools import lru_cache
+from functools import lru_cache, cache
 from pathlib import Path
 
 import flask_login  # type: ignore[import-untyped]
 from flask import Request
 from werkzeug.security import generate_password_hash
 
-from lookyloo import Lookyloo
+from lookyloo import Lookyloo, Indexing
 from lookyloo.default import get_config, get_homedir
 
 __global_lookyloo_instance = None
@@ -113,3 +113,20 @@ def get_secret_key() -> bytes:
 def sri_load() -> dict[str, dict[str, str]]:
     with (get_homedir() / 'website' / 'web' / 'sri.txt').open() as f:
         return json.load(f)
+
+
+@cache
+def _indexing_instance(full_index: bool) -> Indexing:
+    '''Build the Indexing for the given index type; cached on the flag, so at most two instances are created.'''
+    return Indexing(full_index=True) if full_index else Indexing()
+
+
+def get_indexing(user: User | None) -> Indexing:
+    '''Depending if we're logged in or not, we (can) get different indexes:
+        if index_everything is enabled, we have an index in kvrocks that contains all
+        the indexes for all the captures.
+        It is only accessible to the admin user.
+    '''
+    # Cache on the derived flag, not on the user object: a cache keyed on users grows with every distinct user object.
+    full_index = bool(get_config('generic', 'index_everything') and user and user.is_authenticated)
+    return _indexing_instance(full_index)
diff --git a/website/web/templates/body_hash.html b/website/web/templates/body_hash.html
index 0fe7bb0..2a6a687 100644
--- a/website/web/templates/body_hash.html
+++ b/website/web/templates/body_hash.html
@@ -77,8 +77,8 @@
 <table id="bodyHashDetailsTable" class="table table-striped" style="width:100%">
   <thead>
    <tr>
-     <th>Timestamp</th>
-     <th>Title</th>
+     <th>Capture Time</th>
+     <th>Capture Title</th>
      <th>URL</th>
    </tr>
   </thead>
diff --git a/website/web/templates/hostname.html b/website/web/templates/hostname.html
index 701d829..0b7b419 100644
--- a/website/web/templates/hostname.html
+++ b/website/web/templates/hostname.html
@@ -1,42 +1,7 @@
 {% from "macros.html" import shorten_string %}
 
-{% if from_popup %}
-{% extends "main.html" %}
-
-{% from 'bootstrap5/utils.html' import render_messages %}
-
-{% block title %}{{ url }}{% endblock %}
-
-{% block scripts %}
-{{ super() }}
-<script type="text/javascript">
-    $('#table').DataTable( {
-        "order": [[ 0, "desc" ]],
-        "pageLength": 50,
-        "columnDefs": [{
-            "targets": 0,
-            "render": function ( data, type, row, meta ) {
-                let date = new Date(data);
-                return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString();
-            }
-        }]
-    });
-</script>
-
-<script type="text/javascript">
-    function openTreeInNewTab(treeUUID) {
-        window.opener.openTreeInNewTab(treeUUID);
-    };
-</script>
-{% endblock %}
-{%endif%}
-
 {% block content %}
 
-{% if from_popup %}
-<button onclick="window.history.back();" class="btn btn-primary" type="button">Go Back</button>
-{%endif%}
-
 <center>
     <h4>{{ hostname }}</h4>
 </center>
@@ -50,7 +15,8 @@
                     return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString();
                    }
                  },
-                 { width: '80%', targets: 1 }],
+                 { width: '40%', targets: 1 },
+                 { width: '40%', targets: 2 }],
   });
 </script>
 
diff --git a/website/web/templates/tree.html b/website/web/templates/tree.html
index f57bcf8..56c46e7 100644
--- a/website/web/templates/tree.html
+++ b/website/web/templates/tree.html
@@ -161,6 +161,20 @@
   });
   </script>
   <script>
+  $('#urlsModal').on('show.bs.modal', function(e) {
+      var button = $(e.relatedTarget);
+      var modal = $(this);
+      modal.find('.modal-body').load(button.data("remote"));
+  });
+  </script>
+  <script>
+  $('#urlDetailsModal').on('show.bs.modal', function(e) {
+      var button = $(e.relatedTarget);
+      var modal = $(this);
+      modal.find('.modal-body').load(button.data("remote"));
+  });
+  </script>
+  <script>
   $('#mispPushModal').on('show.bs.modal', function(e) {
       var button = $(e.relatedTarget);
       var modal = $(this);
@@ -343,23 +357,27 @@
 
               <a href="#bodyHashesModal" data-remote="{{ url_for('tree_body_hashes', tree_uuid=tree_uuid) }}"
                   data-bs-toggle="modal" data-bs-target="#bodyHashesModal" role="button"
-                  title="All ressources contained in the tree">Ressources Capture</a>
+                  title="All ressources contained in the tree">Ressources</a>
 
               <a href="#hostnamesModal" data-remote="{{ url_for('tree_hostnames', tree_uuid=tree_uuid) }}"
                   data-bs-toggle="modal" data-bs-target="#hostnamesModal" role="button"
-                  title="All hostnames contained in the tree">Hostnames Capture</a>
+                  title="All hostnames contained in the tree">Hostnames</a>
+
+              <a href="#urlsModal" data-remote="{{ url_for('tree_urls', tree_uuid=tree_uuid) }}"
+                 data-bs-toggle="modal" data-bs-target="#urlsModal" role="button"
+                 title="All URLs contained in the tree">URLs</a>
 
               <a href="#faviconsModal" data-remote="{{ url_for('tree_favicons', tree_uuid=tree_uuid) }}"
                   data-bs-toggle="modal" data-bs-target="#faviconsModal" role="button"
-                  title="Favicons found on the rendered page">Favicons Capture</a>
+                  title="Favicons found on the rendered page">Favicons</a>
 
               <a href="#captureHashesTypesModal" data-remote="{{ url_for('tree_capture_hashes_types', tree_uuid=tree_uuid) }}"
                   data-bs-toggle="modal" data-bs-target="#captureHashesTypesModal" role="button"
-                  title="Compare hashes of the rendered page">Capture hashes types</a>
+                  title="Compare hashes of the rendered page">(Fuzzy)Hashes types</a>
 
               <a href="#identifiersModal" data-remote="{{ url_for('tree_identifiers', tree_uuid=tree_uuid) }}"
                   data-bs-toggle="modal" data-bs-target="#identifiersModal" role="button"
-                  title="Identifiers found on the rendered page">Identifiers Capture</a>
+                  title="Identifiers found on the rendered page">Other Identifiers</a>
             </div>
           </div>
 
@@ -798,6 +816,43 @@
   </div>
 </div>
 
+<div class="modal fade" id="urlsModal" tabindex="-1" role="dialog">
+  <div class="modal-dialog modal-xl" role="document">
+      <div class="modal-content">
+          <div class="modal-header">
+              <h5 class="modal-title" id="urlsModalLabel">URLs in tree</h5>
+              <button type="button" class="btn btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+          </div>
+          <div class="modal-body">
+              ... loading urls ...
+          </div>
+          <div class="modal-footer">
+              <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
+          </div>
+      </div>
+  </div>
+</div>
+
+<div class="modal fade" id="urlDetailsModal" tabindex="-1" role="dialog">
+  <div class="modal-dialog modal-xl" role="document">
+      <div class="modal-content">
+          <div class="modal-header">
+              <h5 class="modal-title" id="urlDetailsModalLabel">Other occurrences of the URL</h5>
+              <button type="button" class="btn btn-close" data-bs-dismiss="modal" aria-label="Close"></button>
+          </div>
+          <div class="modal-body">
+              ... loading url details ...
+          </div>
+          <div class="modal-footer">
+              <a class="btn btn-primary" href="#urlsModal"
+                 data-remote="{{ url_for('tree_urls', tree_uuid=tree_uuid) }}"
+                 data-bs-toggle="modal" data-bs-target="#urlsModal" role="button">Back to capture's URLs</a>
+              <button type="button" class="btn btn-secondary" data-bs-dismiss="modal">Close</button>
+          </div>
+      </div>
+  </div>
+</div>
+
 <div class="modal fade" id="mispPushModal" tabindex="-1" role="dialog">
   <div class="modal-dialog modal-xl" role="document">
       <div class="modal-content">
diff --git a/website/web/templates/tree_body_hashes.html b/website/web/templates/tree_body_hashes.html
index 6544559..a836c2e 100644
--- a/website/web/templates/tree_body_hashes.html
+++ b/website/web/templates/tree_body_hashes.html
@@ -18,7 +18,7 @@
 <table id="bodyHashesTable" class="table table-striped" style="width:100%">
 <thead>
  <tr>
-   <th>Captures total</th>
+   <th>Number of captures</th>
    <th>File type</th>
    <th>Ressource URL in capture</th>
    <th>Hash (sha512)</th>
diff --git a/website/web/templates/tree_hostnames.html b/website/web/templates/tree_hostnames.html
new file mode 100644
index 0000000..831f529
--- /dev/null
+++ b/website/web/templates/tree_hostnames.html
@@ -0,0 +1,64 @@
+{% from "macros.html" import popup_icons_response %}
+
+<script type="text/javascript">
+  var table = new DataTable('#hostnamesTable', {
+    order: [[ 0, "desc" ]],
+    columnDefs: [{ width: '10%', targets: 0 },
+                 { width: '40%', targets: 1 },
+                 { width: '50%', targets: 2 }],
+    initComplete: function (settings, json) {
+      $('[data-bs-toggle="tooltip"]').tooltip({html: true});
+    }
+  }).on('draw', function() {
+    $('[data-bs-toggle="tooltip"]').tooltip({html: true});
+  });
+</script>
+
+<table id="hostnamesTable" class="table table-striped" style="width:100%">
+<thead>
+ <tr>
+   <th>Number of captures</th>
+   <th>Hostname</th>
+   <th>URLs</th>
+ </tr>
+</thead>
+<tbody>
+  {% for hostname, info in hostnames.items() %}
+  <tr>
+    <td>{{ info['total_captures'] }}</td>
+    <td>
+      <span class="d-inline-block text-break">
+         <a href="#hostnameDetailsModal" data-remote="{{ url_for('hostname_details', hostname=hostname) }}"
+            data-bs-toggle="modal" data-bs-target="#hostnameDetailsModal" role="button">
+            {{hostname}}
+         </a>
+      </span>
+    </td>
+    <td>
+      <p class="d-inline-flex gap-1">
+        <button class="btn btn-primary" type="button"
+                data-bs-toggle="collapse" data-bs-target="#collapseAllNodes_{{loop.index}}"
+                aria-expanded="false" aria-controls="collapseAllNodes_{{loop.index}}">
+          Show
+        </button>
+      </p>
+      <div class="collapse" id="collapseAllNodes_{{loop.index}}">
+          <div class="card card-body">
+            <span class="d-inline-block text-break">
+              <ul>
+              {% for node in info['nodes'] %}
+                <li>
+                  <p class="text-break">{{ node.name }}</p>
+                  <a href="#/" onclick="openTreeInNewTab('{{ tree_uuid }}', '{{ node.uuid }}')">Show on tree</a>
+                </li>
+              {% endfor %}
+              </ul>
+            </span>
+          </div>
+        </div>
+
+    </td>
+  </tr>
+  {% endfor %}
+</tbody>
+</table>
diff --git a/website/web/templates/tree_urls.html b/website/web/templates/tree_urls.html
new file mode 100644
index 0000000..ac3e5c8
--- /dev/null
+++ b/website/web/templates/tree_urls.html
@@ -0,0 +1,38 @@
+{% from "macros.html" import popup_icons_response %}
+
+<script type="text/javascript">
+  var table = new DataTable('#urlsTable', {
+    order: [[ 0, "desc" ]],
+    columnDefs: [{ width: '10%', targets: 0 },
+                 { width: '90%', targets: 1 }],
+    initComplete: function (settings, json) {
+      $('[data-bs-toggle="tooltip"]').tooltip({html: true});
+    }
+  }).on('draw', function() {
+    $('[data-bs-toggle="tooltip"]').tooltip({html: true});
+  });
+</script>
+
+<table id="urlsTable" class="table table-striped" style="width:100%">
+<thead>
+ <tr>
+   <th>Number of captures</th>
+   <th>URL</th>
+ </tr>
+</thead>
+<tbody>
+  {% for url, info in urls.items() %}
+  <tr>
+    <td>{{ info['total_captures'] }}</td>
+    <td>
+      <span class="d-inline-block text-break">
+         <a href="#urlDetailsModal" data-remote="{{ url_for('url_details', url=info['quoted_url']) }}"
+            data-bs-toggle="modal" data-bs-target="#urlDetailsModal" role="button">
+            {{url}}
+         </a>
+      </span>
+    </td>
+  </tr>
+  {% endfor %}
+</tbody>
+</table>
diff --git a/website/web/templates/url.html b/website/web/templates/url.html
index e60b4ce..393feb2 100644
--- a/website/web/templates/url.html
+++ b/website/web/templates/url.html
@@ -1,70 +1,51 @@
-{% extends "main.html" %}
-
-{% from 'bootstrap5/utils.html' import render_messages %}
-
-{% block title %}{{ url }}{% endblock %}
-
-{% block scripts %}
-{{ super() }}
-<script type="text/javascript">
-    $('#table').DataTable( {
-        "order": [[ 0, "desc" ]],
-        "pageLength": 50,
-        "columnDefs": [{
-            "targets": 0,
-            "render": function ( data, type, row, meta ) {
-                let date = new Date(data);
-                return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.toTimeString();
-            }
-        }]
-    });
-</script>
-
-<script type="text/javascript">
-    function openTreeInNewTab(treeUUID) {
-        window.opener.openTreeInNewTab(treeUUID);
-    };
-</script>
-
-{% endblock %}
+{% from "macros.html" import shorten_string %}
 
 {% block content %}
-  <center>
-      <h4>{{ url }}</h4>
-      <button onclick="window.history.back();" class="btn btn-primary" type="button">Go Back</button>
-  </center>
-  <div class="table-responsive">
-  <table id="table" class="table" style="width:96%">
-    <thead>
-     <tr>
-       <th>Start timestamp</th>
-       <th>Captures</th>
-     </tr>
-    </thead>
-    <tbody>
-      {% for hit in hits %}
-      <tr>
-        <td>
-          {{ hit['start_timestamp'] }}
-        </td>
-        <td><a href="{{ url_for('tree', tree_uuid=hit['capture_uuid']) }}">{{ hit['title'] }}</a>
-            </br>
-            Nodes:
-            <ul>
-              {% for urlnode_uuid, data in hit['urlnodes'].items() %}
-              <li><a href="{{ url_for('tree', tree_uuid=hit['capture_uuid'], node_uuid=data['hostnode_uuid']) }}">{{ data['start_time'] }}</a></li>
-              {% endfor %}
-            </ul>
-        </td>
-      </tr>
-      {% endfor %}
-    </tbody>
-  </table>
-  </div>
-  <p>The same file was seen in these captures:</p>
-  <ul>
-  {% for capture_uuid, title in captures %}
-    <li><a href="#/" onclick="openTreeInNewTab('{{ capture_uuid }}')">{{ title }}</a></li>
-  {% endfor %}
-  </ul>
+
+<center>
+  <h4>{{ url }}</h4>
+</center>
+
+<script type="text/javascript">
+  new DataTable('#urlTable', {
+    order: [[ 0, "desc" ]],
+    columnDefs: [{ width: '20%', targets: 0,
+                   render: (data) => {
+                    const date = new Date(data);
+                    return date.getFullYear() + '-' + (date.getMonth() + 1).toString().padStart(2, "0") + '-' + date.getDate().toString().padStart(2, "0") + ' ' + date.toTimeString();
+                   }
+                 },
+                 { width: '40%', targets: 1 },
+                 { width: '40%', targets: 2 }],
+  });
+</script>
+
+<table id="urlTable" class="table table-striped" style="width:100%">
+  <thead>
+   <tr>
+     <th>Capture Time</th>
+     <th>Capture Title</th>
+     <th>Landing page</th>
+   </tr>
+  </thead>
+  <tbody>
+    {% for capture_uuid, title, landing_page, capture_time in captures %}
+    <tr>
+      <td>
+        {{capture_time}}
+      </td>
+      <td>
+        <a href="{{ url_for('tree', tree_uuid=capture_uuid) }}">
+          {{ title }}
+        </a>
+      </td>
+      <td>
+        <span class="d-inline-block text-break" style="max-width: 400px;">
+          {{ landing_page }}
+        </span>
+      </td>
+    </tr>
+    {% endfor %}
+  </tbody>
+</table>
 {% endblock %}