diff --git a/bin/background_indexer.py b/bin/background_indexer.py index ae525a2..2d468b1 100755 --- a/bin/background_indexer.py +++ b/bin/background_indexer.py @@ -5,15 +5,10 @@ from __future__ import annotations import logging import logging.config import os -import shutil -from datetime import datetime, timedelta -from pathlib import Path - -from lookyloo import Lookyloo +from lookyloo import Lookyloo, Indexing from lookyloo.default import AbstractManager, get_config -from lookyloo.exceptions import MissingUUID, NoValidHarFile -from lookyloo.helpers import is_locked, get_sorted_captures_from_disk, make_dirs_list +from lookyloo.exceptions import NoValidHarFile logging.config.dictConfig(get_config('logging')) @@ -21,125 +16,39 @@ logging.config.dictConfig(get_config('logging')) class BackgroundIndexer(AbstractManager): - def __init__(self, loglevel: int | None=None): + def __init__(self, full: bool=False, loglevel: int | None=None): super().__init__(loglevel) self.lookyloo = Lookyloo() - self.script_name = 'background_indexer' + self.full_indexer = full + self.indexing = Indexing(full_index=self.full_indexer) + if self.full_indexer: + self.script_name = 'background_full_indexer' + else: + self.script_name = 'background_indexer' # make sure discarded captures dir exists self.discarded_captures_dir = self.lookyloo.capture_dir.parent / 'discarded_captures' self.discarded_captures_dir.mkdir(parents=True, exist_ok=True) def _to_run_forever(self) -> None: - all_done = self._build_missing_pickles() - if all_done: - self._check_indexes() - # Disable probabilistic indexing for now, mmh3 isn't a fuzzy hash ago. - # self._check_probabilistic_indexes() + self._check_indexes() self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name) - def _build_missing_pickles(self) -> bool: - self.logger.debug('Build missing pickles...') - # Sometimes, we have a huge backlog and the process might get stuck on old captures for a very long time - # This value makes sure we break out of the loop and build pickles of the most recent captures - max_captures = 50 - got_new_captures = False - - # Initialize time where we do not want to build the pickles anymore. 
- archive_interval = timedelta(days=get_config('generic', 'archive')) - cut_time = (datetime.now() - archive_interval) - for month_dir in make_dirs_list(self.lookyloo.capture_dir): - __counter_shutdown = 0 - for capture_time, path in sorted(get_sorted_captures_from_disk(month_dir, cut_time=cut_time, keep_more_recent=True), reverse=True): - __counter_shutdown += 1 - if __counter_shutdown % 10 and self.shutdown_requested(): - self.logger.warning('Shutdown requested, breaking.') - return False - if ((path / 'tree.pickle.gz').exists() or (path / 'tree.pickle').exists()): - # We already have a pickle file - self.logger.debug(f'{path} has a pickle.') - continue - if not list(path.rglob('*.har.gz')) and not list(path.rglob('*.har')): - # No HAR file - self.logger.debug(f'{path} has no HAR file.') - continue - - if is_locked(path): - # it is really locked - self.logger.debug(f'{path} is locked, pickle generated by another process.') - continue - - with (path / 'uuid').open() as f: - uuid = f.read() - - if not self.lookyloo.redis.hexists('lookup_dirs', uuid): - # The capture with this UUID exists, but it is for some reason missing in lookup_dirs - self.lookyloo.redis.hset('lookup_dirs', uuid, str(path)) - else: - cached_path = Path(self.lookyloo.redis.hget('lookup_dirs', uuid)) # type: ignore[arg-type] - if cached_path != path: - # we have a duplicate UUID, it is proably related to some bad copy/paste - if cached_path.exists(): - # Both paths exist, move the one that isn't in lookup_dirs - self.logger.critical(f'Duplicate UUID for {uuid} in {cached_path} and {path}, discarding the latest') - try: - shutil.move(str(path), str(self.discarded_captures_dir / path.name)) - except FileNotFoundError as e: - self.logger.warning(f'Unable to move capture: {e}') - continue - else: - # The path in lookup_dirs for that UUID doesn't exists, just update it. - self.lookyloo.redis.hset('lookup_dirs', uuid, str(path)) - - try: - self.logger.info(f'Build pickle for {uuid}: {path.name}') - self.lookyloo.get_crawled_tree(uuid) - self.lookyloo.trigger_modules(uuid, auto_trigger=True) - self.logger.info(f'Pickle for {uuid} built.') - got_new_captures = True - max_captures -= 1 - except MissingUUID: - self.logger.warning(f'Unable to find {uuid}. That should not happen.') - except NoValidHarFile as e: - self.logger.critical(f'There are no HAR files in the capture {uuid}: {path.name} - {e}') - except FileNotFoundError: - self.logger.warning(f'Capture {uuid} disappeared during processing, probably archived.') - except Exception: - self.logger.exception(f'Unable to build pickle for {uuid}: {path.name}') - # The capture is not working, moving it away. - try: - shutil.move(str(path), str(self.discarded_captures_dir / path.name)) - self.lookyloo.redis.hdel('lookup_dirs', uuid) - except FileNotFoundError as e: - self.logger.warning(f'Unable to move capture: {e}') - continue - if max_captures <= 0: - self.logger.info('Too many captures in the backlog, start from the beginning.') - return False - if got_new_captures: - self.logger.info('Finished building all missing pickles.') - # Only return True if we built new pickles. - return True - return False - def _check_indexes(self) -> None: - index_redis = self.lookyloo.indexing.redis - can_index = index_redis.set('ongoing_indexing', 1, ex=3600, nx=True) - if not can_index: + if not self.indexing.can_index: # There is no reason to run this method in multiple scripts. 
self.logger.info('Indexing already ongoing in another process.') return None - self.logger.info('Check indexes...') + self.logger.info(f'Check {self.script_name}...') for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False): - if self.lookyloo.is_public_instance and cache.no_index: - # Capture unindexed + if not self.full_indexer: + # If we're not running the full indexer, check if the capture should be indexed. + if self.lookyloo.is_public_instance and cache.no_index: + # Capture unindexed + continue + if not cache.tree_ready: + # pickle isn't ready, we can't index. continue - p = index_redis.pipeline() - p.sismember('indexed_urls', cache.uuid) - p.sismember('indexed_body_hashes', cache.uuid) - p.sismember('indexed_cookies', cache.uuid) - p.sismember('indexed_hhhashes', cache.uuid) - p.sismember('indexed_favicons', cache.uuid) - indexed = p.execute() + indexed = self.indexing.capture_indexed(cache.uuid) if all(indexed): continue try: @@ -151,50 +60,23 @@ class BackgroundIndexer(AbstractManager): if not indexed[0]: self.logger.info(f'Indexing urls for {cache.uuid}') - self.lookyloo.indexing.index_url_capture(ct) + self.indexing.index_url_capture(ct) if not indexed[1]: self.logger.info(f'Indexing resources for {cache.uuid}') - self.lookyloo.indexing.index_body_hashes_capture(ct) + self.indexing.index_body_hashes_capture(ct) if not indexed[2]: self.logger.info(f'Indexing cookies for {cache.uuid}') - self.lookyloo.indexing.index_cookies_capture(ct) + self.indexing.index_cookies_capture(ct) if not indexed[3]: self.logger.info(f'Indexing HH Hashes for {cache.uuid}') - self.lookyloo.indexing.index_http_headers_hashes_capture(ct) + self.indexing.index_http_headers_hashes_capture(ct) if not indexed[4]: self.logger.info(f'Indexing favicons for {cache.uuid}') favicons = self.lookyloo.get_potential_favicons(cache.uuid, all_favicons=True, for_datauri=False) - self.lookyloo.indexing.index_favicons_capture(cache.uuid, favicons) + self.indexing.index_favicons_capture(cache.uuid, favicons) # NOTE: categories aren't taken in account here, should be fixed(?) # see indexing.index_categories_capture(capture_uuid, categories) - index_redis.delete('ongoing_indexing') - self.logger.info('... done.') - - def _check_probabilistic_indexes(self) -> None: - index_redis = self.lookyloo.indexing.redis - can_index = index_redis.set('ongoing_probalistic_indexing', 1, ex=3600, nx=True) - if not can_index: - # There is no reason to run this method in multiple scripts. - self.logger.info('Probalistic indexing already ongoing in another process.') - return None - self.logger.info('Check probabilistic indexes...') - algorithms = ['mmh3-shodan'] - for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False): - if self.lookyloo.is_public_instance and cache.no_index: - # Capture unindexed - continue - p = index_redis.pipeline() - for algorithm in algorithms: - p.sismember(f'indexed_favicons_probabilistic|{algorithm}', cache.uuid) - indexed = p.execute() - if all(indexed): - continue - for i, algorithm in enumerate(algorithms): - if not indexed[i]: - self.logger.info(f'Probabilistic indexing favicons for {cache.uuid} with {algorithm}') - favicons = self.lookyloo.get_potential_favicons(cache.uuid, all_favicons=True, for_datauri=False) - self.lookyloo.indexing.index_favicons_probabilistic(cache.uuid, favicons, algorithm) - index_redis.delete('ongoing_probalistic_indexing') + self.indexing.indexing_done() self.logger.info('... 
done.') @@ -203,5 +85,12 @@ def main() -> None: i.run(sleep_in_sec=60) +def main_full_indexer() -> None: + if not get_config('generic', 'index_everything'): + raise Exception('Full indexer is disabled.') + i = BackgroundIndexer(full=True) + i.run(sleep_in_sec=60) + + if __name__ == '__main__': main() diff --git a/bin/run_backend.py b/bin/run_backend.py index 014350f..4349b12 100755 --- a/bin/run_backend.py +++ b/bin/run_backend.py @@ -11,7 +11,7 @@ from subprocess import Popen from redis import Redis from redis.exceptions import ConnectionError -from lookyloo.default import get_homedir, get_socket_path +from lookyloo.default import get_homedir, get_socket_path, get_config def check_running(name: str) -> bool: @@ -55,13 +55,32 @@ def shutdown_indexing(storage_directory: Path | None=None) -> None: print('Redis indexing database shutdown.') +def launch_full_index(storage_directory: Path | None=None) -> None: + if not storage_directory: + storage_directory = get_homedir() + if not check_running('full_index'): + Popen(["./run_kvrocks.sh"], cwd=(storage_directory / 'full_index')) + + +def shutdown_full_index(storage_directory: Path | None=None) -> None: + if not storage_directory: + storage_directory = get_homedir() + r = Redis(unix_socket_path=get_socket_path('full_index')) + r.shutdown(save=True) + print('Kvrocks full indexing database shutdown.') + + def launch_all() -> None: launch_cache() launch_indexing() + if get_config('generic', 'index_everything'): + launch_full_index() def check_all(stop: bool=False) -> None: backends: dict[str, bool] = {'cache': False, 'indexing': False} + if get_config('generic', 'index_everything'): + backends['full_index'] = False while True: for db_name in backends.keys(): try: @@ -85,6 +104,8 @@ def check_all(stop: bool=False) -> None: def stop_all() -> None: shutdown_cache() shutdown_indexing() + if get_config('generic', 'index_everything'): + shutdown_full_index() def main() -> None: diff --git a/bin/start.py b/bin/start.py index 30fadd1..2ec0983 100755 --- a/bin/start.py +++ b/bin/start.py @@ -2,7 +2,7 @@ from subprocess import Popen, run -from lookyloo.default import get_homedir +from lookyloo.default import get_homedir, get_config def main() -> None: @@ -18,9 +18,16 @@ def main() -> None: print('Start asynchronous ingestor...') Popen(['async_capture']) print('done.') + print('Start background capture builder...') + Popen(['background_build_captures']) + print('done.') print('Start background indexer...') Popen(['background_indexer']) print('done.') + if get_config('generic', 'index_everything'): + print('Start background full indexer...') + Popen(['background_full_indexer']) + print('done.') print('Start background processing...') Popen(['processing']) print('done.') diff --git a/config/generic.json.sample b/config/generic.json.sample index 9e33fb7..a2db28d 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -79,6 +79,7 @@ "bucket_name": "" } }, + "index_everything": false, "_notes": { "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", @@ -110,6 +111,7 @@ "archive": "The captures older than this value (in days) will be archived. They're not cached by default in the Lookyloo class.", "max_capture_time": "The very maximal time we allow a capture to keep going. 
Should only be triggered by captures that cause playwright to never quit.", "max_tree_create_time": "The max time the generation of a tree is allowed to take", - "s3fs": "The config to access a S3FS instance with the s3fs python module - it is not integrated properly for now as it requires urllib < 2.0 which is a non-started at this stage." + "s3fs": "The config to access a S3FS instance with the s3fs python module - it is not integrated properly for now as it requires urllib < 2.0 which is a non-started at this stage.", + "index_everything": "If true, index every capture, even if it's not public. This feature requires a dedicated kvrocks instance, and is only accessible when logged-in as admin." } } diff --git a/full_index/kvrocks.conf b/full_index/kvrocks.conf new file mode 100644 index 0000000..43c4428 --- /dev/null +++ b/full_index/kvrocks.conf @@ -0,0 +1,875 @@ +################################ GENERAL ##################################### + +# By default kvrocks listens for connections from localhost interface. +# It is possible to listen to just one or multiple interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# bind 0.0.0.0 +# bind 127.0.0.1 + +# Unix socket. +# +# Specify the path for the unix socket that will be used to listen for +# incoming connections. There is no default, so kvrocks will not listen +# on a unix socket when not specified. +# +unixsocket full_index.sock +unixsocketperm 777 + +# Accept connections on the specified port, default is 6666. +# port 6666 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# The number of worker's threads, increase or decrease would affect the performance. +workers 8 + +# By default, kvrocks does not run as a daemon. Use 'yes' if you need it. +# Note that kvrocks will write a PID file in /var/run/kvrocks.pid when daemonized +daemonize yes + +# Kvrocks implements the cluster solution that is similar to the Redis cluster solution. +# You can get cluster information by CLUSTER NODES|SLOTS|INFO command, it also is +# adapted to redis-cli, redis-benchmark, Redis cluster SDK, and Redis cluster proxy. +# But kvrocks doesn't support communicating with each other, so you must set +# cluster topology by CLUSTER SETNODES|SETNODEID commands, more details: #219. +# +# PLEASE NOTE: +# If you enable cluster, kvrocks will encode key with its slot id calculated by +# CRC16 and modulo 16384, encoding key with its slot id makes it efficient to +# migrate keys based on the slot. So if you enabled at first time, cluster mode must +# not be disabled after restarting, and vice versa. That is to say, data is not +# compatible between standalone mode with cluster mode, you must migrate data +# if you want to change mode, otherwise, kvrocks will make data corrupt. +# +# Default: no + +cluster-enabled no + +# By default, namespaces are stored in the configuration file and won't be replicated +# to replicas. This option allows to change this behavior, so that namespaces are also +# propagated to slaves. Note that: +# 1) it won't replicate the 'masterauth' to prevent breaking master/replica replication +# 2) it will overwrite replica's namespace with master's namespace, so be careful of in-using namespaces +# 3) cannot switch off the namespace replication once it's enabled +# +# Default: no +repl-namespace-enabled no + +# Persist the cluster nodes topology in local file($dir/nodes.conf). 
This configuration +# takes effect only if the cluster mode was enabled. +# +# If yes, it will try to load the cluster topology from the local file when starting, +# and dump the cluster nodes into the file if it was changed. +# +# Default: yes +persist-cluster-nodes-enabled yes + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients. However, if the server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# +# Once the limit is reached the server will close all the new connections sending +# an error 'max number of clients reached'. +# +maxclients 10000 + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running kvrocks. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since kvrocks is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# If the master is password protected (using the "masterauth" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process. Otherwise, the master will +# refuse the slave request. +# +# masterauth foobared + +# Master-Salve replication would check db name is matched. if not, the slave should +# refuse to sync the db from master. Don't use the default value, set the db-name to identify +# the cluster. +db-name change.me.db + +# The working directory +# +# The DB will be written inside this directory +# Note that you must specify a directory here, not a file name. +dir ./ + +# You can configure where to store your server logs by the log-dir. +# If you don't specify one, we will use the above `dir` as our default log directory. +# We also can send logs to stdout/stderr is as simple as: +# +log-dir stdout + +# Log level +# Possible values: info, warning, error, fatal +# Default: info +log-level info + +# You can configure log-retention-days to control whether to enable the log cleaner +# and the maximum retention days that the INFO level logs will be kept. +# +# if set to -1, that means to disable the log cleaner. +# if set to 0, all previous INFO level logs will be immediately removed. +# if set to between 0 to INT_MAX, that means it will retent latest N(log-retention-days) day logs. + +# By default the log-retention-days is -1. +log-retention-days -1 + +# When running in daemonize mode, kvrocks writes a PID file in ${CONFIG_DIR}/kvrocks.pid by +# default. You can specify a custom pid file location here. +pidfile kvrocks.pid + +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +slave-read-only yes + +# The slave priority is an integer number published by Kvrocks in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. 
+# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slave with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the replica as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. +slave-priority 100 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to Get the desired effect. +tcp-backlog 511 + +# If the master is an old version, it may have specified replication threads +# that use 'port + 1' as listening port, but in new versions, we don't use +# extra port to implement replication. In order to allow the new replicas to +# copy old masters, you should indicate that the master uses replication port +# or not. +# If yes, that indicates master uses replication port and replicas will connect +# to 'master's listening port + 1' when synchronization. +# If no, that indicates master doesn't use replication port and replicas will +# connect 'master's listening port' when synchronization. +master-use-repl-port no + +# Currently, master only checks sequence number when replica asks for PSYNC, +# that is not enough since they may have different replication histories even +# the replica asking sequence is in the range of the master current WAL. +# +# We design 'Replication Sequence ID' PSYNC, we add unique replication id for +# every write batch (the operation of each command on the storage engine), so +# the combination of replication id and sequence is unique for write batch. +# The master can identify whether the replica has the same replication history +# by checking replication id and sequence. +# +# By default, it is not enabled since this stricter check may easily lead to +# full synchronization. +use-rsid-psync no + +# Master-Slave replication. Use slaveof to make a kvrocks instance a copy of +# another kvrocks server. A few things to understand ASAP about kvrocks replication. +# +# 1) Kvrocks replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Kvrocks slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof +# slaveof 127.0.0.1 6379 + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out-of-date data, or the +# data set may just be empty if this is the first synchronization. 
+# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all kinds of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# To guarantee slave's data safe and serve when it is in full synchronization +# state, slave still keep itself data. But this way needs to occupy much disk +# space, so we provide a way to reduce disk occupation, slave will delete itself +# entire database before fetching files from master during full synchronization. +# If you want to enable this way, you can set 'slave-delete-db-before-fullsync' +# to yes, but you must know that database will be lost if master is down during +# full synchronization, unless you have a backup of database. +# +# This option is similar redis replicas RDB diskless load option: +# repl-diskless-load on-empty-db +# +# Default: no +slave-empty-db-before-fullsync no + +# A Kvrocks master is able to list the address and port of the attached +# replicas in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover replica instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP address and port normally reported by a replica is +# obtained in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the replica to connect with the master. +# +# Port: The port is communicated by the replica during the replication +# handshake, and is normally the port that the replica is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the replica may actually be reachable via different IP and port +# pairs. The following two options can be used by a replica in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. +# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# replica-announce-ip 5.5.5.5 +# replica-announce-port 1234 + +# If replicas need full synchronization with master, master need to create +# checkpoint for feeding replicas, and replicas also stage a checkpoint of +# the master. If we also keep the backup, it maybe occupy extra disk space. +# You can enable 'purge-backup-on-fullsync' if disk is not sufficient, but +# that may cause remote backup copy failing. +# +# Default: no +purge-backup-on-fullsync no + +# The maximum allowed rate (in MB/s) that should be used by replication. +# If the rate exceeds max-replication-mb, replication will slow down. +# Default: 0 (i.e. no limit) +max-replication-mb 0 + +# The maximum allowed aggregated write rate of flush and compaction (in MB/s). +# If the rate exceeds max-io-mb, io will slow down. +# 0 is no limit +# Default: 0 +max-io-mb 0 + +# The maximum allowed space (in GB) that should be used by RocksDB. +# If the total size of the SST files exceeds max_allowed_space, writes to RocksDB will fail. +# Please see: https://github.com/facebook/rocksdb/wiki/Managing-Disk-Space-Utilization +# Default: 0 (i.e. no limit) +max-db-size 0 + +# The maximum backup to keep, server cron would run every minutes to check the num of current +# backup, and purge the old backup if exceed the max backup num to keep. If max-backup-to-keep +# is 0, no backup would be kept. But now, we only support 0 or 1. 
+max-backup-to-keep 1 + +# The maximum hours to keep the backup. If max-backup-keep-hours is 0, wouldn't purge any backup. +# default: 1 day +max-backup-keep-hours 24 + +# max-bitmap-to-string-mb use to limit the max size of bitmap to string transformation(MB). +# +# Default: 16 +max-bitmap-to-string-mb 16 + +# Whether to enable SCAN-like cursor compatible with Redis. +# If enabled, the cursor will be unsigned 64-bit integers. +# If disabled, the cursor will be a string. +# Default: no +redis-cursor-compatible no + +# Maximum nesting depth allowed when parsing and serializing +# JSON documents while using JSON commands like JSON.SET. +# Default: 1024 +json-max-nesting-depth 1024 + +# The underlying storage format of JSON data type +# NOTE: This option only affects newly written/updated key-values +# The CBOR format may reduce the storage size and speed up JSON commands +# Available values: json, cbor +# Default: json +json-storage-format json + +################################## TLS ################################### + +# By default, TLS/SSL is disabled, i.e. `tls-port` is set to 0. +# To enable it, `tls-port` can be used to define TLS-listening ports. +# tls-port 0 + +# Configure a X.509 certificate and private key to use for authenticating the +# server to connected clients, masters or cluster peers. +# These files should be PEM formatted. +# +# tls-cert-file kvrocks.crt +# tls-key-file kvrocks.key + +# If the key file is encrypted using a passphrase, it can be included here +# as well. +# +# tls-key-file-pass secret + +# Configure a CA certificate(s) bundle or directory to authenticate TLS/SSL +# clients and peers. Kvrocks requires an explicit configuration of at least one +# of these, and will not implicitly use the system wide configuration. +# +# tls-ca-cert-file ca.crt +# tls-ca-cert-dir /etc/ssl/certs + +# By default, clients on a TLS port are required +# to authenticate using valid client side certificates. +# +# If "no" is specified, client certificates are not required and not accepted. +# If "optional" is specified, client certificates are accepted and must be +# valid if provided, but are not required. +# +# tls-auth-clients no +# tls-auth-clients optional + +# By default, only TLSv1.2 and TLSv1.3 are enabled and it is highly recommended +# that older formally deprecated versions are kept disabled to reduce the attack surface. +# You can explicitly specify TLS versions to support. +# Allowed values are case insensitive and include "TLSv1", "TLSv1.1", "TLSv1.2", +# "TLSv1.3" (OpenSSL >= 1.1.1) or any combination. +# To enable only TLSv1.2 and TLSv1.3, use: +# +# tls-protocols "TLSv1.2 TLSv1.3" + +# Configure allowed ciphers. See the ciphers(1ssl) manpage for more information +# about the syntax of this string. +# +# Note: this configuration applies only to <= TLSv1.2. +# +# tls-ciphers DEFAULT:!MEDIUM + +# Configure allowed TLSv1.3 ciphersuites. See the ciphers(1ssl) manpage for more +# information about the syntax of this string, and specifically for TLSv1.3 +# ciphersuites. +# +# tls-ciphersuites TLS_CHACHA20_POLY1305_SHA256 + +# When choosing a cipher, use the server's preference instead of the client +# preference. By default, the server follows the client's preference. +# +# tls-prefer-server-ciphers yes + +# By default, TLS session caching is enabled to allow faster and less expensive +# reconnections by clients that support it. Use the following directive to disable +# caching. +# +# tls-session-caching no + +# Change the default number of TLS sessions cached. 
A zero value sets the cache +# to unlimited size. The default size is 20480. +# +# tls-session-cache-size 5000 + +# Change the default timeout of cached TLS sessions. The default timeout is 300 +# seconds. +# +# tls-session-cache-timeout 60 + +# By default, a replica does not attempt to establish a TLS connection +# with its master. +# +# Use the following directive to enable TLS on replication links. +# +# tls-replication yes + +################################## SLOW LOG ################################### + +# The Kvrocks Slow Log is a mechanism to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Kvrocks +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that -1 value disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 100000 + +# There is no limit to this length. Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +# If you run kvrocks from upstart or systemd, kvrocks can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting kvrocks into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous liveness pings back to your supervisor. +supervised no + +################################## PERF LOG ################################### + +# The Kvrocks Perf Log is a mechanism to log queries' performance context that +# exceeded a specified execution time. This mechanism uses rocksdb's +# Perf Context and IO Stats Context, Please see: +# https://github.com/facebook/rocksdb/wiki/Perf-Context-and-IO-Stats-Context +# +# This mechanism is enabled when profiling-sample-commands is not empty and +# profiling-sample-ratio greater than 0. +# It is important to note that this mechanism affects performance, but it is +# useful for troubleshooting performance bottlenecks, so it should only be +# enabled when performance problems occur. + +# The name of the commands you want to record. Must be original name of +# commands supported by Kvrocks. Use ',' to separate multiple commands and +# use '*' to record all commands supported by Kvrocks. +# Example: +# - Single command: profiling-sample-commands get +# - Multiple commands: profiling-sample-commands get,mget,hget +# +# Default: empty +# profiling-sample-commands "" + +# Ratio of the samples would be recorded. It is a number between 0 and 100. +# We simply use the rand to determine whether to record the sample or not. +# +# Default: 0 +profiling-sample-ratio 0 + +# There is no limit to this length. 
Just be aware that it will consume memory. +# You can reclaim memory used by the perf log with PERFLOG RESET. +# +# Default: 256 +profiling-sample-record-max-len 256 + +# profiling-sample-record-threshold-ms use to tell the kvrocks when to record. +# +# Default: 100 millisecond +profiling-sample-record-threshold-ms 100 + +################################## CRON ################################### + +# Compact Scheduler, auto compact at schedule time +# time expression format is the same as crontab(currently only support * and int) +# e.g. compact-cron 0 3 * * * 0 4 * * * +# would compact the db at 3am and 4am everyday +# compact-cron 0 3 * * * + +# The hour range that compaction checker would be active +# e.g. compaction-checker-range 0-7 means compaction checker would be worker between +# 0-7am every day. +compaction-checker-range 0-7 + +# When the compaction checker is triggered, the db will periodically pick the SST file +# with the highest "deleted percentage" (i.e. the percentage of deleted keys in the SST +# file) to compact, in order to free disk space. +# However, if a specific SST file was created more than "force-compact-file-age" seconds +# ago, and its percentage of deleted keys is higher than +# "force-compact-file-min-deleted-percentage", it will be forcely compacted as well. + +# Default: 172800 seconds; Range: [60, INT64_MAX]; +# force-compact-file-age 172800 +# Default: 10 %; Range: [1, 100]; +# force-compact-file-min-deleted-percentage 10 + +# Bgsave scheduler, auto bgsave at scheduled time +# time expression format is the same as crontab(currently only support * and int) +# e.g. bgsave-cron 0 3 * * * 0 4 * * * +# would bgsave the db at 3am and 4am every day + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance, the KEYS command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command KEYS b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command KEYS "" + +################################ MIGRATE ##################################### +# If the network bandwidth is completely consumed by the migration task, +# it will affect the availability of kvrocks. To avoid this situation, +# migrate-speed is adopted to limit the migrating speed. +# Migrating speed is limited by controlling the duration between sending data, +# the duration is calculated by: 1000000 * migrate-pipeline-size / migrate-speed (us). +# Value: [0,INT_MAX], 0 means no limit +# +# Default: 4096 +migrate-speed 4096 + +# In order to reduce data transmission times and improve the efficiency of data migration, +# pipeline is adopted to send multiple data at once. Pipeline size can be set by this option. +# Value: [1, INT_MAX], it can't be 0 +# +# Default: 16 +migrate-pipeline-size 16 + +# In order to reduce the write forbidden time during migrating slot, we will migrate the incremental +# data several times to reduce the amount of incremental data. Until the quantity of incremental +# data is reduced to a certain threshold, slot will be forbidden write. The threshold is set by +# this option. +# Value: [1, INT_MAX], it can't be 0 +# +# Default: 10000 +migrate-sequence-gap 10000 + +################################ ROCKSDB ##################################### + +# Specify the capacity of column family block cache. 
A larger block cache +# may make requests faster while more keys would be cached. Max Size is 400*1024. +# Default: 4096MB +rocksdb.block_cache_size 4096 + +# A global cache for table-level rows in RocksDB. If almost always point +# lookups, enlarging row cache may improve read performance. Otherwise, +# if we enlarge this value, we can lessen metadata/subkey block cache size. +# +# Default: 0 (disabled) +rocksdb.row_cache_size 0 + +# Number of open files that can be used by the DB. You may need to +# increase this if your database has a large working set. Value -1 means +# files opened are always kept open. You can estimate number of files based +# on target_file_size_base and target_file_size_multiplier for level-based +# compaction. For universal-style compaction, you can usually set it to -1. +# Default: 8096 +rocksdb.max_open_files 8096 + +# Amount of data to build up in memory (backed by an unsorted log +# on disk) before converting to a sorted on-disk file. +# +# Larger values increase performance, especially during bulk loads. +# Up to max_write_buffer_number write buffers may be held in memory +# at the same time, +# so you may wish to adjust this parameter to control memory usage. +# Also, a larger write buffer will result in a longer recovery time +# the next time the database is opened. +# +# Note that write_buffer_size is enforced per column family. +# See db_write_buffer_size for sharing memory across column families. + +# default is 64MB +rocksdb.write_buffer_size 64 + +# Target file size for compaction, target file size for Level N can be calculated +# by target_file_size_base * (target_file_size_multiplier ^ (L-1)) +# +# Default: 128MB +rocksdb.target_file_size_base 128 + +# The maximum number of write buffers that are built up in memory. +# The default and the minimum number is 2, so that when 1 write buffer +# is being flushed to storage, new writes can continue to the other +# write buffer. +# If max_write_buffer_number > 3, writing will be slowed down to +# options.delayed_write_rate if we are writing to the last write buffer +# allowed. +rocksdb.max_write_buffer_number 4 + +# Maximum number of concurrent background jobs (compactions and flushes). +# For backwards compatibility we will set `max_background_jobs = +# max_background_compactions + max_background_flushes` in the case where user +# sets at least one of `max_background_compactions` or `max_background_flushes` +# (we replace -1 by 1 in case one option is unset). +rocksdb.max_background_jobs 4 + +# DEPRECATED: it is automatically decided based on the value of rocksdb.max_background_jobs +# Maximum number of concurrent background compaction jobs, submitted to +# the default LOW priority thread pool. +rocksdb.max_background_compactions -1 + +# DEPRECATED: it is automatically decided based on the value of rocksdb.max_background_jobs +# Maximum number of concurrent background memtable flush jobs, submitted by +# default to the HIGH priority thread pool. If the HIGH priority thread pool +# is configured to have zero threads, flush jobs will share the LOW priority +# thread pool with compaction jobs. +rocksdb.max_background_flushes -1 + +# This value represents the maximum number of threads that will +# concurrently perform a compaction job by breaking it into multiple, +# smaller ones that are run simultaneously. +# Default: 2 +rocksdb.max_sub_compactions 2 + +# In order to limit the size of WALs, RocksDB uses DBOptions::max_total_wal_size +# as the trigger of column family flush. 
Once WALs exceed this size, RocksDB +# will start forcing the flush of column families to allow deletion of some +# oldest WALs. This config can be useful when column families are updated at +# non-uniform frequencies. If there's no size limit, users may need to keep +# really old WALs when the infrequently-updated column families hasn't flushed +# for a while. +# +# In kvrocks, we use multiple column families to store metadata, subkeys, etc. +# If users always use string type, but use list, hash and other complex data types +# infrequently, there will be a lot of old WALs if we don't set size limit +# (0 by default in rocksdb), because rocksdb will dynamically choose the WAL size +# limit to be [sum of all write_buffer_size * max_write_buffer_number] * 4 if set to 0. +# +# Moreover, you should increase this value if you already set rocksdb.write_buffer_size +# to a big value, to avoid influencing the effect of rocksdb.write_buffer_size and +# rocksdb.max_write_buffer_number. +# +# default is 512MB +rocksdb.max_total_wal_size 512 + +# We implement the replication with rocksdb WAL, it would trigger full sync when the seq was out of range. +# wal_ttl_seconds and wal_size_limit_mb would affect how archived logs will be deleted. +# If WAL_ttl_seconds is not 0, then WAL files will be checked every WAL_ttl_seconds / 2 and those that +# are older than WAL_ttl_seconds will be deleted# +# +# Default: 3 Hours +rocksdb.wal_ttl_seconds 10800 + +# If WAL_ttl_seconds is 0 and WAL_size_limit_MB is not 0, +# WAL files will be checked every 10 min and if total size is greater +# then WAL_size_limit_MB, they will be deleted starting with the +# earliest until size_limit is met. All empty files will be deleted +# Default: 16GB +rocksdb.wal_size_limit_mb 16384 + +# Approximate size of user data packed per block. Note that the +# block size specified here corresponds to uncompressed data. The +# actual size of the unit read from disk may be smaller if +# compression is enabled. +# +# Default: 16KB +rocksdb.block_size 16384 + +# Indicating if we'd put index/filter blocks to the block cache +# +# Default: yes +rocksdb.cache_index_and_filter_blocks yes + +# Specify the compression to use. Only compress level greater +# than 2 to improve performance. +# Accept value: "no", "snappy", "lz4", "zstd", "zlib" +# default snappy +rocksdb.compression snappy + +# If non-zero, we perform bigger reads when doing compaction. If you're +# running RocksDB on spinning disks, you should set this to at least 2MB. +# That way RocksDB's compaction is doing sequential instead of random reads. +# When non-zero, we also force new_table_reader_for_compaction_inputs to +# true. +# +# Default: 2 MB +rocksdb.compaction_readahead_size 2097152 + +# he limited write rate to DB if soft_pending_compaction_bytes_limit or +# level0_slowdown_writes_trigger is triggered. + +# If the value is 0, we will infer a value from `rater_limiter` value +# if it is not empty, or 16MB if `rater_limiter` is empty. Note that +# if users change the rate in `rate_limiter` after DB is opened, +# `delayed_write_rate` won't be adjusted. +# +rocksdb.delayed_write_rate 0 +# If enable_pipelined_write is true, separate write thread queue is +# maintained for WAL write and memtable write. +# +# Default: no +rocksdb.enable_pipelined_write no + +# Soft limit on number of level-0 files. We start slowing down writes at this +# point. A value <0 means that no writing slow down will be triggered by +# number of files in level-0. 
+# +# Default: 20 +rocksdb.level0_slowdown_writes_trigger 20 + +# Maximum number of level-0 files. We stop writes at this point. +# +# Default: 40 +rocksdb.level0_stop_writes_trigger 40 + +# Number of files to trigger level-0 compaction. +# +# Default: 4 +rocksdb.level0_file_num_compaction_trigger 4 + +# if not zero, dump rocksdb.stats to LOG every stats_dump_period_sec +# +# Default: 0 +rocksdb.stats_dump_period_sec 0 + +# if yes, the auto compaction would be disabled, but the manual compaction remain works +# +# Default: no +rocksdb.disable_auto_compactions no + +# BlobDB(key-value separation) is essentially RocksDB for large-value use cases. +# Since 6.18.0, The new implementation is integrated into the RocksDB core. +# When set, large values (blobs) are written to separate blob files, and only +# pointers to them are stored in SST files. This can reduce write amplification +# for large-value use cases at the cost of introducing a level of indirection +# for reads. Please see: https://github.com/facebook/rocksdb/wiki/BlobDB. +# +# Note that when enable_blob_files is set to yes, BlobDB-related configuration +# items will take effect. +# +# Default: no +rocksdb.enable_blob_files no + +# The size of the smallest value to be stored separately in a blob file. Values +# which have an uncompressed size smaller than this threshold are stored alongside +# the keys in SST files in the usual fashion. +# +# Default: 4096 byte, 0 means that all values are stored in blob files +rocksdb.min_blob_size 4096 + +# The size limit for blob files. When writing blob files, a new file is +# opened once this limit is reached. +# +# Default: 268435456 bytes +rocksdb.blob_file_size 268435456 + +# Enables garbage collection of blobs. Valid blobs residing in blob files +# older than a cutoff get relocated to new files as they are encountered +# during compaction, which makes it possible to clean up blob files once +# they contain nothing but obsolete/garbage blobs. +# See also rocksdb.blob_garbage_collection_age_cutoff below. +# +# Default: yes +rocksdb.enable_blob_garbage_collection yes + +# The percentage cutoff in terms of blob file age for garbage collection. +# Blobs in the oldest N blob files will be relocated when encountered during +# compaction, where N = (garbage_collection_cutoff/100) * number_of_blob_files. +# Note that this value must belong to [0, 100]. +# +# Default: 25 +rocksdb.blob_garbage_collection_age_cutoff 25 + + +# The purpose of the following three options are to dynamically adjust the upper limit of +# the data that each layer can store according to the size of the different +# layers of the LSM. Enabling this option will bring some improvements in +# deletion efficiency and space amplification, but it will lose a certain +# amount of read performance. +# If you want to know more details about Levels' Target Size, you can read RocksDB wiki: +# https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#levels-target-size +# +# Default: yes +rocksdb.level_compaction_dynamic_level_bytes yes + +# The total file size of level-1 sst. +# +# Default: 268435456 bytes +rocksdb.max_bytes_for_level_base 268435456 + +# Multiplication factor for the total file size of L(n+1) layers. +# This option is a double type number in RocksDB, but kvrocks is +# not support the double data type number yet, so we use integer +# number instead of double currently. +# +# Default: 10 +rocksdb.max_bytes_for_level_multiplier 10 + +# This feature only takes effect in Iterators and MultiGet. 
+# If yes, RocksDB will try to read asynchronously and in parallel as much as possible to hide IO latency. +# In iterators, it will prefetch data asynchronously in the background for each file being iterated on. +# In MultiGet, it will read the necessary data blocks from those files in parallel as much as possible. + +# Default no +rocksdb.read_options.async_io no + +# If yes, the write will be flushed from the operating system +# buffer cache before the write is considered complete. +# If this flag is enabled, writes will be slower. +# If this flag is disabled, and the machine crashes, some recent +# rites may be lost. Note that if it is just the process that +# crashes (i.e., the machine does not reboot), no writes will be +# lost even if sync==false. +# +# Default: no +rocksdb.write_options.sync no + +# If yes, writes will not first go to the write ahead log, +# and the write may get lost after a crash. +# +# Default: no +rocksdb.write_options.disable_wal no + +# If enabled and we need to wait or sleep for the write request, fails +# immediately. +# +# Default: no +rocksdb.write_options.no_slowdown no + +# If enabled, write requests are of lower priority if compaction is +# behind. In this case, no_slowdown = true, the request will be canceled +# immediately. Otherwise, it will be slowed down. +# The slowdown value is determined by RocksDB to guarantee +# it introduces minimum impacts to high priority writes. +# +# Default: no +rocksdb.write_options.low_pri no + +# If enabled, this writebatch will maintain the last insert positions of each +# memtable as hints in concurrent write. It can improve write performance +# in concurrent writes if keys in one writebatch are sequential. +# +# Default: no +rocksdb.write_options.memtable_insert_hint_per_batch no + + +# Support RocksDB auto-tune rate limiter for the background IO +# if enabled, Rate limiter will limit the compaction write if flush write is high +# Please see https://rocksdb.org/blog/2017/12/18/17-auto-tuned-rate-limiter.html +# +# Default: yes +rocksdb.rate_limiter_auto_tuned yes + +# Enable this option will schedule the deletion of obsolete files in a background thread +# on iterator destruction. It can reduce the latency if there are many files to be removed. 
+# see https://github.com/facebook/rocksdb/wiki/IO#avoid-blocking-io +# +# Default: yes +# rocksdb.avoid_unnecessary_blocking_io yes + +################################ NAMESPACE ##################################### +# namespace.test change.me +backup-dir .//backup diff --git a/full_index/run_kvrocks.sh b/full_index/run_kvrocks.sh new file mode 100755 index 0000000..0475498 --- /dev/null +++ b/full_index/run_kvrocks.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +set -e +set -x + +../../kvrocks/build/kvrocks -c kvrocks.conf diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index ff88994..60cbcfc 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -93,6 +93,10 @@ class CaptureCache(): self.user_agent: str | None = cache_entry.get('user_agent') self.referer: str | None = cache_entry.get('referer') + @property + def tree_ready(self) -> bool: + return bool(_pickle_path(self.capture_dir)) + @property def tree(self) -> CrawledTree: if not self.capture_dir.exists(): @@ -102,27 +106,36 @@ class CaptureCache(): return load_pickle_tree(self.capture_dir, self.capture_dir.stat().st_mtime, self.logger) -def remove_pickle_tree(capture_dir: Path) -> None: - pickle_file = capture_dir / 'tree.pickle' +def _pickle_path(capture_dir: Path) -> Path | None: pickle_file_gz = capture_dir / 'tree.pickle.gz' - if pickle_file.exists(): - pickle_file.unlink() if pickle_file_gz.exists(): - pickle_file_gz.unlink() + return pickle_file_gz + + pickle_file = capture_dir / 'tree.pickle' + if pickle_file.exists(): + return pickle_file + + return None + + +def remove_pickle_tree(capture_dir: Path) -> None: + pickle_path = _pickle_path(capture_dir) + if pickle_path and pickle_path.exists(): + pickle_path.unlink() @lru_cache(maxsize=64) def load_pickle_tree(capture_dir: Path, last_mod_time: int, logger: Logger) -> CrawledTree: - pickle_file = capture_dir / 'tree.pickle' - pickle_file_gz = capture_dir / 'tree.pickle.gz' + pickle_path = _pickle_path(capture_dir) tree = None try: - if pickle_file.exists(): - with pickle_file.open('rb') as _p: - tree = pickle.load(_p) - elif pickle_file_gz.exists(): - with gzip.open(pickle_file_gz, 'rb') as _pg: - tree = pickle.load(_pg) + if pickle_path: + if pickle_path.suffix == '.gz': + with gzip.open(pickle_path, 'rb') as _pg: + tree = pickle.load(_pg) + else: # not a GZ pickle + with pickle_path.open('rb') as _p: + tree = pickle.load(_p) except pickle.UnpicklingError: remove_pickle_tree(capture_dir) except EOFError: diff --git a/lookyloo/default/helpers.py b/lookyloo/default/helpers.py index 64ca095..4631a8f 100644 --- a/lookyloo/default/helpers.py +++ b/lookyloo/default/helpers.py @@ -95,8 +95,10 @@ def safe_create_dir(to_create: Path) -> None: def get_socket_path(name: str) -> str: mapping = { 'cache': Path('cache', 'cache.sock'), - 'indexing': Path('indexing', 'indexing.sock'), + 'indexing': Path('indexing', 'indexing.sock') } + if get_config('generic', 'index_everything'): + mapping['full_index'] = Path('full_index', 'full_index.sock') return str(get_homedir() / mapping[name]) diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index 917a3d8..de05c77 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -24,24 +24,49 @@ from .default import get_socket_path, get_config class Indexing(): - def __init__(self) -> None: + def __init__(self, full_index: bool=False) -> None: self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(get_config('generic', 'loglevel')) - self.redis_pool_bytes: ConnectionPool = 
ConnectionPool(connection_class=UnixDomainSocketConnection, - path=get_socket_path('indexing')) - self.redis_pool: ConnectionPool = ConnectionPool(connection_class=UnixDomainSocketConnection, - path=get_socket_path('indexing'), decode_responses=True) + self.__redis_pool_bytes: ConnectionPool + self.__redis_pool: ConnectionPool + if full_index: + self.__redis_pool_bytes = ConnectionPool(connection_class=UnixDomainSocketConnection, + path=get_socket_path('full_index')) + self.__redis_pool = ConnectionPool(connection_class=UnixDomainSocketConnection, + path=get_socket_path('full_index'), decode_responses=True) + else: + self.__redis_pool_bytes = ConnectionPool(connection_class=UnixDomainSocketConnection, + path=get_socket_path('indexing')) + self.__redis_pool = ConnectionPool(connection_class=UnixDomainSocketConnection, + path=get_socket_path('indexing'), decode_responses=True) def clear_indexes(self) -> None: self.redis.flushdb() @property def redis_bytes(self) -> Redis: # type: ignore[type-arg] - return Redis(connection_pool=self.redis_pool_bytes) + return Redis(connection_pool=self.__redis_pool_bytes) @property def redis(self) -> Redis: # type: ignore[type-arg] - return Redis(connection_pool=self.redis_pool) + return Redis(connection_pool=self.__redis_pool) + + @property + def can_index(self) -> bool: + return bool(self.redis.set('ongoing_indexing', 1, ex=3600, nx=True)) + + def indexing_done(self) -> None: + self.redis.delete('ongoing_indexing') + + def capture_indexed(self, capture_uuid: str) -> tuple[bool, bool, bool, bool, bool]: + p = self.redis.pipeline() + p.sismember('indexed_urls', capture_uuid) + p.sismember('indexed_body_hashes', capture_uuid) + p.sismember('indexed_cookies', capture_uuid) + p.sismember('indexed_hhhashes', capture_uuid) + p.sismember('indexed_favicons', capture_uuid) + # This call for sure returns a tuple of 5 booleans + return p.execute() # type: ignore[return-value] def new_internal_uuids(self, crawled_tree: CrawledTree) -> None: # only trigger this method if the capture was already indexed. 
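# --- Editor's note: illustrative sketch, not part of the patch ----------------
# The Indexing class in the hunk above gains a small locking API: `can_index`
# does an atomic SET NX with a one-hour expiry on 'ongoing_indexing',
# `indexing_done` releases it, and `capture_indexed` pipelines five SISMEMBER
# checks and returns five booleans. A minimal consumer could look like the
# snippet below; it assumes a configured Lookyloo install, '<capture-uuid>' is a
# hypothetical placeholder, and the try/finally is an extra safety net that the
# BackgroundIndexer above does not use (it simply calls indexing_done() once it
# has walked the capture cache).
from lookyloo import Indexing

indexing = Indexing(full_index=False)  # full_index=True targets the kvrocks full index

if indexing.can_index:  # lock acquired: no other process is indexing right now
    try:
        urls, bodies, cookies, hhhashes, favicons = indexing.capture_indexed('<capture-uuid>')
        # re-index whatever is still missing for that capture ...
    finally:
        indexing.indexing_done()  # release the lock for the next run
else:
    print('Indexing already ongoing in another process.')
# --- end editor's note ---------------------------------------------------------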
diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 15471e9..f4c3513 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -56,7 +56,6 @@ from .helpers import (get_captures_dir, get_email_template, get_resources_hashes, get_taxonomies, uniq_domains, ParsedUserAgent, load_cookies, UserAgents, get_useragent_for_requests, make_ts_from_dirname) -from .indexing import Indexing from .modules import (MISPs, PhishingInitiative, UniversalWhois, UrlScan, VirusTotal, Phishtank, Hashlookup, RiskIQ, RiskIQError, Pandora, URLhaus, CIRCLPDNS) @@ -81,7 +80,6 @@ class Lookyloo(): def __init__(self) -> None: self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(get_config('generic', 'loglevel')) - self.indexing = Indexing() self.user_agents = UserAgents() self.is_public_instance = get_config('generic', 'public_instance') self.public_domain = get_config('generic', 'public_domain') @@ -938,214 +936,10 @@ class Lookyloo(): return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page) - set(ct.root_hartree.all_url_requests.keys())) - def get_body_hash_investigator(self, body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]: - '''Returns all the captures related to a hash (sha512), used in the web interface.''' - total_captures, details = self.indexing.get_body_hash_captures(body_hash, limit=-1) - captures = [] - for capture_uuid, hostnode_uuid, hostname, _, url in details: - cache = self.capture_cache(capture_uuid) - if not cache: - continue - captures.append((cache.uuid, cache.title, cache.timestamp, hostnode_uuid, url)) - domains = self.indexing.get_body_hash_domains(body_hash) - return captures, domains - - def get_body_hash_full(self, body_hash: str, /) -> tuple[dict[str, list[dict[str, str]]], BytesIO]: - '''Returns a lot of information about the hash (sha512) and the hits in the instance. - Also contains the data (base64 encoded)''' - details = self.indexing.get_body_hash_urls(body_hash) - - # Break immediately if we have the hash of the empty file - if body_hash == 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e': - return details, BytesIO() - - # get the body from the first entry in the details list - for _, entries in details.items(): - if not entries: - continue - ct = self.get_crawled_tree(entries[0]['capture']) - try: - urlnode = ct.root_hartree.get_url_node_by_uuid(entries[0]['urlnode']) - except Exception: - # Unable to find URLnode in the tree, it probably has been rebuild. - self.logger.warning(f'Unable to find {entries[0]["urlnode"]} in entries[0]["capture"]') - continue - - # From that point, we just try to get the content. Break as soon as we found one. - if urlnode.body_hash == body_hash: - # the hash we're looking for is the whole file - return details, urlnode.body - else: - # The hash is an embedded resource - for _, blobs in urlnode.embedded_ressources.items(): - for h, b in blobs: - if h == body_hash: - return details, b - - # TODO: Couldn't find the file anywhere. Maybe return a warning in the file? 
- return details, BytesIO() - - def get_all_body_hashes(self, capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]: - ct = self.get_crawled_tree(capture_uuid) - to_return: dict[str, dict[str, URLNode | int]] = defaultdict() - for node in ct.root_hartree.url_tree.traverse(): - if node.empty_response or node.body_hash in to_return: - # If we have the same hash more than once, skip - continue - total_captures, details = self.indexing.get_body_hash_captures(node.body_hash, limit=-1) - # Note for future: mayeb get url, capture title, something better than just the hash to show to the user - to_return[node.body_hash] = {'node': node, 'total_captures': total_captures} - return to_return - - def get_latest_url_capture(self, url: str, /) -> CaptureCache | None: - '''Get the most recent capture with this URL''' - captures = self.sorted_capture_cache(self.indexing.get_captures_url(url)) - if captures: - return captures[0] - return None - - def get_url_occurrences(self, url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]: - '''Get the most recent captures and URL nodes where the URL has been seen.''' - captures = self.sorted_capture_cache(self.indexing.get_captures_url(url), cached_captures_only=cached_captures_only) - - to_return: list[dict[str, Any]] = [] - for capture in captures[:limit]: - ct = self.get_crawled_tree(capture.uuid) - to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid, - 'start_timestamp': capture.timestamp.isoformat(), - 'title': capture.title} - urlnodes: dict[str, dict[str, str]] = {} - for urlnode in ct.root_hartree.url_tree.search_nodes(name=url): - urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(), - 'hostnode_uuid': urlnode.hostnode_uuid} - if hasattr(urlnode, 'body_hash'): - urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash - to_append['urlnodes'] = urlnodes - to_return.append(to_append) - return to_return - - def get_hostname_occurrences(self, hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]: - '''Get the most recent captures and URL nodes where the hostname has been seen.''' - captures = self.sorted_capture_cache(self.indexing.get_captures_hostname(hostname), cached_captures_only=cached_captures_only) - - to_return: list[dict[str, Any]] = [] - for capture in captures[:limit]: - ct = self.get_crawled_tree(capture.uuid) - to_append: dict[str, str | list[Any] | dict[str, Any]] = { - 'capture_uuid': capture.uuid, - 'start_timestamp': capture.timestamp.isoformat(), - 'title': capture.title} - hostnodes: list[str] = [] - if with_urls_occurrences: - urlnodes: dict[str, dict[str, str]] = {} - for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname): - hostnodes.append(hostnode.uuid) - if with_urls_occurrences: - for urlnode in hostnode.urls: - urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(), - 'url': urlnode.name, - 'hostnode_uuid': urlnode.hostnode_uuid} - if hasattr(urlnode, 'body_hash'): - urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash - to_append['hostnodes'] = hostnodes - if with_urls_occurrences: - to_append['urlnodes'] = urlnodes - to_return.append(to_append) - return to_return - - def get_cookie_name_investigator(self, cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]: - '''Returns all the captures related to a cookie name entry, used in the web interface.''' - cached_captures = self.sorted_capture_cache([entry[0] for 
entry in self.indexing.get_cookies_names_captures(cookie_name)]) - captures = [(cache.uuid, cache.title) for cache in cached_captures] - domains = [(domain, freq, self.indexing.cookies_names_domains_values(cookie_name, domain)) - for domain, freq in self.indexing.get_cookie_domains(cookie_name)] - return captures, domains - def compute_mmh3_shodan(self, favicon: bytes, /) -> str: b64 = base64.encodebytes(favicon) return str(mmh3.hash(b64)) - def get_favicon_investigator(self, favicon_sha512: str, - /, - get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]], - tuple[str, str, str], - dict[str, dict[str, dict[str, tuple[str, str]]]]]: - '''Returns all the captures related to a cookie name entry, used in the web interface.''' - cached_captures = self.sorted_capture_cache([uuid for uuid in self.indexing.get_captures_favicon(favicon_sha512)]) - captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures] - favicon = self.indexing.get_favicon(favicon_sha512) - if favicon: - mimetype = from_string(favicon, mime=True) - b64_favicon = base64.b64encode(favicon).decode() - mmh3_shodan = self.compute_mmh3_shodan(favicon) - else: - mimetype = '' - b64_favicon = '' - mmh3_shodan = '' - - # For now, there is only one probabilistic hash algo for favicons, keeping it simple - probabilistic_hash_algos = ['mmh3-shodan'] - probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {} - if get_probabilistic: - for algo in probabilistic_hash_algos: - probabilistic_favicons[algo] = {} - for mm3hash in self.indexing.get_probabilistic_hashes_favicon(algo, favicon_sha512): - probabilistic_favicons[algo][mm3hash] = {} - for sha512 in self.indexing.get_hashes_favicon_probablistic(algo, mm3hash): - if sha512 == favicon_sha512: - # Skip entry if it is the same as the favicon we are investigating - continue - favicon = self.indexing.get_favicon(sha512) - if favicon: - mimetype = from_string(favicon, mime=True) - b64_favicon = base64.b64encode(favicon).decode() - probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon) - if not probabilistic_favicons[algo][mm3hash]: - # remove entry if it has no favicon - probabilistic_favicons[algo].pop(mm3hash) - if not probabilistic_favicons[algo]: - # remove entry if it has no hash - probabilistic_favicons.pop(algo) - return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons - - def get_hhh_investigator(self, hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]: - '''Returns all the captures related to a cookie name entry, used in the web interface.''' - all_captures = dict(self.indexing.get_http_headers_hashes_captures(hhh)) - if cached_captures := self.sorted_capture_cache([entry for entry in all_captures]): - captures = [] - for cache in cached_captures: - try: - urlnode = self.get_urlnode_from_tree(cache.uuid, all_captures[cache.uuid]) - except Exception as e: - self.logger.warning(f'Cache for {cache.uuid} needs a rebuild: {e}.') - self._captures_index.remove_pickle(cache.uuid) - continue - captures.append((cache.uuid, urlnode.hostnode_uuid, urlnode.name, cache.title)) - # get the headers and format them as they were in the response - urlnode = self.get_urlnode_from_tree(cached_captures[0].uuid, all_captures[cached_captures[0].uuid]) - headers = [(header["name"], header["value"]) for header in urlnode.response['headers']] - return captures, headers - return [], [] - - def hash_lookup(self, blob_hash: str, url: str, capture_uuid: str) -> 
tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]: - '''Search all the captures a specific hash was seen. - If a URL is given, it splits the results if the hash is seen on the same URL or an other one. - Capture UUID avoids duplicates on the same capture''' - captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} - total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, - prefered_uuids=set(self._captures_index.keys())) - for h_capture_uuid, url_uuid, url_hostname, same_url, url in details: - cache = self.capture_cache(h_capture_uuid) - if cache and hasattr(cache, 'title'): - if same_url: - captures_list['same_url'].append((h_capture_uuid, url_uuid, cache.title, cache.timestamp.isoformat(), url_hostname)) - else: - captures_list['different_url'].append((h_capture_uuid, url_uuid, cache.title, cache.timestamp.isoformat(), url_hostname)) - # Sort by timestamp by default - captures_list['same_url'].sort(key=lambda y: y[3]) - captures_list['different_url'].sort(key=lambda y: y[3]) - return total_captures, captures_list - def get_ressource(self, tree_uuid: str, /, urlnode_uuid: str, h: str | None) -> tuple[str, BytesIO, str] | None: '''Get a specific resource from a URL node. If a hash s also given, we want an embeded resource''' try: @@ -1375,116 +1169,6 @@ class Lookyloo(): """Get the preconfigured devices from Playwright""" return get_devices() - def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]: - '''Gather all the informations needed to display the Hostnode investigator popup.''' - - def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]: - ''' There are a few different sources to figure out known vs. 
legitimate content, - this method normalize it for the web interface.''' - known: str | list[Any] | None = None - legitimate: tuple[bool, Any] | None = None - if h not in known_content: - return known, legitimate - - if known_content[h]['type'] in ['generic', 'sanejs']: - known = known_content[h]['details'] - elif known_content[h]['type'] == 'legitimate_on_domain': - legit = False - if url.hostname in known_content[h]['details']: - legit = True - legitimate = (legit, known_content[h]['details']) - elif known_content[h]['type'] == 'malicious': - legitimate = (False, known_content[h]['details']) - - return known, legitimate - - ct = self.get_crawled_tree(capture_uuid) - hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid) - - known_content = self.context.find_known_content(hostnode) - self.uwhois.query_whois_hostnode(hostnode) - - urls: list[dict[str, Any]] = [] - for url in hostnode.urls: - # For the popup, we need: - # * https vs http - # * everything after the domain - # * the full URL - to_append: dict[str, Any] = { - 'encrypted': url.name.startswith('https'), - 'url_path': url.name.split('/', 3)[-1], - 'url_object': url, - } - - if not url.empty_response: - # Index lookup - # %%% Full body %%% - freq = self.indexing.body_hash_fequency(url.body_hash) - to_append['body_hash_details'] = freq - if freq and 'hash_freq' in freq and freq['hash_freq'] and freq['hash_freq'] > 1: - to_append['body_hash_details']['other_captures'] = self.hash_lookup(url.body_hash, url.name, capture_uuid) - - # %%% Embedded ressources %%% - if hasattr(url, 'embedded_ressources') and url.embedded_ressources: - to_append['embedded_ressources'] = {} - for mimetype, blobs in url.embedded_ressources.items(): - for h, blob in blobs: - if h in to_append['embedded_ressources']: - # Skip duplicates - continue - freq_embedded = self.indexing.body_hash_fequency(h) - to_append['embedded_ressources'][h] = freq_embedded - to_append['embedded_ressources'][h]['body_size'] = blob.getbuffer().nbytes - to_append['embedded_ressources'][h]['type'] = mimetype - if freq_embedded['hash_freq'] > 1: - to_append['embedded_ressources'][h]['other_captures'] = self.hash_lookup(h, url.name, capture_uuid) - for h in to_append['embedded_ressources'].keys(): - known, legitimate = normalize_known_content(h, known_content, url) - if known: - to_append['embedded_ressources'][h]['known_content'] = known - elif legitimate: - to_append['embedded_ressources'][h]['legitimacy'] = legitimate - - known, legitimate = normalize_known_content(url.body_hash, known_content, url) - if known: - to_append['known_content'] = known - elif legitimate: - to_append['legitimacy'] = legitimate - - # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response - if hasattr(url, 'cookies_sent'): - to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set) - for cookie, contexts in url.cookies_sent.items(): - if not contexts: - # Locally created? 
- to_display_sent[cookie].add(('Unknown origin', )) - continue - for context in contexts: - to_display_sent[cookie].add((context['setter'].hostname, context['setter'].hostnode_uuid)) - to_append['cookies_sent'] = to_display_sent - - # Optional: Cookies received from server in response -> map to nodes who send the cookie in request - if hasattr(url, 'cookies_received'): - to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)} - for domain, c_received, is_3rd_party in url.cookies_received: - if c_received not in ct.root_hartree.cookies_sent: - # This cookie is never sent. - if is_3rd_party: - to_display_received['3rd_party'][c_received].add((domain, )) - else: - to_display_received['not_sent'][c_received].add((domain, )) - continue - - for url_node in ct.root_hartree.cookies_sent[c_received]: - if is_3rd_party: - to_display_received['3rd_party'][c_received].add((url_node.hostname, url_node.hostnode_uuid)) - else: - to_display_received['sent'][c_received].add((url_node.hostname, url_node.hostnode_uuid)) - to_append['cookies_received'] = to_display_received - - urls.append(to_append) - return hostnode, urls - def get_stats(self) -> dict[str, list[Any]]: '''Gather statistics about the lookyloo instance''' today = date.today() diff --git a/poetry.lock b/poetry.lock index 375e51a..4c0facc 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,25 +1,25 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.1 and should not be changed by hand. [[package]] name = "aiobotocore" -version = "2.11.2" +version = "2.12.1" description = "Async client for aws services using botocore and aiohttp" optional = false python-versions = ">=3.8" files = [ - {file = "aiobotocore-2.11.2-py3-none-any.whl", hash = "sha256:487fede588040bfa3a43df945275c28c1c73ca75bf705295adb9fbadd2e89be7"}, - {file = "aiobotocore-2.11.2.tar.gz", hash = "sha256:6dd7352248e3523019c5a54a395d2b1c31080697fc80a9ad2672de4eec8c7abd"}, + {file = "aiobotocore-2.12.1-py3-none-any.whl", hash = "sha256:6a9a3d646cf422f45fdc1e4256e78563ebffba64733bc9b8ca9123614e8ba9af"}, + {file = "aiobotocore-2.12.1.tar.gz", hash = "sha256:8706b28f16f93c541f6ed50352115a79d8f3499539f8d0bb70aa0f7a5379c1fe"}, ] [package.dependencies] aiohttp = ">=3.7.4.post0,<4.0.0" aioitertools = ">=0.5.1,<1.0.0" -botocore = ">=1.33.2,<1.34.35" +botocore = ">=1.34.41,<1.34.52" wrapt = ">=1.10.10,<2.0.0" [package.extras] -awscli = ["awscli (>=1.31.2,<1.32.35)"] -boto3 = ["boto3 (>=1.33.2,<1.34.35)"] +awscli = ["awscli (>=1.32.41,<1.32.52)"] +boto3 = ["boto3 (>=1.34.41,<1.34.52)"] [[package]] name = "aiohttp" @@ -308,13 +308,13 @@ WTForms = "*" [[package]] name = "botocore" -version = "1.34.34" +version = "1.34.51" description = "Low-level, data-driven core of boto 3." 
optional = false python-versions = ">= 3.8" files = [ - {file = "botocore-1.34.34-py3-none-any.whl", hash = "sha256:cd060b0d88ebb2b893f1411c1db7f2ba66cc18e52dcc57ad029564ef5fec437b"}, - {file = "botocore-1.34.34.tar.gz", hash = "sha256:54093dc97372bb7683f5c61a279aa8240408abf3b2cc494ae82a9a90c1b784b5"}, + {file = "botocore-1.34.51-py3-none-any.whl", hash = "sha256:01d5156247f991b3466a8404e3d7460a9ecbd9b214f9992d6ba797d9ddc6f120"}, + {file = "botocore-1.34.51.tar.gz", hash = "sha256:5086217442e67dd9de36ec7e87a0c663f76b7790d5fb6a12de565af95e87e319"}, ] [package.dependencies] @@ -1331,13 +1331,13 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pa [[package]] name = "ipython" -version = "8.22.1" +version = "8.22.2" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.22.1-py3-none-any.whl", hash = "sha256:869335e8cded62ffb6fac8928e5287a05433d6462e3ebaac25f4216474dd6bc4"}, - {file = "ipython-8.22.1.tar.gz", hash = "sha256:39c6f9efc079fb19bfb0f17eee903978fe9a290b1b82d68196c641cecb76ea22"}, + {file = "ipython-8.22.2-py3-none-any.whl", hash = "sha256:3c86f284c8f3d8f2b6c662f885c4889a91df7cd52056fd02b7d8d6195d7f56e9"}, + {file = "ipython-8.22.2.tar.gz", hash = "sha256:2dcaad9049f9056f1fef63514f176c7d41f930daa78d05b82a176202818f2c14"}, ] [package.dependencies] @@ -1463,20 +1463,21 @@ referencing = ">=0.31.0" [[package]] name = "lacuscore" -version = "1.8.7" +version = "1.8.8" description = "Core of Lacus, usable as a module" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "lacuscore-1.8.7-py3-none-any.whl", hash = "sha256:1ac849b1308eb780f1976fdf21d6476bd911e6ae1e91f79d83612baef90afee8"}, - {file = "lacuscore-1.8.7.tar.gz", hash = "sha256:67268fb4da1282d1c7f747b02611dd5ee549644e034d0acba2173396ce0d0408"}, + {file = "lacuscore-1.8.8-py3-none-any.whl", hash = "sha256:04812225e101ec59b3d1dcc6d3474e3cd2f3fd656a72d619e7d7d238d00b5a27"}, + {file = "lacuscore-1.8.8.tar.gz", hash = "sha256:41949ff67d056f8ba717b649d8b45307ff7d38d4c38291cb1a8b80ca2ce94f6f"}, ] [package.dependencies] +async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""} defang = ">=0.5.3,<0.6.0" dnspython = ">=2.6.1,<3.0.0" -playwrightcapture = {version = ">=1.23.8,<2.0.0", extras = ["recaptcha"]} -redis = {version = ">=5.0.1,<6.0.0", extras = ["hiredis"]} +playwrightcapture = {version = ">=1.23.9,<2.0.0", extras = ["recaptcha"]} +redis = {version = ">=5.0.2,<6.0.0", extras = ["hiredis"]} requests = ">=2.31.0,<3.0.0" ua-parser = ">=0.18.0,<0.19.0" @@ -2286,13 +2287,13 @@ test = ["pytest"] [[package]] name = "playwrightcapture" -version = "1.23.8" +version = "1.23.9" description = "A simple library to capture websites using playwright" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "playwrightcapture-1.23.8-py3-none-any.whl", hash = "sha256:f3e4d6c0355b013e465f9d3eea961b9431303a5de227a1388a7287c872203b9e"}, - {file = "playwrightcapture-1.23.8.tar.gz", hash = "sha256:d2caea8d7a16d739f28dc06bbbc12665be89d07d325bba6868dab5f8520db809"}, + {file = "playwrightcapture-1.23.9-py3-none-any.whl", hash = "sha256:0324f587605aa85ede1b71c12ec735383d932324f0e66ef35345c6e08734273c"}, + {file = "playwrightcapture-1.23.9.tar.gz", hash = "sha256:e7217fc2a6109f240918de977452c556f482822abb12f0db43fa28228d3c0c90"}, ] [package.dependencies] @@ -2339,13 +2340,13 @@ files = [ [[package]] name = "publicsuffixlist" -version = "0.10.0.20240214" +version = "0.10.0.20240305" description = 
"publicsuffixlist implement" optional = false python-versions = ">=2.6" files = [ - {file = "publicsuffixlist-0.10.0.20240214-py2.py3-none-any.whl", hash = "sha256:2c3b8da819571bb610328bda5b25d27fcbf6bc400896ca3c6502d291a16b32f4"}, - {file = "publicsuffixlist-0.10.0.20240214.tar.gz", hash = "sha256:45a206c5f9c1eccf138481280cfb0a67c2ccafc782ef89c7fd6dc6c4356230fe"}, + {file = "publicsuffixlist-0.10.0.20240305-py2.py3-none-any.whl", hash = "sha256:f6869119f8781501c0c625e59b4b65eb60e2ed5185cfd6c142c792f74ac47c21"}, + {file = "publicsuffixlist-0.10.0.20240305.tar.gz", hash = "sha256:6e79ea73b0278ce1b102f3ad6815f2a5b683864da9948ba0b0eab3180c419f7f"}, ] [package.extras] @@ -2571,13 +2572,13 @@ docs = ["Sphinx (<7.2)", "Sphinx (>=7.2,<8.0)"] [[package]] name = "pymisp" -version = "2.4.185" +version = "2.4.186" description = "Python API for MISP." optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "pymisp-2.4.185-py3-none-any.whl", hash = "sha256:e2635a2be92321d4f812c7220bd955817e95a286343720f138b87892a827117a"}, - {file = "pymisp-2.4.185.tar.gz", hash = "sha256:3ccdc6ee48d26d82c77ba3f5d8fd41a79eaaef0ad5619f37a65b060e92f6da4c"}, + {file = "pymisp-2.4.186-py3-none-any.whl", hash = "sha256:bb8ae23d038848a86cf5d6a4c965dbed79e48cd6f671681b17f72410aecf07a0"}, + {file = "pymisp-2.4.186.tar.gz", hash = "sha256:bdf2d54b297ad890418179b044dd4ea79821fccef723823919d12262e9794ca3"}, ] [package.dependencies] @@ -2593,7 +2594,7 @@ requests = ">=2.31.0,<3.0.0" [package.extras] brotli = ["urllib3[brotli]"] docs = ["Sphinx (<7.2)", "Sphinx (>=7.2,<8.0)", "recommonmark (>=0.7.1,<0.8.0)", "sphinx-autodoc-typehints (>=2.0.0,<3.0.0)"] -email = ["RTFDE (>=0.1.1,<0.2.0)", "extract_msg (>=0.47.0,<0.48.0)", "oletools (>=0.60.1,<0.61.0)"] +email = ["RTFDE (>=0.1.1,<0.2.0)", "extract_msg (>=0.48.0,<0.49.0)", "oletools (>=0.60.1,<0.61.0)"] fileobjects = ["lief (>=0.14.1,<0.15.0)", "pydeep2 (>=0.5.1,<0.6.0)", "python-magic (>=0.4.27,<0.5.0)"] openioc = ["beautifulsoup4 (>=4.12.3,<5.0.0)"] pdfexport = ["reportlab (>=4.1.0,<5.0.0)"] @@ -2668,13 +2669,13 @@ requests = ">=2.31.0,<3.0.0" [[package]] name = "pysecuritytxt" -version = "1.2.2" +version = "1.3.0" description = "Python CLI and module for querying security.txt files on domains." 
optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "pysecuritytxt-1.2.2-py3-none-any.whl", hash = "sha256:08d8750d82e9502ba949a6ea7bab355ca183cfc3cd722ed3e492ba35a8d4edda"}, - {file = "pysecuritytxt-1.2.2.tar.gz", hash = "sha256:31d4ea4814e2cdeffce304e7b6f9d58580e7fb6578c8694bb6f8c0df59e65b3d"}, + {file = "pysecuritytxt-1.3.0-py3-none-any.whl", hash = "sha256:9e4eb6b4fdca8f8471c80696c4d7642be24d44c8c3f627870ca9b7bd3f221cd5"}, + {file = "pysecuritytxt-1.3.0.tar.gz", hash = "sha256:3669be69e90672ed0d448b385e5fef49cb3a6a611d7e386d673c4f0e1cc3e83b"}, ] [package.dependencies] @@ -2712,13 +2713,13 @@ webui = ["Flask (>=2.0,<3.0)", "Flask-Bootstrap (>=3.3.7.1,<4.0.0.0)", "Flask-WT [[package]] name = "python-dateutil" -version = "2.8.2" +version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, - {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, ] [package.dependencies] @@ -2748,17 +2749,17 @@ files = [ [[package]] name = "redis" -version = "5.0.1" +version = "5.0.2" description = "Python client for Redis database and key-value store" optional = false python-versions = ">=3.7" files = [ - {file = "redis-5.0.1-py3-none-any.whl", hash = "sha256:ed4802971884ae19d640775ba3b03aa2e7bd5e8fb8dfaed2decce4d0fc48391f"}, - {file = "redis-5.0.1.tar.gz", hash = "sha256:0dab495cd5753069d3bc650a0dde8a8f9edde16fc5691b689a566eda58100d0f"}, + {file = "redis-5.0.2-py3-none-any.whl", hash = "sha256:4caa8e1fcb6f3c0ef28dba99535101d80934b7d4cd541bbb47f4a3826ee472d1"}, + {file = "redis-5.0.2.tar.gz", hash = "sha256:3f82cc80d350e93042c8e6e7a5d0596e4dd68715babffba79492733e1f367037"}, ] [package.dependencies] -async-timeout = {version = ">=4.0.2", markers = "python_full_version <= \"3.11.2\""} +async-timeout = ">=4.0.3" hiredis = {version = ">=1.0.0", optional = true, markers = "extra == \"hiredis\""} [package.extras] @@ -2942,6 +2943,7 @@ optional = false python-versions = "*" files = [ {file = "requests-file-2.0.0.tar.gz", hash = "sha256:20c5931629c558fda566cacc10cfe2cd502433e628f568c34c80d96a0cc95972"}, + {file = "requests_file-2.0.0-py2.py3-none-any.whl", hash = "sha256:3e493d390adb44aa102ebea827a48717336d5268968c370eaf19abaf5cae13bf"}, ] [package.dependencies] @@ -2949,13 +2951,13 @@ requests = ">=1.0.0" [[package]] name = "rich" -version = "13.7.0" +version = "13.7.1" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.7.0" files = [ - {file = "rich-13.7.0-py3-none-any.whl", hash = "sha256:6da14c108c4866ee9520bbffa71f6fe3962e193b7da68720583850cd4548e235"}, - {file = "rich-13.7.0.tar.gz", hash = "sha256:5cb5123b5cf9ee70584244246816e9114227e0b98ad9176eede6ad54bf5403fa"}, + {file = "rich-13.7.1-py3-none-any.whl", hash = "sha256:4edbae314f59eb482f54e9e30bf00d33350aaa94f4bfcd4e9e3110e64d0d7222"}, + {file = "rich-13.7.1.tar.gz", hash = "sha256:9be308cb1fe2f1f57d67ce99e95af38a1e2bc71ad9813b0e247cf7ffbcc3a432"}, ] 
[package.dependencies] @@ -3217,13 +3219,13 @@ test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0, [[package]] name = "types-beautifulsoup4" -version = "4.12.0.20240106" +version = "4.12.0.20240229" description = "Typing stubs for beautifulsoup4" optional = false python-versions = ">=3.8" files = [ - {file = "types-beautifulsoup4-4.12.0.20240106.tar.gz", hash = "sha256:98d628985b71b140bd3bc22a8cb0ab603c2f2d08f20d37925965eb4a21739be8"}, - {file = "types_beautifulsoup4-4.12.0.20240106-py3-none-any.whl", hash = "sha256:cbdd60ab8aeac737ac014431b6e921b43e84279c0405fdd25a6900bb0e71da5b"}, + {file = "types-beautifulsoup4-4.12.0.20240229.tar.gz", hash = "sha256:e37e4cfa11b03b01775732e56d2c010cb24ee107786277bae6bc0fa3e305b686"}, + {file = "types_beautifulsoup4-4.12.0.20240229-py3-none-any.whl", hash = "sha256:000cdddb8aee4effb45a04be95654de8629fb8594a4f2f1231cff81108977324"}, ] [package.dependencies] @@ -3242,13 +3244,13 @@ files = [ [[package]] name = "types-html5lib" -version = "1.1.11.20240222" +version = "1.1.11.20240228" description = "Typing stubs for html5lib" optional = false python-versions = ">=3.8" files = [ - {file = "types-html5lib-1.1.11.20240222.tar.gz", hash = "sha256:d9517ec6ba2fa1f63113e2930a59b60722a976cc983b94d7fd772f14865e1152"}, - {file = "types_html5lib-1.1.11.20240222-py3-none-any.whl", hash = "sha256:86b2dcbbebca846e68d2eac46b2717980e632de4b5d8f62ccd23d8333d2e7647"}, + {file = "types-html5lib-1.1.11.20240228.tar.gz", hash = "sha256:22736b7299e605ec4ba539d48691e905fd0c61c3ea610acc59922232dc84cede"}, + {file = "types_html5lib-1.1.11.20240228-py3-none-any.whl", hash = "sha256:af5de0125cb0fe5667543b158db83849b22e25c0e36c9149836b095548bf1020"}, ] [[package]] @@ -3275,13 +3277,13 @@ files = [ [[package]] name = "types-pyopenssl" -version = "24.0.0.20240130" +version = "24.0.0.20240228" description = "Typing stubs for pyOpenSSL" optional = false python-versions = ">=3.8" files = [ - {file = "types-pyOpenSSL-24.0.0.20240130.tar.gz", hash = "sha256:c812e5c1c35249f75ef5935708b2a997d62abf9745be222e5f94b9595472ab25"}, - {file = "types_pyOpenSSL-24.0.0.20240130-py3-none-any.whl", hash = "sha256:24a255458b5b8a7fca8139cf56f2a8ad5a4f1a5f711b73a5bb9cb50dc688fab5"}, + {file = "types-pyOpenSSL-24.0.0.20240228.tar.gz", hash = "sha256:cd990717d8aa3743ef0e73e0f462e64b54d90c304249232d48fece4f0f7c3c6a"}, + {file = "types_pyOpenSSL-24.0.0.20240228-py3-none-any.whl", hash = "sha256:a472cf877a873549175e81972f153f44e975302a3cf17381eb5f3d41ccfb75a4"}, ] [package.dependencies] @@ -3734,4 +3736,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "bc64701a9d95985f7d0c91086fabfc29a1c54affc60bfab612fecc3771d6acd4" +content-hash = "7e76c4614efed850e101ecaa1e91f141649ef4ad508522f0323e8efffc9eda7d" diff --git a/pyproject.toml b/pyproject.toml index e1d8387..0c87a94 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,6 +29,8 @@ shutdown = "bin.shutdown:main" run_backend = "bin.run_backend:main" async_capture = "bin.async_capture:main" background_indexer = "bin.background_indexer:main" +background_build_captures = "bin.background_build_captures:main" +background_full_indexer = "bin.background_indexer:main_full_indexer" archiver = "bin.archiver:main" processing = "bin.background_processing:main" start_website = "bin.start_website:main" @@ -40,7 +42,7 @@ requests = "^2.31.0" flask = "^3.0.2" gunicorn = "^21.2.0" charset-normalizer = "^3.3.2" -redis = {version = 
"^5.0.1", extras = ["hiredis"]} +redis = {version = "^5.0.2", extras = ["hiredis"]} beautifulsoup4 = {version = "^4.12.3", extras = ["lxml", "charset_normalizer"]} bootstrap-flask = "^2.3.3" defang = "^0.5.3" @@ -50,10 +52,10 @@ pysanejs = "^2.0.2" pylookyloo = "^1.23.1" dnspython = "^2.6.1" pytaxonomies = "^1.5.0" -pymisp = {version = "^2.4.185", extras = ["url", "fileobjects"]} +pymisp = {version = "^2.4.186", extras = ["url", "fileobjects"]} Pillow = "^10.2.0" flask-restx = "^1.3.0" -rich = "^13.7.0" +rich = "^13.7.1" pyphishtanklookup = "^1.3.2" Flask-Cors = "^4.0.0" pyhashlookup = "^1.2.2" @@ -65,13 +67,13 @@ passivetotal = "^2.5.9" werkzeug = "^3.0.1" filetype = "^1.2.0" pypandora = "^1.8.0" -lacuscore = "^1.8.7" +lacuscore = "^1.8.8" pylacus = "^1.8.0" pyipasnhistory = "^2.1.2" publicsuffixlist = "^0.10.0.20240205" pyfaup = "^1.2" chardet = "^5.2.0" -pysecuritytxt = "^1.2.2" +pysecuritytxt = "^1.3.0" pylookyloomonitoring = "^1.1.3" pytz = {"version" = "^2024.1", python = "<3.9"} s3fs = "^2024.2.0" @@ -98,7 +100,7 @@ types-redis = {version = "^4.6.0.20240218"} types-pkg-resources = "^0.1.3" types-Deprecated = "^1.2.9.20240106" types-python-dateutil = "^2.8.19.20240106" -types-beautifulsoup4 = "^4.12.0.20240106" +types-beautifulsoup4 = "^4.12.0.20240229" types-Pillow = "^10.2.0.20240213" types-pytz = "^2024.1.0.20240203" diff --git a/website/web/__init__.py b/website/web/__init__.py index 7980381..57c2a3d 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -17,14 +17,16 @@ import time import filetype # type: ignore[import-untyped] +from collections import defaultdict from datetime import date, datetime, timedelta, timezone from importlib.metadata import version from io import BytesIO, StringIO -from typing import Any, TypedDict +from typing import Any, TypedDict, Iterable from urllib.parse import quote_plus, unquote_plus, urlparse from uuid import uuid4 from zipfile import ZipFile +from har2tree import HostNode, URLNode import flask_login # type: ignore[import-untyped] from flask import (Flask, Response, Request, flash, jsonify, redirect, render_template, request, send_file, url_for) @@ -37,7 +39,8 @@ from pymisp import MISPEvent, MISPServerError # type: ignore[attr-defined] from werkzeug.security import check_password_hash from werkzeug.wrappers.response import Response as WerkzeugResponse -from lookyloo import Lookyloo, CaptureSettings +from lookyloo import Lookyloo, CaptureSettings, Indexing +from lookyloo.capturecache import CaptureCache from lookyloo.default import get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies @@ -262,6 +265,353 @@ def file_response(func): # type: ignore[no-untyped-def] return wrapper +# ##### Methods querying the indexes ##### + +@functools.cache +def get_indexing(user: User | None) -> Indexing: + '''Depending if we're logged in or not, we (can) get different indexes: + if index_everything is enabled, we have an index in kvrocks that contains all + the indexes for all the captures. + It is only accessible to the admin user. 
+ ''' + if not get_config('generic', 'index_everything'): + return Indexing() + + if not user or not user.is_authenticated: + # No user or anonymous + return Indexing() + # Logged in user + return Indexing(full_index=True) + + +def _get_body_hash_investigator(body_hash: str, /) -> tuple[list[tuple[str, str, datetime, str, str]], list[tuple[str, float]]]: + '''Returns all the captures related to a hash (sha512), used in the web interface.''' + total_captures, details = get_indexing(flask_login.current_user).get_body_hash_captures(body_hash, limit=-1) + captures = [] + for capture_uuid, hostnode_uuid, hostname, _, url in details: + cache = lookyloo.capture_cache(capture_uuid) + if not cache: + continue + captures.append((cache.uuid, cache.title, cache.timestamp, hostnode_uuid, url)) + domains = get_indexing(flask_login.current_user).get_body_hash_domains(body_hash) + return captures, domains + + +def get_body_hash_full(body_hash: str, /) -> tuple[dict[str, list[dict[str, str]]], BytesIO]: + '''Returns a lot of information about the hash (sha512) and the hits in the instance. + Also contains the data (base64 encoded)''' + details = get_indexing(flask_login.current_user).get_body_hash_urls(body_hash) + + # Break immediately if we have the hash of the empty file + if body_hash == 'cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e': + return details, BytesIO() + + # get the body from the first entry in the details list + for _, entries in details.items(): + if not entries: + continue + ct = lookyloo.get_crawled_tree(entries[0]['capture']) + try: + urlnode = ct.root_hartree.get_url_node_by_uuid(entries[0]['urlnode']) + except Exception: + # Unable to find URLnode in the tree, it probably has been rebuild. + # TODO throw a log line or something + # self.logger.warning(f'Unable to find {entries[0]["urlnode"]} in entries[0]["capture"]') + # lookyloo._captures_index.remove_pickle() + continue + + # From that point, we just try to get the content. Break as soon as we found one. + if urlnode.body_hash == body_hash: + # the hash we're looking for is the whole file + return details, urlnode.body + else: + # The hash is an embedded resource + for _, blobs in urlnode.embedded_ressources.items(): + for h, b in blobs: + if h == body_hash: + return details, b + + # TODO: Couldn't find the file anywhere. Maybe return a warning in the file? 
+ return details, BytesIO() + + +def get_all_body_hashes(capture_uuid: str, /) -> dict[str, dict[str, URLNode | int]]: + ct = lookyloo.get_crawled_tree(capture_uuid) + to_return: dict[str, dict[str, URLNode | int]] = defaultdict() + for node in ct.root_hartree.url_tree.traverse(): + if node.empty_response or node.body_hash in to_return: + # If we have the same hash more than once, skip + continue + total_captures, details = get_indexing(flask_login.current_user).get_body_hash_captures(node.body_hash, limit=-1) + # Note for future: mayeb get url, capture title, something better than just the hash to show to the user + to_return[node.body_hash] = {'node': node, 'total_captures': total_captures} + return to_return + + +def get_latest_url_capture(url: str, /) -> CaptureCache | None: + '''Get the most recent capture with this URL''' + captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_url(url)) + if captures: + return captures[0] + return None + + +def get_url_occurrences(url: str, /, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]: + '''Get the most recent captures and URL nodes where the URL has been seen.''' + captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_url(url), cached_captures_only=cached_captures_only) + + to_return: list[dict[str, Any]] = [] + for capture in captures[:limit]: + ct = lookyloo.get_crawled_tree(capture.uuid) + to_append: dict[str, str | dict[str, Any]] = {'capture_uuid': capture.uuid, + 'start_timestamp': capture.timestamp.isoformat(), + 'title': capture.title} + urlnodes: dict[str, dict[str, str]] = {} + for urlnode in ct.root_hartree.url_tree.search_nodes(name=url): + urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(), + 'hostnode_uuid': urlnode.hostnode_uuid} + if hasattr(urlnode, 'body_hash'): + urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash + to_append['urlnodes'] = urlnodes + to_return.append(to_append) + return to_return + + +def get_hostname_occurrences(hostname: str, /, with_urls_occurrences: bool=False, limit: int=20, cached_captures_only: bool=True) -> list[dict[str, Any]]: + '''Get the most recent captures and URL nodes where the hostname has been seen.''' + captures = lookyloo.sorted_capture_cache(get_indexing(flask_login.current_user).get_captures_hostname(hostname), cached_captures_only=cached_captures_only) + + to_return: list[dict[str, Any]] = [] + for capture in captures[:limit]: + ct = lookyloo.get_crawled_tree(capture.uuid) + to_append: dict[str, str | list[Any] | dict[str, Any]] = { + 'capture_uuid': capture.uuid, + 'start_timestamp': capture.timestamp.isoformat(), + 'title': capture.title} + hostnodes: list[str] = [] + if with_urls_occurrences: + urlnodes: dict[str, dict[str, str]] = {} + for hostnode in ct.root_hartree.hostname_tree.search_nodes(name=hostname): + hostnodes.append(hostnode.uuid) + if with_urls_occurrences: + for urlnode in hostnode.urls: + urlnodes[urlnode.uuid] = {'start_time': urlnode.start_time.isoformat(), + 'url': urlnode.name, + 'hostnode_uuid': urlnode.hostnode_uuid} + if hasattr(urlnode, 'body_hash'): + urlnodes[urlnode.uuid]['hash'] = urlnode.body_hash + to_append['hostnodes'] = hostnodes + if with_urls_occurrences: + to_append['urlnodes'] = urlnodes + to_return.append(to_append) + return to_return + + +def get_cookie_name_investigator(cookie_name: str, /) -> tuple[list[tuple[str, str]], list[tuple[str, float, list[tuple[str, float]]]]]: + '''Returns all the captures related to a cookie 
name entry, used in the web interface.''' + cached_captures = lookyloo.sorted_capture_cache([entry[0] for entry in get_indexing(flask_login.current_user).get_cookies_names_captures(cookie_name)]) + captures = [(cache.uuid, cache.title) for cache in cached_captures] + domains = [(domain, freq, get_indexing(flask_login.current_user).cookies_names_domains_values(cookie_name, domain)) + for domain, freq in get_indexing(flask_login.current_user).get_cookie_domains(cookie_name)] + return captures, domains + + +def get_favicon_investigator(favicon_sha512: str, + /, + get_probabilistic: bool=False) -> tuple[list[tuple[str, str, str, datetime]], + tuple[str, str, str], + dict[str, dict[str, dict[str, tuple[str, str]]]]]: + '''Returns all the captures related to a cookie name entry, used in the web interface.''' + cached_captures = lookyloo.sorted_capture_cache([uuid for uuid in get_indexing(flask_login.current_user).get_captures_favicon(favicon_sha512)]) + captures = [(cache.uuid, cache.title, cache.redirects[-1], cache.timestamp) for cache in cached_captures] + favicon = get_indexing(flask_login.current_user).get_favicon(favicon_sha512) + if favicon: + mimetype = from_string(favicon, mime=True) + b64_favicon = base64.b64encode(favicon).decode() + mmh3_shodan = lookyloo.compute_mmh3_shodan(favicon) + else: + mimetype = '' + b64_favicon = '' + mmh3_shodan = '' + + # For now, there is only one probabilistic hash algo for favicons, keeping it simple + probabilistic_hash_algos = ['mmh3-shodan'] + probabilistic_favicons: dict[str, dict[str, dict[str, tuple[str, str]]]] = {} + if get_probabilistic: + for algo in probabilistic_hash_algos: + probabilistic_favicons[algo] = {} + for mm3hash in get_indexing(flask_login.current_user).get_probabilistic_hashes_favicon(algo, favicon_sha512): + probabilistic_favicons[algo][mm3hash] = {} + for sha512 in get_indexing(flask_login.current_user).get_hashes_favicon_probablistic(algo, mm3hash): + if sha512 == favicon_sha512: + # Skip entry if it is the same as the favicon we are investigating + continue + favicon = get_indexing(flask_login.current_user).get_favicon(sha512) + if favicon: + mimetype = from_string(favicon, mime=True) + b64_favicon = base64.b64encode(favicon).decode() + probabilistic_favicons[algo][mm3hash][sha512] = (mimetype, b64_favicon) + if not probabilistic_favicons[algo][mm3hash]: + # remove entry if it has no favicon + probabilistic_favicons[algo].pop(mm3hash) + if not probabilistic_favicons[algo]: + # remove entry if it has no hash + probabilistic_favicons.pop(algo) + return captures, (mimetype, b64_favicon, mmh3_shodan), probabilistic_favicons + + +def get_hhh_investigator(hhh: str, /) -> tuple[list[tuple[str, str, str, str]], list[tuple[str, str]]]: + '''Returns all the captures related to a cookie name entry, used in the web interface.''' + all_captures = dict(get_indexing(flask_login.current_user).get_http_headers_hashes_captures(hhh)) + if cached_captures := lookyloo.sorted_capture_cache([entry for entry in all_captures]): + captures = [] + for cache in cached_captures: + try: + urlnode = lookyloo.get_urlnode_from_tree(cache.uuid, all_captures[cache.uuid]) + except Exception: + # NOTE: print a logline + # logger.warning(f'Cache for {cache.uuid} needs a rebuild: {e}.') + lookyloo._captures_index.remove_pickle(cache.uuid) + continue + captures.append((cache.uuid, urlnode.hostnode_uuid, urlnode.name, cache.title)) + # get the headers and format them as they were in the response + urlnode = lookyloo.get_urlnode_from_tree(cached_captures[0].uuid, 
all_captures[cached_captures[0].uuid]) + headers = [(header["name"], header["value"]) for header in urlnode.response['headers']] + return captures, headers + return [], [] + + +def hash_lookup(blob_hash: str, url: str, capture_uuid: str) -> tuple[int, dict[str, list[tuple[str, str, str, str, str]]]]: + '''Search all the captures a specific hash was seen. + If a URL is given, it splits the results if the hash is seen on the same URL or an other one. + Capture UUID avoids duplicates on the same capture''' + captures_list: dict[str, list[tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} + total_captures, details = get_indexing(flask_login.current_user).get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, + prefered_uuids=set(lookyloo._captures_index.keys())) + for h_capture_uuid, url_uuid, url_hostname, same_url, url in details: + cache = lookyloo.capture_cache(h_capture_uuid) + if cache and hasattr(cache, 'title'): + if same_url: + captures_list['same_url'].append((h_capture_uuid, url_uuid, cache.title, cache.timestamp.isoformat(), url_hostname)) + else: + captures_list['different_url'].append((h_capture_uuid, url_uuid, cache.title, cache.timestamp.isoformat(), url_hostname)) + # Sort by timestamp by default + captures_list['same_url'].sort(key=lambda y: y[3]) + captures_list['different_url'].sort(key=lambda y: y[3]) + return total_captures, captures_list + + +def get_hostnode_investigator(capture_uuid: str, /, node_uuid: str) -> tuple[HostNode, list[dict[str, Any]]]: + '''Gather all the informations needed to display the Hostnode investigator popup.''' + + def normalize_known_content(h: str, /, known_content: dict[str, Any], url: URLNode) -> tuple[str | list[Any] | None, tuple[bool, Any] | None]: + ''' There are a few different sources to figure out known vs. 
legitimate content, + this method normalize it for the web interface.''' + known: str | list[Any] | None = None + legitimate: tuple[bool, Any] | None = None + if h not in known_content: + return known, legitimate + + if known_content[h]['type'] in ['generic', 'sanejs']: + known = known_content[h]['details'] + elif known_content[h]['type'] == 'legitimate_on_domain': + legit = False + if url.hostname in known_content[h]['details']: + legit = True + legitimate = (legit, known_content[h]['details']) + elif known_content[h]['type'] == 'malicious': + legitimate = (False, known_content[h]['details']) + + return known, legitimate + + ct = lookyloo.get_crawled_tree(capture_uuid) + hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid) + + known_content = lookyloo.context.find_known_content(hostnode) + lookyloo.uwhois.query_whois_hostnode(hostnode) + + urls: list[dict[str, Any]] = [] + for url in hostnode.urls: + # For the popup, we need: + # * https vs http + # * everything after the domain + # * the full URL + to_append: dict[str, Any] = { + 'encrypted': url.name.startswith('https'), + 'url_path': url.name.split('/', 3)[-1], + 'url_object': url, + } + + if not url.empty_response: + # Index lookup + # %%% Full body %%% + freq = get_indexing(flask_login.current_user).body_hash_fequency(url.body_hash) + to_append['body_hash_details'] = freq + if freq and 'hash_freq' in freq and freq['hash_freq'] and freq['hash_freq'] > 1: + to_append['body_hash_details']['other_captures'] = hash_lookup(url.body_hash, url.name, capture_uuid) + + # %%% Embedded ressources %%% + if hasattr(url, 'embedded_ressources') and url.embedded_ressources: + to_append['embedded_ressources'] = {} + for mimetype, blobs in url.embedded_ressources.items(): + for h, blob in blobs: + if h in to_append['embedded_ressources']: + # Skip duplicates + continue + freq_embedded = get_indexing(flask_login.current_user).body_hash_fequency(h) + to_append['embedded_ressources'][h] = freq_embedded + to_append['embedded_ressources'][h]['body_size'] = blob.getbuffer().nbytes + to_append['embedded_ressources'][h]['type'] = mimetype + if freq_embedded['hash_freq'] > 1: + to_append['embedded_ressources'][h]['other_captures'] = hash_lookup(h, url.name, capture_uuid) + for h in to_append['embedded_ressources'].keys(): + known, legitimate = normalize_known_content(h, known_content, url) + if known: + to_append['embedded_ressources'][h]['known_content'] = known + elif legitimate: + to_append['embedded_ressources'][h]['legitimacy'] = legitimate + + known, legitimate = normalize_known_content(url.body_hash, known_content, url) + if known: + to_append['known_content'] = known + elif legitimate: + to_append['legitimacy'] = legitimate + + # Optional: Cookies sent to server in request -> map to nodes who set the cookie in response + if hasattr(url, 'cookies_sent'): + to_display_sent: dict[str, set[Iterable[str | None]]] = defaultdict(set) + for cookie, contexts in url.cookies_sent.items(): + if not contexts: + # Locally created? 
+ to_display_sent[cookie].add(('Unknown origin', )) + continue + for context in contexts: + to_display_sent[cookie].add((context['setter'].hostname, context['setter'].hostnode_uuid)) + to_append['cookies_sent'] = to_display_sent + + # Optional: Cookies received from server in response -> map to nodes who send the cookie in request + if hasattr(url, 'cookies_received'): + to_display_received: dict[str, dict[str, set[Iterable[str | None]]]] = {'3rd_party': defaultdict(set), 'sent': defaultdict(set), 'not_sent': defaultdict(set)} + for domain, c_received, is_3rd_party in url.cookies_received: + if c_received not in ct.root_hartree.cookies_sent: + # This cookie is never sent. + if is_3rd_party: + to_display_received['3rd_party'][c_received].add((domain, )) + else: + to_display_received['not_sent'][c_received].add((domain, )) + continue + + for url_node in ct.root_hartree.cookies_sent[c_received]: + if is_3rd_party: + to_display_received['3rd_party'][c_received].add((url_node.hostname, url_node.hostnode_uuid)) + else: + to_display_received['sent'][c_received].add((url_node.hostname, url_node.hostnode_uuid)) + to_append['cookies_received'] = to_display_received + + urls.append(to_append) + return hostnode, urls + + # ##### Hostnode level methods ##### @app.route('/tree//host//hashes', methods=['GET']) @@ -283,7 +633,7 @@ def urls_hostnode(tree_uuid: str, node_uuid: str) -> Response: @app.route('/tree//host/', methods=['GET']) def hostnode_popup(tree_uuid: str, node_uuid: str) -> str | WerkzeugResponse | Response: try: - hostnode, urls = lookyloo.get_hostnode_investigator(tree_uuid, node_uuid) + hostnode, urls = get_hostnode_investigator(tree_uuid, node_uuid) except IndexError: return render_template('error.html', error_message='Sorry, this one is on us. The tree was rebuild, please reload the tree and try again.') @@ -850,8 +1200,8 @@ def tree_favicons(tree_uuid: str) -> str: continue mimetype = from_string(favicon, mime=True) favicon_sha512 = hashlib.sha512(favicon).hexdigest() - frequency = lookyloo.indexing.favicon_frequency(favicon_sha512) - number_captures = lookyloo.indexing.favicon_number_captures(favicon_sha512) + frequency = get_indexing(flask_login.current_user).favicon_frequency(favicon_sha512) + number_captures = get_indexing(flask_login.current_user).favicon_number_captures(favicon_sha512) b64_favicon = base64.b64encode(favicon).decode() mmh3_shodan = lookyloo.compute_mmh3_shodan(favicon) favicons.append((favicon_sha512, frequency, number_captures, mimetype, b64_favicon, mmh3_shodan)) @@ -860,7 +1210,7 @@ def tree_favicons(tree_uuid: str) -> str: @app.route('/tree//body_hashes', methods=['GET']) def tree_body_hashes(tree_uuid: str) -> str: - body_hashes = lookyloo.get_all_body_hashes(tree_uuid) + body_hashes = get_all_body_hashes(tree_uuid) return render_template('tree_body_hashes.html', tree_uuid=tree_uuid, body_hashes=body_hashes) @@ -958,27 +1308,27 @@ def index_hidden() -> str: @app.route('/cookies', methods=['GET']) def cookies_lookup() -> str: - cookies_names = [(name, freq, lookyloo.indexing.cookies_names_number_domains(name)) - for name, freq in lookyloo.indexing.cookies_names] + cookies_names = [(name, freq, get_indexing(flask_login.current_user).cookies_names_number_domains(name)) + for name, freq in get_indexing(flask_login.current_user).cookies_names] return render_template('cookies.html', cookies_names=cookies_names) @app.route('/hhhashes', methods=['GET']) def hhhashes_lookup() -> str: - hhhashes = [(hhh, freq, 
lookyloo.indexing.http_headers_hashes_number_captures(hhh)) - for hhh, freq in lookyloo.indexing.http_headers_hashes] + hhhashes = [(hhh, freq, get_indexing(flask_login.current_user).http_headers_hashes_number_captures(hhh)) + for hhh, freq in get_indexing(flask_login.current_user).http_headers_hashes] return render_template('hhhashes.html', hhhashes=hhhashes) @app.route('/favicons', methods=['GET']) def favicons_lookup() -> str: favicons = [] - for sha512, freq in lookyloo.indexing.favicons: - favicon = lookyloo.indexing.get_favicon(sha512) + for sha512, freq in get_indexing(flask_login.current_user).favicons: + favicon = get_indexing(flask_login.current_user).get_favicon(sha512) if not favicon: continue favicon_b64 = base64.b64encode(favicon).decode() - nb_captures = lookyloo.indexing.favicon_number_captures(sha512) + nb_captures = get_indexing(flask_login.current_user).favicon_number_captures(sha512) favicons.append((sha512, freq, nb_captures, favicon_b64)) return render_template('favicons.html', favicons=favicons) @@ -986,10 +1336,10 @@ def favicons_lookup() -> str: @app.route('/ressources', methods=['GET']) def ressources() -> str: ressources = [] - for h, freq in lookyloo.indexing.ressources: - domain_freq = lookyloo.indexing.ressources_number_domains(h) + for h, freq in get_indexing(flask_login.current_user).ressources: + domain_freq = get_indexing(flask_login.current_user).ressources_number_domains(h) context = lookyloo.context.find_known_content(h) - capture_uuid, url_uuid, hostnode_uuid = lookyloo.indexing.get_hash_uuids(h) + capture_uuid, url_uuid, hostnode_uuid = get_indexing(flask_login.current_user).get_hash_uuids(h) try: ressource = lookyloo.get_ressource(capture_uuid, url_uuid, h) except MissingUUID: @@ -1003,7 +1353,7 @@ def ressources() -> str: @app.route('/categories', methods=['GET']) def categories() -> str: - return render_template('categories.html', categories=lookyloo.indexing.categories) + return render_template('categories.html', categories=get_indexing(flask_login.current_user).categories) @app.route('/rebuild_all') @@ -1057,7 +1407,7 @@ def recapture(tree_uuid: str) -> str | Response | WerkzeugResponse: @app.route('/ressource_by_hash/', methods=['GET']) @file_response # type: ignore[misc] def ressource_by_hash(sha512: str) -> Response: - details, body = lookyloo.get_body_hash_full(sha512) + details, body = get_body_hash_full(sha512) return send_file(body, as_attachment=True, download_name='ressource.bin') @@ -1245,13 +1595,13 @@ def capture_web() -> str | Response | WerkzeugResponse: @app.route('/cookies/', methods=['GET']) def cookies_name_detail(cookie_name: str) -> str: - captures, domains = lookyloo.get_cookie_name_investigator(cookie_name.strip()) + captures, domains = get_cookie_name_investigator(cookie_name.strip()) return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures) @app.route('/hhhdetails/', methods=['GET']) def hhh_detail(hhh: str) -> str: - captures, headers = lookyloo.get_hhh_investigator(hhh.strip()) + captures, headers = get_hhh_investigator(hhh.strip()) return render_template('hhh_details.html', hhh=hhh, captures=captures, headers=headers) @@ -1259,7 +1609,7 @@ def hhh_detail(hhh: str) -> str: @app.route('/favicon_details//', methods=['GET']) def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str: _get_prob = bool(get_probabilistic) - captures, favicon, probabilistic_favicons = lookyloo.get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob) + captures, 
favicon, probabilistic_favicons = get_favicon_investigator(favicon_sha512.strip(), get_probabilistic=_get_prob) mimetype, b64_favicon, mmh3_shodan = favicon return render_template('favicon_details.html', favicon_sha512=favicon_sha512, captures=captures, mimetype=mimetype, b64_favicon=b64_favicon, mmh3_shodan=mmh3_shodan, @@ -1269,20 +1619,20 @@ def favicon_detail(favicon_sha512: str, get_probabilistic: int=0) -> str: @app.route('/body_hashes/', methods=['GET']) def body_hash_details(body_hash: str) -> str: from_popup = True if (request.args.get('from_popup') and request.args.get('from_popup') == 'True') else False - captures, domains = lookyloo.get_body_hash_investigator(body_hash.strip()) + captures, domains = _get_body_hash_investigator(body_hash.strip()) return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures, from_popup=from_popup) @app.route('/urls/', methods=['GET']) def url_details(url: str) -> str: url = unquote_plus(url).strip() - hits = lookyloo.get_url_occurrences(url, limit=50) + hits = get_url_occurrences(url, limit=50) return render_template('url.html', url=url, hits=hits) @app.route('/hostnames/', methods=['GET']) def hostname_details(hostname: str) -> str: - hits = lookyloo.get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50) + hits = get_hostname_occurrences(hostname.strip(), with_urls_occurrences=True, limit=50) return render_template('hostname.html', hostname=hostname, hits=hits) diff --git a/website/web/genericapi.py b/website/web/genericapi.py index bc29479..e7a5deb 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -287,7 +287,9 @@ class TriggerModules(Resource): # type: ignore[misc] params={'h': 'The hash (sha512)'}) class HashInfo(Resource): # type: ignore[misc] def get(self, h: str) -> dict[str, Any] | tuple[dict[str, Any], int]: - details, body = lookyloo.get_body_hash_full(h) + from . import get_body_hash_full + + details, body = get_body_hash_full(h) if not details: return {'error': 'Unknown Hash.'}, 400 to_return: dict[str, Any] = {'response': {'hash': h, 'details': details, @@ -308,8 +310,9 @@ class URLInfo(Resource): # type: ignore[misc] @api.doc(body=url_info_fields) # type: ignore[misc] def post(self) -> list[dict[str, Any]]: + from . import get_url_occurrences to_query: dict[str, Any] = request.get_json(force=True) - occurrences = lookyloo.get_url_occurrences(to_query.pop('url'), **to_query) + occurrences = get_url_occurrences(to_query.pop('url'), **to_query) return occurrences @@ -326,8 +329,9 @@ class HostnameInfo(Resource): # type: ignore[misc] @api.doc(body=hostname_info_fields) # type: ignore[misc] def post(self) -> list[dict[str, Any]]: + from . import get_hostname_occurrences to_query: dict[str, Any] = request.get_json(force=True) - return lookyloo.get_hostname_occurrences(to_query.pop('hostname'), **to_query) + return get_hostname_occurrences(to_query.pop('hostname'), **to_query) @api.route('/json/stats')
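Reviewer note on the web changes as a whole: every index lookup in `website/web/__init__.py` and `website/web/genericapi.py` now goes through `get_indexing(flask_login.current_user)`, so authenticated users transparently hit the full kvrocks index when `index_everything` is enabled, while anonymous visitors keep using the regular index. The sketch below reproduces just that selection logic with stand-in types; `FakeIndexing`, `FakeUser`, and the `INDEX_EVERYTHING` constant are placeholders for the real `Indexing` class, the flask-login user, and `get_config('generic', 'index_everything')`.

```python
from __future__ import annotations

import functools

INDEX_EVERYTHING = True  # assumed value of the 'index_everything' config flag


class FakeIndexing:
    """Stand-in for lookyloo.Indexing(full_index=...)."""
    def __init__(self, full_index: bool = False) -> None:
        self.full_index = full_index


class FakeUser:
    """Stand-in for the flask-login user object."""
    def __init__(self, is_authenticated: bool) -> None:
        self.is_authenticated = is_authenticated


@functools.cache
def get_indexing(user: FakeUser | None) -> FakeIndexing:
    # Same decision tree as the new get_indexing() helper in the patch: the full
    # index is only handed out to authenticated users, and only when the
    # index_everything feature is enabled; everyone else gets the regular index.
    if not INDEX_EVERYTHING:
        return FakeIndexing()
    if not user or not user.is_authenticated:
        return FakeIndexing()
    return FakeIndexing(full_index=True)


if __name__ == '__main__':
    anon = FakeUser(is_authenticated=False)
    admin = FakeUser(is_authenticated=True)
    print(get_indexing(anon).full_index)   # False -> regular index
    print(get_indexing(admin).full_index)  # True  -> full kvrocks index
```

In this sketch, `functools.cache` memoises per user argument, so repeated calls with the same user reuse a single index handle rather than rebuilding connection pools on every request.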