fix: Major improvement in index display when you break the cache...

pull/559/head
Raphaël Vinot 2022-12-07 21:05:27 +01:00
parent 8e94f7e12a
commit 8083c8cabc
2 changed files with 24 additions and 11 deletions

View File

@@ -7,7 +7,7 @@ import operator
import smtplib import smtplib
from collections import defaultdict from collections import defaultdict
from datetime import date, datetime from datetime import date, datetime, timezone
from email.message import EmailMessage from email.message import EmailMessage
from functools import cached_property from functools import cached_property
from io import BytesIO from io import BytesIO
@@ -380,13 +380,22 @@ class Lookyloo():
def update_tree_cache_info(self, process_id: int, classname: str) -> None: def update_tree_cache_info(self, process_id: int, classname: str) -> None:
self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status())) self.redis.hset('tree_cache', f'{process_id}|{classname}', str(self._captures_index.lru_cache_status()))
def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True) -> List[CaptureCache]: def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
'''Get all the captures in the cache, sorted by timestamp (new -> old). '''Get all the captures in the cache, sorted by timestamp (new -> old).
By default, this method will only return the captures that are currently cached.''' By default, this method will only return the captures that are currently cached.'''
if capture_uuids is None: if capture_uuids is None:
# Call from the index, we want all the recent captures all_captures = self.redis.hgetall('lookup_dirs')
capture_uuids = self.redis.hkeys('lookup_dirs') if index_cut_time is None:
capture_uuids = list(all_captures.keys())
else:
capture_uuids = []
for uuid, directory in self.redis.hgetall('lookup_dirs').items():
date_str = directory.rsplit('/', 1)[1]
if datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc) < index_cut_time:
continue
capture_uuids.append(uuid)
cached_captures_only = False cached_captures_only = False
if not capture_uuids: if not capture_uuids:
# No captures at all on the instance # No captures at all on the instance
return [] return []
@@ -424,12 +433,16 @@ class Lookyloo():
return CaptureStatusCore.ONGOING return CaptureStatusCore.ONGOING
return lacus_status return lacus_status
def capture_cache(self, capture_uuid: str, /) -> Optional[CaptureCache]: def capture_cache(self, capture_uuid: str, /, *, force_update: bool = False) -> Optional[CaptureCache]:
"""Get the cache from redis, rebuild the tree if the internal UUID changed => slow""" """Get the cache from redis, rebuild the tree if the internal UUID changed => slow"""
try: try:
cache = self._captures_index[capture_uuid] cache = self._captures_index[capture_uuid]
# 2022-12-07: New cache format, store the user agent and referers. Re-cache if needed if cache and force_update:
if cache and not cache.user_agent and not cache.error: needs_update = False
if not cache.user_agent and not cache.error:
# 2022-12-07: New cache format, store the user agent and referers.
needs_update = True
if needs_update:
self._captures_index.reload_cache(capture_uuid) self._captures_index.reload_cache(capture_uuid)
cache = self._captures_index[capture_uuid] cache = self._captures_index[capture_uuid]
return cache return cache

View File

@@ -644,7 +644,7 @@ def tree(tree_uuid: str, node_uuid: Optional[str]=None):
if tree_uuid == 'False': if tree_uuid == 'False':
flash("Unable to process your request.", 'warning') flash("Unable to process your request.", 'warning')
return redirect(url_for('index')) return redirect(url_for('index'))
cache = lookyloo.capture_cache(tree_uuid) cache = lookyloo.capture_cache(tree_uuid, force_update=True)
if not cache: if not cache:
status = lookyloo.get_capture_status(tree_uuid) status = lookyloo.get_capture_status(tree_uuid)
if status == CaptureStatus.UNKNOWN: if status == CaptureStatus.UNKNOWN:
@@ -737,7 +737,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
# We want to filter the captures on the index # We want to filter the captures on the index
cut_time = (datetime.now() - timedelta(**time_delta_on_index)).replace(tzinfo=timezone.utc) cut_time = (datetime.now() - timedelta(**time_delta_on_index)).replace(tzinfo=timezone.utc)
for cached in lookyloo.sorted_capture_cache(): for cached in lookyloo.sorted_capture_cache(index_cut_time=cut_time):
if cut_time and cached.timestamp < cut_time: if cut_time and cached.timestamp < cut_time:
continue continue