fix: Always skip archived captures from sorted capture cache

pull/836/head
Raphaël Vinot 2023-11-22 13:53:14 +01:00
parent e8c9dcb3b1
commit 175e7294ea
3 changed files with 20 additions and 16 deletions

View File

@@ -11,7 +11,7 @@ import ssl
import time
from collections import defaultdict
from datetime import date, datetime, timezone
from datetime import date, datetime, timezone, timedelta
from email.message import EmailMessage
from functools import cached_property
from io import BytesIO
@@ -452,17 +452,21 @@ class Lookyloo():
def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
'''Get all the captures in the cache, sorted by timestamp (new -> old).
By default, this method will only return the captures that are currently cached.'''
# Make sure we do not try to load archived captures that would still be in 'lookup_dirs'
archive_interval = timedelta(days=get_config('generic', 'archive') + 1)
cut_time = (datetime.now() - archive_interval)
if index_cut_time:
if index_cut_time < cut_time:
index_cut_time = cut_time
else:
index_cut_time = cut_time
if capture_uuids is None:
all_captures = {k: v for k, v in sorted(self.redis.hgetall('lookup_dirs').items(), key=lambda item: item[1], reverse=True)}
if index_cut_time is None:
capture_uuids = list(all_captures.keys())
else:
capture_uuids = []
for uuid, directory in self.redis.hgetall('lookup_dirs').items():
date_str = directory.rsplit('/', 1)[1]
if datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc) < index_cut_time:
continue
capture_uuids.append(uuid)
capture_uuids = []
for uuid, directory in sorted(self.redis.hgetall('lookup_dirs').items(), key=lambda item: item[1], reverse=True):
date_str = directory.rsplit('/', 1)[1]
if datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc) < index_cut_time:
continue
capture_uuids.append(uuid)
# NOTE: we absolutely have to respect the cached_captures_only setting and
# never overwrite it. This method is called to display the index
# and if we try to display everything, including the non-cached entries,

8
poetry.lock generated
View File

@@ -2052,13 +2052,13 @@ files = [
[[package]]
name = "publicsuffixlist"
version = "0.10.0.20231121"
version = "0.10.0.20231122"
description = "publicsuffixlist implement"
optional = false
python-versions = ">=2.6"
files = [
{file = "publicsuffixlist-0.10.0.20231121-py2.py3-none-any.whl", hash = "sha256:f2b8c30c25e9dddd6ef56c270783df597f64c54e6d24e37882608e7683211b9a"},
{file = "publicsuffixlist-0.10.0.20231121.tar.gz", hash = "sha256:e89735db141c8728d77c8d24bef0820463cda62beccf9d62ce04bfe3526b5674"},
{file = "publicsuffixlist-0.10.0.20231122-py2.py3-none-any.whl", hash = "sha256:ce47acf7fb7e817fba8397b43896fcb4b4307d41e2c9d2f63418b3e65c7f15cb"},
{file = "publicsuffixlist-0.10.0.20231122.tar.gz", hash = "sha256:d02ac754fa104d2dc8f593ef7ffe305905f8bd89f9bc079653135a9e39db17ad"},
]
[package.extras]
@@ -3287,4 +3287,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata]
lock-version = "2.0"
python-versions = ">=3.8,<3.12"
content-hash = "052d4c8853de1b1ff82c4466646bf8f2ea996361ea7eae14c605d384f860be22"
content-hash = "a2ffd3682c37661f34935c88f601553627e703e331885e7f7cb6681b55b72409"

View File

@@ -68,7 +68,7 @@ pypandora = "^1.6.1"
lacuscore = "^1.7.3"
pylacus = "^1.7.1"
pyipasnhistory = "^2.1.2"
publicsuffixlist = "^0.10.0.20231121"
publicsuffixlist = "^0.10.0.20231122"
pyfaup = "^1.2"
chardet = "^5.2.0"
pysecuritytxt = "^1.2.0"