fix: Always skip archived captures from sorted capture cache

pull/836/head
Raphaël Vinot 2023-11-22 13:53:14 +01:00
parent e8c9dcb3b1
commit 175e7294ea
3 changed files with 20 additions and 16 deletions

View File

@@ -11,7 +11,7 @@ import ssl
import time import time
from collections import defaultdict from collections import defaultdict
from datetime import date, datetime, timezone from datetime import date, datetime, timezone, timedelta
from email.message import EmailMessage from email.message import EmailMessage
from functools import cached_property from functools import cached_property
from io import BytesIO from io import BytesIO
@@ -452,17 +452,21 @@ class Lookyloo():
def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]: def sorted_capture_cache(self, capture_uuids: Optional[Iterable[str]]=None, cached_captures_only: bool=True, index_cut_time: Optional[datetime]=None) -> List[CaptureCache]:
'''Get all the captures in the cache, sorted by timestamp (new -> old). '''Get all the captures in the cache, sorted by timestamp (new -> old).
By default, this method will only return the captures that are currently cached.''' By default, this method will only return the captures that are currently cached.'''
# Make sure we do not try to load archived captures that would still be in 'lookup_dirs'
archive_interval = timedelta(days=get_config('generic', 'archive') + 1)
cut_time = (datetime.now() - archive_interval)
if index_cut_time:
if index_cut_time < cut_time:
index_cut_time = cut_time
else:
index_cut_time = cut_time
if capture_uuids is None: if capture_uuids is None:
all_captures = {k: v for k, v in sorted(self.redis.hgetall('lookup_dirs').items(), key=lambda item: item[1], reverse=True)} capture_uuids = []
if index_cut_time is None: for uuid, directory in sorted(self.redis.hgetall('lookup_dirs').items(), key=lambda item: item[1], reverse=True):
capture_uuids = list(all_captures.keys()) date_str = directory.rsplit('/', 1)[1]
else: if datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc) < index_cut_time:
capture_uuids = [] continue
for uuid, directory in self.redis.hgetall('lookup_dirs').items(): capture_uuids.append(uuid)
date_str = directory.rsplit('/', 1)[1]
if datetime.fromisoformat(date_str).replace(tzinfo=timezone.utc) < index_cut_time:
continue
capture_uuids.append(uuid)
# NOTE: we absolutely have to respect the cached_captures_only setting and # NOTE: we absolutely have to respect the cached_captures_only setting and
# never overwrite it. This method is called to display the index # never overwrite it. This method is called to display the index
# and if we try to display everything, including the non-cached entries, # and if we try to display everything, including the non-cached entries,

8
poetry.lock generated
View File

@@ -2052,13 +2052,13 @@ files = [
[[package]] [[package]]
name = "publicsuffixlist" name = "publicsuffixlist"
version = "0.10.0.20231121" version = "0.10.0.20231122"
description = "publicsuffixlist implement" description = "publicsuffixlist implement"
optional = false optional = false
python-versions = ">=2.6" python-versions = ">=2.6"
files = [ files = [
{file = "publicsuffixlist-0.10.0.20231121-py2.py3-none-any.whl", hash = "sha256:f2b8c30c25e9dddd6ef56c270783df597f64c54e6d24e37882608e7683211b9a"}, {file = "publicsuffixlist-0.10.0.20231122-py2.py3-none-any.whl", hash = "sha256:ce47acf7fb7e817fba8397b43896fcb4b4307d41e2c9d2f63418b3e65c7f15cb"},
{file = "publicsuffixlist-0.10.0.20231121.tar.gz", hash = "sha256:e89735db141c8728d77c8d24bef0820463cda62beccf9d62ce04bfe3526b5674"}, {file = "publicsuffixlist-0.10.0.20231122.tar.gz", hash = "sha256:d02ac754fa104d2dc8f593ef7ffe305905f8bd89f9bc079653135a9e39db17ad"},
] ]
[package.extras] [package.extras]
@@ -3287,4 +3287,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = ">=3.8,<3.12" python-versions = ">=3.8,<3.12"
content-hash = "052d4c8853de1b1ff82c4466646bf8f2ea996361ea7eae14c605d384f860be22" content-hash = "a2ffd3682c37661f34935c88f601553627e703e331885e7f7cb6681b55b72409"

View File

@@ -68,7 +68,7 @@ pypandora = "^1.6.1"
lacuscore = "^1.7.3" lacuscore = "^1.7.3"
pylacus = "^1.7.1" pylacus = "^1.7.1"
pyipasnhistory = "^2.1.2" pyipasnhistory = "^2.1.2"
publicsuffixlist = "^0.10.0.20231121" publicsuffixlist = "^0.10.0.20231122"
pyfaup = "^1.2" pyfaup = "^1.2"
chardet = "^5.2.0" chardet = "^5.2.0"
pysecuritytxt = "^1.2.0" pysecuritytxt = "^1.2.0"