From cce300034ffd4bd6f247e8f937deec0caa3ce74b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Tue, 3 Dec 2024 00:10:16 +0100
Subject: [PATCH] chg: Support new format on TLD index

---
 lookyloo/indexing.py      | 10 ++++++----
 website/web/genericapi.py |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index 3fbef23c..baa8bc5d 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -619,16 +619,18 @@ class Indexing():
         self.logger.debug(f'done with TLDs for {crawled_tree.uuid}.')
 
     def get_captures_tld(self, tld: str, most_recent_capture: datetime | None = None,
-                         oldest_capture: datetime | None= None) -> list[tuple[str, float]]:
+                         oldest_capture: datetime | None=None,
+                         offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
         """Get all the captures for a specific TLD, on a time interval starting from the most recent one.
 
         :param tld: The TLD
         :param most_recent_capture: The capture time of the most recent capture to consider
-        :param oldest_capture: The capture time of the oldest capture to consider, defaults to 5 days ago.
+        :param oldest_capture: The capture time of the oldest capture to consider.
         """
         max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
-        min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=5)).timestamp()
-        return self.redis.zrevrangebyscore(f'tlds|{tld}|captures', max_score, min_score, withscores=True)
+        min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
+        total = self.redis.zcard(f'tlds|{tld}|captures')
+        return total, self.redis.zrevrangebyscore(f'tlds|{tld}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
 
     def get_capture_tld_counter(self, capture_uuid: str, tld: str) -> int:
         # NOTE: what to do when the capture isn't indexed yet? Raise an exception?
diff --git a/website/web/genericapi.py b/website/web/genericapi.py
index ab887164..c7e83230 100644
--- a/website/web/genericapi.py
+++ b/website/web/genericapi.py
@@ -873,7 +873,7 @@ class TLDCaptures(Resource): # type: ignore[misc]
         except Exception:
             oldest_capture = None
 
-        recent_captures_with_tld = get_indexing(flask_login.current_user).get_captures_tld(tld, most_recent_capture, oldest_capture)
+        _, recent_captures_with_tld = get_indexing(flask_login.current_user).get_captures_tld(tld, most_recent_capture, oldest_capture)
         if not recent_captures_with_tld:
             return []
         if not urls_only: