mirror of https://github.com/CIRCL/lookyloo
chg: Support new format on TLD index
parent
73d90070c6
commit
cce300034f
|
@ -619,16 +619,18 @@ class Indexing():
|
||||||
self.logger.debug(f'done with TLDs for {crawled_tree.uuid}.')
|
self.logger.debug(f'done with TLDs for {crawled_tree.uuid}.')
|
||||||
|
|
||||||
def get_captures_tld(self, tld: str, most_recent_capture: datetime | None = None,
|
def get_captures_tld(self, tld: str, most_recent_capture: datetime | None = None,
|
||||||
oldest_capture: datetime | None= None) -> list[tuple[str, float]]:
|
oldest_capture: datetime | None=None,
|
||||||
|
offset: int | None=None, limit: int | None=None) -> tuple[int, list[tuple[str, float]]]:
|
||||||
"""Get all the captures for a specific TLD, on a time interval starting from the most recent one.
|
"""Get all the captures for a specific TLD, on a time interval starting from the most recent one.
|
||||||
|
|
||||||
:param tld: The TLD
|
:param tld: The TLD
|
||||||
:param most_recent_capture: The capture time of the most recent capture to consider
|
:param most_recent_capture: The capture time of the most recent capture to consider
|
||||||
:param oldest_capture: The capture time of the oldest capture to consider, defaults to 5 days ago.
|
:param oldest_capture: The capture time of the oldest capture to consider.
|
||||||
"""
|
"""
|
||||||
max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
|
max_score: str | float = most_recent_capture.timestamp() if most_recent_capture else '+Inf'
|
||||||
min_score: str | float = oldest_capture.timestamp() if oldest_capture else (datetime.now() - timedelta(days=5)).timestamp()
|
min_score: str | float = oldest_capture.timestamp() if oldest_capture else '-Inf'
|
||||||
return self.redis.zrevrangebyscore(f'tlds|{tld}|captures', max_score, min_score, withscores=True)
|
total = self.redis.zcard(f'tlds|{tld}|captures')
|
||||||
|
return total, self.redis.zrevrangebyscore(f'tlds|{tld}|captures', max_score, min_score, withscores=True, start=offset, num=limit)
|
||||||
|
|
||||||
def get_capture_tld_counter(self, capture_uuid: str, tld: str) -> int:
|
def get_capture_tld_counter(self, capture_uuid: str, tld: str) -> int:
|
||||||
# NOTE: what to do when the capture isn't indexed yet? Raise an exception?
|
# NOTE: what to do when the capture isn't indexed yet? Raise an exception?
|
||||||
|
|
|
@ -873,7 +873,7 @@ class TLDCaptures(Resource): # type: ignore[misc]
|
||||||
except Exception:
|
except Exception:
|
||||||
oldest_capture = None
|
oldest_capture = None
|
||||||
|
|
||||||
recent_captures_with_tld = get_indexing(flask_login.current_user).get_captures_tld(tld, most_recent_capture, oldest_capture)
|
_, recent_captures_with_tld = get_indexing(flask_login.current_user).get_captures_tld(tld, most_recent_capture, oldest_capture)
|
||||||
if not recent_captures_with_tld:
|
if not recent_captures_with_tld:
|
||||||
return []
|
return []
|
||||||
if not urls_only:
|
if not urls_only:
|
||||||
|
|
Loading…
Reference in New Issue