fix: [crawler] fix crawler queue stats

pull/607/merge
terrtia 2024-09-17 15:36:15 +02:00
parent cc7e67d5ed
commit a20b6054e8
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
2 changed files with 12 additions and 0 deletions

View File

@ -61,6 +61,8 @@ class Crawler(AbstractModule):
crawlers.load_blacklist()
# update captures cache
crawlers.reload_crawler_captures()
# update crawler queue stats
crawlers.reload_crawlers_stats()
self.crawler_scheduler = crawlers.CrawlerScheduler()

View File

@ -1018,6 +1018,16 @@ def get_crawlers_stats(domain_type=None):
stats[domain_type] = {'queue': queue, 'up': up, 'down': down, 'crawled': crawled}
return stats
def reload_crawlers_stats():
    """Purge stale task UUIDs from every crawler queue-type set.

    For each domain type, drop queue entries whose CrawlerTask no longer
    exists, so the per-type queue stats only count live tasks.

    Returns:
        None. Side effect: removes dead members from the Redis sets
        ``crawler:queue:type:<domain_type>``.
    """
    for domain_type in get_crawler_all_types():
        queue_key = f'crawler:queue:type:{domain_type}'
        # smembers() returns a full snapshot, so we can safely collect the
        # stale UUIDs in one pass without mutating the set while iterating.
        to_remove = [task_uuid
                     for task_uuid in r_crawler.smembers(queue_key)
                     if not CrawlerTask(task_uuid).exists()]
        if to_remove:
            # Single variadic SREM: one round trip instead of one per UUID.
            r_crawler.srem(queue_key, *to_remove)
#### Blocklist ####
def get_blacklist():