mirror of https://github.com/CIRCL/AIL-framework
fix: [crawler] fix crawler queue stats
parent
cc7e67d5ed
commit
a20b6054e8
|
@ -61,6 +61,8 @@ class Crawler(AbstractModule):
|
|||
crawlers.load_blacklist()
|
||||
# update captures cache
|
||||
crawlers.reload_crawler_captures()
|
||||
# update crawler queue stats
|
||||
crawlers.reload_crawlers_stats()
|
||||
|
||||
self.crawler_scheduler = crawlers.CrawlerScheduler()
|
||||
|
||||
|
|
|
@ -1018,6 +1018,16 @@ def get_crawlers_stats(domain_type=None):
|
|||
stats[domain_type] = {'queue': queue, 'up': up, 'down': down, 'crawled': crawled}
|
||||
return stats
|
||||
|
||||
def reload_crawlers_stats():
    """Drop task UUIDs that no longer exist from every crawler queue set.

    For each domain type returned by ``get_crawler_all_types()``, the members
    of the ``crawler:queue:type:<domain_type>`` set are read, and every UUID
    whose ``CrawlerTask(...).exists()`` is falsy is removed from that set.
    """
    for domain_type in get_crawler_all_types():
        queue_key = f'crawler:queue:type:{domain_type}'
        # Check every queued UUID first; removals only start once the whole
        # set has been inspected.
        stale_uuids = [
            queued_uuid
            for queued_uuid in r_crawler.smembers(queue_key)
            if not CrawlerTask(queued_uuid).exists()
        ]
        for queued_uuid in stale_uuids:
            r_crawler.srem(queue_key, queued_uuid)
|
||||
|
||||
#### Blocklist ####
|
||||
|
||||
def get_blacklist():
|
||||
|
|
Loading…
Reference in New Issue