mirror of https://github.com/CIRCL/AIL-framework
fix: [crawler] fix crawler queue stats
parent
a20b6054e8
commit
759d241b75
|
@ -1020,13 +1020,11 @@ def get_crawlers_stats(domain_type=None):
|
||||||
|
|
||||||
def reload_crawlers_stats():
|
def reload_crawlers_stats():
|
||||||
for domain_type in get_crawler_all_types():
|
for domain_type in get_crawler_all_types():
|
||||||
to_remove = []
|
tasks = r_crawler.smembers(f'crawler:queue:type:{domain_type}')
|
||||||
for task_uuid in r_crawler.smembers(f'crawler:queue:type:{domain_type}'):
|
for task_uuid in tasks:
|
||||||
task = CrawlerTask(task_uuid)
|
task = CrawlerTask(task_uuid)
|
||||||
if not task.exists():
|
if not task.is_in_queue() and task.get_status() is None:
|
||||||
to_remove.append(task_uuid)
|
task.delete()
|
||||||
for task_uuid in to_remove:
|
|
||||||
r_crawler.srem(f'crawler:queue:type:{domain_type}', task_uuid)
|
|
||||||
|
|
||||||
#### Blocklist ####
|
#### Blocklist ####
|
||||||
|
|
||||||
|
@ -1533,6 +1531,12 @@ class CrawlerTask:
|
||||||
def exists(self):
|
def exists(self):
|
||||||
return r_crawler.exists(f'crawler:task:{self.uuid}')
|
return r_crawler.exists(f'crawler:task:{self.uuid}')
|
||||||
|
|
||||||
|
def is_in_queue(self):
|
||||||
|
if r_crawler.zscore('crawler:queue', self.uuid) is not None:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
def get_url(self):
|
def get_url(self):
|
||||||
return r_crawler.hget(f'crawler:task:{self.uuid}', 'url')
|
return r_crawler.hget(f'crawler:task:{self.uuid}', 'url')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue