chg: Improve async processing

pull/42/head
Raphaël Vinot 2019-04-05 16:12:54 +02:00
parent da3d1fe392
commit 12b8e4f949
2 changed files with 11 additions and 5 deletions

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import logging
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_homedir, set_running, unset_running
from lookyloo.helpers import get_homedir, set_running, unset_running, shutdown_requested
from lookyloo.lookyloo import Lookyloo
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
@@ -22,7 +22,10 @@ class AsyncScraper(AbstractManager):
def _to_run_forever(self):
    """Drain the scrape queue, then return control to the manager loop.

    Marks the service as running, repeatedly pops queued scrape jobs via
    ``Lookyloo.process_scrape_queue`` until the queue is empty (``None``
    returned) or a shutdown has been requested, then clears the running flag.
    """
    set_running('async_scrape')
    # Stale pre-image line `self.lookyloo.process_scrape_queue()` removed:
    # the loop below is the sole caller, so each job is processed exactly once.
    while True:
        url = self.lookyloo.process_scrape_queue()
        # Stop when the queue is drained or the service is asked to shut down.
        if url is None or shutdown_requested():
            break
    unset_running('async_scrape')

View File

@@ -113,11 +113,14 @@ class Lookyloo():
def process_scrape_queue(self):
    """Pop one pending capture off the Redis queue and scrape it.

    Returns:
        None  -- the ``to_scrape`` set was empty (nothing to do).
        True  -- a job was popped and ``self.scrape`` succeeded.
        False -- a job was popped but ``self.scrape`` failed.
    """
    uuid = self.redis.spop('to_scrape')
    if not uuid:
        # Queue empty: signal the caller there is nothing left to process.
        return None
    # The job parameters are stored in a hash keyed by the popped uuid;
    # consume it so the job is not processed twice.
    to_scrape = self.redis.hgetall(uuid)
    self.redis.delete(uuid)
    to_scrape['perma_uuid'] = uuid
    # Stale pre-image line `self.scrape(**to_scrape)` removed: the call
    # belongs inside the conditional so success/failure is reported once.
    if self.scrape(**to_scrape):
        self.logger.info(f'Processed {to_scrape["url"]}')
        return True
    return False
def load_tree(self, report_dir: Path):
har_files = sorted(report_dir.glob('*.har'))
@@ -152,7 +155,7 @@ class Lookyloo():
items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO')
if not items:
# broken
pass
return False
if not perma_uuid:
perma_uuid = str(uuid4())
width = len(str(len(items)))