diff --git a/bin/async_capture.py b/bin/async_capture.py index 66ad77f2..90bf47da 100755 --- a/bin/async_capture.py +++ b/bin/async_capture.py @@ -18,7 +18,7 @@ from redis import Redis from scrapysplashwrapper import crawl from lookyloo.abstractmanager import AbstractManager -from lookyloo.helpers import (shutdown_requested, splash_status, get_socket_path, +from lookyloo.helpers import (splash_status, get_socket_path, load_cookies, safe_create_dir, get_config, get_splash_url, get_captures_dir) from lookyloo.lookyloo import Lookyloo @@ -181,7 +181,7 @@ class AsyncCapture(AbstractManager): break self.process_capture_queue() - if shutdown_requested(): + if self.shutdown_requested(): break diff --git a/bin/run_backend.py b/bin/run_backend.py index 570d61e1..41b0173e 100755 --- a/bin/run_backend.py +++ b/bin/run_backend.py @@ -1,13 +1,30 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from lookyloo.helpers import get_homedir, check_running -from subprocess import Popen +import argparse +import os import time + from pathlib import Path +from subprocess import Popen from typing import Optional, List, Union -import argparse +from redis import Redis +from redis.exceptions import ConnectionError as RedisConnectionError + +from lookyloo.helpers import get_homedir, get_socket_path + + +def check_running(name: str) -> bool: + socket_path = get_socket_path(name) + if not os.path.exists(socket_path): + return False + try: + r = Redis(unix_socket_path=socket_path) + return True if r.ping() else False + except RedisConnectionError: + # redis-py raises its own ConnectionError, which is not a subclass of the builtin one. + return False def launch_cache(storage_directory: Optional[Path]=None): diff --git a/bin/shutdown.py b/bin/shutdown.py index c211b0cb..2dab6f7b 100755 --- a/bin/shutdown.py +++ b/bin/shutdown.py @@ -1,17 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from lookyloo.helpers import is_running, get_socket_path import time -from redis import Redis + +from lookyloo.abstractmanager import AbstractManager def main(): - r = Redis(unix_socket_path=get_socket_path('cache'), db=1) - r.set('shutdown', 1) + 
AbstractManager.force_shutdown() time.sleep(5) while True: - running = is_running() + running = AbstractManager.is_running() if not running: break print(running) diff --git a/bin/start_website.py b/bin/start_website.py index f8bd78b7..73c02759 100755 --- a/bin/start_website.py +++ b/bin/start_website.py @@ -6,7 +6,7 @@ import logging from subprocess import Popen from lookyloo.abstractmanager import AbstractManager -from lookyloo.helpers import get_homedir, get_config, set_running +from lookyloo.helpers import get_homedir, get_config logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', level=logging.INFO, datefmt='%I:%M:%S') @@ -18,7 +18,7 @@ class Website(AbstractManager): super().__init__(loglevel) self.script_name = 'website' self.process = self._launch_website() - set_running(self.script_name) + self.set_running() def _launch_website(self): website_dir = get_homedir() / 'website' diff --git a/lookyloo/abstractmanager.py b/lookyloo/abstractmanager.py index 6ec48237..f4c2913b 100644 --- a/lookyloo/abstractmanager.py +++ b/lookyloo/abstractmanager.py @@ -1,11 +1,19 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -from abc import ABC import logging import signal +import time -from .helpers import long_sleep, shutdown_requested, set_running, unset_running +from abc import ABC +from datetime import datetime, timedelta +from typing import Optional, List, Tuple +from subprocess import Popen + +from redis import Redis +from redis.exceptions import ConnectionError as RedisConnectionError + +from .helpers import get_socket_path class AbstractManager(ABC): @@ -17,7 +25,45 @@ class AbstractManager(ABC): self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger.setLevel(loglevel) self.logger.info(f'Initializing 
{self.__class__.__name__}') - self.process = None + self.process: Optional[Popen] = None + self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) + + @staticmethod + def is_running() -> List[Tuple[str, float]]: + r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) + return r.zrangebyscore('running', '-inf', '+inf', withscores=True) + + @staticmethod + def force_shutdown(): + r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) + r.set('shutdown', 1) + + def set_running(self) -> None: + self.__redis.zincrby('running', 1, self.script_name) + + def unset_running(self) -> None: + current_running = self.__redis.zincrby('running', -1, self.script_name) + if int(current_running) <= 0: + self.__redis.zrem('running', self.script_name) + + def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool: + if shutdown_check > sleep_in_sec: + shutdown_check = sleep_in_sec + sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) + while sleep_until > datetime.now(): + time.sleep(shutdown_check) + if self.shutdown_requested(): + return False + return True + + def shutdown_requested(self) -> bool: + try: + return True if self.__redis.exists('shutdown') else False + except ConnectionRefusedError: + return True + except RedisConnectionError: + # redis-py raises its own ConnectionError, which is not a subclass of the builtin one. + return True async def _to_run_forever_async(self) -> None: pass @@ -29,7 +75,7 @@ class AbstractManager(ABC): self.logger.info(f'Launching {self.__class__.__name__}') try: while True: - if shutdown_requested(): + if self.shutdown_requested(): break try: if self.process: @@ -37,7 +83,7 @@ class AbstractManager(ABC): self.logger.critical(f'Unable to start {self.script_name}.') break else: - set_running(self.script_name) + self.set_running() self._to_run_forever() except Exception: self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') @@ -45,8 +91,8 @@ class AbstractManager(ABC): if not self.process: # 
self.process means we run an external script, all the time, # do not unset between sleep. - unset_running(self.script_name) - if not long_sleep(sleep_in_sec): + self.unset_running() + if not self.long_sleep(sleep_in_sec): break except KeyboardInterrupt: self.logger.warning(f'{self.script_name} killed by user.') @@ -58,5 +104,5 @@ class AbstractManager(ABC): self.process.send_signal(signal.SIGTERM) except Exception: pass - unset_running(self.script_name) + self.unset_running() self.logger.info(f'Shutting down {self.__class__.__name__}') diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index 12ffddb4..b10e34b4 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -2,7 +2,6 @@ # -*- coding: utf-8 -*- import os import logging -import time import json import pickle import pkg_resources @@ -18,7 +17,6 @@ from enum import IntEnum, unique from har2tree import CrawledTree, HostNode, URLNode from redis import Redis -from redis.exceptions import ConnectionError import requests from requests.exceptions import HTTPError from publicsuffix2 import PublicSuffixList, fetch # type: ignore @@ -161,23 +159,6 @@ def safe_create_dir(to_create: Path) -> None: to_create.mkdir(parents=True, exist_ok=True) -def set_running(name: str) -> None: - r = Redis(unix_socket_path=get_socket_path('cache'), db=1) - r.zincrby('running', 1, name) - - -def unset_running(name: str) -> None: - r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) - current_running = r.zincrby('running', -1, name) - if int(current_running) <= 0: - r.zrem('running', name) - - -def is_running() -> List[Tuple[str, float]]: - r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) - return r.zrangebyscore('running', '-inf', '+inf', withscores=True) - - def get_socket_path(name: str) -> str: mapping = { 'cache': Path('cache', 'cache.sock'), @@ -187,36 +168,6 @@ def get_socket_path(name: str) -> str: return str(get_homedir() / mapping[name]) -def 
check_running(name: str) -> bool: - socket_path = get_socket_path(name) - try: - r = Redis(unix_socket_path=socket_path) - return True if r.ping() else False - except ConnectionError: - return False - - -def shutdown_requested() -> bool: - try: - r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True) - return True if r.exists('shutdown') else False - except ConnectionRefusedError: - return True - except ConnectionError: - return True - - -def long_sleep(sleep_in_sec: int, shutdown_check: int=10) -> bool: - if shutdown_check > sleep_in_sec: - shutdown_check = sleep_in_sec - sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec) - while sleep_until > datetime.now(): - time.sleep(shutdown_check) - if shutdown_requested(): - return False - return True - - def get_user_agents(directory: str='user_agents') -> Dict[str, Any]: ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json') paths = sorted(glob(ua_files_path), reverse=True)