chg: Move the process managment methods to the proper class

pull/251/head
Raphaël Vinot 2021-08-27 17:28:26 +02:00
parent 85e43fc677
commit 8a51383d7a
7 changed files with 79 additions and 70 deletions

View File

@ -18,7 +18,7 @@ from redis import Redis
from scrapysplashwrapper import crawl from scrapysplashwrapper import crawl
from lookyloo.abstractmanager import AbstractManager from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import (shutdown_requested, splash_status, get_socket_path, from lookyloo.helpers import (splash_status, get_socket_path,
load_cookies, safe_create_dir, get_config, get_splash_url, load_cookies, safe_create_dir, get_config, get_splash_url,
get_captures_dir) get_captures_dir)
from lookyloo.lookyloo import Lookyloo from lookyloo.lookyloo import Lookyloo
@ -181,7 +181,7 @@ class AsyncCapture(AbstractManager):
break break
self.process_capture_queue() self.process_capture_queue()
if shutdown_requested(): if self.shutdown_requested():
break break

View File

@ -1,13 +1,28 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from lookyloo.helpers import get_homedir, check_running import argparse
from subprocess import Popen import os
import time import time
from pathlib import Path from pathlib import Path
from subprocess import Popen
from typing import Optional, List, Union from typing import Optional, List, Union
import argparse from redis import Redis
from lookyloo.helpers import get_homedir, get_socket_path
def check_running(name: str) -> bool:
socket_path = get_socket_path(name)
if not os.path.exists(socket_path):
return False
try:
r = Redis(unix_socket_path=socket_path)
return True if r.ping() else False
except ConnectionError:
return False
def launch_cache(storage_directory: Optional[Path]=None): def launch_cache(storage_directory: Optional[Path]=None):

View File

@ -1,17 +1,16 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from lookyloo.helpers import is_running, get_socket_path
import time import time
from redis import Redis
from lookyloo.abstractmanager import AbstractManager
def main(): def main():
r = Redis(unix_socket_path=get_socket_path('cache'), db=1) AbstractManager.force_shutdown()
r.set('shutdown', 1)
time.sleep(5) time.sleep(5)
while True: while True:
running = is_running() running = AbstractManager.is_running()
if not running: if not running:
break break
print(running) print(running)

View File

@ -32,7 +32,7 @@ def main():
Popen(['archiver']) Popen(['archiver'])
print('done.') print('done.')
print('Start website...') print('Start website...')
Popen(['start_website']) # Popen(['start_website'])
print('done.') print('done.')

View File

@ -6,7 +6,7 @@ import logging
from subprocess import Popen from subprocess import Popen
from lookyloo.abstractmanager import AbstractManager from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_homedir, get_config, set_running from lookyloo.helpers import get_homedir, get_config
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO, datefmt='%I:%M:%S') level=logging.INFO, datefmt='%I:%M:%S')
@ -18,7 +18,7 @@ class Website(AbstractManager):
super().__init__(loglevel) super().__init__(loglevel)
self.script_name = 'website' self.script_name = 'website'
self.process = self._launch_website() self.process = self._launch_website()
set_running(self.script_name) self.set_running()
def _launch_website(self): def _launch_website(self):
website_dir = get_homedir() / 'website' website_dir = get_homedir() / 'website'

View File

@ -1,11 +1,18 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
from abc import ABC
import logging import logging
import signal import signal
import time
from .helpers import long_sleep, shutdown_requested, set_running, unset_running from abc import ABC
from datetime import datetime, timedelta
from typing import Optional, List, Tuple
from subprocess import Popen
from redis import Redis
from .helpers import get_socket_path
class AbstractManager(ABC): class AbstractManager(ABC):
@ -17,7 +24,44 @@ class AbstractManager(ABC):
self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(loglevel) self.logger.setLevel(loglevel)
self.logger.info(f'Initializing {self.__class__.__name__}') self.logger.info(f'Initializing {self.__class__.__name__}')
self.process = None self.process: Optional[Popen] = None
self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
@staticmethod
def is_running() -> List[Tuple[str, float]]:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
@staticmethod
def force_shutdown():
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
r.set('shutdown', 1)
def set_running(self) -> None:
self.__redis.zincrby('running', 1, self.script_name)
def unset_running(self) -> None:
current_running = self.__redis.zincrby('running', -1, self.script_name)
if int(current_running) <= 0:
self.__redis.zrem('running', self.script_name)
def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool:
if shutdown_check > sleep_in_sec:
shutdown_check = sleep_in_sec
sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
while sleep_until > datetime.now():
time.sleep(shutdown_check)
if self.shutdown_requested():
return False
return True
def shutdown_requested(self) -> bool:
try:
return True if self.__redis.exists('shutdown') else False
except ConnectionRefusedError:
return True
except ConnectionError:
return True
async def _to_run_forever_async(self) -> None: async def _to_run_forever_async(self) -> None:
pass pass
@ -29,7 +73,7 @@ class AbstractManager(ABC):
self.logger.info(f'Launching {self.__class__.__name__}') self.logger.info(f'Launching {self.__class__.__name__}')
try: try:
while True: while True:
if shutdown_requested(): if self.shutdown_requested():
break break
try: try:
if self.process: if self.process:
@ -37,7 +81,7 @@ class AbstractManager(ABC):
self.logger.critical(f'Unable to start {self.script_name}.') self.logger.critical(f'Unable to start {self.script_name}.')
break break
else: else:
set_running(self.script_name) self.set_running()
self._to_run_forever() self._to_run_forever()
except Exception: except Exception:
self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.') self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.')
@ -45,8 +89,8 @@ class AbstractManager(ABC):
if not self.process: if not self.process:
# self.process means we run an external script, all the time, # self.process means we run an external script, all the time,
# do not unset between sleep. # do not unset between sleep.
unset_running(self.script_name) self.unset_running()
if not long_sleep(sleep_in_sec): if not self.long_sleep(sleep_in_sec):
break break
except KeyboardInterrupt: except KeyboardInterrupt:
self.logger.warning(f'{self.script_name} killed by user.') self.logger.warning(f'{self.script_name} killed by user.')
@ -58,5 +102,5 @@ class AbstractManager(ABC):
self.process.send_signal(signal.SIGTERM) self.process.send_signal(signal.SIGTERM)
except Exception: except Exception:
pass pass
unset_running(self.script_name) self.unset_running()
self.logger.info(f'Shutting down {self.__class__.__name__}') self.logger.info(f'Shutting down {self.__class__.__name__}')

View File

@ -2,7 +2,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import os import os
import logging import logging
import time
import json import json
import pickle import pickle
import pkg_resources import pkg_resources
@ -18,7 +17,6 @@ from enum import IntEnum, unique
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from redis import Redis from redis import Redis
from redis.exceptions import ConnectionError
import requests import requests
from requests.exceptions import HTTPError from requests.exceptions import HTTPError
from publicsuffix2 import PublicSuffixList, fetch # type: ignore from publicsuffix2 import PublicSuffixList, fetch # type: ignore
@ -161,23 +159,6 @@ def safe_create_dir(to_create: Path) -> None:
to_create.mkdir(parents=True, exist_ok=True) to_create.mkdir(parents=True, exist_ok=True)
def set_running(name: str) -> None:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
r.zincrby('running', 1, name)
def unset_running(name: str) -> None:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
current_running = r.zincrby('running', -1, name)
if int(current_running) <= 0:
r.zrem('running', name)
def is_running() -> List[Tuple[str, float]]:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
def get_socket_path(name: str) -> str: def get_socket_path(name: str) -> str:
mapping = { mapping = {
'cache': Path('cache', 'cache.sock'), 'cache': Path('cache', 'cache.sock'),
@ -187,36 +168,6 @@ def get_socket_path(name: str) -> str:
return str(get_homedir() / mapping[name]) return str(get_homedir() / mapping[name])
def check_running(name: str) -> bool:
socket_path = get_socket_path(name)
try:
r = Redis(unix_socket_path=socket_path)
return True if r.ping() else False
except ConnectionError:
return False
def shutdown_requested() -> bool:
try:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return True if r.exists('shutdown') else False
except ConnectionRefusedError:
return True
except ConnectionError:
return True
def long_sleep(sleep_in_sec: int, shutdown_check: int=10) -> bool:
if shutdown_check > sleep_in_sec:
shutdown_check = sleep_in_sec
sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
while sleep_until > datetime.now():
time.sleep(shutdown_check)
if shutdown_requested():
return False
return True
def get_user_agents(directory: str='user_agents') -> Dict[str, Any]: def get_user_agents(directory: str='user_agents') -> Dict[str, Any]:
ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json') ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json')
paths = sorted(glob(ua_files_path), reverse=True) paths = sorted(glob(ua_files_path), reverse=True)