chg: Move the process managment methods to the proper class

pull/251/head
Raphaël Vinot 2021-08-27 17:28:26 +02:00
parent 85e43fc677
commit 8a51383d7a
7 changed files with 79 additions and 70 deletions

View File

@ -18,7 +18,7 @@ from redis import Redis
from scrapysplashwrapper import crawl
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import (shutdown_requested, splash_status, get_socket_path,
from lookyloo.helpers import (splash_status, get_socket_path,
load_cookies, safe_create_dir, get_config, get_splash_url,
get_captures_dir)
from lookyloo.lookyloo import Lookyloo
@ -181,7 +181,7 @@ class AsyncCapture(AbstractManager):
break
self.process_capture_queue()
if shutdown_requested():
if self.shutdown_requested():
break

View File

@ -1,13 +1,28 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from lookyloo.helpers import get_homedir, check_running
from subprocess import Popen
import argparse
import os
import time
from pathlib import Path
from subprocess import Popen
from typing import Optional, List, Union
import argparse
from redis import Redis
from lookyloo.helpers import get_homedir, get_socket_path
def check_running(name: str) -> bool:
socket_path = get_socket_path(name)
if not os.path.exists(socket_path):
return False
try:
r = Redis(unix_socket_path=socket_path)
return True if r.ping() else False
except ConnectionError:
return False
def launch_cache(storage_directory: Optional[Path]=None):

View File

@ -1,17 +1,16 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from lookyloo.helpers import is_running, get_socket_path
import time
from redis import Redis
from lookyloo.abstractmanager import AbstractManager
def main():
r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
r.set('shutdown', 1)
AbstractManager.force_shutdown()
time.sleep(5)
while True:
running = is_running()
running = AbstractManager.is_running()
if not running:
break
print(running)

View File

@ -32,7 +32,7 @@ def main():
Popen(['archiver'])
print('done.')
print('Start website...')
Popen(['start_website'])
# Popen(['start_website'])
print('done.')

View File

@ -6,7 +6,7 @@ import logging
from subprocess import Popen
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_homedir, get_config, set_running
from lookyloo.helpers import get_homedir, get_config
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO, datefmt='%I:%M:%S')
@ -18,7 +18,7 @@ class Website(AbstractManager):
super().__init__(loglevel)
self.script_name = 'website'
self.process = self._launch_website()
set_running(self.script_name)
self.set_running()
def _launch_website(self):
website_dir = get_homedir() / 'website'

View File

@ -1,11 +1,18 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from abc import ABC
import logging
import signal
import time
from .helpers import long_sleep, shutdown_requested, set_running, unset_running
from abc import ABC
from datetime import datetime, timedelta
from typing import Optional, List, Tuple
from subprocess import Popen
from redis import Redis
from .helpers import get_socket_path
class AbstractManager(ABC):
@ -17,7 +24,44 @@ class AbstractManager(ABC):
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(loglevel)
self.logger.info(f'Initializing {self.__class__.__name__}')
self.process = None
self.process: Optional[Popen] = None
self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
@staticmethod
def is_running() -> List[Tuple[str, float]]:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
@staticmethod
def force_shutdown():
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
r.set('shutdown', 1)
def set_running(self) -> None:
self.__redis.zincrby('running', 1, self.script_name)
def unset_running(self) -> None:
current_running = self.__redis.zincrby('running', -1, self.script_name)
if int(current_running) <= 0:
self.__redis.zrem('running', self.script_name)
def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool:
if shutdown_check > sleep_in_sec:
shutdown_check = sleep_in_sec
sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
while sleep_until > datetime.now():
time.sleep(shutdown_check)
if self.shutdown_requested():
return False
return True
def shutdown_requested(self) -> bool:
try:
return True if self.__redis.exists('shutdown') else False
except ConnectionRefusedError:
return True
except ConnectionError:
return True
async def _to_run_forever_async(self) -> None:
pass
@ -29,7 +73,7 @@ class AbstractManager(ABC):
self.logger.info(f'Launching {self.__class__.__name__}')
try:
while True:
if shutdown_requested():
if self.shutdown_requested():
break
try:
if self.process:
@ -37,7 +81,7 @@ class AbstractManager(ABC):
self.logger.critical(f'Unable to start {self.script_name}.')
break
else:
set_running(self.script_name)
self.set_running()
self._to_run_forever()
except Exception:
self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.')
@ -45,8 +89,8 @@ class AbstractManager(ABC):
if not self.process:
# self.process means we run an external script, all the time,
# do not unset between sleep.
unset_running(self.script_name)
if not long_sleep(sleep_in_sec):
self.unset_running()
if not self.long_sleep(sleep_in_sec):
break
except KeyboardInterrupt:
self.logger.warning(f'{self.script_name} killed by user.')
@ -58,5 +102,5 @@ class AbstractManager(ABC):
self.process.send_signal(signal.SIGTERM)
except Exception:
pass
unset_running(self.script_name)
self.unset_running()
self.logger.info(f'Shutting down {self.__class__.__name__}')

View File

@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
import os
import logging
import time
import json
import pickle
import pkg_resources
@ -18,7 +17,6 @@ from enum import IntEnum, unique
from har2tree import CrawledTree, HostNode, URLNode
from redis import Redis
from redis.exceptions import ConnectionError
import requests
from requests.exceptions import HTTPError
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
@ -161,23 +159,6 @@ def safe_create_dir(to_create: Path) -> None:
to_create.mkdir(parents=True, exist_ok=True)
def set_running(name: str) -> None:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
r.zincrby('running', 1, name)
def unset_running(name: str) -> None:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
current_running = r.zincrby('running', -1, name)
if int(current_running) <= 0:
r.zrem('running', name)
def is_running() -> List[Tuple[str, float]]:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
def get_socket_path(name: str) -> str:
mapping = {
'cache': Path('cache', 'cache.sock'),
@ -187,36 +168,6 @@ def get_socket_path(name: str) -> str:
return str(get_homedir() / mapping[name])
def check_running(name: str) -> bool:
socket_path = get_socket_path(name)
try:
r = Redis(unix_socket_path=socket_path)
return True if r.ping() else False
except ConnectionError:
return False
def shutdown_requested() -> bool:
try:
r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
return True if r.exists('shutdown') else False
except ConnectionRefusedError:
return True
except ConnectionError:
return True
def long_sleep(sleep_in_sec: int, shutdown_check: int=10) -> bool:
if shutdown_check > sleep_in_sec:
shutdown_check = sleep_in_sec
sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
while sleep_until > datetime.now():
time.sleep(shutdown_check)
if shutdown_requested():
return False
return True
def get_user_agents(directory: str='user_agents') -> Dict[str, Any]:
ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json')
paths = sorted(glob(ua_files_path), reverse=True)