mirror of https://github.com/CIRCL/lookyloo
chg: Move the process management methods to the proper class

parent 85e43fc677
commit 8a51383d7a
@@ -18,7 +18,7 @@ from redis import Redis
 from scrapysplashwrapper import crawl

 from lookyloo.abstractmanager import AbstractManager
-from lookyloo.helpers import (shutdown_requested, splash_status, get_socket_path,
+from lookyloo.helpers import (splash_status, get_socket_path,
                               load_cookies, safe_create_dir, get_config, get_splash_url,
                               get_captures_dir)
 from lookyloo.lookyloo import Lookyloo
@@ -181,7 +181,7 @@ class AsyncCapture(AbstractManager):
                 break

             self.process_capture_queue()
-            if shutdown_requested():
+            if self.shutdown_requested():
                 break

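The capture loop now asks the inherited AbstractManager method instead of the module-level helper. A minimal sketch of the pattern, assuming a subclass that drains a work queue (DemoManager, _has_pending_work and _process_one_item are hypothetical names, not part of the commit):

import logging

from lookyloo.abstractmanager import AbstractManager


class DemoManager(AbstractManager):  # hypothetical subclass, for illustration only

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        self.script_name = 'demo'

    def _has_pending_work(self) -> bool:
        return False  # stand-in for a real queue check

    def _process_one_item(self) -> None:
        pass  # stand-in for the real capture work

    def _to_run_forever(self) -> None:
        # Drain pending work, but bail out early once the Redis 'shutdown' flag is set.
        while self._has_pending_work():
            self._process_one_item()
            if self.shutdown_requested():  # inherited from AbstractManager
                break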
@@ -1,13 +1,28 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from lookyloo.helpers import get_homedir, check_running
-from subprocess import Popen
-import argparse
 import os
 import time

 from pathlib import Path
+from subprocess import Popen
 from typing import Optional, List, Union
+
+import argparse
+from redis import Redis
+
+from lookyloo.helpers import get_homedir, get_socket_path
+
+
+def check_running(name: str) -> bool:
+    socket_path = get_socket_path(name)
+    if not os.path.exists(socket_path):
+        return False
+    try:
+        r = Redis(unix_socket_path=socket_path)
+        return True if r.ping() else False
+    except ConnectionError:
+        return False
+

 def launch_cache(storage_directory: Optional[Path]=None):
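check_running() now lives next to the launch helpers and simply pings the Redis instance listening on the socket returned by get_socket_path(). A minimal usage sketch, assuming the check_running() defined above (the wait_until_running() wrapper and its timeout are illustrative, not part of the commit):

import time


def wait_until_running(name: str, timeout: int=60) -> bool:
    # Poll check_running() until the Redis instance answers PING or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        if check_running(name):
            return True
        time.sleep(1)
    return False

# Typical flow: launch_cache(), then wait_until_running('cache') before starting
# anything that depends on the cache database.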
@@ -1,17 +1,16 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from lookyloo.helpers import is_running, get_socket_path
 import time
-from redis import Redis
+
+from lookyloo.abstractmanager import AbstractManager


 def main():
-    r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
-    r.set('shutdown', 1)
+    AbstractManager.force_shutdown()
     time.sleep(5)
     while True:
-        running = is_running()
+        running = AbstractManager.is_running()
         if not running:
             break
         print(running)
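Read together, the removed and added lines give the following flow for the shutdown script. This is a hedged reconstruction of the result, not a verbatim copy of the new file (the trailing sleep inside the loop is an assumption):

import time

from lookyloo.abstractmanager import AbstractManager


def main():
    AbstractManager.force_shutdown()            # sets the 'shutdown' key in the cache Redis
    time.sleep(5)
    while True:
        running = AbstractManager.is_running()  # [(script_name, instance_count), ...]
        if not running:
            break
        print(running)
        time.sleep(5)                           # assumed pause between polls, not shown in the hunk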
@@ -32,7 +32,7 @@ def main():
     Popen(['archiver'])
     print('done.')
     print('Start website...')
-    Popen(['start_website'])
+    # Popen(['start_website'])
     print('done.')

@@ -6,7 +6,7 @@ import logging
 from subprocess import Popen

 from lookyloo.abstractmanager import AbstractManager
-from lookyloo.helpers import get_homedir, get_config, set_running
+from lookyloo.helpers import get_homedir, get_config

 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
                     level=logging.INFO, datefmt='%I:%M:%S')
@@ -18,7 +18,7 @@ class Website(AbstractManager):
         super().__init__(loglevel)
         self.script_name = 'website'
         self.process = self._launch_website()
-        set_running(self.script_name)
+        self.set_running()

     def _launch_website(self):
         website_dir = get_homedir() / 'website'
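The Website manager keeps wrapping an external process, but the registration call is now the inherited self.set_running(). A minimal sketch of that pattern, assuming the AbstractManager from this commit (DemoWebsite and the sleep command are placeholders, not the real Website class):

import logging
from subprocess import Popen

from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_homedir


class DemoWebsite(AbstractManager):  # hypothetical stand-in for the real Website class

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        self.script_name = 'demo_website'
        self.process = self._launch()
        self.set_running()  # bookkeeping now lives on AbstractManager

    def _launch(self) -> Popen:
        # Placeholder command; the real class starts the web server from the website directory.
        return Popen(['sleep', '3600'], cwd=get_homedir())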
@@ -1,11 +1,18 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from abc import ABC
 import logging
 import signal
 import time

-from .helpers import long_sleep, shutdown_requested, set_running, unset_running
+from abc import ABC
+from datetime import datetime, timedelta
+from typing import Optional, List, Tuple
+from subprocess import Popen
+
+from redis import Redis
+
+from .helpers import get_socket_path
+

 class AbstractManager(ABC):
@@ -17,7 +24,44 @@ class AbstractManager(ABC):
         self.logger = logging.getLogger(f'{self.__class__.__name__}')
         self.logger.setLevel(loglevel)
         self.logger.info(f'Initializing {self.__class__.__name__}')
-        self.process = None
+        self.process: Optional[Popen] = None
+        self.__redis = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
+
+    @staticmethod
+    def is_running() -> List[Tuple[str, float]]:
+        r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
+        return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
+
+    @staticmethod
+    def force_shutdown():
+        r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
+        r.set('shutdown', 1)
+
+    def set_running(self) -> None:
+        self.__redis.zincrby('running', 1, self.script_name)
+
+    def unset_running(self) -> None:
+        current_running = self.__redis.zincrby('running', -1, self.script_name)
+        if int(current_running) <= 0:
+            self.__redis.zrem('running', self.script_name)
+
+    def long_sleep(self, sleep_in_sec: int, shutdown_check: int=10) -> bool:
+        if shutdown_check > sleep_in_sec:
+            shutdown_check = sleep_in_sec
+        sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
+        while sleep_until > datetime.now():
+            time.sleep(shutdown_check)
+            if self.shutdown_requested():
+                return False
+        return True
+
+    def shutdown_requested(self) -> bool:
+        try:
+            return True if self.__redis.exists('shutdown') else False
+        except ConnectionRefusedError:
+            return True
+        except ConnectionError:
+            return True

     async def _to_run_forever_async(self) -> None:
         pass
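The new methods keep their state in the cache Redis: each script name is a member of the 'running' sorted set and its score counts live instances, while force_shutdown() sets a 'shutdown' key that shutdown_requested() checks. A minimal sketch of that bookkeeping with plain redis-py calls, assuming a reachable cache socket ('demo_script' is an arbitrary example name):

from redis import Redis

from lookyloo.helpers import get_socket_path

r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)

r.zincrby('running', 1, 'demo_script')      # what set_running() does
print(r.zrangebyscore('running', '-inf', '+inf', withscores=True))
# -> [('demo_script', 1.0)]                 # what is_running() returns

remaining = r.zincrby('running', -1, 'demo_script')  # what unset_running() does
if int(remaining) <= 0:
    r.zrem('running', 'demo_script')        # drop the member once no instance is left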
@@ -29,7 +73,7 @@ class AbstractManager(ABC):
         self.logger.info(f'Launching {self.__class__.__name__}')
         try:
             while True:
-                if shutdown_requested():
+                if self.shutdown_requested():
                     break
                 try:
                     if self.process:
@@ -37,7 +81,7 @@
                             self.logger.critical(f'Unable to start {self.script_name}.')
                             break
                     else:
-                        set_running(self.script_name)
+                        self.set_running()
                     self._to_run_forever()
                 except Exception:
                     self.logger.exception(f'Something went terribly wrong in {self.__class__.__name__}.')
@@ -45,8 +89,8 @@
                 if not self.process:
                     # self.process means we run an external script, all the time,
                     # do not unset between sleep.
-                    unset_running(self.script_name)
-                if not long_sleep(sleep_in_sec):
+                    self.unset_running()
+                if not self.long_sleep(sleep_in_sec):
                     break
         except KeyboardInterrupt:
             self.logger.warning(f'{self.script_name} killed by user.')
@@ -58,5 +102,5 @@
                     self.process.send_signal(signal.SIGTERM)
                 except Exception:
                     pass
-            unset_running(self.script_name)
+            self.unset_running()
             self.logger.info(f'Shutting down {self.__class__.__name__}')
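Put together, a concrete manager now only has to set a script_name and implement _to_run_forever(); run() registers it, loops until the 'shutdown' flag appears, and unregisters it on exit. A hedged sketch of driving such a manager, assuming run() takes the sleep_in_sec parameter used in the hunks above (Heartbeat is a made-up example class):

import logging

from lookyloo.abstractmanager import AbstractManager


class Heartbeat(AbstractManager):  # hypothetical example manager

    def __init__(self, loglevel: int=logging.INFO):
        super().__init__(loglevel)
        self.script_name = 'heartbeat'

    def _to_run_forever(self) -> None:
        self.logger.info('still alive')


if __name__ == '__main__':
    # Registers 'heartbeat' in the 'running' set, does one unit of work every 10 seconds,
    # and exits cleanly once AbstractManager.force_shutdown() is called from another process.
    Heartbeat().run(sleep_in_sec=10)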
@@ -2,7 +2,6 @@
 # -*- coding: utf-8 -*-
 import os
 import logging
-import time
 import json
 import pickle
 import pkg_resources
@@ -18,7 +17,6 @@ from enum import IntEnum, unique

 from har2tree import CrawledTree, HostNode, URLNode
 from redis import Redis
-from redis.exceptions import ConnectionError
 import requests
 from requests.exceptions import HTTPError
 from publicsuffix2 import PublicSuffixList, fetch  # type: ignore
@@ -161,23 +159,6 @@ def safe_create_dir(to_create: Path) -> None:
     to_create.mkdir(parents=True, exist_ok=True)


-def set_running(name: str) -> None:
-    r = Redis(unix_socket_path=get_socket_path('cache'), db=1)
-    r.zincrby('running', 1, name)
-
-
-def unset_running(name: str) -> None:
-    r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
-    current_running = r.zincrby('running', -1, name)
-    if int(current_running) <= 0:
-        r.zrem('running', name)
-
-
-def is_running() -> List[Tuple[str, float]]:
-    r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
-    return r.zrangebyscore('running', '-inf', '+inf', withscores=True)
-
-
 def get_socket_path(name: str) -> str:
     mapping = {
         'cache': Path('cache', 'cache.sock'),
@@ -187,36 +168,6 @@ def get_socket_path(name: str) -> str:
     return str(get_homedir() / mapping[name])


-def check_running(name: str) -> bool:
-    socket_path = get_socket_path(name)
-    try:
-        r = Redis(unix_socket_path=socket_path)
-        return True if r.ping() else False
-    except ConnectionError:
-        return False
-
-
-def shutdown_requested() -> bool:
-    try:
-        r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
-        return True if r.exists('shutdown') else False
-    except ConnectionRefusedError:
-        return True
-    except ConnectionError:
-        return True
-
-
-def long_sleep(sleep_in_sec: int, shutdown_check: int=10) -> bool:
-    if shutdown_check > sleep_in_sec:
-        shutdown_check = sleep_in_sec
-    sleep_until = datetime.now() + timedelta(seconds=sleep_in_sec)
-    while sleep_until > datetime.now():
-        time.sleep(shutdown_check)
-        if shutdown_requested():
-            return False
-    return True
-
-
 def get_user_agents(directory: str='user_agents') -> Dict[str, Any]:
     ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json')
     paths = sorted(glob(ua_files_path), reverse=True)