mirror of https://github.com/CIRCL/lookyloo
fix: Properly handle lock file.
parent
345a2f3f45
commit
484aec5ddd
|
@ -5,10 +5,9 @@ import logging.config
|
|||
import os
|
||||
import shutil
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from lookyloo.default import AbstractManager, get_config, try_make_file
|
||||
from lookyloo.default import AbstractManager, get_config
|
||||
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
||||
from lookyloo.lookyloo import Lookyloo
|
||||
from lookyloo.helpers import is_locked
|
||||
|
@ -33,20 +32,20 @@ class BackgroundIndexer(AbstractManager):
|
|||
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
|
||||
|
||||
def _build_missing_pickles(self):
|
||||
self.logger.info('Build missing pickles...')
|
||||
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
||||
if ((uuid_path.parent / 'tree.pickle.gz').exists()
|
||||
or (uuid_path.parent / 'tree.pickle').exists()
|
||||
or not list(uuid_path.parent.rglob('*.har.gz'))
|
||||
or not list(uuid_path.parent.rglob('*.har'))):
|
||||
if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()):
|
||||
# We already have a pickle file
|
||||
self.logger.debug(f'{uuid_path.parent} has a pickle.')
|
||||
continue
|
||||
elif not list(uuid_path.parent.rglob('*.har.gz')) and not list(uuid_path.parent.rglob('*.har')):
|
||||
# No HAR file
|
||||
self.logger.info(f'{uuid_path.parent} has no HAR file.')
|
||||
continue
|
||||
|
||||
lock_file = uuid_path.parent / 'lock'
|
||||
if try_make_file(lock_file):
|
||||
# Lock created, we can process
|
||||
with lock_file.open('w') as f:
|
||||
f.write(f"{datetime.now().isoformat()};{os.getpid()}")
|
||||
elif is_locked(uuid_path.parent):
|
||||
if is_locked(uuid_path.parent):
|
||||
# it is really locked
|
||||
self.logger.info(f'{uuid_path.parent} is locked, pickle generated by another process.')
|
||||
continue
|
||||
|
||||
with uuid_path.open() as f:
|
||||
|
@ -69,8 +68,10 @@ class BackgroundIndexer(AbstractManager):
|
|||
# The capture is not working, moving it away.
|
||||
self.lookyloo.redis.hdel('lookup_dirs', uuid)
|
||||
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
|
||||
self.logger.info('... done.')
|
||||
|
||||
def _check_indexes(self):
|
||||
self.logger.info('Check indexes...')
|
||||
index_redis = self.lookyloo.indexing.redis
|
||||
for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
|
||||
if self.lookyloo.is_public_instance and cache.no_index:
|
||||
|
@ -105,6 +106,7 @@ class BackgroundIndexer(AbstractManager):
|
|||
self.lookyloo.indexing.index_http_headers_hashes_capture(ct)
|
||||
# NOTE: categories aren't taken in account here, should be fixed(?)
|
||||
# see indexing.index_categories_capture(capture_uuid, categories)
|
||||
self.logger.info('... done.')
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
@ -246,9 +246,6 @@ def is_locked(locked_dir_path: Path, /) -> bool:
|
|||
time.sleep(1)
|
||||
|
||||
ts, pid = content.split(';')
|
||||
if pid == str(os.getpid()):
|
||||
# Locked by the same PID, was locked by the indexer.
|
||||
return False
|
||||
try:
|
||||
os.kill(int(pid), 0)
|
||||
except OSError:
|
||||
|
@ -267,6 +264,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
|
|||
return False
|
||||
|
||||
# The lockfile is here for a good reason.
|
||||
logger.info(f'Directory locked by {pid}.')
|
||||
return True
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue