diff --git a/bin/background_indexer.py b/bin/background_indexer.py index 7fe29ef2..0d3e4fe7 100755 --- a/bin/background_indexer.py +++ b/bin/background_indexer.py @@ -5,10 +5,9 @@ import logging.config import os import shutil -from datetime import datetime from typing import Optional -from lookyloo.default import AbstractManager, get_config, try_make_file +from lookyloo.default import AbstractManager, get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.lookyloo import Lookyloo from lookyloo.helpers import is_locked @@ -33,20 +32,20 @@ class BackgroundIndexer(AbstractManager): self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name) def _build_missing_pickles(self): + self.logger.info('Build missing pickles...') for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True): - if ((uuid_path.parent / 'tree.pickle.gz').exists() - or (uuid_path.parent / 'tree.pickle').exists() - or not list(uuid_path.parent.rglob('*.har.gz')) - or not list(uuid_path.parent.rglob('*.har'))): + if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()): + # We already have a pickle file + self.logger.debug(f'{uuid_path.parent} has a pickle.') + continue + elif not list(uuid_path.parent.rglob('*.har.gz')) and not list(uuid_path.parent.rglob('*.har')): + # No HAR file + self.logger.info(f'{uuid_path.parent} has no HAR file.') continue - lock_file = uuid_path.parent / 'lock' - if try_make_file(lock_file): - # Lock created, we can process - with lock_file.open('w') as f: - f.write(f"{datetime.now().isoformat()};{os.getpid()}") - elif is_locked(uuid_path.parent): + if is_locked(uuid_path.parent): # it is really locked + self.logger.info(f'{uuid_path.parent} is locked, pickle generated by another process.') continue with uuid_path.open() as f: @@ -69,8 +68,10 @@ class BackgroundIndexer(AbstractManager): # The capture is not working, moving it away. self.lookyloo.redis.hdel('lookup_dirs', uuid) shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name)) + self.logger.info('... done.') def _check_indexes(self): + self.logger.info('Check indexes...') index_redis = self.lookyloo.indexing.redis for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False): if self.lookyloo.is_public_instance and cache.no_index: @@ -105,6 +106,7 @@ class BackgroundIndexer(AbstractManager): self.lookyloo.indexing.index_http_headers_hashes_capture(ct) # NOTE: categories aren't taken in account here, should be fixed(?) # see indexing.index_categories_capture(capture_uuid, categories) + self.logger.info('... done.') def main(): diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index 2dcff753..c165ae0c 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -246,9 +246,6 @@ def is_locked(locked_dir_path: Path, /) -> bool: time.sleep(1) ts, pid = content.split(';') - if pid == str(os.getpid()): - # Locked by the same PID, was locked by the indexer. - return False try: os.kill(int(pid), 0) except OSError: @@ -267,6 +264,7 @@ def is_locked(locked_dir_path: Path, /) -> bool: return False # The lockfile is here for a good reason. + logger.info(f'Directory locked by {pid}.') return True