mirror of https://github.com/CIRCL/lookyloo
fix: Properly handle lock file.
parent
345a2f3f45
commit
484aec5ddd
|
@ -5,10 +5,9 @@ import logging.config
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
from datetime import datetime
|
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from lookyloo.default import AbstractManager, get_config, try_make_file
|
from lookyloo.default import AbstractManager, get_config
|
||||||
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo
|
||||||
from lookyloo.helpers import is_locked
|
from lookyloo.helpers import is_locked
|
||||||
|
@ -33,20 +32,20 @@ class BackgroundIndexer(AbstractManager):
|
||||||
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
|
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
|
||||||
|
|
||||||
def _build_missing_pickles(self):
|
def _build_missing_pickles(self):
|
||||||
|
self.logger.info('Build missing pickles...')
|
||||||
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
||||||
if ((uuid_path.parent / 'tree.pickle.gz').exists()
|
if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()):
|
||||||
or (uuid_path.parent / 'tree.pickle').exists()
|
# We already have a pickle file
|
||||||
or not list(uuid_path.parent.rglob('*.har.gz'))
|
self.logger.debug(f'{uuid_path.parent} has a pickle.')
|
||||||
or not list(uuid_path.parent.rglob('*.har'))):
|
continue
|
||||||
|
elif not list(uuid_path.parent.rglob('*.har.gz')) and not list(uuid_path.parent.rglob('*.har')):
|
||||||
|
# No HAR file
|
||||||
|
self.logger.info(f'{uuid_path.parent} has no HAR file.')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
lock_file = uuid_path.parent / 'lock'
|
if is_locked(uuid_path.parent):
|
||||||
if try_make_file(lock_file):
|
|
||||||
# Lock created, we can process
|
|
||||||
with lock_file.open('w') as f:
|
|
||||||
f.write(f"{datetime.now().isoformat()};{os.getpid()}")
|
|
||||||
elif is_locked(uuid_path.parent):
|
|
||||||
# it is really locked
|
# it is really locked
|
||||||
|
self.logger.info(f'{uuid_path.parent} is locked, pickle generated by another process.')
|
||||||
continue
|
continue
|
||||||
|
|
||||||
with uuid_path.open() as f:
|
with uuid_path.open() as f:
|
||||||
|
@ -69,8 +68,10 @@ class BackgroundIndexer(AbstractManager):
|
||||||
# The capture is not working, moving it away.
|
# The capture is not working, moving it away.
|
||||||
self.lookyloo.redis.hdel('lookup_dirs', uuid)
|
self.lookyloo.redis.hdel('lookup_dirs', uuid)
|
||||||
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
|
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
|
||||||
|
self.logger.info('... done.')
|
||||||
|
|
||||||
def _check_indexes(self):
|
def _check_indexes(self):
|
||||||
|
self.logger.info('Check indexes...')
|
||||||
index_redis = self.lookyloo.indexing.redis
|
index_redis = self.lookyloo.indexing.redis
|
||||||
for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
|
for cache in self.lookyloo.sorted_capture_cache(cached_captures_only=False):
|
||||||
if self.lookyloo.is_public_instance and cache.no_index:
|
if self.lookyloo.is_public_instance and cache.no_index:
|
||||||
|
@ -105,6 +106,7 @@ class BackgroundIndexer(AbstractManager):
|
||||||
self.lookyloo.indexing.index_http_headers_hashes_capture(ct)
|
self.lookyloo.indexing.index_http_headers_hashes_capture(ct)
|
||||||
# NOTE: categories aren't taken in account here, should be fixed(?)
|
# NOTE: categories aren't taken in account here, should be fixed(?)
|
||||||
# see indexing.index_categories_capture(capture_uuid, categories)
|
# see indexing.index_categories_capture(capture_uuid, categories)
|
||||||
|
self.logger.info('... done.')
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
|
@ -246,9 +246,6 @@ def is_locked(locked_dir_path: Path, /) -> bool:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
ts, pid = content.split(';')
|
ts, pid = content.split(';')
|
||||||
if pid == str(os.getpid()):
|
|
||||||
# Locked by the same PID, was locked by the indexer.
|
|
||||||
return False
|
|
||||||
try:
|
try:
|
||||||
os.kill(int(pid), 0)
|
os.kill(int(pid), 0)
|
||||||
except OSError:
|
except OSError:
|
||||||
|
@ -267,6 +264,7 @@ def is_locked(locked_dir_path: Path, /) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# The lockfile is here for a good reason.
|
# The lockfile is here for a good reason.
|
||||||
|
logger.info(f'Directory locked by {pid}.')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue