mirror of https://github.com/CIRCL/lookyloo
chg: Avoid building old pickles forever
parent
ebd2b29993
commit
fc5850e147
|
@ -27,13 +27,18 @@ class BackgroundIndexer(AbstractManager):
|
||||||
self.discarded_captures_dir.mkdir(parents=True, exist_ok=True)
|
self.discarded_captures_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
def _to_run_forever(self):
|
def _to_run_forever(self):
|
||||||
self._build_missing_pickles()
|
all_done = self._build_missing_pickles()
|
||||||
|
if all_done:
|
||||||
self._check_indexes()
|
self._check_indexes()
|
||||||
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
|
self.lookyloo.update_tree_cache_info(os.getpid(), self.script_name)
|
||||||
|
|
||||||
def _build_missing_pickles(self):
|
def _build_missing_pickles(self) -> bool:
|
||||||
self.logger.info('Build missing pickles...')
|
self.logger.info('Build missing pickles...')
|
||||||
|
# Sometimes, we have a huge backlog and the process might get stuck on old captures for a very long time
|
||||||
|
# This value makes sure we break out of the loop and build pickles of the most recent captures
|
||||||
|
max_captures = 50
|
||||||
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
for uuid_path in sorted(self.lookyloo.capture_dir.glob('**/uuid'), reverse=True):
|
||||||
|
max_captures -= 1
|
||||||
if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()):
|
if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()):
|
||||||
# We already have a pickle file
|
# We already have a pickle file
|
||||||
self.logger.debug(f'{uuid_path.parent} has a pickle.')
|
self.logger.debug(f'{uuid_path.parent} has a pickle.')
|
||||||
|
@ -69,7 +74,13 @@ class BackgroundIndexer(AbstractManager):
|
||||||
# The capture is not working, moving it away.
|
# The capture is not working, moving it away.
|
||||||
self.lookyloo.redis.hdel('lookup_dirs', uuid)
|
self.lookyloo.redis.hdel('lookup_dirs', uuid)
|
||||||
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
|
shutil.move(str(uuid_path.parent), str(self.discarded_captures_dir / uuid_path.parent.name))
|
||||||
|
if max_captures <= 0:
|
||||||
|
break
|
||||||
|
else:
|
||||||
self.logger.info('... done.')
|
self.logger.info('... done.')
|
||||||
|
return True
|
||||||
|
self.logger.info('... too many captures in the backlog, start from the beginning.')
|
||||||
|
return False
|
||||||
|
|
||||||
def _check_indexes(self):
|
def _check_indexes(self):
|
||||||
self.logger.info('Check indexes...')
|
self.logger.info('Check indexes...')
|
||||||
|
|
Loading…
Reference in New Issue