mirror of https://github.com/CIRCL/lookyloo
chg: Avoid waiting when many captures need to be archived
parent
2037883945
commit
6c3f64e78a
|
@ -35,7 +35,15 @@ class Archiver(AbstractManager):
|
||||||
self._load_indexes()
|
self._load_indexes()
|
||||||
|
|
||||||
def _to_run_forever(self):
|
def _to_run_forever(self):
|
||||||
self._archive()
|
archiving_done = False
|
||||||
|
# NOTE: When we archive a big directory, moving *a lot* of files, expecially to MinIO
|
||||||
|
# can take a very long time. In order to avoid being stuck on the archiving, we break that in chunks
|
||||||
|
# but we also want to keep archiving without waiting 1h between each run.
|
||||||
|
while not archiving_done:
|
||||||
|
if self.shutdown_requested():
|
||||||
|
self.logger.warning('Shutdown requested, breaking.')
|
||||||
|
break
|
||||||
|
archiving_done = self._archive()
|
||||||
self._update_all_capture_indexes()
|
self._update_all_capture_indexes()
|
||||||
self._load_indexes()
|
self._load_indexes()
|
||||||
# The HARs are supposedly all compressed so this call shouldn't be required
|
# The HARs are supposedly all compressed so this call shouldn't be required
|
||||||
|
@ -156,6 +164,8 @@ class Archiver(AbstractManager):
|
||||||
archive_interval = timedelta(days=get_config('generic', 'archive'))
|
archive_interval = timedelta(days=get_config('generic', 'archive'))
|
||||||
cut_time = (datetime.now() - archive_interval).date()
|
cut_time = (datetime.now() - archive_interval).date()
|
||||||
cut_time = cut_time.replace(day=1)
|
cut_time = cut_time.replace(day=1)
|
||||||
|
self.logger.info('Archiving all captures older than {cut_time.isoformat()}.')
|
||||||
|
archiving_done = True
|
||||||
|
|
||||||
# Format:
|
# Format:
|
||||||
# { 2020: { 12: [(directory, uuid)] } }
|
# { 2020: { 12: [(directory, uuid)] } }
|
||||||
|
@ -179,7 +189,7 @@ class Archiver(AbstractManager):
|
||||||
if timestamp.date() >= cut_time:
|
if timestamp.date() >= cut_time:
|
||||||
continue
|
continue
|
||||||
to_archive[timestamp.year][timestamp.month].append(capture_uuid.parent)
|
to_archive[timestamp.year][timestamp.month].append(capture_uuid.parent)
|
||||||
self.logger.info(f'Archiving {capture_uuid.parent}.')
|
self.logger.debug(f'Archiving {capture_uuid.parent}.')
|
||||||
|
|
||||||
if not to_archive:
|
if not to_archive:
|
||||||
self.logger.info('Nothing to archive.')
|
self.logger.info('Nothing to archive.')
|
||||||
|
@ -190,14 +200,16 @@ class Archiver(AbstractManager):
|
||||||
for month, captures in month_captures.items():
|
for month, captures in month_captures.items():
|
||||||
dest_dir = self.archived_captures_dir / str(year) / f'{month:02}'
|
dest_dir = self.archived_captures_dir / str(year) / f'{month:02}'
|
||||||
dest_dir.mkdir(parents=True, exist_ok=True)
|
dest_dir.mkdir(parents=True, exist_ok=True)
|
||||||
capture_breakpoint = 1000
|
capture_breakpoint = 100
|
||||||
|
self.logger.info(f'{len(captures)} captures to archive in {year}-{month}.')
|
||||||
for capture_path in captures:
|
for capture_path in captures:
|
||||||
capture_breakpoint -= 1
|
capture_breakpoint -= 1
|
||||||
if capture_breakpoint <= 0:
|
if capture_breakpoint <= 0:
|
||||||
# Break and restart later
|
# Break and restart later
|
||||||
self.logger.info('Archived many captures in {year}-{month}, will keep going later.')
|
self.logger.info('Archived many captures in {year}-{month}, will keep going later.')
|
||||||
|
archiving_done = False
|
||||||
break
|
break
|
||||||
elif capture_breakpoint % 100:
|
elif capture_breakpoint % 10:
|
||||||
# Just check if we requested a shutdown.
|
# Just check if we requested a shutdown.
|
||||||
if self.shutdown_requested():
|
if self.shutdown_requested():
|
||||||
self.logger.warning('Shutdown requested, breaking.')
|
self.logger.warning('Shutdown requested, breaking.')
|
||||||
|
@ -213,8 +225,9 @@ class Archiver(AbstractManager):
|
||||||
(capture_path / 'tree.pickle.gz').unlink(missing_ok=True)
|
(capture_path / 'tree.pickle.gz').unlink(missing_ok=True)
|
||||||
shutil.move(str(capture_path), str(dest_dir))
|
shutil.move(str(capture_path), str(dest_dir))
|
||||||
p.execute()
|
p.execute()
|
||||||
|
if archiving_done:
|
||||||
self.logger.info('Archiving done.')
|
self.logger.info('Archiving done.')
|
||||||
|
return archiving_done
|
||||||
|
|
||||||
def _compress_hars(self):
|
def _compress_hars(self):
|
||||||
"""This method is very slow (it checks every single capture for non-compressed HARs)
|
"""This method is very slow (it checks every single capture for non-compressed HARs)
|
||||||
|
|
Loading…
Reference in New Issue