mirror of https://github.com/CIRCL/lookyloo
fix: Update index files to remove archived (or simply gone) captures
parent
cbfa52e0fa
commit
cd11df7ac4
|
@ -94,6 +94,8 @@ class Archiver(AbstractManager):
|
|||
sub_indexes: List[Path] = []
|
||||
current_index_dirs: Set[str] = set(current_index.values())
|
||||
new_captures: Set[Path] = set()
|
||||
# Directories that are actually in the listing.
|
||||
current_dirs: Set[str] = set()
|
||||
|
||||
if s3fs_parent_dir:
|
||||
s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
|
||||
|
@ -117,6 +119,8 @@ class Archiver(AbstractManager):
|
|||
# got a capture
|
||||
if str(dir_on_disk) not in current_index_dirs:
|
||||
new_captures.add(dir_on_disk)
|
||||
else:
|
||||
current_dirs.add(dir_on_disk.name)
|
||||
|
||||
else:
|
||||
with os.scandir(root_dir) as it:
|
||||
|
@ -134,6 +138,15 @@ class Archiver(AbstractManager):
|
|||
# isoformat
|
||||
if str(dir_on_disk) not in current_index_dirs:
|
||||
new_captures.add(dir_on_disk)
|
||||
else:
|
||||
current_dirs.add(dir_on_disk.name)
|
||||
|
||||
# Check if all the directories in current_dirs (that we got by listing the directory)
|
||||
# are the same as the one in the index. If they're not, we pop the UUID before sriting the index
|
||||
if current_dirs != current_index_dirs:
|
||||
non_existing_dirs = current_index_dirs - current_dirs
|
||||
self.logger.info(f'Got {len(non_existing_dirs)} in {root_dir}, removing them from the index.')
|
||||
current_index = {uuid: path for uuid, path in current_index.items() if path not in non_existing_dirs}
|
||||
|
||||
if not current_index and not new_captures and not sub_indexes:
|
||||
# No captures at all in the directory and subdirectories, quitting
|
||||
|
|
Loading…
Reference in New Issue