mirror of https://github.com/CIRCL/lookyloo
fix: Update index files to remove archived (or simply gone) captures
parent
cbfa52e0fa
commit
cd11df7ac4
|
@ -94,6 +94,8 @@ class Archiver(AbstractManager):
|
||||||
sub_indexes: List[Path] = []
|
sub_indexes: List[Path] = []
|
||||||
current_index_dirs: Set[str] = set(current_index.values())
|
current_index_dirs: Set[str] = set(current_index.values())
|
||||||
new_captures: Set[Path] = set()
|
new_captures: Set[Path] = set()
|
||||||
|
# Directories that are actually in the listing.
|
||||||
|
current_dirs: Set[str] = set()
|
||||||
|
|
||||||
if s3fs_parent_dir:
|
if s3fs_parent_dir:
|
||||||
s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
|
s3fs_dir = '/'.join([s3fs_parent_dir, root_dir.name])
|
||||||
|
@ -117,6 +119,8 @@ class Archiver(AbstractManager):
|
||||||
# got a capture
|
# got a capture
|
||||||
if str(dir_on_disk) not in current_index_dirs:
|
if str(dir_on_disk) not in current_index_dirs:
|
||||||
new_captures.add(dir_on_disk)
|
new_captures.add(dir_on_disk)
|
||||||
|
else:
|
||||||
|
current_dirs.add(dir_on_disk.name)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
with os.scandir(root_dir) as it:
|
with os.scandir(root_dir) as it:
|
||||||
|
@ -134,6 +138,15 @@ class Archiver(AbstractManager):
|
||||||
# isoformat
|
# isoformat
|
||||||
if str(dir_on_disk) not in current_index_dirs:
|
if str(dir_on_disk) not in current_index_dirs:
|
||||||
new_captures.add(dir_on_disk)
|
new_captures.add(dir_on_disk)
|
||||||
|
else:
|
||||||
|
current_dirs.add(dir_on_disk.name)
|
||||||
|
|
||||||
|
# Check if all the directories in current_dirs (that we got by listing the directory)
|
||||||
|
# are the same as the one in the index. If they're not, we pop the UUID before sriting the index
|
||||||
|
if current_dirs != current_index_dirs:
|
||||||
|
non_existing_dirs = current_index_dirs - current_dirs
|
||||||
|
self.logger.info(f'Got {len(non_existing_dirs)} in {root_dir}, removing them from the index.')
|
||||||
|
current_index = {uuid: path for uuid, path in current_index.items() if path not in non_existing_dirs}
|
||||||
|
|
||||||
if not current_index and not new_captures and not sub_indexes:
|
if not current_index and not new_captures and not sub_indexes:
|
||||||
# No captures at all in the directory and subdirectories, quitting
|
# No captures at all in the directory and subdirectories, quitting
|
||||||
|
|
Loading…
Reference in New Issue