chg: Avoid a few more disk access whenever possible.

pull/752/head
Raphaël Vinot 2023-08-07 13:13:57 +02:00
parent 212ba9a0d9
commit fa1d353834
1 changed files with 32 additions and 10 deletions

View File

@ -47,32 +47,54 @@ class Archiver(AbstractManager):
root_dir.rmdir() root_dir.rmdir()
return return
self.logger.debug(f'Updating index for {root_dir}')
index_file = root_dir / 'index' index_file = root_dir / 'index'
existing_captures = index_file.parent.iterdir() existing_captures_names = {existing_capture.name for existing_capture in index_file.parent.iterdir()
if existing_capture.name != 'index'}
if index_file.exists(): if index_file.exists():
# Skip index if the directory has been archived. # Skip index if the directory has been archived.
try: try:
with index_file.open('r') as _f: with index_file.open('r') as _f:
current_index = {uuid: dirname for uuid, dirname in csv.reader(_f) current_index = {uuid: dirname for uuid, dirname in csv.reader(_f)
if (index_file.parent / dirname) in existing_captures} if uuid
and dirname
and dirname in existing_captures_names}
except Exception as e: except Exception as e:
# the index file is broken, it will be recreated. # the index file is broken, it will be recreated.
self.logger.warning(f'Index for {root_dir} broken, recreating it: {e}') self.logger.warning(f'Index for {root_dir} broken, recreating it: {e}')
pass pass
if not current_index: if not current_index:
index_file.unlink() index_file.unlink()
for capture_dir in existing_captures:
if set(current_index.values()) == existing_captures_names:
# No new captures, quitting
self.logger.debug(f'No new captures in {root_dir}.')
return
new_captures = sorted(existing_captures_names - set(current_index.values()), reverse=True)
self.logger.info(f'{len(new_captures)} new captures in {root_dir}.')
for capture_dir_name in new_captures:
capture_dir = root_dir / capture_dir_name
if not capture_dir.is_dir(): if not capture_dir.is_dir():
continue self.logger.warning(f'{capture_dir} is not a directory')
if capture_dir.name in current_index.values():
# The path is already in the index file, no need to read the uuid file
continue continue
uuid_file = capture_dir / 'uuid' uuid_file = capture_dir / 'uuid'
if not uuid_file.exists(): if not uuid_file.exists():
self.logger.warning(f'No UUID file in {capture_dir}.') self.logger.warning(f'No UUID file in {capture_dir}.')
shutil.move(str(capture_dir), str(get_homedir() / 'discarded_captures' / capture_dir.name))
continue continue
with uuid_file.open() as _f: with uuid_file.open() as _f:
current_index[_f.read().strip()] = uuid_file.parent.name uuid = _f.read().strip()
if not uuid:
self.logger.warning(f'{uuid_file} is empty')
shutil.move(str(capture_dir), str(get_homedir() / 'discarded_captures' / capture_dir.name))
continue
if uuid in current_index:
self.logger.warning(f'Duplicate UUID ({uuid}) in {current_index[uuid]} and {uuid_file.parent.name}')
shutil.move(str(capture_dir), str(get_homedir() / 'discarded_captures' / capture_dir.name))
continue
current_index[uuid] = uuid_file.parent.name
if not current_index: if not current_index:
# The directory has been archived. It is probably safe to unlink, but # The directory has been archived. It is probably safe to unlink, but
@ -110,17 +132,17 @@ class Archiver(AbstractManager):
# directories_to_index = {capture_dir.parent.parent # directories_to_index = {capture_dir.parent.parent
# for capture_dir in get_captures_dir().glob('*/*/*/uuid')} # for capture_dir in get_captures_dir().glob('*/*/*/uuid')}
for directory_to_index in self._make_dirs_list(get_captures_dir()): for directory_to_index in self._make_dirs_list(get_captures_dir()):
self.logger.debug(f'Updating index for {directory_to_index}') if self.shutdown_requested():
self.logger.warning('Shutdown requested, breaking.')
break
self._update_index(directory_to_index) self._update_index(directory_to_index)
self.logger.info('Recent indexes updated') self.logger.info('Recent indexes updated')
# Archived captures # Archived captures
self.logger.info('Update archives indexes') self.logger.info('Update archives indexes')
for directory_to_index in self._make_dirs_list(self.archived_captures_dir): for directory_to_index in self._make_dirs_list(self.archived_captures_dir):
if self.shutdown_requested(): if self.shutdown_requested():
self.logger.warning('Shutdown requested, breaking.') self.logger.warning('Shutdown requested, breaking.')
break break
self.logger.debug(f'Updating index for {directory_to_index}')
self._update_index(directory_to_index) self._update_index(directory_to_index)
self.logger.info('Archived indexes updated') self.logger.info('Archived indexes updated')