fix: use glob with path instead of rglob (faster)

pull/751/head
Raphaël Vinot 2023-08-04 13:15:03 +02:00
parent 7ae2b6e0d1
commit 959b7ca96d
2 changed files with 7 additions and 7 deletions
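
A note on the commit title: rglob('uuid') walks the entire tree under the captures directory, descending into the contents of every single capture, while glob('*/*/*/uuid') only descends the three known levels before matching the uuid file. The sketch below illustrates the comparison; the captures path is hypothetical and the year/month/capture layout is inferred from the new patterns rather than taken from the repository.

    # Sketch only: compare the old and new lookup strategies on an assumed
    # <captures>/<year>/<month>/<capture-dir>/uuid layout.
    from pathlib import Path
    import timeit

    captures = Path('/path/to/captures')  # hypothetical location

    def via_rglob():
        # walks the full tree, including everything inside each capture directory
        return list(captures.rglob('uuid'))

    def via_glob():
        # only descends three known levels before matching 'uuid'
        return list(captures.glob('*/*/*/uuid'))

    print('rglob:', timeit.timeit(via_rglob, number=3))
    print('glob: ', timeit.timeit(via_glob, number=3))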

@@ -78,7 +78,7 @@ class Archiver(AbstractManager):
'''Run that after the captures are in the proper directories'''
# Recent captures
self.logger.info('Update recent indexes')
-directories_to_index = {capture_dir.parent.parent for capture_dir in get_captures_dir().rglob('uuid')}
+directories_to_index = {capture_dir.parent.parent for capture_dir in get_captures_dir().glob('*/*/*/uuid')}
for directory_to_index in directories_to_index:
self.logger.debug(f'Updating index for {directory_to_index}')
self._update_index(directory_to_index)
@@ -86,7 +86,7 @@ class Archiver(AbstractManager):
# Archived captures
self.logger.info('Update archives indexes')
-directories_to_index = {capture_dir.parent.parent for capture_dir in self.archived_captures_dir.rglob('uuid')}
+directories_to_index = {capture_dir.parent.parent for capture_dir in self.archived_captures_dir.glob('*/*/*/uuid')}
for directory_to_index in directories_to_index:
self.logger.debug(f'Updating index for {directory_to_index}')
self._update_index(directory_to_index)
@@ -100,7 +100,7 @@ class Archiver(AbstractManager):
# Format:
# { 2020: { 12: [(directory, uuid)] } }
to_archive: Dict[int, Dict[int, List[Path]]] = defaultdict(lambda: defaultdict(list))
-for capture_uuid in get_captures_dir().rglob('uuid'):
+for capture_uuid in get_captures_dir().glob('*/*/*/uuid'):
try:
timestamp = datetime.strptime(capture_uuid.parent.name, '%Y-%m-%dT%H:%M:%S.%f')
except ValueError:
@@ -130,7 +130,7 @@ class Archiver(AbstractManager):
def _compress_hars(self):
self.logger.info('Compressing archived captures')
-for index in self.archived_captures_dir.rglob('index'):
+for index in self.archived_captures_dir.glob('*/*/index'):
with index.open('r') as _f:
for uuid, dirname in csv.reader(_f):
for har in (index.parent / dirname).rglob('*.har'):
@@ -144,7 +144,7 @@ class Archiver(AbstractManager):
def _load_indexes(self):
# Initialize archives
-for index in get_captures_dir().rglob('index'):
+for index in get_captures_dir().glob('*/*/index'):
with index.open('r') as _f:
recent_uuids: Mapping = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
if recent_uuids:
@@ -154,7 +154,7 @@ class Archiver(AbstractManager):
self.logger.info('Recent indexes loaded')
# Initialize archives
-for index in self.archived_captures_dir.rglob('index'):
+for index in self.archived_captures_dir.glob('*/*/index'):
with index.open('r') as _f:
archived_uuids: Mapping = {uuid: str(index.parent / dirname) for uuid, dirname in csv.reader(_f) if (index.parent / dirname).exists()}
if archived_uuids:
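
A side note on the two patterns used in this file: index files are matched at */*/index (one per year/month directory) while uuid files are matched one level deeper at */*/*/uuid (one per capture directory). Before relying on the anchored pattern, a quick sanity check along the lines of the sketch below can confirm it finds exactly the same uuid files as the old rglob; the captures path is hypothetical.

    # Sketch only: the anchored glob should agree with the old rglob.
    from pathlib import Path

    captures = Path('/path/to/captures')  # hypothetical location
    via_rglob = set(captures.rglob('uuid'))
    via_glob = set(captures.glob('*/*/*/uuid'))
    assert via_rglob == via_glob, f'mismatch: {via_rglob ^ via_glob}'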

@@ -39,7 +39,7 @@ class BackgroundIndexer(AbstractManager):
# This value makes sure we break out of the loop and build pickles of the most recent captures
max_captures = 50
got_new_captures = False
-for uuid_path in sorted(self.lookyloo.capture_dir.rglob('uuid'), reverse=True):
+for uuid_path in sorted(self.lookyloo.capture_dir.glob('*/*/*/uuid'), reverse=True):
if ((uuid_path.parent / 'tree.pickle.gz').exists() or (uuid_path.parent / 'tree.pickle').exists()):
# We already have a pickle file
self.logger.debug(f'{uuid_path.parent} has a pickle.')
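
One detail in this hunk is unchanged but relevant: the uuid paths are reverse-sorted so the most recent captures are handled first. That works when the directory names sort lexicographically in chronological order, which ISO-style capture names (see the strptime format above) and zero-padded year/month directories would guarantee; treat the exact naming as an assumption. A small sketch with made-up paths:

    # Sketch only: reverse-sorting Path objects yields newest-first, assuming
    # zero-padded year/month directories and ISO-style capture directory names.
    from pathlib import Path

    paths = [
        Path('captures/2023/07/2023-07-31T23:59:59.000000/uuid'),
        Path('captures/2023/08/2023-08-04T13:15:03.000000/uuid'),
    ]
    for uuid_path in sorted(paths, reverse=True):
        print(uuid_path)  # the August capture is printed first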