Fix Archive bomb

2016-05-16 12:25:52 +02:00 · 2016-05-16 12:25:52 +02:00 · d05f8e9665
parent cd69454642
commit d05f8e9665
2 changed files with 18 additions and 17 deletions
--- a/bin/filecheck.py
+++ b/bin/filecheck.py
@@ -14,7 +14,7 @@ import officedissector
 import warnings
 import exifread
 from PIL import Image
-from PIL import PngImagePlugin
+# from PIL import PngImagePlugin
 from pdfid import PDFiD, cPDFiD
@@ -444,7 +444,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        except Exception as e:
            print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
            print(e)
-        if tags == None:
+        if tags is None:
            try:
                tags = exifread.process_file(img, debug=True)
            except Exception as e:
@@ -458,7 +458,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
                printable = str(tags[tag])
-                #Exifreader truncates data.
+                # Exifreader truncates data.
                if len(printable) > 25 and printable.endswith(", ... ]"):
                    value = tags[tag].values
                    if isinstance(value, basestring):
@@ -488,13 +488,13 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            self._safe_copy()
            return False
    def extract_metadata(self):
        metadataFile = self._safe_metadata_split(".metadata.txt")
        success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
        metadataFile.close()
        if not success:
-            pass #FIXME Delete empty metadata file
+            # FIXME Delete empty metadata file
            pass
    #######################
    # ##### Not converted, checking the mime type ######
@@ -503,13 +503,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self.cur_file.log_string += 'Audio file'
        self._media_processing()
    def image(self):
        '''Way to process an image'''
        if self.cur_file.has_metadata():
            self.extract_metadata()
-        ## FIXME make sure this works for png, gif, tiff
+        # FIXME make sure this works for png, gif, tiff
        # Create a temp directory
        dst_dir, filename = os.path.split(self.cur_file.dst_path)
        tmpdir = os.path.join(dst_dir, 'temp')
@@ -523,7 +522,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
            imOut.save(tmppath)
-            #Copy the file back out and cleanup
+            # Copy the file back out and cleanup
            self._safe_copy(tmppath)
            self._safe_rmtree(tmpdir)
@@ -537,7 +536,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
        self.cur_file.log_string += 'Image file'
        self.cur_file.add_log_details('processing_type', 'image')
    def video(self):
        '''Way to process a video'''
        self.cur_file.log_string += 'Video file'
@@ -563,9 +561,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
            self._print_log()
        if self.recursive >= self.max_recursive:
-            self.cur_log.warning('ARCHIVE BOMB.')
+            self.cur_file.make_dangerous()
-            self.cur_log.warning('The content of the archive contains recursively other archives.')
+            self.cur_file.add_log_details('Archive Bomb', True)
-            self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.')
+            self.log_name.warning('ARCHIVE BOMB.')
+            self.log_name.warning('The content of the archive contains recursively other archives.')
+            self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
            self._safe_rmtree(src_dir)
            if src_dir.endswith('_temp'):
                archbomb_path = src_dir[:-len('_temp')]
--- a/bin/generic.py
+++ b/bin/generic.py
@@ -252,8 +252,7 @@ class KittenGroomer(KittenGroomerBase):
    def _pdfa(self, tmpsrcpath):
        '''Way to process PDF/A file'''
-        pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath,
+        pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath, self.cur_file.dst_path + '.html')
-                                                     self.cur_file.dst_path + '.html')
        self._run_process(pdf_command)
    def _pdf(self):
@@ -336,9 +335,11 @@ class KittenGroomer(KittenGroomerBase):
            self._print_log()
        if self.recursive >= self.max_recursive:
-            self.cur_log.warning('ARCHIVE BOMB.')
+            self.cur_file.make_dangerous()
-            self.cur_log.warning('The content of the archive contains recursively other archives.')
+            self.cur_file.add_log_details('Archive Bomb', True)
-            self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.')
+            self.log_name.warning('ARCHIVE BOMB.')
+            self.log_name.warning('The content of the archive contains recursively other archives.')
+            self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
            self._safe_rmtree(src_dir)
            if src_dir.endswith('_temp'):
                archbomb_path = src_dir[:-len('_temp')]