Fix Archive bomb

pull/9/head
Raphaël Vinot 2016-05-16 12:25:52 +02:00
parent cd69454642
commit d05f8e9665
2 changed files with 18 additions and 17 deletions

View File

@ -14,7 +14,7 @@ import officedissector
import warnings import warnings
import exifread import exifread
from PIL import Image from PIL import Image
from PIL import PngImagePlugin # from PIL import PngImagePlugin
from pdfid import PDFiD, cPDFiD from pdfid import PDFiD, cPDFiD
@ -444,7 +444,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
except Exception as e: except Exception as e:
print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path)) print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
print(e) print(e)
if tags == None: if tags is None:
try: try:
tags = exifread.process_file(img, debug=True) tags = exifread.process_file(img, debug=True)
except Exception as e: except Exception as e:
@ -458,7 +458,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
if tag not in ('JPEGThumbnail', 'TIFFThumbnail'): if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
printable = str(tags[tag]) printable = str(tags[tag])
#Exifreader truncates data. # Exifreader truncates data.
if len(printable) > 25 and printable.endswith(", ... ]"): if len(printable) > 25 and printable.endswith(", ... ]"):
value = tags[tag].values value = tags[tag].values
if isinstance(value, basestring): if isinstance(value, basestring):
@ -488,13 +488,13 @@ class KittenGroomerFileCheck(KittenGroomerBase):
self._safe_copy() self._safe_copy()
return False return False
def extract_metadata(self): def extract_metadata(self):
metadataFile = self._safe_metadata_split(".metadata.txt") metadataFile = self._safe_metadata_split(".metadata.txt")
success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile) success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
metadataFile.close() metadataFile.close()
if not success: if not success:
pass #FIXME Delete empty metadata file # FIXME Delete empty metadata file
pass
####################### #######################
# ##### Not converted, checking the mime type ###### # ##### Not converted, checking the mime type ######
@ -503,13 +503,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
self.cur_file.log_string += 'Audio file' self.cur_file.log_string += 'Audio file'
self._media_processing() self._media_processing()
def image(self): def image(self):
'''Way to process an image''' '''Way to process an image'''
if self.cur_file.has_metadata(): if self.cur_file.has_metadata():
self.extract_metadata() self.extract_metadata()
## FIXME make sure this works for png, gif, tiff # FIXME make sure this works for png, gif, tiff
# Create a temp directory # Create a temp directory
dst_dir, filename = os.path.split(self.cur_file.dst_path) dst_dir, filename = os.path.split(self.cur_file.dst_path)
tmpdir = os.path.join(dst_dir, 'temp') tmpdir = os.path.join(dst_dir, 'temp')
@ -523,7 +522,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes()) imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
imOut.save(tmppath) imOut.save(tmppath)
#Copy the file back out and cleanup # Copy the file back out and cleanup
self._safe_copy(tmppath) self._safe_copy(tmppath)
self._safe_rmtree(tmpdir) self._safe_rmtree(tmpdir)
@ -537,7 +536,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
self.cur_file.log_string += 'Image file' self.cur_file.log_string += 'Image file'
self.cur_file.add_log_details('processing_type', 'image') self.cur_file.add_log_details('processing_type', 'image')
def video(self): def video(self):
'''Way to process a video''' '''Way to process a video'''
self.cur_file.log_string += 'Video file' self.cur_file.log_string += 'Video file'
@ -563,9 +561,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
self._print_log() self._print_log()
if self.recursive >= self.max_recursive: if self.recursive >= self.max_recursive:
self.cur_log.warning('ARCHIVE BOMB.') self.cur_file.make_dangerous()
self.cur_log.warning('The content of the archive contains recursively other archives.') self.cur_file.add_log_details('Archive Bomb', True)
self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.') self.log_name.warning('ARCHIVE BOMB.')
self.log_name.warning('The content of the archive contains recursively other archives.')
self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
self._safe_rmtree(src_dir) self._safe_rmtree(src_dir)
if src_dir.endswith('_temp'): if src_dir.endswith('_temp'):
archbomb_path = src_dir[:-len('_temp')] archbomb_path = src_dir[:-len('_temp')]

View File

@ -252,8 +252,7 @@ class KittenGroomer(KittenGroomerBase):
def _pdfa(self, tmpsrcpath): def _pdfa(self, tmpsrcpath):
'''Way to process PDF/A file''' '''Way to process PDF/A file'''
pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath, pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath, self.cur_file.dst_path + '.html')
self.cur_file.dst_path + '.html')
self._run_process(pdf_command) self._run_process(pdf_command)
def _pdf(self): def _pdf(self):
@ -336,9 +335,11 @@ class KittenGroomer(KittenGroomerBase):
self._print_log() self._print_log()
if self.recursive >= self.max_recursive: if self.recursive >= self.max_recursive:
self.cur_log.warning('ARCHIVE BOMB.') self.cur_file.make_dangerous()
self.cur_log.warning('The content of the archive contains recursively other archives.') self.cur_file.add_log_details('Archive Bomb', True)
self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.') self.log_name.warning('ARCHIVE BOMB.')
self.log_name.warning('The content of the archive contains recursively other archives.')
self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
self._safe_rmtree(src_dir) self._safe_rmtree(src_dir)
if src_dir.endswith('_temp'): if src_dir.endswith('_temp'):
archbomb_path = src_dir[:-len('_temp')] archbomb_path = src_dir[:-len('_temp')]