mirror of https://github.com/CIRCL/PyCIRCLean
Fix Archive bomb
parent
cd69454642
commit
d05f8e9665
|
@ -14,7 +14,7 @@ import officedissector
|
||||||
import warnings
|
import warnings
|
||||||
import exifread
|
import exifread
|
||||||
from PIL import Image
|
from PIL import Image
|
||||||
from PIL import PngImagePlugin
|
# from PIL import PngImagePlugin
|
||||||
|
|
||||||
from pdfid import PDFiD, cPDFiD
|
from pdfid import PDFiD, cPDFiD
|
||||||
|
|
||||||
|
@ -444,7 +444,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
|
print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
|
||||||
print(e)
|
print(e)
|
||||||
if tags == None:
|
if tags is None:
|
||||||
try:
|
try:
|
||||||
tags = exifread.process_file(img, debug=True)
|
tags = exifread.process_file(img, debug=True)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
@ -458,7 +458,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
|
if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
|
||||||
printable = str(tags[tag])
|
printable = str(tags[tag])
|
||||||
|
|
||||||
#Exifreader truncates data.
|
# Exifreader truncates data.
|
||||||
if len(printable) > 25 and printable.endswith(", ... ]"):
|
if len(printable) > 25 and printable.endswith(", ... ]"):
|
||||||
value = tags[tag].values
|
value = tags[tag].values
|
||||||
if isinstance(value, basestring):
|
if isinstance(value, basestring):
|
||||||
|
@ -488,13 +488,13 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
self._safe_copy()
|
self._safe_copy()
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def extract_metadata(self):
|
def extract_metadata(self):
|
||||||
metadataFile = self._safe_metadata_split(".metadata.txt")
|
metadataFile = self._safe_metadata_split(".metadata.txt")
|
||||||
success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
|
success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
|
||||||
metadataFile.close()
|
metadataFile.close()
|
||||||
if not success:
|
if not success:
|
||||||
pass #FIXME Delete empty metadata file
|
# FIXME Delete empty metadata file
|
||||||
|
pass
|
||||||
|
|
||||||
#######################
|
#######################
|
||||||
# ##### Not converted, checking the mime type ######
|
# ##### Not converted, checking the mime type ######
|
||||||
|
@ -503,13 +503,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
self.cur_file.log_string += 'Audio file'
|
self.cur_file.log_string += 'Audio file'
|
||||||
self._media_processing()
|
self._media_processing()
|
||||||
|
|
||||||
|
|
||||||
def image(self):
|
def image(self):
|
||||||
'''Way to process an image'''
|
'''Way to process an image'''
|
||||||
if self.cur_file.has_metadata():
|
if self.cur_file.has_metadata():
|
||||||
self.extract_metadata()
|
self.extract_metadata()
|
||||||
|
|
||||||
## FIXME make sure this works for png, gif, tiff
|
# FIXME make sure this works for png, gif, tiff
|
||||||
# Create a temp directory
|
# Create a temp directory
|
||||||
dst_dir, filename = os.path.split(self.cur_file.dst_path)
|
dst_dir, filename = os.path.split(self.cur_file.dst_path)
|
||||||
tmpdir = os.path.join(dst_dir, 'temp')
|
tmpdir = os.path.join(dst_dir, 'temp')
|
||||||
|
@ -523,7 +522,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
|
imOut = Image.frombytes(imIn.mode, imIn.size, imIn.tobytes())
|
||||||
imOut.save(tmppath)
|
imOut.save(tmppath)
|
||||||
|
|
||||||
#Copy the file back out and cleanup
|
# Copy the file back out and cleanup
|
||||||
self._safe_copy(tmppath)
|
self._safe_copy(tmppath)
|
||||||
self._safe_rmtree(tmpdir)
|
self._safe_rmtree(tmpdir)
|
||||||
|
|
||||||
|
@ -537,7 +536,6 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
self.cur_file.log_string += 'Image file'
|
self.cur_file.log_string += 'Image file'
|
||||||
self.cur_file.add_log_details('processing_type', 'image')
|
self.cur_file.add_log_details('processing_type', 'image')
|
||||||
|
|
||||||
|
|
||||||
def video(self):
|
def video(self):
|
||||||
'''Way to process a video'''
|
'''Way to process a video'''
|
||||||
self.cur_file.log_string += 'Video file'
|
self.cur_file.log_string += 'Video file'
|
||||||
|
@ -563,9 +561,11 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
self._print_log()
|
self._print_log()
|
||||||
|
|
||||||
if self.recursive >= self.max_recursive:
|
if self.recursive >= self.max_recursive:
|
||||||
self.cur_log.warning('ARCHIVE BOMB.')
|
self.cur_file.make_dangerous()
|
||||||
self.cur_log.warning('The content of the archive contains recursively other archives.')
|
self.cur_file.add_log_details('Archive Bomb', True)
|
||||||
self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.')
|
self.log_name.warning('ARCHIVE BOMB.')
|
||||||
|
self.log_name.warning('The content of the archive contains recursively other archives.')
|
||||||
|
self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
|
||||||
self._safe_rmtree(src_dir)
|
self._safe_rmtree(src_dir)
|
||||||
if src_dir.endswith('_temp'):
|
if src_dir.endswith('_temp'):
|
||||||
archbomb_path = src_dir[:-len('_temp')]
|
archbomb_path = src_dir[:-len('_temp')]
|
||||||
|
|
|
@ -252,8 +252,7 @@ class KittenGroomer(KittenGroomerBase):
|
||||||
|
|
||||||
def _pdfa(self, tmpsrcpath):
|
def _pdfa(self, tmpsrcpath):
|
||||||
'''Way to process PDF/A file'''
|
'''Way to process PDF/A file'''
|
||||||
pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath,
|
pdf_command = '{} --dest-dir / "{}" "{}"'.format(PDF2HTMLEX, tmpsrcpath, self.cur_file.dst_path + '.html')
|
||||||
self.cur_file.dst_path + '.html')
|
|
||||||
self._run_process(pdf_command)
|
self._run_process(pdf_command)
|
||||||
|
|
||||||
def _pdf(self):
|
def _pdf(self):
|
||||||
|
@ -336,9 +335,11 @@ class KittenGroomer(KittenGroomerBase):
|
||||||
self._print_log()
|
self._print_log()
|
||||||
|
|
||||||
if self.recursive >= self.max_recursive:
|
if self.recursive >= self.max_recursive:
|
||||||
self.cur_log.warning('ARCHIVE BOMB.')
|
self.cur_file.make_dangerous()
|
||||||
self.cur_log.warning('The content of the archive contains recursively other archives.')
|
self.cur_file.add_log_details('Archive Bomb', True)
|
||||||
self.cur_log.warning('This is a bad sign so the archive is not extracted to the destination key.')
|
self.log_name.warning('ARCHIVE BOMB.')
|
||||||
|
self.log_name.warning('The content of the archive contains recursively other archives.')
|
||||||
|
self.log_name.warning('This is a bad sign so the archive is not extracted to the destination key.')
|
||||||
self._safe_rmtree(src_dir)
|
self._safe_rmtree(src_dir)
|
||||||
if src_dir.endswith('_temp'):
|
if src_dir.endswith('_temp'):
|
||||||
archbomb_path = src_dir[:-len('_temp')]
|
archbomb_path = src_dir[:-len('_temp')]
|
||||||
|
|
Loading…
Reference in New Issue