mirror of https://github.com/CIRCL/PyCIRCLean
Initial working version of EXIF splitting and image format validation by round-trip conversion.
parent
d6476dab38
commit
ca90a08159
|
@ -11,6 +11,10 @@ import oletools.oleid
|
||||||
import olefile
|
import olefile
|
||||||
import officedissector
|
import officedissector
|
||||||
|
|
||||||
|
import warnings
|
||||||
|
import exifread
|
||||||
|
from PIL import Image
|
||||||
|
|
||||||
from pdfid import PDFiD, cPDFiD
|
from pdfid import PDFiD, cPDFiD
|
||||||
|
|
||||||
from kittengroomer import FileBase, KittenGroomerBase, main
|
from kittengroomer import FileBase, KittenGroomerBase, main
|
||||||
|
@ -30,6 +34,9 @@ mimes_compressed = ['zip', 'rar', 'bzip2', 'lzip', 'lzma', 'lzop',
|
||||||
'xz', 'compress', 'gzip', 'tar']
|
'xz', 'compress', 'gzip', 'tar']
|
||||||
mimes_data = ['octet-stream']
|
mimes_data = ['octet-stream']
|
||||||
|
|
||||||
|
# Prepare image/<subtype>
|
||||||
|
mimes_metadata = ['jpeg', 'tiff']
|
||||||
|
|
||||||
# Aliases
|
# Aliases
|
||||||
aliases = {
|
aliases = {
|
||||||
# Win executables
|
# Win executables
|
||||||
|
@ -120,6 +127,11 @@ class File(FileBase):
|
||||||
# there are no known extensions associated to this mimetype.
|
# there are no known extensions associated to this mimetype.
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def has_image_metadata(self):
    '''Tell whether this file's sub type is one of those listed in
    mimes_metadata (the image sub types whose metadata gets split out).'''
    return self.sub_type in mimes_metadata
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerFileCheck(KittenGroomerBase):
|
class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
|
|
||||||
|
@ -410,8 +422,37 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
|
|
||||||
def image(self):
    '''Way to process an image

    For sub types that carry metadata (see File.has_image_metadata), the
    EXIF tags are written to a separate text file next to the destination
    file (suffix ".exif"). The image is then validated by a round trip:
    it is decoded with PIL, rebuilt from its raw pixel bytes, saved to a
    scratch directory and copied out from there.

    Exceptions raised while reading the tags or converting the image
    propagate to the caller; the scratch directory is removed either way.
    '''
    import tempfile  # local import: only needed for the scratch directory

    src_path = self.cur_file.src_path

    # Extract the metadata
    if self.cur_file.has_image_metadata():
        metadata_file = self._safe_metadata_split(".exif")
        # _safe_metadata_split prints the reason and returns False when the
        # metadata file cannot be created; skip the extraction in that case
        # instead of failing on False.write().
        if metadata_file:
            # Both handles are closed even if the EXIF parser raises.
            with metadata_file, open(src_path, 'rb') as f:
                tags = exifread.process_file(f)  # TODO: Switch to PyExifTool for raw, etc. support?
                for tag in sorted(tags):
                    # Leave out the embedded thumbnails and the maker note.
                    if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'EXIF MakerNote'):
                        metadata_file.write("Key: {}\tValue: {}\n".format(tag, tags[tag]))
            self.cur_file.add_log_details('metadata', 'exif')

    # Create a temp directory. A uniquely named one is used so that a real
    # directory called "temp" in the destination is never removed below.
    dst_dir, filename = os.path.split(self.cur_file.dst_path)
    dst_dir = dst_dir or os.curdir
    self._safe_mkdir(dst_dir)
    tmpdir = tempfile.mkdtemp(dir=dst_dir)
    tmppath = os.path.join(tmpdir, filename)

    try:
        # Do our image conversions
        # NOTE: this filter is installed process-wide and stays in place
        # after this call; decompression bomb warnings become exceptions.
        warnings.simplefilter('error', Image.DecompressionBombWarning)
        with Image.open(src_path) as im_in:
            im_out = Image.frombytes(im_in.mode, im_in.size, im_in.tobytes())
        im_out.save(tmppath)

        # Copy the file back out
        self._safe_copy(tmppath)
    finally:
        # Cleanup, also when the conversion failed
        self._safe_rmtree(tmpdir)

    self.cur_file.log_string += 'Image file'
    self.cur_file.add_log_details('processing_type', 'image')
|
||||||
|
|
||||||
|
|
||||||
def video(self):
|
def video(self):
|
||||||
'''Way to process a video'''
|
'''Way to process a video'''
|
||||||
|
|
|
@ -194,12 +194,12 @@ class KittenGroomerBase(object):
|
||||||
os.remove(filepath)
|
os.remove(filepath)
|
||||||
|
|
||||||
def _safe_mkdir(self, directory):
|
def _safe_mkdir(self, directory):
|
||||||
'''Remove a directory if it exists'''
|
'''Make a directory if it does not exist'''
|
||||||
if not os.path.exists(directory):
|
if not os.path.exists(directory):
|
||||||
os.makedirs(directory)
|
os.makedirs(directory)
|
||||||
|
|
||||||
def _safe_copy(self, src=None, dst=None):
|
def _safe_copy(self, src=None, dst=None):
|
||||||
''' Copy a file and create directory if needed '''
|
''' Copy a file and create directory if needed'''
|
||||||
if src is None:
|
if src is None:
|
||||||
src = self.cur_file.src_path
|
src = self.cur_file.src_path
|
||||||
if dst is None:
|
if dst is None:
|
||||||
|
@ -214,8 +214,24 @@ class KittenGroomerBase(object):
|
||||||
print(e)
|
print(e)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _safe_metadata_split(self, ext):
|
||||||
|
'''Create a separate file to hold this file's metadata'''
|
||||||
|
dst = self.cur_file.dst_path
|
||||||
|
try:
|
||||||
|
if os.path.exists(self.cur_file.src_path+ext):
|
||||||
|
raise KittenGroomerError("Cannot create split metadata file for \"" +
|
||||||
|
self.cur_file.dst_path + "\", type '"
|
||||||
|
+ ext + "': File exists.")
|
||||||
|
dst_path, filename = os.path.split(dst)
|
||||||
|
self._safe_mkdir(dst_path)
|
||||||
|
return open(dst+ext, 'w+')
|
||||||
|
except Exception as e:
|
||||||
|
# TODO: Logfile
|
||||||
|
print(e)
|
||||||
|
return False
|
||||||
|
|
||||||
def _list_all_files(self, directory):
|
def _list_all_files(self, directory):
|
||||||
''' Generate an iterator over all the files in a directory tree '''
|
''' Generate an iterator over all the files in a directory tree'''
|
||||||
for root, dirs, files in os.walk(directory):
|
for root, dirs, files in os.walk(directory):
|
||||||
for filename in files:
|
for filename in files:
|
||||||
filepath = os.path.join(root, filename)
|
filepath = os.path.join(root, filename)
|
||||||
|
|
Loading…
Reference in New Issue