Initial working version of EXIF splitting and image format validation by round-trip conversion.

pull/2/head
Eleanor Saitta 2015-12-09 20:26:26 -05:00
parent d6476dab38
commit ca90a08159
2 changed files with 61 additions and 4 deletions

View File

@ -11,6 +11,10 @@ import oletools.oleid
import olefile import olefile
import officedissector import officedissector
import warnings
import exifread
from PIL import Image
from pdfid import PDFiD, cPDFiD from pdfid import PDFiD, cPDFiD
from kittengroomer import FileBase, KittenGroomerBase, main from kittengroomer import FileBase, KittenGroomerBase, main
@ -30,6 +34,9 @@ mimes_compressed = ['zip', 'rar', 'bzip2', 'lzip', 'lzma', 'lzop',
'xz', 'compress', 'gzip', 'tar'] 'xz', 'compress', 'gzip', 'tar']
mimes_data = ['octet-stream'] mimes_data = ['octet-stream']
# Prepare image/<subtype>
mimes_metadata = ['jpeg', 'tiff']
# Aliases # Aliases
aliases = { aliases = {
# Win executables # Win executables
@ -120,6 +127,11 @@ class File(FileBase):
# there are no known extensions associated to this mimetype. # there are no known extensions associated to this mimetype.
pass pass
def has_image_metadata(self):
    '''Tell whether this file's subtype is one we extract metadata from.

    Returns True when self.sub_type is listed in the module-level
    mimes_metadata collection, False otherwise.
    '''
    # The membership test already yields the boolean we want to return.
    return self.sub_type in mimes_metadata
class KittenGroomerFileCheck(KittenGroomerBase): class KittenGroomerFileCheck(KittenGroomerBase):
@ -410,8 +422,37 @@ class KittenGroomerFileCheck(KittenGroomerBase):
def image(self):
    '''Way to process an image.

    When the current file's subtype can carry metadata, the EXIF tags are
    written to a separate ".exif" file obtained from
    _safe_metadata_split(). The image is then rebuilt from its decoded
    pixel data inside a temporary directory, and the rebuilt file is
    handed to _safe_copy().

    Any exception raised by the conversion or the copy propagates to the
    caller; the temporary directory is removed in every case.
    '''
    # Extract the metadata
    if self.cur_file.has_image_metadata():
        metadata_file = self._safe_metadata_split(".exif")
        # _safe_metadata_split() returns False when it could not create
        # the sidecar file; skip extraction instead of crashing on it.
        if metadata_file:
            # Both handles are closed even if parsing or writing fails.
            with metadata_file, open(self.cur_file.src_path, 'rb') as src:
                tags = exifread.process_file(src)  # TODO: Switch to PyExifTool for raw, etc. support?
                for tag in sorted(tags):
                    # Skip embedded thumbnails and the maker note.
                    if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'EXIF MakerNote'):
                        metadata_file.write("Key: {}\tValue: {}\n".format(tag, tags[tag]))
            self.cur_file.add_log_details('metadata', 'exif')

    # Create a temp directory
    dst_dir, filename = os.path.split(self.cur_file.dst_path)
    tmpdir = os.path.join(dst_dir, 'temp')
    tmppath = os.path.join(tmpdir, filename)
    self._safe_mkdir(tmpdir)
    try:
        # Do our image conversions.
        # Promote the decompression bomb warning to an exception so that
        # such an image aborts the conversion instead of being decoded.
        warnings.simplefilter('error', Image.DecompressionBombWarning)
        im_in = Image.open(self.cur_file.src_path)
        try:
            im_out = Image.frombytes(im_in.mode, im_in.size, im_in.tobytes())
        finally:
            # Release the source file handle held by the opened image.
            im_in.close()
        im_out.save(tmppath)

        # Copy the file back out
        self._safe_copy(tmppath)
    finally:
        # Cleanup, even when the conversion or the copy failed
        self._safe_rmtree(tmpdir)

    self.cur_file.log_string += 'Image file'
    self.cur_file.add_log_details('processing_type', 'image')
def video(self): def video(self):
'''Way to process a video''' '''Way to process a video'''

View File

@ -194,12 +194,12 @@ class KittenGroomerBase(object):
os.remove(filepath) os.remove(filepath)
def _safe_mkdir(self, directory): def _safe_mkdir(self, directory):
'''Remove a directory if it exists''' '''Make a directory if it does not exist'''
if not os.path.exists(directory): if not os.path.exists(directory):
os.makedirs(directory) os.makedirs(directory)
def _safe_copy(self, src=None, dst=None): def _safe_copy(self, src=None, dst=None):
''' Copy a file and create directory if needed ''' ''' Copy a file and create directory if needed'''
if src is None: if src is None:
src = self.cur_file.src_path src = self.cur_file.src_path
if dst is None: if dst is None:
@ -214,8 +214,24 @@ class KittenGroomerBase(object):
print(e) print(e)
return False return False
def _safe_metadata_split(self, ext):
'''Create a separate file to hold this file's metadata'''
dst = self.cur_file.dst_path
try:
if os.path.exists(self.cur_file.src_path+ext):
raise KittenGroomerError("Cannot create split metadata file for \"" +
self.cur_file.dst_path + "\", type '"
+ ext + "': File exists.")
dst_path, filename = os.path.split(dst)
self._safe_mkdir(dst_path)
return open(dst+ext, 'w+')
except Exception as e:
# TODO: Logfile
print(e)
return False
def _list_all_files(self, directory): def _list_all_files(self, directory):
''' Generate an iterator over all the files in a directory tree ''' ''' Generate an iterator over all the files in a directory tree'''
for root, dirs, files in os.walk(directory): for root, dirs, files in os.walk(directory):
for filename in files: for filename in files:
filepath = os.path.join(root, filename) filepath = os.path.join(root, filename)