Merge branch 'dev'

Conflicts:
	bin/filecheck.py
pull/2/head
Eleanor Saitta 2015-12-16 16:22:21 -05:00
commit 18d8e64dc5
1 changed files with 50 additions and 21 deletions

View File

@ -12,8 +12,8 @@ import olefile
import officedissector import officedissector
import warnings import warnings
import exifread
from PIL import Image from PIL import Image
from PIL.ExifTags import TAGS, GPSTAGS
from PIL import PngImagePlugin from PIL import PngImagePlugin
from pdfid import PDFiD, cPDFiD from pdfid import PDFiD, cPDFiD
@ -426,37 +426,65 @@ class KittenGroomerFileCheck(KittenGroomerBase):
####################### #######################
# Metadata extractors # Metadata extractors
def _metadata_exif(self, metadataFile): def _metadata_exif(self, metadataFile):
img = Image.open(self.cur_file.src_path) img = open(self.cur_file.src_path, 'rb')
exif = img._getexif().items() tags = None
md = {}
for tag, value in exif: try:
decoded = TAGS[tag] tags = exifread.process_file(img, debug=True)
if "GPSInfo" == decoded: except Exception as e:
for t in value: print("Error while trying to grab full metadata for file {}; retrying for partial data.".format(self.cur_file.src_path))
md[GPSTAGS[t]] = value[t] print(e)
if tags == None:
try:
tags = exifread.process_file(img, debug=True)
except Exception as e:
print("Failed to get any metadata for file {}.".format(self.cur_file.src_path))
print(e)
img.close()
return False
for tag in sorted(tags.keys()):
# These are long and obnoxious/binary
if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
printable = str(tags[tag])
#Exifreader truncates data.
if len(printable) > 25 and printable.endswith(", ... ]"):
value = tags[tag].values
if isinstance(value, basestring):
printable = value
else: else:
md[decoded] = value printable = str(value)
metadataFile.write("Key: {}\tValue: {}\n".format(tag, printable))
for tag in sorted(md.keys()):
if tag not in ('JPEGThumbnail', 'TIFFThumbnail', 'EXIF MakerNote'):
metadataFile.write("Key: {}\tValue: {}\n".format(tag, md[tag]))
self.cur_file.add_log_details('metadata', 'exif') self.cur_file.add_log_details('metadata', 'exif')
img.close() img.close()
return True
def _metadata_png(self, metadataFile): def _metadata_png(self, metadataFile):
warnings.simplefilter('error', Image.DecompressionBombWarning)
try:
img = Image.open(self.cur_file.src_path) img = Image.open(self.cur_file.src_path)
for tag in sorted(img.info.keys()): for tag in sorted(img.info.keys()):
# These are long and obnoxious/binary
if tag not in ('icc_profile'):
metadataFile.write("Key: {}\tValue: {}\n".format(tag, img.info[tag])) metadataFile.write("Key: {}\tValue: {}\n".format(tag, img.info[tag]))
self.cur_file.add_log_details('metadata', 'png') self.cur_file.add_log_details('metadata', 'png')
img.close() img.close()
# Catch decompression bombs
except Exception as e:
print("Caught exception processing metadata for {}".format(self.cur_file.src_path))
print(e)
self.cur_file.make_dangerous()
self._safe_copy()
return False
def extract_metadata(self): def extract_metadata(self):
metadataFile = self._safe_metadata_split(".metadata.txt") metadataFile = self._safe_metadata_split(".metadata.txt")
self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile) success = self.metadata_processing_options.get(self.cur_file.mimetype)(metadataFile)
metadataFile.close() metadataFile.close()
if not success:
pass #FIXME Delete empty metadata file
####################### #######################
# ##### Not converted, checking the mime type ###### # ##### Not converted, checking the mime type ######
@ -491,6 +519,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
# Catch decompression bombs # Catch decompression bombs
except Exception as e: except Exception as e:
print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.cur_file.src_path))
print(e) print(e)
self.cur_file.make_dangerous() self.cur_file.make_dangerous()
self._safe_copy() self._safe_copy()