Proper handling of OOXML docs

pull/9/head
Raphaël Vinot 2016-02-01 12:34:47 +01:00
parent aaad11b5c1
commit e8de330d34
1 changed file with 14 additions and 10 deletions

View File

@ -171,7 +171,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
(mimes_png, self._metadata_png), (mimes_png, self._metadata_png),
] ]
self.metadata_processing_options = self._init_subtypes_application(types_metadata) self.metadata_processing_options = self._init_subtypes_application(types_metadata)
self.mime_processing_options = { self.mime_processing_options = {
'text': self.text, 'text': self.text,
'audio': self.audio, 'audio': self.audio,
@ -274,17 +274,21 @@ class KittenGroomerFileCheck(KittenGroomerBase):
# ##### Converted ###### # ##### Converted ######
def text(self): def text(self):
''' LibreOffice should be able to open all the files '''
for r in mimes_rtf: for r in mimes_rtf:
if r in self.cur_file.sub_type: if r in self.cur_file.sub_type:
self.cur_file.log_string += 'Rich Text file' self.cur_file.log_string += 'Rich Text file'
# TODO: need a way to convert it to plain text # TODO: need a way to convert it to plain text
self.cur_file.force_ext('.txt') self.cur_file.force_ext('.txt')
self._safe_copy() self._safe_copy()
else: return
self.cur_file.log_string += 'Text file' for o in mimes_ooxml:
self.cur_file.force_ext('.txt') if o in self.cur_file.sub_type:
self._safe_copy() self.cur_file.log_string += 'OOXML File'
self._ooxml()
return
self.cur_file.log_string += 'Text file'
self.cur_file.force_ext('.txt')
self._safe_copy()
def application(self): def application(self):
''' Everything can be there, using the subtype to decide ''' ''' Everything can be there, using the subtype to decide '''
@ -428,7 +432,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
def _metadata_exif(self, metadataFile): def _metadata_exif(self, metadataFile):
img = open(self.cur_file.src_path, 'rb') img = open(self.cur_file.src_path, 'rb')
tags = None tags = None
try: try:
tags = exifread.process_file(img, debug=True) tags = exifread.process_file(img, debug=True)
except Exception as e: except Exception as e:
@ -442,7 +446,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
print(e) print(e)
img.close() img.close()
return False return False
for tag in sorted(tags.keys()): for tag in sorted(tags.keys()):
# These are long and obnoxious/binary # These are long and obnoxious/binary
if tag not in ('JPEGThumbnail', 'TIFFThumbnail'): if tag not in ('JPEGThumbnail', 'TIFFThumbnail'):
@ -493,7 +497,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
self.cur_file.log_string += 'Audio file' self.cur_file.log_string += 'Audio file'
self._media_processing() self._media_processing()
def image(self): def image(self):
'''Way to process an image''' '''Way to process an image'''
if self.cur_file.has_metadata(): if self.cur_file.has_metadata():
@ -516,7 +520,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
#Copy the file back out and cleanup #Copy the file back out and cleanup
self._safe_copy(tmppath) self._safe_copy(tmppath)
self._safe_rmtree(tmpdir) self._safe_rmtree(tmpdir)
# Catch decompression bombs # Catch decompression bombs
except Exception as e: except Exception as e:
print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.cur_file.src_path)) print("Caught exception (possible decompression bomb?) while translating file {}.".format(self.cur_file.src_path))