Fix completely buggy mimetype/extension xcheck

pull/2/head
Raphaël Vinot 2015-05-17 15:58:31 +02:00
parent e9d76adb42
commit ac372dc59d
3 changed files with 48 additions and 33 deletions

View File

@ -26,48 +26,58 @@ mimes_compressed = ['zip', 'x-rar', 'x-bzip2', 'x-lzip', 'x-lzma', 'x-lzop',
'x-xz', 'x-compress', 'x-gzip', 'x-tar', 'compressed']
mimes_data = ['octet-stream']
# Aliases
aliases = {
# Win executables
'application/x-msdos-program': 'application/x-dosexec',
'application/x-dosexec': 'application/x-msdos-program'
}
# Sometimes mimetypes.guess_type gives unexpected results, for example with .tar.gz files:
# In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
# Out[12]: ('application/x-tar', 'gzip')
# The reverse lookup works as expected: mimetypes.guess_all_extensions('application/gzip', strict=False)
propertype = {'.gz': 'application/gzip'}
class File(FileBase):
def __init__(self, src_path, dst_path):
''' Init file object, set the mimetype '''
super(File, self).__init__(src_path, dst_path)
mimetype = magic.from_file(src_path, mime=True)
self.main_type, self.sub_type = mimetype.split('/')
self.log_details.update({'maintype': self.main_type, 'subtype': self.sub_type})
self.expected_mimetype, self.expected_extensions = self.crosscheck_mime()
self.is_recursive = False
a, self.extension = os.path.splitext(src_path)
def crosscheck_mime(self):
'''
Set the expected mime and extension variables based on mime type.
'''
# /usr/share/mime has interesting stuff
self.log_details.update({'maintype': self.main_type, 'subtype': self.sub_type, 'extension': self.extension})
# guess_type uses the extension to get a mime type
expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False)
if expected_mimetype is not None:
expected_extensions = mimetypes.guess_all_extensions(expected_mimetype,
strict=False)
# Check correlation known extension => actual mime type
if propertype.get(self.extension) is not None:
expected_mimetype = propertype.get(self.extension)
else:
# the extension is unknown...
expected_extensions = None
expected_mimetype, encoding = mimetypes.guess_type(self.src_path, strict=False)
if aliases.get(expected_mimetype) is not None:
expected_mimetype = aliases.get(expected_mimetype)
return expected_mimetype, expected_extensions
is_known_extension = self.extension in mimetypes.types_map.keys()
if is_known_extension and expected_mimetype != mimetype:
self.log_details.update({'expected_mimetype': expected_mimetype})
self.make_dangerous()
def verify_extension(self):
'''Check if the extension is the one we expect'''
if self.expected_extensions is None:
return None
path, actual_extension = os.path.splitext(self.src_path)
return actual_extension in self.expected_extensions
# check correlation actual mime type => known extensions
if aliases.get(mimetype) is not None:
mimetype = aliases.get(mimetype)
expected_extensions = mimetypes.guess_all_extensions(mimetype, strict=False)
if expected_extensions is not None:
if len(self.extension) > 0 and self.extension not in expected_extensions:
self.log_details.update({'expected_mimetype': expected_extensions})
self.make_dangerous()
else:
# there are no known extensions associated to this mimetype.
pass
def verify_mime(self):
'''Check if the mime is the one we expect'''
if self.expected_mimetype is None:
return None
actual_mimetype = '{}/{}'.format(self.main_type, self.sub_type)
return actual_mimetype == self.expected_mimetype
self.is_recursive = False
class KittenGroomer(KittenGroomerBase):
@ -290,10 +300,6 @@ class KittenGroomer(KittenGroomerBase):
def _media_processing(self):
'''Generic way to process all the media files'''
self.cur_log.fields(processing_type='media')
if not self.cur_file.verify_mime() or not self.cur_file.verify_extension():
# The extension is unknown or doesn't match the mime type => suspicious
# TODO: write details in the logfile
self.cur_file.make_dangerous()
self._safe_copy()
#######################

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from helpers import FileBase, KittenGroomerBase, main
from .helpers import FileBase, KittenGroomerBase, main

View File

@ -46,6 +46,9 @@ class FileBase(object):
Prepending and appending DANGEROUS to the destination
file name avoids the double-click of death
'''
if self.log_details.get('dangerous'):
# Already marked as dangerous, do nothing
return
self.log_details['dangerous'] = True
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'DANGEROUS_{}_DANGEROUS'.format(filename))
@ -56,6 +59,9 @@ class FileBase(object):
a decision. The user will have to decide what to do.
Prepending UNKNOWN
'''
if self.log_details.get('dangerous') or self.log_details.get('binary'):
# Already marked as dangerous or binary, do nothing
return
self.log_details['unknown'] = True
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, 'UNKNOWN_{}'.format(filename))
@ -66,6 +72,9 @@ class FileBase(object):
Appending .bin avoids the double-click of death, but the user
will have to decide for themselves.
'''
if self.log_details.get('dangerous'):
# Already marked as dangerous, do nothing
return
self.log_details['binary'] = True
path, filename = os.path.split(self.dst_path)
self.dst_path = os.path.join(path, '{}.bin'.format(filename))