mirror of https://github.com/CIRCL/PyCIRCLean
Split mimetype methods
- Instead of one large function that mutates FileBase properties, the mimetype and its main type/subtype are now determined by two separate methods that return their results.
- The API is not changed.
- Absence of a mimetype is now represented by None instead of an empty string.
pull/12/head
parent
9832101c85
commit
b6c01db1fb
|
@ -27,7 +27,6 @@ class KittenGroomerError(Exception):
|
||||||
|
|
||||||
class ImplementationRequired(KittenGroomerError):
|
class ImplementationRequired(KittenGroomerError):
|
||||||
"""Implementation required error."""
|
"""Implementation required error."""
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,7 +45,9 @@ class FileBase(object):
|
||||||
self.log_details = {'filepath': self.src_path}
|
self.log_details = {'filepath': self.src_path}
|
||||||
self.log_string = ''
|
self.log_string = ''
|
||||||
self.extension = self._determine_extension()
|
self.extension = self._determine_extension()
|
||||||
self._determine_mimetype()
|
self.mimetype = self._determine_mimetype()
|
||||||
|
if self.mimetype:
|
||||||
|
self.main_type, self.sub_type = self._split_subtypes(self.mimetype)
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
self.filename = os.path.basename(self.src_path)
|
self.filename = os.path.basename(self.src_path)
|
||||||
|
|
||||||
|
@ -57,24 +58,27 @@ class FileBase(object):
|
||||||
def _determine_mimetype(self):
|
def _determine_mimetype(self):
|
||||||
if os.path.islink(self.src_path):
|
if os.path.islink(self.src_path):
|
||||||
# magic will throw an IOError on a broken symlink
|
# magic will throw an IOError on a broken symlink
|
||||||
self.mimetype = 'inode/symlink'
|
mimetype = 'inode/symlink'
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
mt = magic.from_file(self.src_path, mime=True)
|
mt = magic.from_file(self.src_path, mime=True)
|
||||||
# Note: magic will always return something, even if it's just 'data'
|
# Note: magic will always return something, even if it's just 'data'
|
||||||
except UnicodeEncodeError as e:
|
except UnicodeEncodeError as e:
|
||||||
# FIXME: The encoding of the file is broken (possibly UTF-16)
|
# FIXME: The encoding of the file is broken (possibly UTF-16)
|
||||||
mt = ''
|
mt = None
|
||||||
self.log_details.update({'UnicodeError': e})
|
self.log_details.update({'UnicodeError': e})
|
||||||
try:
|
try:
|
||||||
self.mimetype = mt.decode("utf-8")
|
mimetype = mt.decode("utf-8")
|
||||||
except:
|
except:
|
||||||
self.mimetype = mt
|
mimetype = mt
|
||||||
if self.mimetype and '/' in self.mimetype:
|
return mimetype
|
||||||
self.main_type, self.sub_type = self.mimetype.split('/')
|
|
||||||
|
def _split_subtypes(self, mimetype):
|
||||||
|
if '/' in mimetype:
|
||||||
|
main_type, sub_type = mimetype.split('/')
|
||||||
else:
|
else:
|
||||||
self.main_type = ''
|
main_type, sub_type = None, None
|
||||||
self.sub_type = ''
|
return main_type, sub_type
|
||||||
|
|
||||||
def has_mimetype(self):
|
def has_mimetype(self):
|
||||||
"""
|
"""
|
||||||
|
@ -243,7 +247,6 @@ class KittenGroomerBase(object):
|
||||||
self.cur_file = None
|
self.cur_file = None
|
||||||
self.logger = GroomerLogger(self.dst_root_dir, debug)
|
self.logger = GroomerLogger(self.dst_root_dir, debug)
|
||||||
|
|
||||||
# ##### Helpers #####
|
|
||||||
def _safe_rmtree(self, directory):
|
def _safe_rmtree(self, directory):
|
||||||
"""Remove a directory tree if it exists."""
|
"""Remove a directory tree if it exists."""
|
||||||
if os.path.exists(directory):
|
if os.path.exists(directory):
|
||||||
|
|
Loading…
Reference in New Issue